xref: /titanic_53/usr/src/uts/sun4v/io/vsw.c (revision e1ebb9ec908bc2d0a8810f137ebd6566cc8a8061)
11ae08745Sheppo /*
21ae08745Sheppo  * CDDL HEADER START
31ae08745Sheppo  *
41ae08745Sheppo  * The contents of this file are subject to the terms of the
51ae08745Sheppo  * Common Development and Distribution License (the "License").
61ae08745Sheppo  * You may not use this file except in compliance with the License.
71ae08745Sheppo  *
81ae08745Sheppo  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
91ae08745Sheppo  * or http://www.opensolaris.org/os/licensing.
101ae08745Sheppo  * See the License for the specific language governing permissions
111ae08745Sheppo  * and limitations under the License.
121ae08745Sheppo  *
131ae08745Sheppo  * When distributing Covered Code, include this CDDL HEADER in each
141ae08745Sheppo  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
151ae08745Sheppo  * If applicable, add the following below this CDDL HEADER, with the
161ae08745Sheppo  * fields enclosed by brackets "[]" replaced with your own identifying
171ae08745Sheppo  * information: Portions Copyright [yyyy] [name of copyright owner]
181ae08745Sheppo  *
191ae08745Sheppo  * CDDL HEADER END
201ae08745Sheppo  */
211ae08745Sheppo 
221ae08745Sheppo /*
231ae08745Sheppo  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
241ae08745Sheppo  * Use is subject to license terms.
251ae08745Sheppo  */
261ae08745Sheppo 
271ae08745Sheppo #pragma ident	"%Z%%M%	%I%	%E% SMI"
281ae08745Sheppo 
291ae08745Sheppo #include <sys/types.h>
301ae08745Sheppo #include <sys/errno.h>
311ae08745Sheppo #include <sys/debug.h>
321ae08745Sheppo #include <sys/time.h>
331ae08745Sheppo #include <sys/sysmacros.h>
341ae08745Sheppo #include <sys/systm.h>
351ae08745Sheppo #include <sys/user.h>
361ae08745Sheppo #include <sys/stropts.h>
371ae08745Sheppo #include <sys/stream.h>
381ae08745Sheppo #include <sys/strlog.h>
391ae08745Sheppo #include <sys/strsubr.h>
401ae08745Sheppo #include <sys/cmn_err.h>
411ae08745Sheppo #include <sys/cpu.h>
421ae08745Sheppo #include <sys/kmem.h>
431ae08745Sheppo #include <sys/conf.h>
441ae08745Sheppo #include <sys/ddi.h>
451ae08745Sheppo #include <sys/sunddi.h>
461ae08745Sheppo #include <sys/ksynch.h>
471ae08745Sheppo #include <sys/stat.h>
481ae08745Sheppo #include <sys/kstat.h>
491ae08745Sheppo #include <sys/vtrace.h>
501ae08745Sheppo #include <sys/strsun.h>
511ae08745Sheppo #include <sys/dlpi.h>
521ae08745Sheppo #include <sys/ethernet.h>
531ae08745Sheppo #include <net/if.h>
541ae08745Sheppo #include <sys/varargs.h>
551ae08745Sheppo #include <sys/machsystm.h>
561ae08745Sheppo #include <sys/modctl.h>
571ae08745Sheppo #include <sys/modhash.h>
581ae08745Sheppo #include <sys/mac.h>
59ba2e4443Sseb #include <sys/mac_ether.h>
601ae08745Sheppo #include <sys/taskq.h>
611ae08745Sheppo #include <sys/note.h>
621ae08745Sheppo #include <sys/mach_descrip.h>
631ae08745Sheppo #include <sys/mac.h>
641ae08745Sheppo #include <sys/mdeg.h>
651ae08745Sheppo #include <sys/ldc.h>
661ae08745Sheppo #include <sys/vsw_fdb.h>
671ae08745Sheppo #include <sys/vsw.h>
681ae08745Sheppo #include <sys/vio_mailbox.h>
691ae08745Sheppo #include <sys/vnet_mailbox.h>
701ae08745Sheppo #include <sys/vnet_common.h>
71d10e4ef2Snarayan #include <sys/vio_util.h>
72d10e4ef2Snarayan #include <sys/sdt.h>
731ae08745Sheppo 
741ae08745Sheppo /*
751ae08745Sheppo  * Function prototypes.
761ae08745Sheppo  */
771ae08745Sheppo static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
781ae08745Sheppo static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
791ae08745Sheppo static	int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
801ae08745Sheppo static	void vsw_get_md_properties(vsw_t *vswp);
81*e1ebb9ecSlm66018 static	int vsw_get_physaddr(vsw_t *);
821ae08745Sheppo static	int vsw_setup_layer2(vsw_t *);
831ae08745Sheppo static	int vsw_setup_layer3(vsw_t *);
841ae08745Sheppo 
851ae08745Sheppo /* MAC layer routines */
861ae08745Sheppo static	int vsw_mac_attach(vsw_t *vswp);
871ae08745Sheppo static	void vsw_mac_detach(vsw_t *vswp);
88*e1ebb9ecSlm66018 static	int vsw_get_hw_maddr(vsw_t *);
89*e1ebb9ecSlm66018 static	int vsw_set_hw(vsw_t *, vsw_port_t *);
90*e1ebb9ecSlm66018 static	int vsw_set_hw_promisc(vsw_t *, vsw_port_t *);
91*e1ebb9ecSlm66018 static	int vsw_unset_hw(vsw_t *, vsw_port_t *);
92*e1ebb9ecSlm66018 static	int vsw_unset_hw_promisc(vsw_t *, vsw_port_t *);
93*e1ebb9ecSlm66018 static	int vsw_reconfig_hw(vsw_t *);
941ae08745Sheppo static void vsw_rx_cb(void *, mac_resource_handle_t, mblk_t *);
951ae08745Sheppo static mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
961ae08745Sheppo static int vsw_mac_register(vsw_t *);
971ae08745Sheppo static int vsw_mac_unregister(vsw_t *);
98ba2e4443Sseb static int vsw_m_stat(void *, uint_t, uint64_t *);
991ae08745Sheppo static void vsw_m_stop(void *arg);
1001ae08745Sheppo static int vsw_m_start(void *arg);
1011ae08745Sheppo static int vsw_m_unicst(void *arg, const uint8_t *);
1021ae08745Sheppo static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
1031ae08745Sheppo static int vsw_m_promisc(void *arg, boolean_t);
1041ae08745Sheppo static mblk_t *vsw_m_tx(void *arg, mblk_t *);
1051ae08745Sheppo 
1061ae08745Sheppo /* MDEG routines */
1071ae08745Sheppo static	void vsw_mdeg_register(vsw_t *vswp);
1081ae08745Sheppo static	void vsw_mdeg_unregister(vsw_t *vswp);
1091ae08745Sheppo static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
1101ae08745Sheppo 
1111ae08745Sheppo /* Port add/deletion routines */
1121ae08745Sheppo static	int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
1131ae08745Sheppo static	int vsw_port_attach(vsw_t *vswp, int p_instance,
1141ae08745Sheppo 	uint64_t *ldcids, int nids, struct ether_addr *macaddr);
1151ae08745Sheppo static	int vsw_detach_ports(vsw_t *vswp);
1161ae08745Sheppo static	int vsw_port_detach(vsw_t *vswp, int p_instance);
1171ae08745Sheppo static	int vsw_port_delete(vsw_port_t *port);
1181ae08745Sheppo static	int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id);
1191ae08745Sheppo static	int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id);
1201ae08745Sheppo static	int vsw_init_ldcs(vsw_port_t *port);
1211ae08745Sheppo static	int vsw_uninit_ldcs(vsw_port_t *port);
1221ae08745Sheppo static	int vsw_ldc_init(vsw_ldc_t *ldcp);
1231ae08745Sheppo static	int vsw_ldc_uninit(vsw_ldc_t *ldcp);
1241ae08745Sheppo static	int vsw_drain_ldcs(vsw_port_t *port);
1251ae08745Sheppo static	int vsw_drain_port_taskq(vsw_port_t *port);
1261ae08745Sheppo static	void vsw_marker_task(void *);
1271ae08745Sheppo static	vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
1281ae08745Sheppo static	int vsw_plist_del_node(vsw_t *, vsw_port_t *port);
1291ae08745Sheppo 
1301ae08745Sheppo /* Interrupt routines */
1311ae08745Sheppo static	uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg);
1321ae08745Sheppo 
1331ae08745Sheppo /* Handshake routines */
1341ae08745Sheppo static	void vsw_restart_handshake(vsw_ldc_t *);
1351ae08745Sheppo static	int vsw_check_flag(vsw_ldc_t *, int, uint64_t);
1361ae08745Sheppo static	void vsw_next_milestone(vsw_ldc_t *);
1371ae08745Sheppo static	int vsw_supported_version(vio_ver_msg_t *);
1381ae08745Sheppo 
1391ae08745Sheppo /* Data processing routines */
1401ae08745Sheppo static void vsw_process_pkt(void *);
1411ae08745Sheppo static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t);
1421ae08745Sheppo static void vsw_process_ctrl_pkt(void *);
1431ae08745Sheppo static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *);
1441ae08745Sheppo static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *);
1451ae08745Sheppo static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *);
1461ae08745Sheppo static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *);
1471ae08745Sheppo static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *);
1481ae08745Sheppo static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *);
1491ae08745Sheppo static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);
1501ae08745Sheppo static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *);
1511ae08745Sheppo static void vsw_process_data_raw_pkt(vsw_ldc_t *, void *);
1521ae08745Sheppo static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *);
1531ae08745Sheppo static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t);
1541ae08745Sheppo 
1551ae08745Sheppo /* Switching/data transmit routines */
1561ae08745Sheppo static	void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
1571ae08745Sheppo 	    vsw_port_t *port, mac_resource_handle_t);
1581ae08745Sheppo static	void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
1591ae08745Sheppo 	    vsw_port_t *port, mac_resource_handle_t);
1601ae08745Sheppo static	int vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller,
1611ae08745Sheppo 	    vsw_port_t *port);
1621ae08745Sheppo static	int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller,
1631ae08745Sheppo 	    vsw_port_t *port);
1641ae08745Sheppo static	int vsw_portsend(vsw_port_t *, mblk_t *);
1651ae08745Sheppo static	int vsw_dringsend(vsw_ldc_t *, mblk_t *);
1661ae08745Sheppo static	int vsw_descrsend(vsw_ldc_t *, mblk_t *);
1671ae08745Sheppo 
1681ae08745Sheppo /* Packet creation routines */
1691ae08745Sheppo static void vsw_send_ver(vsw_ldc_t *);
1701ae08745Sheppo static void vsw_send_attr(vsw_ldc_t *);
1711ae08745Sheppo static vio_dring_reg_msg_t *vsw_create_dring_info_pkt(vsw_ldc_t *);
1721ae08745Sheppo static void vsw_send_dring_info(vsw_ldc_t *);
1731ae08745Sheppo static void vsw_send_rdx(vsw_ldc_t *);
1741ae08745Sheppo 
1751ae08745Sheppo static void vsw_send_msg(vsw_ldc_t *, void *, int);
1761ae08745Sheppo 
1771ae08745Sheppo /* Forwarding database (FDB) routines */
1781ae08745Sheppo static	int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port);
1791ae08745Sheppo static	int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port);
1801ae08745Sheppo static	vsw_port_t *vsw_lookup_fdb(vsw_t *vswp, struct ether_header *);
1811ae08745Sheppo static	int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
1821ae08745Sheppo static	int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
1831ae08745Sheppo static	int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
1841ae08745Sheppo static	void vsw_del_addr(uint8_t, void *, uint64_t);
1851ae08745Sheppo static	void vsw_del_mcst_port(vsw_port_t *);
1861ae08745Sheppo static	void vsw_del_mcst_vsw(vsw_t *);
1871ae08745Sheppo 
1881ae08745Sheppo /* Dring routines */
1891ae08745Sheppo static dring_info_t *vsw_create_dring(vsw_ldc_t *);
1901ae08745Sheppo static void vsw_create_privring(vsw_ldc_t *);
1911ae08745Sheppo static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp);
1921ae08745Sheppo static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **,
1931ae08745Sheppo     int *);
1941ae08745Sheppo static dring_info_t *vsw_ident2dring(lane_t *, uint64_t);
1951ae08745Sheppo 
1961ae08745Sheppo static void vsw_set_lane_attr(vsw_t *, lane_t *);
1971ae08745Sheppo static int vsw_check_attr(vnet_attr_msg_t *, vsw_port_t *);
1981ae08745Sheppo static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg);
1991ae08745Sheppo static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *);
2001ae08745Sheppo static int vsw_check_dring_info(vio_dring_reg_msg_t *);
2011ae08745Sheppo 
2021ae08745Sheppo /* Misc support routines */
2031ae08745Sheppo static	caddr_t vsw_print_ethaddr(uint8_t *addr, char *ebuf);
2041ae08745Sheppo static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t);
2051ae08745Sheppo static int vsw_free_ring(dring_info_t *);
2061ae08745Sheppo 
207d10e4ef2Snarayan 
2081ae08745Sheppo /* Debugging routines */
2091ae08745Sheppo static void dump_flags(uint64_t);
2101ae08745Sheppo static void display_state(void);
2111ae08745Sheppo static void display_lane(lane_t *);
2121ae08745Sheppo static void display_ring(dring_info_t *);
2131ae08745Sheppo 
2141ae08745Sheppo int	vsw_num_handshakes = 3;		/* # of handshake attempts */
2151ae08745Sheppo int	vsw_wretries = 100;		/* # of write attempts */
216d10e4ef2Snarayan int	vsw_chain_len = 150;		/* max # of mblks in msg chain */
217d10e4ef2Snarayan int	vsw_desc_delay = 0;		/* delay in us */
218d10e4ef2Snarayan int	vsw_read_attempts = 5;		/* # of reads of descriptor */
219d10e4ef2Snarayan 
220d10e4ef2Snarayan uint32_t	vsw_mblk_size = VSW_MBLK_SIZE;
221d10e4ef2Snarayan uint32_t	vsw_num_mblks = VSW_NUM_MBLKS;
222d10e4ef2Snarayan 
2231ae08745Sheppo 
2241ae08745Sheppo /*
2251ae08745Sheppo  * mode specific frame switching function
2261ae08745Sheppo  */
2271ae08745Sheppo void		(*vsw_switch_frame)(vsw_t *, mblk_t *, int, vsw_port_t *,
2281ae08745Sheppo 			mac_resource_handle_t);
2291ae08745Sheppo 
230ba2e4443Sseb static	mac_callbacks_t	vsw_m_callbacks = {
231ba2e4443Sseb 	0,
232ba2e4443Sseb 	vsw_m_stat,
233ba2e4443Sseb 	vsw_m_start,
234ba2e4443Sseb 	vsw_m_stop,
235ba2e4443Sseb 	vsw_m_promisc,
236ba2e4443Sseb 	vsw_m_multicst,
237ba2e4443Sseb 	vsw_m_unicst,
238ba2e4443Sseb 	vsw_m_tx,
239ba2e4443Sseb 	NULL,
240ba2e4443Sseb 	NULL,
241ba2e4443Sseb 	NULL
242ba2e4443Sseb };
243ba2e4443Sseb 
2441ae08745Sheppo static	struct	cb_ops	vsw_cb_ops = {
2451ae08745Sheppo 	nulldev,			/* cb_open */
2461ae08745Sheppo 	nulldev,			/* cb_close */
2471ae08745Sheppo 	nodev,				/* cb_strategy */
2481ae08745Sheppo 	nodev,				/* cb_print */
2491ae08745Sheppo 	nodev,				/* cb_dump */
2501ae08745Sheppo 	nodev,				/* cb_read */
2511ae08745Sheppo 	nodev,				/* cb_write */
2521ae08745Sheppo 	nodev,				/* cb_ioctl */
2531ae08745Sheppo 	nodev,				/* cb_devmap */
2541ae08745Sheppo 	nodev,				/* cb_mmap */
2551ae08745Sheppo 	nodev,				/* cb_segmap */
2561ae08745Sheppo 	nochpoll,			/* cb_chpoll */
2571ae08745Sheppo 	ddi_prop_op,			/* cb_prop_op */
2581ae08745Sheppo 	NULL,				/* cb_stream */
2591ae08745Sheppo 	D_MP,				/* cb_flag */
2601ae08745Sheppo 	CB_REV,				/* rev */
2611ae08745Sheppo 	nodev,				/* int (*cb_aread)() */
2621ae08745Sheppo 	nodev				/* int (*cb_awrite)() */
2631ae08745Sheppo };
2641ae08745Sheppo 
2651ae08745Sheppo static	struct	dev_ops	vsw_ops = {
2661ae08745Sheppo 	DEVO_REV,		/* devo_rev */
2671ae08745Sheppo 	0,			/* devo_refcnt */
2681ae08745Sheppo 	vsw_getinfo,		/* devo_getinfo */
2691ae08745Sheppo 	nulldev,		/* devo_identify */
2701ae08745Sheppo 	nulldev,		/* devo_probe */
2711ae08745Sheppo 	vsw_attach,		/* devo_attach */
2721ae08745Sheppo 	vsw_detach,		/* devo_detach */
2731ae08745Sheppo 	nodev,			/* devo_reset */
2741ae08745Sheppo 	&vsw_cb_ops,		/* devo_cb_ops */
2751ae08745Sheppo 	(struct bus_ops *)NULL,	/* devo_bus_ops */
2761ae08745Sheppo 	ddi_power		/* devo_power */
2771ae08745Sheppo };
2781ae08745Sheppo 
2791ae08745Sheppo extern	struct	mod_ops	mod_driverops;
2801ae08745Sheppo static struct modldrv vswmodldrv = {
2811ae08745Sheppo 	&mod_driverops,
2821ae08745Sheppo 	"sun4v Virtual Switch Driver %I%",
2831ae08745Sheppo 	&vsw_ops,
2841ae08745Sheppo };
2851ae08745Sheppo 
/*
 * Take / drop both per-channel locks.  Entry acquires ldc_cblock and
 * then ldc_txlock; exit releases them in the opposite order.
 *
 * NOTE: each macro expands to two complete statements (trailing ';'
 * included), so it must not be used as the unbraced body of an
 * if/else/loop.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
	mutex_enter(&((ldcp)->ldc_cblock));\
	mutex_enter(&((ldcp)->ldc_txlock));

#define	LDC_EXIT_LOCK(ldcp)	\
	mutex_exit(&((ldcp)->ldc_txlock));\
	mutex_exit(&((ldcp)->ldc_cblock));
2921ae08745Sheppo 
2931ae08745Sheppo /* Driver soft state ptr  */
2941ae08745Sheppo static void	*vsw_state;
2951ae08745Sheppo 
2961ae08745Sheppo /*
2971ae08745Sheppo  * Linked list of "vsw_t" structures - one per instance.
2981ae08745Sheppo  */
2991ae08745Sheppo vsw_t		*vsw_head = NULL;
3001ae08745Sheppo krwlock_t	vsw_rw;
3011ae08745Sheppo 
3021ae08745Sheppo /*
3031ae08745Sheppo  * Property names
3041ae08745Sheppo  */
static char	vdev_propname[]		= "virtual-device";
static char	vsw_propname[]		= "virtual-network-switch";
static char	physdev_propname[]	= "vsw-phys-dev";
static char	smode_propname[]	= "vsw-switch-mode";
static char	macaddr_propname[]	= "local-mac-address";
static char	remaddr_propname[]	= "remote-mac-address";
static char	ldcids_propname[]	= "ldc-ids";
static char	chan_propname[]		= "channel-endpoint";
static char	id_propname[]		= "id";
static char	reg_propname[]		= "reg";
3151ae08745Sheppo 
3161ae08745Sheppo /* supported versions */
3171ae08745Sheppo static	ver_sup_t	vsw_versions[] = { {1, 0} };
3181ae08745Sheppo 
3191ae08745Sheppo /*
3201ae08745Sheppo  * Matching criteria passed to the MDEG to register interest
3211ae08745Sheppo  * in changes to 'virtual-device-port' nodes identified by their
3221ae08745Sheppo  * 'id' property.
3231ae08745Sheppo  */
3241ae08745Sheppo static md_prop_match_t vport_prop_match[] = {
3251ae08745Sheppo 	{ MDET_PROP_VAL,    "id"   },
3261ae08745Sheppo 	{ MDET_LIST_END,    NULL    }
3271ae08745Sheppo };
3281ae08745Sheppo 
3291ae08745Sheppo static mdeg_node_match_t vport_match = { "virtual-device-port",
3301ae08745Sheppo 						vport_prop_match };
3311ae08745Sheppo 
3321ae08745Sheppo /*
3331ae08745Sheppo  * Specification of an MD node passed to the MDEG to filter any
3341ae08745Sheppo  * 'vport' nodes that do not belong to the specified node. This
3351ae08745Sheppo  * template is copied for each vsw instance and filled in with
3361ae08745Sheppo  * the appropriate 'cfg-handle' value before being passed to the MDEG.
3371ae08745Sheppo  */
3381ae08745Sheppo static mdeg_prop_spec_t vsw_prop_template[] = {
3391ae08745Sheppo 	{ MDET_PROP_STR,    "name",		vsw_propname },
3401ae08745Sheppo 	{ MDET_PROP_VAL,    "cfg-handle",	NULL	},
3411ae08745Sheppo 	{ MDET_LIST_END,    NULL,		NULL	}
3421ae08745Sheppo };
3431ae08745Sheppo 
/*
 * Store 'val' into the ps_val member of entry [1] of a property spec
 * array.  The expansion already ends in ';'.
 */
#define	VSW_SET_MDEG_PROP_INST(specp, val)	\
	(specp)[1].ps_val = (val);
3451ae08745Sheppo 
/*
 * Debug message mask.  Each bit enables one class of message:
 *
 *	0x01	Function entry/exit tracing
 *	0x02	Internal function messages
 *	0x04	Verbose internal messages
 *	0x08	Warning messages
 *	0x10	Error messages
 *
 * Set to 0x1f to enable all msgs or 0x0 to turn all off.
 */
int vswdbg = 0;
3601ae08745Sheppo 
3611ae08745Sheppo static void
3621ae08745Sheppo vswdebug(vsw_t *vswp, const char *fmt, ...)
3631ae08745Sheppo {
3641ae08745Sheppo 	char buf[512];
3651ae08745Sheppo 	va_list ap;
3661ae08745Sheppo 
3671ae08745Sheppo 	va_start(ap, fmt);
3681ae08745Sheppo 	(void) vsprintf(buf, fmt, ap);
3691ae08745Sheppo 	va_end(ap);
3701ae08745Sheppo 
3711ae08745Sheppo 	if (vswp == NULL)
3721ae08745Sheppo 		cmn_err(CE_CONT, "%s\n", buf);
3731ae08745Sheppo 	else
3741ae08745Sheppo 		cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf);
3751ae08745Sheppo }
3761ae08745Sheppo 
/*
 * The state dump hooks are controlled by their own private switch,
 * independent of the debug message mask.
 */
#define	DUMP_STATE	0

#if !DUMP_STATE

/* State dumping compiled out: every hook expands to nothing. */
#define	DUMP_TAG(tag)
#define	DUMP_TAG_PTR(tag)
#define	DUMP_FLAGS(state)
#define	DISPLAY_STATE()

#else

/* Print the three tag fields of a tag passed by value. */
#define	DUMP_TAG(tag) \
{			\
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype);	\
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env);	\
}

/* Same as DUMP_TAG, for a tag passed by pointer. */
#define	DUMP_TAG_PTR(tag) \
{			\
	D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \
	D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype);	\
	D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env);	\
}

#define	DUMP_FLAGS(flags) dump_flags(flags);
#define	DISPLAY_STATE()	display_state()

#endif	/* !DUMP_STATE */
4101ae08745Sheppo 
/*
 * Debug print macros.  Each is used like a function call, e.g.
 *
 *	D1(vswp, "fmt", args...);
 *
 * and forwards to vswdebug() when its bit is set in vswdbg (DEBUG
 * builds) or never (non-DEBUG builds, where the call is still parsed
 * but is dead code).
 *
 * The "if (!enabled) { } else vswdebug" shape is deliberate: the macro
 * supplies its own 'else', so when it is used as the unbraced body of
 * an 'if', a following 'else' binds to the caller's 'if' and is not
 * captured by the macro.
 */
#ifdef DEBUG

#define	D1			\
if (!(vswdbg & 0x01)) { } else	\
	vswdebug

#define	D2			\
if (!(vswdbg & 0x02)) { } else	\
	vswdebug

#define	D3			\
if (!(vswdbg & 0x04)) { } else	\
	vswdebug

#define	DWARN			\
if (!(vswdbg & 0x08)) { } else	\
	vswdebug

#define	DERR			\
if (!(vswdbg & 0x10)) { } else	\
	vswdebug

#else

#define	DERR		if (1) { } else	vswdebug
#define	DWARN		if (1) { } else	vswdebug
#define	D1		if (1) { } else	vswdebug
#define	D2		if (1) { } else	vswdebug
#define	D3		if (1) { } else	vswdebug

#endif	/* DEBUG */
4421ae08745Sheppo 
4431ae08745Sheppo static struct modlinkage modlinkage = {
4441ae08745Sheppo 	MODREV_1,
4451ae08745Sheppo 	&vswmodldrv,
4461ae08745Sheppo 	NULL
4471ae08745Sheppo };
4481ae08745Sheppo 
4491ae08745Sheppo int
4501ae08745Sheppo _init(void)
4511ae08745Sheppo {
4521ae08745Sheppo 	int status;
4531ae08745Sheppo 
4541ae08745Sheppo 	rw_init(&vsw_rw, NULL, RW_DRIVER, NULL);
4551ae08745Sheppo 
4561ae08745Sheppo 	status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1);
4571ae08745Sheppo 	if (status != 0) {
4581ae08745Sheppo 		return (status);
4591ae08745Sheppo 	}
4601ae08745Sheppo 
4611ae08745Sheppo 	mac_init_ops(&vsw_ops, "vsw");
4621ae08745Sheppo 	status = mod_install(&modlinkage);
4631ae08745Sheppo 	if (status != 0) {
4641ae08745Sheppo 		ddi_soft_state_fini(&vsw_state);
4651ae08745Sheppo 	}
4661ae08745Sheppo 	return (status);
4671ae08745Sheppo }
4681ae08745Sheppo 
4691ae08745Sheppo int
4701ae08745Sheppo _fini(void)
4711ae08745Sheppo {
4721ae08745Sheppo 	int status;
4731ae08745Sheppo 
4741ae08745Sheppo 	status = mod_remove(&modlinkage);
4751ae08745Sheppo 	if (status != 0)
4761ae08745Sheppo 		return (status);
4771ae08745Sheppo 	mac_fini_ops(&vsw_ops);
4781ae08745Sheppo 	ddi_soft_state_fini(&vsw_state);
4791ae08745Sheppo 
4801ae08745Sheppo 	rw_destroy(&vsw_rw);
4811ae08745Sheppo 
4821ae08745Sheppo 	return (status);
4831ae08745Sheppo }
4841ae08745Sheppo 
4851ae08745Sheppo int
4861ae08745Sheppo _info(struct modinfo *modinfop)
4871ae08745Sheppo {
4881ae08745Sheppo 	return (mod_info(&modlinkage, modinfop));
4891ae08745Sheppo }
4901ae08745Sheppo 
4911ae08745Sheppo static int
4921ae08745Sheppo vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
4931ae08745Sheppo {
4941ae08745Sheppo 	vsw_t		*vswp;
495*e1ebb9ecSlm66018 	int		instance, i;
4961ae08745Sheppo 	char		hashname[MAXNAMELEN];
4971ae08745Sheppo 	char		qname[TASKQ_NAMELEN];
4981ae08745Sheppo 	int		rv = 1;
4991ae08745Sheppo 	enum		{ PROG_init = 0x0, PROG_if_lock = 0x1,
5001ae08745Sheppo 				PROG_fdb = 0x2, PROG_mfdb = 0x4,
5011ae08745Sheppo 				PROG_report_dev = 0x8, PROG_plist = 0x10,
5021ae08745Sheppo 				PROG_taskq = 0x20}
5031ae08745Sheppo 			progress;
5041ae08745Sheppo 
5051ae08745Sheppo 	progress = PROG_init;
5061ae08745Sheppo 
5071ae08745Sheppo 	switch (cmd) {
5081ae08745Sheppo 	case DDI_ATTACH:
5091ae08745Sheppo 		break;
5101ae08745Sheppo 	case DDI_RESUME:
5111ae08745Sheppo 		/* nothing to do for this non-device */
5121ae08745Sheppo 		return (DDI_SUCCESS);
5131ae08745Sheppo 	case DDI_PM_RESUME:
5141ae08745Sheppo 	default:
5151ae08745Sheppo 		return (DDI_FAILURE);
5161ae08745Sheppo 	}
5171ae08745Sheppo 
5181ae08745Sheppo 	instance = ddi_get_instance(dip);
5191ae08745Sheppo 	if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) {
5201ae08745Sheppo 		DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance);
5211ae08745Sheppo 		return (DDI_FAILURE);
5221ae08745Sheppo 	}
5231ae08745Sheppo 	vswp = ddi_get_soft_state(vsw_state, instance);
5241ae08745Sheppo 
5251ae08745Sheppo 	if (vswp == NULL) {
5261ae08745Sheppo 		DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance);
5271ae08745Sheppo 		goto vsw_attach_fail;
5281ae08745Sheppo 	}
5291ae08745Sheppo 
5301ae08745Sheppo 	vswp->dip = dip;
5311ae08745Sheppo 	vswp->instance = instance;
5321ae08745Sheppo 	ddi_set_driver_private(dip, (caddr_t)vswp);
5331ae08745Sheppo 
5341ae08745Sheppo 	rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL);
5351ae08745Sheppo 
5361ae08745Sheppo 	progress |= PROG_if_lock;
5371ae08745Sheppo 
5381ae08745Sheppo 	/*
5391ae08745Sheppo 	 * Get the various properties such as physical device name
5401ae08745Sheppo 	 * (vsw-phys-dev), switch mode etc from the MD.
5411ae08745Sheppo 	 */
5421ae08745Sheppo 	vsw_get_md_properties(vswp);
5431ae08745Sheppo 
5441ae08745Sheppo 	/* setup the unicast forwarding database  */
5451ae08745Sheppo 	(void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d",
5461ae08745Sheppo 							vswp->instance);
5471ae08745Sheppo 	D2(vswp, "creating unicast hash table (%s)...", hashname);
5481ae08745Sheppo 	vswp->fdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
5491ae08745Sheppo 		mod_hash_null_valdtor, sizeof (void *));
5501ae08745Sheppo 
5511ae08745Sheppo 	progress |= PROG_fdb;
5521ae08745Sheppo 
5531ae08745Sheppo 	/* setup the multicast fowarding database */
5541ae08745Sheppo 	(void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d",
5551ae08745Sheppo 							vswp->instance);
5561ae08745Sheppo 	D2(vswp, "creating multicast hash table %s)...", hashname);
5571ae08745Sheppo 	rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL);
5581ae08745Sheppo 	vswp->mfdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS,
5591ae08745Sheppo 			mod_hash_null_valdtor, sizeof (void *));
5601ae08745Sheppo 
5611ae08745Sheppo 	progress |= PROG_mfdb;
5621ae08745Sheppo 
5631ae08745Sheppo 	/*
5641ae08745Sheppo 	 * create lock protecting list of multicast addresses
5651ae08745Sheppo 	 * which could come via m_multicst() entry point when plumbed.
5661ae08745Sheppo 	 */
5671ae08745Sheppo 	mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL);
5681ae08745Sheppo 	vswp->mcap = NULL;
5691ae08745Sheppo 
5701ae08745Sheppo 	ddi_report_dev(vswp->dip);
5711ae08745Sheppo 
5721ae08745Sheppo 	progress |= PROG_report_dev;
5731ae08745Sheppo 
5741ae08745Sheppo 	WRITE_ENTER(&vsw_rw);
5751ae08745Sheppo 	vswp->next = vsw_head;
5761ae08745Sheppo 	vsw_head = vswp;
5771ae08745Sheppo 	RW_EXIT(&vsw_rw);
5781ae08745Sheppo 
5791ae08745Sheppo 	/* setup the port list */
5801ae08745Sheppo 	rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL);
5811ae08745Sheppo 	vswp->plist.head = NULL;
5821ae08745Sheppo 
5831ae08745Sheppo 	progress |= PROG_plist;
5841ae08745Sheppo 
5851ae08745Sheppo 	/*
5861ae08745Sheppo 	 * Create the taskq which will process all the VIO
5871ae08745Sheppo 	 * control messages.
5881ae08745Sheppo 	 */
5891ae08745Sheppo 	(void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance);
5901ae08745Sheppo 	if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1,
5911ae08745Sheppo 					TASKQ_DEFAULTPRI, 0)) == NULL) {
5921ae08745Sheppo 		cmn_err(CE_WARN, "Unable to create task queue");
5931ae08745Sheppo 		goto vsw_attach_fail;
5941ae08745Sheppo 	}
5951ae08745Sheppo 
5961ae08745Sheppo 	progress |= PROG_taskq;
5971ae08745Sheppo 
5981ae08745Sheppo 	/* select best switching mode */
599*e1ebb9ecSlm66018 	for (i = 0; i < vswp->smode_num; i++) {
600*e1ebb9ecSlm66018 		vswp->smode_idx = i;
601*e1ebb9ecSlm66018 		switch (vswp->smode[i]) {
6021ae08745Sheppo 		case VSW_LAYER2:
6031ae08745Sheppo 		case VSW_LAYER2_PROMISC:
6041ae08745Sheppo 			rv = vsw_setup_layer2(vswp);
6051ae08745Sheppo 			break;
6061ae08745Sheppo 
6071ae08745Sheppo 		case VSW_LAYER3:
6081ae08745Sheppo 			rv = vsw_setup_layer3(vswp);
6091ae08745Sheppo 			break;
6101ae08745Sheppo 
6111ae08745Sheppo 		default:
6121ae08745Sheppo 			DERR(vswp, "unknown switch mode");
613*e1ebb9ecSlm66018 			rv = 1;
6141ae08745Sheppo 			break;
6151ae08745Sheppo 		}
6161ae08745Sheppo 
617*e1ebb9ecSlm66018 		if (rv == 0)
6181ae08745Sheppo 			break;
6191ae08745Sheppo 	}
6201ae08745Sheppo 
6211ae08745Sheppo 	if (rv == 1) {
6221ae08745Sheppo 		cmn_err(CE_WARN, "Unable to setup switching mode");
6231ae08745Sheppo 		goto vsw_attach_fail;
6241ae08745Sheppo 	}
6251ae08745Sheppo 
6261ae08745Sheppo 	D2(vswp, "Operating in mode %d", vswp->smode[vswp->smode_idx]);
6271ae08745Sheppo 
6281ae08745Sheppo 	/*
6291ae08745Sheppo 	 * Register with the MAC layer as a network device so
6301ae08745Sheppo 	 * we can be plumbed if desired.
6311ae08745Sheppo 	 *
6321ae08745Sheppo 	 * Do this in both layer 2 and layer 3 mode.
6331ae08745Sheppo 	 */
6341ae08745Sheppo 	vswp->if_state &= ~VSW_IF_UP;
635*e1ebb9ecSlm66018 	if (vswp->mdprops & (VSW_MD_MACADDR | VSW_DEV_MACADDR)) {
6361ae08745Sheppo 		if (vsw_mac_register(vswp) != 0) {
6371ae08745Sheppo 			cmn_err(CE_WARN, "Unable to register as provider "
6381ae08745Sheppo 				" with MAC layer, continuing with attach");
6391ae08745Sheppo 		}
6401ae08745Sheppo 	}
6411ae08745Sheppo 
642d10e4ef2Snarayan 	/* prevent auto-detaching */
643d10e4ef2Snarayan 	if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip,
644d10e4ef2Snarayan 				DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) {
645d10e4ef2Snarayan 		cmn_err(CE_NOTE, "Unable to set \"%s\" property for "
646d10e4ef2Snarayan 			"instance %u", DDI_NO_AUTODETACH, instance);
647d10e4ef2Snarayan 	}
648d10e4ef2Snarayan 
6491ae08745Sheppo 	/*
6501ae08745Sheppo 	 * Now we have everything setup, register for MD change
6511ae08745Sheppo 	 * events.
6521ae08745Sheppo 	 */
6531ae08745Sheppo 	vsw_mdeg_register(vswp);
6541ae08745Sheppo 
6551ae08745Sheppo 	return (DDI_SUCCESS);
6561ae08745Sheppo 
6571ae08745Sheppo vsw_attach_fail:
6581ae08745Sheppo 	DERR(NULL, "vsw_attach: failed");
6591ae08745Sheppo 
6601ae08745Sheppo 	if (progress & PROG_taskq)
6611ae08745Sheppo 		ddi_taskq_destroy(vswp->taskq_p);
6621ae08745Sheppo 
6631ae08745Sheppo 	if (progress & PROG_plist)
6641ae08745Sheppo 		rw_destroy(&vswp->plist.lockrw);
6651ae08745Sheppo 
6661ae08745Sheppo 	if (progress & PROG_report_dev) {
6671ae08745Sheppo 		ddi_remove_minor_node(dip, NULL);
6681ae08745Sheppo 		mutex_destroy(&vswp->mca_lock);
6691ae08745Sheppo 	}
6701ae08745Sheppo 
6711ae08745Sheppo 	if (progress & PROG_mfdb) {
6721ae08745Sheppo 		mod_hash_destroy_hash(vswp->mfdb);
6731ae08745Sheppo 		vswp->mfdb = NULL;
6741ae08745Sheppo 		rw_destroy(&vswp->mfdbrw);
6751ae08745Sheppo 	}
6761ae08745Sheppo 
6771ae08745Sheppo 	if (progress & PROG_fdb) {
6781ae08745Sheppo 		mod_hash_destroy_hash(vswp->fdb);
6791ae08745Sheppo 		vswp->fdb = NULL;
6801ae08745Sheppo 	}
6811ae08745Sheppo 
6821ae08745Sheppo 	if (progress & PROG_if_lock)
6831ae08745Sheppo 		rw_destroy(&vswp->if_lockrw);
6841ae08745Sheppo 
6851ae08745Sheppo 	ddi_soft_state_free(vsw_state, instance);
6861ae08745Sheppo 	return (DDI_FAILURE);
6871ae08745Sheppo }
6881ae08745Sheppo 
6891ae08745Sheppo static int
6901ae08745Sheppo vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
6911ae08745Sheppo {
692d10e4ef2Snarayan 	vio_mblk_pool_t		*poolp, *npoolp;
6931ae08745Sheppo 	vsw_t			**vswpp, *vswp;
6941ae08745Sheppo 	int 			instance;
6951ae08745Sheppo 
6961ae08745Sheppo 	instance = ddi_get_instance(dip);
6971ae08745Sheppo 	vswp = ddi_get_soft_state(vsw_state, instance);
6981ae08745Sheppo 
6991ae08745Sheppo 	if (vswp == NULL) {
7001ae08745Sheppo 		return (DDI_FAILURE);
7011ae08745Sheppo 	}
7021ae08745Sheppo 
7031ae08745Sheppo 	switch (cmd) {
7041ae08745Sheppo 	case DDI_DETACH:
7051ae08745Sheppo 		break;
7061ae08745Sheppo 	case DDI_SUSPEND:
7071ae08745Sheppo 	case DDI_PM_SUSPEND:
7081ae08745Sheppo 	default:
7091ae08745Sheppo 		return (DDI_FAILURE);
7101ae08745Sheppo 	}
7111ae08745Sheppo 
7121ae08745Sheppo 	D2(vswp, "detaching instance %d", instance);
7131ae08745Sheppo 
714*e1ebb9ecSlm66018 	if (vswp->mdprops & (VSW_MD_MACADDR | VSW_DEV_MACADDR)) {
7151ae08745Sheppo 		if (vsw_mac_unregister(vswp) != 0) {
7161ae08745Sheppo 			cmn_err(CE_WARN, "Unable to detach from MAC layer");
7171ae08745Sheppo 			return (DDI_FAILURE);
7181ae08745Sheppo 		}
7191ae08745Sheppo 		rw_destroy(&vswp->if_lockrw);
720d10e4ef2Snarayan 	}
7211ae08745Sheppo 
7221ae08745Sheppo 	vsw_mdeg_unregister(vswp);
7231ae08745Sheppo 
724*e1ebb9ecSlm66018 	/* remove mac layer callback */
725*e1ebb9ecSlm66018 	if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
726*e1ebb9ecSlm66018 		mac_rx_remove(vswp->mh, vswp->mrh);
727*e1ebb9ecSlm66018 		vswp->mrh = NULL;
7281ae08745Sheppo 	}
7291ae08745Sheppo 
7301ae08745Sheppo 	if (vsw_detach_ports(vswp) != 0) {
7311ae08745Sheppo 		cmn_err(CE_WARN, "Unable to detach ports");
7321ae08745Sheppo 		return (DDI_FAILURE);
7331ae08745Sheppo 	}
7341ae08745Sheppo 
7351ae08745Sheppo 	/*
736*e1ebb9ecSlm66018 	 * Now that the ports have been deleted, stop and close
737*e1ebb9ecSlm66018 	 * the physical device.
738*e1ebb9ecSlm66018 	 */
739*e1ebb9ecSlm66018 	if (vswp->mh != NULL) {
740*e1ebb9ecSlm66018 		mac_stop(vswp->mh);
741*e1ebb9ecSlm66018 		mac_close(vswp->mh);
742*e1ebb9ecSlm66018 
743*e1ebb9ecSlm66018 		vswp->mh = NULL;
744*e1ebb9ecSlm66018 		vswp->txinfo = NULL;
745*e1ebb9ecSlm66018 	}
746*e1ebb9ecSlm66018 
747*e1ebb9ecSlm66018 	/*
748d10e4ef2Snarayan 	 * Destroy any free pools that may still exist.
749d10e4ef2Snarayan 	 */
750d10e4ef2Snarayan 	poolp = vswp->rxh;
751d10e4ef2Snarayan 	while (poolp != NULL) {
752d10e4ef2Snarayan 		npoolp = vswp->rxh = poolp->nextp;
753d10e4ef2Snarayan 		if (vio_destroy_mblks(poolp) != 0) {
754d10e4ef2Snarayan 			vswp->rxh = poolp;
755d10e4ef2Snarayan 			return (DDI_FAILURE);
756d10e4ef2Snarayan 		}
757d10e4ef2Snarayan 		poolp = npoolp;
758d10e4ef2Snarayan 	}
759d10e4ef2Snarayan 
760d10e4ef2Snarayan 	/*
7611ae08745Sheppo 	 * Remove this instance from any entries it may be on in
7621ae08745Sheppo 	 * the hash table by using the list of addresses maintained
7631ae08745Sheppo 	 * in the vsw_t structure.
7641ae08745Sheppo 	 */
7651ae08745Sheppo 	vsw_del_mcst_vsw(vswp);
7661ae08745Sheppo 
7671ae08745Sheppo 	vswp->mcap = NULL;
7681ae08745Sheppo 	mutex_destroy(&vswp->mca_lock);
7691ae08745Sheppo 
7701ae08745Sheppo 	/*
7711ae08745Sheppo 	 * By now any pending tasks have finished and the underlying
7721ae08745Sheppo 	 * ldc's have been destroyed, so its safe to delete the control
7731ae08745Sheppo 	 * message taskq.
7741ae08745Sheppo 	 */
7751ae08745Sheppo 	if (vswp->taskq_p != NULL)
7761ae08745Sheppo 		ddi_taskq_destroy(vswp->taskq_p);
7771ae08745Sheppo 
7781ae08745Sheppo 	/*
7791ae08745Sheppo 	 * At this stage all the data pointers in the hash table
7801ae08745Sheppo 	 * should be NULL, as all the ports have been removed and will
7811ae08745Sheppo 	 * have deleted themselves from the port lists which the data
7821ae08745Sheppo 	 * pointers point to. Hence we can destroy the table using the
7831ae08745Sheppo 	 * default destructors.
7841ae08745Sheppo 	 */
7851ae08745Sheppo 	D2(vswp, "vsw_detach: destroying hash tables..");
7861ae08745Sheppo 	mod_hash_destroy_hash(vswp->fdb);
7871ae08745Sheppo 	vswp->fdb = NULL;
7881ae08745Sheppo 
7891ae08745Sheppo 	WRITE_ENTER(&vswp->mfdbrw);
7901ae08745Sheppo 	mod_hash_destroy_hash(vswp->mfdb);
7911ae08745Sheppo 	vswp->mfdb = NULL;
7921ae08745Sheppo 	RW_EXIT(&vswp->mfdbrw);
7931ae08745Sheppo 	rw_destroy(&vswp->mfdbrw);
7941ae08745Sheppo 
7951ae08745Sheppo 	ddi_remove_minor_node(dip, NULL);
7961ae08745Sheppo 
7971ae08745Sheppo 	rw_destroy(&vswp->plist.lockrw);
7981ae08745Sheppo 	WRITE_ENTER(&vsw_rw);
7991ae08745Sheppo 	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
8001ae08745Sheppo 		if (*vswpp == vswp) {
8011ae08745Sheppo 			*vswpp = vswp->next;
8021ae08745Sheppo 			break;
8031ae08745Sheppo 		}
8041ae08745Sheppo 	}
8051ae08745Sheppo 	RW_EXIT(&vsw_rw);
8061ae08745Sheppo 	ddi_soft_state_free(vsw_state, instance);
8071ae08745Sheppo 
8081ae08745Sheppo 	return (DDI_SUCCESS);
8091ae08745Sheppo }
8101ae08745Sheppo 
8111ae08745Sheppo static int
8121ae08745Sheppo vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
8131ae08745Sheppo {
8141ae08745Sheppo 	_NOTE(ARGUNUSED(dip))
8151ae08745Sheppo 
8161ae08745Sheppo 	vsw_t	*vswp = NULL;
8171ae08745Sheppo 	dev_t	dev = (dev_t)arg;
8181ae08745Sheppo 	int	instance;
8191ae08745Sheppo 
8201ae08745Sheppo 	instance = getminor(dev);
8211ae08745Sheppo 
8221ae08745Sheppo 	switch (infocmd) {
8231ae08745Sheppo 	case DDI_INFO_DEVT2DEVINFO:
8241ae08745Sheppo 		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
8251ae08745Sheppo 			*result = NULL;
8261ae08745Sheppo 			return (DDI_FAILURE);
8271ae08745Sheppo 		}
8281ae08745Sheppo 		*result = vswp->dip;
8291ae08745Sheppo 		return (DDI_SUCCESS);
8301ae08745Sheppo 
8311ae08745Sheppo 	case DDI_INFO_DEVT2INSTANCE:
8321ae08745Sheppo 		*result = (void *)(uintptr_t)instance;
8331ae08745Sheppo 		return (DDI_SUCCESS);
8341ae08745Sheppo 
8351ae08745Sheppo 	default:
8361ae08745Sheppo 		*result = NULL;
8371ae08745Sheppo 		return (DDI_FAILURE);
8381ae08745Sheppo 	}
8391ae08745Sheppo }
8401ae08745Sheppo 
8411ae08745Sheppo /*
8421ae08745Sheppo  * Get the properties from our MD node.
8431ae08745Sheppo  */
8441ae08745Sheppo static void
8451ae08745Sheppo vsw_get_md_properties(vsw_t *vswp)
8461ae08745Sheppo {
8471ae08745Sheppo 	md_t		*mdp = NULL;
8481ae08745Sheppo 	int		num_nodes = 0;
8491ae08745Sheppo 	int		len = 0, listsz = 0;
8501ae08745Sheppo 	int		num_vdev = 0;
8511ae08745Sheppo 	int		i, idx;
8521ae08745Sheppo 	boolean_t	found_node = B_FALSE;
8531ae08745Sheppo 	char		*smode = NULL;
8541ae08745Sheppo 	char		*curr_mode = NULL;
8551ae08745Sheppo 	char		*physname = NULL;
8561ae08745Sheppo 	char		*node_name = NULL;
8571ae08745Sheppo 	char		*dev;
8581ae08745Sheppo 	uint64_t 	macaddr = 0;
8591ae08745Sheppo 	uint64_t	md_inst, obp_inst;
8601ae08745Sheppo 	mde_cookie_t	*listp = NULL;
8611ae08745Sheppo 	mde_cookie_t	rootnode;
8621ae08745Sheppo 
8631ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
8641ae08745Sheppo 
8651ae08745Sheppo 	/*
8661ae08745Sheppo 	 * Further down we compare the obp 'reg' property to the
8671ae08745Sheppo 	 * 'cfg-handle' property in the vsw MD node to determine
8681ae08745Sheppo 	 * if the node refers to this particular instance. So if
8691ae08745Sheppo 	 * we can't read the obp value then there is no point
8701ae08745Sheppo 	 * in proceeding further.
8711ae08745Sheppo 	 */
8721ae08745Sheppo 	if (ddi_prop_exists(DDI_DEV_T_ANY, vswp->dip,
8731ae08745Sheppo 			DDI_PROP_DONTPASS, reg_propname) != 1) {
8741ae08745Sheppo 		cmn_err(CE_WARN, "Unable to read %s property "
8751ae08745Sheppo 			"from OBP device node", reg_propname);
8761ae08745Sheppo 		return;
8771ae08745Sheppo 	}
8781ae08745Sheppo 
8791ae08745Sheppo 	obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
8801ae08745Sheppo 		DDI_PROP_DONTPASS, reg_propname, 0);
8811ae08745Sheppo 
8821ae08745Sheppo 	D2(vswp, "%s: obp_inst 0x%llx", __func__, obp_inst);
8831ae08745Sheppo 
8841ae08745Sheppo 	if ((mdp = md_get_handle()) == NULL) {
8851ae08745Sheppo 		DERR(vswp, "%s: unable to init MD", __func__);
8861ae08745Sheppo 		return;
8871ae08745Sheppo 	}
8881ae08745Sheppo 
8891ae08745Sheppo 	if ((num_nodes = md_node_count(mdp)) <= 0) {
8901ae08745Sheppo 		DERR(vswp, "%s: invalid number of  nodes found %d",
8911ae08745Sheppo 			__func__, num_nodes);
8921ae08745Sheppo 		(void) md_fini_handle(mdp);
8931ae08745Sheppo 		return;
8941ae08745Sheppo 	}
8951ae08745Sheppo 
8961ae08745Sheppo 	D2(vswp, "%s: %d nodes in total in MD", __func__, num_nodes);
8971ae08745Sheppo 
8981ae08745Sheppo 	/* allocate enough space for node list */
8991ae08745Sheppo 	listsz = num_nodes * sizeof (mde_cookie_t);
9001ae08745Sheppo 	listp = kmem_zalloc(listsz, KM_SLEEP);
9011ae08745Sheppo 
9021ae08745Sheppo 	rootnode = md_root_node(mdp);
9031ae08745Sheppo 
9041ae08745Sheppo 	/* Get the list of virtual devices */
9051ae08745Sheppo 	num_vdev = md_scan_dag(mdp, rootnode,
9061ae08745Sheppo 		md_find_name(mdp, vdev_propname),
9071ae08745Sheppo 		md_find_name(mdp, "fwd"), listp);
9081ae08745Sheppo 
9091ae08745Sheppo 	if (num_vdev <= 0) {
9101ae08745Sheppo 		DERR(vswp, "%s: didn't find any virtual-device nodes in MD",
9111ae08745Sheppo 			__func__);
9121ae08745Sheppo 		goto md_prop_exit;
9131ae08745Sheppo 	}
9141ae08745Sheppo 
9151ae08745Sheppo 	D2(vswp, "%s: %d virtual-device nodes found", __func__, num_vdev);
9161ae08745Sheppo 
9171ae08745Sheppo 	/* Look for the virtual switch nodes in the list */
9181ae08745Sheppo 	for (idx = 0; idx < num_vdev; idx++) {
9191ae08745Sheppo 		if (md_get_prop_str(mdp, listp[idx],
9201ae08745Sheppo 				"name", &node_name) != 0) {
9211ae08745Sheppo 			DERR(vswp, "%s: unable to get node name", __func__);
9221ae08745Sheppo 			continue;
9231ae08745Sheppo 
9241ae08745Sheppo 		}
9251ae08745Sheppo 
9261ae08745Sheppo 		if (strcmp(node_name, vsw_propname) == 0) {
9271ae08745Sheppo 			/* Virtual switch node */
9281ae08745Sheppo 			if (md_get_prop_val(mdp, listp[idx],
9291ae08745Sheppo 				"cfg-handle", &md_inst) != 0) {
9301ae08745Sheppo 				DERR(vswp, "%s: unable to get cfg-handle from"
9311ae08745Sheppo 					" node %d", __func__, idx);
9321ae08745Sheppo 				goto md_prop_exit;
9331ae08745Sheppo 			} else if (md_inst == obp_inst) {
9341ae08745Sheppo 				D2(vswp, "%s: found matching node (%d)"
9351ae08745Sheppo 					" 0x%llx == 0x%llx", __func__, idx,
9361ae08745Sheppo 					md_inst, obp_inst);
9371ae08745Sheppo 				found_node = B_TRUE;
9381ae08745Sheppo 				break;
9391ae08745Sheppo 			}
9401ae08745Sheppo 		}
9411ae08745Sheppo 	}
9421ae08745Sheppo 
9431ae08745Sheppo 	if (!found_node) {
9441ae08745Sheppo 		DWARN(vswp, "%s: couldn't find correct vsw node", __func__);
9451ae08745Sheppo 		goto md_prop_exit;
9461ae08745Sheppo 	}
9471ae08745Sheppo 
9481ae08745Sheppo 	/*
9491ae08745Sheppo 	 * Now, having found the correct node, get the various properties.
9501ae08745Sheppo 	 */
9511ae08745Sheppo 
9521ae08745Sheppo 	if (md_get_prop_data(mdp, listp[idx], physdev_propname,
9531ae08745Sheppo 				(uint8_t **)(&physname), &len) != 0) {
9541ae08745Sheppo 		cmn_err(CE_WARN, "%s: unable to get name(s) of physical "
9551ae08745Sheppo 			"device(s) from MD", __func__);
9561ae08745Sheppo 	} else if ((strlen(physname) + 1) > LIFNAMSIZ) {
9571ae08745Sheppo 		cmn_err(CE_WARN, "%s is too long a device name", physname);
9581ae08745Sheppo 	} else {
9591ae08745Sheppo 		(void) strncpy(vswp->physname, physname, strlen(physname) + 1);
9601ae08745Sheppo 		vswp->mdprops |= VSW_MD_PHYSNAME;
9611ae08745Sheppo 		D2(vswp, "%s: using first device specified (%s)",
9621ae08745Sheppo 			__func__, vswp->physname);
9631ae08745Sheppo 	}
9641ae08745Sheppo 
9651ae08745Sheppo #ifdef DEBUG
9661ae08745Sheppo 	/*
9671ae08745Sheppo 	 * As a temporary measure to aid testing we check to see if there
9681ae08745Sheppo 	 * is a vsw.conf file present. If there is we use the value of the
9691ae08745Sheppo 	 * vsw_physname property in the file as the name of the physical
9701ae08745Sheppo 	 * device, overriding the value from the MD.
9711ae08745Sheppo 	 *
9721ae08745Sheppo 	 * There may be multiple devices listed, but for the moment
9731ae08745Sheppo 	 * we just use the first one.
9741ae08745Sheppo 	 */
9751ae08745Sheppo 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
9761ae08745Sheppo 		"vsw_physname", &dev) == DDI_PROP_SUCCESS) {
9771ae08745Sheppo 		if ((strlen(dev) + 1) > LIFNAMSIZ) {
9781ae08745Sheppo 			cmn_err(CE_WARN, "%s is too long a device name", dev);
9791ae08745Sheppo 		} else {
9801ae08745Sheppo 			cmn_err(CE_NOTE, "%s: using device name (%s) from "
9811ae08745Sheppo 				"config file", __func__, dev);
9821ae08745Sheppo 
9831ae08745Sheppo 			(void) strncpy(vswp->physname, dev, strlen(dev) + 1);
9841ae08745Sheppo 			vswp->mdprops |= VSW_MD_PHYSNAME;
9851ae08745Sheppo 		}
9861ae08745Sheppo 
9871ae08745Sheppo 		ddi_prop_free(dev);
9881ae08745Sheppo 
9891ae08745Sheppo 	}
9901ae08745Sheppo #endif
9911ae08745Sheppo 
992*e1ebb9ecSlm66018 	/* mac address for vswitch device itself */
9931ae08745Sheppo 	if (md_get_prop_val(mdp, listp[idx],
9941ae08745Sheppo 			macaddr_propname, &macaddr) != 0) {
995*e1ebb9ecSlm66018 		cmn_err(CE_WARN, "!Unable to get MAC address from MD");
996*e1ebb9ecSlm66018 
997*e1ebb9ecSlm66018 		/*
998*e1ebb9ecSlm66018 		 * Fallback to using the mac address of the physical
999*e1ebb9ecSlm66018 		 * device.
1000*e1ebb9ecSlm66018 		 */
1001*e1ebb9ecSlm66018 		if (vsw_get_physaddr(vswp) == 0) {
1002*e1ebb9ecSlm66018 			cmn_err(CE_NOTE, "!Using MAC address from physical "
1003*e1ebb9ecSlm66018 				"device (%s)", vswp->physname);
1004*e1ebb9ecSlm66018 		}
10051ae08745Sheppo 	} else {
10061ae08745Sheppo 		READ_ENTER(&vswp->if_lockrw);
10071ae08745Sheppo 		for (i = ETHERADDRL - 1; i >= 0; i--) {
10081ae08745Sheppo 			vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
10091ae08745Sheppo 			macaddr >>= 8;
10101ae08745Sheppo 		}
10111ae08745Sheppo 		RW_EXIT(&vswp->if_lockrw);
10121ae08745Sheppo 		vswp->mdprops |= VSW_MD_MACADDR;
10131ae08745Sheppo 	}
10141ae08745Sheppo 
10151ae08745Sheppo 	/*
10161ae08745Sheppo 	 * Get the switch-mode property. The modes are listed in
10171ae08745Sheppo 	 * decreasing order of preference, i.e. prefered mode is
10181ae08745Sheppo 	 * first item in list.
10191ae08745Sheppo 	 */
10201ae08745Sheppo 	len = 0;
1021*e1ebb9ecSlm66018 	vswp->smode_num = 0;
10221ae08745Sheppo 	if (md_get_prop_data(mdp, listp[idx], smode_propname,
10231ae08745Sheppo 				(uint8_t **)(&smode), &len) != 0) {
10241ae08745Sheppo 		/*
1025*e1ebb9ecSlm66018 		 * Unable to get switch-mode property from MD, nothing
1026*e1ebb9ecSlm66018 		 * more we can do.
10271ae08745Sheppo 		 */
1028*e1ebb9ecSlm66018 		cmn_err(CE_WARN, "!unable to get switch mode property");
1029*e1ebb9ecSlm66018 		goto md_prop_exit;
1030*e1ebb9ecSlm66018 	}
1031*e1ebb9ecSlm66018 
10321ae08745Sheppo 	curr_mode = smode;
10331ae08745Sheppo 	/*
10341ae08745Sheppo 	 * Modes of operation:
10351ae08745Sheppo 	 * 'switched'	 - layer 2 switching, underlying HW in
1036*e1ebb9ecSlm66018 	 *			programmed mode.
10371ae08745Sheppo 	 * 'promiscuous' - layer 2 switching, underlying HW in
10381ae08745Sheppo 	 *			promiscuous mode.
10391ae08745Sheppo 	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
10401ae08745Sheppo 	 *			in non-promiscuous mode.
10411ae08745Sheppo 	 */
1042*e1ebb9ecSlm66018 	while ((curr_mode < (smode + len)) && (vswp->smode_num < NUM_SMODES)) {
10431ae08745Sheppo 		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
1044*e1ebb9ecSlm66018 		if (strcmp(curr_mode, "switched") == 0) {
1045*e1ebb9ecSlm66018 			vswp->smode[vswp->smode_num++] = VSW_LAYER2;
1046*e1ebb9ecSlm66018 		} else if (strcmp(curr_mode, "promiscuous") == 0) {
1047*e1ebb9ecSlm66018 			vswp->smode[vswp->smode_num++] = VSW_LAYER2_PROMISC;
1048*e1ebb9ecSlm66018 		} else if (strcmp(curr_mode, "routed") == 0) {
1049*e1ebb9ecSlm66018 			vswp->smode[vswp->smode_num++] = VSW_LAYER3;
1050*e1ebb9ecSlm66018 		} else {
1051*e1ebb9ecSlm66018 			cmn_err(CE_WARN, "Unknown switch mode %s, setting to"
1052*e1ebb9ecSlm66018 				" default switched mode", curr_mode);
1053*e1ebb9ecSlm66018 			vswp->smode[vswp->smode_num++] = VSW_LAYER2;
10541ae08745Sheppo 		}
10551ae08745Sheppo 		curr_mode += strlen(curr_mode) + 1;
10561ae08745Sheppo 	}
10571ae08745Sheppo 
1058*e1ebb9ecSlm66018 	D2(vswp, "%d switching modes specified", vswp->smode_num);
1059*e1ebb9ecSlm66018 
1060*e1ebb9ecSlm66018 	if (vswp->smode_num > 0)
10611ae08745Sheppo 		vswp->mdprops |= VSW_MD_SMODE;
10621ae08745Sheppo 
10631ae08745Sheppo md_prop_exit:
10641ae08745Sheppo 	(void) md_fini_handle(mdp);
10651ae08745Sheppo 
10661ae08745Sheppo 	kmem_free(listp, listsz);
10671ae08745Sheppo 
10681ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
10691ae08745Sheppo }
10701ae08745Sheppo 
1071*e1ebb9ecSlm66018 /*
1072*e1ebb9ecSlm66018  * Get the mac address of the physical device.
1073*e1ebb9ecSlm66018  *
1074*e1ebb9ecSlm66018  * Returns 0 on success, 1 on failure.
1075*e1ebb9ecSlm66018  */
1076*e1ebb9ecSlm66018 static int
1077*e1ebb9ecSlm66018 vsw_get_physaddr(vsw_t *vswp)
1078*e1ebb9ecSlm66018 {
1079*e1ebb9ecSlm66018 	mac_handle_t	mh;
1080*e1ebb9ecSlm66018 	char		drv[LIFNAMSIZ];
1081*e1ebb9ecSlm66018 	uint_t		ddi_instance;
1082*e1ebb9ecSlm66018 
1083*e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1084*e1ebb9ecSlm66018 
1085*e1ebb9ecSlm66018 	if (ddi_parse(vswp->physname, drv, &ddi_instance) != DDI_SUCCESS)
1086*e1ebb9ecSlm66018 		return (1);
1087*e1ebb9ecSlm66018 
1088*e1ebb9ecSlm66018 	if (mac_open(vswp->physname, ddi_instance, &mh) != 0) {
1089*e1ebb9ecSlm66018 		cmn_err(CE_WARN, "!mac_open %s failed", vswp->physname);
1090*e1ebb9ecSlm66018 		return (1);
1091*e1ebb9ecSlm66018 	}
1092*e1ebb9ecSlm66018 
1093*e1ebb9ecSlm66018 	READ_ENTER(&vswp->if_lockrw);
1094*e1ebb9ecSlm66018 	mac_unicst_get(mh, vswp->if_addr.ether_addr_octet);
1095*e1ebb9ecSlm66018 	RW_EXIT(&vswp->if_lockrw);
1096*e1ebb9ecSlm66018 
1097*e1ebb9ecSlm66018 	mac_close(mh);
1098*e1ebb9ecSlm66018 
1099*e1ebb9ecSlm66018 	vswp->mdprops |= VSW_DEV_MACADDR;
1100*e1ebb9ecSlm66018 
1101*e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1102*e1ebb9ecSlm66018 
1103*e1ebb9ecSlm66018 	return (0);
1104*e1ebb9ecSlm66018 }
1105*e1ebb9ecSlm66018 
1106*e1ebb9ecSlm66018 /*
1107*e1ebb9ecSlm66018  * Check to see if the card supports the setting of multiple unicst
1108*e1ebb9ecSlm66018  * addresses.
1109*e1ebb9ecSlm66018  *
1110*e1ebb9ecSlm66018  * Returns 0 if card supports the programming of multiple unicast addresses
1111*e1ebb9ecSlm66018  * and there are free address slots available, otherwise returns 1.
1112*e1ebb9ecSlm66018  */
1113*e1ebb9ecSlm66018 static int
1114*e1ebb9ecSlm66018 vsw_get_hw_maddr(vsw_t *vswp)
1115*e1ebb9ecSlm66018 {
1116*e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1117*e1ebb9ecSlm66018 
1118*e1ebb9ecSlm66018 	if (vswp->mh == NULL) {
1119*e1ebb9ecSlm66018 		return (1);
1120*e1ebb9ecSlm66018 	}
1121*e1ebb9ecSlm66018 
1122*e1ebb9ecSlm66018 	if (!mac_capab_get(vswp->mh, MAC_CAPAB_MULTIADDRESS, &vswp->maddr)) {
1123*e1ebb9ecSlm66018 		DWARN(vswp, "Unable to get capabilities of"
1124*e1ebb9ecSlm66018 			" underlying device (%s)", vswp->physname);
1125*e1ebb9ecSlm66018 		return (1);
1126*e1ebb9ecSlm66018 	}
1127*e1ebb9ecSlm66018 
1128*e1ebb9ecSlm66018 	if (vswp->maddr.maddr_naddrfree == 0) {
1129*e1ebb9ecSlm66018 		cmn_err(CE_WARN, "!device %s has no free unicast address slots",
1130*e1ebb9ecSlm66018 			vswp->physname);
1131*e1ebb9ecSlm66018 		return (1);
1132*e1ebb9ecSlm66018 	}
1133*e1ebb9ecSlm66018 
1134*e1ebb9ecSlm66018 	D2(vswp, "%s: %d addrs : %d free", __func__,
1135*e1ebb9ecSlm66018 		vswp->maddr.maddr_naddr, vswp->maddr.maddr_naddrfree);
1136*e1ebb9ecSlm66018 
1137*e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1138*e1ebb9ecSlm66018 
1139*e1ebb9ecSlm66018 	return (0);
1140*e1ebb9ecSlm66018 }
1141*e1ebb9ecSlm66018 
1142*e1ebb9ecSlm66018 /*
1143*e1ebb9ecSlm66018  * Setup for layer 2 switching.
1144*e1ebb9ecSlm66018  *
1145*e1ebb9ecSlm66018  * Returns 0 on success, 1 on failure.
1146*e1ebb9ecSlm66018  */
11471ae08745Sheppo static int
11481ae08745Sheppo vsw_setup_layer2(vsw_t *vswp)
11491ae08745Sheppo {
11501ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
11511ae08745Sheppo 
11521ae08745Sheppo 	vsw_switch_frame = vsw_switch_l2_frame;
11531ae08745Sheppo 
11541ae08745Sheppo 	/*
11551ae08745Sheppo 	 * Attempt to link into the MAC layer so we can get
11561ae08745Sheppo 	 * and send packets out over the physical adapter.
11571ae08745Sheppo 	 */
11581ae08745Sheppo 	if (vswp->mdprops & VSW_MD_PHYSNAME) {
11591ae08745Sheppo 		if (vsw_mac_attach(vswp) != 0) {
11601ae08745Sheppo 			/*
11611ae08745Sheppo 			 * Registration with the MAC layer has failed,
11621ae08745Sheppo 			 * so return 1 so that can fall back to next
11631ae08745Sheppo 			 * prefered switching method.
11641ae08745Sheppo 			 */
1165*e1ebb9ecSlm66018 			cmn_err(CE_WARN, "!Unable to join as MAC layer "
1166*e1ebb9ecSlm66018 				"client");
1167*e1ebb9ecSlm66018 			return (1);
11681ae08745Sheppo 		}
1169*e1ebb9ecSlm66018 
1170*e1ebb9ecSlm66018 		if (vswp->smode[vswp->smode_idx] == VSW_LAYER2) {
1171*e1ebb9ecSlm66018 			/*
1172*e1ebb9ecSlm66018 			 * Verify that underlying device can support multiple
1173*e1ebb9ecSlm66018 			 * unicast mac addresses, and has free capacity.
1174*e1ebb9ecSlm66018 			 */
1175*e1ebb9ecSlm66018 			if (vsw_get_hw_maddr(vswp) != 0) {
1176*e1ebb9ecSlm66018 				cmn_err(CE_WARN, "!unable to setup switching");
1177*e1ebb9ecSlm66018 				vsw_mac_detach(vswp);
1178*e1ebb9ecSlm66018 				return (1);
1179*e1ebb9ecSlm66018 			}
1180*e1ebb9ecSlm66018 		}
1181*e1ebb9ecSlm66018 
11821ae08745Sheppo 	} else {
1183*e1ebb9ecSlm66018 		/*
1184*e1ebb9ecSlm66018 		 * No physical device name found in MD which is
1185*e1ebb9ecSlm66018 		 * required for layer 2.
1186*e1ebb9ecSlm66018 		 */
1187*e1ebb9ecSlm66018 		cmn_err(CE_WARN, "!no physical device name specified");
1188*e1ebb9ecSlm66018 		return (1);
11891ae08745Sheppo 	}
11901ae08745Sheppo 
11911ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
11921ae08745Sheppo 
1193*e1ebb9ecSlm66018 	return (0);
11941ae08745Sheppo }
11951ae08745Sheppo 
11961ae08745Sheppo static int
11971ae08745Sheppo vsw_setup_layer3(vsw_t *vswp)
11981ae08745Sheppo {
11991ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
12001ae08745Sheppo 
12011ae08745Sheppo 	D2(vswp, "%s: operating in layer 3 mode", __func__);
12021ae08745Sheppo 	vsw_switch_frame = vsw_switch_l3_frame;
12031ae08745Sheppo 
12041ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
12051ae08745Sheppo 
12061ae08745Sheppo 	return (0);
12071ae08745Sheppo }
12081ae08745Sheppo 
12091ae08745Sheppo /*
12101ae08745Sheppo  * Link into the MAC layer to gain access to the services provided by
12111ae08745Sheppo  * the underlying physical device driver (which should also have
12121ae08745Sheppo  * registered with the MAC layer).
12131ae08745Sheppo  *
12141ae08745Sheppo  * Only when in layer 2 mode.
12151ae08745Sheppo  */
12161ae08745Sheppo static int
12171ae08745Sheppo vsw_mac_attach(vsw_t *vswp)
12181ae08745Sheppo {
1219ba2e4443Sseb 	char	drv[LIFNAMSIZ];
1220ba2e4443Sseb 	uint_t	ddi_instance;
1221ba2e4443Sseb 
12221ae08745Sheppo 	D1(vswp, "vsw_mac_attach: enter");
12231ae08745Sheppo 
12241ae08745Sheppo 	vswp->mh = NULL;
12251ae08745Sheppo 	vswp->mrh = NULL;
12261ae08745Sheppo 
12271ae08745Sheppo 	ASSERT(vswp->mdprops & VSW_MD_PHYSNAME);
12281ae08745Sheppo 
1229ba2e4443Sseb 	if (ddi_parse(vswp->physname, drv, &ddi_instance) != DDI_SUCCESS) {
1230ba2e4443Sseb 		cmn_err(CE_WARN, "invalid device name: %s", vswp->physname);
1231ba2e4443Sseb 		goto mac_fail_exit;
1232ba2e4443Sseb 	}
1233ba2e4443Sseb 	if ((mac_open(vswp->physname, ddi_instance, &vswp->mh)) != 0) {
12341ae08745Sheppo 		cmn_err(CE_WARN, "mac_open %s failed", vswp->physname);
12351ae08745Sheppo 		goto mac_fail_exit;
12361ae08745Sheppo 	}
12371ae08745Sheppo 
12381ae08745Sheppo 	D2(vswp, "vsw_mac_attach: using device %s", vswp->physname);
12391ae08745Sheppo 
12401ae08745Sheppo 	/* register our rx callback function */
12411ae08745Sheppo 	vswp->mrh = mac_rx_add(vswp->mh, vsw_rx_cb, (void *)vswp);
12421ae08745Sheppo 
12431ae08745Sheppo 	/* get the MAC tx fn */
12441ae08745Sheppo 	vswp->txinfo = mac_tx_get(vswp->mh);
12451ae08745Sheppo 
12461ae08745Sheppo 	/* start the interface */
12471ae08745Sheppo 	if (mac_start(vswp->mh) != 0) {
12481ae08745Sheppo 		cmn_err(CE_WARN, "could not start mac interface");
12491ae08745Sheppo 		goto mac_fail_exit;
12501ae08745Sheppo 	}
12511ae08745Sheppo 
12521ae08745Sheppo 	D1(vswp, "vsw_mac_attach: exit");
12531ae08745Sheppo 	return (0);
12541ae08745Sheppo 
12551ae08745Sheppo mac_fail_exit:
12561ae08745Sheppo 	if (vswp->mh != NULL) {
12571ae08745Sheppo 		if (vswp->mrh != NULL)
12581ae08745Sheppo 			mac_rx_remove(vswp->mh, vswp->mrh);
12591ae08745Sheppo 
12601ae08745Sheppo 		mac_close(vswp->mh);
12611ae08745Sheppo 	}
12621ae08745Sheppo 
12631ae08745Sheppo 	vswp->mrh = NULL;
12641ae08745Sheppo 	vswp->mh = NULL;
12651ae08745Sheppo 	vswp->txinfo = NULL;
12661ae08745Sheppo 
12671ae08745Sheppo 	D1(vswp, "vsw_mac_attach: fail exit");
12681ae08745Sheppo 	return (1);
12691ae08745Sheppo }
12701ae08745Sheppo 
12711ae08745Sheppo static void
12721ae08745Sheppo vsw_mac_detach(vsw_t *vswp)
12731ae08745Sheppo {
12741ae08745Sheppo 	D1(vswp, "vsw_mac_detach: enter");
12751ae08745Sheppo 
12761ae08745Sheppo 	if (vswp->mh != NULL) {
12771ae08745Sheppo 		if (vswp->mrh != NULL)
12781ae08745Sheppo 			mac_rx_remove(vswp->mh, vswp->mrh);
12791ae08745Sheppo 
1280*e1ebb9ecSlm66018 		mac_stop(vswp->mh);
12811ae08745Sheppo 		mac_close(vswp->mh);
12821ae08745Sheppo 	}
12831ae08745Sheppo 
12841ae08745Sheppo 	vswp->mrh = NULL;
12851ae08745Sheppo 	vswp->mh = NULL;
12861ae08745Sheppo 	vswp->txinfo = NULL;
12871ae08745Sheppo 
12881ae08745Sheppo 	D1(vswp, "vsw_mac_detach: exit");
12891ae08745Sheppo }
12901ae08745Sheppo 
12911ae08745Sheppo /*
1292*e1ebb9ecSlm66018  * Depending on the mode specified, the capabilites and capacity
1293*e1ebb9ecSlm66018  * of the underlying device setup the physical device.
12941ae08745Sheppo  *
1295*e1ebb9ecSlm66018  * If in layer 3 mode, then do nothing.
1296*e1ebb9ecSlm66018  *
1297*e1ebb9ecSlm66018  * If in layer 2 programmed mode attempt to program the unicast address
1298*e1ebb9ecSlm66018  * associated with the port into the physical device. If this is not
1299*e1ebb9ecSlm66018  * possible due to resource exhaustion or simply because the device does
1300*e1ebb9ecSlm66018  * not support multiple unicast addresses then if required fallback onto
1301*e1ebb9ecSlm66018  * putting the card into promisc mode.
1302*e1ebb9ecSlm66018  *
1303*e1ebb9ecSlm66018  * If in promisc mode then simply set the card into promisc mode.
1304*e1ebb9ecSlm66018  *
1305*e1ebb9ecSlm66018  * Returns 0 success, 1 on failure.
13061ae08745Sheppo  */
1307*e1ebb9ecSlm66018 static int
1308*e1ebb9ecSlm66018 vsw_set_hw(vsw_t *vswp, vsw_port_t *port)
13091ae08745Sheppo {
1310*e1ebb9ecSlm66018 	mac_multi_addr_t	mac_addr;
1311*e1ebb9ecSlm66018 	void			*mah;
1312*e1ebb9ecSlm66018 	int			err;
13131ae08745Sheppo 
1314*e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1315*e1ebb9ecSlm66018 
1316*e1ebb9ecSlm66018 	if (vswp->smode[vswp->smode_idx] == VSW_LAYER3)
1317*e1ebb9ecSlm66018 		return (0);
1318*e1ebb9ecSlm66018 
1319*e1ebb9ecSlm66018 	if (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC) {
1320*e1ebb9ecSlm66018 		return (vsw_set_hw_promisc(vswp, port));
1321*e1ebb9ecSlm66018 	}
1322*e1ebb9ecSlm66018 
1323*e1ebb9ecSlm66018 	if (vswp->maddr.maddr_handle == NULL)
1324*e1ebb9ecSlm66018 		return (1);
1325*e1ebb9ecSlm66018 
1326*e1ebb9ecSlm66018 	mah = vswp->maddr.maddr_handle;
1327*e1ebb9ecSlm66018 
1328*e1ebb9ecSlm66018 	/*
1329*e1ebb9ecSlm66018 	 * Attempt to program the unicast address into the HW.
1330*e1ebb9ecSlm66018 	 */
1331*e1ebb9ecSlm66018 	mac_addr.mma_addrlen = ETHERADDRL;
1332*e1ebb9ecSlm66018 	ether_copy(&port->p_macaddr, &mac_addr.mma_addr);
1333*e1ebb9ecSlm66018 
1334*e1ebb9ecSlm66018 	err = vswp->maddr.maddr_add(mah, &mac_addr);
1335*e1ebb9ecSlm66018 	if (err != 0) {
1336*e1ebb9ecSlm66018 		cmn_err(CE_WARN, "!failed to program addr "
1337*e1ebb9ecSlm66018 			"%x:%x:%x:%x:%x:%x for port %d into device %s "
1338*e1ebb9ecSlm66018 			": err %d", port->p_macaddr.ether_addr_octet[0],
1339*e1ebb9ecSlm66018 			port->p_macaddr.ether_addr_octet[1],
1340*e1ebb9ecSlm66018 			port->p_macaddr.ether_addr_octet[2],
1341*e1ebb9ecSlm66018 			port->p_macaddr.ether_addr_octet[3],
1342*e1ebb9ecSlm66018 			port->p_macaddr.ether_addr_octet[4],
1343*e1ebb9ecSlm66018 			port->p_macaddr.ether_addr_octet[5],
1344*e1ebb9ecSlm66018 			port->p_instance, vswp->physname, err);
1345*e1ebb9ecSlm66018 
1346*e1ebb9ecSlm66018 		/*
1347*e1ebb9ecSlm66018 		 * Mark that attempt should be made to re-config sometime
1348*e1ebb9ecSlm66018 		 * in future if a port is deleted.
1349*e1ebb9ecSlm66018 		 */
1350*e1ebb9ecSlm66018 		vswp->recfg_reqd = B_TRUE;
1351*e1ebb9ecSlm66018 
1352*e1ebb9ecSlm66018 		/*
1353*e1ebb9ecSlm66018 		 * Only 1 mode specified, nothing more to do.
1354*e1ebb9ecSlm66018 		 */
1355*e1ebb9ecSlm66018 		if (vswp->smode_num == 1)
1356*e1ebb9ecSlm66018 			return (err);
1357*e1ebb9ecSlm66018 
1358*e1ebb9ecSlm66018 		/*
1359*e1ebb9ecSlm66018 		 * If promiscuous was next mode specified try to
1360*e1ebb9ecSlm66018 		 * set the card into that mode.
1361*e1ebb9ecSlm66018 		 */
1362*e1ebb9ecSlm66018 		if ((vswp->smode_idx <= (vswp->smode_num - 2)) &&
1363*e1ebb9ecSlm66018 			(vswp->smode[vswp->smode_idx + 1]
1364*e1ebb9ecSlm66018 					== VSW_LAYER2_PROMISC)) {
1365*e1ebb9ecSlm66018 			vswp->smode_idx += 1;
1366*e1ebb9ecSlm66018 			return (vsw_set_hw_promisc(vswp, port));
1367*e1ebb9ecSlm66018 		}
1368*e1ebb9ecSlm66018 		return (err);
1369*e1ebb9ecSlm66018 	}
1370*e1ebb9ecSlm66018 
1371*e1ebb9ecSlm66018 	port->addr_slot = mac_addr.mma_slot;
1372*e1ebb9ecSlm66018 	port->addr_set = VSW_ADDR_HW;
1373*e1ebb9ecSlm66018 
1374*e1ebb9ecSlm66018 	D2(vswp, "programmed addr %x:%x:%x:%x:%x:%x for port %d "
1375*e1ebb9ecSlm66018 		"into slot %d of device %s",
1376*e1ebb9ecSlm66018 		port->p_macaddr.ether_addr_octet[0],
1377*e1ebb9ecSlm66018 		port->p_macaddr.ether_addr_octet[1],
1378*e1ebb9ecSlm66018 		port->p_macaddr.ether_addr_octet[2],
1379*e1ebb9ecSlm66018 		port->p_macaddr.ether_addr_octet[3],
1380*e1ebb9ecSlm66018 		port->p_macaddr.ether_addr_octet[4],
1381*e1ebb9ecSlm66018 		port->p_macaddr.ether_addr_octet[5],
1382*e1ebb9ecSlm66018 		port->p_instance, port->addr_slot, vswp->physname);
1383*e1ebb9ecSlm66018 
1384*e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1385*e1ebb9ecSlm66018 
1386*e1ebb9ecSlm66018 	return (0);
1387*e1ebb9ecSlm66018 }
1388*e1ebb9ecSlm66018 
1389*e1ebb9ecSlm66018 /*
1390*e1ebb9ecSlm66018  * If in layer 3 mode do nothing.
1391*e1ebb9ecSlm66018  *
1392*e1ebb9ecSlm66018  * If in layer 2 switched mode remove the address from the physical
1393*e1ebb9ecSlm66018  * device.
1394*e1ebb9ecSlm66018  *
1395*e1ebb9ecSlm66018  * If in layer 2 promiscuous mode disable promisc mode.
1396*e1ebb9ecSlm66018  *
1397*e1ebb9ecSlm66018  * Returns 0 on success.
1398*e1ebb9ecSlm66018  */
1399*e1ebb9ecSlm66018 static int
1400*e1ebb9ecSlm66018 vsw_unset_hw(vsw_t *vswp, vsw_port_t *port)
1401*e1ebb9ecSlm66018 {
1402*e1ebb9ecSlm66018 	int		err;
1403*e1ebb9ecSlm66018 	void		*mah;
1404*e1ebb9ecSlm66018 
1405*e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1406*e1ebb9ecSlm66018 
1407*e1ebb9ecSlm66018 	if (vswp->smode[vswp->smode_idx] == VSW_LAYER3)
1408*e1ebb9ecSlm66018 		return (0);
1409*e1ebb9ecSlm66018 
1410*e1ebb9ecSlm66018 	if (port->addr_set == VSW_ADDR_PROMISC) {
1411*e1ebb9ecSlm66018 		return (vsw_unset_hw_promisc(vswp, port));
1412*e1ebb9ecSlm66018 	}
1413*e1ebb9ecSlm66018 
1414*e1ebb9ecSlm66018 	if (port->addr_set == VSW_ADDR_HW) {
1415*e1ebb9ecSlm66018 		if (vswp->mh == NULL)
1416*e1ebb9ecSlm66018 			return (1);
1417*e1ebb9ecSlm66018 
1418*e1ebb9ecSlm66018 		if (vswp->maddr.maddr_handle == NULL)
1419*e1ebb9ecSlm66018 			return (1);
1420*e1ebb9ecSlm66018 
1421*e1ebb9ecSlm66018 		mah = vswp->maddr.maddr_handle;
1422*e1ebb9ecSlm66018 
1423*e1ebb9ecSlm66018 		err = vswp->maddr.maddr_remove(mah, port->addr_slot);
1424*e1ebb9ecSlm66018 		if (err != 0) {
1425*e1ebb9ecSlm66018 			cmn_err(CE_WARN, "!Unable to remove addr "
1426*e1ebb9ecSlm66018 				"%x:%x:%x:%x:%x:%x for port %d from device %s"
1427*e1ebb9ecSlm66018 				" : (err %d)",
1428*e1ebb9ecSlm66018 				port->p_macaddr.ether_addr_octet[0],
1429*e1ebb9ecSlm66018 				port->p_macaddr.ether_addr_octet[1],
1430*e1ebb9ecSlm66018 				port->p_macaddr.ether_addr_octet[2],
1431*e1ebb9ecSlm66018 				port->p_macaddr.ether_addr_octet[3],
1432*e1ebb9ecSlm66018 				port->p_macaddr.ether_addr_octet[4],
1433*e1ebb9ecSlm66018 				port->p_macaddr.ether_addr_octet[5],
1434*e1ebb9ecSlm66018 				port->p_instance, vswp->physname, err);
1435*e1ebb9ecSlm66018 			return (err);
1436*e1ebb9ecSlm66018 		}
1437*e1ebb9ecSlm66018 
1438*e1ebb9ecSlm66018 		port->addr_set = VSW_ADDR_UNSET;
1439*e1ebb9ecSlm66018 
1440*e1ebb9ecSlm66018 		D2(vswp, "removed addr %x:%x:%x:%x:%x:%x for "
1441*e1ebb9ecSlm66018 			"port %d from device %s",
1442*e1ebb9ecSlm66018 			port->p_macaddr.ether_addr_octet[0],
1443*e1ebb9ecSlm66018 			port->p_macaddr.ether_addr_octet[1],
1444*e1ebb9ecSlm66018 			port->p_macaddr.ether_addr_octet[2],
1445*e1ebb9ecSlm66018 			port->p_macaddr.ether_addr_octet[3],
1446*e1ebb9ecSlm66018 			port->p_macaddr.ether_addr_octet[4],
1447*e1ebb9ecSlm66018 			port->p_macaddr.ether_addr_octet[5],
1448*e1ebb9ecSlm66018 			port->p_instance, vswp->physname);
1449*e1ebb9ecSlm66018 	}
1450*e1ebb9ecSlm66018 
1451*e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1452*e1ebb9ecSlm66018 	return (0);
1453*e1ebb9ecSlm66018 }
1454*e1ebb9ecSlm66018 
1455*e1ebb9ecSlm66018 /*
1456*e1ebb9ecSlm66018  * Set network card into promisc mode.
1457*e1ebb9ecSlm66018  *
1458*e1ebb9ecSlm66018  * Returns 0 on success, 1 on failure.
1459*e1ebb9ecSlm66018  */
1460*e1ebb9ecSlm66018 static int
1461*e1ebb9ecSlm66018 vsw_set_hw_promisc(vsw_t *vswp, vsw_port_t *port)
1462*e1ebb9ecSlm66018 {
1463*e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1464*e1ebb9ecSlm66018 
1465*e1ebb9ecSlm66018 	if (vswp->mh == NULL)
1466*e1ebb9ecSlm66018 		return (1);
1467*e1ebb9ecSlm66018 
1468*e1ebb9ecSlm66018 	if (vswp->promisc_cnt++ == 0) {
1469*e1ebb9ecSlm66018 		if (mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC) != 0) {
1470*e1ebb9ecSlm66018 			vswp->promisc_cnt--;
1471*e1ebb9ecSlm66018 			return (1);
1472*e1ebb9ecSlm66018 		}
1473*e1ebb9ecSlm66018 		cmn_err(CE_NOTE, "!switching device %s into promiscuous mode",
1474*e1ebb9ecSlm66018 				vswp->physname);
1475*e1ebb9ecSlm66018 	}
1476*e1ebb9ecSlm66018 	port->addr_set = VSW_ADDR_PROMISC;
1477*e1ebb9ecSlm66018 
1478*e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1479*e1ebb9ecSlm66018 
1480*e1ebb9ecSlm66018 	return (0);
1481*e1ebb9ecSlm66018 }
1482*e1ebb9ecSlm66018 
1483*e1ebb9ecSlm66018 /*
1484*e1ebb9ecSlm66018  * Turn off promiscuous mode on network card.
1485*e1ebb9ecSlm66018  *
1486*e1ebb9ecSlm66018  * Returns 0 on success, 1 on failure.
1487*e1ebb9ecSlm66018  */
1488*e1ebb9ecSlm66018 static int
1489*e1ebb9ecSlm66018 vsw_unset_hw_promisc(vsw_t *vswp, vsw_port_t *port)
1490*e1ebb9ecSlm66018 {
1491*e1ebb9ecSlm66018 	vsw_port_list_t 	*plist = &vswp->plist;
1492*e1ebb9ecSlm66018 
1493*e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1494*e1ebb9ecSlm66018 
1495*e1ebb9ecSlm66018 	if (vswp->mh == NULL)
1496*e1ebb9ecSlm66018 		return (1);
1497*e1ebb9ecSlm66018 
1498*e1ebb9ecSlm66018 	ASSERT(port->addr_set == VSW_ADDR_PROMISC);
1499*e1ebb9ecSlm66018 
1500*e1ebb9ecSlm66018 	if (--vswp->promisc_cnt == 0) {
1501*e1ebb9ecSlm66018 		if (mac_promisc_set(vswp->mh, B_FALSE, MAC_DEVPROMISC) != 0) {
1502*e1ebb9ecSlm66018 			vswp->promisc_cnt++;
1503*e1ebb9ecSlm66018 			return (1);
1504*e1ebb9ecSlm66018 		}
1505*e1ebb9ecSlm66018 
1506*e1ebb9ecSlm66018 		/*
1507*e1ebb9ecSlm66018 		 * We are exiting promisc mode either because we were
1508*e1ebb9ecSlm66018 		 * only in promisc mode because we had failed over from
1509*e1ebb9ecSlm66018 		 * switched mode due to HW resource issues, or the user
1510*e1ebb9ecSlm66018 		 * wanted the card in promisc mode for all the ports and
1511*e1ebb9ecSlm66018 		 * the last port is now being deleted. Tweak the message
1512*e1ebb9ecSlm66018 		 * accordingly.
1513*e1ebb9ecSlm66018 		 */
1514*e1ebb9ecSlm66018 		if (plist->num_ports != 0) {
1515*e1ebb9ecSlm66018 			cmn_err(CE_NOTE, "!switching device %s back to "
1516*e1ebb9ecSlm66018 				"programmed mode", vswp->physname);
15171ae08745Sheppo 		} else {
1518*e1ebb9ecSlm66018 			cmn_err(CE_NOTE, "!switching device %s out of "
1519*e1ebb9ecSlm66018 				"promiscuous mode", vswp->physname);
15201ae08745Sheppo 		}
15211ae08745Sheppo 	}
1522*e1ebb9ecSlm66018 	port->addr_set = VSW_ADDR_UNSET;
1523*e1ebb9ecSlm66018 
1524*e1ebb9ecSlm66018 	D1(vswp, "%s: exit", __func__);
1525*e1ebb9ecSlm66018 	return (0);
1526*e1ebb9ecSlm66018 }
1527*e1ebb9ecSlm66018 
1528*e1ebb9ecSlm66018 /*
1529*e1ebb9ecSlm66018  * Determine whether or not we are operating in our prefered
1530*e1ebb9ecSlm66018  * mode and if not whether the physical resources now allow us
1531*e1ebb9ecSlm66018  * to operate in it.
1532*e1ebb9ecSlm66018  *
1533*e1ebb9ecSlm66018  * Should only be invoked after port which is being deleted has been
1534*e1ebb9ecSlm66018  * removed from the port list.
1535*e1ebb9ecSlm66018  */
1536*e1ebb9ecSlm66018 static int
1537*e1ebb9ecSlm66018 vsw_reconfig_hw(vsw_t *vswp)
1538*e1ebb9ecSlm66018 {
1539*e1ebb9ecSlm66018 	vsw_port_list_t 	*plist = &vswp->plist;
1540*e1ebb9ecSlm66018 	mac_multi_addr_t	mac_addr;
1541*e1ebb9ecSlm66018 	vsw_port_t		*tp;
1542*e1ebb9ecSlm66018 	void			*mah;
1543*e1ebb9ecSlm66018 	int			rv = 0;
1544*e1ebb9ecSlm66018 	int			s_idx;
1545*e1ebb9ecSlm66018 
1546*e1ebb9ecSlm66018 	D1(vswp, "%s: enter", __func__);
1547*e1ebb9ecSlm66018 
1548*e1ebb9ecSlm66018 	if (vswp->maddr.maddr_handle == NULL)
1549*e1ebb9ecSlm66018 		return (1);
1550*e1ebb9ecSlm66018 
1551*e1ebb9ecSlm66018 	/*
1552*e1ebb9ecSlm66018 	 * Check if there are now sufficient HW resources to
1553*e1ebb9ecSlm66018 	 * attempt a re-config.
1554*e1ebb9ecSlm66018 	 */
1555*e1ebb9ecSlm66018 	if (plist->num_ports > vswp->maddr.maddr_naddrfree)
1556*e1ebb9ecSlm66018 		return (1);
1557*e1ebb9ecSlm66018 
1558*e1ebb9ecSlm66018 	/*
1559*e1ebb9ecSlm66018 	 * If we are in layer 2 (i.e. switched) or would like to be
1560*e1ebb9ecSlm66018 	 * in layer 2 then check if any ports need to be programmed
1561*e1ebb9ecSlm66018 	 * into the HW.
1562*e1ebb9ecSlm66018 	 *
1563*e1ebb9ecSlm66018 	 * This can happen in two cases - switched was specified as
1564*e1ebb9ecSlm66018 	 * the prefered mode of operation but we exhausted the HW
1565*e1ebb9ecSlm66018 	 * resources and so failed over to the next specifed mode,
1566*e1ebb9ecSlm66018 	 * or switched was the only mode specified so after HW
1567*e1ebb9ecSlm66018 	 * resources were exhausted there was nothing more we
1568*e1ebb9ecSlm66018 	 * could do.
1569*e1ebb9ecSlm66018 	 */
1570*e1ebb9ecSlm66018 	if (vswp->smode_idx > 0)
1571*e1ebb9ecSlm66018 		s_idx = vswp->smode_idx - 1;
1572*e1ebb9ecSlm66018 	else
1573*e1ebb9ecSlm66018 		s_idx = vswp->smode_idx;
1574*e1ebb9ecSlm66018 
1575*e1ebb9ecSlm66018 	if (vswp->smode[s_idx] == VSW_LAYER2) {
1576*e1ebb9ecSlm66018 		mah = vswp->maddr.maddr_handle;
1577*e1ebb9ecSlm66018 
1578*e1ebb9ecSlm66018 		D2(vswp, "%s: attempting reconfig..", __func__);
1579*e1ebb9ecSlm66018 
1580*e1ebb9ecSlm66018 		/*
1581*e1ebb9ecSlm66018 		 * Scan the port list for any port whose address has not
1582*e1ebb9ecSlm66018 		 * be programmed in HW - there should be a max of one.
1583*e1ebb9ecSlm66018 		 */
1584*e1ebb9ecSlm66018 		for (tp = plist->head; tp != NULL; tp = tp->p_next) {
1585*e1ebb9ecSlm66018 			if (tp->addr_set != VSW_ADDR_HW) {
1586*e1ebb9ecSlm66018 				mac_addr.mma_addrlen = ETHERADDRL;
1587*e1ebb9ecSlm66018 				ether_copy(&tp->p_macaddr, &mac_addr.mma_addr);
1588*e1ebb9ecSlm66018 
1589*e1ebb9ecSlm66018 				rv = vswp->maddr.maddr_add(mah, &mac_addr);
1590*e1ebb9ecSlm66018 				if (rv != 0) {
1591*e1ebb9ecSlm66018 					DWARN(vswp, "Error setting addr in "
1592*e1ebb9ecSlm66018 						"HW for port %d err %d",
1593*e1ebb9ecSlm66018 						tp->p_instance, rv);
1594*e1ebb9ecSlm66018 					goto reconfig_err_exit;
1595*e1ebb9ecSlm66018 				}
1596*e1ebb9ecSlm66018 				tp->addr_slot = mac_addr.mma_slot;
1597*e1ebb9ecSlm66018 
1598*e1ebb9ecSlm66018 				D2(vswp, "re-programmed port %d "
1599*e1ebb9ecSlm66018 					"addr %x:%x:%x:%x:%x:%x into slot %d"
1600*e1ebb9ecSlm66018 					" of device %s", tp->p_instance,
1601*e1ebb9ecSlm66018 					tp->p_macaddr.ether_addr_octet[0],
1602*e1ebb9ecSlm66018 					tp->p_macaddr.ether_addr_octet[1],
1603*e1ebb9ecSlm66018 					tp->p_macaddr.ether_addr_octet[2],
1604*e1ebb9ecSlm66018 					tp->p_macaddr.ether_addr_octet[3],
1605*e1ebb9ecSlm66018 					tp->p_macaddr.ether_addr_octet[4],
1606*e1ebb9ecSlm66018 					tp->p_macaddr.ether_addr_octet[5],
1607*e1ebb9ecSlm66018 					tp->addr_slot, vswp->physname);
1608*e1ebb9ecSlm66018 
1609*e1ebb9ecSlm66018 				/*
1610*e1ebb9ecSlm66018 				 * If up to now we had to put the card into
1611*e1ebb9ecSlm66018 				 * promisc mode to see this address, we
1612*e1ebb9ecSlm66018 				 * can now safely disable promisc mode.
1613*e1ebb9ecSlm66018 				 */
1614*e1ebb9ecSlm66018 				if (tp->addr_set == VSW_ADDR_PROMISC)
1615*e1ebb9ecSlm66018 					(void) vsw_unset_hw_promisc(vswp, tp);
1616*e1ebb9ecSlm66018 
1617*e1ebb9ecSlm66018 				tp->addr_set = VSW_ADDR_HW;
1618*e1ebb9ecSlm66018 			}
1619*e1ebb9ecSlm66018 		}
1620*e1ebb9ecSlm66018 
1621*e1ebb9ecSlm66018 		/* no further re-config needed */
1622*e1ebb9ecSlm66018 		vswp->recfg_reqd = B_FALSE;
1623*e1ebb9ecSlm66018 
1624*e1ebb9ecSlm66018 		vswp->smode_idx = s_idx;
1625*e1ebb9ecSlm66018 
1626*e1ebb9ecSlm66018 		return (0);
1627*e1ebb9ecSlm66018 	}
1628*e1ebb9ecSlm66018 
1629*e1ebb9ecSlm66018 reconfig_err_exit:
1630*e1ebb9ecSlm66018 	return (rv);
16311ae08745Sheppo }
16321ae08745Sheppo 
16331ae08745Sheppo /*
16341ae08745Sheppo  * receive callback routine. Invoked by MAC layer when there
16351ae08745Sheppo  * are pkts being passed up from physical device.
16361ae08745Sheppo  *
16371ae08745Sheppo  * PERF: It may be more efficient when the card is in promisc
16381ae08745Sheppo  * mode to check the dest address of the pkts here (against
16391ae08745Sheppo  * the FDB) rather than checking later. Needs to be investigated.
16401ae08745Sheppo  */
16411ae08745Sheppo static void
16421ae08745Sheppo vsw_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp)
16431ae08745Sheppo {
16441ae08745Sheppo 	_NOTE(ARGUNUSED(mrh))
16451ae08745Sheppo 
16461ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
16471ae08745Sheppo 
16481ae08745Sheppo 	ASSERT(vswp != NULL);
16491ae08745Sheppo 
16501ae08745Sheppo 	D1(vswp, "vsw_rx_cb: enter");
16511ae08745Sheppo 
16521ae08745Sheppo 	/* switch the chain of packets received */
16531ae08745Sheppo 	vsw_switch_frame(vswp, mp, VSW_PHYSDEV, NULL, NULL);
16541ae08745Sheppo 
16551ae08745Sheppo 	D1(vswp, "vsw_rx_cb: exit");
16561ae08745Sheppo }
16571ae08745Sheppo 
16581ae08745Sheppo /*
16591ae08745Sheppo  * Send a message out over the physical device via the MAC layer.
16601ae08745Sheppo  *
16611ae08745Sheppo  * Returns any mblks that it was unable to transmit.
16621ae08745Sheppo  */
16631ae08745Sheppo static mblk_t *
16641ae08745Sheppo vsw_tx_msg(vsw_t *vswp, mblk_t *mp)
16651ae08745Sheppo {
16661ae08745Sheppo 	const mac_txinfo_t	*mtp;
16671ae08745Sheppo 	mblk_t			*nextp;
16681ae08745Sheppo 
16691ae08745Sheppo 	if (vswp->mh == NULL) {
16701ae08745Sheppo 		DERR(vswp, "vsw_tx_msg: dropping pkts: no tx routine avail");
16711ae08745Sheppo 		return (mp);
16721ae08745Sheppo 	} else {
16731ae08745Sheppo 		for (;;) {
16741ae08745Sheppo 			nextp = mp->b_next;
16751ae08745Sheppo 			mp->b_next = NULL;
16761ae08745Sheppo 
16771ae08745Sheppo 			mtp = vswp->txinfo;
16781ae08745Sheppo 			if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) {
16791ae08745Sheppo 				mp->b_next = nextp;
16801ae08745Sheppo 				break;
16811ae08745Sheppo 			}
16821ae08745Sheppo 
16831ae08745Sheppo 			if ((mp = nextp) == NULL)
16841ae08745Sheppo 				break;
16851ae08745Sheppo 
16861ae08745Sheppo 		}
16871ae08745Sheppo 
16881ae08745Sheppo 	}
16891ae08745Sheppo 
16901ae08745Sheppo 	return (mp);
16911ae08745Sheppo }
16921ae08745Sheppo 
16931ae08745Sheppo /*
16941ae08745Sheppo  * Register with the MAC layer as a network device, so we
16951ae08745Sheppo  * can be plumbed if necessary.
16961ae08745Sheppo  */
16971ae08745Sheppo static int
16981ae08745Sheppo vsw_mac_register(vsw_t *vswp)
16991ae08745Sheppo {
1700ba2e4443Sseb 	mac_register_t	*macp;
1701ba2e4443Sseb 	int		rv;
17021ae08745Sheppo 
17031ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
17041ae08745Sheppo 
1705ba2e4443Sseb 	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
1706ba2e4443Sseb 		return (EINVAL);
1707ba2e4443Sseb 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
17081ae08745Sheppo 	macp->m_driver = vswp;
1709ba2e4443Sseb 	macp->m_dip = vswp->dip;
1710ba2e4443Sseb 	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
1711ba2e4443Sseb 	macp->m_callbacks = &vsw_m_callbacks;
1712ba2e4443Sseb 	macp->m_min_sdu = 0;
1713ba2e4443Sseb 	macp->m_max_sdu = ETHERMTU;
1714ba2e4443Sseb 	rv = mac_register(macp, &vswp->if_mh);
1715ba2e4443Sseb 	mac_free(macp);
1716ba2e4443Sseb 	if (rv == 0)
1717ba2e4443Sseb 		vswp->if_state |= VSW_IF_REG;
17181ae08745Sheppo 
17191ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
17201ae08745Sheppo 
17211ae08745Sheppo 	return (rv);
17221ae08745Sheppo }
17231ae08745Sheppo 
17241ae08745Sheppo static int
17251ae08745Sheppo vsw_mac_unregister(vsw_t *vswp)
17261ae08745Sheppo {
17271ae08745Sheppo 	int		rv = 0;
17281ae08745Sheppo 
17291ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
17301ae08745Sheppo 
17311ae08745Sheppo 	WRITE_ENTER(&vswp->if_lockrw);
17321ae08745Sheppo 
1733ba2e4443Sseb 	if (vswp->if_state & VSW_IF_REG) {
1734ba2e4443Sseb 		rv = mac_unregister(vswp->if_mh);
17351ae08745Sheppo 		if (rv != 0) {
17361ae08745Sheppo 			DWARN(vswp, "%s: unable to unregister from MAC "
17371ae08745Sheppo 				"framework", __func__);
17381ae08745Sheppo 
17391ae08745Sheppo 			RW_EXIT(&vswp->if_lockrw);
17401ae08745Sheppo 			D1(vswp, "%s: fail exit", __func__);
17411ae08745Sheppo 			return (rv);
17421ae08745Sheppo 		}
17431ae08745Sheppo 
1744ba2e4443Sseb 		/* mark i/f as down and unregistered */
1745ba2e4443Sseb 		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
17461ae08745Sheppo 	}
17471ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
17481ae08745Sheppo 
1749*e1ebb9ecSlm66018 	vswp->mdprops &= ~(VSW_MD_MACADDR | VSW_DEV_MACADDR);
1750d10e4ef2Snarayan 
17511ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
17521ae08745Sheppo 
17531ae08745Sheppo 	return (rv);
17541ae08745Sheppo }
17551ae08745Sheppo 
1756ba2e4443Sseb static int
1757ba2e4443Sseb vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
17581ae08745Sheppo {
17591ae08745Sheppo 	vsw_t			*vswp = (vsw_t *)arg;
17601ae08745Sheppo 
17611ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
17621ae08745Sheppo 
1763ba2e4443Sseb 	if (vswp->mh == NULL)
1764ba2e4443Sseb 		return (EINVAL);
17651ae08745Sheppo 
17661ae08745Sheppo 	/* return stats from underlying device */
1767ba2e4443Sseb 	*val = mac_stat_get(vswp->mh, stat);
1768ba2e4443Sseb 	return (0);
17691ae08745Sheppo }
17701ae08745Sheppo 
17711ae08745Sheppo static void
17721ae08745Sheppo vsw_m_stop(void *arg)
17731ae08745Sheppo {
17741ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
17751ae08745Sheppo 
17761ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
17771ae08745Sheppo 
17781ae08745Sheppo 	WRITE_ENTER(&vswp->if_lockrw);
17791ae08745Sheppo 	vswp->if_state &= ~VSW_IF_UP;
17801ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
17811ae08745Sheppo 
17821ae08745Sheppo 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
17831ae08745Sheppo }
17841ae08745Sheppo 
17851ae08745Sheppo static int
17861ae08745Sheppo vsw_m_start(void *arg)
17871ae08745Sheppo {
17881ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
17891ae08745Sheppo 
17901ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
17911ae08745Sheppo 
17921ae08745Sheppo 	WRITE_ENTER(&vswp->if_lockrw);
17931ae08745Sheppo 	vswp->if_state |= VSW_IF_UP;
17941ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
17951ae08745Sheppo 
17961ae08745Sheppo 	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
17971ae08745Sheppo 	return (0);
17981ae08745Sheppo }
17991ae08745Sheppo 
18001ae08745Sheppo /*
18011ae08745Sheppo  * Change the local interface address.
18021ae08745Sheppo  */
18031ae08745Sheppo static int
18041ae08745Sheppo vsw_m_unicst(void *arg, const uint8_t *macaddr)
18051ae08745Sheppo {
18061ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
18071ae08745Sheppo 
18081ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
18091ae08745Sheppo 
18101ae08745Sheppo 	WRITE_ENTER(&vswp->if_lockrw);
18111ae08745Sheppo 	ether_copy(macaddr, &vswp->if_addr);
18121ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
18131ae08745Sheppo 
18141ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
18151ae08745Sheppo 
18161ae08745Sheppo 	return (0);
18171ae08745Sheppo }
18181ae08745Sheppo 
18191ae08745Sheppo static int
18201ae08745Sheppo vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
18211ae08745Sheppo {
18221ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
18231ae08745Sheppo 	mcst_addr_t	*mcst_p = NULL;
18241ae08745Sheppo 	uint64_t	addr = 0x0;
1825*e1ebb9ecSlm66018 	int		i, ret = 0;
18261ae08745Sheppo 
18271ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
18281ae08745Sheppo 
18291ae08745Sheppo 	/*
18301ae08745Sheppo 	 * Convert address into form that can be used
18311ae08745Sheppo 	 * as hash table key.
18321ae08745Sheppo 	 */
18331ae08745Sheppo 	for (i = 0; i < ETHERADDRL; i++) {
18341ae08745Sheppo 		addr = (addr << 8) | mca[i];
18351ae08745Sheppo 	}
18361ae08745Sheppo 
18371ae08745Sheppo 	D2(vswp, "%s: addr = 0x%llx", __func__, addr);
18381ae08745Sheppo 
18391ae08745Sheppo 	if (add) {
18401ae08745Sheppo 		D2(vswp, "%s: adding multicast", __func__);
18411ae08745Sheppo 		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
18421ae08745Sheppo 			/*
18431ae08745Sheppo 			 * Update the list of multicast addresses
18441ae08745Sheppo 			 * contained within the vsw_t structure to
18451ae08745Sheppo 			 * include this new one.
18461ae08745Sheppo 			 */
18471ae08745Sheppo 			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
18481ae08745Sheppo 			if (mcst_p == NULL) {
18491ae08745Sheppo 				DERR(vswp, "%s unable to alloc mem", __func__);
18501ae08745Sheppo 				return (1);
18511ae08745Sheppo 			}
18521ae08745Sheppo 			mcst_p->addr = addr;
18531ae08745Sheppo 
18541ae08745Sheppo 			mutex_enter(&vswp->mca_lock);
18551ae08745Sheppo 			mcst_p->nextp = vswp->mcap;
18561ae08745Sheppo 			vswp->mcap = mcst_p;
18571ae08745Sheppo 			mutex_exit(&vswp->mca_lock);
18581ae08745Sheppo 
18591ae08745Sheppo 			/*
18601ae08745Sheppo 			 * Call into the underlying driver to program the
18611ae08745Sheppo 			 * address into HW.
18621ae08745Sheppo 			 */
1863*e1ebb9ecSlm66018 			if (vswp->mh != NULL) {
1864*e1ebb9ecSlm66018 				ret = mac_multicst_add(vswp->mh, mca);
1865*e1ebb9ecSlm66018 				if (ret != 0) {
1866*e1ebb9ecSlm66018 					cmn_err(CE_WARN, "!unable to add "
1867*e1ebb9ecSlm66018 						"multicast address");
1868*e1ebb9ecSlm66018 					goto vsw_remove_addr;
1869*e1ebb9ecSlm66018 				}
18701ae08745Sheppo 			}
18711ae08745Sheppo 		} else {
1872*e1ebb9ecSlm66018 			cmn_err(CE_WARN, "!unable to add multicast address");
1873*e1ebb9ecSlm66018 		}
1874*e1ebb9ecSlm66018 		return (ret);
1875*e1ebb9ecSlm66018 	}
1876*e1ebb9ecSlm66018 
1877*e1ebb9ecSlm66018 vsw_remove_addr:
1878*e1ebb9ecSlm66018 
18791ae08745Sheppo 	D2(vswp, "%s: removing multicast", __func__);
18801ae08745Sheppo 	/*
18811ae08745Sheppo 	 * Remove the address from the hash table..
18821ae08745Sheppo 	 */
18831ae08745Sheppo 	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
18841ae08745Sheppo 
18851ae08745Sheppo 		/*
18861ae08745Sheppo 		 * ..and then from the list maintained in the
18871ae08745Sheppo 		 * vsw_t structure.
18881ae08745Sheppo 		 */
18891ae08745Sheppo 		vsw_del_addr(VSW_LOCALDEV, vswp, addr);
18901ae08745Sheppo 
18911ae08745Sheppo 		if (vswp->mh != NULL)
18921ae08745Sheppo 			(void) mac_multicst_remove(vswp->mh, mca);
18931ae08745Sheppo 	}
18941ae08745Sheppo 
18951ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
18961ae08745Sheppo 
18971ae08745Sheppo 	return (0);
18981ae08745Sheppo }
18991ae08745Sheppo 
19001ae08745Sheppo static int
19011ae08745Sheppo vsw_m_promisc(void *arg, boolean_t on)
19021ae08745Sheppo {
19031ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
19041ae08745Sheppo 
19051ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
19061ae08745Sheppo 
19071ae08745Sheppo 	WRITE_ENTER(&vswp->if_lockrw);
19081ae08745Sheppo 	if (on)
19091ae08745Sheppo 		vswp->if_state |= VSW_IF_PROMISC;
19101ae08745Sheppo 	else
19111ae08745Sheppo 		vswp->if_state &= ~VSW_IF_PROMISC;
19121ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
19131ae08745Sheppo 
19141ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
19151ae08745Sheppo 
19161ae08745Sheppo 	return (0);
19171ae08745Sheppo }
19181ae08745Sheppo 
19191ae08745Sheppo static mblk_t *
19201ae08745Sheppo vsw_m_tx(void *arg, mblk_t *mp)
19211ae08745Sheppo {
19221ae08745Sheppo 	vsw_t		*vswp = (vsw_t *)arg;
19231ae08745Sheppo 
19241ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
19251ae08745Sheppo 
19261ae08745Sheppo 	vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);
19271ae08745Sheppo 
19281ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
19291ae08745Sheppo 
19301ae08745Sheppo 	return (NULL);
19311ae08745Sheppo }
19321ae08745Sheppo 
19331ae08745Sheppo /*
19341ae08745Sheppo  * Register for machine description (MD) updates.
19351ae08745Sheppo  */
19361ae08745Sheppo static void
19371ae08745Sheppo vsw_mdeg_register(vsw_t *vswp)
19381ae08745Sheppo {
19391ae08745Sheppo 	mdeg_prop_spec_t	*pspecp;
19401ae08745Sheppo 	mdeg_node_spec_t	*inst_specp;
19411ae08745Sheppo 	mdeg_handle_t		mdeg_hdl;
19421ae08745Sheppo 	size_t			templatesz;
19431ae08745Sheppo 	int			inst, rv;
19441ae08745Sheppo 
19451ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
19461ae08745Sheppo 
19471ae08745Sheppo 	inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip,
19481ae08745Sheppo 		DDI_PROP_DONTPASS, reg_propname, -1);
19491ae08745Sheppo 	if (inst == -1) {
19501ae08745Sheppo 		DERR(vswp, "%s: unable to get %s property",
19511ae08745Sheppo 						__func__, reg_propname);
19521ae08745Sheppo 		return;
19531ae08745Sheppo 	}
19541ae08745Sheppo 
19551ae08745Sheppo 	D2(vswp, "%s: instance %d registering with mdeg", __func__, inst);
19561ae08745Sheppo 
19571ae08745Sheppo 	/*
19581ae08745Sheppo 	 * Allocate and initialize a per-instance copy
19591ae08745Sheppo 	 * of the global property spec array that will
19601ae08745Sheppo 	 * uniquely identify this vsw instance.
19611ae08745Sheppo 	 */
19621ae08745Sheppo 	templatesz = sizeof (vsw_prop_template);
19631ae08745Sheppo 	pspecp = kmem_zalloc(templatesz, KM_SLEEP);
19641ae08745Sheppo 
19651ae08745Sheppo 	bcopy(vsw_prop_template, pspecp, templatesz);
19661ae08745Sheppo 
19671ae08745Sheppo 	VSW_SET_MDEG_PROP_INST(pspecp, inst);
19681ae08745Sheppo 
19691ae08745Sheppo 	/* initialize the complete prop spec structure */
19701ae08745Sheppo 	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
19711ae08745Sheppo 	inst_specp->namep = "virtual-device";
19721ae08745Sheppo 	inst_specp->specp = pspecp;
19731ae08745Sheppo 
19741ae08745Sheppo 	/* perform the registration */
19751ae08745Sheppo 	rv = mdeg_register(inst_specp, &vport_match, vsw_mdeg_cb,
19761ae08745Sheppo 	    (void *)vswp, &mdeg_hdl);
19771ae08745Sheppo 
19781ae08745Sheppo 	if (rv != MDEG_SUCCESS) {
19791ae08745Sheppo 		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
19801ae08745Sheppo 		kmem_free(inst_specp, sizeof (mdeg_node_spec_t));
19811ae08745Sheppo 		kmem_free(pspecp, templatesz);
19821ae08745Sheppo 		return;
19831ae08745Sheppo 	}
19841ae08745Sheppo 
19851ae08745Sheppo 	/* save off data that will be needed later */
19861ae08745Sheppo 	vswp->inst_spec = inst_specp;
19871ae08745Sheppo 	vswp->mdeg_hdl = mdeg_hdl;
19881ae08745Sheppo 
19891ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
19901ae08745Sheppo }
19911ae08745Sheppo 
19921ae08745Sheppo static void
19931ae08745Sheppo vsw_mdeg_unregister(vsw_t *vswp)
19941ae08745Sheppo {
19951ae08745Sheppo 	D1(vswp, "vsw_mdeg_unregister: enter");
19961ae08745Sheppo 
19971ae08745Sheppo 	(void) mdeg_unregister(vswp->mdeg_hdl);
19981ae08745Sheppo 
19991ae08745Sheppo 	if (vswp->inst_spec->specp != NULL) {
20001ae08745Sheppo 		(void) kmem_free(vswp->inst_spec->specp,
20011ae08745Sheppo 			sizeof (vsw_prop_template));
20021ae08745Sheppo 		vswp->inst_spec->specp = NULL;
20031ae08745Sheppo 	}
20041ae08745Sheppo 
20051ae08745Sheppo 	if (vswp->inst_spec != NULL) {
20061ae08745Sheppo 		(void) kmem_free(vswp->inst_spec,
20071ae08745Sheppo 			sizeof (mdeg_node_spec_t));
20081ae08745Sheppo 		vswp->inst_spec = NULL;
20091ae08745Sheppo 	}
20101ae08745Sheppo 
20111ae08745Sheppo 	D1(vswp, "vsw_mdeg_unregister: exit");
20121ae08745Sheppo }
20131ae08745Sheppo 
20141ae08745Sheppo static int
20151ae08745Sheppo vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
20161ae08745Sheppo {
20171ae08745Sheppo 	vsw_t		*vswp;
20181ae08745Sheppo 	int		idx;
20191ae08745Sheppo 	md_t		*mdp;
20201ae08745Sheppo 	mde_cookie_t	node;
20211ae08745Sheppo 	uint64_t	inst;
20221ae08745Sheppo 
20231ae08745Sheppo 	if (resp == NULL)
20241ae08745Sheppo 		return (MDEG_FAILURE);
20251ae08745Sheppo 
20261ae08745Sheppo 	vswp = (vsw_t *)cb_argp;
20271ae08745Sheppo 
20281ae08745Sheppo 	D1(vswp, "%s: added %d : removed %d : matched %d",
20291ae08745Sheppo 		__func__, resp->added.nelem, resp->removed.nelem,
20301ae08745Sheppo 		resp->match_prev.nelem);
20311ae08745Sheppo 
20321ae08745Sheppo 	/* process added ports */
20331ae08745Sheppo 	for (idx = 0; idx < resp->added.nelem; idx++) {
20341ae08745Sheppo 		mdp = resp->added.mdp;
20351ae08745Sheppo 		node = resp->added.mdep[idx];
20361ae08745Sheppo 
20371ae08745Sheppo 		D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node);
20381ae08745Sheppo 
20391ae08745Sheppo 		if (vsw_port_add(vswp, mdp, &node) != 0) {
20401ae08745Sheppo 			cmn_err(CE_WARN, "Unable to add new port (0x%lx)",
20411ae08745Sheppo 					node);
20421ae08745Sheppo 		}
20431ae08745Sheppo 	}
20441ae08745Sheppo 
20451ae08745Sheppo 	/* process removed ports */
20461ae08745Sheppo 	for (idx = 0; idx < resp->removed.nelem; idx++) {
20471ae08745Sheppo 		mdp = resp->removed.mdp;
20481ae08745Sheppo 		node = resp->removed.mdep[idx];
20491ae08745Sheppo 
20501ae08745Sheppo 		if (md_get_prop_val(mdp, node, id_propname, &inst)) {
20511ae08745Sheppo 			DERR(vswp, "%s: prop(%s) not found port(%d)",
20521ae08745Sheppo 				__func__, id_propname, idx);
20531ae08745Sheppo 			continue;
20541ae08745Sheppo 		}
20551ae08745Sheppo 
20561ae08745Sheppo 		D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node);
20571ae08745Sheppo 
20581ae08745Sheppo 		if (vsw_port_detach(vswp, inst) != 0) {
20591ae08745Sheppo 			cmn_err(CE_WARN, "Unable to remove port %ld", inst);
20601ae08745Sheppo 		}
20611ae08745Sheppo 	}
20621ae08745Sheppo 
20631ae08745Sheppo 	/*
20641ae08745Sheppo 	 * Currently no support for updating already active ports.
20651ae08745Sheppo 	 * So, ignore the match_curr and match_priv arrays for now.
20661ae08745Sheppo 	 */
20671ae08745Sheppo 
20681ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
20691ae08745Sheppo 
20701ae08745Sheppo 	return (MDEG_SUCCESS);
20711ae08745Sheppo }
20721ae08745Sheppo 
20731ae08745Sheppo /*
20741ae08745Sheppo  * Add a new port to the system.
20751ae08745Sheppo  *
20761ae08745Sheppo  * Returns 0 on success, 1 on failure.
20771ae08745Sheppo  */
20781ae08745Sheppo int
20791ae08745Sheppo vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
20801ae08745Sheppo {
20811ae08745Sheppo 	uint64_t		ldc_id;
20821ae08745Sheppo 	uint8_t			*addrp;
20831ae08745Sheppo 	int			i, addrsz;
20841ae08745Sheppo 	int			num_nodes = 0, nchan = 0;
20851ae08745Sheppo 	int			listsz = 0;
20861ae08745Sheppo 	mde_cookie_t		*listp = NULL;
20871ae08745Sheppo 	struct ether_addr	ea;
20881ae08745Sheppo 	uint64_t		macaddr;
20891ae08745Sheppo 	uint64_t		inst = 0;
20901ae08745Sheppo 	vsw_port_t		*port;
20911ae08745Sheppo 
20921ae08745Sheppo 	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
20931ae08745Sheppo 		DWARN(vswp, "%s: prop(%s) not found", __func__,
20941ae08745Sheppo 			id_propname);
20951ae08745Sheppo 		return (1);
20961ae08745Sheppo 	}
20971ae08745Sheppo 
20981ae08745Sheppo 	/*
20991ae08745Sheppo 	 * Find the channel endpoint node(s) (which should be under this
21001ae08745Sheppo 	 * port node) which contain the channel id(s).
21011ae08745Sheppo 	 */
21021ae08745Sheppo 	if ((num_nodes = md_node_count(mdp)) <= 0) {
21031ae08745Sheppo 		DERR(vswp, "%s: invalid number of nodes found (%d)",
21041ae08745Sheppo 			__func__, num_nodes);
21051ae08745Sheppo 		return (1);
21061ae08745Sheppo 	}
21071ae08745Sheppo 
21081ae08745Sheppo 	/* allocate enough space for node list */
21091ae08745Sheppo 	listsz = num_nodes * sizeof (mde_cookie_t);
21101ae08745Sheppo 	listp = kmem_zalloc(listsz, KM_SLEEP);
21111ae08745Sheppo 
21121ae08745Sheppo 	nchan = md_scan_dag(mdp, *node,
21131ae08745Sheppo 		md_find_name(mdp, chan_propname),
21141ae08745Sheppo 		md_find_name(mdp, "fwd"), listp);
21151ae08745Sheppo 
21161ae08745Sheppo 	if (nchan <= 0) {
21171ae08745Sheppo 		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
21181ae08745Sheppo 		kmem_free(listp, listsz);
21191ae08745Sheppo 		return (1);
21201ae08745Sheppo 	}
21211ae08745Sheppo 
21221ae08745Sheppo 	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);
21231ae08745Sheppo 
21241ae08745Sheppo 	/* use property from first node found */
21251ae08745Sheppo 	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
21261ae08745Sheppo 		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
21271ae08745Sheppo 			id_propname);
21281ae08745Sheppo 		kmem_free(listp, listsz);
21291ae08745Sheppo 		return (1);
21301ae08745Sheppo 	}
21311ae08745Sheppo 
21321ae08745Sheppo 	/* don't need list any more */
21331ae08745Sheppo 	kmem_free(listp, listsz);
21341ae08745Sheppo 
21351ae08745Sheppo 	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);
21361ae08745Sheppo 
21371ae08745Sheppo 	/* read mac-address property */
21381ae08745Sheppo 	if (md_get_prop_data(mdp, *node, remaddr_propname,
21391ae08745Sheppo 					&addrp, &addrsz)) {
21401ae08745Sheppo 		DWARN(vswp, "%s: prop(%s) not found",
21411ae08745Sheppo 				__func__, remaddr_propname);
21421ae08745Sheppo 		return (1);
21431ae08745Sheppo 	}
21441ae08745Sheppo 
21451ae08745Sheppo 	if (addrsz < ETHERADDRL) {
21461ae08745Sheppo 		DWARN(vswp, "%s: invalid address size", __func__);
21471ae08745Sheppo 		return (1);
21481ae08745Sheppo 	}
21491ae08745Sheppo 
21501ae08745Sheppo 	macaddr = *((uint64_t *)addrp);
21511ae08745Sheppo 	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);
21521ae08745Sheppo 
21531ae08745Sheppo 	for (i = ETHERADDRL - 1; i >= 0; i--) {
21541ae08745Sheppo 		ea.ether_addr_octet[i] = macaddr & 0xFF;
21551ae08745Sheppo 		macaddr >>= 8;
21561ae08745Sheppo 	}
21571ae08745Sheppo 
21581ae08745Sheppo 	if (vsw_port_attach(vswp, (int)inst, &ldc_id, 1, &ea) != 0) {
21591ae08745Sheppo 		DERR(vswp, "%s: failed to attach port", __func__);
21601ae08745Sheppo 		return (1);
21611ae08745Sheppo 	}
21621ae08745Sheppo 
21631ae08745Sheppo 	port = vsw_lookup_port(vswp, (int)inst);
21641ae08745Sheppo 
21651ae08745Sheppo 	/* just successfuly created the port, so it should exist */
21661ae08745Sheppo 	ASSERT(port != NULL);
21671ae08745Sheppo 
21681ae08745Sheppo 	return (0);
21691ae08745Sheppo }
21701ae08745Sheppo 
21711ae08745Sheppo /*
21721ae08745Sheppo  * Attach the specified port.
21731ae08745Sheppo  *
21741ae08745Sheppo  * Returns 0 on success, 1 on failure.
21751ae08745Sheppo  */
21761ae08745Sheppo static int
21771ae08745Sheppo vsw_port_attach(vsw_t *vswp, int p_instance, uint64_t *ldcids, int nids,
21781ae08745Sheppo struct ether_addr *macaddr)
21791ae08745Sheppo {
21801ae08745Sheppo 	vsw_port_list_t		*plist = &vswp->plist;
21811ae08745Sheppo 	vsw_port_t		*port, **prev_port;
21821ae08745Sheppo 	int			i;
21831ae08745Sheppo 
21841ae08745Sheppo 	D1(vswp, "%s: enter : port %d", __func__, p_instance);
21851ae08745Sheppo 
21861ae08745Sheppo 	/* port already exists? */
21871ae08745Sheppo 	READ_ENTER(&plist->lockrw);
21881ae08745Sheppo 	for (port = plist->head; port != NULL; port = port->p_next) {
21891ae08745Sheppo 		if (port->p_instance == p_instance) {
21901ae08745Sheppo 			DWARN(vswp, "%s: port instance %d already attached",
21911ae08745Sheppo 				__func__, p_instance);
21921ae08745Sheppo 			RW_EXIT(&plist->lockrw);
21931ae08745Sheppo 			return (1);
21941ae08745Sheppo 		}
21951ae08745Sheppo 	}
21961ae08745Sheppo 	RW_EXIT(&plist->lockrw);
21971ae08745Sheppo 
21981ae08745Sheppo 	port = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);
21991ae08745Sheppo 	port->p_vswp = vswp;
22001ae08745Sheppo 	port->p_instance = p_instance;
22011ae08745Sheppo 	port->p_ldclist.num_ldcs = 0;
22021ae08745Sheppo 	port->p_ldclist.head = NULL;
2203*e1ebb9ecSlm66018 	port->addr_set = VSW_ADDR_UNSET;
22041ae08745Sheppo 
22051ae08745Sheppo 	rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL);
22061ae08745Sheppo 
22071ae08745Sheppo 	mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL);
22081ae08745Sheppo 	mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL);
22091ae08745Sheppo 
22101ae08745Sheppo 	mutex_init(&port->ref_lock, NULL, MUTEX_DRIVER, NULL);
22111ae08745Sheppo 	cv_init(&port->ref_cv, NULL, CV_DRIVER, NULL);
22121ae08745Sheppo 
22131ae08745Sheppo 	mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL);
22141ae08745Sheppo 	cv_init(&port->state_cv, NULL, CV_DRIVER, NULL);
22151ae08745Sheppo 	port->state = VSW_PORT_INIT;
22161ae08745Sheppo 
22171ae08745Sheppo 	if (nids > VSW_PORT_MAX_LDCS) {
22181ae08745Sheppo 		D2(vswp, "%s: using first of %d ldc ids",
22191ae08745Sheppo 			__func__, nids);
22201ae08745Sheppo 		nids = VSW_PORT_MAX_LDCS;
22211ae08745Sheppo 	}
22221ae08745Sheppo 
22231ae08745Sheppo 	D2(vswp, "%s: %d nids", __func__, nids);
22241ae08745Sheppo 	for (i = 0; i < nids; i++) {
22251ae08745Sheppo 		D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]);
22261ae08745Sheppo 		if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) {
22271ae08745Sheppo 			DERR(vswp, "%s: ldc_attach failed", __func__);
22281ae08745Sheppo 
22291ae08745Sheppo 			rw_destroy(&port->p_ldclist.lockrw);
22301ae08745Sheppo 
22311ae08745Sheppo 			cv_destroy(&port->ref_cv);
22321ae08745Sheppo 			mutex_destroy(&port->ref_lock);
22331ae08745Sheppo 
22341ae08745Sheppo 			cv_destroy(&port->state_cv);
22351ae08745Sheppo 			mutex_destroy(&port->state_lock);
22361ae08745Sheppo 
22371ae08745Sheppo 			mutex_destroy(&port->tx_lock);
22381ae08745Sheppo 			mutex_destroy(&port->mca_lock);
22391ae08745Sheppo 			kmem_free(port, sizeof (vsw_port_t));
22401ae08745Sheppo 			return (1);
22411ae08745Sheppo 		}
22421ae08745Sheppo 	}
22431ae08745Sheppo 
22441ae08745Sheppo 	ether_copy(macaddr, &port->p_macaddr);
22451ae08745Sheppo 
22461ae08745Sheppo 	WRITE_ENTER(&plist->lockrw);
22471ae08745Sheppo 
22481ae08745Sheppo 	/* create the fdb entry for this port/mac address */
22491ae08745Sheppo 	(void) vsw_add_fdb(vswp, port);
22501ae08745Sheppo 
2251*e1ebb9ecSlm66018 	(void) vsw_set_hw(vswp, port);
2252*e1ebb9ecSlm66018 
22531ae08745Sheppo 	/* link it into the list of ports for this vsw instance */
22541ae08745Sheppo 	prev_port = (vsw_port_t **)(&plist->head);
22551ae08745Sheppo 	port->p_next = *prev_port;
22561ae08745Sheppo 	*prev_port = port;
22571ae08745Sheppo 	plist->num_ports++;
22581ae08745Sheppo 	RW_EXIT(&plist->lockrw);
22591ae08745Sheppo 
22601ae08745Sheppo 	/*
22611ae08745Sheppo 	 * Initialise the port and any ldc's under it.
22621ae08745Sheppo 	 */
22631ae08745Sheppo 	(void) vsw_init_ldcs(port);
22641ae08745Sheppo 
22651ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
22661ae08745Sheppo 	return (0);
22671ae08745Sheppo }
22681ae08745Sheppo 
22691ae08745Sheppo /*
22701ae08745Sheppo  * Detach the specified port.
22711ae08745Sheppo  *
22721ae08745Sheppo  * Returns 0 on success, 1 on failure.
22731ae08745Sheppo  */
22741ae08745Sheppo static int
22751ae08745Sheppo vsw_port_detach(vsw_t *vswp, int p_instance)
22761ae08745Sheppo {
22771ae08745Sheppo 	vsw_port_t	*port = NULL;
22781ae08745Sheppo 	vsw_port_list_t	*plist = &vswp->plist;
22791ae08745Sheppo 
22801ae08745Sheppo 	D1(vswp, "%s: enter: port id %d", __func__, p_instance);
22811ae08745Sheppo 
22821ae08745Sheppo 	WRITE_ENTER(&plist->lockrw);
22831ae08745Sheppo 
22841ae08745Sheppo 	if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) {
22851ae08745Sheppo 		RW_EXIT(&plist->lockrw);
22861ae08745Sheppo 		return (1);
22871ae08745Sheppo 	}
22881ae08745Sheppo 
22891ae08745Sheppo 	if (vsw_plist_del_node(vswp, port)) {
22901ae08745Sheppo 		RW_EXIT(&plist->lockrw);
22911ae08745Sheppo 		return (1);
22921ae08745Sheppo 	}
22931ae08745Sheppo 
2294*e1ebb9ecSlm66018 	/* Remove address if was programmed into HW. */
2295*e1ebb9ecSlm66018 	(void) vsw_unset_hw(vswp, port);
2296*e1ebb9ecSlm66018 
22971ae08745Sheppo 	/* Remove the fdb entry for this port/mac address */
22981ae08745Sheppo 	(void) vsw_del_fdb(vswp, port);
22991ae08745Sheppo 
23001ae08745Sheppo 	/* Remove any multicast addresses.. */
23011ae08745Sheppo 	vsw_del_mcst_port(port);
23021ae08745Sheppo 
23031ae08745Sheppo 	/*
2304*e1ebb9ecSlm66018 	 * No longer need to hold writer lock on port list now
2305*e1ebb9ecSlm66018 	 * that we have unlinked the target port from the list.
23061ae08745Sheppo 	 */
23071ae08745Sheppo 	RW_EXIT(&plist->lockrw);
23081ae08745Sheppo 
2309*e1ebb9ecSlm66018 	READ_ENTER(&plist->lockrw);
2310*e1ebb9ecSlm66018 
2311*e1ebb9ecSlm66018 	if (vswp->recfg_reqd)
2312*e1ebb9ecSlm66018 		(void) vsw_reconfig_hw(vswp);
2313*e1ebb9ecSlm66018 
2314*e1ebb9ecSlm66018 	RW_EXIT(&plist->lockrw);
2315*e1ebb9ecSlm66018 
23161ae08745Sheppo 	if (vsw_port_delete(port)) {
23171ae08745Sheppo 		return (1);
23181ae08745Sheppo 	}
23191ae08745Sheppo 
23201ae08745Sheppo 	D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance);
23211ae08745Sheppo 	return (0);
23221ae08745Sheppo }
23231ae08745Sheppo 
23241ae08745Sheppo /*
23251ae08745Sheppo  * Detach all active ports.
23261ae08745Sheppo  *
23271ae08745Sheppo  * Returns 0 on success, 1 on failure.
23281ae08745Sheppo  */
23291ae08745Sheppo static int
23301ae08745Sheppo vsw_detach_ports(vsw_t *vswp)
23311ae08745Sheppo {
23321ae08745Sheppo 	vsw_port_list_t 	*plist = &vswp->plist;
23331ae08745Sheppo 	vsw_port_t		*port = NULL;
23341ae08745Sheppo 
23351ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
23361ae08745Sheppo 
23371ae08745Sheppo 	WRITE_ENTER(&plist->lockrw);
23381ae08745Sheppo 
23391ae08745Sheppo 	while ((port = plist->head) != NULL) {
23401ae08745Sheppo 		if (vsw_plist_del_node(vswp, port)) {
23411ae08745Sheppo 			DERR(vswp, "%s: Error deleting port %d"
23421ae08745Sheppo 				" from port list", __func__,
23431ae08745Sheppo 				port->p_instance);
23441ae08745Sheppo 			RW_EXIT(&plist->lockrw);
23451ae08745Sheppo 			return (1);
23461ae08745Sheppo 		}
23471ae08745Sheppo 
2348*e1ebb9ecSlm66018 		/* Remove address if was programmed into HW. */
2349*e1ebb9ecSlm66018 		(void) vsw_unset_hw(vswp, port);
2350*e1ebb9ecSlm66018 
23511ae08745Sheppo 		/* Remove the fdb entry for this port/mac address */
23521ae08745Sheppo 		(void) vsw_del_fdb(vswp, port);
23531ae08745Sheppo 
23541ae08745Sheppo 		/* Remove any multicast addresses.. */
23551ae08745Sheppo 		vsw_del_mcst_port(port);
23561ae08745Sheppo 
23571ae08745Sheppo 		/*
23581ae08745Sheppo 		 * No longer need to hold the lock on the port list
23591ae08745Sheppo 		 * now that we have unlinked the target port from the
23601ae08745Sheppo 		 * list.
23611ae08745Sheppo 		 */
23621ae08745Sheppo 		RW_EXIT(&plist->lockrw);
23631ae08745Sheppo 		if (vsw_port_delete(port)) {
23641ae08745Sheppo 			DERR(vswp, "%s: Error deleting port %d",
23651ae08745Sheppo 				__func__, port->p_instance);
23661ae08745Sheppo 			return (1);
23671ae08745Sheppo 		}
23681ae08745Sheppo 		WRITE_ENTER(&plist->lockrw);
23691ae08745Sheppo 	}
23701ae08745Sheppo 	RW_EXIT(&plist->lockrw);
23711ae08745Sheppo 
23721ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
23731ae08745Sheppo 
23741ae08745Sheppo 	return (0);
23751ae08745Sheppo }
23761ae08745Sheppo 
23771ae08745Sheppo /*
23781ae08745Sheppo  * Delete the specified port.
23791ae08745Sheppo  *
23801ae08745Sheppo  * Returns 0 on success, 1 on failure.
23811ae08745Sheppo  */
23821ae08745Sheppo static int
23831ae08745Sheppo vsw_port_delete(vsw_port_t *port)
23841ae08745Sheppo {
23851ae08745Sheppo 	vsw_ldc_list_t 		*ldcl;
23861ae08745Sheppo 	vsw_t			*vswp = port->p_vswp;
23871ae08745Sheppo 
23881ae08745Sheppo 	D1(vswp, "%s: enter : port id %d", __func__, port->p_instance);
23891ae08745Sheppo 
23901ae08745Sheppo 	(void) vsw_uninit_ldcs(port);
23911ae08745Sheppo 
23921ae08745Sheppo 	/*
23931ae08745Sheppo 	 * Wait for any pending ctrl msg tasks which reference this
23941ae08745Sheppo 	 * port to finish.
23951ae08745Sheppo 	 */
23961ae08745Sheppo 	if (vsw_drain_port_taskq(port))
23971ae08745Sheppo 		return (1);
23981ae08745Sheppo 
23991ae08745Sheppo 	/*
24001ae08745Sheppo 	 * Wait for port reference count to hit zero.
24011ae08745Sheppo 	 */
24021ae08745Sheppo 	mutex_enter(&port->ref_lock);
24031ae08745Sheppo 	while (port->ref_cnt != 0)
24041ae08745Sheppo 		cv_wait(&port->ref_cv, &port->ref_lock);
24051ae08745Sheppo 	mutex_exit(&port->ref_lock);
24061ae08745Sheppo 
24071ae08745Sheppo 	/*
24081ae08745Sheppo 	 * Wait for any active callbacks to finish
24091ae08745Sheppo 	 */
24101ae08745Sheppo 	if (vsw_drain_ldcs(port))
24111ae08745Sheppo 		return (1);
24121ae08745Sheppo 
24131ae08745Sheppo 	ldcl = &port->p_ldclist;
24141ae08745Sheppo 	WRITE_ENTER(&ldcl->lockrw);
24151ae08745Sheppo 	while (ldcl->num_ldcs > 0) {
24161ae08745Sheppo 		if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) {;
24171ae08745Sheppo 			cmn_err(CE_WARN, "unable to detach ldc %ld",
24181ae08745Sheppo 					ldcl->head->ldc_id);
24191ae08745Sheppo 			RW_EXIT(&ldcl->lockrw);
24201ae08745Sheppo 			return (1);
24211ae08745Sheppo 		}
24221ae08745Sheppo 	}
24231ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
24241ae08745Sheppo 
24251ae08745Sheppo 	rw_destroy(&port->p_ldclist.lockrw);
24261ae08745Sheppo 
24271ae08745Sheppo 	mutex_destroy(&port->mca_lock);
24281ae08745Sheppo 	mutex_destroy(&port->tx_lock);
24291ae08745Sheppo 	cv_destroy(&port->ref_cv);
24301ae08745Sheppo 	mutex_destroy(&port->ref_lock);
24311ae08745Sheppo 
24321ae08745Sheppo 	cv_destroy(&port->state_cv);
24331ae08745Sheppo 	mutex_destroy(&port->state_lock);
24341ae08745Sheppo 
24351ae08745Sheppo 	kmem_free(port, sizeof (vsw_port_t));
24361ae08745Sheppo 
24371ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
24381ae08745Sheppo 
24391ae08745Sheppo 	return (0);
24401ae08745Sheppo }
24411ae08745Sheppo 
24421ae08745Sheppo /*
24431ae08745Sheppo  * Attach a logical domain channel (ldc) under a specified port.
24441ae08745Sheppo  *
24451ae08745Sheppo  * Returns 0 on success, 1 on failure.
24461ae08745Sheppo  */
24471ae08745Sheppo static int
24481ae08745Sheppo vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id)
24491ae08745Sheppo {
24501ae08745Sheppo 	vsw_t 		*vswp = port->p_vswp;
24511ae08745Sheppo 	vsw_ldc_list_t *ldcl = &port->p_ldclist;
24521ae08745Sheppo 	vsw_ldc_t 	*ldcp = NULL;
24531ae08745Sheppo 	ldc_attr_t 	attr;
24541ae08745Sheppo 	ldc_status_t	istatus;
24551ae08745Sheppo 	int 		status = DDI_FAILURE;
2456d10e4ef2Snarayan 	int		rv;
24571ae08745Sheppo 
24581ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
24591ae08745Sheppo 
24601ae08745Sheppo 	ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP);
24611ae08745Sheppo 	if (ldcp == NULL) {
24621ae08745Sheppo 		DERR(vswp, "%s: kmem_zalloc failed", __func__);
24631ae08745Sheppo 		return (1);
24641ae08745Sheppo 	}
24651ae08745Sheppo 	ldcp->ldc_id = ldc_id;
24661ae08745Sheppo 
2467d10e4ef2Snarayan 	/* allocate pool of receive mblks */
2468d10e4ef2Snarayan 	rv = vio_create_mblks(vsw_num_mblks, vsw_mblk_size, &(ldcp->rxh));
2469d10e4ef2Snarayan 	if (rv) {
2470d10e4ef2Snarayan 		DWARN(vswp, "%s: unable to create free mblk pool for"
2471d10e4ef2Snarayan 			" channel %ld (rv %d)", __func__, ldc_id, rv);
2472d10e4ef2Snarayan 		kmem_free(ldcp, sizeof (vsw_ldc_t));
2473d10e4ef2Snarayan 		return (1);
2474d10e4ef2Snarayan 	}
2475d10e4ef2Snarayan 
24761ae08745Sheppo 	mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL);
24771ae08745Sheppo 	mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL);
24781ae08745Sheppo 	mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL);
24791ae08745Sheppo 	cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL);
24801ae08745Sheppo 
24811ae08745Sheppo 	/* required for handshake with peer */
24821ae08745Sheppo 	ldcp->local_session = (uint64_t)ddi_get_lbolt();
24831ae08745Sheppo 	ldcp->peer_session = 0;
24841ae08745Sheppo 	ldcp->session_status = 0;
24851ae08745Sheppo 
24861ae08745Sheppo 	mutex_init(&ldcp->hss_lock, NULL, MUTEX_DRIVER, NULL);
24871ae08745Sheppo 	ldcp->hss_id = 1;	/* Initial handshake session id */
24881ae08745Sheppo 
24891ae08745Sheppo 	/* only set for outbound lane, inbound set by peer */
2490d10e4ef2Snarayan 	mutex_init(&ldcp->lane_in.seq_lock, NULL, MUTEX_DRIVER, NULL);
2491d10e4ef2Snarayan 	mutex_init(&ldcp->lane_out.seq_lock, NULL, MUTEX_DRIVER, NULL);
24921ae08745Sheppo 	vsw_set_lane_attr(vswp, &ldcp->lane_out);
24931ae08745Sheppo 
24941ae08745Sheppo 	attr.devclass = LDC_DEV_NT_SVC;
24951ae08745Sheppo 	attr.instance = ddi_get_instance(vswp->dip);
24961ae08745Sheppo 	attr.mode = LDC_MODE_UNRELIABLE;
2497*e1ebb9ecSlm66018 	attr.mtu = VSW_LDC_MTU;
24981ae08745Sheppo 	status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle);
24991ae08745Sheppo 	if (status != 0) {
25001ae08745Sheppo 		DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)",
25011ae08745Sheppo 		    __func__, ldc_id, status);
2502d10e4ef2Snarayan 		goto ldc_attach_fail;
25031ae08745Sheppo 	}
25041ae08745Sheppo 
25051ae08745Sheppo 	status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp);
25061ae08745Sheppo 	if (status != 0) {
25071ae08745Sheppo 		DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)",
25081ae08745Sheppo 		    __func__, ldc_id, status);
25091ae08745Sheppo 		(void) ldc_fini(ldcp->ldc_handle);
2510d10e4ef2Snarayan 		goto ldc_attach_fail;
25111ae08745Sheppo 	}
25121ae08745Sheppo 
25131ae08745Sheppo 
25141ae08745Sheppo 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
25151ae08745Sheppo 		DERR(vswp, "%s: ldc_status failed", __func__);
25161ae08745Sheppo 		return (1);
25171ae08745Sheppo 	}
25181ae08745Sheppo 
25191ae08745Sheppo 	ldcp->ldc_status = istatus;
25201ae08745Sheppo 	ldcp->ldc_port = port;
25211ae08745Sheppo 	ldcp->ldc_vswp = vswp;
25221ae08745Sheppo 
25231ae08745Sheppo 	/* link it into the list of channels for this port */
25241ae08745Sheppo 	WRITE_ENTER(&ldcl->lockrw);
25251ae08745Sheppo 	ldcp->ldc_next = ldcl->head;
25261ae08745Sheppo 	ldcl->head = ldcp;
25271ae08745Sheppo 	ldcl->num_ldcs++;
25281ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
25291ae08745Sheppo 
25301ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
25311ae08745Sheppo 	return (0);
2532d10e4ef2Snarayan 
2533d10e4ef2Snarayan ldc_attach_fail:
2534d10e4ef2Snarayan 	mutex_destroy(&ldcp->ldc_txlock);
2535d10e4ef2Snarayan 	mutex_destroy(&ldcp->ldc_cblock);
2536d10e4ef2Snarayan 
2537d10e4ef2Snarayan 	cv_destroy(&ldcp->drain_cv);
2538d10e4ef2Snarayan 
2539d10e4ef2Snarayan 	if (ldcp->rxh != NULL) {
2540d10e4ef2Snarayan 		if (vio_destroy_mblks(ldcp->rxh) != 0) {
2541d10e4ef2Snarayan 			/*
2542d10e4ef2Snarayan 			 * Something odd has happened, as the destroy
2543d10e4ef2Snarayan 			 * will only fail if some mblks have been allocated
2544d10e4ef2Snarayan 			 * from the pool already (which shouldn't happen)
2545d10e4ef2Snarayan 			 * and have not been returned.
2546d10e4ef2Snarayan 			 *
2547d10e4ef2Snarayan 			 * Add the pool pointer to a list maintained in
2548d10e4ef2Snarayan 			 * the device instance. Another attempt will be made
2549d10e4ef2Snarayan 			 * to free the pool when the device itself detaches.
2550d10e4ef2Snarayan 			 */
2551d10e4ef2Snarayan 			cmn_err(CE_WARN, "Creation of ldc channel %ld failed"
2552d10e4ef2Snarayan 				" and cannot destroy associated mblk pool",
2553d10e4ef2Snarayan 				ldc_id);
2554d10e4ef2Snarayan 			ldcp->rxh->nextp =  vswp->rxh;
2555d10e4ef2Snarayan 			vswp->rxh = ldcp->rxh;
2556d10e4ef2Snarayan 		}
2557d10e4ef2Snarayan 	}
2558d10e4ef2Snarayan 	mutex_destroy(&ldcp->drain_cv_lock);
2559d10e4ef2Snarayan 	mutex_destroy(&ldcp->hss_lock);
2560d10e4ef2Snarayan 
2561d10e4ef2Snarayan 	mutex_destroy(&ldcp->lane_in.seq_lock);
2562d10e4ef2Snarayan 	mutex_destroy(&ldcp->lane_out.seq_lock);
2563d10e4ef2Snarayan 	kmem_free(ldcp, sizeof (vsw_ldc_t));
2564d10e4ef2Snarayan 
2565d10e4ef2Snarayan 	return (1);
25661ae08745Sheppo }
25671ae08745Sheppo 
25681ae08745Sheppo /*
25691ae08745Sheppo  * Detach a logical domain channel (ldc) belonging to a
25701ae08745Sheppo  * particular port.
25711ae08745Sheppo  *
25721ae08745Sheppo  * Returns 0 on success, 1 on failure.
25731ae08745Sheppo  */
25741ae08745Sheppo static int
25751ae08745Sheppo vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id)
25761ae08745Sheppo {
25771ae08745Sheppo 	vsw_t 		*vswp = port->p_vswp;
25781ae08745Sheppo 	vsw_ldc_t 	*ldcp, *prev_ldcp;
25791ae08745Sheppo 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
25801ae08745Sheppo 	int 		rv;
25811ae08745Sheppo 
25821ae08745Sheppo 	prev_ldcp = ldcl->head;
25831ae08745Sheppo 	for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) {
25841ae08745Sheppo 		if (ldcp->ldc_id == ldc_id) {
25851ae08745Sheppo 			break;
25861ae08745Sheppo 		}
25871ae08745Sheppo 	}
25881ae08745Sheppo 
25891ae08745Sheppo 	/* specified ldc id not found */
25901ae08745Sheppo 	if (ldcp == NULL) {
25911ae08745Sheppo 		DERR(vswp, "%s: ldcp = NULL", __func__);
25921ae08745Sheppo 		return (1);
25931ae08745Sheppo 	}
25941ae08745Sheppo 
25951ae08745Sheppo 	D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id);
25961ae08745Sheppo 
25971ae08745Sheppo 	/*
25981ae08745Sheppo 	 * Before we can close the channel we must release any mapped
25991ae08745Sheppo 	 * resources (e.g. drings).
26001ae08745Sheppo 	 */
26011ae08745Sheppo 	vsw_free_lane_resources(ldcp, INBOUND);
26021ae08745Sheppo 	vsw_free_lane_resources(ldcp, OUTBOUND);
26031ae08745Sheppo 
26041ae08745Sheppo 	/*
26051ae08745Sheppo 	 * If the close fails we are in serious trouble, as won't
26061ae08745Sheppo 	 * be able to delete the parent port.
26071ae08745Sheppo 	 */
26081ae08745Sheppo 	if ((rv = ldc_close(ldcp->ldc_handle)) != 0) {
26091ae08745Sheppo 		DERR(vswp, "%s: error %d closing channel %lld",
26101ae08745Sheppo 			__func__, rv, ldcp->ldc_id);
26111ae08745Sheppo 		return (1);
26121ae08745Sheppo 	}
26131ae08745Sheppo 
26141ae08745Sheppo 	(void) ldc_fini(ldcp->ldc_handle);
26151ae08745Sheppo 
26161ae08745Sheppo 	ldcp->ldc_status = LDC_INIT;
26171ae08745Sheppo 	ldcp->ldc_handle = NULL;
26181ae08745Sheppo 	ldcp->ldc_vswp = NULL;
2619d10e4ef2Snarayan 
2620d10e4ef2Snarayan 	if (ldcp->rxh != NULL) {
2621d10e4ef2Snarayan 		if (vio_destroy_mblks(ldcp->rxh)) {
2622d10e4ef2Snarayan 			/*
2623d10e4ef2Snarayan 			 * Mostly likely some mblks are still in use and
2624d10e4ef2Snarayan 			 * have not been returned to the pool. Add the pool
2625d10e4ef2Snarayan 			 * to the list maintained in the device instance.
2626d10e4ef2Snarayan 			 * Another attempt will be made to destroy the pool
2627d10e4ef2Snarayan 			 * when the device detaches.
2628d10e4ef2Snarayan 			 */
2629d10e4ef2Snarayan 			ldcp->rxh->nextp =  vswp->rxh;
2630d10e4ef2Snarayan 			vswp->rxh = ldcp->rxh;
2631d10e4ef2Snarayan 		}
2632d10e4ef2Snarayan 	}
2633d10e4ef2Snarayan 
26341ae08745Sheppo 	mutex_destroy(&ldcp->ldc_txlock);
26351ae08745Sheppo 	mutex_destroy(&ldcp->ldc_cblock);
26361ae08745Sheppo 	cv_destroy(&ldcp->drain_cv);
26371ae08745Sheppo 	mutex_destroy(&ldcp->drain_cv_lock);
26381ae08745Sheppo 	mutex_destroy(&ldcp->hss_lock);
2639d10e4ef2Snarayan 	mutex_destroy(&ldcp->lane_in.seq_lock);
2640d10e4ef2Snarayan 	mutex_destroy(&ldcp->lane_out.seq_lock);
26411ae08745Sheppo 
26421ae08745Sheppo 	/* unlink it from the list */
26431ae08745Sheppo 	prev_ldcp = ldcp->ldc_next;
26441ae08745Sheppo 	ldcl->num_ldcs--;
26451ae08745Sheppo 	kmem_free(ldcp, sizeof (vsw_ldc_t));
26461ae08745Sheppo 
26471ae08745Sheppo 	return (0);
26481ae08745Sheppo }
26491ae08745Sheppo 
26501ae08745Sheppo /*
26511ae08745Sheppo  * Open and attempt to bring up the channel. Note that channel
26521ae08745Sheppo  * can only be brought up if peer has also opened channel.
26531ae08745Sheppo  *
26541ae08745Sheppo  * Returns 0 if can open and bring up channel, otherwise
26551ae08745Sheppo  * returns 1.
26561ae08745Sheppo  */
26571ae08745Sheppo static int
26581ae08745Sheppo vsw_ldc_init(vsw_ldc_t *ldcp)
26591ae08745Sheppo {
26601ae08745Sheppo 	vsw_t 		*vswp = ldcp->ldc_vswp;
26611ae08745Sheppo 	ldc_status_t	istatus = 0;
26621ae08745Sheppo 	int		rv;
26631ae08745Sheppo 
26641ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
26651ae08745Sheppo 
26661ae08745Sheppo 	LDC_ENTER_LOCK(ldcp);
26671ae08745Sheppo 
26681ae08745Sheppo 	/* don't start at 0 in case clients don't like that */
26691ae08745Sheppo 	ldcp->next_ident = 1;
26701ae08745Sheppo 
26711ae08745Sheppo 	rv = ldc_open(ldcp->ldc_handle);
26721ae08745Sheppo 	if (rv != 0) {
26731ae08745Sheppo 		DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)",
26741ae08745Sheppo 		    __func__, ldcp->ldc_id, rv);
26751ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
26761ae08745Sheppo 		return (1);
26771ae08745Sheppo 	}
26781ae08745Sheppo 
26791ae08745Sheppo 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
26801ae08745Sheppo 		DERR(vswp, "%s: unable to get status", __func__);
26811ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
26821ae08745Sheppo 		return (1);
26831ae08745Sheppo 
26841ae08745Sheppo 	} else if (istatus != LDC_OPEN && istatus != LDC_READY) {
26851ae08745Sheppo 		DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY",
26861ae08745Sheppo 		    __func__, ldcp->ldc_id, istatus);
26871ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
26881ae08745Sheppo 		return (1);
26891ae08745Sheppo 	}
26901ae08745Sheppo 
26911ae08745Sheppo 	ldcp->ldc_status = istatus;
26921ae08745Sheppo 	rv = ldc_up(ldcp->ldc_handle);
26931ae08745Sheppo 	if (rv != 0) {
26941ae08745Sheppo 		/*
26951ae08745Sheppo 		 * Not a fatal error for ldc_up() to fail, as peer
26961ae08745Sheppo 		 * end point may simply not be ready yet.
26971ae08745Sheppo 		 */
26981ae08745Sheppo 		D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__,
26991ae08745Sheppo 			ldcp->ldc_id, rv);
27001ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
27011ae08745Sheppo 		return (1);
27021ae08745Sheppo 	}
27031ae08745Sheppo 
27041ae08745Sheppo 	/*
27051ae08745Sheppo 	 * ldc_up() call is non-blocking so need to explicitly
27061ae08745Sheppo 	 * check channel status to see if in fact the channel
27071ae08745Sheppo 	 * is UP.
27081ae08745Sheppo 	 */
27091ae08745Sheppo 	if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
27101ae08745Sheppo 		DERR(vswp, "%s: unable to get status", __func__);
27111ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
27121ae08745Sheppo 		return (1);
27131ae08745Sheppo 
27141ae08745Sheppo 	} else if (istatus != LDC_UP) {
27151ae08745Sheppo 		DERR(vswp, "%s: id(%lld) status(%d) is not UP",
27161ae08745Sheppo 		    __func__, ldcp->ldc_id, istatus);
27171ae08745Sheppo 	} else {
27181ae08745Sheppo 		ldcp->ldc_status = istatus;
27191ae08745Sheppo 	}
27201ae08745Sheppo 
27211ae08745Sheppo 	LDC_EXIT_LOCK(ldcp);
27221ae08745Sheppo 
27231ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
27241ae08745Sheppo 	return (0);
27251ae08745Sheppo }
27261ae08745Sheppo 
27271ae08745Sheppo /* disable callbacks on the channel */
27281ae08745Sheppo static int
27291ae08745Sheppo vsw_ldc_uninit(vsw_ldc_t *ldcp)
27301ae08745Sheppo {
27311ae08745Sheppo 	vsw_t	*vswp = ldcp->ldc_vswp;
27321ae08745Sheppo 	int	rv;
27331ae08745Sheppo 
27341ae08745Sheppo 	D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id);
27351ae08745Sheppo 
27361ae08745Sheppo 	LDC_ENTER_LOCK(ldcp);
27371ae08745Sheppo 
27381ae08745Sheppo 	rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
27391ae08745Sheppo 	if (rv != 0) {
27401ae08745Sheppo 		DERR(vswp, "vsw_ldc_uninit(%lld): error disabling "
27411ae08745Sheppo 			"interrupts (rv = %d)\n", ldcp->ldc_id, rv);
27421ae08745Sheppo 		LDC_EXIT_LOCK(ldcp);
27431ae08745Sheppo 		return (1);
27441ae08745Sheppo 	}
27451ae08745Sheppo 
27461ae08745Sheppo 	ldcp->ldc_status = LDC_INIT;
27471ae08745Sheppo 
27481ae08745Sheppo 	LDC_EXIT_LOCK(ldcp);
27491ae08745Sheppo 
27501ae08745Sheppo 	D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id);
27511ae08745Sheppo 
27521ae08745Sheppo 	return (0);
27531ae08745Sheppo }
27541ae08745Sheppo 
27551ae08745Sheppo static int
27561ae08745Sheppo vsw_init_ldcs(vsw_port_t *port)
27571ae08745Sheppo {
27581ae08745Sheppo 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
27591ae08745Sheppo 	vsw_ldc_t	*ldcp;
27601ae08745Sheppo 
27611ae08745Sheppo 	READ_ENTER(&ldcl->lockrw);
27621ae08745Sheppo 	ldcp =  ldcl->head;
27631ae08745Sheppo 	for (; ldcp  != NULL; ldcp = ldcp->ldc_next) {
27641ae08745Sheppo 		(void) vsw_ldc_init(ldcp);
27651ae08745Sheppo 	}
27661ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
27671ae08745Sheppo 
27681ae08745Sheppo 	return (0);
27691ae08745Sheppo }
27701ae08745Sheppo 
27711ae08745Sheppo static int
27721ae08745Sheppo vsw_uninit_ldcs(vsw_port_t *port)
27731ae08745Sheppo {
27741ae08745Sheppo 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
27751ae08745Sheppo 	vsw_ldc_t	*ldcp;
27761ae08745Sheppo 
27771ae08745Sheppo 	D1(NULL, "vsw_uninit_ldcs: enter\n");
27781ae08745Sheppo 
27791ae08745Sheppo 	READ_ENTER(&ldcl->lockrw);
27801ae08745Sheppo 	ldcp =  ldcl->head;
27811ae08745Sheppo 	for (; ldcp  != NULL; ldcp = ldcp->ldc_next) {
27821ae08745Sheppo 		(void) vsw_ldc_uninit(ldcp);
27831ae08745Sheppo 	}
27841ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
27851ae08745Sheppo 
27861ae08745Sheppo 	D1(NULL, "vsw_uninit_ldcs: exit\n");
27871ae08745Sheppo 
27881ae08745Sheppo 	return (0);
27891ae08745Sheppo }
27901ae08745Sheppo 
27911ae08745Sheppo /*
27921ae08745Sheppo  * Wait until the callback(s) associated with the ldcs under the specified
27931ae08745Sheppo  * port have completed.
27941ae08745Sheppo  *
27951ae08745Sheppo  * Prior to this function being invoked each channel under this port
27961ae08745Sheppo  * should have been quiesced via ldc_set_cb_mode(DISABLE).
27971ae08745Sheppo  *
27981ae08745Sheppo  * A short explaination of what we are doing below..
27991ae08745Sheppo  *
28001ae08745Sheppo  * The simplest approach would be to have a reference counter in
28011ae08745Sheppo  * the ldc structure which is increment/decremented by the callbacks as
28021ae08745Sheppo  * they use the channel. The drain function could then simply disable any
28031ae08745Sheppo  * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately
28041ae08745Sheppo  * there is a tiny window here - before the callback is able to get the lock
28051ae08745Sheppo  * on the channel it is interrupted and this function gets to execute. It
28061ae08745Sheppo  * sees that the ref count is zero and believes its free to delete the
28071ae08745Sheppo  * associated data structures.
28081ae08745Sheppo  *
28091ae08745Sheppo  * We get around this by taking advantage of the fact that before the ldc
28101ae08745Sheppo  * framework invokes a callback it sets a flag to indicate that there is a
28111ae08745Sheppo  * callback active (or about to become active). If when we attempt to
28121ae08745Sheppo  * unregister a callback when this active flag is set then the unregister
28131ae08745Sheppo  * will fail with EWOULDBLOCK.
28141ae08745Sheppo  *
28151ae08745Sheppo  * If the unregister fails we do a cv_timedwait. We will either be signaled
28161ae08745Sheppo  * by the callback as it is exiting (note we have to wait a short period to
28171ae08745Sheppo  * allow the callback to return fully to the ldc framework and it to clear
28181ae08745Sheppo  * the active flag), or by the timer expiring. In either case we again attempt
28191ae08745Sheppo  * the unregister. We repeat this until we can succesfully unregister the
28201ae08745Sheppo  * callback.
28211ae08745Sheppo  *
28221ae08745Sheppo  * The reason we use a cv_timedwait rather than a simple cv_wait is to catch
28231ae08745Sheppo  * the case where the callback has finished but the ldc framework has not yet
28241ae08745Sheppo  * cleared the active flag. In this case we would never get a cv_signal.
28251ae08745Sheppo  */
28261ae08745Sheppo static int
28271ae08745Sheppo vsw_drain_ldcs(vsw_port_t *port)
28281ae08745Sheppo {
28291ae08745Sheppo 	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
28301ae08745Sheppo 	vsw_ldc_t	*ldcp;
28311ae08745Sheppo 	vsw_t		*vswp = port->p_vswp;
28321ae08745Sheppo 
28331ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
28341ae08745Sheppo 
28351ae08745Sheppo 	READ_ENTER(&ldcl->lockrw);
28361ae08745Sheppo 
28371ae08745Sheppo 	ldcp = ldcl->head;
28381ae08745Sheppo 
28391ae08745Sheppo 	for (; ldcp  != NULL; ldcp = ldcp->ldc_next) {
28401ae08745Sheppo 		/*
28411ae08745Sheppo 		 * If we can unregister the channel callback then we
28421ae08745Sheppo 		 * know that there is no callback either running or
28431ae08745Sheppo 		 * scheduled to run for this channel so move on to next
28441ae08745Sheppo 		 * channel in the list.
28451ae08745Sheppo 		 */
28461ae08745Sheppo 		mutex_enter(&ldcp->drain_cv_lock);
28471ae08745Sheppo 
28481ae08745Sheppo 		/* prompt active callbacks to quit */
28491ae08745Sheppo 		ldcp->drain_state = VSW_LDC_DRAINING;
28501ae08745Sheppo 
28511ae08745Sheppo 		if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) {
28521ae08745Sheppo 			D2(vswp, "%s: unreg callback for chan %ld", __func__,
28531ae08745Sheppo 				ldcp->ldc_id);
28541ae08745Sheppo 			mutex_exit(&ldcp->drain_cv_lock);
28551ae08745Sheppo 			continue;
28561ae08745Sheppo 		} else {
28571ae08745Sheppo 			/*
28581ae08745Sheppo 			 * If we end up here we know that either 1) a callback
28591ae08745Sheppo 			 * is currently executing, 2) is about to start (i.e.
28601ae08745Sheppo 			 * the ldc framework has set the active flag but
28611ae08745Sheppo 			 * has not actually invoked the callback yet, or 3)
28621ae08745Sheppo 			 * has finished and has returned to the ldc framework
28631ae08745Sheppo 			 * but the ldc framework has not yet cleared the
28641ae08745Sheppo 			 * active bit.
28651ae08745Sheppo 			 *
28661ae08745Sheppo 			 * Wait for it to finish.
28671ae08745Sheppo 			 */
28681ae08745Sheppo 			while (ldc_unreg_callback(ldcp->ldc_handle)
28691ae08745Sheppo 								== EWOULDBLOCK)
28701ae08745Sheppo 				(void) cv_timedwait(&ldcp->drain_cv,
28711ae08745Sheppo 					&ldcp->drain_cv_lock, lbolt + hz);
28721ae08745Sheppo 
28731ae08745Sheppo 			mutex_exit(&ldcp->drain_cv_lock);
28741ae08745Sheppo 			D2(vswp, "%s: unreg callback for chan %ld after "
28751ae08745Sheppo 				"timeout", __func__, ldcp->ldc_id);
28761ae08745Sheppo 		}
28771ae08745Sheppo 	}
28781ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
28791ae08745Sheppo 
28801ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
28811ae08745Sheppo 	return (0);
28821ae08745Sheppo }
28831ae08745Sheppo 
28841ae08745Sheppo /*
28851ae08745Sheppo  * Wait until all tasks which reference this port have completed.
28861ae08745Sheppo  *
28871ae08745Sheppo  * Prior to this function being invoked each channel under this port
28881ae08745Sheppo  * should have been quiesced via ldc_set_cb_mode(DISABLE).
28891ae08745Sheppo  */
28901ae08745Sheppo static int
28911ae08745Sheppo vsw_drain_port_taskq(vsw_port_t *port)
28921ae08745Sheppo {
28931ae08745Sheppo 	vsw_t		*vswp = port->p_vswp;
28941ae08745Sheppo 
28951ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
28961ae08745Sheppo 
28971ae08745Sheppo 	/*
28981ae08745Sheppo 	 * Mark the port as in the process of being detached, and
28991ae08745Sheppo 	 * dispatch a marker task to the queue so we know when all
29001ae08745Sheppo 	 * relevant tasks have completed.
29011ae08745Sheppo 	 */
29021ae08745Sheppo 	mutex_enter(&port->state_lock);
29031ae08745Sheppo 	port->state = VSW_PORT_DETACHING;
29041ae08745Sheppo 
29051ae08745Sheppo 	if ((vswp->taskq_p == NULL) ||
29061ae08745Sheppo 		(ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task,
29071ae08745Sheppo 			port, DDI_NOSLEEP) != DDI_SUCCESS)) {
29081ae08745Sheppo 		DERR(vswp, "%s: unable to dispatch marker task",
29091ae08745Sheppo 			__func__);
29101ae08745Sheppo 		mutex_exit(&port->state_lock);
29111ae08745Sheppo 		return (1);
29121ae08745Sheppo 	}
29131ae08745Sheppo 
29141ae08745Sheppo 	/*
29151ae08745Sheppo 	 * Wait for the marker task to finish.
29161ae08745Sheppo 	 */
29171ae08745Sheppo 	while (port->state != VSW_PORT_DETACHABLE)
29181ae08745Sheppo 		cv_wait(&port->state_cv, &port->state_lock);
29191ae08745Sheppo 
29201ae08745Sheppo 	mutex_exit(&port->state_lock);
29211ae08745Sheppo 
29221ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
29231ae08745Sheppo 
29241ae08745Sheppo 	return (0);
29251ae08745Sheppo }
29261ae08745Sheppo 
29271ae08745Sheppo static void
29281ae08745Sheppo vsw_marker_task(void *arg)
29291ae08745Sheppo {
29301ae08745Sheppo 	vsw_port_t	*port = arg;
29311ae08745Sheppo 	vsw_t		*vswp = port->p_vswp;
29321ae08745Sheppo 
29331ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
29341ae08745Sheppo 
29351ae08745Sheppo 	mutex_enter(&port->state_lock);
29361ae08745Sheppo 
29371ae08745Sheppo 	/*
29381ae08745Sheppo 	 * No further tasks should be dispatched which reference
29391ae08745Sheppo 	 * this port so ok to mark it as safe to detach.
29401ae08745Sheppo 	 */
29411ae08745Sheppo 	port->state = VSW_PORT_DETACHABLE;
29421ae08745Sheppo 
29431ae08745Sheppo 	cv_signal(&port->state_cv);
29441ae08745Sheppo 
29451ae08745Sheppo 	mutex_exit(&port->state_lock);
29461ae08745Sheppo 
29471ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
29481ae08745Sheppo }
29491ae08745Sheppo 
29501ae08745Sheppo static vsw_port_t *
29511ae08745Sheppo vsw_lookup_port(vsw_t *vswp, int p_instance)
29521ae08745Sheppo {
29531ae08745Sheppo 	vsw_port_list_t *plist = &vswp->plist;
29541ae08745Sheppo 	vsw_port_t	*port;
29551ae08745Sheppo 
29561ae08745Sheppo 	for (port = plist->head; port != NULL; port = port->p_next) {
29571ae08745Sheppo 		if (port->p_instance == p_instance) {
29581ae08745Sheppo 			D2(vswp, "vsw_lookup_port: found p_instance\n");
29591ae08745Sheppo 			return (port);
29601ae08745Sheppo 		}
29611ae08745Sheppo 	}
29621ae08745Sheppo 
29631ae08745Sheppo 	return (NULL);
29641ae08745Sheppo }
29651ae08745Sheppo 
29661ae08745Sheppo /*
29671ae08745Sheppo  * Search for and remove the specified port from the port
29681ae08745Sheppo  * list. Returns 0 if able to locate and remove port, otherwise
29691ae08745Sheppo  * returns 1.
29701ae08745Sheppo  */
29711ae08745Sheppo static int
29721ae08745Sheppo vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port)
29731ae08745Sheppo {
29741ae08745Sheppo 	vsw_port_list_t *plist = &vswp->plist;
29751ae08745Sheppo 	vsw_port_t	*curr_p, *prev_p;
29761ae08745Sheppo 
29771ae08745Sheppo 	if (plist->head == NULL)
29781ae08745Sheppo 		return (1);
29791ae08745Sheppo 
29801ae08745Sheppo 	curr_p = prev_p = plist->head;
29811ae08745Sheppo 
29821ae08745Sheppo 	while (curr_p != NULL) {
29831ae08745Sheppo 		if (curr_p == port) {
29841ae08745Sheppo 			if (prev_p == curr_p) {
29851ae08745Sheppo 				plist->head = curr_p->p_next;
29861ae08745Sheppo 			} else {
29871ae08745Sheppo 				prev_p->p_next = curr_p->p_next;
29881ae08745Sheppo 			}
29891ae08745Sheppo 			plist->num_ports--;
29901ae08745Sheppo 			break;
29911ae08745Sheppo 		} else {
29921ae08745Sheppo 			prev_p = curr_p;
29931ae08745Sheppo 			curr_p = curr_p->p_next;
29941ae08745Sheppo 		}
29951ae08745Sheppo 	}
29961ae08745Sheppo 	return (0);
29971ae08745Sheppo }
29981ae08745Sheppo 
29991ae08745Sheppo /*
30001ae08745Sheppo  * Interrupt handler for ldc messages.
30011ae08745Sheppo  */
30021ae08745Sheppo static uint_t
30031ae08745Sheppo vsw_ldc_cb(uint64_t event, caddr_t arg)
30041ae08745Sheppo {
30051ae08745Sheppo 	vsw_ldc_t	*ldcp = (vsw_ldc_t  *)arg;
30061ae08745Sheppo 	vsw_t 		*vswp = ldcp->ldc_vswp;
30071ae08745Sheppo 	ldc_status_t	lstatus;
30081ae08745Sheppo 	int		rv;
30091ae08745Sheppo 
30101ae08745Sheppo 	D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);
30111ae08745Sheppo 
30121ae08745Sheppo 	mutex_enter(&ldcp->ldc_cblock);
30131ae08745Sheppo 
30141ae08745Sheppo 	if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) {
30151ae08745Sheppo 		mutex_exit(&ldcp->ldc_cblock);
30161ae08745Sheppo 		return (LDC_SUCCESS);
30171ae08745Sheppo 	}
30181ae08745Sheppo 
30191ae08745Sheppo 	if (event & LDC_EVT_UP) {
30201ae08745Sheppo 		/*
30211ae08745Sheppo 		 * Channel has come up, get the state and then start
30221ae08745Sheppo 		 * the handshake.
30231ae08745Sheppo 		 */
30241ae08745Sheppo 		rv = ldc_status(ldcp->ldc_handle, &lstatus);
30251ae08745Sheppo 		if (rv != 0) {
30261ae08745Sheppo 			cmn_err(CE_WARN, "Unable to read channel state");
30271ae08745Sheppo 		}
30281ae08745Sheppo 		ldcp->ldc_status = lstatus;
30291ae08745Sheppo 
30301ae08745Sheppo 		D2(vswp, "%s: id(%ld) event(%llx) UP:  status(%ld)",
30311ae08745Sheppo 			__func__, ldcp->ldc_id, event, ldcp->ldc_status);
30321ae08745Sheppo 
30331ae08745Sheppo 		vsw_restart_handshake(ldcp);
30341ae08745Sheppo 
30351ae08745Sheppo 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
30361ae08745Sheppo 	}
30371ae08745Sheppo 
30381ae08745Sheppo 	if (event & LDC_EVT_READ) {
30391ae08745Sheppo 		/*
30401ae08745Sheppo 		 * Data available for reading.
30411ae08745Sheppo 		 */
30421ae08745Sheppo 		D2(vswp, "%s: id(ld) event(%llx) data READ",
30431ae08745Sheppo 				__func__, ldcp->ldc_id, event);
30441ae08745Sheppo 
30451ae08745Sheppo 		vsw_process_pkt(ldcp);
30461ae08745Sheppo 
30471ae08745Sheppo 		ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0);
30481ae08745Sheppo 
30491ae08745Sheppo 		goto vsw_cb_exit;
30501ae08745Sheppo 	}
30511ae08745Sheppo 
30521ae08745Sheppo 	if (event & LDC_EVT_RESET) {
30531ae08745Sheppo 		rv = ldc_status(ldcp->ldc_handle, &lstatus);
30541ae08745Sheppo 		if (rv != 0) {
30551ae08745Sheppo 			cmn_err(CE_WARN, "Unable to read channel state");
30561ae08745Sheppo 		} else {
30571ae08745Sheppo 			ldcp->ldc_status = lstatus;
30581ae08745Sheppo 		}
30591ae08745Sheppo 		D2(vswp, "%s: id(%ld) event(%llx) RESET:  status (%ld)",
30601ae08745Sheppo 			__func__, ldcp->ldc_id, event, ldcp->ldc_status);
30611ae08745Sheppo 	}
30621ae08745Sheppo 
30631ae08745Sheppo 	if (event & LDC_EVT_DOWN) {
30641ae08745Sheppo 		rv = ldc_status(ldcp->ldc_handle, &lstatus);
30651ae08745Sheppo 		if (rv != 0) {
30661ae08745Sheppo 			cmn_err(CE_WARN, "Unable to read channel state");
30671ae08745Sheppo 		} else {
30681ae08745Sheppo 			ldcp->ldc_status = lstatus;
30691ae08745Sheppo 		}
30701ae08745Sheppo 
30711ae08745Sheppo 		D2(vswp, "%s: id(%ld) event(%llx) DOWN:  status (%ld)",
30721ae08745Sheppo 			__func__, ldcp->ldc_id, event, ldcp->ldc_status);
30731ae08745Sheppo 
30741ae08745Sheppo 	}
30751ae08745Sheppo 
30761ae08745Sheppo 	/*
30771ae08745Sheppo 	 * Catch either LDC_EVT_WRITE which we don't support or any
30781ae08745Sheppo 	 * unknown event.
30791ae08745Sheppo 	 */
30801ae08745Sheppo 	if (event & ~(LDC_EVT_UP | LDC_EVT_RESET
30811ae08745Sheppo 					| LDC_EVT_DOWN | LDC_EVT_READ)) {
30821ae08745Sheppo 
30831ae08745Sheppo 		DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)",
30841ae08745Sheppo 			__func__, ldcp->ldc_id, event, ldcp->ldc_status);
30851ae08745Sheppo 	}
30861ae08745Sheppo 
30871ae08745Sheppo vsw_cb_exit:
30881ae08745Sheppo 	mutex_exit(&ldcp->ldc_cblock);
30891ae08745Sheppo 
30901ae08745Sheppo 	/*
30911ae08745Sheppo 	 * Let the drain function know we are finishing if it
30921ae08745Sheppo 	 * is waiting.
30931ae08745Sheppo 	 */
30941ae08745Sheppo 	mutex_enter(&ldcp->drain_cv_lock);
30951ae08745Sheppo 	if (ldcp->drain_state == VSW_LDC_DRAINING)
30961ae08745Sheppo 		cv_signal(&ldcp->drain_cv);
30971ae08745Sheppo 	mutex_exit(&ldcp->drain_cv_lock);
30981ae08745Sheppo 
30991ae08745Sheppo 	return (LDC_SUCCESS);
31001ae08745Sheppo }
31011ae08745Sheppo 
31021ae08745Sheppo /*
31031ae08745Sheppo  * (Re)start a handshake with our peer by sending them
31041ae08745Sheppo  * our version info.
31051ae08745Sheppo  */
31061ae08745Sheppo static void
31071ae08745Sheppo vsw_restart_handshake(vsw_ldc_t *ldcp)
31081ae08745Sheppo {
31091ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
31101ae08745Sheppo 	vsw_port_t	*port;
31111ae08745Sheppo 	vsw_ldc_list_t	*ldcl;
31121ae08745Sheppo 
31131ae08745Sheppo 	D1(vswp, "vsw_restart_handshake: enter");
31141ae08745Sheppo 
31151ae08745Sheppo 	port = ldcp->ldc_port;
31161ae08745Sheppo 	ldcl = &port->p_ldclist;
31171ae08745Sheppo 
31181ae08745Sheppo 	WRITE_ENTER(&ldcl->lockrw);
31191ae08745Sheppo 
31201ae08745Sheppo 	D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__,
31211ae08745Sheppo 		ldcp->lane_in.lstate, ldcp->lane_out.lstate);
31221ae08745Sheppo 
31231ae08745Sheppo 	vsw_free_lane_resources(ldcp, INBOUND);
31241ae08745Sheppo 	vsw_free_lane_resources(ldcp, OUTBOUND);
31251ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
31261ae08745Sheppo 
31271ae08745Sheppo 	ldcp->lane_in.lstate = 0;
31281ae08745Sheppo 	ldcp->lane_out.lstate = 0;
31291ae08745Sheppo 
31301ae08745Sheppo 	/*
31311ae08745Sheppo 	 * Remove parent port from any multicast groups
31321ae08745Sheppo 	 * it may have registered with. Client must resend
31331ae08745Sheppo 	 * multicast add command after handshake completes.
31341ae08745Sheppo 	 */
31351ae08745Sheppo 	(void) vsw_del_fdb(vswp, port);
31361ae08745Sheppo 
31371ae08745Sheppo 	vsw_del_mcst_port(port);
31381ae08745Sheppo 
31391ae08745Sheppo 	ldcp->hphase = VSW_MILESTONE0;
31401ae08745Sheppo 
31411ae08745Sheppo 	ldcp->peer_session = 0;
31421ae08745Sheppo 	ldcp->session_status = 0;
31431ae08745Sheppo 
31441ae08745Sheppo 	/*
31451ae08745Sheppo 	 * We now increment the transaction group id. This allows
31461ae08745Sheppo 	 * us to identify and disard any tasks which are still pending
31471ae08745Sheppo 	 * on the taskq and refer to the handshake session we are about
31481ae08745Sheppo 	 * to restart. These stale messages no longer have any real
31491ae08745Sheppo 	 * meaning.
31501ae08745Sheppo 	 */
31511ae08745Sheppo 	mutex_enter(&ldcp->hss_lock);
31521ae08745Sheppo 	ldcp->hss_id++;
31531ae08745Sheppo 	mutex_exit(&ldcp->hss_lock);
31541ae08745Sheppo 
31551ae08745Sheppo 	if (ldcp->hcnt++ > vsw_num_handshakes) {
31561ae08745Sheppo 		cmn_err(CE_WARN, "exceeded number of permitted "
31571ae08745Sheppo 			"handshake attempts (%d) on channel %ld",
31581ae08745Sheppo 			ldcp->hcnt, ldcp->ldc_id);
31591ae08745Sheppo 		return;
31601ae08745Sheppo 	}
31611ae08745Sheppo 
31621ae08745Sheppo 	vsw_send_ver(ldcp);
31631ae08745Sheppo 
31641ae08745Sheppo 	D1(vswp, "vsw_restart_handshake: exit");
31651ae08745Sheppo }
31661ae08745Sheppo 
31671ae08745Sheppo /*
31681ae08745Sheppo  * returns 0 if legal for event signified by flag to have
31691ae08745Sheppo  * occured at the time it did. Otherwise returns 1.
31701ae08745Sheppo  */
31711ae08745Sheppo int
31721ae08745Sheppo vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag)
31731ae08745Sheppo {
31741ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
31751ae08745Sheppo 	uint64_t	state;
31761ae08745Sheppo 	uint64_t	phase;
31771ae08745Sheppo 
31781ae08745Sheppo 	if (dir == INBOUND)
31791ae08745Sheppo 		state = ldcp->lane_in.lstate;
31801ae08745Sheppo 	else
31811ae08745Sheppo 		state = ldcp->lane_out.lstate;
31821ae08745Sheppo 
31831ae08745Sheppo 	phase = ldcp->hphase;
31841ae08745Sheppo 
31851ae08745Sheppo 	switch (flag) {
31861ae08745Sheppo 	case VSW_VER_INFO_RECV:
31871ae08745Sheppo 		if (phase > VSW_MILESTONE0) {
31881ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV"
31891ae08745Sheppo 				" when in state %d\n", ldcp->ldc_id, phase);
31901ae08745Sheppo 			vsw_restart_handshake(ldcp);
31911ae08745Sheppo 			return (1);
31921ae08745Sheppo 		}
31931ae08745Sheppo 		break;
31941ae08745Sheppo 
31951ae08745Sheppo 	case VSW_VER_ACK_RECV:
31961ae08745Sheppo 	case VSW_VER_NACK_RECV:
31971ae08745Sheppo 		if (!(state & VSW_VER_INFO_SENT)) {
31981ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK"
31991ae08745Sheppo 				" or VER_NACK when in state %d\n",
32001ae08745Sheppo 				ldcp->ldc_id, phase);
32011ae08745Sheppo 			vsw_restart_handshake(ldcp);
32021ae08745Sheppo 			return (1);
32031ae08745Sheppo 		} else
32041ae08745Sheppo 			state &= ~VSW_VER_INFO_SENT;
32051ae08745Sheppo 		break;
32061ae08745Sheppo 
32071ae08745Sheppo 	case VSW_ATTR_INFO_RECV:
32081ae08745Sheppo 		if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) {
32091ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV"
32101ae08745Sheppo 				" when in state %d\n", ldcp->ldc_id, phase);
32111ae08745Sheppo 			vsw_restart_handshake(ldcp);
32121ae08745Sheppo 			return (1);
32131ae08745Sheppo 		}
32141ae08745Sheppo 		break;
32151ae08745Sheppo 
32161ae08745Sheppo 	case VSW_ATTR_ACK_RECV:
32171ae08745Sheppo 	case VSW_ATTR_NACK_RECV:
32181ae08745Sheppo 		if (!(state & VSW_ATTR_INFO_SENT)) {
32191ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK"
32201ae08745Sheppo 				" or ATTR_NACK when in state %d\n",
32211ae08745Sheppo 				ldcp->ldc_id, phase);
32221ae08745Sheppo 			vsw_restart_handshake(ldcp);
32231ae08745Sheppo 			return (1);
32241ae08745Sheppo 		} else
32251ae08745Sheppo 			state &= ~VSW_ATTR_INFO_SENT;
32261ae08745Sheppo 		break;
32271ae08745Sheppo 
32281ae08745Sheppo 	case VSW_DRING_INFO_RECV:
32291ae08745Sheppo 		if (phase < VSW_MILESTONE1) {
32301ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV"
32311ae08745Sheppo 				" when in state %d\n", ldcp->ldc_id, phase);
32321ae08745Sheppo 			vsw_restart_handshake(ldcp);
32331ae08745Sheppo 			return (1);
32341ae08745Sheppo 		}
32351ae08745Sheppo 		break;
32361ae08745Sheppo 
32371ae08745Sheppo 	case VSW_DRING_ACK_RECV:
32381ae08745Sheppo 	case VSW_DRING_NACK_RECV:
32391ae08745Sheppo 		if (!(state & VSW_DRING_INFO_SENT)) {
32401ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK"
32411ae08745Sheppo 				" or DRING_NACK when in state %d\n",
32421ae08745Sheppo 				ldcp->ldc_id, phase);
32431ae08745Sheppo 			vsw_restart_handshake(ldcp);
32441ae08745Sheppo 			return (1);
32451ae08745Sheppo 		} else
32461ae08745Sheppo 			state &= ~VSW_DRING_INFO_SENT;
32471ae08745Sheppo 		break;
32481ae08745Sheppo 
32491ae08745Sheppo 	case VSW_RDX_INFO_RECV:
32501ae08745Sheppo 		if (phase < VSW_MILESTONE3) {
32511ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV"
32521ae08745Sheppo 				" when in state %d\n", ldcp->ldc_id, phase);
32531ae08745Sheppo 			vsw_restart_handshake(ldcp);
32541ae08745Sheppo 			return (1);
32551ae08745Sheppo 		}
32561ae08745Sheppo 		break;
32571ae08745Sheppo 
32581ae08745Sheppo 	case VSW_RDX_ACK_RECV:
32591ae08745Sheppo 	case VSW_RDX_NACK_RECV:
32601ae08745Sheppo 		if (!(state & VSW_RDX_INFO_SENT)) {
32611ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK"
32621ae08745Sheppo 				" or RDX_NACK when in state %d\n",
32631ae08745Sheppo 				ldcp->ldc_id, phase);
32641ae08745Sheppo 			vsw_restart_handshake(ldcp);
32651ae08745Sheppo 			return (1);
32661ae08745Sheppo 		} else
32671ae08745Sheppo 			state &= ~VSW_RDX_INFO_SENT;
32681ae08745Sheppo 		break;
32691ae08745Sheppo 
32701ae08745Sheppo 	case VSW_MCST_INFO_RECV:
32711ae08745Sheppo 		if (phase < VSW_MILESTONE3) {
32721ae08745Sheppo 			DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV"
32731ae08745Sheppo 				" when in state %d\n", ldcp->ldc_id, phase);
32741ae08745Sheppo 			vsw_restart_handshake(ldcp);
32751ae08745Sheppo 			return (1);
32761ae08745Sheppo 		}
32771ae08745Sheppo 		break;
32781ae08745Sheppo 
32791ae08745Sheppo 	default:
32801ae08745Sheppo 		DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)",
32811ae08745Sheppo 				ldcp->ldc_id, flag);
32821ae08745Sheppo 		return (1);
32831ae08745Sheppo 	}
32841ae08745Sheppo 
32851ae08745Sheppo 	if (dir == INBOUND)
32861ae08745Sheppo 		ldcp->lane_in.lstate = state;
32871ae08745Sheppo 	else
32881ae08745Sheppo 		ldcp->lane_out.lstate = state;
32891ae08745Sheppo 
32901ae08745Sheppo 	D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id);
32911ae08745Sheppo 
32921ae08745Sheppo 	return (0);
32931ae08745Sheppo }
32941ae08745Sheppo 
32951ae08745Sheppo void
32961ae08745Sheppo vsw_next_milestone(vsw_ldc_t *ldcp)
32971ae08745Sheppo {
32981ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
32991ae08745Sheppo 
33001ae08745Sheppo 	D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__,
33011ae08745Sheppo 		ldcp->ldc_id, ldcp->hphase);
33021ae08745Sheppo 
33031ae08745Sheppo 	DUMP_FLAGS(ldcp->lane_in.lstate);
33041ae08745Sheppo 	DUMP_FLAGS(ldcp->lane_out.lstate);
33051ae08745Sheppo 
33061ae08745Sheppo 	switch (ldcp->hphase) {
33071ae08745Sheppo 
33081ae08745Sheppo 	case VSW_MILESTONE0:
33091ae08745Sheppo 		/*
33101ae08745Sheppo 		 * If we haven't started to handshake with our peer,
33111ae08745Sheppo 		 * start to do so now.
33121ae08745Sheppo 		 */
33131ae08745Sheppo 		if (ldcp->lane_out.lstate == 0) {
33141ae08745Sheppo 			D2(vswp, "%s: (chan %lld) starting handshake "
33151ae08745Sheppo 				"with peer", __func__, ldcp->ldc_id);
33161ae08745Sheppo 			vsw_restart_handshake(ldcp);
33171ae08745Sheppo 		}
33181ae08745Sheppo 
33191ae08745Sheppo 		/*
33201ae08745Sheppo 		 * Only way to pass this milestone is to have successfully
33211ae08745Sheppo 		 * negotiated version info.
33221ae08745Sheppo 		 */
33231ae08745Sheppo 		if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) &&
33241ae08745Sheppo 			(ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) {
33251ae08745Sheppo 
33261ae08745Sheppo 			D2(vswp, "%s: (chan %lld) leaving milestone 0",
33271ae08745Sheppo 				__func__, ldcp->ldc_id);
33281ae08745Sheppo 
33291ae08745Sheppo 			/*
33301ae08745Sheppo 			 * Next milestone is passed when attribute
33311ae08745Sheppo 			 * information has been successfully exchanged.
33321ae08745Sheppo 			 */
33331ae08745Sheppo 			ldcp->hphase = VSW_MILESTONE1;
33341ae08745Sheppo 			vsw_send_attr(ldcp);
33351ae08745Sheppo 
33361ae08745Sheppo 		}
33371ae08745Sheppo 		break;
33381ae08745Sheppo 
33391ae08745Sheppo 	case VSW_MILESTONE1:
33401ae08745Sheppo 		/*
33411ae08745Sheppo 		 * Only way to pass this milestone is to have successfully
33421ae08745Sheppo 		 * negotiated attribute information.
33431ae08745Sheppo 		 */
33441ae08745Sheppo 		if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) {
33451ae08745Sheppo 
33461ae08745Sheppo 			ldcp->hphase = VSW_MILESTONE2;
33471ae08745Sheppo 
33481ae08745Sheppo 			/*
33491ae08745Sheppo 			 * If the peer device has said it wishes to
33501ae08745Sheppo 			 * use descriptor rings then we send it our ring
33511ae08745Sheppo 			 * info, otherwise we just set up a private ring
33521ae08745Sheppo 			 * which we use an internal buffer
33531ae08745Sheppo 			 */
33541ae08745Sheppo 			if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE)
33551ae08745Sheppo 				vsw_send_dring_info(ldcp);
33561ae08745Sheppo 		}
33571ae08745Sheppo 		break;
33581ae08745Sheppo 
33591ae08745Sheppo 
33601ae08745Sheppo 	case VSW_MILESTONE2:
33611ae08745Sheppo 		/*
33621ae08745Sheppo 		 * If peer has indicated in its attribute message that
33631ae08745Sheppo 		 * it wishes to use descriptor rings then the only way
33641ae08745Sheppo 		 * to pass this milestone is for us to have received
33651ae08745Sheppo 		 * valid dring info.
33661ae08745Sheppo 		 *
33671ae08745Sheppo 		 * If peer is not using descriptor rings then just fall
33681ae08745Sheppo 		 * through.
33691ae08745Sheppo 		 */
33701ae08745Sheppo 		if ((ldcp->lane_in.xfer_mode == VIO_DRING_MODE) &&
33711ae08745Sheppo 			(!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT)))
33721ae08745Sheppo 			break;
33731ae08745Sheppo 
33741ae08745Sheppo 		D2(vswp, "%s: (chan %lld) leaving milestone 2",
33751ae08745Sheppo 				__func__, ldcp->ldc_id);
33761ae08745Sheppo 
33771ae08745Sheppo 		ldcp->hphase = VSW_MILESTONE3;
33781ae08745Sheppo 		vsw_send_rdx(ldcp);
33791ae08745Sheppo 		break;
33801ae08745Sheppo 
33811ae08745Sheppo 	case VSW_MILESTONE3:
33821ae08745Sheppo 		/*
33831ae08745Sheppo 		 * Pass this milestone when all paramaters have been
33841ae08745Sheppo 		 * successfully exchanged and RDX sent in both directions.
33851ae08745Sheppo 		 *
33861ae08745Sheppo 		 * Mark outbound lane as available to transmit data.
33871ae08745Sheppo 		 */
33881ae08745Sheppo 		if ((ldcp->lane_in.lstate & VSW_RDX_ACK_SENT) &&
33891ae08745Sheppo 			(ldcp->lane_out.lstate & VSW_RDX_ACK_RECV)) {
33901ae08745Sheppo 
33911ae08745Sheppo 			D2(vswp, "%s: (chan %lld) leaving milestone 3",
33921ae08745Sheppo 				__func__, ldcp->ldc_id);
33931ae08745Sheppo 			D2(vswp, "%s: ** handshake complete **", __func__);
33941ae08745Sheppo 			ldcp->lane_out.lstate |= VSW_LANE_ACTIVE;
33951ae08745Sheppo 			ldcp->hphase = VSW_MILESTONE4;
33961ae08745Sheppo 			ldcp->hcnt = 0;
33971ae08745Sheppo 			DISPLAY_STATE();
33981ae08745Sheppo 		}
33991ae08745Sheppo 		break;
34001ae08745Sheppo 
34011ae08745Sheppo 	case VSW_MILESTONE4:
34021ae08745Sheppo 		D2(vswp, "%s: (chan %lld) in milestone 4", __func__,
34031ae08745Sheppo 							ldcp->ldc_id);
34041ae08745Sheppo 		break;
34051ae08745Sheppo 
34061ae08745Sheppo 	default:
34071ae08745Sheppo 		DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__,
34081ae08745Sheppo 			ldcp->ldc_id, ldcp->hphase);
34091ae08745Sheppo 	}
34101ae08745Sheppo 
34111ae08745Sheppo 	D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id,
34121ae08745Sheppo 		ldcp->hphase);
34131ae08745Sheppo }
34141ae08745Sheppo 
34151ae08745Sheppo /*
34161ae08745Sheppo  * Check if major version is supported.
34171ae08745Sheppo  *
34181ae08745Sheppo  * Returns 0 if finds supported major number, and if necessary
34191ae08745Sheppo  * adjusts the minor field.
34201ae08745Sheppo  *
34211ae08745Sheppo  * Returns 1 if can't match major number exactly. Sets mjor/minor
34221ae08745Sheppo  * to next lowest support values, or to zero if no other values possible.
34231ae08745Sheppo  */
34241ae08745Sheppo static int
34251ae08745Sheppo vsw_supported_version(vio_ver_msg_t *vp)
34261ae08745Sheppo {
34271ae08745Sheppo 	int	i;
34281ae08745Sheppo 
34291ae08745Sheppo 	D1(NULL, "vsw_supported_version: enter");
34301ae08745Sheppo 
34311ae08745Sheppo 	for (i = 0; i < VSW_NUM_VER; i++) {
34321ae08745Sheppo 		if (vsw_versions[i].ver_major == vp->ver_major) {
34331ae08745Sheppo 			/*
34341ae08745Sheppo 			 * Matching or lower major version found. Update
34351ae08745Sheppo 			 * minor number if necessary.
34361ae08745Sheppo 			 */
34371ae08745Sheppo 			if (vp->ver_minor > vsw_versions[i].ver_minor) {
34381ae08745Sheppo 				D2(NULL, "%s: adjusting minor value"
34391ae08745Sheppo 					" from %d to %d", __func__,
34401ae08745Sheppo 					vp->ver_minor,
34411ae08745Sheppo 					vsw_versions[i].ver_minor);
34421ae08745Sheppo 				vp->ver_minor = vsw_versions[i].ver_minor;
34431ae08745Sheppo 			}
34441ae08745Sheppo 
34451ae08745Sheppo 			return (0);
34461ae08745Sheppo 		}
34471ae08745Sheppo 
34481ae08745Sheppo 		if (vsw_versions[i].ver_major < vp->ver_major) {
34491ae08745Sheppo 			if (vp->ver_minor > vsw_versions[i].ver_minor) {
34501ae08745Sheppo 				D2(NULL, "%s: adjusting minor value"
34511ae08745Sheppo 					" from %d to %d", __func__,
34521ae08745Sheppo 					vp->ver_minor,
34531ae08745Sheppo 					vsw_versions[i].ver_minor);
34541ae08745Sheppo 				vp->ver_minor = vsw_versions[i].ver_minor;
34551ae08745Sheppo 			}
34561ae08745Sheppo 			return (1);
34571ae08745Sheppo 		}
34581ae08745Sheppo 	}
34591ae08745Sheppo 
34601ae08745Sheppo 	/* No match was possible, zero out fields */
34611ae08745Sheppo 	vp->ver_major = 0;
34621ae08745Sheppo 	vp->ver_minor = 0;
34631ae08745Sheppo 
34641ae08745Sheppo 	D1(NULL, "vsw_supported_version: exit");
34651ae08745Sheppo 
34661ae08745Sheppo 	return (1);
34671ae08745Sheppo }
34681ae08745Sheppo 
34691ae08745Sheppo /*
34701ae08745Sheppo  * Main routine for processing messages received over LDC.
34711ae08745Sheppo  */
34721ae08745Sheppo static void
34731ae08745Sheppo vsw_process_pkt(void *arg)
34741ae08745Sheppo {
34751ae08745Sheppo 	vsw_ldc_t	*ldcp = (vsw_ldc_t  *)arg;
34761ae08745Sheppo 	vsw_t 		*vswp = ldcp->ldc_vswp;
34771ae08745Sheppo 	size_t		msglen;
34781ae08745Sheppo 	vio_msg_tag_t	tag;
34791ae08745Sheppo 	def_msg_t	dmsg;
34801ae08745Sheppo 	int 		rv = 0;
34811ae08745Sheppo 
34821ae08745Sheppo 	D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id);
34831ae08745Sheppo 
34841ae08745Sheppo 	/*
34851ae08745Sheppo 	 * If channel is up read messages until channel is empty.
34861ae08745Sheppo 	 */
34871ae08745Sheppo 	do {
34881ae08745Sheppo 		msglen = sizeof (dmsg);
34891ae08745Sheppo 		rv = ldc_read(ldcp->ldc_handle, (caddr_t)&dmsg, &msglen);
34901ae08745Sheppo 
34911ae08745Sheppo 		if (rv != 0) {
34921ae08745Sheppo 			DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) "
34931ae08745Sheppo 				"len(%d)\n", __func__, ldcp->ldc_id,
34941ae08745Sheppo 							rv, msglen);
34951ae08745Sheppo 			break;
34961ae08745Sheppo 		}
34971ae08745Sheppo 
34981ae08745Sheppo 		if (msglen == 0) {
34991ae08745Sheppo 			D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__,
35001ae08745Sheppo 			ldcp->ldc_id);
35011ae08745Sheppo 			break;
35021ae08745Sheppo 		}
35031ae08745Sheppo 
35041ae08745Sheppo 		D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__,
35051ae08745Sheppo 		    ldcp->ldc_id, msglen);
35061ae08745Sheppo 
35071ae08745Sheppo 		/*
35081ae08745Sheppo 		 * Figure out what sort of packet we have gotten by
35091ae08745Sheppo 		 * examining the msg tag, and then switch it appropriately.
35101ae08745Sheppo 		 */
35111ae08745Sheppo 		bcopy(&dmsg, &tag, sizeof (vio_msg_tag_t));
35121ae08745Sheppo 
35131ae08745Sheppo 		switch (tag.vio_msgtype) {
35141ae08745Sheppo 		case VIO_TYPE_CTRL:
35151ae08745Sheppo 			vsw_dispatch_ctrl_task(ldcp, &dmsg, tag);
35161ae08745Sheppo 			break;
35171ae08745Sheppo 		case VIO_TYPE_DATA:
35181ae08745Sheppo 			vsw_process_data_pkt(ldcp, &dmsg, tag);
35191ae08745Sheppo 			break;
35201ae08745Sheppo 		case VIO_TYPE_ERR:
35211ae08745Sheppo 			vsw_process_err_pkt(ldcp, &dmsg, tag);
35221ae08745Sheppo 			break;
35231ae08745Sheppo 		default:
35241ae08745Sheppo 			DERR(vswp, "%s: Unknown tag(%lx) ", __func__,
35251ae08745Sheppo 				"id(%lx)\n", tag.vio_msgtype, ldcp->ldc_id);
35261ae08745Sheppo 			break;
35271ae08745Sheppo 		}
35281ae08745Sheppo 	} while (msglen);
35291ae08745Sheppo 
35301ae08745Sheppo 	D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id);
35311ae08745Sheppo }
35321ae08745Sheppo 
35331ae08745Sheppo /*
35341ae08745Sheppo  * Dispatch a task to process a VIO control message.
35351ae08745Sheppo  */
35361ae08745Sheppo static void
35371ae08745Sheppo vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t tag)
35381ae08745Sheppo {
35391ae08745Sheppo 	vsw_ctrl_task_t		*ctaskp = NULL;
35401ae08745Sheppo 	vsw_port_t		*port = ldcp->ldc_port;
35411ae08745Sheppo 	vsw_t			*vswp = port->p_vswp;
35421ae08745Sheppo 
35431ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
35441ae08745Sheppo 
35451ae08745Sheppo 	/*
35461ae08745Sheppo 	 * We need to handle RDX ACK messages in-band as once they
35471ae08745Sheppo 	 * are exchanged it is possible that we will get an
35481ae08745Sheppo 	 * immediate (legitimate) data packet.
35491ae08745Sheppo 	 */
35501ae08745Sheppo 	if ((tag.vio_subtype_env == VIO_RDX) &&
35511ae08745Sheppo 		(tag.vio_subtype == VIO_SUBTYPE_ACK)) {
35521ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_ACK_RECV))
35531ae08745Sheppo 			return;
35541ae08745Sheppo 
35551ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_RDX_ACK_RECV;
35561ae08745Sheppo 		vsw_next_milestone(ldcp);
35571ae08745Sheppo 		D2(vswp, "%s (%ld) handling RDX_ACK in place", __func__,
35581ae08745Sheppo 			ldcp->ldc_id);
35591ae08745Sheppo 		return;
35601ae08745Sheppo 	}
35611ae08745Sheppo 
35621ae08745Sheppo 	ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP);
35631ae08745Sheppo 
35641ae08745Sheppo 	if (ctaskp == NULL) {
35651ae08745Sheppo 		DERR(vswp, "%s: unable to alloc space for ctrl"
35661ae08745Sheppo 			" msg", __func__);
35671ae08745Sheppo 		vsw_restart_handshake(ldcp);
35681ae08745Sheppo 		return;
35691ae08745Sheppo 	}
35701ae08745Sheppo 
35711ae08745Sheppo 	ctaskp->ldcp = ldcp;
35721ae08745Sheppo 	bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t));
35731ae08745Sheppo 	mutex_enter(&ldcp->hss_lock);
35741ae08745Sheppo 	ctaskp->hss_id = ldcp->hss_id;
35751ae08745Sheppo 	mutex_exit(&ldcp->hss_lock);
35761ae08745Sheppo 
35771ae08745Sheppo 	/*
35781ae08745Sheppo 	 * Dispatch task to processing taskq if port is not in
35791ae08745Sheppo 	 * the process of being detached.
35801ae08745Sheppo 	 */
35811ae08745Sheppo 	mutex_enter(&port->state_lock);
35821ae08745Sheppo 	if (port->state == VSW_PORT_INIT) {
35831ae08745Sheppo 		if ((vswp->taskq_p == NULL) ||
35841ae08745Sheppo 			(ddi_taskq_dispatch(vswp->taskq_p,
35851ae08745Sheppo 			vsw_process_ctrl_pkt, ctaskp, DDI_NOSLEEP)
35861ae08745Sheppo 							!= DDI_SUCCESS)) {
35871ae08745Sheppo 			DERR(vswp, "%s: unable to dispatch task to taskq",
35881ae08745Sheppo 				__func__);
35891ae08745Sheppo 			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
35901ae08745Sheppo 			mutex_exit(&port->state_lock);
35911ae08745Sheppo 			vsw_restart_handshake(ldcp);
35921ae08745Sheppo 			return;
35931ae08745Sheppo 		}
35941ae08745Sheppo 	} else {
35951ae08745Sheppo 		DWARN(vswp, "%s: port %d detaching, not dispatching "
35961ae08745Sheppo 			"task", __func__, port->p_instance);
35971ae08745Sheppo 	}
35981ae08745Sheppo 
35991ae08745Sheppo 	mutex_exit(&port->state_lock);
36001ae08745Sheppo 
36011ae08745Sheppo 	D2(vswp, "%s: dispatched task to taskq for chan %d", __func__,
36021ae08745Sheppo 			ldcp->ldc_id);
36031ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
36041ae08745Sheppo }
36051ae08745Sheppo 
36061ae08745Sheppo /*
36071ae08745Sheppo  * Process a VIO ctrl message. Invoked from taskq.
36081ae08745Sheppo  */
36091ae08745Sheppo static void
36101ae08745Sheppo vsw_process_ctrl_pkt(void *arg)
36111ae08745Sheppo {
36121ae08745Sheppo 	vsw_ctrl_task_t	*ctaskp = (vsw_ctrl_task_t *)arg;
36131ae08745Sheppo 	vsw_ldc_t	*ldcp = ctaskp->ldcp;
36141ae08745Sheppo 	vsw_t 		*vswp = ldcp->ldc_vswp;
36151ae08745Sheppo 	vio_msg_tag_t	tag;
36161ae08745Sheppo 	uint16_t	env;
36171ae08745Sheppo 
36181ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
36191ae08745Sheppo 
36201ae08745Sheppo 	bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t));
36211ae08745Sheppo 	env = tag.vio_subtype_env;
36221ae08745Sheppo 
36231ae08745Sheppo 	/* stale pkt check */
36241ae08745Sheppo 	mutex_enter(&ldcp->hss_lock);
36251ae08745Sheppo 	if (ctaskp->hss_id < ldcp->hss_id) {
36261ae08745Sheppo 		DWARN(vswp, "%s: discarding stale packet belonging to"
36271ae08745Sheppo 			" earlier (%ld) handshake session", __func__,
36281ae08745Sheppo 			ctaskp->hss_id);
36291ae08745Sheppo 		mutex_exit(&ldcp->hss_lock);
36301ae08745Sheppo 		return;
36311ae08745Sheppo 	}
36321ae08745Sheppo 	mutex_exit(&ldcp->hss_lock);
36331ae08745Sheppo 
36341ae08745Sheppo 	/* session id check */
36351ae08745Sheppo 	if (ldcp->session_status & VSW_PEER_SESSION) {
36361ae08745Sheppo 		if (ldcp->peer_session != tag.vio_sid) {
36371ae08745Sheppo 			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
36381ae08745Sheppo 				__func__, ldcp->ldc_id, tag.vio_sid);
36391ae08745Sheppo 			kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
36401ae08745Sheppo 			vsw_restart_handshake(ldcp);
36411ae08745Sheppo 			return;
36421ae08745Sheppo 		}
36431ae08745Sheppo 	}
36441ae08745Sheppo 
36451ae08745Sheppo 	/*
36461ae08745Sheppo 	 * Switch on vio_subtype envelope, then let lower routines
36471ae08745Sheppo 	 * decide if its an INFO, ACK or NACK packet.
36481ae08745Sheppo 	 */
36491ae08745Sheppo 	switch (env) {
36501ae08745Sheppo 	case VIO_VER_INFO:
36511ae08745Sheppo 		vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp);
36521ae08745Sheppo 		break;
36531ae08745Sheppo 	case VIO_DRING_REG:
36541ae08745Sheppo 		vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp);
36551ae08745Sheppo 		break;
36561ae08745Sheppo 	case VIO_DRING_UNREG:
36571ae08745Sheppo 		vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp);
36581ae08745Sheppo 		break;
36591ae08745Sheppo 	case VIO_ATTR_INFO:
36601ae08745Sheppo 		vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp);
36611ae08745Sheppo 		break;
36621ae08745Sheppo 	case VNET_MCAST_INFO:
36631ae08745Sheppo 		vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp);
36641ae08745Sheppo 		break;
36651ae08745Sheppo 	case VIO_RDX:
36661ae08745Sheppo 		vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp);
36671ae08745Sheppo 		break;
36681ae08745Sheppo 	default:
36691ae08745Sheppo 		DERR(vswp, "%s : unknown vio_subtype_env (%x)\n",
36701ae08745Sheppo 							__func__, env);
36711ae08745Sheppo 	}
36721ae08745Sheppo 
36731ae08745Sheppo 	kmem_free(ctaskp, sizeof (vsw_ctrl_task_t));
36741ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
36751ae08745Sheppo }
36761ae08745Sheppo 
36771ae08745Sheppo /*
36781ae08745Sheppo  * Version negotiation. We can end up here either because our peer
36791ae08745Sheppo  * has responded to a handshake message we have sent it, or our peer
36801ae08745Sheppo  * has initiated a handshake with us. If its the former then can only
36811ae08745Sheppo  * be ACK or NACK, if its the later can only be INFO.
36821ae08745Sheppo  *
36831ae08745Sheppo  * If its an ACK we move to the next stage of the handshake, namely
36841ae08745Sheppo  * attribute exchange. If its a NACK we see if we can specify another
36851ae08745Sheppo  * version, if we can't we stop.
36861ae08745Sheppo  *
36871ae08745Sheppo  * If it is an INFO we reset all params associated with communication
36881ae08745Sheppo  * in that direction over this channel (remember connection is
36891ae08745Sheppo  * essentially 2 independent simplex channels).
36901ae08745Sheppo  */
36911ae08745Sheppo void
36921ae08745Sheppo vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt)
36931ae08745Sheppo {
36941ae08745Sheppo 	vio_ver_msg_t	*ver_pkt;
36951ae08745Sheppo 	vsw_t 		*vswp = ldcp->ldc_vswp;
36961ae08745Sheppo 
36971ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
36981ae08745Sheppo 
36991ae08745Sheppo 	/*
37001ae08745Sheppo 	 * We know this is a ctrl/version packet so
37011ae08745Sheppo 	 * cast it into the correct structure.
37021ae08745Sheppo 	 */
37031ae08745Sheppo 	ver_pkt = (vio_ver_msg_t *)pkt;
37041ae08745Sheppo 
37051ae08745Sheppo 	switch (ver_pkt->tag.vio_subtype) {
37061ae08745Sheppo 	case VIO_SUBTYPE_INFO:
37071ae08745Sheppo 		D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n");
37081ae08745Sheppo 
37091ae08745Sheppo 		/*
37101ae08745Sheppo 		 * Record the session id, which we will use from now
37111ae08745Sheppo 		 * until we see another VER_INFO msg. Even then the
37121ae08745Sheppo 		 * session id in most cases will be unchanged, execpt
37131ae08745Sheppo 		 * if channel was reset.
37141ae08745Sheppo 		 */
37151ae08745Sheppo 		if ((ldcp->session_status & VSW_PEER_SESSION) &&
37161ae08745Sheppo 			(ldcp->peer_session != ver_pkt->tag.vio_sid)) {
37171ae08745Sheppo 			DERR(vswp, "%s: updating session id for chan %lld "
37181ae08745Sheppo 				"from %llx to %llx", __func__, ldcp->ldc_id,
37191ae08745Sheppo 				ldcp->peer_session, ver_pkt->tag.vio_sid);
37201ae08745Sheppo 		}
37211ae08745Sheppo 
37221ae08745Sheppo 		ldcp->peer_session = ver_pkt->tag.vio_sid;
37231ae08745Sheppo 		ldcp->session_status |= VSW_PEER_SESSION;
37241ae08745Sheppo 
37251ae08745Sheppo 		/* Legal message at this time ? */
37261ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV))
37271ae08745Sheppo 			return;
37281ae08745Sheppo 
37291ae08745Sheppo 		/*
37301ae08745Sheppo 		 * First check the device class. Currently only expect
37311ae08745Sheppo 		 * to be talking to a network device. In the future may
37321ae08745Sheppo 		 * also talk to another switch.
37331ae08745Sheppo 		 */
37341ae08745Sheppo 		if (ver_pkt->dev_class != VDEV_NETWORK) {
37351ae08745Sheppo 			DERR(vswp, "%s: illegal device class %d", __func__,
37361ae08745Sheppo 				ver_pkt->dev_class);
37371ae08745Sheppo 
37381ae08745Sheppo 			ver_pkt->tag.vio_sid = ldcp->local_session;
37391ae08745Sheppo 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
37401ae08745Sheppo 
37411ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
37421ae08745Sheppo 
37431ae08745Sheppo 			vsw_send_msg(ldcp, (void *)ver_pkt,
37441ae08745Sheppo 					sizeof (vio_ver_msg_t));
37451ae08745Sheppo 
37461ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
37471ae08745Sheppo 			vsw_next_milestone(ldcp);
37481ae08745Sheppo 			return;
37491ae08745Sheppo 		} else {
37501ae08745Sheppo 			ldcp->dev_class = ver_pkt->dev_class;
37511ae08745Sheppo 		}
37521ae08745Sheppo 
37531ae08745Sheppo 		/*
37541ae08745Sheppo 		 * Now check the version.
37551ae08745Sheppo 		 */
37561ae08745Sheppo 		if (vsw_supported_version(ver_pkt) == 0) {
37571ae08745Sheppo 			/*
37581ae08745Sheppo 			 * Support this major version and possibly
37591ae08745Sheppo 			 * adjusted minor version.
37601ae08745Sheppo 			 */
37611ae08745Sheppo 
37621ae08745Sheppo 			D2(vswp, "%s: accepted ver %d:%d", __func__,
37631ae08745Sheppo 				ver_pkt->ver_major, ver_pkt->ver_minor);
37641ae08745Sheppo 
37651ae08745Sheppo 			/* Store accepted values */
37661ae08745Sheppo 			ldcp->lane_in.ver_major = ver_pkt->ver_major;
37671ae08745Sheppo 			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
37681ae08745Sheppo 
37691ae08745Sheppo 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
37701ae08745Sheppo 
37711ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_VER_ACK_SENT;
37721ae08745Sheppo 		} else {
37731ae08745Sheppo 			/*
37741ae08745Sheppo 			 * NACK back with the next lower major/minor
37751ae08745Sheppo 			 * pairing we support (if don't suuport any more
37761ae08745Sheppo 			 * versions then they will be set to zero.
37771ae08745Sheppo 			 */
37781ae08745Sheppo 
37791ae08745Sheppo 			D2(vswp, "%s: replying with ver %d:%d", __func__,
37801ae08745Sheppo 				ver_pkt->ver_major, ver_pkt->ver_minor);
37811ae08745Sheppo 
37821ae08745Sheppo 			/* Store updated values */
37831ae08745Sheppo 			ldcp->lane_in.ver_major = ver_pkt->ver_major;
37841ae08745Sheppo 			ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
37851ae08745Sheppo 
37861ae08745Sheppo 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
37871ae08745Sheppo 
37881ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_VER_NACK_SENT;
37891ae08745Sheppo 		}
37901ae08745Sheppo 
37911ae08745Sheppo 		DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
37921ae08745Sheppo 		ver_pkt->tag.vio_sid = ldcp->local_session;
37931ae08745Sheppo 		vsw_send_msg(ldcp, (void *)ver_pkt, sizeof (vio_ver_msg_t));
37941ae08745Sheppo 
37951ae08745Sheppo 		vsw_next_milestone(ldcp);
37961ae08745Sheppo 		break;
37971ae08745Sheppo 
37981ae08745Sheppo 	case VIO_SUBTYPE_ACK:
37991ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__);
38001ae08745Sheppo 
38011ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV))
38021ae08745Sheppo 			return;
38031ae08745Sheppo 
38041ae08745Sheppo 		/* Store updated values */
38051ae08745Sheppo 		ldcp->lane_in.ver_major = ver_pkt->ver_major;
38061ae08745Sheppo 		ldcp->lane_in.ver_minor = ver_pkt->ver_minor;
38071ae08745Sheppo 
38081ae08745Sheppo 
38091ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_VER_ACK_RECV;
38101ae08745Sheppo 		vsw_next_milestone(ldcp);
38111ae08745Sheppo 
38121ae08745Sheppo 		break;
38131ae08745Sheppo 
38141ae08745Sheppo 	case VIO_SUBTYPE_NACK:
38151ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__);
38161ae08745Sheppo 
38171ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV))
38181ae08745Sheppo 			return;
38191ae08745Sheppo 
38201ae08745Sheppo 		/*
38211ae08745Sheppo 		 * If our peer sent us a NACK with the ver fields set to
38221ae08745Sheppo 		 * zero then there is nothing more we can do. Otherwise see
38231ae08745Sheppo 		 * if we support either the version suggested, or a lesser
38241ae08745Sheppo 		 * one.
38251ae08745Sheppo 		 */
38261ae08745Sheppo 		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
38271ae08745Sheppo 			DERR(vswp, "%s: peer unable to negotiate any "
38281ae08745Sheppo 				"further.", __func__);
38291ae08745Sheppo 			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
38301ae08745Sheppo 			vsw_next_milestone(ldcp);
38311ae08745Sheppo 			return;
38321ae08745Sheppo 		}
38331ae08745Sheppo 
38341ae08745Sheppo 		/*
38351ae08745Sheppo 		 * Check to see if we support this major version or
38361ae08745Sheppo 		 * a lower one. If we don't then maj/min will be set
38371ae08745Sheppo 		 * to zero.
38381ae08745Sheppo 		 */
38391ae08745Sheppo 		(void) vsw_supported_version(ver_pkt);
38401ae08745Sheppo 		if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) {
38411ae08745Sheppo 			/* Nothing more we can do */
38421ae08745Sheppo 			DERR(vswp, "%s: version negotiation failed.\n",
38431ae08745Sheppo 								__func__);
38441ae08745Sheppo 			ldcp->lane_out.lstate |= VSW_VER_NACK_RECV;
38451ae08745Sheppo 			vsw_next_milestone(ldcp);
38461ae08745Sheppo 		} else {
38471ae08745Sheppo 			/* found a supported major version */
38481ae08745Sheppo 			ldcp->lane_out.ver_major = ver_pkt->ver_major;
38491ae08745Sheppo 			ldcp->lane_out.ver_minor = ver_pkt->ver_minor;
38501ae08745Sheppo 
38511ae08745Sheppo 			D2(vswp, "%s: resending with updated values (%x, %x)",
38521ae08745Sheppo 				__func__, ver_pkt->ver_major,
38531ae08745Sheppo 				ver_pkt->ver_minor);
38541ae08745Sheppo 
38551ae08745Sheppo 			ldcp->lane_out.lstate |= VSW_VER_INFO_SENT;
38561ae08745Sheppo 			ver_pkt->tag.vio_sid = ldcp->local_session;
38571ae08745Sheppo 			ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
38581ae08745Sheppo 
38591ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt);
38601ae08745Sheppo 
38611ae08745Sheppo 			vsw_send_msg(ldcp, (void *)ver_pkt,
38621ae08745Sheppo 					sizeof (vio_ver_msg_t));
38631ae08745Sheppo 
38641ae08745Sheppo 			vsw_next_milestone(ldcp);
38651ae08745Sheppo 
38661ae08745Sheppo 		}
38671ae08745Sheppo 		break;
38681ae08745Sheppo 
38691ae08745Sheppo 	default:
38701ae08745Sheppo 		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
38711ae08745Sheppo 			ver_pkt->tag.vio_subtype);
38721ae08745Sheppo 	}
38731ae08745Sheppo 
38741ae08745Sheppo 	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
38751ae08745Sheppo }
38761ae08745Sheppo 
38771ae08745Sheppo /*
38781ae08745Sheppo  * Process an attribute packet. We can end up here either because our peer
38791ae08745Sheppo  * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our
38801ae08745Sheppo  * peer has sent us an attribute INFO message
38811ae08745Sheppo  *
38821ae08745Sheppo  * If its an ACK we then move to the next stage of the handshake which
38831ae08745Sheppo  * is to send our descriptor ring info to our peer. If its a NACK then
38841ae08745Sheppo  * there is nothing more we can (currently) do.
38851ae08745Sheppo  *
38861ae08745Sheppo  * If we get a valid/acceptable INFO packet (and we have already negotiated
38871ae08745Sheppo  * a version) we ACK back and set channel state to ATTR_RECV, otherwise we
38881ae08745Sheppo  * NACK back and reset channel state to INACTIV.
38891ae08745Sheppo  *
38901ae08745Sheppo  * FUTURE: in time we will probably negotiate over attributes, but for
38911ae08745Sheppo  * the moment unacceptable attributes are regarded as a fatal error.
38921ae08745Sheppo  *
38931ae08745Sheppo  */
38941ae08745Sheppo void
38951ae08745Sheppo vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt)
38961ae08745Sheppo {
38971ae08745Sheppo 	vnet_attr_msg_t		*attr_pkt;
38981ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
38991ae08745Sheppo 	vsw_port_t		*port = ldcp->ldc_port;
39001ae08745Sheppo 	uint64_t		macaddr = 0;
39011ae08745Sheppo 	int			i;
39021ae08745Sheppo 
39031ae08745Sheppo 	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
39041ae08745Sheppo 
39051ae08745Sheppo 	/*
39061ae08745Sheppo 	 * We know this is a ctrl/attr packet so
39071ae08745Sheppo 	 * cast it into the correct structure.
39081ae08745Sheppo 	 */
39091ae08745Sheppo 	attr_pkt = (vnet_attr_msg_t *)pkt;
39101ae08745Sheppo 
39111ae08745Sheppo 	switch (attr_pkt->tag.vio_subtype) {
39121ae08745Sheppo 	case VIO_SUBTYPE_INFO:
39131ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
39141ae08745Sheppo 
39151ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV))
39161ae08745Sheppo 			return;
39171ae08745Sheppo 
39181ae08745Sheppo 		/*
39191ae08745Sheppo 		 * If the attributes are unacceptable then we NACK back.
39201ae08745Sheppo 		 */
39211ae08745Sheppo 		if (vsw_check_attr(attr_pkt, ldcp->ldc_port)) {
39221ae08745Sheppo 
39231ae08745Sheppo 			DERR(vswp, "%s (chan %d): invalid attributes",
39241ae08745Sheppo 				__func__, ldcp->ldc_id);
39251ae08745Sheppo 
39261ae08745Sheppo 			vsw_free_lane_resources(ldcp, INBOUND);
39271ae08745Sheppo 
39281ae08745Sheppo 			attr_pkt->tag.vio_sid = ldcp->local_session;
39291ae08745Sheppo 			attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
39301ae08745Sheppo 
39311ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
39321ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT;
39331ae08745Sheppo 			vsw_send_msg(ldcp, (void *)attr_pkt,
39341ae08745Sheppo 					sizeof (vnet_attr_msg_t));
39351ae08745Sheppo 
39361ae08745Sheppo 			vsw_next_milestone(ldcp);
39371ae08745Sheppo 			return;
39381ae08745Sheppo 		}
39391ae08745Sheppo 
39401ae08745Sheppo 		/*
39411ae08745Sheppo 		 * Otherwise store attributes for this lane and update
39421ae08745Sheppo 		 * lane state.
39431ae08745Sheppo 		 */
39441ae08745Sheppo 		ldcp->lane_in.mtu = attr_pkt->mtu;
39451ae08745Sheppo 		ldcp->lane_in.addr = attr_pkt->addr;
39461ae08745Sheppo 		ldcp->lane_in.addr_type = attr_pkt->addr_type;
39471ae08745Sheppo 		ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode;
39481ae08745Sheppo 		ldcp->lane_in.ack_freq = attr_pkt->ack_freq;
39491ae08745Sheppo 
39501ae08745Sheppo 		macaddr = ldcp->lane_in.addr;
39511ae08745Sheppo 		for (i = ETHERADDRL - 1; i >= 0; i--) {
39521ae08745Sheppo 			port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF;
39531ae08745Sheppo 			macaddr >>= 8;
39541ae08745Sheppo 		}
39551ae08745Sheppo 
39561ae08745Sheppo 		/* create the fdb entry for this port/mac address */
39571ae08745Sheppo 		(void) vsw_add_fdb(vswp, port);
39581ae08745Sheppo 
39591ae08745Sheppo 		/* setup device specifc xmit routines */
39601ae08745Sheppo 		mutex_enter(&port->tx_lock);
39611ae08745Sheppo 		if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) {
39621ae08745Sheppo 			D2(vswp, "%s: mode = VIO_DRING_MODE", __func__);
39631ae08745Sheppo 			port->transmit = vsw_dringsend;
39641ae08745Sheppo 		} else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) {
39651ae08745Sheppo 			D2(vswp, "%s: mode = VIO_DESC_MODE", __func__);
39661ae08745Sheppo 			vsw_create_privring(ldcp);
39671ae08745Sheppo 			port->transmit = vsw_descrsend;
39681ae08745Sheppo 		}
39691ae08745Sheppo 		mutex_exit(&port->tx_lock);
39701ae08745Sheppo 
39711ae08745Sheppo 		attr_pkt->tag.vio_sid = ldcp->local_session;
39721ae08745Sheppo 		attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
39731ae08745Sheppo 
39741ae08745Sheppo 		DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt);
39751ae08745Sheppo 
39761ae08745Sheppo 		ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT;
39771ae08745Sheppo 
39781ae08745Sheppo 		vsw_send_msg(ldcp, (void *)attr_pkt,
39791ae08745Sheppo 					sizeof (vnet_attr_msg_t));
39801ae08745Sheppo 
39811ae08745Sheppo 		vsw_next_milestone(ldcp);
39821ae08745Sheppo 		break;
39831ae08745Sheppo 
39841ae08745Sheppo 	case VIO_SUBTYPE_ACK:
39851ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
39861ae08745Sheppo 
39871ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV))
39881ae08745Sheppo 			return;
39891ae08745Sheppo 
39901ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV;
39911ae08745Sheppo 		vsw_next_milestone(ldcp);
39921ae08745Sheppo 		break;
39931ae08745Sheppo 
39941ae08745Sheppo 	case VIO_SUBTYPE_NACK:
39951ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
39961ae08745Sheppo 
39971ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV))
39981ae08745Sheppo 			return;
39991ae08745Sheppo 
40001ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV;
40011ae08745Sheppo 		vsw_next_milestone(ldcp);
40021ae08745Sheppo 		break;
40031ae08745Sheppo 
40041ae08745Sheppo 	default:
40051ae08745Sheppo 		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
40061ae08745Sheppo 			attr_pkt->tag.vio_subtype);
40071ae08745Sheppo 	}
40081ae08745Sheppo 
40091ae08745Sheppo 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
40101ae08745Sheppo }
40111ae08745Sheppo 
40121ae08745Sheppo /*
40131ae08745Sheppo  * Process a dring info packet. We can end up here either because our peer
40141ae08745Sheppo  * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our
40151ae08745Sheppo  * peer has sent us a dring INFO message.
40161ae08745Sheppo  *
40171ae08745Sheppo  * If we get a valid/acceptable INFO packet (and we have already negotiated
40181ae08745Sheppo  * a version) we ACK back and update the lane state, otherwise we NACK back.
40191ae08745Sheppo  *
40201ae08745Sheppo  * FUTURE: nothing to stop client from sending us info on multiple dring's
40211ae08745Sheppo  * but for the moment we will just use the first one we are given.
40221ae08745Sheppo  *
40231ae08745Sheppo  */
40241ae08745Sheppo void
40251ae08745Sheppo vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt)
40261ae08745Sheppo {
40271ae08745Sheppo 	vio_dring_reg_msg_t	*dring_pkt;
40281ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
40291ae08745Sheppo 	ldc_mem_info_t		minfo;
40301ae08745Sheppo 	dring_info_t		*dp, *dbp;
40311ae08745Sheppo 	int			dring_found = 0;
40321ae08745Sheppo 
40331ae08745Sheppo 	/*
40341ae08745Sheppo 	 * We know this is a ctrl/dring packet so
40351ae08745Sheppo 	 * cast it into the correct structure.
40361ae08745Sheppo 	 */
40371ae08745Sheppo 	dring_pkt = (vio_dring_reg_msg_t *)pkt;
40381ae08745Sheppo 
40391ae08745Sheppo 	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
40401ae08745Sheppo 
40411ae08745Sheppo 	switch (dring_pkt->tag.vio_subtype) {
40421ae08745Sheppo 	case VIO_SUBTYPE_INFO:
40431ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
40441ae08745Sheppo 
40451ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
40461ae08745Sheppo 			return;
40471ae08745Sheppo 
40481ae08745Sheppo 		/*
40491ae08745Sheppo 		 * If the dring params are unacceptable then we NACK back.
40501ae08745Sheppo 		 */
40511ae08745Sheppo 		if (vsw_check_dring_info(dring_pkt)) {
40521ae08745Sheppo 
40531ae08745Sheppo 			DERR(vswp, "%s (%lld): invalid dring info",
40541ae08745Sheppo 				__func__, ldcp->ldc_id);
40551ae08745Sheppo 
40561ae08745Sheppo 			vsw_free_lane_resources(ldcp, INBOUND);
40571ae08745Sheppo 
40581ae08745Sheppo 			dring_pkt->tag.vio_sid = ldcp->local_session;
40591ae08745Sheppo 			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
40601ae08745Sheppo 
40611ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
40621ae08745Sheppo 
40631ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
40641ae08745Sheppo 
40651ae08745Sheppo 			vsw_send_msg(ldcp, (void *)dring_pkt,
40661ae08745Sheppo 					sizeof (vio_dring_reg_msg_t));
40671ae08745Sheppo 
40681ae08745Sheppo 			vsw_next_milestone(ldcp);
40691ae08745Sheppo 			return;
40701ae08745Sheppo 		}
40711ae08745Sheppo 
40721ae08745Sheppo 		/*
40731ae08745Sheppo 		 * Otherwise, attempt to map in the dring using the
40741ae08745Sheppo 		 * cookie. If that succeeds we send back a unique dring
40751ae08745Sheppo 		 * identifier that the sending side will use in future
40761ae08745Sheppo 		 * to refer to this descriptor ring.
40771ae08745Sheppo 		 */
40781ae08745Sheppo 		dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
40791ae08745Sheppo 
40801ae08745Sheppo 		dp->num_descriptors = dring_pkt->num_descriptors;
40811ae08745Sheppo 		dp->descriptor_size = dring_pkt->descriptor_size;
40821ae08745Sheppo 		dp->options = dring_pkt->options;
40831ae08745Sheppo 		dp->ncookies = dring_pkt->ncookies;
40841ae08745Sheppo 
40851ae08745Sheppo 		/*
40861ae08745Sheppo 		 * Note: should only get one cookie. Enforced in
40871ae08745Sheppo 		 * the ldc layer.
40881ae08745Sheppo 		 */
40891ae08745Sheppo 		bcopy(&dring_pkt->cookie[0], &dp->cookie[0],
40901ae08745Sheppo 			sizeof (ldc_mem_cookie_t));
40911ae08745Sheppo 
40921ae08745Sheppo 		D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__,
40931ae08745Sheppo 			dp->num_descriptors, dp->descriptor_size);
40941ae08745Sheppo 		D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__,
40951ae08745Sheppo 			dp->options, dp->ncookies);
40961ae08745Sheppo 
40971ae08745Sheppo 		if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0],
40981ae08745Sheppo 			dp->ncookies, dp->num_descriptors,
40991ae08745Sheppo 			dp->descriptor_size, LDC_SHADOW_MAP,
41001ae08745Sheppo 			&(dp->handle))) != 0) {
41011ae08745Sheppo 
41021ae08745Sheppo 			DERR(vswp, "%s: dring_map failed\n", __func__);
41031ae08745Sheppo 
41041ae08745Sheppo 			kmem_free(dp, sizeof (dring_info_t));
41051ae08745Sheppo 			vsw_free_lane_resources(ldcp, INBOUND);
41061ae08745Sheppo 
41071ae08745Sheppo 			dring_pkt->tag.vio_sid = ldcp->local_session;
41081ae08745Sheppo 			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
41091ae08745Sheppo 
41101ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
41111ae08745Sheppo 
41121ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
41131ae08745Sheppo 			vsw_send_msg(ldcp, (void *)dring_pkt,
41141ae08745Sheppo 				sizeof (vio_dring_reg_msg_t));
41151ae08745Sheppo 
41161ae08745Sheppo 			vsw_next_milestone(ldcp);
41171ae08745Sheppo 			return;
41181ae08745Sheppo 		}
41191ae08745Sheppo 
41201ae08745Sheppo 		if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
41211ae08745Sheppo 
41221ae08745Sheppo 			DERR(vswp, "%s: dring_addr failed\n", __func__);
41231ae08745Sheppo 
41241ae08745Sheppo 			kmem_free(dp, sizeof (dring_info_t));
41251ae08745Sheppo 			vsw_free_lane_resources(ldcp, INBOUND);
41261ae08745Sheppo 
41271ae08745Sheppo 			dring_pkt->tag.vio_sid = ldcp->local_session;
41281ae08745Sheppo 			dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK;
41291ae08745Sheppo 
41301ae08745Sheppo 			DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt);
41311ae08745Sheppo 
41321ae08745Sheppo 			ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT;
41331ae08745Sheppo 			vsw_send_msg(ldcp, (void *)dring_pkt,
41341ae08745Sheppo 				sizeof (vio_dring_reg_msg_t));
41351ae08745Sheppo 
41361ae08745Sheppo 			vsw_next_milestone(ldcp);
41371ae08745Sheppo 			return;
41381ae08745Sheppo 		} else {
41391ae08745Sheppo 			/* store the address of the pub part of ring */
41401ae08745Sheppo 			dp->pub_addr = minfo.vaddr;
41411ae08745Sheppo 		}
41421ae08745Sheppo 
41431ae08745Sheppo 		/* no private section as we are importing */
41441ae08745Sheppo 		dp->priv_addr = NULL;
41451ae08745Sheppo 
41461ae08745Sheppo 		/*
41471ae08745Sheppo 		 * Using simple mono increasing int for ident at
41481ae08745Sheppo 		 * the moment.
41491ae08745Sheppo 		 */
41501ae08745Sheppo 		dp->ident = ldcp->next_ident;
41511ae08745Sheppo 		ldcp->next_ident++;
41521ae08745Sheppo 
41531ae08745Sheppo 		dp->end_idx = 0;
41541ae08745Sheppo 		dp->next = NULL;
41551ae08745Sheppo 
41561ae08745Sheppo 		/*
41571ae08745Sheppo 		 * Link it onto the end of the list of drings
41581ae08745Sheppo 		 * for this lane.
41591ae08745Sheppo 		 */
41601ae08745Sheppo 		if (ldcp->lane_in.dringp == NULL) {
41611ae08745Sheppo 			D2(vswp, "%s: adding first INBOUND dring", __func__);
41621ae08745Sheppo 			ldcp->lane_in.dringp = dp;
41631ae08745Sheppo 		} else {
41641ae08745Sheppo 			dbp = ldcp->lane_in.dringp;
41651ae08745Sheppo 
41661ae08745Sheppo 			while (dbp->next != NULL)
41671ae08745Sheppo 				dbp = dbp->next;
41681ae08745Sheppo 
41691ae08745Sheppo 			dbp->next = dp;
41701ae08745Sheppo 		}
41711ae08745Sheppo 
41721ae08745Sheppo 		/* acknowledge it */
41731ae08745Sheppo 		dring_pkt->tag.vio_sid = ldcp->local_session;
41741ae08745Sheppo 		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
41751ae08745Sheppo 		dring_pkt->dring_ident = dp->ident;
41761ae08745Sheppo 
41771ae08745Sheppo 		vsw_send_msg(ldcp, (void *)dring_pkt,
41781ae08745Sheppo 				sizeof (vio_dring_reg_msg_t));
41791ae08745Sheppo 
41801ae08745Sheppo 		ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT;
41811ae08745Sheppo 		vsw_next_milestone(ldcp);
41821ae08745Sheppo 		break;
41831ae08745Sheppo 
41841ae08745Sheppo 	case VIO_SUBTYPE_ACK:
41851ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
41861ae08745Sheppo 
41871ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV))
41881ae08745Sheppo 			return;
41891ae08745Sheppo 
41901ae08745Sheppo 		/*
41911ae08745Sheppo 		 * Peer is acknowledging our dring info and will have
41921ae08745Sheppo 		 * sent us a dring identifier which we will use to
41931ae08745Sheppo 		 * refer to this ring w.r.t. our peer.
41941ae08745Sheppo 		 */
41951ae08745Sheppo 		dp = ldcp->lane_out.dringp;
41961ae08745Sheppo 		if (dp != NULL) {
41971ae08745Sheppo 			/*
41981ae08745Sheppo 			 * Find the ring this ident should be associated
41991ae08745Sheppo 			 * with.
42001ae08745Sheppo 			 */
42011ae08745Sheppo 			if (vsw_dring_match(dp, dring_pkt)) {
42021ae08745Sheppo 				dring_found = 1;
42031ae08745Sheppo 
42041ae08745Sheppo 			} else while (dp != NULL) {
42051ae08745Sheppo 				if (vsw_dring_match(dp, dring_pkt)) {
42061ae08745Sheppo 					dring_found = 1;
42071ae08745Sheppo 					break;
42081ae08745Sheppo 				}
42091ae08745Sheppo 				dp = dp->next;
42101ae08745Sheppo 			}
42111ae08745Sheppo 
42121ae08745Sheppo 			if (dring_found == 0) {
42131ae08745Sheppo 				DERR(NULL, "%s: unrecognised ring cookie",
42141ae08745Sheppo 					__func__);
42151ae08745Sheppo 				vsw_restart_handshake(ldcp);
42161ae08745Sheppo 				return;
42171ae08745Sheppo 			}
42181ae08745Sheppo 
42191ae08745Sheppo 		} else {
42201ae08745Sheppo 			DERR(vswp, "%s: DRING ACK received but no drings "
42211ae08745Sheppo 				"allocated", __func__);
42221ae08745Sheppo 			vsw_restart_handshake(ldcp);
42231ae08745Sheppo 			return;
42241ae08745Sheppo 		}
42251ae08745Sheppo 
42261ae08745Sheppo 		/* store ident */
42271ae08745Sheppo 		dp->ident = dring_pkt->dring_ident;
42281ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV;
42291ae08745Sheppo 		vsw_next_milestone(ldcp);
42301ae08745Sheppo 		break;
42311ae08745Sheppo 
42321ae08745Sheppo 	case VIO_SUBTYPE_NACK:
42331ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
42341ae08745Sheppo 
42351ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV))
42361ae08745Sheppo 			return;
42371ae08745Sheppo 
42381ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV;
42391ae08745Sheppo 		vsw_next_milestone(ldcp);
42401ae08745Sheppo 		break;
42411ae08745Sheppo 
42421ae08745Sheppo 	default:
42431ae08745Sheppo 		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
42441ae08745Sheppo 			dring_pkt->tag.vio_subtype);
42451ae08745Sheppo 	}
42461ae08745Sheppo 
42471ae08745Sheppo 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
42481ae08745Sheppo }
42491ae08745Sheppo 
42501ae08745Sheppo /*
42511ae08745Sheppo  * Process a request from peer to unregister a dring.
42521ae08745Sheppo  *
42531ae08745Sheppo  * For the moment we just restart the handshake if our
42541ae08745Sheppo  * peer endpoint attempts to unregister a dring.
42551ae08745Sheppo  */
42561ae08745Sheppo void
42571ae08745Sheppo vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt)
42581ae08745Sheppo {
42591ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
42601ae08745Sheppo 	vio_dring_unreg_msg_t	*dring_pkt;
42611ae08745Sheppo 
42621ae08745Sheppo 	/*
42631ae08745Sheppo 	 * We know this is a ctrl/dring packet so
42641ae08745Sheppo 	 * cast it into the correct structure.
42651ae08745Sheppo 	 */
42661ae08745Sheppo 	dring_pkt = (vio_dring_unreg_msg_t *)pkt;
42671ae08745Sheppo 
42681ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
42691ae08745Sheppo 
42701ae08745Sheppo 	switch (dring_pkt->tag.vio_subtype) {
42711ae08745Sheppo 	case VIO_SUBTYPE_INFO:
42721ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
42731ae08745Sheppo 
42741ae08745Sheppo 		DWARN(vswp, "%s: restarting handshake..", __func__);
42751ae08745Sheppo 		vsw_restart_handshake(ldcp);
42761ae08745Sheppo 		break;
42771ae08745Sheppo 
42781ae08745Sheppo 	case VIO_SUBTYPE_ACK:
42791ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
42801ae08745Sheppo 
42811ae08745Sheppo 		DWARN(vswp, "%s: restarting handshake..", __func__);
42821ae08745Sheppo 		vsw_restart_handshake(ldcp);
42831ae08745Sheppo 		break;
42841ae08745Sheppo 
42851ae08745Sheppo 	case VIO_SUBTYPE_NACK:
42861ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
42871ae08745Sheppo 
42881ae08745Sheppo 		DWARN(vswp, "%s: restarting handshake..", __func__);
42891ae08745Sheppo 		vsw_restart_handshake(ldcp);
42901ae08745Sheppo 		break;
42911ae08745Sheppo 
42921ae08745Sheppo 	default:
42931ae08745Sheppo 		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
42941ae08745Sheppo 			dring_pkt->tag.vio_subtype);
42951ae08745Sheppo 		vsw_restart_handshake(ldcp);
42961ae08745Sheppo 	}
42971ae08745Sheppo 
42981ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
42991ae08745Sheppo }
43001ae08745Sheppo 
/*
 * Turn the multicast message `pkt' into a NACK carrying our session id
 * and send it back over `ldcp'.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * is safe as the body of an unbraced if/else; invoke it with a trailing
 * semicolon. Note that `pkt' is evaluated three times and `ldcp' twice,
 * so pass side-effect free expressions.
 */
#define	SND_MCST_NACK(ldcp, pkt) \
	do { \
		(pkt)->tag.vio_subtype = VIO_SUBTYPE_NACK; \
		(pkt)->tag.vio_sid = (ldcp)->local_session; \
		vsw_send_msg((ldcp), (void *)(pkt), \
		    sizeof (vnet_mcast_msg_t)); \
	} while (0)
43051ae08745Sheppo 
43061ae08745Sheppo /*
43071ae08745Sheppo  * Process a multicast request from a vnet.
43081ae08745Sheppo  *
43091ae08745Sheppo  * Vnet's specify a multicast address that they are interested in. This
43101ae08745Sheppo  * address is used as a key into the hash table which forms the multicast
43111ae08745Sheppo  * forwarding database (mFDB).
43121ae08745Sheppo  *
43131ae08745Sheppo  * The table keys are the multicast addresses, while the table entries
43141ae08745Sheppo  * are pointers to lists of ports which wish to receive packets for the
43151ae08745Sheppo  * specified multicast address.
43161ae08745Sheppo  *
43171ae08745Sheppo  * When a multicast packet is being switched we use the address as a key
43181ae08745Sheppo  * into the hash table, and then walk the appropriate port list forwarding
43191ae08745Sheppo  * the pkt to each port in turn.
43201ae08745Sheppo  *
43211ae08745Sheppo  * If a vnet is no longer interested in a particular multicast grouping
43221ae08745Sheppo  * we simply find the correct location in the hash table and then delete
43231ae08745Sheppo  * the relevant port from the port list.
43241ae08745Sheppo  *
43251ae08745Sheppo  * To deal with the case whereby a port is being deleted without first
43261ae08745Sheppo  * removing itself from the lists in the hash table, we maintain a list
43271ae08745Sheppo  * of multicast addresses the port has registered an interest in, within
43281ae08745Sheppo  * the port structure itself. We then simply walk that list of addresses
43291ae08745Sheppo  * using them as keys into the hash table and remove the port from the
43301ae08745Sheppo  * appropriate lists.
43311ae08745Sheppo  */
43321ae08745Sheppo static void
43331ae08745Sheppo vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt)
43341ae08745Sheppo {
43351ae08745Sheppo 	vnet_mcast_msg_t	*mcst_pkt;
43361ae08745Sheppo 	vsw_port_t		*port = ldcp->ldc_port;
43371ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
43381ae08745Sheppo 	int			i;
43391ae08745Sheppo 
43401ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
43411ae08745Sheppo 
43421ae08745Sheppo 	/*
43431ae08745Sheppo 	 * We know this is a ctrl/mcast packet so
43441ae08745Sheppo 	 * cast it into the correct structure.
43451ae08745Sheppo 	 */
43461ae08745Sheppo 	mcst_pkt = (vnet_mcast_msg_t *)pkt;
43471ae08745Sheppo 
43481ae08745Sheppo 	switch (mcst_pkt->tag.vio_subtype) {
43491ae08745Sheppo 	case VIO_SUBTYPE_INFO:
43501ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
43511ae08745Sheppo 
43521ae08745Sheppo 		/*
43531ae08745Sheppo 		 * Check if in correct state to receive a multicast
43541ae08745Sheppo 		 * message (i.e. handshake complete). If not reset
43551ae08745Sheppo 		 * the handshake.
43561ae08745Sheppo 		 */
43571ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV))
43581ae08745Sheppo 			return;
43591ae08745Sheppo 
43601ae08745Sheppo 		/*
43611ae08745Sheppo 		 * Before attempting to add or remove address check
43621ae08745Sheppo 		 * that they are valid multicast addresses.
43631ae08745Sheppo 		 * If not, then NACK back.
43641ae08745Sheppo 		 */
43651ae08745Sheppo 		for (i = 0; i < mcst_pkt->count; i++) {
43661ae08745Sheppo 			if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) {
43671ae08745Sheppo 				DERR(vswp, "%s: invalid multicast address",
43681ae08745Sheppo 								__func__);
43691ae08745Sheppo 				SND_MCST_NACK(ldcp, mcst_pkt);
43701ae08745Sheppo 				return;
43711ae08745Sheppo 			}
43721ae08745Sheppo 		}
43731ae08745Sheppo 
43741ae08745Sheppo 		/*
43751ae08745Sheppo 		 * Now add/remove the addresses. If this fails we
43761ae08745Sheppo 		 * NACK back.
43771ae08745Sheppo 		 */
43781ae08745Sheppo 		if (vsw_add_rem_mcst(mcst_pkt, port) != 0) {
43791ae08745Sheppo 			SND_MCST_NACK(ldcp, mcst_pkt);
43801ae08745Sheppo 			return;
43811ae08745Sheppo 		}
43821ae08745Sheppo 
43831ae08745Sheppo 		mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
43841ae08745Sheppo 		mcst_pkt->tag.vio_sid = ldcp->local_session;
43851ae08745Sheppo 
43861ae08745Sheppo 		DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt);
43871ae08745Sheppo 
43881ae08745Sheppo 		vsw_send_msg(ldcp, (void *)mcst_pkt,
43891ae08745Sheppo 					sizeof (vnet_mcast_msg_t));
43901ae08745Sheppo 		break;
43911ae08745Sheppo 
43921ae08745Sheppo 	case VIO_SUBTYPE_ACK:
43931ae08745Sheppo 		DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
43941ae08745Sheppo 
43951ae08745Sheppo 		/*
43961ae08745Sheppo 		 * We shouldn't ever get a multicast ACK message as
43971ae08745Sheppo 		 * at the moment we never request multicast addresses
43981ae08745Sheppo 		 * to be set on some other device. This may change in
43991ae08745Sheppo 		 * the future if we have cascading switches.
44001ae08745Sheppo 		 */
44011ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV))
44021ae08745Sheppo 			return;
44031ae08745Sheppo 
44041ae08745Sheppo 				/* Do nothing */
44051ae08745Sheppo 		break;
44061ae08745Sheppo 
44071ae08745Sheppo 	case VIO_SUBTYPE_NACK:
44081ae08745Sheppo 		DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
44091ae08745Sheppo 
44101ae08745Sheppo 		/*
44111ae08745Sheppo 		 * We shouldn't get a multicast NACK packet for the
44121ae08745Sheppo 		 * same reasons as we shouldn't get a ACK packet.
44131ae08745Sheppo 		 */
44141ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV))
44151ae08745Sheppo 			return;
44161ae08745Sheppo 
44171ae08745Sheppo 				/* Do nothing */
44181ae08745Sheppo 		break;
44191ae08745Sheppo 
44201ae08745Sheppo 	default:
44211ae08745Sheppo 		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
44221ae08745Sheppo 			mcst_pkt->tag.vio_subtype);
44231ae08745Sheppo 	}
44241ae08745Sheppo 
44251ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
44261ae08745Sheppo }
44271ae08745Sheppo 
44281ae08745Sheppo static void
44291ae08745Sheppo vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt)
44301ae08745Sheppo {
44311ae08745Sheppo 	vio_rdx_msg_t	*rdx_pkt;
44321ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
44331ae08745Sheppo 
44341ae08745Sheppo 	/*
44351ae08745Sheppo 	 * We know this is a ctrl/rdx packet so
44361ae08745Sheppo 	 * cast it into the correct structure.
44371ae08745Sheppo 	 */
44381ae08745Sheppo 	rdx_pkt = (vio_rdx_msg_t *)pkt;
44391ae08745Sheppo 
44401ae08745Sheppo 	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);
44411ae08745Sheppo 
44421ae08745Sheppo 	switch (rdx_pkt->tag.vio_subtype) {
44431ae08745Sheppo 	case VIO_SUBTYPE_INFO:
44441ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
44451ae08745Sheppo 
44461ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_INFO_RECV))
44471ae08745Sheppo 			return;
44481ae08745Sheppo 
44491ae08745Sheppo 		rdx_pkt->tag.vio_sid = ldcp->local_session;
44501ae08745Sheppo 		rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
44511ae08745Sheppo 
44521ae08745Sheppo 		DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt);
44531ae08745Sheppo 
44541ae08745Sheppo 		ldcp->lane_in.lstate |= VSW_RDX_ACK_SENT;
44551ae08745Sheppo 
44561ae08745Sheppo 		vsw_send_msg(ldcp, (void *)rdx_pkt,
44571ae08745Sheppo 				sizeof (vio_rdx_msg_t));
44581ae08745Sheppo 
44591ae08745Sheppo 		vsw_next_milestone(ldcp);
44601ae08745Sheppo 		break;
44611ae08745Sheppo 
44621ae08745Sheppo 	case VIO_SUBTYPE_ACK:
44631ae08745Sheppo 		/*
44641ae08745Sheppo 		 * Should be handled in-band by callback handler.
44651ae08745Sheppo 		 */
44661ae08745Sheppo 		DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__);
44671ae08745Sheppo 		vsw_restart_handshake(ldcp);
44681ae08745Sheppo 		break;
44691ae08745Sheppo 
44701ae08745Sheppo 	case VIO_SUBTYPE_NACK:
44711ae08745Sheppo 		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
44721ae08745Sheppo 
44731ae08745Sheppo 		if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_NACK_RECV))
44741ae08745Sheppo 			return;
44751ae08745Sheppo 
44761ae08745Sheppo 		ldcp->lane_out.lstate |= VSW_RDX_NACK_RECV;
44771ae08745Sheppo 		vsw_next_milestone(ldcp);
44781ae08745Sheppo 		break;
44791ae08745Sheppo 
44801ae08745Sheppo 	default:
44811ae08745Sheppo 		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
44821ae08745Sheppo 			rdx_pkt->tag.vio_subtype);
44831ae08745Sheppo 	}
44841ae08745Sheppo 
44851ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
44861ae08745Sheppo }
44871ae08745Sheppo 
44881ae08745Sheppo static void
44891ae08745Sheppo vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t tag)
44901ae08745Sheppo {
44911ae08745Sheppo 	uint16_t	env = tag.vio_subtype_env;
44921ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
44931ae08745Sheppo 
44941ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
44951ae08745Sheppo 
44961ae08745Sheppo 	/* session id check */
44971ae08745Sheppo 	if (ldcp->session_status & VSW_PEER_SESSION) {
44981ae08745Sheppo 		if (ldcp->peer_session != tag.vio_sid) {
44991ae08745Sheppo 			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
45001ae08745Sheppo 				__func__, ldcp->ldc_id, tag.vio_sid);
45011ae08745Sheppo 			vsw_restart_handshake(ldcp);
45021ae08745Sheppo 			return;
45031ae08745Sheppo 		}
45041ae08745Sheppo 	}
45051ae08745Sheppo 
45061ae08745Sheppo 	/*
45071ae08745Sheppo 	 * It is an error for us to be getting data packets
45081ae08745Sheppo 	 * before the handshake has completed.
45091ae08745Sheppo 	 */
45101ae08745Sheppo 	if (ldcp->hphase != VSW_MILESTONE4) {
45111ae08745Sheppo 		DERR(vswp, "%s: got data packet before handshake complete "
45121ae08745Sheppo 			"hphase %d (%x: %x)", __func__, ldcp->hphase,
45131ae08745Sheppo 			ldcp->lane_in.lstate, ldcp->lane_out.lstate);
45141ae08745Sheppo 		DUMP_FLAGS(ldcp->lane_in.lstate);
45151ae08745Sheppo 		DUMP_FLAGS(ldcp->lane_out.lstate);
45161ae08745Sheppo 		vsw_restart_handshake(ldcp);
45171ae08745Sheppo 		return;
45181ae08745Sheppo 	}
45191ae08745Sheppo 
45201ae08745Sheppo 	/*
45211ae08745Sheppo 	 * Switch on vio_subtype envelope, then let lower routines
45221ae08745Sheppo 	 * decide if its an INFO, ACK or NACK packet.
45231ae08745Sheppo 	 */
45241ae08745Sheppo 	if (env == VIO_DRING_DATA) {
45251ae08745Sheppo 		vsw_process_data_dring_pkt(ldcp, dpkt);
45261ae08745Sheppo 	} else if (env == VIO_PKT_DATA) {
45271ae08745Sheppo 		vsw_process_data_raw_pkt(ldcp, dpkt);
45281ae08745Sheppo 	} else if (env == VIO_DESC_DATA) {
45291ae08745Sheppo 		vsw_process_data_ibnd_pkt(ldcp, dpkt);
45301ae08745Sheppo 	} else {
45311ae08745Sheppo 		DERR(vswp, "%s : unknown vio_subtype_env (%x)\n",
45321ae08745Sheppo 							__func__, env);
45331ae08745Sheppo 	}
45341ae08745Sheppo 
45351ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
45361ae08745Sheppo }
45371ae08745Sheppo 
/*
 * Turn the dring message 'pkt' into a NACK stamped with our local
 * session id and send it back over 'ldcp'.
 *
 * Wrapped in do { } while (0) with parenthesized arguments so that the
 * macro expands to exactly one statement and can be used safely as the
 * body of an unbraced if/else. Invoke it with a trailing semicolon.
 * Both arguments are evaluated more than once.
 */
#define	SND_DRING_NACK(ldcp, pkt) \
	do { \
		(pkt)->tag.vio_subtype = VIO_SUBTYPE_NACK; \
		(pkt)->tag.vio_sid = (ldcp)->local_session; \
		vsw_send_msg((ldcp), (void *)(pkt), \
				sizeof (vio_dring_msg_t)); \
		_NOTE(CONSTCOND) \
	} while (0)
45421ae08745Sheppo 
45431ae08745Sheppo static void
45441ae08745Sheppo vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt)
45451ae08745Sheppo {
45461ae08745Sheppo 	vio_dring_msg_t		*dring_pkt;
45471ae08745Sheppo 	vnet_public_desc_t	*pub_addr = NULL;
45481ae08745Sheppo 	vsw_private_desc_t	*priv_addr = NULL;
45491ae08745Sheppo 	dring_info_t		*dp = NULL;
45501ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
45511ae08745Sheppo 	mblk_t			*mp = NULL;
45521ae08745Sheppo 	mblk_t			*bp = NULL;
45531ae08745Sheppo 	mblk_t			*bpt = NULL;
45541ae08745Sheppo 	size_t			nbytes = 0;
45551ae08745Sheppo 	size_t			off = 0;
45561ae08745Sheppo 	uint64_t		ncookies = 0;
45571ae08745Sheppo 	uint64_t		chain = 0;
4558d10e4ef2Snarayan 	uint64_t		j, len;
4559d10e4ef2Snarayan 	uint32_t		pos, start, datalen;
4560d10e4ef2Snarayan 	uint32_t		range_start, range_end;
4561d10e4ef2Snarayan 	int32_t			end, num, cnt = 0;
4562d10e4ef2Snarayan 	int			i, rv;
45631ae08745Sheppo 	boolean_t		ack_needed = B_FALSE;
4564d10e4ef2Snarayan 	boolean_t		prev_desc_ack = B_FALSE;
4565d10e4ef2Snarayan 	int			read_attempts = 0;
45661ae08745Sheppo 
45671ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
45681ae08745Sheppo 
45691ae08745Sheppo 	/*
45701ae08745Sheppo 	 * We know this is a data/dring packet so
45711ae08745Sheppo 	 * cast it into the correct structure.
45721ae08745Sheppo 	 */
45731ae08745Sheppo 	dring_pkt = (vio_dring_msg_t *)dpkt;
45741ae08745Sheppo 
45751ae08745Sheppo 	/*
45761ae08745Sheppo 	 * Switch on the vio_subtype. If its INFO then we need to
45771ae08745Sheppo 	 * process the data. If its an ACK we need to make sure
45781ae08745Sheppo 	 * it makes sense (i.e did we send an earlier data/info),
45791ae08745Sheppo 	 * and if its a NACK then we maybe attempt a retry.
45801ae08745Sheppo 	 */
45811ae08745Sheppo 	switch (dring_pkt->tag.vio_subtype) {
45821ae08745Sheppo 	case VIO_SUBTYPE_INFO:
45831ae08745Sheppo 		D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id);
45841ae08745Sheppo 
45851ae08745Sheppo 		if ((dp = vsw_ident2dring(&ldcp->lane_in,
45861ae08745Sheppo 				dring_pkt->dring_ident)) == NULL) {
45871ae08745Sheppo 
45881ae08745Sheppo 			DERR(vswp, "%s(%lld): unable to find dring from "
45891ae08745Sheppo 				"ident 0x%llx", __func__, ldcp->ldc_id,
45901ae08745Sheppo 				dring_pkt->dring_ident);
45911ae08745Sheppo 
45921ae08745Sheppo 			SND_DRING_NACK(ldcp, dring_pkt);
45931ae08745Sheppo 			return;
45941ae08745Sheppo 		}
45951ae08745Sheppo 
4596d10e4ef2Snarayan 		start = pos = dring_pkt->start_idx;
45971ae08745Sheppo 		end = dring_pkt->end_idx;
4598d10e4ef2Snarayan 		len = dp->num_descriptors;
45991ae08745Sheppo 
4600d10e4ef2Snarayan 		range_start = range_end = pos;
4601d10e4ef2Snarayan 
4602d10e4ef2Snarayan 		D2(vswp, "%s(%lld): start index %ld : end %ld\n",
46031ae08745Sheppo 			__func__, ldcp->ldc_id, start, end);
46041ae08745Sheppo 
4605d10e4ef2Snarayan 		if (end == -1) {
4606d10e4ef2Snarayan 			num = -1;
4607d10e4ef2Snarayan 		} else if (num >= 0) {
4608d10e4ef2Snarayan 			num = end >= pos ?
4609d10e4ef2Snarayan 				end - pos + 1: (len - pos + 1) + end;
4610d10e4ef2Snarayan 
46111ae08745Sheppo 			/* basic sanity check */
46121ae08745Sheppo 			if (end > len) {
4613d10e4ef2Snarayan 				DERR(vswp, "%s(%lld): endpoint %lld outside "
4614d10e4ef2Snarayan 					"ring length %lld", __func__,
4615d10e4ef2Snarayan 					ldcp->ldc_id, end, len);
46161ae08745Sheppo 
46171ae08745Sheppo 				SND_DRING_NACK(ldcp, dring_pkt);
46181ae08745Sheppo 				return;
46191ae08745Sheppo 			}
4620d10e4ef2Snarayan 		} else {
4621d10e4ef2Snarayan 			DERR(vswp, "%s(%lld): invalid endpoint %lld",
4622d10e4ef2Snarayan 				__func__, ldcp->ldc_id, end);
4623d10e4ef2Snarayan 			SND_DRING_NACK(ldcp, dring_pkt);
46241ae08745Sheppo 			return;
46251ae08745Sheppo 		}
46261ae08745Sheppo 
4627d10e4ef2Snarayan 		while (cnt != num) {
4628d10e4ef2Snarayan vsw_recheck_desc:
4629d10e4ef2Snarayan 			if ((rv = ldc_mem_dring_acquire(dp->handle,
4630d10e4ef2Snarayan 							pos, pos)) != 0) {
4631d10e4ef2Snarayan 				DERR(vswp, "%s(%lld): unable to acquire "
4632d10e4ef2Snarayan 					"descriptor at pos %d: err %d",
4633d10e4ef2Snarayan 					__func__, pos, ldcp->ldc_id, rv);
4634d10e4ef2Snarayan 				SND_DRING_NACK(ldcp, dring_pkt);
4635d10e4ef2Snarayan 				return;
4636d10e4ef2Snarayan 			}
46371ae08745Sheppo 
4638d10e4ef2Snarayan 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos;
46391ae08745Sheppo 
4640d10e4ef2Snarayan 			/*
4641d10e4ef2Snarayan 			 * When given a bounded range of descriptors
4642d10e4ef2Snarayan 			 * to process, its an error to hit a descriptor
4643d10e4ef2Snarayan 			 * which is not ready. In the non-bounded case
4644d10e4ef2Snarayan 			 * (end_idx == -1) this simply indicates we have
4645d10e4ef2Snarayan 			 * reached the end of the current active range.
4646d10e4ef2Snarayan 			 */
4647d10e4ef2Snarayan 			if (pub_addr->hdr.dstate != VIO_DESC_READY) {
4648d10e4ef2Snarayan 				/* unbound - no error */
4649d10e4ef2Snarayan 				if (end == -1) {
4650d10e4ef2Snarayan 					if (read_attempts == vsw_read_attempts)
4651d10e4ef2Snarayan 						break;
46521ae08745Sheppo 
4653d10e4ef2Snarayan 					delay(drv_usectohz(vsw_desc_delay));
4654d10e4ef2Snarayan 					read_attempts++;
4655d10e4ef2Snarayan 					goto vsw_recheck_desc;
4656d10e4ef2Snarayan 				}
46571ae08745Sheppo 
4658d10e4ef2Snarayan 				/* bounded - error - so NACK back */
4659d10e4ef2Snarayan 				DERR(vswp, "%s(%lld): descriptor not READY "
4660d10e4ef2Snarayan 					"(%d)", __func__, ldcp->ldc_id,
4661d10e4ef2Snarayan 					pub_addr->hdr.dstate);
4662d10e4ef2Snarayan 				SND_DRING_NACK(ldcp, dring_pkt);
4663d10e4ef2Snarayan 				return;
4664d10e4ef2Snarayan 			}
4665d10e4ef2Snarayan 
4666d10e4ef2Snarayan 			DTRACE_PROBE1(read_attempts, int, read_attempts);
4667d10e4ef2Snarayan 
4668d10e4ef2Snarayan 			range_end = pos;
4669d10e4ef2Snarayan 
4670d10e4ef2Snarayan 			/*
4671d10e4ef2Snarayan 			 * If we ACK'd the previous descriptor then now
4672d10e4ef2Snarayan 			 * record the new range start position for later
4673d10e4ef2Snarayan 			 * ACK's.
4674d10e4ef2Snarayan 			 */
4675d10e4ef2Snarayan 			if (prev_desc_ack) {
4676d10e4ef2Snarayan 				range_start = pos;
4677d10e4ef2Snarayan 
4678d10e4ef2Snarayan 				D2(vswp, "%s(%lld): updating range start "
4679d10e4ef2Snarayan 					"to be %d", __func__, ldcp->ldc_id,
4680d10e4ef2Snarayan 					range_start);
4681d10e4ef2Snarayan 
4682d10e4ef2Snarayan 				prev_desc_ack = B_FALSE;
4683d10e4ef2Snarayan 			}
46841ae08745Sheppo 
46851ae08745Sheppo 			/*
46861ae08745Sheppo 			 * Data is padded to align on 8 byte boundary,
46871ae08745Sheppo 			 * datalen is actual data length, i.e. minus that
46881ae08745Sheppo 			 * padding.
46891ae08745Sheppo 			 */
46901ae08745Sheppo 			datalen = pub_addr->nbytes;
46911ae08745Sheppo 
46921ae08745Sheppo 			/*
46931ae08745Sheppo 			 * Does peer wish us to ACK when we have finished
46941ae08745Sheppo 			 * with this descriptor ?
46951ae08745Sheppo 			 */
46961ae08745Sheppo 			if (pub_addr->hdr.ack)
46971ae08745Sheppo 				ack_needed = B_TRUE;
46981ae08745Sheppo 
46991ae08745Sheppo 			D2(vswp, "%s(%lld): processing desc %lld at pos"
47001ae08745Sheppo 				" 0x%llx : dstate 0x%lx : datalen 0x%lx",
4701d10e4ef2Snarayan 				__func__, ldcp->ldc_id, pos, pub_addr,
47021ae08745Sheppo 				pub_addr->hdr.dstate, datalen);
47031ae08745Sheppo 
47041ae08745Sheppo 			/*
47051ae08745Sheppo 			 * Mark that we are starting to process descriptor.
47061ae08745Sheppo 			 */
47071ae08745Sheppo 			pub_addr->hdr.dstate = VIO_DESC_ACCEPTED;
47081ae08745Sheppo 
4709d10e4ef2Snarayan 			mp = vio_allocb(ldcp->rxh);
4710d10e4ef2Snarayan 			if (mp == NULL) {
47111ae08745Sheppo 				/*
4712d10e4ef2Snarayan 				 * No free receive buffers available, so
4713d10e4ef2Snarayan 				 * fallback onto allocb(9F). Make sure that
4714d10e4ef2Snarayan 				 * we get a data buffer which is a multiple
4715d10e4ef2Snarayan 				 * of 8 as this is required by ldc_mem_copy.
47161ae08745Sheppo 				 */
4717d10e4ef2Snarayan 				DTRACE_PROBE(allocb);
4718d10e4ef2Snarayan 				mp = allocb(datalen + VNET_IPALIGN + 8,
4719d10e4ef2Snarayan 								BPRI_MED);
4720d10e4ef2Snarayan 			}
4721d10e4ef2Snarayan 
4722d10e4ef2Snarayan 			/*
4723d10e4ef2Snarayan 			 * Ensure that we ask ldc for an aligned
4724d10e4ef2Snarayan 			 * number of bytes.
4725d10e4ef2Snarayan 			 */
4726d10e4ef2Snarayan 			nbytes = datalen + VNET_IPALIGN;
47271ae08745Sheppo 			if (nbytes & 0x7) {
47281ae08745Sheppo 				off = 8 - (nbytes & 0x7);
47291ae08745Sheppo 				nbytes += off;
47301ae08745Sheppo 			}
47311ae08745Sheppo 
47321ae08745Sheppo 			ncookies = pub_addr->ncookies;
47331ae08745Sheppo 			rv = ldc_mem_copy(ldcp->ldc_handle,
47341ae08745Sheppo 				(caddr_t)mp->b_rptr, 0, &nbytes,
47351ae08745Sheppo 				pub_addr->memcookie, ncookies,
47361ae08745Sheppo 				LDC_COPY_IN);
47371ae08745Sheppo 
47381ae08745Sheppo 			if (rv != 0) {
47391ae08745Sheppo 				DERR(vswp, "%s(%d): unable to copy in "
4740d10e4ef2Snarayan 					"data from %d cookies in desc %d"
4741d10e4ef2Snarayan 					" (rv %d)", __func__, ldcp->ldc_id,
4742d10e4ef2Snarayan 					ncookies, pos, rv);
47431ae08745Sheppo 				freemsg(mp);
4744d10e4ef2Snarayan 
4745d10e4ef2Snarayan 				pub_addr->hdr.dstate = VIO_DESC_DONE;
47461ae08745Sheppo 				(void) ldc_mem_dring_release(dp->handle,
4747d10e4ef2Snarayan 								pos, pos);
4748d10e4ef2Snarayan 				break;
47491ae08745Sheppo 			} else {
47501ae08745Sheppo 				D2(vswp, "%s(%d): copied in %ld bytes"
47511ae08745Sheppo 					" using %d cookies", __func__,
47521ae08745Sheppo 					ldcp->ldc_id, nbytes, ncookies);
47531ae08745Sheppo 			}
47541ae08745Sheppo 
4755d10e4ef2Snarayan 			/* adjust the read pointer to skip over the padding */
4756d10e4ef2Snarayan 			mp->b_rptr += VNET_IPALIGN;
4757d10e4ef2Snarayan 
47581ae08745Sheppo 			/* point to the actual end of data */
47591ae08745Sheppo 			mp->b_wptr = mp->b_rptr + datalen;
47601ae08745Sheppo 
47611ae08745Sheppo 			/* build a chain of received packets */
47621ae08745Sheppo 			if (bp == NULL) {
47631ae08745Sheppo 				/* first pkt */
47641ae08745Sheppo 				bp = mp;
47651ae08745Sheppo 				bp->b_next = bp->b_prev = NULL;
47661ae08745Sheppo 				bpt = bp;
47671ae08745Sheppo 				chain = 1;
47681ae08745Sheppo 			} else {
47691ae08745Sheppo 				mp->b_next = NULL;
47701ae08745Sheppo 				mp->b_prev = bpt;
47711ae08745Sheppo 				bpt->b_next = mp;
47721ae08745Sheppo 				bpt = mp;
47731ae08745Sheppo 				chain++;
47741ae08745Sheppo 			}
47751ae08745Sheppo 
47761ae08745Sheppo 			/* mark we are finished with this descriptor */
47771ae08745Sheppo 			pub_addr->hdr.dstate = VIO_DESC_DONE;
47781ae08745Sheppo 
4779d10e4ef2Snarayan 			(void) ldc_mem_dring_release(dp->handle, pos, pos);
4780d10e4ef2Snarayan 
47811ae08745Sheppo 			/*
4782d10e4ef2Snarayan 			 * Send an ACK back to peer if requested.
47831ae08745Sheppo 			 */
47841ae08745Sheppo 			if (ack_needed) {
47851ae08745Sheppo 				ack_needed = B_FALSE;
47861ae08745Sheppo 
4787d10e4ef2Snarayan 				dring_pkt->start_idx = range_start;
4788d10e4ef2Snarayan 				dring_pkt->end_idx = range_end;
47891ae08745Sheppo 
4790d10e4ef2Snarayan 				DERR(vswp, "%s(%lld): processed %d %d, ACK"
4791d10e4ef2Snarayan 					" requested", __func__, ldcp->ldc_id,
4792d10e4ef2Snarayan 					dring_pkt->start_idx,
4793d10e4ef2Snarayan 					dring_pkt->end_idx);
47941ae08745Sheppo 
4795d10e4ef2Snarayan 				dring_pkt->dring_process_state = VIO_DP_ACTIVE;
47961ae08745Sheppo 				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
47971ae08745Sheppo 				dring_pkt->tag.vio_sid = ldcp->local_session;
47981ae08745Sheppo 				vsw_send_msg(ldcp, (void *)dring_pkt,
47991ae08745Sheppo 					sizeof (vio_dring_msg_t));
4800d10e4ef2Snarayan 
4801d10e4ef2Snarayan 				prev_desc_ack = B_TRUE;
4802d10e4ef2Snarayan 				range_start = pos;
48031ae08745Sheppo 			}
48041ae08745Sheppo 
4805d10e4ef2Snarayan 			/* next descriptor */
4806d10e4ef2Snarayan 			pos = (pos + 1) % len;
4807d10e4ef2Snarayan 			cnt++;
4808d10e4ef2Snarayan 
4809d10e4ef2Snarayan 			/*
4810d10e4ef2Snarayan 			 * Break out of loop here and stop processing to
4811d10e4ef2Snarayan 			 * allow some other network device (or disk) to
4812d10e4ef2Snarayan 			 * get access to the cpu.
4813d10e4ef2Snarayan 			 */
4814d10e4ef2Snarayan 			/* send the chain of packets to be switched */
4815d10e4ef2Snarayan 			if (chain > vsw_chain_len) {
4816d10e4ef2Snarayan 				D3(vswp, "%s(%lld): switching chain of %d "
4817d10e4ef2Snarayan 					"msgs", __func__, ldcp->ldc_id, chain);
4818d10e4ef2Snarayan 				vsw_switch_frame(vswp, bp, VSW_VNETPORT,
4819d10e4ef2Snarayan 							ldcp->ldc_port, NULL);
4820d10e4ef2Snarayan 				bp = NULL;
4821d10e4ef2Snarayan 				break;
48221ae08745Sheppo 			}
48231ae08745Sheppo 		}
48241ae08745Sheppo 
48251ae08745Sheppo 		/* send the chain of packets to be switched */
4826d10e4ef2Snarayan 		if (bp != NULL) {
4827d10e4ef2Snarayan 			D3(vswp, "%s(%lld): switching chain of %d msgs",
4828d10e4ef2Snarayan 					__func__, ldcp->ldc_id, chain);
48291ae08745Sheppo 			vsw_switch_frame(vswp, bp, VSW_VNETPORT,
48301ae08745Sheppo 							ldcp->ldc_port, NULL);
4831d10e4ef2Snarayan 		}
48321ae08745Sheppo 
4833d10e4ef2Snarayan 		DTRACE_PROBE1(msg_cnt, int, cnt);
4834d10e4ef2Snarayan 
4835d10e4ef2Snarayan 		/*
4836d10e4ef2Snarayan 		 * We are now finished so ACK back with the state
4837d10e4ef2Snarayan 		 * set to STOPPING so our peer knows we are finished
4838d10e4ef2Snarayan 		 */
4839d10e4ef2Snarayan 		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
4840d10e4ef2Snarayan 		dring_pkt->tag.vio_sid = ldcp->local_session;
4841d10e4ef2Snarayan 
4842d10e4ef2Snarayan 		dring_pkt->dring_process_state = VIO_DP_STOPPED;
4843d10e4ef2Snarayan 
4844d10e4ef2Snarayan 		DTRACE_PROBE(stop_process_sent);
4845d10e4ef2Snarayan 
4846d10e4ef2Snarayan 		/*
4847d10e4ef2Snarayan 		 * We have not processed any more descriptors beyond
4848d10e4ef2Snarayan 		 * the last one we ACK'd.
4849d10e4ef2Snarayan 		 */
4850d10e4ef2Snarayan 		if (prev_desc_ack)
4851d10e4ef2Snarayan 			range_start = range_end;
4852d10e4ef2Snarayan 
4853d10e4ef2Snarayan 		dring_pkt->start_idx = range_start;
4854d10e4ef2Snarayan 		dring_pkt->end_idx = range_end;
4855d10e4ef2Snarayan 
4856d10e4ef2Snarayan 		D2(vswp, "%s(%lld) processed : %d : %d, now stopping",
4857d10e4ef2Snarayan 			__func__, ldcp->ldc_id, dring_pkt->start_idx,
4858d10e4ef2Snarayan 			dring_pkt->end_idx);
4859d10e4ef2Snarayan 
4860d10e4ef2Snarayan 		vsw_send_msg(ldcp, (void *)dring_pkt,
4861d10e4ef2Snarayan 					sizeof (vio_dring_msg_t));
48621ae08745Sheppo 		break;
48631ae08745Sheppo 
48641ae08745Sheppo 	case VIO_SUBTYPE_ACK:
48651ae08745Sheppo 		D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id);
48661ae08745Sheppo 		/*
48671ae08745Sheppo 		 * Verify that the relevant descriptors are all
48681ae08745Sheppo 		 * marked as DONE
48691ae08745Sheppo 		 */
48701ae08745Sheppo 		if ((dp = vsw_ident2dring(&ldcp->lane_out,
48711ae08745Sheppo 			dring_pkt->dring_ident)) == NULL) {
48721ae08745Sheppo 			DERR(vswp, "%s: unknown ident in ACK", __func__);
48731ae08745Sheppo 			return;
48741ae08745Sheppo 		}
48751ae08745Sheppo 
48761ae08745Sheppo 		pub_addr = (vnet_public_desc_t *)dp->pub_addr;
48771ae08745Sheppo 		priv_addr = (vsw_private_desc_t *)dp->priv_addr;
48781ae08745Sheppo 
48791ae08745Sheppo 		start = end = 0;
48801ae08745Sheppo 		start = dring_pkt->start_idx;
48811ae08745Sheppo 		end = dring_pkt->end_idx;
48821ae08745Sheppo 		len = dp->num_descriptors;
48831ae08745Sheppo 
48841ae08745Sheppo 		j = num = 0;
48851ae08745Sheppo 		/* calculate # descriptors taking into a/c wrap around */
48861ae08745Sheppo 		num = end >= start ? end - start + 1: (len - start + 1) + end;
48871ae08745Sheppo 
48881ae08745Sheppo 		D2(vswp, "%s(%lld): start index %ld : end %ld : num %ld\n",
48891ae08745Sheppo 			__func__, ldcp->ldc_id, start, end, num);
48901ae08745Sheppo 
4891d10e4ef2Snarayan 		mutex_enter(&dp->dlock);
4892d10e4ef2Snarayan 		dp->last_ack_recv = end;
4893d10e4ef2Snarayan 		mutex_exit(&dp->dlock);
4894d10e4ef2Snarayan 
48951ae08745Sheppo 		for (i = start; j < num; i = (i + 1) % len, j++) {
48961ae08745Sheppo 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
48971ae08745Sheppo 			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
48981ae08745Sheppo 
4899d10e4ef2Snarayan 			/*
4900d10e4ef2Snarayan 			 * If the last descriptor in a range has the ACK
4901d10e4ef2Snarayan 			 * bit set then we will get two messages from our
4902d10e4ef2Snarayan 			 * peer relating to it. The normal ACK msg and then
4903d10e4ef2Snarayan 			 * a subsequent STOP msg. The first message will have
4904d10e4ef2Snarayan 			 * resulted in the descriptor being reclaimed and
4905d10e4ef2Snarayan 			 * its state set to FREE so when we encounter a non
4906d10e4ef2Snarayan 			 * DONE descriptor we need to check to see if its
4907d10e4ef2Snarayan 			 * because we have just reclaimed it.
4908d10e4ef2Snarayan 			 */
4909d10e4ef2Snarayan 			mutex_enter(&priv_addr->dstate_lock);
4910d10e4ef2Snarayan 			if (pub_addr->hdr.dstate == VIO_DESC_DONE) {
49111ae08745Sheppo 				/* clear all the fields */
49121ae08745Sheppo 				bzero(priv_addr->datap, priv_addr->datalen);
49131ae08745Sheppo 				priv_addr->datalen = 0;
49141ae08745Sheppo 
49151ae08745Sheppo 				pub_addr->hdr.dstate = VIO_DESC_FREE;
49161ae08745Sheppo 				pub_addr->hdr.ack = 0;
4917d10e4ef2Snarayan 
49181ae08745Sheppo 				priv_addr->dstate = VIO_DESC_FREE;
4919d10e4ef2Snarayan 				mutex_exit(&priv_addr->dstate_lock);
49201ae08745Sheppo 
49211ae08745Sheppo 				D3(vswp, "clearing descp %d : pub state "
49221ae08745Sheppo 					"0x%llx : priv state 0x%llx", i,
49231ae08745Sheppo 					pub_addr->hdr.dstate,
49241ae08745Sheppo 					priv_addr->dstate);
4925d10e4ef2Snarayan 
4926d10e4ef2Snarayan 			} else {
4927d10e4ef2Snarayan 				mutex_exit(&priv_addr->dstate_lock);
4928d10e4ef2Snarayan 
4929d10e4ef2Snarayan 				if (dring_pkt->dring_process_state !=
4930d10e4ef2Snarayan 							VIO_DP_STOPPED) {
4931d10e4ef2Snarayan 					DERR(vswp, "%s: descriptor %lld at pos "
4932d10e4ef2Snarayan 						" 0x%llx not DONE (0x%lx)\n",
4933d10e4ef2Snarayan 						__func__, i, pub_addr,
4934d10e4ef2Snarayan 						pub_addr->hdr.dstate);
4935d10e4ef2Snarayan 					return;
4936d10e4ef2Snarayan 				}
49371ae08745Sheppo 			}
49381ae08745Sheppo 		}
49391ae08745Sheppo 
4940d10e4ef2Snarayan 		/*
4941d10e4ef2Snarayan 		 * If our peer is stopping processing descriptors then
4942d10e4ef2Snarayan 		 * we check to make sure it has processed all the descriptors
4943d10e4ef2Snarayan 		 * we have updated. If not then we send it a new message
4944d10e4ef2Snarayan 		 * to prompt it to restart.
4945d10e4ef2Snarayan 		 */
4946d10e4ef2Snarayan 		if (dring_pkt->dring_process_state == VIO_DP_STOPPED) {
4947d10e4ef2Snarayan 			DTRACE_PROBE(stop_process_recv);
4948d10e4ef2Snarayan 			D2(vswp, "%s(%lld): got stopping msg : %d : %d",
4949d10e4ef2Snarayan 				__func__, ldcp->ldc_id, dring_pkt->start_idx,
4950d10e4ef2Snarayan 				dring_pkt->end_idx);
4951d10e4ef2Snarayan 
4952d10e4ef2Snarayan 			/*
4953d10e4ef2Snarayan 			 * Check next descriptor in public section of ring.
4954d10e4ef2Snarayan 			 * If its marked as READY then we need to prompt our
4955d10e4ef2Snarayan 			 * peer to start processing the ring again.
4956d10e4ef2Snarayan 			 */
4957d10e4ef2Snarayan 			i = (end + 1) % len;
4958d10e4ef2Snarayan 			pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
4959d10e4ef2Snarayan 			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;
4960d10e4ef2Snarayan 
4961d10e4ef2Snarayan 			/*
4962d10e4ef2Snarayan 			 * Hold the restart lock across all of this to
4963d10e4ef2Snarayan 			 * make sure that its not possible for us to
4964d10e4ef2Snarayan 			 * decide that a msg needs to be sent in the future
4965d10e4ef2Snarayan 			 * but the sending code having already checked is
4966d10e4ef2Snarayan 			 * about to exit.
4967d10e4ef2Snarayan 			 */
4968d10e4ef2Snarayan 			mutex_enter(&dp->restart_lock);
4969d10e4ef2Snarayan 			mutex_enter(&priv_addr->dstate_lock);
4970d10e4ef2Snarayan 			if (pub_addr->hdr.dstate == VIO_DESC_READY) {
4971d10e4ef2Snarayan 
4972d10e4ef2Snarayan 				mutex_exit(&priv_addr->dstate_lock);
4973d10e4ef2Snarayan 
4974d10e4ef2Snarayan 				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
4975d10e4ef2Snarayan 				dring_pkt->tag.vio_sid = ldcp->local_session;
4976d10e4ef2Snarayan 
4977d10e4ef2Snarayan 				mutex_enter(&ldcp->lane_out.seq_lock);
4978d10e4ef2Snarayan 				dring_pkt->seq_num = ldcp->lane_out.seq_num++;
4979d10e4ef2Snarayan 				mutex_exit(&ldcp->lane_out.seq_lock);
4980d10e4ef2Snarayan 
4981d10e4ef2Snarayan 				dring_pkt->start_idx = (end + 1) % len;
4982d10e4ef2Snarayan 				dring_pkt->end_idx = -1;
4983d10e4ef2Snarayan 
4984d10e4ef2Snarayan 				D2(vswp, "%s(%lld) : sending restart msg:"
4985d10e4ef2Snarayan 					" %d : %d", __func__, ldcp->ldc_id,
4986d10e4ef2Snarayan 					dring_pkt->start_idx,
4987d10e4ef2Snarayan 					dring_pkt->end_idx);
4988d10e4ef2Snarayan 
4989d10e4ef2Snarayan 				vsw_send_msg(ldcp, (void *)dring_pkt,
4990d10e4ef2Snarayan 						sizeof (vio_dring_msg_t));
4991d10e4ef2Snarayan 			} else {
4992d10e4ef2Snarayan 				mutex_exit(&priv_addr->dstate_lock);
4993d10e4ef2Snarayan 				dp->restart_reqd = B_TRUE;
4994d10e4ef2Snarayan 			}
4995d10e4ef2Snarayan 			mutex_exit(&dp->restart_lock);
4996d10e4ef2Snarayan 		}
49971ae08745Sheppo 		break;
49981ae08745Sheppo 
49991ae08745Sheppo 	case VIO_SUBTYPE_NACK:
50001ae08745Sheppo 		DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK",
50011ae08745Sheppo 						__func__, ldcp->ldc_id);
50021ae08745Sheppo 		/*
50031ae08745Sheppo 		 * Something is badly wrong if we are getting NACK's
50041ae08745Sheppo 		 * for our data pkts. So reset the channel.
50051ae08745Sheppo 		 */
50061ae08745Sheppo 		vsw_restart_handshake(ldcp);
50071ae08745Sheppo 
50081ae08745Sheppo 		break;
50091ae08745Sheppo 
50101ae08745Sheppo 	default:
50111ae08745Sheppo 		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
50121ae08745Sheppo 			ldcp->ldc_id, dring_pkt->tag.vio_subtype);
50131ae08745Sheppo 	}
50141ae08745Sheppo 
50151ae08745Sheppo 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
50161ae08745Sheppo }
50171ae08745Sheppo 
50181ae08745Sheppo /*
50191ae08745Sheppo  * VIO_PKT_DATA (a.k.a raw data mode )
50201ae08745Sheppo  *
50211ae08745Sheppo  * Note - currently not supported. Do nothing.
50221ae08745Sheppo  */
50231ae08745Sheppo static void
50241ae08745Sheppo vsw_process_data_raw_pkt(vsw_ldc_t *ldcp, void *dpkt)
50251ae08745Sheppo {
50261ae08745Sheppo 	_NOTE(ARGUNUSED(dpkt))
50271ae08745Sheppo 
50281ae08745Sheppo 	D1(NULL, "%s (%lld): enter\n", __func__, ldcp->ldc_id);
50291ae08745Sheppo 
50301ae08745Sheppo 	DERR(NULL, "%s (%lld): currently  not supported",
50311ae08745Sheppo 						__func__, ldcp->ldc_id);
50321ae08745Sheppo 
50331ae08745Sheppo 	D1(NULL, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
50341ae08745Sheppo }
50351ae08745Sheppo 
/*
 * Turn the in-band descriptor message 'pkt' into a NACK stamped with
 * our local session id and send it back over 'ldcp'.
 *
 * Wrapped in do { } while (0) with parenthesized arguments so that the
 * macro expands to exactly one statement and can be used safely as the
 * body of an unbraced if/else. Invoke it with a trailing semicolon.
 * Both arguments are evaluated more than once.
 */
#define	SND_IBND_DESC_NACK(ldcp, pkt) \
	do { \
		(pkt)->tag.vio_subtype = VIO_SUBTYPE_NACK; \
		(pkt)->tag.vio_sid = (ldcp)->local_session; \
		vsw_send_msg((ldcp), (void *)(pkt), \
				sizeof (vio_ibnd_desc_t)); \
		_NOTE(CONSTCOND) \
	} while (0)
50401ae08745Sheppo 
50411ae08745Sheppo /*
50421ae08745Sheppo  * Process an in-band descriptor message (most likely from
50431ae08745Sheppo  * OBP).
50441ae08745Sheppo  */
50451ae08745Sheppo static void
50461ae08745Sheppo vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt)
50471ae08745Sheppo {
50481ae08745Sheppo 	vio_ibnd_desc_t		*ibnd_desc;
50491ae08745Sheppo 	dring_info_t		*dp = NULL;
50501ae08745Sheppo 	vsw_private_desc_t	*priv_addr = NULL;
50511ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
50521ae08745Sheppo 	mblk_t			*mp = NULL;
50531ae08745Sheppo 	size_t			nbytes = 0;
50541ae08745Sheppo 	size_t			off = 0;
50551ae08745Sheppo 	uint64_t		idx = 0;
50561ae08745Sheppo 	uint32_t		datalen = 0;
50571ae08745Sheppo 	uint64_t		ncookies = 0;
50581ae08745Sheppo 	int			rv;
50591ae08745Sheppo 
50601ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
50611ae08745Sheppo 
50621ae08745Sheppo 	ibnd_desc = (vio_ibnd_desc_t *)pkt;
50631ae08745Sheppo 
50641ae08745Sheppo 	switch (ibnd_desc->hdr.tag.vio_subtype) {
50651ae08745Sheppo 	case VIO_SUBTYPE_INFO:
50661ae08745Sheppo 		D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__);
50671ae08745Sheppo 
50681ae08745Sheppo 		if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV))
50691ae08745Sheppo 			return;
50701ae08745Sheppo 
50711ae08745Sheppo 		/*
50721ae08745Sheppo 		 * Data is padded to align on a 8 byte boundary,
50731ae08745Sheppo 		 * nbytes is actual data length, i.e. minus that
50741ae08745Sheppo 		 * padding.
50751ae08745Sheppo 		 */
50761ae08745Sheppo 		datalen = ibnd_desc->nbytes;
50771ae08745Sheppo 
50781ae08745Sheppo 		D2(vswp, "%s(%lld): processing inband desc : "
50791ae08745Sheppo 			": datalen 0x%lx", __func__, ldcp->ldc_id, datalen);
50801ae08745Sheppo 
50811ae08745Sheppo 		ncookies = ibnd_desc->ncookies;
50821ae08745Sheppo 
50831ae08745Sheppo 		/*
50841ae08745Sheppo 		 * allocb(9F) returns an aligned data block. We
50851ae08745Sheppo 		 * need to ensure that we ask ldc for an aligned
50861ae08745Sheppo 		 * number of bytes also.
50871ae08745Sheppo 		 */
50881ae08745Sheppo 		nbytes = datalen;
50891ae08745Sheppo 		if (nbytes & 0x7) {
50901ae08745Sheppo 			off = 8 - (nbytes & 0x7);
50911ae08745Sheppo 			nbytes += off;
50921ae08745Sheppo 		}
50931ae08745Sheppo 
50941ae08745Sheppo 		mp = allocb(datalen, BPRI_MED);
50951ae08745Sheppo 		if (mp == NULL) {
50961ae08745Sheppo 			DERR(vswp, "%s(%lld): allocb failed",
50971ae08745Sheppo 					__func__, ldcp->ldc_id);
50981ae08745Sheppo 			return;
50991ae08745Sheppo 		}
51001ae08745Sheppo 
51011ae08745Sheppo 		rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr,
51021ae08745Sheppo 			0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies,
51031ae08745Sheppo 			LDC_COPY_IN);
51041ae08745Sheppo 
51051ae08745Sheppo 		if (rv != 0) {
51061ae08745Sheppo 			DERR(vswp, "%s(%d): unable to copy in data from "
51071ae08745Sheppo 				"%d cookie(s)", __func__,
51081ae08745Sheppo 				ldcp->ldc_id, ncookies);
51091ae08745Sheppo 			freemsg(mp);
51101ae08745Sheppo 			return;
51111ae08745Sheppo 		} else {
51121ae08745Sheppo 			D2(vswp, "%s(%d): copied in %ld bytes using %d "
51131ae08745Sheppo 				"cookies", __func__, ldcp->ldc_id, nbytes,
51141ae08745Sheppo 				ncookies);
51151ae08745Sheppo 		}
51161ae08745Sheppo 
51171ae08745Sheppo 		/* point to the actual end of data */
51181ae08745Sheppo 		mp->b_wptr = mp->b_rptr + datalen;
51191ae08745Sheppo 
51201ae08745Sheppo 		/*
51211ae08745Sheppo 		 * We ACK back every in-band descriptor message we process
51221ae08745Sheppo 		 */
51231ae08745Sheppo 		ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK;
51241ae08745Sheppo 		ibnd_desc->hdr.tag.vio_sid = ldcp->local_session;
51251ae08745Sheppo 		vsw_send_msg(ldcp, (void *)ibnd_desc,
51261ae08745Sheppo 				sizeof (vio_ibnd_desc_t));
51271ae08745Sheppo 
51281ae08745Sheppo 		/* send the packet to be switched */
51291ae08745Sheppo 		vsw_switch_frame(vswp, mp, VSW_VNETPORT,
51301ae08745Sheppo 					ldcp->ldc_port, NULL);
51311ae08745Sheppo 
51321ae08745Sheppo 		break;
51331ae08745Sheppo 
51341ae08745Sheppo 	case VIO_SUBTYPE_ACK:
51351ae08745Sheppo 		D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__);
51361ae08745Sheppo 
51371ae08745Sheppo 		/* Verify the ACK is valid */
51381ae08745Sheppo 		idx = ibnd_desc->hdr.desc_handle;
51391ae08745Sheppo 
51401ae08745Sheppo 		if (idx >= VSW_RING_NUM_EL) {
51411ae08745Sheppo 			cmn_err(CE_WARN, "%s: corrupted ACK received "
51421ae08745Sheppo 				"(idx %ld)", __func__, idx);
51431ae08745Sheppo 			return;
51441ae08745Sheppo 		}
51451ae08745Sheppo 
51461ae08745Sheppo 		if ((dp = ldcp->lane_out.dringp) == NULL) {
51471ae08745Sheppo 			DERR(vswp, "%s: no dring found", __func__);
51481ae08745Sheppo 			return;
51491ae08745Sheppo 		}
51501ae08745Sheppo 
51511ae08745Sheppo 		priv_addr = (vsw_private_desc_t *)dp->priv_addr;
51521ae08745Sheppo 
51531ae08745Sheppo 		/* move to correct location in ring */
51541ae08745Sheppo 		priv_addr += idx;
51551ae08745Sheppo 
51561ae08745Sheppo 		/*
51571ae08745Sheppo 		 * When we sent the in-band message to our peer we
51581ae08745Sheppo 		 * marked the copy in our private ring as READY. We now
51591ae08745Sheppo 		 * check that the descriptor we are being ACK'ed for is in
51601ae08745Sheppo 		 * fact READY, i.e. it is one we have shared with our peer.
51611ae08745Sheppo 		 */
5162d10e4ef2Snarayan 		mutex_enter(&priv_addr->dstate_lock);
51631ae08745Sheppo 		if (priv_addr->dstate != VIO_DESC_READY) {
5164d10e4ef2Snarayan 			mutex_exit(&priv_addr->dstate_lock);
51651ae08745Sheppo 			cmn_err(CE_WARN, "%s: (%ld) desc at index %ld not "
51661ae08745Sheppo 				"READY (0x%lx)", __func__, ldcp->ldc_id, idx,
51671ae08745Sheppo 				priv_addr->dstate);
51681ae08745Sheppo 			cmn_err(CE_CONT, "%s: bound %d: ncookies %ld\n",
51691ae08745Sheppo 				__func__, priv_addr->bound,
51701ae08745Sheppo 				priv_addr->ncookies);
51711ae08745Sheppo 			cmn_err(CE_CONT, "datalen %ld\n", priv_addr->datalen);
51721ae08745Sheppo 			return;
51731ae08745Sheppo 		} else {
51741ae08745Sheppo 			D2(vswp, "%s: (%lld) freeing descp at %lld", __func__,
51751ae08745Sheppo 				ldcp->ldc_id, idx);
51761ae08745Sheppo 
51771ae08745Sheppo 			/* release resources associated with sent msg */
51781ae08745Sheppo 			bzero(priv_addr->datap, priv_addr->datalen);
51791ae08745Sheppo 			priv_addr->datalen = 0;
51801ae08745Sheppo 			priv_addr->dstate = VIO_DESC_FREE;
5181d10e4ef2Snarayan 			mutex_exit(&priv_addr->dstate_lock);
51821ae08745Sheppo 		}
51831ae08745Sheppo 		break;
51841ae08745Sheppo 
51851ae08745Sheppo 	case VIO_SUBTYPE_NACK:
51861ae08745Sheppo 		DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__);
51871ae08745Sheppo 
51881ae08745Sheppo 		/*
51891ae08745Sheppo 		 * We should only get a NACK if our peer doesn't like
51901ae08745Sheppo 		 * something about a message we have sent it. If this
51911ae08745Sheppo 		 * happens we just release the resources associated with
51921ae08745Sheppo 		 * the message. (We are relying on higher layers to decide
51931ae08745Sheppo 		 * whether or not to resend.
51941ae08745Sheppo 		 */
51951ae08745Sheppo 
51961ae08745Sheppo 		/* limit check */
51971ae08745Sheppo 		idx = ibnd_desc->hdr.desc_handle;
51981ae08745Sheppo 
51991ae08745Sheppo 		if (idx >= VSW_RING_NUM_EL) {
52001ae08745Sheppo 			DERR(vswp, "%s: corrupted NACK received (idx %lld)",
52011ae08745Sheppo 				__func__, idx);
52021ae08745Sheppo 			return;
52031ae08745Sheppo 		}
52041ae08745Sheppo 
52051ae08745Sheppo 		if ((dp = ldcp->lane_out.dringp) == NULL) {
52061ae08745Sheppo 			DERR(vswp, "%s: no dring found", __func__);
52071ae08745Sheppo 			return;
52081ae08745Sheppo 		}
52091ae08745Sheppo 
52101ae08745Sheppo 		priv_addr = (vsw_private_desc_t *)dp->priv_addr;
52111ae08745Sheppo 
52121ae08745Sheppo 		/* move to correct location in ring */
52131ae08745Sheppo 		priv_addr += idx;
52141ae08745Sheppo 
52151ae08745Sheppo 		/* release resources associated with sent msg */
5216d10e4ef2Snarayan 		mutex_enter(&priv_addr->dstate_lock);
52171ae08745Sheppo 		bzero(priv_addr->datap, priv_addr->datalen);
52181ae08745Sheppo 		priv_addr->datalen = 0;
52191ae08745Sheppo 		priv_addr->dstate = VIO_DESC_FREE;
5220d10e4ef2Snarayan 		mutex_exit(&priv_addr->dstate_lock);
52211ae08745Sheppo 
52221ae08745Sheppo 		break;
52231ae08745Sheppo 
52241ae08745Sheppo 	default:
52251ae08745Sheppo 		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
52261ae08745Sheppo 			ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype);
52271ae08745Sheppo 	}
52281ae08745Sheppo 
52291ae08745Sheppo 	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
52301ae08745Sheppo }
52311ae08745Sheppo 
52321ae08745Sheppo static void
52331ae08745Sheppo vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t tag)
52341ae08745Sheppo {
52351ae08745Sheppo 	_NOTE(ARGUNUSED(epkt))
52361ae08745Sheppo 
52371ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
52381ae08745Sheppo 	uint16_t	env = tag.vio_subtype_env;
52391ae08745Sheppo 
52401ae08745Sheppo 	D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id);
52411ae08745Sheppo 
52421ae08745Sheppo 	/*
52431ae08745Sheppo 	 * Error vio_subtypes have yet to be defined. So for
52441ae08745Sheppo 	 * the moment we can't do anything.
52451ae08745Sheppo 	 */
52461ae08745Sheppo 	D2(vswp, "%s: (%x) vio_subtype env", __func__, env);
52471ae08745Sheppo 
52481ae08745Sheppo 	D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
52491ae08745Sheppo }
52501ae08745Sheppo 
52511ae08745Sheppo /*
52521ae08745Sheppo  * Switch the given ethernet frame when operating in layer 2 mode.
52531ae08745Sheppo  *
52541ae08745Sheppo  * vswp: pointer to the vsw instance
52551ae08745Sheppo  * mp: pointer to chain of ethernet frame(s) to be switched
52561ae08745Sheppo  * caller: identifies the source of this frame as:
52571ae08745Sheppo  * 		1. VSW_VNETPORT - a vsw port (connected to a vnet).
52581ae08745Sheppo  *		2. VSW_PHYSDEV - the physical ethernet device
52591ae08745Sheppo  *		3. VSW_LOCALDEV - vsw configured as a virtual interface
52601ae08745Sheppo  * arg: argument provided by the caller.
52611ae08745Sheppo  *		1. for VNETPORT - pointer to the corresponding vsw_port_t.
52621ae08745Sheppo  *		2. for PHYSDEV - NULL
52631ae08745Sheppo  *		3. for LOCALDEV - pointer to to this vsw_t(self)
52641ae08745Sheppo  */
52651ae08745Sheppo void
52661ae08745Sheppo vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
52671ae08745Sheppo 			vsw_port_t *arg, mac_resource_handle_t mrh)
52681ae08745Sheppo {
52691ae08745Sheppo 	struct ether_header	*ehp;
52701ae08745Sheppo 	vsw_port_t		*port = NULL;
52711ae08745Sheppo 	mblk_t			*bp, *ret_m;
52721ae08745Sheppo 	mblk_t			*nmp = NULL;
52731ae08745Sheppo 	vsw_port_list_t		*plist = &vswp->plist;
52741ae08745Sheppo 
52751ae08745Sheppo 	D1(vswp, "%s: enter (caller %d)", __func__, caller);
52761ae08745Sheppo 
52771ae08745Sheppo 	/*
52781ae08745Sheppo 	 * PERF: rather than breaking up the chain here, scan it
52791ae08745Sheppo 	 * to find all mblks heading to same destination and then
52801ae08745Sheppo 	 * pass that sub-chain to the lower transmit functions.
52811ae08745Sheppo 	 */
52821ae08745Sheppo 
52831ae08745Sheppo 	/* process the chain of packets */
52841ae08745Sheppo 	bp = mp;
52851ae08745Sheppo 	while (bp) {
52861ae08745Sheppo 		mp = bp;
52871ae08745Sheppo 		bp = bp->b_next;
52881ae08745Sheppo 		mp->b_next = mp->b_prev = NULL;
52891ae08745Sheppo 		ehp = (struct ether_header *)mp->b_rptr;
52901ae08745Sheppo 
52911ae08745Sheppo 		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
52921ae08745Sheppo 			__func__, MBLKSIZE(mp), MBLKL(mp));
52931ae08745Sheppo 
52941ae08745Sheppo 		READ_ENTER(&vswp->if_lockrw);
52951ae08745Sheppo 		if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) {
52961ae08745Sheppo 			/*
52971ae08745Sheppo 			 * If destination is VSW_LOCALDEV (vsw as an eth
52981ae08745Sheppo 			 * interface) and if the device is up & running,
52991ae08745Sheppo 			 * send the packet up the stack on this host.
53001ae08745Sheppo 			 * If the virtual interface is down, drop the packet.
53011ae08745Sheppo 			 */
53021ae08745Sheppo 			if (caller != VSW_LOCALDEV) {
53031ae08745Sheppo 				if (vswp->if_state & VSW_IF_UP) {
53041ae08745Sheppo 					RW_EXIT(&vswp->if_lockrw);
5305ba2e4443Sseb 					mac_rx(vswp->if_mh, mrh, mp);
53061ae08745Sheppo 				} else {
53071ae08745Sheppo 					RW_EXIT(&vswp->if_lockrw);
53081ae08745Sheppo 					/* Interface down, drop pkt */
53091ae08745Sheppo 					freemsg(mp);
53101ae08745Sheppo 				}
53111ae08745Sheppo 			} else {
53121ae08745Sheppo 				RW_EXIT(&vswp->if_lockrw);
53131ae08745Sheppo 				freemsg(mp);
53141ae08745Sheppo 			}
53151ae08745Sheppo 			continue;
53161ae08745Sheppo 		}
53171ae08745Sheppo 		RW_EXIT(&vswp->if_lockrw);
53181ae08745Sheppo 
53191ae08745Sheppo 		READ_ENTER(&plist->lockrw);
53201ae08745Sheppo 		port = vsw_lookup_fdb(vswp, ehp);
53211ae08745Sheppo 		if (port) {
53221ae08745Sheppo 			/*
53231ae08745Sheppo 			 * Mark the port as in-use.
53241ae08745Sheppo 			 */
53251ae08745Sheppo 			mutex_enter(&port->ref_lock);
53261ae08745Sheppo 			port->ref_cnt++;
53271ae08745Sheppo 			mutex_exit(&port->ref_lock);
53281ae08745Sheppo 			RW_EXIT(&plist->lockrw);
53291ae08745Sheppo 
53301ae08745Sheppo 			/*
53311ae08745Sheppo 			 * If plumbed and in promisc mode then copy msg
53321ae08745Sheppo 			 * and send up the stack.
53331ae08745Sheppo 			 */
53341ae08745Sheppo 			READ_ENTER(&vswp->if_lockrw);
53351ae08745Sheppo 			if (VSW_U_P(vswp->if_state)) {
53361ae08745Sheppo 				RW_EXIT(&vswp->if_lockrw);
53371ae08745Sheppo 				nmp = copymsg(mp);
53381ae08745Sheppo 				if (nmp)
5339ba2e4443Sseb 					mac_rx(vswp->if_mh, mrh, nmp);
53401ae08745Sheppo 			} else {
53411ae08745Sheppo 				RW_EXIT(&vswp->if_lockrw);
53421ae08745Sheppo 			}
53431ae08745Sheppo 
53441ae08745Sheppo 			/*
53451ae08745Sheppo 			 * If the destination is in FDB, the packet
53461ae08745Sheppo 			 * should be forwarded to the correponding
53471ae08745Sheppo 			 * vsw_port (connected to a vnet device -
53481ae08745Sheppo 			 * VSW_VNETPORT)
53491ae08745Sheppo 			 */
53501ae08745Sheppo 			(void) vsw_portsend(port, mp);
53511ae08745Sheppo 
53521ae08745Sheppo 			/*
53531ae08745Sheppo 			 * Decrement use count in port and check if
53541ae08745Sheppo 			 * should wake delete thread.
53551ae08745Sheppo 			 */
53561ae08745Sheppo 			mutex_enter(&port->ref_lock);
53571ae08745Sheppo 			port->ref_cnt--;
53581ae08745Sheppo 			if (port->ref_cnt == 0)
53591ae08745Sheppo 				cv_signal(&port->ref_cv);
53601ae08745Sheppo 			mutex_exit(&port->ref_lock);
53611ae08745Sheppo 		} else {
53621ae08745Sheppo 			RW_EXIT(&plist->lockrw);
53631ae08745Sheppo 			/*
53641ae08745Sheppo 			 * Destination not in FDB.
53651ae08745Sheppo 			 *
53661ae08745Sheppo 			 * If the destination is broadcast or
53671ae08745Sheppo 			 * multicast forward the packet to all
53681ae08745Sheppo 			 * (VNETPORTs, PHYSDEV, LOCALDEV),
53691ae08745Sheppo 			 * except the caller.
53701ae08745Sheppo 			 */
53711ae08745Sheppo 			if (IS_BROADCAST(ehp)) {
53721ae08745Sheppo 				D3(vswp, "%s: BROADCAST pkt", __func__);
53731ae08745Sheppo 				(void) vsw_forward_all(vswp, mp,
53741ae08745Sheppo 								caller, arg);
53751ae08745Sheppo 			} else if (IS_MULTICAST(ehp)) {
53761ae08745Sheppo 				D3(vswp, "%s: MULTICAST pkt", __func__);
53771ae08745Sheppo 				(void) vsw_forward_grp(vswp, mp,
53781ae08745Sheppo 							caller, arg);
53791ae08745Sheppo 			} else {
53801ae08745Sheppo 				/*
53811ae08745Sheppo 				 * If the destination is unicast, and came
53821ae08745Sheppo 				 * from either a logical network device or
53831ae08745Sheppo 				 * the switch itself when it is plumbed, then
53841ae08745Sheppo 				 * send it out on the physical device and also
53851ae08745Sheppo 				 * up the stack if the logical interface is
53861ae08745Sheppo 				 * in promiscious mode.
53871ae08745Sheppo 				 *
53881ae08745Sheppo 				 * NOTE:  The assumption here is that if we
53891ae08745Sheppo 				 * cannot find the destination in our fdb, its
53901ae08745Sheppo 				 * a unicast address, and came from either a
53911ae08745Sheppo 				 * vnet or down the stack (when plumbed) it
53921ae08745Sheppo 				 * must be destinded for an ethernet device
53931ae08745Sheppo 				 * outside our ldoms.
53941ae08745Sheppo 				 */
53951ae08745Sheppo 				if (caller == VSW_VNETPORT) {
53961ae08745Sheppo 					READ_ENTER(&vswp->if_lockrw);
53971ae08745Sheppo 					if (VSW_U_P(vswp->if_state)) {
53981ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
53991ae08745Sheppo 						nmp = copymsg(mp);
54001ae08745Sheppo 						if (nmp)
5401ba2e4443Sseb 							mac_rx(vswp->if_mh,
54021ae08745Sheppo 								mrh, nmp);
54031ae08745Sheppo 					} else {
54041ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
54051ae08745Sheppo 					}
54061ae08745Sheppo 					if ((ret_m = vsw_tx_msg(vswp, mp))
54071ae08745Sheppo 								!= NULL) {
54081ae08745Sheppo 						DERR(vswp, "%s: drop mblks to "
54091ae08745Sheppo 							"phys dev", __func__);
54101ae08745Sheppo 						freemsg(ret_m);
54111ae08745Sheppo 					}
54121ae08745Sheppo 
54131ae08745Sheppo 				} else if (caller == VSW_PHYSDEV) {
54141ae08745Sheppo 					/*
54151ae08745Sheppo 					 * Pkt seen because card in promisc
54161ae08745Sheppo 					 * mode. Send up stack if plumbed in
54171ae08745Sheppo 					 * promisc mode, else drop it.
54181ae08745Sheppo 					 */
54191ae08745Sheppo 					READ_ENTER(&vswp->if_lockrw);
54201ae08745Sheppo 					if (VSW_U_P(vswp->if_state)) {
54211ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
5422ba2e4443Sseb 						mac_rx(vswp->if_mh, mrh, mp);
54231ae08745Sheppo 					} else {
54241ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
54251ae08745Sheppo 						freemsg(mp);
54261ae08745Sheppo 					}
54271ae08745Sheppo 
54281ae08745Sheppo 				} else if (caller == VSW_LOCALDEV) {
54291ae08745Sheppo 					/*
54301ae08745Sheppo 					 * Pkt came down the stack, send out
54311ae08745Sheppo 					 * over physical device.
54321ae08745Sheppo 					 */
54331ae08745Sheppo 					if ((ret_m = vsw_tx_msg(vswp, mp))
54341ae08745Sheppo 								!= NULL) {
54351ae08745Sheppo 						DERR(vswp, "%s: drop mblks to "
54361ae08745Sheppo 							"phys dev", __func__);
54371ae08745Sheppo 						freemsg(ret_m);
54381ae08745Sheppo 					}
54391ae08745Sheppo 				}
54401ae08745Sheppo 			}
54411ae08745Sheppo 		}
54421ae08745Sheppo 	}
54431ae08745Sheppo 	D1(vswp, "%s: exit\n", __func__);
54441ae08745Sheppo }
54451ae08745Sheppo 
54461ae08745Sheppo /*
54471ae08745Sheppo  * Switch ethernet frame when in layer 3 mode (i.e. using IP
54481ae08745Sheppo  * layer to do the routing).
54491ae08745Sheppo  *
54501ae08745Sheppo  * There is a large amount of overlap between this function and
54511ae08745Sheppo  * vsw_switch_l2_frame. At some stage we need to revisit and refactor
54521ae08745Sheppo  * both these functions.
54531ae08745Sheppo  */
54541ae08745Sheppo void
54551ae08745Sheppo vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
54561ae08745Sheppo 			vsw_port_t *arg, mac_resource_handle_t mrh)
54571ae08745Sheppo {
54581ae08745Sheppo 	struct ether_header	*ehp;
54591ae08745Sheppo 	vsw_port_t		*port = NULL;
54601ae08745Sheppo 	mblk_t			*bp = NULL;
54611ae08745Sheppo 	vsw_port_list_t		*plist = &vswp->plist;
54621ae08745Sheppo 
54631ae08745Sheppo 	D1(vswp, "%s: enter (caller %d)", __func__, caller);
54641ae08745Sheppo 
54651ae08745Sheppo 	/*
54661ae08745Sheppo 	 * In layer 3 mode should only ever be switching packets
54671ae08745Sheppo 	 * between IP layer and vnet devices. So make sure thats
54681ae08745Sheppo 	 * who is invoking us.
54691ae08745Sheppo 	 */
54701ae08745Sheppo 	if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) {
54711ae08745Sheppo 		DERR(vswp, "%s: unexpected caller (%d)", __func__, caller);
54721ae08745Sheppo 		freemsgchain(mp);
54731ae08745Sheppo 		return;
54741ae08745Sheppo 	}
54751ae08745Sheppo 
54761ae08745Sheppo 	/* process the chain of packets */
54771ae08745Sheppo 	bp = mp;
54781ae08745Sheppo 	while (bp) {
54791ae08745Sheppo 		mp = bp;
54801ae08745Sheppo 		bp = bp->b_next;
54811ae08745Sheppo 		mp->b_next = mp->b_prev = NULL;
54821ae08745Sheppo 		ehp = (struct ether_header *)mp->b_rptr;
54831ae08745Sheppo 
54841ae08745Sheppo 		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
54851ae08745Sheppo 			__func__, MBLKSIZE(mp), MBLKL(mp));
54861ae08745Sheppo 
54871ae08745Sheppo 		READ_ENTER(&plist->lockrw);
54881ae08745Sheppo 		port = vsw_lookup_fdb(vswp, ehp);
54891ae08745Sheppo 		if (port) {
54901ae08745Sheppo 			/*
54911ae08745Sheppo 			 * Mark port as in-use.
54921ae08745Sheppo 			 */
54931ae08745Sheppo 			mutex_enter(&port->ref_lock);
54941ae08745Sheppo 			port->ref_cnt++;
54951ae08745Sheppo 			mutex_exit(&port->ref_lock);
54961ae08745Sheppo 			RW_EXIT(&plist->lockrw);
54971ae08745Sheppo 
54981ae08745Sheppo 			D2(vswp, "%s: sending to target port", __func__);
54991ae08745Sheppo 			(void) vsw_portsend(port, mp);
55001ae08745Sheppo 
55011ae08745Sheppo 			/*
55021ae08745Sheppo 			 * Finished with port so decrement ref count and
55031ae08745Sheppo 			 * check if should wake delete thread.
55041ae08745Sheppo 			 */
55051ae08745Sheppo 			mutex_enter(&port->ref_lock);
55061ae08745Sheppo 			port->ref_cnt--;
55071ae08745Sheppo 			if (port->ref_cnt == 0)
55081ae08745Sheppo 				cv_signal(&port->ref_cv);
55091ae08745Sheppo 			mutex_exit(&port->ref_lock);
55101ae08745Sheppo 		} else {
55111ae08745Sheppo 			RW_EXIT(&plist->lockrw);
55121ae08745Sheppo 			/*
55131ae08745Sheppo 			 * Destination not in FDB
55141ae08745Sheppo 			 *
55151ae08745Sheppo 			 * If the destination is broadcast or
55161ae08745Sheppo 			 * multicast forward the packet to all
55171ae08745Sheppo 			 * (VNETPORTs, PHYSDEV, LOCALDEV),
55181ae08745Sheppo 			 * except the caller.
55191ae08745Sheppo 			 */
55201ae08745Sheppo 			if (IS_BROADCAST(ehp)) {
55211ae08745Sheppo 				D2(vswp, "%s: BROADCAST pkt", __func__);
55221ae08745Sheppo 				(void) vsw_forward_all(vswp, mp,
55231ae08745Sheppo 								caller, arg);
55241ae08745Sheppo 			} else if (IS_MULTICAST(ehp)) {
55251ae08745Sheppo 				D2(vswp, "%s: MULTICAST pkt", __func__);
55261ae08745Sheppo 				(void) vsw_forward_grp(vswp, mp,
55271ae08745Sheppo 							caller, arg);
55281ae08745Sheppo 			} else {
55291ae08745Sheppo 				/*
55301ae08745Sheppo 				 * Unicast pkt from vnet that we don't have
55311ae08745Sheppo 				 * an FDB entry for, so must be destinded for
55321ae08745Sheppo 				 * the outside world. Attempt to send up to the
55331ae08745Sheppo 				 * IP layer to allow it to deal with it.
55341ae08745Sheppo 				 */
55351ae08745Sheppo 				if (caller == VSW_VNETPORT) {
55361ae08745Sheppo 					READ_ENTER(&vswp->if_lockrw);
55371ae08745Sheppo 					if (vswp->if_state & VSW_IF_UP) {
55381ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
55391ae08745Sheppo 						D2(vswp, "%s: sending up",
55401ae08745Sheppo 							__func__);
5541ba2e4443Sseb 						mac_rx(vswp->if_mh, mrh, mp);
55421ae08745Sheppo 					} else {
55431ae08745Sheppo 						RW_EXIT(&vswp->if_lockrw);
55441ae08745Sheppo 						/* Interface down, drop pkt */
55451ae08745Sheppo 						D2(vswp, "%s I/F down",
55461ae08745Sheppo 								__func__);
55471ae08745Sheppo 						freemsg(mp);
55481ae08745Sheppo 					}
55491ae08745Sheppo 				}
55501ae08745Sheppo 			}
55511ae08745Sheppo 		}
55521ae08745Sheppo 	}
55531ae08745Sheppo 
55541ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
55551ae08745Sheppo }
55561ae08745Sheppo 
55571ae08745Sheppo /*
55581ae08745Sheppo  * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV),
55591ae08745Sheppo  * except the caller (port on which frame arrived).
55601ae08745Sheppo  */
55611ae08745Sheppo static int
55621ae08745Sheppo vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
55631ae08745Sheppo {
55641ae08745Sheppo 	vsw_port_list_t	*plist = &vswp->plist;
55651ae08745Sheppo 	vsw_port_t	*portp;
55661ae08745Sheppo 	mblk_t		*nmp = NULL;
55671ae08745Sheppo 	mblk_t		*ret_m = NULL;
55681ae08745Sheppo 	int		skip_port = 0;
55691ae08745Sheppo 
55701ae08745Sheppo 	D1(vswp, "vsw_forward_all: enter\n");
55711ae08745Sheppo 
55721ae08745Sheppo 	/*
55731ae08745Sheppo 	 * Broadcast message from inside ldoms so send to outside
55741ae08745Sheppo 	 * world if in either of layer 2 modes.
55751ae08745Sheppo 	 */
55761ae08745Sheppo 	if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
55771ae08745Sheppo 		(vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
55781ae08745Sheppo 		((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) {
55791ae08745Sheppo 
55801ae08745Sheppo 		nmp = dupmsg(mp);
55811ae08745Sheppo 		if (nmp) {
55821ae08745Sheppo 			if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
55831ae08745Sheppo 				DERR(vswp, "%s: dropping pkt(s) "
55841ae08745Sheppo 				"consisting of %ld bytes of data for"
55851ae08745Sheppo 				" physical device", __func__, MBLKL(ret_m));
55861ae08745Sheppo 			freemsg(ret_m);
55871ae08745Sheppo 			}
55881ae08745Sheppo 		}
55891ae08745Sheppo 	}
55901ae08745Sheppo 
55911ae08745Sheppo 	if (caller == VSW_VNETPORT)
55921ae08745Sheppo 		skip_port = 1;
55931ae08745Sheppo 
55941ae08745Sheppo 	/*
55951ae08745Sheppo 	 * Broadcast message from other vnet (layer 2 or 3) or outside
55961ae08745Sheppo 	 * world (layer 2 only), send up stack if plumbed.
55971ae08745Sheppo 	 */
55981ae08745Sheppo 	if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) {
55991ae08745Sheppo 		READ_ENTER(&vswp->if_lockrw);
56001ae08745Sheppo 		if (vswp->if_state & VSW_IF_UP) {
56011ae08745Sheppo 			RW_EXIT(&vswp->if_lockrw);
56021ae08745Sheppo 			nmp = copymsg(mp);
56031ae08745Sheppo 			if (nmp)
5604ba2e4443Sseb 				mac_rx(vswp->if_mh, NULL, nmp);
56051ae08745Sheppo 		} else {
56061ae08745Sheppo 			RW_EXIT(&vswp->if_lockrw);
56071ae08745Sheppo 		}
56081ae08745Sheppo 	}
56091ae08745Sheppo 
56101ae08745Sheppo 	/* send it to all VNETPORTs */
56111ae08745Sheppo 	READ_ENTER(&plist->lockrw);
56121ae08745Sheppo 	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
56131ae08745Sheppo 		D2(vswp, "vsw_forward_all: port %d", portp->p_instance);
56141ae08745Sheppo 		/*
56151ae08745Sheppo 		 * Caution ! - don't reorder these two checks as arg
56161ae08745Sheppo 		 * will be NULL if the caller is PHYSDEV. skip_port is
56171ae08745Sheppo 		 * only set if caller is VNETPORT.
56181ae08745Sheppo 		 */
56191ae08745Sheppo 		if ((skip_port) && (portp == arg))
56201ae08745Sheppo 			continue;
56211ae08745Sheppo 		else {
56221ae08745Sheppo 			nmp = dupmsg(mp);
56231ae08745Sheppo 			if (nmp) {
56241ae08745Sheppo 				(void) vsw_portsend(portp, nmp);
56251ae08745Sheppo 			} else {
56261ae08745Sheppo 				DERR(vswp, "vsw_forward_all: nmp NULL");
56271ae08745Sheppo 			}
56281ae08745Sheppo 		}
56291ae08745Sheppo 	}
56301ae08745Sheppo 	RW_EXIT(&plist->lockrw);
56311ae08745Sheppo 
56321ae08745Sheppo 	freemsg(mp);
56331ae08745Sheppo 
56341ae08745Sheppo 	D1(vswp, "vsw_forward_all: exit\n");
56351ae08745Sheppo 	return (0);
56361ae08745Sheppo }
56371ae08745Sheppo 
56381ae08745Sheppo /*
56391ae08745Sheppo  * Forward pkts to any devices or interfaces which have registered
56401ae08745Sheppo  * an interest in them (i.e. multicast groups).
56411ae08745Sheppo  */
56421ae08745Sheppo static int
56431ae08745Sheppo vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
56441ae08745Sheppo {
56451ae08745Sheppo 	struct ether_header	*ehp = (struct ether_header *)mp->b_rptr;
56461ae08745Sheppo 	mfdb_ent_t		*entp = NULL;
56471ae08745Sheppo 	mfdb_ent_t		*tpp = NULL;
56481ae08745Sheppo 	vsw_port_t 		*port;
56491ae08745Sheppo 	uint64_t		key = 0;
56501ae08745Sheppo 	mblk_t			*nmp = NULL;
56511ae08745Sheppo 	mblk_t			*ret_m = NULL;
56521ae08745Sheppo 	boolean_t		check_if = B_TRUE;
56531ae08745Sheppo 
56541ae08745Sheppo 	/*
56551ae08745Sheppo 	 * Convert address to hash table key
56561ae08745Sheppo 	 */
56571ae08745Sheppo 	KEY_HASH(key, ehp->ether_dhost);
56581ae08745Sheppo 
56591ae08745Sheppo 	D1(vswp, "%s: key 0x%llx", __func__, key);
56601ae08745Sheppo 
56611ae08745Sheppo 	/*
56621ae08745Sheppo 	 * If pkt came from either a vnet or down the stack (if we are
56631ae08745Sheppo 	 * plumbed) and we are in layer 2 mode, then we send the pkt out
56641ae08745Sheppo 	 * over the physical adapter, and then check to see if any other
56651ae08745Sheppo 	 * vnets are interested in it.
56661ae08745Sheppo 	 */
56671ae08745Sheppo 	if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) ||
56681ae08745Sheppo 		(vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) &&
56691ae08745Sheppo 		((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) {
56701ae08745Sheppo 		nmp = dupmsg(mp);
56711ae08745Sheppo 		if (nmp) {
56721ae08745Sheppo 			if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) {
56731ae08745Sheppo 				DERR(vswp, "%s: dropping pkt(s) "
56741ae08745Sheppo 					"consisting of %ld bytes of "
56751ae08745Sheppo 					"data for physical device",
56761ae08745Sheppo 					__func__, MBLKL(ret_m));
56771ae08745Sheppo 				freemsg(ret_m);
56781ae08745Sheppo 			}
56791ae08745Sheppo 		}
56801ae08745Sheppo 	}
56811ae08745Sheppo 
56821ae08745Sheppo 	READ_ENTER(&vswp->mfdbrw);
56831ae08745Sheppo 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key,
56841ae08745Sheppo 				(mod_hash_val_t *)&entp) != 0) {
56851ae08745Sheppo 		D3(vswp, "%s: no table entry found for addr 0x%llx",
56861ae08745Sheppo 								__func__, key);
56871ae08745Sheppo 	} else {
56881ae08745Sheppo 		/*
56891ae08745Sheppo 		 * Send to list of devices associated with this address...
56901ae08745Sheppo 		 */
56911ae08745Sheppo 		for (tpp = entp; tpp != NULL; tpp = tpp->nextp) {
56921ae08745Sheppo 
56931ae08745Sheppo 			/* dont send to ourselves */
56941ae08745Sheppo 			if ((caller == VSW_VNETPORT) &&
56951ae08745Sheppo 				(tpp->d_addr == (void *)arg)) {
56961ae08745Sheppo 				port = (vsw_port_t *)tpp->d_addr;
56971ae08745Sheppo 				D3(vswp, "%s: not sending to ourselves"
56981ae08745Sheppo 					" : port %d", __func__,
56991ae08745Sheppo 					port->p_instance);
57001ae08745Sheppo 				continue;
57011ae08745Sheppo 
57021ae08745Sheppo 			} else if ((caller == VSW_LOCALDEV) &&
57031ae08745Sheppo 				(tpp->d_type == VSW_LOCALDEV)) {
57041ae08745Sheppo 				D3(vswp, "%s: not sending back up stack",
57051ae08745Sheppo 					__func__);
57061ae08745Sheppo 				continue;
57071ae08745Sheppo 			}
57081ae08745Sheppo 
57091ae08745Sheppo 			if (tpp->d_type == VSW_VNETPORT) {
57101ae08745Sheppo 				port = (vsw_port_t *)tpp->d_addr;
57111ae08745Sheppo 				D3(vswp, "%s: sending to port %ld for "
57121ae08745Sheppo 					" addr 0x%llx", __func__,
57131ae08745Sheppo 					port->p_instance, key);
57141ae08745Sheppo 
57151ae08745Sheppo 				nmp = dupmsg(mp);
57161ae08745Sheppo 				if (nmp)
57171ae08745Sheppo 					(void) vsw_portsend(port, nmp);
57181ae08745Sheppo 			} else {
57191ae08745Sheppo 				if (vswp->if_state & VSW_IF_UP) {
57201ae08745Sheppo 					nmp = copymsg(mp);
57211ae08745Sheppo 					if (nmp)
5722ba2e4443Sseb 						mac_rx(vswp->if_mh, NULL, nmp);
57231ae08745Sheppo 					check_if = B_FALSE;
57241ae08745Sheppo 					D3(vswp, "%s: sending up stack"
57251ae08745Sheppo 						" for addr 0x%llx", __func__,
57261ae08745Sheppo 						key);
57271ae08745Sheppo 				}
57281ae08745Sheppo 			}
57291ae08745Sheppo 		}
57301ae08745Sheppo 	}
57311ae08745Sheppo 
57321ae08745Sheppo 	RW_EXIT(&vswp->mfdbrw);
57331ae08745Sheppo 
57341ae08745Sheppo 	/*
57351ae08745Sheppo 	 * If the pkt came from either a vnet or from physical device,
57361ae08745Sheppo 	 * and if we havent already sent the pkt up the stack then we
57371ae08745Sheppo 	 * check now if we can/should (i.e. the interface is plumbed
57381ae08745Sheppo 	 * and in promisc mode).
57391ae08745Sheppo 	 */
57401ae08745Sheppo 	if ((check_if) &&
57411ae08745Sheppo 		((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) {
57421ae08745Sheppo 		READ_ENTER(&vswp->if_lockrw);
57431ae08745Sheppo 		if (VSW_U_P(vswp->if_state)) {
57441ae08745Sheppo 			RW_EXIT(&vswp->if_lockrw);
57451ae08745Sheppo 			D3(vswp, "%s: (caller %d) finally sending up stack"
57461ae08745Sheppo 				" for addr 0x%llx", __func__, caller, key);
57471ae08745Sheppo 			nmp = copymsg(mp);
57481ae08745Sheppo 			if (nmp)
5749ba2e4443Sseb 				mac_rx(vswp->if_mh, NULL, nmp);
57501ae08745Sheppo 		} else {
57511ae08745Sheppo 			RW_EXIT(&vswp->if_lockrw);
57521ae08745Sheppo 		}
57531ae08745Sheppo 	}
57541ae08745Sheppo 
57551ae08745Sheppo 	freemsg(mp);
57561ae08745Sheppo 
57571ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
57581ae08745Sheppo 
57591ae08745Sheppo 	return (0);
57601ae08745Sheppo }
57611ae08745Sheppo 
57621ae08745Sheppo /* transmit the packet over the given port */
57631ae08745Sheppo static int
57641ae08745Sheppo vsw_portsend(vsw_port_t *port, mblk_t *mp)
57651ae08745Sheppo {
57661ae08745Sheppo 	vsw_ldc_list_t 	*ldcl = &port->p_ldclist;
57671ae08745Sheppo 	vsw_ldc_t 	*ldcp;
57681ae08745Sheppo 	int		status = 0;
57691ae08745Sheppo 
57701ae08745Sheppo 
57711ae08745Sheppo 	READ_ENTER(&ldcl->lockrw);
57721ae08745Sheppo 	/*
57731ae08745Sheppo 	 * Note for now, we have a single channel.
57741ae08745Sheppo 	 */
57751ae08745Sheppo 	ldcp = ldcl->head;
57761ae08745Sheppo 	if (ldcp == NULL) {
57771ae08745Sheppo 		DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n");
57781ae08745Sheppo 		freemsg(mp);
57791ae08745Sheppo 		RW_EXIT(&ldcl->lockrw);
57801ae08745Sheppo 		return (1);
57811ae08745Sheppo 	}
57821ae08745Sheppo 
57831ae08745Sheppo 	/*
57841ae08745Sheppo 	 * Send the message out using the appropriate
57851ae08745Sheppo 	 * transmit function which will free mblock when it
57861ae08745Sheppo 	 * is finished with it.
57871ae08745Sheppo 	 */
57881ae08745Sheppo 	mutex_enter(&port->tx_lock);
57891ae08745Sheppo 	if (port->transmit != NULL)
57901ae08745Sheppo 		status = (*port->transmit)(ldcp, mp);
57911ae08745Sheppo 	else {
57921ae08745Sheppo 		freemsg(mp);
57931ae08745Sheppo 	}
57941ae08745Sheppo 	mutex_exit(&port->tx_lock);
57951ae08745Sheppo 
57961ae08745Sheppo 	RW_EXIT(&ldcl->lockrw);
57971ae08745Sheppo 
57981ae08745Sheppo 	return (status);
57991ae08745Sheppo }
58001ae08745Sheppo 
58011ae08745Sheppo /*
58021ae08745Sheppo  * Send packet out via descriptor ring to a logical device.
58031ae08745Sheppo  */
58041ae08745Sheppo static int
58051ae08745Sheppo vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp)
58061ae08745Sheppo {
58071ae08745Sheppo 	vio_dring_msg_t		dring_pkt;
58081ae08745Sheppo 	dring_info_t		*dp = NULL;
58091ae08745Sheppo 	vsw_private_desc_t	*priv_desc = NULL;
5810d10e4ef2Snarayan 	vnet_public_desc_t	*pub = NULL;
58111ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
58121ae08745Sheppo 	mblk_t			*bp;
58131ae08745Sheppo 	size_t			n, size;
58141ae08745Sheppo 	caddr_t			bufp;
58151ae08745Sheppo 	int			idx;
58161ae08745Sheppo 	int			status = LDC_TX_SUCCESS;
58171ae08745Sheppo 
58181ae08745Sheppo 	D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id);
58191ae08745Sheppo 
58201ae08745Sheppo 	/* TODO: make test a macro */
58211ae08745Sheppo 	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
58221ae08745Sheppo 		(ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
58231ae08745Sheppo 		DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping "
58241ae08745Sheppo 			"packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status,
58251ae08745Sheppo 			ldcp->lane_out.lstate);
58261ae08745Sheppo 		freemsg(mp);
58271ae08745Sheppo 		return (LDC_TX_FAILURE);
58281ae08745Sheppo 	}
58291ae08745Sheppo 
58301ae08745Sheppo 	/*
58311ae08745Sheppo 	 * Note - using first ring only, this may change
58321ae08745Sheppo 	 * in the future.
58331ae08745Sheppo 	 */
58341ae08745Sheppo 	if ((dp = ldcp->lane_out.dringp) == NULL) {
58351ae08745Sheppo 		DERR(vswp, "%s(%lld): no dring for outbound lane on"
58361ae08745Sheppo 			" channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id);
58371ae08745Sheppo 		freemsg(mp);
58381ae08745Sheppo 		return (LDC_TX_FAILURE);
58391ae08745Sheppo 	}
58401ae08745Sheppo 
58411ae08745Sheppo 	size = msgsize(mp);
58421ae08745Sheppo 	if (size > (size_t)ETHERMAX) {
58431ae08745Sheppo 		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
58441ae08745Sheppo 		    ldcp->ldc_id, size);
5845d10e4ef2Snarayan 		freemsg(mp);
5846d10e4ef2Snarayan 		return (LDC_TX_FAILURE);
58471ae08745Sheppo 	}
58481ae08745Sheppo 
58491ae08745Sheppo 	/*
58501ae08745Sheppo 	 * Find a free descriptor
58511ae08745Sheppo 	 *
58521ae08745Sheppo 	 * Note: for the moment we are assuming that we will only
58531ae08745Sheppo 	 * have one dring going from the switch to each of its
58541ae08745Sheppo 	 * peers. This may change in the future.
58551ae08745Sheppo 	 */
58561ae08745Sheppo 	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
5857d10e4ef2Snarayan 		D2(vswp, "%s(%lld): no descriptor available for ring "
58581ae08745Sheppo 			"at 0x%llx", __func__, ldcp->ldc_id, dp);
58591ae08745Sheppo 
58601ae08745Sheppo 		/* nothing more we can do */
58611ae08745Sheppo 		status = LDC_TX_NORESOURCES;
58621ae08745Sheppo 		goto vsw_dringsend_free_exit;
58631ae08745Sheppo 	} else {
58641ae08745Sheppo 		D2(vswp, "%s(%lld): free private descriptor found at pos "
58651ae08745Sheppo 			"%ld addr 0x%llx\n", __func__, ldcp->ldc_id, idx,
58661ae08745Sheppo 			priv_desc);
58671ae08745Sheppo 	}
58681ae08745Sheppo 
58691ae08745Sheppo 	/* copy data into the descriptor */
58701ae08745Sheppo 	bufp = priv_desc->datap;
5871d10e4ef2Snarayan 	bufp += VNET_IPALIGN;
58721ae08745Sheppo 	for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
58731ae08745Sheppo 		n = MBLKL(bp);
58741ae08745Sheppo 		bcopy(bp->b_rptr, bufp, n);
58751ae08745Sheppo 		bufp += n;
58761ae08745Sheppo 	}
58771ae08745Sheppo 
58781ae08745Sheppo 	priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;
5879d10e4ef2Snarayan 
5880d10e4ef2Snarayan 	pub = priv_desc->descp;
5881d10e4ef2Snarayan 	pub->nbytes = priv_desc->datalen;
5882d10e4ef2Snarayan 
5883d10e4ef2Snarayan 	mutex_enter(&priv_desc->dstate_lock);
5884d10e4ef2Snarayan 	pub->hdr.dstate = VIO_DESC_READY;
5885d10e4ef2Snarayan 	mutex_exit(&priv_desc->dstate_lock);
58861ae08745Sheppo 
58871ae08745Sheppo 	/*
5888d10e4ef2Snarayan 	 * Determine whether or not we need to send a message to our
5889d10e4ef2Snarayan 	 * peer prompting them to read our newly updated descriptor(s).
58901ae08745Sheppo 	 */
5891d10e4ef2Snarayan 	mutex_enter(&dp->restart_lock);
5892d10e4ef2Snarayan 	if (dp->restart_reqd) {
5893d10e4ef2Snarayan 		dp->restart_reqd = B_FALSE;
5894d10e4ef2Snarayan 		mutex_exit(&dp->restart_lock);
58951ae08745Sheppo 
58961ae08745Sheppo 		/*
58971ae08745Sheppo 		 * Send a vio_dring_msg to peer to prompt them to read
58981ae08745Sheppo 		 * the updated descriptor ring.
58991ae08745Sheppo 		 */
59001ae08745Sheppo 		dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA;
59011ae08745Sheppo 		dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
59021ae08745Sheppo 		dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA;
59031ae08745Sheppo 		dring_pkt.tag.vio_sid = ldcp->local_session;
59041ae08745Sheppo 
59051ae08745Sheppo 		/* Note - for now using first ring */
59061ae08745Sheppo 		dring_pkt.dring_ident = dp->ident;
59071ae08745Sheppo 
5908d10e4ef2Snarayan 		mutex_enter(&ldcp->lane_out.seq_lock);
59091ae08745Sheppo 		dring_pkt.seq_num = ldcp->lane_out.seq_num++;
5910d10e4ef2Snarayan 		mutex_exit(&ldcp->lane_out.seq_lock);
59111ae08745Sheppo 
5912d10e4ef2Snarayan 		/*
5913d10e4ef2Snarayan 		 * If last_ack_recv is -1 then we know we've not
5914d10e4ef2Snarayan 		 * received any ack's yet, so this must be the first
5915d10e4ef2Snarayan 		 * msg sent, so set the start to the begining of the ring.
5916d10e4ef2Snarayan 		 */
5917d10e4ef2Snarayan 		mutex_enter(&dp->dlock);
5918d10e4ef2Snarayan 		if (dp->last_ack_recv == -1) {
5919d10e4ef2Snarayan 			dring_pkt.start_idx = 0;
5920d10e4ef2Snarayan 		} else {
5921d10e4ef2Snarayan 			dring_pkt.start_idx = (dp->last_ack_recv + 1) %
5922d10e4ef2Snarayan 						dp->num_descriptors;
5923d10e4ef2Snarayan 		}
5924d10e4ef2Snarayan 		dring_pkt.end_idx = -1;
5925d10e4ef2Snarayan 		mutex_exit(&dp->dlock);
59261ae08745Sheppo 
59271ae08745Sheppo 		D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__,
59281ae08745Sheppo 			ldcp->ldc_id, dp, dring_pkt.dring_ident);
5929d10e4ef2Snarayan 		D3(vswp, "%s(%lld): start %lld : end %lld : seq %lld\n",
5930d10e4ef2Snarayan 			__func__, ldcp->ldc_id, dring_pkt.start_idx,
5931d10e4ef2Snarayan 			dring_pkt.end_idx, dring_pkt.seq_num);
59321ae08745Sheppo 
5933d10e4ef2Snarayan 		vsw_send_msg(ldcp, (void *)&dring_pkt,
5934d10e4ef2Snarayan 						sizeof (vio_dring_msg_t));
5935d10e4ef2Snarayan 	} else {
5936d10e4ef2Snarayan 		mutex_exit(&dp->restart_lock);
5937d10e4ef2Snarayan 		D2(vswp, "%s(%lld): updating descp %d", __func__,
5938d10e4ef2Snarayan 			ldcp->ldc_id, idx);
5939d10e4ef2Snarayan 	}
59401ae08745Sheppo 
59411ae08745Sheppo vsw_dringsend_free_exit:
59421ae08745Sheppo 
59431ae08745Sheppo 	/* free the message block */
59441ae08745Sheppo 	freemsg(mp);
59451ae08745Sheppo 
59461ae08745Sheppo 	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
59471ae08745Sheppo 	return (status);
59481ae08745Sheppo }
59491ae08745Sheppo 
59501ae08745Sheppo /*
59511ae08745Sheppo  * Send an in-band descriptor message over ldc.
59521ae08745Sheppo  */
59531ae08745Sheppo static int
59541ae08745Sheppo vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp)
59551ae08745Sheppo {
59561ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
59571ae08745Sheppo 	vio_ibnd_desc_t		ibnd_msg;
59581ae08745Sheppo 	vsw_private_desc_t	*priv_desc = NULL;
59591ae08745Sheppo 	dring_info_t		*dp = NULL;
59601ae08745Sheppo 	size_t			n, size = 0;
59611ae08745Sheppo 	caddr_t			bufp;
59621ae08745Sheppo 	mblk_t			*bp;
59631ae08745Sheppo 	int			idx, i;
59641ae08745Sheppo 	int			status = LDC_TX_SUCCESS;
59651ae08745Sheppo 	static int		warn_msg = 1;
59661ae08745Sheppo 
59671ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
59681ae08745Sheppo 
59691ae08745Sheppo 	ASSERT(mp != NULL);
59701ae08745Sheppo 
59711ae08745Sheppo 	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
59721ae08745Sheppo 		(ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
59731ae08745Sheppo 		DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt",
59741ae08745Sheppo 			__func__, ldcp->ldc_id, ldcp->ldc_status,
59751ae08745Sheppo 			ldcp->lane_out.lstate);
59761ae08745Sheppo 		freemsg(mp);
59771ae08745Sheppo 		return (LDC_TX_FAILURE);
59781ae08745Sheppo 	}
59791ae08745Sheppo 
59801ae08745Sheppo 	/*
59811ae08745Sheppo 	 * only expect single dring to exist, which we use
59821ae08745Sheppo 	 * as an internal buffer, rather than a transfer channel.
59831ae08745Sheppo 	 */
59841ae08745Sheppo 	if ((dp = ldcp->lane_out.dringp) == NULL) {
59851ae08745Sheppo 		DERR(vswp, "%s(%lld): no dring for outbound lane",
59861ae08745Sheppo 			__func__, ldcp->ldc_id);
59871ae08745Sheppo 		DERR(vswp, "%s(%lld) status(%d) state (0x%llx)",
59881ae08745Sheppo 			__func__, ldcp->ldc_id, ldcp->ldc_status,
59891ae08745Sheppo 			ldcp->lane_out.lstate);
59901ae08745Sheppo 		freemsg(mp);
59911ae08745Sheppo 		return (LDC_TX_FAILURE);
59921ae08745Sheppo 	}
59931ae08745Sheppo 
59941ae08745Sheppo 	size = msgsize(mp);
59951ae08745Sheppo 	if (size > (size_t)ETHERMAX) {
59961ae08745Sheppo 		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
59971ae08745Sheppo 		    ldcp->ldc_id, size);
5998d10e4ef2Snarayan 		freemsg(mp);
5999d10e4ef2Snarayan 		return (LDC_TX_FAILURE);
60001ae08745Sheppo 	}
60011ae08745Sheppo 
60021ae08745Sheppo 	/*
60031ae08745Sheppo 	 * Find a free descriptor in our buffer ring
60041ae08745Sheppo 	 */
60051ae08745Sheppo 	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
60061ae08745Sheppo 		if (warn_msg) {
60071ae08745Sheppo 			DERR(vswp, "%s(%lld): no descriptor available for ring "
60081ae08745Sheppo 			"at 0x%llx", __func__, ldcp->ldc_id, dp);
60091ae08745Sheppo 			warn_msg = 0;
60101ae08745Sheppo 		}
60111ae08745Sheppo 
60121ae08745Sheppo 		/* nothing more we can do */
60131ae08745Sheppo 		status = LDC_TX_NORESOURCES;
60141ae08745Sheppo 		goto vsw_descrsend_free_exit;
60151ae08745Sheppo 	} else {
60161ae08745Sheppo 		D2(vswp, "%s(%lld): free private descriptor found at pos "
60171ae08745Sheppo 			"%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx,
60181ae08745Sheppo 			priv_desc);
60191ae08745Sheppo 		warn_msg = 1;
60201ae08745Sheppo 	}
60211ae08745Sheppo 
60221ae08745Sheppo 	/* copy data into the descriptor */
60231ae08745Sheppo 	bufp = priv_desc->datap;
60241ae08745Sheppo 	for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
60251ae08745Sheppo 		n = MBLKL(bp);
60261ae08745Sheppo 		bcopy(bp->b_rptr, bufp, n);
60271ae08745Sheppo 		bufp += n;
60281ae08745Sheppo 	}
60291ae08745Sheppo 
60301ae08745Sheppo 	priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;
60311ae08745Sheppo 
60321ae08745Sheppo 	/* create and send the in-band descp msg */
60331ae08745Sheppo 	ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA;
60341ae08745Sheppo 	ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO;
60351ae08745Sheppo 	ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA;
60361ae08745Sheppo 	ibnd_msg.hdr.tag.vio_sid = ldcp->local_session;
60371ae08745Sheppo 
6038d10e4ef2Snarayan 	mutex_enter(&ldcp->lane_out.seq_lock);
60391ae08745Sheppo 	ibnd_msg.hdr.seq_num = ldcp->lane_out.seq_num++;
6040d10e4ef2Snarayan 	mutex_exit(&ldcp->lane_out.seq_lock);
60411ae08745Sheppo 
60421ae08745Sheppo 	/*
60431ae08745Sheppo 	 * Copy the mem cookies describing the data from the
60441ae08745Sheppo 	 * private region of the descriptor ring into the inband
60451ae08745Sheppo 	 * descriptor.
60461ae08745Sheppo 	 */
60471ae08745Sheppo 	for (i = 0; i < priv_desc->ncookies; i++) {
60481ae08745Sheppo 		bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i],
60491ae08745Sheppo 			sizeof (ldc_mem_cookie_t));
60501ae08745Sheppo 	}
60511ae08745Sheppo 
60521ae08745Sheppo 	ibnd_msg.hdr.desc_handle = idx;
60531ae08745Sheppo 	ibnd_msg.ncookies = priv_desc->ncookies;
60541ae08745Sheppo 	ibnd_msg.nbytes = size;
60551ae08745Sheppo 
60561ae08745Sheppo 	vsw_send_msg(ldcp, (void *)&ibnd_msg, sizeof (vio_ibnd_desc_t));
60571ae08745Sheppo 
60581ae08745Sheppo vsw_descrsend_free_exit:
60591ae08745Sheppo 
60601ae08745Sheppo 	/* free the allocated message blocks */
60611ae08745Sheppo 	freemsg(mp);
60621ae08745Sheppo 
60631ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
60641ae08745Sheppo 	return (status);
60651ae08745Sheppo }
60661ae08745Sheppo 
60671ae08745Sheppo static void
60681ae08745Sheppo vsw_send_ver(vsw_ldc_t *ldcp)
60691ae08745Sheppo {
60701ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
60711ae08745Sheppo 	lane_t		*lp = &ldcp->lane_out;
60721ae08745Sheppo 	vio_ver_msg_t	ver_msg;
60731ae08745Sheppo 
60741ae08745Sheppo 	D1(vswp, "%s enter", __func__);
60751ae08745Sheppo 
60761ae08745Sheppo 	ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
60771ae08745Sheppo 	ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
60781ae08745Sheppo 	ver_msg.tag.vio_subtype_env = VIO_VER_INFO;
60791ae08745Sheppo 	ver_msg.tag.vio_sid = ldcp->local_session;
60801ae08745Sheppo 
60811ae08745Sheppo 	ver_msg.ver_major = vsw_versions[0].ver_major;
60821ae08745Sheppo 	ver_msg.ver_minor = vsw_versions[0].ver_minor;
60831ae08745Sheppo 	ver_msg.dev_class = VDEV_NETWORK_SWITCH;
60841ae08745Sheppo 
60851ae08745Sheppo 	lp->lstate |= VSW_VER_INFO_SENT;
60861ae08745Sheppo 	lp->ver_major = ver_msg.ver_major;
60871ae08745Sheppo 	lp->ver_minor = ver_msg.ver_minor;
60881ae08745Sheppo 
60891ae08745Sheppo 	DUMP_TAG(ver_msg.tag);
60901ae08745Sheppo 
60911ae08745Sheppo 	vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t));
60921ae08745Sheppo 
60931ae08745Sheppo 	D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id);
60941ae08745Sheppo }
60951ae08745Sheppo 
60961ae08745Sheppo static void
60971ae08745Sheppo vsw_send_attr(vsw_ldc_t *ldcp)
60981ae08745Sheppo {
60991ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
61001ae08745Sheppo 	lane_t			*lp = &ldcp->lane_out;
61011ae08745Sheppo 	vnet_attr_msg_t		attr_msg;
61021ae08745Sheppo 
61031ae08745Sheppo 	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
61041ae08745Sheppo 
61051ae08745Sheppo 	/*
61061ae08745Sheppo 	 * Subtype is set to INFO by default
61071ae08745Sheppo 	 */
61081ae08745Sheppo 	attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
61091ae08745Sheppo 	attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
61101ae08745Sheppo 	attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO;
61111ae08745Sheppo 	attr_msg.tag.vio_sid = ldcp->local_session;
61121ae08745Sheppo 
61131ae08745Sheppo 	/* payload copied from default settings for lane */
61141ae08745Sheppo 	attr_msg.mtu = lp->mtu;
61151ae08745Sheppo 	attr_msg.addr_type = lp->addr_type;
61161ae08745Sheppo 	attr_msg.xfer_mode = lp->xfer_mode;
61171ae08745Sheppo 	attr_msg.ack_freq = lp->xfer_mode;
61181ae08745Sheppo 
61191ae08745Sheppo 	READ_ENTER(&vswp->if_lockrw);
61201ae08745Sheppo 	bcopy(&(vswp->if_addr), &(attr_msg.addr), ETHERADDRL);
61211ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
61221ae08745Sheppo 
61231ae08745Sheppo 	ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT;
61241ae08745Sheppo 
61251ae08745Sheppo 	DUMP_TAG(attr_msg.tag);
61261ae08745Sheppo 
61271ae08745Sheppo 	vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t));
61281ae08745Sheppo 
61291ae08745Sheppo 	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
61301ae08745Sheppo }
61311ae08745Sheppo 
61321ae08745Sheppo /*
61331ae08745Sheppo  * Create dring info msg (which also results in the creation of
61341ae08745Sheppo  * a dring).
61351ae08745Sheppo  */
61361ae08745Sheppo static vio_dring_reg_msg_t *
61371ae08745Sheppo vsw_create_dring_info_pkt(vsw_ldc_t *ldcp)
61381ae08745Sheppo {
61391ae08745Sheppo 	vio_dring_reg_msg_t	*mp;
61401ae08745Sheppo 	dring_info_t		*dp;
61411ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
61421ae08745Sheppo 
61431ae08745Sheppo 	D1(vswp, "vsw_create_dring_info_pkt enter\n");
61441ae08745Sheppo 
61451ae08745Sheppo 	/*
61461ae08745Sheppo 	 * If we can't create a dring, obviously no point sending
61471ae08745Sheppo 	 * a message.
61481ae08745Sheppo 	 */
61491ae08745Sheppo 	if ((dp = vsw_create_dring(ldcp)) == NULL)
61501ae08745Sheppo 		return (NULL);
61511ae08745Sheppo 
61521ae08745Sheppo 	mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP);
61531ae08745Sheppo 
61541ae08745Sheppo 	mp->tag.vio_msgtype = VIO_TYPE_CTRL;
61551ae08745Sheppo 	mp->tag.vio_subtype = VIO_SUBTYPE_INFO;
61561ae08745Sheppo 	mp->tag.vio_subtype_env = VIO_DRING_REG;
61571ae08745Sheppo 	mp->tag.vio_sid = ldcp->local_session;
61581ae08745Sheppo 
61591ae08745Sheppo 	/* payload */
61601ae08745Sheppo 	mp->num_descriptors = dp->num_descriptors;
61611ae08745Sheppo 	mp->descriptor_size = dp->descriptor_size;
61621ae08745Sheppo 	mp->options = dp->options;
61631ae08745Sheppo 	mp->ncookies = dp->ncookies;
61641ae08745Sheppo 	bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t));
61651ae08745Sheppo 
61661ae08745Sheppo 	mp->dring_ident = 0;
61671ae08745Sheppo 
61681ae08745Sheppo 	D1(vswp, "vsw_create_dring_info_pkt exit\n");
61691ae08745Sheppo 
61701ae08745Sheppo 	return (mp);
61711ae08745Sheppo }
61721ae08745Sheppo 
61731ae08745Sheppo static void
61741ae08745Sheppo vsw_send_dring_info(vsw_ldc_t *ldcp)
61751ae08745Sheppo {
61761ae08745Sheppo 	vio_dring_reg_msg_t	*dring_msg;
61771ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
61781ae08745Sheppo 
61791ae08745Sheppo 	D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id);
61801ae08745Sheppo 
61811ae08745Sheppo 	dring_msg = vsw_create_dring_info_pkt(ldcp);
61821ae08745Sheppo 	if (dring_msg == NULL) {
61831ae08745Sheppo 		cmn_err(CE_WARN, "vsw_send_dring_info: error creating msg");
61841ae08745Sheppo 		return;
61851ae08745Sheppo 	}
61861ae08745Sheppo 
61871ae08745Sheppo 	ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT;
61881ae08745Sheppo 
61891ae08745Sheppo 	DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg);
61901ae08745Sheppo 
61911ae08745Sheppo 	vsw_send_msg(ldcp, dring_msg,
61921ae08745Sheppo 		sizeof (vio_dring_reg_msg_t));
61931ae08745Sheppo 
61941ae08745Sheppo 	kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t));
61951ae08745Sheppo 
61961ae08745Sheppo 	D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id);
61971ae08745Sheppo }
61981ae08745Sheppo 
61991ae08745Sheppo static void
62001ae08745Sheppo vsw_send_rdx(vsw_ldc_t *ldcp)
62011ae08745Sheppo {
62021ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
62031ae08745Sheppo 	vio_rdx_msg_t	rdx_msg;
62041ae08745Sheppo 
62051ae08745Sheppo 	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);
62061ae08745Sheppo 
62071ae08745Sheppo 	rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
62081ae08745Sheppo 	rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
62091ae08745Sheppo 	rdx_msg.tag.vio_subtype_env = VIO_RDX;
62101ae08745Sheppo 	rdx_msg.tag.vio_sid = ldcp->local_session;
62111ae08745Sheppo 
62121ae08745Sheppo 	ldcp->lane_out.lstate |= VSW_RDX_INFO_SENT;
62131ae08745Sheppo 
62141ae08745Sheppo 	DUMP_TAG(rdx_msg.tag);
62151ae08745Sheppo 
62161ae08745Sheppo 	vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t));
62171ae08745Sheppo 
62181ae08745Sheppo 	D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
62191ae08745Sheppo }
62201ae08745Sheppo 
62211ae08745Sheppo /*
62221ae08745Sheppo  * Generic routine to send message out over ldc channel.
62231ae08745Sheppo  */
62241ae08745Sheppo static void
62251ae08745Sheppo vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size)
62261ae08745Sheppo {
62271ae08745Sheppo 	int		rv;
62281ae08745Sheppo 	size_t		msglen = size;
62291ae08745Sheppo 	vio_msg_tag_t	*tag = (vio_msg_tag_t *)msgp;
62301ae08745Sheppo 	vsw_t		*vswp = ldcp->ldc_vswp;
62311ae08745Sheppo 
62321ae08745Sheppo 	D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes",
62331ae08745Sheppo 			ldcp->ldc_id, size);
62341ae08745Sheppo 
62351ae08745Sheppo 	D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype);
62361ae08745Sheppo 	D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype);
62371ae08745Sheppo 	D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env);
62381ae08745Sheppo 
62391ae08745Sheppo 	mutex_enter(&ldcp->ldc_txlock);
62401ae08745Sheppo 	do {
62411ae08745Sheppo 		msglen = size;
62421ae08745Sheppo 		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen);
62431ae08745Sheppo 	} while (rv == EWOULDBLOCK && --vsw_wretries > 0);
62441ae08745Sheppo 
62451ae08745Sheppo 	mutex_exit(&ldcp->ldc_txlock);
62461ae08745Sheppo 
62471ae08745Sheppo 	if ((rv != 0) || (msglen != size)) {
62481ae08745Sheppo 		DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) "
62491ae08745Sheppo 			"rv(%d) size (%d) msglen(%d)\n", ldcp->ldc_id,
62501ae08745Sheppo 			rv, size, msglen);
62511ae08745Sheppo 	}
62521ae08745Sheppo 
62531ae08745Sheppo 	D1(vswp, "vsw_send_msg (%lld) exit : sent %d bytes",
62541ae08745Sheppo 			ldcp->ldc_id, msglen);
62551ae08745Sheppo }
62561ae08745Sheppo 
62571ae08745Sheppo /*
62581ae08745Sheppo  * Add an entry into FDB, for the given mac address and port_id.
62591ae08745Sheppo  * Returns 0 on success, 1 on failure.
62601ae08745Sheppo  *
62611ae08745Sheppo  * Lock protecting FDB must be held by calling process.
62621ae08745Sheppo  */
62631ae08745Sheppo static int
62641ae08745Sheppo vsw_add_fdb(vsw_t *vswp, vsw_port_t *port)
62651ae08745Sheppo {
62661ae08745Sheppo 	uint64_t	addr = 0;
62671ae08745Sheppo 
62681ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
62691ae08745Sheppo 
62701ae08745Sheppo 	KEY_HASH(addr, port->p_macaddr);
62711ae08745Sheppo 
62721ae08745Sheppo 	D2(vswp, "%s: key = 0x%llx", __func__, addr);
62731ae08745Sheppo 
62741ae08745Sheppo 	/*
62751ae08745Sheppo 	 * Note: duplicate keys will be rejected by mod_hash.
62761ae08745Sheppo 	 */
62771ae08745Sheppo 	if (mod_hash_insert(vswp->fdb, (mod_hash_key_t)addr,
62781ae08745Sheppo 				(mod_hash_val_t)port) != 0) {
62791ae08745Sheppo 		DERR(vswp, "%s: unable to add entry into fdb.", __func__);
62801ae08745Sheppo 		return (1);
62811ae08745Sheppo 	}
62821ae08745Sheppo 
62831ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
62841ae08745Sheppo 	return (0);
62851ae08745Sheppo }
62861ae08745Sheppo 
62871ae08745Sheppo /*
62881ae08745Sheppo  * Remove an entry from FDB.
62891ae08745Sheppo  * Returns 0 on success, 1 on failure.
62901ae08745Sheppo  */
62911ae08745Sheppo static int
62921ae08745Sheppo vsw_del_fdb(vsw_t *vswp, vsw_port_t *port)
62931ae08745Sheppo {
62941ae08745Sheppo 	uint64_t	addr = 0;
62951ae08745Sheppo 
62961ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
62971ae08745Sheppo 
62981ae08745Sheppo 	KEY_HASH(addr, port->p_macaddr);
62991ae08745Sheppo 
63001ae08745Sheppo 	D2(vswp, "%s: key = 0x%llx", __func__, addr);
63011ae08745Sheppo 
63021ae08745Sheppo 	(void) mod_hash_destroy(vswp->fdb, (mod_hash_val_t)addr);
63031ae08745Sheppo 
63041ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
63051ae08745Sheppo 
63061ae08745Sheppo 	return (0);
63071ae08745Sheppo }
63081ae08745Sheppo 
63091ae08745Sheppo /*
63101ae08745Sheppo  * Search fdb for a given mac address.
63111ae08745Sheppo  * Returns pointer to the entry if found, else returns NULL.
63121ae08745Sheppo  */
63131ae08745Sheppo static vsw_port_t *
63141ae08745Sheppo vsw_lookup_fdb(vsw_t *vswp, struct ether_header *ehp)
63151ae08745Sheppo {
63161ae08745Sheppo 	uint64_t	key = 0;
63171ae08745Sheppo 	vsw_port_t	*port = NULL;
63181ae08745Sheppo 
63191ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
63201ae08745Sheppo 
63211ae08745Sheppo 	KEY_HASH(key, ehp->ether_dhost);
63221ae08745Sheppo 
63231ae08745Sheppo 	D2(vswp, "%s: key = 0x%llx", __func__, key);
63241ae08745Sheppo 
63251ae08745Sheppo 	if (mod_hash_find(vswp->fdb, (mod_hash_key_t)key,
63261ae08745Sheppo 				(mod_hash_val_t *)&port) != 0) {
63271ae08745Sheppo 		return (NULL);
63281ae08745Sheppo 	}
63291ae08745Sheppo 
63301ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
63311ae08745Sheppo 
63321ae08745Sheppo 	return (port);
63331ae08745Sheppo }
63341ae08745Sheppo 
63351ae08745Sheppo /*
63361ae08745Sheppo  * Add or remove multicast address(es).
63371ae08745Sheppo  *
63381ae08745Sheppo  * Returns 0 on success, 1 on failure.
63391ae08745Sheppo  */
63401ae08745Sheppo static int
63411ae08745Sheppo vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port)
63421ae08745Sheppo {
63431ae08745Sheppo 	mcst_addr_t		*mcst_p = NULL;
63441ae08745Sheppo 	vsw_t			*vswp = port->p_vswp;
63451ae08745Sheppo 	uint64_t		addr = 0x0;
6346*e1ebb9ecSlm66018 	int			i, ret;
63471ae08745Sheppo 
63481ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
63491ae08745Sheppo 
63501ae08745Sheppo 	D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count);
63511ae08745Sheppo 
6352*e1ebb9ecSlm66018 	if (vswp->mh == NULL)
6353*e1ebb9ecSlm66018 		return (1);
6354*e1ebb9ecSlm66018 
63551ae08745Sheppo 	for (i = 0; i < mcst_pkt->count; i++) {
63561ae08745Sheppo 		/*
63571ae08745Sheppo 		 * Convert address into form that can be used
63581ae08745Sheppo 		 * as hash table key.
63591ae08745Sheppo 		 */
63601ae08745Sheppo 		KEY_HASH(addr, mcst_pkt->mca[i]);
63611ae08745Sheppo 
63621ae08745Sheppo 		/*
63631ae08745Sheppo 		 * Add or delete the specified address/port combination.
63641ae08745Sheppo 		 */
63651ae08745Sheppo 		if (mcst_pkt->set == 0x1) {
63661ae08745Sheppo 			D3(vswp, "%s: adding multicast address 0x%llx for "
63671ae08745Sheppo 				"port %ld", __func__, addr, port->p_instance);
63681ae08745Sheppo 			if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
63691ae08745Sheppo 				/*
63701ae08745Sheppo 				 * Update the list of multicast
63711ae08745Sheppo 				 * addresses contained within the
63721ae08745Sheppo 				 * port structure to include this new
63731ae08745Sheppo 				 * one.
63741ae08745Sheppo 				 */
63751ae08745Sheppo 				mcst_p = kmem_alloc(sizeof (mcst_addr_t),
63761ae08745Sheppo 								KM_NOSLEEP);
63771ae08745Sheppo 				if (mcst_p == NULL) {
63781ae08745Sheppo 					DERR(vswp, "%s: unable to alloc mem",
63791ae08745Sheppo 						__func__);
63801ae08745Sheppo 					return (1);
63811ae08745Sheppo 				}
63821ae08745Sheppo 
63831ae08745Sheppo 				mcst_p->nextp = NULL;
63841ae08745Sheppo 				mcst_p->addr = addr;
63851ae08745Sheppo 
63861ae08745Sheppo 				mutex_enter(&port->mca_lock);
63871ae08745Sheppo 				mcst_p->nextp = port->mcap;
63881ae08745Sheppo 				port->mcap = mcst_p;
63891ae08745Sheppo 				mutex_exit(&port->mca_lock);
63901ae08745Sheppo 
63911ae08745Sheppo 				/*
63921ae08745Sheppo 				 * Program the address into HW. If the addr
63931ae08745Sheppo 				 * has already been programmed then the MAC
63941ae08745Sheppo 				 * just increments a ref counter (which is
63951ae08745Sheppo 				 * used when the address is being deleted)
63961ae08745Sheppo 				 */
6397*e1ebb9ecSlm66018 				ret = mac_multicst_add(vswp->mh,
63981ae08745Sheppo 						(uchar_t *)&mcst_pkt->mca[i]);
6399*e1ebb9ecSlm66018 				if (ret) {
6400*e1ebb9ecSlm66018 					cmn_err(CE_WARN, "!unable to add "
6401*e1ebb9ecSlm66018 						"multicast address");
6402*e1ebb9ecSlm66018 					(void) vsw_del_mcst(vswp, VSW_VNETPORT,
6403*e1ebb9ecSlm66018 						addr, port);
6404*e1ebb9ecSlm66018 					vsw_del_addr(VSW_VNETPORT, port, addr);
6405*e1ebb9ecSlm66018 					return (ret);
6406*e1ebb9ecSlm66018 				}
64071ae08745Sheppo 
64081ae08745Sheppo 			} else {
64091ae08745Sheppo 				DERR(vswp, "%s: error adding multicast "
64101ae08745Sheppo 					"address 0x%llx for port %ld",
64111ae08745Sheppo 					__func__, addr, port->p_instance);
64121ae08745Sheppo 				return (1);
64131ae08745Sheppo 			}
64141ae08745Sheppo 		} else {
64151ae08745Sheppo 			/*
64161ae08745Sheppo 			 * Delete an entry from the multicast hash
64171ae08745Sheppo 			 * table and update the address list
64181ae08745Sheppo 			 * appropriately.
64191ae08745Sheppo 			 */
64201ae08745Sheppo 			if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
64211ae08745Sheppo 				D3(vswp, "%s: deleting multicast address "
64221ae08745Sheppo 					"0x%llx for port %ld", __func__, addr,
64231ae08745Sheppo 					port->p_instance);
64241ae08745Sheppo 
64251ae08745Sheppo 				vsw_del_addr(VSW_VNETPORT, port, addr);
64261ae08745Sheppo 
64271ae08745Sheppo 				/*
64281ae08745Sheppo 				 * Remove the address from HW. The address
64291ae08745Sheppo 				 * will actually only be removed once the ref
64301ae08745Sheppo 				 * count within the MAC layer has dropped to
64311ae08745Sheppo 				 * zero. I.e. we can safely call this fn even
64321ae08745Sheppo 				 * if other ports are interested in this
64331ae08745Sheppo 				 * address.
64341ae08745Sheppo 				 */
64351ae08745Sheppo 				(void) mac_multicst_remove(vswp->mh,
64361ae08745Sheppo 						(uchar_t *)&mcst_pkt->mca[i]);
64371ae08745Sheppo 
64381ae08745Sheppo 			} else {
64391ae08745Sheppo 				DERR(vswp, "%s: error deleting multicast "
64401ae08745Sheppo 					"addr 0x%llx for port %ld",
64411ae08745Sheppo 					__func__, addr, port->p_instance);
64421ae08745Sheppo 				return (1);
64431ae08745Sheppo 			}
64441ae08745Sheppo 		}
64451ae08745Sheppo 	}
64461ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
64471ae08745Sheppo 	return (0);
64481ae08745Sheppo }
64491ae08745Sheppo 
64501ae08745Sheppo /*
64511ae08745Sheppo  * Add a new multicast entry.
64521ae08745Sheppo  *
64531ae08745Sheppo  * Search hash table based on address. If match found then
64541ae08745Sheppo  * update associated val (which is chain of ports), otherwise
64551ae08745Sheppo  * create new key/val (addr/port) pair and insert into table.
64561ae08745Sheppo  */
64571ae08745Sheppo static int
64581ae08745Sheppo vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
64591ae08745Sheppo {
64601ae08745Sheppo 	int		dup = 0;
64611ae08745Sheppo 	int		rv = 0;
64621ae08745Sheppo 	mfdb_ent_t	*ment = NULL;
64631ae08745Sheppo 	mfdb_ent_t	*tmp_ent = NULL;
64641ae08745Sheppo 	mfdb_ent_t	*new_ent = NULL;
64651ae08745Sheppo 	void		*tgt = NULL;
64661ae08745Sheppo 
64671ae08745Sheppo 	if (devtype == VSW_VNETPORT) {
64681ae08745Sheppo 		/*
64691ae08745Sheppo 		 * Being invoked from a vnet.
64701ae08745Sheppo 		 */
64711ae08745Sheppo 		ASSERT(arg != NULL);
64721ae08745Sheppo 		tgt = arg;
64731ae08745Sheppo 		D2(NULL, "%s: port %d : address 0x%llx", __func__,
64741ae08745Sheppo 			((vsw_port_t *)arg)->p_instance, addr);
64751ae08745Sheppo 	} else {
64761ae08745Sheppo 		/*
64771ae08745Sheppo 		 * We are being invoked via the m_multicst mac entry
64781ae08745Sheppo 		 * point.
64791ae08745Sheppo 		 */
64801ae08745Sheppo 		D2(NULL, "%s: address 0x%llx", __func__, addr);
64811ae08745Sheppo 		tgt = (void *)vswp;
64821ae08745Sheppo 	}
64831ae08745Sheppo 
64841ae08745Sheppo 	WRITE_ENTER(&vswp->mfdbrw);
64851ae08745Sheppo 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
64861ae08745Sheppo 				(mod_hash_val_t *)&ment) != 0) {
64871ae08745Sheppo 
64881ae08745Sheppo 		/* address not currently in table */
64891ae08745Sheppo 		ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
64901ae08745Sheppo 		ment->d_addr = (void *)tgt;
64911ae08745Sheppo 		ment->d_type = devtype;
64921ae08745Sheppo 		ment->nextp = NULL;
64931ae08745Sheppo 
64941ae08745Sheppo 		if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr,
64951ae08745Sheppo 			(mod_hash_val_t)ment) != 0) {
64961ae08745Sheppo 			DERR(vswp, "%s: hash table insertion failed", __func__);
64971ae08745Sheppo 			kmem_free(ment, sizeof (mfdb_ent_t));
64981ae08745Sheppo 			rv = 1;
64991ae08745Sheppo 		} else {
65001ae08745Sheppo 			D2(vswp, "%s: added initial entry for 0x%llx to "
65011ae08745Sheppo 				"table", __func__, addr);
65021ae08745Sheppo 		}
65031ae08745Sheppo 	} else {
65041ae08745Sheppo 		/*
65051ae08745Sheppo 		 * Address in table. Check to see if specified port
65061ae08745Sheppo 		 * is already associated with the address. If not add
65071ae08745Sheppo 		 * it now.
65081ae08745Sheppo 		 */
65091ae08745Sheppo 		tmp_ent = ment;
65101ae08745Sheppo 		while (tmp_ent != NULL) {
65111ae08745Sheppo 			if (tmp_ent->d_addr == (void *)tgt) {
65121ae08745Sheppo 				if (devtype == VSW_VNETPORT) {
65131ae08745Sheppo 					DERR(vswp, "%s: duplicate port entry "
65141ae08745Sheppo 						"found for portid %ld and key "
65151ae08745Sheppo 						"0x%llx", __func__,
65161ae08745Sheppo 						((vsw_port_t *)arg)->p_instance,
65171ae08745Sheppo 						addr);
65181ae08745Sheppo 				} else {
65191ae08745Sheppo 					DERR(vswp, "%s: duplicate entry found"
65201ae08745Sheppo 						"for key 0x%llx",
65211ae08745Sheppo 						__func__, addr);
65221ae08745Sheppo 				}
65231ae08745Sheppo 				rv = 1;
65241ae08745Sheppo 				dup = 1;
65251ae08745Sheppo 				break;
65261ae08745Sheppo 			}
65271ae08745Sheppo 			tmp_ent = tmp_ent->nextp;
65281ae08745Sheppo 		}
65291ae08745Sheppo 
65301ae08745Sheppo 		/*
65311ae08745Sheppo 		 * Port not on list so add it to end now.
65321ae08745Sheppo 		 */
65331ae08745Sheppo 		if (0 == dup) {
65341ae08745Sheppo 			D2(vswp, "%s: added entry for 0x%llx to table",
65351ae08745Sheppo 				__func__, addr);
65361ae08745Sheppo 			new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
65371ae08745Sheppo 			new_ent->d_addr = (void *)tgt;
65381ae08745Sheppo 			new_ent->d_type = devtype;
65391ae08745Sheppo 			new_ent->nextp = NULL;
65401ae08745Sheppo 
65411ae08745Sheppo 			tmp_ent = ment;
65421ae08745Sheppo 			while (tmp_ent->nextp != NULL)
65431ae08745Sheppo 				tmp_ent = tmp_ent->nextp;
65441ae08745Sheppo 
65451ae08745Sheppo 			tmp_ent->nextp = new_ent;
65461ae08745Sheppo 		}
65471ae08745Sheppo 	}
65481ae08745Sheppo 
65491ae08745Sheppo 	RW_EXIT(&vswp->mfdbrw);
65501ae08745Sheppo 	return (rv);
65511ae08745Sheppo }
65521ae08745Sheppo 
65531ae08745Sheppo /*
65541ae08745Sheppo  * Remove a multicast entry from the hashtable.
65551ae08745Sheppo  *
65561ae08745Sheppo  * Search hash table based on address. If match found, scan
65571ae08745Sheppo  * list of ports associated with address. If specified port
65581ae08745Sheppo  * found remove it from list.
65591ae08745Sheppo  */
65601ae08745Sheppo static int
65611ae08745Sheppo vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
65621ae08745Sheppo {
65631ae08745Sheppo 	mfdb_ent_t	*ment = NULL;
65641ae08745Sheppo 	mfdb_ent_t	*curr_p, *prev_p;
65651ae08745Sheppo 	void		*tgt = NULL;
65661ae08745Sheppo 
65671ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
65681ae08745Sheppo 
65691ae08745Sheppo 	if (devtype == VSW_VNETPORT) {
65701ae08745Sheppo 		tgt = (vsw_port_t *)arg;
65711ae08745Sheppo 		D2(vswp, "%s: removing port %d from mFDB for address"
65721ae08745Sheppo 			" 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance,
65731ae08745Sheppo 			addr);
65741ae08745Sheppo 	} else {
65751ae08745Sheppo 		D2(vswp, "%s: removing entry", __func__);
65761ae08745Sheppo 		tgt = (void *)vswp;
65771ae08745Sheppo 	}
65781ae08745Sheppo 
65791ae08745Sheppo 	WRITE_ENTER(&vswp->mfdbrw);
65801ae08745Sheppo 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
65811ae08745Sheppo 				(mod_hash_val_t *)&ment) != 0) {
65821ae08745Sheppo 		D2(vswp, "%s: address 0x%llx not in table", __func__, addr);
65831ae08745Sheppo 		RW_EXIT(&vswp->mfdbrw);
65841ae08745Sheppo 		return (1);
65851ae08745Sheppo 	}
65861ae08745Sheppo 
65871ae08745Sheppo 	prev_p = curr_p = ment;
65881ae08745Sheppo 
65891ae08745Sheppo 	while (curr_p != NULL) {
65901ae08745Sheppo 		if (curr_p->d_addr == (void *)tgt) {
65911ae08745Sheppo 			if (devtype == VSW_VNETPORT) {
65921ae08745Sheppo 				D2(vswp, "%s: port %d found", __func__,
65931ae08745Sheppo 					((vsw_port_t *)tgt)->p_instance);
65941ae08745Sheppo 			} else {
65951ae08745Sheppo 				D2(vswp, "%s: instance found", __func__);
65961ae08745Sheppo 			}
65971ae08745Sheppo 
65981ae08745Sheppo 			if (prev_p == curr_p) {
65991ae08745Sheppo 				/*
66001ae08745Sheppo 				 * head of list, if no other element is in
66011ae08745Sheppo 				 * list then destroy this entry, otherwise
66021ae08745Sheppo 				 * just replace it with updated value.
66031ae08745Sheppo 				 */
66041ae08745Sheppo 				ment = curr_p->nextp;
66051ae08745Sheppo 				kmem_free(curr_p, sizeof (mfdb_ent_t));
66061ae08745Sheppo 				if (ment == NULL) {
66071ae08745Sheppo 					(void) mod_hash_destroy(vswp->mfdb,
66081ae08745Sheppo 							(mod_hash_val_t)addr);
66091ae08745Sheppo 				} else {
66101ae08745Sheppo 					(void) mod_hash_replace(vswp->mfdb,
66111ae08745Sheppo 							(mod_hash_key_t)addr,
66121ae08745Sheppo 							(mod_hash_val_t)ment);
66131ae08745Sheppo 				}
66141ae08745Sheppo 			} else {
66151ae08745Sheppo 				/*
66161ae08745Sheppo 				 * Not head of list, no need to do
66171ae08745Sheppo 				 * replacement, just adjust list pointers.
66181ae08745Sheppo 				 */
66191ae08745Sheppo 				prev_p->nextp = curr_p->nextp;
66201ae08745Sheppo 				kmem_free(curr_p, sizeof (mfdb_ent_t));
66211ae08745Sheppo 			}
66221ae08745Sheppo 			break;
66231ae08745Sheppo 		}
66241ae08745Sheppo 
66251ae08745Sheppo 		prev_p = curr_p;
66261ae08745Sheppo 		curr_p = curr_p->nextp;
66271ae08745Sheppo 	}
66281ae08745Sheppo 
66291ae08745Sheppo 	RW_EXIT(&vswp->mfdbrw);
66301ae08745Sheppo 
66311ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
66321ae08745Sheppo 
66331ae08745Sheppo 	return (0);
66341ae08745Sheppo }
66351ae08745Sheppo 
66361ae08745Sheppo /*
66371ae08745Sheppo  * Port is being deleted, but has registered an interest in one
66381ae08745Sheppo  * or more multicast groups. Using the list of addresses maintained
66391ae08745Sheppo  * within the port structure find the appropriate entry in the hash
66401ae08745Sheppo  * table and remove this port from the list of interested ports.
66411ae08745Sheppo  */
66421ae08745Sheppo static void
66431ae08745Sheppo vsw_del_mcst_port(vsw_port_t *port)
66441ae08745Sheppo {
66451ae08745Sheppo 	mcst_addr_t	*mcst_p = NULL;
66461ae08745Sheppo 	vsw_t		*vswp = port->p_vswp;
66471ae08745Sheppo 
66481ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
66491ae08745Sheppo 
66501ae08745Sheppo 	mutex_enter(&port->mca_lock);
66511ae08745Sheppo 	while (port->mcap != NULL) {
66521ae08745Sheppo 		(void) vsw_del_mcst(vswp, VSW_VNETPORT,
66531ae08745Sheppo 					port->mcap->addr, port);
66541ae08745Sheppo 
66551ae08745Sheppo 		mcst_p = port->mcap->nextp;
66561ae08745Sheppo 		kmem_free(port->mcap, sizeof (mcst_addr_t));
66571ae08745Sheppo 		port->mcap = mcst_p;
66581ae08745Sheppo 	}
66591ae08745Sheppo 	mutex_exit(&port->mca_lock);
66601ae08745Sheppo 
66611ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
66621ae08745Sheppo }
66631ae08745Sheppo 
66641ae08745Sheppo /*
66651ae08745Sheppo  * This vsw instance is detaching, but has registered an interest in one
66661ae08745Sheppo  * or more multicast groups. Using the list of addresses maintained
66671ae08745Sheppo  * within the vsw structure find the appropriate entry in the hash
66681ae08745Sheppo  * table and remove this instance from the list of interested ports.
66691ae08745Sheppo  */
66701ae08745Sheppo static void
66711ae08745Sheppo vsw_del_mcst_vsw(vsw_t *vswp)
66721ae08745Sheppo {
66731ae08745Sheppo 	mcst_addr_t	*next_p = NULL;
66741ae08745Sheppo 
66751ae08745Sheppo 	D1(vswp, "%s: enter", __func__);
66761ae08745Sheppo 
66771ae08745Sheppo 	mutex_enter(&vswp->mca_lock);
66781ae08745Sheppo 
66791ae08745Sheppo 	while (vswp->mcap != NULL) {
66801ae08745Sheppo 		DERR(vswp, "%s: deleting addr 0x%llx",
66811ae08745Sheppo 			__func__, vswp->mcap->addr);
66821ae08745Sheppo 		(void) vsw_del_mcst(vswp, VSW_LOCALDEV,
66831ae08745Sheppo 				vswp->mcap->addr, NULL);
66841ae08745Sheppo 
66851ae08745Sheppo 		next_p = vswp->mcap->nextp;
66861ae08745Sheppo 		kmem_free(vswp->mcap, sizeof (mcst_addr_t));
66871ae08745Sheppo 		vswp->mcap = next_p;
66881ae08745Sheppo 	}
66891ae08745Sheppo 
66901ae08745Sheppo 	vswp->mcap = NULL;
66911ae08745Sheppo 	mutex_exit(&vswp->mca_lock);
66921ae08745Sheppo 
66931ae08745Sheppo 	D1(vswp, "%s: exit", __func__);
66941ae08745Sheppo }
66951ae08745Sheppo 
66961ae08745Sheppo 
66971ae08745Sheppo /*
66981ae08745Sheppo  * Remove the specified address from the list of address maintained
66991ae08745Sheppo  * in this port node.
67001ae08745Sheppo  */
67011ae08745Sheppo static void
67021ae08745Sheppo vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr)
67031ae08745Sheppo {
67041ae08745Sheppo 	vsw_t		*vswp = NULL;
67051ae08745Sheppo 	vsw_port_t	*port = NULL;
67061ae08745Sheppo 	mcst_addr_t	*prev_p = NULL;
67071ae08745Sheppo 	mcst_addr_t	*curr_p = NULL;
67081ae08745Sheppo 
67091ae08745Sheppo 	D1(NULL, "%s: enter : devtype %d : addr 0x%llx",
67101ae08745Sheppo 		__func__, devtype, addr);
67111ae08745Sheppo 
67121ae08745Sheppo 	if (devtype == VSW_VNETPORT) {
67131ae08745Sheppo 		port = (vsw_port_t *)arg;
67141ae08745Sheppo 		mutex_enter(&port->mca_lock);
67151ae08745Sheppo 		prev_p = curr_p = port->mcap;
67161ae08745Sheppo 	} else {
67171ae08745Sheppo 		vswp = (vsw_t *)arg;
67181ae08745Sheppo 		mutex_enter(&vswp->mca_lock);
67191ae08745Sheppo 		prev_p = curr_p = vswp->mcap;
67201ae08745Sheppo 	}
67211ae08745Sheppo 
67221ae08745Sheppo 	while (curr_p != NULL) {
67231ae08745Sheppo 		if (curr_p->addr == addr) {
67241ae08745Sheppo 			D2(NULL, "%s: address found", __func__);
67251ae08745Sheppo 			/* match found */
67261ae08745Sheppo 			if (prev_p == curr_p) {
67271ae08745Sheppo 				/* list head */
67281ae08745Sheppo 				if (devtype == VSW_VNETPORT)
67291ae08745Sheppo 					port->mcap = curr_p->nextp;
67301ae08745Sheppo 				else
67311ae08745Sheppo 					vswp->mcap = curr_p->nextp;
67321ae08745Sheppo 			} else {
67331ae08745Sheppo 				prev_p->nextp = curr_p->nextp;
67341ae08745Sheppo 			}
67351ae08745Sheppo 			kmem_free(curr_p, sizeof (mcst_addr_t));
67361ae08745Sheppo 			break;
67371ae08745Sheppo 		} else {
67381ae08745Sheppo 			prev_p = curr_p;
67391ae08745Sheppo 			curr_p = curr_p->nextp;
67401ae08745Sheppo 		}
67411ae08745Sheppo 	}
67421ae08745Sheppo 
67431ae08745Sheppo 	if (devtype == VSW_VNETPORT)
67441ae08745Sheppo 		mutex_exit(&port->mca_lock);
67451ae08745Sheppo 	else
67461ae08745Sheppo 		mutex_exit(&vswp->mca_lock);
67471ae08745Sheppo 
67481ae08745Sheppo 	D1(NULL, "%s: exit", __func__);
67491ae08745Sheppo }
67501ae08745Sheppo 
67511ae08745Sheppo /*
67521ae08745Sheppo  * Creates a descriptor ring (dring) and links it into the
67531ae08745Sheppo  * link of outbound drings for this channel.
67541ae08745Sheppo  *
67551ae08745Sheppo  * Returns NULL if creation failed.
67561ae08745Sheppo  */
67571ae08745Sheppo static dring_info_t *
67581ae08745Sheppo vsw_create_dring(vsw_ldc_t *ldcp)
67591ae08745Sheppo {
67601ae08745Sheppo 	vsw_private_desc_t	*priv_addr = NULL;
67611ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
67621ae08745Sheppo 	ldc_mem_info_t		minfo;
67631ae08745Sheppo 	dring_info_t		*dp, *tp;
67641ae08745Sheppo 	int			i;
67651ae08745Sheppo 
67661ae08745Sheppo 	dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
67671ae08745Sheppo 
67681ae08745Sheppo 	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);
67691ae08745Sheppo 
67701ae08745Sheppo 	/* create public section of ring */
67711ae08745Sheppo 	if ((ldc_mem_dring_create(VSW_RING_NUM_EL,
67721ae08745Sheppo 			VSW_PUB_SIZE, &dp->handle)) != 0) {
67731ae08745Sheppo 
67741ae08745Sheppo 		DERR(vswp, "vsw_create_dring(%lld): ldc dring create "
67751ae08745Sheppo 			"failed", ldcp->ldc_id);
67761ae08745Sheppo 		goto create_fail_exit;
67771ae08745Sheppo 	}
67781ae08745Sheppo 
67791ae08745Sheppo 	ASSERT(dp->handle != NULL);
67801ae08745Sheppo 
67811ae08745Sheppo 	/*
67821ae08745Sheppo 	 * Get the base address of the public section of the ring.
67831ae08745Sheppo 	 */
67841ae08745Sheppo 	if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
67851ae08745Sheppo 		DERR(vswp, "vsw_create_dring(%lld): dring info failed\n",
67861ae08745Sheppo 			ldcp->ldc_id);
67871ae08745Sheppo 		goto dring_fail_exit;
67881ae08745Sheppo 	} else {
67891ae08745Sheppo 		ASSERT(minfo.vaddr != 0);
67901ae08745Sheppo 		dp->pub_addr = minfo.vaddr;
67911ae08745Sheppo 	}
67921ae08745Sheppo 
67931ae08745Sheppo 	dp->num_descriptors = VSW_RING_NUM_EL;
67941ae08745Sheppo 	dp->descriptor_size = VSW_PUB_SIZE;
67951ae08745Sheppo 	dp->options = VIO_TX_DRING;
67961ae08745Sheppo 	dp->ncookies = 1;	/* guaranteed by ldc */
67971ae08745Sheppo 
67981ae08745Sheppo 	/*
67991ae08745Sheppo 	 * create private portion of ring
68001ae08745Sheppo 	 */
68011ae08745Sheppo 	dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc(
68021ae08745Sheppo 		(sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP);
68031ae08745Sheppo 
68041ae08745Sheppo 	if (vsw_setup_ring(ldcp, dp)) {
68051ae08745Sheppo 		DERR(vswp, "%s: unable to setup ring", __func__);
68061ae08745Sheppo 		goto dring_fail_exit;
68071ae08745Sheppo 	}
68081ae08745Sheppo 
68091ae08745Sheppo 	/* haven't used any descriptors yet */
68101ae08745Sheppo 	dp->end_idx = 0;
6811d10e4ef2Snarayan 	dp->last_ack_recv = -1;
68121ae08745Sheppo 
68131ae08745Sheppo 	/* bind dring to the channel */
68141ae08745Sheppo 	if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle,
68151ae08745Sheppo 		LDC_SHADOW_MAP, LDC_MEM_RW,
68161ae08745Sheppo 		&dp->cookie[0], &dp->ncookies)) != 0) {
68171ae08745Sheppo 		DERR(vswp, "vsw_create_dring: unable to bind to channel "
68181ae08745Sheppo 			"%lld", ldcp->ldc_id);
68191ae08745Sheppo 		goto dring_fail_exit;
68201ae08745Sheppo 	}
68211ae08745Sheppo 
6822d10e4ef2Snarayan 	mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
6823d10e4ef2Snarayan 	dp->restart_reqd = B_TRUE;
6824d10e4ef2Snarayan 
68251ae08745Sheppo 	/*
68261ae08745Sheppo 	 * Only ever create rings for outgoing lane. Link it onto
68271ae08745Sheppo 	 * end of list.
68281ae08745Sheppo 	 */
68291ae08745Sheppo 	if (ldcp->lane_out.dringp == NULL) {
68301ae08745Sheppo 		D2(vswp, "vsw_create_dring: adding first outbound ring");
68311ae08745Sheppo 		ldcp->lane_out.dringp = dp;
68321ae08745Sheppo 	} else {
68331ae08745Sheppo 		tp = ldcp->lane_out.dringp;
68341ae08745Sheppo 		while (tp->next != NULL)
68351ae08745Sheppo 			tp = tp->next;
68361ae08745Sheppo 
68371ae08745Sheppo 		tp->next = dp;
68381ae08745Sheppo 	}
68391ae08745Sheppo 
68401ae08745Sheppo 	return (dp);
68411ae08745Sheppo 
68421ae08745Sheppo dring_fail_exit:
68431ae08745Sheppo 	(void) ldc_mem_dring_destroy(dp->handle);
68441ae08745Sheppo 
68451ae08745Sheppo create_fail_exit:
68461ae08745Sheppo 	if (dp->priv_addr != NULL) {
68471ae08745Sheppo 		priv_addr = dp->priv_addr;
68481ae08745Sheppo 		for (i = 0; i < VSW_RING_NUM_EL; i++) {
68491ae08745Sheppo 			if (priv_addr->memhandle != NULL)
68501ae08745Sheppo 				(void) ldc_mem_free_handle(
68511ae08745Sheppo 						priv_addr->memhandle);
68521ae08745Sheppo 			priv_addr++;
68531ae08745Sheppo 		}
68541ae08745Sheppo 		kmem_free(dp->priv_addr,
68551ae08745Sheppo 			(sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
68561ae08745Sheppo 	}
68571ae08745Sheppo 	mutex_destroy(&dp->dlock);
68581ae08745Sheppo 
68591ae08745Sheppo 	kmem_free(dp, sizeof (dring_info_t));
68601ae08745Sheppo 	return (NULL);
68611ae08745Sheppo }
68621ae08745Sheppo 
68631ae08745Sheppo /*
68641ae08745Sheppo  * Create a ring consisting of just a private portion and link
68651ae08745Sheppo  * it into the list of rings for the outbound lane.
68661ae08745Sheppo  *
68671ae08745Sheppo  * These type of rings are used primarily for temporary data
68681ae08745Sheppo  * storage (i.e. as data buffers).
68691ae08745Sheppo  */
68701ae08745Sheppo void
68711ae08745Sheppo vsw_create_privring(vsw_ldc_t *ldcp)
68721ae08745Sheppo {
68731ae08745Sheppo 	dring_info_t		*dp, *tp;
68741ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
68751ae08745Sheppo 
68761ae08745Sheppo 	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);
68771ae08745Sheppo 
68781ae08745Sheppo 	dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);
68791ae08745Sheppo 
68801ae08745Sheppo 	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);
68811ae08745Sheppo 
68821ae08745Sheppo 	/* no public section */
68831ae08745Sheppo 	dp->pub_addr = NULL;
68841ae08745Sheppo 
68851ae08745Sheppo 	dp->priv_addr = kmem_zalloc((sizeof (vsw_private_desc_t) *
68861ae08745Sheppo 					VSW_RING_NUM_EL), KM_SLEEP);
68871ae08745Sheppo 
68881ae08745Sheppo 	if (vsw_setup_ring(ldcp, dp)) {
68891ae08745Sheppo 		DERR(vswp, "%s: setup of ring failed", __func__);
68901ae08745Sheppo 		kmem_free(dp->priv_addr,
68911ae08745Sheppo 			(sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
68921ae08745Sheppo 		mutex_destroy(&dp->dlock);
68931ae08745Sheppo 		kmem_free(dp, sizeof (dring_info_t));
68941ae08745Sheppo 		return;
68951ae08745Sheppo 	}
68961ae08745Sheppo 
68971ae08745Sheppo 	/* haven't used any descriptors yet */
68981ae08745Sheppo 	dp->end_idx = 0;
68991ae08745Sheppo 
6900d10e4ef2Snarayan 	mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
6901d10e4ef2Snarayan 	dp->restart_reqd = B_TRUE;
6902d10e4ef2Snarayan 
69031ae08745Sheppo 	/*
69041ae08745Sheppo 	 * Only ever create rings for outgoing lane. Link it onto
69051ae08745Sheppo 	 * end of list.
69061ae08745Sheppo 	 */
69071ae08745Sheppo 	if (ldcp->lane_out.dringp == NULL) {
69081ae08745Sheppo 		D2(vswp, "%s: adding first outbound privring", __func__);
69091ae08745Sheppo 		ldcp->lane_out.dringp = dp;
69101ae08745Sheppo 	} else {
69111ae08745Sheppo 		tp = ldcp->lane_out.dringp;
69121ae08745Sheppo 		while (tp->next != NULL)
69131ae08745Sheppo 			tp = tp->next;
69141ae08745Sheppo 
69151ae08745Sheppo 		tp->next = dp;
69161ae08745Sheppo 	}
69171ae08745Sheppo 
69181ae08745Sheppo 	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
69191ae08745Sheppo }
69201ae08745Sheppo 
69211ae08745Sheppo /*
69221ae08745Sheppo  * Setup the descriptors in the dring. Returns 0 on success, 1 on
69231ae08745Sheppo  * failure.
69241ae08745Sheppo  */
69251ae08745Sheppo int
69261ae08745Sheppo vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp)
69271ae08745Sheppo {
69281ae08745Sheppo 	vnet_public_desc_t	*pub_addr = NULL;
69291ae08745Sheppo 	vsw_private_desc_t	*priv_addr = NULL;
69301ae08745Sheppo 	vsw_t			*vswp = ldcp->ldc_vswp;
69311ae08745Sheppo 	uint64_t		*tmpp;
69321ae08745Sheppo 	uint64_t		offset = 0;
69331ae08745Sheppo 	uint32_t		ncookies = 0;
69341ae08745Sheppo 	static char		*name = "vsw_setup_ring";
6935d10e4ef2Snarayan 	int			i, j, nc, rv;
69361ae08745Sheppo 
69371ae08745Sheppo 	priv_addr = dp->priv_addr;
69381ae08745Sheppo 	pub_addr = dp->pub_addr;
69391ae08745Sheppo 
6940d10e4ef2Snarayan 	/* public section may be null but private should never be */
6941d10e4ef2Snarayan 	ASSERT(priv_addr != NULL);
6942d10e4ef2Snarayan 
69431ae08745Sheppo 	/*
69441ae08745Sheppo 	 * Allocate the region of memory which will be used to hold
69451ae08745Sheppo 	 * the data the descriptors will refer to.
69461ae08745Sheppo 	 */
69471ae08745Sheppo 	dp->data_sz = (VSW_RING_NUM_EL * VSW_RING_EL_DATA_SZ);
69481ae08745Sheppo 	dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP);
69491ae08745Sheppo 
69501ae08745Sheppo 	D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name,
69511ae08745Sheppo 		dp->data_sz, dp->data_addr);
69521ae08745Sheppo 
69531ae08745Sheppo 	tmpp = (uint64_t *)dp->data_addr;
69541ae08745Sheppo 	offset = VSW_RING_EL_DATA_SZ / sizeof (tmpp);
69551ae08745Sheppo 
69561ae08745Sheppo 	/*
69571ae08745Sheppo 	 * Initialise some of the private and public (if they exist)
69581ae08745Sheppo 	 * descriptor fields.
69591ae08745Sheppo 	 */
69601ae08745Sheppo 	for (i = 0; i < VSW_RING_NUM_EL; i++) {
6961d10e4ef2Snarayan 		mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL);
6962d10e4ef2Snarayan 
69631ae08745Sheppo 		if ((ldc_mem_alloc_handle(ldcp->ldc_handle,
69641ae08745Sheppo 			&priv_addr->memhandle)) != 0) {
69651ae08745Sheppo 			DERR(vswp, "%s: alloc mem handle failed", name);
69661ae08745Sheppo 			goto setup_ring_cleanup;
69671ae08745Sheppo 		}
69681ae08745Sheppo 
69691ae08745Sheppo 		priv_addr->datap = (void *)tmpp;
69701ae08745Sheppo 
69711ae08745Sheppo 		rv = ldc_mem_bind_handle(priv_addr->memhandle,
69721ae08745Sheppo 			(caddr_t)priv_addr->datap, VSW_RING_EL_DATA_SZ,
69731ae08745Sheppo 			LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W,
69741ae08745Sheppo 			&(priv_addr->memcookie[0]), &ncookies);
69751ae08745Sheppo 		if (rv != 0) {
69761ae08745Sheppo 			DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed "
69771ae08745Sheppo 				"(rv %d)", name, ldcp->ldc_id, rv);
69781ae08745Sheppo 			goto setup_ring_cleanup;
69791ae08745Sheppo 		}
69801ae08745Sheppo 		priv_addr->bound = 1;
69811ae08745Sheppo 
69821ae08745Sheppo 		D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx",
69831ae08745Sheppo 			name, i, priv_addr->memcookie[0].addr,
69841ae08745Sheppo 			priv_addr->memcookie[0].size);
69851ae08745Sheppo 
69861ae08745Sheppo 		if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) {
69871ae08745Sheppo 			DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned "
69881ae08745Sheppo 				"invalid num of cookies (%d) for size 0x%llx",
69891ae08745Sheppo 				name, ldcp->ldc_id, ncookies,
69901ae08745Sheppo 				VSW_RING_EL_DATA_SZ);
69911ae08745Sheppo 
69921ae08745Sheppo 			goto setup_ring_cleanup;
69931ae08745Sheppo 		} else {
69941ae08745Sheppo 			for (j = 1; j < ncookies; j++) {
69951ae08745Sheppo 				rv = ldc_mem_nextcookie(priv_addr->memhandle,
69961ae08745Sheppo 					&(priv_addr->memcookie[j]));
69971ae08745Sheppo 				if (rv != 0) {
69981ae08745Sheppo 					DERR(vswp, "%s: ldc_mem_nextcookie "
69991ae08745Sheppo 						"failed rv (%d)", name, rv);
70001ae08745Sheppo 					goto setup_ring_cleanup;
70011ae08745Sheppo 				}
70021ae08745Sheppo 				D3(vswp, "%s: memcookie %d : addr 0x%llx : "
70031ae08745Sheppo 					"size 0x%llx", name, j,
70041ae08745Sheppo 					priv_addr->memcookie[j].addr,
70051ae08745Sheppo 					priv_addr->memcookie[j].size);
70061ae08745Sheppo 			}
70071ae08745Sheppo 
70081ae08745Sheppo 		}
70091ae08745Sheppo 		priv_addr->ncookies = ncookies;
70101ae08745Sheppo 		priv_addr->dstate = VIO_DESC_FREE;
70111ae08745Sheppo 
70121ae08745Sheppo 		if (pub_addr != NULL) {
70131ae08745Sheppo 
70141ae08745Sheppo 			/* link pub and private sides */
70151ae08745Sheppo 			priv_addr->descp = pub_addr;
70161ae08745Sheppo 
7017d10e4ef2Snarayan 			pub_addr->ncookies = priv_addr->ncookies;
7018d10e4ef2Snarayan 
7019d10e4ef2Snarayan 			for (nc = 0; nc < pub_addr->ncookies; nc++) {
7020d10e4ef2Snarayan 				bcopy(&priv_addr->memcookie[nc],
7021d10e4ef2Snarayan 					&pub_addr->memcookie[nc],
7022d10e4ef2Snarayan 					sizeof (ldc_mem_cookie_t));
7023d10e4ef2Snarayan 			}
7024d10e4ef2Snarayan 
70251ae08745Sheppo 			pub_addr->hdr.dstate = VIO_DESC_FREE;
70261ae08745Sheppo 			pub_addr++;
70271ae08745Sheppo 		}
70281ae08745Sheppo 
70291ae08745Sheppo 		/*
70301ae08745Sheppo 		 * move to next element in the dring and the next
70311ae08745Sheppo 		 * position in the data buffer.
70321ae08745Sheppo 		 */
70331ae08745Sheppo 		priv_addr++;
70341ae08745Sheppo 		tmpp += offset;
70351ae08745Sheppo 	}
70361ae08745Sheppo 
70371ae08745Sheppo 	return (0);
70381ae08745Sheppo 
70391ae08745Sheppo setup_ring_cleanup:
70401ae08745Sheppo 	priv_addr = dp->priv_addr;
70411ae08745Sheppo 
7042d10e4ef2Snarayan 	for (j = 0; j < i; j++) {
70431ae08745Sheppo 		(void) ldc_mem_unbind_handle(priv_addr->memhandle);
70441ae08745Sheppo 		(void) ldc_mem_free_handle(priv_addr->memhandle);
70451ae08745Sheppo 
7046d10e4ef2Snarayan 		mutex_destroy(&priv_addr->dstate_lock);
7047d10e4ef2Snarayan 
70481ae08745Sheppo 		priv_addr++;
70491ae08745Sheppo 	}
70501ae08745Sheppo 	kmem_free(dp->data_addr, dp->data_sz);
70511ae08745Sheppo 
70521ae08745Sheppo 	return (1);
70531ae08745Sheppo }
70541ae08745Sheppo 
70551ae08745Sheppo /*
70561ae08745Sheppo  * Searches the private section of a ring for a free descriptor,
70571ae08745Sheppo  * starting at the location of the last free descriptor found
70581ae08745Sheppo  * previously.
70591ae08745Sheppo  *
7060d10e4ef2Snarayan  * Returns 0 if free descriptor is available, and updates state
7061d10e4ef2Snarayan  * of private descriptor to VIO_DESC_READY,  otherwise returns 1.
70621ae08745Sheppo  *
70631ae08745Sheppo  * FUTURE: might need to return contiguous range of descriptors
70641ae08745Sheppo  * as dring info msg assumes all will be contiguous.
70651ae08745Sheppo  */
70661ae08745Sheppo static int
70671ae08745Sheppo vsw_dring_find_free_desc(dring_info_t *dringp,
70681ae08745Sheppo 		vsw_private_desc_t **priv_p, int *idx)
70691ae08745Sheppo {
7070d10e4ef2Snarayan 	vsw_private_desc_t	*addr = NULL;
70711ae08745Sheppo 	int			num = VSW_RING_NUM_EL;
70721ae08745Sheppo 	int			ret = 1;
70731ae08745Sheppo 
70741ae08745Sheppo 	D1(NULL, "%s enter\n", __func__);
70751ae08745Sheppo 
7076d10e4ef2Snarayan 	ASSERT(dringp->priv_addr != NULL);
70771ae08745Sheppo 
70781ae08745Sheppo 	D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld",
7079d10e4ef2Snarayan 			__func__, dringp, dringp->end_idx);
70801ae08745Sheppo 
7081d10e4ef2Snarayan 	addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx;
7082d10e4ef2Snarayan 
7083d10e4ef2Snarayan 	mutex_enter(&addr->dstate_lock);
70841ae08745Sheppo 	if (addr->dstate == VIO_DESC_FREE) {
7085d10e4ef2Snarayan 		addr->dstate = VIO_DESC_READY;
70861ae08745Sheppo 		*priv_p = addr;
7087d10e4ef2Snarayan 		*idx = dringp->end_idx;
7088d10e4ef2Snarayan 		dringp->end_idx = (dringp->end_idx + 1) % num;
70891ae08745Sheppo 		ret = 0;
7090d10e4ef2Snarayan 
70911ae08745Sheppo 	}
7092d10e4ef2Snarayan 	mutex_exit(&addr->dstate_lock);
70931ae08745Sheppo 
70941ae08745Sheppo 	/* ring full */
70951ae08745Sheppo 	if (ret == 1) {
7096d10e4ef2Snarayan 		D2(NULL, "%s: no desp free: started at %d", __func__,
7097d10e4ef2Snarayan 			dringp->end_idx);
70981ae08745Sheppo 	}
70991ae08745Sheppo 
71001ae08745Sheppo 	D1(NULL, "%s: exit\n", __func__);
71011ae08745Sheppo 
71021ae08745Sheppo 	return (ret);
71031ae08745Sheppo }
71041ae08745Sheppo 
71051ae08745Sheppo /*
71061ae08745Sheppo  * Map from a dring identifier to the ring itself. Returns
71071ae08745Sheppo  * pointer to ring or NULL if no match found.
71081ae08745Sheppo  */
71091ae08745Sheppo static dring_info_t *
71101ae08745Sheppo vsw_ident2dring(lane_t *lane, uint64_t ident)
71111ae08745Sheppo {
71121ae08745Sheppo 	dring_info_t	*dp = NULL;
71131ae08745Sheppo 
71141ae08745Sheppo 	if ((dp = lane->dringp) == NULL) {
71151ae08745Sheppo 		return (NULL);
71161ae08745Sheppo 	} else {
71171ae08745Sheppo 		if (dp->ident == ident)
71181ae08745Sheppo 			return (dp);
71191ae08745Sheppo 
71201ae08745Sheppo 		while (dp != NULL) {
71211ae08745Sheppo 			if (dp->ident == ident)
71221ae08745Sheppo 				break;
71231ae08745Sheppo 			dp = dp->next;
71241ae08745Sheppo 		}
71251ae08745Sheppo 	}
71261ae08745Sheppo 
71271ae08745Sheppo 	return (dp);
71281ae08745Sheppo }
71291ae08745Sheppo 
71301ae08745Sheppo /*
71311ae08745Sheppo  * Set the default lane attributes. These are copied into
71321ae08745Sheppo  * the attr msg we send to our peer. If they are not acceptable
71331ae08745Sheppo  * then (currently) the handshake ends.
71341ae08745Sheppo  */
71351ae08745Sheppo static void
71361ae08745Sheppo vsw_set_lane_attr(vsw_t *vswp, lane_t *lp)
71371ae08745Sheppo {
71381ae08745Sheppo 	bzero(lp, sizeof (lane_t));
71391ae08745Sheppo 
71401ae08745Sheppo 	READ_ENTER(&vswp->if_lockrw);
71411ae08745Sheppo 	ether_copy(&(vswp->if_addr), &(lp->addr));
71421ae08745Sheppo 	RW_EXIT(&vswp->if_lockrw);
71431ae08745Sheppo 
71441ae08745Sheppo 	lp->mtu = VSW_MTU;
71451ae08745Sheppo 	lp->addr_type = ADDR_TYPE_MAC;
71461ae08745Sheppo 	lp->xfer_mode = VIO_DRING_MODE;
71471ae08745Sheppo 	lp->ack_freq = 0;	/* for shared mode */
7148d10e4ef2Snarayan 
7149d10e4ef2Snarayan 	mutex_enter(&lp->seq_lock);
71501ae08745Sheppo 	lp->seq_num = VNET_ISS;
7151d10e4ef2Snarayan 	mutex_exit(&lp->seq_lock);
71521ae08745Sheppo }
71531ae08745Sheppo 
71541ae08745Sheppo /*
71551ae08745Sheppo  * Verify that the attributes are acceptable.
71561ae08745Sheppo  *
71571ae08745Sheppo  * FUTURE: If some attributes are not acceptable, change them
71581ae08745Sheppo  * our desired values.
71591ae08745Sheppo  */
71601ae08745Sheppo static int
71611ae08745Sheppo vsw_check_attr(vnet_attr_msg_t *pkt, vsw_port_t *port)
71621ae08745Sheppo {
71631ae08745Sheppo 	int	ret = 0;
71641ae08745Sheppo 
71651ae08745Sheppo 	D1(NULL, "vsw_check_attr enter\n");
71661ae08745Sheppo 
71671ae08745Sheppo 	/*
71681ae08745Sheppo 	 * Note we currently only support in-band descriptors
71691ae08745Sheppo 	 * and descriptor rings, not packet based transfer (VIO_PKT_MODE)
71701ae08745Sheppo 	 */
71711ae08745Sheppo 	if ((pkt->xfer_mode != VIO_DESC_MODE) &&
71721ae08745Sheppo 			(pkt->xfer_mode != VIO_DRING_MODE)) {
71731ae08745Sheppo 		D2(NULL, "vsw_check_attr: unknown mode %x\n",
71741ae08745Sheppo 			pkt->xfer_mode);
71751ae08745Sheppo 		ret = 1;
71761ae08745Sheppo 	}
71771ae08745Sheppo 
71781ae08745Sheppo 	/* Only support MAC addresses at moment. */
71791ae08745Sheppo 	if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) {
71801ae08745Sheppo 		D2(NULL, "vsw_check_attr: invalid addr_type %x, "
71811ae08745Sheppo 			"or address 0x%llx\n", pkt->addr_type,
71821ae08745Sheppo 			pkt->addr);
71831ae08745Sheppo 		ret = 1;
71841ae08745Sheppo 	}
71851ae08745Sheppo 
71861ae08745Sheppo 	/*
71871ae08745Sheppo 	 * MAC address supplied by device should match that stored
71881ae08745Sheppo 	 * in the vsw-port OBP node. Need to decide what to do if they
71891ae08745Sheppo 	 * don't match, for the moment just warn but don't fail.
71901ae08745Sheppo 	 */
71911ae08745Sheppo 	if (bcmp(&pkt->addr, &port->p_macaddr, ETHERADDRL) != 0) {
71921ae08745Sheppo 		DERR(NULL, "vsw_check_attr: device supplied address "
71931ae08745Sheppo 			"0x%llx doesn't match node address 0x%llx\n",
71941ae08745Sheppo 			pkt->addr, port->p_macaddr);
71951ae08745Sheppo 	}
71961ae08745Sheppo 
71971ae08745Sheppo 	/*
71981ae08745Sheppo 	 * Ack freq only makes sense in pkt mode, in shared
71991ae08745Sheppo 	 * mode the ring descriptors say whether or not to
72001ae08745Sheppo 	 * send back an ACK.
72011ae08745Sheppo 	 */
72021ae08745Sheppo 	if ((pkt->xfer_mode == VIO_DRING_MODE) &&
72031ae08745Sheppo 				(pkt->ack_freq > 0)) {
72041ae08745Sheppo 		D2(NULL, "vsw_check_attr: non zero ack freq "
72051ae08745Sheppo 			" in SHM mode\n");
72061ae08745Sheppo 		ret = 1;
72071ae08745Sheppo 	}
72081ae08745Sheppo 
72091ae08745Sheppo 	/*
72101ae08745Sheppo 	 * Note: for the moment we only support ETHER
72111ae08745Sheppo 	 * frames. This may change in the future.
72121ae08745Sheppo 	 */
72131ae08745Sheppo 	if ((pkt->mtu > VSW_MTU) || (pkt->mtu <= 0)) {
72141ae08745Sheppo 		D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n",
72151ae08745Sheppo 			pkt->mtu);
72161ae08745Sheppo 		ret = 1;
72171ae08745Sheppo 	}
72181ae08745Sheppo 
72191ae08745Sheppo 	D1(NULL, "vsw_check_attr exit\n");
72201ae08745Sheppo 
72211ae08745Sheppo 	return (ret);
72221ae08745Sheppo }
72231ae08745Sheppo 
72241ae08745Sheppo /*
72251ae08745Sheppo  * Returns 1 if there is a problem, 0 otherwise.
72261ae08745Sheppo  */
72271ae08745Sheppo static int
72281ae08745Sheppo vsw_check_dring_info(vio_dring_reg_msg_t *pkt)
72291ae08745Sheppo {
72301ae08745Sheppo 	_NOTE(ARGUNUSED(pkt))
72311ae08745Sheppo 
72321ae08745Sheppo 	int	ret = 0;
72331ae08745Sheppo 
72341ae08745Sheppo 	D1(NULL, "vsw_check_dring_info enter\n");
72351ae08745Sheppo 
72361ae08745Sheppo 	if ((pkt->num_descriptors == 0) ||
72371ae08745Sheppo 		(pkt->descriptor_size == 0) ||
72381ae08745Sheppo 		(pkt->ncookies != 1)) {
72391ae08745Sheppo 		DERR(NULL, "vsw_check_dring_info: invalid dring msg");
72401ae08745Sheppo 		ret = 1;
72411ae08745Sheppo 	}
72421ae08745Sheppo 
72431ae08745Sheppo 	D1(NULL, "vsw_check_dring_info exit\n");
72441ae08745Sheppo 
72451ae08745Sheppo 	return (ret);
72461ae08745Sheppo }
72471ae08745Sheppo 
72481ae08745Sheppo /*
72491ae08745Sheppo  * Returns 1 if two memory cookies match. Otherwise returns 0.
72501ae08745Sheppo  */
72511ae08745Sheppo static int
72521ae08745Sheppo vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2)
72531ae08745Sheppo {
72541ae08745Sheppo 	if ((m1->addr != m2->addr) ||
72551ae08745Sheppo 		(m2->size != m2->size)) {
72561ae08745Sheppo 		return (0);
72571ae08745Sheppo 	} else {
72581ae08745Sheppo 		return (1);
72591ae08745Sheppo 	}
72601ae08745Sheppo }
72611ae08745Sheppo 
72621ae08745Sheppo /*
72631ae08745Sheppo  * Returns 1 if ring described in reg message matches that
72641ae08745Sheppo  * described by dring_info structure. Otherwise returns 0.
72651ae08745Sheppo  */
72661ae08745Sheppo static int
72671ae08745Sheppo vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg)
72681ae08745Sheppo {
72691ae08745Sheppo 	if ((msg->descriptor_size != dp->descriptor_size) ||
72701ae08745Sheppo 		(msg->num_descriptors != dp->num_descriptors) ||
72711ae08745Sheppo 		(msg->ncookies != dp->ncookies) ||
72721ae08745Sheppo 		!(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) {
72731ae08745Sheppo 		return (0);
72741ae08745Sheppo 	} else {
72751ae08745Sheppo 		return (1);
72761ae08745Sheppo 	}
72771ae08745Sheppo 
72781ae08745Sheppo }
72791ae08745Sheppo 
72801ae08745Sheppo static caddr_t
72811ae08745Sheppo vsw_print_ethaddr(uint8_t *a, char *ebuf)
72821ae08745Sheppo {
72831ae08745Sheppo 	(void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x",
72841ae08745Sheppo 	    a[0], a[1], a[2], a[3], a[4], a[5]);
72851ae08745Sheppo 	return (ebuf);
72861ae08745Sheppo }
72871ae08745Sheppo 
72881ae08745Sheppo /*
72891ae08745Sheppo  * Reset and free all the resources associated with
72901ae08745Sheppo  * the channel.
72911ae08745Sheppo  */
72921ae08745Sheppo static void
72931ae08745Sheppo vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir)
72941ae08745Sheppo {
72951ae08745Sheppo 	dring_info_t		*dp, *dpp;
72961ae08745Sheppo 	lane_t			*lp = NULL;
72971ae08745Sheppo 	int			rv = 0;
72981ae08745Sheppo 
72991ae08745Sheppo 	ASSERT(ldcp != NULL);
73001ae08745Sheppo 
73011ae08745Sheppo 	D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id);
73021ae08745Sheppo 
73031ae08745Sheppo 	if (dir == INBOUND) {
73041ae08745Sheppo 		D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane"
73051ae08745Sheppo 			" of channel %lld", __func__, ldcp->ldc_id);
73061ae08745Sheppo 		lp = &ldcp->lane_in;
73071ae08745Sheppo 	} else {
73081ae08745Sheppo 		D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane"
73091ae08745Sheppo 			" of channel %lld", __func__, ldcp->ldc_id);
73101ae08745Sheppo 		lp = &ldcp->lane_out;
73111ae08745Sheppo 	}
73121ae08745Sheppo 
73131ae08745Sheppo 	lp->lstate = VSW_LANE_INACTIV;
7314d10e4ef2Snarayan 	mutex_enter(&lp->seq_lock);
73151ae08745Sheppo 	lp->seq_num = VNET_ISS;
7316d10e4ef2Snarayan 	mutex_exit(&lp->seq_lock);
73171ae08745Sheppo 	if (lp->dringp) {
73181ae08745Sheppo 		if (dir == INBOUND) {
73191ae08745Sheppo 			dp = lp->dringp;
73201ae08745Sheppo 			while (dp != NULL) {
73211ae08745Sheppo 				dpp = dp->next;
73221ae08745Sheppo 				if (dp->handle != NULL)
73231ae08745Sheppo 					(void) ldc_mem_dring_unmap(dp->handle);
73241ae08745Sheppo 				kmem_free(dp, sizeof (dring_info_t));
73251ae08745Sheppo 				dp = dpp;
73261ae08745Sheppo 			}
73271ae08745Sheppo 		} else {
73281ae08745Sheppo 			/*
73291ae08745Sheppo 			 * unbind, destroy exported dring, free dring struct
73301ae08745Sheppo 			 */
73311ae08745Sheppo 			dp = lp->dringp;
73321ae08745Sheppo 			rv = vsw_free_ring(dp);
73331ae08745Sheppo 		}
73341ae08745Sheppo 		if (rv == 0) {
73351ae08745Sheppo 			lp->dringp = NULL;
73361ae08745Sheppo 		}
73371ae08745Sheppo 	}
73381ae08745Sheppo 
73391ae08745Sheppo 	D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id);
73401ae08745Sheppo }
73411ae08745Sheppo 
73421ae08745Sheppo /*
73431ae08745Sheppo  * Free ring and all associated resources.
73441ae08745Sheppo  */
73451ae08745Sheppo static int
73461ae08745Sheppo vsw_free_ring(dring_info_t *dp)
73471ae08745Sheppo {
73481ae08745Sheppo 	vsw_private_desc_t	*paddr = NULL;
73491ae08745Sheppo 	dring_info_t		*dpp;
73501ae08745Sheppo 	int			i, rv = 1;
73511ae08745Sheppo 
73521ae08745Sheppo 	while (dp != NULL) {
73531ae08745Sheppo 		mutex_enter(&dp->dlock);
73541ae08745Sheppo 		dpp = dp->next;
73551ae08745Sheppo 		if (dp->priv_addr != NULL) {
73561ae08745Sheppo 			/*
73571ae08745Sheppo 			 * First unbind and free the memory handles
73581ae08745Sheppo 			 * stored in each descriptor within the ring.
73591ae08745Sheppo 			 */
73601ae08745Sheppo 			for (i = 0; i < VSW_RING_NUM_EL; i++) {
73611ae08745Sheppo 				paddr = (vsw_private_desc_t *)
73621ae08745Sheppo 						dp->priv_addr + i;
73631ae08745Sheppo 				if (paddr->memhandle != NULL) {
73641ae08745Sheppo 					if (paddr->bound == 1) {
73651ae08745Sheppo 						rv = ldc_mem_unbind_handle(
73661ae08745Sheppo 							paddr->memhandle);
73671ae08745Sheppo 
73681ae08745Sheppo 						if (rv != 0) {
73691ae08745Sheppo 							DERR(NULL, "error "
73701ae08745Sheppo 							"unbinding handle for "
73711ae08745Sheppo 							"ring 0x%llx at pos %d",
73721ae08745Sheppo 							dp, i);
73731ae08745Sheppo 							mutex_exit(&dp->dlock);
73741ae08745Sheppo 							return (rv);
73751ae08745Sheppo 						}
73761ae08745Sheppo 						paddr->bound = 0;
73771ae08745Sheppo 					}
73781ae08745Sheppo 
73791ae08745Sheppo 					rv = ldc_mem_free_handle(
73801ae08745Sheppo 							paddr->memhandle);
73811ae08745Sheppo 					if (rv != 0) {
73821ae08745Sheppo 						DERR(NULL, "error freeing "
73831ae08745Sheppo 							"handle for ring "
73841ae08745Sheppo 							"0x%llx at pos %d",
73851ae08745Sheppo 							dp, i);
73861ae08745Sheppo 						mutex_exit(&dp->dlock);
73871ae08745Sheppo 						return (rv);
73881ae08745Sheppo 					}
73891ae08745Sheppo 					paddr->memhandle = NULL;
73901ae08745Sheppo 				}
7391d10e4ef2Snarayan 				mutex_destroy(&paddr->dstate_lock);
73921ae08745Sheppo 			}
73931ae08745Sheppo 			kmem_free(dp->priv_addr, (sizeof (vsw_private_desc_t)
73941ae08745Sheppo 					* VSW_RING_NUM_EL));
73951ae08745Sheppo 		}
73961ae08745Sheppo 
73971ae08745Sheppo 		/*
73981ae08745Sheppo 		 * Now unbind and destroy the ring itself.
73991ae08745Sheppo 		 */
74001ae08745Sheppo 		if (dp->handle != NULL) {
74011ae08745Sheppo 			(void) ldc_mem_dring_unbind(dp->handle);
74021ae08745Sheppo 			(void) ldc_mem_dring_destroy(dp->handle);
74031ae08745Sheppo 		}
74041ae08745Sheppo 
74051ae08745Sheppo 		if (dp->data_addr != NULL) {
74061ae08745Sheppo 			kmem_free(dp->data_addr, dp->data_sz);
74071ae08745Sheppo 		}
74081ae08745Sheppo 
74091ae08745Sheppo 		mutex_exit(&dp->dlock);
74101ae08745Sheppo 		mutex_destroy(&dp->dlock);
7411d10e4ef2Snarayan 		mutex_destroy(&dp->restart_lock);
74121ae08745Sheppo 		kmem_free(dp, sizeof (dring_info_t));
74131ae08745Sheppo 
74141ae08745Sheppo 		dp = dpp;
74151ae08745Sheppo 	}
74161ae08745Sheppo 	return (0);
74171ae08745Sheppo }
74181ae08745Sheppo 
74191ae08745Sheppo /*
74201ae08745Sheppo  * Debugging routines
74211ae08745Sheppo  */
74221ae08745Sheppo static void
74231ae08745Sheppo display_state(void)
74241ae08745Sheppo {
74251ae08745Sheppo 	vsw_t		*vswp;
74261ae08745Sheppo 	vsw_port_list_t	*plist;
74271ae08745Sheppo 	vsw_port_t 	*port;
74281ae08745Sheppo 	vsw_ldc_list_t	*ldcl;
74291ae08745Sheppo 	vsw_ldc_t 	*ldcp;
74301ae08745Sheppo 
74311ae08745Sheppo 	cmn_err(CE_NOTE, "***** system state *****");
74321ae08745Sheppo 
74331ae08745Sheppo 	for (vswp = vsw_head; vswp; vswp = vswp->next) {
74341ae08745Sheppo 		plist = &vswp->plist;
74351ae08745Sheppo 		READ_ENTER(&plist->lockrw);
74361ae08745Sheppo 		cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
74371ae08745Sheppo 			vswp->instance, plist->num_ports);
74381ae08745Sheppo 
74391ae08745Sheppo 		for (port = plist->head; port != NULL; port = port->p_next) {
74401ae08745Sheppo 			ldcl = &port->p_ldclist;
74411ae08745Sheppo 			cmn_err(CE_CONT, "port %d : %d ldcs attached\n",
74421ae08745Sheppo 				port->p_instance, ldcl->num_ldcs);
74431ae08745Sheppo 			READ_ENTER(&ldcl->lockrw);
74441ae08745Sheppo 			ldcp = ldcl->head;
74451ae08745Sheppo 			for (; ldcp != NULL; ldcp = ldcp->ldc_next) {
74461ae08745Sheppo 				cmn_err(CE_CONT, "chan %lu : dev %d : "
74471ae08745Sheppo 					"status %d : phase %u\n",
74481ae08745Sheppo 					ldcp->ldc_id, ldcp->dev_class,
74491ae08745Sheppo 					ldcp->ldc_status, ldcp->hphase);
74501ae08745Sheppo 				cmn_err(CE_CONT, "chan %lu : lsession %lu : "
74511ae08745Sheppo 					"psession %lu\n",
74521ae08745Sheppo 					ldcp->ldc_id,
74531ae08745Sheppo 					ldcp->local_session,
74541ae08745Sheppo 					ldcp->peer_session);
74551ae08745Sheppo 
74561ae08745Sheppo 				cmn_err(CE_CONT, "Inbound lane:\n");
74571ae08745Sheppo 				display_lane(&ldcp->lane_in);
74581ae08745Sheppo 				cmn_err(CE_CONT, "Outbound lane:\n");
74591ae08745Sheppo 				display_lane(&ldcp->lane_out);
74601ae08745Sheppo 			}
74611ae08745Sheppo 			RW_EXIT(&ldcl->lockrw);
74621ae08745Sheppo 		}
74631ae08745Sheppo 		RW_EXIT(&plist->lockrw);
74641ae08745Sheppo 	}
74651ae08745Sheppo 	cmn_err(CE_NOTE, "***** system state *****");
74661ae08745Sheppo }
74671ae08745Sheppo 
74681ae08745Sheppo static void
74691ae08745Sheppo display_lane(lane_t *lp)
74701ae08745Sheppo {
74711ae08745Sheppo 	dring_info_t	*drp;
74721ae08745Sheppo 
74731ae08745Sheppo 	cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n",
74741ae08745Sheppo 		lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu);
74751ae08745Sheppo 	cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n",
74761ae08745Sheppo 		lp->addr_type, lp->addr, lp->xfer_mode);
74771ae08745Sheppo 	cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp);
74781ae08745Sheppo 
74791ae08745Sheppo 	cmn_err(CE_CONT, "Dring info:\n");
74801ae08745Sheppo 	for (drp = lp->dringp; drp != NULL; drp = drp->next) {
74811ae08745Sheppo 		cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n",
74821ae08745Sheppo 			drp->num_descriptors, drp->descriptor_size);
74831ae08745Sheppo 		cmn_err(CE_CONT, "\thandle 0x%lx\n", drp->handle);
74841ae08745Sheppo 		cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n",
74851ae08745Sheppo 			(uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr);
74861ae08745Sheppo 		cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n",
74871ae08745Sheppo 			drp->ident, drp->end_idx);
74881ae08745Sheppo 		display_ring(drp);
74891ae08745Sheppo 	}
74901ae08745Sheppo }
74911ae08745Sheppo 
74921ae08745Sheppo static void
74931ae08745Sheppo display_ring(dring_info_t *dringp)
74941ae08745Sheppo {
74951ae08745Sheppo 	uint64_t		i;
74961ae08745Sheppo 	uint64_t		priv_count = 0;
74971ae08745Sheppo 	uint64_t		pub_count = 0;
74981ae08745Sheppo 	vnet_public_desc_t	*pub_addr = NULL;
74991ae08745Sheppo 	vsw_private_desc_t	*priv_addr = NULL;
75001ae08745Sheppo 
75011ae08745Sheppo 	for (i = 0; i < VSW_RING_NUM_EL; i++) {
75021ae08745Sheppo 		if (dringp->pub_addr != NULL) {
75031ae08745Sheppo 			pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i;
75041ae08745Sheppo 
75051ae08745Sheppo 			if (pub_addr->hdr.dstate == VIO_DESC_FREE)
75061ae08745Sheppo 				pub_count++;
75071ae08745Sheppo 		}
75081ae08745Sheppo 
75091ae08745Sheppo 		if (dringp->priv_addr != NULL) {
75101ae08745Sheppo 			priv_addr =
75111ae08745Sheppo 				(vsw_private_desc_t *)dringp->priv_addr + i;
75121ae08745Sheppo 
75131ae08745Sheppo 			if (priv_addr->dstate == VIO_DESC_FREE)
75141ae08745Sheppo 				priv_count++;
75151ae08745Sheppo 		}
75161ae08745Sheppo 	}
75171ae08745Sheppo 	cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n",
75181ae08745Sheppo 			i, priv_count, pub_count);
75191ae08745Sheppo }
75201ae08745Sheppo 
75211ae08745Sheppo static void
75221ae08745Sheppo dump_flags(uint64_t state)
75231ae08745Sheppo {
75241ae08745Sheppo 	int	i;
75251ae08745Sheppo 
75261ae08745Sheppo 	typedef struct flag_name {
75271ae08745Sheppo 		int	flag_val;
75281ae08745Sheppo 		char	*flag_name;
75291ae08745Sheppo 	} flag_name_t;
75301ae08745Sheppo 
75311ae08745Sheppo 	flag_name_t	flags[] = {
75321ae08745Sheppo 		VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT",
75331ae08745Sheppo 		VSW_VER_INFO_RECV, "VSW_VER_INFO_RECV",
75341ae08745Sheppo 		VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV",
75351ae08745Sheppo 		VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT",
75361ae08745Sheppo 		VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV",
75371ae08745Sheppo 		VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT",
75381ae08745Sheppo 		VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT",
75391ae08745Sheppo 		VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV",
75401ae08745Sheppo 		VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT",
75411ae08745Sheppo 		VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV",
75421ae08745Sheppo 		VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT",
75431ae08745Sheppo 		VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV",
75441ae08745Sheppo 		VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT",
75451ae08745Sheppo 		VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV",
75461ae08745Sheppo 		VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT",
75471ae08745Sheppo 		VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV",
75481ae08745Sheppo 		VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT",
75491ae08745Sheppo 		VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV",
75501ae08745Sheppo 		VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT",
75511ae08745Sheppo 		VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV",
75521ae08745Sheppo 		VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT",
75531ae08745Sheppo 		VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV",
75541ae08745Sheppo 		VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT",
75551ae08745Sheppo 		VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV",
75561ae08745Sheppo 		VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT",
75571ae08745Sheppo 		VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV",
75581ae08745Sheppo 		VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT",
75591ae08745Sheppo 		VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV",
75601ae08745Sheppo 		VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT",
75611ae08745Sheppo 		VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV",
75621ae08745Sheppo 		VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"};
75631ae08745Sheppo 
75641ae08745Sheppo 	DERR(NULL, "DUMP_FLAGS: %llx\n", state);
75651ae08745Sheppo 	for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) {
75661ae08745Sheppo 		if (state & flags[i].flag_val)
75671ae08745Sheppo 			DERR(NULL, "DUMP_FLAGS %s", flags[i].flag_name);
75681ae08745Sheppo 	}
75691ae08745Sheppo }
7570