11ae08745Sheppo /* 21ae08745Sheppo * CDDL HEADER START 31ae08745Sheppo * 41ae08745Sheppo * The contents of this file are subject to the terms of the 51ae08745Sheppo * Common Development and Distribution License (the "License"). 61ae08745Sheppo * You may not use this file except in compliance with the License. 71ae08745Sheppo * 81ae08745Sheppo * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 91ae08745Sheppo * or http://www.opensolaris.org/os/licensing. 101ae08745Sheppo * See the License for the specific language governing permissions 111ae08745Sheppo * and limitations under the License. 121ae08745Sheppo * 131ae08745Sheppo * When distributing Covered Code, include this CDDL HEADER in each 141ae08745Sheppo * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 151ae08745Sheppo * If applicable, add the following below this CDDL HEADER, with the 161ae08745Sheppo * fields enclosed by brackets "[]" replaced with your own identifying 171ae08745Sheppo * information: Portions Copyright [yyyy] [name of copyright owner] 181ae08745Sheppo * 191ae08745Sheppo * CDDL HEADER END 201ae08745Sheppo */ 211ae08745Sheppo 221ae08745Sheppo /* 23b071742bSsg70180 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 241ae08745Sheppo * Use is subject to license terms. 
251ae08745Sheppo */ 261ae08745Sheppo 271ae08745Sheppo #pragma ident "%Z%%M% %I% %E% SMI" 281ae08745Sheppo 291ae08745Sheppo #include <sys/types.h> 301ae08745Sheppo #include <sys/errno.h> 311ae08745Sheppo #include <sys/debug.h> 321ae08745Sheppo #include <sys/time.h> 331ae08745Sheppo #include <sys/sysmacros.h> 341ae08745Sheppo #include <sys/systm.h> 351ae08745Sheppo #include <sys/user.h> 361ae08745Sheppo #include <sys/stropts.h> 371ae08745Sheppo #include <sys/stream.h> 381ae08745Sheppo #include <sys/strlog.h> 391ae08745Sheppo #include <sys/strsubr.h> 401ae08745Sheppo #include <sys/cmn_err.h> 411ae08745Sheppo #include <sys/cpu.h> 421ae08745Sheppo #include <sys/kmem.h> 431ae08745Sheppo #include <sys/conf.h> 441ae08745Sheppo #include <sys/ddi.h> 451ae08745Sheppo #include <sys/sunddi.h> 461ae08745Sheppo #include <sys/ksynch.h> 471ae08745Sheppo #include <sys/stat.h> 481ae08745Sheppo #include <sys/kstat.h> 491ae08745Sheppo #include <sys/vtrace.h> 501ae08745Sheppo #include <sys/strsun.h> 511ae08745Sheppo #include <sys/dlpi.h> 521ae08745Sheppo #include <sys/ethernet.h> 531ae08745Sheppo #include <net/if.h> 541ae08745Sheppo #include <sys/varargs.h> 551ae08745Sheppo #include <sys/machsystm.h> 561ae08745Sheppo #include <sys/modctl.h> 571ae08745Sheppo #include <sys/modhash.h> 581ae08745Sheppo #include <sys/mac.h> 59ba2e4443Sseb #include <sys/mac_ether.h> 601ae08745Sheppo #include <sys/taskq.h> 611ae08745Sheppo #include <sys/note.h> 621ae08745Sheppo #include <sys/mach_descrip.h> 631ae08745Sheppo #include <sys/mac.h> 641ae08745Sheppo #include <sys/mdeg.h> 651ae08745Sheppo #include <sys/ldc.h> 661ae08745Sheppo #include <sys/vsw_fdb.h> 671ae08745Sheppo #include <sys/vsw.h> 681ae08745Sheppo #include <sys/vio_mailbox.h> 691ae08745Sheppo #include <sys/vnet_mailbox.h> 701ae08745Sheppo #include <sys/vnet_common.h> 71d10e4ef2Snarayan #include <sys/vio_util.h> 72d10e4ef2Snarayan #include <sys/sdt.h> 73*19b65a69Ssb155480 #include <sys/atomic.h> 741ae08745Sheppo 751ae08745Sheppo /* 
761ae08745Sheppo * Function prototypes. 771ae08745Sheppo */ 781ae08745Sheppo static int vsw_attach(dev_info_t *, ddi_attach_cmd_t); 791ae08745Sheppo static int vsw_detach(dev_info_t *, ddi_detach_cmd_t); 801ae08745Sheppo static int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 8134683adeSsg70180 static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *); 8234683adeSsg70180 static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *); 83*19b65a69Ssb155480 static void vsw_setup_switching_timeout(void *arg); 84*19b65a69Ssb155480 static void vsw_stop_switching_timeout(vsw_t *vswp); 8534683adeSsg70180 static int vsw_setup_switching(vsw_t *); 861ae08745Sheppo static int vsw_setup_layer2(vsw_t *); 871ae08745Sheppo static int vsw_setup_layer3(vsw_t *); 881ae08745Sheppo 897636cb21Slm66018 /* MAC Ring table functions. */ 907636cb21Slm66018 static void vsw_mac_ring_tbl_init(vsw_t *vswp); 917636cb21Slm66018 static void vsw_mac_ring_tbl_destroy(vsw_t *vswp); 927636cb21Slm66018 static void vsw_queue_worker(vsw_mac_ring_t *rrp); 937636cb21Slm66018 static void vsw_queue_stop(vsw_queue_t *vqp); 947636cb21Slm66018 static vsw_queue_t *vsw_queue_create(); 957636cb21Slm66018 static void vsw_queue_destroy(vsw_queue_t *vqp); 967636cb21Slm66018 971ae08745Sheppo /* MAC layer routines */ 987636cb21Slm66018 static mac_resource_handle_t vsw_mac_ring_add_cb(void *arg, 997636cb21Slm66018 mac_resource_t *mrp); 100e1ebb9ecSlm66018 static int vsw_get_hw_maddr(vsw_t *); 1015f94e909Ssg70180 static int vsw_set_hw(vsw_t *, vsw_port_t *, int); 1025f94e909Ssg70180 static int vsw_set_hw_addr(vsw_t *, mac_multi_addr_t *); 1035f94e909Ssg70180 static int vsw_set_hw_promisc(vsw_t *, vsw_port_t *, int); 1045f94e909Ssg70180 static int vsw_unset_hw(vsw_t *, vsw_port_t *, int); 1055f94e909Ssg70180 static int vsw_unset_hw_addr(vsw_t *, int); 1065f94e909Ssg70180 static int vsw_unset_hw_promisc(vsw_t *, vsw_port_t *, int); 1075f94e909Ssg70180 static void 
vsw_reconfig_hw(vsw_t *); 1085f94e909Ssg70180 static int vsw_prog_if(vsw_t *); 1095f94e909Ssg70180 static int vsw_prog_ports(vsw_t *); 1107636cb21Slm66018 static int vsw_mac_attach(vsw_t *vswp); 1117636cb21Slm66018 static void vsw_mac_detach(vsw_t *vswp); 112*19b65a69Ssb155480 static int vsw_mac_open(vsw_t *vswp); 113*19b65a69Ssb155480 static void vsw_mac_close(vsw_t *vswp); 114*19b65a69Ssb155480 static void vsw_set_addrs(vsw_t *vswp); 115*19b65a69Ssb155480 static void vsw_unset_addrs(vsw_t *vswp); 1167636cb21Slm66018 1177636cb21Slm66018 static void vsw_rx_queue_cb(void *, mac_resource_handle_t, mblk_t *); 1181ae08745Sheppo static void vsw_rx_cb(void *, mac_resource_handle_t, mblk_t *); 1191ae08745Sheppo static mblk_t *vsw_tx_msg(vsw_t *, mblk_t *); 1201ae08745Sheppo static int vsw_mac_register(vsw_t *); 1211ae08745Sheppo static int vsw_mac_unregister(vsw_t *); 122ba2e4443Sseb static int vsw_m_stat(void *, uint_t, uint64_t *); 1231ae08745Sheppo static void vsw_m_stop(void *arg); 1241ae08745Sheppo static int vsw_m_start(void *arg); 1251ae08745Sheppo static int vsw_m_unicst(void *arg, const uint8_t *); 1261ae08745Sheppo static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *); 1271ae08745Sheppo static int vsw_m_promisc(void *arg, boolean_t); 1281ae08745Sheppo static mblk_t *vsw_m_tx(void *arg, mblk_t *); 1291ae08745Sheppo 1301ae08745Sheppo /* MDEG routines */ 13134683adeSsg70180 static int vsw_mdeg_register(vsw_t *vswp); 1321ae08745Sheppo static void vsw_mdeg_unregister(vsw_t *vswp); 1331ae08745Sheppo static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *); 13434683adeSsg70180 static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *); 135*19b65a69Ssb155480 static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t); 13634683adeSsg70180 static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t); 137*19b65a69Ssb155480 static int vsw_read_mdprops(vsw_t *vswp); 1381ae08745Sheppo 1391ae08745Sheppo /* Port add/deletion routines */ 
1401ae08745Sheppo static int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 1411ae08745Sheppo static int vsw_port_attach(vsw_t *vswp, int p_instance, 1421ae08745Sheppo uint64_t *ldcids, int nids, struct ether_addr *macaddr); 1431ae08745Sheppo static int vsw_detach_ports(vsw_t *vswp); 1441ae08745Sheppo static int vsw_port_detach(vsw_t *vswp, int p_instance); 1451ae08745Sheppo static int vsw_port_delete(vsw_port_t *port); 1461ae08745Sheppo static int vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id); 1471ae08745Sheppo static int vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id); 1481ae08745Sheppo static int vsw_init_ldcs(vsw_port_t *port); 1491ae08745Sheppo static int vsw_uninit_ldcs(vsw_port_t *port); 1501ae08745Sheppo static int vsw_ldc_init(vsw_ldc_t *ldcp); 1511ae08745Sheppo static int vsw_ldc_uninit(vsw_ldc_t *ldcp); 1521ae08745Sheppo static int vsw_drain_ldcs(vsw_port_t *port); 1531ae08745Sheppo static int vsw_drain_port_taskq(vsw_port_t *port); 1541ae08745Sheppo static void vsw_marker_task(void *); 1551ae08745Sheppo static vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 1561ae08745Sheppo static int vsw_plist_del_node(vsw_t *, vsw_port_t *port); 1571ae08745Sheppo 1581ae08745Sheppo /* Interrupt routines */ 1591ae08745Sheppo static uint_t vsw_ldc_cb(uint64_t cb, caddr_t arg); 1601ae08745Sheppo 1611ae08745Sheppo /* Handshake routines */ 162b071742bSsg70180 static void vsw_ldc_reinit(vsw_ldc_t *); 163b071742bSsg70180 static void vsw_process_conn_evt(vsw_ldc_t *, uint16_t); 164b071742bSsg70180 static void vsw_conn_task(void *); 1651ae08745Sheppo static int vsw_check_flag(vsw_ldc_t *, int, uint64_t); 1661ae08745Sheppo static void vsw_next_milestone(vsw_ldc_t *); 1671ae08745Sheppo static int vsw_supported_version(vio_ver_msg_t *); 1681ae08745Sheppo 1691ae08745Sheppo /* Data processing routines */ 1701ae08745Sheppo static void vsw_process_pkt(void *); 1711ae08745Sheppo static void vsw_dispatch_ctrl_task(vsw_ldc_t *, void *, vio_msg_tag_t); 
1721ae08745Sheppo static void vsw_process_ctrl_pkt(void *); 1731ae08745Sheppo static void vsw_process_ctrl_ver_pkt(vsw_ldc_t *, void *); 1741ae08745Sheppo static void vsw_process_ctrl_attr_pkt(vsw_ldc_t *, void *); 1751ae08745Sheppo static void vsw_process_ctrl_mcst_pkt(vsw_ldc_t *, void *); 1761ae08745Sheppo static void vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *, void *); 1771ae08745Sheppo static void vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *, void *); 1781ae08745Sheppo static void vsw_process_ctrl_rdx_pkt(vsw_ldc_t *, void *); 1791ae08745Sheppo static void vsw_process_data_pkt(vsw_ldc_t *, void *, vio_msg_tag_t); 1801ae08745Sheppo static void vsw_process_data_dring_pkt(vsw_ldc_t *, void *); 1811ae08745Sheppo static void vsw_process_data_raw_pkt(vsw_ldc_t *, void *); 1821ae08745Sheppo static void vsw_process_data_ibnd_pkt(vsw_ldc_t *, void *); 1831ae08745Sheppo static void vsw_process_err_pkt(vsw_ldc_t *, void *, vio_msg_tag_t); 1841ae08745Sheppo 1851ae08745Sheppo /* Switching/data transmit routines */ 1861ae08745Sheppo static void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 1871ae08745Sheppo vsw_port_t *port, mac_resource_handle_t); 1881ae08745Sheppo static void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 1891ae08745Sheppo vsw_port_t *port, mac_resource_handle_t); 1901ae08745Sheppo static int vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, 1911ae08745Sheppo vsw_port_t *port); 1921ae08745Sheppo static int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, 1931ae08745Sheppo vsw_port_t *port); 1941ae08745Sheppo static int vsw_portsend(vsw_port_t *, mblk_t *); 1951ae08745Sheppo static int vsw_dringsend(vsw_ldc_t *, mblk_t *); 1961ae08745Sheppo static int vsw_descrsend(vsw_ldc_t *, mblk_t *); 1971ae08745Sheppo 1981ae08745Sheppo /* Packet creation routines */ 1993af08d82Slm66018 static void vsw_send_ver(void *); 2001ae08745Sheppo static void vsw_send_attr(vsw_ldc_t *); 2011ae08745Sheppo static vio_dring_reg_msg_t 
*vsw_create_dring_info_pkt(vsw_ldc_t *); 2021ae08745Sheppo static void vsw_send_dring_info(vsw_ldc_t *); 2031ae08745Sheppo static void vsw_send_rdx(vsw_ldc_t *); 2041ae08745Sheppo 205b071742bSsg70180 static int vsw_send_msg(vsw_ldc_t *, void *, int, boolean_t); 2061ae08745Sheppo 2071ae08745Sheppo /* Forwarding database (FDB) routines */ 2081ae08745Sheppo static int vsw_add_fdb(vsw_t *vswp, vsw_port_t *port); 2091ae08745Sheppo static int vsw_del_fdb(vsw_t *vswp, vsw_port_t *port); 2101ae08745Sheppo static vsw_port_t *vsw_lookup_fdb(vsw_t *vswp, struct ether_header *); 2111ae08745Sheppo static int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *); 2121ae08745Sheppo static int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 2131ae08745Sheppo static int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 214*19b65a69Ssb155480 static mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t); 2151ae08745Sheppo static void vsw_del_mcst_port(vsw_port_t *); 2161ae08745Sheppo static void vsw_del_mcst_vsw(vsw_t *); 2171ae08745Sheppo 2181ae08745Sheppo /* Dring routines */ 2191ae08745Sheppo static dring_info_t *vsw_create_dring(vsw_ldc_t *); 2201ae08745Sheppo static void vsw_create_privring(vsw_ldc_t *); 2211ae08745Sheppo static int vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp); 2221ae08745Sheppo static int vsw_dring_find_free_desc(dring_info_t *, vsw_private_desc_t **, 2231ae08745Sheppo int *); 2241ae08745Sheppo static dring_info_t *vsw_ident2dring(lane_t *, uint64_t); 2251ae08745Sheppo 2261ae08745Sheppo static void vsw_set_lane_attr(vsw_t *, lane_t *); 2271ae08745Sheppo static int vsw_check_attr(vnet_attr_msg_t *, vsw_port_t *); 2281ae08745Sheppo static int vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg); 2291ae08745Sheppo static int vsw_mem_cookie_match(ldc_mem_cookie_t *, ldc_mem_cookie_t *); 2301ae08745Sheppo static int vsw_check_dring_info(vio_dring_reg_msg_t *); 2311ae08745Sheppo 2321ae08745Sheppo /* Misc support routines */ 2331ae08745Sheppo static caddr_t 
vsw_print_ethaddr(uint8_t *addr, char *ebuf); 2341ae08745Sheppo static void vsw_free_lane_resources(vsw_ldc_t *, uint64_t); 2351ae08745Sheppo static int vsw_free_ring(dring_info_t *); 236*19b65a69Ssb155480 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 2371ae08745Sheppo 2381ae08745Sheppo /* Debugging routines */ 2391ae08745Sheppo static void dump_flags(uint64_t); 2401ae08745Sheppo static void display_state(void); 2411ae08745Sheppo static void display_lane(lane_t *); 2421ae08745Sheppo static void display_ring(dring_info_t *); 2431ae08745Sheppo 244445b4c2eSsb155480 int vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */ 2451ae08745Sheppo int vsw_wretries = 100; /* # of write attempts */ 246d10e4ef2Snarayan int vsw_chain_len = 150; /* max # of mblks in msg chain */ 247d10e4ef2Snarayan int vsw_desc_delay = 0; /* delay in us */ 248d10e4ef2Snarayan int vsw_read_attempts = 5; /* # of reads of descriptor */ 249*19b65a69Ssb155480 int vsw_mac_open_retries = 20; /* max # of mac_open() retries */ 250*19b65a69Ssb155480 int vsw_setup_switching_delay = 3; /* setup sw timeout interval in sec */ 251d10e4ef2Snarayan 252d10e4ef2Snarayan uint32_t vsw_mblk_size = VSW_MBLK_SIZE; 253d10e4ef2Snarayan uint32_t vsw_num_mblks = VSW_NUM_MBLKS; 254d10e4ef2Snarayan 255ba2e4443Sseb static mac_callbacks_t vsw_m_callbacks = { 256ba2e4443Sseb 0, 257ba2e4443Sseb vsw_m_stat, 258ba2e4443Sseb vsw_m_start, 259ba2e4443Sseb vsw_m_stop, 260ba2e4443Sseb vsw_m_promisc, 261ba2e4443Sseb vsw_m_multicst, 262ba2e4443Sseb vsw_m_unicst, 263ba2e4443Sseb vsw_m_tx, 264ba2e4443Sseb NULL, 265ba2e4443Sseb NULL, 266ba2e4443Sseb NULL 267ba2e4443Sseb }; 268ba2e4443Sseb 2691ae08745Sheppo static struct cb_ops vsw_cb_ops = { 2701ae08745Sheppo nulldev, /* cb_open */ 2711ae08745Sheppo nulldev, /* cb_close */ 2721ae08745Sheppo nodev, /* cb_strategy */ 2731ae08745Sheppo nodev, /* cb_print */ 2741ae08745Sheppo nodev, /* cb_dump */ 2751ae08745Sheppo nodev, /* cb_read */ 2761ae08745Sheppo nodev, /* 
cb_write */ 2771ae08745Sheppo nodev, /* cb_ioctl */ 2781ae08745Sheppo nodev, /* cb_devmap */ 2791ae08745Sheppo nodev, /* cb_mmap */ 2801ae08745Sheppo nodev, /* cb_segmap */ 2811ae08745Sheppo nochpoll, /* cb_chpoll */ 2821ae08745Sheppo ddi_prop_op, /* cb_prop_op */ 2831ae08745Sheppo NULL, /* cb_stream */ 2841ae08745Sheppo D_MP, /* cb_flag */ 2851ae08745Sheppo CB_REV, /* rev */ 2861ae08745Sheppo nodev, /* int (*cb_aread)() */ 2871ae08745Sheppo nodev /* int (*cb_awrite)() */ 2881ae08745Sheppo }; 2891ae08745Sheppo 2901ae08745Sheppo static struct dev_ops vsw_ops = { 2911ae08745Sheppo DEVO_REV, /* devo_rev */ 2921ae08745Sheppo 0, /* devo_refcnt */ 2931ae08745Sheppo vsw_getinfo, /* devo_getinfo */ 2941ae08745Sheppo nulldev, /* devo_identify */ 2951ae08745Sheppo nulldev, /* devo_probe */ 2961ae08745Sheppo vsw_attach, /* devo_attach */ 2971ae08745Sheppo vsw_detach, /* devo_detach */ 2981ae08745Sheppo nodev, /* devo_reset */ 2991ae08745Sheppo &vsw_cb_ops, /* devo_cb_ops */ 3001ae08745Sheppo (struct bus_ops *)NULL, /* devo_bus_ops */ 3011ae08745Sheppo ddi_power /* devo_power */ 3021ae08745Sheppo }; 3031ae08745Sheppo 3041ae08745Sheppo extern struct mod_ops mod_driverops; 3051ae08745Sheppo static struct modldrv vswmodldrv = { 3061ae08745Sheppo &mod_driverops, 307205eeb1aSlm66018 "sun4v Virtual Switch", 3081ae08745Sheppo &vsw_ops, 3091ae08745Sheppo }; 3101ae08745Sheppo 3111ae08745Sheppo #define LDC_ENTER_LOCK(ldcp) \ 3121ae08745Sheppo mutex_enter(&((ldcp)->ldc_cblock));\ 3131ae08745Sheppo mutex_enter(&((ldcp)->ldc_txlock)); 3141ae08745Sheppo #define LDC_EXIT_LOCK(ldcp) \ 3151ae08745Sheppo mutex_exit(&((ldcp)->ldc_txlock));\ 3161ae08745Sheppo mutex_exit(&((ldcp)->ldc_cblock)); 3171ae08745Sheppo 3181ae08745Sheppo /* Driver soft state ptr */ 3191ae08745Sheppo static void *vsw_state; 3201ae08745Sheppo 3211ae08745Sheppo /* 3221ae08745Sheppo * Linked list of "vsw_t" structures - one per instance. 
3231ae08745Sheppo */ 3241ae08745Sheppo vsw_t *vsw_head = NULL; 3251ae08745Sheppo krwlock_t vsw_rw; 3261ae08745Sheppo 3271ae08745Sheppo /* 3281ae08745Sheppo * Property names 3291ae08745Sheppo */ 3301ae08745Sheppo static char vdev_propname[] = "virtual-device"; 3311ae08745Sheppo static char vsw_propname[] = "virtual-network-switch"; 3321ae08745Sheppo static char physdev_propname[] = "vsw-phys-dev"; 3331ae08745Sheppo static char smode_propname[] = "vsw-switch-mode"; 3341ae08745Sheppo static char macaddr_propname[] = "local-mac-address"; 3351ae08745Sheppo static char remaddr_propname[] = "remote-mac-address"; 3361ae08745Sheppo static char ldcids_propname[] = "ldc-ids"; 3371ae08745Sheppo static char chan_propname[] = "channel-endpoint"; 3381ae08745Sheppo static char id_propname[] = "id"; 3391ae08745Sheppo static char reg_propname[] = "reg"; 3401ae08745Sheppo 3411ae08745Sheppo /* supported versions */ 3421ae08745Sheppo static ver_sup_t vsw_versions[] = { {1, 0} }; 3431ae08745Sheppo 3441ae08745Sheppo /* 3451ae08745Sheppo * Matching criteria passed to the MDEG to register interest 3461ae08745Sheppo * in changes to 'virtual-device-port' nodes identified by their 3471ae08745Sheppo * 'id' property. 3481ae08745Sheppo */ 3491ae08745Sheppo static md_prop_match_t vport_prop_match[] = { 3501ae08745Sheppo { MDET_PROP_VAL, "id" }, 3511ae08745Sheppo { MDET_LIST_END, NULL } 3521ae08745Sheppo }; 3531ae08745Sheppo 3541ae08745Sheppo static mdeg_node_match_t vport_match = { "virtual-device-port", 3551ae08745Sheppo vport_prop_match }; 3561ae08745Sheppo 3571ae08745Sheppo /* 35834683adeSsg70180 * Matching criteria passed to the MDEG to register interest 35934683adeSsg70180 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified 36034683adeSsg70180 * by their 'name' and 'cfg-handle' properties. 
36134683adeSsg70180 */ 36234683adeSsg70180 static md_prop_match_t vdev_prop_match[] = { 36334683adeSsg70180 { MDET_PROP_STR, "name" }, 36434683adeSsg70180 { MDET_PROP_VAL, "cfg-handle" }, 36534683adeSsg70180 { MDET_LIST_END, NULL } 36634683adeSsg70180 }; 36734683adeSsg70180 36834683adeSsg70180 static mdeg_node_match_t vdev_match = { "virtual-device", 36934683adeSsg70180 vdev_prop_match }; 37034683adeSsg70180 37134683adeSsg70180 37234683adeSsg70180 /* 3731ae08745Sheppo * Specification of an MD node passed to the MDEG to filter any 3741ae08745Sheppo * 'vport' nodes that do not belong to the specified node. This 3751ae08745Sheppo * template is copied for each vsw instance and filled in with 3761ae08745Sheppo * the appropriate 'cfg-handle' value before being passed to the MDEG. 3771ae08745Sheppo */ 3781ae08745Sheppo static mdeg_prop_spec_t vsw_prop_template[] = { 3791ae08745Sheppo { MDET_PROP_STR, "name", vsw_propname }, 3801ae08745Sheppo { MDET_PROP_VAL, "cfg-handle", NULL }, 3811ae08745Sheppo { MDET_LIST_END, NULL, NULL } 3821ae08745Sheppo }; 3831ae08745Sheppo 3841ae08745Sheppo #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 3851ae08745Sheppo 3861ae08745Sheppo /* 3877636cb21Slm66018 * From /etc/system enable/disable thread per ring. This is a mode 3887636cb21Slm66018 * selection that is done a vsw driver attach time. 3897636cb21Slm66018 */ 3907636cb21Slm66018 boolean_t vsw_multi_ring_enable = B_FALSE; 3917636cb21Slm66018 int vsw_mac_rx_rings = VSW_MAC_RX_RINGS; 3927636cb21Slm66018 3937636cb21Slm66018 /* 3941ae08745Sheppo * Print debug messages - set to 0x1f to enable all msgs 3951ae08745Sheppo * or 0x0 to turn all off. 
3961ae08745Sheppo */ 3971ae08745Sheppo int vswdbg = 0x0; 3981ae08745Sheppo 3991ae08745Sheppo /* 4001ae08745Sheppo * debug levels: 4011ae08745Sheppo * 0x01: Function entry/exit tracing 4021ae08745Sheppo * 0x02: Internal function messages 4031ae08745Sheppo * 0x04: Verbose internal messages 4041ae08745Sheppo * 0x08: Warning messages 4051ae08745Sheppo * 0x10: Error messages 4061ae08745Sheppo */ 4071ae08745Sheppo 4081ae08745Sheppo static void 4091ae08745Sheppo vswdebug(vsw_t *vswp, const char *fmt, ...) 4101ae08745Sheppo { 4111ae08745Sheppo char buf[512]; 4121ae08745Sheppo va_list ap; 4131ae08745Sheppo 4141ae08745Sheppo va_start(ap, fmt); 4151ae08745Sheppo (void) vsprintf(buf, fmt, ap); 4161ae08745Sheppo va_end(ap); 4171ae08745Sheppo 4181ae08745Sheppo if (vswp == NULL) 4191ae08745Sheppo cmn_err(CE_CONT, "%s\n", buf); 4201ae08745Sheppo else 4211ae08745Sheppo cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); 4221ae08745Sheppo } 4231ae08745Sheppo 4241ae08745Sheppo /* 4251ae08745Sheppo * For the moment the state dump routines have their own 4261ae08745Sheppo * private flag. 
4271ae08745Sheppo */ 4281ae08745Sheppo #define DUMP_STATE 0 4291ae08745Sheppo 4301ae08745Sheppo #if DUMP_STATE 4311ae08745Sheppo 4321ae08745Sheppo #define DUMP_TAG(tag) \ 4331ae08745Sheppo { \ 4341ae08745Sheppo D1(NULL, "DUMP_TAG: type 0x%llx", (tag).vio_msgtype); \ 4351ae08745Sheppo D1(NULL, "DUMP_TAG: stype 0x%llx", (tag).vio_subtype); \ 4361ae08745Sheppo D1(NULL, "DUMP_TAG: senv 0x%llx", (tag).vio_subtype_env); \ 4371ae08745Sheppo } 4381ae08745Sheppo 4391ae08745Sheppo #define DUMP_TAG_PTR(tag) \ 4401ae08745Sheppo { \ 4411ae08745Sheppo D1(NULL, "DUMP_TAG: type 0x%llx", (tag)->vio_msgtype); \ 4421ae08745Sheppo D1(NULL, "DUMP_TAG: stype 0x%llx", (tag)->vio_subtype); \ 4431ae08745Sheppo D1(NULL, "DUMP_TAG: senv 0x%llx", (tag)->vio_subtype_env); \ 4441ae08745Sheppo } 4451ae08745Sheppo 4461ae08745Sheppo #define DUMP_FLAGS(flags) dump_flags(flags); 4471ae08745Sheppo #define DISPLAY_STATE() display_state() 4481ae08745Sheppo 4491ae08745Sheppo #else 4501ae08745Sheppo 4511ae08745Sheppo #define DUMP_TAG(tag) 4521ae08745Sheppo #define DUMP_TAG_PTR(tag) 4531ae08745Sheppo #define DUMP_FLAGS(state) 4541ae08745Sheppo #define DISPLAY_STATE() 4551ae08745Sheppo 4561ae08745Sheppo #endif /* DUMP_STATE */ 4571ae08745Sheppo 4581ae08745Sheppo #ifdef DEBUG 4591ae08745Sheppo 4601ae08745Sheppo #define D1 \ 4611ae08745Sheppo if (vswdbg & 0x01) \ 4621ae08745Sheppo vswdebug 4631ae08745Sheppo 4641ae08745Sheppo #define D2 \ 4651ae08745Sheppo if (vswdbg & 0x02) \ 4661ae08745Sheppo vswdebug 4671ae08745Sheppo 4681ae08745Sheppo #define D3 \ 4691ae08745Sheppo if (vswdbg & 0x04) \ 4701ae08745Sheppo vswdebug 4711ae08745Sheppo 4721ae08745Sheppo #define DWARN \ 4731ae08745Sheppo if (vswdbg & 0x08) \ 4741ae08745Sheppo vswdebug 4751ae08745Sheppo 4761ae08745Sheppo #define DERR \ 4771ae08745Sheppo if (vswdbg & 0x10) \ 4781ae08745Sheppo vswdebug 4791ae08745Sheppo 4801ae08745Sheppo #else 4811ae08745Sheppo 4821ae08745Sheppo #define DERR if (0) vswdebug 4831ae08745Sheppo #define DWARN if (0) vswdebug 
4841ae08745Sheppo #define D1 if (0) vswdebug 4851ae08745Sheppo #define D2 if (0) vswdebug 4861ae08745Sheppo #define D3 if (0) vswdebug 4871ae08745Sheppo 4881ae08745Sheppo #endif /* DEBUG */ 4891ae08745Sheppo 4901ae08745Sheppo static struct modlinkage modlinkage = { 4911ae08745Sheppo MODREV_1, 4921ae08745Sheppo &vswmodldrv, 4931ae08745Sheppo NULL 4941ae08745Sheppo }; 4951ae08745Sheppo 4961ae08745Sheppo int 4971ae08745Sheppo _init(void) 4981ae08745Sheppo { 4991ae08745Sheppo int status; 5001ae08745Sheppo 5011ae08745Sheppo rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); 5021ae08745Sheppo 5031ae08745Sheppo status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); 5041ae08745Sheppo if (status != 0) { 5051ae08745Sheppo return (status); 5061ae08745Sheppo } 5071ae08745Sheppo 5081ae08745Sheppo mac_init_ops(&vsw_ops, "vsw"); 5091ae08745Sheppo status = mod_install(&modlinkage); 5101ae08745Sheppo if (status != 0) { 5111ae08745Sheppo ddi_soft_state_fini(&vsw_state); 5121ae08745Sheppo } 5131ae08745Sheppo return (status); 5141ae08745Sheppo } 5151ae08745Sheppo 5161ae08745Sheppo int 5171ae08745Sheppo _fini(void) 5181ae08745Sheppo { 5191ae08745Sheppo int status; 5201ae08745Sheppo 5211ae08745Sheppo status = mod_remove(&modlinkage); 5221ae08745Sheppo if (status != 0) 5231ae08745Sheppo return (status); 5241ae08745Sheppo mac_fini_ops(&vsw_ops); 5251ae08745Sheppo ddi_soft_state_fini(&vsw_state); 5261ae08745Sheppo 5271ae08745Sheppo rw_destroy(&vsw_rw); 5281ae08745Sheppo 5291ae08745Sheppo return (status); 5301ae08745Sheppo } 5311ae08745Sheppo 5321ae08745Sheppo int 5331ae08745Sheppo _info(struct modinfo *modinfop) 5341ae08745Sheppo { 5351ae08745Sheppo return (mod_info(&modlinkage, modinfop)); 5361ae08745Sheppo } 5371ae08745Sheppo 5381ae08745Sheppo static int 5391ae08745Sheppo vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5401ae08745Sheppo { 5411ae08745Sheppo vsw_t *vswp; 54234683adeSsg70180 int instance; 5431ae08745Sheppo char hashname[MAXNAMELEN]; 5441ae08745Sheppo char 
qname[TASKQ_NAMELEN]; 5457636cb21Slm66018 enum { PROG_init = 0x00, 546*19b65a69Ssb155480 PROG_locks = 0x01, 547*19b65a69Ssb155480 PROG_readmd = 0x02, 548*19b65a69Ssb155480 PROG_fdb = 0x04, 549*19b65a69Ssb155480 PROG_mfdb = 0x08, 550*19b65a69Ssb155480 PROG_taskq = 0x10, 551*19b65a69Ssb155480 PROG_swmode = 0x20, 552*19b65a69Ssb155480 PROG_macreg = 0x40, 553*19b65a69Ssb155480 PROG_mdreg = 0x80} 5541ae08745Sheppo progress; 5551ae08745Sheppo 5561ae08745Sheppo progress = PROG_init; 557*19b65a69Ssb155480 int rv; 5581ae08745Sheppo 5591ae08745Sheppo switch (cmd) { 5601ae08745Sheppo case DDI_ATTACH: 5611ae08745Sheppo break; 5621ae08745Sheppo case DDI_RESUME: 5631ae08745Sheppo /* nothing to do for this non-device */ 5641ae08745Sheppo return (DDI_SUCCESS); 5651ae08745Sheppo case DDI_PM_RESUME: 5661ae08745Sheppo default: 5671ae08745Sheppo return (DDI_FAILURE); 5681ae08745Sheppo } 5691ae08745Sheppo 5701ae08745Sheppo instance = ddi_get_instance(dip); 5711ae08745Sheppo if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { 5721ae08745Sheppo DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); 5731ae08745Sheppo return (DDI_FAILURE); 5741ae08745Sheppo } 5751ae08745Sheppo vswp = ddi_get_soft_state(vsw_state, instance); 5761ae08745Sheppo 5771ae08745Sheppo if (vswp == NULL) { 5781ae08745Sheppo DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance); 5791ae08745Sheppo goto vsw_attach_fail; 5801ae08745Sheppo } 5811ae08745Sheppo 5821ae08745Sheppo vswp->dip = dip; 5831ae08745Sheppo vswp->instance = instance; 5841ae08745Sheppo ddi_set_driver_private(dip, (caddr_t)vswp); 5851ae08745Sheppo 5865f94e909Ssg70180 mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL); 58734683adeSsg70180 mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL); 588*19b65a69Ssb155480 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); 589*19b65a69Ssb155480 mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL); 5901ae08745Sheppo rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); 
591*19b65a69Ssb155480 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); 592*19b65a69Ssb155480 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); 593*19b65a69Ssb155480 594*19b65a69Ssb155480 progress |= PROG_locks; 595*19b65a69Ssb155480 596*19b65a69Ssb155480 rv = vsw_read_mdprops(vswp); 597*19b65a69Ssb155480 if (rv != 0) 598*19b65a69Ssb155480 goto vsw_attach_fail; 599*19b65a69Ssb155480 600*19b65a69Ssb155480 progress |= PROG_readmd; 6011ae08745Sheppo 6021ae08745Sheppo /* setup the unicast forwarding database */ 6031ae08745Sheppo (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", 6041ae08745Sheppo vswp->instance); 6051ae08745Sheppo D2(vswp, "creating unicast hash table (%s)...", hashname); 6061ae08745Sheppo vswp->fdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS, 6071ae08745Sheppo mod_hash_null_valdtor, sizeof (void *)); 6081ae08745Sheppo 6091ae08745Sheppo progress |= PROG_fdb; 6101ae08745Sheppo 6111ae08745Sheppo /* setup the multicast fowarding database */ 6121ae08745Sheppo (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", 6131ae08745Sheppo vswp->instance); 6141ae08745Sheppo D2(vswp, "creating multicast hash table %s)...", hashname); 6151ae08745Sheppo vswp->mfdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS, 6161ae08745Sheppo mod_hash_null_valdtor, sizeof (void *)); 6171ae08745Sheppo 6181ae08745Sheppo progress |= PROG_mfdb; 6191ae08745Sheppo 6201ae08745Sheppo /* 6211ae08745Sheppo * Create the taskq which will process all the VIO 6221ae08745Sheppo * control messages. 
6231ae08745Sheppo */ 6241ae08745Sheppo (void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance); 6251ae08745Sheppo if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, 6261ae08745Sheppo TASKQ_DEFAULTPRI, 0)) == NULL) { 62734683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Unable to create task queue", 62834683adeSsg70180 vswp->instance); 6291ae08745Sheppo goto vsw_attach_fail; 6301ae08745Sheppo } 6311ae08745Sheppo 6321ae08745Sheppo progress |= PROG_taskq; 6331ae08745Sheppo 634d10e4ef2Snarayan /* prevent auto-detaching */ 635d10e4ef2Snarayan if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip, 636d10e4ef2Snarayan DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) { 63734683adeSsg70180 cmn_err(CE_NOTE, "!Unable to set \"%s\" property for " 638d10e4ef2Snarayan "instance %u", DDI_NO_AUTODETACH, instance); 639d10e4ef2Snarayan } 640d10e4ef2Snarayan 6411ae08745Sheppo /* 642*19b65a69Ssb155480 * Setup the required switching mode, 643*19b65a69Ssb155480 * based on the mdprops that we read earlier. 644*19b65a69Ssb155480 */ 645*19b65a69Ssb155480 rv = vsw_setup_switching(vswp); 646*19b65a69Ssb155480 if (rv == EAGAIN) { 647*19b65a69Ssb155480 /* 648*19b65a69Ssb155480 * Unable to setup switching mode; 649*19b65a69Ssb155480 * as the error is EAGAIN, schedule a timeout to retry. 
650*19b65a69Ssb155480 */ 651*19b65a69Ssb155480 mutex_enter(&vswp->swtmout_lock); 652*19b65a69Ssb155480 653*19b65a69Ssb155480 vswp->swtmout_enabled = B_TRUE; 654*19b65a69Ssb155480 vswp->swtmout_id = 655*19b65a69Ssb155480 timeout(vsw_setup_switching_timeout, vswp, 656*19b65a69Ssb155480 (vsw_setup_switching_delay * drv_usectohz(MICROSEC))); 657*19b65a69Ssb155480 658*19b65a69Ssb155480 mutex_exit(&vswp->swtmout_lock); 659*19b65a69Ssb155480 } else if (rv != 0) { 660*19b65a69Ssb155480 goto vsw_attach_fail; 661*19b65a69Ssb155480 } 662*19b65a69Ssb155480 663*19b65a69Ssb155480 progress |= PROG_swmode; 664*19b65a69Ssb155480 665*19b65a69Ssb155480 /* Register with mac layer as a provider */ 666*19b65a69Ssb155480 rv = vsw_mac_register(vswp); 667*19b65a69Ssb155480 if (rv != 0) 668*19b65a69Ssb155480 goto vsw_attach_fail; 669*19b65a69Ssb155480 670*19b65a69Ssb155480 progress |= PROG_macreg; 671*19b65a69Ssb155480 672*19b65a69Ssb155480 /* 67334683adeSsg70180 * Now we have everything setup, register an interest in 67434683adeSsg70180 * specific MD nodes. 67534683adeSsg70180 * 67634683adeSsg70180 * The callback is invoked in 2 cases, firstly if upon mdeg 67734683adeSsg70180 * registration there are existing nodes which match our specified 67834683adeSsg70180 * criteria, and secondly if the MD is changed (and again, there 67934683adeSsg70180 * are nodes which we are interested in present within it. Note 68034683adeSsg70180 * that our callback will be invoked even if our specified nodes 68134683adeSsg70180 * have not actually changed). 
68234683adeSsg70180 * 6831ae08745Sheppo */ 684*19b65a69Ssb155480 rv = vsw_mdeg_register(vswp); 685*19b65a69Ssb155480 if (rv != 0) 68634683adeSsg70180 goto vsw_attach_fail; 6871ae08745Sheppo 688*19b65a69Ssb155480 progress |= PROG_mdreg; 689*19b65a69Ssb155480 690*19b65a69Ssb155480 WRITE_ENTER(&vsw_rw); 691*19b65a69Ssb155480 vswp->next = vsw_head; 692*19b65a69Ssb155480 vsw_head = vswp; 693*19b65a69Ssb155480 RW_EXIT(&vsw_rw); 694*19b65a69Ssb155480 695*19b65a69Ssb155480 ddi_report_dev(vswp->dip); 6961ae08745Sheppo return (DDI_SUCCESS); 6971ae08745Sheppo 6981ae08745Sheppo vsw_attach_fail: 6991ae08745Sheppo DERR(NULL, "vsw_attach: failed"); 7001ae08745Sheppo 701*19b65a69Ssb155480 if (progress & PROG_mdreg) { 702*19b65a69Ssb155480 vsw_mdeg_unregister(vswp); 703*19b65a69Ssb155480 (void) vsw_detach_ports(vswp); 704*19b65a69Ssb155480 } 705*19b65a69Ssb155480 706*19b65a69Ssb155480 if (progress & PROG_macreg) 707*19b65a69Ssb155480 (void) vsw_mac_unregister(vswp); 708*19b65a69Ssb155480 709*19b65a69Ssb155480 if (progress & PROG_swmode) { 710*19b65a69Ssb155480 vsw_stop_switching_timeout(vswp); 711*19b65a69Ssb155480 mutex_enter(&vswp->mac_lock); 712*19b65a69Ssb155480 vsw_mac_detach(vswp); 713*19b65a69Ssb155480 vsw_mac_close(vswp); 714*19b65a69Ssb155480 mutex_exit(&vswp->mac_lock); 715*19b65a69Ssb155480 } 716*19b65a69Ssb155480 7171ae08745Sheppo if (progress & PROG_taskq) 7181ae08745Sheppo ddi_taskq_destroy(vswp->taskq_p); 7191ae08745Sheppo 720*19b65a69Ssb155480 if (progress & PROG_mfdb) 7211ae08745Sheppo mod_hash_destroy_hash(vswp->mfdb); 7221ae08745Sheppo 723*19b65a69Ssb155480 if (progress & PROG_fdb) 7241ae08745Sheppo mod_hash_destroy_hash(vswp->fdb); 7251ae08745Sheppo 726*19b65a69Ssb155480 if (progress & PROG_locks) { 727*19b65a69Ssb155480 rw_destroy(&vswp->plist.lockrw); 728*19b65a69Ssb155480 rw_destroy(&vswp->mfdbrw); 7291ae08745Sheppo rw_destroy(&vswp->if_lockrw); 730*19b65a69Ssb155480 mutex_destroy(&vswp->swtmout_lock); 731*19b65a69Ssb155480 mutex_destroy(&vswp->mca_lock); 
73234683adeSsg70180 mutex_destroy(&vswp->mac_lock);
7335f94e909Ssg70180 mutex_destroy(&vswp->hw_lock);
73434683adeSsg70180 }
7351ae08745Sheppo
7361ae08745Sheppo ddi_soft_state_free(vsw_state, instance);
7371ae08745Sheppo return (DDI_FAILURE);
7381ae08745Sheppo }
7391ae08745Sheppo
/*
 * vsw_detach(9E): tear down one vsw instance, releasing everything
 * vsw_attach() set up (switching-mode timeout, MAC registration, mdeg
 * registration, rx callback, ports, mblk pools, multicast entries, taskq,
 * hash tables, locks, soft state). Only DDI_DETACH is handled;
 * DDI_SUSPEND/DDI_PM_SUSPEND return DDI_FAILURE.
 *
 * NOTE(review): the DDI_FAILURE returns part-way through (mac unregister,
 * port detach, mblk-pool destroy) leave the instance partially torn down
 * (e.g. the switching timeout already cancelled) — presumably the DDI
 * framework retries detach later; confirm that a retried detach is safe
 * in that state.
 */
7401ae08745Sheppo static int
7411ae08745Sheppo vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
7421ae08745Sheppo {
743d10e4ef2Snarayan vio_mblk_pool_t *poolp, *npoolp;
7441ae08745Sheppo vsw_t **vswpp, *vswp;
7451ae08745Sheppo int instance;
7461ae08745Sheppo
7471ae08745Sheppo instance = ddi_get_instance(dip);
7481ae08745Sheppo vswp = ddi_get_soft_state(vsw_state, instance);
7491ae08745Sheppo
7501ae08745Sheppo if (vswp == NULL) {
7511ae08745Sheppo return (DDI_FAILURE);
7521ae08745Sheppo }
7531ae08745Sheppo
7541ae08745Sheppo switch (cmd) {
7551ae08745Sheppo case DDI_DETACH:
7561ae08745Sheppo break;
7571ae08745Sheppo case DDI_SUSPEND:
7581ae08745Sheppo case DDI_PM_SUSPEND:
7591ae08745Sheppo default:
7601ae08745Sheppo return (DDI_FAILURE);
7611ae08745Sheppo }
7621ae08745Sheppo
7631ae08745Sheppo D2(vswp, "detaching instance %d", instance);
7641ae08745Sheppo
765*19b65a69Ssb155480 /* Stop any pending timeout to setup switching mode. */
766*19b65a69Ssb155480 vsw_stop_switching_timeout(vswp);
767*19b65a69Ssb155480
76834683adeSsg70180 if (vswp->if_state & VSW_IF_REG) {
7691ae08745Sheppo if (vsw_mac_unregister(vswp) != 0) {
77034683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
77134683adeSsg70180 "MAC layer", vswp->instance);
7721ae08745Sheppo return (DDI_FAILURE);
7731ae08745Sheppo }
774d10e4ef2Snarayan }
7751ae08745Sheppo
7761ae08745Sheppo vsw_mdeg_unregister(vswp);
7771ae08745Sheppo
778e1ebb9ecSlm66018 /* remove mac layer callback */
77934683adeSsg70180 mutex_enter(&vswp->mac_lock);
780e1ebb9ecSlm66018 if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
7811f8aaf0dSethindra mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE);
782e1ebb9ecSlm66018 vswp->mrh = NULL;
7831ae08745Sheppo }
78434683adeSsg70180 mutex_exit(&vswp->mac_lock);
7851ae08745Sheppo
7861ae08745Sheppo if (vsw_detach_ports(vswp) != 0) {
78734683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Unable to detach ports",
78834683adeSsg70180 vswp->instance);
7891ae08745Sheppo return (DDI_FAILURE);
7901ae08745Sheppo }
7911ae08745Sheppo
79234683adeSsg70180 rw_destroy(&vswp->if_lockrw);
79334683adeSsg70180
7945f94e909Ssg70180 mutex_destroy(&vswp->hw_lock);
7955f94e909Ssg70180
7961ae08745Sheppo /*
797e1ebb9ecSlm66018 * Now that the ports have been deleted, stop and close
798e1ebb9ecSlm66018 * the physical device.
799e1ebb9ecSlm66018 */
80034683adeSsg70180 mutex_enter(&vswp->mac_lock);
801e1ebb9ecSlm66018
802*19b65a69Ssb155480 vsw_mac_detach(vswp);
803*19b65a69Ssb155480 vsw_mac_close(vswp);
804*19b65a69Ssb155480
80534683adeSsg70180 mutex_exit(&vswp->mac_lock);
806*19b65a69Ssb155480
80734683adeSsg70180 mutex_destroy(&vswp->mac_lock);
808*19b65a69Ssb155480 mutex_destroy(&vswp->swtmout_lock);
809e1ebb9ecSlm66018
810e1ebb9ecSlm66018 /*
811d10e4ef2Snarayan * Destroy any free pools that may still exist.
812d10e4ef2Snarayan */
813d10e4ef2Snarayan poolp = vswp->rxh;
814d10e4ef2Snarayan while (poolp != NULL) {
815d10e4ef2Snarayan npoolp = vswp->rxh = poolp->nextp;
816d10e4ef2Snarayan if (vio_destroy_mblks(poolp) != 0) {
/* pool still in use: restore the list head and abort the detach */
817d10e4ef2Snarayan vswp->rxh = poolp;
818d10e4ef2Snarayan return (DDI_FAILURE);
819d10e4ef2Snarayan }
820d10e4ef2Snarayan poolp = npoolp;
821d10e4ef2Snarayan }
822d10e4ef2Snarayan
823d10e4ef2Snarayan /*
8241ae08745Sheppo * Remove this instance from any entries it may be on in
8251ae08745Sheppo * the hash table by using the list of addresses maintained
8261ae08745Sheppo * in the vsw_t structure.
8271ae08745Sheppo */
8281ae08745Sheppo vsw_del_mcst_vsw(vswp);
8291ae08745Sheppo
8301ae08745Sheppo vswp->mcap = NULL;
8311ae08745Sheppo mutex_destroy(&vswp->mca_lock);
8321ae08745Sheppo
8331ae08745Sheppo /*
8341ae08745Sheppo * By now any pending tasks have finished and the underlying
8351ae08745Sheppo * ldc's have been destroyed, so its safe to delete the control
8361ae08745Sheppo * message taskq.
8371ae08745Sheppo */
8381ae08745Sheppo if (vswp->taskq_p != NULL)
8391ae08745Sheppo ddi_taskq_destroy(vswp->taskq_p);
8401ae08745Sheppo
8411ae08745Sheppo /*
8421ae08745Sheppo * At this stage all the data pointers in the hash table
8431ae08745Sheppo * should be NULL, as all the ports have been removed and will
8441ae08745Sheppo * have deleted themselves from the port lists which the data
8451ae08745Sheppo * pointers point to. Hence we can destroy the table using the
8461ae08745Sheppo * default destructors.
8471ae08745Sheppo */
8481ae08745Sheppo D2(vswp, "vsw_detach: destroying hash tables..");
8491ae08745Sheppo mod_hash_destroy_hash(vswp->fdb);
8501ae08745Sheppo vswp->fdb = NULL;
8511ae08745Sheppo
8521ae08745Sheppo WRITE_ENTER(&vswp->mfdbrw);
8531ae08745Sheppo mod_hash_destroy_hash(vswp->mfdb);
8541ae08745Sheppo vswp->mfdb = NULL;
8551ae08745Sheppo RW_EXIT(&vswp->mfdbrw);
8561ae08745Sheppo rw_destroy(&vswp->mfdbrw);
8571ae08745Sheppo
8581ae08745Sheppo ddi_remove_minor_node(dip, NULL);
8591ae08745Sheppo
8601ae08745Sheppo rw_destroy(&vswp->plist.lockrw);
/* unlink this instance from the global vsw_head list */
8611ae08745Sheppo WRITE_ENTER(&vsw_rw);
8621ae08745Sheppo for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
8631ae08745Sheppo if (*vswpp == vswp) {
8641ae08745Sheppo *vswpp = vswp->next;
8651ae08745Sheppo break;
8661ae08745Sheppo }
8671ae08745Sheppo }
8681ae08745Sheppo RW_EXIT(&vsw_rw);
8691ae08745Sheppo ddi_soft_state_free(vsw_state, instance);
8701ae08745Sheppo
8711ae08745Sheppo return (DDI_SUCCESS);
8721ae08745Sheppo }
8731ae08745Sheppo
/*
 * vsw_getinfo(9E): translate a dev_t into the matching dev_info pointer
 * (DDI_INFO_DEVT2DEVINFO) or instance number (DDI_INFO_DEVT2INSTANCE).
 * The minor number is used directly as the soft-state instance.
 */
8741ae08745Sheppo static int
8751ae08745Sheppo vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
8761ae08745Sheppo {
8771ae08745Sheppo _NOTE(ARGUNUSED(dip))
8781ae08745Sheppo
8791ae08745Sheppo vsw_t *vswp = NULL;
8801ae08745Sheppo dev_t dev = (dev_t)arg;
8811ae08745Sheppo int instance;
8821ae08745Sheppo
8831ae08745Sheppo instance = getminor(dev);
8841ae08745Sheppo
8851ae08745Sheppo switch (infocmd) {
8861ae08745Sheppo case DDI_INFO_DEVT2DEVINFO:
8871ae08745Sheppo if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
8881ae08745Sheppo *result = NULL;
8891ae08745Sheppo return (DDI_FAILURE);
8901ae08745Sheppo }
8911ae08745Sheppo *result = vswp->dip;
8921ae08745Sheppo return (DDI_SUCCESS);
8931ae08745Sheppo
8941ae08745Sheppo case DDI_INFO_DEVT2INSTANCE:
8951ae08745Sheppo *result = (void *)(uintptr_t)instance;
8961ae08745Sheppo return (DDI_SUCCESS);
8971ae08745Sheppo
8981ae08745Sheppo default:
8991ae08745Sheppo *result = NULL;
9001ae08745Sheppo return (DDI_FAILURE);
9011ae08745Sheppo }
9021ae08745Sheppo }
9031ae08745Sheppo
9041ae08745Sheppo /*
90534683adeSsg70180 * Get the value of the "vsw-phys-dev" property in the specified
90634683adeSsg70180 * node. This property is the name of the physical device that
90734683adeSsg70180 * the virtual switch will use to talk to the outside world.
90834683adeSsg70180 *
90934683adeSsg70180 * Note it is valid for this property to be NULL (but the property
91034683adeSsg70180 * itself must exist). Callers of this routine should verify that
91134683adeSsg70180 * the value returned is what they expected (i.e. either NULL or non NULL).
91234683adeSsg70180 *
91334683adeSsg70180 * On success returns value of the property in region pointed to by
91434683adeSsg70180 * the 'name' argument, and with return value of 0. Otherwise returns 1.
 *
 * NOTE(review): 'name' must point to a buffer of at least LIFNAMSIZ
 * bytes — both copy paths below are length-checked against LIFNAMSIZ
 * before the strncpy, which copies strlen()+1 bytes including the NUL.
9151ae08745Sheppo */
91634683adeSsg70180 static int
91734683adeSsg70180 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name)
9181ae08745Sheppo {
91934683adeSsg70180 int len = 0;
9201ae08745Sheppo char *physname = NULL;
9211ae08745Sheppo char *dev;
9221ae08745Sheppo
92334683adeSsg70180 if (md_get_prop_data(mdp, node, physdev_propname,
9241ae08745Sheppo (uint8_t **)(&physname), &len) != 0) {
92534683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical "
92634683adeSsg70180 "device(s) from MD", vswp->instance);
92734683adeSsg70180 return (1);
9281ae08745Sheppo } else if ((strlen(physname) + 1) > LIFNAMSIZ) {
92934683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: %s is too long a device name",
93034683adeSsg70180 vswp->instance, physname);
93134683adeSsg70180 return (1);
9321ae08745Sheppo } else {
/* length verified above, so the copy (incl. terminating NUL) fits */
93334683adeSsg70180 (void) strncpy(name, physname, strlen(physname) + 1);
9341ae08745Sheppo D2(vswp, "%s: using first device specified (%s)",
93534683adeSsg70180 __func__, physname);
9361ae08745Sheppo }
9371ae08745Sheppo
9381ae08745Sheppo #ifdef DEBUG
9391ae08745Sheppo /*
9401ae08745Sheppo * As a temporary measure to aid testing we check to see if there
9411ae08745Sheppo * is a vsw.conf file present. If there is we use the value of the
9421ae08745Sheppo * vsw_physname property in the file as the name of the physical
9431ae08745Sheppo * device, overriding the value from the MD.
9441ae08745Sheppo *
9451ae08745Sheppo * There may be multiple devices listed, but for the moment
9461ae08745Sheppo * we just use the first one.
9471ae08745Sheppo */
9481ae08745Sheppo if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
9491ae08745Sheppo "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
9501ae08745Sheppo if ((strlen(dev) + 1) > LIFNAMSIZ) {
95134683adeSsg70180 cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
95234683adeSsg70180 vswp->instance, dev);
95334683adeSsg70180 ddi_prop_free(dev);
95434683adeSsg70180 return (1);
9551ae08745Sheppo } else {
95634683adeSsg70180 cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
95734683adeSsg70180 "config file", vswp->instance, dev);
9581ae08745Sheppo
95934683adeSsg70180 (void) strncpy(name, dev, strlen(dev) + 1);
9601ae08745Sheppo }
9611ae08745Sheppo
9621ae08745Sheppo ddi_prop_free(dev);
9631ae08745Sheppo }
9641ae08745Sheppo #endif
9651ae08745Sheppo
96634683adeSsg70180 return (0);
96734683adeSsg70180 }
968e1ebb9ecSlm66018
969e1ebb9ecSlm66018 /*
97034683adeSsg70180 * Read the 'vsw-switch-mode' property from the specified MD node.
97134683adeSsg70180 *
97234683adeSsg70180 * Returns 0 on success and the number of modes found in 'found',
97334683adeSsg70180 * otherwise returns 1.
974e1ebb9ecSlm66018 */
/*
 * NOTE(review): 'modes' must have room for at least NUM_SMODES entries —
 * the parse loop below stores at most NUM_SMODES values. Unrecognized
 * mode strings are not an error; they fall back to VSW_LAYER2.
 */
97534683adeSsg70180 static int
97634683adeSsg70180 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
97734683adeSsg70180 uint8_t *modes, int *found)
97834683adeSsg70180 {
97934683adeSsg70180 int len = 0;
98034683adeSsg70180 int smode_num = 0;
98134683adeSsg70180 char *smode = NULL;
98234683adeSsg70180 char *curr_mode = NULL;
98334683adeSsg70180
98434683adeSsg70180 D1(vswp, "%s: enter", __func__);
9851ae08745Sheppo
9861ae08745Sheppo /*
9871ae08745Sheppo * Get the switch-mode property. The modes are listed in
9881ae08745Sheppo * decreasing order of preference, i.e. prefered mode is
9891ae08745Sheppo * first item in list.
9901ae08745Sheppo */
9911ae08745Sheppo len = 0;
99234683adeSsg70180 smode_num = 0;
99334683adeSsg70180 if (md_get_prop_data(mdp, node, smode_propname,
9941ae08745Sheppo (uint8_t **)(&smode), &len) != 0) {
9951ae08745Sheppo /*
996e1ebb9ecSlm66018 * Unable to get switch-mode property from MD, nothing
997e1ebb9ecSlm66018 * more we can do.
9981ae08745Sheppo */
99934683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
100034683adeSsg70180 " from the MD", vswp->instance);
100134683adeSsg70180 *found = 0;
100234683adeSsg70180 return (1);
1003e1ebb9ecSlm66018 }
1004e1ebb9ecSlm66018
10051ae08745Sheppo curr_mode = smode;
10061ae08745Sheppo /*
10071ae08745Sheppo * Modes of operation:
10081ae08745Sheppo * 'switched' - layer 2 switching, underlying HW in
1009e1ebb9ecSlm66018 * programmed mode.
10101ae08745Sheppo * 'promiscuous' - layer 2 switching, underlying HW in
10111ae08745Sheppo * promiscuous mode.
10121ae08745Sheppo * 'routed' - layer 3 (i.e. IP) routing, underlying HW
10131ae08745Sheppo * in non-promiscuous mode.
10141ae08745Sheppo */
/*
 * The property data is a sequence of NUL-terminated strings packed
 * into 'len' bytes; walk them one strlen()+1 stride at a time.
 */
101534683adeSsg70180 while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) {
10161ae08745Sheppo D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
1017e1ebb9ecSlm66018 if (strcmp(curr_mode, "switched") == 0) {
101834683adeSsg70180 modes[smode_num++] = VSW_LAYER2;
1019e1ebb9ecSlm66018 } else if (strcmp(curr_mode, "promiscuous") == 0) {
102034683adeSsg70180 modes[smode_num++] = VSW_LAYER2_PROMISC;
1021e1ebb9ecSlm66018 } else if (strcmp(curr_mode, "routed") == 0) {
102234683adeSsg70180 modes[smode_num++] = VSW_LAYER3;
1023e1ebb9ecSlm66018 } else {
102434683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
102534683adeSsg70180 "setting to default switched mode",
102634683adeSsg70180 vswp->instance, curr_mode);
102734683adeSsg70180 modes[smode_num++] = VSW_LAYER2;
10281ae08745Sheppo }
10291ae08745Sheppo curr_mode += strlen(curr_mode) + 1;
10301ae08745Sheppo }
103134683adeSsg70180 *found = smode_num;
10321ae08745Sheppo
103334683adeSsg70180 D2(vswp, "%s: %d modes found", __func__, smode_num);
10341ae08745Sheppo
10351ae08745Sheppo D1(vswp, "%s: exit", __func__);
103634683adeSsg70180
103734683adeSsg70180 return (0);
10381ae08745Sheppo }
10391ae08745Sheppo
1040e1ebb9ecSlm66018 /*
1041e1ebb9ecSlm66018 * Check to see if the card supports the setting of multiple unicst
1042e1ebb9ecSlm66018 * addresses.
1043e1ebb9ecSlm66018 *
10445f94e909Ssg70180 * Returns 0 if card supports the programming of multiple unicast addresses,
10455f94e909Ssg70180 * otherwise returns 1.
1046e1ebb9ecSlm66018 */
/*
 * Caller must hold mac_lock (asserted below) and must already have the
 * physical device open (vswp->mh != NULL), otherwise 1 is returned.
 * On success the capability data is left in vswp->maddr.
 */
1047e1ebb9ecSlm66018 static int
1048e1ebb9ecSlm66018 vsw_get_hw_maddr(vsw_t *vswp)
1049e1ebb9ecSlm66018 {
1050e1ebb9ecSlm66018 D1(vswp, "%s: enter", __func__);
1051e1ebb9ecSlm66018
1052*19b65a69Ssb155480 ASSERT(MUTEX_HELD(&vswp->mac_lock));
1053*19b65a69Ssb155480
1054*19b65a69Ssb155480 if (vswp->mh == NULL)
1055e1ebb9ecSlm66018 return (1);
1056e1ebb9ecSlm66018
1057e1ebb9ecSlm66018 if (!mac_capab_get(vswp->mh, MAC_CAPAB_MULTIADDRESS, &vswp->maddr)) {
10585f94e909Ssg70180 cmn_err(CE_WARN, "!vsw%d: device (%s) does not support "
10595f94e909Ssg70180 "setting multiple unicast addresses", vswp->instance,
10605f94e909Ssg70180 vswp->physname);
1061e1ebb9ecSlm66018 return (1);
1062e1ebb9ecSlm66018 }
1063e1ebb9ecSlm66018
1064e1ebb9ecSlm66018 D2(vswp, "%s: %d addrs : %d free", __func__,
1065e1ebb9ecSlm66018 vswp->maddr.maddr_naddr, vswp->maddr.maddr_naddrfree);
1066e1ebb9ecSlm66018
1067e1ebb9ecSlm66018 D1(vswp, "%s: exit", __func__);
1068e1ebb9ecSlm66018
1069e1ebb9ecSlm66018 return (0);
1070e1ebb9ecSlm66018 }
1071e1ebb9ecSlm66018
1072e1ebb9ecSlm66018 /*
1073*19b65a69Ssb155480 * Program unicast and multicast addresses of vsw interface and the ports
1074*19b65a69Ssb155480 * into the physical device.
1075*19b65a69Ssb155480 */
/*
 * Idempotent: entries already pushed to hardware are skipped via the
 * addr_set / mac_added flags, so this can be re-run after a deferred
 * switching-mode setup completes (see vsw_setup_switching_timeout()).
 * Lock order used here: if_lockrw -> hw_lock, and mca_lock -> mac_lock;
 * then plist->lockrw -> hw_lock and port->mca_lock -> mac_lock.
 * Failures to program an address are logged but not propagated.
 */
1076*19b65a69Ssb155480 static void
1077*19b65a69Ssb155480 vsw_set_addrs(vsw_t *vswp)
1078*19b65a69Ssb155480 {
1079*19b65a69Ssb155480 vsw_port_list_t *plist = &vswp->plist;
1080*19b65a69Ssb155480 vsw_port_t *port;
1081*19b65a69Ssb155480 mcst_addr_t *mcap;
1082*19b65a69Ssb155480 int rv;
1083*19b65a69Ssb155480
1084*19b65a69Ssb155480 READ_ENTER(&vswp->if_lockrw);
1085*19b65a69Ssb155480
1086*19b65a69Ssb155480 if (vswp->if_state & VSW_IF_UP) {
1087*19b65a69Ssb155480
1088*19b65a69Ssb155480 /* program unicst addr of vsw interface in the physdev */
1089*19b65a69Ssb155480 if (vswp->addr_set == VSW_ADDR_UNSET) {
1090*19b65a69Ssb155480 mutex_enter(&vswp->hw_lock);
1091*19b65a69Ssb155480 rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
1092*19b65a69Ssb155480 mutex_exit(&vswp->hw_lock);
1093*19b65a69Ssb155480 if (rv != 0) {
1094*19b65a69Ssb155480 cmn_err(CE_NOTE,
1095*19b65a69Ssb155480 "!vsw%d: failed to program interface "
1096*19b65a69Ssb155480 "unicast address\n", vswp->instance);
1097*19b65a69Ssb155480 }
1098*19b65a69Ssb155480 /*
1099*19b65a69Ssb155480 * Notify the MAC layer of the changed address.
1100*19b65a69Ssb155480 */
1101*19b65a69Ssb155480 mac_unicst_update(vswp->if_mh,
1102*19b65a69Ssb155480 (uint8_t *)&vswp->if_addr);
1103*19b65a69Ssb155480 }
1104*19b65a69Ssb155480
1105*19b65a69Ssb155480 /* program mcast addrs of vsw interface in the physdev */
1106*19b65a69Ssb155480 mutex_enter(&vswp->mca_lock);
1107*19b65a69Ssb155480 mutex_enter(&vswp->mac_lock);
1108*19b65a69Ssb155480 for (mcap = vswp->mcap; mcap != NULL; mcap = mcap->nextp) {
1109*19b65a69Ssb155480 if (mcap->mac_added)
1110*19b65a69Ssb155480 continue;
1111*19b65a69Ssb155480 rv = mac_multicst_add(vswp->mh, (uchar_t *)&mcap->mca);
1112*19b65a69Ssb155480 if (rv == 0) {
1113*19b65a69Ssb155480 mcap->mac_added = B_TRUE;
1114*19b65a69Ssb155480 } else {
1115*19b65a69Ssb155480 cmn_err(CE_WARN, "!vsw%d: unable to add "
1116*19b65a69Ssb155480 "multicast address: %s\n", vswp->instance,
1117*19b65a69Ssb155480 ether_sprintf((void *)&mcap->mca));
1118*19b65a69Ssb155480 }
1119*19b65a69Ssb155480 }
1120*19b65a69Ssb155480 mutex_exit(&vswp->mac_lock);
1121*19b65a69Ssb155480 mutex_exit(&vswp->mca_lock);
1122*19b65a69Ssb155480
1123*19b65a69Ssb155480 }
1124*19b65a69Ssb155480
1125*19b65a69Ssb155480 RW_EXIT(&vswp->if_lockrw);
1126*19b65a69Ssb155480
1127*19b65a69Ssb155480 WRITE_ENTER(&plist->lockrw);
1128*19b65a69Ssb155480
1129*19b65a69Ssb155480 /* program unicast address of ports in the physical device */
1130*19b65a69Ssb155480 mutex_enter(&vswp->hw_lock);
1131*19b65a69Ssb155480 for (port = plist->head; port != NULL; port = port->p_next) {
1132*19b65a69Ssb155480 if (port->addr_set != VSW_ADDR_UNSET) /* addr already set */
1133*19b65a69Ssb155480 continue;
1134*19b65a69Ssb155480 if (vsw_set_hw(vswp, port, VSW_VNETPORT)) {
1135*19b65a69Ssb155480 cmn_err(CE_NOTE,
1136*19b65a69Ssb155480 "!vsw%d: port:%d failed to set unicast address\n",
1137*19b65a69Ssb155480 vswp->instance, port->p_instance);
1138*19b65a69Ssb155480 }
1139*19b65a69Ssb155480 }
1140*19b65a69Ssb155480 mutex_exit(&vswp->hw_lock);
1141*19b65a69Ssb155480
1142*19b65a69Ssb155480 /* program multicast addresses of ports in the physdev */
1143*19b65a69Ssb155480 for (port = plist->head; port != NULL; port = port->p_next) {
1144*19b65a69Ssb155480 mutex_enter(&port->mca_lock);
1145*19b65a69Ssb155480 mutex_enter(&vswp->mac_lock);
1146*19b65a69Ssb155480 for (mcap = port->mcap; mcap != NULL; mcap = mcap->nextp) {
1147*19b65a69Ssb155480 if (mcap->mac_added)
1148*19b65a69Ssb155480 continue;
1149*19b65a69Ssb155480 rv = mac_multicst_add(vswp->mh, (uchar_t *)&mcap->mca);
1150*19b65a69Ssb155480 if (rv == 0) {
1151*19b65a69Ssb155480 mcap->mac_added = B_TRUE;
1152*19b65a69Ssb155480 } else {
1153*19b65a69Ssb155480 cmn_err(CE_WARN, "!vsw%d: unable to add "
1154*19b65a69Ssb155480 "multicast address: %s\n", vswp->instance,
1155*19b65a69Ssb155480 ether_sprintf((void *)&mcap->mca));
1156*19b65a69Ssb155480 }
1157*19b65a69Ssb155480 }
1158*19b65a69Ssb155480 mutex_exit(&vswp->mac_lock);
1159*19b65a69Ssb155480 mutex_exit(&port->mca_lock);
1160*19b65a69Ssb155480 }
1161*19b65a69Ssb155480
1162*19b65a69Ssb155480 RW_EXIT(&plist->lockrw);
1163*19b65a69Ssb155480 }
1164*19b65a69Ssb155480
1165*19b65a69Ssb155480 /*
1166*19b65a69Ssb155480 * Remove unicast and multicast addresses of vsw interface and the ports
1167*19b65a69Ssb155480 * from the physical device.
1168*19b65a69Ssb155480 */
/*
 * Exact inverse of vsw_set_addrs(): only entries marked as programmed
 * (addr_set / mac_added) are removed, and the flags are cleared so a
 * later vsw_set_addrs() can re-program them. Same lock ordering as
 * vsw_set_addrs(). Removal errors are deliberately ignored.
 */
1169*19b65a69Ssb155480 static void
1170*19b65a69Ssb155480 vsw_unset_addrs(vsw_t *vswp)
1171*19b65a69Ssb155480 {
1172*19b65a69Ssb155480 vsw_port_list_t *plist = &vswp->plist;
1173*19b65a69Ssb155480 vsw_port_t *port;
1174*19b65a69Ssb155480 mcst_addr_t *mcap;
1175*19b65a69Ssb155480
1176*19b65a69Ssb155480 READ_ENTER(&vswp->if_lockrw);
1177*19b65a69Ssb155480
1178*19b65a69Ssb155480 if (vswp->if_state & VSW_IF_UP) {
1179*19b65a69Ssb155480
1180*19b65a69Ssb155480 /*
1181*19b65a69Ssb155480 * Remove unicast addr of vsw interfce
1182*19b65a69Ssb155480 * from current physdev
1183*19b65a69Ssb155480 */
1184*19b65a69Ssb155480 mutex_enter(&vswp->hw_lock);
1185*19b65a69Ssb155480 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);
1186*19b65a69Ssb155480 mutex_exit(&vswp->hw_lock);
1187*19b65a69Ssb155480
1188*19b65a69Ssb155480 /*
1189*19b65a69Ssb155480 * Remove mcast addrs of vsw interface
1190*19b65a69Ssb155480 * from current physdev
1191*19b65a69Ssb155480 */
1192*19b65a69Ssb155480 mutex_enter(&vswp->mca_lock);
1193*19b65a69Ssb155480 mutex_enter(&vswp->mac_lock);
1194*19b65a69Ssb155480 for (mcap = vswp->mcap; mcap != NULL; mcap = mcap->nextp) {
1195*19b65a69Ssb155480 if (!mcap->mac_added)
1196*19b65a69Ssb155480 continue;
1197*19b65a69Ssb155480 (void) mac_multicst_remove(vswp->mh,
1198*19b65a69Ssb155480 (uchar_t *)&mcap->mca);
1199*19b65a69Ssb155480 mcap->mac_added = B_FALSE;
1200*19b65a69Ssb155480 }
1201*19b65a69Ssb155480 mutex_exit(&vswp->mac_lock);
1202*19b65a69Ssb155480 mutex_exit(&vswp->mca_lock);
1203*19b65a69Ssb155480
1204*19b65a69Ssb155480 }
1205*19b65a69Ssb155480
1206*19b65a69Ssb155480 RW_EXIT(&vswp->if_lockrw);
1207*19b65a69Ssb155480
1208*19b65a69Ssb155480 WRITE_ENTER(&plist->lockrw);
1209*19b65a69Ssb155480
1210*19b65a69Ssb155480 /*
1211*19b65a69Ssb155480 * Remove unicast address of ports from the current physical device
1212*19b65a69Ssb155480 */
1213*19b65a69Ssb155480 mutex_enter(&vswp->hw_lock);
1214*19b65a69Ssb155480 for (port = plist->head; port != NULL; port = port->p_next) {
1215*19b65a69Ssb155480 /* Remove address if was programmed into HW. */
1216*19b65a69Ssb155480 if (port->addr_set == VSW_ADDR_UNSET)
1217*19b65a69Ssb155480 continue;
1218*19b65a69Ssb155480 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT);
1219*19b65a69Ssb155480 }
1220*19b65a69Ssb155480 mutex_exit(&vswp->hw_lock);
1221*19b65a69Ssb155480
1222*19b65a69Ssb155480 /* Remove multicast addresses of ports from the current physdev */
1223*19b65a69Ssb155480 for (port = plist->head; port != NULL; port = port->p_next) {
1224*19b65a69Ssb155480 mutex_enter(&port->mca_lock);
1225*19b65a69Ssb155480 mutex_enter(&vswp->mac_lock);
1226*19b65a69Ssb155480 for (mcap = port->mcap; mcap != NULL; mcap = mcap->nextp) {
1227*19b65a69Ssb155480 if (!mcap->mac_added)
1228*19b65a69Ssb155480 continue;
1229*19b65a69Ssb155480 (void) mac_multicst_remove(vswp->mh,
1230*19b65a69Ssb155480 (uchar_t *)&mcap->mca);
1231*19b65a69Ssb155480 mcap->mac_added = B_FALSE;
1232*19b65a69Ssb155480 }
1233*19b65a69Ssb155480 mutex_exit(&vswp->mac_lock);
1234*19b65a69Ssb155480 mutex_exit(&port->mca_lock);
1235*19b65a69Ssb155480 }
1236*19b65a69Ssb155480
1237*19b65a69Ssb155480 RW_EXIT(&plist->lockrw);
1238*19b65a69Ssb155480 }
1239*19b65a69Ssb155480
1240*19b65a69Ssb155480 /* copy mac address of vsw into soft state structure */
/*
 * The low-order 48 bits of 'macaddr' are unpacked most-significant byte
 * first into if_addr (the loop fills octet[5] with the lowest byte and
 * works backwards), i.e. the uint64 is stored big-endian as an Ethernet
 * address. Taken and released: if_lockrw as writer.
 */
1241*19b65a69Ssb155480 static void
1242*19b65a69Ssb155480 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
1243*19b65a69Ssb155480 {
1244*19b65a69Ssb155480 int i;
1245*19b65a69Ssb155480
1246*19b65a69Ssb155480 WRITE_ENTER(&vswp->if_lockrw);
1247*19b65a69Ssb155480 for (i = ETHERADDRL - 1; i >= 0; i--) {
1248*19b65a69Ssb155480 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
1249*19b65a69Ssb155480 macaddr >>= 8;
1250*19b65a69Ssb155480 }
1251*19b65a69Ssb155480 RW_EXIT(&vswp->if_lockrw);
1252*19b65a69Ssb155480 }
1253*19b65a69Ssb155480
1254*19b65a69Ssb155480 /*
1255*19b65a69Ssb155480 * Timeout routine to setup switching mode:
1256*19b65a69Ssb155480 * vsw_setup_switching() is
invoked from vsw_attach() or vsw_update_md_prop()
1257*19b65a69Ssb155480 * initially. If it fails and the error is EAGAIN, then this timeout handler
1258*19b65a69Ssb155480 * is started to retry vsw_setup_switching(). vsw_setup_switching() is retried
1259*19b65a69Ssb155480 * until we successfully finish it; or the returned error is not EAGAIN.
1260*19b65a69Ssb155480 */
1261*19b65a69Ssb155480 static void
1262*19b65a69Ssb155480 vsw_setup_switching_timeout(void *arg)
1263*19b65a69Ssb155480 {
1264*19b65a69Ssb155480 vsw_t *vswp = (vsw_t *)arg;
1265*19b65a69Ssb155480 int rv;
1266*19b65a69Ssb155480
/*
 * NOTE(review): this early check of swtmout_enabled is done without
 * swtmout_lock; it is re-checked under the lock before rescheduling
 * below — presumably this unlocked read is just a fast-path bailout.
 */
1267*19b65a69Ssb155480 if (vswp->swtmout_enabled == B_FALSE)
1268*19b65a69Ssb155480 return;
1269*19b65a69Ssb155480
1270*19b65a69Ssb155480 rv = vsw_setup_switching(vswp);
1271*19b65a69Ssb155480
1272*19b65a69Ssb155480 if (rv == 0) {
1273*19b65a69Ssb155480 /*
1274*19b65a69Ssb155480 * Successfully setup switching mode.
1275*19b65a69Ssb155480 * Program unicst, mcst addrs of vsw
1276*19b65a69Ssb155480 * interface and ports in the physdev.
1277*19b65a69Ssb155480 */
1278*19b65a69Ssb155480 vsw_set_addrs(vswp);
1279*19b65a69Ssb155480 }
1280*19b65a69Ssb155480
1281*19b65a69Ssb155480 mutex_enter(&vswp->swtmout_lock);
1282*19b65a69Ssb155480
1283*19b65a69Ssb155480 if (rv == EAGAIN && vswp->swtmout_enabled == B_TRUE) {
1284*19b65a69Ssb155480 /*
1285*19b65a69Ssb155480 * Reschedule timeout() if the error is EAGAIN and the
1286*19b65a69Ssb155480 * timeout is still enabled. For errors other than EAGAIN,
1287*19b65a69Ssb155480 * we simply return without rescheduling timeout().
1288*19b65a69Ssb155480 */
1289*19b65a69Ssb155480 vswp->swtmout_id =
1290*19b65a69Ssb155480 timeout(vsw_setup_switching_timeout, vswp,
1291*19b65a69Ssb155480 (vsw_setup_switching_delay * drv_usectohz(MICROSEC)));
1292*19b65a69Ssb155480 goto exit;
1293*19b65a69Ssb155480 }
1294*19b65a69Ssb155480
1295*19b65a69Ssb155480 /* timeout handler completed */
1296*19b65a69Ssb155480 vswp->swtmout_enabled = B_FALSE;
1297*19b65a69Ssb155480 vswp->swtmout_id = 0;
1298*19b65a69Ssb155480
1299*19b65a69Ssb155480 exit:
1300*19b65a69Ssb155480 mutex_exit(&vswp->swtmout_lock);
1301*19b65a69Ssb155480 }
1302*19b65a69Ssb155480
1303*19b65a69Ssb155480 /*
1304*19b65a69Ssb155480 * Cancel the timeout handler to setup switching mode.
 *
 * swtmout_lock is dropped before calling untimeout() because untimeout()
 * waits for an in-flight handler, and the handler itself takes
 * swtmout_lock — holding it across the call would deadlock. The handler
 * observes swtmout_enabled == B_FALSE and exits without rescheduling.
 * Also resets the switching-setup-done flag and the mac_open retry count
 * so a later setup attempt starts fresh.
1305*19b65a69Ssb155480 */
1306*19b65a69Ssb155480 static void
1307*19b65a69Ssb155480 vsw_stop_switching_timeout(vsw_t *vswp)
1308*19b65a69Ssb155480 {
1309*19b65a69Ssb155480 timeout_id_t tid;
1310*19b65a69Ssb155480
1311*19b65a69Ssb155480 mutex_enter(&vswp->swtmout_lock);
1312*19b65a69Ssb155480
1313*19b65a69Ssb155480 tid = vswp->swtmout_id;
1314*19b65a69Ssb155480
1315*19b65a69Ssb155480 if (tid != 0) {
1316*19b65a69Ssb155480 /* signal timeout handler to stop */
1317*19b65a69Ssb155480 vswp->swtmout_enabled = B_FALSE;
1318*19b65a69Ssb155480 vswp->swtmout_id = 0;
1319*19b65a69Ssb155480 mutex_exit(&vswp->swtmout_lock);
1320*19b65a69Ssb155480
1321*19b65a69Ssb155480 (void) untimeout(tid);
1322*19b65a69Ssb155480 } else {
1323*19b65a69Ssb155480 mutex_exit(&vswp->swtmout_lock);
1324*19b65a69Ssb155480 }
1325*19b65a69Ssb155480
1326*19b65a69Ssb155480 (void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE);
1327*19b65a69Ssb155480
1328*19b65a69Ssb155480 mutex_enter(&vswp->mac_lock);
1329*19b65a69Ssb155480 vswp->mac_open_retries = 0;
1330*19b65a69Ssb155480 mutex_exit(&vswp->mac_lock);
1331*19b65a69Ssb155480 }
1332*19b65a69Ssb155480
1333*19b65a69Ssb155480 /*
133434683adeSsg70180 * Setup the required switching mode.
1335*19b65a69Ssb155480 * This routine is invoked from vsw_attach() or vsw_update_md_prop()
1336*19b65a69Ssb155480 * initially. If it fails and the error is EAGAIN, then a timeout handler
1337*19b65a69Ssb155480 * is started to retry vsw_setup_switching(), until it successfully finishes;
1338*19b65a69Ssb155480 * or the returned error is not EAGAIN.
133934683adeSsg70180 *
1340*19b65a69Ssb155480 * Returns:
1341*19b65a69Ssb155480 * 0 on success.
1342*19b65a69Ssb155480 * EAGAIN if retry is needed.
1343*19b65a69Ssb155480 * 1 on all other failures.
134434683adeSsg70180 */
134534683adeSsg70180 static int
134634683adeSsg70180 vsw_setup_switching(vsw_t *vswp)
134734683adeSsg70180 {
134834683adeSsg70180 int i, rv = 1;
134934683adeSsg70180
135034683adeSsg70180 D1(vswp, "%s: enter", __func__);
135134683adeSsg70180
1352*19b65a69Ssb155480 /*
1353*19b65a69Ssb155480 * Select best switching mode.
1354*19b65a69Ssb155480 * Note that we start from the saved smode_idx. This is done as
1355*19b65a69Ssb155480 * this routine can be called from the timeout handler to retry
1356*19b65a69Ssb155480 * setting up a specific mode. Currently only the function which
1357*19b65a69Ssb155480 * sets up layer2/promisc mode returns EAGAIN if the underlying
1358*19b65a69Ssb155480 * physical device is not available yet, causing retries.
1359*19b65a69Ssb155480 */
1360*19b65a69Ssb155480 for (i = vswp->smode_idx; i < vswp->smode_num; i++) {
136134683adeSsg70180 vswp->smode_idx = i;
136234683adeSsg70180 switch (vswp->smode[i]) {
136334683adeSsg70180 case VSW_LAYER2:
136434683adeSsg70180 case VSW_LAYER2_PROMISC:
136534683adeSsg70180 rv = vsw_setup_layer2(vswp);
136634683adeSsg70180 break;
136734683adeSsg70180
136834683adeSsg70180 case VSW_LAYER3:
136934683adeSsg70180 rv = vsw_setup_layer3(vswp);
137034683adeSsg70180 break;
137134683adeSsg70180
137234683adeSsg70180 default:
137334683adeSsg70180 DERR(vswp, "unknown switch mode");
1374*19b65a69Ssb155480 break;
1375*19b65a69Ssb155480 }
1376*19b65a69Ssb155480
/* success, or retryable failure: stop at this mode (smode_idx saved) */
1377*19b65a69Ssb155480 if ((rv == 0) || (rv == EAGAIN))
1378*19b65a69Ssb155480 break;
1379*19b65a69Ssb155480
1380*19b65a69Ssb155480 /* all other errors(rv != 0): continue & select the next mode */
138134683adeSsg70180 rv = 1;
138234683adeSsg70180 }
138334683adeSsg70180
1384*19b65a69Ssb155480 if (rv && (rv != EAGAIN)) {
138534683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Unable to setup specified "
138634683adeSsg70180 "switching mode", vswp->instance);
1387*19b65a69Ssb155480 } else if (rv == 0) {
1388*19b65a69Ssb155480 (void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE);
138934683adeSsg70180 }
139034683adeSsg70180
139134683adeSsg70180 D2(vswp, "%s: Operating in mode %d", __func__,
139234683adeSsg70180 vswp->smode[vswp->smode_idx]);
139334683adeSsg70180
139434683adeSsg70180 D1(vswp, "%s: exit", __func__);
139534683adeSsg70180
1396*19b65a69Ssb155480 return (rv);
139734683adeSsg70180 }
139834683adeSsg70180
139934683adeSsg70180 /*
1400e1ebb9ecSlm66018 * Setup for layer 2 switching.
1401e1ebb9ecSlm66018 *
1402*19b65a69Ssb155480 * Returns:
1403*19b65a69Ssb155480 * 0 on success.
1404*19b65a69Ssb155480 * EAGAIN if retry is needed.
1405*19b65a69Ssb155480 * EIO on all other failures.
1406e1ebb9ecSlm66018 */
14071ae08745Sheppo static int
14081ae08745Sheppo vsw_setup_layer2(vsw_t *vswp)
14091ae08745Sheppo {
1410*19b65a69Ssb155480 int rv;
1411*19b65a69Ssb155480
14121ae08745Sheppo D1(vswp, "%s: enter", __func__);
14131ae08745Sheppo
/* install the layer-2 frame switching routine */
141434683adeSsg70180 vswp->vsw_switch_frame = vsw_switch_l2_frame;
14151ae08745Sheppo
1416*19b65a69Ssb155480 rv = strlen(vswp->physname);
1417*19b65a69Ssb155480 if (rv == 0) {
14181ae08745Sheppo /*
1419*19b65a69Ssb155480 * Physical device name is NULL, which is
1420*19b65a69Ssb155480 * required for layer 2.
14211ae08745Sheppo */
1422*19b65a69Ssb155480 cmn_err(CE_WARN, "!vsw%d: no physical device name specified",
1423*19b65a69Ssb155480 vswp->instance);
1424*19b65a69Ssb155480 return (EIO);
1425*19b65a69Ssb155480 }
1426*19b65a69Ssb155480
1427*19b65a69Ssb155480 mutex_enter(&vswp->mac_lock);
1428*19b65a69Ssb155480
/*
 * May return EAGAIN (device not available yet, e.g. during boot);
 * that is propagated so the caller can schedule a retry.
 */
1429*19b65a69Ssb155480 rv = vsw_mac_open(vswp);
1430*19b65a69Ssb155480 if (rv != 0) {
1431*19b65a69Ssb155480 if (rv != EAGAIN) {
1432*19b65a69Ssb155480 cmn_err(CE_WARN, "!vsw%d: Unable to open physical "
1433*19b65a69Ssb155480 "device: %s\n", vswp->instance, vswp->physname);
1434*19b65a69Ssb155480 }
1435*19b65a69Ssb155480 mutex_exit(&vswp->mac_lock);
1436*19b65a69Ssb155480 return (rv);
14371ae08745Sheppo }
1438e1ebb9ecSlm66018
1439e1ebb9ecSlm66018 if (vswp->smode[vswp->smode_idx] == VSW_LAYER2) {
1440e1ebb9ecSlm66018 /*
1441e1ebb9ecSlm66018 * Verify that underlying device can support multiple
14425f94e909Ssg70180 * unicast mac addresses.
1443e1ebb9ecSlm66018 */
1444*19b65a69Ssb155480 rv = vsw_get_hw_maddr(vswp);
1445*19b65a69Ssb155480 if (rv != 0) {
144634683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Unable to setup "
14475f94e909Ssg70180 "layer2 switching", vswp->instance);
1448*19b65a69Ssb155480 goto exit_error;
1449e1ebb9ecSlm66018 }
1450e1ebb9ecSlm66018 }
1451e1ebb9ecSlm66018
1452e1ebb9ecSlm66018 /*
1453*19b65a69Ssb155480 * Attempt to link into the MAC layer so we can get
1454*19b65a69Ssb155480 * and send packets out over the physical adapter.
1455e1ebb9ecSlm66018 */
1456*19b65a69Ssb155480 rv = vsw_mac_attach(vswp);
1457*19b65a69Ssb155480 if (rv != 0) {
1458*19b65a69Ssb155480 /*
1459*19b65a69Ssb155480 * Registration with the MAC layer has failed,
1460*19b65a69Ssb155480 * so return error so that can fall back to next
1461*19b65a69Ssb155480 * prefered switching method.
1462*19b65a69Ssb155480 */
1463*19b65a69Ssb155480 cmn_err(CE_WARN, "!vsw%d: Unable to setup physical device: "
1464*19b65a69Ssb155480 "%s\n", vswp->instance, vswp->physname);
1465*19b65a69Ssb155480 goto exit_error;
14661ae08745Sheppo }
14671ae08745Sheppo
14681ae08745Sheppo D1(vswp, "%s: exit", __func__);
14691ae08745Sheppo
1470*19b65a69Ssb155480 mutex_exit(&vswp->mac_lock);
1471e1ebb9ecSlm66018 return (0);
1472*19b65a69Ssb155480
/* common failure path: close the device opened above; always maps to EIO */
1473*19b65a69Ssb155480 exit_error:
1474*19b65a69Ssb155480 vsw_mac_close(vswp);
1475*19b65a69Ssb155480 mutex_exit(&vswp->mac_lock);
1476*19b65a69Ssb155480 return (EIO);
14771ae08745Sheppo }
14781ae08745Sheppo
/*
 * Setup for layer 3 (IP routed) switching: just installs the layer-3
 * frame switching routine; no physical device is opened here.
 * Always returns 0.
 */
14791ae08745Sheppo static int
14801ae08745Sheppo vsw_setup_layer3(vsw_t *vswp)
14811ae08745Sheppo {
14821ae08745Sheppo D1(vswp, "%s: enter", __func__);
14831ae08745Sheppo
14841ae08745Sheppo D2(vswp, "%s: operating in layer 3 mode", __func__);
148534683adeSsg70180 vswp->vsw_switch_frame = vsw_switch_l3_frame;
14861ae08745Sheppo
14871ae08745Sheppo D1(vswp, "%s: exit", __func__);
14881ae08745Sheppo
14891ae08745Sheppo return (0);
14901ae08745Sheppo }
14911ae08745Sheppo
14921ae08745Sheppo /*
1493*19b65a69Ssb155480
* Open the underlying physical device for access in layer2 mode. 1494*19b65a69Ssb155480 * Returns: 1495*19b65a69Ssb155480 * 0 on success 1496*19b65a69Ssb155480 * EAGAIN if mac_open() fails due to the device being not available yet. 1497*19b65a69Ssb155480 * EIO on any other failures. 1498*19b65a69Ssb155480 */ 1499*19b65a69Ssb155480 static int 1500*19b65a69Ssb155480 vsw_mac_open(vsw_t *vswp) 1501*19b65a69Ssb155480 { 1502*19b65a69Ssb155480 char drv[LIFNAMSIZ]; 1503*19b65a69Ssb155480 uint_t ddi_instance; 1504*19b65a69Ssb155480 int rv; 1505*19b65a69Ssb155480 1506*19b65a69Ssb155480 ASSERT(MUTEX_HELD(&vswp->mac_lock)); 1507*19b65a69Ssb155480 1508*19b65a69Ssb155480 if (vswp->mh != NULL) { 1509*19b65a69Ssb155480 /* already open */ 1510*19b65a69Ssb155480 return (0); 1511*19b65a69Ssb155480 } 1512*19b65a69Ssb155480 1513*19b65a69Ssb155480 if (vswp->mac_open_retries++ >= vsw_mac_open_retries) { 1514*19b65a69Ssb155480 /* exceeded max retries */ 1515*19b65a69Ssb155480 return (EIO); 1516*19b65a69Ssb155480 } 1517*19b65a69Ssb155480 1518*19b65a69Ssb155480 if (ddi_parse(vswp->physname, drv, &ddi_instance) != DDI_SUCCESS) { 1519*19b65a69Ssb155480 cmn_err(CE_WARN, "!vsw%d: invalid device name: %s", 1520*19b65a69Ssb155480 vswp->instance, vswp->physname); 1521*19b65a69Ssb155480 return (EIO); 1522*19b65a69Ssb155480 } 1523*19b65a69Ssb155480 1524*19b65a69Ssb155480 /* 1525*19b65a69Ssb155480 * Aggregation devices are special in that the device instance 1526*19b65a69Ssb155480 * must be set to zero when they are being mac_open()'ed. 1527*19b65a69Ssb155480 * 1528*19b65a69Ssb155480 * The only way to determine if we are being passed an aggregated 1529*19b65a69Ssb155480 * device is to check the device name. 
1530*19b65a69Ssb155480 */ 1531*19b65a69Ssb155480 if (strcmp(drv, "aggr") == 0) { 1532*19b65a69Ssb155480 ddi_instance = 0; 1533*19b65a69Ssb155480 } 1534*19b65a69Ssb155480 1535*19b65a69Ssb155480 rv = mac_open(vswp->physname, ddi_instance, &vswp->mh); 1536*19b65a69Ssb155480 if (rv != 0) { 1537*19b65a69Ssb155480 /* 1538*19b65a69Ssb155480 * If mac_open() failed and the error indicates that the 1539*19b65a69Ssb155480 * device is not available yet, then, we return EAGAIN to 1540*19b65a69Ssb155480 * indicate that it needs to be retried. 1541*19b65a69Ssb155480 * For example, this may happen during boot up, as the 1542*19b65a69Ssb155480 * required link aggregation groups(devices) have not been 1543*19b65a69Ssb155480 * created yet. 1544*19b65a69Ssb155480 */ 1545*19b65a69Ssb155480 if (rv == ENOENT) { 1546*19b65a69Ssb155480 return (EAGAIN); 1547*19b65a69Ssb155480 } else { 1548*19b65a69Ssb155480 cmn_err(CE_WARN, "vsw%d: mac_open %s failed rv:%x", 1549*19b65a69Ssb155480 vswp->instance, vswp->physname, rv); 1550*19b65a69Ssb155480 return (EIO); 1551*19b65a69Ssb155480 } 1552*19b65a69Ssb155480 } 1553*19b65a69Ssb155480 1554*19b65a69Ssb155480 vswp->mac_open_retries = 0; 1555*19b65a69Ssb155480 1556*19b65a69Ssb155480 return (0); 1557*19b65a69Ssb155480 } 1558*19b65a69Ssb155480 1559*19b65a69Ssb155480 /* 1560*19b65a69Ssb155480 * Close the underlying physical device. 
1561*19b65a69Ssb155480 */ 1562*19b65a69Ssb155480 static void 1563*19b65a69Ssb155480 vsw_mac_close(vsw_t *vswp) 1564*19b65a69Ssb155480 { 1565*19b65a69Ssb155480 ASSERT(MUTEX_HELD(&vswp->mac_lock)); 1566*19b65a69Ssb155480 1567*19b65a69Ssb155480 if (vswp->mh != NULL) { 1568*19b65a69Ssb155480 mac_close(vswp->mh); 1569*19b65a69Ssb155480 vswp->mh = NULL; 1570*19b65a69Ssb155480 } 1571*19b65a69Ssb155480 } 1572*19b65a69Ssb155480 1573*19b65a69Ssb155480 /* 15741ae08745Sheppo * Link into the MAC layer to gain access to the services provided by 15751ae08745Sheppo * the underlying physical device driver (which should also have 15761ae08745Sheppo * registered with the MAC layer). 15771ae08745Sheppo * 15781ae08745Sheppo * Only when in layer 2 mode. 15791ae08745Sheppo */ 15801ae08745Sheppo static int 15811ae08745Sheppo vsw_mac_attach(vsw_t *vswp) 15821ae08745Sheppo { 15837636cb21Slm66018 D1(vswp, "%s: enter", __func__); 15841ae08745Sheppo 158534683adeSsg70180 ASSERT(vswp->mrh == NULL); 158634683adeSsg70180 ASSERT(vswp->mstarted == B_FALSE); 158734683adeSsg70180 ASSERT(vswp->mresources == B_FALSE); 15881ae08745Sheppo 1589*19b65a69Ssb155480 ASSERT(MUTEX_HELD(&vswp->mac_lock)); 15901ae08745Sheppo 15917636cb21Slm66018 ASSERT(vswp->mh != NULL); 15927636cb21Slm66018 15931ae08745Sheppo D2(vswp, "vsw_mac_attach: using device %s", vswp->physname); 15941ae08745Sheppo 15957636cb21Slm66018 if (vsw_multi_ring_enable) { 159634683adeSsg70180 /* 159734683adeSsg70180 * Initialize the ring table. 159834683adeSsg70180 */ 15997636cb21Slm66018 vsw_mac_ring_tbl_init(vswp); 16001ae08745Sheppo 16017636cb21Slm66018 /* 160234683adeSsg70180 * Register our rx callback function. 16037636cb21Slm66018 */ 16047636cb21Slm66018 vswp->mrh = mac_rx_add(vswp->mh, 16057636cb21Slm66018 vsw_rx_queue_cb, (void *)vswp); 160634683adeSsg70180 ASSERT(vswp->mrh != NULL); 16077636cb21Slm66018 16087636cb21Slm66018 /* 16097636cb21Slm66018 * Register our mac resource callback. 
16107636cb21Slm66018 */ 16117636cb21Slm66018 mac_resource_set(vswp->mh, vsw_mac_ring_add_cb, (void *)vswp); 16127636cb21Slm66018 vswp->mresources = B_TRUE; 16137636cb21Slm66018 16147636cb21Slm66018 /* 16157636cb21Slm66018 * Get the ring resources available to us from 16167636cb21Slm66018 * the mac below us. 16177636cb21Slm66018 */ 16187636cb21Slm66018 mac_resources(vswp->mh); 16197636cb21Slm66018 } else { 16207636cb21Slm66018 /* 16217636cb21Slm66018 * Just register our rx callback function 16227636cb21Slm66018 */ 16237636cb21Slm66018 vswp->mrh = mac_rx_add(vswp->mh, vsw_rx_cb, (void *)vswp); 16247636cb21Slm66018 ASSERT(vswp->mrh != NULL); 162534683adeSsg70180 } 16267636cb21Slm66018 16277636cb21Slm66018 /* Get the MAC tx fn */ 16281ae08745Sheppo vswp->txinfo = mac_tx_get(vswp->mh); 16291ae08745Sheppo 16301ae08745Sheppo /* start the interface */ 16311ae08745Sheppo if (mac_start(vswp->mh) != 0) { 163234683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Could not start mac interface", 163334683adeSsg70180 vswp->instance); 16341ae08745Sheppo goto mac_fail_exit; 16351ae08745Sheppo } 16361ae08745Sheppo 16377636cb21Slm66018 vswp->mstarted = B_TRUE; 16387636cb21Slm66018 16397636cb21Slm66018 D1(vswp, "%s: exit", __func__); 16401ae08745Sheppo return (0); 16411ae08745Sheppo 16421ae08745Sheppo mac_fail_exit: 16437636cb21Slm66018 vsw_mac_detach(vswp); 16441ae08745Sheppo 16457636cb21Slm66018 D1(vswp, "%s: exit", __func__); 16461ae08745Sheppo return (1); 16471ae08745Sheppo } 16481ae08745Sheppo 16491ae08745Sheppo static void 16501ae08745Sheppo vsw_mac_detach(vsw_t *vswp) 16511ae08745Sheppo { 16521ae08745Sheppo D1(vswp, "vsw_mac_detach: enter"); 16531ae08745Sheppo 16547636cb21Slm66018 ASSERT(vswp != NULL); 1655*19b65a69Ssb155480 ASSERT(MUTEX_HELD(&vswp->mac_lock)); 16567636cb21Slm66018 16577636cb21Slm66018 if (vsw_multi_ring_enable) { 16587636cb21Slm66018 vsw_mac_ring_tbl_destroy(vswp); 16597636cb21Slm66018 } 16607636cb21Slm66018 1661b9a6d57aSsg70180 if (vswp->mh != NULL) { 
16627636cb21Slm66018 if (vswp->mstarted) 16637636cb21Slm66018 mac_stop(vswp->mh); 16641ae08745Sheppo if (vswp->mrh != NULL) 16651f8aaf0dSethindra mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE); 16667636cb21Slm66018 if (vswp->mresources) 16677636cb21Slm66018 mac_resource_set(vswp->mh, NULL, NULL); 1668b9a6d57aSsg70180 } 16691ae08745Sheppo 16701ae08745Sheppo vswp->mrh = NULL; 16711ae08745Sheppo vswp->txinfo = NULL; 16727636cb21Slm66018 vswp->mstarted = B_FALSE; 16731ae08745Sheppo 16741ae08745Sheppo D1(vswp, "vsw_mac_detach: exit"); 16751ae08745Sheppo } 16761ae08745Sheppo 16771ae08745Sheppo /* 1678e1ebb9ecSlm66018 * Depending on the mode specified, the capabilites and capacity 1679e1ebb9ecSlm66018 * of the underlying device setup the physical device. 16801ae08745Sheppo * 1681e1ebb9ecSlm66018 * If in layer 3 mode, then do nothing. 1682e1ebb9ecSlm66018 * 1683e1ebb9ecSlm66018 * If in layer 2 programmed mode attempt to program the unicast address 1684e1ebb9ecSlm66018 * associated with the port into the physical device. If this is not 1685e1ebb9ecSlm66018 * possible due to resource exhaustion or simply because the device does 1686e1ebb9ecSlm66018 * not support multiple unicast addresses then if required fallback onto 1687e1ebb9ecSlm66018 * putting the card into promisc mode. 1688e1ebb9ecSlm66018 * 1689e1ebb9ecSlm66018 * If in promisc mode then simply set the card into promisc mode. 1690e1ebb9ecSlm66018 * 1691e1ebb9ecSlm66018 * Returns 0 success, 1 on failure. 
16921ae08745Sheppo */ 1693e1ebb9ecSlm66018 static int 16945f94e909Ssg70180 vsw_set_hw(vsw_t *vswp, vsw_port_t *port, int type) 16951ae08745Sheppo { 1696e1ebb9ecSlm66018 mac_multi_addr_t mac_addr; 1697e1ebb9ecSlm66018 int err; 16981ae08745Sheppo 1699e1ebb9ecSlm66018 D1(vswp, "%s: enter", __func__); 1700e1ebb9ecSlm66018 17015f94e909Ssg70180 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 17025f94e909Ssg70180 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 17035f94e909Ssg70180 1704e1ebb9ecSlm66018 if (vswp->smode[vswp->smode_idx] == VSW_LAYER3) 1705e1ebb9ecSlm66018 return (0); 1706e1ebb9ecSlm66018 1707e1ebb9ecSlm66018 if (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC) { 17085f94e909Ssg70180 return (vsw_set_hw_promisc(vswp, port, type)); 1709e1ebb9ecSlm66018 } 1710e1ebb9ecSlm66018 1711e1ebb9ecSlm66018 /* 1712e1ebb9ecSlm66018 * Attempt to program the unicast address into the HW. 1713e1ebb9ecSlm66018 */ 1714e1ebb9ecSlm66018 mac_addr.mma_addrlen = ETHERADDRL; 17155f94e909Ssg70180 if (type == VSW_VNETPORT) { 17165f94e909Ssg70180 ASSERT(port != NULL); 1717e1ebb9ecSlm66018 ether_copy(&port->p_macaddr, &mac_addr.mma_addr); 17185f94e909Ssg70180 } else { 17195f94e909Ssg70180 ether_copy(&vswp->if_addr, &mac_addr.mma_addr); 17205f94e909Ssg70180 } 1721e1ebb9ecSlm66018 17225f94e909Ssg70180 err = vsw_set_hw_addr(vswp, &mac_addr); 1723*19b65a69Ssb155480 if (err == ENOSPC) { 1724e1ebb9ecSlm66018 /* 1725e1ebb9ecSlm66018 * Mark that attempt should be made to re-config sometime 1726e1ebb9ecSlm66018 * in future if a port is deleted. 1727e1ebb9ecSlm66018 */ 1728e1ebb9ecSlm66018 vswp->recfg_reqd = B_TRUE; 1729e1ebb9ecSlm66018 1730e1ebb9ecSlm66018 /* 1731e1ebb9ecSlm66018 * Only 1 mode specified, nothing more to do. 1732e1ebb9ecSlm66018 */ 1733e1ebb9ecSlm66018 if (vswp->smode_num == 1) 1734e1ebb9ecSlm66018 return (err); 1735e1ebb9ecSlm66018 1736e1ebb9ecSlm66018 /* 1737e1ebb9ecSlm66018 * If promiscuous was next mode specified try to 1738e1ebb9ecSlm66018 * set the card into that mode. 
1739e1ebb9ecSlm66018 */ 1740e1ebb9ecSlm66018 if ((vswp->smode_idx <= (vswp->smode_num - 2)) && 1741205eeb1aSlm66018 (vswp->smode[vswp->smode_idx + 1] == 1742205eeb1aSlm66018 VSW_LAYER2_PROMISC)) { 1743e1ebb9ecSlm66018 vswp->smode_idx += 1; 17445f94e909Ssg70180 return (vsw_set_hw_promisc(vswp, port, type)); 1745e1ebb9ecSlm66018 } 1746e1ebb9ecSlm66018 return (err); 1747e1ebb9ecSlm66018 } 1748e1ebb9ecSlm66018 1749*19b65a69Ssb155480 if (err != 0) 1750*19b65a69Ssb155480 return (err); 1751*19b65a69Ssb155480 17525f94e909Ssg70180 if (type == VSW_VNETPORT) { 1753e1ebb9ecSlm66018 port->addr_slot = mac_addr.mma_slot; 1754e1ebb9ecSlm66018 port->addr_set = VSW_ADDR_HW; 17555f94e909Ssg70180 } else { 17565f94e909Ssg70180 vswp->addr_slot = mac_addr.mma_slot; 17575f94e909Ssg70180 vswp->addr_set = VSW_ADDR_HW; 17585f94e909Ssg70180 } 1759e1ebb9ecSlm66018 1760*19b65a69Ssb155480 D2(vswp, "programmed addr %s into slot %d " 1761*19b65a69Ssb155480 "of device %s", ether_sprintf((void *)mac_addr.mma_addr), 17625f94e909Ssg70180 mac_addr.mma_slot, vswp->physname); 1763e1ebb9ecSlm66018 1764e1ebb9ecSlm66018 D1(vswp, "%s: exit", __func__); 1765e1ebb9ecSlm66018 1766e1ebb9ecSlm66018 return (0); 1767e1ebb9ecSlm66018 } 1768e1ebb9ecSlm66018 1769e1ebb9ecSlm66018 /* 1770e1ebb9ecSlm66018 * If in layer 3 mode do nothing. 1771e1ebb9ecSlm66018 * 1772e1ebb9ecSlm66018 * If in layer 2 switched mode remove the address from the physical 1773e1ebb9ecSlm66018 * device. 1774e1ebb9ecSlm66018 * 1775e1ebb9ecSlm66018 * If in layer 2 promiscuous mode disable promisc mode. 1776e1ebb9ecSlm66018 * 1777e1ebb9ecSlm66018 * Returns 0 on success. 
1778e1ebb9ecSlm66018 */ 1779e1ebb9ecSlm66018 static int 17805f94e909Ssg70180 vsw_unset_hw(vsw_t *vswp, vsw_port_t *port, int type) 1781e1ebb9ecSlm66018 { 17825f94e909Ssg70180 mac_addr_slot_t slot; 17835f94e909Ssg70180 int rv; 1784e1ebb9ecSlm66018 1785e1ebb9ecSlm66018 D1(vswp, "%s: enter", __func__); 1786e1ebb9ecSlm66018 17875f94e909Ssg70180 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 17885f94e909Ssg70180 1789e1ebb9ecSlm66018 if (vswp->smode[vswp->smode_idx] == VSW_LAYER3) 1790e1ebb9ecSlm66018 return (0); 1791e1ebb9ecSlm66018 17925f94e909Ssg70180 switch (type) { 17935f94e909Ssg70180 case VSW_VNETPORT: 17945f94e909Ssg70180 ASSERT(port != NULL); 17955f94e909Ssg70180 1796e1ebb9ecSlm66018 if (port->addr_set == VSW_ADDR_PROMISC) { 17975f94e909Ssg70180 return (vsw_unset_hw_promisc(vswp, port, type)); 17985f94e909Ssg70180 17995f94e909Ssg70180 } else if (port->addr_set == VSW_ADDR_HW) { 18005f94e909Ssg70180 slot = port->addr_slot; 18015f94e909Ssg70180 if ((rv = vsw_unset_hw_addr(vswp, slot)) == 0) 18025f94e909Ssg70180 port->addr_set = VSW_ADDR_UNSET; 1803e1ebb9ecSlm66018 } 1804e1ebb9ecSlm66018 18055f94e909Ssg70180 break; 18065f94e909Ssg70180 18075f94e909Ssg70180 case VSW_LOCALDEV: 18085f94e909Ssg70180 if (vswp->addr_set == VSW_ADDR_PROMISC) { 18095f94e909Ssg70180 return (vsw_unset_hw_promisc(vswp, NULL, type)); 18105f94e909Ssg70180 18115f94e909Ssg70180 } else if (vswp->addr_set == VSW_ADDR_HW) { 18125f94e909Ssg70180 slot = vswp->addr_slot; 18135f94e909Ssg70180 if ((rv = vsw_unset_hw_addr(vswp, slot)) == 0) 18145f94e909Ssg70180 vswp->addr_set = VSW_ADDR_UNSET; 18155f94e909Ssg70180 } 18165f94e909Ssg70180 18175f94e909Ssg70180 break; 18185f94e909Ssg70180 18195f94e909Ssg70180 default: 18205f94e909Ssg70180 /* should never happen */ 18215f94e909Ssg70180 DERR(vswp, "%s: unknown type %d", __func__, type); 18225f94e909Ssg70180 ASSERT(0); 18235f94e909Ssg70180 return (1); 18245f94e909Ssg70180 } 18255f94e909Ssg70180 18265f94e909Ssg70180 D1(vswp, "%s: exit", __func__); 18275f94e909Ssg70180 
return (rv); 18285f94e909Ssg70180 } 18295f94e909Ssg70180 18305f94e909Ssg70180 /* 18315f94e909Ssg70180 * Attempt to program a unicast address into HW. 18325f94e909Ssg70180 * 18335f94e909Ssg70180 * Returns 0 on sucess, 1 on failure. 18345f94e909Ssg70180 */ 18355f94e909Ssg70180 static int 18365f94e909Ssg70180 vsw_set_hw_addr(vsw_t *vswp, mac_multi_addr_t *mac) 18375f94e909Ssg70180 { 18385f94e909Ssg70180 void *mah; 1839*19b65a69Ssb155480 int rv = EINVAL; 18405f94e909Ssg70180 18415f94e909Ssg70180 D1(vswp, "%s: enter", __func__); 18425f94e909Ssg70180 18435f94e909Ssg70180 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 18445f94e909Ssg70180 1845e1ebb9ecSlm66018 if (vswp->maddr.maddr_handle == NULL) 1846*19b65a69Ssb155480 return (rv); 1847e1ebb9ecSlm66018 1848e1ebb9ecSlm66018 mah = vswp->maddr.maddr_handle; 1849e1ebb9ecSlm66018 18505f94e909Ssg70180 rv = vswp->maddr.maddr_add(mah, mac); 18515f94e909Ssg70180 18525f94e909Ssg70180 if (rv == 0) 1853*19b65a69Ssb155480 return (rv); 18545f94e909Ssg70180 18555f94e909Ssg70180 /* 18565f94e909Ssg70180 * Its okay for the add to fail because we have exhausted 18575f94e909Ssg70180 * all the resouces in the hardware device. Any other error 18585f94e909Ssg70180 * we want to flag. 18595f94e909Ssg70180 */ 18605f94e909Ssg70180 if (rv != ENOSPC) { 18615f94e909Ssg70180 cmn_err(CE_WARN, "!vsw%d: error programming " 1862*19b65a69Ssb155480 "address %s into HW err (%d)", 1863*19b65a69Ssb155480 vswp->instance, ether_sprintf((void *)mac->mma_addr), rv); 18645f94e909Ssg70180 } 18655f94e909Ssg70180 D1(vswp, "%s: exit", __func__); 1866*19b65a69Ssb155480 return (rv); 1867e1ebb9ecSlm66018 } 1868e1ebb9ecSlm66018 18695f94e909Ssg70180 /* 18705f94e909Ssg70180 * Remove a unicast mac address which has previously been programmed 18715f94e909Ssg70180 * into HW. 18725f94e909Ssg70180 * 18735f94e909Ssg70180 * Returns 0 on sucess, 1 on failure. 
18745f94e909Ssg70180 */ 18755f94e909Ssg70180 static int 18765f94e909Ssg70180 vsw_unset_hw_addr(vsw_t *vswp, int slot) 18775f94e909Ssg70180 { 18785f94e909Ssg70180 void *mah; 18795f94e909Ssg70180 int rv; 1880e1ebb9ecSlm66018 18815f94e909Ssg70180 D1(vswp, "%s: enter", __func__); 18825f94e909Ssg70180 18835f94e909Ssg70180 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 18845f94e909Ssg70180 ASSERT(slot >= 0); 18855f94e909Ssg70180 18865f94e909Ssg70180 if (vswp->maddr.maddr_handle == NULL) 18875f94e909Ssg70180 return (1); 18885f94e909Ssg70180 18895f94e909Ssg70180 mah = vswp->maddr.maddr_handle; 18905f94e909Ssg70180 18915f94e909Ssg70180 rv = vswp->maddr.maddr_remove(mah, slot); 18925f94e909Ssg70180 if (rv != 0) { 18935f94e909Ssg70180 cmn_err(CE_WARN, "!vsw%d: unable to remove address " 18945f94e909Ssg70180 "from slot %d in device %s (err %d)", 18955f94e909Ssg70180 vswp->instance, slot, vswp->physname, rv); 18965f94e909Ssg70180 return (1); 1897e1ebb9ecSlm66018 } 1898e1ebb9ecSlm66018 18995f94e909Ssg70180 D2(vswp, "removed addr from slot %d in device %s", 19005f94e909Ssg70180 slot, vswp->physname); 19015f94e909Ssg70180 1902e1ebb9ecSlm66018 D1(vswp, "%s: exit", __func__); 1903e1ebb9ecSlm66018 return (0); 1904e1ebb9ecSlm66018 } 1905e1ebb9ecSlm66018 1906e1ebb9ecSlm66018 /* 1907e1ebb9ecSlm66018 * Set network card into promisc mode. 1908e1ebb9ecSlm66018 * 1909e1ebb9ecSlm66018 * Returns 0 on success, 1 on failure. 
1910e1ebb9ecSlm66018 */ 1911e1ebb9ecSlm66018 static int 19125f94e909Ssg70180 vsw_set_hw_promisc(vsw_t *vswp, vsw_port_t *port, int type) 1913e1ebb9ecSlm66018 { 1914e1ebb9ecSlm66018 D1(vswp, "%s: enter", __func__); 1915e1ebb9ecSlm66018 19165f94e909Ssg70180 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 19175f94e909Ssg70180 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 19185f94e909Ssg70180 191934683adeSsg70180 mutex_enter(&vswp->mac_lock); 192034683adeSsg70180 if (vswp->mh == NULL) { 192134683adeSsg70180 mutex_exit(&vswp->mac_lock); 1922e1ebb9ecSlm66018 return (1); 192334683adeSsg70180 } 1924e1ebb9ecSlm66018 1925e1ebb9ecSlm66018 if (vswp->promisc_cnt++ == 0) { 1926e1ebb9ecSlm66018 if (mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC) != 0) { 1927e1ebb9ecSlm66018 vswp->promisc_cnt--; 192834683adeSsg70180 mutex_exit(&vswp->mac_lock); 1929e1ebb9ecSlm66018 return (1); 1930e1ebb9ecSlm66018 } 193134683adeSsg70180 cmn_err(CE_NOTE, "!vsw%d: switching device %s into " 193234683adeSsg70180 "promiscuous mode", vswp->instance, vswp->physname); 1933e1ebb9ecSlm66018 } 193434683adeSsg70180 mutex_exit(&vswp->mac_lock); 19355f94e909Ssg70180 19365f94e909Ssg70180 if (type == VSW_VNETPORT) { 19375f94e909Ssg70180 ASSERT(port != NULL); 1938e1ebb9ecSlm66018 port->addr_set = VSW_ADDR_PROMISC; 19395f94e909Ssg70180 } else { 19405f94e909Ssg70180 vswp->addr_set = VSW_ADDR_PROMISC; 19415f94e909Ssg70180 } 1942e1ebb9ecSlm66018 1943e1ebb9ecSlm66018 D1(vswp, "%s: exit", __func__); 1944e1ebb9ecSlm66018 1945e1ebb9ecSlm66018 return (0); 1946e1ebb9ecSlm66018 } 1947e1ebb9ecSlm66018 1948e1ebb9ecSlm66018 /* 1949e1ebb9ecSlm66018 * Turn off promiscuous mode on network card. 1950e1ebb9ecSlm66018 * 1951e1ebb9ecSlm66018 * Returns 0 on success, 1 on failure. 
1952e1ebb9ecSlm66018 */ 1953e1ebb9ecSlm66018 static int 19545f94e909Ssg70180 vsw_unset_hw_promisc(vsw_t *vswp, vsw_port_t *port, int type) 1955e1ebb9ecSlm66018 { 1956e1ebb9ecSlm66018 vsw_port_list_t *plist = &vswp->plist; 1957e1ebb9ecSlm66018 195834683adeSsg70180 D2(vswp, "%s: enter", __func__); 1959e1ebb9ecSlm66018 19605f94e909Ssg70180 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 19615f94e909Ssg70180 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 19625f94e909Ssg70180 196334683adeSsg70180 mutex_enter(&vswp->mac_lock); 196434683adeSsg70180 if (vswp->mh == NULL) { 196534683adeSsg70180 mutex_exit(&vswp->mac_lock); 1966e1ebb9ecSlm66018 return (1); 196734683adeSsg70180 } 1968e1ebb9ecSlm66018 1969e1ebb9ecSlm66018 if (--vswp->promisc_cnt == 0) { 1970e1ebb9ecSlm66018 if (mac_promisc_set(vswp->mh, B_FALSE, MAC_DEVPROMISC) != 0) { 1971e1ebb9ecSlm66018 vswp->promisc_cnt++; 197234683adeSsg70180 mutex_exit(&vswp->mac_lock); 1973e1ebb9ecSlm66018 return (1); 1974e1ebb9ecSlm66018 } 1975e1ebb9ecSlm66018 1976e1ebb9ecSlm66018 /* 1977e1ebb9ecSlm66018 * We are exiting promisc mode either because we were 1978e1ebb9ecSlm66018 * only in promisc mode because we had failed over from 1979e1ebb9ecSlm66018 * switched mode due to HW resource issues, or the user 1980e1ebb9ecSlm66018 * wanted the card in promisc mode for all the ports and 1981e1ebb9ecSlm66018 * the last port is now being deleted. Tweak the message 1982e1ebb9ecSlm66018 * accordingly. 
1983e1ebb9ecSlm66018 */ 1984e1ebb9ecSlm66018 if (plist->num_ports != 0) { 198534683adeSsg70180 cmn_err(CE_NOTE, "!vsw%d: switching device %s back to " 1986205eeb1aSlm66018 "programmed mode", vswp->instance, vswp->physname); 19871ae08745Sheppo } else { 198834683adeSsg70180 cmn_err(CE_NOTE, "!vsw%d: switching device %s out of " 1989205eeb1aSlm66018 "promiscuous mode", vswp->instance, vswp->physname); 19901ae08745Sheppo } 19911ae08745Sheppo } 199234683adeSsg70180 mutex_exit(&vswp->mac_lock); 19935f94e909Ssg70180 19945f94e909Ssg70180 if (type == VSW_VNETPORT) { 19955f94e909Ssg70180 ASSERT(port != NULL); 19965f94e909Ssg70180 ASSERT(port->addr_set == VSW_ADDR_PROMISC); 1997e1ebb9ecSlm66018 port->addr_set = VSW_ADDR_UNSET; 19985f94e909Ssg70180 } else { 19995f94e909Ssg70180 ASSERT(vswp->addr_set == VSW_ADDR_PROMISC); 20005f94e909Ssg70180 vswp->addr_set = VSW_ADDR_UNSET; 20015f94e909Ssg70180 } 2002e1ebb9ecSlm66018 2003e1ebb9ecSlm66018 D1(vswp, "%s: exit", __func__); 2004e1ebb9ecSlm66018 return (0); 2005e1ebb9ecSlm66018 } 2006e1ebb9ecSlm66018 2007e1ebb9ecSlm66018 /* 2008e1ebb9ecSlm66018 * Determine whether or not we are operating in our prefered 2009e1ebb9ecSlm66018 * mode and if not whether the physical resources now allow us 2010e1ebb9ecSlm66018 * to operate in it. 2011e1ebb9ecSlm66018 * 20125f94e909Ssg70180 * If a port is being removed should only be invoked after port has been 2013e1ebb9ecSlm66018 * removed from the port list. 2014e1ebb9ecSlm66018 */ 20155f94e909Ssg70180 static void 2016e1ebb9ecSlm66018 vsw_reconfig_hw(vsw_t *vswp) 2017e1ebb9ecSlm66018 { 2018e1ebb9ecSlm66018 int s_idx; 2019e1ebb9ecSlm66018 2020e1ebb9ecSlm66018 D1(vswp, "%s: enter", __func__); 2021e1ebb9ecSlm66018 20225f94e909Ssg70180 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 2023e1ebb9ecSlm66018 20245f94e909Ssg70180 if (vswp->maddr.maddr_handle == NULL) { 20255f94e909Ssg70180 return; 20265f94e909Ssg70180 } 2027e1ebb9ecSlm66018 2028e1ebb9ecSlm66018 /* 2029e1ebb9ecSlm66018 * If we are in layer 2 (i.e. 
switched) or would like to be 20305f94e909Ssg70180 * in layer 2 then check if any ports or the vswitch itself 20315f94e909Ssg70180 * need to be programmed into the HW. 2032e1ebb9ecSlm66018 * 2033e1ebb9ecSlm66018 * This can happen in two cases - switched was specified as 2034e1ebb9ecSlm66018 * the prefered mode of operation but we exhausted the HW 2035e1ebb9ecSlm66018 * resources and so failed over to the next specifed mode, 2036e1ebb9ecSlm66018 * or switched was the only mode specified so after HW 2037e1ebb9ecSlm66018 * resources were exhausted there was nothing more we 2038e1ebb9ecSlm66018 * could do. 2039e1ebb9ecSlm66018 */ 2040e1ebb9ecSlm66018 if (vswp->smode_idx > 0) 2041e1ebb9ecSlm66018 s_idx = vswp->smode_idx - 1; 2042e1ebb9ecSlm66018 else 2043e1ebb9ecSlm66018 s_idx = vswp->smode_idx; 2044e1ebb9ecSlm66018 20455f94e909Ssg70180 if (vswp->smode[s_idx] != VSW_LAYER2) { 20465f94e909Ssg70180 return; 20475f94e909Ssg70180 } 2048e1ebb9ecSlm66018 2049e1ebb9ecSlm66018 D2(vswp, "%s: attempting reconfig..", __func__); 2050e1ebb9ecSlm66018 2051e1ebb9ecSlm66018 /* 20525f94e909Ssg70180 * First, attempt to set the vswitch mac address into HW, 20535f94e909Ssg70180 * if required. 2054e1ebb9ecSlm66018 */ 20555f94e909Ssg70180 if (vsw_prog_if(vswp)) { 20565f94e909Ssg70180 return; 2057e1ebb9ecSlm66018 } 2058e1ebb9ecSlm66018 2059e1ebb9ecSlm66018 /* 20605f94e909Ssg70180 * Next, attempt to set any ports which have not yet been 20615f94e909Ssg70180 * programmed into HW. 2062e1ebb9ecSlm66018 */ 20635f94e909Ssg70180 if (vsw_prog_ports(vswp)) { 20645f94e909Ssg70180 return; 2065e1ebb9ecSlm66018 } 2066e1ebb9ecSlm66018 20675f94e909Ssg70180 /* 20685f94e909Ssg70180 * By now we know that have programmed all desired ports etc 20695f94e909Ssg70180 * into HW, so safe to mark reconfiguration as complete. 
20705f94e909Ssg70180 */ 2071e1ebb9ecSlm66018 vswp->recfg_reqd = B_FALSE; 2072e1ebb9ecSlm66018 2073e1ebb9ecSlm66018 vswp->smode_idx = s_idx; 2074e1ebb9ecSlm66018 20755f94e909Ssg70180 D1(vswp, "%s: exit", __func__); 20765f94e909Ssg70180 } 20775f94e909Ssg70180 20785f94e909Ssg70180 /* 20795f94e909Ssg70180 * Check to see if vsw itself is plumbed, and if so whether or not 20805f94e909Ssg70180 * its mac address should be written into HW. 20815f94e909Ssg70180 * 20825f94e909Ssg70180 * Returns 0 if could set address, or didn't have to set it. 20835f94e909Ssg70180 * Returns 1 if failed to set address. 20845f94e909Ssg70180 */ 20855f94e909Ssg70180 static int 20865f94e909Ssg70180 vsw_prog_if(vsw_t *vswp) 20875f94e909Ssg70180 { 20885f94e909Ssg70180 mac_multi_addr_t addr; 20895f94e909Ssg70180 20905f94e909Ssg70180 D1(vswp, "%s: enter", __func__); 20915f94e909Ssg70180 20925f94e909Ssg70180 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 20935f94e909Ssg70180 20945f94e909Ssg70180 READ_ENTER(&vswp->if_lockrw); 20955f94e909Ssg70180 if ((vswp->if_state & VSW_IF_UP) && 20965f94e909Ssg70180 (vswp->addr_set != VSW_ADDR_HW)) { 20975f94e909Ssg70180 20985f94e909Ssg70180 addr.mma_addrlen = ETHERADDRL; 20995f94e909Ssg70180 ether_copy(&vswp->if_addr, &addr.mma_addr); 21005f94e909Ssg70180 21015f94e909Ssg70180 if (vsw_set_hw_addr(vswp, &addr) != 0) { 21025f94e909Ssg70180 RW_EXIT(&vswp->if_lockrw); 21035f94e909Ssg70180 return (1); 21045f94e909Ssg70180 } 21055f94e909Ssg70180 21065f94e909Ssg70180 vswp->addr_slot = addr.mma_slot; 21075f94e909Ssg70180 21085f94e909Ssg70180 /* 21095f94e909Ssg70180 * If previously when plumbed had had to place 21105f94e909Ssg70180 * interface into promisc mode, now reverse that. 21115f94e909Ssg70180 * 21125f94e909Ssg70180 * Note that interface will only actually be set into 21135f94e909Ssg70180 * non-promisc mode when last port/interface has been 21145f94e909Ssg70180 * programmed into HW. 
21155f94e909Ssg70180 */ 21165f94e909Ssg70180 if (vswp->addr_set == VSW_ADDR_PROMISC) 21175f94e909Ssg70180 (void) vsw_unset_hw_promisc(vswp, NULL, VSW_LOCALDEV); 21185f94e909Ssg70180 21195f94e909Ssg70180 vswp->addr_set = VSW_ADDR_HW; 21205f94e909Ssg70180 } 21215f94e909Ssg70180 RW_EXIT(&vswp->if_lockrw); 21225f94e909Ssg70180 21235f94e909Ssg70180 D1(vswp, "%s: exit", __func__); 2124e1ebb9ecSlm66018 return (0); 2125e1ebb9ecSlm66018 } 2126e1ebb9ecSlm66018 21275f94e909Ssg70180 /* 21285f94e909Ssg70180 * Scan the port list for any ports which have not yet been set 21295f94e909Ssg70180 * into HW. For those found attempt to program their mac addresses 21305f94e909Ssg70180 * into the physical device. 21315f94e909Ssg70180 * 21325f94e909Ssg70180 * Returns 0 if able to program all required ports (can be 0) into HW. 21335f94e909Ssg70180 * Returns 1 if failed to set at least one mac address. 21345f94e909Ssg70180 */ 21355f94e909Ssg70180 static int 21365f94e909Ssg70180 vsw_prog_ports(vsw_t *vswp) 21375f94e909Ssg70180 { 21385f94e909Ssg70180 mac_multi_addr_t addr; 21395f94e909Ssg70180 vsw_port_list_t *plist = &vswp->plist; 21405f94e909Ssg70180 vsw_port_t *tp; 21415f94e909Ssg70180 int rv = 0; 21425f94e909Ssg70180 21435f94e909Ssg70180 D1(vswp, "%s: enter", __func__); 21445f94e909Ssg70180 21455f94e909Ssg70180 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 21465f94e909Ssg70180 21475f94e909Ssg70180 READ_ENTER(&plist->lockrw); 21485f94e909Ssg70180 for (tp = plist->head; tp != NULL; tp = tp->p_next) { 21495f94e909Ssg70180 if (tp->addr_set != VSW_ADDR_HW) { 21505f94e909Ssg70180 addr.mma_addrlen = ETHERADDRL; 21515f94e909Ssg70180 ether_copy(&tp->p_macaddr, &addr.mma_addr); 21525f94e909Ssg70180 21535f94e909Ssg70180 if (vsw_set_hw_addr(vswp, &addr) != 0) { 21545f94e909Ssg70180 rv = 1; 21555f94e909Ssg70180 break; 21565f94e909Ssg70180 } 21575f94e909Ssg70180 21585f94e909Ssg70180 tp->addr_slot = addr.mma_slot; 21595f94e909Ssg70180 21605f94e909Ssg70180 /* 21615f94e909Ssg70180 * If when this port had first 
attached we had 21625f94e909Ssg70180 * had to place the interface into promisc mode, 21635f94e909Ssg70180 * then now reverse that. 21645f94e909Ssg70180 * 21655f94e909Ssg70180 * Note that the interface will not actually 21665f94e909Ssg70180 * change to non-promisc mode until all ports 21675f94e909Ssg70180 * have been programmed. 21685f94e909Ssg70180 */ 21695f94e909Ssg70180 if (tp->addr_set == VSW_ADDR_PROMISC) 21705f94e909Ssg70180 (void) vsw_unset_hw_promisc(vswp, 21715f94e909Ssg70180 tp, VSW_VNETPORT); 21725f94e909Ssg70180 21735f94e909Ssg70180 tp->addr_set = VSW_ADDR_HW; 21745f94e909Ssg70180 } 21755f94e909Ssg70180 } 21765f94e909Ssg70180 RW_EXIT(&plist->lockrw); 21775f94e909Ssg70180 21785f94e909Ssg70180 D1(vswp, "%s: exit", __func__); 2179e1ebb9ecSlm66018 return (rv); 21801ae08745Sheppo } 21811ae08745Sheppo 21827636cb21Slm66018 static void 21837636cb21Slm66018 vsw_mac_ring_tbl_entry_init(vsw_t *vswp, vsw_mac_ring_t *ringp) 21847636cb21Slm66018 { 21857636cb21Slm66018 ringp->ring_state = VSW_MAC_RING_FREE; 21867636cb21Slm66018 ringp->ring_arg = NULL; 21877636cb21Slm66018 ringp->ring_blank = NULL; 21887636cb21Slm66018 ringp->ring_vqp = NULL; 21897636cb21Slm66018 ringp->ring_vswp = vswp; 21907636cb21Slm66018 } 21917636cb21Slm66018 21927636cb21Slm66018 static void 21937636cb21Slm66018 vsw_mac_ring_tbl_init(vsw_t *vswp) 21947636cb21Slm66018 { 21957636cb21Slm66018 int i; 21967636cb21Slm66018 21977636cb21Slm66018 mutex_init(&vswp->mac_ring_lock, NULL, MUTEX_DRIVER, NULL); 21987636cb21Slm66018 21997636cb21Slm66018 vswp->mac_ring_tbl_sz = vsw_mac_rx_rings; 22007636cb21Slm66018 vswp->mac_ring_tbl = 2201205eeb1aSlm66018 kmem_alloc(vsw_mac_rx_rings * sizeof (vsw_mac_ring_t), KM_SLEEP); 22027636cb21Slm66018 22037636cb21Slm66018 for (i = 0; i < vswp->mac_ring_tbl_sz; i++) 22047636cb21Slm66018 vsw_mac_ring_tbl_entry_init(vswp, &vswp->mac_ring_tbl[i]); 22057636cb21Slm66018 } 22067636cb21Slm66018 22077636cb21Slm66018 static void 22087636cb21Slm66018 vsw_mac_ring_tbl_destroy(vsw_t 
*vswp) 22097636cb21Slm66018 { 22107636cb21Slm66018 int i; 221134683adeSsg70180 vsw_mac_ring_t *ringp; 22127636cb21Slm66018 22137636cb21Slm66018 mutex_enter(&vswp->mac_ring_lock); 22147636cb21Slm66018 for (i = 0; i < vswp->mac_ring_tbl_sz; i++) { 221534683adeSsg70180 ringp = &vswp->mac_ring_tbl[i]; 221634683adeSsg70180 221734683adeSsg70180 if (ringp->ring_state != VSW_MAC_RING_FREE) { 22187636cb21Slm66018 /* 22197636cb21Slm66018 * Destroy the queue. 22207636cb21Slm66018 */ 222134683adeSsg70180 vsw_queue_stop(ringp->ring_vqp); 222234683adeSsg70180 vsw_queue_destroy(ringp->ring_vqp); 22237636cb21Slm66018 22247636cb21Slm66018 /* 22257636cb21Slm66018 * Re-initialize the structure. 22267636cb21Slm66018 */ 222734683adeSsg70180 vsw_mac_ring_tbl_entry_init(vswp, ringp); 22287636cb21Slm66018 } 22297636cb21Slm66018 } 22307636cb21Slm66018 mutex_exit(&vswp->mac_ring_lock); 22317636cb21Slm66018 22327636cb21Slm66018 mutex_destroy(&vswp->mac_ring_lock); 22337636cb21Slm66018 kmem_free(vswp->mac_ring_tbl, 22347636cb21Slm66018 vswp->mac_ring_tbl_sz * sizeof (vsw_mac_ring_t)); 22357636cb21Slm66018 vswp->mac_ring_tbl_sz = 0; 22367636cb21Slm66018 } 22377636cb21Slm66018 22387636cb21Slm66018 /* 22397636cb21Slm66018 * Handle resource add callbacks from the driver below. 
22407636cb21Slm66018 */ 22417636cb21Slm66018 static mac_resource_handle_t 22427636cb21Slm66018 vsw_mac_ring_add_cb(void *arg, mac_resource_t *mrp) 22437636cb21Slm66018 { 22447636cb21Slm66018 vsw_t *vswp = (vsw_t *)arg; 22457636cb21Slm66018 mac_rx_fifo_t *mrfp = (mac_rx_fifo_t *)mrp; 22467636cb21Slm66018 vsw_mac_ring_t *ringp; 22477636cb21Slm66018 vsw_queue_t *vqp; 22487636cb21Slm66018 int i; 22497636cb21Slm66018 22507636cb21Slm66018 ASSERT(vswp != NULL); 22517636cb21Slm66018 ASSERT(mrp != NULL); 22527636cb21Slm66018 ASSERT(vswp->mac_ring_tbl != NULL); 22537636cb21Slm66018 22547636cb21Slm66018 D1(vswp, "%s: enter", __func__); 22557636cb21Slm66018 22567636cb21Slm66018 /* 22577636cb21Slm66018 * Check to make sure we have the correct resource type. 22587636cb21Slm66018 */ 22597636cb21Slm66018 if (mrp->mr_type != MAC_RX_FIFO) 22607636cb21Slm66018 return (NULL); 22617636cb21Slm66018 22627636cb21Slm66018 /* 22637636cb21Slm66018 * Find a open entry in the ring table. 22647636cb21Slm66018 */ 22657636cb21Slm66018 mutex_enter(&vswp->mac_ring_lock); 22667636cb21Slm66018 for (i = 0; i < vswp->mac_ring_tbl_sz; i++) { 22677636cb21Slm66018 ringp = &vswp->mac_ring_tbl[i]; 22687636cb21Slm66018 22697636cb21Slm66018 /* 22707636cb21Slm66018 * Check for an empty slot, if found, then setup queue 22717636cb21Slm66018 * and thread. 22727636cb21Slm66018 */ 22737636cb21Slm66018 if (ringp->ring_state == VSW_MAC_RING_FREE) { 22747636cb21Slm66018 /* 22757636cb21Slm66018 * Create the queue for this ring. 22767636cb21Slm66018 */ 22777636cb21Slm66018 vqp = vsw_queue_create(); 22787636cb21Slm66018 22797636cb21Slm66018 /* 22807636cb21Slm66018 * Initialize the ring data structure. 
22817636cb21Slm66018 */ 22827636cb21Slm66018 ringp->ring_vqp = vqp; 22837636cb21Slm66018 ringp->ring_arg = mrfp->mrf_arg; 22847636cb21Slm66018 ringp->ring_blank = mrfp->mrf_blank; 22857636cb21Slm66018 ringp->ring_state = VSW_MAC_RING_INUSE; 22867636cb21Slm66018 22877636cb21Slm66018 /* 22887636cb21Slm66018 * Create the worker thread. 22897636cb21Slm66018 */ 22907636cb21Slm66018 vqp->vq_worker = thread_create(NULL, 0, 22917636cb21Slm66018 vsw_queue_worker, ringp, 0, &p0, 22927636cb21Slm66018 TS_RUN, minclsyspri); 22937636cb21Slm66018 if (vqp->vq_worker == NULL) { 22947636cb21Slm66018 vsw_queue_destroy(vqp); 22957636cb21Slm66018 vsw_mac_ring_tbl_entry_init(vswp, ringp); 22967636cb21Slm66018 ringp = NULL; 22977636cb21Slm66018 } 22987636cb21Slm66018 229934683adeSsg70180 if (ringp != NULL) { 230034683adeSsg70180 /* 230134683adeSsg70180 * Make sure thread get's running state for 230234683adeSsg70180 * this ring. 230334683adeSsg70180 */ 230434683adeSsg70180 mutex_enter(&vqp->vq_lock); 230534683adeSsg70180 while ((vqp->vq_state != VSW_QUEUE_RUNNING) && 230634683adeSsg70180 (vqp->vq_state != VSW_QUEUE_DRAINED)) { 230734683adeSsg70180 cv_wait(&vqp->vq_cv, &vqp->vq_lock); 230834683adeSsg70180 } 230934683adeSsg70180 231034683adeSsg70180 /* 231134683adeSsg70180 * If the thread is not running, cleanup. 
231234683adeSsg70180 */ 231334683adeSsg70180 if (vqp->vq_state == VSW_QUEUE_DRAINED) { 231434683adeSsg70180 vsw_queue_destroy(vqp); 231534683adeSsg70180 vsw_mac_ring_tbl_entry_init(vswp, 231634683adeSsg70180 ringp); 231734683adeSsg70180 ringp = NULL; 231834683adeSsg70180 } 231934683adeSsg70180 mutex_exit(&vqp->vq_lock); 232034683adeSsg70180 } 232134683adeSsg70180 23227636cb21Slm66018 mutex_exit(&vswp->mac_ring_lock); 23237636cb21Slm66018 D1(vswp, "%s: exit", __func__); 23247636cb21Slm66018 return ((mac_resource_handle_t)ringp); 23257636cb21Slm66018 } 23267636cb21Slm66018 } 23277636cb21Slm66018 mutex_exit(&vswp->mac_ring_lock); 23287636cb21Slm66018 23297636cb21Slm66018 /* 23307636cb21Slm66018 * No slots in the ring table available. 23317636cb21Slm66018 */ 23327636cb21Slm66018 D1(vswp, "%s: exit", __func__); 23337636cb21Slm66018 return (NULL); 23347636cb21Slm66018 } 23357636cb21Slm66018 23367636cb21Slm66018 static void 23377636cb21Slm66018 vsw_queue_stop(vsw_queue_t *vqp) 23387636cb21Slm66018 { 23397636cb21Slm66018 mutex_enter(&vqp->vq_lock); 23407636cb21Slm66018 23417636cb21Slm66018 if (vqp->vq_state == VSW_QUEUE_RUNNING) { 23427636cb21Slm66018 vqp->vq_state = VSW_QUEUE_STOP; 23437636cb21Slm66018 cv_signal(&vqp->vq_cv); 23447636cb21Slm66018 23457636cb21Slm66018 while (vqp->vq_state != VSW_QUEUE_DRAINED) 23467636cb21Slm66018 cv_wait(&vqp->vq_cv, &vqp->vq_lock); 23477636cb21Slm66018 } 23487636cb21Slm66018 234934683adeSsg70180 vqp->vq_state = VSW_QUEUE_STOPPED; 235034683adeSsg70180 23517636cb21Slm66018 mutex_exit(&vqp->vq_lock); 23527636cb21Slm66018 } 23537636cb21Slm66018 23547636cb21Slm66018 static vsw_queue_t * 23557636cb21Slm66018 vsw_queue_create() 23567636cb21Slm66018 { 23577636cb21Slm66018 vsw_queue_t *vqp; 23587636cb21Slm66018 23597636cb21Slm66018 vqp = kmem_zalloc(sizeof (vsw_queue_t), KM_SLEEP); 23607636cb21Slm66018 23617636cb21Slm66018 mutex_init(&vqp->vq_lock, NULL, MUTEX_DRIVER, NULL); 23627636cb21Slm66018 cv_init(&vqp->vq_cv, NULL, CV_DRIVER, NULL); 
23637636cb21Slm66018 vqp->vq_first = NULL; 23647636cb21Slm66018 vqp->vq_last = NULL; 236534683adeSsg70180 vqp->vq_state = VSW_QUEUE_STOPPED; 23667636cb21Slm66018 23677636cb21Slm66018 return (vqp); 23687636cb21Slm66018 } 23697636cb21Slm66018 23707636cb21Slm66018 static void 23717636cb21Slm66018 vsw_queue_destroy(vsw_queue_t *vqp) 23727636cb21Slm66018 { 23737636cb21Slm66018 cv_destroy(&vqp->vq_cv); 23747636cb21Slm66018 mutex_destroy(&vqp->vq_lock); 23757636cb21Slm66018 kmem_free(vqp, sizeof (vsw_queue_t)); 23767636cb21Slm66018 } 23777636cb21Slm66018 23787636cb21Slm66018 static void 23797636cb21Slm66018 vsw_queue_worker(vsw_mac_ring_t *rrp) 23807636cb21Slm66018 { 23817636cb21Slm66018 mblk_t *mp; 23827636cb21Slm66018 vsw_queue_t *vqp = rrp->ring_vqp; 23837636cb21Slm66018 vsw_t *vswp = rrp->ring_vswp; 23847636cb21Slm66018 23857636cb21Slm66018 mutex_enter(&vqp->vq_lock); 23867636cb21Slm66018 238734683adeSsg70180 ASSERT(vqp->vq_state == VSW_QUEUE_STOPPED); 23887636cb21Slm66018 23897636cb21Slm66018 /* 23907636cb21Slm66018 * Set the state to running, since the thread is now active. 23917636cb21Slm66018 */ 23927636cb21Slm66018 vqp->vq_state = VSW_QUEUE_RUNNING; 239334683adeSsg70180 cv_signal(&vqp->vq_cv); 23947636cb21Slm66018 23957636cb21Slm66018 while (vqp->vq_state == VSW_QUEUE_RUNNING) { 23967636cb21Slm66018 /* 23977636cb21Slm66018 * Wait for work to do or the state has changed 23987636cb21Slm66018 * to not running. 23997636cb21Slm66018 */ 24007636cb21Slm66018 while ((vqp->vq_state == VSW_QUEUE_RUNNING) && 24017636cb21Slm66018 (vqp->vq_first == NULL)) { 24027636cb21Slm66018 cv_wait(&vqp->vq_cv, &vqp->vq_lock); 24037636cb21Slm66018 } 24047636cb21Slm66018 24057636cb21Slm66018 /* 24067636cb21Slm66018 * Process packets that we received from the interface. 
24077636cb21Slm66018 */ 24087636cb21Slm66018 if (vqp->vq_first != NULL) { 24097636cb21Slm66018 mp = vqp->vq_first; 24107636cb21Slm66018 24117636cb21Slm66018 vqp->vq_first = NULL; 24127636cb21Slm66018 vqp->vq_last = NULL; 24137636cb21Slm66018 24147636cb21Slm66018 mutex_exit(&vqp->vq_lock); 24157636cb21Slm66018 24167636cb21Slm66018 /* switch the chain of packets received */ 241734683adeSsg70180 vswp->vsw_switch_frame(vswp, mp, 241834683adeSsg70180 VSW_PHYSDEV, NULL, NULL); 24197636cb21Slm66018 24207636cb21Slm66018 mutex_enter(&vqp->vq_lock); 24217636cb21Slm66018 } 24227636cb21Slm66018 } 24237636cb21Slm66018 24247636cb21Slm66018 /* 24257636cb21Slm66018 * We are drained and signal we are done. 24267636cb21Slm66018 */ 24277636cb21Slm66018 vqp->vq_state = VSW_QUEUE_DRAINED; 24287636cb21Slm66018 cv_signal(&vqp->vq_cv); 24297636cb21Slm66018 24307636cb21Slm66018 /* 24317636cb21Slm66018 * Exit lock and drain the remaining packets. 24327636cb21Slm66018 */ 24337636cb21Slm66018 mutex_exit(&vqp->vq_lock); 24347636cb21Slm66018 24357636cb21Slm66018 /* 24367636cb21Slm66018 * Exit the thread 24377636cb21Slm66018 */ 24387636cb21Slm66018 thread_exit(); 24397636cb21Slm66018 } 24407636cb21Slm66018 24417636cb21Slm66018 /* 24427636cb21Slm66018 * static void 24437636cb21Slm66018 * vsw_rx_queue_cb() - Receive callback routine when 24447636cb21Slm66018 * vsw_multi_ring_enable is non-zero. Queue the packets 24457636cb21Slm66018 * to a packet queue for a worker thread to process. 
24467636cb21Slm66018 */ 24477636cb21Slm66018 static void 24487636cb21Slm66018 vsw_rx_queue_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp) 24497636cb21Slm66018 { 24507636cb21Slm66018 vsw_mac_ring_t *ringp = (vsw_mac_ring_t *)mrh; 24517636cb21Slm66018 vsw_t *vswp = (vsw_t *)arg; 24527636cb21Slm66018 vsw_queue_t *vqp; 24537636cb21Slm66018 mblk_t *bp, *last; 24547636cb21Slm66018 24557636cb21Slm66018 ASSERT(mrh != NULL); 24567636cb21Slm66018 ASSERT(vswp != NULL); 24577636cb21Slm66018 ASSERT(mp != NULL); 24587636cb21Slm66018 24597636cb21Slm66018 D1(vswp, "%s: enter", __func__); 24607636cb21Slm66018 24617636cb21Slm66018 /* 24627636cb21Slm66018 * Find the last element in the mblk chain. 24637636cb21Slm66018 */ 24647636cb21Slm66018 bp = mp; 24657636cb21Slm66018 do { 24667636cb21Slm66018 last = bp; 24677636cb21Slm66018 bp = bp->b_next; 24687636cb21Slm66018 } while (bp != NULL); 24697636cb21Slm66018 24707636cb21Slm66018 /* Get the queue for the packets */ 24717636cb21Slm66018 vqp = ringp->ring_vqp; 24727636cb21Slm66018 24737636cb21Slm66018 /* 24747636cb21Slm66018 * Grab the lock such we can queue the packets. 24757636cb21Slm66018 */ 24767636cb21Slm66018 mutex_enter(&vqp->vq_lock); 24777636cb21Slm66018 24787636cb21Slm66018 if (vqp->vq_state != VSW_QUEUE_RUNNING) { 24797636cb21Slm66018 freemsg(mp); 248034683adeSsg70180 mutex_exit(&vqp->vq_lock); 24817636cb21Slm66018 goto vsw_rx_queue_cb_exit; 24827636cb21Slm66018 } 24837636cb21Slm66018 24847636cb21Slm66018 /* 24857636cb21Slm66018 * Add the mblk chain to the queue. If there 24867636cb21Slm66018 * is some mblks in the queue, then add the new 24877636cb21Slm66018 * chain to the end. 
24887636cb21Slm66018 */ 24897636cb21Slm66018 if (vqp->vq_first == NULL) 24907636cb21Slm66018 vqp->vq_first = mp; 24917636cb21Slm66018 else 24927636cb21Slm66018 vqp->vq_last->b_next = mp; 24937636cb21Slm66018 24947636cb21Slm66018 vqp->vq_last = last; 24957636cb21Slm66018 24967636cb21Slm66018 /* 24977636cb21Slm66018 * Signal the worker thread that there is work to 24987636cb21Slm66018 * do. 24997636cb21Slm66018 */ 25007636cb21Slm66018 cv_signal(&vqp->vq_cv); 25017636cb21Slm66018 25027636cb21Slm66018 /* 25037636cb21Slm66018 * Let go of the lock and exit. 25047636cb21Slm66018 */ 25057636cb21Slm66018 mutex_exit(&vqp->vq_lock); 250634683adeSsg70180 250734683adeSsg70180 vsw_rx_queue_cb_exit: 25087636cb21Slm66018 D1(vswp, "%s: exit", __func__); 25097636cb21Slm66018 } 25107636cb21Slm66018 25111ae08745Sheppo /* 25121ae08745Sheppo * receive callback routine. Invoked by MAC layer when there 25131ae08745Sheppo * are pkts being passed up from physical device. 25141ae08745Sheppo * 25151ae08745Sheppo * PERF: It may be more efficient when the card is in promisc 25161ae08745Sheppo * mode to check the dest address of the pkts here (against 25171ae08745Sheppo * the FDB) rather than checking later. Needs to be investigated. 25181ae08745Sheppo */ 25191ae08745Sheppo static void 25201ae08745Sheppo vsw_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp) 25211ae08745Sheppo { 25221ae08745Sheppo _NOTE(ARGUNUSED(mrh)) 25231ae08745Sheppo 25241ae08745Sheppo vsw_t *vswp = (vsw_t *)arg; 25251ae08745Sheppo 25261ae08745Sheppo ASSERT(vswp != NULL); 25271ae08745Sheppo 25281ae08745Sheppo D1(vswp, "vsw_rx_cb: enter"); 25291ae08745Sheppo 25301ae08745Sheppo /* switch the chain of packets received */ 253134683adeSsg70180 vswp->vsw_switch_frame(vswp, mp, VSW_PHYSDEV, NULL, NULL); 25321ae08745Sheppo 25331ae08745Sheppo D1(vswp, "vsw_rx_cb: exit"); 25341ae08745Sheppo } 25351ae08745Sheppo 25361ae08745Sheppo /* 25371ae08745Sheppo * Send a message out over the physical device via the MAC layer. 
25381ae08745Sheppo * 25391ae08745Sheppo * Returns any mblks that it was unable to transmit. 25401ae08745Sheppo */ 25411ae08745Sheppo static mblk_t * 25421ae08745Sheppo vsw_tx_msg(vsw_t *vswp, mblk_t *mp) 25431ae08745Sheppo { 25441ae08745Sheppo const mac_txinfo_t *mtp; 25451ae08745Sheppo mblk_t *nextp; 25461ae08745Sheppo 254734683adeSsg70180 mutex_enter(&vswp->mac_lock); 2548*19b65a69Ssb155480 if ((vswp->mh == NULL) || (vswp->mstarted == B_FALSE)) { 2549*19b65a69Ssb155480 25501ae08745Sheppo DERR(vswp, "vsw_tx_msg: dropping pkts: no tx routine avail"); 255134683adeSsg70180 mutex_exit(&vswp->mac_lock); 25521ae08745Sheppo return (mp); 25531ae08745Sheppo } else { 25541ae08745Sheppo for (;;) { 25551ae08745Sheppo nextp = mp->b_next; 25561ae08745Sheppo mp->b_next = NULL; 25571ae08745Sheppo 25581ae08745Sheppo mtp = vswp->txinfo; 255934683adeSsg70180 25601ae08745Sheppo if ((mp = mtp->mt_fn(mtp->mt_arg, mp)) != NULL) { 25611ae08745Sheppo mp->b_next = nextp; 25621ae08745Sheppo break; 25631ae08745Sheppo } 25641ae08745Sheppo 25651ae08745Sheppo if ((mp = nextp) == NULL) 25661ae08745Sheppo break; 25671ae08745Sheppo } 25681ae08745Sheppo } 256934683adeSsg70180 mutex_exit(&vswp->mac_lock); 25701ae08745Sheppo 25711ae08745Sheppo return (mp); 25721ae08745Sheppo } 25731ae08745Sheppo 25741ae08745Sheppo /* 25751ae08745Sheppo * Register with the MAC layer as a network device, so we 25761ae08745Sheppo * can be plumbed if necessary. 
 */
static int
vsw_mac_register(vsw_t *vswp)
{
	mac_register_t	*macp;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	/* Allocate a MAC registration structure for this version. */
	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
		return (EINVAL);
	/* Describe ourselves as an Ethernet provider to the MAC layer. */
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vswp;
	macp->m_dip = vswp->dip;
	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
	macp->m_callbacks = &vsw_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = ETHERMTU;
	rv = mac_register(macp, &vswp->if_mh);
	/* mac_register() has copied what it needs; release the template. */
	mac_free(macp);
	if (rv != 0) {
		/*
		 * Treat this as a non-fatal error as we may be
		 * able to operate in some other mode.
		 */
		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
		    "a provider with MAC layer", vswp->instance);
		return (rv);
	}

	/* Remember that we are registered so teardown can undo it. */
	vswp->if_state |= VSW_IF_REG;

	D1(vswp, "%s: exit", __func__);

	return (rv);
}

/*
 * Unregister from the MAC layer (inverse of vsw_mac_register()).
 * Returns 0 on success or the mac_unregister() error, in which
 * case the registered state is left unchanged.
 */
static int
vsw_mac_unregister(vsw_t *vswp)
{
	int		rv = 0;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);

	/* Only attempt the unregister if vsw_mac_register() succeeded. */
	if (vswp->if_state & VSW_IF_REG) {
		rv = mac_unregister(vswp->if_mh);
		if (rv != 0) {
			DWARN(vswp, "%s: unable to unregister from MAC "
			    "framework", __func__);

			RW_EXIT(&vswp->if_lockrw);
			D1(vswp, "%s: fail exit", __func__);
			return (rv);
		}

		/* mark i/f as down and unregistered */
		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
	}
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit", __func__);

	return (rv);
}

/*
 * MAC entry point: fetch a statistic from the underlying physical
 * device.  Fails with EINVAL when no device is currently open.
 */
static int
vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	/* mac_lock guards vswp->mh against concurrent open/close. */
	mutex_enter(&vswp->mac_lock);
	if (vswp->mh == NULL) {
		mutex_exit(&vswp->mac_lock);
		return (EINVAL);
	}

	/* return stats from underlying device */
	*val = mac_stat_get(vswp->mh, stat);

	mutex_exit(&vswp->mac_lock);

	return (0);
}

/*
 * MAC entry point: take the vsw interface down.  Clears the UP
 * flag and releases our unicast slot in the physical device,
 * reprogramming the hardware if a reconfiguration was pending.
 */
static void
vsw_m_stop(void *arg)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	vswp->if_state &= ~VSW_IF_UP;
	RW_EXIT(&vswp->if_lockrw);

	mutex_enter(&vswp->hw_lock);

	/* Give back any hardware resources held for the local device. */
	(void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV);

	/* A deferred reconfig may now be possible with the freed slot. */
	if (vswp->recfg_reqd)
		vsw_reconfig_hw(vswp);

	mutex_exit(&vswp->hw_lock);

	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
}

/*
 * MAC entry point: bring the vsw interface up.  Marks the
 * interface UP and, if the physical device is already set up,
 * programs our unicast address into the hardware.  Always
 * returns 0.
 */
static int
vsw_m_start(void *arg)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);

	vswp->if_state |= VSW_IF_UP;

	if (vswp->switching_setup_done == B_FALSE) {
		/*
		 * If the switching mode has not been setup yet, just
		 * return. The unicast address will be programmed
		 * after the physical device is successfully setup by the
		 * timeout handler.
		 */
		RW_EXIT(&vswp->if_lockrw);
		return (0);
	}

	/* if in layer2 mode, program unicast address. */
	if (vswp->mh != NULL) {
		mutex_enter(&vswp->hw_lock);
		(void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
		mutex_exit(&vswp->hw_lock);
	}

	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
	return (0);
}

/*
 * Change the local interface address.
 *
 * Note: we don't support this entry point. The local
 * mac address of the switch can only be changed via its
 * MD node properties.
 */
static int
vsw_m_unicst(void *arg, const uint8_t *macaddr)
{
	_NOTE(ARGUNUSED(arg, macaddr))

	return (DDI_FAILURE);
}

/*
 * MAC entry point: add (add == B_TRUE) or remove a multicast
 * address.  The address is tracked in three places that must stay
 * in sync: the switching hash table (vsw_add_mcst/vsw_del_mcst),
 * the per-instance mcap list, and - when a physical device is
 * open - the device itself via mac_multicst_add/remove.
 * Returns 0 on success, non-zero on failure to add.
 */
static int
vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	vsw_t		*vswp = (vsw_t *)arg;
	mcst_addr_t	*mcst_p = NULL;
	uint64_t	addr = 0x0;
	int		i, ret = 0;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Convert address into form that can be used
	 * as hash table key.
	 */
	for (i = 0; i < ETHERADDRL; i++) {
		addr = (addr << 8) | mca[i];
	}

	D2(vswp, "%s: addr = 0x%llx", __func__, addr);

	if (add) {
		D2(vswp, "%s: adding multicast", __func__);
		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
			/*
			 * Update the list of multicast addresses
			 * contained within the vsw_t structure to
			 * include this new one.
			 */
			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
			if (mcst_p == NULL) {
				DERR(vswp, "%s unable to alloc mem", __func__);
				/* roll back the hash-table entry */
				(void) vsw_del_mcst(vswp,
				    VSW_LOCALDEV, addr, NULL);
				return (1);
			}
			mcst_p->addr = addr;
			ether_copy(mca, &mcst_p->mca);

			/*
			 * Call into the underlying driver to program the
			 * address into HW.
			 */
			mutex_enter(&vswp->mac_lock);
			if (vswp->mh != NULL) {
				ret = mac_multicst_add(vswp->mh, mca);
				if (ret != 0) {
					cmn_err(CE_WARN, "!vsw%d: unable to "
					    "add multicast address",
					    vswp->instance);
					mutex_exit(&vswp->mac_lock);
					/* undo hash entry and allocation */
					(void) vsw_del_mcst(vswp,
					    VSW_LOCALDEV, addr, NULL);
					kmem_free(mcst_p, sizeof (*mcst_p));
					return (ret);
				}
				/* remember HW was programmed, for removal */
				mcst_p->mac_added = B_TRUE;
			}
			mutex_exit(&vswp->mac_lock);

			/* link the new entry onto the instance's list */
			mutex_enter(&vswp->mca_lock);
			mcst_p->nextp = vswp->mcap;
			vswp->mcap = mcst_p;
			mutex_exit(&vswp->mca_lock);
		} else {
			cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
			    "address", vswp->instance);
		}
		return (ret);
	}

	D2(vswp, "%s: removing multicast", __func__);
	/*
	 * Remove the address from the hash table..
	 */
	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {

		/*
		 * ..and then from the list maintained in the
		 * vsw_t structure.
		 */
		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
		ASSERT(mcst_p != NULL);

		/* only unprogram HW if we actually programmed it above */
		mutex_enter(&vswp->mac_lock);
		if (vswp->mh != NULL && mcst_p->mac_added) {
			(void) mac_multicst_remove(vswp->mh, mca);
			mcst_p->mac_added = B_FALSE;
		}
		mutex_exit(&vswp->mac_lock);
		kmem_free(mcst_p, sizeof (*mcst_p));
	}

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * MAC entry point: track whether the interface is in promiscuous
 * mode.  Only records the flag; the switching code consults it.
 */
static int
vsw_m_promisc(void *arg, boolean_t on)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	if (on)
		vswp->if_state |= VSW_IF_PROMISC;
	else
		vswp->if_state &= ~VSW_IF_PROMISC;
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * MAC entry point: transmit packets originated by the local
 * interface.  The chain is handed to the switching function;
 * NULL is returned as all packets are consumed.
 */
static mblk_t *
vsw_m_tx(void *arg, mblk_t *mp)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);

	D1(vswp, "%s: exit", __func__);

	return (NULL);
}

/*
 * Register for machine description (MD) updates.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_mdeg_register(vsw_t *vswp)
{
	mdeg_prop_spec_t	*pspecp;
	mdeg_node_spec_t	*inst_specp;
	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
	size_t			templatesz;
	int			rv;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Allocate and initialize a per-instance copy
	 * of the global property spec array that will
	 * uniquely identify this vsw instance.
	 */
	templatesz = sizeof (vsw_prop_template);
	pspecp = kmem_zalloc(templatesz, KM_SLEEP);

	bcopy(vsw_prop_template, pspecp, templatesz);

	/* patch our instance number (regprop) into the copied template */
	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);

	/* initialize the complete prop spec structure */
	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
	inst_specp->namep = "virtual-device";
	inst_specp->specp = pspecp;

	D2(vswp, "%s: instance %d registering with mdeg", __func__,
	    vswp->regprop);
	/*
	 * Register an interest in 'virtual-device' nodes with a
	 * 'name' property of 'virtual-network-switch'
	 */
	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
	    (void *)vswp, &mdeg_hdl);
	if (rv != MDEG_SUCCESS) {
		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
		    __func__, rv);
		goto mdeg_reg_fail;
	}

	/*
	 * Register an interest in 'vsw-port' nodes.
	 */
	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
	    (void *)vswp, &mdeg_port_hdl);
	if (rv != MDEG_SUCCESS) {
		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
		/* undo the first (vsw-node) registration before bailing */
		(void) mdeg_unregister(mdeg_hdl);
		goto mdeg_reg_fail;
	}

	/* save off data that will be needed later */
	vswp->inst_spec = inst_specp;
	vswp->mdeg_hdl = mdeg_hdl;
	vswp->mdeg_port_hdl = mdeg_port_hdl;

	D1(vswp, "%s: exit", __func__);
	return (0);

mdeg_reg_fail:
	/* common cleanup: free the spec copies, clear the handles */
	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
	    vswp->instance);
	kmem_free(pspecp, templatesz);
	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));

	vswp->mdeg_hdl = NULL;
	vswp->mdeg_port_hdl = NULL;

	return (1);
}

/*
 * Undo vsw_mdeg_register(): drop both MDEG registrations and free
 * the per-instance node/property spec structures.  Safe to call
 * even if registration never completed (handles may be NULL).
 */
static void
vsw_mdeg_unregister(vsw_t *vswp)
{
	D1(vswp, "vsw_mdeg_unregister: enter");

	if (vswp->mdeg_hdl != NULL)
		(void) mdeg_unregister(vswp->mdeg_hdl);

	if (vswp->mdeg_port_hdl != NULL)
		(void) mdeg_unregister(vswp->mdeg_port_hdl);

	if (vswp->inst_spec != NULL) {
		if (vswp->inst_spec->specp != NULL) {
			(void) kmem_free(vswp->inst_spec->specp,
			    sizeof (vsw_prop_template));
			vswp->inst_spec->specp = NULL;
		}

		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
		vswp->inst_spec = NULL;
	}

	D1(vswp, "vsw_mdeg_unregister: exit");
}

/*
 * Mdeg callback invoked for the vsw node itself.
 *
 * Returns MDEG_SUCCESS when a matching virtual-network-switch
 * node for this instance was found and processed, MDEG_FAILURE
 * otherwise.
 */
static int
vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp)
{
	vsw_t		*vswp;
	md_t		*mdp;
	mde_cookie_t	node;
	uint64_t	inst;
	char		*node_name = NULL;

	if (resp == NULL)
		return (MDEG_FAILURE);

	vswp = (vsw_t *)cb_argp;

	D1(vswp, "%s: added %d : removed %d : curr matched %d"
	    " : prev matched %d", __func__, resp->added.nelem,
	    resp->removed.nelem, resp->match_curr.nelem,
	    resp->match_prev.nelem);

	/*
	 * We get an initial callback for this node as 'added'
	 * after registering with mdeg. Note that we would have
	 * already gathered information about this vsw node by
	 * walking MD earlier during attach (in vsw_read_mdprops()).
	 * So, there is a window where the properties of this
	 * node might have changed when we get this initial 'added'
	 * callback. We handle this as if an update occurred
	 * and invoke the same function which handles updates to
	 * the properties of this vsw-node if any.
	 *
	 * A non-zero 'match' value indicates that the MD has been
	 * updated and that a virtual-network-switch node is
	 * present which may or may not have been updated. It is
	 * up to the clients to examine their own nodes and
	 * determine if they have changed.
	 */
	if (resp->added.nelem != 0) {

		/* exactly one vsw node is expected per callback */
		if (resp->added.nelem != 1) {
			cmn_err(CE_NOTE, "!vsw%d: number of nodes added "
			    "invalid: %d\n", vswp->instance, resp->added.nelem);
			return (MDEG_FAILURE);
		}

		mdp = resp->added.mdp;
		node = resp->added.mdep[0];

	} else if (resp->match_curr.nelem != 0) {

		if (resp->match_curr.nelem != 1) {
			cmn_err(CE_NOTE, "!vsw%d: number of nodes updated "
			    "invalid: %d\n", vswp->instance,
			    resp->match_curr.nelem);
			return (MDEG_FAILURE);
		}

		mdp = resp->match_curr.mdp;
		node = resp->match_curr.mdep[0];

	} else {
		/* neither added nor updated: nothing for us to do */
		return (MDEG_FAILURE);
	}

	/* Validate name and instance */
	if (md_get_prop_str(mdp, node, "name", &node_name) != 0) {
		DERR(vswp, "%s: unable to get node name\n", __func__);
		return (MDEG_FAILURE);
	}

	/* is this a virtual-network-switch? */
	if (strcmp(node_name, vsw_propname) != 0) {
		DERR(vswp, "%s: Invalid node name: %s\n",
		    __func__, node_name);
		return (MDEG_FAILURE);
	}

	if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) {
		DERR(vswp, "%s: prop(cfg-handle) not found\n",
		    __func__);
		return (MDEG_FAILURE);
	}

	/* is this the right instance of vsw? */
	if (inst != vswp->regprop) {
		DERR(vswp, "%s: Invalid cfg-handle: %lx\n",
		    __func__, inst);
		return (MDEG_FAILURE);
	}

	/* apply any property changes for this node */
	vsw_update_md_prop(vswp, mdp, node);

	return (MDEG_SUCCESS);
}

/*
 * Mdeg callback invoked for changes to the vsw-port nodes
 * under the vsw node.
307134683adeSsg70180 */ 307234683adeSsg70180 static int 307334683adeSsg70180 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 307434683adeSsg70180 { 307534683adeSsg70180 vsw_t *vswp; 307634683adeSsg70180 int idx; 307734683adeSsg70180 md_t *mdp; 307834683adeSsg70180 mde_cookie_t node; 307934683adeSsg70180 uint64_t inst; 308034683adeSsg70180 308134683adeSsg70180 if ((resp == NULL) || (cb_argp == NULL)) 308234683adeSsg70180 return (MDEG_FAILURE); 308334683adeSsg70180 308434683adeSsg70180 vswp = (vsw_t *)cb_argp; 308534683adeSsg70180 308634683adeSsg70180 D2(vswp, "%s: added %d : removed %d : curr matched %d" 308734683adeSsg70180 " : prev matched %d", __func__, resp->added.nelem, 308834683adeSsg70180 resp->removed.nelem, resp->match_curr.nelem, 30891ae08745Sheppo resp->match_prev.nelem); 30901ae08745Sheppo 30911ae08745Sheppo /* process added ports */ 30921ae08745Sheppo for (idx = 0; idx < resp->added.nelem; idx++) { 30931ae08745Sheppo mdp = resp->added.mdp; 30941ae08745Sheppo node = resp->added.mdep[idx]; 30951ae08745Sheppo 30961ae08745Sheppo D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); 30971ae08745Sheppo 30981ae08745Sheppo if (vsw_port_add(vswp, mdp, &node) != 0) { 309934683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Unable to add new port " 310034683adeSsg70180 "(0x%lx)", vswp->instance, node); 31011ae08745Sheppo } 31021ae08745Sheppo } 31031ae08745Sheppo 31041ae08745Sheppo /* process removed ports */ 31051ae08745Sheppo for (idx = 0; idx < resp->removed.nelem; idx++) { 31061ae08745Sheppo mdp = resp->removed.mdp; 31071ae08745Sheppo node = resp->removed.mdep[idx]; 31081ae08745Sheppo 31091ae08745Sheppo if (md_get_prop_val(mdp, node, id_propname, &inst)) { 311034683adeSsg70180 DERR(vswp, "%s: prop(%s) not found in port(%d)", 31111ae08745Sheppo __func__, id_propname, idx); 31121ae08745Sheppo continue; 31131ae08745Sheppo } 31141ae08745Sheppo 31151ae08745Sheppo D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); 31161ae08745Sheppo 31171ae08745Sheppo if 
(vsw_port_detach(vswp, inst) != 0) { 311834683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld", 311934683adeSsg70180 vswp->instance, inst); 31201ae08745Sheppo } 31211ae08745Sheppo } 31221ae08745Sheppo 31231ae08745Sheppo /* 31241ae08745Sheppo * Currently no support for updating already active ports. 31251ae08745Sheppo * So, ignore the match_curr and match_priv arrays for now. 31261ae08745Sheppo */ 31271ae08745Sheppo 31281ae08745Sheppo D1(vswp, "%s: exit", __func__); 31291ae08745Sheppo 31301ae08745Sheppo return (MDEG_SUCCESS); 31311ae08745Sheppo } 31321ae08745Sheppo 31331ae08745Sheppo /* 3134*19b65a69Ssb155480 * Scan the machine description for this instance of vsw 3135*19b65a69Ssb155480 * and read its properties. Called only from vsw_attach(). 3136*19b65a69Ssb155480 * Returns: 0 on success, 1 on failure. 3137*19b65a69Ssb155480 */ 3138*19b65a69Ssb155480 static int 3139*19b65a69Ssb155480 vsw_read_mdprops(vsw_t *vswp) 3140*19b65a69Ssb155480 { 3141*19b65a69Ssb155480 md_t *mdp = NULL; 3142*19b65a69Ssb155480 mde_cookie_t rootnode; 3143*19b65a69Ssb155480 mde_cookie_t *listp = NULL; 3144*19b65a69Ssb155480 uint64_t inst; 3145*19b65a69Ssb155480 uint64_t cfgh; 3146*19b65a69Ssb155480 char *name; 3147*19b65a69Ssb155480 int rv = 1; 3148*19b65a69Ssb155480 int num_nodes = 0; 3149*19b65a69Ssb155480 int num_devs = 0; 3150*19b65a69Ssb155480 int listsz = 0; 3151*19b65a69Ssb155480 int i; 3152*19b65a69Ssb155480 3153*19b65a69Ssb155480 /* 3154*19b65a69Ssb155480 * In each 'virtual-device' node in the MD there is a 3155*19b65a69Ssb155480 * 'cfg-handle' property which is the MD's concept of 3156*19b65a69Ssb155480 * an instance number (this may be completely different from 3157*19b65a69Ssb155480 * the device drivers instance #). OBP reads that value and 3158*19b65a69Ssb155480 * stores it in the 'reg' property of the appropriate node in 3159*19b65a69Ssb155480 * the device tree. 
We first read this reg property and use this 3160*19b65a69Ssb155480 * to compare against the 'cfg-handle' property of vsw nodes 3161*19b65a69Ssb155480 * in MD to get to this specific vsw instance and then read 3162*19b65a69Ssb155480 * other properties that we are interested in. 3163*19b65a69Ssb155480 * We also cache the value of 'reg' property and use it later 3164*19b65a69Ssb155480 * to register callbacks with mdeg (see vsw_mdeg_register()) 3165*19b65a69Ssb155480 */ 3166*19b65a69Ssb155480 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 3167*19b65a69Ssb155480 DDI_PROP_DONTPASS, reg_propname, -1); 3168*19b65a69Ssb155480 if (inst == -1) { 3169*19b65a69Ssb155480 cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from " 3170*19b65a69Ssb155480 "OBP device tree", vswp->instance, reg_propname); 3171*19b65a69Ssb155480 return (rv); 3172*19b65a69Ssb155480 } 3173*19b65a69Ssb155480 3174*19b65a69Ssb155480 vswp->regprop = inst; 3175*19b65a69Ssb155480 3176*19b65a69Ssb155480 if ((mdp = md_get_handle()) == NULL) { 3177*19b65a69Ssb155480 DWARN(vswp, "%s: cannot init MD\n", __func__); 3178*19b65a69Ssb155480 return (rv); 3179*19b65a69Ssb155480 } 3180*19b65a69Ssb155480 3181*19b65a69Ssb155480 num_nodes = md_node_count(mdp); 3182*19b65a69Ssb155480 ASSERT(num_nodes > 0); 3183*19b65a69Ssb155480 3184*19b65a69Ssb155480 listsz = num_nodes * sizeof (mde_cookie_t); 3185*19b65a69Ssb155480 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 3186*19b65a69Ssb155480 3187*19b65a69Ssb155480 rootnode = md_root_node(mdp); 3188*19b65a69Ssb155480 3189*19b65a69Ssb155480 /* search for all "virtual_device" nodes */ 3190*19b65a69Ssb155480 num_devs = md_scan_dag(mdp, rootnode, 3191*19b65a69Ssb155480 md_find_name(mdp, vdev_propname), 3192*19b65a69Ssb155480 md_find_name(mdp, "fwd"), listp); 3193*19b65a69Ssb155480 if (num_devs <= 0) { 3194*19b65a69Ssb155480 DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs); 3195*19b65a69Ssb155480 goto vsw_readmd_exit; 3196*19b65a69Ssb155480 } 
3197*19b65a69Ssb155480 3198*19b65a69Ssb155480 /* 3199*19b65a69Ssb155480 * Now loop through the list of virtual-devices looking for 3200*19b65a69Ssb155480 * devices with name "virtual-network-switch" and for each 3201*19b65a69Ssb155480 * such device compare its instance with what we have from 3202*19b65a69Ssb155480 * the 'reg' property to find the right node in MD and then 3203*19b65a69Ssb155480 * read all its properties. 3204*19b65a69Ssb155480 */ 3205*19b65a69Ssb155480 for (i = 0; i < num_devs; i++) { 3206*19b65a69Ssb155480 3207*19b65a69Ssb155480 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 3208*19b65a69Ssb155480 DWARN(vswp, "%s: name property not found\n", 3209*19b65a69Ssb155480 __func__); 3210*19b65a69Ssb155480 goto vsw_readmd_exit; 3211*19b65a69Ssb155480 } 3212*19b65a69Ssb155480 3213*19b65a69Ssb155480 /* is this a virtual-network-switch? */ 3214*19b65a69Ssb155480 if (strcmp(name, vsw_propname) != 0) 3215*19b65a69Ssb155480 continue; 3216*19b65a69Ssb155480 3217*19b65a69Ssb155480 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 3218*19b65a69Ssb155480 DWARN(vswp, "%s: cfg-handle property not found\n", 3219*19b65a69Ssb155480 __func__); 3220*19b65a69Ssb155480 goto vsw_readmd_exit; 3221*19b65a69Ssb155480 } 3222*19b65a69Ssb155480 3223*19b65a69Ssb155480 /* is this the required instance of vsw? 
*/ 3224*19b65a69Ssb155480 if (inst != cfgh) 3225*19b65a69Ssb155480 continue; 3226*19b65a69Ssb155480 3227*19b65a69Ssb155480 /* now read all properties of this vsw instance */ 3228*19b65a69Ssb155480 rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]); 3229*19b65a69Ssb155480 break; 3230*19b65a69Ssb155480 } 3231*19b65a69Ssb155480 3232*19b65a69Ssb155480 vsw_readmd_exit: 3233*19b65a69Ssb155480 3234*19b65a69Ssb155480 kmem_free(listp, listsz); 3235*19b65a69Ssb155480 (void) md_fini_handle(mdp); 3236*19b65a69Ssb155480 return (rv); 3237*19b65a69Ssb155480 } 3238*19b65a69Ssb155480 3239*19b65a69Ssb155480 /* 324034683adeSsg70180 * Read the initial start-of-day values from the specified MD node. 324134683adeSsg70180 */ 3242*19b65a69Ssb155480 static int 324334683adeSsg70180 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 324434683adeSsg70180 { 324534683adeSsg70180 int i; 324634683adeSsg70180 uint64_t macaddr = 0; 324734683adeSsg70180 324834683adeSsg70180 D1(vswp, "%s: enter", __func__); 324934683adeSsg70180 3250*19b65a69Ssb155480 if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) { 3251*19b65a69Ssb155480 return (1); 325234683adeSsg70180 } 325334683adeSsg70180 325434683adeSsg70180 /* mac address for vswitch device itself */ 325534683adeSsg70180 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 325634683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 325734683adeSsg70180 vswp->instance); 3258*19b65a69Ssb155480 return (1); 3259*19b65a69Ssb155480 } 326034683adeSsg70180 3261*19b65a69Ssb155480 vsw_save_lmacaddr(vswp, macaddr); 326234683adeSsg70180 3263205eeb1aSlm66018 if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) { 326434683adeSsg70180 cmn_err(CE_WARN, "vsw%d: Unable to read %s property from " 326534683adeSsg70180 "MD, defaulting to programmed mode", vswp->instance, 326634683adeSsg70180 smode_propname); 326734683adeSsg70180 326834683adeSsg70180 for (i = 0; i < NUM_SMODES; i++) 
326934683adeSsg70180 vswp->smode[i] = VSW_LAYER2; 327034683adeSsg70180 327134683adeSsg70180 vswp->smode_num = NUM_SMODES; 327234683adeSsg70180 } else { 327334683adeSsg70180 ASSERT(vswp->smode_num != 0); 327434683adeSsg70180 } 327534683adeSsg70180 327634683adeSsg70180 D1(vswp, "%s: exit", __func__); 3277*19b65a69Ssb155480 return (0); 327834683adeSsg70180 } 327934683adeSsg70180 328034683adeSsg70180 /* 328134683adeSsg70180 * Check to see if the relevant properties in the specified node have 328234683adeSsg70180 * changed, and if so take the appropriate action. 328334683adeSsg70180 * 328434683adeSsg70180 * If any of the properties are missing or invalid we don't take 328534683adeSsg70180 * any action, as this function should only be invoked when modifications 328634683adeSsg70180 * have been made to what we assume is a working configuration, which 328734683adeSsg70180 * we leave active. 328834683adeSsg70180 * 328934683adeSsg70180 * Note it is legal for this routine to be invoked even if none of the 329034683adeSsg70180 * properties in the port node within the MD have actually changed. 329134683adeSsg70180 */ 329234683adeSsg70180 static void 329334683adeSsg70180 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 329434683adeSsg70180 { 329534683adeSsg70180 char physname[LIFNAMSIZ]; 329634683adeSsg70180 char drv[LIFNAMSIZ]; 329734683adeSsg70180 uint_t ddi_instance; 329834683adeSsg70180 uint8_t new_smode[NUM_SMODES]; 329934683adeSsg70180 int i, smode_num = 0; 330034683adeSsg70180 uint64_t macaddr = 0; 330134683adeSsg70180 enum {MD_init = 0x1, 330234683adeSsg70180 MD_physname = 0x2, 330334683adeSsg70180 MD_macaddr = 0x4, 330434683adeSsg70180 MD_smode = 0x8} updated; 3305*19b65a69Ssb155480 int rv; 330634683adeSsg70180 330734683adeSsg70180 updated = MD_init; 330834683adeSsg70180 330934683adeSsg70180 D1(vswp, "%s: enter", __func__); 331034683adeSsg70180 331134683adeSsg70180 /* 331234683adeSsg70180 * Check if name of physical device in MD has changed. 
331334683adeSsg70180 */ 331434683adeSsg70180 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { 331534683adeSsg70180 /* 331634683adeSsg70180 * Do basic sanity check on new device name/instance, 331734683adeSsg70180 * if its non NULL. It is valid for the device name to 331834683adeSsg70180 * have changed from a non NULL to a NULL value, i.e. 331934683adeSsg70180 * the vsw is being changed to 'routed' mode. 332034683adeSsg70180 */ 332134683adeSsg70180 if ((strlen(physname) != 0) && 3322*19b65a69Ssb155480 (ddi_parse(physname, drv, 3323*19b65a69Ssb155480 &ddi_instance) != DDI_SUCCESS)) { 332434683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: new device name %s is not" 332534683adeSsg70180 " a valid device name/instance", 332634683adeSsg70180 vswp->instance, physname); 332734683adeSsg70180 goto fail_reconf; 332834683adeSsg70180 } 332934683adeSsg70180 333034683adeSsg70180 if (strcmp(physname, vswp->physname)) { 333134683adeSsg70180 D2(vswp, "%s: device name changed from %s to %s", 333234683adeSsg70180 __func__, vswp->physname, physname); 333334683adeSsg70180 333434683adeSsg70180 updated |= MD_physname; 333534683adeSsg70180 } else { 333634683adeSsg70180 D2(vswp, "%s: device name unchanged at %s", 333734683adeSsg70180 __func__, vswp->physname); 333834683adeSsg70180 } 333934683adeSsg70180 } else { 334034683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " 334134683adeSsg70180 "device from updated MD.", vswp->instance); 334234683adeSsg70180 goto fail_reconf; 334334683adeSsg70180 } 334434683adeSsg70180 334534683adeSsg70180 /* 334634683adeSsg70180 * Check if MAC address has changed. 
334734683adeSsg70180 */ 334834683adeSsg70180 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 334934683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 335034683adeSsg70180 vswp->instance); 335134683adeSsg70180 goto fail_reconf; 335234683adeSsg70180 } else { 3353*19b65a69Ssb155480 uint64_t maddr = macaddr; 335434683adeSsg70180 READ_ENTER(&vswp->if_lockrw); 335534683adeSsg70180 for (i = ETHERADDRL - 1; i >= 0; i--) { 3356*19b65a69Ssb155480 if (vswp->if_addr.ether_addr_octet[i] 3357*19b65a69Ssb155480 != (macaddr & 0xFF)) { 335834683adeSsg70180 D2(vswp, "%s: octet[%d] 0x%x != 0x%x", 335934683adeSsg70180 __func__, i, 336034683adeSsg70180 vswp->if_addr.ether_addr_octet[i], 336134683adeSsg70180 (macaddr & 0xFF)); 336234683adeSsg70180 updated |= MD_macaddr; 3363*19b65a69Ssb155480 macaddr = maddr; 336434683adeSsg70180 break; 336534683adeSsg70180 } 336634683adeSsg70180 macaddr >>= 8; 336734683adeSsg70180 } 336834683adeSsg70180 RW_EXIT(&vswp->if_lockrw); 3369*19b65a69Ssb155480 if (updated & MD_macaddr) { 3370*19b65a69Ssb155480 vsw_save_lmacaddr(vswp, macaddr); 3371*19b65a69Ssb155480 } 337234683adeSsg70180 } 337334683adeSsg70180 337434683adeSsg70180 /* 337534683adeSsg70180 * Check if switching modes have changed. 
337634683adeSsg70180 */ 3377*19b65a69Ssb155480 if (vsw_get_md_smodes(vswp, mdp, node, 3378*19b65a69Ssb155480 new_smode, &smode_num)) { 337934683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", 338034683adeSsg70180 vswp->instance, smode_propname); 338134683adeSsg70180 goto fail_reconf; 338234683adeSsg70180 } else { 338334683adeSsg70180 ASSERT(smode_num != 0); 338434683adeSsg70180 if (smode_num != vswp->smode_num) { 338534683adeSsg70180 D2(vswp, "%s: number of modes changed from %d to %d", 338634683adeSsg70180 __func__, vswp->smode_num, smode_num); 338734683adeSsg70180 } 338834683adeSsg70180 338934683adeSsg70180 for (i = 0; i < smode_num; i++) { 339034683adeSsg70180 if (new_smode[i] != vswp->smode[i]) { 339134683adeSsg70180 D2(vswp, "%s: mode changed from %d to %d", 339234683adeSsg70180 __func__, vswp->smode[i], new_smode[i]); 339334683adeSsg70180 updated |= MD_smode; 339434683adeSsg70180 break; 339534683adeSsg70180 } 339634683adeSsg70180 } 339734683adeSsg70180 } 339834683adeSsg70180 339934683adeSsg70180 /* 340034683adeSsg70180 * Now make any changes which are needed... 340134683adeSsg70180 */ 340234683adeSsg70180 340334683adeSsg70180 if (updated & (MD_physname | MD_smode)) { 340434683adeSsg70180 340534683adeSsg70180 /* 3406*19b65a69Ssb155480 * Stop any pending timeout to setup switching mode. 340734683adeSsg70180 */ 3408*19b65a69Ssb155480 vsw_stop_switching_timeout(vswp); 3409*19b65a69Ssb155480 3410*19b65a69Ssb155480 /* 3411*19b65a69Ssb155480 * Remove unicst, mcst addrs of vsw interface 3412*19b65a69Ssb155480 * and ports from the physdev. 3413*19b65a69Ssb155480 */ 3414*19b65a69Ssb155480 vsw_unset_addrs(vswp); 3415*19b65a69Ssb155480 3416*19b65a69Ssb155480 /* 3417*19b65a69Ssb155480 * Stop, detach and close the old device.. 
3418*19b65a69Ssb155480 */ 3419*19b65a69Ssb155480 mutex_enter(&vswp->mac_lock); 3420*19b65a69Ssb155480 342134683adeSsg70180 vsw_mac_detach(vswp); 3422*19b65a69Ssb155480 vsw_mac_close(vswp); 3423*19b65a69Ssb155480 3424*19b65a69Ssb155480 mutex_exit(&vswp->mac_lock); 342534683adeSsg70180 342634683adeSsg70180 /* 342734683adeSsg70180 * Update phys name. 342834683adeSsg70180 */ 342934683adeSsg70180 if (updated & MD_physname) { 343034683adeSsg70180 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", 343134683adeSsg70180 vswp->instance, vswp->physname, physname); 343234683adeSsg70180 (void) strncpy(vswp->physname, 343334683adeSsg70180 physname, strlen(physname) + 1); 343434683adeSsg70180 } 343534683adeSsg70180 343634683adeSsg70180 /* 343734683adeSsg70180 * Update array with the new switch mode values. 343834683adeSsg70180 */ 343934683adeSsg70180 if (updated & MD_smode) { 344034683adeSsg70180 for (i = 0; i < smode_num; i++) 344134683adeSsg70180 vswp->smode[i] = new_smode[i]; 344234683adeSsg70180 344334683adeSsg70180 vswp->smode_num = smode_num; 344434683adeSsg70180 vswp->smode_idx = 0; 344534683adeSsg70180 } 344634683adeSsg70180 344734683adeSsg70180 /* 344834683adeSsg70180 * ..and attach, start the new device. 344934683adeSsg70180 */ 3450*19b65a69Ssb155480 rv = vsw_setup_switching(vswp); 3451*19b65a69Ssb155480 if (rv == EAGAIN) { 3452*19b65a69Ssb155480 /* 3453*19b65a69Ssb155480 * Unable to setup switching mode. 3454*19b65a69Ssb155480 * As the error is EAGAIN, schedule a timeout to retry 3455*19b65a69Ssb155480 * and return. Programming addresses of ports and 3456*19b65a69Ssb155480 * vsw interface will be done when the timeout handler 3457*19b65a69Ssb155480 * completes successfully. 
3458*19b65a69Ssb155480 */ 3459*19b65a69Ssb155480 mutex_enter(&vswp->swtmout_lock); 3460*19b65a69Ssb155480 3461*19b65a69Ssb155480 vswp->swtmout_enabled = B_TRUE; 3462*19b65a69Ssb155480 vswp->swtmout_id = 3463*19b65a69Ssb155480 timeout(vsw_setup_switching_timeout, vswp, 3464*19b65a69Ssb155480 (vsw_setup_switching_delay * 3465*19b65a69Ssb155480 drv_usectohz(MICROSEC))); 3466*19b65a69Ssb155480 3467*19b65a69Ssb155480 mutex_exit(&vswp->swtmout_lock); 3468*19b65a69Ssb155480 3469*19b65a69Ssb155480 return; 3470*19b65a69Ssb155480 3471*19b65a69Ssb155480 } else if (rv) { 347234683adeSsg70180 goto fail_update; 3473*19b65a69Ssb155480 } 347434683adeSsg70180 347534683adeSsg70180 /* 3476*19b65a69Ssb155480 * program unicst, mcst addrs of vsw interface 3477*19b65a69Ssb155480 * and ports in the physdev. 347834683adeSsg70180 */ 3479*19b65a69Ssb155480 vsw_set_addrs(vswp); 348034683adeSsg70180 3481*19b65a69Ssb155480 } else if (updated & MD_macaddr) { 3482*19b65a69Ssb155480 /* 3483*19b65a69Ssb155480 * We enter here if only MD_macaddr is exclusively updated. 3484*19b65a69Ssb155480 * If MD_physname and/or MD_smode are also updated, then 3485*19b65a69Ssb155480 * as part of that, we would have implicitly processed 3486*19b65a69Ssb155480 * MD_macaddr update (above). 
3487*19b65a69Ssb155480 */ 348834683adeSsg70180 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", 348934683adeSsg70180 vswp->instance, macaddr); 349034683adeSsg70180 3491*19b65a69Ssb155480 READ_ENTER(&vswp->if_lockrw); 3492*19b65a69Ssb155480 if (vswp->if_state & VSW_IF_UP) { 349334683adeSsg70180 34945f94e909Ssg70180 mutex_enter(&vswp->hw_lock); 3495*19b65a69Ssb155480 /* 3496*19b65a69Ssb155480 * Remove old mac address of vsw interface 3497*19b65a69Ssb155480 * from the physdev 3498*19b65a69Ssb155480 */ 34995f94e909Ssg70180 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 3500*19b65a69Ssb155480 /* 3501*19b65a69Ssb155480 * Program new mac address of vsw interface 3502*19b65a69Ssb155480 * in the physdev 3503*19b65a69Ssb155480 */ 3504*19b65a69Ssb155480 rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 35055f94e909Ssg70180 mutex_exit(&vswp->hw_lock); 3506*19b65a69Ssb155480 if (rv != 0) { 3507*19b65a69Ssb155480 cmn_err(CE_NOTE, 3508*19b65a69Ssb155480 "!vsw%d: failed to program interface " 3509*19b65a69Ssb155480 "unicast address\n", vswp->instance); 3510*19b65a69Ssb155480 } 35115f94e909Ssg70180 /* 351234683adeSsg70180 * Notify the MAC layer of the changed address. 351334683adeSsg70180 */ 3514*19b65a69Ssb155480 mac_unicst_update(vswp->if_mh, 3515*19b65a69Ssb155480 (uint8_t *)&vswp->if_addr); 3516*19b65a69Ssb155480 3517*19b65a69Ssb155480 } 3518*19b65a69Ssb155480 RW_EXIT(&vswp->if_lockrw); 3519*19b65a69Ssb155480 352034683adeSsg70180 } 352134683adeSsg70180 352234683adeSsg70180 return; 352334683adeSsg70180 352434683adeSsg70180 fail_reconf: 352534683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); 352634683adeSsg70180 return; 352734683adeSsg70180 352834683adeSsg70180 fail_update: 352934683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: update of configuration failed", 353034683adeSsg70180 vswp->instance); 353134683adeSsg70180 } 353234683adeSsg70180 353334683adeSsg70180 /* 35341ae08745Sheppo * Add a new port to the system. 
35351ae08745Sheppo * 35361ae08745Sheppo * Returns 0 on success, 1 on failure. 35371ae08745Sheppo */ 35381ae08745Sheppo int 35391ae08745Sheppo vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) 35401ae08745Sheppo { 35411ae08745Sheppo uint64_t ldc_id; 35421ae08745Sheppo uint8_t *addrp; 35431ae08745Sheppo int i, addrsz; 35441ae08745Sheppo int num_nodes = 0, nchan = 0; 35451ae08745Sheppo int listsz = 0; 35461ae08745Sheppo mde_cookie_t *listp = NULL; 35471ae08745Sheppo struct ether_addr ea; 35481ae08745Sheppo uint64_t macaddr; 35491ae08745Sheppo uint64_t inst = 0; 35501ae08745Sheppo vsw_port_t *port; 35511ae08745Sheppo 35521ae08745Sheppo if (md_get_prop_val(mdp, *node, id_propname, &inst)) { 35531ae08745Sheppo DWARN(vswp, "%s: prop(%s) not found", __func__, 35541ae08745Sheppo id_propname); 35551ae08745Sheppo return (1); 35561ae08745Sheppo } 35571ae08745Sheppo 35581ae08745Sheppo /* 35591ae08745Sheppo * Find the channel endpoint node(s) (which should be under this 35601ae08745Sheppo * port node) which contain the channel id(s). 
35611ae08745Sheppo */ 35621ae08745Sheppo if ((num_nodes = md_node_count(mdp)) <= 0) { 35631ae08745Sheppo DERR(vswp, "%s: invalid number of nodes found (%d)", 35641ae08745Sheppo __func__, num_nodes); 35651ae08745Sheppo return (1); 35661ae08745Sheppo } 35671ae08745Sheppo 356834683adeSsg70180 D2(vswp, "%s: %d nodes found", __func__, num_nodes); 356934683adeSsg70180 35701ae08745Sheppo /* allocate enough space for node list */ 35711ae08745Sheppo listsz = num_nodes * sizeof (mde_cookie_t); 35721ae08745Sheppo listp = kmem_zalloc(listsz, KM_SLEEP); 35731ae08745Sheppo 3574205eeb1aSlm66018 nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname), 35751ae08745Sheppo md_find_name(mdp, "fwd"), listp); 35761ae08745Sheppo 35771ae08745Sheppo if (nchan <= 0) { 35781ae08745Sheppo DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname); 35791ae08745Sheppo kmem_free(listp, listsz); 35801ae08745Sheppo return (1); 35811ae08745Sheppo } 35821ae08745Sheppo 35831ae08745Sheppo D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname); 35841ae08745Sheppo 35851ae08745Sheppo /* use property from first node found */ 35861ae08745Sheppo if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) { 35871ae08745Sheppo DWARN(vswp, "%s: prop(%s) not found\n", __func__, 35881ae08745Sheppo id_propname); 35891ae08745Sheppo kmem_free(listp, listsz); 35901ae08745Sheppo return (1); 35911ae08745Sheppo } 35921ae08745Sheppo 35931ae08745Sheppo /* don't need list any more */ 35941ae08745Sheppo kmem_free(listp, listsz); 35951ae08745Sheppo 35961ae08745Sheppo D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id); 35971ae08745Sheppo 35981ae08745Sheppo /* read mac-address property */ 35991ae08745Sheppo if (md_get_prop_data(mdp, *node, remaddr_propname, 36001ae08745Sheppo &addrp, &addrsz)) { 36011ae08745Sheppo DWARN(vswp, "%s: prop(%s) not found", 36021ae08745Sheppo __func__, remaddr_propname); 36031ae08745Sheppo return (1); 36041ae08745Sheppo } 36051ae08745Sheppo 36061ae08745Sheppo if (addrsz < 
ETHERADDRL) { 36071ae08745Sheppo DWARN(vswp, "%s: invalid address size", __func__); 36081ae08745Sheppo return (1); 36091ae08745Sheppo } 36101ae08745Sheppo 36111ae08745Sheppo macaddr = *((uint64_t *)addrp); 36121ae08745Sheppo D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr); 36131ae08745Sheppo 36141ae08745Sheppo for (i = ETHERADDRL - 1; i >= 0; i--) { 36151ae08745Sheppo ea.ether_addr_octet[i] = macaddr & 0xFF; 36161ae08745Sheppo macaddr >>= 8; 36171ae08745Sheppo } 36181ae08745Sheppo 36191ae08745Sheppo if (vsw_port_attach(vswp, (int)inst, &ldc_id, 1, &ea) != 0) { 36201ae08745Sheppo DERR(vswp, "%s: failed to attach port", __func__); 36211ae08745Sheppo return (1); 36221ae08745Sheppo } 36231ae08745Sheppo 36241ae08745Sheppo port = vsw_lookup_port(vswp, (int)inst); 36251ae08745Sheppo 36261ae08745Sheppo /* just successfuly created the port, so it should exist */ 36271ae08745Sheppo ASSERT(port != NULL); 36281ae08745Sheppo 36291ae08745Sheppo return (0); 36301ae08745Sheppo } 36311ae08745Sheppo 36321ae08745Sheppo /* 36331ae08745Sheppo * Attach the specified port. 36341ae08745Sheppo * 36351ae08745Sheppo * Returns 0 on success, 1 on failure. 36361ae08745Sheppo */ 36371ae08745Sheppo static int 36381ae08745Sheppo vsw_port_attach(vsw_t *vswp, int p_instance, uint64_t *ldcids, int nids, 36391ae08745Sheppo struct ether_addr *macaddr) 36401ae08745Sheppo { 36411ae08745Sheppo vsw_port_list_t *plist = &vswp->plist; 36421ae08745Sheppo vsw_port_t *port, **prev_port; 36431ae08745Sheppo int i; 36441ae08745Sheppo 36451ae08745Sheppo D1(vswp, "%s: enter : port %d", __func__, p_instance); 36461ae08745Sheppo 36471ae08745Sheppo /* port already exists? 
*/ 36481ae08745Sheppo READ_ENTER(&plist->lockrw); 36491ae08745Sheppo for (port = plist->head; port != NULL; port = port->p_next) { 36501ae08745Sheppo if (port->p_instance == p_instance) { 36511ae08745Sheppo DWARN(vswp, "%s: port instance %d already attached", 36521ae08745Sheppo __func__, p_instance); 36531ae08745Sheppo RW_EXIT(&plist->lockrw); 36541ae08745Sheppo return (1); 36551ae08745Sheppo } 36561ae08745Sheppo } 36571ae08745Sheppo RW_EXIT(&plist->lockrw); 36581ae08745Sheppo 36591ae08745Sheppo port = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP); 36601ae08745Sheppo port->p_vswp = vswp; 36611ae08745Sheppo port->p_instance = p_instance; 36621ae08745Sheppo port->p_ldclist.num_ldcs = 0; 36631ae08745Sheppo port->p_ldclist.head = NULL; 3664e1ebb9ecSlm66018 port->addr_set = VSW_ADDR_UNSET; 36651ae08745Sheppo 36661ae08745Sheppo rw_init(&port->p_ldclist.lockrw, NULL, RW_DRIVER, NULL); 36671ae08745Sheppo 36681ae08745Sheppo mutex_init(&port->tx_lock, NULL, MUTEX_DRIVER, NULL); 36691ae08745Sheppo mutex_init(&port->mca_lock, NULL, MUTEX_DRIVER, NULL); 36701ae08745Sheppo 36711ae08745Sheppo mutex_init(&port->ref_lock, NULL, MUTEX_DRIVER, NULL); 36721ae08745Sheppo cv_init(&port->ref_cv, NULL, CV_DRIVER, NULL); 36731ae08745Sheppo 36741ae08745Sheppo mutex_init(&port->state_lock, NULL, MUTEX_DRIVER, NULL); 36751ae08745Sheppo cv_init(&port->state_cv, NULL, CV_DRIVER, NULL); 36761ae08745Sheppo port->state = VSW_PORT_INIT; 36771ae08745Sheppo 36781ae08745Sheppo if (nids > VSW_PORT_MAX_LDCS) { 3679*19b65a69Ssb155480 D2(vswp, "%s: using first of %d ldc ids", 3680*19b65a69Ssb155480 __func__, nids); 36811ae08745Sheppo nids = VSW_PORT_MAX_LDCS; 36821ae08745Sheppo } 36831ae08745Sheppo 36841ae08745Sheppo D2(vswp, "%s: %d nids", __func__, nids); 36851ae08745Sheppo for (i = 0; i < nids; i++) { 36861ae08745Sheppo D2(vswp, "%s: ldcid (%llx)", __func__, (uint64_t)ldcids[i]); 36871ae08745Sheppo if (vsw_ldc_attach(port, (uint64_t)ldcids[i]) != 0) { 36881ae08745Sheppo DERR(vswp, "%s: ldc_attach failed", 
__func__); 36891ae08745Sheppo 36901ae08745Sheppo rw_destroy(&port->p_ldclist.lockrw); 36911ae08745Sheppo 36921ae08745Sheppo cv_destroy(&port->ref_cv); 36931ae08745Sheppo mutex_destroy(&port->ref_lock); 36941ae08745Sheppo 36951ae08745Sheppo cv_destroy(&port->state_cv); 36961ae08745Sheppo mutex_destroy(&port->state_lock); 36971ae08745Sheppo 36981ae08745Sheppo mutex_destroy(&port->tx_lock); 36991ae08745Sheppo mutex_destroy(&port->mca_lock); 37001ae08745Sheppo kmem_free(port, sizeof (vsw_port_t)); 37011ae08745Sheppo return (1); 37021ae08745Sheppo } 37031ae08745Sheppo } 37041ae08745Sheppo 37051ae08745Sheppo ether_copy(macaddr, &port->p_macaddr); 37061ae08745Sheppo 3707*19b65a69Ssb155480 if (vswp->switching_setup_done == B_TRUE) { 3708*19b65a69Ssb155480 /* 3709*19b65a69Ssb155480 * If the underlying physical device has been setup, 3710*19b65a69Ssb155480 * program the mac address of this port in it. 3711*19b65a69Ssb155480 * Otherwise, port macaddr will be set after the physical 3712*19b65a69Ssb155480 * device is successfully setup by the timeout handler. 3713*19b65a69Ssb155480 */ 3714*19b65a69Ssb155480 mutex_enter(&vswp->hw_lock); 3715*19b65a69Ssb155480 (void) vsw_set_hw(vswp, port, VSW_VNETPORT); 3716*19b65a69Ssb155480 mutex_exit(&vswp->hw_lock); 3717*19b65a69Ssb155480 } 3718*19b65a69Ssb155480 37191ae08745Sheppo WRITE_ENTER(&plist->lockrw); 37201ae08745Sheppo 37211ae08745Sheppo /* create the fdb entry for this port/mac address */ 37221ae08745Sheppo (void) vsw_add_fdb(vswp, port); 37231ae08745Sheppo 37241ae08745Sheppo /* link it into the list of ports for this vsw instance */ 37251ae08745Sheppo prev_port = (vsw_port_t **)(&plist->head); 37261ae08745Sheppo port->p_next = *prev_port; 37271ae08745Sheppo *prev_port = port; 37281ae08745Sheppo plist->num_ports++; 3729*19b65a69Ssb155480 37301ae08745Sheppo RW_EXIT(&plist->lockrw); 37311ae08745Sheppo 37321ae08745Sheppo /* 37331ae08745Sheppo * Initialise the port and any ldc's under it. 
37341ae08745Sheppo */ 37351ae08745Sheppo (void) vsw_init_ldcs(port); 37361ae08745Sheppo 37371ae08745Sheppo D1(vswp, "%s: exit", __func__); 37381ae08745Sheppo return (0); 37391ae08745Sheppo } 37401ae08745Sheppo 37411ae08745Sheppo /* 37421ae08745Sheppo * Detach the specified port. 37431ae08745Sheppo * 37441ae08745Sheppo * Returns 0 on success, 1 on failure. 37451ae08745Sheppo */ 37461ae08745Sheppo static int 37471ae08745Sheppo vsw_port_detach(vsw_t *vswp, int p_instance) 37481ae08745Sheppo { 37491ae08745Sheppo vsw_port_t *port = NULL; 37501ae08745Sheppo vsw_port_list_t *plist = &vswp->plist; 37511ae08745Sheppo 37521ae08745Sheppo D1(vswp, "%s: enter: port id %d", __func__, p_instance); 37531ae08745Sheppo 37541ae08745Sheppo WRITE_ENTER(&plist->lockrw); 37551ae08745Sheppo 37561ae08745Sheppo if ((port = vsw_lookup_port(vswp, p_instance)) == NULL) { 37571ae08745Sheppo RW_EXIT(&plist->lockrw); 37581ae08745Sheppo return (1); 37591ae08745Sheppo } 37601ae08745Sheppo 37611ae08745Sheppo if (vsw_plist_del_node(vswp, port)) { 37621ae08745Sheppo RW_EXIT(&plist->lockrw); 37631ae08745Sheppo return (1); 37641ae08745Sheppo } 37651ae08745Sheppo 37661ae08745Sheppo /* Remove the fdb entry for this port/mac address */ 37671ae08745Sheppo (void) vsw_del_fdb(vswp, port); 37681ae08745Sheppo 37691ae08745Sheppo /* Remove any multicast addresses.. */ 37701ae08745Sheppo vsw_del_mcst_port(port); 37711ae08745Sheppo 37721ae08745Sheppo /* 3773e1ebb9ecSlm66018 * No longer need to hold writer lock on port list now 3774e1ebb9ecSlm66018 * that we have unlinked the target port from the list. 37751ae08745Sheppo */ 37761ae08745Sheppo RW_EXIT(&plist->lockrw); 37771ae08745Sheppo 37785f94e909Ssg70180 /* Remove address if was programmed into HW. */ 37795f94e909Ssg70180 mutex_enter(&vswp->hw_lock); 3780*19b65a69Ssb155480 3781*19b65a69Ssb155480 /* 3782*19b65a69Ssb155480 * Port's address may not have been set in hardware. 
This could 3783*19b65a69Ssb155480 * happen if the underlying physical device is not yet available and 3784*19b65a69Ssb155480 * vsw_setup_switching_timeout() may be in progress. 3785*19b65a69Ssb155480 * We remove its addr from hardware only if it has been set before. 3786*19b65a69Ssb155480 */ 3787*19b65a69Ssb155480 if (port->addr_set != VSW_ADDR_UNSET) 37885f94e909Ssg70180 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT); 3789*19b65a69Ssb155480 3790e1ebb9ecSlm66018 if (vswp->recfg_reqd) 37915f94e909Ssg70180 vsw_reconfig_hw(vswp); 3792*19b65a69Ssb155480 37935f94e909Ssg70180 mutex_exit(&vswp->hw_lock); 3794e1ebb9ecSlm66018 37951ae08745Sheppo if (vsw_port_delete(port)) { 37961ae08745Sheppo return (1); 37971ae08745Sheppo } 37981ae08745Sheppo 37991ae08745Sheppo D1(vswp, "%s: exit: p_instance(%d)", __func__, p_instance); 38001ae08745Sheppo return (0); 38011ae08745Sheppo } 38021ae08745Sheppo 38031ae08745Sheppo /* 38041ae08745Sheppo * Detach all active ports. 38051ae08745Sheppo * 38061ae08745Sheppo * Returns 0 on success, 1 on failure. 38071ae08745Sheppo */ 38081ae08745Sheppo static int 38091ae08745Sheppo vsw_detach_ports(vsw_t *vswp) 38101ae08745Sheppo { 38111ae08745Sheppo vsw_port_list_t *plist = &vswp->plist; 38121ae08745Sheppo vsw_port_t *port = NULL; 38131ae08745Sheppo 38141ae08745Sheppo D1(vswp, "%s: enter", __func__); 38151ae08745Sheppo 38161ae08745Sheppo WRITE_ENTER(&plist->lockrw); 38171ae08745Sheppo 38181ae08745Sheppo while ((port = plist->head) != NULL) { 38191ae08745Sheppo if (vsw_plist_del_node(vswp, port)) { 38201ae08745Sheppo DERR(vswp, "%s: Error deleting port %d" 3821205eeb1aSlm66018 " from port list", __func__, port->p_instance); 38221ae08745Sheppo RW_EXIT(&plist->lockrw); 38231ae08745Sheppo return (1); 38241ae08745Sheppo } 38251ae08745Sheppo 3826e1ebb9ecSlm66018 /* Remove address if was programmed into HW. 
*/ 38275f94e909Ssg70180 mutex_enter(&vswp->hw_lock); 38285f94e909Ssg70180 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT); 38295f94e909Ssg70180 mutex_exit(&vswp->hw_lock); 3830e1ebb9ecSlm66018 38311ae08745Sheppo /* Remove the fdb entry for this port/mac address */ 38321ae08745Sheppo (void) vsw_del_fdb(vswp, port); 38331ae08745Sheppo 38341ae08745Sheppo /* Remove any multicast addresses.. */ 38351ae08745Sheppo vsw_del_mcst_port(port); 38361ae08745Sheppo 38371ae08745Sheppo /* 38381ae08745Sheppo * No longer need to hold the lock on the port list 38391ae08745Sheppo * now that we have unlinked the target port from the 38401ae08745Sheppo * list. 38411ae08745Sheppo */ 38421ae08745Sheppo RW_EXIT(&plist->lockrw); 38431ae08745Sheppo if (vsw_port_delete(port)) { 38441ae08745Sheppo DERR(vswp, "%s: Error deleting port %d", 38451ae08745Sheppo __func__, port->p_instance); 38461ae08745Sheppo return (1); 38471ae08745Sheppo } 38481ae08745Sheppo WRITE_ENTER(&plist->lockrw); 38491ae08745Sheppo } 38501ae08745Sheppo RW_EXIT(&plist->lockrw); 38511ae08745Sheppo 38521ae08745Sheppo D1(vswp, "%s: exit", __func__); 38531ae08745Sheppo 38541ae08745Sheppo return (0); 38551ae08745Sheppo } 38561ae08745Sheppo 38571ae08745Sheppo /* 38581ae08745Sheppo * Delete the specified port. 38591ae08745Sheppo * 38601ae08745Sheppo * Returns 0 on success, 1 on failure. 38611ae08745Sheppo */ 38621ae08745Sheppo static int 38631ae08745Sheppo vsw_port_delete(vsw_port_t *port) 38641ae08745Sheppo { 38651ae08745Sheppo vsw_ldc_list_t *ldcl; 38661ae08745Sheppo vsw_t *vswp = port->p_vswp; 38671ae08745Sheppo 38681ae08745Sheppo D1(vswp, "%s: enter : port id %d", __func__, port->p_instance); 38691ae08745Sheppo 38701ae08745Sheppo (void) vsw_uninit_ldcs(port); 38711ae08745Sheppo 38721ae08745Sheppo /* 38731ae08745Sheppo * Wait for any pending ctrl msg tasks which reference this 38741ae08745Sheppo * port to finish. 
38751ae08745Sheppo */ 38761ae08745Sheppo if (vsw_drain_port_taskq(port)) 38771ae08745Sheppo return (1); 38781ae08745Sheppo 38791ae08745Sheppo /* 38801ae08745Sheppo * Wait for port reference count to hit zero. 38811ae08745Sheppo */ 38821ae08745Sheppo mutex_enter(&port->ref_lock); 38831ae08745Sheppo while (port->ref_cnt != 0) 38841ae08745Sheppo cv_wait(&port->ref_cv, &port->ref_lock); 38851ae08745Sheppo mutex_exit(&port->ref_lock); 38861ae08745Sheppo 38871ae08745Sheppo /* 38881ae08745Sheppo * Wait for any active callbacks to finish 38891ae08745Sheppo */ 38901ae08745Sheppo if (vsw_drain_ldcs(port)) 38911ae08745Sheppo return (1); 38921ae08745Sheppo 38931ae08745Sheppo ldcl = &port->p_ldclist; 38941ae08745Sheppo WRITE_ENTER(&ldcl->lockrw); 38951ae08745Sheppo while (ldcl->num_ldcs > 0) { 3896205eeb1aSlm66018 if (vsw_ldc_detach(port, ldcl->head->ldc_id) != 0) { 389734683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: unable to detach ldc %ld", 389834683adeSsg70180 vswp->instance, ldcl->head->ldc_id); 38991ae08745Sheppo RW_EXIT(&ldcl->lockrw); 39001ae08745Sheppo return (1); 39011ae08745Sheppo } 39021ae08745Sheppo } 39031ae08745Sheppo RW_EXIT(&ldcl->lockrw); 39041ae08745Sheppo 39051ae08745Sheppo rw_destroy(&port->p_ldclist.lockrw); 39061ae08745Sheppo 39071ae08745Sheppo mutex_destroy(&port->mca_lock); 39081ae08745Sheppo mutex_destroy(&port->tx_lock); 39091ae08745Sheppo cv_destroy(&port->ref_cv); 39101ae08745Sheppo mutex_destroy(&port->ref_lock); 39111ae08745Sheppo 39121ae08745Sheppo cv_destroy(&port->state_cv); 39131ae08745Sheppo mutex_destroy(&port->state_lock); 39141ae08745Sheppo 39151ae08745Sheppo kmem_free(port, sizeof (vsw_port_t)); 39161ae08745Sheppo 39171ae08745Sheppo D1(vswp, "%s: exit", __func__); 39181ae08745Sheppo 39191ae08745Sheppo return (0); 39201ae08745Sheppo } 39211ae08745Sheppo 39221ae08745Sheppo /* 39231ae08745Sheppo * Attach a logical domain channel (ldc) under a specified port. 39241ae08745Sheppo * 39251ae08745Sheppo * Returns 0 on success, 1 on failure. 
39261ae08745Sheppo */ 39271ae08745Sheppo static int 39281ae08745Sheppo vsw_ldc_attach(vsw_port_t *port, uint64_t ldc_id) 39291ae08745Sheppo { 39301ae08745Sheppo vsw_t *vswp = port->p_vswp; 39311ae08745Sheppo vsw_ldc_list_t *ldcl = &port->p_ldclist; 39321ae08745Sheppo vsw_ldc_t *ldcp = NULL; 39331ae08745Sheppo ldc_attr_t attr; 39341ae08745Sheppo ldc_status_t istatus; 39351ae08745Sheppo int status = DDI_FAILURE; 3936d10e4ef2Snarayan int rv; 39373af08d82Slm66018 enum { PROG_init = 0x0, PROG_mblks = 0x1, 39383af08d82Slm66018 PROG_callback = 0x2} 39393af08d82Slm66018 progress; 39403af08d82Slm66018 39413af08d82Slm66018 progress = PROG_init; 39421ae08745Sheppo 39431ae08745Sheppo D1(vswp, "%s: enter", __func__); 39441ae08745Sheppo 39451ae08745Sheppo ldcp = kmem_zalloc(sizeof (vsw_ldc_t), KM_NOSLEEP); 39461ae08745Sheppo if (ldcp == NULL) { 39471ae08745Sheppo DERR(vswp, "%s: kmem_zalloc failed", __func__); 39481ae08745Sheppo return (1); 39491ae08745Sheppo } 39501ae08745Sheppo ldcp->ldc_id = ldc_id; 39511ae08745Sheppo 3952d10e4ef2Snarayan /* allocate pool of receive mblks */ 3953d10e4ef2Snarayan rv = vio_create_mblks(vsw_num_mblks, vsw_mblk_size, &(ldcp->rxh)); 3954d10e4ef2Snarayan if (rv) { 3955d10e4ef2Snarayan DWARN(vswp, "%s: unable to create free mblk pool for" 3956d10e4ef2Snarayan " channel %ld (rv %d)", __func__, ldc_id, rv); 3957d10e4ef2Snarayan kmem_free(ldcp, sizeof (vsw_ldc_t)); 3958d10e4ef2Snarayan return (1); 3959d10e4ef2Snarayan } 3960d10e4ef2Snarayan 39613af08d82Slm66018 progress |= PROG_mblks; 39623af08d82Slm66018 39631ae08745Sheppo mutex_init(&ldcp->ldc_txlock, NULL, MUTEX_DRIVER, NULL); 39641ae08745Sheppo mutex_init(&ldcp->ldc_cblock, NULL, MUTEX_DRIVER, NULL); 39651ae08745Sheppo mutex_init(&ldcp->drain_cv_lock, NULL, MUTEX_DRIVER, NULL); 39661ae08745Sheppo cv_init(&ldcp->drain_cv, NULL, CV_DRIVER, NULL); 3967445b4c2eSsb155480 rw_init(&ldcp->lane_in.dlistrw, NULL, RW_DRIVER, NULL); 3968445b4c2eSsb155480 rw_init(&ldcp->lane_out.dlistrw, NULL, RW_DRIVER, NULL); 
39691ae08745Sheppo 39701ae08745Sheppo /* required for handshake with peer */ 39711ae08745Sheppo ldcp->local_session = (uint64_t)ddi_get_lbolt(); 39721ae08745Sheppo ldcp->peer_session = 0; 39731ae08745Sheppo ldcp->session_status = 0; 39741ae08745Sheppo 39751ae08745Sheppo mutex_init(&ldcp->hss_lock, NULL, MUTEX_DRIVER, NULL); 39761ae08745Sheppo ldcp->hss_id = 1; /* Initial handshake session id */ 39771ae08745Sheppo 39781ae08745Sheppo /* only set for outbound lane, inbound set by peer */ 3979d10e4ef2Snarayan mutex_init(&ldcp->lane_in.seq_lock, NULL, MUTEX_DRIVER, NULL); 3980d10e4ef2Snarayan mutex_init(&ldcp->lane_out.seq_lock, NULL, MUTEX_DRIVER, NULL); 39811ae08745Sheppo vsw_set_lane_attr(vswp, &ldcp->lane_out); 39821ae08745Sheppo 39831ae08745Sheppo attr.devclass = LDC_DEV_NT_SVC; 39841ae08745Sheppo attr.instance = ddi_get_instance(vswp->dip); 39851ae08745Sheppo attr.mode = LDC_MODE_UNRELIABLE; 3986e1ebb9ecSlm66018 attr.mtu = VSW_LDC_MTU; 39871ae08745Sheppo status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle); 39881ae08745Sheppo if (status != 0) { 39891ae08745Sheppo DERR(vswp, "%s(%lld): ldc_init failed, rv (%d)", 39901ae08745Sheppo __func__, ldc_id, status); 3991d10e4ef2Snarayan goto ldc_attach_fail; 39921ae08745Sheppo } 39931ae08745Sheppo 39941ae08745Sheppo status = ldc_reg_callback(ldcp->ldc_handle, vsw_ldc_cb, (caddr_t)ldcp); 39951ae08745Sheppo if (status != 0) { 39961ae08745Sheppo DERR(vswp, "%s(%lld): ldc_reg_callback failed, rv (%d)", 39971ae08745Sheppo __func__, ldc_id, status); 39981ae08745Sheppo (void) ldc_fini(ldcp->ldc_handle); 3999d10e4ef2Snarayan goto ldc_attach_fail; 40001ae08745Sheppo } 40011ae08745Sheppo 40023af08d82Slm66018 progress |= PROG_callback; 40033af08d82Slm66018 40043af08d82Slm66018 mutex_init(&ldcp->status_lock, NULL, MUTEX_DRIVER, NULL); 40051ae08745Sheppo 40061ae08745Sheppo if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 40071ae08745Sheppo DERR(vswp, "%s: ldc_status failed", __func__); 40083af08d82Slm66018 
mutex_destroy(&ldcp->status_lock); 40093af08d82Slm66018 goto ldc_attach_fail; 40101ae08745Sheppo } 40111ae08745Sheppo 40121ae08745Sheppo ldcp->ldc_status = istatus; 40131ae08745Sheppo ldcp->ldc_port = port; 40141ae08745Sheppo ldcp->ldc_vswp = vswp; 40151ae08745Sheppo 40161ae08745Sheppo /* link it into the list of channels for this port */ 40171ae08745Sheppo WRITE_ENTER(&ldcl->lockrw); 40181ae08745Sheppo ldcp->ldc_next = ldcl->head; 40191ae08745Sheppo ldcl->head = ldcp; 40201ae08745Sheppo ldcl->num_ldcs++; 40211ae08745Sheppo RW_EXIT(&ldcl->lockrw); 40221ae08745Sheppo 40231ae08745Sheppo D1(vswp, "%s: exit", __func__); 40241ae08745Sheppo return (0); 4025d10e4ef2Snarayan 4026d10e4ef2Snarayan ldc_attach_fail: 4027d10e4ef2Snarayan mutex_destroy(&ldcp->ldc_txlock); 4028d10e4ef2Snarayan mutex_destroy(&ldcp->ldc_cblock); 4029d10e4ef2Snarayan 4030d10e4ef2Snarayan cv_destroy(&ldcp->drain_cv); 4031d10e4ef2Snarayan 4032445b4c2eSsb155480 rw_destroy(&ldcp->lane_in.dlistrw); 4033445b4c2eSsb155480 rw_destroy(&ldcp->lane_out.dlistrw); 4034445b4c2eSsb155480 40353af08d82Slm66018 if (progress & PROG_callback) { 40363af08d82Slm66018 (void) ldc_unreg_callback(ldcp->ldc_handle); 40373af08d82Slm66018 } 40383af08d82Slm66018 40393af08d82Slm66018 if ((progress & PROG_mblks) && (ldcp->rxh != NULL)) { 4040d10e4ef2Snarayan if (vio_destroy_mblks(ldcp->rxh) != 0) { 4041d10e4ef2Snarayan /* 4042d10e4ef2Snarayan * Something odd has happened, as the destroy 4043d10e4ef2Snarayan * will only fail if some mblks have been allocated 4044d10e4ef2Snarayan * from the pool already (which shouldn't happen) 4045d10e4ef2Snarayan * and have not been returned. 4046d10e4ef2Snarayan * 4047d10e4ef2Snarayan * Add the pool pointer to a list maintained in 4048d10e4ef2Snarayan * the device instance. Another attempt will be made 4049d10e4ef2Snarayan * to free the pool when the device itself detaches. 
4050d10e4ef2Snarayan */ 405134683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: Creation of ldc channel %ld " 405234683adeSsg70180 "failed and cannot destroy associated mblk " 405334683adeSsg70180 "pool", vswp->instance, ldc_id); 4054d10e4ef2Snarayan ldcp->rxh->nextp = vswp->rxh; 4055d10e4ef2Snarayan vswp->rxh = ldcp->rxh; 4056d10e4ef2Snarayan } 4057d10e4ef2Snarayan } 4058d10e4ef2Snarayan mutex_destroy(&ldcp->drain_cv_lock); 4059d10e4ef2Snarayan mutex_destroy(&ldcp->hss_lock); 4060d10e4ef2Snarayan 4061d10e4ef2Snarayan mutex_destroy(&ldcp->lane_in.seq_lock); 4062d10e4ef2Snarayan mutex_destroy(&ldcp->lane_out.seq_lock); 4063d10e4ef2Snarayan kmem_free(ldcp, sizeof (vsw_ldc_t)); 4064d10e4ef2Snarayan 4065d10e4ef2Snarayan return (1); 40661ae08745Sheppo } 40671ae08745Sheppo 40681ae08745Sheppo /* 40691ae08745Sheppo * Detach a logical domain channel (ldc) belonging to a 40701ae08745Sheppo * particular port. 40711ae08745Sheppo * 40721ae08745Sheppo * Returns 0 on success, 1 on failure. 40731ae08745Sheppo */ 40741ae08745Sheppo static int 40751ae08745Sheppo vsw_ldc_detach(vsw_port_t *port, uint64_t ldc_id) 40761ae08745Sheppo { 40771ae08745Sheppo vsw_t *vswp = port->p_vswp; 40781ae08745Sheppo vsw_ldc_t *ldcp, *prev_ldcp; 40791ae08745Sheppo vsw_ldc_list_t *ldcl = &port->p_ldclist; 40801ae08745Sheppo int rv; 40811ae08745Sheppo 40821ae08745Sheppo prev_ldcp = ldcl->head; 40831ae08745Sheppo for (; (ldcp = prev_ldcp) != NULL; prev_ldcp = ldcp->ldc_next) { 40841ae08745Sheppo if (ldcp->ldc_id == ldc_id) { 40851ae08745Sheppo break; 40861ae08745Sheppo } 40871ae08745Sheppo } 40881ae08745Sheppo 40891ae08745Sheppo /* specified ldc id not found */ 40901ae08745Sheppo if (ldcp == NULL) { 40911ae08745Sheppo DERR(vswp, "%s: ldcp = NULL", __func__); 40921ae08745Sheppo return (1); 40931ae08745Sheppo } 40941ae08745Sheppo 40951ae08745Sheppo D2(vswp, "%s: detaching channel %lld", __func__, ldcp->ldc_id); 40961ae08745Sheppo 40971ae08745Sheppo /* 40981ae08745Sheppo * Before we can close the channel we must 
release any mapped 40991ae08745Sheppo * resources (e.g. drings). 41001ae08745Sheppo */ 41011ae08745Sheppo vsw_free_lane_resources(ldcp, INBOUND); 41021ae08745Sheppo vsw_free_lane_resources(ldcp, OUTBOUND); 41031ae08745Sheppo 41041ae08745Sheppo /* 41051ae08745Sheppo * If the close fails we are in serious trouble, as won't 41061ae08745Sheppo * be able to delete the parent port. 41071ae08745Sheppo */ 41081ae08745Sheppo if ((rv = ldc_close(ldcp->ldc_handle)) != 0) { 41091ae08745Sheppo DERR(vswp, "%s: error %d closing channel %lld", 41101ae08745Sheppo __func__, rv, ldcp->ldc_id); 41111ae08745Sheppo return (1); 41121ae08745Sheppo } 41131ae08745Sheppo 41141ae08745Sheppo (void) ldc_fini(ldcp->ldc_handle); 41151ae08745Sheppo 41161ae08745Sheppo ldcp->ldc_status = LDC_INIT; 41171ae08745Sheppo ldcp->ldc_handle = NULL; 41181ae08745Sheppo ldcp->ldc_vswp = NULL; 4119d10e4ef2Snarayan 4120d10e4ef2Snarayan if (ldcp->rxh != NULL) { 4121d10e4ef2Snarayan if (vio_destroy_mblks(ldcp->rxh)) { 4122d10e4ef2Snarayan /* 4123d10e4ef2Snarayan * Mostly likely some mblks are still in use and 4124d10e4ef2Snarayan * have not been returned to the pool. Add the pool 4125d10e4ef2Snarayan * to the list maintained in the device instance. 4126d10e4ef2Snarayan * Another attempt will be made to destroy the pool 4127d10e4ef2Snarayan * when the device detaches. 
4128d10e4ef2Snarayan */ 4129d10e4ef2Snarayan ldcp->rxh->nextp = vswp->rxh; 4130d10e4ef2Snarayan vswp->rxh = ldcp->rxh; 4131d10e4ef2Snarayan } 4132d10e4ef2Snarayan } 4133d10e4ef2Snarayan 41343af08d82Slm66018 /* unlink it from the list */ 41353af08d82Slm66018 prev_ldcp = ldcp->ldc_next; 41363af08d82Slm66018 ldcl->num_ldcs--; 41373af08d82Slm66018 41381ae08745Sheppo mutex_destroy(&ldcp->ldc_txlock); 41391ae08745Sheppo mutex_destroy(&ldcp->ldc_cblock); 41401ae08745Sheppo cv_destroy(&ldcp->drain_cv); 41411ae08745Sheppo mutex_destroy(&ldcp->drain_cv_lock); 41421ae08745Sheppo mutex_destroy(&ldcp->hss_lock); 4143d10e4ef2Snarayan mutex_destroy(&ldcp->lane_in.seq_lock); 4144d10e4ef2Snarayan mutex_destroy(&ldcp->lane_out.seq_lock); 41453af08d82Slm66018 mutex_destroy(&ldcp->status_lock); 4146445b4c2eSsb155480 rw_destroy(&ldcp->lane_in.dlistrw); 4147445b4c2eSsb155480 rw_destroy(&ldcp->lane_out.dlistrw); 41481ae08745Sheppo 41491ae08745Sheppo kmem_free(ldcp, sizeof (vsw_ldc_t)); 41501ae08745Sheppo 41511ae08745Sheppo return (0); 41521ae08745Sheppo } 41531ae08745Sheppo 41541ae08745Sheppo /* 41551ae08745Sheppo * Open and attempt to bring up the channel. Note that channel 41561ae08745Sheppo * can only be brought up if peer has also opened channel. 41571ae08745Sheppo * 41581ae08745Sheppo * Returns 0 if can open and bring up channel, otherwise 41591ae08745Sheppo * returns 1. 
41601ae08745Sheppo */ 41611ae08745Sheppo static int 41621ae08745Sheppo vsw_ldc_init(vsw_ldc_t *ldcp) 41631ae08745Sheppo { 41641ae08745Sheppo vsw_t *vswp = ldcp->ldc_vswp; 41651ae08745Sheppo ldc_status_t istatus = 0; 41661ae08745Sheppo int rv; 41671ae08745Sheppo 41681ae08745Sheppo D1(vswp, "%s: enter", __func__); 41691ae08745Sheppo 41701ae08745Sheppo LDC_ENTER_LOCK(ldcp); 41711ae08745Sheppo 41721ae08745Sheppo /* don't start at 0 in case clients don't like that */ 41731ae08745Sheppo ldcp->next_ident = 1; 41741ae08745Sheppo 41751ae08745Sheppo rv = ldc_open(ldcp->ldc_handle); 41761ae08745Sheppo if (rv != 0) { 41771ae08745Sheppo DERR(vswp, "%s: ldc_open failed: id(%lld) rv(%d)", 41781ae08745Sheppo __func__, ldcp->ldc_id, rv); 41791ae08745Sheppo LDC_EXIT_LOCK(ldcp); 41801ae08745Sheppo return (1); 41811ae08745Sheppo } 41821ae08745Sheppo 41831ae08745Sheppo if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { 41841ae08745Sheppo DERR(vswp, "%s: unable to get status", __func__); 41851ae08745Sheppo LDC_EXIT_LOCK(ldcp); 41861ae08745Sheppo return (1); 41871ae08745Sheppo 41881ae08745Sheppo } else if (istatus != LDC_OPEN && istatus != LDC_READY) { 41891ae08745Sheppo DERR(vswp, "%s: id (%lld) status(%d) is not OPEN/READY", 41901ae08745Sheppo __func__, ldcp->ldc_id, istatus); 41911ae08745Sheppo LDC_EXIT_LOCK(ldcp); 41921ae08745Sheppo return (1); 41931ae08745Sheppo } 41941ae08745Sheppo 41953af08d82Slm66018 mutex_enter(&ldcp->status_lock); 41961ae08745Sheppo ldcp->ldc_status = istatus; 41973af08d82Slm66018 mutex_exit(&ldcp->status_lock); 41983af08d82Slm66018 41991ae08745Sheppo rv = ldc_up(ldcp->ldc_handle); 42001ae08745Sheppo if (rv != 0) { 42011ae08745Sheppo /* 42021ae08745Sheppo * Not a fatal error for ldc_up() to fail, as peer 42031ae08745Sheppo * end point may simply not be ready yet. 
42041ae08745Sheppo */ 42051ae08745Sheppo D2(vswp, "%s: ldc_up err id(%lld) rv(%d)", __func__, 42061ae08745Sheppo ldcp->ldc_id, rv); 42071ae08745Sheppo LDC_EXIT_LOCK(ldcp); 42081ae08745Sheppo return (1); 42091ae08745Sheppo } 42101ae08745Sheppo 42111ae08745Sheppo /* 42121ae08745Sheppo * ldc_up() call is non-blocking so need to explicitly 42131ae08745Sheppo * check channel status to see if in fact the channel 42141ae08745Sheppo * is UP. 42151ae08745Sheppo */ 42163af08d82Slm66018 mutex_enter(&ldcp->status_lock); 42173af08d82Slm66018 if (ldc_status(ldcp->ldc_handle, &ldcp->ldc_status) != 0) { 42181ae08745Sheppo DERR(vswp, "%s: unable to get status", __func__); 42193af08d82Slm66018 mutex_exit(&ldcp->status_lock); 42201ae08745Sheppo LDC_EXIT_LOCK(ldcp); 42211ae08745Sheppo return (1); 42221ae08745Sheppo 42231ae08745Sheppo } 4224b071742bSsg70180 4225b071742bSsg70180 if (ldcp->ldc_status == LDC_UP) { 4226b071742bSsg70180 D2(vswp, "%s: channel %ld now UP (%ld)", __func__, 4227b071742bSsg70180 ldcp->ldc_id, istatus); 42283af08d82Slm66018 mutex_exit(&ldcp->status_lock); 42291ae08745Sheppo LDC_EXIT_LOCK(ldcp); 42301ae08745Sheppo 4231b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 4232b071742bSsg70180 return (0); 42333af08d82Slm66018 } 42343af08d82Slm66018 4235b071742bSsg70180 mutex_exit(&ldcp->status_lock); 4236b071742bSsg70180 LDC_EXIT_LOCK(ldcp); 4237b071742bSsg70180 42381ae08745Sheppo D1(vswp, "%s: exit", __func__); 42391ae08745Sheppo return (0); 42401ae08745Sheppo } 42411ae08745Sheppo 42421ae08745Sheppo /* disable callbacks on the channel */ 42431ae08745Sheppo static int 42441ae08745Sheppo vsw_ldc_uninit(vsw_ldc_t *ldcp) 42451ae08745Sheppo { 42461ae08745Sheppo vsw_t *vswp = ldcp->ldc_vswp; 42471ae08745Sheppo int rv; 42481ae08745Sheppo 42491ae08745Sheppo D1(vswp, "vsw_ldc_uninit: enter: id(%lx)\n", ldcp->ldc_id); 42501ae08745Sheppo 42511ae08745Sheppo LDC_ENTER_LOCK(ldcp); 42521ae08745Sheppo 42531ae08745Sheppo rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE); 
42541ae08745Sheppo if (rv != 0) { 42551ae08745Sheppo DERR(vswp, "vsw_ldc_uninit(%lld): error disabling " 42561ae08745Sheppo "interrupts (rv = %d)\n", ldcp->ldc_id, rv); 42571ae08745Sheppo LDC_EXIT_LOCK(ldcp); 42581ae08745Sheppo return (1); 42591ae08745Sheppo } 42601ae08745Sheppo 42613af08d82Slm66018 mutex_enter(&ldcp->status_lock); 42621ae08745Sheppo ldcp->ldc_status = LDC_INIT; 42633af08d82Slm66018 mutex_exit(&ldcp->status_lock); 42641ae08745Sheppo 42651ae08745Sheppo LDC_EXIT_LOCK(ldcp); 42661ae08745Sheppo 42671ae08745Sheppo D1(vswp, "vsw_ldc_uninit: exit: id(%lx)", ldcp->ldc_id); 42681ae08745Sheppo 42691ae08745Sheppo return (0); 42701ae08745Sheppo } 42711ae08745Sheppo 42721ae08745Sheppo static int 42731ae08745Sheppo vsw_init_ldcs(vsw_port_t *port) 42741ae08745Sheppo { 42751ae08745Sheppo vsw_ldc_list_t *ldcl = &port->p_ldclist; 42761ae08745Sheppo vsw_ldc_t *ldcp; 42771ae08745Sheppo 42781ae08745Sheppo READ_ENTER(&ldcl->lockrw); 42791ae08745Sheppo ldcp = ldcl->head; 42801ae08745Sheppo for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 42811ae08745Sheppo (void) vsw_ldc_init(ldcp); 42821ae08745Sheppo } 42831ae08745Sheppo RW_EXIT(&ldcl->lockrw); 42841ae08745Sheppo 42851ae08745Sheppo return (0); 42861ae08745Sheppo } 42871ae08745Sheppo 42881ae08745Sheppo static int 42891ae08745Sheppo vsw_uninit_ldcs(vsw_port_t *port) 42901ae08745Sheppo { 42911ae08745Sheppo vsw_ldc_list_t *ldcl = &port->p_ldclist; 42921ae08745Sheppo vsw_ldc_t *ldcp; 42931ae08745Sheppo 42941ae08745Sheppo D1(NULL, "vsw_uninit_ldcs: enter\n"); 42951ae08745Sheppo 42961ae08745Sheppo READ_ENTER(&ldcl->lockrw); 42971ae08745Sheppo ldcp = ldcl->head; 42981ae08745Sheppo for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 42991ae08745Sheppo (void) vsw_ldc_uninit(ldcp); 43001ae08745Sheppo } 43011ae08745Sheppo RW_EXIT(&ldcl->lockrw); 43021ae08745Sheppo 43031ae08745Sheppo D1(NULL, "vsw_uninit_ldcs: exit\n"); 43041ae08745Sheppo 43051ae08745Sheppo return (0); 43061ae08745Sheppo } 43071ae08745Sheppo 43081ae08745Sheppo /* 
43091ae08745Sheppo * Wait until the callback(s) associated with the ldcs under the specified 43101ae08745Sheppo * port have completed. 43111ae08745Sheppo * 43121ae08745Sheppo * Prior to this function being invoked each channel under this port 43131ae08745Sheppo * should have been quiesced via ldc_set_cb_mode(DISABLE). 43141ae08745Sheppo * 43151ae08745Sheppo * A short explaination of what we are doing below.. 43161ae08745Sheppo * 43171ae08745Sheppo * The simplest approach would be to have a reference counter in 43181ae08745Sheppo * the ldc structure which is increment/decremented by the callbacks as 43191ae08745Sheppo * they use the channel. The drain function could then simply disable any 43201ae08745Sheppo * further callbacks and do a cv_wait for the ref to hit zero. Unfortunately 43211ae08745Sheppo * there is a tiny window here - before the callback is able to get the lock 43221ae08745Sheppo * on the channel it is interrupted and this function gets to execute. It 43231ae08745Sheppo * sees that the ref count is zero and believes its free to delete the 43241ae08745Sheppo * associated data structures. 43251ae08745Sheppo * 43261ae08745Sheppo * We get around this by taking advantage of the fact that before the ldc 43271ae08745Sheppo * framework invokes a callback it sets a flag to indicate that there is a 43281ae08745Sheppo * callback active (or about to become active). If when we attempt to 43291ae08745Sheppo * unregister a callback when this active flag is set then the unregister 43301ae08745Sheppo * will fail with EWOULDBLOCK. 43311ae08745Sheppo * 43321ae08745Sheppo * If the unregister fails we do a cv_timedwait. We will either be signaled 43331ae08745Sheppo * by the callback as it is exiting (note we have to wait a short period to 43341ae08745Sheppo * allow the callback to return fully to the ldc framework and it to clear 43351ae08745Sheppo * the active flag), or by the timer expiring. In either case we again attempt 43361ae08745Sheppo * the unregister. 
We repeat this until we can succesfully unregister the 43371ae08745Sheppo * callback. 43381ae08745Sheppo * 43391ae08745Sheppo * The reason we use a cv_timedwait rather than a simple cv_wait is to catch 43401ae08745Sheppo * the case where the callback has finished but the ldc framework has not yet 43411ae08745Sheppo * cleared the active flag. In this case we would never get a cv_signal. 43421ae08745Sheppo */ 43431ae08745Sheppo static int 43441ae08745Sheppo vsw_drain_ldcs(vsw_port_t *port) 43451ae08745Sheppo { 43461ae08745Sheppo vsw_ldc_list_t *ldcl = &port->p_ldclist; 43471ae08745Sheppo vsw_ldc_t *ldcp; 43481ae08745Sheppo vsw_t *vswp = port->p_vswp; 43491ae08745Sheppo 43501ae08745Sheppo D1(vswp, "%s: enter", __func__); 43511ae08745Sheppo 43521ae08745Sheppo READ_ENTER(&ldcl->lockrw); 43531ae08745Sheppo 43541ae08745Sheppo ldcp = ldcl->head; 43551ae08745Sheppo 43561ae08745Sheppo for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 43571ae08745Sheppo /* 43581ae08745Sheppo * If we can unregister the channel callback then we 43591ae08745Sheppo * know that there is no callback either running or 43601ae08745Sheppo * scheduled to run for this channel so move on to next 43611ae08745Sheppo * channel in the list. 43621ae08745Sheppo */ 43631ae08745Sheppo mutex_enter(&ldcp->drain_cv_lock); 43641ae08745Sheppo 43651ae08745Sheppo /* prompt active callbacks to quit */ 43661ae08745Sheppo ldcp->drain_state = VSW_LDC_DRAINING; 43671ae08745Sheppo 43681ae08745Sheppo if ((ldc_unreg_callback(ldcp->ldc_handle)) == 0) { 43691ae08745Sheppo D2(vswp, "%s: unreg callback for chan %ld", __func__, 43701ae08745Sheppo ldcp->ldc_id); 43711ae08745Sheppo mutex_exit(&ldcp->drain_cv_lock); 43721ae08745Sheppo continue; 43731ae08745Sheppo } else { 43741ae08745Sheppo /* 43751ae08745Sheppo * If we end up here we know that either 1) a callback 43761ae08745Sheppo * is currently executing, 2) is about to start (i.e. 
43771ae08745Sheppo * the ldc framework has set the active flag but 43781ae08745Sheppo * has not actually invoked the callback yet, or 3) 43791ae08745Sheppo * has finished and has returned to the ldc framework 43801ae08745Sheppo * but the ldc framework has not yet cleared the 43811ae08745Sheppo * active bit. 43821ae08745Sheppo * 43831ae08745Sheppo * Wait for it to finish. 43841ae08745Sheppo */ 43851ae08745Sheppo while (ldc_unreg_callback(ldcp->ldc_handle) 43861ae08745Sheppo == EWOULDBLOCK) 43871ae08745Sheppo (void) cv_timedwait(&ldcp->drain_cv, 43881ae08745Sheppo &ldcp->drain_cv_lock, lbolt + hz); 43891ae08745Sheppo 43901ae08745Sheppo mutex_exit(&ldcp->drain_cv_lock); 43911ae08745Sheppo D2(vswp, "%s: unreg callback for chan %ld after " 43921ae08745Sheppo "timeout", __func__, ldcp->ldc_id); 43931ae08745Sheppo } 43941ae08745Sheppo } 43951ae08745Sheppo RW_EXIT(&ldcl->lockrw); 43961ae08745Sheppo 43971ae08745Sheppo D1(vswp, "%s: exit", __func__); 43981ae08745Sheppo return (0); 43991ae08745Sheppo } 44001ae08745Sheppo 44011ae08745Sheppo /* 44021ae08745Sheppo * Wait until all tasks which reference this port have completed. 44031ae08745Sheppo * 44041ae08745Sheppo * Prior to this function being invoked each channel under this port 44051ae08745Sheppo * should have been quiesced via ldc_set_cb_mode(DISABLE). 44061ae08745Sheppo */ 44071ae08745Sheppo static int 44081ae08745Sheppo vsw_drain_port_taskq(vsw_port_t *port) 44091ae08745Sheppo { 44101ae08745Sheppo vsw_t *vswp = port->p_vswp; 44111ae08745Sheppo 44121ae08745Sheppo D1(vswp, "%s: enter", __func__); 44131ae08745Sheppo 44141ae08745Sheppo /* 44151ae08745Sheppo * Mark the port as in the process of being detached, and 44161ae08745Sheppo * dispatch a marker task to the queue so we know when all 44171ae08745Sheppo * relevant tasks have completed. 
44181ae08745Sheppo */ 44191ae08745Sheppo mutex_enter(&port->state_lock); 44201ae08745Sheppo port->state = VSW_PORT_DETACHING; 44211ae08745Sheppo 44221ae08745Sheppo if ((vswp->taskq_p == NULL) || 44231ae08745Sheppo (ddi_taskq_dispatch(vswp->taskq_p, vsw_marker_task, 44241ae08745Sheppo port, DDI_NOSLEEP) != DDI_SUCCESS)) { 44251ae08745Sheppo DERR(vswp, "%s: unable to dispatch marker task", 44261ae08745Sheppo __func__); 44271ae08745Sheppo mutex_exit(&port->state_lock); 44281ae08745Sheppo return (1); 44291ae08745Sheppo } 44301ae08745Sheppo 44311ae08745Sheppo /* 44321ae08745Sheppo * Wait for the marker task to finish. 44331ae08745Sheppo */ 44341ae08745Sheppo while (port->state != VSW_PORT_DETACHABLE) 44351ae08745Sheppo cv_wait(&port->state_cv, &port->state_lock); 44361ae08745Sheppo 44371ae08745Sheppo mutex_exit(&port->state_lock); 44381ae08745Sheppo 44391ae08745Sheppo D1(vswp, "%s: exit", __func__); 44401ae08745Sheppo 44411ae08745Sheppo return (0); 44421ae08745Sheppo } 44431ae08745Sheppo 44441ae08745Sheppo static void 44451ae08745Sheppo vsw_marker_task(void *arg) 44461ae08745Sheppo { 44471ae08745Sheppo vsw_port_t *port = arg; 44481ae08745Sheppo vsw_t *vswp = port->p_vswp; 44491ae08745Sheppo 44501ae08745Sheppo D1(vswp, "%s: enter", __func__); 44511ae08745Sheppo 44521ae08745Sheppo mutex_enter(&port->state_lock); 44531ae08745Sheppo 44541ae08745Sheppo /* 44551ae08745Sheppo * No further tasks should be dispatched which reference 44561ae08745Sheppo * this port so ok to mark it as safe to detach. 
44571ae08745Sheppo */ 44581ae08745Sheppo port->state = VSW_PORT_DETACHABLE; 44591ae08745Sheppo 44601ae08745Sheppo cv_signal(&port->state_cv); 44611ae08745Sheppo 44621ae08745Sheppo mutex_exit(&port->state_lock); 44631ae08745Sheppo 44641ae08745Sheppo D1(vswp, "%s: exit", __func__); 44651ae08745Sheppo } 44661ae08745Sheppo 44671ae08745Sheppo static vsw_port_t * 44681ae08745Sheppo vsw_lookup_port(vsw_t *vswp, int p_instance) 44691ae08745Sheppo { 44701ae08745Sheppo vsw_port_list_t *plist = &vswp->plist; 44711ae08745Sheppo vsw_port_t *port; 44721ae08745Sheppo 44731ae08745Sheppo for (port = plist->head; port != NULL; port = port->p_next) { 44741ae08745Sheppo if (port->p_instance == p_instance) { 44751ae08745Sheppo D2(vswp, "vsw_lookup_port: found p_instance\n"); 44761ae08745Sheppo return (port); 44771ae08745Sheppo } 44781ae08745Sheppo } 44791ae08745Sheppo 44801ae08745Sheppo return (NULL); 44811ae08745Sheppo } 44821ae08745Sheppo 44831ae08745Sheppo /* 44841ae08745Sheppo * Search for and remove the specified port from the port 44851ae08745Sheppo * list. Returns 0 if able to locate and remove port, otherwise 44861ae08745Sheppo * returns 1. 
44871ae08745Sheppo */ 44881ae08745Sheppo static int 44891ae08745Sheppo vsw_plist_del_node(vsw_t *vswp, vsw_port_t *port) 44901ae08745Sheppo { 44911ae08745Sheppo vsw_port_list_t *plist = &vswp->plist; 44921ae08745Sheppo vsw_port_t *curr_p, *prev_p; 44931ae08745Sheppo 44941ae08745Sheppo if (plist->head == NULL) 44951ae08745Sheppo return (1); 44961ae08745Sheppo 44971ae08745Sheppo curr_p = prev_p = plist->head; 44981ae08745Sheppo 44991ae08745Sheppo while (curr_p != NULL) { 45001ae08745Sheppo if (curr_p == port) { 45011ae08745Sheppo if (prev_p == curr_p) { 45021ae08745Sheppo plist->head = curr_p->p_next; 45031ae08745Sheppo } else { 45041ae08745Sheppo prev_p->p_next = curr_p->p_next; 45051ae08745Sheppo } 45061ae08745Sheppo plist->num_ports--; 45071ae08745Sheppo break; 45081ae08745Sheppo } else { 45091ae08745Sheppo prev_p = curr_p; 45101ae08745Sheppo curr_p = curr_p->p_next; 45111ae08745Sheppo } 45121ae08745Sheppo } 45131ae08745Sheppo return (0); 45141ae08745Sheppo } 45151ae08745Sheppo 45161ae08745Sheppo /* 45171ae08745Sheppo * Interrupt handler for ldc messages. 45181ae08745Sheppo */ 45191ae08745Sheppo static uint_t 45201ae08745Sheppo vsw_ldc_cb(uint64_t event, caddr_t arg) 45211ae08745Sheppo { 45221ae08745Sheppo vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 45231ae08745Sheppo vsw_t *vswp = ldcp->ldc_vswp; 45241ae08745Sheppo 45251ae08745Sheppo D1(vswp, "%s: enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 45261ae08745Sheppo 45271ae08745Sheppo mutex_enter(&ldcp->ldc_cblock); 45281ae08745Sheppo 4529b071742bSsg70180 mutex_enter(&ldcp->status_lock); 45301ae08745Sheppo if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) { 4531b071742bSsg70180 mutex_exit(&ldcp->status_lock); 45321ae08745Sheppo mutex_exit(&ldcp->ldc_cblock); 45331ae08745Sheppo return (LDC_SUCCESS); 45341ae08745Sheppo } 45353af08d82Slm66018 mutex_exit(&ldcp->status_lock); 45363af08d82Slm66018 45371ae08745Sheppo if (event & LDC_EVT_UP) { 45381ae08745Sheppo /* 4539b071742bSsg70180 * Channel has come up. 
45401ae08745Sheppo */ 45411ae08745Sheppo D2(vswp, "%s: id(%ld) event(%llx) UP: status(%ld)", 4542b071742bSsg70180 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 4543b071742bSsg70180 4544b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 45451ae08745Sheppo 45461ae08745Sheppo ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 45471ae08745Sheppo } 45481ae08745Sheppo 45491ae08745Sheppo if (event & LDC_EVT_READ) { 45501ae08745Sheppo /* 45511ae08745Sheppo * Data available for reading. 45521ae08745Sheppo */ 45531ae08745Sheppo D2(vswp, "%s: id(ld) event(%llx) data READ", 45541ae08745Sheppo __func__, ldcp->ldc_id, event); 45551ae08745Sheppo 45561ae08745Sheppo vsw_process_pkt(ldcp); 45571ae08745Sheppo 45581ae08745Sheppo ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); 45591ae08745Sheppo 45601ae08745Sheppo goto vsw_cb_exit; 45611ae08745Sheppo } 45621ae08745Sheppo 45633af08d82Slm66018 if (event & (LDC_EVT_DOWN | LDC_EVT_RESET)) { 4564b071742bSsg70180 D2(vswp, "%s: id(%ld) event (%lx) DOWN/RESET: status(%ld)", 4565b071742bSsg70180 __func__, ldcp->ldc_id, event, ldcp->ldc_status); 45663af08d82Slm66018 4567b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 45681ae08745Sheppo } 45691ae08745Sheppo 45701ae08745Sheppo /* 45711ae08745Sheppo * Catch either LDC_EVT_WRITE which we don't support or any 45721ae08745Sheppo * unknown event. 45731ae08745Sheppo */ 4574205eeb1aSlm66018 if (event & 4575205eeb1aSlm66018 ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) { 45761ae08745Sheppo DERR(vswp, "%s: id(%ld) Unexpected event=(%llx) status(%ld)", 45771ae08745Sheppo __func__, ldcp->ldc_id, event, ldcp->ldc_status); 45781ae08745Sheppo } 45791ae08745Sheppo 45801ae08745Sheppo vsw_cb_exit: 45811ae08745Sheppo mutex_exit(&ldcp->ldc_cblock); 45821ae08745Sheppo 45831ae08745Sheppo /* 45841ae08745Sheppo * Let the drain function know we are finishing if it 45851ae08745Sheppo * is waiting. 
45861ae08745Sheppo */ 45871ae08745Sheppo mutex_enter(&ldcp->drain_cv_lock); 45881ae08745Sheppo if (ldcp->drain_state == VSW_LDC_DRAINING) 45891ae08745Sheppo cv_signal(&ldcp->drain_cv); 45901ae08745Sheppo mutex_exit(&ldcp->drain_cv_lock); 45911ae08745Sheppo 45921ae08745Sheppo return (LDC_SUCCESS); 45931ae08745Sheppo } 45941ae08745Sheppo 45951ae08745Sheppo /* 4596b071742bSsg70180 * Reinitialise data structures associated with the channel. 45971ae08745Sheppo */ 45981ae08745Sheppo static void 4599b071742bSsg70180 vsw_ldc_reinit(vsw_ldc_t *ldcp) 46001ae08745Sheppo { 46011ae08745Sheppo vsw_t *vswp = ldcp->ldc_vswp; 46021ae08745Sheppo vsw_port_t *port; 46031ae08745Sheppo vsw_ldc_list_t *ldcl; 46041ae08745Sheppo 46053af08d82Slm66018 D1(vswp, "%s: enter", __func__); 46061ae08745Sheppo 46071ae08745Sheppo port = ldcp->ldc_port; 46081ae08745Sheppo ldcl = &port->p_ldclist; 46091ae08745Sheppo 46103af08d82Slm66018 READ_ENTER(&ldcl->lockrw); 46111ae08745Sheppo 46121ae08745Sheppo D2(vswp, "%s: in 0x%llx : out 0x%llx", __func__, 46131ae08745Sheppo ldcp->lane_in.lstate, ldcp->lane_out.lstate); 46141ae08745Sheppo 46151ae08745Sheppo vsw_free_lane_resources(ldcp, INBOUND); 46161ae08745Sheppo vsw_free_lane_resources(ldcp, OUTBOUND); 46171ae08745Sheppo RW_EXIT(&ldcl->lockrw); 46181ae08745Sheppo 46191ae08745Sheppo ldcp->lane_in.lstate = 0; 46201ae08745Sheppo ldcp->lane_out.lstate = 0; 46211ae08745Sheppo 46221ae08745Sheppo /* 46231ae08745Sheppo * Remove parent port from any multicast groups 46241ae08745Sheppo * it may have registered with. Client must resend 46251ae08745Sheppo * multicast add command after handshake completes. 
46261ae08745Sheppo */ 46271ae08745Sheppo (void) vsw_del_fdb(vswp, port); 46281ae08745Sheppo 46291ae08745Sheppo vsw_del_mcst_port(port); 46301ae08745Sheppo 46311ae08745Sheppo ldcp->peer_session = 0; 46321ae08745Sheppo ldcp->session_status = 0; 46333af08d82Slm66018 ldcp->hcnt = 0; 46343af08d82Slm66018 ldcp->hphase = VSW_MILESTONE0; 46353af08d82Slm66018 46363af08d82Slm66018 D1(vswp, "%s: exit", __func__); 46373af08d82Slm66018 } 46383af08d82Slm66018 46393af08d82Slm66018 /* 4640b071742bSsg70180 * Process a connection event. 4641b071742bSsg70180 * 4642b071742bSsg70180 * Note - care must be taken to ensure that this function is 4643b071742bSsg70180 * not called with the dlistrw lock held. 46443af08d82Slm66018 */ 46453af08d82Slm66018 static void 4646b071742bSsg70180 vsw_process_conn_evt(vsw_ldc_t *ldcp, uint16_t evt) 46473af08d82Slm66018 { 46483af08d82Slm66018 vsw_t *vswp = ldcp->ldc_vswp; 4649b071742bSsg70180 vsw_conn_evt_t *conn = NULL; 46503af08d82Slm66018 4651b071742bSsg70180 D1(vswp, "%s: enter", __func__); 46521ae08745Sheppo 46531ae08745Sheppo /* 4654b071742bSsg70180 * Check if either a reset or restart event is pending 4655b071742bSsg70180 * or in progress. If so just return. 4656b071742bSsg70180 * 4657b071742bSsg70180 * A VSW_CONN_RESET event originates either with a LDC_RESET_EVT 4658b071742bSsg70180 * being received by the callback handler, or a ECONNRESET error 4659b071742bSsg70180 * code being returned from a ldc_read() or ldc_write() call. 4660b071742bSsg70180 * 4661b071742bSsg70180 * A VSW_CONN_RESTART event occurs when some error checking code 4662b071742bSsg70180 * decides that there is a problem with data from the channel, 4663b071742bSsg70180 * and that the handshake should be restarted. 
4664b071742bSsg70180 */ 4665b071742bSsg70180 if (((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) && 4666b071742bSsg70180 (ldstub((uint8_t *)&ldcp->reset_active))) 4667b071742bSsg70180 return; 4668b071742bSsg70180 4669b071742bSsg70180 /* 4670b071742bSsg70180 * If it is an LDC_UP event we first check the recorded 4671b071742bSsg70180 * state of the channel. If this is UP then we know that 4672b071742bSsg70180 * the channel moving to the UP state has already been dealt 4673b071742bSsg70180 * with and don't need to dispatch a new task. 4674b071742bSsg70180 * 4675b071742bSsg70180 * The reason for this check is that when we do a ldc_up(), 4676b071742bSsg70180 * depending on the state of the peer, we may or may not get 4677b071742bSsg70180 * a LDC_UP event. As we can't depend on getting a LDC_UP evt 4678b071742bSsg70180 * every time we do ldc_up() we explicitly check the channel 4679b071742bSsg70180 * status to see has it come up (ldc_up() is asynch and will 4680b071742bSsg70180 * complete at some undefined time), and take the appropriate 4681b071742bSsg70180 * action. 4682b071742bSsg70180 * 4683b071742bSsg70180 * The flip side of this is that we may get a LDC_UP event 4684b071742bSsg70180 * when we have already seen that the channel is up and have 4685b071742bSsg70180 * dealt with that. 4686b071742bSsg70180 */ 4687b071742bSsg70180 mutex_enter(&ldcp->status_lock); 4688b071742bSsg70180 if (evt == VSW_CONN_UP) { 4689205eeb1aSlm66018 if ((ldcp->ldc_status == LDC_UP) || (ldcp->reset_active != 0)) { 4690b071742bSsg70180 mutex_exit(&ldcp->status_lock); 4691b071742bSsg70180 return; 4692b071742bSsg70180 } 4693b071742bSsg70180 } 4694b071742bSsg70180 mutex_exit(&ldcp->status_lock); 4695b071742bSsg70180 4696b071742bSsg70180 /* 4697b071742bSsg70180 * The transaction group id allows us to identify and discard 4698b071742bSsg70180 * any tasks which are still pending on the taskq and refer 4699b071742bSsg70180 * to the handshake session we are about to restart or reset. 
4700b071742bSsg70180 * These stale messages no longer have any real meaning. 47011ae08745Sheppo */ 47021ae08745Sheppo mutex_enter(&ldcp->hss_lock); 47031ae08745Sheppo ldcp->hss_id++; 47041ae08745Sheppo mutex_exit(&ldcp->hss_lock); 47051ae08745Sheppo 4706b071742bSsg70180 ASSERT(vswp->taskq_p != NULL); 4707b071742bSsg70180 4708b071742bSsg70180 if ((conn = kmem_zalloc(sizeof (vsw_conn_evt_t), KM_NOSLEEP)) == NULL) { 4709b071742bSsg70180 cmn_err(CE_WARN, "!vsw%d: unable to allocate memory for" 4710b071742bSsg70180 " connection event", vswp->instance); 4711b071742bSsg70180 goto err_exit; 4712b071742bSsg70180 } 4713b071742bSsg70180 4714b071742bSsg70180 conn->evt = evt; 4715b071742bSsg70180 conn->ldcp = ldcp; 4716b071742bSsg70180 4717b071742bSsg70180 if (ddi_taskq_dispatch(vswp->taskq_p, vsw_conn_task, conn, 4718b071742bSsg70180 DDI_NOSLEEP) != DDI_SUCCESS) { 4719b071742bSsg70180 cmn_err(CE_WARN, "!vsw%d: Can't dispatch connection task", 4720b071742bSsg70180 vswp->instance); 4721b071742bSsg70180 4722b071742bSsg70180 kmem_free(conn, sizeof (vsw_conn_evt_t)); 4723b071742bSsg70180 goto err_exit; 4724b071742bSsg70180 } 4725b071742bSsg70180 4726b071742bSsg70180 D1(vswp, "%s: exit", __func__); 4727b071742bSsg70180 return; 4728b071742bSsg70180 4729b071742bSsg70180 err_exit: 4730b071742bSsg70180 /* 4731b071742bSsg70180 * Have mostly likely failed due to memory shortage. Clear the flag so 4732b071742bSsg70180 * that future requests will at least be attempted and will hopefully 4733b071742bSsg70180 * succeed. 4734b071742bSsg70180 */ 4735b071742bSsg70180 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 4736b071742bSsg70180 ldcp->reset_active = 0; 4737b071742bSsg70180 } 4738b071742bSsg70180 4739b071742bSsg70180 /* 4740b071742bSsg70180 * Deal with events relating to a connection. Invoked from a taskq. 
4741b071742bSsg70180 */ 4742b071742bSsg70180 static void 4743b071742bSsg70180 vsw_conn_task(void *arg) 4744b071742bSsg70180 { 4745b071742bSsg70180 vsw_conn_evt_t *conn = (vsw_conn_evt_t *)arg; 4746b071742bSsg70180 vsw_ldc_t *ldcp = NULL; 4747b071742bSsg70180 vsw_t *vswp = NULL; 4748b071742bSsg70180 uint16_t evt; 4749b071742bSsg70180 ldc_status_t curr_status; 4750b071742bSsg70180 4751b071742bSsg70180 ldcp = conn->ldcp; 4752b071742bSsg70180 evt = conn->evt; 4753b071742bSsg70180 vswp = ldcp->ldc_vswp; 4754b071742bSsg70180 4755b071742bSsg70180 D1(vswp, "%s: enter", __func__); 4756b071742bSsg70180 4757b071742bSsg70180 /* can safely free now have copied out data */ 4758b071742bSsg70180 kmem_free(conn, sizeof (vsw_conn_evt_t)); 4759b071742bSsg70180 4760b071742bSsg70180 mutex_enter(&ldcp->status_lock); 4761b071742bSsg70180 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 4762b071742bSsg70180 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 4763b071742bSsg70180 "channel %ld", vswp->instance, ldcp->ldc_id); 4764b071742bSsg70180 mutex_exit(&ldcp->status_lock); 4765b071742bSsg70180 return; 4766b071742bSsg70180 } 4767b071742bSsg70180 4768b071742bSsg70180 /* 4769b071742bSsg70180 * If we wish to restart the handshake on this channel, then if 4770b071742bSsg70180 * the channel is UP we bring it DOWN to flush the underlying 4771b071742bSsg70180 * ldc queue. 4772b071742bSsg70180 */ 4773b071742bSsg70180 if ((evt == VSW_CONN_RESTART) && (curr_status == LDC_UP)) 4774b071742bSsg70180 (void) ldc_down(ldcp->ldc_handle); 4775b071742bSsg70180 4776b071742bSsg70180 /* 4777b071742bSsg70180 * re-init all the associated data structures. 4778b071742bSsg70180 */ 4779b071742bSsg70180 vsw_ldc_reinit(ldcp); 4780b071742bSsg70180 4781b071742bSsg70180 /* 4782b071742bSsg70180 * Bring the channel back up (note it does no harm to 4783b071742bSsg70180 * do this even if the channel is already UP, Just 4784b071742bSsg70180 * becomes effectively a no-op). 
4785b071742bSsg70180 */ 4786b071742bSsg70180 (void) ldc_up(ldcp->ldc_handle); 4787b071742bSsg70180 4788b071742bSsg70180 /* 4789b071742bSsg70180 * Check if channel is now UP. This will only happen if 4790b071742bSsg70180 * peer has also done a ldc_up(). 4791b071742bSsg70180 */ 4792b071742bSsg70180 if (ldc_status(ldcp->ldc_handle, &curr_status) != 0) { 4793b071742bSsg70180 cmn_err(CE_WARN, "!vsw%d: Unable to read status of " 4794b071742bSsg70180 "channel %ld", vswp->instance, ldcp->ldc_id); 4795b071742bSsg70180 mutex_exit(&ldcp->status_lock); 4796b071742bSsg70180 return; 4797b071742bSsg70180 } 4798b071742bSsg70180 4799b071742bSsg70180 ldcp->ldc_status = curr_status; 4800b071742bSsg70180 4801b071742bSsg70180 /* channel UP so restart handshake by sending version info */ 4802b071742bSsg70180 if (curr_status == LDC_UP) { 48031ae08745Sheppo if (ldcp->hcnt++ > vsw_num_handshakes) { 480434683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: exceeded number of permitted" 48051ae08745Sheppo " handshake attempts (%d) on channel %ld", 480634683adeSsg70180 vswp->instance, ldcp->hcnt, ldcp->ldc_id); 48073af08d82Slm66018 mutex_exit(&ldcp->status_lock); 48083af08d82Slm66018 return; 48093af08d82Slm66018 } 4810b071742bSsg70180 4811b071742bSsg70180 if (ddi_taskq_dispatch(vswp->taskq_p, vsw_send_ver, ldcp, 4812b071742bSsg70180 DDI_NOSLEEP) != DDI_SUCCESS) { 4813b071742bSsg70180 cmn_err(CE_WARN, "!vsw%d: Can't dispatch version task", 4814b071742bSsg70180 vswp->instance); 48153af08d82Slm66018 48163af08d82Slm66018 /* 4817b071742bSsg70180 * Don't count as valid restart attempt if couldn't 4818b071742bSsg70180 * send version msg. 48193af08d82Slm66018 */ 4820b071742bSsg70180 if (ldcp->hcnt > 0) 4821b071742bSsg70180 ldcp->hcnt--; 4822b071742bSsg70180 } 48233af08d82Slm66018 } 48243af08d82Slm66018 48253af08d82Slm66018 /* 4826b071742bSsg70180 * Mark that the process is complete by clearing the flag. 
4827b071742bSsg70180 * 4828b071742bSsg70180 * Note is it possible that the taskq dispatch above may have failed, 4829b071742bSsg70180 * most likely due to memory shortage. We still clear the flag so 4830b071742bSsg70180 * future attempts will at least be attempted and will hopefully 4831b071742bSsg70180 * succeed. 48323af08d82Slm66018 */ 4833b071742bSsg70180 if ((evt == VSW_CONN_RESET) || (evt == VSW_CONN_RESTART)) 4834445b4c2eSsb155480 ldcp->reset_active = 0; 4835b071742bSsg70180 4836b071742bSsg70180 mutex_exit(&ldcp->status_lock); 48373af08d82Slm66018 48383af08d82Slm66018 D1(vswp, "%s: exit", __func__); 48393af08d82Slm66018 } 48403af08d82Slm66018 48413af08d82Slm66018 /* 48421ae08745Sheppo * returns 0 if legal for event signified by flag to have 48431ae08745Sheppo * occured at the time it did. Otherwise returns 1. 48441ae08745Sheppo */ 48451ae08745Sheppo int 48461ae08745Sheppo vsw_check_flag(vsw_ldc_t *ldcp, int dir, uint64_t flag) 48471ae08745Sheppo { 48481ae08745Sheppo vsw_t *vswp = ldcp->ldc_vswp; 48491ae08745Sheppo uint64_t state; 48501ae08745Sheppo uint64_t phase; 48511ae08745Sheppo 48521ae08745Sheppo if (dir == INBOUND) 48531ae08745Sheppo state = ldcp->lane_in.lstate; 48541ae08745Sheppo else 48551ae08745Sheppo state = ldcp->lane_out.lstate; 48561ae08745Sheppo 48571ae08745Sheppo phase = ldcp->hphase; 48581ae08745Sheppo 48591ae08745Sheppo switch (flag) { 48601ae08745Sheppo case VSW_VER_INFO_RECV: 48611ae08745Sheppo if (phase > VSW_MILESTONE0) { 48621ae08745Sheppo DERR(vswp, "vsw_check_flag (%d): VER_INFO_RECV" 48631ae08745Sheppo " when in state %d\n", ldcp->ldc_id, phase); 4864b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 48651ae08745Sheppo return (1); 48661ae08745Sheppo } 48671ae08745Sheppo break; 48681ae08745Sheppo 48691ae08745Sheppo case VSW_VER_ACK_RECV: 48701ae08745Sheppo case VSW_VER_NACK_RECV: 48711ae08745Sheppo if (!(state & VSW_VER_INFO_SENT)) { 4872205eeb1aSlm66018 DERR(vswp, "vsw_check_flag (%d): spurious VER_ACK or " 
4873205eeb1aSlm66018 "VER_NACK when in state %d\n", ldcp->ldc_id, phase); 4874b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 48751ae08745Sheppo return (1); 48761ae08745Sheppo } else 48771ae08745Sheppo state &= ~VSW_VER_INFO_SENT; 48781ae08745Sheppo break; 48791ae08745Sheppo 48801ae08745Sheppo case VSW_ATTR_INFO_RECV: 48811ae08745Sheppo if ((phase < VSW_MILESTONE1) || (phase >= VSW_MILESTONE2)) { 48821ae08745Sheppo DERR(vswp, "vsw_check_flag (%d): ATTR_INFO_RECV" 48831ae08745Sheppo " when in state %d\n", ldcp->ldc_id, phase); 4884b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 48851ae08745Sheppo return (1); 48861ae08745Sheppo } 48871ae08745Sheppo break; 48881ae08745Sheppo 48891ae08745Sheppo case VSW_ATTR_ACK_RECV: 48901ae08745Sheppo case VSW_ATTR_NACK_RECV: 48911ae08745Sheppo if (!(state & VSW_ATTR_INFO_SENT)) { 48921ae08745Sheppo DERR(vswp, "vsw_check_flag (%d): spurious ATTR_ACK" 48931ae08745Sheppo " or ATTR_NACK when in state %d\n", 48941ae08745Sheppo ldcp->ldc_id, phase); 4895b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 48961ae08745Sheppo return (1); 48971ae08745Sheppo } else 48981ae08745Sheppo state &= ~VSW_ATTR_INFO_SENT; 48991ae08745Sheppo break; 49001ae08745Sheppo 49011ae08745Sheppo case VSW_DRING_INFO_RECV: 49021ae08745Sheppo if (phase < VSW_MILESTONE1) { 49031ae08745Sheppo DERR(vswp, "vsw_check_flag (%d): DRING_INFO_RECV" 49041ae08745Sheppo " when in state %d\n", ldcp->ldc_id, phase); 4905b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 49061ae08745Sheppo return (1); 49071ae08745Sheppo } 49081ae08745Sheppo break; 49091ae08745Sheppo 49101ae08745Sheppo case VSW_DRING_ACK_RECV: 49111ae08745Sheppo case VSW_DRING_NACK_RECV: 49121ae08745Sheppo if (!(state & VSW_DRING_INFO_SENT)) { 49131ae08745Sheppo DERR(vswp, "vsw_check_flag (%d): spurious DRING_ACK " 49141ae08745Sheppo " or DRING_NACK when in state %d\n", 49151ae08745Sheppo ldcp->ldc_id, phase); 4916b071742bSsg70180 vsw_process_conn_evt(ldcp, 
VSW_CONN_RESTART); 49171ae08745Sheppo return (1); 49181ae08745Sheppo } else 49191ae08745Sheppo state &= ~VSW_DRING_INFO_SENT; 49201ae08745Sheppo break; 49211ae08745Sheppo 49221ae08745Sheppo case VSW_RDX_INFO_RECV: 49231ae08745Sheppo if (phase < VSW_MILESTONE3) { 49241ae08745Sheppo DERR(vswp, "vsw_check_flag (%d): RDX_INFO_RECV" 49251ae08745Sheppo " when in state %d\n", ldcp->ldc_id, phase); 4926b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 49271ae08745Sheppo return (1); 49281ae08745Sheppo } 49291ae08745Sheppo break; 49301ae08745Sheppo 49311ae08745Sheppo case VSW_RDX_ACK_RECV: 49321ae08745Sheppo case VSW_RDX_NACK_RECV: 49331ae08745Sheppo if (!(state & VSW_RDX_INFO_SENT)) { 4934205eeb1aSlm66018 DERR(vswp, "vsw_check_flag (%d): spurious RDX_ACK or " 4935205eeb1aSlm66018 "RDX_NACK when in state %d\n", ldcp->ldc_id, phase); 4936b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 49371ae08745Sheppo return (1); 49381ae08745Sheppo } else 49391ae08745Sheppo state &= ~VSW_RDX_INFO_SENT; 49401ae08745Sheppo break; 49411ae08745Sheppo 49421ae08745Sheppo case VSW_MCST_INFO_RECV: 49431ae08745Sheppo if (phase < VSW_MILESTONE3) { 49441ae08745Sheppo DERR(vswp, "vsw_check_flag (%d): VSW_MCST_INFO_RECV" 49451ae08745Sheppo " when in state %d\n", ldcp->ldc_id, phase); 4946b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 49471ae08745Sheppo return (1); 49481ae08745Sheppo } 49491ae08745Sheppo break; 49501ae08745Sheppo 49511ae08745Sheppo default: 49521ae08745Sheppo DERR(vswp, "vsw_check_flag (%lld): unknown flag (%llx)", 49531ae08745Sheppo ldcp->ldc_id, flag); 49541ae08745Sheppo return (1); 49551ae08745Sheppo } 49561ae08745Sheppo 49571ae08745Sheppo if (dir == INBOUND) 49581ae08745Sheppo ldcp->lane_in.lstate = state; 49591ae08745Sheppo else 49601ae08745Sheppo ldcp->lane_out.lstate = state; 49611ae08745Sheppo 49621ae08745Sheppo D1(vswp, "vsw_check_flag (chan %lld): exit", ldcp->ldc_id); 49631ae08745Sheppo 49641ae08745Sheppo return (0); 
49651ae08745Sheppo } 49661ae08745Sheppo 49671ae08745Sheppo void 49681ae08745Sheppo vsw_next_milestone(vsw_ldc_t *ldcp) 49691ae08745Sheppo { 49701ae08745Sheppo vsw_t *vswp = ldcp->ldc_vswp; 49711ae08745Sheppo 49721ae08745Sheppo D1(vswp, "%s (chan %lld): enter (phase %ld)", __func__, 49731ae08745Sheppo ldcp->ldc_id, ldcp->hphase); 49741ae08745Sheppo 49751ae08745Sheppo DUMP_FLAGS(ldcp->lane_in.lstate); 49761ae08745Sheppo DUMP_FLAGS(ldcp->lane_out.lstate); 49771ae08745Sheppo 49781ae08745Sheppo switch (ldcp->hphase) { 49791ae08745Sheppo 49801ae08745Sheppo case VSW_MILESTONE0: 49811ae08745Sheppo /* 49821ae08745Sheppo * If we haven't started to handshake with our peer, 49831ae08745Sheppo * start to do so now. 49841ae08745Sheppo */ 49851ae08745Sheppo if (ldcp->lane_out.lstate == 0) { 49861ae08745Sheppo D2(vswp, "%s: (chan %lld) starting handshake " 49871ae08745Sheppo "with peer", __func__, ldcp->ldc_id); 4988b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_UP); 49891ae08745Sheppo } 49901ae08745Sheppo 49911ae08745Sheppo /* 49921ae08745Sheppo * Only way to pass this milestone is to have successfully 49931ae08745Sheppo * negotiated version info. 49941ae08745Sheppo */ 49951ae08745Sheppo if ((ldcp->lane_in.lstate & VSW_VER_ACK_SENT) && 49961ae08745Sheppo (ldcp->lane_out.lstate & VSW_VER_ACK_RECV)) { 49971ae08745Sheppo 49981ae08745Sheppo D2(vswp, "%s: (chan %lld) leaving milestone 0", 49991ae08745Sheppo __func__, ldcp->ldc_id); 50001ae08745Sheppo 50011ae08745Sheppo /* 50021ae08745Sheppo * Next milestone is passed when attribute 50031ae08745Sheppo * information has been successfully exchanged. 
50041ae08745Sheppo */ 50051ae08745Sheppo ldcp->hphase = VSW_MILESTONE1; 50061ae08745Sheppo vsw_send_attr(ldcp); 50071ae08745Sheppo 50081ae08745Sheppo } 50091ae08745Sheppo break; 50101ae08745Sheppo 50111ae08745Sheppo case VSW_MILESTONE1: 50121ae08745Sheppo /* 50131ae08745Sheppo * Only way to pass this milestone is to have successfully 50141ae08745Sheppo * negotiated attribute information. 50151ae08745Sheppo */ 50161ae08745Sheppo if (ldcp->lane_in.lstate & VSW_ATTR_ACK_SENT) { 50171ae08745Sheppo 50181ae08745Sheppo ldcp->hphase = VSW_MILESTONE2; 50191ae08745Sheppo 50201ae08745Sheppo /* 50211ae08745Sheppo * If the peer device has said it wishes to 50221ae08745Sheppo * use descriptor rings then we send it our ring 50231ae08745Sheppo * info, otherwise we just set up a private ring 50241ae08745Sheppo * which we use an internal buffer 50251ae08745Sheppo */ 50261ae08745Sheppo if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) 50271ae08745Sheppo vsw_send_dring_info(ldcp); 50281ae08745Sheppo } 50291ae08745Sheppo break; 50301ae08745Sheppo 50311ae08745Sheppo case VSW_MILESTONE2: 50321ae08745Sheppo /* 50331ae08745Sheppo * If peer has indicated in its attribute message that 50341ae08745Sheppo * it wishes to use descriptor rings then the only way 50351ae08745Sheppo * to pass this milestone is for us to have received 50361ae08745Sheppo * valid dring info. 50371ae08745Sheppo * 50381ae08745Sheppo * If peer is not using descriptor rings then just fall 50391ae08745Sheppo * through. 
50401ae08745Sheppo */ 50411ae08745Sheppo if ((ldcp->lane_in.xfer_mode == VIO_DRING_MODE) && 50421ae08745Sheppo (!(ldcp->lane_in.lstate & VSW_DRING_ACK_SENT))) 50431ae08745Sheppo break; 50441ae08745Sheppo 50451ae08745Sheppo D2(vswp, "%s: (chan %lld) leaving milestone 2", 50461ae08745Sheppo __func__, ldcp->ldc_id); 50471ae08745Sheppo 50481ae08745Sheppo ldcp->hphase = VSW_MILESTONE3; 50491ae08745Sheppo vsw_send_rdx(ldcp); 50501ae08745Sheppo break; 50511ae08745Sheppo 50521ae08745Sheppo case VSW_MILESTONE3: 50531ae08745Sheppo /* 50541ae08745Sheppo * Pass this milestone when all paramaters have been 50551ae08745Sheppo * successfully exchanged and RDX sent in both directions. 50561ae08745Sheppo * 50571ae08745Sheppo * Mark outbound lane as available to transmit data. 50581ae08745Sheppo */ 5059b071742bSsg70180 if ((ldcp->lane_out.lstate & VSW_RDX_ACK_SENT) && 5060b071742bSsg70180 (ldcp->lane_in.lstate & VSW_RDX_ACK_RECV)) { 50611ae08745Sheppo 50621ae08745Sheppo D2(vswp, "%s: (chan %lld) leaving milestone 3", 50631ae08745Sheppo __func__, ldcp->ldc_id); 50643af08d82Slm66018 D2(vswp, "%s: ** handshake complete (0x%llx : " 50653af08d82Slm66018 "0x%llx) **", __func__, ldcp->lane_in.lstate, 50663af08d82Slm66018 ldcp->lane_out.lstate); 50671ae08745Sheppo ldcp->lane_out.lstate |= VSW_LANE_ACTIVE; 50681ae08745Sheppo ldcp->hphase = VSW_MILESTONE4; 50691ae08745Sheppo ldcp->hcnt = 0; 50701ae08745Sheppo DISPLAY_STATE(); 50713af08d82Slm66018 } else { 5072205eeb1aSlm66018 D2(vswp, "%s: still in milestone 3 (0x%llx : 0x%llx)", 5073205eeb1aSlm66018 __func__, ldcp->lane_in.lstate, 50743af08d82Slm66018 ldcp->lane_out.lstate); 50751ae08745Sheppo } 50761ae08745Sheppo break; 50771ae08745Sheppo 50781ae08745Sheppo case VSW_MILESTONE4: 50791ae08745Sheppo D2(vswp, "%s: (chan %lld) in milestone 4", __func__, 50801ae08745Sheppo ldcp->ldc_id); 50811ae08745Sheppo break; 50821ae08745Sheppo 50831ae08745Sheppo default: 50841ae08745Sheppo DERR(vswp, "%s: (chan %lld) Unknown Phase %x", __func__, 
50851ae08745Sheppo ldcp->ldc_id, ldcp->hphase); 50861ae08745Sheppo } 50871ae08745Sheppo 50881ae08745Sheppo D1(vswp, "%s (chan %lld): exit (phase %ld)", __func__, ldcp->ldc_id, 50891ae08745Sheppo ldcp->hphase); 50901ae08745Sheppo } 50911ae08745Sheppo 50921ae08745Sheppo /* 50931ae08745Sheppo * Check if major version is supported. 50941ae08745Sheppo * 50951ae08745Sheppo * Returns 0 if finds supported major number, and if necessary 50961ae08745Sheppo * adjusts the minor field. 50971ae08745Sheppo * 50981ae08745Sheppo * Returns 1 if can't match major number exactly. Sets mjor/minor 50991ae08745Sheppo * to next lowest support values, or to zero if no other values possible. 51001ae08745Sheppo */ 51011ae08745Sheppo static int 51021ae08745Sheppo vsw_supported_version(vio_ver_msg_t *vp) 51031ae08745Sheppo { 51041ae08745Sheppo int i; 51051ae08745Sheppo 51061ae08745Sheppo D1(NULL, "vsw_supported_version: enter"); 51071ae08745Sheppo 51081ae08745Sheppo for (i = 0; i < VSW_NUM_VER; i++) { 51091ae08745Sheppo if (vsw_versions[i].ver_major == vp->ver_major) { 51101ae08745Sheppo /* 51111ae08745Sheppo * Matching or lower major version found. Update 51121ae08745Sheppo * minor number if necessary. 
51131ae08745Sheppo */ 51141ae08745Sheppo if (vp->ver_minor > vsw_versions[i].ver_minor) { 5115205eeb1aSlm66018 D2(NULL, "%s: adjusting minor value from %d " 5116205eeb1aSlm66018 "to %d", __func__, vp->ver_minor, 51171ae08745Sheppo vsw_versions[i].ver_minor); 51181ae08745Sheppo vp->ver_minor = vsw_versions[i].ver_minor; 51191ae08745Sheppo } 51201ae08745Sheppo 51211ae08745Sheppo return (0); 51221ae08745Sheppo } 51231ae08745Sheppo 51241ae08745Sheppo if (vsw_versions[i].ver_major < vp->ver_major) { 51251ae08745Sheppo if (vp->ver_minor > vsw_versions[i].ver_minor) { 5126205eeb1aSlm66018 D2(NULL, "%s: adjusting minor value from %d " 5127205eeb1aSlm66018 "to %d", __func__, vp->ver_minor, 51281ae08745Sheppo vsw_versions[i].ver_minor); 51291ae08745Sheppo vp->ver_minor = vsw_versions[i].ver_minor; 51301ae08745Sheppo } 51311ae08745Sheppo return (1); 51321ae08745Sheppo } 51331ae08745Sheppo } 51341ae08745Sheppo 51351ae08745Sheppo /* No match was possible, zero out fields */ 51361ae08745Sheppo vp->ver_major = 0; 51371ae08745Sheppo vp->ver_minor = 0; 51381ae08745Sheppo 51391ae08745Sheppo D1(NULL, "vsw_supported_version: exit"); 51401ae08745Sheppo 51411ae08745Sheppo return (1); 51421ae08745Sheppo } 51431ae08745Sheppo 51441ae08745Sheppo /* 51451ae08745Sheppo * Main routine for processing messages received over LDC. 51461ae08745Sheppo */ 51471ae08745Sheppo static void 51481ae08745Sheppo vsw_process_pkt(void *arg) 51491ae08745Sheppo { 51501ae08745Sheppo vsw_ldc_t *ldcp = (vsw_ldc_t *)arg; 51511ae08745Sheppo vsw_t *vswp = ldcp->ldc_vswp; 51521ae08745Sheppo size_t msglen; 51531ae08745Sheppo vio_msg_tag_t tag; 51541ae08745Sheppo def_msg_t dmsg; 51551ae08745Sheppo int rv = 0; 51561ae08745Sheppo 51573af08d82Slm66018 51581ae08745Sheppo D1(vswp, "%s enter: ldcid (%lld)\n", __func__, ldcp->ldc_id); 51591ae08745Sheppo 51601ae08745Sheppo /* 51611ae08745Sheppo * If channel is up read messages until channel is empty. 
51621ae08745Sheppo */ 51631ae08745Sheppo do { 51641ae08745Sheppo msglen = sizeof (dmsg); 51651ae08745Sheppo rv = ldc_read(ldcp->ldc_handle, (caddr_t)&dmsg, &msglen); 51661ae08745Sheppo 51671ae08745Sheppo if (rv != 0) { 5168205eeb1aSlm66018 DERR(vswp, "%s :ldc_read err id(%lld) rv(%d) len(%d)\n", 5169205eeb1aSlm66018 __func__, ldcp->ldc_id, rv, msglen); 51703af08d82Slm66018 } 51713af08d82Slm66018 51723af08d82Slm66018 /* channel has been reset */ 51733af08d82Slm66018 if (rv == ECONNRESET) { 5174b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 51751ae08745Sheppo break; 51761ae08745Sheppo } 51771ae08745Sheppo 51781ae08745Sheppo if (msglen == 0) { 51791ae08745Sheppo D2(vswp, "%s: ldc_read id(%lld) NODATA", __func__, 51801ae08745Sheppo ldcp->ldc_id); 51811ae08745Sheppo break; 51821ae08745Sheppo } 51831ae08745Sheppo 51841ae08745Sheppo D2(vswp, "%s: ldc_read id(%lld): msglen(%d)", __func__, 51851ae08745Sheppo ldcp->ldc_id, msglen); 51861ae08745Sheppo 51871ae08745Sheppo /* 51881ae08745Sheppo * Figure out what sort of packet we have gotten by 51891ae08745Sheppo * examining the msg tag, and then switch it appropriately. 
51901ae08745Sheppo */ 51911ae08745Sheppo bcopy(&dmsg, &tag, sizeof (vio_msg_tag_t)); 51921ae08745Sheppo 51931ae08745Sheppo switch (tag.vio_msgtype) { 51941ae08745Sheppo case VIO_TYPE_CTRL: 51951ae08745Sheppo vsw_dispatch_ctrl_task(ldcp, &dmsg, tag); 51961ae08745Sheppo break; 51971ae08745Sheppo case VIO_TYPE_DATA: 51981ae08745Sheppo vsw_process_data_pkt(ldcp, &dmsg, tag); 51991ae08745Sheppo break; 52001ae08745Sheppo case VIO_TYPE_ERR: 52011ae08745Sheppo vsw_process_err_pkt(ldcp, &dmsg, tag); 52021ae08745Sheppo break; 52031ae08745Sheppo default: 52041ae08745Sheppo DERR(vswp, "%s: Unknown tag(%lx) ", __func__, 52051ae08745Sheppo "id(%lx)\n", tag.vio_msgtype, ldcp->ldc_id); 52061ae08745Sheppo break; 52071ae08745Sheppo } 52081ae08745Sheppo } while (msglen); 52091ae08745Sheppo 52101ae08745Sheppo D1(vswp, "%s exit: ldcid (%lld)\n", __func__, ldcp->ldc_id); 52111ae08745Sheppo } 52121ae08745Sheppo 52131ae08745Sheppo /* 52141ae08745Sheppo * Dispatch a task to process a VIO control message. 52151ae08745Sheppo */ 52161ae08745Sheppo static void 52171ae08745Sheppo vsw_dispatch_ctrl_task(vsw_ldc_t *ldcp, void *cpkt, vio_msg_tag_t tag) 52181ae08745Sheppo { 52191ae08745Sheppo vsw_ctrl_task_t *ctaskp = NULL; 52201ae08745Sheppo vsw_port_t *port = ldcp->ldc_port; 52211ae08745Sheppo vsw_t *vswp = port->p_vswp; 52221ae08745Sheppo 52231ae08745Sheppo D1(vswp, "%s: enter", __func__); 52241ae08745Sheppo 52251ae08745Sheppo /* 52261ae08745Sheppo * We need to handle RDX ACK messages in-band as once they 52271ae08745Sheppo * are exchanged it is possible that we will get an 52281ae08745Sheppo * immediate (legitimate) data packet. 
52291ae08745Sheppo */ 52301ae08745Sheppo if ((tag.vio_subtype_env == VIO_RDX) && 52311ae08745Sheppo (tag.vio_subtype == VIO_SUBTYPE_ACK)) { 52323af08d82Slm66018 5233b071742bSsg70180 if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_ACK_RECV)) 52341ae08745Sheppo return; 52351ae08745Sheppo 5236b071742bSsg70180 ldcp->lane_in.lstate |= VSW_RDX_ACK_RECV; 52373af08d82Slm66018 D2(vswp, "%s (%ld) handling RDX_ACK in place " 52383af08d82Slm66018 "(ostate 0x%llx : hphase %d)", __func__, 5239b071742bSsg70180 ldcp->ldc_id, ldcp->lane_in.lstate, ldcp->hphase); 52401ae08745Sheppo vsw_next_milestone(ldcp); 52411ae08745Sheppo return; 52421ae08745Sheppo } 52431ae08745Sheppo 52441ae08745Sheppo ctaskp = kmem_alloc(sizeof (vsw_ctrl_task_t), KM_NOSLEEP); 52451ae08745Sheppo 52461ae08745Sheppo if (ctaskp == NULL) { 5247205eeb1aSlm66018 DERR(vswp, "%s: unable to alloc space for ctrl msg", __func__); 5248b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 52491ae08745Sheppo return; 52501ae08745Sheppo } 52511ae08745Sheppo 52521ae08745Sheppo ctaskp->ldcp = ldcp; 52531ae08745Sheppo bcopy((def_msg_t *)cpkt, &ctaskp->pktp, sizeof (def_msg_t)); 52541ae08745Sheppo mutex_enter(&ldcp->hss_lock); 52551ae08745Sheppo ctaskp->hss_id = ldcp->hss_id; 52561ae08745Sheppo mutex_exit(&ldcp->hss_lock); 52571ae08745Sheppo 52581ae08745Sheppo /* 52591ae08745Sheppo * Dispatch task to processing taskq if port is not in 52601ae08745Sheppo * the process of being detached. 
52611ae08745Sheppo */ 52621ae08745Sheppo mutex_enter(&port->state_lock); 52631ae08745Sheppo if (port->state == VSW_PORT_INIT) { 52641ae08745Sheppo if ((vswp->taskq_p == NULL) || 5265205eeb1aSlm66018 (ddi_taskq_dispatch(vswp->taskq_p, vsw_process_ctrl_pkt, 5266205eeb1aSlm66018 ctaskp, DDI_NOSLEEP) != DDI_SUCCESS)) { 52671ae08745Sheppo DERR(vswp, "%s: unable to dispatch task to taskq", 52681ae08745Sheppo __func__); 52691ae08745Sheppo kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 52701ae08745Sheppo mutex_exit(&port->state_lock); 5271b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 52721ae08745Sheppo return; 52731ae08745Sheppo } 52741ae08745Sheppo } else { 52751ae08745Sheppo DWARN(vswp, "%s: port %d detaching, not dispatching " 52761ae08745Sheppo "task", __func__, port->p_instance); 52771ae08745Sheppo } 52781ae08745Sheppo 52791ae08745Sheppo mutex_exit(&port->state_lock); 52801ae08745Sheppo 52811ae08745Sheppo D2(vswp, "%s: dispatched task to taskq for chan %d", __func__, 52821ae08745Sheppo ldcp->ldc_id); 52831ae08745Sheppo D1(vswp, "%s: exit", __func__); 52841ae08745Sheppo } 52851ae08745Sheppo 52861ae08745Sheppo /* 52871ae08745Sheppo * Process a VIO ctrl message. Invoked from taskq. 
52881ae08745Sheppo */ 52891ae08745Sheppo static void 52901ae08745Sheppo vsw_process_ctrl_pkt(void *arg) 52911ae08745Sheppo { 52921ae08745Sheppo vsw_ctrl_task_t *ctaskp = (vsw_ctrl_task_t *)arg; 52931ae08745Sheppo vsw_ldc_t *ldcp = ctaskp->ldcp; 52941ae08745Sheppo vsw_t *vswp = ldcp->ldc_vswp; 52951ae08745Sheppo vio_msg_tag_t tag; 52961ae08745Sheppo uint16_t env; 52971ae08745Sheppo 52981ae08745Sheppo D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 52991ae08745Sheppo 53001ae08745Sheppo bcopy(&ctaskp->pktp, &tag, sizeof (vio_msg_tag_t)); 53011ae08745Sheppo env = tag.vio_subtype_env; 53021ae08745Sheppo 53031ae08745Sheppo /* stale pkt check */ 53041ae08745Sheppo mutex_enter(&ldcp->hss_lock); 53051ae08745Sheppo if (ctaskp->hss_id < ldcp->hss_id) { 5306205eeb1aSlm66018 DWARN(vswp, "%s: discarding stale packet belonging to earlier" 5307205eeb1aSlm66018 " (%ld) handshake session", __func__, ctaskp->hss_id); 53081ae08745Sheppo mutex_exit(&ldcp->hss_lock); 53091ae08745Sheppo return; 53101ae08745Sheppo } 53111ae08745Sheppo mutex_exit(&ldcp->hss_lock); 53121ae08745Sheppo 53131ae08745Sheppo /* session id check */ 53141ae08745Sheppo if (ldcp->session_status & VSW_PEER_SESSION) { 53151ae08745Sheppo if (ldcp->peer_session != tag.vio_sid) { 53161ae08745Sheppo DERR(vswp, "%s (chan %d): invalid session id (%llx)", 53171ae08745Sheppo __func__, ldcp->ldc_id, tag.vio_sid); 53181ae08745Sheppo kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 5319b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 53201ae08745Sheppo return; 53211ae08745Sheppo } 53221ae08745Sheppo } 53231ae08745Sheppo 53241ae08745Sheppo /* 53251ae08745Sheppo * Switch on vio_subtype envelope, then let lower routines 53261ae08745Sheppo * decide if its an INFO, ACK or NACK packet. 
53271ae08745Sheppo */ 53281ae08745Sheppo switch (env) { 53291ae08745Sheppo case VIO_VER_INFO: 53301ae08745Sheppo vsw_process_ctrl_ver_pkt(ldcp, &ctaskp->pktp); 53311ae08745Sheppo break; 53321ae08745Sheppo case VIO_DRING_REG: 53331ae08745Sheppo vsw_process_ctrl_dring_reg_pkt(ldcp, &ctaskp->pktp); 53341ae08745Sheppo break; 53351ae08745Sheppo case VIO_DRING_UNREG: 53361ae08745Sheppo vsw_process_ctrl_dring_unreg_pkt(ldcp, &ctaskp->pktp); 53371ae08745Sheppo break; 53381ae08745Sheppo case VIO_ATTR_INFO: 53391ae08745Sheppo vsw_process_ctrl_attr_pkt(ldcp, &ctaskp->pktp); 53401ae08745Sheppo break; 53411ae08745Sheppo case VNET_MCAST_INFO: 53421ae08745Sheppo vsw_process_ctrl_mcst_pkt(ldcp, &ctaskp->pktp); 53431ae08745Sheppo break; 53441ae08745Sheppo case VIO_RDX: 53451ae08745Sheppo vsw_process_ctrl_rdx_pkt(ldcp, &ctaskp->pktp); 53461ae08745Sheppo break; 53471ae08745Sheppo default: 5348205eeb1aSlm66018 DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env); 53491ae08745Sheppo } 53501ae08745Sheppo 53511ae08745Sheppo kmem_free(ctaskp, sizeof (vsw_ctrl_task_t)); 53521ae08745Sheppo D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 53531ae08745Sheppo } 53541ae08745Sheppo 53551ae08745Sheppo /* 53561ae08745Sheppo * Version negotiation. We can end up here either because our peer 53571ae08745Sheppo * has responded to a handshake message we have sent it, or our peer 53581ae08745Sheppo * has initiated a handshake with us. If its the former then can only 53591ae08745Sheppo * be ACK or NACK, if its the later can only be INFO. 53601ae08745Sheppo * 53611ae08745Sheppo * If its an ACK we move to the next stage of the handshake, namely 53621ae08745Sheppo * attribute exchange. If its a NACK we see if we can specify another 53631ae08745Sheppo * version, if we can't we stop. 
53641ae08745Sheppo * 53651ae08745Sheppo * If it is an INFO we reset all params associated with communication 53661ae08745Sheppo * in that direction over this channel (remember connection is 53671ae08745Sheppo * essentially 2 independent simplex channels). 53681ae08745Sheppo */ 53691ae08745Sheppo void 53701ae08745Sheppo vsw_process_ctrl_ver_pkt(vsw_ldc_t *ldcp, void *pkt) 53711ae08745Sheppo { 53721ae08745Sheppo vio_ver_msg_t *ver_pkt; 53731ae08745Sheppo vsw_t *vswp = ldcp->ldc_vswp; 53741ae08745Sheppo 53751ae08745Sheppo D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 53761ae08745Sheppo 53771ae08745Sheppo /* 53781ae08745Sheppo * We know this is a ctrl/version packet so 53791ae08745Sheppo * cast it into the correct structure. 53801ae08745Sheppo */ 53811ae08745Sheppo ver_pkt = (vio_ver_msg_t *)pkt; 53821ae08745Sheppo 53831ae08745Sheppo switch (ver_pkt->tag.vio_subtype) { 53841ae08745Sheppo case VIO_SUBTYPE_INFO: 53851ae08745Sheppo D2(vswp, "vsw_process_ctrl_ver_pkt: VIO_SUBTYPE_INFO\n"); 53861ae08745Sheppo 53871ae08745Sheppo /* 53881ae08745Sheppo * Record the session id, which we will use from now 53891ae08745Sheppo * until we see another VER_INFO msg. Even then the 53901ae08745Sheppo * session id in most cases will be unchanged, execpt 53911ae08745Sheppo * if channel was reset. 53921ae08745Sheppo */ 53931ae08745Sheppo if ((ldcp->session_status & VSW_PEER_SESSION) && 53941ae08745Sheppo (ldcp->peer_session != ver_pkt->tag.vio_sid)) { 53951ae08745Sheppo DERR(vswp, "%s: updating session id for chan %lld " 53961ae08745Sheppo "from %llx to %llx", __func__, ldcp->ldc_id, 53971ae08745Sheppo ldcp->peer_session, ver_pkt->tag.vio_sid); 53981ae08745Sheppo } 53991ae08745Sheppo 54001ae08745Sheppo ldcp->peer_session = ver_pkt->tag.vio_sid; 54011ae08745Sheppo ldcp->session_status |= VSW_PEER_SESSION; 54021ae08745Sheppo 54031ae08745Sheppo /* Legal message at this time ? 
*/ 54041ae08745Sheppo if (vsw_check_flag(ldcp, INBOUND, VSW_VER_INFO_RECV)) 54051ae08745Sheppo return; 54061ae08745Sheppo 54071ae08745Sheppo /* 54081ae08745Sheppo * First check the device class. Currently only expect 54091ae08745Sheppo * to be talking to a network device. In the future may 54101ae08745Sheppo * also talk to another switch. 54111ae08745Sheppo */ 54121ae08745Sheppo if (ver_pkt->dev_class != VDEV_NETWORK) { 54131ae08745Sheppo DERR(vswp, "%s: illegal device class %d", __func__, 54141ae08745Sheppo ver_pkt->dev_class); 54151ae08745Sheppo 54161ae08745Sheppo ver_pkt->tag.vio_sid = ldcp->local_session; 54171ae08745Sheppo ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 54181ae08745Sheppo 54191ae08745Sheppo DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 54201ae08745Sheppo 5421b071742bSsg70180 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 5422b071742bSsg70180 sizeof (vio_ver_msg_t), B_TRUE); 54231ae08745Sheppo 54241ae08745Sheppo ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 54251ae08745Sheppo vsw_next_milestone(ldcp); 54261ae08745Sheppo return; 54271ae08745Sheppo } else { 54281ae08745Sheppo ldcp->dev_class = ver_pkt->dev_class; 54291ae08745Sheppo } 54301ae08745Sheppo 54311ae08745Sheppo /* 54321ae08745Sheppo * Now check the version. 54331ae08745Sheppo */ 54341ae08745Sheppo if (vsw_supported_version(ver_pkt) == 0) { 54351ae08745Sheppo /* 54361ae08745Sheppo * Support this major version and possibly 54371ae08745Sheppo * adjusted minor version. 
54381ae08745Sheppo */ 54391ae08745Sheppo 54401ae08745Sheppo D2(vswp, "%s: accepted ver %d:%d", __func__, 54411ae08745Sheppo ver_pkt->ver_major, ver_pkt->ver_minor); 54421ae08745Sheppo 54431ae08745Sheppo /* Store accepted values */ 54441ae08745Sheppo ldcp->lane_in.ver_major = ver_pkt->ver_major; 54451ae08745Sheppo ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 54461ae08745Sheppo 54471ae08745Sheppo ver_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 54481ae08745Sheppo 54491ae08745Sheppo ldcp->lane_in.lstate |= VSW_VER_ACK_SENT; 54501ae08745Sheppo } else { 54511ae08745Sheppo /* 54521ae08745Sheppo * NACK back with the next lower major/minor 54531ae08745Sheppo * pairing we support (if don't suuport any more 54541ae08745Sheppo * versions then they will be set to zero. 54551ae08745Sheppo */ 54561ae08745Sheppo 54571ae08745Sheppo D2(vswp, "%s: replying with ver %d:%d", __func__, 54581ae08745Sheppo ver_pkt->ver_major, ver_pkt->ver_minor); 54591ae08745Sheppo 54601ae08745Sheppo /* Store updated values */ 54611ae08745Sheppo ldcp->lane_in.ver_major = ver_pkt->ver_major; 54621ae08745Sheppo ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 54631ae08745Sheppo 54641ae08745Sheppo ver_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 54651ae08745Sheppo 54661ae08745Sheppo ldcp->lane_in.lstate |= VSW_VER_NACK_SENT; 54671ae08745Sheppo } 54681ae08745Sheppo 54691ae08745Sheppo DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 54701ae08745Sheppo ver_pkt->tag.vio_sid = ldcp->local_session; 5471b071742bSsg70180 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 5472b071742bSsg70180 sizeof (vio_ver_msg_t), B_TRUE); 54731ae08745Sheppo 54741ae08745Sheppo vsw_next_milestone(ldcp); 54751ae08745Sheppo break; 54761ae08745Sheppo 54771ae08745Sheppo case VIO_SUBTYPE_ACK: 54781ae08745Sheppo D2(vswp, "%s: VIO_SUBTYPE_ACK\n", __func__); 54791ae08745Sheppo 54801ae08745Sheppo if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_ACK_RECV)) 54811ae08745Sheppo return; 54821ae08745Sheppo 54831ae08745Sheppo /* Store updated values */ 54841ae08745Sheppo 
ldcp->lane_in.ver_major = ver_pkt->ver_major; 54851ae08745Sheppo ldcp->lane_in.ver_minor = ver_pkt->ver_minor; 54861ae08745Sheppo 54871ae08745Sheppo ldcp->lane_out.lstate |= VSW_VER_ACK_RECV; 54881ae08745Sheppo vsw_next_milestone(ldcp); 54891ae08745Sheppo 54901ae08745Sheppo break; 54911ae08745Sheppo 54921ae08745Sheppo case VIO_SUBTYPE_NACK: 54931ae08745Sheppo D2(vswp, "%s: VIO_SUBTYPE_NACK\n", __func__); 54941ae08745Sheppo 54951ae08745Sheppo if (vsw_check_flag(ldcp, OUTBOUND, VSW_VER_NACK_RECV)) 54961ae08745Sheppo return; 54971ae08745Sheppo 54981ae08745Sheppo /* 54991ae08745Sheppo * If our peer sent us a NACK with the ver fields set to 55001ae08745Sheppo * zero then there is nothing more we can do. Otherwise see 55011ae08745Sheppo * if we support either the version suggested, or a lesser 55021ae08745Sheppo * one. 55031ae08745Sheppo */ 55041ae08745Sheppo if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 55051ae08745Sheppo DERR(vswp, "%s: peer unable to negotiate any " 55061ae08745Sheppo "further.", __func__); 55071ae08745Sheppo ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 55081ae08745Sheppo vsw_next_milestone(ldcp); 55091ae08745Sheppo return; 55101ae08745Sheppo } 55111ae08745Sheppo 55121ae08745Sheppo /* 55131ae08745Sheppo * Check to see if we support this major version or 55141ae08745Sheppo * a lower one. If we don't then maj/min will be set 55151ae08745Sheppo * to zero. 
55161ae08745Sheppo */ 55171ae08745Sheppo (void) vsw_supported_version(ver_pkt); 55181ae08745Sheppo if ((ver_pkt->ver_major == 0) && (ver_pkt->ver_minor == 0)) { 55191ae08745Sheppo /* Nothing more we can do */ 55201ae08745Sheppo DERR(vswp, "%s: version negotiation failed.\n", 55211ae08745Sheppo __func__); 55221ae08745Sheppo ldcp->lane_out.lstate |= VSW_VER_NACK_RECV; 55231ae08745Sheppo vsw_next_milestone(ldcp); 55241ae08745Sheppo } else { 55251ae08745Sheppo /* found a supported major version */ 55261ae08745Sheppo ldcp->lane_out.ver_major = ver_pkt->ver_major; 55271ae08745Sheppo ldcp->lane_out.ver_minor = ver_pkt->ver_minor; 55281ae08745Sheppo 55291ae08745Sheppo D2(vswp, "%s: resending with updated values (%x, %x)", 5530205eeb1aSlm66018 __func__, ver_pkt->ver_major, ver_pkt->ver_minor); 55311ae08745Sheppo 55321ae08745Sheppo ldcp->lane_out.lstate |= VSW_VER_INFO_SENT; 55331ae08745Sheppo ver_pkt->tag.vio_sid = ldcp->local_session; 55341ae08745Sheppo ver_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; 55351ae08745Sheppo 55361ae08745Sheppo DUMP_TAG_PTR((vio_msg_tag_t *)ver_pkt); 55371ae08745Sheppo 5538b071742bSsg70180 (void) vsw_send_msg(ldcp, (void *)ver_pkt, 5539b071742bSsg70180 sizeof (vio_ver_msg_t), B_TRUE); 55401ae08745Sheppo 55411ae08745Sheppo vsw_next_milestone(ldcp); 55421ae08745Sheppo 55431ae08745Sheppo } 55441ae08745Sheppo break; 55451ae08745Sheppo 55461ae08745Sheppo default: 55471ae08745Sheppo DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 55481ae08745Sheppo ver_pkt->tag.vio_subtype); 55491ae08745Sheppo } 55501ae08745Sheppo 55511ae08745Sheppo D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id); 55521ae08745Sheppo } 55531ae08745Sheppo 55541ae08745Sheppo /* 55551ae08745Sheppo * Process an attribute packet. 
We can end up here either because our peer 55561ae08745Sheppo * has ACK/NACK'ed back to an earlier ATTR msg we had sent it, or our 55571ae08745Sheppo * peer has sent us an attribute INFO message 55581ae08745Sheppo * 55591ae08745Sheppo * If its an ACK we then move to the next stage of the handshake which 55601ae08745Sheppo * is to send our descriptor ring info to our peer. If its a NACK then 55611ae08745Sheppo * there is nothing more we can (currently) do. 55621ae08745Sheppo * 55631ae08745Sheppo * If we get a valid/acceptable INFO packet (and we have already negotiated 55641ae08745Sheppo * a version) we ACK back and set channel state to ATTR_RECV, otherwise we 55651ae08745Sheppo * NACK back and reset channel state to INACTIV. 55661ae08745Sheppo * 55671ae08745Sheppo * FUTURE: in time we will probably negotiate over attributes, but for 55681ae08745Sheppo * the moment unacceptable attributes are regarded as a fatal error. 55691ae08745Sheppo * 55701ae08745Sheppo */ 55711ae08745Sheppo void 55721ae08745Sheppo vsw_process_ctrl_attr_pkt(vsw_ldc_t *ldcp, void *pkt) 55731ae08745Sheppo { 55741ae08745Sheppo vnet_attr_msg_t *attr_pkt; 55751ae08745Sheppo vsw_t *vswp = ldcp->ldc_vswp; 55761ae08745Sheppo vsw_port_t *port = ldcp->ldc_port; 55771ae08745Sheppo uint64_t macaddr = 0; 55781ae08745Sheppo int i; 55791ae08745Sheppo 55801ae08745Sheppo D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 55811ae08745Sheppo 55821ae08745Sheppo /* 55831ae08745Sheppo * We know this is a ctrl/attr packet so 55841ae08745Sheppo * cast it into the correct structure. 
55851ae08745Sheppo */ 55861ae08745Sheppo attr_pkt = (vnet_attr_msg_t *)pkt; 55871ae08745Sheppo 55881ae08745Sheppo switch (attr_pkt->tag.vio_subtype) { 55891ae08745Sheppo case VIO_SUBTYPE_INFO: 55901ae08745Sheppo D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 55911ae08745Sheppo 55921ae08745Sheppo if (vsw_check_flag(ldcp, INBOUND, VSW_ATTR_INFO_RECV)) 55931ae08745Sheppo return; 55941ae08745Sheppo 55951ae08745Sheppo /* 55961ae08745Sheppo * If the attributes are unacceptable then we NACK back. 55971ae08745Sheppo */ 55981ae08745Sheppo if (vsw_check_attr(attr_pkt, ldcp->ldc_port)) { 55991ae08745Sheppo 56001ae08745Sheppo DERR(vswp, "%s (chan %d): invalid attributes", 56011ae08745Sheppo __func__, ldcp->ldc_id); 56021ae08745Sheppo 56031ae08745Sheppo vsw_free_lane_resources(ldcp, INBOUND); 56041ae08745Sheppo 56051ae08745Sheppo attr_pkt->tag.vio_sid = ldcp->local_session; 56061ae08745Sheppo attr_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 56071ae08745Sheppo 56081ae08745Sheppo DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 56091ae08745Sheppo ldcp->lane_in.lstate |= VSW_ATTR_NACK_SENT; 5610b071742bSsg70180 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 5611b071742bSsg70180 sizeof (vnet_attr_msg_t), B_TRUE); 56121ae08745Sheppo 56131ae08745Sheppo vsw_next_milestone(ldcp); 56141ae08745Sheppo return; 56151ae08745Sheppo } 56161ae08745Sheppo 56171ae08745Sheppo /* 56181ae08745Sheppo * Otherwise store attributes for this lane and update 56191ae08745Sheppo * lane state. 
56201ae08745Sheppo */ 56211ae08745Sheppo ldcp->lane_in.mtu = attr_pkt->mtu; 56221ae08745Sheppo ldcp->lane_in.addr = attr_pkt->addr; 56231ae08745Sheppo ldcp->lane_in.addr_type = attr_pkt->addr_type; 56241ae08745Sheppo ldcp->lane_in.xfer_mode = attr_pkt->xfer_mode; 56251ae08745Sheppo ldcp->lane_in.ack_freq = attr_pkt->ack_freq; 56261ae08745Sheppo 56271ae08745Sheppo macaddr = ldcp->lane_in.addr; 56281ae08745Sheppo for (i = ETHERADDRL - 1; i >= 0; i--) { 56291ae08745Sheppo port->p_macaddr.ether_addr_octet[i] = macaddr & 0xFF; 56301ae08745Sheppo macaddr >>= 8; 56311ae08745Sheppo } 56321ae08745Sheppo 56331ae08745Sheppo /* create the fdb entry for this port/mac address */ 56341ae08745Sheppo (void) vsw_add_fdb(vswp, port); 56351ae08745Sheppo 56361ae08745Sheppo /* setup device specifc xmit routines */ 56371ae08745Sheppo mutex_enter(&port->tx_lock); 56381ae08745Sheppo if (ldcp->lane_in.xfer_mode == VIO_DRING_MODE) { 56391ae08745Sheppo D2(vswp, "%s: mode = VIO_DRING_MODE", __func__); 56401ae08745Sheppo port->transmit = vsw_dringsend; 56411ae08745Sheppo } else if (ldcp->lane_in.xfer_mode == VIO_DESC_MODE) { 56421ae08745Sheppo D2(vswp, "%s: mode = VIO_DESC_MODE", __func__); 56431ae08745Sheppo vsw_create_privring(ldcp); 56441ae08745Sheppo port->transmit = vsw_descrsend; 56451ae08745Sheppo } 56461ae08745Sheppo mutex_exit(&port->tx_lock); 56471ae08745Sheppo 56481ae08745Sheppo attr_pkt->tag.vio_sid = ldcp->local_session; 56491ae08745Sheppo attr_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 56501ae08745Sheppo 56511ae08745Sheppo DUMP_TAG_PTR((vio_msg_tag_t *)attr_pkt); 56521ae08745Sheppo 56531ae08745Sheppo ldcp->lane_in.lstate |= VSW_ATTR_ACK_SENT; 56541ae08745Sheppo 5655b071742bSsg70180 (void) vsw_send_msg(ldcp, (void *)attr_pkt, 5656b071742bSsg70180 sizeof (vnet_attr_msg_t), B_TRUE); 56571ae08745Sheppo 56581ae08745Sheppo vsw_next_milestone(ldcp); 56591ae08745Sheppo break; 56601ae08745Sheppo 56611ae08745Sheppo case VIO_SUBTYPE_ACK: 56621ae08745Sheppo D2(vswp, "%s: VIO_SUBTYPE_ACK", 
__func__); 56631ae08745Sheppo 56641ae08745Sheppo if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_ACK_RECV)) 56651ae08745Sheppo return; 56661ae08745Sheppo 56671ae08745Sheppo ldcp->lane_out.lstate |= VSW_ATTR_ACK_RECV; 56681ae08745Sheppo vsw_next_milestone(ldcp); 56691ae08745Sheppo break; 56701ae08745Sheppo 56711ae08745Sheppo case VIO_SUBTYPE_NACK: 56721ae08745Sheppo D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 56731ae08745Sheppo 56741ae08745Sheppo if (vsw_check_flag(ldcp, OUTBOUND, VSW_ATTR_NACK_RECV)) 56751ae08745Sheppo return; 56761ae08745Sheppo 56771ae08745Sheppo ldcp->lane_out.lstate |= VSW_ATTR_NACK_RECV; 56781ae08745Sheppo vsw_next_milestone(ldcp); 56791ae08745Sheppo break; 56801ae08745Sheppo 56811ae08745Sheppo default: 56821ae08745Sheppo DERR(vswp, "%s: unknown vio_subtype %x\n", __func__, 56831ae08745Sheppo attr_pkt->tag.vio_subtype); 56841ae08745Sheppo } 56851ae08745Sheppo 56861ae08745Sheppo D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 56871ae08745Sheppo } 56881ae08745Sheppo 56891ae08745Sheppo /* 56901ae08745Sheppo * Process a dring info packet. We can end up here either because our peer 56911ae08745Sheppo * has ACK/NACK'ed back to an earlier DRING msg we had sent it, or our 56921ae08745Sheppo * peer has sent us a dring INFO message. 56931ae08745Sheppo * 56941ae08745Sheppo * If we get a valid/acceptable INFO packet (and we have already negotiated 56951ae08745Sheppo * a version) we ACK back and update the lane state, otherwise we NACK back. 56961ae08745Sheppo * 56971ae08745Sheppo * FUTURE: nothing to stop client from sending us info on multiple dring's 56981ae08745Sheppo * but for the moment we will just use the first one we are given. 
56991ae08745Sheppo * 57001ae08745Sheppo */ 57011ae08745Sheppo void 57021ae08745Sheppo vsw_process_ctrl_dring_reg_pkt(vsw_ldc_t *ldcp, void *pkt) 57031ae08745Sheppo { 57041ae08745Sheppo vio_dring_reg_msg_t *dring_pkt; 57051ae08745Sheppo vsw_t *vswp = ldcp->ldc_vswp; 57061ae08745Sheppo ldc_mem_info_t minfo; 57071ae08745Sheppo dring_info_t *dp, *dbp; 57081ae08745Sheppo int dring_found = 0; 57091ae08745Sheppo 57101ae08745Sheppo /* 57111ae08745Sheppo * We know this is a ctrl/dring packet so 57121ae08745Sheppo * cast it into the correct structure. 57131ae08745Sheppo */ 57141ae08745Sheppo dring_pkt = (vio_dring_reg_msg_t *)pkt; 57151ae08745Sheppo 57161ae08745Sheppo D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id); 57171ae08745Sheppo 57181ae08745Sheppo switch (dring_pkt->tag.vio_subtype) { 57191ae08745Sheppo case VIO_SUBTYPE_INFO: 57201ae08745Sheppo D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 57211ae08745Sheppo 57221ae08745Sheppo if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 57231ae08745Sheppo return; 57241ae08745Sheppo 57251ae08745Sheppo /* 57261ae08745Sheppo * If the dring params are unacceptable then we NACK back. 
57271ae08745Sheppo */ 57281ae08745Sheppo if (vsw_check_dring_info(dring_pkt)) { 57291ae08745Sheppo 57301ae08745Sheppo DERR(vswp, "%s (%lld): invalid dring info", 57311ae08745Sheppo __func__, ldcp->ldc_id); 57321ae08745Sheppo 57331ae08745Sheppo vsw_free_lane_resources(ldcp, INBOUND); 57341ae08745Sheppo 57351ae08745Sheppo dring_pkt->tag.vio_sid = ldcp->local_session; 57361ae08745Sheppo dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 57371ae08745Sheppo 57381ae08745Sheppo DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 57391ae08745Sheppo 57401ae08745Sheppo ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 57411ae08745Sheppo 5742b071742bSsg70180 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 5743b071742bSsg70180 sizeof (vio_dring_reg_msg_t), B_TRUE); 57441ae08745Sheppo 57451ae08745Sheppo vsw_next_milestone(ldcp); 57461ae08745Sheppo return; 57471ae08745Sheppo } 57481ae08745Sheppo 57491ae08745Sheppo /* 57501ae08745Sheppo * Otherwise, attempt to map in the dring using the 57511ae08745Sheppo * cookie. If that succeeds we send back a unique dring 57521ae08745Sheppo * identifier that the sending side will use in future 57531ae08745Sheppo * to refer to this descriptor ring. 57541ae08745Sheppo */ 57551ae08745Sheppo dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP); 57561ae08745Sheppo 57571ae08745Sheppo dp->num_descriptors = dring_pkt->num_descriptors; 57581ae08745Sheppo dp->descriptor_size = dring_pkt->descriptor_size; 57591ae08745Sheppo dp->options = dring_pkt->options; 57601ae08745Sheppo dp->ncookies = dring_pkt->ncookies; 57611ae08745Sheppo 57621ae08745Sheppo /* 57631ae08745Sheppo * Note: should only get one cookie. Enforced in 57641ae08745Sheppo * the ldc layer. 
57651ae08745Sheppo */ 57661ae08745Sheppo bcopy(&dring_pkt->cookie[0], &dp->cookie[0], 57671ae08745Sheppo sizeof (ldc_mem_cookie_t)); 57681ae08745Sheppo 57691ae08745Sheppo D2(vswp, "%s: num_desc %ld : desc_size %ld", __func__, 57701ae08745Sheppo dp->num_descriptors, dp->descriptor_size); 57711ae08745Sheppo D2(vswp, "%s: options 0x%lx: ncookies %ld", __func__, 57721ae08745Sheppo dp->options, dp->ncookies); 57731ae08745Sheppo 57741ae08745Sheppo if ((ldc_mem_dring_map(ldcp->ldc_handle, &dp->cookie[0], 5775205eeb1aSlm66018 dp->ncookies, dp->num_descriptors, dp->descriptor_size, 5776205eeb1aSlm66018 LDC_SHADOW_MAP, &(dp->handle))) != 0) { 57771ae08745Sheppo 57781ae08745Sheppo DERR(vswp, "%s: dring_map failed\n", __func__); 57791ae08745Sheppo 57801ae08745Sheppo kmem_free(dp, sizeof (dring_info_t)); 57811ae08745Sheppo vsw_free_lane_resources(ldcp, INBOUND); 57821ae08745Sheppo 57831ae08745Sheppo dring_pkt->tag.vio_sid = ldcp->local_session; 57841ae08745Sheppo dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 57851ae08745Sheppo 57861ae08745Sheppo DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 57871ae08745Sheppo 57881ae08745Sheppo ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 5789b071742bSsg70180 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 5790b071742bSsg70180 sizeof (vio_dring_reg_msg_t), B_TRUE); 57911ae08745Sheppo 57921ae08745Sheppo vsw_next_milestone(ldcp); 57931ae08745Sheppo return; 57941ae08745Sheppo } 57951ae08745Sheppo 57961ae08745Sheppo if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) { 57971ae08745Sheppo 57981ae08745Sheppo DERR(vswp, "%s: dring_addr failed\n", __func__); 57991ae08745Sheppo 58001ae08745Sheppo kmem_free(dp, sizeof (dring_info_t)); 58011ae08745Sheppo vsw_free_lane_resources(ldcp, INBOUND); 58021ae08745Sheppo 58031ae08745Sheppo dring_pkt->tag.vio_sid = ldcp->local_session; 58041ae08745Sheppo dring_pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; 58051ae08745Sheppo 58061ae08745Sheppo DUMP_TAG_PTR((vio_msg_tag_t *)dring_pkt); 58071ae08745Sheppo 58081ae08745Sheppo 
ldcp->lane_in.lstate |= VSW_DRING_NACK_SENT; 5809b071742bSsg70180 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 5810b071742bSsg70180 sizeof (vio_dring_reg_msg_t), B_TRUE); 58111ae08745Sheppo 58121ae08745Sheppo vsw_next_milestone(ldcp); 58131ae08745Sheppo return; 58141ae08745Sheppo } else { 58151ae08745Sheppo /* store the address of the pub part of ring */ 58161ae08745Sheppo dp->pub_addr = minfo.vaddr; 58171ae08745Sheppo } 58181ae08745Sheppo 58191ae08745Sheppo /* no private section as we are importing */ 58201ae08745Sheppo dp->priv_addr = NULL; 58211ae08745Sheppo 58221ae08745Sheppo /* 58231ae08745Sheppo * Using simple mono increasing int for ident at 58241ae08745Sheppo * the moment. 58251ae08745Sheppo */ 58261ae08745Sheppo dp->ident = ldcp->next_ident; 58271ae08745Sheppo ldcp->next_ident++; 58281ae08745Sheppo 58291ae08745Sheppo dp->end_idx = 0; 58301ae08745Sheppo dp->next = NULL; 58311ae08745Sheppo 58321ae08745Sheppo /* 58331ae08745Sheppo * Link it onto the end of the list of drings 58341ae08745Sheppo * for this lane. 
58351ae08745Sheppo */ 58361ae08745Sheppo if (ldcp->lane_in.dringp == NULL) { 58371ae08745Sheppo D2(vswp, "%s: adding first INBOUND dring", __func__); 58381ae08745Sheppo ldcp->lane_in.dringp = dp; 58391ae08745Sheppo } else { 58401ae08745Sheppo dbp = ldcp->lane_in.dringp; 58411ae08745Sheppo 58421ae08745Sheppo while (dbp->next != NULL) 58431ae08745Sheppo dbp = dbp->next; 58441ae08745Sheppo 58451ae08745Sheppo dbp->next = dp; 58461ae08745Sheppo } 58471ae08745Sheppo 58481ae08745Sheppo /* acknowledge it */ 58491ae08745Sheppo dring_pkt->tag.vio_sid = ldcp->local_session; 58501ae08745Sheppo dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK; 58511ae08745Sheppo dring_pkt->dring_ident = dp->ident; 58521ae08745Sheppo 5853b071742bSsg70180 (void) vsw_send_msg(ldcp, (void *)dring_pkt, 5854b071742bSsg70180 sizeof (vio_dring_reg_msg_t), B_TRUE); 58551ae08745Sheppo 58561ae08745Sheppo ldcp->lane_in.lstate |= VSW_DRING_ACK_SENT; 58571ae08745Sheppo vsw_next_milestone(ldcp); 58581ae08745Sheppo break; 58591ae08745Sheppo 58601ae08745Sheppo case VIO_SUBTYPE_ACK: 58611ae08745Sheppo D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 58621ae08745Sheppo 58631ae08745Sheppo if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_ACK_RECV)) 58641ae08745Sheppo return; 58651ae08745Sheppo 58661ae08745Sheppo /* 58671ae08745Sheppo * Peer is acknowledging our dring info and will have 58681ae08745Sheppo * sent us a dring identifier which we will use to 58691ae08745Sheppo * refer to this ring w.r.t. our peer. 58701ae08745Sheppo */ 58711ae08745Sheppo dp = ldcp->lane_out.dringp; 58721ae08745Sheppo if (dp != NULL) { 58731ae08745Sheppo /* 58741ae08745Sheppo * Find the ring this ident should be associated 58751ae08745Sheppo * with. 
58761ae08745Sheppo */ 58771ae08745Sheppo if (vsw_dring_match(dp, dring_pkt)) { 58781ae08745Sheppo dring_found = 1; 58791ae08745Sheppo 58801ae08745Sheppo } else while (dp != NULL) { 58811ae08745Sheppo if (vsw_dring_match(dp, dring_pkt)) { 58821ae08745Sheppo dring_found = 1; 58831ae08745Sheppo break; 58841ae08745Sheppo } 58851ae08745Sheppo dp = dp->next; 58861ae08745Sheppo } 58871ae08745Sheppo 58881ae08745Sheppo if (dring_found == 0) { 58891ae08745Sheppo DERR(NULL, "%s: unrecognised ring cookie", 58901ae08745Sheppo __func__); 5891b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 58921ae08745Sheppo return; 58931ae08745Sheppo } 58941ae08745Sheppo 58951ae08745Sheppo } else { 58961ae08745Sheppo DERR(vswp, "%s: DRING ACK received but no drings " 58971ae08745Sheppo "allocated", __func__); 5898b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 58991ae08745Sheppo return; 59001ae08745Sheppo } 59011ae08745Sheppo 59021ae08745Sheppo /* store ident */ 59031ae08745Sheppo dp->ident = dring_pkt->dring_ident; 59041ae08745Sheppo ldcp->lane_out.lstate |= VSW_DRING_ACK_RECV; 59051ae08745Sheppo vsw_next_milestone(ldcp); 59061ae08745Sheppo break; 59071ae08745Sheppo 59081ae08745Sheppo case VIO_SUBTYPE_NACK: 59091ae08745Sheppo D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 59101ae08745Sheppo 59111ae08745Sheppo if (vsw_check_flag(ldcp, OUTBOUND, VSW_DRING_NACK_RECV)) 59121ae08745Sheppo return; 59131ae08745Sheppo 59141ae08745Sheppo ldcp->lane_out.lstate |= VSW_DRING_NACK_RECV; 59151ae08745Sheppo vsw_next_milestone(ldcp); 59161ae08745Sheppo break; 59171ae08745Sheppo 59181ae08745Sheppo default: 59191ae08745Sheppo DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 59201ae08745Sheppo dring_pkt->tag.vio_subtype); 59211ae08745Sheppo } 59221ae08745Sheppo 59231ae08745Sheppo D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 59241ae08745Sheppo } 59251ae08745Sheppo 59261ae08745Sheppo /* 59271ae08745Sheppo * Process a request from peer to unregister a dring. 
59281ae08745Sheppo * 59291ae08745Sheppo * For the moment we just restart the handshake if our 59301ae08745Sheppo * peer endpoint attempts to unregister a dring. 59311ae08745Sheppo */ 59321ae08745Sheppo void 59331ae08745Sheppo vsw_process_ctrl_dring_unreg_pkt(vsw_ldc_t *ldcp, void *pkt) 59341ae08745Sheppo { 59351ae08745Sheppo vsw_t *vswp = ldcp->ldc_vswp; 59361ae08745Sheppo vio_dring_unreg_msg_t *dring_pkt; 59371ae08745Sheppo 59381ae08745Sheppo /* 59391ae08745Sheppo * We know this is a ctrl/dring packet so 59401ae08745Sheppo * cast it into the correct structure. 59411ae08745Sheppo */ 59421ae08745Sheppo dring_pkt = (vio_dring_unreg_msg_t *)pkt; 59431ae08745Sheppo 59441ae08745Sheppo D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 59451ae08745Sheppo 59461ae08745Sheppo switch (dring_pkt->tag.vio_subtype) { 59471ae08745Sheppo case VIO_SUBTYPE_INFO: 59481ae08745Sheppo D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 59491ae08745Sheppo 59501ae08745Sheppo DWARN(vswp, "%s: restarting handshake..", __func__); 59511ae08745Sheppo break; 59521ae08745Sheppo 59531ae08745Sheppo case VIO_SUBTYPE_ACK: 59541ae08745Sheppo D2(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 59551ae08745Sheppo 59561ae08745Sheppo DWARN(vswp, "%s: restarting handshake..", __func__); 59571ae08745Sheppo break; 59581ae08745Sheppo 59591ae08745Sheppo case VIO_SUBTYPE_NACK: 59601ae08745Sheppo D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 59611ae08745Sheppo 59621ae08745Sheppo DWARN(vswp, "%s: restarting handshake..", __func__); 59631ae08745Sheppo break; 59641ae08745Sheppo 59651ae08745Sheppo default: 59661ae08745Sheppo DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__, 59671ae08745Sheppo dring_pkt->tag.vio_subtype); 59681ae08745Sheppo } 59691ae08745Sheppo 5970b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESTART); 5971b071742bSsg70180 59721ae08745Sheppo D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id); 59731ae08745Sheppo } 59741ae08745Sheppo 59751ae08745Sheppo #define SND_MCST_NACK(ldcp, pkt) \ 59761ae08745Sheppo 
pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \ 59771ae08745Sheppo pkt->tag.vio_sid = ldcp->local_session; \ 5978b071742bSsg70180 (void) vsw_send_msg(ldcp, (void *)pkt, \ 5979b071742bSsg70180 sizeof (vnet_mcast_msg_t), B_TRUE); 59801ae08745Sheppo 59811ae08745Sheppo /* 59821ae08745Sheppo * Process a multicast request from a vnet. 59831ae08745Sheppo * 59841ae08745Sheppo * Vnet's specify a multicast address that they are interested in. This 59851ae08745Sheppo * address is used as a key into the hash table which forms the multicast 59861ae08745Sheppo * forwarding database (mFDB). 59871ae08745Sheppo * 59881ae08745Sheppo * The table keys are the multicast addresses, while the table entries 59891ae08745Sheppo * are pointers to lists of ports which wish to receive packets for the 59901ae08745Sheppo * specified multicast address. 59911ae08745Sheppo * 59921ae08745Sheppo * When a multicast packet is being switched we use the address as a key 59931ae08745Sheppo * into the hash table, and then walk the appropriate port list forwarding 59941ae08745Sheppo * the pkt to each port in turn. 59951ae08745Sheppo * 59961ae08745Sheppo * If a vnet is no longer interested in a particular multicast grouping 59971ae08745Sheppo * we simply find the correct location in the hash table and then delete 59981ae08745Sheppo * the relevant port from the port list. 59991ae08745Sheppo * 60001ae08745Sheppo * To deal with the case whereby a port is being deleted without first 60011ae08745Sheppo * removing itself from the lists in the hash table, we maintain a list 60021ae08745Sheppo * of multicast addresses the port has registered an interest in, within 60031ae08745Sheppo * the port structure itself. We then simply walk that list of addresses 60041ae08745Sheppo * using them as keys into the hash table and remove the port from the 60051ae08745Sheppo * appropriate lists. 
 */
static void
vsw_process_ctrl_mcst_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vnet_mcast_msg_t	*mcst_pkt;
	vsw_port_t		*port = ldcp->ldc_port;
	vsw_t			*vswp = ldcp->ldc_vswp;
	int			i;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	/*
	 * We know this is a ctrl/mcast packet so
	 * cast it into the correct structure.
	 */
	mcst_pkt = (vnet_mcast_msg_t *)pkt;

	switch (mcst_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		/*
		 * Check if in correct state to receive a multicast
		 * message (i.e. handshake complete). If not reset
		 * the handshake.
		 */
		if (vsw_check_flag(ldcp, INBOUND, VSW_MCST_INFO_RECV))
			return;

		/*
		 * Before attempting to add or remove address check
		 * that they are valid multicast addresses.
		 * If not, then NACK back.
		 */
		for (i = 0; i < mcst_pkt->count; i++) {
			/* multicast bit is the LSB of the first octet */
			if ((mcst_pkt->mca[i].ether_addr_octet[0] & 01) != 1) {
				DERR(vswp, "%s: invalid multicast address",
				    __func__);
				SND_MCST_NACK(ldcp, mcst_pkt);
				return;
			}
		}

		/*
		 * Now add/remove the addresses. If this fails we
		 * NACK back.
		 */
		if (vsw_add_rem_mcst(mcst_pkt, port) != 0) {
			SND_MCST_NACK(ldcp, mcst_pkt);
			return;
		}

		mcst_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
		mcst_pkt->tag.vio_sid = ldcp->local_session;

		DUMP_TAG_PTR((vio_msg_tag_t *)mcst_pkt);

		(void) vsw_send_msg(ldcp, (void *)mcst_pkt,
		    sizeof (vnet_mcast_msg_t), B_TRUE);
		break;

	case VIO_SUBTYPE_ACK:
		DWARN(vswp, "%s: VIO_SUBTYPE_ACK", __func__);

		/*
		 * We shouldn't ever get a multicast ACK message as
		 * at the moment we never request multicast addresses
		 * to be set on some other device. This may change in
		 * the future if we have cascading switches.
		 */
		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_ACK_RECV))
			return;

		/* Do nothing */
		break;

	case VIO_SUBTYPE_NACK:
		DWARN(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		/*
		 * We shouldn't get a multicast NACK packet for the
		 * same reasons as we shouldn't get a ACK packet.
		 */
		if (vsw_check_flag(ldcp, OUTBOUND, VSW_MCST_NACK_RECV))
			return;

		/* Do nothing */
		break;

	default:
		DERR(vswp, "%s: unknown vio_subtype %x\n", __func__,
		    mcst_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

/*
 * Process an RDX (ready to exchange data) control message.
 *
 * An inbound INFO is ACK'd back to the peer (recording VSW_RDX_ACK_SENT
 * in the outbound lane state); an unexpected ACK restarts the connection
 * since ACKs are handled in-band by the callback handler; a NACK records
 * VSW_RDX_NACK_RECV in the inbound lane state. Both the INFO and NACK
 * paths then advance the handshake via vsw_next_milestone().
 */
static void
vsw_process_ctrl_rdx_pkt(vsw_ldc_t *ldcp, void *pkt)
{
	vio_rdx_msg_t	*rdx_pkt;
	vsw_t		*vswp = ldcp->ldc_vswp;

	/*
	 * We know this is a ctrl/rdx packet so
	 * cast it into the correct structure.
	 */
	rdx_pkt = (vio_rdx_msg_t *)pkt;

	D1(vswp, "%s(%lld) enter", __func__, ldcp->ldc_id);

	switch (rdx_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s: VIO_SUBTYPE_INFO", __func__);

		if (vsw_check_flag(ldcp, OUTBOUND, VSW_RDX_INFO_RECV))
			return;

		rdx_pkt->tag.vio_sid = ldcp->local_session;
		rdx_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;

		DUMP_TAG_PTR((vio_msg_tag_t *)rdx_pkt);

		ldcp->lane_out.lstate |= VSW_RDX_ACK_SENT;

		(void) vsw_send_msg(ldcp, (void *)rdx_pkt,
		    sizeof (vio_rdx_msg_t), B_TRUE);

		vsw_next_milestone(ldcp);
		break;

	case VIO_SUBTYPE_ACK:
		/*
		 * Should be handled in-band by callback handler.
		 */
		DERR(vswp, "%s: Unexpected VIO_SUBTYPE_ACK", __func__);
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
		break;

	case VIO_SUBTYPE_NACK:
		D2(vswp, "%s: VIO_SUBTYPE_NACK", __func__);

		if (vsw_check_flag(ldcp, INBOUND, VSW_RDX_NACK_RECV))
			return;

		ldcp->lane_in.lstate |= VSW_RDX_NACK_RECV;
		vsw_next_milestone(ldcp);
		break;

	default:
		DERR(vswp, "%s: Unknown vio_subtype %x\n", __func__,
		    rdx_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

/*
 * Dispatch a data packet to the handler matching its subtype envelope
 * (descriptor ring, raw, or in-band).
 *
 * The connection is restarted (and the message dropped) if the session
 * id does not match the peer's recorded session, or if the packet
 * arrives before the handshake has completed (i.e. before
 * VSW_MILESTONE4).
 */
static void
vsw_process_data_pkt(vsw_ldc_t *ldcp, void *dpkt, vio_msg_tag_t tag)
{
	uint16_t	env = tag.vio_subtype_env;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	/* session id check */
	if (ldcp->session_status & VSW_PEER_SESSION) {
		if (ldcp->peer_session != tag.vio_sid) {
			DERR(vswp, "%s (chan %d): invalid session id (%llx)",
			    __func__, ldcp->ldc_id, tag.vio_sid);
			vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
			return;
		}
	}

	/*
	 * It is an error for us to be getting data packets
	 * before the handshake has completed.
	 */
	if (ldcp->hphase != VSW_MILESTONE4) {
		DERR(vswp, "%s: got data packet before handshake complete "
		    "hphase %d (%x: %x)", __func__, ldcp->hphase,
		    ldcp->lane_in.lstate, ldcp->lane_out.lstate);
		DUMP_FLAGS(ldcp->lane_in.lstate);
		DUMP_FLAGS(ldcp->lane_out.lstate);
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);
		return;
	}

	/*
	 * Switch on vio_subtype envelope, then let lower routines
	 * decide if its an INFO, ACK or NACK packet.
	 */
	if (env == VIO_DRING_DATA) {
		vsw_process_data_dring_pkt(ldcp, dpkt);
	} else if (env == VIO_PKT_DATA) {
		vsw_process_data_raw_pkt(ldcp, dpkt);
	} else if (env == VIO_DESC_DATA) {
		vsw_process_data_ibnd_pkt(ldcp, dpkt);
	} else {
		DERR(vswp, "%s: unknown vio_subtype_env (%x)\n", __func__, env);
	}

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

/*
 * NACK a dring message back to the peer (shared error path below).
 *
 * NOTE(review): multi-statement macro without a do { } while (0)
 * wrapper -- safe only because every call site below invokes it as a
 * full statement inside braces; confirm before adding new call sites.
 */
#define	SND_DRING_NACK(ldcp, pkt) \
	pkt->tag.vio_subtype = VIO_SUBTYPE_NACK; \
	pkt->tag.vio_sid = ldcp->local_session; \
	(void) vsw_send_msg(ldcp, (void *)pkt, \
	    sizeof (vio_dring_msg_t), B_TRUE);

/*
 * Process a descriptor-ring (VIO_DRING_DATA) data message.
 *
 * INFO: walk the descriptors in [start_idx, end_idx] (end_idx == -1
 * means unbounded: process until a non-READY descriptor is seen, with
 * bounded retries), copy each payload in over LDC, chain the resulting
 * mblks and pass the chain to vsw_switch_frame(). ACKs are sent along
 * the way when a descriptor requests one, and a final ACK with state
 * VIO_DP_STOPPED is sent when processing ends.
 *
 * ACK: reclaim the DONE descriptors in the ACK'd range (resetting them
 * to FREE), and if the peer has stopped processing while the next
 * descriptor is already READY, prompt it to restart.
 *
 * NACK: something is badly wrong, so restart the connection.
 */
static void
vsw_process_data_dring_pkt(vsw_ldc_t *ldcp, void *dpkt)
{
	vio_dring_msg_t		*dring_pkt;
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;
	dring_info_t		*dp = NULL;
	vsw_t			*vswp = ldcp->ldc_vswp;
	mblk_t			*mp = NULL;
	mblk_t			*bp = NULL;
	mblk_t			*bpt = NULL;
	size_t			nbytes = 0;
	size_t			off = 0;
	uint64_t		ncookies = 0;
	uint64_t		chain = 0;
	uint64_t		j, len;
	uint32_t		pos, start, datalen;
	uint32_t		range_start, range_end;
	int32_t			end, num, cnt = 0;
	int			i, rv, msg_rv = 0;
	boolean_t		ack_needed = B_FALSE;
	boolean_t		prev_desc_ack = B_FALSE;
	int			read_attempts = 0;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	/*
	 * We know this is a data/dring packet so
	 * cast it into the correct structure.
	 */
	dring_pkt = (vio_dring_msg_t *)dpkt;

	/*
	 * Switch on the vio_subtype. If its INFO then we need to
	 * process the data. If its an ACK we need to make sure
	 * it makes sense (i.e did we send an earlier data/info),
	 * and if its a NACK then we maybe attempt a retry.
	 */
	switch (dring_pkt->tag.vio_subtype) {
	case VIO_SUBTYPE_INFO:
		D2(vswp, "%s(%lld): VIO_SUBTYPE_INFO", __func__, ldcp->ldc_id);

		READ_ENTER(&ldcp->lane_in.dlistrw);
		if ((dp = vsw_ident2dring(&ldcp->lane_in,
		    dring_pkt->dring_ident)) == NULL) {
			RW_EXIT(&ldcp->lane_in.dlistrw);

			DERR(vswp, "%s(%lld): unable to find dring from "
			    "ident 0x%llx", __func__, ldcp->ldc_id,
			    dring_pkt->dring_ident);

			SND_DRING_NACK(ldcp, dring_pkt);
			return;
		}

		start = pos = dring_pkt->start_idx;
		end = dring_pkt->end_idx;
		len = dp->num_descriptors;

		range_start = range_end = pos;

		D2(vswp, "%s(%lld): start index %ld : end %ld\n",
		    __func__, ldcp->ldc_id, start, end);

		if (end == -1) {
			num = -1;
		} else if (end >= 0) {
			/* # descriptors to process, allowing for wrap */
			num = end >= pos ? end - pos + 1: (len - pos + 1) + end;

			/*
			 * basic sanity check
			 *
			 * NOTE(review): valid ring indices are 0..len-1,
			 * so this arguably should be (end >= len) --
			 * confirm against the peer's use of end_idx.
			 */
			if (end > len) {
				RW_EXIT(&ldcp->lane_in.dlistrw);
				DERR(vswp, "%s(%lld): endpoint %lld outside "
				    "ring length %lld", __func__,
				    ldcp->ldc_id, end, len);

				SND_DRING_NACK(ldcp, dring_pkt);
				return;
			}
		} else {
			RW_EXIT(&ldcp->lane_in.dlistrw);
			DERR(vswp, "%s(%lld): invalid endpoint %lld",
			    __func__, ldcp->ldc_id, end);
			SND_DRING_NACK(ldcp, dring_pkt);
			return;
		}

		while (cnt != num) {
vsw_recheck_desc:
			if ((rv = ldc_mem_dring_acquire(dp->handle,
			    pos, pos)) != 0) {
				RW_EXIT(&ldcp->lane_in.dlistrw);
				DERR(vswp, "%s(%lld): unable to acquire "
				    "descriptor at pos %d: err %d",
				    __func__, pos, ldcp->ldc_id, rv);
				SND_DRING_NACK(ldcp, dring_pkt);
				return;
			}

			pub_addr = (vnet_public_desc_t *)dp->pub_addr + pos;

			/*
			 * When given a bounded range of descriptors
			 * to process, its an error to hit a descriptor
			 * which is not ready. In the non-bounded case
			 * (end_idx == -1) this simply indicates we have
			 * reached the end of the current active range.
			 */
			if (pub_addr->hdr.dstate != VIO_DESC_READY) {
				/* unbound - no error */
				if (end == -1) {
					if (read_attempts == vsw_read_attempts)
						break;

					delay(drv_usectohz(vsw_desc_delay));
					read_attempts++;
					goto vsw_recheck_desc;
				}

				/* bounded - error - so NACK back */
				RW_EXIT(&ldcp->lane_in.dlistrw);
				DERR(vswp, "%s(%lld): descriptor not READY "
				    "(%d)", __func__, ldcp->ldc_id,
				    pub_addr->hdr.dstate);
				SND_DRING_NACK(ldcp, dring_pkt);
				return;
			}

			DTRACE_PROBE1(read_attempts, int, read_attempts);

			range_end = pos;

			/*
			 * If we ACK'd the previous descriptor then now
			 * record the new range start position for later
			 * ACK's.
			 */
			if (prev_desc_ack) {
				range_start = pos;

				D2(vswp, "%s(%lld): updating range start to be "
				    "%d", __func__, ldcp->ldc_id, range_start);

				prev_desc_ack = B_FALSE;
			}

			/*
			 * Data is padded to align on 8 byte boundary,
			 * datalen is actual data length, i.e. minus that
			 * padding.
			 */
			datalen = pub_addr->nbytes;

			/*
			 * Does peer wish us to ACK when we have finished
			 * with this descriptor ?
			 */
			if (pub_addr->hdr.ack)
				ack_needed = B_TRUE;

			D2(vswp, "%s(%lld): processing desc %lld at pos"
			    " 0x%llx : dstate 0x%lx : datalen 0x%lx",
			    __func__, ldcp->ldc_id, pos, pub_addr,
			    pub_addr->hdr.dstate, datalen);

			/*
			 * Mark that we are starting to process descriptor.
			 */
			pub_addr->hdr.dstate = VIO_DESC_ACCEPTED;

			mp = vio_allocb(ldcp->rxh);
			if (mp == NULL) {
				/*
				 * No free receive buffers available, so
				 * fallback onto allocb(9F). Make sure that
				 * we get a data buffer which is a multiple
				 * of 8 as this is required by ldc_mem_copy.
				 */
				DTRACE_PROBE(allocb);
				if ((mp = allocb(datalen + VNET_IPALIGN + 8,
				    BPRI_MED)) == NULL) {
					DERR(vswp, "%s(%ld): allocb failed",
					    __func__, ldcp->ldc_id);
					pub_addr->hdr.dstate = VIO_DESC_DONE;
					(void) ldc_mem_dring_release(dp->handle,
					    pos, pos);
					break;
				}
			}

			/*
			 * Ensure that we ask ldc for an aligned
			 * number of bytes.
			 */
			nbytes = datalen + VNET_IPALIGN;
			if (nbytes & 0x7) {
				off = 8 - (nbytes & 0x7);
				nbytes += off;
			}

			ncookies = pub_addr->ncookies;
			rv = ldc_mem_copy(ldcp->ldc_handle,
			    (caddr_t)mp->b_rptr, 0, &nbytes,
			    pub_addr->memcookie, ncookies, LDC_COPY_IN);

			if (rv != 0) {
				DERR(vswp, "%s(%d): unable to copy in data "
				    "from %d cookies in desc %d (rv %d)",
				    __func__, ldcp->ldc_id, ncookies, pos, rv);
				freemsg(mp);

				pub_addr->hdr.dstate = VIO_DESC_DONE;
				(void) ldc_mem_dring_release(dp->handle,
				    pos, pos);
				break;
			} else {
				D2(vswp, "%s(%d): copied in %ld bytes"
				    " using %d cookies", __func__,
				    ldcp->ldc_id, nbytes, ncookies);
			}

			/* adjust the read pointer to skip over the padding */
			mp->b_rptr += VNET_IPALIGN;

			/* point to the actual end of data */
			mp->b_wptr = mp->b_rptr + datalen;

			/* build a chain of received packets */
			if (bp == NULL) {
				/* first pkt */
				bp = mp;
				bp->b_next = bp->b_prev = NULL;
				bpt = bp;
				chain = 1;
			} else {
				mp->b_next = NULL;
				mp->b_prev = bpt;
				bpt->b_next = mp;
				bpt = mp;
				chain++;
			}

			/* mark we are finished with this descriptor */
			pub_addr->hdr.dstate = VIO_DESC_DONE;

			(void) ldc_mem_dring_release(dp->handle, pos, pos);

			/*
			 * Send an ACK back to peer if requested.
			 */
			if (ack_needed) {
				ack_needed = B_FALSE;

				dring_pkt->start_idx = range_start;
				dring_pkt->end_idx = range_end;

				DERR(vswp, "%s(%lld): processed %d %d, ACK"
				    " requested", __func__, ldcp->ldc_id,
				    dring_pkt->start_idx, dring_pkt->end_idx);

				dring_pkt->dring_process_state = VIO_DP_ACTIVE;
				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
				dring_pkt->tag.vio_sid = ldcp->local_session;

				msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt,
				    sizeof (vio_dring_msg_t), B_FALSE);

				/*
				 * Check if ACK was successfully sent. If not
				 * we break and deal with that below.
				 */
				if (msg_rv != 0)
					break;

				prev_desc_ack = B_TRUE;
				range_start = pos;
			}

			/* next descriptor */
			pos = (pos + 1) % len;
			cnt++;

			/*
			 * Break out of loop here and stop processing to
			 * allow some other network device (or disk) to
			 * get access to the cpu.
			 */
			if (chain > vsw_chain_len) {
				D3(vswp, "%s(%lld): switching chain of %d "
				    "msgs", __func__, ldcp->ldc_id, chain);
				break;
			}
		}
		RW_EXIT(&ldcp->lane_in.dlistrw);

		/*
		 * If when we attempted to send the ACK we found that the
		 * channel had been reset then now handle this. We deal with
		 * it here as we cannot reset the channel while holding the
		 * dlistrw lock, and we don't want to acquire/release it
		 * continuously in the above loop, as a channel reset should
		 * be a rare event.
		 */
		if (msg_rv == ECONNRESET) {
			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);
			break;
		}

		/* send the chain of packets to be switched */
		if (bp != NULL) {
			D3(vswp, "%s(%lld): switching chain of %d msgs",
			    __func__, ldcp->ldc_id, chain);
			vswp->vsw_switch_frame(vswp, bp, VSW_VNETPORT,
			    ldcp->ldc_port, NULL);
		}

		DTRACE_PROBE1(msg_cnt, int, cnt);

		/*
		 * We are now finished so ACK back with the state
		 * set to STOPPING so our peer knows we are finished
		 */
		dring_pkt->tag.vio_subtype = VIO_SUBTYPE_ACK;
		dring_pkt->tag.vio_sid = ldcp->local_session;

		dring_pkt->dring_process_state = VIO_DP_STOPPED;

		DTRACE_PROBE(stop_process_sent);

		/*
		 * We have not processed any more descriptors beyond
		 * the last one we ACK'd.
		 */
		if (prev_desc_ack)
			range_start = range_end;

		dring_pkt->start_idx = range_start;
		dring_pkt->end_idx = range_end;

		D2(vswp, "%s(%lld) processed : %d : %d, now stopping",
		    __func__, ldcp->ldc_id, dring_pkt->start_idx,
		    dring_pkt->end_idx);

		(void) vsw_send_msg(ldcp, (void *)dring_pkt,
		    sizeof (vio_dring_msg_t), B_TRUE);
		break;

	case VIO_SUBTYPE_ACK:
		D2(vswp, "%s(%lld): VIO_SUBTYPE_ACK", __func__, ldcp->ldc_id);
		/*
		 * Verify that the relevant descriptors are all
		 * marked as DONE
		 */
		READ_ENTER(&ldcp->lane_out.dlistrw);
		if ((dp = vsw_ident2dring(&ldcp->lane_out,
		    dring_pkt->dring_ident)) == NULL) {
			RW_EXIT(&ldcp->lane_out.dlistrw);
			DERR(vswp, "%s: unknown ident in ACK", __func__);
			return;
		}

		pub_addr = (vnet_public_desc_t *)dp->pub_addr;
		priv_addr = (vsw_private_desc_t *)dp->priv_addr;

		start = end = 0;
		start = dring_pkt->start_idx;
		end = dring_pkt->end_idx;
		len = dp->num_descriptors;

		j = num = 0;
		/* calculate # descriptors taking into a/c wrap around */
		num = end >= start ? end - start + 1: (len - start + 1) + end;

		D2(vswp, "%s(%lld): start index %ld : end %ld : num %ld\n",
		    __func__, ldcp->ldc_id, start, end, num);

		mutex_enter(&dp->dlock);
		dp->last_ack_recv = end;
		mutex_exit(&dp->dlock);

		for (i = start; j < num; i = (i + 1) % len, j++) {
			pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;

			/*
			 * If the last descriptor in a range has the ACK
			 * bit set then we will get two messages from our
			 * peer relating to it. The normal ACK msg and then
			 * a subsequent STOP msg. The first message will have
			 * resulted in the descriptor being reclaimed and
			 * its state set to FREE so when we encounter a non
			 * DONE descriptor we need to check to see if its
			 * because we have just reclaimed it.
			 */
			mutex_enter(&priv_addr->dstate_lock);
			if (pub_addr->hdr.dstate == VIO_DESC_DONE) {
				/* clear all the fields */
				bzero(priv_addr->datap, priv_addr->datalen);
				priv_addr->datalen = 0;

				pub_addr->hdr.dstate = VIO_DESC_FREE;
				pub_addr->hdr.ack = 0;

				priv_addr->dstate = VIO_DESC_FREE;
				mutex_exit(&priv_addr->dstate_lock);

				D3(vswp, "clearing descp %d : pub state "
				    "0x%llx : priv state 0x%llx", i,
				    pub_addr->hdr.dstate, priv_addr->dstate);

			} else {
				mutex_exit(&priv_addr->dstate_lock);

				if (dring_pkt->dring_process_state !=
				    VIO_DP_STOPPED) {
					DERR(vswp, "%s: descriptor %lld at pos "
					    " 0x%llx not DONE (0x%lx)\n",
					    __func__, i, pub_addr,
					    pub_addr->hdr.dstate);
					RW_EXIT(&ldcp->lane_out.dlistrw);
					return;
				}
			}
		}

		/*
		 * If our peer is stopping processing descriptors then
		 * we check to make sure it has processed all the descriptors
		 * we have updated. If not then we send it a new message
		 * to prompt it to restart.
		 */
		if (dring_pkt->dring_process_state == VIO_DP_STOPPED) {
			DTRACE_PROBE(stop_process_recv);
			D2(vswp, "%s(%lld): got stopping msg : %d : %d",
			    __func__, ldcp->ldc_id, dring_pkt->start_idx,
			    dring_pkt->end_idx);

			/*
			 * Check next descriptor in public section of ring.
			 * If its marked as READY then we need to prompt our
			 * peer to start processing the ring again.
			 */
			i = (end + 1) % len;
			pub_addr = (vnet_public_desc_t *)dp->pub_addr + i;
			priv_addr = (vsw_private_desc_t *)dp->priv_addr + i;

			/*
			 * Hold the restart lock across all of this to
			 * make sure that its not possible for us to
			 * decide that a msg needs to be sent in the future
			 * but the sending code having already checked is
			 * about to exit.
			 */
			mutex_enter(&dp->restart_lock);
			mutex_enter(&priv_addr->dstate_lock);
			if (pub_addr->hdr.dstate == VIO_DESC_READY) {

				mutex_exit(&priv_addr->dstate_lock);

				dring_pkt->tag.vio_subtype = VIO_SUBTYPE_INFO;
				dring_pkt->tag.vio_sid = ldcp->local_session;

				mutex_enter(&ldcp->lane_out.seq_lock);
				dring_pkt->seq_num = ldcp->lane_out.seq_num++;
				mutex_exit(&ldcp->lane_out.seq_lock);

				dring_pkt->start_idx = (end + 1) % len;
				dring_pkt->end_idx = -1;

				D2(vswp, "%s(%lld) : sending restart msg:"
				    " %d : %d", __func__, ldcp->ldc_id,
				    dring_pkt->start_idx, dring_pkt->end_idx);

				msg_rv = vsw_send_msg(ldcp, (void *)dring_pkt,
				    sizeof (vio_dring_msg_t), B_FALSE);

			} else {
				mutex_exit(&priv_addr->dstate_lock);
				dp->restart_reqd = B_TRUE;
			}
			mutex_exit(&dp->restart_lock);
		}
		RW_EXIT(&ldcp->lane_out.dlistrw);

		/* only do channel reset after dropping dlistrw lock */
		if (msg_rv == ECONNRESET)
			vsw_process_conn_evt(ldcp, VSW_CONN_RESET);

		break;

	case VIO_SUBTYPE_NACK:
		DWARN(vswp, "%s(%lld): VIO_SUBTYPE_NACK",
		    __func__, ldcp->ldc_id);
		/*
		 * Something is badly wrong if we are getting NACK's
		 * for our data pkts. So reset the channel.
		 */
		vsw_process_conn_evt(ldcp, VSW_CONN_RESTART);

		break;

	default:
		DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__,
		    ldcp->ldc_id, dring_pkt->tag.vio_subtype);
	}

	D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id);
}

/*
 * VIO_PKT_DATA (a.k.a raw data mode )
 *
 * Note - currently not supported. Do nothing.
 */
static void
vsw_process_data_raw_pkt(vsw_ldc_t *ldcp, void *dpkt)
{
	_NOTE(ARGUNUSED(dpkt))

	D1(NULL, "%s (%lld): enter\n", __func__, ldcp->ldc_id);
	DERR(NULL, "%s (%lld): currently unsupported", __func__, ldcp->ldc_id);
	D1(NULL, "%s (%lld): exit\n", __func__, ldcp->ldc_id);
}

/*
 * Process an in-band descriptor message (most likely from
 * OBP).
67451ae08745Sheppo */ 67461ae08745Sheppo static void 67471ae08745Sheppo vsw_process_data_ibnd_pkt(vsw_ldc_t *ldcp, void *pkt) 67481ae08745Sheppo { 6749445b4c2eSsb155480 vnet_ibnd_desc_t *ibnd_desc; 67501ae08745Sheppo dring_info_t *dp = NULL; 67511ae08745Sheppo vsw_private_desc_t *priv_addr = NULL; 67521ae08745Sheppo vsw_t *vswp = ldcp->ldc_vswp; 67531ae08745Sheppo mblk_t *mp = NULL; 67541ae08745Sheppo size_t nbytes = 0; 67551ae08745Sheppo size_t off = 0; 67561ae08745Sheppo uint64_t idx = 0; 67574bac2208Snarayan uint32_t num = 1, len, datalen = 0; 67581ae08745Sheppo uint64_t ncookies = 0; 67594bac2208Snarayan int i, rv; 67604bac2208Snarayan int j = 0; 67611ae08745Sheppo 67621ae08745Sheppo D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id); 67631ae08745Sheppo 6764445b4c2eSsb155480 ibnd_desc = (vnet_ibnd_desc_t *)pkt; 67651ae08745Sheppo 67661ae08745Sheppo switch (ibnd_desc->hdr.tag.vio_subtype) { 67671ae08745Sheppo case VIO_SUBTYPE_INFO: 67681ae08745Sheppo D1(vswp, "%s: VIO_SUBTYPE_INFO", __func__); 67691ae08745Sheppo 67701ae08745Sheppo if (vsw_check_flag(ldcp, INBOUND, VSW_DRING_INFO_RECV)) 67711ae08745Sheppo return; 67721ae08745Sheppo 67731ae08745Sheppo /* 67741ae08745Sheppo * Data is padded to align on a 8 byte boundary, 67751ae08745Sheppo * nbytes is actual data length, i.e. minus that 67761ae08745Sheppo * padding. 67771ae08745Sheppo */ 67781ae08745Sheppo datalen = ibnd_desc->nbytes; 67791ae08745Sheppo 67801ae08745Sheppo D2(vswp, "%s(%lld): processing inband desc : " 67811ae08745Sheppo ": datalen 0x%lx", __func__, ldcp->ldc_id, datalen); 67821ae08745Sheppo 67831ae08745Sheppo ncookies = ibnd_desc->ncookies; 67841ae08745Sheppo 67851ae08745Sheppo /* 67861ae08745Sheppo * allocb(9F) returns an aligned data block. We 67871ae08745Sheppo * need to ensure that we ask ldc for an aligned 67881ae08745Sheppo * number of bytes also. 
67891ae08745Sheppo */ 67901ae08745Sheppo nbytes = datalen; 67911ae08745Sheppo if (nbytes & 0x7) { 67921ae08745Sheppo off = 8 - (nbytes & 0x7); 67931ae08745Sheppo nbytes += off; 67941ae08745Sheppo } 67951ae08745Sheppo 67961ae08745Sheppo mp = allocb(datalen, BPRI_MED); 67971ae08745Sheppo if (mp == NULL) { 67981ae08745Sheppo DERR(vswp, "%s(%lld): allocb failed", 67991ae08745Sheppo __func__, ldcp->ldc_id); 68001ae08745Sheppo return; 68011ae08745Sheppo } 68021ae08745Sheppo 68031ae08745Sheppo rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr, 68041ae08745Sheppo 0, &nbytes, ibnd_desc->memcookie, (uint64_t)ncookies, 68051ae08745Sheppo LDC_COPY_IN); 68061ae08745Sheppo 68071ae08745Sheppo if (rv != 0) { 68081ae08745Sheppo DERR(vswp, "%s(%d): unable to copy in data from " 6809205eeb1aSlm66018 "%d cookie(s)", __func__, ldcp->ldc_id, ncookies); 68101ae08745Sheppo freemsg(mp); 68111ae08745Sheppo return; 6812023505bcSraghuram } 6813023505bcSraghuram 6814205eeb1aSlm66018 D2(vswp, "%s(%d): copied in %ld bytes using %d cookies", 6815205eeb1aSlm66018 __func__, ldcp->ldc_id, nbytes, ncookies); 6816023505bcSraghuram 68171ae08745Sheppo /* point to the actual end of data */ 6818da86a4daSrf157361 mp->b_wptr = mp->b_rptr + datalen; 68191ae08745Sheppo 68201ae08745Sheppo /* 68211ae08745Sheppo * We ACK back every in-band descriptor message we process 68221ae08745Sheppo */ 68231ae08745Sheppo ibnd_desc->hdr.tag.vio_subtype = VIO_SUBTYPE_ACK; 68241ae08745Sheppo ibnd_desc->hdr.tag.vio_sid = ldcp->local_session; 6825b071742bSsg70180 (void) vsw_send_msg(ldcp, (void *)ibnd_desc, 6826b071742bSsg70180 sizeof (vnet_ibnd_desc_t), B_TRUE); 68271ae08745Sheppo 68281ae08745Sheppo /* send the packet to be switched */ 6829da86a4daSrf157361 vswp->vsw_switch_frame(vswp, mp, VSW_VNETPORT, 68301ae08745Sheppo ldcp->ldc_port, NULL); 68311ae08745Sheppo 68321ae08745Sheppo break; 68331ae08745Sheppo 68341ae08745Sheppo case VIO_SUBTYPE_ACK: 68351ae08745Sheppo D1(vswp, "%s: VIO_SUBTYPE_ACK", __func__); 
68361ae08745Sheppo 68371ae08745Sheppo /* Verify the ACK is valid */ 68381ae08745Sheppo idx = ibnd_desc->hdr.desc_handle; 68391ae08745Sheppo 68401ae08745Sheppo if (idx >= VSW_RING_NUM_EL) { 684134683adeSsg70180 cmn_err(CE_WARN, "!vsw%d: corrupted ACK received " 684234683adeSsg70180 "(idx %ld)", vswp->instance, idx); 68431ae08745Sheppo return; 68441ae08745Sheppo } 68451ae08745Sheppo 68461ae08745Sheppo if ((dp = ldcp->lane_out.dringp) == NULL) { 68471ae08745Sheppo DERR(vswp, "%s: no dring found", __func__); 68481ae08745Sheppo return; 68491ae08745Sheppo } 68501ae08745Sheppo 68514bac2208Snarayan len = dp->num_descriptors; 68524bac2208Snarayan /* 68534bac2208Snarayan * If the descriptor we are being ACK'ed for is not the 68544bac2208Snarayan * one we expected, then pkts were lost somwhere, either 68554bac2208Snarayan * when we tried to send a msg, or a previous ACK msg from 68564bac2208Snarayan * our peer. In either case we now reclaim the descriptors 68574bac2208Snarayan * in the range from the last ACK we received up to the 68584bac2208Snarayan * current ACK. 68594bac2208Snarayan */ 68604bac2208Snarayan if (idx != dp->last_ack_recv) { 68614bac2208Snarayan DWARN(vswp, "%s: dropped pkts detected, (%ld, %ld)", 68624bac2208Snarayan __func__, dp->last_ack_recv, idx); 68634bac2208Snarayan num = idx >= dp->last_ack_recv ? 68644bac2208Snarayan idx - dp->last_ack_recv + 1: 68654bac2208Snarayan (len - dp->last_ack_recv + 1) + idx; 68664bac2208Snarayan } 68671ae08745Sheppo 68681ae08745Sheppo /* 68691ae08745Sheppo * When we sent the in-band message to our peer we 68701ae08745Sheppo * marked the copy in our private ring as READY. We now 68711ae08745Sheppo * check that the descriptor we are being ACK'ed for is in 68721ae08745Sheppo * fact READY, i.e. it is one we have shared with our peer. 68734bac2208Snarayan * 68744bac2208Snarayan * If its not we flag an error, but still reset the descr 68754bac2208Snarayan * back to FREE. 
68761ae08745Sheppo */ 68774bac2208Snarayan for (i = dp->last_ack_recv; j < num; i = (i + 1) % len, j++) { 68784bac2208Snarayan priv_addr = (vsw_private_desc_t *)dp->priv_addr + i; 6879d10e4ef2Snarayan mutex_enter(&priv_addr->dstate_lock); 68801ae08745Sheppo if (priv_addr->dstate != VIO_DESC_READY) { 68814bac2208Snarayan DERR(vswp, "%s: (%ld) desc at index %ld not " 68824bac2208Snarayan "READY (0x%lx)", __func__, 68834bac2208Snarayan ldcp->ldc_id, idx, priv_addr->dstate); 68844bac2208Snarayan DERR(vswp, "%s: bound %d: ncookies %ld : " 68854bac2208Snarayan "datalen %ld", __func__, 68864bac2208Snarayan priv_addr->bound, priv_addr->ncookies, 68874bac2208Snarayan priv_addr->datalen); 68884bac2208Snarayan } 68891ae08745Sheppo D2(vswp, "%s: (%lld) freeing descp at %lld", __func__, 68901ae08745Sheppo ldcp->ldc_id, idx); 68911ae08745Sheppo /* release resources associated with sent msg */ 68921ae08745Sheppo bzero(priv_addr->datap, priv_addr->datalen); 68931ae08745Sheppo priv_addr->datalen = 0; 68941ae08745Sheppo priv_addr->dstate = VIO_DESC_FREE; 6895d10e4ef2Snarayan mutex_exit(&priv_addr->dstate_lock); 68961ae08745Sheppo } 68974bac2208Snarayan /* update to next expected value */ 68984bac2208Snarayan dp->last_ack_recv = (idx + 1) % dp->num_descriptors; 68994bac2208Snarayan 69001ae08745Sheppo break; 69011ae08745Sheppo 69021ae08745Sheppo case VIO_SUBTYPE_NACK: 69031ae08745Sheppo DERR(vswp, "%s: VIO_SUBTYPE_NACK", __func__); 69041ae08745Sheppo 69051ae08745Sheppo /* 69061ae08745Sheppo * We should only get a NACK if our peer doesn't like 69071ae08745Sheppo * something about a message we have sent it. If this 69081ae08745Sheppo * happens we just release the resources associated with 69091ae08745Sheppo * the message. (We are relying on higher layers to decide 69101ae08745Sheppo * whether or not to resend. 
69111ae08745Sheppo */ 69121ae08745Sheppo 69131ae08745Sheppo /* limit check */ 69141ae08745Sheppo idx = ibnd_desc->hdr.desc_handle; 69151ae08745Sheppo 69161ae08745Sheppo if (idx >= VSW_RING_NUM_EL) { 69171ae08745Sheppo DERR(vswp, "%s: corrupted NACK received (idx %lld)", 69181ae08745Sheppo __func__, idx); 69191ae08745Sheppo return; 69201ae08745Sheppo } 69211ae08745Sheppo 69221ae08745Sheppo if ((dp = ldcp->lane_out.dringp) == NULL) { 69231ae08745Sheppo DERR(vswp, "%s: no dring found", __func__); 69241ae08745Sheppo return; 69251ae08745Sheppo } 69261ae08745Sheppo 69271ae08745Sheppo priv_addr = (vsw_private_desc_t *)dp->priv_addr; 69281ae08745Sheppo 69291ae08745Sheppo /* move to correct location in ring */ 69301ae08745Sheppo priv_addr += idx; 69311ae08745Sheppo 69321ae08745Sheppo /* release resources associated with sent msg */ 6933d10e4ef2Snarayan mutex_enter(&priv_addr->dstate_lock); 69341ae08745Sheppo bzero(priv_addr->datap, priv_addr->datalen); 69351ae08745Sheppo priv_addr->datalen = 0; 69361ae08745Sheppo priv_addr->dstate = VIO_DESC_FREE; 6937d10e4ef2Snarayan mutex_exit(&priv_addr->dstate_lock); 69381ae08745Sheppo 69391ae08745Sheppo break; 69401ae08745Sheppo 69411ae08745Sheppo default: 69421ae08745Sheppo DERR(vswp, "%s(%lld): Unknown vio_subtype %x\n", __func__, 69431ae08745Sheppo ldcp->ldc_id, ibnd_desc->hdr.tag.vio_subtype); 69441ae08745Sheppo } 69451ae08745Sheppo 69461ae08745Sheppo D1(vswp, "%s(%lld) exit", __func__, ldcp->ldc_id); 69471ae08745Sheppo } 69481ae08745Sheppo 69491ae08745Sheppo static void 69501ae08745Sheppo vsw_process_err_pkt(vsw_ldc_t *ldcp, void *epkt, vio_msg_tag_t tag) 69511ae08745Sheppo { 69521ae08745Sheppo _NOTE(ARGUNUSED(epkt)) 69531ae08745Sheppo 69541ae08745Sheppo vsw_t *vswp = ldcp->ldc_vswp; 69551ae08745Sheppo uint16_t env = tag.vio_subtype_env; 69561ae08745Sheppo 69571ae08745Sheppo D1(vswp, "%s (%lld): enter\n", __func__, ldcp->ldc_id); 69581ae08745Sheppo 69591ae08745Sheppo /* 69601ae08745Sheppo * Error vio_subtypes have yet to be 
defined. So for 69611ae08745Sheppo * the moment we can't do anything. 69621ae08745Sheppo */ 69631ae08745Sheppo D2(vswp, "%s: (%x) vio_subtype env", __func__, env); 69641ae08745Sheppo 69651ae08745Sheppo D1(vswp, "%s (%lld): exit\n", __func__, ldcp->ldc_id); 69661ae08745Sheppo } 69671ae08745Sheppo 69681ae08745Sheppo /* 69691ae08745Sheppo * Switch the given ethernet frame when operating in layer 2 mode. 69701ae08745Sheppo * 69711ae08745Sheppo * vswp: pointer to the vsw instance 69721ae08745Sheppo * mp: pointer to chain of ethernet frame(s) to be switched 69731ae08745Sheppo * caller: identifies the source of this frame as: 69741ae08745Sheppo * 1. VSW_VNETPORT - a vsw port (connected to a vnet). 69751ae08745Sheppo * 2. VSW_PHYSDEV - the physical ethernet device 69761ae08745Sheppo * 3. VSW_LOCALDEV - vsw configured as a virtual interface 69771ae08745Sheppo * arg: argument provided by the caller. 69781ae08745Sheppo * 1. for VNETPORT - pointer to the corresponding vsw_port_t. 69791ae08745Sheppo * 2. for PHYSDEV - NULL 69801ae08745Sheppo * 3. for LOCALDEV - pointer to to this vsw_t(self) 69811ae08745Sheppo */ 69821ae08745Sheppo void 69831ae08745Sheppo vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 69841ae08745Sheppo vsw_port_t *arg, mac_resource_handle_t mrh) 69851ae08745Sheppo { 69861ae08745Sheppo struct ether_header *ehp; 69871ae08745Sheppo vsw_port_t *port = NULL; 69881ae08745Sheppo mblk_t *bp, *ret_m; 69891ae08745Sheppo mblk_t *nmp = NULL; 69901ae08745Sheppo vsw_port_list_t *plist = &vswp->plist; 69911ae08745Sheppo 69921ae08745Sheppo D1(vswp, "%s: enter (caller %d)", __func__, caller); 69931ae08745Sheppo 69941ae08745Sheppo /* 69951ae08745Sheppo * PERF: rather than breaking up the chain here, scan it 69961ae08745Sheppo * to find all mblks heading to same destination and then 69971ae08745Sheppo * pass that sub-chain to the lower transmit functions. 
69981ae08745Sheppo */ 69991ae08745Sheppo 70001ae08745Sheppo /* process the chain of packets */ 70011ae08745Sheppo bp = mp; 70021ae08745Sheppo while (bp) { 70031ae08745Sheppo mp = bp; 70041ae08745Sheppo bp = bp->b_next; 70051ae08745Sheppo mp->b_next = mp->b_prev = NULL; 70061ae08745Sheppo ehp = (struct ether_header *)mp->b_rptr; 70071ae08745Sheppo 70081ae08745Sheppo D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 70091ae08745Sheppo __func__, MBLKSIZE(mp), MBLKL(mp)); 70101ae08745Sheppo 70111ae08745Sheppo READ_ENTER(&vswp->if_lockrw); 70121ae08745Sheppo if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) { 70131ae08745Sheppo /* 70141ae08745Sheppo * If destination is VSW_LOCALDEV (vsw as an eth 70151ae08745Sheppo * interface) and if the device is up & running, 70161ae08745Sheppo * send the packet up the stack on this host. 70171ae08745Sheppo * If the virtual interface is down, drop the packet. 70181ae08745Sheppo */ 70191ae08745Sheppo if (caller != VSW_LOCALDEV) { 70201ae08745Sheppo if (vswp->if_state & VSW_IF_UP) { 70211ae08745Sheppo RW_EXIT(&vswp->if_lockrw); 7022ba2e4443Sseb mac_rx(vswp->if_mh, mrh, mp); 70231ae08745Sheppo } else { 70241ae08745Sheppo RW_EXIT(&vswp->if_lockrw); 70251ae08745Sheppo /* Interface down, drop pkt */ 70261ae08745Sheppo freemsg(mp); 70271ae08745Sheppo } 70281ae08745Sheppo } else { 70291ae08745Sheppo RW_EXIT(&vswp->if_lockrw); 70301ae08745Sheppo freemsg(mp); 70311ae08745Sheppo } 70321ae08745Sheppo continue; 70331ae08745Sheppo } 70341ae08745Sheppo RW_EXIT(&vswp->if_lockrw); 70351ae08745Sheppo 70361ae08745Sheppo READ_ENTER(&plist->lockrw); 70371ae08745Sheppo port = vsw_lookup_fdb(vswp, ehp); 70381ae08745Sheppo if (port) { 70391ae08745Sheppo /* 70401ae08745Sheppo * Mark the port as in-use. 
70411ae08745Sheppo */ 70421ae08745Sheppo mutex_enter(&port->ref_lock); 70431ae08745Sheppo port->ref_cnt++; 70441ae08745Sheppo mutex_exit(&port->ref_lock); 70451ae08745Sheppo RW_EXIT(&plist->lockrw); 70461ae08745Sheppo 70471ae08745Sheppo /* 70481ae08745Sheppo * If plumbed and in promisc mode then copy msg 70491ae08745Sheppo * and send up the stack. 70501ae08745Sheppo */ 70511ae08745Sheppo READ_ENTER(&vswp->if_lockrw); 70521ae08745Sheppo if (VSW_U_P(vswp->if_state)) { 70531ae08745Sheppo RW_EXIT(&vswp->if_lockrw); 70541ae08745Sheppo nmp = copymsg(mp); 70551ae08745Sheppo if (nmp) 7056ba2e4443Sseb mac_rx(vswp->if_mh, mrh, nmp); 70571ae08745Sheppo } else { 70581ae08745Sheppo RW_EXIT(&vswp->if_lockrw); 70591ae08745Sheppo } 70601ae08745Sheppo 70611ae08745Sheppo /* 70621ae08745Sheppo * If the destination is in FDB, the packet 70631ae08745Sheppo * should be forwarded to the correponding 70641ae08745Sheppo * vsw_port (connected to a vnet device - 70651ae08745Sheppo * VSW_VNETPORT) 70661ae08745Sheppo */ 70671ae08745Sheppo (void) vsw_portsend(port, mp); 70681ae08745Sheppo 70691ae08745Sheppo /* 70701ae08745Sheppo * Decrement use count in port and check if 70711ae08745Sheppo * should wake delete thread. 70721ae08745Sheppo */ 70731ae08745Sheppo mutex_enter(&port->ref_lock); 70741ae08745Sheppo port->ref_cnt--; 70751ae08745Sheppo if (port->ref_cnt == 0) 70761ae08745Sheppo cv_signal(&port->ref_cv); 70771ae08745Sheppo mutex_exit(&port->ref_lock); 70781ae08745Sheppo } else { 70791ae08745Sheppo RW_EXIT(&plist->lockrw); 70801ae08745Sheppo /* 70811ae08745Sheppo * Destination not in FDB. 70821ae08745Sheppo * 70831ae08745Sheppo * If the destination is broadcast or 70841ae08745Sheppo * multicast forward the packet to all 70851ae08745Sheppo * (VNETPORTs, PHYSDEV, LOCALDEV), 70861ae08745Sheppo * except the caller. 
70871ae08745Sheppo */ 70881ae08745Sheppo if (IS_BROADCAST(ehp)) { 70891ae08745Sheppo D3(vswp, "%s: BROADCAST pkt", __func__); 7090205eeb1aSlm66018 (void) vsw_forward_all(vswp, mp, caller, arg); 70911ae08745Sheppo } else if (IS_MULTICAST(ehp)) { 70921ae08745Sheppo D3(vswp, "%s: MULTICAST pkt", __func__); 7093205eeb1aSlm66018 (void) vsw_forward_grp(vswp, mp, caller, arg); 70941ae08745Sheppo } else { 70951ae08745Sheppo /* 70961ae08745Sheppo * If the destination is unicast, and came 70971ae08745Sheppo * from either a logical network device or 70981ae08745Sheppo * the switch itself when it is plumbed, then 70991ae08745Sheppo * send it out on the physical device and also 71001ae08745Sheppo * up the stack if the logical interface is 71011ae08745Sheppo * in promiscious mode. 71021ae08745Sheppo * 71031ae08745Sheppo * NOTE: The assumption here is that if we 71041ae08745Sheppo * cannot find the destination in our fdb, its 71051ae08745Sheppo * a unicast address, and came from either a 71061ae08745Sheppo * vnet or down the stack (when plumbed) it 71071ae08745Sheppo * must be destinded for an ethernet device 71081ae08745Sheppo * outside our ldoms. 
71091ae08745Sheppo */ 71101ae08745Sheppo if (caller == VSW_VNETPORT) { 71111ae08745Sheppo READ_ENTER(&vswp->if_lockrw); 71121ae08745Sheppo if (VSW_U_P(vswp->if_state)) { 71131ae08745Sheppo RW_EXIT(&vswp->if_lockrw); 71141ae08745Sheppo nmp = copymsg(mp); 71151ae08745Sheppo if (nmp) 7116ba2e4443Sseb mac_rx(vswp->if_mh, 71171ae08745Sheppo mrh, nmp); 71181ae08745Sheppo } else { 71191ae08745Sheppo RW_EXIT(&vswp->if_lockrw); 71201ae08745Sheppo } 71211ae08745Sheppo if ((ret_m = vsw_tx_msg(vswp, mp)) 71221ae08745Sheppo != NULL) { 71231ae08745Sheppo DERR(vswp, "%s: drop mblks to " 71241ae08745Sheppo "phys dev", __func__); 71251ae08745Sheppo freemsg(ret_m); 71261ae08745Sheppo } 71271ae08745Sheppo 71281ae08745Sheppo } else if (caller == VSW_PHYSDEV) { 71291ae08745Sheppo /* 71301ae08745Sheppo * Pkt seen because card in promisc 71311ae08745Sheppo * mode. Send up stack if plumbed in 71321ae08745Sheppo * promisc mode, else drop it. 71331ae08745Sheppo */ 71341ae08745Sheppo READ_ENTER(&vswp->if_lockrw); 71351ae08745Sheppo if (VSW_U_P(vswp->if_state)) { 71361ae08745Sheppo RW_EXIT(&vswp->if_lockrw); 7137ba2e4443Sseb mac_rx(vswp->if_mh, mrh, mp); 71381ae08745Sheppo } else { 71391ae08745Sheppo RW_EXIT(&vswp->if_lockrw); 71401ae08745Sheppo freemsg(mp); 71411ae08745Sheppo } 71421ae08745Sheppo 71431ae08745Sheppo } else if (caller == VSW_LOCALDEV) { 71441ae08745Sheppo /* 71451ae08745Sheppo * Pkt came down the stack, send out 71461ae08745Sheppo * over physical device. 71471ae08745Sheppo */ 71481ae08745Sheppo if ((ret_m = vsw_tx_msg(vswp, mp)) 71491ae08745Sheppo != NULL) { 71501ae08745Sheppo DERR(vswp, "%s: drop mblks to " 71511ae08745Sheppo "phys dev", __func__); 71521ae08745Sheppo freemsg(ret_m); 71531ae08745Sheppo } 71541ae08745Sheppo } 71551ae08745Sheppo } 71561ae08745Sheppo } 71571ae08745Sheppo } 71581ae08745Sheppo D1(vswp, "%s: exit\n", __func__); 71591ae08745Sheppo } 71601ae08745Sheppo 71611ae08745Sheppo /* 71621ae08745Sheppo * Switch ethernet frame when in layer 3 mode (i.e. 
using IP 71631ae08745Sheppo * layer to do the routing). 71641ae08745Sheppo * 71651ae08745Sheppo * There is a large amount of overlap between this function and 71661ae08745Sheppo * vsw_switch_l2_frame. At some stage we need to revisit and refactor 71671ae08745Sheppo * both these functions. 71681ae08745Sheppo */ 71691ae08745Sheppo void 71701ae08745Sheppo vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 71711ae08745Sheppo vsw_port_t *arg, mac_resource_handle_t mrh) 71721ae08745Sheppo { 71731ae08745Sheppo struct ether_header *ehp; 71741ae08745Sheppo vsw_port_t *port = NULL; 71751ae08745Sheppo mblk_t *bp = NULL; 71761ae08745Sheppo vsw_port_list_t *plist = &vswp->plist; 71771ae08745Sheppo 71781ae08745Sheppo D1(vswp, "%s: enter (caller %d)", __func__, caller); 71791ae08745Sheppo 71801ae08745Sheppo /* 71811ae08745Sheppo * In layer 3 mode should only ever be switching packets 71821ae08745Sheppo * between IP layer and vnet devices. So make sure thats 71831ae08745Sheppo * who is invoking us. 71841ae08745Sheppo */ 71851ae08745Sheppo if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) { 71861ae08745Sheppo DERR(vswp, "%s: unexpected caller (%d)", __func__, caller); 71871ae08745Sheppo freemsgchain(mp); 71881ae08745Sheppo return; 71891ae08745Sheppo } 71901ae08745Sheppo 71911ae08745Sheppo /* process the chain of packets */ 71921ae08745Sheppo bp = mp; 71931ae08745Sheppo while (bp) { 71941ae08745Sheppo mp = bp; 71951ae08745Sheppo bp = bp->b_next; 71961ae08745Sheppo mp->b_next = mp->b_prev = NULL; 71971ae08745Sheppo ehp = (struct ether_header *)mp->b_rptr; 71981ae08745Sheppo 71991ae08745Sheppo D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 72001ae08745Sheppo __func__, MBLKSIZE(mp), MBLKL(mp)); 72011ae08745Sheppo 72021ae08745Sheppo READ_ENTER(&plist->lockrw); 72031ae08745Sheppo port = vsw_lookup_fdb(vswp, ehp); 72041ae08745Sheppo if (port) { 72051ae08745Sheppo /* 72061ae08745Sheppo * Mark port as in-use. 
72071ae08745Sheppo */ 72081ae08745Sheppo mutex_enter(&port->ref_lock); 72091ae08745Sheppo port->ref_cnt++; 72101ae08745Sheppo mutex_exit(&port->ref_lock); 72111ae08745Sheppo RW_EXIT(&plist->lockrw); 72121ae08745Sheppo 72131ae08745Sheppo D2(vswp, "%s: sending to target port", __func__); 72141ae08745Sheppo (void) vsw_portsend(port, mp); 72151ae08745Sheppo 72161ae08745Sheppo /* 72171ae08745Sheppo * Finished with port so decrement ref count and 72181ae08745Sheppo * check if should wake delete thread. 72191ae08745Sheppo */ 72201ae08745Sheppo mutex_enter(&port->ref_lock); 72211ae08745Sheppo port->ref_cnt--; 72221ae08745Sheppo if (port->ref_cnt == 0) 72231ae08745Sheppo cv_signal(&port->ref_cv); 72241ae08745Sheppo mutex_exit(&port->ref_lock); 72251ae08745Sheppo } else { 72261ae08745Sheppo RW_EXIT(&plist->lockrw); 72271ae08745Sheppo /* 72281ae08745Sheppo * Destination not in FDB 72291ae08745Sheppo * 72301ae08745Sheppo * If the destination is broadcast or 72311ae08745Sheppo * multicast forward the packet to all 72321ae08745Sheppo * (VNETPORTs, PHYSDEV, LOCALDEV), 72331ae08745Sheppo * except the caller. 72341ae08745Sheppo */ 72351ae08745Sheppo if (IS_BROADCAST(ehp)) { 72361ae08745Sheppo D2(vswp, "%s: BROADCAST pkt", __func__); 7237205eeb1aSlm66018 (void) vsw_forward_all(vswp, mp, caller, arg); 72381ae08745Sheppo } else if (IS_MULTICAST(ehp)) { 72391ae08745Sheppo D2(vswp, "%s: MULTICAST pkt", __func__); 7240205eeb1aSlm66018 (void) vsw_forward_grp(vswp, mp, caller, arg); 72411ae08745Sheppo } else { 72421ae08745Sheppo /* 72431ae08745Sheppo * Unicast pkt from vnet that we don't have 72441ae08745Sheppo * an FDB entry for, so must be destinded for 72451ae08745Sheppo * the outside world. Attempt to send up to the 72461ae08745Sheppo * IP layer to allow it to deal with it. 
72471ae08745Sheppo */ 72481ae08745Sheppo if (caller == VSW_VNETPORT) { 72491ae08745Sheppo READ_ENTER(&vswp->if_lockrw); 72501ae08745Sheppo if (vswp->if_state & VSW_IF_UP) { 72511ae08745Sheppo RW_EXIT(&vswp->if_lockrw); 72521ae08745Sheppo D2(vswp, "%s: sending up", 72531ae08745Sheppo __func__); 7254ba2e4443Sseb mac_rx(vswp->if_mh, mrh, mp); 72551ae08745Sheppo } else { 72561ae08745Sheppo RW_EXIT(&vswp->if_lockrw); 72571ae08745Sheppo /* Interface down, drop pkt */ 72581ae08745Sheppo D2(vswp, "%s I/F down", 72591ae08745Sheppo __func__); 72601ae08745Sheppo freemsg(mp); 72611ae08745Sheppo } 72621ae08745Sheppo } 72631ae08745Sheppo } 72641ae08745Sheppo } 72651ae08745Sheppo } 72661ae08745Sheppo 72671ae08745Sheppo D1(vswp, "%s: exit", __func__); 72681ae08745Sheppo } 72691ae08745Sheppo 72701ae08745Sheppo /* 72711ae08745Sheppo * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV), 72721ae08745Sheppo * except the caller (port on which frame arrived). 72731ae08745Sheppo */ 72741ae08745Sheppo static int 72751ae08745Sheppo vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 72761ae08745Sheppo { 72771ae08745Sheppo vsw_port_list_t *plist = &vswp->plist; 72781ae08745Sheppo vsw_port_t *portp; 72791ae08745Sheppo mblk_t *nmp = NULL; 72801ae08745Sheppo mblk_t *ret_m = NULL; 72811ae08745Sheppo int skip_port = 0; 72821ae08745Sheppo 72831ae08745Sheppo D1(vswp, "vsw_forward_all: enter\n"); 72841ae08745Sheppo 72851ae08745Sheppo /* 72861ae08745Sheppo * Broadcast message from inside ldoms so send to outside 72871ae08745Sheppo * world if in either of layer 2 modes. 
72881ae08745Sheppo */ 72891ae08745Sheppo if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 72901ae08745Sheppo (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 72911ae08745Sheppo ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) { 72921ae08745Sheppo 72931ae08745Sheppo nmp = dupmsg(mp); 72941ae08745Sheppo if (nmp) { 72951ae08745Sheppo if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 72961ae08745Sheppo DERR(vswp, "%s: dropping pkt(s) " 72971ae08745Sheppo "consisting of %ld bytes of data for" 72981ae08745Sheppo " physical device", __func__, MBLKL(ret_m)); 72991ae08745Sheppo freemsg(ret_m); 73001ae08745Sheppo } 73011ae08745Sheppo } 73021ae08745Sheppo } 73031ae08745Sheppo 73041ae08745Sheppo if (caller == VSW_VNETPORT) 73051ae08745Sheppo skip_port = 1; 73061ae08745Sheppo 73071ae08745Sheppo /* 73081ae08745Sheppo * Broadcast message from other vnet (layer 2 or 3) or outside 73091ae08745Sheppo * world (layer 2 only), send up stack if plumbed. 73101ae08745Sheppo */ 73111ae08745Sheppo if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) { 73121ae08745Sheppo READ_ENTER(&vswp->if_lockrw); 73131ae08745Sheppo if (vswp->if_state & VSW_IF_UP) { 73141ae08745Sheppo RW_EXIT(&vswp->if_lockrw); 73151ae08745Sheppo nmp = copymsg(mp); 73161ae08745Sheppo if (nmp) 7317ba2e4443Sseb mac_rx(vswp->if_mh, NULL, nmp); 73181ae08745Sheppo } else { 73191ae08745Sheppo RW_EXIT(&vswp->if_lockrw); 73201ae08745Sheppo } 73211ae08745Sheppo } 73221ae08745Sheppo 73231ae08745Sheppo /* send it to all VNETPORTs */ 73241ae08745Sheppo READ_ENTER(&plist->lockrw); 73251ae08745Sheppo for (portp = plist->head; portp != NULL; portp = portp->p_next) { 73261ae08745Sheppo D2(vswp, "vsw_forward_all: port %d", portp->p_instance); 73271ae08745Sheppo /* 73281ae08745Sheppo * Caution ! - don't reorder these two checks as arg 73291ae08745Sheppo * will be NULL if the caller is PHYSDEV. skip_port is 73301ae08745Sheppo * only set if caller is VNETPORT. 
73311ae08745Sheppo */ 73321ae08745Sheppo if ((skip_port) && (portp == arg)) 73331ae08745Sheppo continue; 73341ae08745Sheppo else { 73351ae08745Sheppo nmp = dupmsg(mp); 73361ae08745Sheppo if (nmp) { 73371ae08745Sheppo (void) vsw_portsend(portp, nmp); 73381ae08745Sheppo } else { 73391ae08745Sheppo DERR(vswp, "vsw_forward_all: nmp NULL"); 73401ae08745Sheppo } 73411ae08745Sheppo } 73421ae08745Sheppo } 73431ae08745Sheppo RW_EXIT(&plist->lockrw); 73441ae08745Sheppo 73451ae08745Sheppo freemsg(mp); 73461ae08745Sheppo 73471ae08745Sheppo D1(vswp, "vsw_forward_all: exit\n"); 73481ae08745Sheppo return (0); 73491ae08745Sheppo } 73501ae08745Sheppo 73511ae08745Sheppo /* 73521ae08745Sheppo * Forward pkts to any devices or interfaces which have registered 73531ae08745Sheppo * an interest in them (i.e. multicast groups). 73541ae08745Sheppo */ 73551ae08745Sheppo static int 73561ae08745Sheppo vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 73571ae08745Sheppo { 73581ae08745Sheppo struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 73591ae08745Sheppo mfdb_ent_t *entp = NULL; 73601ae08745Sheppo mfdb_ent_t *tpp = NULL; 73611ae08745Sheppo vsw_port_t *port; 73621ae08745Sheppo uint64_t key = 0; 73631ae08745Sheppo mblk_t *nmp = NULL; 73641ae08745Sheppo mblk_t *ret_m = NULL; 73651ae08745Sheppo boolean_t check_if = B_TRUE; 73661ae08745Sheppo 73671ae08745Sheppo /* 73681ae08745Sheppo * Convert address to hash table key 73691ae08745Sheppo */ 73701ae08745Sheppo KEY_HASH(key, ehp->ether_dhost); 73711ae08745Sheppo 73721ae08745Sheppo D1(vswp, "%s: key 0x%llx", __func__, key); 73731ae08745Sheppo 73741ae08745Sheppo /* 73751ae08745Sheppo * If pkt came from either a vnet or down the stack (if we are 73761ae08745Sheppo * plumbed) and we are in layer 2 mode, then we send the pkt out 73771ae08745Sheppo * over the physical adapter, and then check to see if any other 73781ae08745Sheppo * vnets are interested in it. 
73791ae08745Sheppo */ 73801ae08745Sheppo if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 73811ae08745Sheppo (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 73821ae08745Sheppo ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) { 73831ae08745Sheppo nmp = dupmsg(mp); 73841ae08745Sheppo if (nmp) { 73851ae08745Sheppo if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 7386205eeb1aSlm66018 DERR(vswp, "%s: dropping pkt(s) consisting of " 7387205eeb1aSlm66018 "%ld bytes of data for physical device", 73881ae08745Sheppo __func__, MBLKL(ret_m)); 73891ae08745Sheppo freemsg(ret_m); 73901ae08745Sheppo } 73911ae08745Sheppo } 73921ae08745Sheppo } 73931ae08745Sheppo 73941ae08745Sheppo READ_ENTER(&vswp->mfdbrw); 73951ae08745Sheppo if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key, 73961ae08745Sheppo (mod_hash_val_t *)&entp) != 0) { 73971ae08745Sheppo D3(vswp, "%s: no table entry found for addr 0x%llx", 73981ae08745Sheppo __func__, key); 73991ae08745Sheppo } else { 74001ae08745Sheppo /* 74011ae08745Sheppo * Send to list of devices associated with this address... 
74021ae08745Sheppo */ 74031ae08745Sheppo for (tpp = entp; tpp != NULL; tpp = tpp->nextp) { 74041ae08745Sheppo 74051ae08745Sheppo /* dont send to ourselves */ 74061ae08745Sheppo if ((caller == VSW_VNETPORT) && 74071ae08745Sheppo (tpp->d_addr == (void *)arg)) { 74081ae08745Sheppo port = (vsw_port_t *)tpp->d_addr; 74091ae08745Sheppo D3(vswp, "%s: not sending to ourselves" 7410205eeb1aSlm66018 " : port %d", __func__, port->p_instance); 74111ae08745Sheppo continue; 74121ae08745Sheppo 74131ae08745Sheppo } else if ((caller == VSW_LOCALDEV) && 74141ae08745Sheppo (tpp->d_type == VSW_LOCALDEV)) { 74151ae08745Sheppo D3(vswp, "%s: not sending back up stack", 74161ae08745Sheppo __func__); 74171ae08745Sheppo continue; 74181ae08745Sheppo } 74191ae08745Sheppo 74201ae08745Sheppo if (tpp->d_type == VSW_VNETPORT) { 74211ae08745Sheppo port = (vsw_port_t *)tpp->d_addr; 7422205eeb1aSlm66018 D3(vswp, "%s: sending to port %ld for addr " 7423205eeb1aSlm66018 "0x%llx", __func__, port->p_instance, key); 74241ae08745Sheppo 74251ae08745Sheppo nmp = dupmsg(mp); 74261ae08745Sheppo if (nmp) 74271ae08745Sheppo (void) vsw_portsend(port, nmp); 74281ae08745Sheppo } else { 74291ae08745Sheppo if (vswp->if_state & VSW_IF_UP) { 74301ae08745Sheppo nmp = copymsg(mp); 74311ae08745Sheppo if (nmp) 7432ba2e4443Sseb mac_rx(vswp->if_mh, NULL, nmp); 74331ae08745Sheppo check_if = B_FALSE; 74341ae08745Sheppo D3(vswp, "%s: sending up stack" 7435205eeb1aSlm66018 " for addr 0x%llx", __func__, key); 74361ae08745Sheppo } 74371ae08745Sheppo } 74381ae08745Sheppo } 74391ae08745Sheppo } 74401ae08745Sheppo 74411ae08745Sheppo RW_EXIT(&vswp->mfdbrw); 74421ae08745Sheppo 74431ae08745Sheppo /* 74441ae08745Sheppo * If the pkt came from either a vnet or from physical device, 74451ae08745Sheppo * and if we havent already sent the pkt up the stack then we 74461ae08745Sheppo * check now if we can/should (i.e. the interface is plumbed 74471ae08745Sheppo * and in promisc mode). 
	 */
	if ((check_if) &&
	    ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) {
		READ_ENTER(&vswp->if_lockrw);
		if (VSW_U_P(vswp->if_state)) {
			RW_EXIT(&vswp->if_lockrw);
			D3(vswp, "%s: (caller %d) finally sending up stack"
			    " for addr 0x%llx", __func__, caller, key);
			/* copy so the original can still be freed below */
			nmp = copymsg(mp);
			if (nmp)
				mac_rx(vswp->if_mh, NULL, nmp);
		} else {
			RW_EXIT(&vswp->if_lockrw);
		}
	}

	freemsg(mp);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Transmit the packet over the given port.
 *
 * Hands the mblk chain to the port's transmit routine (set up during
 * the handshake), which takes ownership of the mblk.  Returns the
 * transmit routine's status, or 1 if the port has no LDC channel or
 * 0 if no transmit routine is installed (packet is dropped).
 */
static int
vsw_portsend(vsw_port_t *port, mblk_t *mp)
{
	vsw_ldc_list_t	*ldcl = &port->p_ldclist;
	vsw_ldc_t	*ldcp;
	int		status = 0;


	READ_ENTER(&ldcl->lockrw);
	/*
	 * Note for now, we have a single channel.
	 */
	ldcp = ldcl->head;
	if (ldcp == NULL) {
		DERR(port->p_vswp, "vsw_portsend: no ldc: dropping packet\n");
		freemsg(mp);
		RW_EXIT(&ldcl->lockrw);
		return (1);
	}

	/*
	 * Send the message out using the appropriate
	 * transmit function which will free mblock when it
	 * is finished with it.
	 */
	mutex_enter(&port->tx_lock);
	if (port->transmit != NULL)
		status = (*port->transmit)(ldcp, mp);
	else {
		freemsg(mp);
	}
	mutex_exit(&port->tx_lock);

	RW_EXIT(&ldcl->lockrw);

	return (status);
}

/*
 * Send packet out via descriptor ring to a logical device.
 *
 * Copies the packet into a free private descriptor, marks the public
 * descriptor VIO_DESC_READY and, if the peer has asked to be restarted
 * (dp->restart_reqd), sends a VIO_DRING_DATA message prompting the peer
 * to process the ring.  Consumes (frees) 'mp' on all paths.
 *
 * Returns LDC_TX_SUCCESS, LDC_TX_FAILURE (lane down / no ring /
 * oversized packet) or LDC_TX_NORESOURCES (no free descriptor).
 */
static int
vsw_dringsend(vsw_ldc_t *ldcp, mblk_t *mp)
{
	vio_dring_msg_t		dring_pkt;
	dring_info_t		*dp = NULL;
	vsw_private_desc_t	*priv_desc = NULL;
	vnet_public_desc_t	*pub = NULL;
	vsw_t			*vswp = ldcp->ldc_vswp;
	mblk_t			*bp;
	size_t			n, size;
	caddr_t			bufp;
	int			idx;
	int			status = LDC_TX_SUCCESS;

	D1(vswp, "%s(%lld): enter\n", __func__, ldcp->ldc_id);

	/* TODO: make test a macro */
	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
	    (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
		DWARN(vswp, "%s(%lld) status(%d) lstate(0x%llx), dropping "
		    "packet\n", __func__, ldcp->ldc_id, ldcp->ldc_status,
		    ldcp->lane_out.lstate);
		freemsg(mp);
		return (LDC_TX_FAILURE);
	}

	/*
	 * Note - using first ring only, this may change
	 * in the future.
	 */
	READ_ENTER(&ldcp->lane_out.dlistrw);
	if ((dp = ldcp->lane_out.dringp) == NULL) {
		RW_EXIT(&ldcp->lane_out.dlistrw);
		DERR(vswp, "%s(%lld): no dring for outbound lane on"
		    " channel %d", __func__, ldcp->ldc_id, ldcp->ldc_id);
		freemsg(mp);
		return (LDC_TX_FAILURE);
	}

	size = msgsize(mp);
	if (size > (size_t)ETHERMAX) {
		RW_EXIT(&ldcp->lane_out.dlistrw);
		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
		    ldcp->ldc_id, size);
		freemsg(mp);
		return (LDC_TX_FAILURE);
	}

	/*
	 * Find a free descriptor
	 *
	 * Note: for the moment we are assuming that we will only
	 * have one dring going from the switch to each of its
	 * peers. This may change in the future.
	 */
	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
		D2(vswp, "%s(%lld): no descriptor available for ring "
		    "at 0x%llx", __func__, ldcp->ldc_id, dp);

		/* nothing more we can do */
		status = LDC_TX_NORESOURCES;
		goto vsw_dringsend_free_exit;
	} else {
		D2(vswp, "%s(%lld): free private descriptor found at pos %ld "
		    "addr 0x%llx\n", __func__, ldcp->ldc_id, idx, priv_desc);
	}

	/* copy data into the descriptor */
	bufp = priv_desc->datap;
	bufp += VNET_IPALIGN;
	for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
		n = MBLKL(bp);
		bcopy(bp->b_rptr, bufp, n);
		bufp += n;
	}

	/* runt frames are padded up to the ethernet minimum */
	priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;

	pub = priv_desc->descp;
	pub->nbytes = priv_desc->datalen;

	/* publish the descriptor to the peer */
	mutex_enter(&priv_desc->dstate_lock);
	pub->hdr.dstate = VIO_DESC_READY;
	mutex_exit(&priv_desc->dstate_lock);

	/*
	 * Determine whether or not we need to send a message to our
	 * peer prompting them to read our newly updated descriptor(s).
	 */
	mutex_enter(&dp->restart_lock);
	if (dp->restart_reqd) {
		dp->restart_reqd = B_FALSE;
		mutex_exit(&dp->restart_lock);

		/*
		 * Send a vio_dring_msg to peer to prompt them to read
		 * the updated descriptor ring.
		 */
		dring_pkt.tag.vio_msgtype = VIO_TYPE_DATA;
		dring_pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
		dring_pkt.tag.vio_subtype_env = VIO_DRING_DATA;
		dring_pkt.tag.vio_sid = ldcp->local_session;

		/* Note - for now using first ring */
		dring_pkt.dring_ident = dp->ident;

		mutex_enter(&ldcp->lane_out.seq_lock);
		dring_pkt.seq_num = ldcp->lane_out.seq_num++;
		mutex_exit(&ldcp->lane_out.seq_lock);

		/*
		 * If last_ack_recv is -1 then we know we've not
		 * received any ack's yet, so this must be the first
		 * msg sent, so set the start to the begining of the ring.
		 */
		mutex_enter(&dp->dlock);
		if (dp->last_ack_recv == -1) {
			dring_pkt.start_idx = 0;
		} else {
			dring_pkt.start_idx =
			    (dp->last_ack_recv + 1) % dp->num_descriptors;
		}
		dring_pkt.end_idx = -1;
		mutex_exit(&dp->dlock);

		D3(vswp, "%s(%lld): dring 0x%llx : ident 0x%llx\n", __func__,
		    ldcp->ldc_id, dp, dring_pkt.dring_ident);
		D3(vswp, "%s(%lld): start %lld : end %lld : seq %lld\n",
		    __func__, ldcp->ldc_id, dring_pkt.start_idx,
		    dring_pkt.end_idx, dring_pkt.seq_num);

		/*
		 * Must drop dlistrw before vsw_send_msg() with
		 * handle_reset == B_TRUE; a channel reset must never be
		 * processed while holding that lock (see vsw_send_msg).
		 */
		RW_EXIT(&ldcp->lane_out.dlistrw);

		(void) vsw_send_msg(ldcp, (void *)&dring_pkt,
		    sizeof (vio_dring_msg_t), B_TRUE);

		/* free the message block */
		freemsg(mp);
		return (status);

	} else {
		mutex_exit(&dp->restart_lock);
		D2(vswp, "%s(%lld): updating descp %d", __func__,
		    ldcp->ldc_id, idx);
	}

vsw_dringsend_free_exit:

	RW_EXIT(&ldcp->lane_out.dlistrw);

	/* free the message block */
	freemsg(mp);

	D1(vswp, "%s(%lld): exit\n", __func__, ldcp->ldc_id);
	return (status);
}

/*
 * Send an in-band descriptor message over ldc.
 *
 * Like vsw_dringsend() but for RxDringData/in-band mode: the single
 * dring is used purely as a local buffer pool and the data's memory
 * cookies are shipped to the peer in a vnet_ibnd_desc_t message.
 * Consumes (frees) 'mp' on all paths.
 */
static int
vsw_descrsend(vsw_ldc_t *ldcp, mblk_t *mp)
{
	vsw_t			*vswp = ldcp->ldc_vswp;
	vnet_ibnd_desc_t	ibnd_msg;
	vsw_private_desc_t	*priv_desc = NULL;
	dring_info_t		*dp = NULL;
	size_t			n, size = 0;
	caddr_t			bufp;
	mblk_t			*bp;
	int			idx, i;
	int			status = LDC_TX_SUCCESS;
	/* rate-limits the "no descriptor" warning to one per recovery */
	static int		warn_msg = 1;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	ASSERT(mp != NULL);

	if ((!(ldcp->lane_out.lstate & VSW_LANE_ACTIVE)) ||
	    (ldcp->ldc_status != LDC_UP) || (ldcp->ldc_handle == NULL)) {
		DERR(vswp, "%s(%lld) status(%d) state (0x%llx), dropping pkt",
		    __func__, ldcp->ldc_id, ldcp->ldc_status,
		    ldcp->lane_out.lstate);
		freemsg(mp);
		return (LDC_TX_FAILURE);
	}

	/*
	 * only expect single dring to exist, which we use
	 * as an internal buffer, rather than a transfer channel.
	 */
	READ_ENTER(&ldcp->lane_out.dlistrw);
	if ((dp = ldcp->lane_out.dringp) == NULL) {
		DERR(vswp, "%s(%lld): no dring for outbound lane",
		    __func__, ldcp->ldc_id);
		DERR(vswp, "%s(%lld) status(%d) state (0x%llx)", __func__,
		    ldcp->ldc_id, ldcp->ldc_status, ldcp->lane_out.lstate);
		RW_EXIT(&ldcp->lane_out.dlistrw);
		freemsg(mp);
		return (LDC_TX_FAILURE);
	}

	size = msgsize(mp);
	if (size > (size_t)ETHERMAX) {
		RW_EXIT(&ldcp->lane_out.dlistrw);
		DERR(vswp, "%s(%lld) invalid size (%ld)\n", __func__,
		    ldcp->ldc_id, size);
		freemsg(mp);
		return (LDC_TX_FAILURE);
	}

	/*
	 * Find a free descriptor in our buffer ring
	 */
	if (vsw_dring_find_free_desc(dp, &priv_desc, &idx) != 0) {
		RW_EXIT(&ldcp->lane_out.dlistrw);
		if (warn_msg) {
			DERR(vswp, "%s(%lld): no descriptor available for ring "
			    "at 0x%llx", __func__, ldcp->ldc_id, dp);
			warn_msg = 0;
		}

		/* nothing more we can do */
		status = LDC_TX_NORESOURCES;
		goto vsw_descrsend_free_exit;
	} else {
		D2(vswp, "%s(%lld): free private descriptor found at pos "
		    "%ld addr 0x%x\n", __func__, ldcp->ldc_id, idx, priv_desc);
		warn_msg = 1;
	}

	/* copy data into the descriptor */
	bufp = priv_desc->datap;
	for (bp = mp, n = 0; bp != NULL; bp = bp->b_cont) {
		n = MBLKL(bp);
		bcopy(bp->b_rptr, bufp, n);
		bufp += n;
	}

	priv_desc->datalen = (size < (size_t)ETHERMIN) ? ETHERMIN : size;

	/* create and send the in-band descp msg */
	ibnd_msg.hdr.tag.vio_msgtype = VIO_TYPE_DATA;
	ibnd_msg.hdr.tag.vio_subtype = VIO_SUBTYPE_INFO;
	ibnd_msg.hdr.tag.vio_subtype_env = VIO_DESC_DATA;
	ibnd_msg.hdr.tag.vio_sid = ldcp->local_session;

	mutex_enter(&ldcp->lane_out.seq_lock);
	ibnd_msg.hdr.seq_num = ldcp->lane_out.seq_num++;
	mutex_exit(&ldcp->lane_out.seq_lock);

	/*
	 * Copy the mem cookies describing the data from the
	 * private region of the descriptor ring into the inband
	 * descriptor.
	 */
	for (i = 0; i < priv_desc->ncookies; i++) {
		bcopy(&priv_desc->memcookie[i], &ibnd_msg.memcookie[i],
		    sizeof (ldc_mem_cookie_t));
	}

	ibnd_msg.hdr.desc_handle = idx;
	ibnd_msg.ncookies = priv_desc->ncookies;
	ibnd_msg.nbytes = size;

	/* drop dlistrw before a send that may reset the channel */
	RW_EXIT(&ldcp->lane_out.dlistrw);

	(void) vsw_send_msg(ldcp, (void *)&ibnd_msg,
	    sizeof (vnet_ibnd_desc_t), B_TRUE);

vsw_descrsend_free_exit:

	/* free the allocated message blocks */
	freemsg(mp);

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
	return (status);
}

/*
 * Send a version negotiation message (VIO_VER_INFO) proposing the
 * highest version we support (vsw_versions[0]), and record the
 * proposal in the outbound lane state.  Task/timeout entry point,
 * hence the void * argument.
 */
static void
vsw_send_ver(void *arg)
{
	vsw_ldc_t	*ldcp = (vsw_ldc_t *)arg;
	vsw_t		*vswp = ldcp->ldc_vswp;
	lane_t		*lp = &ldcp->lane_out;
	vio_ver_msg_t	ver_msg;

	D1(vswp, "%s enter", __func__);

	ver_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
	ver_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	ver_msg.tag.vio_subtype_env = VIO_VER_INFO;
	ver_msg.tag.vio_sid = ldcp->local_session;

	/* vsw_versions[0] is the highest version we can support */
	ver_msg.ver_major = vsw_versions[0].ver_major;
	ver_msg.ver_minor = vsw_versions[0].ver_minor;
	ver_msg.dev_class = VDEV_NETWORK_SWITCH;

	lp->lstate |= VSW_VER_INFO_SENT;
	lp->ver_major = ver_msg.ver_major;
	lp->ver_minor = ver_msg.ver_minor;

	DUMP_TAG(ver_msg.tag);

	(void) vsw_send_msg(ldcp, &ver_msg, sizeof (vio_ver_msg_t), B_TRUE);

	D1(vswp, "%s (%d): exit", __func__, ldcp->ldc_id);
}

/*
 * Send an attribute message (VIO_ATTR_INFO) describing our outbound
 * lane (mtu, address type, transfer mode) and our MAC address.
 */
static void
vsw_send_attr(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	lane_t		*lp = &ldcp->lane_out;
	vnet_attr_msg_t	attr_msg;

	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);

	/*
	 * Subtype is set to INFO by default
	 */
	attr_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
	attr_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	attr_msg.tag.vio_subtype_env = VIO_ATTR_INFO;
	attr_msg.tag.vio_sid = ldcp->local_session;

	/* payload copied from default settings for lane */
	attr_msg.mtu = lp->mtu;
	attr_msg.addr_type = lp->addr_type;
	attr_msg.xfer_mode = lp->xfer_mode;
	/*
	 * NOTE(review): ack_freq is populated from lp->xfer_mode, not an
	 * ack_freq field -- looks like a copy/paste slip, but the value is
	 * protocol-visible; confirm against the peer's attr processing
	 * before changing.
	 */
	attr_msg.ack_freq = lp->xfer_mode;

	READ_ENTER(&vswp->if_lockrw);
	bcopy(&(vswp->if_addr), &(attr_msg.addr), ETHERADDRL);
	RW_EXIT(&vswp->if_lockrw);

	ldcp->lane_out.lstate |= VSW_ATTR_INFO_SENT;

	DUMP_TAG(attr_msg.tag);

	(void) vsw_send_msg(ldcp, &attr_msg, sizeof (vnet_attr_msg_t), B_TRUE);

	D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
}

/*
 * Create dring info msg (which also results in the creation of
 * a dring).
 */
static vio_dring_reg_msg_t *
vsw_create_dring_info_pkt(vsw_ldc_t *ldcp)
{
	vio_dring_reg_msg_t	*mp;
	dring_info_t		*dp;
	vsw_t			*vswp = ldcp->ldc_vswp;

	D1(vswp, "vsw_create_dring_info_pkt enter\n");

	/*
	 * If we can't create a dring, obviously no point sending
	 * a message.
	 */
	if ((dp = vsw_create_dring(ldcp)) == NULL)
		return (NULL);

	/* caller frees; see vsw_send_dring_info() */
	mp = kmem_zalloc(sizeof (vio_dring_reg_msg_t), KM_SLEEP);

	mp->tag.vio_msgtype = VIO_TYPE_CTRL;
	mp->tag.vio_subtype = VIO_SUBTYPE_INFO;
	mp->tag.vio_subtype_env = VIO_DRING_REG;
	mp->tag.vio_sid = ldcp->local_session;

	/* payload */
	mp->num_descriptors = dp->num_descriptors;
	mp->descriptor_size = dp->descriptor_size;
	mp->options = dp->options;
	mp->ncookies = dp->ncookies;
	bcopy(&dp->cookie[0], &mp->cookie[0], sizeof (ldc_mem_cookie_t));

	/* ident is assigned by the peer when it ACKs the registration */
	mp->dring_ident = 0;

	D1(vswp, "vsw_create_dring_info_pkt exit\n");

	return (mp);
}

/*
 * Build and send a VIO_DRING_REG message for the outbound lane, then
 * mark the lane as having sent its dring info.  Logs a warning and
 * returns silently if the dring (and hence the message) could not be
 * created.
 */
static void
vsw_send_dring_info(vsw_ldc_t *ldcp)
{
	vio_dring_reg_msg_t	*dring_msg;
	vsw_t			*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s: (%ld) enter", __func__, ldcp->ldc_id);

	dring_msg = vsw_create_dring_info_pkt(ldcp);
	if (dring_msg == NULL) {
		cmn_err(CE_WARN, "!vsw%d: %s: error creating msg",
		    vswp->instance, __func__);
		return;
	}

	ldcp->lane_out.lstate |= VSW_DRING_INFO_SENT;

	DUMP_TAG_PTR((vio_msg_tag_t *)dring_msg);

	(void) vsw_send_msg(ldcp, dring_msg,
	    sizeof (vio_dring_reg_msg_t), B_TRUE);

	kmem_free(dring_msg, sizeof (vio_dring_reg_msg_t));

	D1(vswp, "%s: (%ld) exit", __func__, ldcp->ldc_id);
}

/*
 * Send an RDX (Ready to Receive Data) message to complete the
 * handshake for the channel.
 */
static void
vsw_send_rdx(vsw_ldc_t *ldcp)
{
	vsw_t		*vswp = ldcp->ldc_vswp;
	vio_rdx_msg_t	rdx_msg;

	D1(vswp, "%s (%ld) enter", __func__, ldcp->ldc_id);

	rdx_msg.tag.vio_msgtype = VIO_TYPE_CTRL;
	rdx_msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	rdx_msg.tag.vio_subtype_env = VIO_RDX;
	rdx_msg.tag.vio_sid = ldcp->local_session;

	/*
	 * RDX-sent is deliberately tracked on the *inbound* lane state
	 * (RDX signals readiness to receive).
	 */
	ldcp->lane_in.lstate |= VSW_RDX_INFO_SENT;

	DUMP_TAG(rdx_msg.tag);

	(void) vsw_send_msg(ldcp, &rdx_msg, sizeof (vio_rdx_msg_t), B_TRUE);

	D1(vswp, "%s (%ld) exit", __func__, ldcp->ldc_id);
}

/*
 * Generic routine to send message out over ldc channel.
 *
 * It is possible that when we attempt to write over the ldc channel
 * that we get notified that it has been reset.
Depending on the value 7953b071742bSsg70180 * of the handle_reset flag we either handle that event here or simply 7954b071742bSsg70180 * notify the caller that the channel was reset. 79551ae08745Sheppo */ 7956b071742bSsg70180 static int 7957b071742bSsg70180 vsw_send_msg(vsw_ldc_t *ldcp, void *msgp, int size, boolean_t handle_reset) 79581ae08745Sheppo { 79591ae08745Sheppo int rv; 79601ae08745Sheppo size_t msglen = size; 79611ae08745Sheppo vio_msg_tag_t *tag = (vio_msg_tag_t *)msgp; 79621ae08745Sheppo vsw_t *vswp = ldcp->ldc_vswp; 79631ae08745Sheppo 79641ae08745Sheppo D1(vswp, "vsw_send_msg (%lld) enter : sending %d bytes", 79651ae08745Sheppo ldcp->ldc_id, size); 79661ae08745Sheppo 79671ae08745Sheppo D2(vswp, "send_msg: type 0x%llx", tag->vio_msgtype); 79681ae08745Sheppo D2(vswp, "send_msg: stype 0x%llx", tag->vio_subtype); 79691ae08745Sheppo D2(vswp, "send_msg: senv 0x%llx", tag->vio_subtype_env); 79701ae08745Sheppo 79711ae08745Sheppo mutex_enter(&ldcp->ldc_txlock); 79721ae08745Sheppo do { 79731ae08745Sheppo msglen = size; 79741ae08745Sheppo rv = ldc_write(ldcp->ldc_handle, (caddr_t)msgp, &msglen); 79751ae08745Sheppo } while (rv == EWOULDBLOCK && --vsw_wretries > 0); 79761ae08745Sheppo 79771ae08745Sheppo if ((rv != 0) || (msglen != size)) { 7978205eeb1aSlm66018 DERR(vswp, "vsw_send_msg:ldc_write failed: chan(%lld) rv(%d) " 7979205eeb1aSlm66018 "size (%d) msglen(%d)\n", ldcp->ldc_id, rv, size, msglen); 79801ae08745Sheppo } 79813af08d82Slm66018 mutex_exit(&ldcp->ldc_txlock); 79823af08d82Slm66018 7983b071742bSsg70180 /* 7984b071742bSsg70180 * If channel has been reset we either handle it here or 7985b071742bSsg70180 * simply report back that it has been reset and let caller 7986b071742bSsg70180 * decide what to do. 
7987b071742bSsg70180 */ 79883af08d82Slm66018 if (rv == ECONNRESET) { 7989205eeb1aSlm66018 DWARN(vswp, "%s (%lld) channel reset", __func__, ldcp->ldc_id); 7990b071742bSsg70180 7991b071742bSsg70180 /* 7992b071742bSsg70180 * N.B - must never be holding the dlistrw lock when 7993b071742bSsg70180 * we do a reset of the channel. 7994b071742bSsg70180 */ 7995b071742bSsg70180 if (handle_reset) { 7996b071742bSsg70180 vsw_process_conn_evt(ldcp, VSW_CONN_RESET); 7997b071742bSsg70180 } 79983af08d82Slm66018 } 79991ae08745Sheppo 8000b071742bSsg70180 return (rv); 80011ae08745Sheppo } 80021ae08745Sheppo 80031ae08745Sheppo /* 80041ae08745Sheppo * Add an entry into FDB, for the given mac address and port_id. 80051ae08745Sheppo * Returns 0 on success, 1 on failure. 80061ae08745Sheppo * 80071ae08745Sheppo * Lock protecting FDB must be held by calling process. 80081ae08745Sheppo */ 80091ae08745Sheppo static int 80101ae08745Sheppo vsw_add_fdb(vsw_t *vswp, vsw_port_t *port) 80111ae08745Sheppo { 80121ae08745Sheppo uint64_t addr = 0; 80131ae08745Sheppo 80141ae08745Sheppo D1(vswp, "%s: enter", __func__); 80151ae08745Sheppo 80161ae08745Sheppo KEY_HASH(addr, port->p_macaddr); 80171ae08745Sheppo 80181ae08745Sheppo D2(vswp, "%s: key = 0x%llx", __func__, addr); 80191ae08745Sheppo 80201ae08745Sheppo /* 80211ae08745Sheppo * Note: duplicate keys will be rejected by mod_hash. 80221ae08745Sheppo */ 80231ae08745Sheppo if (mod_hash_insert(vswp->fdb, (mod_hash_key_t)addr, 80241ae08745Sheppo (mod_hash_val_t)port) != 0) { 80251ae08745Sheppo DERR(vswp, "%s: unable to add entry into fdb.", __func__); 80261ae08745Sheppo return (1); 80271ae08745Sheppo } 80281ae08745Sheppo 80291ae08745Sheppo D1(vswp, "%s: exit", __func__); 80301ae08745Sheppo return (0); 80311ae08745Sheppo } 80321ae08745Sheppo 80331ae08745Sheppo /* 80341ae08745Sheppo * Remove an entry from FDB. 80351ae08745Sheppo * Returns 0 on success, 1 on failure. 
80361ae08745Sheppo */ 80371ae08745Sheppo static int 80381ae08745Sheppo vsw_del_fdb(vsw_t *vswp, vsw_port_t *port) 80391ae08745Sheppo { 80401ae08745Sheppo uint64_t addr = 0; 80411ae08745Sheppo 80421ae08745Sheppo D1(vswp, "%s: enter", __func__); 80431ae08745Sheppo 80441ae08745Sheppo KEY_HASH(addr, port->p_macaddr); 80451ae08745Sheppo 80461ae08745Sheppo D2(vswp, "%s: key = 0x%llx", __func__, addr); 80471ae08745Sheppo 80481ae08745Sheppo (void) mod_hash_destroy(vswp->fdb, (mod_hash_val_t)addr); 80491ae08745Sheppo 80501ae08745Sheppo D1(vswp, "%s: enter", __func__); 80511ae08745Sheppo 80521ae08745Sheppo return (0); 80531ae08745Sheppo } 80541ae08745Sheppo 80551ae08745Sheppo /* 80561ae08745Sheppo * Search fdb for a given mac address. 80571ae08745Sheppo * Returns pointer to the entry if found, else returns NULL. 80581ae08745Sheppo */ 80591ae08745Sheppo static vsw_port_t * 80601ae08745Sheppo vsw_lookup_fdb(vsw_t *vswp, struct ether_header *ehp) 80611ae08745Sheppo { 80621ae08745Sheppo uint64_t key = 0; 80631ae08745Sheppo vsw_port_t *port = NULL; 80641ae08745Sheppo 80651ae08745Sheppo D1(vswp, "%s: enter", __func__); 80661ae08745Sheppo 80671ae08745Sheppo KEY_HASH(key, ehp->ether_dhost); 80681ae08745Sheppo 80691ae08745Sheppo D2(vswp, "%s: key = 0x%llx", __func__, key); 80701ae08745Sheppo 80711ae08745Sheppo if (mod_hash_find(vswp->fdb, (mod_hash_key_t)key, 80721ae08745Sheppo (mod_hash_val_t *)&port) != 0) { 807334683adeSsg70180 D2(vswp, "%s: no port found", __func__); 80741ae08745Sheppo return (NULL); 80751ae08745Sheppo } 80761ae08745Sheppo 80771ae08745Sheppo D1(vswp, "%s: exit", __func__); 80781ae08745Sheppo 80791ae08745Sheppo return (port); 80801ae08745Sheppo } 80811ae08745Sheppo 80821ae08745Sheppo /* 80831ae08745Sheppo * Add or remove multicast address(es). 80841ae08745Sheppo * 80851ae08745Sheppo * Returns 0 on success, 1 on failure. 
 */
static int
vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port)
{
	mcst_addr_t	*mcst_p = NULL;
	vsw_t		*vswp = port->p_vswp;
	uint64_t	addr = 0x0;
	int		i;

	D1(vswp, "%s: enter", __func__);

	D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count);

	/* mcst_pkt->set selects add (0x1) or delete for all addresses */
	for (i = 0; i < mcst_pkt->count; i++) {
		/*
		 * Convert address into form that can be used
		 * as hash table key.
		 */
		KEY_HASH(addr, mcst_pkt->mca[i]);

		/*
		 * Add or delete the specified address/port combination.
		 */
		if (mcst_pkt->set == 0x1) {
			D3(vswp, "%s: adding multicast address 0x%llx for "
			    "port %ld", __func__, addr, port->p_instance);
			if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
				/*
				 * Update the list of multicast
				 * addresses contained within the
				 * port structure to include this new
				 * one.
				 */
				mcst_p = kmem_zalloc(sizeof (mcst_addr_t),
				    KM_NOSLEEP);
				if (mcst_p == NULL) {
					DERR(vswp, "%s: unable to alloc mem",
					    __func__);
					/* undo the mFDB entry added above */
					(void) vsw_del_mcst(vswp,
					    VSW_VNETPORT, addr, port);
					return (1);
				}

				mcst_p->nextp = NULL;
				mcst_p->addr = addr;
				ether_copy(&mcst_pkt->mca[i], &mcst_p->mca);

				/*
				 * Program the address into HW. If the addr
				 * has already been programmed then the MAC
				 * just increments a ref counter (which is
				 * used when the address is being deleted)
				 */
				mutex_enter(&vswp->mac_lock);
				if (vswp->mh != NULL) {
					if (mac_multicst_add(vswp->mh,
					    (uchar_t *)&mcst_pkt->mca[i])) {
						mutex_exit(&vswp->mac_lock);
						cmn_err(CE_WARN, "!vsw%d: "
						    "unable to add multicast "
						    "address: %s\n",
						    vswp->instance,
						    ether_sprintf((void *)
						    &mcst_p->mca));
						/* roll back mFDB + alloc */
						(void) vsw_del_mcst(vswp,
						    VSW_VNETPORT, addr, port);
						kmem_free(mcst_p,
						    sizeof (*mcst_p));
						return (1);
					}
					/* remember to unprogram on delete */
					mcst_p->mac_added = B_TRUE;
				}
				mutex_exit(&vswp->mac_lock);

				/* link into the port's mcast address list */
				mutex_enter(&port->mca_lock);
				mcst_p->nextp = port->mcap;
				port->mcap = mcst_p;
				mutex_exit(&port->mca_lock);

			} else {
				DERR(vswp, "%s: error adding multicast "
				    "address 0x%llx for port %ld",
				    __func__, addr, port->p_instance);
				return (1);
			}
		} else {
			/*
			 * Delete an entry from the multicast hash
			 * table and update the address list
			 * appropriately.
			 */
			if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
				D3(vswp, "%s: deleting multicast address "
				    "0x%llx for port %ld", __func__, addr,
				    port->p_instance);

				mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr);
				ASSERT(mcst_p != NULL);

				/*
				 * Remove the address from HW. The address
				 * will actually only be removed once the ref
				 * count within the MAC layer has dropped to
				 * zero. I.e. we can safely call this fn even
				 * if other ports are interested in this
				 * address.
				 */
				mutex_enter(&vswp->mac_lock);
				if (vswp->mh != NULL && mcst_p->mac_added) {
					if (mac_multicst_remove(vswp->mh,
					    (uchar_t *)&mcst_pkt->mca[i])) {
						mutex_exit(&vswp->mac_lock);
						cmn_err(CE_WARN, "!vsw%d: "
						    "unable to remove mcast "
						    "address: %s\n",
						    vswp->instance,
						    ether_sprintf((void *)
						    &mcst_p->mca));
						kmem_free(mcst_p,
						    sizeof (*mcst_p));
						return (1);
					}
					mcst_p->mac_added = B_FALSE;
				}
				mutex_exit(&vswp->mac_lock);
				kmem_free(mcst_p, sizeof (*mcst_p));

			} else {
				DERR(vswp, "%s: error deleting multicast "
				    "addr 0x%llx for port %ld",
				    __func__, addr, port->p_instance);
				return (1);
			}
		}
	}
	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Add a new multicast entry.
 *
 * Search hash table based on address. If match found then
 * update associated val (which is chain of ports), otherwise
 * create new key/val (addr/port) pair and insert into table.
 */
static int
vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
{
	int		dup = 0;
	int		rv = 0;
	mfdb_ent_t	*ment = NULL;
	mfdb_ent_t	*tmp_ent = NULL;
	mfdb_ent_t	*new_ent = NULL;
	void		*tgt = NULL;

	/* 'tgt' is the interested party: a vnet port or the vsw itself */
	if (devtype == VSW_VNETPORT) {
		/*
		 * Being invoked from a vnet.
		 */
		ASSERT(arg != NULL);
		tgt = arg;
		D2(NULL, "%s: port %d : address 0x%llx", __func__,
		    ((vsw_port_t *)arg)->p_instance, addr);
	} else {
		/*
		 * We are being invoked via the m_multicst mac entry
		 * point.
		 */
		D2(NULL, "%s: address 0x%llx", __func__, addr);
		tgt = (void *)vswp;
	}

	WRITE_ENTER(&vswp->mfdbrw);
	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
	    (mod_hash_val_t *)&ment) != 0) {

		/* address not currently in table */
		ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
		ment->d_addr = (void *)tgt;
		ment->d_type = devtype;
		ment->nextp = NULL;

		if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr,
		    (mod_hash_val_t)ment) != 0) {
			DERR(vswp, "%s: hash table insertion failed", __func__);
			kmem_free(ment, sizeof (mfdb_ent_t));
			rv = 1;
		} else {
			D2(vswp, "%s: added initial entry for 0x%llx to "
			    "table", __func__, addr);
		}
	} else {
		/*
		 * Address in table. Check to see if specified port
		 * is already associated with the address. If not add
		 * it now.
		 */
		tmp_ent = ment;
		while (tmp_ent != NULL) {
			if (tmp_ent->d_addr == (void *)tgt) {
				if (devtype == VSW_VNETPORT) {
					DERR(vswp, "%s: duplicate port entry "
					    "found for portid %ld and key "
					    "0x%llx", __func__,
					    ((vsw_port_t *)arg)->p_instance,
					    addr);
				} else {
					DERR(vswp, "%s: duplicate entry found"
					    "for key 0x%llx", __func__, addr);
				}
				/* duplicate is reported as failure (rv = 1) */
				rv = 1;
				dup = 1;
				break;
			}
			tmp_ent = tmp_ent->nextp;
		}

		/*
		 * Port not on list so add it to end now.
		 */
		if (0 == dup) {
			D2(vswp, "%s: added entry for 0x%llx to table",
			    __func__, addr);
			new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
			new_ent->d_addr = (void *)tgt;
			new_ent->d_type = devtype;
			new_ent->nextp = NULL;

			tmp_ent = ment;
			while (tmp_ent->nextp != NULL)
				tmp_ent = tmp_ent->nextp;

			tmp_ent->nextp = new_ent;
		}
	}

	RW_EXIT(&vswp->mfdbrw);
	return (rv);
}

/*
 * Remove a multicast entry from the hashtable.
 *
 * Search hash table based on address. If match found, scan
 * list of ports associated with address. If specified port
 * found remove it from list.
 *
 * Returns 0 if the entry was found and removed (the unlinked
 * list node is freed here), 1 otherwise.
 */
static int
vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
{
	mfdb_ent_t	*ment = NULL;
	mfdb_ent_t	*curr_p, *prev_p;
	void		*tgt = NULL;

	D1(vswp, "%s: enter", __func__);

	/*
	 * The list-entry match key depends on the caller: a vnet port
	 * matches on the port pointer, the local device on vswp itself.
	 */
	if (devtype == VSW_VNETPORT) {
		tgt = (vsw_port_t *)arg;
		D2(vswp, "%s: removing port %d from mFDB for address"
		    " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr);
	} else {
		D2(vswp, "%s: removing entry", __func__);
		tgt = (void *)vswp;
	}

	WRITE_ENTER(&vswp->mfdbrw);
	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
	    (mod_hash_val_t *)&ment) != 0) {
		D2(vswp, "%s: address 0x%llx not in table", __func__, addr);
		RW_EXIT(&vswp->mfdbrw);
		return (1);
	}

	prev_p = curr_p = ment;

	/* Walk the per-address list looking for the target entry. */
	while (curr_p != NULL) {
		if (curr_p->d_addr == (void *)tgt) {
			if (devtype == VSW_VNETPORT) {
				D2(vswp, "%s: port %d found", __func__,
				    ((vsw_port_t *)tgt)->p_instance);
			} else {
				D2(vswp, "%s: instance found", __func__);
			}

			if (prev_p == curr_p) {
				/*
				 * head of list, if no other element is in
				 * list then destroy this entry, otherwise
				 * just replace it with updated value.
				 */
				ment = curr_p->nextp;
				if (ment == NULL) {
					(void) mod_hash_destroy(vswp->mfdb,
					    (mod_hash_val_t)addr);
				} else {
					(void) mod_hash_replace(vswp->mfdb,
					    (mod_hash_key_t)addr,
					    (mod_hash_val_t)ment);
				}
			} else {
				/*
				 * Not head of list, no need to do
				 * replacement, just adjust list pointers.
				 */
				prev_p->nextp = curr_p->nextp;
			}
			break;
		}

		prev_p = curr_p;
		curr_p = curr_p->nextp;
	}

	RW_EXIT(&vswp->mfdbrw);

	D1(vswp, "%s: exit", __func__);

	/* curr_p is non-NULL only if the loop above found and unlinked it */
	if (curr_p == NULL)
		return (1);
	kmem_free(curr_p, sizeof (mfdb_ent_t));
	return (0);
}

/*
 * Port is being deleted, but has registered an interest in one
 * or more multicast groups. Using the list of addresses maintained
 * within the port structure find the appropriate entry in the hash
 * table and remove this port from the list of interested ports.
 */
static void
vsw_del_mcst_port(vsw_port_t *port)
{
	mcst_addr_t	*mcap = NULL;
	vsw_t		*vswp = port->p_vswp;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&port->mca_lock);

	/*
	 * Unlink one node at a time under mca_lock, then drop the lock
	 * before calling into the mFDB and MAC layers (both may block),
	 * and reacquire it before examining the list head again.
	 */
	while ((mcap = port->mcap) != NULL) {

		port->mcap = mcap->nextp;

		mutex_exit(&port->mca_lock);

		(void) vsw_del_mcst(vswp, VSW_VNETPORT,
		    mcap->addr, port);

		/*
		 * Remove the address from HW. The address
		 * will actually only be removed once the ref
		 * count within the MAC layer has dropped to
		 * zero. I.e. we can safely call this fn even
		 * if other ports are interested in this
		 * address.
		 */
		mutex_enter(&vswp->mac_lock);
		if (vswp->mh != NULL && mcap->mac_added) {
			(void) mac_multicst_remove(vswp->mh,
			    (uchar_t *)&mcap->mca);
		}
		mutex_exit(&vswp->mac_lock);

		/* node was unlinked above; free it now */
		kmem_free(mcap, sizeof (*mcap));

		mutex_enter(&port->mca_lock);

	}

	mutex_exit(&port->mca_lock);

	D1(vswp, "%s: exit", __func__);
}

/*
 * This vsw instance is detaching, but has registered an interest in one
 * or more multicast groups. Using the list of addresses maintained
 * within the vsw structure find the appropriate entry in the hash
 * table and remove this instance from the list of interested ports.
84661ae08745Sheppo */ 84671ae08745Sheppo static void 84681ae08745Sheppo vsw_del_mcst_vsw(vsw_t *vswp) 84691ae08745Sheppo { 84701ae08745Sheppo mcst_addr_t *next_p = NULL; 84711ae08745Sheppo 84721ae08745Sheppo D1(vswp, "%s: enter", __func__); 84731ae08745Sheppo 84741ae08745Sheppo mutex_enter(&vswp->mca_lock); 84751ae08745Sheppo 84761ae08745Sheppo while (vswp->mcap != NULL) { 84771ae08745Sheppo DERR(vswp, "%s: deleting addr 0x%llx", 84781ae08745Sheppo __func__, vswp->mcap->addr); 8479205eeb1aSlm66018 (void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL); 84801ae08745Sheppo 84811ae08745Sheppo next_p = vswp->mcap->nextp; 84821ae08745Sheppo kmem_free(vswp->mcap, sizeof (mcst_addr_t)); 84831ae08745Sheppo vswp->mcap = next_p; 84841ae08745Sheppo } 84851ae08745Sheppo 84861ae08745Sheppo vswp->mcap = NULL; 84871ae08745Sheppo mutex_exit(&vswp->mca_lock); 84881ae08745Sheppo 84891ae08745Sheppo D1(vswp, "%s: exit", __func__); 84901ae08745Sheppo } 84911ae08745Sheppo 84921ae08745Sheppo /* 84931ae08745Sheppo * Remove the specified address from the list of address maintained 84941ae08745Sheppo * in this port node. 
84951ae08745Sheppo */ 8496*19b65a69Ssb155480 static mcst_addr_t * 84971ae08745Sheppo vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr) 84981ae08745Sheppo { 84991ae08745Sheppo vsw_t *vswp = NULL; 85001ae08745Sheppo vsw_port_t *port = NULL; 85011ae08745Sheppo mcst_addr_t *prev_p = NULL; 85021ae08745Sheppo mcst_addr_t *curr_p = NULL; 85031ae08745Sheppo 85041ae08745Sheppo D1(NULL, "%s: enter : devtype %d : addr 0x%llx", 85051ae08745Sheppo __func__, devtype, addr); 85061ae08745Sheppo 85071ae08745Sheppo if (devtype == VSW_VNETPORT) { 85081ae08745Sheppo port = (vsw_port_t *)arg; 85091ae08745Sheppo mutex_enter(&port->mca_lock); 85101ae08745Sheppo prev_p = curr_p = port->mcap; 85111ae08745Sheppo } else { 85121ae08745Sheppo vswp = (vsw_t *)arg; 85131ae08745Sheppo mutex_enter(&vswp->mca_lock); 85141ae08745Sheppo prev_p = curr_p = vswp->mcap; 85151ae08745Sheppo } 85161ae08745Sheppo 85171ae08745Sheppo while (curr_p != NULL) { 85181ae08745Sheppo if (curr_p->addr == addr) { 85191ae08745Sheppo D2(NULL, "%s: address found", __func__); 85201ae08745Sheppo /* match found */ 85211ae08745Sheppo if (prev_p == curr_p) { 85221ae08745Sheppo /* list head */ 85231ae08745Sheppo if (devtype == VSW_VNETPORT) 85241ae08745Sheppo port->mcap = curr_p->nextp; 85251ae08745Sheppo else 85261ae08745Sheppo vswp->mcap = curr_p->nextp; 85271ae08745Sheppo } else { 85281ae08745Sheppo prev_p->nextp = curr_p->nextp; 85291ae08745Sheppo } 85301ae08745Sheppo break; 85311ae08745Sheppo } else { 85321ae08745Sheppo prev_p = curr_p; 85331ae08745Sheppo curr_p = curr_p->nextp; 85341ae08745Sheppo } 85351ae08745Sheppo } 85361ae08745Sheppo 85371ae08745Sheppo if (devtype == VSW_VNETPORT) 85381ae08745Sheppo mutex_exit(&port->mca_lock); 85391ae08745Sheppo else 85401ae08745Sheppo mutex_exit(&vswp->mca_lock); 85411ae08745Sheppo 85421ae08745Sheppo D1(NULL, "%s: exit", __func__); 8543*19b65a69Ssb155480 8544*19b65a69Ssb155480 return (curr_p); 85451ae08745Sheppo } 85461ae08745Sheppo 85471ae08745Sheppo /* 85481ae08745Sheppo * 
Creates a descriptor ring (dring) and links it into the
 * link of outbound drings for this channel.
 *
 * Returns NULL if creation failed.
 */
static dring_info_t *
vsw_create_dring(vsw_ldc_t *ldcp)
{
	vsw_private_desc_t	*priv_addr = NULL;
	vsw_t			*vswp = ldcp->ldc_vswp;
	ldc_mem_info_t		minfo;
	dring_info_t		*dp, *tp;
	int			i;

	dp = (dring_info_t *)kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);

	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);

	/* create public section of ring */
	if ((ldc_mem_dring_create(VSW_RING_NUM_EL,
	    VSW_PUB_SIZE, &dp->handle)) != 0) {

		DERR(vswp, "vsw_create_dring(%lld): ldc dring create "
		    "failed", ldcp->ldc_id);
		goto create_fail_exit;
	}

	ASSERT(dp->handle != NULL);

	/*
	 * Get the base address of the public section of the ring.
	 */
	if ((ldc_mem_dring_info(dp->handle, &minfo)) != 0) {
		DERR(vswp, "vsw_create_dring(%lld): dring info failed\n",
		    ldcp->ldc_id);
		goto dring_fail_exit;
	} else {
		ASSERT(minfo.vaddr != 0);
		dp->pub_addr = minfo.vaddr;
	}

	dp->num_descriptors = VSW_RING_NUM_EL;
	dp->descriptor_size = VSW_PUB_SIZE;
	dp->options = VIO_TX_DRING;
	dp->ncookies = 1;	/* guaranteed by ldc */

	/*
	 * create private portion of ring
	 */
	dp->priv_addr = (vsw_private_desc_t *)kmem_zalloc(
	    (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP);

	if (vsw_setup_ring(ldcp, dp)) {
		DERR(vswp, "%s: unable to setup ring", __func__);
		goto dring_fail_exit;
	}

	/* haven't used any descriptors yet */
	dp->end_idx = 0;
	dp->last_ack_recv = -1;

	/* bind dring to the channel */
	if ((ldc_mem_dring_bind(ldcp->ldc_handle, dp->handle,
	    LDC_SHADOW_MAP, LDC_MEM_RW,
	    &dp->cookie[0], &dp->ncookies)) != 0) {
		DERR(vswp, "vsw_create_dring: unable to bind to channel "
		    "%lld", ldcp->ldc_id);
		goto dring_fail_exit;
	}

	mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
	dp->restart_reqd = B_TRUE;

	/*
	 * Only ever create rings for outgoing lane. Link it onto
	 * end of list.
	 */
	WRITE_ENTER(&ldcp->lane_out.dlistrw);
	if (ldcp->lane_out.dringp == NULL) {
		D2(vswp, "vsw_create_dring: adding first outbound ring");
		ldcp->lane_out.dringp = dp;
	} else {
		tp = ldcp->lane_out.dringp;
		while (tp->next != NULL)
			tp = tp->next;

		tp->next = dp;
	}
	RW_EXIT(&ldcp->lane_out.dlistrw);

	return (dp);

dring_fail_exit:
	(void) ldc_mem_dring_destroy(dp->handle);

create_fail_exit:
	/*
	 * NOTE(review): when we arrive here via a vsw_setup_ring()
	 * failure, setup_ring's own cleanup path has already freed the
	 * handles it allocated (without NULLing memhandle) — this loop
	 * may then free them a second time. Looks like a potential
	 * double free of LDC memory handles; confirm against
	 * vsw_setup_ring's cleanup behavior.
	 */
	if (dp->priv_addr != NULL) {
		priv_addr = dp->priv_addr;
		for (i = 0; i < VSW_RING_NUM_EL; i++) {
			if (priv_addr->memhandle != NULL)
				(void) ldc_mem_free_handle(
				    priv_addr->memhandle);
			priv_addr++;
		}
		kmem_free(dp->priv_addr,
		    (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
	}
	mutex_destroy(&dp->dlock);

	kmem_free(dp, sizeof (dring_info_t));
	return (NULL);
}

/*
 * Create a ring consisting of just a private portion and link
 * it into the list of rings for the outbound lane.
 *
 * These type of rings are used primarily for temporary data
 * storage (i.e. as data buffers).
 */
void
vsw_create_privring(vsw_ldc_t *ldcp)
{
	dring_info_t	*dp, *tp;
	vsw_t		*vswp = ldcp->ldc_vswp;

	D1(vswp, "%s(%lld): enter", __func__, ldcp->ldc_id);

	dp = kmem_zalloc(sizeof (dring_info_t), KM_SLEEP);

	mutex_init(&dp->dlock, NULL, MUTEX_DRIVER, NULL);

	/* no public section */
	dp->pub_addr = NULL;

	dp->priv_addr = kmem_zalloc(
	    (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL), KM_SLEEP);

	dp->num_descriptors = VSW_RING_NUM_EL;

	/* on failure, undo everything allocated above and bail */
	if (vsw_setup_ring(ldcp, dp)) {
		DERR(vswp, "%s: setup of ring failed", __func__);
		kmem_free(dp->priv_addr,
		    (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
		mutex_destroy(&dp->dlock);
		kmem_free(dp, sizeof (dring_info_t));
		return;
	}

	/* haven't used any descriptors yet */
	dp->end_idx = 0;

	mutex_init(&dp->restart_lock, NULL, MUTEX_DRIVER, NULL);
	dp->restart_reqd = B_TRUE;

	/*
	 * Only ever create rings for outgoing lane. Link it onto
	 * end of list.
	 */
	WRITE_ENTER(&ldcp->lane_out.dlistrw);
	if (ldcp->lane_out.dringp == NULL) {
		D2(vswp, "%s: adding first outbound privring", __func__);
		ldcp->lane_out.dringp = dp;
	} else {
		tp = ldcp->lane_out.dringp;
		while (tp->next != NULL)
			tp = tp->next;

		tp->next = dp;
	}
	RW_EXIT(&ldcp->lane_out.dlistrw);

	D1(vswp, "%s(%lld): exit", __func__, ldcp->ldc_id);
}

/*
 * Setup the descriptors in the dring. Returns 0 on success, 1 on
 * failure.
 */
int
vsw_setup_ring(vsw_ldc_t *ldcp, dring_info_t *dp)
{
	vnet_public_desc_t	*pub_addr = NULL;
	vsw_private_desc_t	*priv_addr = NULL;
	vsw_t			*vswp = ldcp->ldc_vswp;
	uint64_t		*tmpp;
	uint64_t		offset = 0;
	uint32_t		ncookies = 0;
	static char		*name = "vsw_setup_ring";
	int			i, j, nc, rv;

	priv_addr = dp->priv_addr;
	pub_addr = dp->pub_addr;

	/* public section may be null but private should never be */
	ASSERT(priv_addr != NULL);

	/*
	 * Allocate the region of memory which will be used to hold
	 * the data the descriptors will refer to.
	 */
	dp->data_sz = (VSW_RING_NUM_EL * VSW_RING_EL_DATA_SZ);
	dp->data_addr = kmem_alloc(dp->data_sz, KM_SLEEP);

	D2(vswp, "%s: allocated %lld bytes at 0x%llx\n", name,
	    dp->data_sz, dp->data_addr);

	tmpp = (uint64_t *)dp->data_addr;
	/*
	 * NOTE(review): sizeof (tmpp) is the size of the pointer, not of
	 * *tmpp; both are 8 bytes on this platform so the element stride
	 * works out, but sizeof (*tmpp) looks like the intent — confirm.
	 */
	offset = VSW_RING_EL_DATA_SZ / sizeof (tmpp);

	/*
	 * Initialise some of the private and public (if they exist)
	 * descriptor fields.
	 */
	for (i = 0; i < VSW_RING_NUM_EL; i++) {
		mutex_init(&priv_addr->dstate_lock, NULL, MUTEX_DRIVER, NULL);

		if ((ldc_mem_alloc_handle(ldcp->ldc_handle,
		    &priv_addr->memhandle)) != 0) {
			DERR(vswp, "%s: alloc mem handle failed", name);
			goto setup_ring_cleanup;
		}

		priv_addr->datap = (void *)tmpp;

		rv = ldc_mem_bind_handle(priv_addr->memhandle,
		    (caddr_t)priv_addr->datap, VSW_RING_EL_DATA_SZ,
		    LDC_SHADOW_MAP, LDC_MEM_R|LDC_MEM_W,
		    &(priv_addr->memcookie[0]), &ncookies);
		if (rv != 0) {
			DERR(vswp, "%s(%lld): ldc_mem_bind_handle failed "
			    "(rv %d)", name, ldcp->ldc_id, rv);
			goto setup_ring_cleanup;
		}
		priv_addr->bound = 1;

		D2(vswp, "%s: %d: memcookie 0 : addr 0x%llx : size 0x%llx",
		    name, i, priv_addr->memcookie[0].addr,
		    priv_addr->memcookie[0].size);

		/* i.e. ncookies > VSW_MAX_COOKIES: more than we can store */
		if (ncookies >= (uint32_t)(VSW_MAX_COOKIES + 1)) {
			DERR(vswp, "%s(%lld) ldc_mem_bind_handle returned "
			    "invalid num of cookies (%d) for size 0x%llx",
			    name, ldcp->ldc_id, ncookies, VSW_RING_EL_DATA_SZ);

			goto setup_ring_cleanup;
		} else {
			for (j = 1; j < ncookies; j++) {
				rv = ldc_mem_nextcookie(priv_addr->memhandle,
				    &(priv_addr->memcookie[j]));
				if (rv != 0) {
					DERR(vswp, "%s: ldc_mem_nextcookie "
					    "failed rv (%d)", name, rv);
					goto setup_ring_cleanup;
				}
				D3(vswp, "%s: memcookie %d : addr 0x%llx : "
				    "size 0x%llx", name, j,
				    priv_addr->memcookie[j].addr,
				    priv_addr->memcookie[j].size);
			}

		}
		priv_addr->ncookies = ncookies;
		priv_addr->dstate = VIO_DESC_FREE;

		if (pub_addr != NULL) {

			/* link pub and private sides */
			priv_addr->descp = pub_addr;

			pub_addr->ncookies = priv_addr->ncookies;

			for (nc = 0; nc < pub_addr->ncookies; nc++) {
				bcopy(&priv_addr->memcookie[nc],
				    &pub_addr->memcookie[nc],
				    sizeof (ldc_mem_cookie_t));
			}

			pub_addr->hdr.dstate = VIO_DESC_FREE;
			pub_addr++;
		}

		/*
		 * move to next element in the dring and the next
		 * position in the data buffer.
		 */
		priv_addr++;
		tmpp += offset;
	}

	return (0);

setup_ring_cleanup:
	/*
	 * Unwind the descriptors fully initialized before the failure.
	 * NOTE(review): the failing iteration i is not cleaned here — if
	 * the bind (rather than the alloc) failed, its handle and
	 * dstate_lock appear to be leaked; confirm intent.
	 */
	priv_addr = dp->priv_addr;

	for (j = 0; j < i; j++) {
		(void) ldc_mem_unbind_handle(priv_addr->memhandle);
		(void) ldc_mem_free_handle(priv_addr->memhandle);

		mutex_destroy(&priv_addr->dstate_lock);

		priv_addr++;
	}
	kmem_free(dp->data_addr, dp->data_sz);

	return (1);
}

/*
 * Searches the private section of a ring for a free descriptor,
 * starting at the location of the last free descriptor found
 * previously.
 *
 * Returns 0 if free descriptor is available, and updates state
 * of private descriptor to VIO_DESC_READY, otherwise returns 1.
 *
 * FUTURE: might need to return contiguous range of descriptors
 * as dring info msg assumes all will be contiguous.
88661ae08745Sheppo */ 88671ae08745Sheppo static int 88681ae08745Sheppo vsw_dring_find_free_desc(dring_info_t *dringp, 88691ae08745Sheppo vsw_private_desc_t **priv_p, int *idx) 88701ae08745Sheppo { 8871d10e4ef2Snarayan vsw_private_desc_t *addr = NULL; 88721ae08745Sheppo int num = VSW_RING_NUM_EL; 88731ae08745Sheppo int ret = 1; 88741ae08745Sheppo 88751ae08745Sheppo D1(NULL, "%s enter\n", __func__); 88761ae08745Sheppo 8877d10e4ef2Snarayan ASSERT(dringp->priv_addr != NULL); 88781ae08745Sheppo 88791ae08745Sheppo D2(NULL, "%s: searching ring, dringp 0x%llx : start pos %lld", 8880d10e4ef2Snarayan __func__, dringp, dringp->end_idx); 88811ae08745Sheppo 8882d10e4ef2Snarayan addr = (vsw_private_desc_t *)dringp->priv_addr + dringp->end_idx; 8883d10e4ef2Snarayan 8884d10e4ef2Snarayan mutex_enter(&addr->dstate_lock); 88851ae08745Sheppo if (addr->dstate == VIO_DESC_FREE) { 8886d10e4ef2Snarayan addr->dstate = VIO_DESC_READY; 88871ae08745Sheppo *priv_p = addr; 8888d10e4ef2Snarayan *idx = dringp->end_idx; 8889d10e4ef2Snarayan dringp->end_idx = (dringp->end_idx + 1) % num; 88901ae08745Sheppo ret = 0; 8891d10e4ef2Snarayan 88921ae08745Sheppo } 8893d10e4ef2Snarayan mutex_exit(&addr->dstate_lock); 88941ae08745Sheppo 88951ae08745Sheppo /* ring full */ 88961ae08745Sheppo if (ret == 1) { 8897d10e4ef2Snarayan D2(NULL, "%s: no desp free: started at %d", __func__, 8898d10e4ef2Snarayan dringp->end_idx); 88991ae08745Sheppo } 89001ae08745Sheppo 89011ae08745Sheppo D1(NULL, "%s: exit\n", __func__); 89021ae08745Sheppo 89031ae08745Sheppo return (ret); 89041ae08745Sheppo } 89051ae08745Sheppo 89061ae08745Sheppo /* 89071ae08745Sheppo * Map from a dring identifier to the ring itself. Returns 89081ae08745Sheppo * pointer to ring or NULL if no match found. 8909445b4c2eSsb155480 * 8910445b4c2eSsb155480 * Should be called with dlistrw rwlock held as reader. 
89111ae08745Sheppo */ 89121ae08745Sheppo static dring_info_t * 89131ae08745Sheppo vsw_ident2dring(lane_t *lane, uint64_t ident) 89141ae08745Sheppo { 89151ae08745Sheppo dring_info_t *dp = NULL; 89161ae08745Sheppo 89171ae08745Sheppo if ((dp = lane->dringp) == NULL) { 89181ae08745Sheppo return (NULL); 89191ae08745Sheppo } else { 89201ae08745Sheppo if (dp->ident == ident) 89211ae08745Sheppo return (dp); 89221ae08745Sheppo 89231ae08745Sheppo while (dp != NULL) { 89241ae08745Sheppo if (dp->ident == ident) 89251ae08745Sheppo break; 89261ae08745Sheppo dp = dp->next; 89271ae08745Sheppo } 89281ae08745Sheppo } 89291ae08745Sheppo 89301ae08745Sheppo return (dp); 89311ae08745Sheppo } 89321ae08745Sheppo 89331ae08745Sheppo /* 89341ae08745Sheppo * Set the default lane attributes. These are copied into 89351ae08745Sheppo * the attr msg we send to our peer. If they are not acceptable 89361ae08745Sheppo * then (currently) the handshake ends. 89371ae08745Sheppo */ 89381ae08745Sheppo static void 89391ae08745Sheppo vsw_set_lane_attr(vsw_t *vswp, lane_t *lp) 89401ae08745Sheppo { 89411ae08745Sheppo bzero(lp, sizeof (lane_t)); 89421ae08745Sheppo 89431ae08745Sheppo READ_ENTER(&vswp->if_lockrw); 89441ae08745Sheppo ether_copy(&(vswp->if_addr), &(lp->addr)); 89451ae08745Sheppo RW_EXIT(&vswp->if_lockrw); 89461ae08745Sheppo 89471ae08745Sheppo lp->mtu = VSW_MTU; 89481ae08745Sheppo lp->addr_type = ADDR_TYPE_MAC; 89491ae08745Sheppo lp->xfer_mode = VIO_DRING_MODE; 89501ae08745Sheppo lp->ack_freq = 0; /* for shared mode */ 8951d10e4ef2Snarayan 8952d10e4ef2Snarayan mutex_enter(&lp->seq_lock); 89531ae08745Sheppo lp->seq_num = VNET_ISS; 8954d10e4ef2Snarayan mutex_exit(&lp->seq_lock); 89551ae08745Sheppo } 89561ae08745Sheppo 89571ae08745Sheppo /* 89581ae08745Sheppo * Verify that the attributes are acceptable. 89591ae08745Sheppo * 89601ae08745Sheppo * FUTURE: If some attributes are not acceptable, change them 89611ae08745Sheppo * our desired values. 
89621ae08745Sheppo */ 89631ae08745Sheppo static int 89641ae08745Sheppo vsw_check_attr(vnet_attr_msg_t *pkt, vsw_port_t *port) 89651ae08745Sheppo { 89661ae08745Sheppo int ret = 0; 89671ae08745Sheppo 89681ae08745Sheppo D1(NULL, "vsw_check_attr enter\n"); 89691ae08745Sheppo 89701ae08745Sheppo /* 89711ae08745Sheppo * Note we currently only support in-band descriptors 89721ae08745Sheppo * and descriptor rings, not packet based transfer (VIO_PKT_MODE) 89731ae08745Sheppo */ 89741ae08745Sheppo if ((pkt->xfer_mode != VIO_DESC_MODE) && 89751ae08745Sheppo (pkt->xfer_mode != VIO_DRING_MODE)) { 8976205eeb1aSlm66018 D2(NULL, "vsw_check_attr: unknown mode %x\n", pkt->xfer_mode); 89771ae08745Sheppo ret = 1; 89781ae08745Sheppo } 89791ae08745Sheppo 89801ae08745Sheppo /* Only support MAC addresses at moment. */ 89811ae08745Sheppo if ((pkt->addr_type != ADDR_TYPE_MAC) || (pkt->addr == 0)) { 89821ae08745Sheppo D2(NULL, "vsw_check_attr: invalid addr_type %x, " 8983205eeb1aSlm66018 "or address 0x%llx\n", pkt->addr_type, pkt->addr); 89841ae08745Sheppo ret = 1; 89851ae08745Sheppo } 89861ae08745Sheppo 89871ae08745Sheppo /* 89881ae08745Sheppo * MAC address supplied by device should match that stored 89891ae08745Sheppo * in the vsw-port OBP node. Need to decide what to do if they 89901ae08745Sheppo * don't match, for the moment just warn but don't fail. 89911ae08745Sheppo */ 89921ae08745Sheppo if (bcmp(&pkt->addr, &port->p_macaddr, ETHERADDRL) != 0) { 89931ae08745Sheppo DERR(NULL, "vsw_check_attr: device supplied address " 89941ae08745Sheppo "0x%llx doesn't match node address 0x%llx\n", 89951ae08745Sheppo pkt->addr, port->p_macaddr); 89961ae08745Sheppo } 89971ae08745Sheppo 89981ae08745Sheppo /* 89991ae08745Sheppo * Ack freq only makes sense in pkt mode, in shared 90001ae08745Sheppo * mode the ring descriptors say whether or not to 90011ae08745Sheppo * send back an ACK. 
90021ae08745Sheppo */ 90031ae08745Sheppo if ((pkt->xfer_mode == VIO_DRING_MODE) && 90041ae08745Sheppo (pkt->ack_freq > 0)) { 90051ae08745Sheppo D2(NULL, "vsw_check_attr: non zero ack freq " 90061ae08745Sheppo " in SHM mode\n"); 90071ae08745Sheppo ret = 1; 90081ae08745Sheppo } 90091ae08745Sheppo 90101ae08745Sheppo /* 90111ae08745Sheppo * Note: for the moment we only support ETHER 90121ae08745Sheppo * frames. This may change in the future. 90131ae08745Sheppo */ 90141ae08745Sheppo if ((pkt->mtu > VSW_MTU) || (pkt->mtu <= 0)) { 90151ae08745Sheppo D2(NULL, "vsw_check_attr: invalid MTU (0x%llx)\n", 90161ae08745Sheppo pkt->mtu); 90171ae08745Sheppo ret = 1; 90181ae08745Sheppo } 90191ae08745Sheppo 90201ae08745Sheppo D1(NULL, "vsw_check_attr exit\n"); 90211ae08745Sheppo 90221ae08745Sheppo return (ret); 90231ae08745Sheppo } 90241ae08745Sheppo 90251ae08745Sheppo /* 90261ae08745Sheppo * Returns 1 if there is a problem, 0 otherwise. 90271ae08745Sheppo */ 90281ae08745Sheppo static int 90291ae08745Sheppo vsw_check_dring_info(vio_dring_reg_msg_t *pkt) 90301ae08745Sheppo { 90311ae08745Sheppo _NOTE(ARGUNUSED(pkt)) 90321ae08745Sheppo 90331ae08745Sheppo int ret = 0; 90341ae08745Sheppo 90351ae08745Sheppo D1(NULL, "vsw_check_dring_info enter\n"); 90361ae08745Sheppo 90371ae08745Sheppo if ((pkt->num_descriptors == 0) || 90381ae08745Sheppo (pkt->descriptor_size == 0) || 90391ae08745Sheppo (pkt->ncookies != 1)) { 90401ae08745Sheppo DERR(NULL, "vsw_check_dring_info: invalid dring msg"); 90411ae08745Sheppo ret = 1; 90421ae08745Sheppo } 90431ae08745Sheppo 90441ae08745Sheppo D1(NULL, "vsw_check_dring_info exit\n"); 90451ae08745Sheppo 90461ae08745Sheppo return (ret); 90471ae08745Sheppo } 90481ae08745Sheppo 90491ae08745Sheppo /* 90501ae08745Sheppo * Returns 1 if two memory cookies match. Otherwise returns 0. 
90511ae08745Sheppo */ 90521ae08745Sheppo static int 90531ae08745Sheppo vsw_mem_cookie_match(ldc_mem_cookie_t *m1, ldc_mem_cookie_t *m2) 90541ae08745Sheppo { 90551ae08745Sheppo if ((m1->addr != m2->addr) || 90561ae08745Sheppo (m2->size != m2->size)) { 90571ae08745Sheppo return (0); 90581ae08745Sheppo } else { 90591ae08745Sheppo return (1); 90601ae08745Sheppo } 90611ae08745Sheppo } 90621ae08745Sheppo 90631ae08745Sheppo /* 90641ae08745Sheppo * Returns 1 if ring described in reg message matches that 90651ae08745Sheppo * described by dring_info structure. Otherwise returns 0. 90661ae08745Sheppo */ 90671ae08745Sheppo static int 90681ae08745Sheppo vsw_dring_match(dring_info_t *dp, vio_dring_reg_msg_t *msg) 90691ae08745Sheppo { 90701ae08745Sheppo if ((msg->descriptor_size != dp->descriptor_size) || 90711ae08745Sheppo (msg->num_descriptors != dp->num_descriptors) || 90721ae08745Sheppo (msg->ncookies != dp->ncookies) || 90731ae08745Sheppo !(vsw_mem_cookie_match(&msg->cookie[0], &dp->cookie[0]))) { 90741ae08745Sheppo return (0); 90751ae08745Sheppo } else { 90761ae08745Sheppo return (1); 90771ae08745Sheppo } 90781ae08745Sheppo 90791ae08745Sheppo } 90801ae08745Sheppo 90811ae08745Sheppo static caddr_t 90821ae08745Sheppo vsw_print_ethaddr(uint8_t *a, char *ebuf) 90831ae08745Sheppo { 90841ae08745Sheppo (void) sprintf(ebuf, "%x:%x:%x:%x:%x:%x", 90851ae08745Sheppo a[0], a[1], a[2], a[3], a[4], a[5]); 90861ae08745Sheppo return (ebuf); 90871ae08745Sheppo } 90881ae08745Sheppo 90891ae08745Sheppo /* 90901ae08745Sheppo * Reset and free all the resources associated with 90911ae08745Sheppo * the channel. 
 */
static void
vsw_free_lane_resources(vsw_ldc_t *ldcp, uint64_t dir)
{
	dring_info_t	*dp, *dpp;
	lane_t		*lp = NULL;
	int		rv = 0;

	ASSERT(ldcp != NULL);

	D1(ldcp->ldc_vswp, "%s (%lld): enter", __func__, ldcp->ldc_id);

	if (dir == INBOUND) {
		D2(ldcp->ldc_vswp, "%s: freeing INBOUND lane"
		    " of channel %lld", __func__, ldcp->ldc_id);
		lp = &ldcp->lane_in;
	} else {
		D2(ldcp->ldc_vswp, "%s: freeing OUTBOUND lane"
		    " of channel %lld", __func__, ldcp->ldc_id);
		lp = &ldcp->lane_out;
	}

	/* mark the lane down and reset its sequence number */
	lp->lstate = VSW_LANE_INACTIV;
	mutex_enter(&lp->seq_lock);
	lp->seq_num = VNET_ISS;
	mutex_exit(&lp->seq_lock);
	if (lp->dringp) {
		if (dir == INBOUND) {
			/* inbound rings were mapped in; unmap and free */
			WRITE_ENTER(&lp->dlistrw);
			dp = lp->dringp;
			while (dp != NULL) {
				dpp = dp->next;
				if (dp->handle != NULL)
					(void) ldc_mem_dring_unmap(dp->handle);
				kmem_free(dp, sizeof (dring_info_t));
				dp = dpp;
			}
			RW_EXIT(&lp->dlistrw);
		} else {
			/*
			 * unbind, destroy exported dring, free dring struct
			 */
			WRITE_ENTER(&lp->dlistrw);
			dp = lp->dringp;
			rv = vsw_free_ring(dp);
			RW_EXIT(&lp->dlistrw);
		}
		/*
		 * rv can only become non-zero on the OUTBOUND path
		 * (vsw_free_ring failure); in that case the list head
		 * is deliberately left intact.
		 */
		if (rv == 0) {
			lp->dringp = NULL;

		}
	}

	D1(ldcp->ldc_vswp, "%s (%lld): exit", __func__, ldcp->ldc_id);
}

/*
 * Free ring and all associated resources.
 *
 * Returns 0 on success, or the failing ldc_mem_* return value on
 * error (in which case the ring list is only partially freed).
 *
 * Should be called with dlistrw rwlock held as writer.
 */
static int
vsw_free_ring(dring_info_t *dp)
{
	vsw_private_desc_t	*paddr = NULL;
	dring_info_t		*dpp;
	int			i, rv = 1;

	while (dp != NULL) {
		mutex_enter(&dp->dlock);
		dpp = dp->next;
		if (dp->priv_addr != NULL) {
			/*
			 * First unbind and free the memory handles
			 * stored in each descriptor within the ring.
			 */
			for (i = 0; i < VSW_RING_NUM_EL; i++) {
				paddr = (vsw_private_desc_t *)
				    dp->priv_addr + i;
				if (paddr->memhandle != NULL) {
					if (paddr->bound == 1) {
						rv = ldc_mem_unbind_handle(
						    paddr->memhandle);

						if (rv != 0) {
							DERR(NULL, "error "
							    "unbinding handle for "
							    "ring 0x%llx at pos %d",
							    dp, i);
							mutex_exit(&dp->dlock);
							return (rv);
						}
						paddr->bound = 0;
					}

					rv = ldc_mem_free_handle(
					    paddr->memhandle);
					if (rv != 0) {
						DERR(NULL, "error freeing "
						    "handle for ring 0x%llx "
						    "at pos %d", dp, i);
						mutex_exit(&dp->dlock);
						return (rv);
					}
					paddr->memhandle = NULL;
				}
				mutex_destroy(&paddr->dstate_lock);
			}
			kmem_free(dp->priv_addr,
			    (sizeof (vsw_private_desc_t) * VSW_RING_NUM_EL));
		}

		/*
		 * Now unbind and destroy the ring itself.
		 */
		if (dp->handle != NULL) {
			(void) ldc_mem_dring_unbind(dp->handle);
			(void) ldc_mem_dring_destroy(dp->handle);
		}

		if (dp->data_addr != NULL) {
			kmem_free(dp->data_addr, dp->data_sz);
		}

		mutex_exit(&dp->dlock);
		mutex_destroy(&dp->dlock);
		mutex_destroy(&dp->restart_lock);
		kmem_free(dp, sizeof (dring_info_t));

		dp = dpp;
	}
	return (0);
}

/*
 * Debugging routines
 */
static void
display_state(void)
{
	vsw_t		*vswp;
	vsw_port_list_t	*plist;
	vsw_port_t 	*port;
	vsw_ldc_list_t	*ldcl;
	vsw_ldc_t 	*ldcp;

	cmn_err(CE_NOTE, "***** system state *****");

	for (vswp = vsw_head; vswp; vswp = vswp->next) {
		plist = &vswp->plist;
		READ_ENTER(&plist->lockrw);
		cmn_err(CE_CONT, "vsw instance %d has %d ports attached\n",
		    vswp->instance, plist->num_ports);

		for (port = plist->head; port != NULL; port = port->p_next) {
			ldcl
= &port->p_ldclist; 92471ae08745Sheppo cmn_err(CE_CONT, "port %d : %d ldcs attached\n", 92481ae08745Sheppo port->p_instance, ldcl->num_ldcs); 92491ae08745Sheppo READ_ENTER(&ldcl->lockrw); 92501ae08745Sheppo ldcp = ldcl->head; 92511ae08745Sheppo for (; ldcp != NULL; ldcp = ldcp->ldc_next) { 92521ae08745Sheppo cmn_err(CE_CONT, "chan %lu : dev %d : " 92531ae08745Sheppo "status %d : phase %u\n", 92541ae08745Sheppo ldcp->ldc_id, ldcp->dev_class, 92551ae08745Sheppo ldcp->ldc_status, ldcp->hphase); 92561ae08745Sheppo cmn_err(CE_CONT, "chan %lu : lsession %lu : " 9257205eeb1aSlm66018 "psession %lu\n", ldcp->ldc_id, 9258205eeb1aSlm66018 ldcp->local_session, ldcp->peer_session); 92591ae08745Sheppo 92601ae08745Sheppo cmn_err(CE_CONT, "Inbound lane:\n"); 92611ae08745Sheppo display_lane(&ldcp->lane_in); 92621ae08745Sheppo cmn_err(CE_CONT, "Outbound lane:\n"); 92631ae08745Sheppo display_lane(&ldcp->lane_out); 92641ae08745Sheppo } 92651ae08745Sheppo RW_EXIT(&ldcl->lockrw); 92661ae08745Sheppo } 92671ae08745Sheppo RW_EXIT(&plist->lockrw); 92681ae08745Sheppo } 92691ae08745Sheppo cmn_err(CE_NOTE, "***** system state *****"); 92701ae08745Sheppo } 92711ae08745Sheppo 92721ae08745Sheppo static void 92731ae08745Sheppo display_lane(lane_t *lp) 92741ae08745Sheppo { 92751ae08745Sheppo dring_info_t *drp; 92761ae08745Sheppo 92771ae08745Sheppo cmn_err(CE_CONT, "ver 0x%x:0x%x : state %lx : mtu 0x%lx\n", 92781ae08745Sheppo lp->ver_major, lp->ver_minor, lp->lstate, lp->mtu); 92791ae08745Sheppo cmn_err(CE_CONT, "addr_type %d : addr 0x%lx : xmode %d\n", 92801ae08745Sheppo lp->addr_type, lp->addr, lp->xfer_mode); 92811ae08745Sheppo cmn_err(CE_CONT, "dringp 0x%lx\n", (uint64_t)lp->dringp); 92821ae08745Sheppo 92831ae08745Sheppo cmn_err(CE_CONT, "Dring info:\n"); 92841ae08745Sheppo for (drp = lp->dringp; drp != NULL; drp = drp->next) { 92851ae08745Sheppo cmn_err(CE_CONT, "\tnum_desc %u : dsize %u\n", 92861ae08745Sheppo drp->num_descriptors, drp->descriptor_size); 92871ae08745Sheppo cmn_err(CE_CONT, 
"\thandle 0x%lx\n", drp->handle); 92881ae08745Sheppo cmn_err(CE_CONT, "\tpub_addr 0x%lx : priv_addr 0x%lx\n", 92891ae08745Sheppo (uint64_t)drp->pub_addr, (uint64_t)drp->priv_addr); 92901ae08745Sheppo cmn_err(CE_CONT, "\tident 0x%lx : end_idx %lu\n", 92911ae08745Sheppo drp->ident, drp->end_idx); 92921ae08745Sheppo display_ring(drp); 92931ae08745Sheppo } 92941ae08745Sheppo } 92951ae08745Sheppo 92961ae08745Sheppo static void 92971ae08745Sheppo display_ring(dring_info_t *dringp) 92981ae08745Sheppo { 92991ae08745Sheppo uint64_t i; 93001ae08745Sheppo uint64_t priv_count = 0; 93011ae08745Sheppo uint64_t pub_count = 0; 93021ae08745Sheppo vnet_public_desc_t *pub_addr = NULL; 93031ae08745Sheppo vsw_private_desc_t *priv_addr = NULL; 93041ae08745Sheppo 93051ae08745Sheppo for (i = 0; i < VSW_RING_NUM_EL; i++) { 93061ae08745Sheppo if (dringp->pub_addr != NULL) { 93071ae08745Sheppo pub_addr = (vnet_public_desc_t *)dringp->pub_addr + i; 93081ae08745Sheppo 93091ae08745Sheppo if (pub_addr->hdr.dstate == VIO_DESC_FREE) 93101ae08745Sheppo pub_count++; 93111ae08745Sheppo } 93121ae08745Sheppo 93131ae08745Sheppo if (dringp->priv_addr != NULL) { 9314205eeb1aSlm66018 priv_addr = (vsw_private_desc_t *)dringp->priv_addr + i; 93151ae08745Sheppo 93161ae08745Sheppo if (priv_addr->dstate == VIO_DESC_FREE) 93171ae08745Sheppo priv_count++; 93181ae08745Sheppo } 93191ae08745Sheppo } 93201ae08745Sheppo cmn_err(CE_CONT, "\t%lu elements: %lu priv free: %lu pub free\n", 93211ae08745Sheppo i, priv_count, pub_count); 93221ae08745Sheppo } 93231ae08745Sheppo 93241ae08745Sheppo static void 93251ae08745Sheppo dump_flags(uint64_t state) 93261ae08745Sheppo { 93271ae08745Sheppo int i; 93281ae08745Sheppo 93291ae08745Sheppo typedef struct flag_name { 93301ae08745Sheppo int flag_val; 93311ae08745Sheppo char *flag_name; 93321ae08745Sheppo } flag_name_t; 93331ae08745Sheppo 93341ae08745Sheppo flag_name_t flags[] = { 93351ae08745Sheppo VSW_VER_INFO_SENT, "VSW_VER_INFO_SENT", 93361ae08745Sheppo VSW_VER_INFO_RECV, 
"VSW_VER_INFO_RECV", 93371ae08745Sheppo VSW_VER_ACK_RECV, "VSW_VER_ACK_RECV", 93381ae08745Sheppo VSW_VER_ACK_SENT, "VSW_VER_ACK_SENT", 93391ae08745Sheppo VSW_VER_NACK_RECV, "VSW_VER_NACK_RECV", 93401ae08745Sheppo VSW_VER_NACK_SENT, "VSW_VER_NACK_SENT", 93411ae08745Sheppo VSW_ATTR_INFO_SENT, "VSW_ATTR_INFO_SENT", 93421ae08745Sheppo VSW_ATTR_INFO_RECV, "VSW_ATTR_INFO_RECV", 93431ae08745Sheppo VSW_ATTR_ACK_SENT, "VSW_ATTR_ACK_SENT", 93441ae08745Sheppo VSW_ATTR_ACK_RECV, "VSW_ATTR_ACK_RECV", 93451ae08745Sheppo VSW_ATTR_NACK_SENT, "VSW_ATTR_NACK_SENT", 93461ae08745Sheppo VSW_ATTR_NACK_RECV, "VSW_ATTR_NACK_RECV", 93471ae08745Sheppo VSW_DRING_INFO_SENT, "VSW_DRING_INFO_SENT", 93481ae08745Sheppo VSW_DRING_INFO_RECV, "VSW_DRING_INFO_RECV", 93491ae08745Sheppo VSW_DRING_ACK_SENT, "VSW_DRING_ACK_SENT", 93501ae08745Sheppo VSW_DRING_ACK_RECV, "VSW_DRING_ACK_RECV", 93511ae08745Sheppo VSW_DRING_NACK_SENT, "VSW_DRING_NACK_SENT", 93521ae08745Sheppo VSW_DRING_NACK_RECV, "VSW_DRING_NACK_RECV", 93531ae08745Sheppo VSW_RDX_INFO_SENT, "VSW_RDX_INFO_SENT", 93541ae08745Sheppo VSW_RDX_INFO_RECV, "VSW_RDX_INFO_RECV", 93551ae08745Sheppo VSW_RDX_ACK_SENT, "VSW_RDX_ACK_SENT", 93561ae08745Sheppo VSW_RDX_ACK_RECV, "VSW_RDX_ACK_RECV", 93571ae08745Sheppo VSW_RDX_NACK_SENT, "VSW_RDX_NACK_SENT", 93581ae08745Sheppo VSW_RDX_NACK_RECV, "VSW_RDX_NACK_RECV", 93591ae08745Sheppo VSW_MCST_INFO_SENT, "VSW_MCST_INFO_SENT", 93601ae08745Sheppo VSW_MCST_INFO_RECV, "VSW_MCST_INFO_RECV", 93611ae08745Sheppo VSW_MCST_ACK_SENT, "VSW_MCST_ACK_SENT", 93621ae08745Sheppo VSW_MCST_ACK_RECV, "VSW_MCST_ACK_RECV", 93631ae08745Sheppo VSW_MCST_NACK_SENT, "VSW_MCST_NACK_SENT", 93641ae08745Sheppo VSW_MCST_NACK_RECV, "VSW_MCST_NACK_RECV", 93651ae08745Sheppo VSW_LANE_ACTIVE, "VSW_LANE_ACTIVE"}; 93661ae08745Sheppo 93671ae08745Sheppo DERR(NULL, "DUMP_FLAGS: %llx\n", state); 93681ae08745Sheppo for (i = 0; i < sizeof (flags)/sizeof (flag_name_t); i++) { 93691ae08745Sheppo if (state & flags[i].flag_val) 93701ae08745Sheppo DERR(NULL, 
"DUMP_FLAGS %s", flags[i].flag_name); 93711ae08745Sheppo } 93721ae08745Sheppo } 9373