/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Function prototypes. 
*/
/* Attach/detach entry points and MD property helpers defined in this file. */
static int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
static int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
static int vsw_unattach(vsw_t *vswp);
static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *);
/* Not static: taskq-style (void *) callback signature, visible to other files. */
void vsw_destroy_rxpools(void *);

/* MDEG routines */
static int vsw_mdeg_register(vsw_t *vswp);
static void vsw_mdeg_unregister(vsw_t *vswp);
static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
static int vsw_read_mdprops(vsw_t *vswp);
static void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
	mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp,
	uint16_t *nvidsp, uint16_t *default_idp);
static void vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp,
	mde_cookie_t node, uint64_t *bw);
static int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
	md_t *mdp, mde_cookie_t *node);
static void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
	mde_cookie_t node);
static void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
	uint32_t *mtu);
static int vsw_mtu_update(vsw_t *vswp, uint32_t mtu);
static void vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
	boolean_t *pls);
static void vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
	uint64_t *bw);
static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1,
	vsw_vlanid_t *vids2, int nvids);

/* Mac driver related routines */
static int vsw_mac_register(vsw_t *);
static int vsw_mac_unregister(vsw_t *);
/* The vsw_m_* routines below populate the vsw_m_callbacks table. */
static int vsw_m_stat(void *, uint_t, uint64_t *);
static void vsw_m_stop(void *arg);
static int vsw_m_start(void *arg);
static int vsw_m_unicst(void *arg, const uint8_t *);
static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
static int vsw_m_promisc(void *arg, boolean_t);
static mblk_t *vsw_m_tx(void *arg, mblk_t *);
void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state);
void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
	mblk_t *mp, vsw_macrx_flags_t flags);
void vsw_physlink_state_update(vsw_t *vswp);

/*
 * Functions imported from other files.
 */
extern void vsw_setup_switching_thread(void *arg);
extern int vsw_setup_switching_start(vsw_t *vswp);
extern void vsw_setup_switching_stop(vsw_t *vswp);
extern int vsw_setup_switching(vsw_t *);
extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
	vsw_port_t *port, mac_resource_handle_t mrh);
extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
extern void vsw_del_mcst_vsw(vsw_t *);
extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
extern void vsw_detach_ports(vsw_t *vswp);
extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
extern int vsw_port_detach(vsw_t *vswp, int p_instance);
/*
 * NOTE: declared static, so vsw_port_update() is local to this file even
 * though it sits in the middle of the imported-function list.
 */
static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
	md_t *prev_mdp, mde_cookie_t prev_mdex);
extern int vsw_port_attach(vsw_port_t *port);
extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
extern int vsw_mac_open(vsw_t *vswp);
extern void vsw_mac_close(vsw_t *vswp);
extern void vsw_mac_cleanup_ports(vsw_t *vswp);
extern void vsw_unset_addrs(vsw_t *vswp);
extern void vsw_setup_switching_post_process(vsw_t *vswp);
extern void vsw_create_vlans(void *arg, int type);
extern void vsw_destroy_vlans(void *arg, int type);
extern void vsw_vlan_add_ids(void *arg, int type);
extern void vsw_vlan_remove_ids(void *arg, int type);
extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
	mblk_t **npt);
extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
extern void vsw_hio_cleanup(vsw_t *vswp);
extern void vsw_hio_start_ports(vsw_t *vswp);
extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid,
	vsw_vlanid_t *new_vids, int new_nvids);
extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans,
	uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids);
extern void vsw_reset_ports(vsw_t *vswp);
extern void vsw_port_reset(vsw_port_t *portp);
extern void vsw_physlink_update_ports(vsw_t *vswp);
extern void vsw_update_bandwidth(vsw_t *vswp, vsw_port_t *port,
	int type, uint64_t maxbw);

/*
 * Internal tunables.
 */
int	vsw_num_handshakes = VNET_NUM_HANDSHAKES;
					/* # of handshake attempts */
int	vsw_wretries = 100;		/* # of write attempts */
int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
int	vsw_mac_open_retries = 300;	/* max # of mac_open() retries */
					/* 300*3 = 900sec(15min) of max tmout */
int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
int	vsw_ldc_retries = 5;		/* # of ldc_close() retries */
int	vsw_ldc_delay = 1000;		/* 1 ms delay for ldc_close() */
boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */
/* 100ms; the wait passed to drv_usecwait() in vsw_destroy_rxpools() */
int	vsw_rxpool_cleanup_delay = 100000;	/* 100ms */

/* vsw_attach() sizes both the unicast and multicast hash tables with this. */
uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */

/* delay in usec to wait for all references on a fdb entry to be dropped */
uint32_t vsw_fdbe_refcnt_delay = 10;

/*
 * Default vlan id.
This is only used internally when the "default-vlan-id"
 * property is not present in the MD device node. Therefore, this should not be
 * used as a tunable; if this value is changed, the corresponding variable
 * should be updated to the same value in all vnets connected to this vsw.
 */
uint16_t	vsw_default_vlan_id = 1;

/*
 * Workaround for a version handshake bug in obp's vnet.
 * If vsw initiates version negotiation starting from the highest version,
 * obp sends a nack and terminates the version handshake. To work around
 * this, we do not initiate the version handshake when the channel comes up.
 * Instead, we wait for the peer to send its version info msg and go through
 * the version protocol exchange. If we successfully negotiate a version,
 * before sending the ack, we send our version info msg to the peer
 * using the version that we are about to ack.
 */
boolean_t vsw_obp_ver_proto_workaround = B_TRUE;

/*
 * In the absence of the "priority-ether-types" property in the MD, the
 * following internal tunable can be set to specify a single priority
 * ethertype. The default of 0 leaves it unset.
 */
uint64_t vsw_pri_eth_type = 0;

/*
 * Number of transmit priority buffers that are preallocated per device.
 * This number is chosen to be a small value to throttle transmission
 * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
 */
uint32_t vsw_pri_tx_nmblks = 64;

/*
 * Number of RARP packets sent to announce macaddr to the physical switch,
 * after vsw's physical device is changed dynamically or after a guest (client
 * vnet) is live migrated in.
 */
uint32_t vsw_publish_macaddr_count = 3;

/*
 * Enable/disable HybridIO
 */
boolean_t vsw_hio_enabled = B_TRUE;

/*
 * Max retries for HybridIO cleanup
 */
int vsw_hio_max_cleanup_retries = 10;

/*
 * 10ms delay for HybridIO cleanup
 * (10000 for 10ms, so the unit is presumably microseconds -- confirm at
 * the point of use, which is outside this file section.)
 */
int vsw_hio_cleanup_delay = 10000;

/*
 * Descriptor ring modes of LDC data transfer:
 *
 * 1) TxDring mode:
 * In versions < v1.6 of VIO Protocol, we support only TxDring mode.
In this * mode, we create a transmit descriptor ring and export it to the peer through * dring registration process of handshake. The descriptor ring is exported * using LDC shared memory. Each descriptor is associated with a data buffer. * The data buffer is also exported over LDC and the cookies for this data * buffer are provided in the descriptor. The peer maps this ring as its * receive ring. Similarly, the peer exports a transmit descriptor ring which * is mapped by this device as its receive ring. In this mode, in a given data * transfer direction, the transmitter copies the data to the exported data * buffer (owned by itself), bound to the descriptor. The receiver uses the LDC * cookies specified in the descriptor to copy the data into the receiving * guest through the hypervisor (ldc_mem_copy()). * * 2) RxDringData mode: * In versions >= v1.6 of VIO Protocol, we also support RxDringData mode. In * this mode, we create a receive descriptor ring and export it to the peer * through dring registration process of handshake. In addition, we export a * receive buffer area and provide that information also in the dring * registration message. The descriptor ring and the data buffer area are * exported using LDC shared memory. Each descriptor is associated with a data * buffer in the data buffer area and the offset of the specific data buffer * within this area is specified in the descriptor. The peer maps this ring * along with the data buffer area as its transmit ring. Similarly, the peer * exports a receive ring which is mapped by this device as its transmit ring, * along with its buffer area. In this mode, in a given data transfer * direction, the transmitter copies the data to the data buffer offset * specified in the descriptor. The receiver simply picks up the data buffer * (owned by itself) without any copy operation into the receiving guest. * * We provide a tunable to enable RxDringData mode for versions >= v1.6 of VIO * Protocol. 
By default, this tunable is set to 1 (VIO_TX_DRING). To enable
 * RxDringData mode set this tunable to 4 (VIO_RX_DRING_DATA). This enables us
 * to negotiate RxDringData mode with peers that support versions >= v1.6. For
 * peers that support version < v1.6, we continue to operate in TxDring mode
 * with them though the tunable is enabled.
 */
uint8_t  vsw_dring_mode = VIO_TX_DRING;

/*
 * Number of descriptors;  must be power of 2.
 */
uint32_t vsw_num_descriptors = VSW_NUM_DESCRIPTORS;

/*
 * In RxDringData mode, # of buffers is determined by multiplying the # of
 * descriptors with the factor below. Note that the factor must be > 1; i.e.,
 * the # of buffers must always be > # of descriptors. This is needed because,
 * while the shared memory buffers are sent up the stack on the receiver, the
 * sender needs additional buffers that can be used for further transmits.
 * See vsw_setup_rx_dring() for details.
 */
uint32_t vsw_nrbufs_factor = 2;

/*
 * Delay when rx descr not ready; used in both dring modes.
 */
int vsw_recv_delay = 0;

/*
 * Retry when rx descr not ready; used in both dring modes.
 */
int vsw_recv_retries = 5;

/*
 * Max number of mblks received in one receive operation.
 * The initializer is a floating-point constant expression (60% of
 * VSW_NUM_MBLKS) that is converted to uint32_t, i.e. truncated toward zero.
 */
uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);

/*
 * Internal tunables for receive buffer pools, that is,  the size and number of
 * mblks for each pool. At least 3 sizes must be specified if these are used.
 * The sizes must be specified in increasing order. Non-zero value of the first
 * size will be used as a hint to use these values instead of the algorithm
 * that determines the sizes based on MTU. Used in TxDring mode only.
*/
/* All four sizes default to 0, i.e. the MTU-based sizing algorithm is used. */
uint32_t vsw_mblk_size1 = 0;
uint32_t vsw_mblk_size2 = 0;
uint32_t vsw_mblk_size3 = 0;
uint32_t vsw_mblk_size4 = 0;
uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS;	/* number of mblks for pool4 */

/*
 * Set this to non-zero to enable additional internal receive buffer pools
 * based on the MTU of the device for better performance at the cost of more
 * memory consumption. This is turned off by default, to use allocb(9F) for
 * receive buffer allocations of sizes > 2K.
 */
boolean_t vsw_jumbo_rxpools = B_FALSE;

/*
 * vsw_max_tx_qcount is the maximum # of packets that can be queued
 * before the tx worker thread begins processing the queue. Its value
 * is chosen to be 4x the default length of tx descriptor ring.
 */
uint32_t vsw_max_tx_qcount = 4 * VSW_NUM_DESCRIPTORS;

/*
 * MAC callbacks
 *
 * Positional initializer: the order of the entries must match the member
 * order of mac_callbacks_t. The leading 0 is presumably the flags word
 * announcing optional callbacks (none here) -- confirm against the
 * mac_callbacks_t definition.
 */
static	mac_callbacks_t	vsw_m_callbacks = {
	0,
	vsw_m_stat,
	vsw_m_start,
	vsw_m_stop,
	vsw_m_promisc,
	vsw_m_multicst,
	vsw_m_unicst,
	vsw_m_tx
};

/*
 * Character/block entry points. Only open and close succeed trivially
 * (nulldev); every data-path entry point is nodev.
 */
static	struct	cb_ops	vsw_cb_ops = {
	nulldev,			/* cb_open */
	nulldev,			/* cb_close */
	nodev,				/* cb_strategy */
	nodev,				/* cb_print */
	nodev,				/* cb_dump */
	nodev,				/* cb_read */
	nodev,				/* cb_write */
	nodev,				/* cb_ioctl */
	nodev,				/* cb_devmap */
	nodev,				/* cb_mmap */
	nodev,				/* cb_segmap */
	nochpoll,			/* cb_chpoll */
	ddi_prop_op,			/* cb_prop_op */
	NULL,				/* cb_stream */
	D_MP,				/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};

/*
 * Device operations: wires vsw_attach()/vsw_detach() in as the attach and
 * detach entry points and points at vsw_cb_ops above.
 */
static	struct	dev_ops	vsw_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vsw_attach,		/* devo_attach */
	vsw_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&vsw_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	ddi_power		/* devo_power */
};

extern	struct	mod_ops	mod_driverops;
/* Loadable-module driver linkage; referenced by modlinkage. */
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch",
	&vsw_ops,
};

/*
 * Acquire/release the three per-channel locks. Acquisition order is
 * cblock -> rxlock -> txlock; release is the exact reverse.
 *
 * NOTE(review): both macros expand to three statements, each already ending
 * in ';', and are not wrapped in do { } while (0). They are therefore unsafe
 * as the sole body of an unbraced if/else/loop.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
				mutex_enter(&((ldcp)->ldc_cblock));\
				mutex_enter(&((ldcp)->ldc_rxlock));\
				mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
				mutex_exit(&((ldcp)->ldc_txlock));\
				mutex_exit(&((ldcp)->ldc_rxlock));\
				mutex_exit(&((ldcp)->ldc_cblock));

/* Driver soft state ptr */
static void	*vsw_state;

/*
 * Linked list of "vsw_t" structures - one per instance.
 * vsw_attach() pushes at the head and vsw_detach() unlinks, both while
 * holding vsw_rw as writer.
 */
vsw_t		*vsw_head = NULL;
krwlock_t	vsw_rw;

/*
 * Property names
 */
static char vdev_propname[] = "virtual-device";
static char vsw_propname[] = "virtual-network-switch";
static char physdev_propname[] = "vsw-phys-dev";
static char smode_propname[] = "vsw-switch-mode";
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";
static char pri_types_propname[] = "priority-ether-types";
static char vsw_pvid_propname[] = "port-vlan-id";
static char vsw_vid_propname[] = "vlan-id";
static char vsw_dvid_propname[] = "default-vlan-id";
static char port_pvid_propname[] = "remote-port-vlan-id";
static char port_vid_propname[] = "remote-vlan-id";
static char hybrid_propname[] = "hybrid";
static char vsw_mtu_propname[] = "mtu";
static char vsw_linkprop_propname[] = "linkprop";
/* The switch-level and port-level bandwidth properties share one name. */
static char vsw_maxbw_propname[] = "maxbw";
static char port_maxbw_propname[] = "maxbw";

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device-port' nodes identified by their
 * 'id' property.
 */
static md_prop_match_t vport_prop_match[] = {
	{ MDET_PROP_VAL,    "id"   },
	{ MDET_LIST_END,    NULL    }
};

static mdeg_node_match_t vport_match = { "virtual-device-port",
						vport_prop_match };

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device' nodes (i.e.
vsw nodes) identified * by their 'name' and 'cfg-handle' properties. */ static md_prop_match_t vdev_prop_match[] = { { MDET_PROP_STR, "name" }, { MDET_PROP_VAL, "cfg-handle" }, { MDET_LIST_END, NULL } }; static mdeg_node_match_t vdev_match = { "virtual-device", vdev_prop_match }; /* * Specification of an MD node passed to the MDEG to filter any * 'vport' nodes that do not belong to the specified node. This * template is copied for each vsw instance and filled in with * the appropriate 'cfg-handle' value before being passed to the MDEG. */ static mdeg_prop_spec_t vsw_prop_template[] = { { MDET_PROP_STR, "name", vsw_propname }, { MDET_PROP_VAL, "cfg-handle", NULL }, { MDET_LIST_END, NULL, NULL } }; #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); #ifdef DEBUG /* * Print debug messages - set to 0x1f to enable all msgs * or 0x0 to turn all off. */ int vswdbg = 0x0; /* * debug levels: * 0x01: Function entry/exit tracing * 0x02: Internal function messages * 0x04: Verbose internal messages * 0x08: Warning messages * 0x10: Error messages */ void vswdebug(vsw_t *vswp, const char *fmt, ...) 
{ char buf[512]; va_list ap; va_start(ap, fmt); (void) vsprintf(buf, fmt, ap); va_end(ap); if (vswp == NULL) cmn_err(CE_CONT, "%s\n", buf); else cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); } #endif /* DEBUG */ static struct modlinkage modlinkage = { MODREV_1, &vswmodldrv, NULL }; int _init(void) { int status; rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); if (status != 0) { return (status); } mac_init_ops(&vsw_ops, DRV_NAME); status = mod_install(&modlinkage); if (status != 0) { ddi_soft_state_fini(&vsw_state); } return (status); } int _fini(void) { int status; status = mod_remove(&modlinkage); if (status != 0) return (status); mac_fini_ops(&vsw_ops); ddi_soft_state_fini(&vsw_state); rw_destroy(&vsw_rw); return (status); } int _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); } static int vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) { vsw_t *vswp; int instance; char hashname[MAXNAMELEN]; char qname[TASKQ_NAMELEN]; vsw_attach_progress_t progress = PROG_init; int rv; switch (cmd) { case DDI_ATTACH: break; case DDI_RESUME: /* nothing to do for this non-device */ return (DDI_SUCCESS); case DDI_PM_RESUME: default: return (DDI_FAILURE); } instance = ddi_get_instance(dip); if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); return (DDI_FAILURE); } vswp = ddi_get_soft_state(vsw_state, instance); if (vswp == NULL) { DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance); goto vsw_attach_fail; } vswp->dip = dip; vswp->instance = instance; vswp->phys_link_state = LINK_STATE_UNKNOWN; ddi_set_driver_private(dip, (caddr_t)vswp); mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL); mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); mutex_init(&vswp->sw_thr_lock, NULL, MUTEX_DRIVER, NULL); cv_init(&vswp->sw_thr_cv, NULL, CV_DRIVER, NULL); rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL); 
rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); progress |= PROG_locks; rv = vsw_read_mdprops(vswp); if (rv != 0) goto vsw_attach_fail; progress |= PROG_readmd; /* setup the unicast forwarding database */ (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", vswp->instance); D2(vswp, "creating unicast hash table (%s)...", hashname); vswp->fdb_nchains = vsw_fdb_nchains; vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains, mod_hash_null_valdtor, sizeof (void *)); vsw_create_vlans((void *)vswp, VSW_LOCALDEV); progress |= PROG_fdb; /* setup the multicast fowarding database */ (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", vswp->instance); D2(vswp, "creating multicast hash table %s)...", hashname); vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains, mod_hash_null_valdtor, sizeof (void *)); progress |= PROG_mfdb; /* * Create the taskq which will process all the VIO * control messages. */ (void) snprintf(qname, TASKQ_NAMELEN, "taskq%d", vswp->instance); if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, TASKQ_DEFAULTPRI, 0)) == NULL) { cmn_err(CE_WARN, "!vsw%d: Unable to create task queue", vswp->instance); goto vsw_attach_fail; } progress |= PROG_taskq; (void) snprintf(qname, TASKQ_NAMELEN, "rxpool_taskq%d", vswp->instance); if ((vswp->rxp_taskq = ddi_taskq_create(vswp->dip, qname, 1, TASKQ_DEFAULTPRI, 0)) == NULL) { cmn_err(CE_WARN, "!vsw%d: Unable to create rxp task queue", vswp->instance); goto vsw_attach_fail; } progress |= PROG_rxp_taskq; /* prevent auto-detaching */ if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip, DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) { cmn_err(CE_NOTE, "!Unable to set \"%s\" property for " "instance %u", DDI_NO_AUTODETACH, instance); } /* * The null switching function is set to avoid panic until * switch mode is setup. 
*/ vswp->vsw_switch_frame = vsw_switch_frame_nop; /* * Setup the required switching mode, based on the mdprops that we read * earlier. We start a thread to do this, to avoid calling mac_open() * directly from attach(). */ rv = vsw_setup_switching_start(vswp); if (rv != 0) { goto vsw_attach_fail; } progress |= PROG_swmode; /* Register with mac layer as a provider */ rv = vsw_mac_register(vswp); if (rv != 0) goto vsw_attach_fail; progress |= PROG_macreg; /* * Now we have everything setup, register an interest in * specific MD nodes. * * The callback is invoked in 2 cases, firstly if upon mdeg * registration there are existing nodes which match our specified * criteria, and secondly if the MD is changed (and again, there * are nodes which we are interested in present within it. Note * that our callback will be invoked even if our specified nodes * have not actually changed). * */ rv = vsw_mdeg_register(vswp); if (rv != 0) goto vsw_attach_fail; progress |= PROG_mdreg; vswp->attach_progress = progress; WRITE_ENTER(&vsw_rw); vswp->next = vsw_head; vsw_head = vswp; RW_EXIT(&vsw_rw); ddi_report_dev(vswp->dip); return (DDI_SUCCESS); vsw_attach_fail: DERR(NULL, "vsw_attach: failed"); vswp->attach_progress = progress; (void) vsw_unattach(vswp); ddi_soft_state_free(vsw_state, instance); return (DDI_FAILURE); } static int vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { vsw_t **vswpp, *vswp; int instance; instance = ddi_get_instance(dip); vswp = ddi_get_soft_state(vsw_state, instance); if (vswp == NULL) { return (DDI_FAILURE); } switch (cmd) { case DDI_DETACH: break; case DDI_SUSPEND: case DDI_PM_SUSPEND: default: return (DDI_FAILURE); } D2(vswp, "detaching instance %d", instance); if (vsw_unattach(vswp) != 0) { return (DDI_FAILURE); } ddi_remove_minor_node(dip, NULL); WRITE_ENTER(&vsw_rw); for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) { if (*vswpp == vswp) { *vswpp = vswp->next; break; } } RW_EXIT(&vsw_rw); ddi_soft_state_free(vsw_state, instance); return 
(DDI_SUCCESS); } /* * Common routine to handle vsw_attach() failure and vsw_detach(). Note that * the only reason this function could fail is if mac_unregister() fails. * Otherwise, this function must ensure that all resources are freed and return * success. */ static int vsw_unattach(vsw_t *vswp) { vsw_attach_progress_t progress; progress = vswp->attach_progress; /* * Unregister from the gldv3 subsystem. This can fail, in particular * if there are still any open references to this mac device; in which * case we just return failure without continuing to detach further. */ if (progress & PROG_macreg) { if (vsw_mac_unregister(vswp) != 0) { cmn_err(CE_WARN, "!vsw%d: Unable to detach from " "MAC layer", vswp->instance); return (1); } progress &= ~PROG_macreg; } /* * Now that we have unregistered from gldv3, we must finish all other * steps and successfully return from this function; otherwise we will * end up leaving the device in a broken/unusable state. * * If we have registered with mdeg, unregister now to stop further * callbacks to this vsw device and/or its ports. Then, detach any * existing ports. */ if (progress & PROG_mdreg) { vsw_mdeg_unregister(vswp); vsw_detach_ports(vswp); progress &= ~PROG_mdreg; } /* * If we have started a thread to setup the switching mode, stop it, if * it is still running. If it has finished setting up the switching * mode, then we need to clean up some additional things if we are * running in L2 mode: first free up any hybrid resources; then stop * and close the underlying physical device. Note that we would have * already released all per mac_client resources (ucast, mcast addrs, * hio-shares etc) as all the ports are detached and if the vsw device * itself was in use as an interface, it has been unplumbed (otherwise * mac_unregister() above would fail). 
*/ if (progress & PROG_swmode) { vsw_setup_switching_stop(vswp); if (vswp->hio_capable == B_TRUE) { vsw_hio_cleanup(vswp); vswp->hio_capable = B_FALSE; } mutex_enter(&vswp->mac_lock); vsw_mac_close(vswp); mutex_exit(&vswp->mac_lock); progress &= ~PROG_swmode; } /* * We now destroy the taskq used to clean up rx mblk pools that * couldn't be destroyed when the ports/channels were detached. * We implicitly wait for those tasks to complete in * ddi_taskq_destroy(). */ if (progress & PROG_rxp_taskq) { ddi_taskq_destroy(vswp->rxp_taskq); progress &= ~PROG_rxp_taskq; } /* * By now any pending tasks have finished and the underlying * ldc's have been destroyed, so its safe to delete the control * message taskq. */ if (progress & PROG_taskq) { ddi_taskq_destroy(vswp->taskq_p); progress &= ~PROG_taskq; } /* Destroy the multicast hash table */ if (progress & PROG_mfdb) { mod_hash_destroy_hash(vswp->mfdb); progress &= ~PROG_mfdb; } /* Destroy the vlan hash table and fdb */ if (progress & PROG_fdb) { vsw_destroy_vlans(vswp, VSW_LOCALDEV); mod_hash_destroy_hash(vswp->fdb_hashp); progress &= ~PROG_fdb; } if (progress & PROG_readmd) { if (VSW_PRI_ETH_DEFINED(vswp)) { kmem_free(vswp->pri_types, sizeof (uint16_t) * vswp->pri_num_types); (void) vio_destroy_mblks(vswp->pri_tx_vmp); } progress &= ~PROG_readmd; } if (progress & PROG_locks) { rw_destroy(&vswp->plist.lockrw); rw_destroy(&vswp->mfdbrw); rw_destroy(&vswp->if_lockrw); rw_destroy(&vswp->maccl_rwlock); cv_destroy(&vswp->sw_thr_cv); mutex_destroy(&vswp->sw_thr_lock); mutex_destroy(&vswp->mca_lock); mutex_destroy(&vswp->mac_lock); progress &= ~PROG_locks; } vswp->attach_progress = progress; return (0); } void vsw_destroy_rxpools(void *arg) { vio_mblk_pool_t *poolp = (vio_mblk_pool_t *)arg; vio_mblk_pool_t *npoolp; while (poolp != NULL) { npoolp = poolp->nextp; while (vio_destroy_mblks(poolp) != 0) { drv_usecwait(vsw_rxpool_cleanup_delay); } poolp = npoolp; } } /* * Get the value of the "vsw-phys-dev" property in the specified * 
node. This property is the name of the physical device that * the virtual switch will use to talk to the outside world. * * Note it is valid for this property to be NULL (but the property * itself must exist). Callers of this routine should verify that * the value returned is what they expected (i.e. either NULL or non NULL). * * On success returns value of the property in region pointed to by * the 'name' argument, and with return value of 0. Otherwise returns 1. */ static int vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name) { int len = 0; int instance; char *physname = NULL; char *dev; const char *dev_name; char myname[MAXNAMELEN]; dev_name = ddi_driver_name(vswp->dip); instance = ddi_get_instance(vswp->dip); (void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance); if (md_get_prop_data(mdp, node, physdev_propname, (uint8_t **)(&physname), &len) != 0) { cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical " "device(s) from MD", vswp->instance); return (1); } else if ((strlen(physname) + 1) > LIFNAMSIZ) { cmn_err(CE_WARN, "!vsw%d: %s is too long a device name", vswp->instance, physname); return (1); } else if (strcmp(myname, physname) == 0) { /* * Prevent the vswitch from opening itself as the * network device. */ cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name", vswp->instance, physname); return (1); } else { (void) strncpy(name, physname, strlen(physname) + 1); D2(vswp, "%s: using first device specified (%s)", __func__, physname); } #ifdef DEBUG /* * As a temporary measure to aid testing we check to see if there * is a vsw.conf file present. If there is we use the value of the * vsw_physname property in the file as the name of the physical * device, overriding the value from the MD. * * There may be multiple devices listed, but for the moment * we just use the first one. 
*/
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0,
	    "vsw_physname", &dev) == DDI_PROP_SUCCESS) {
		if ((strlen(dev) + 1) > LIFNAMSIZ) {
			cmn_err(CE_WARN, "vsw%d: %s is too long a device name",
			    vswp->instance, dev);
			ddi_prop_free(dev);
			return (1);
		} else {
			cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from "
			    "config file", vswp->instance, dev);

			(void) strncpy(name, dev, strlen(dev) + 1);
		}

		ddi_prop_free(dev);
	}
#endif

	return (0);
}

/*
 * Read the 'vsw-switch-mode' property from the specified MD node.
 *
 * The property data is walked as a sequence of NUL-terminated mode names.
 * Each name is translated to a VSW_LAYER* value and stored in *mode, so
 * when several names are present the value of the LAST one is what the
 * caller sees.
 * NOTE(review): that appears to contradict the "preferred mode is first"
 * wording below - confirm which is intended.
 * NOTE(review): if the property exists but has zero length the loop never
 * runs and *mode is left unmodified while 0 is still returned.
 *
 * Returns 0 on success, otherwise returns 1.
 */
static int
vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode)
{
	int		len = 0;
	char		*smode = NULL;
	char		*curr_mode = NULL;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Get the switch-mode property. The modes are listed in
	 * decreasing order of preference, i.e. preferred mode is
	 * first item in list.
	 */
	len = 0;
	if (md_get_prop_data(mdp, node, smode_propname,
	    (uint8_t **)(&smode), &len) != 0) {
		/*
		 * Unable to get switch-mode property from MD, nothing
		 * more we can do.
		 */
		cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property"
		    " from the MD", vswp->instance);
		return (1);
	}

	curr_mode = smode;
	/*
	 * Modes of operation:
	 * 'switched'	 - layer 2 switching, underlying HW in
	 *			programmed mode.
	 * 'promiscuous' - layer 2 switching, underlying HW in
	 *			promiscuous mode.
	 * 'routed'	 - layer 3 (i.e. IP) routing, underlying HW
	 *			in non-promiscuous mode.
	 */
	while (curr_mode < (smode + len)) {
		D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode);
		if (strcmp(curr_mode, "switched") == 0) {
			*mode = VSW_LAYER2;
		} else if (strcmp(curr_mode, "promiscuous") == 0) {
			*mode = VSW_LAYER2 | VSW_LAYER2_PROMISC;
		} else if (strcmp(curr_mode, "routed") == 0) {
			*mode = VSW_LAYER3;
		} else {
			cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, "
			    "setting to default switched mode",
			    vswp->instance, curr_mode);
			*mode = VSW_LAYER2;
		}
		/* step over this name and its terminating NUL */
		curr_mode += strlen(curr_mode) + 1;
	}

	D2(vswp, "%s: %d mode", __func__, *mode);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Register with the MAC layer as a network device, so we
 * can be plumbed if necessary.
 *
 * On success VSW_IF_REG is set in vswp->if_state and 0 is returned.
 * Returns EINVAL if mac_alloc() fails, otherwise the non-zero value
 * returned by mac_register().
 */
static int
vsw_mac_register(vsw_t *vswp)
{
	mac_register_t	*macp;
	int		rv;

	D1(vswp, "%s: enter", __func__);

	if ((macp = mac_alloc(MAC_VERSION)) == NULL)
		return (EINVAL);
	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	macp->m_driver = vswp;
	macp->m_dip = vswp->dip;
	macp->m_src_addr = (uint8_t *)&vswp->if_addr;
	macp->m_callbacks = &vsw_m_callbacks;
	macp->m_min_sdu = 0;
	macp->m_max_sdu = vswp->mtu;
	macp->m_margin = VLAN_TAGSZ;
	rv = mac_register(macp, &vswp->if_mh);
	/* the registration template is released whether or not we registered */
	mac_free(macp);
	if (rv != 0) {
		/*
		 * Treat this as a non-fatal error as we may be
		 * able to operate in some other mode.
		 */
		cmn_err(CE_NOTE, "!vsw%d: Unable to register as "
		    "a provider with MAC layer", vswp->instance);
		return (rv);
	}

	vswp->if_state |= VSW_IF_REG;

	D1(vswp, "%s: exit", __func__);

	return (rv);
}

/*
 * Unregister the vsw interface from the MAC layer, if it is registered.
 *
 * Runs under the if_lockrw writer lock. On success the VSW_IF_UP and
 * VSW_IF_REG bits are cleared. Returns 0 when the interface was not
 * registered or was unregistered successfully, otherwise the non-zero
 * value returned by mac_unregister() (state is left untouched).
 */
static int
vsw_mac_unregister(vsw_t *vswp)
{
	int		rv = 0;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);

	if (vswp->if_state & VSW_IF_REG) {
		rv = mac_unregister(vswp->if_mh);
		if (rv != 0) {
			DWARN(vswp, "%s: unable to unregister from MAC "
			    "framework", __func__);

			RW_EXIT(&vswp->if_lockrw);
			D1(vswp, "%s: fail exit", __func__);
			return (rv);
		}

		/* mark i/f as down and unregistered */
		vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG);
	}
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit", __func__);

	return (rv);
}

/*
 * MAC statistics entry point.
 *
 * Fetches the requested statistic from the handle in vswp->mh while
 * holding mac_lock. Returns EINVAL when vswp->mh is NULL, otherwise
 * stores the value in *val and returns 0.
 */
static int
vsw_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vsw_t			*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	mutex_enter(&vswp->mac_lock);
	if (vswp->mh == NULL) {
		mutex_exit(&vswp->mac_lock);
		return (EINVAL);
	}

	/* return stats from underlying device */
	*val = mac_stat_get(vswp->mh, stat);

	mutex_exit(&vswp->mac_lock);

	return (0);
}

/*
 * MAC stop entry point: clear VSW_IF_UP under the if_lockrw writer lock
 * and then tear down the mac client of the local device.
 */
static void
vsw_m_stop(void *arg)
{
	vsw_t	*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	vswp->if_state &= ~VSW_IF_UP;
	RW_EXIT(&vswp->if_lockrw);

	/* Cleanup and close the mac client */
	vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV);

	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
}

/*
 * MAC start entry point: mark the interface as up and, when switching
 * has already been set up and vswp->mh is non-NULL, initialise the mac
 * client for the local device.
 *
 * Always returns 0; a failure from vsw_mac_client_init() is only logged.
 */
static int
vsw_m_start(void *arg)
{
	int		rv;
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);

	vswp->if_state |= VSW_IF_UP;

	if (vswp->switching_setup_done == B_FALSE) {
		/*
		 * If the switching mode has not been setup yet, just
		 * return. The unicast address will be programmed
		 * after the physical device is successfully setup by the
		 * timeout handler.
		 */
		RW_EXIT(&vswp->if_lockrw);
		return (0);
	}

	/* if in layer2 mode, program unicast address. */
	if (vswp->mh != NULL) {
		/* Init a mac client and program addresses */
		rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV);
		if (rv != 0) {
			cmn_err(CE_NOTE,
			    "!vsw%d: failed to program interface "
			    "unicast address\n", vswp->instance);
		}
	}

	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state);
	return (0);
}

/*
 * Change the local interface address.
 *
 * Note: we don't support this entry point. The local
 * mac address of the switch can only be changed via its
 * MD node properties.
 *
 * Always returns DDI_FAILURE.
 */
static int
vsw_m_unicst(void *arg, const uint8_t *macaddr)
{
	_NOTE(ARGUNUSED(arg, macaddr))

	return (DDI_FAILURE);
}

/*
 * MAC multicast entry point: add (add != 0) or remove the multicast
 * address 'mca' for the local device.
 *
 * Add: the address is entered via vsw_add_mcst(), programmed through
 * vsw_mac_multicast_add() and linked onto the vswp->mcap list under
 * mca_lock. Returns 1 if the list entry cannot be allocated, the error
 * from vsw_mac_multicast_add() if programming fails, and 0 otherwise -
 * including when vsw_add_mcst() itself fails, which is only logged.
 *
 * Remove: always returns 0.
 */
static int
vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	vsw_t		*vswp = (vsw_t *)arg;
	mcst_addr_t	*mcst_p = NULL;
	uint64_t	addr = 0x0;
	int		i, ret = 0;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Convert address into form that can be used
	 * as hash table key.
	 */
	for (i = 0; i < ETHERADDRL; i++) {
		addr = (addr << 8) | mca[i];
	}

	D2(vswp, "%s: addr = 0x%llx", __func__, addr);

	if (add) {
		D2(vswp, "%s: adding multicast", __func__);
		if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {
			/*
			 * Update the list of multicast addresses
			 * contained within the vsw_t structure to
			 * include this new one.
			 */
			mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP);
			if (mcst_p == NULL) {
				DERR(vswp, "%s unable to alloc mem", __func__);
				/* undo the vsw_add_mcst() above */
				(void) vsw_del_mcst(vswp,
				    VSW_LOCALDEV, addr, NULL);
				return (1);
			}
			mcst_p->addr = addr;
			ether_copy(mca, &mcst_p->mca);

			/*
			 * Call into the underlying driver to program the
			 * address into HW.
			 */
			ret = vsw_mac_multicast_add(vswp, NULL, mcst_p,
			    VSW_LOCALDEV);
			if (ret != 0) {
				(void) vsw_del_mcst(vswp,
				    VSW_LOCALDEV, addr, NULL);
				kmem_free(mcst_p, sizeof (*mcst_p));
				return (ret);
			}

			/* push the new entry on the head of the list */
			mutex_enter(&vswp->mca_lock);
			mcst_p->nextp = vswp->mcap;
			vswp->mcap = mcst_p;
			mutex_exit(&vswp->mca_lock);
		} else {
			cmn_err(CE_WARN, "!vsw%d: unable to add multicast "
			    "address", vswp->instance);
		}
		return (ret);
	}

	D2(vswp, "%s: removing multicast", __func__);
	/*
	 * Remove the address from the hash table..
	 */
	if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) {

		/*
		 * ..and then from the list maintained in the
		 * vsw_t structure.
		 */
		mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr);
		ASSERT(mcst_p != NULL);

		vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV);
		kmem_free(mcst_p, sizeof (*mcst_p));
	}

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * MAC promiscuous entry point: record the requested setting in the
 * VSW_IF_PROMISC bit of if_state under the if_lockrw writer lock.
 * Always returns 0.
 */
static int
vsw_m_promisc(void *arg, boolean_t on)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	WRITE_ENTER(&vswp->if_lockrw);
	if (on)
		vswp->if_state |= VSW_IF_PROMISC;
	else
		vswp->if_state &= ~VSW_IF_PROMISC;
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * MAC transmit entry point: pre-tag the frame chain for the local device
 * and hand whatever remains to the switching function.
 *
 * Always returns NULL, i.e. no part of the chain is handed back to the
 * caller.
 */
static mblk_t *
vsw_m_tx(void *arg, mblk_t *mp)
{
	vsw_t		*vswp = (vsw_t *)arg;

	D1(vswp, "%s: enter", __func__);

	mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp);

	/* nothing left to switch after pre-tagging */
	if (mp == NULL) {
		return (NULL);
	}

	vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL);

	D1(vswp, "%s: exit", __func__);

	return (NULL);
}

/*
 * Register for machine description (MD) updates.
 *
 * Two callbacks are registered with the same node spec: vsw_mdeg_cb for
 * the vsw node itself and vsw_port_mdeg_cb for its port nodes. On success
 * the node spec and both handles are saved in vswp; on failure the spec
 * allocations are freed and both handles in vswp are set to NULL.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_mdeg_register(vsw_t *vswp)
{
	mdeg_prop_spec_t	*pspecp;
	mdeg_node_spec_t	*inst_specp;
	mdeg_handle_t		mdeg_hdl, mdeg_port_hdl;
	size_t			templatesz;
	int			rv;

	D1(vswp, "%s: enter", __func__);

	/*
	 * Allocate and initialize a per-instance copy
	 * of the global property spec array that will
	 * uniquely identify this vsw instance.
	 */
	templatesz = sizeof (vsw_prop_template);
	pspecp = kmem_zalloc(templatesz, KM_SLEEP);

	bcopy(vsw_prop_template, pspecp, templatesz);

	VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop);

	/* initialize the complete prop spec structure */
	inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP);
	inst_specp->namep = "virtual-device";
	inst_specp->specp = pspecp;

	D2(vswp, "%s: instance %d registering with mdeg", __func__,
	    vswp->regprop);
	/*
	 * Register an interest in 'virtual-device' nodes with a
	 * 'name' property of 'virtual-network-switch'
	 */
	rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb,
	    (void *)vswp, &mdeg_hdl);
	if (rv != MDEG_SUCCESS) {
		DERR(vswp, "%s: mdeg_register failed (%d) for vsw node",
		    __func__, rv);
		goto mdeg_reg_fail;
	}

	/*
	 * Register an interest in 'vsw-port' nodes.
	 */
	rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb,
	    (void *)vswp, &mdeg_port_hdl);
	if (rv != MDEG_SUCCESS) {
		DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv);
		/* roll back the first registration before bailing out */
		(void) mdeg_unregister(mdeg_hdl);
		goto mdeg_reg_fail;
	}

	/* save off data that will be needed later */
	vswp->inst_spec = inst_specp;
	vswp->mdeg_hdl = mdeg_hdl;
	vswp->mdeg_port_hdl = mdeg_port_hdl;

	D1(vswp, "%s: exit", __func__);
	return (0);

mdeg_reg_fail:
	cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks",
	    vswp->instance);
	kmem_free(pspecp, templatesz);
	kmem_free(inst_specp, sizeof (mdeg_node_spec_t));

	vswp->mdeg_hdl = NULL;
	vswp->mdeg_port_hdl = NULL;

	return (1);
}

/*
 * Undo vsw_mdeg_register(): unregister whichever mdeg handles are set
 * and free the saved node spec together with its property spec array.
 * Fields that are already NULL are skipped.
 */
static void
vsw_mdeg_unregister(vsw_t *vswp)
{
	D1(vswp, "vsw_mdeg_unregister: enter");

	if (vswp->mdeg_hdl != NULL)
		(void) mdeg_unregister(vswp->mdeg_hdl);

	if (vswp->mdeg_port_hdl != NULL)
		(void) mdeg_unregister(vswp->mdeg_port_hdl);

	if (vswp->inst_spec != NULL) {
		if (vswp->inst_spec->specp != NULL) {
			/* same size as allocated in vsw_mdeg_register() */
			(void) kmem_free(vswp->inst_spec->specp,
			    sizeof (vsw_prop_template));
			vswp->inst_spec->specp = NULL;
		}

		(void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t));
		vswp->inst_spec = NULL;
	}

	D1(vswp, "vsw_mdeg_unregister: exit");
}

/*
 *
Mdeg callback invoked for the vsw node itself. */ static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) { vsw_t *vswp; md_t *mdp; mde_cookie_t node; uint64_t inst; char *node_name = NULL; if (resp == NULL) return (MDEG_FAILURE); vswp = (vsw_t *)cb_argp; D1(vswp, "%s: added %d : removed %d : curr matched %d" " : prev matched %d", __func__, resp->added.nelem, resp->removed.nelem, resp->match_curr.nelem, resp->match_prev.nelem); /* * We get an initial callback for this node as 'added' * after registering with mdeg. Note that we would have * already gathered information about this vsw node by * walking MD earlier during attach (in vsw_read_mdprops()). * So, there is a window where the properties of this * node might have changed when we get this initial 'added' * callback. We handle this as if an update occured * and invoke the same function which handles updates to * the properties of this vsw-node if any. * * A non-zero 'match' value indicates that the MD has been * updated and that a virtual-network-switch node is * present which may or may not have been updated. It is * up to the clients to examine their own nodes and * determine if they have changed. */ if (resp->added.nelem != 0) { if (resp->added.nelem != 1) { cmn_err(CE_NOTE, "!vsw%d: number of nodes added " "invalid: %d\n", vswp->instance, resp->added.nelem); return (MDEG_FAILURE); } mdp = resp->added.mdp; node = resp->added.mdep[0]; } else if (resp->match_curr.nelem != 0) { if (resp->match_curr.nelem != 1) { cmn_err(CE_NOTE, "!vsw%d: number of nodes updated " "invalid: %d\n", vswp->instance, resp->match_curr.nelem); return (MDEG_FAILURE); } mdp = resp->match_curr.mdp; node = resp->match_curr.mdep[0]; } else { return (MDEG_FAILURE); } /* Validate name and instance */ if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { DERR(vswp, "%s: unable to get node name\n", __func__); return (MDEG_FAILURE); } /* is this a virtual-network-switch? 
*/ if (strcmp(node_name, vsw_propname) != 0) { DERR(vswp, "%s: Invalid node name: %s\n", __func__, node_name); return (MDEG_FAILURE); } if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { DERR(vswp, "%s: prop(cfg-handle) not found\n", __func__); return (MDEG_FAILURE); } /* is this the right instance of vsw? */ if (inst != vswp->regprop) { DERR(vswp, "%s: Invalid cfg-handle: %lx\n", __func__, inst); return (MDEG_FAILURE); } vsw_update_md_prop(vswp, mdp, node); return (MDEG_SUCCESS); } /* * Mdeg callback invoked for changes to the vsw-port nodes * under the vsw node. */ static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp) { vsw_t *vswp; int idx; md_t *mdp; mde_cookie_t node; uint64_t inst; int rv; if ((resp == NULL) || (cb_argp == NULL)) return (MDEG_FAILURE); vswp = (vsw_t *)cb_argp; D2(vswp, "%s: added %d : removed %d : curr matched %d" " : prev matched %d", __func__, resp->added.nelem, resp->removed.nelem, resp->match_curr.nelem, resp->match_prev.nelem); /* process added ports */ for (idx = 0; idx < resp->added.nelem; idx++) { mdp = resp->added.mdp; node = resp->added.mdep[idx]; D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) { cmn_err(CE_WARN, "!vsw%d: Unable to add new port " "(0x%lx), err=%d", vswp->instance, node, rv); } } /* process removed ports */ for (idx = 0; idx < resp->removed.nelem; idx++) { mdp = resp->removed.mdp; node = resp->removed.mdep[idx]; if (md_get_prop_val(mdp, node, id_propname, &inst)) { DERR(vswp, "%s: prop(%s) not found in port(%d)", __func__, id_propname, idx); continue; } D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); if (vsw_port_detach(vswp, inst) != 0) { cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld", vswp->instance, inst); } } for (idx = 0; idx < resp->match_curr.nelem; idx++) { (void) vsw_port_update(vswp, resp->match_curr.mdp, resp->match_curr.mdep[idx], resp->match_prev.mdp, resp->match_prev.mdep[idx]); } D1(vswp, "%s: exit", 
__func__); return (MDEG_SUCCESS); } /* * Scan the machine description for this instance of vsw * and read its properties. Called only from vsw_attach(). * Returns: 0 on success, 1 on failure. */ static int vsw_read_mdprops(vsw_t *vswp) { md_t *mdp = NULL; mde_cookie_t rootnode; mde_cookie_t *listp = NULL; uint64_t inst; uint64_t cfgh; char *name; int rv = 1; int num_nodes = 0; int num_devs = 0; int listsz = 0; int i; /* * In each 'virtual-device' node in the MD there is a * 'cfg-handle' property which is the MD's concept of * an instance number (this may be completely different from * the device drivers instance #). OBP reads that value and * stores it in the 'reg' property of the appropriate node in * the device tree. We first read this reg property and use this * to compare against the 'cfg-handle' property of vsw nodes * in MD to get to this specific vsw instance and then read * other properties that we are interested in. * We also cache the value of 'reg' property and use it later * to register callbacks with mdeg (see vsw_mdeg_register()) */ inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, DDI_PROP_DONTPASS, reg_propname, -1); if (inst == -1) { cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from " "OBP device tree", vswp->instance, reg_propname); return (rv); } vswp->regprop = inst; if ((mdp = md_get_handle()) == NULL) { DWARN(vswp, "%s: cannot init MD\n", __func__); return (rv); } num_nodes = md_node_count(mdp); ASSERT(num_nodes > 0); listsz = num_nodes * sizeof (mde_cookie_t); listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); rootnode = md_root_node(mdp); /* search for all "virtual_device" nodes */ num_devs = md_scan_dag(mdp, rootnode, md_find_name(mdp, vdev_propname), md_find_name(mdp, "fwd"), listp); if (num_devs <= 0) { DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs); goto vsw_readmd_exit; } /* * Now loop through the list of virtual-devices looking for * devices with name "virtual-network-switch" and for each * such device compare 
its instance with what we have from * the 'reg' property to find the right node in MD and then * read all its properties. */ for (i = 0; i < num_devs; i++) { if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { DWARN(vswp, "%s: name property not found\n", __func__); goto vsw_readmd_exit; } /* is this a virtual-network-switch? */ if (strcmp(name, vsw_propname) != 0) continue; if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { DWARN(vswp, "%s: cfg-handle property not found\n", __func__); goto vsw_readmd_exit; } /* is this the required instance of vsw? */ if (inst != cfgh) continue; /* now read all properties of this vsw instance */ rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]); break; } vsw_readmd_exit: kmem_free(listp, listsz); (void) md_fini_handle(mdp); return (rv); } /* * Read the initial start-of-day values from the specified MD node. */ static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node) { uint64_t macaddr = 0; D1(vswp, "%s: enter", __func__); if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) { return (1); } /* mac address for vswitch device itself */ if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", vswp->instance); return (1); } vsw_save_lmacaddr(vswp, macaddr); if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) { DWARN(vswp, "%s: Unable to read %s property from MD, " "defaulting to 'switched' mode", __func__, smode_propname); vswp->smode = VSW_LAYER2; } /* * Read the 'linkprop' property to know if this * vsw device wants to get physical link updates. 
*/ vsw_linkprop_read(vswp, mdp, node, &vswp->pls_update); /* read mtu */ vsw_mtu_read(vswp, mdp, node, &vswp->mtu); if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) { vswp->mtu = ETHERMTU; } vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) + VLAN_TAGSZ; /* read vlan id properties of this vsw instance */ vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid, &vswp->vids, &vswp->nvids, &vswp->default_vlan_id); /* read priority-ether-types */ vsw_read_pri_eth_types(vswp, mdp, node); /* read bandwidth property of this vsw instance */ vsw_bandwidth_read(vswp, mdp, node, &vswp->bandwidth); D1(vswp, "%s: exit", __func__); return (0); } /* * Read vlan id properties of the given MD node. * Arguments: * arg: device argument(vsw device or a port) * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port) * mdp: machine description * node: md node cookie * * Returns: * pvidp: port-vlan-id of the node * vidspp: list of vlan-ids of the node * nvidsp: # of vlan-ids in the list * default_idp: default-vlan-id of the node(if node is vsw device) */ static void vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp, uint16_t *default_idp) { vsw_t *vswp; vsw_port_t *portp; char *pvid_propname; char *vid_propname; uint_t nvids = 0; uint32_t vids_size; int rv; int i; uint64_t *data; uint64_t val; int size; int inst; if (type == VSW_LOCALDEV) { vswp = (vsw_t *)arg; pvid_propname = vsw_pvid_propname; vid_propname = vsw_vid_propname; inst = vswp->instance; } else if (type == VSW_VNETPORT) { portp = (vsw_port_t *)arg; vswp = portp->p_vswp; pvid_propname = port_pvid_propname; vid_propname = port_vid_propname; inst = portp->p_instance; } else { return; } if (type == VSW_LOCALDEV && default_idp != NULL) { rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val); if (rv != 0) { DWARN(vswp, "%s: prop(%s) not found", __func__, vsw_dvid_propname); *default_idp = vsw_default_vlan_id; } else { 
*default_idp = val & 0xFFF; D2(vswp, "%s: %s(%d): (%d)\n", __func__, vsw_dvid_propname, inst, *default_idp); } } rv = md_get_prop_val(mdp, node, pvid_propname, &val); if (rv != 0) { DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname); *pvidp = vsw_default_vlan_id; } else { *pvidp = val & 0xFFF; D2(vswp, "%s: %s(%d): (%d)\n", __func__, pvid_propname, inst, *pvidp); } rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data, &size); if (rv != 0) { D2(vswp, "%s: prop(%s) not found", __func__, vid_propname); size = 0; } else { size /= sizeof (uint64_t); } nvids = size; if (nvids != 0) { D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst); vids_size = sizeof (vsw_vlanid_t) * nvids; *vidspp = kmem_zalloc(vids_size, KM_SLEEP); for (i = 0; i < nvids; i++) { (*vidspp)[i].vl_vid = data[i] & 0xFFFF; (*vidspp)[i].vl_set = B_FALSE; D2(vswp, " %d ", (*vidspp)[i].vl_vid); } D2(vswp, "\n"); } *nvidsp = nvids; } static void vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp, mde_cookie_t node, uint64_t *bw) { int rv; uint64_t val; vsw_t *vswp; vswp = portp->p_vswp; rv = md_get_prop_val(mdp, node, port_maxbw_propname, &val); if (rv != 0) { *bw = 0; D3(vswp, "%s: prop(%s) not found\n", __func__, port_maxbw_propname); } else { *bw = val; D3(vswp, "%s: %s nodes found", __func__, port_maxbw_propname); } } /* * This function reads "priority-ether-types" property from md. This property * is used to enable support for priority frames. Applications which need * guaranteed and timely delivery of certain high priority frames to/from * a vnet or vsw within ldoms, should configure this property by providing * the ether type(s) for which the priority facility is needed. * Normal data frames are delivered over a ldc channel using the descriptor * ring mechanism which is constrained by factors such as descriptor ring size, * the rate at which the ring is processed at the peer ldc end point, etc. 
* The priority mechanism provides an Out-Of-Band path to send/receive frames * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the * descriptor ring path and enables a more reliable and timely delivery of * frames to the peer. */ static void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node) { int rv; uint16_t *types; uint64_t *data; int size; int i; size_t mblk_sz; rv = md_get_prop_data(mdp, node, pri_types_propname, (uint8_t **)&data, &size); if (rv != 0) { /* * Property may not exist if we are running pre-ldoms1.1 f/w. * Check if 'vsw_pri_eth_type' has been set in that case. */ if (vsw_pri_eth_type != 0) { size = sizeof (vsw_pri_eth_type); data = &vsw_pri_eth_type; } else { D3(vswp, "%s: prop(%s) not found", __func__, pri_types_propname); size = 0; } } if (size == 0) { vswp->pri_num_types = 0; return; } /* * we have some priority-ether-types defined; * allocate a table of these types and also * allocate a pool of mblks to transmit these * priority packets. */ size /= sizeof (uint64_t); vswp->pri_num_types = size; vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP); for (i = 0, types = vswp->pri_types; i < size; i++) { types[i] = data[i] & 0xFFFF; } mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7; (void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, NULL, &vswp->pri_tx_vmp); } static void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu) { int rv; int inst; uint64_t val; char *mtu_propname; mtu_propname = vsw_mtu_propname; inst = vswp->instance; rv = md_get_prop_val(mdp, node, mtu_propname, &val); if (rv != 0) { D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname); *mtu = vsw_ethermtu; } else { *mtu = val & 0xFFFF; D2(vswp, "%s: %s(%d): (%d)\n", __func__, mtu_propname, inst, *mtu); } } /* * Update the mtu of the vsw device. We first check if the device has been * plumbed and if so fail the mtu update. 
Otherwise, we continue to update the * new mtu and reset all ports to initiate handshake re-negotiation with peers * using the new mtu. */ static int vsw_mtu_update(vsw_t *vswp, uint32_t mtu) { int rv; WRITE_ENTER(&vswp->if_lockrw); if (vswp->if_state & VSW_IF_UP) { RW_EXIT(&vswp->if_lockrw); cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update" " as the device is plumbed\n", vswp->instance); return (EBUSY); } else { D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n", __func__, vswp->mtu, mtu); vswp->mtu = mtu; vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) + VLAN_TAGSZ; rv = mac_maxsdu_update(vswp->if_mh, mtu); if (rv != 0) { cmn_err(CE_NOTE, "!vsw%d: Unable to update mtu with mac" " layer\n", vswp->instance); } RW_EXIT(&vswp->if_lockrw); /* Reset ports to renegotiate with the new mtu */ vsw_reset_ports(vswp); } return (0); } static void vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, boolean_t *pls) { int rv; uint64_t val; char *linkpropname; linkpropname = vsw_linkprop_propname; rv = md_get_prop_val(mdp, node, linkpropname, &val); if (rv != 0) { D3(vswp, "%s: prop(%s) not found", __func__, linkpropname); *pls = B_FALSE; } else { *pls = (val & 0x1) ? 
B_TRUE : B_FALSE; D2(vswp, "%s: %s(%d): (%d)\n", __func__, linkpropname, vswp->instance, *pls); } } void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state) { READ_ENTER(&vswp->if_lockrw); if (vswp->if_state & VSW_IF_REG) { mac_link_update(vswp->if_mh, link_state); } RW_EXIT(&vswp->if_lockrw); } void vsw_physlink_state_update(vsw_t *vswp) { if (vswp->pls_update == B_TRUE) { vsw_mac_link_update(vswp, vswp->phys_link_state); } vsw_physlink_update_ports(vswp); } static void vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint64_t *bw) { /* read the vsw bandwidth from md */ int rv; uint64_t val; rv = md_get_prop_val(mdp, node, vsw_maxbw_propname, &val); if (rv != 0) { *bw = 0; D3(vswp, "%s: prop(%s) not found", __func__, vsw_maxbw_propname); } else { *bw = val; D3(vswp, "%s: %s(%d): (%ld)\n", __func__, vsw_maxbw_propname, vswp->instance, *bw); } } /* * Check to see if the relevant properties in the specified node have * changed, and if so take the appropriate action. * * If any of the properties are missing or invalid we don't take * any action, as this function should only be invoked when modifications * have been made to what we assume is a working configuration, which * we leave active. * * Note it is legal for this routine to be invoked even if none of the * properties in the port node within the MD have actually changed. */ static void vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) { char physname[LIFNAMSIZ]; char drv[LIFNAMSIZ]; uint_t ddi_instance; uint8_t new_smode; int i; uint64_t macaddr = 0; enum {MD_init = 0x1, MD_physname = 0x2, MD_macaddr = 0x4, MD_smode = 0x8, MD_vlans = 0x10, MD_mtu = 0x20, MD_pls = 0x40, MD_bw = 0x80} updated; int rv; uint16_t pvid; vsw_vlanid_t *vids; uint16_t nvids; uint32_t mtu; boolean_t pls_update; uint64_t maxbw; updated = MD_init; D1(vswp, "%s: enter", __func__); /* * Check if name of physical device in MD has changed. 
*/ if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { /* * Do basic sanity check on new device name/instance, * if its non NULL. It is valid for the device name to * have changed from a non NULL to a NULL value, i.e. * the vsw is being changed to 'routed' mode. */ if ((strlen(physname) != 0) && (ddi_parse(physname, drv, &ddi_instance) != DDI_SUCCESS)) { cmn_err(CE_WARN, "!vsw%d: physical device %s is not" " a valid device name/instance", vswp->instance, physname); goto fail_reconf; } if (strcmp(physname, vswp->physname)) { D2(vswp, "%s: device name changed from %s to %s", __func__, vswp->physname, physname); updated |= MD_physname; } else { D2(vswp, "%s: device name unchanged at %s", __func__, vswp->physname); } } else { cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " "device from updated MD.", vswp->instance); goto fail_reconf; } /* * Check if MAC address has changed. */ if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", vswp->instance); goto fail_reconf; } else { uint64_t maddr = macaddr; READ_ENTER(&vswp->if_lockrw); for (i = ETHERADDRL - 1; i >= 0; i--) { if (vswp->if_addr.ether_addr_octet[i] != (macaddr & 0xFF)) { D2(vswp, "%s: octet[%d] 0x%x != 0x%x", __func__, i, vswp->if_addr.ether_addr_octet[i], (macaddr & 0xFF)); updated |= MD_macaddr; macaddr = maddr; break; } macaddr >>= 8; } RW_EXIT(&vswp->if_lockrw); if (updated & MD_macaddr) { vsw_save_lmacaddr(vswp, macaddr); } } /* * Check if switching modes have changed. 
*/ if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) { cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", vswp->instance, smode_propname); goto fail_reconf; } else { if (new_smode != vswp->smode) { D2(vswp, "%s: switching mode changed from %d to %d", __func__, vswp->smode, new_smode); updated |= MD_smode; } } /* Read the vlan ids */ vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids, &nvids, NULL); /* Determine if there are any vlan id updates */ if ((pvid != vswp->pvid) || /* pvid changed? */ (nvids != vswp->nvids) || /* # of vids changed? */ ((nvids != 0) && (vswp->nvids != 0) && /* vids changed? */ !vsw_cmp_vids(vids, vswp->vids, nvids))) { updated |= MD_vlans; } /* Read mtu */ vsw_mtu_read(vswp, mdp, node, &mtu); if (mtu != vswp->mtu) { if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) { updated |= MD_mtu; } else { cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update" " as the specified value:%d is invalid\n", vswp->instance, mtu); } } /* * Read the 'linkprop' property. */ vsw_linkprop_read(vswp, mdp, node, &pls_update); if (pls_update != vswp->pls_update) { updated |= MD_pls; } /* Read bandwidth */ vsw_bandwidth_read(vswp, mdp, node, &maxbw); if (maxbw != vswp->bandwidth) { if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) { updated |= MD_bw; } else { cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth" " update as the specified value:%ld is invalid\n", vswp->instance, maxbw); } } /* * Now make any changes which are needed... */ if (updated & MD_pls) { /* save the updated property. */ vswp->pls_update = pls_update; if (pls_update == B_FALSE) { /* * Phys link state update is now disabled for this vsw * interface. If we had previously reported a link-down * to the stack, undo that by sending a link-up. */ if (vswp->phys_link_state == LINK_STATE_DOWN) { vsw_mac_link_update(vswp, LINK_STATE_UP); } } else { /* * Phys link state update is now enabled. Send up an * update based on the current phys link state. 
*/ if (vswp->smode & VSW_LAYER2) { vsw_mac_link_update(vswp, vswp->phys_link_state); } } } if (updated & (MD_physname | MD_smode | MD_mtu)) { /* * Stop any pending thread to setup switching mode. */ vsw_setup_switching_stop(vswp); /* Cleanup HybridIO */ vsw_hio_cleanup(vswp); /* * Remove unicst, mcst addrs of vsw interface * and ports from the physdev. This also closes * the corresponding mac clients. */ vsw_unset_addrs(vswp); /* * Stop, detach and close the old device.. */ mutex_enter(&vswp->mac_lock); vsw_mac_close(vswp); mutex_exit(&vswp->mac_lock); /* * Update phys name. */ if (updated & MD_physname) { cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", vswp->instance, vswp->physname, physname); (void) strncpy(vswp->physname, physname, strlen(physname) + 1); } /* * Update array with the new switch mode values. */ if (updated & MD_smode) { vswp->smode = new_smode; } /* Update mtu */ if (updated & MD_mtu) { rv = vsw_mtu_update(vswp, mtu); if (rv != 0) { goto fail_update; } } /* * ..and attach, start the new device. */ rv = vsw_setup_switching(vswp); if (rv == EAGAIN) { /* * Unable to setup switching mode. * As the error is EAGAIN, schedule a thread to retry * and return. Programming addresses of ports and * vsw interface will be done by the thread when the * switching setup completes successfully. */ if (vsw_setup_switching_start(vswp) != 0) { goto fail_update; } return; } else if (rv) { goto fail_update; } vsw_setup_switching_post_process(vswp); } else if (updated & MD_macaddr) { /* * We enter here if only MD_macaddr is exclusively updated. * If MD_physname and/or MD_smode are also updated, then * as part of that, we would have implicitly processed * MD_macaddr update (above). */ cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", vswp->instance, macaddr); READ_ENTER(&vswp->if_lockrw); if (vswp->if_state & VSW_IF_UP) { /* reconfigure with new address */ vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0); /* * Notify the MAC layer of the changed address. 
*/ mac_unicst_update(vswp->if_mh, (uint8_t *)&vswp->if_addr); } RW_EXIT(&vswp->if_lockrw); } if (updated & MD_vlans) { /* Remove existing vlan ids from the hash table. */ vsw_vlan_remove_ids(vswp, VSW_LOCALDEV); if (vswp->if_state & VSW_IF_UP) { vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids); } else { if (vswp->nvids != 0) { kmem_free(vswp->vids, sizeof (vsw_vlanid_t) * vswp->nvids); } vswp->vids = vids; vswp->nvids = nvids; vswp->pvid = pvid; } /* add these new vlan ids into hash table */ vsw_vlan_add_ids(vswp, VSW_LOCALDEV); } else { if (nvids != 0) { kmem_free(vids, sizeof (vsw_vlanid_t) * nvids); } } if (updated & MD_bw) { vsw_update_bandwidth(vswp, NULL, VSW_LOCALDEV, maxbw); } return; fail_reconf: cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); return; fail_update: cmn_err(CE_WARN, "!vsw%d: re-configuration failed", vswp->instance); } /* * Read the port's md properties. */ static int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, md_t *mdp, mde_cookie_t *node) { uint64_t ldc_id; uint8_t *addrp; int i, addrsz; int num_nodes = 0, nchan = 0; int listsz = 0; mde_cookie_t *listp = NULL; struct ether_addr ea; uint64_t macaddr; uint64_t inst = 0; uint64_t val; if (md_get_prop_val(mdp, *node, id_propname, &inst)) { DWARN(vswp, "%s: prop(%s) not found", __func__, id_propname); return (1); } /* * Find the channel endpoint node(s) (which should be under this * port node) which contain the channel id(s). 
*/ if ((num_nodes = md_node_count(mdp)) <= 0) { DERR(vswp, "%s: invalid number of nodes found (%d)", __func__, num_nodes); return (1); } D2(vswp, "%s: %d nodes found", __func__, num_nodes); /* allocate enough space for node list */ listsz = num_nodes * sizeof (mde_cookie_t); listp = kmem_zalloc(listsz, KM_SLEEP); nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname), md_find_name(mdp, "fwd"), listp); if (nchan <= 0) { DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname); kmem_free(listp, listsz); return (1); } D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname); /* use property from first node found */ if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) { DWARN(vswp, "%s: prop(%s) not found\n", __func__, id_propname); kmem_free(listp, listsz); return (1); } /* don't need list any more */ kmem_free(listp, listsz); D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id); /* read mac-address property */ if (md_get_prop_data(mdp, *node, remaddr_propname, &addrp, &addrsz)) { DWARN(vswp, "%s: prop(%s) not found", __func__, remaddr_propname); return (1); } if (addrsz < ETHERADDRL) { DWARN(vswp, "%s: invalid address size", __func__); return (1); } macaddr = *((uint64_t *)addrp); D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr); for (i = ETHERADDRL - 1; i >= 0; i--) { ea.ether_addr_octet[i] = macaddr & 0xFF; macaddr >>= 8; } /* now update all properties into the port */ portp->p_vswp = vswp; portp->p_instance = inst; portp->addr_set = B_FALSE; ether_copy(&ea, &portp->p_macaddr); if (nchan > VSW_PORT_MAX_LDCS) { D2(vswp, "%s: using first of %d ldc ids", __func__, nchan); nchan = VSW_PORT_MAX_LDCS; } portp->num_ldcs = nchan; portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP); bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan); /* read vlan id properties of this port node */ vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid, &portp->vids, &portp->nvids, NULL); /* Check if hybrid property is 
present */ if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) { D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); portp->p_hio_enabled = B_TRUE; } else { portp->p_hio_enabled = B_FALSE; } /* * Port hio capability determined after version * negotiation, i.e., when we know the peer is HybridIO capable. */ portp->p_hio_capable = B_FALSE; /* Read bandwidth of this port */ vsw_port_read_bandwidth(portp, mdp, *node, &portp->p_bandwidth); return (0); } /* * Add a new port to the system. * * Returns 0 on success, 1 on failure. */ int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) { vsw_port_t *portp; int rv; portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP); rv = vsw_port_read_props(portp, vswp, mdp, node); if (rv != 0) { kmem_free(portp, sizeof (*portp)); return (1); } rv = vsw_port_attach(portp); if (rv != 0) { DERR(vswp, "%s: failed to attach port", __func__); return (1); } return (0); } static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex) { uint64_t cport_num; uint64_t pport_num; vsw_port_list_t *plistp; vsw_port_t *portp; uint16_t pvid; vsw_vlanid_t *vids; uint16_t nvids; uint64_t val; boolean_t hio_enabled = B_FALSE; uint64_t maxbw; enum {P_MD_init = 0x1, P_MD_vlans = 0x2, P_MD_hio = 0x4, P_MD_maxbw = 0x8} updated; updated = P_MD_init; /* * For now, we get port updates only if vlan ids changed. * We read the port num and do some sanity check. 
*/ if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) { return (1); } if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) { return (1); } if (cport_num != pport_num) return (1); plistp = &(vswp->plist); READ_ENTER(&plistp->lockrw); portp = vsw_lookup_port(vswp, cport_num); if (portp == NULL) { RW_EXIT(&plistp->lockrw); return (1); } /* Read the vlan ids */ vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid, &vids, &nvids, NULL); /* Determine if there are any vlan id updates */ if ((pvid != portp->pvid) || /* pvid changed? */ (nvids != portp->nvids) || /* # of vids changed? */ ((nvids != 0) && (portp->nvids != 0) && /* vids changed? */ !vsw_cmp_vids(vids, portp->vids, nvids))) { updated |= P_MD_vlans; } /* Check if hybrid property is present */ if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) { D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); hio_enabled = B_TRUE; } if (portp->p_hio_enabled != hio_enabled) { updated |= P_MD_hio; } /* Check if maxbw property is present */ vsw_port_read_bandwidth(portp, curr_mdp, curr_mdex, &maxbw); if (maxbw != portp->p_bandwidth) { if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) { updated |= P_MD_maxbw; } else { cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth" " update for port %d as the specified value:%ld" " is invalid\n", vswp->instance, portp->p_instance, maxbw); } } if (updated & P_MD_vlans) { /* Remove existing vlan ids from the hash table. 
*/ vsw_vlan_remove_ids(portp, VSW_VNETPORT); /* Reconfigure vlans with network device */ vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids); /* add these new vlan ids into hash table */ vsw_vlan_add_ids(portp, VSW_VNETPORT); /* reset the port if it is vlan unaware (ver < 1.3) */ vsw_vlan_unaware_port_reset(portp); } if (updated & P_MD_hio) { vsw_hio_port_update(portp, hio_enabled); } if (updated & P_MD_maxbw) { vsw_update_bandwidth(NULL, portp, VSW_VNETPORT, maxbw); } RW_EXIT(&plistp->lockrw); return (0); } /* * vsw_mac_rx -- A common function to send packets to the interface. * By default this function check if the interface is UP or not, the * rest of the behaviour depends on the flags as below: * * VSW_MACRX_PROMISC -- Check if the promisc mode set or not. * VSW_MACRX_COPYMSG -- Make a copy of the message(s). * VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack. */ void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, mblk_t *mp, vsw_macrx_flags_t flags) { mblk_t *mpt; D1(vswp, "%s:enter\n", __func__); READ_ENTER(&vswp->if_lockrw); /* Check if the interface is up */ if (!(vswp->if_state & VSW_IF_UP)) { RW_EXIT(&vswp->if_lockrw); /* Free messages only if FREEMSG flag specified */ if (flags & VSW_MACRX_FREEMSG) { freemsgchain(mp); } D1(vswp, "%s:exit\n", __func__); return; } /* * If PROMISC flag is passed, then check if * the interface is in the PROMISC mode. * If not, drop the messages. */ if (flags & VSW_MACRX_PROMISC) { if (!(vswp->if_state & VSW_IF_PROMISC)) { RW_EXIT(&vswp->if_lockrw); /* Free messages only if FREEMSG flag specified */ if (flags & VSW_MACRX_FREEMSG) { freemsgchain(mp); } D1(vswp, "%s:exit\n", __func__); return; } } RW_EXIT(&vswp->if_lockrw); /* * If COPYMSG flag is passed, then make a copy * of the message chain and send up the copy. 
*/ if (flags & VSW_MACRX_COPYMSG) { mp = copymsgchain(mp); if (mp == NULL) { D1(vswp, "%s:exit\n", __func__); return; } } D2(vswp, "%s: sending up stack", __func__); mpt = NULL; (void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt); if (mp != NULL) { mac_rx(vswp->if_mh, mrh, mp); } D1(vswp, "%s:exit\n", __func__); } /* copy mac address of vsw into soft state structure */ static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr) { int i; WRITE_ENTER(&vswp->if_lockrw); for (i = ETHERADDRL - 1; i >= 0; i--) { vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF; macaddr >>= 8; } RW_EXIT(&vswp->if_lockrw); } /* Compare VLAN ids, array size expected to be same. */ static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids) { int i, j; uint16_t vid; for (i = 0; i < nvids; i++) { vid = vids1[i].vl_vid; for (j = 0; j < nvids; j++) { if (vid == vids2[i].vl_vid) break; } if (j == nvids) { return (B_FALSE); } } return (B_TRUE); }