/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2010 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Implementation of the mac provider functionality for vnet using the * generic(default) transport layer of sun4v Logical Domain Channels(LDC). 
*/ /* Entry Points */ int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip, const uint8_t *macaddr, void **vgenhdl); int vgen_init_mdeg(void *arg); void vgen_uninit(void *arg); int vgen_dds_tx(void *arg, void *dmsg); int vgen_enable_intr(void *arg); int vgen_disable_intr(void *arg); mblk_t *vgen_rx_poll(void *arg, int bytes_to_pickup); static int vgen_start(void *arg); static void vgen_stop(void *arg); static mblk_t *vgen_tx(void *arg, mblk_t *mp); static int vgen_multicst(void *arg, boolean_t add, const uint8_t *mca); static int vgen_promisc(void *arg, boolean_t on); static int vgen_unicst(void *arg, const uint8_t *mca); static int vgen_stat(void *arg, uint_t stat, uint64_t *val); static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp); #ifdef VNET_IOC_DEBUG static int vgen_force_link_state(vgen_port_t *portp, int link_state); #endif /* Port/LDC Configuration */ static int vgen_read_mdprops(vgen_t *vgenp); static void vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex); static void vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node); static void vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu); static void vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, boolean_t *pls); static void vgen_detach_ports(vgen_t *vgenp); static void vgen_port_detach(vgen_port_t *portp); static void vgen_port_list_insert(vgen_port_t *portp); static void vgen_port_list_remove(vgen_port_t *portp); static vgen_port_t *vgen_port_lookup(vgen_portlist_t *plistp, int port_num); static int vgen_mdeg_reg(vgen_t *vgenp); static void vgen_mdeg_unreg(vgen_t *vgenp); static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp); static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp); static int vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex); static int vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex); static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, 
mde_cookie_t mdex); static int vgen_port_attach(vgen_port_t *portp); static void vgen_port_detach_mdeg(vgen_port_t *portp); static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex); static uint64_t vgen_port_stat(vgen_port_t *portp, uint_t stat); static void vgen_port_reset(vgen_port_t *portp); static void vgen_reset_vsw_port(vgen_t *vgenp); static int vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller); static void vgen_ldc_up(vgen_ldc_t *ldcp); static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id); static void vgen_ldc_detach(vgen_ldc_t *ldcp); static void vgen_port_init(vgen_port_t *portp); static void vgen_port_uninit(vgen_port_t *portp); static int vgen_ldc_init(vgen_ldc_t *ldcp); static void vgen_ldc_uninit(vgen_ldc_t *ldcp); static uint64_t vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat); /* I/O Processing */ static int vgen_portsend(vgen_port_t *portp, mblk_t *mp); static int vgen_ldcsend(void *arg, mblk_t *mp); static void vgen_ldcsend_pkt(void *arg, mblk_t *mp); static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg); static void vgen_tx_watchdog(void *arg); /* Dring Configuration */ static int vgen_create_dring(vgen_ldc_t *ldcp); static void vgen_destroy_dring(vgen_ldc_t *ldcp); static int vgen_map_dring(vgen_ldc_t *ldcp, void *pkt); static void vgen_unmap_dring(vgen_ldc_t *ldcp); /* VIO Message Processing */ static int vgen_handshake(vgen_ldc_t *ldcp); static int vgen_handshake_done(vgen_ldc_t *ldcp); static vgen_ldc_t *vh_nextphase(vgen_ldc_t *ldcp); static int vgen_handshake_phase2(vgen_ldc_t *ldcp); static int vgen_handshake_phase3(vgen_ldc_t *ldcp); static void vgen_setup_handshake_params(vgen_ldc_t *ldcp); static int vgen_send_version_negotiate(vgen_ldc_t *ldcp); static int vgen_send_attr_info(vgen_ldc_t *ldcp); static int vgen_send_rx_dring_reg(vgen_ldc_t *ldcp); static int vgen_send_tx_dring_reg(vgen_ldc_t *ldcp); static void vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, 
vio_dring_reg_msg_t *msg, uint8_t option); static int vgen_send_rdx_info(vgen_ldc_t *ldcp); static int vgen_send_dringdata(vgen_ldc_t *ldcp, uint32_t start, int32_t end); static int vgen_send_mcast_info(vgen_ldc_t *ldcp); static int vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); static int vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); static int vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg); static int vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg); static int vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); static int vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); static int vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); static int vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); static int vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); static int vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen); static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen); static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); static void vgen_handle_evt_up(vgen_ldc_t *ldcp); static int vgen_process_reset(vgen_ldc_t *ldcp, int flags); static int vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); static void vgen_hwatchdog(void *arg); static void vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp); static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp); static void vgen_link_update(vgen_t *vgenp, link_state_t link_state); /* VLANs */ static void vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp, uint16_t *default_idp); static void vgen_vlan_create_hash(vgen_port_t *portp); static void vgen_vlan_destroy_hash(vgen_port_t *portp); static void vgen_vlan_add_ids(vgen_port_t *portp); 
static void vgen_vlan_remove_ids(vgen_port_t *portp); static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid); static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp); static mblk_t *vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged, uint16_t vid); static void vgen_vlan_unaware_port_reset(vgen_port_t *portp); static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp); /* Exported functions */ int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller); int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller); void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen); void vgen_destroy_rxpools(void *arg); /* Externs */ extern void vnet_dds_rx(void *arg, void *dmsg); extern void vnet_dds_cleanup_hio(vnet_t *vnetp); extern int vnet_mtu_update(vnet_t *vnetp, uint32_t mtu); extern void vnet_link_update(vnet_t *vnetp, link_state_t link_state); extern int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen, boolean_t caller_holds_lock); extern void vgen_stop_msg_thread(vgen_ldc_t *ldcp); extern int vgen_create_tx_dring(vgen_ldc_t *ldcp); extern void vgen_destroy_tx_dring(vgen_ldc_t *ldcp); extern int vgen_map_rx_dring(vgen_ldc_t *ldcp, void *pkt); extern void vgen_unmap_rx_dring(vgen_ldc_t *ldcp); extern int vgen_create_rx_dring(vgen_ldc_t *ldcp); extern void vgen_destroy_rx_dring(vgen_ldc_t *ldcp); extern int vgen_map_tx_dring(vgen_ldc_t *ldcp, void *pkt); extern void vgen_unmap_tx_dring(vgen_ldc_t *ldcp); extern int vgen_map_data(vgen_ldc_t *ldcp, void *pkt); extern int vgen_handle_dringdata_shm(void *arg1, void *arg2); extern int vgen_handle_dringdata(void *arg1, void *arg2); extern int vgen_dringsend_shm(void *arg, mblk_t *mp); extern int vgen_dringsend(void *arg, mblk_t *mp); extern void vgen_ldc_msg_worker(void *arg); extern int vgen_send_dringack_shm(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start, int32_t end, uint8_t pstate); extern mblk_t 
*vgen_poll_rcv_shm(vgen_ldc_t *ldcp, int bytes_to_pickup); extern mblk_t *vgen_poll_rcv(vgen_ldc_t *ldcp, int bytes_to_pickup); extern int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp); #define VGEN_PRI_ETH_DEFINED(vgenp) ((vgenp)->pri_num_types != 0) #define LDC_LOCK(ldcp) \ mutex_enter(&((ldcp)->cblock));\ mutex_enter(&((ldcp)->rxlock));\ mutex_enter(&((ldcp)->wrlock));\ mutex_enter(&((ldcp)->txlock));\ mutex_enter(&((ldcp)->tclock)); #define LDC_UNLOCK(ldcp) \ mutex_exit(&((ldcp)->tclock));\ mutex_exit(&((ldcp)->txlock));\ mutex_exit(&((ldcp)->wrlock));\ mutex_exit(&((ldcp)->rxlock));\ mutex_exit(&((ldcp)->cblock)); #define VGEN_VER_EQ(ldcp, major, minor) \ ((ldcp)->local_hparams.ver_major == (major) && \ (ldcp)->local_hparams.ver_minor == (minor)) #define VGEN_VER_LT(ldcp, major, minor) \ (((ldcp)->local_hparams.ver_major < (major)) || \ ((ldcp)->local_hparams.ver_major == (major) && \ (ldcp)->local_hparams.ver_minor < (minor))) #define VGEN_VER_GTEQ(ldcp, major, minor) \ (((ldcp)->local_hparams.ver_major > (major)) || \ ((ldcp)->local_hparams.ver_major == (major) && \ (ldcp)->local_hparams.ver_minor >= (minor))) /* * Property names */ static char macaddr_propname[] = "mac-address"; static char rmacaddr_propname[] = "remote-mac-address"; static char channel_propname[] = "channel-endpoint"; static char reg_propname[] = "reg"; static char port_propname[] = "port"; static char swport_propname[] = "switch-port"; static char id_propname[] = "id"; static char vdev_propname[] = "virtual-device"; static char vnet_propname[] = "network"; static char pri_types_propname[] = "priority-ether-types"; static char vgen_pvid_propname[] = "port-vlan-id"; static char vgen_vid_propname[] = "vlan-id"; static char vgen_dvid_propname[] = "default-vlan-id"; static char port_pvid_propname[] = "remote-port-vlan-id"; static char port_vid_propname[] = "remote-vlan-id"; static char vgen_mtu_propname[] = "mtu"; static char vgen_linkprop_propname[] = "linkprop"; /* * VIO 
Protocol Version Info: * * The version specified below represents the version of protocol currently * supported in the driver. It means the driver can negotiate with peers with * versions <= this version. Here is a summary of the feature(s) that are * supported at each version of the protocol: * * 1.0 Basic VIO protocol. * 1.1 vDisk protocol update (no virtual network update). * 1.2 Support for priority frames (priority-ether-types). * 1.3 VLAN and HybridIO support. * 1.4 Jumbo Frame support. * 1.5 Link State Notification support with optional support * for Physical Link information. * 1.6 Support for RxDringData mode. */ static vgen_ver_t vgen_versions[VGEN_NUM_VER] = { {1, 6} }; /* Tunables */ uint32_t vgen_hwd_interval = 5; /* handshake watchdog freq in sec */ uint32_t vgen_ldcwr_retries = 10; /* max # of ldc_write() retries */ uint32_t vgen_ldcup_retries = 5; /* max # of ldc_up() retries */ uint32_t vgen_ldccl_retries = 5; /* max # of ldc_close() retries */ uint32_t vgen_tx_delay = 0x30; /* delay when tx descr not available */ uint32_t vgen_ldc_mtu = VGEN_LDC_MTU; /* ldc mtu */ uint32_t vgen_txwd_interval = VGEN_TXWD_INTERVAL; /* watchdog freq in msec */ uint32_t vgen_txwd_timeout = VGEN_TXWD_TIMEOUT; /* tx timeout in msec */ /* * Max # of channel resets allowed during handshake. */ uint32_t vgen_ldc_max_resets = 5; /* * We provide a tunable to enable RxDringData mode for versions >= 1.6. By * default, this tunable is set to 1 (VIO_TX_DRING). To enable RxDringData mode * set this tunable to 4 (VIO_RX_DRING_DATA). * See comments in vsw.c for details on the dring modes supported. */ uint8_t vgen_dring_mode = VIO_TX_DRING; /* * In RxDringData mode, # of buffers is determined by multiplying the # of * descriptors with the factor below. Note that the factor must be > 1; i.e, * the # of buffers must always be > # of descriptors. 
This is needed because, * while the shared memory buffers are sent up the stack on the receiver, the * sender needs additional buffers that can be used for further transmits. * See vgen_create_rx_dring() for details. */ uint32_t vgen_nrbufs_factor = 2; /* * Retry delay used while destroying rx mblk pools. Used in both Dring modes. */ int vgen_rxpool_cleanup_delay = 100000; /* 100ms */ /* * Delay when rx descr not ready; used in TxDring mode only. */ uint32_t vgen_recv_delay = 1; /* * Retry when rx descr not ready; used in TxDring mode only. */ uint32_t vgen_recv_retries = 10; /* * Max # of packets accumulated prior to sending them up. It is best * to keep this at 60% of the number of receive buffers. Used in TxDring mode * by the msg worker thread. Used in RxDringData mode while in interrupt mode * (not used in polled mode). */ uint32_t vgen_chain_len = (VGEN_NRBUFS * 0.6); /* * Internal tunables for receive buffer pools, that is, the size and number of * mblks for each pool. At least 3 sizes must be specified if these are used. * The sizes must be specified in increasing order. Non-zero value of the first * size will be used as a hint to use these values instead of the algorithm * that determines the sizes based on MTU. Used in TxDring mode only. */ uint32_t vgen_rbufsz1 = 0; uint32_t vgen_rbufsz2 = 0; uint32_t vgen_rbufsz3 = 0; uint32_t vgen_rbufsz4 = 0; uint32_t vgen_nrbufs1 = VGEN_NRBUFS; uint32_t vgen_nrbufs2 = VGEN_NRBUFS; uint32_t vgen_nrbufs3 = VGEN_NRBUFS; uint32_t vgen_nrbufs4 = VGEN_NRBUFS; /* * In the absence of "priority-ether-types" property in MD, the following * internal tunable can be set to specify a single priority ethertype. */ uint64_t vgen_pri_eth_type = 0; /* * Number of transmit priority buffers that are preallocated per device. * This number is chosen to be a small value to throttle transmission * of priority packets. Note: Must be a power of 2 for vio_create_mblks(). 
*/ uint32_t vgen_pri_tx_nmblks = 64; uint32_t vgen_vlan_nchains = 4; /* # of chains in vlan id hash table */ /* * Matching criteria passed to the MDEG to register interest * in changes to 'virtual-device' nodes (i.e. vnet nodes) identified * by their 'name' and 'cfg-handle' properties. */ static md_prop_match_t vdev_prop_match[] = { { MDET_PROP_STR, "name" }, { MDET_PROP_VAL, "cfg-handle" }, { MDET_LIST_END, NULL } }; static mdeg_node_match_t vdev_match = { "virtual-device", vdev_prop_match }; /* MD update matching structure */ static md_prop_match_t vport_prop_match[] = { { MDET_PROP_VAL, "id" }, { MDET_LIST_END, NULL } }; static mdeg_node_match_t vport_match = { "virtual-device-port", vport_prop_match }; /* Template for matching a particular vnet instance */ static mdeg_prop_spec_t vgen_prop_template[] = { { MDET_PROP_STR, "name", "network" }, { MDET_PROP_VAL, "cfg-handle", NULL }, { MDET_LIST_END, NULL, NULL } }; #define VGEN_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val) static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp); #ifdef VNET_IOC_DEBUG #define VGEN_M_CALLBACK_FLAGS (MC_IOCTL) #else #define VGEN_M_CALLBACK_FLAGS (0) #endif static mac_callbacks_t vgen_m_callbacks = { VGEN_M_CALLBACK_FLAGS, vgen_stat, vgen_start, vgen_stop, vgen_promisc, vgen_multicst, vgen_unicst, vgen_tx, NULL, vgen_ioctl, NULL, NULL }; /* Externs */ extern pri_t maxclsyspri; extern proc_t p0; extern uint32_t vnet_ethermtu; extern uint16_t vnet_default_vlan_id; #ifdef DEBUG #define DEBUG_PRINTF vgen_debug_printf extern int vnet_dbglevel; void vgen_debug_printf(const char *fname, vgen_t *vgenp, vgen_ldc_t *ldcp, const char *fmt, ...); /* -1 for all LDCs info, or ldc_id for a specific LDC info */ int vgendbg_ldcid = -1; /* Flags to simulate error conditions for debugging */ int vgen_inject_err_flag = 0; boolean_t vgen_inject_error(vgen_ldc_t *ldcp, int error) { if ((vgendbg_ldcid == ldcp->ldc_id) && (vgen_inject_err_flag & error)) { return (B_TRUE); } return 
(B_FALSE); } #endif /* * vgen_init() is called by an instance of vnet driver to initialize the * corresponding generic transport layer. This layer uses Logical Domain * Channels (LDCs) to communicate with the virtual switch in the service domain * and also with peer vnets in other guest domains in the system. * * Arguments: * vnetp: an opaque pointer to the vnet instance * regprop: frame to be transmitted * vnetdip: dip of the vnet device * macaddr: mac address of the vnet device * * Returns: * Sucess: a handle to the vgen instance (vgen_t) * Failure: NULL */ int vgen_init(void *vnetp, uint64_t regprop, dev_info_t *vnetdip, const uint8_t *macaddr, void **vgenhdl) { vgen_t *vgenp; int instance; int rv; char qname[TASKQ_NAMELEN]; if ((vnetp == NULL) || (vnetdip == NULL)) return (DDI_FAILURE); instance = ddi_get_instance(vnetdip); DBG1(NULL, NULL, "vnet(%d): enter\n", instance); vgenp = kmem_zalloc(sizeof (vgen_t), KM_SLEEP); vgenp->vnetp = vnetp; vgenp->instance = instance; vgenp->regprop = regprop; vgenp->vnetdip = vnetdip; bcopy(macaddr, &(vgenp->macaddr), ETHERADDRL); vgenp->phys_link_state = LINK_STATE_UNKNOWN; /* allocate multicast table */ vgenp->mctab = kmem_zalloc(VGEN_INIT_MCTAB_SIZE * sizeof (struct ether_addr), KM_SLEEP); vgenp->mccount = 0; vgenp->mcsize = VGEN_INIT_MCTAB_SIZE; mutex_init(&vgenp->lock, NULL, MUTEX_DRIVER, NULL); rw_init(&vgenp->vgenports.rwlock, NULL, RW_DRIVER, NULL); (void) snprintf(qname, TASKQ_NAMELEN, "rxpool_taskq%d", instance); if ((vgenp->rxp_taskq = ddi_taskq_create(vnetdip, qname, 1, TASKQ_DEFAULTPRI, 0)) == NULL) { cmn_err(CE_WARN, "!vnet%d: Unable to create rx pool task queue", instance); goto vgen_init_fail; } rv = vgen_read_mdprops(vgenp); if (rv != 0) { goto vgen_init_fail; } *vgenhdl = (void *)vgenp; DBG1(NULL, NULL, "vnet(%d): exit\n", instance); return (DDI_SUCCESS); vgen_init_fail: rw_destroy(&vgenp->vgenports.rwlock); mutex_destroy(&vgenp->lock); kmem_free(vgenp->mctab, VGEN_INIT_MCTAB_SIZE * sizeof (struct 
ether_addr)); if (VGEN_PRI_ETH_DEFINED(vgenp)) { kmem_free(vgenp->pri_types, sizeof (uint16_t) * vgenp->pri_num_types); (void) vio_destroy_mblks(vgenp->pri_tx_vmp); } if (vgenp->rxp_taskq != NULL) { ddi_taskq_destroy(vgenp->rxp_taskq); vgenp->rxp_taskq = NULL; } KMEM_FREE(vgenp); return (DDI_FAILURE); } int vgen_init_mdeg(void *arg) { vgen_t *vgenp = (vgen_t *)arg; /* register with MD event generator */ return (vgen_mdeg_reg(vgenp)); } /* * Called by vnet to undo the initializations done by vgen_init(). * The handle provided by generic transport during vgen_init() is the argument. */ void vgen_uninit(void *arg) { vgen_t *vgenp = (vgen_t *)arg; if (vgenp == NULL) { return; } DBG1(vgenp, NULL, "enter\n"); /* Unregister with MD event generator */ vgen_mdeg_unreg(vgenp); mutex_enter(&vgenp->lock); /* * Detach all ports from the device; note that the device should have * been unplumbed by this time (See vnet_unattach() for the sequence) * and thus vgen_stop() has already been invoked on all the ports. */ vgen_detach_ports(vgenp); /* * We now destroy the taskq used to clean up rx mblk pools that * couldn't be destroyed when the ports/channels were detached. * We implicitly wait for those tasks to complete in * ddi_taskq_destroy(). 
*/ if (vgenp->rxp_taskq != NULL) { ddi_taskq_destroy(vgenp->rxp_taskq); vgenp->rxp_taskq = NULL; } /* Free multicast table */ kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr)); /* Free pri_types table */ if (VGEN_PRI_ETH_DEFINED(vgenp)) { kmem_free(vgenp->pri_types, sizeof (uint16_t) * vgenp->pri_num_types); (void) vio_destroy_mblks(vgenp->pri_tx_vmp); } mutex_exit(&vgenp->lock); rw_destroy(&vgenp->vgenports.rwlock); mutex_destroy(&vgenp->lock); DBG1(vgenp, NULL, "exit\n"); KMEM_FREE(vgenp); } /* enable transmit/receive for the device */ int vgen_start(void *arg) { vgen_port_t *portp = (vgen_port_t *)arg; vgen_t *vgenp = portp->vgenp; DBG1(vgenp, NULL, "enter\n"); mutex_enter(&portp->lock); vgen_port_init(portp); portp->flags |= VGEN_STARTED; mutex_exit(&portp->lock); DBG1(vgenp, NULL, "exit\n"); return (DDI_SUCCESS); } /* stop transmit/receive */ void vgen_stop(void *arg) { vgen_port_t *portp = (vgen_port_t *)arg; vgen_t *vgenp = portp->vgenp; DBG1(vgenp, NULL, "enter\n"); mutex_enter(&portp->lock); if (portp->flags & VGEN_STARTED) { vgen_port_uninit(portp); portp->flags &= ~(VGEN_STARTED); } mutex_exit(&portp->lock); DBG1(vgenp, NULL, "exit\n"); } /* vgen transmit function */ static mblk_t * vgen_tx(void *arg, mblk_t *mp) { vgen_port_t *portp; int status; portp = (vgen_port_t *)arg; status = vgen_portsend(portp, mp); if (status != VGEN_SUCCESS) { /* failure */ return (mp); } /* success */ return (NULL); } /* * This function provides any necessary tagging/untagging of the frames * that are being transmitted over the port. It first verifies the vlan * membership of the destination(port) and drops the packet if the * destination doesn't belong to the given vlan. * * Arguments: * portp: port over which the frames should be transmitted * mp: frame to be transmitted * is_tagged: * B_TRUE: indicates frame header contains the vlan tag already. * B_FALSE: indicates frame is untagged. * vid: vlan in which the frame should be transmitted. 
* * Returns: * Sucess: frame(mblk_t *) after doing the necessary tag/untag. * Failure: NULL */ static mblk_t * vgen_vlan_frame_fixtag(vgen_port_t *portp, mblk_t *mp, boolean_t is_tagged, uint16_t vid) { vgen_t *vgenp; boolean_t dst_tagged; int rv; vgenp = portp->vgenp; /* * If the packet is going to a vnet: * Check if the destination vnet is in the same vlan. * Check the frame header if tag or untag is needed. * * We do not check the above conditions if the packet is going to vsw: * vsw must be present implicitly in all the vlans that a vnet device * is configured into; even if vsw itself is not assigned to those * vlans as an interface. For instance, the packet might be destined * to another vnet(indirectly through vsw) or to an external host * which is in the same vlan as this vnet and vsw itself may not be * present in that vlan. Similarly packets going to vsw must be * always tagged(unless in the default-vlan) if not already tagged, * as we do not know the final destination. This is needed because * vsw must always invoke its switching function only after tagging * the packet; otherwise after switching function determines the * destination we cannot figure out if the destination belongs to the * the same vlan that the frame originated from and if it needs tag/ * untag. Note that vsw will tag the packet itself when it receives * it over the channel from a client if needed. However, that is * needed only in the case of vlan unaware clients such as obp or * earlier versions of vnet. * */ if (portp != vgenp->vsw_portp) { /* * Packet going to a vnet. Check if the destination vnet is in * the same vlan. Then check the frame header if tag/untag is * needed. */ rv = vgen_vlan_lookup(portp->vlan_hashp, vid); if (rv == B_FALSE) { /* drop the packet */ freemsg(mp); return (NULL); } /* is the destination tagged or untagged in this vlan? */ (vid == portp->pvid) ? 
(dst_tagged = B_FALSE) : (dst_tagged = B_TRUE); if (is_tagged == dst_tagged) { /* no tagging/untagging needed */ return (mp); } if (is_tagged == B_TRUE) { /* frame is tagged; destination needs untagged */ mp = vnet_vlan_remove_tag(mp); return (mp); } /* (is_tagged == B_FALSE): fallthru to tag tx packet: */ } /* * Packet going to a vnet needs tagging. * OR * If the packet is going to vsw, then it must be tagged in all cases: * unknown unicast, broadcast/multicast or to vsw interface. */ if (is_tagged == B_FALSE) { mp = vnet_vlan_insert_tag(mp, vid); } return (mp); } /* transmit packets over the given port */ static int vgen_portsend(vgen_port_t *portp, mblk_t *mp) { vgen_ldc_t *ldcp; int status; int rv = VGEN_SUCCESS; vgen_t *vgenp = portp->vgenp; vnet_t *vnetp = vgenp->vnetp; boolean_t is_tagged; boolean_t dec_refcnt = B_FALSE; uint16_t vlan_id; struct ether_header *ehp; if (portp == NULL) { return (VGEN_FAILURE); } if (portp->use_vsw_port) { (void) atomic_inc_32(&vgenp->vsw_port_refcnt); portp = portp->vgenp->vsw_portp; ASSERT(portp != NULL); dec_refcnt = B_TRUE; } /* * Determine the vlan id that the frame belongs to. */ ehp = (struct ether_header *)mp->b_rptr; is_tagged = vgen_frame_lookup_vid(vnetp, ehp, &vlan_id); if (vlan_id == vnetp->default_vlan_id) { /* Frames in default vlan must be untagged */ ASSERT(is_tagged == B_FALSE); /* * If the destination is a vnet-port verify it belongs to the * default vlan; otherwise drop the packet. We do not need * this check for vsw-port, as it should implicitly belong to * this vlan; see comments in vgen_vlan_frame_fixtag(). 
*/ if (portp != vgenp->vsw_portp && portp->pvid != vnetp->default_vlan_id) { freemsg(mp); goto portsend_ret; } } else { /* frame not in default-vlan */ mp = vgen_vlan_frame_fixtag(portp, mp, is_tagged, vlan_id); if (mp == NULL) { goto portsend_ret; } } ldcp = portp->ldcp; status = ldcp->tx(ldcp, mp); if (status != VGEN_TX_SUCCESS) { rv = VGEN_FAILURE; } portsend_ret: if (dec_refcnt == B_TRUE) { (void) atomic_dec_32(&vgenp->vsw_port_refcnt); } return (rv); } /* * Wrapper function to transmit normal and/or priority frames over the channel. */ static int vgen_ldcsend(void *arg, mblk_t *mp) { vgen_ldc_t *ldcp = (vgen_ldc_t *)arg; int status; struct ether_header *ehp; vgen_t *vgenp = LDC_TO_VGEN(ldcp); uint32_t num_types; uint16_t *types; int i; ASSERT(VGEN_PRI_ETH_DEFINED(vgenp)); num_types = vgenp->pri_num_types; types = vgenp->pri_types; ehp = (struct ether_header *)mp->b_rptr; for (i = 0; i < num_types; i++) { if (ehp->ether_type == types[i]) { /* priority frame, use pri tx function */ vgen_ldcsend_pkt(ldcp, mp); return (VGEN_SUCCESS); } } if (ldcp->tx_dringdata == NULL) { freemsg(mp); return (VGEN_SUCCESS); } status = ldcp->tx_dringdata(ldcp, mp); return (status); } /* * This function transmits the frame in the payload of a raw data * (VIO_PKT_DATA) message. Thus, it provides an Out-Of-Band path to * send special frames with high priorities, without going through * the normal data path which uses descriptor ring mechanism. 
*/ static void vgen_ldcsend_pkt(void *arg, mblk_t *mp) { vgen_ldc_t *ldcp = (vgen_ldc_t *)arg; vio_raw_data_msg_t *pkt; mblk_t *bp; mblk_t *nmp = NULL; vio_mblk_t *vmp; caddr_t dst; uint32_t mblksz; uint32_t size; uint32_t nbytes; int rv; vgen_t *vgenp = LDC_TO_VGEN(ldcp); vgen_stats_t *statsp = &ldcp->stats; /* drop the packet if ldc is not up or handshake is not done */ if (ldcp->ldc_status != LDC_UP) { (void) atomic_inc_32(&statsp->tx_pri_fail); DWARN(vgenp, ldcp, "status(%d), dropping packet\n", ldcp->ldc_status); goto send_pkt_exit; } if (ldcp->hphase != VH_DONE) { (void) atomic_inc_32(&statsp->tx_pri_fail); DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n", ldcp->hphase); goto send_pkt_exit; } size = msgsize(mp); /* frame size bigger than available payload len of raw data msg ? */ if (size > (size_t)(ldcp->msglen - VIO_PKT_DATA_HDRSIZE)) { (void) atomic_inc_32(&statsp->tx_pri_fail); DWARN(vgenp, ldcp, "invalid size(%d)\n", size); goto send_pkt_exit; } if (size < ETHERMIN) size = ETHERMIN; /* alloc space for a raw data message */ vmp = vio_allocb(vgenp->pri_tx_vmp); if (vmp == NULL) { (void) atomic_inc_32(&statsp->tx_pri_fail); DWARN(vgenp, ldcp, "vio_allocb failed\n"); goto send_pkt_exit; } else { nmp = vmp->mp; } pkt = (vio_raw_data_msg_t *)nmp->b_rptr; /* copy frame into the payload of raw data message */ dst = (caddr_t)pkt->data; for (bp = mp; bp != NULL; bp = bp->b_cont) { mblksz = MBLKL(bp); bcopy(bp->b_rptr, dst, mblksz); dst += mblksz; } vmp->state = VIO_MBLK_HAS_DATA; /* setup the raw data msg */ pkt->tag.vio_msgtype = VIO_TYPE_DATA; pkt->tag.vio_subtype = VIO_SUBTYPE_INFO; pkt->tag.vio_subtype_env = VIO_PKT_DATA; pkt->tag.vio_sid = ldcp->local_sid; nbytes = VIO_PKT_DATA_HDRSIZE + size; /* send the msg over ldc */ rv = vgen_sendmsg(ldcp, (caddr_t)pkt, nbytes, B_FALSE); if (rv != VGEN_SUCCESS) { (void) atomic_inc_32(&statsp->tx_pri_fail); DWARN(vgenp, ldcp, "Error sending priority frame\n"); if (rv == ECONNRESET) { (void) vgen_handle_evt_reset(ldcp, 
VGEN_OTHER); } goto send_pkt_exit; } /* update stats */ (void) atomic_inc_64(&statsp->tx_pri_packets); (void) atomic_add_64(&statsp->tx_pri_bytes, size); send_pkt_exit: if (nmp != NULL) freemsg(nmp); freemsg(mp); } /* * enable/disable a multicast address * note that the cblock of the ldc channel connected to the vsw is used for * synchronization of the mctab. */ int vgen_multicst(void *arg, boolean_t add, const uint8_t *mca) { vgen_t *vgenp; vnet_mcast_msg_t mcastmsg; vio_msg_tag_t *tagp; vgen_port_t *portp; vgen_ldc_t *ldcp; struct ether_addr *addrp; int rv = DDI_FAILURE; uint32_t i; portp = (vgen_port_t *)arg; vgenp = portp->vgenp; if (portp->is_vsw_port != B_TRUE) { return (DDI_SUCCESS); } addrp = (struct ether_addr *)mca; tagp = &mcastmsg.tag; bzero(&mcastmsg, sizeof (mcastmsg)); ldcp = portp->ldcp; if (ldcp == NULL) { return (DDI_FAILURE); } mutex_enter(&ldcp->cblock); if (ldcp->hphase == VH_DONE) { /* * If handshake is done, send a msg to vsw to add/remove * the multicast address. Otherwise, we just update this * mcast address in our table and the table will be sync'd * with vsw when handshake completes. 
*/ tagp->vio_msgtype = VIO_TYPE_CTRL; tagp->vio_subtype = VIO_SUBTYPE_INFO; tagp->vio_subtype_env = VNET_MCAST_INFO; tagp->vio_sid = ldcp->local_sid; bcopy(mca, &(mcastmsg.mca), ETHERADDRL); mcastmsg.set = add; mcastmsg.count = 1; if (vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (mcastmsg), B_FALSE) != VGEN_SUCCESS) { DWARN(vgenp, ldcp, "vgen_sendmsg failed\n"); rv = DDI_FAILURE; goto vgen_mcast_exit; } } if (add) { /* expand multicast table if necessary */ if (vgenp->mccount >= vgenp->mcsize) { struct ether_addr *newtab; uint32_t newsize; newsize = vgenp->mcsize * 2; newtab = kmem_zalloc(newsize * sizeof (struct ether_addr), KM_NOSLEEP); if (newtab == NULL) goto vgen_mcast_exit; bcopy(vgenp->mctab, newtab, vgenp->mcsize * sizeof (struct ether_addr)); kmem_free(vgenp->mctab, vgenp->mcsize * sizeof (struct ether_addr)); vgenp->mctab = newtab; vgenp->mcsize = newsize; } /* add address to the table */ vgenp->mctab[vgenp->mccount++] = *addrp; } else { /* delete address from the table */ for (i = 0; i < vgenp->mccount; i++) { if (ether_cmp(addrp, &(vgenp->mctab[i])) == 0) { /* * If there's more than one address in this * table, delete the unwanted one by moving * the last one in the list over top of it; * otherwise, just remove it. 
*/ if (vgenp->mccount > 1) { vgenp->mctab[i] = vgenp->mctab[vgenp->mccount-1]; } vgenp->mccount--; break; } } } rv = DDI_SUCCESS; vgen_mcast_exit: mutex_exit(&ldcp->cblock); return (rv); } /* set or clear promiscuous mode on the device */ static int vgen_promisc(void *arg, boolean_t on) { _NOTE(ARGUNUSED(arg, on)) return (DDI_SUCCESS); } /* set the unicast mac address of the device */ static int vgen_unicst(void *arg, const uint8_t *mca) { _NOTE(ARGUNUSED(arg, mca)) return (DDI_SUCCESS); } /* get device statistics */ int vgen_stat(void *arg, uint_t stat, uint64_t *val) { vgen_port_t *portp = (vgen_port_t *)arg; *val = vgen_port_stat(portp, stat); return (0); } /* vgen internal functions */ /* detach all ports from the device */ static void vgen_detach_ports(vgen_t *vgenp) { vgen_port_t *portp; vgen_portlist_t *plistp; plistp = &(vgenp->vgenports); WRITE_ENTER(&plistp->rwlock); while ((portp = plistp->headp) != NULL) { vgen_port_detach(portp); } RW_EXIT(&plistp->rwlock); } /* * detach the given port. */ static void vgen_port_detach(vgen_port_t *portp) { vgen_t *vgenp; int port_num; vgenp = portp->vgenp; port_num = portp->port_num; DBG1(vgenp, NULL, "port(%d):enter\n", port_num); /* * If this port is connected to the vswitch, then * potentially there could be ports that may be using * this port to transmit packets. To address this do * the following: * - First set vgenp->vsw_portp to NULL, so that * its not used after that. * - Then wait for the refcnt to go down to 0. * - Now we can safely detach this port. 
*/ if (vgenp->vsw_portp == portp) { vgenp->vsw_portp = NULL; while (vgenp->vsw_port_refcnt > 0) { delay(drv_usectohz(vgen_tx_delay)); } (void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0); } if (portp->vhp != NULL) { vio_net_resource_unreg(portp->vhp); portp->vhp = NULL; } vgen_vlan_destroy_hash(portp); /* remove it from port list */ vgen_port_list_remove(portp); /* detach channels from this port */ vgen_ldc_detach(portp->ldcp); if (portp->num_ldcs != 0) { kmem_free(portp->ldc_ids, portp->num_ldcs * sizeof (uint64_t)); portp->num_ldcs = 0; } mutex_destroy(&portp->lock); KMEM_FREE(portp); DBG1(vgenp, NULL, "port(%d):exit\n", port_num); } /* add a port to port list */ static void vgen_port_list_insert(vgen_port_t *portp) { vgen_portlist_t *plistp; vgen_t *vgenp; vgenp = portp->vgenp; plistp = &(vgenp->vgenports); if (plistp->headp == NULL) { plistp->headp = portp; } else { plistp->tailp->nextp = portp; } plistp->tailp = portp; portp->nextp = NULL; } /* remove a port from port list */ static void vgen_port_list_remove(vgen_port_t *portp) { vgen_port_t *prevp; vgen_port_t *nextp; vgen_portlist_t *plistp; vgen_t *vgenp; vgenp = portp->vgenp; plistp = &(vgenp->vgenports); if (plistp->headp == NULL) return; if (portp == plistp->headp) { plistp->headp = portp->nextp; if (portp == plistp->tailp) plistp->tailp = plistp->headp; } else { for (prevp = plistp->headp; ((nextp = prevp->nextp) != NULL) && (nextp != portp); prevp = nextp) ; if (nextp == portp) { prevp->nextp = portp->nextp; } if (portp == plistp->tailp) plistp->tailp = prevp; } } /* lookup a port in the list based on port_num */ static vgen_port_t * vgen_port_lookup(vgen_portlist_t *plistp, int port_num) { vgen_port_t *portp = NULL; for (portp = plistp->headp; portp != NULL; portp = portp->nextp) { if (portp->port_num == port_num) { break; } } return (portp); } static void vgen_port_init(vgen_port_t *portp) { /* Add the port to the specified vlans */ vgen_vlan_add_ids(portp); /* Bring up the channel */ (void) 
vgen_ldc_init(portp->ldcp);
}

/*
 * Stop a port: shut down its channel and then remove the port from the
 * vlans recorded in its vlan hash table.
 */
static void
vgen_port_uninit(vgen_port_t *portp)
{
	vgen_ldc_uninit(portp->ldcp);

	/* remove the port from vlans it has been assigned to */
	vgen_vlan_remove_ids(portp);
}

/*
 * Scan the machine description for this instance of vnet
 * and read its properties. Called only from vgen_init().
 * Returns: 0 on success, 1 on failure.
 */
static int
vgen_read_mdprops(vgen_t *vgenp)
{
	vnet_t		*vnetp = vgenp->vnetp;
	md_t		*mdp = NULL;
	mde_cookie_t	rootnode;
	mde_cookie_t	*listp = NULL;
	uint64_t	cfgh;
	char		*name;
	int		rv = 1;		/* failure until the node is found */
	int		num_nodes = 0;
	int		num_devs = 0;
	int		listsz = 0;
	int		i;

	if ((mdp = md_get_handle()) == NULL) {
		return (rv);
	}

	num_nodes = md_node_count(mdp);
	ASSERT(num_nodes > 0);

	/* scratch list sized to hold a cookie for every node in the MD */
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP);

	rootnode = md_root_node(mdp);

	/* search for all "virtual_device" nodes */
	num_devs = md_scan_dag(mdp, rootnode,
	    md_find_name(mdp, vdev_propname),
	    md_find_name(mdp, "fwd"), listp);
	if (num_devs <= 0) {
		goto vgen_readmd_exit;
	}

	/*
	 * Now loop through the list of virtual-devices looking for
	 * devices with name "network" and for each such device compare
	 * its instance with what we have from the 'reg' property to
	 * find the right node in MD and then read all its properties.
	 */
	for (i = 0; i < num_devs; i++) {

		if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) {
			goto vgen_readmd_exit;
		}

		/* is this a "network" device? */
		if (strcmp(name, vnet_propname) != 0)
			continue;

		if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) {
			goto vgen_readmd_exit;
		}

		/* is this the required instance of vnet? */
		if (vgenp->regprop != cfgh)
			continue;

		/*
		 * Read the 'linkprop' property to know if this vnet
		 * device should get physical link updates from vswitch.
		 */
		vgen_linkprop_read(vgenp, mdp, listp[i],
		    &vnetp->pls_update);

		/*
		 * Read the mtu. Note that we set the mtu of vnet device within
		 * this routine itself, after validating the range.
		 */
		vgen_mtu_read(vgenp, mdp, listp[i], &vnetp->mtu);
		if (vnetp->mtu < ETHERMTU || vnetp->mtu > VNET_MAX_MTU) {
			vnetp->mtu = ETHERMTU;
		}
		vgenp->max_frame_size = vnetp->mtu +
		    sizeof (struct ether_header) + VLAN_TAGSZ;

		/* read priority ether types */
		vgen_read_pri_eth_types(vgenp, mdp, listp[i]);

		/* read vlan id properties of this vnet instance */
		vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, listp[i],
		    &vnetp->pvid, &vnetp->vids, &vnetp->nvids,
		    &vnetp->default_vlan_id);

		rv = 0;
		break;
	}

vgen_readmd_exit:

	/* common exit: release the scratch list and the MD handle */
	kmem_free(listp, listsz);
	(void) md_fini_handle(mdp);
	return (rv);
}

/*
 * Read vlan id properties of the given MD node.
 * Arguments:
 *   arg:          device argument(vnet device or a port)
 *   type:         type of arg; VGEN_LOCAL(vnet device) or VGEN_PEER(port)
 *   mdp:          machine description
 *   node:         md node cookie
 *
 * Returns:
 *   pvidp:        port-vlan-id of the node
 *   vidspp:       list of vlan-ids of the node
 *   nvidsp:       # of vlan-ids in the list
 *   default_idp:  default-vlan-id of the node(if node is vnet device)
 *
 * Notes:
 *   - *vidspp is allocated here (KM_SLEEP) only when the node has a
 *     non-empty vlan-id list; it is left untouched when *nvidsp is 0.
 *   - For any other value of 'type' the function returns without writing
 *     to any of the output arguments.
 */
static void
vgen_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node,
    uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp,
    uint16_t *default_idp)
{
	vgen_t		*vgenp;
	vnet_t		*vnetp;
	vgen_port_t	*portp;
	char		*pvid_propname;
	char		*vid_propname;
	uint_t		nvids;
	uint32_t	vids_size;
	int		rv;
	int		i;
	uint64_t	*data;
	uint64_t	val;
	int		size;
	int		inst;

	/* select the property names and instance number used for logging */
	if (type == VGEN_LOCAL) {

		vgenp = (vgen_t *)arg;
		vnetp = vgenp->vnetp;
		pvid_propname = vgen_pvid_propname;
		vid_propname = vgen_vid_propname;
		inst = vnetp->instance;

	} else if (type == VGEN_PEER) {

		portp = (vgen_port_t *)arg;
		vgenp = portp->vgenp;
		vnetp = vgenp->vnetp;
		pvid_propname = port_pvid_propname;
		vid_propname = port_vid_propname;
		inst = portp->port_num;

	} else {
		return;
	}

	/* the default vlan id is read only for the vnet device itself */
	if (type == VGEN_LOCAL && default_idp != NULL) {
		rv = md_get_prop_val(mdp, node, vgen_dvid_propname, &val);
		if (rv != 0) {
			DWARN(vgenp, NULL, "prop(%s) not found",
			    vgen_dvid_propname);

			*default_idp = vnet_default_vlan_id;
		} else {
			*default_idp = val & 0xFFF;
			DBG2(vgenp, NULL, "%s(%d): (%d)\n", vgen_dvid_propname,
			    inst, *default_idp);
		}
	}

	rv = md_get_prop_val(mdp, node, pvid_propname, &val);
	if (rv != 0) {
		DWARN(vgenp, NULL, "prop(%s) not found", pvid_propname);
		*pvidp = vnet_default_vlan_id;
	} else {
		*pvidp = val & 0xFFF;
		DBG2(vgenp, NULL, "%s(%d): (%d)\n",
		    pvid_propname, inst, *pvidp);
	}

	rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data,
	    &size);
	if (rv != 0) {
		DBG2(vgenp, NULL, "prop(%s) not found", vid_propname);
		size = 0;
	} else {
		/* convert the byte count into a count of 64-bit entries */
		size /= sizeof (uint64_t);
	}
	nvids = size;

	if (nvids != 0) {
		DBG2(vgenp, NULL, "%s(%d): ", vid_propname, inst);
		vids_size = sizeof (uint16_t) * nvids;
		*vidspp = kmem_zalloc(vids_size, KM_SLEEP);
		for (i = 0; i < nvids; i++) {
			/* keep the low 16 bits of each 64-bit entry */
			(*vidspp)[i] = data[i] & 0xFFFF;
			DBG2(vgenp, NULL, " %d ", (*vidspp)[i]);
		}
		DBG2(vgenp, NULL, "\n");
	}

	*nvidsp = nvids;
}

/*
 * Create a vlan id hash table for the given port.
 * The table is named "port<port_num>-vlan-hash" and is created with
 * vgen_vlan_nchains chains.
 */
static void
vgen_vlan_create_hash(vgen_port_t *portp)
{
	char		hashname[MAXNAMELEN];

	(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
	    portp->port_num);

	portp->vlan_nchains = vgen_vlan_nchains;
	portp->vlan_hashp = mod_hash_create_idhash(hashname,
	    portp->vlan_nchains, mod_hash_null_valdtor);
}

/*
 * Destroy the vlan id hash table in the given port.
 * Does nothing if the port has no hash table.
 */
static void
vgen_vlan_destroy_hash(vgen_port_t *portp)
{
	if (portp->vlan_hashp != NULL) {
		mod_hash_destroy_hash(portp->vlan_hashp);
		portp->vlan_hashp = NULL;
		portp->vlan_nchains = 0;
	}
}

/*
 * Add a port to the vlans specified in its port properties.
 * The pvid and every id in the vids list are inserted into the port's
 * vlan hash; each insertion is asserted to succeed.
 */
static void
vgen_vlan_add_ids(vgen_port_t *portp)
{
	int		rv;
	int		i;

	rv = mod_hash_insert(portp->vlan_hashp,
	    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
	    (mod_hash_val_t)B_TRUE);
	ASSERT(rv == 0);

	for (i = 0; i < portp->nvids; i++) {
		rv = mod_hash_insert(portp->vlan_hashp,
		    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]),
		    (mod_hash_val_t)B_TRUE);
		ASSERT(rv == 0);
	}
}

/*
 * Remove a port from the vlans it has been assigned to.
*/ static void vgen_vlan_remove_ids(vgen_port_t *portp) { int rv; int i; mod_hash_val_t vp; rv = mod_hash_remove(portp->vlan_hashp, (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), (mod_hash_val_t *)&vp); ASSERT(rv == 0); for (i = 0; i < portp->nvids; i++) { rv = mod_hash_remove(portp->vlan_hashp, (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]), (mod_hash_val_t *)&vp); ASSERT(rv == 0); } } /* * Lookup the vlan id of the given tx frame. If it is a vlan-tagged frame, * then the vlan-id is available in the tag; otherwise, its vlan id is * implicitly obtained from the port-vlan-id of the vnet device. * The vlan id determined is returned in vidp. * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged. */ static boolean_t vgen_frame_lookup_vid(vnet_t *vnetp, struct ether_header *ehp, uint16_t *vidp) { struct ether_vlan_header *evhp; /* If it's a tagged frame, get the vlan id from vlan header */ if (ehp->ether_type == ETHERTYPE_VLAN) { evhp = (struct ether_vlan_header *)ehp; *vidp = VLAN_ID(ntohs(evhp->ether_tci)); return (B_TRUE); } /* Untagged frame, vlan-id is the pvid of vnet device */ *vidp = vnetp->pvid; return (B_FALSE); } /* * Find the given vlan id in the hash table. * Return: B_TRUE if the id is found; B_FALSE if not found. */ static boolean_t vgen_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid) { int rv; mod_hash_val_t vp; rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp); if (rv != 0) return (B_FALSE); return (B_TRUE); } /* * This function reads "priority-ether-types" property from md. This property * is used to enable support for priority frames. Applications which need * guaranteed and timely delivery of certain high priority frames to/from * a vnet or vsw within ldoms, should configure this property by providing * the ether type(s) for which the priority facility is needed. 
* Normal data frames are delivered over a ldc channel using the descriptor
 * ring mechanism which is constrained by factors such as descriptor ring size,
 * the rate at which the ring is processed at the peer ldc end point, etc.
 * The priority mechanism provides an Out-Of-Band path to send/receive frames
 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the
 * descriptor ring path and enables a more reliable and timely delivery of
 * frames to the peer.
 *
 * On return vgenp->pri_num_types holds the number of priority ether types
 * (0 if none are configured); when it is non-zero, vgenp->pri_types holds
 * the types and a pool of transmit mblks has been requested.
 */
static void
vgen_read_pri_eth_types(vgen_t *vgenp, md_t *mdp, mde_cookie_t node)
{
	int		rv;
	uint16_t	*types;
	uint64_t	*data;
	int		size;
	int		i;
	size_t		mblk_sz;

	rv = md_get_prop_data(mdp, node, pri_types_propname,
	    (uint8_t **)&data, &size);
	if (rv != 0) {
		/*
		 * Property may not exist if we are running pre-ldoms1.1 f/w.
		 * Check if 'vgen_pri_eth_type' has been set in that case.
		 */
		if (vgen_pri_eth_type != 0) {
			size = sizeof (vgen_pri_eth_type);
			data = &vgen_pri_eth_type;
		} else {
			DBG2(vgenp, NULL,
			    "prop(%s) not found", pri_types_propname);
			size = 0;
		}
	}

	if (size == 0) {
		vgenp->pri_num_types = 0;
		return;
	}

	/*
	 * we have some priority-ether-types defined;
	 * allocate a table of these types and also
	 * allocate a pool of mblks to transmit these
	 * priority packets.
	 */
	size /= sizeof (uint64_t);	/* bytes -> number of 64-bit entries */
	vgenp->pri_num_types = size;
	vgenp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP);
	for (i = 0, types = vgenp->pri_types; i < size; i++) {
		/* keep the low 16 bits of each entry */
		types[i] = data[i] & 0xFFFF;
	}
	/* header plus largest frame, rounded up to a multiple of 8 bytes */
	mblk_sz = (VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size + 7) & ~7;
	(void) vio_create_mblks(vgen_pri_tx_nmblks, mblk_sz, NULL,
	    &vgenp->pri_tx_vmp);
}

/*
 * Read the mtu property of the given MD node into *mtu.
 * If the property is missing, *mtu is set to vnet_ethermtu; otherwise it is
 * set to the low 16 bits of the property value. No range validation is done
 * here.
 */
static void
vgen_mtu_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node, uint32_t *mtu)
{
	int		rv;
	uint64_t	val;
	char		*mtu_propname;

	mtu_propname = vgen_mtu_propname;

	rv = md_get_prop_val(mdp, node, mtu_propname, &val);
	if (rv != 0) {
		DWARN(vgenp, NULL, "prop(%s) not found", mtu_propname);
		*mtu = vnet_ethermtu;
	} else {

		*mtu = val & 0xFFFF;
		DBG2(vgenp, NULL, "%s(%d): (%d)\n", mtu_propname,
		    vgenp->instance, *mtu);
	}
}

/*
 * Read the 'linkprop' property of the given MD node into *pls.
 * *pls is B_TRUE only when the property exists and its lowest bit is set;
 * it is B_FALSE when the property is missing.
 */
static void
vgen_linkprop_read(vgen_t *vgenp, md_t *mdp, mde_cookie_t node,
    boolean_t *pls)
{
	int		rv;
	uint64_t	val;
	char		*linkpropname;

	linkpropname = vgen_linkprop_propname;

	rv = md_get_prop_val(mdp, node, linkpropname, &val);
	if (rv != 0) {
		DWARN(vgenp, NULL, "prop(%s) not found", linkpropname);
		*pls = B_FALSE;
	} else {

		*pls = (val & 0x1) ?  B_TRUE : B_FALSE;
		DBG2(vgenp, NULL, "%s(%d): (%d)\n", linkpropname,
		    vgenp->instance, *pls);
	}
}

/*
 * Register with MD event generator.
 * Two registrations are made against the same parent specification: one
 * for the vnet device node (vgen_mdeg_cb) and one for its port nodes
 * (vgen_mdeg_port_cb).
 * Returns DDI_SUCCESS, or DDI_FAILURE if an allocation or a registration
 * fails.
 */
static int
vgen_mdeg_reg(vgen_t *vgenp)
{
	mdeg_prop_spec_t	*pspecp;
	mdeg_node_spec_t	*parentp;
	uint_t			templatesz;
	int			rv;
	mdeg_handle_t		dev_hdl = NULL;
	mdeg_handle_t		port_hdl = NULL;

	templatesz = sizeof (vgen_prop_template);
	pspecp = kmem_zalloc(templatesz, KM_NOSLEEP);
	if (pspecp == NULL) {
		return (DDI_FAILURE);
	}
	parentp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_NOSLEEP);
	if (parentp == NULL) {
		kmem_free(pspecp, templatesz);
		return (DDI_FAILURE);
	}

	/* start from a private copy of the property template */
	bcopy(vgen_prop_template, pspecp, templatesz);

	/*
	 * NOTE: The instance here refers to the value of "reg" property and
	 * not the dev_info instance (ddi_get_instance()) of vnet.
*/ VGEN_SET_MDEG_PROP_INST(pspecp, vgenp->regprop); parentp->namep = "virtual-device"; parentp->specp = pspecp; /* save parentp in vgen_t */ vgenp->mdeg_parentp = parentp; /* * Register an interest in 'virtual-device' nodes with a * 'name' property of 'network' */ rv = mdeg_register(parentp, &vdev_match, vgen_mdeg_cb, vgenp, &dev_hdl); if (rv != MDEG_SUCCESS) { DERR(vgenp, NULL, "mdeg_register failed\n"); goto mdeg_reg_fail; } /* Register an interest in 'port' nodes */ rv = mdeg_register(parentp, &vport_match, vgen_mdeg_port_cb, vgenp, &port_hdl); if (rv != MDEG_SUCCESS) { DERR(vgenp, NULL, "mdeg_register failed\n"); goto mdeg_reg_fail; } /* save mdeg handle in vgen_t */ vgenp->mdeg_dev_hdl = dev_hdl; vgenp->mdeg_port_hdl = port_hdl; return (DDI_SUCCESS); mdeg_reg_fail: if (dev_hdl != NULL) { (void) mdeg_unregister(dev_hdl); } KMEM_FREE(parentp); kmem_free(pspecp, templatesz); vgenp->mdeg_parentp = NULL; return (DDI_FAILURE); } /* unregister with MD event generator */ static void vgen_mdeg_unreg(vgen_t *vgenp) { if (vgenp->mdeg_dev_hdl != NULL) { (void) mdeg_unregister(vgenp->mdeg_dev_hdl); vgenp->mdeg_dev_hdl = NULL; } if (vgenp->mdeg_port_hdl != NULL) { (void) mdeg_unregister(vgenp->mdeg_port_hdl); vgenp->mdeg_port_hdl = NULL; } if (vgenp->mdeg_parentp != NULL) { kmem_free(vgenp->mdeg_parentp->specp, sizeof (vgen_prop_template)); KMEM_FREE(vgenp->mdeg_parentp); vgenp->mdeg_parentp = NULL; } } /* mdeg callback function for the port node */ static int vgen_mdeg_port_cb(void *cb_argp, mdeg_result_t *resp) { int idx; int vsw_idx = -1; uint64_t val; vgen_t *vgenp; if ((resp == NULL) || (cb_argp == NULL)) { return (MDEG_FAILURE); } vgenp = (vgen_t *)cb_argp; DBG1(vgenp, NULL, "enter\n"); mutex_enter(&vgenp->lock); DBG1(vgenp, NULL, "ports: removed(%x), " "added(%x), updated(%x)\n", resp->removed.nelem, resp->added.nelem, resp->match_curr.nelem); for (idx = 0; idx < resp->removed.nelem; idx++) { (void) vgen_remove_port(vgenp, resp->removed.mdp, resp->removed.mdep[idx]); 
} if (vgenp->vsw_portp == NULL) { /* * find vsw_port and add it first, because other ports need * this when adding fdb entry (see vgen_port_init()). */ for (idx = 0; idx < resp->added.nelem; idx++) { if (!(md_get_prop_val(resp->added.mdp, resp->added.mdep[idx], swport_propname, &val))) { if (val == 0) { /* * This port is connected to the * vsw on service domain. */ vsw_idx = idx; if (vgen_add_port(vgenp, resp->added.mdp, resp->added.mdep[idx]) != DDI_SUCCESS) { cmn_err(CE_NOTE, "vnet%d Could " "not initialize virtual " "switch port.", vgenp->instance); mutex_exit(&vgenp->lock); return (MDEG_FAILURE); } break; } } } if (vsw_idx == -1) { DWARN(vgenp, NULL, "can't find vsw_port\n"); mutex_exit(&vgenp->lock); return (MDEG_FAILURE); } } for (idx = 0; idx < resp->added.nelem; idx++) { if ((vsw_idx != -1) && (vsw_idx == idx)) /* skip vsw_port */ continue; /* If this port can't be added just skip it. */ (void) vgen_add_port(vgenp, resp->added.mdp, resp->added.mdep[idx]); } for (idx = 0; idx < resp->match_curr.nelem; idx++) { (void) vgen_update_port(vgenp, resp->match_curr.mdp, resp->match_curr.mdep[idx], resp->match_prev.mdp, resp->match_prev.mdep[idx]); } mutex_exit(&vgenp->lock); DBG1(vgenp, NULL, "exit\n"); return (MDEG_SUCCESS); } /* mdeg callback function for the vnet node */ static int vgen_mdeg_cb(void *cb_argp, mdeg_result_t *resp) { vgen_t *vgenp; vnet_t *vnetp; md_t *mdp; mde_cookie_t node; uint64_t inst; char *node_name = NULL; if ((resp == NULL) || (cb_argp == NULL)) { return (MDEG_FAILURE); } vgenp = (vgen_t *)cb_argp; vnetp = vgenp->vnetp; DBG1(vgenp, NULL, "added %d : removed %d : curr matched %d" " : prev matched %d", resp->added.nelem, resp->removed.nelem, resp->match_curr.nelem, resp->match_prev.nelem); mutex_enter(&vgenp->lock); /* * We get an initial callback for this node as 'added' after * registering with mdeg. Note that we would have already gathered * information about this vnet node by walking MD earlier during attach * (in vgen_read_mdprops()). 
So, there is a window where the properties * of this node might have changed when we get this initial 'added' * callback. We handle this as if an update occured and invoke the same * function which handles updates to the properties of this vnet-node * if any. A non-zero 'match' value indicates that the MD has been * updated and that a 'network' node is present which may or may not * have been updated. It is up to the clients to examine their own * nodes and determine if they have changed. */ if (resp->added.nelem != 0) { if (resp->added.nelem != 1) { cmn_err(CE_NOTE, "!vnet%d: number of nodes added " "invalid: %d\n", vnetp->instance, resp->added.nelem); goto vgen_mdeg_cb_err; } mdp = resp->added.mdp; node = resp->added.mdep[0]; } else if (resp->match_curr.nelem != 0) { if (resp->match_curr.nelem != 1) { cmn_err(CE_NOTE, "!vnet%d: number of nodes updated " "invalid: %d\n", vnetp->instance, resp->match_curr.nelem); goto vgen_mdeg_cb_err; } mdp = resp->match_curr.mdp; node = resp->match_curr.mdep[0]; } else { goto vgen_mdeg_cb_err; } /* Validate name and instance */ if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { DERR(vgenp, NULL, "unable to get node name\n"); goto vgen_mdeg_cb_err; } /* is this a virtual-network device? */ if (strcmp(node_name, vnet_propname) != 0) { DERR(vgenp, NULL, "%s: Invalid node name: %s\n", node_name); goto vgen_mdeg_cb_err; } if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { DERR(vgenp, NULL, "prop(cfg-handle) not found\n"); goto vgen_mdeg_cb_err; } /* is this the right instance of vnet? */ if (inst != vgenp->regprop) { DERR(vgenp, NULL, "Invalid cfg-handle: %lx\n", inst); goto vgen_mdeg_cb_err; } vgen_update_md_prop(vgenp, mdp, node); mutex_exit(&vgenp->lock); return (MDEG_SUCCESS); vgen_mdeg_cb_err: mutex_exit(&vgenp->lock); return (MDEG_FAILURE); } /* * Check to see if the relevant properties in the specified node have * changed, and if so take the appropriate action. 
*/
static void
vgen_update_md_prop(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
{
	uint16_t	pvid;
	uint16_t	*vids;
	uint16_t	nvids;
	vnet_t		*vnetp = vgenp->vnetp;
	uint32_t	mtu;
	boolean_t	pls_update;
	/* bit mask of the properties found to have changed */
	enum		{ MD_init = 0x1,
			    MD_vlans = 0x2,
			    MD_mtu = 0x4,
			    MD_pls = 0x8 } updated;
	int		rv;

	updated = MD_init;

	/* Read the vlan ids */
	vgen_vlan_read_ids(vgenp, VGEN_LOCAL, mdp, mdex, &pvid, &vids,
	    &nvids, NULL);

	/* Determine if there are any vlan id updates */
	if ((pvid != vnetp->pvid) ||		/* pvid changed? */
	    (nvids != vnetp->nvids) ||		/* # of vids changed? */
	    ((nvids != 0) && (vnetp->nvids != 0) &&	/* vids changed? */
	    bcmp(vids, vnetp->vids, sizeof (uint16_t) * nvids))) {
		updated |= MD_vlans;
	}

	/* Read mtu */
	vgen_mtu_read(vgenp, mdp, mdex, &mtu);
	if (mtu != vnetp->mtu) {
		if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) {
			updated |= MD_mtu;
		} else {
			cmn_err(CE_NOTE, "!vnet%d: Unable to process mtu update"
			    " as the specified value:%d is invalid\n",
			    vnetp->instance, mtu);
		}
	}

	/*
	 * Read the 'linkprop' property.
	 */
	vgen_linkprop_read(vgenp, mdp, mdex, &pls_update);
	if (pls_update != vnetp->pls_update) {
		updated |= MD_pls;
	}

	/* Now process the updated props */

	if (updated & MD_vlans) {

		/* save the new vlan ids */
		vnetp->pvid = pvid;
		if (vnetp->nvids != 0) {
			kmem_free(vnetp->vids,
			    sizeof (uint16_t) * vnetp->nvids);
			vnetp->nvids = 0;
		}
		if (nvids != 0) {
			/* the vnet takes ownership of the new list */
			vnetp->nvids = nvids;
			vnetp->vids = vids;
		}

		/* reset vlan-unaware peers (ver < 1.3) and restart handshake */
		vgen_reset_vlan_unaware_ports(vgenp);

	} else {

		/* nothing changed: discard the list that was just read */
		if (nvids != 0) {
			kmem_free(vids, sizeof (uint16_t) * nvids);
		}
	}

	if (updated & MD_mtu) {

		DBG2(vgenp, NULL, "curr_mtu(%d) new_mtu(%d)\n",
		    vnetp->mtu, mtu);

		rv = vnet_mtu_update(vnetp, mtu);
		if (rv == 0) {
			vgenp->max_frame_size = mtu +
			    sizeof (struct ether_header) + VLAN_TAGSZ;
		}
	}

	if (updated & MD_pls) {
		/* enable/disable physical link state updates */
		vnetp->pls_update = pls_update;
		/*
		 * vgenp->lock is dropped around the reset and re-acquired
		 * before returning (vgen_mdeg_cb() calls this function with
		 * the lock held and releases it afterwards).
		 */
		mutex_exit(&vgenp->lock);

		/* reset vsw-port to re-negotiate with the updated prop. */
		vgen_reset_vsw_port(vgenp);

		mutex_enter(&vgenp->lock);
	}
}

/*
 * Add a new port to the device.
 * Allocates the port, fills it in from its MD node and attaches it.
 * Returns DDI_SUCCESS or DDI_FAILURE. The port is freed here only when
 * reading its properties fails; after that point vgen_port_attach() is
 * responsible for it.
 */
static int
vgen_add_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex)
{
	vgen_port_t	*portp;
	int		rv;

	portp = kmem_zalloc(sizeof (vgen_port_t), KM_SLEEP);

	rv = vgen_port_read_props(portp, vgenp, mdp, mdex);
	if (rv != DDI_SUCCESS) {
		KMEM_FREE(portp);
		return (DDI_FAILURE);
	}

	rv = vgen_port_attach(portp);
	if (rv != DDI_SUCCESS) {
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}

/*
 * Read properties of the port from its md node.
 * Fills in portp's vgenp, port number, mac address, channel ids, vswitch
 * flag and vlan ids. Returns DDI_SUCCESS, or DDI_FAILURE if a required
 * property is missing or an allocation fails.
 */
static int
vgen_port_read_props(vgen_port_t *portp, vgen_t *vgenp, md_t *mdp,
    mde_cookie_t mdex)
{
	uint64_t		port_num;
	uint64_t		*ldc_ids;
	uint64_t		macaddr;
	uint64_t		val;
	int			num_ldcs;
	int			i;
	int			addrsz;
	int			num_nodes = 0;
	int			listsz = 0;
	mde_cookie_t		*listp = NULL;
	uint8_t			*addrp;
	struct ether_addr	ea;

	/* read "id" property to get the port number */
	if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) {
		DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname);
		return (DDI_FAILURE);
	}

	/*
	 * Find the channel endpoint node(s) under this port node.
*/ if ((num_nodes = md_node_count(mdp)) <= 0) { DWARN(vgenp, NULL, "invalid number of nodes found (%d)", num_nodes); return (DDI_FAILURE); } /* allocate space for node list */ listsz = num_nodes * sizeof (mde_cookie_t); listp = kmem_zalloc(listsz, KM_NOSLEEP); if (listp == NULL) return (DDI_FAILURE); num_ldcs = md_scan_dag(mdp, mdex, md_find_name(mdp, channel_propname), md_find_name(mdp, "fwd"), listp); if (num_ldcs <= 0) { DWARN(vgenp, NULL, "can't find %s nodes", channel_propname); kmem_free(listp, listsz); return (DDI_FAILURE); } if (num_ldcs > 1) { DWARN(vgenp, NULL, "Port %d: Number of channels %d > 1\n", port_num, num_ldcs); } ldc_ids = kmem_zalloc(num_ldcs * sizeof (uint64_t), KM_NOSLEEP); if (ldc_ids == NULL) { kmem_free(listp, listsz); return (DDI_FAILURE); } for (i = 0; i < num_ldcs; i++) { /* read channel ids */ if (md_get_prop_val(mdp, listp[i], id_propname, &ldc_ids[i])) { DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname); kmem_free(listp, listsz); kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t)); return (DDI_FAILURE); } DBG2(vgenp, NULL, "ldc_id 0x%llx", ldc_ids[i]); } kmem_free(listp, listsz); if (md_get_prop_data(mdp, mdex, rmacaddr_propname, &addrp, &addrsz)) { DWARN(vgenp, NULL, "prop(%s) not found\n", rmacaddr_propname); kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t)); return (DDI_FAILURE); } if (addrsz < ETHERADDRL) { DWARN(vgenp, NULL, "invalid address size (%d)\n", addrsz); kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t)); return (DDI_FAILURE); } macaddr = *((uint64_t *)addrp); DBG2(vgenp, NULL, "remote mac address 0x%llx\n", macaddr); for (i = ETHERADDRL - 1; i >= 0; i--) { ea.ether_addr_octet[i] = macaddr & 0xFF; macaddr >>= 8; } if (!(md_get_prop_val(mdp, mdex, swport_propname, &val))) { if (val == 0) { /* This port is connected to the vswitch */ portp->is_vsw_port = B_TRUE; } else { portp->is_vsw_port = B_FALSE; } } /* now update all properties into the port */ portp->vgenp = vgenp; portp->port_num = port_num; ether_copy(&ea, 
&portp->macaddr); portp->ldc_ids = kmem_zalloc(sizeof (uint64_t) * num_ldcs, KM_SLEEP); bcopy(ldc_ids, portp->ldc_ids, sizeof (uint64_t) * num_ldcs); portp->num_ldcs = num_ldcs; /* read vlan id properties of this port node */ vgen_vlan_read_ids(portp, VGEN_PEER, mdp, mdex, &portp->pvid, &portp->vids, &portp->nvids, NULL); kmem_free(ldc_ids, num_ldcs * sizeof (uint64_t)); return (DDI_SUCCESS); } /* remove a port from the device */ static int vgen_remove_port(vgen_t *vgenp, md_t *mdp, mde_cookie_t mdex) { uint64_t port_num; vgen_port_t *portp; vgen_portlist_t *plistp; /* read "id" property to get the port number */ if (md_get_prop_val(mdp, mdex, id_propname, &port_num)) { DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname); return (DDI_FAILURE); } plistp = &(vgenp->vgenports); WRITE_ENTER(&plistp->rwlock); portp = vgen_port_lookup(plistp, (int)port_num); if (portp == NULL) { DWARN(vgenp, NULL, "can't find port(%lx)\n", port_num); RW_EXIT(&plistp->rwlock); return (DDI_FAILURE); } vgen_port_detach_mdeg(portp); RW_EXIT(&plistp->rwlock); return (DDI_SUCCESS); } /* attach a port to the device based on mdeg data */ static int vgen_port_attach(vgen_port_t *portp) { vgen_portlist_t *plistp; vgen_t *vgenp; uint64_t *ldcids; mac_register_t *macp; vio_net_res_type_t type; int rv; ASSERT(portp != NULL); vgenp = portp->vgenp; ldcids = portp->ldc_ids; DBG2(vgenp, NULL, "port_num(%d), ldcid(%lx)\n", portp->port_num, ldcids[0]); mutex_init(&portp->lock, NULL, MUTEX_DRIVER, NULL); /* * attach the channel under the port using its channel id; * note that we only support one channel per port for now. 
*/ if (vgen_ldc_attach(portp, ldcids[0]) == DDI_FAILURE) { vgen_port_detach(portp); return (DDI_FAILURE); } /* create vlan id hash table */ vgen_vlan_create_hash(portp); if (portp->is_vsw_port == B_TRUE) { /* This port is connected to the switch port */ (void) atomic_swap_32(&portp->use_vsw_port, B_FALSE); type = VIO_NET_RES_LDC_SERVICE; } else { (void) atomic_swap_32(&portp->use_vsw_port, B_TRUE); type = VIO_NET_RES_LDC_GUEST; } if ((macp = mac_alloc(MAC_VERSION)) == NULL) { vgen_port_detach(portp); return (DDI_FAILURE); } macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; macp->m_driver = portp; macp->m_dip = vgenp->vnetdip; macp->m_src_addr = (uint8_t *)&(vgenp->macaddr); macp->m_callbacks = &vgen_m_callbacks; macp->m_min_sdu = 0; macp->m_max_sdu = ETHERMTU; mutex_enter(&portp->lock); rv = vio_net_resource_reg(macp, type, vgenp->macaddr, portp->macaddr, &portp->vhp, &portp->vcb); mutex_exit(&portp->lock); mac_free(macp); if (rv == 0) { /* link it into the list of ports */ plistp = &(vgenp->vgenports); WRITE_ENTER(&plistp->rwlock); vgen_port_list_insert(portp); RW_EXIT(&plistp->rwlock); if (portp->is_vsw_port == B_TRUE) { /* We now have the vswitch port attached */ vgenp->vsw_portp = portp; (void) atomic_swap_32(&vgenp->vsw_port_refcnt, 0); } } else { DERR(vgenp, NULL, "vio_net_resource_reg failed for portp=0x%p", portp); vgen_port_detach(portp); } DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num); return (DDI_SUCCESS); } /* detach a port from the device based on mdeg data */ static void vgen_port_detach_mdeg(vgen_port_t *portp) { vgen_t *vgenp = portp->vgenp; DBG1(vgenp, NULL, "enter: port_num(%d)\n", portp->port_num); mutex_enter(&portp->lock); /* stop the port if needed */ if (portp->flags & VGEN_STARTED) { vgen_port_uninit(portp); portp->flags &= ~(VGEN_STARTED); } mutex_exit(&portp->lock); vgen_port_detach(portp); DBG1(vgenp, NULL, "exit: port_num(%d)\n", portp->port_num); } static int vgen_update_port(vgen_t *vgenp, md_t *curr_mdp, mde_cookie_t 
curr_mdex, md_t *prev_mdp, mde_cookie_t prev_mdex) { uint64_t cport_num; uint64_t pport_num; vgen_portlist_t *plistp; vgen_port_t *portp; boolean_t updated_vlans = B_FALSE; uint16_t pvid; uint16_t *vids; uint16_t nvids; /* * For now, we get port updates only if vlan ids changed. * We read the port num and do some sanity check. */ if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) { DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname); return (DDI_FAILURE); } if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) { DWARN(vgenp, NULL, "prop(%s) not found\n", id_propname); return (DDI_FAILURE); } if (cport_num != pport_num) return (DDI_FAILURE); plistp = &(vgenp->vgenports); READ_ENTER(&plistp->rwlock); portp = vgen_port_lookup(plistp, (int)cport_num); if (portp == NULL) { DWARN(vgenp, NULL, "can't find port(%lx)\n", cport_num); RW_EXIT(&plistp->rwlock); return (DDI_FAILURE); } /* Read the vlan ids */ vgen_vlan_read_ids(portp, VGEN_PEER, curr_mdp, curr_mdex, &pvid, &vids, &nvids, NULL); /* Determine if there are any vlan id updates */ if ((pvid != portp->pvid) || /* pvid changed? */ (nvids != portp->nvids) || /* # of vids changed? */ ((nvids != 0) && (portp->nvids != 0) && /* vids changed? 
*/ bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) { updated_vlans = B_TRUE; } if (updated_vlans == B_FALSE) { RW_EXIT(&plistp->rwlock); return (DDI_FAILURE); } /* remove the port from vlans it has been assigned to */ vgen_vlan_remove_ids(portp); /* save the new vlan ids */ portp->pvid = pvid; if (portp->nvids != 0) { kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids); portp->nvids = 0; } if (nvids != 0) { portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP); bcopy(vids, portp->vids, sizeof (uint16_t) * nvids); portp->nvids = nvids; kmem_free(vids, sizeof (uint16_t) * nvids); } /* add port to the new vlans */ vgen_vlan_add_ids(portp); /* reset the port if it is vlan unaware (ver < 1.3) */ vgen_vlan_unaware_port_reset(portp); RW_EXIT(&plistp->rwlock); return (DDI_SUCCESS); } static uint64_t vgen_port_stat(vgen_port_t *portp, uint_t stat) { return (vgen_ldc_stat(portp->ldcp, stat)); } /* attach the channel corresponding to the given ldc_id to the port */ static int vgen_ldc_attach(vgen_port_t *portp, uint64_t ldc_id) { vgen_t *vgenp; vgen_ldc_t *ldcp; ldc_attr_t attr; int status; ldc_status_t istatus; char kname[MAXNAMELEN]; int instance; enum {AST_init = 0x0, AST_ldc_alloc = 0x1, AST_mutex_init = 0x2, AST_ldc_init = 0x4, AST_ldc_reg_cb = 0x8 } attach_state; attach_state = AST_init; vgenp = portp->vgenp; ldcp = kmem_zalloc(sizeof (vgen_ldc_t), KM_NOSLEEP); if (ldcp == NULL) { goto ldc_attach_failed; } ldcp->ldc_id = ldc_id; ldcp->portp = portp; attach_state |= AST_ldc_alloc; mutex_init(&ldcp->txlock, NULL, MUTEX_DRIVER, NULL); mutex_init(&ldcp->cblock, NULL, MUTEX_DRIVER, NULL); mutex_init(&ldcp->tclock, NULL, MUTEX_DRIVER, NULL); mutex_init(&ldcp->wrlock, NULL, MUTEX_DRIVER, NULL); mutex_init(&ldcp->rxlock, NULL, MUTEX_DRIVER, NULL); mutex_init(&ldcp->pollq_lock, NULL, MUTEX_DRIVER, NULL); mutex_init(&ldcp->msg_thr_lock, NULL, MUTEX_DRIVER, NULL); cv_init(&ldcp->msg_thr_cv, NULL, CV_DRIVER, NULL); attach_state |= AST_mutex_init; attr.devclass = 
LDC_DEV_NT; attr.instance = vgenp->instance; attr.mode = LDC_MODE_UNRELIABLE; attr.mtu = vgen_ldc_mtu; status = ldc_init(ldc_id, &attr, &ldcp->ldc_handle); if (status != 0) { DWARN(vgenp, ldcp, "ldc_init failed,rv (%d)\n", status); goto ldc_attach_failed; } attach_state |= AST_ldc_init; status = ldc_reg_callback(ldcp->ldc_handle, vgen_ldc_cb, (caddr_t)ldcp); if (status != 0) { DWARN(vgenp, ldcp, "ldc_reg_callback failed, rv (%d)\n", status); goto ldc_attach_failed; } /* * allocate a message for ldc_read()s, big enough to hold ctrl and * data msgs, including raw data msgs used to recv priority frames. */ ldcp->msglen = VIO_PKT_DATA_HDRSIZE + vgenp->max_frame_size; ldcp->ldcmsg = kmem_alloc(ldcp->msglen, KM_SLEEP); attach_state |= AST_ldc_reg_cb; (void) ldc_status(ldcp->ldc_handle, &istatus); ASSERT(istatus == LDC_INIT); ldcp->ldc_status = istatus; /* Setup kstats for the channel */ instance = vgenp->instance; (void) sprintf(kname, "vnetldc0x%lx", ldcp->ldc_id); ldcp->ksp = vgen_setup_kstats("vnet", instance, kname, &ldcp->stats); if (ldcp->ksp == NULL) { goto ldc_attach_failed; } /* initialize vgen_versions supported */ bcopy(vgen_versions, ldcp->vgen_versions, sizeof (ldcp->vgen_versions)); vgen_reset_vnet_proto_ops(ldcp); /* Link this channel to the port */ portp->ldcp = ldcp; ldcp->link_state = LINK_STATE_UNKNOWN; #ifdef VNET_IOC_DEBUG ldcp->link_down_forced = B_FALSE; #endif ldcp->flags |= CHANNEL_ATTACHED; return (DDI_SUCCESS); ldc_attach_failed: if (attach_state & AST_ldc_reg_cb) { (void) ldc_unreg_callback(ldcp->ldc_handle); kmem_free(ldcp->ldcmsg, ldcp->msglen); } if (attach_state & AST_ldc_init) { (void) ldc_fini(ldcp->ldc_handle); } if (attach_state & AST_mutex_init) { mutex_destroy(&ldcp->tclock); mutex_destroy(&ldcp->txlock); mutex_destroy(&ldcp->cblock); mutex_destroy(&ldcp->wrlock); mutex_destroy(&ldcp->rxlock); mutex_destroy(&ldcp->pollq_lock); } if (attach_state & AST_ldc_alloc) { KMEM_FREE(ldcp); } return (DDI_FAILURE); } /* detach a channel from 
the port */ static void vgen_ldc_detach(vgen_ldc_t *ldcp) { vgen_port_t *portp; vgen_t *vgenp; ASSERT(ldcp != NULL); portp = ldcp->portp; vgenp = portp->vgenp; if (ldcp->ldc_status != LDC_INIT) { DWARN(vgenp, ldcp, "ldc_status is not INIT\n"); } if (ldcp->flags & CHANNEL_ATTACHED) { ldcp->flags &= ~(CHANNEL_ATTACHED); (void) ldc_unreg_callback(ldcp->ldc_handle); (void) ldc_fini(ldcp->ldc_handle); kmem_free(ldcp->ldcmsg, ldcp->msglen); vgen_destroy_kstats(ldcp->ksp); ldcp->ksp = NULL; mutex_destroy(&ldcp->tclock); mutex_destroy(&ldcp->txlock); mutex_destroy(&ldcp->cblock); mutex_destroy(&ldcp->wrlock); mutex_destroy(&ldcp->rxlock); mutex_destroy(&ldcp->pollq_lock); mutex_destroy(&ldcp->msg_thr_lock); cv_destroy(&ldcp->msg_thr_cv); KMEM_FREE(ldcp); } } /* enable transmit/receive on the channel */ static int vgen_ldc_init(vgen_ldc_t *ldcp) { vgen_t *vgenp = LDC_TO_VGEN(ldcp); ldc_status_t istatus; int rv; enum { ST_init = 0x0, ST_ldc_open = 0x1, ST_cb_enable = 0x2} init_state; int flag = 0; init_state = ST_init; DBG1(vgenp, ldcp, "enter\n"); LDC_LOCK(ldcp); rv = ldc_open(ldcp->ldc_handle); if (rv != 0) { DWARN(vgenp, ldcp, "ldc_open failed: rv(%d)\n", rv); goto ldcinit_failed; } init_state |= ST_ldc_open; (void) ldc_status(ldcp->ldc_handle, &istatus); if (istatus != LDC_OPEN && istatus != LDC_READY) { DWARN(vgenp, ldcp, "status(%d) is not OPEN/READY\n", istatus); goto ldcinit_failed; } ldcp->ldc_status = istatus; rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_ENABLE); if (rv != 0) { DWARN(vgenp, ldcp, "ldc_set_cb_mode failed: rv(%d)\n", rv); goto ldcinit_failed; } init_state |= ST_cb_enable; vgen_ldc_up(ldcp); (void) ldc_status(ldcp->ldc_handle, &istatus); if (istatus == LDC_UP) { DWARN(vgenp, ldcp, "status(%d) is UP\n", istatus); } ldcp->ldc_status = istatus; ldcp->hphase = VH_PHASE0; ldcp->hstate = 0; ldcp->flags |= CHANNEL_STARTED; vgen_setup_handshake_params(ldcp); /* if channel is already UP - start handshake */ if (istatus == LDC_UP) { vgen_t *vgenp = 
LDC_TO_VGEN(ldcp); if (ldcp->portp != vgenp->vsw_portp) { /* * As the channel is up, use this port from now on. */ (void) atomic_swap_32( &ldcp->portp->use_vsw_port, B_FALSE); } /* Initialize local session id */ ldcp->local_sid = ddi_get_lbolt(); /* clear peer session id */ ldcp->peer_sid = 0; mutex_exit(&ldcp->tclock); mutex_exit(&ldcp->txlock); mutex_exit(&ldcp->wrlock); mutex_exit(&ldcp->rxlock); rv = vgen_handshake(vh_nextphase(ldcp)); mutex_exit(&ldcp->cblock); if (rv != 0) { flag = (rv == ECONNRESET) ? VGEN_FLAG_EVT_RESET : VGEN_FLAG_NEED_LDCRESET; (void) vgen_process_reset(ldcp, flag); } } else { LDC_UNLOCK(ldcp); } return (DDI_SUCCESS); ldcinit_failed: if (init_state & ST_cb_enable) { (void) ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE); } if (init_state & ST_ldc_open) { (void) ldc_close(ldcp->ldc_handle); } LDC_UNLOCK(ldcp); DBG1(vgenp, ldcp, "exit\n"); return (DDI_FAILURE); } /* stop transmit/receive on the channel */ static void vgen_ldc_uninit(vgen_ldc_t *ldcp) { vgen_t *vgenp = LDC_TO_VGEN(ldcp); DBG1(vgenp, ldcp, "enter\n"); LDC_LOCK(ldcp); if ((ldcp->flags & CHANNEL_STARTED) == 0) { LDC_UNLOCK(ldcp); DWARN(vgenp, ldcp, "CHANNEL_STARTED flag is not set\n"); return; } LDC_UNLOCK(ldcp); while (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) { delay(drv_usectohz(VGEN_LDC_UNINIT_DELAY)); } (void) vgen_process_reset(ldcp, VGEN_FLAG_UNINIT); DBG1(vgenp, ldcp, "exit\n"); } /* * Create a descriptor ring, that will be exported to the peer for mapping. */ static int vgen_create_dring(vgen_ldc_t *ldcp) { vgen_hparams_t *lp = &ldcp->local_hparams; int rv; if (lp->dring_mode == VIO_RX_DRING_DATA) { rv = vgen_create_rx_dring(ldcp); } else { rv = vgen_create_tx_dring(ldcp); } return (rv); } /* * Destroy the descriptor ring. 
*/ static void vgen_destroy_dring(vgen_ldc_t *ldcp) { vgen_hparams_t *lp = &ldcp->local_hparams; if (lp->dring_mode == VIO_RX_DRING_DATA) { vgen_destroy_rx_dring(ldcp); } else { vgen_destroy_tx_dring(ldcp); } } /* * Map the descriptor ring exported by the peer. */ static int vgen_map_dring(vgen_ldc_t *ldcp, void *pkt) { int rv; vgen_hparams_t *lp = &ldcp->local_hparams; if (lp->dring_mode == VIO_RX_DRING_DATA) { /* * In RxDringData mode, dring that we map in * becomes our transmit descriptor ring. */ rv = vgen_map_tx_dring(ldcp, pkt); } else { /* * In TxDring mode, dring that we map in * becomes our receive descriptor ring. */ rv = vgen_map_rx_dring(ldcp, pkt); } return (rv); } /* * Unmap the descriptor ring exported by the peer. */ static void vgen_unmap_dring(vgen_ldc_t *ldcp) { vgen_hparams_t *lp = &ldcp->local_hparams; if (lp->dring_mode == VIO_RX_DRING_DATA) { vgen_unmap_tx_dring(ldcp); } else { vgen_unmap_rx_dring(ldcp); } } void vgen_destroy_rxpools(void *arg) { vio_mblk_pool_t *poolp = (vio_mblk_pool_t *)arg; vio_mblk_pool_t *npoolp; while (poolp != NULL) { npoolp = poolp->nextp; while (vio_destroy_mblks(poolp) != 0) { drv_usecwait(vgen_rxpool_cleanup_delay); } poolp = npoolp; } } /* get channel statistics */ static uint64_t vgen_ldc_stat(vgen_ldc_t *ldcp, uint_t stat) { vgen_stats_t *statsp; uint64_t val; val = 0; statsp = &ldcp->stats; switch (stat) { case MAC_STAT_MULTIRCV: val = statsp->multircv; break; case MAC_STAT_BRDCSTRCV: val = statsp->brdcstrcv; break; case MAC_STAT_MULTIXMT: val = statsp->multixmt; break; case MAC_STAT_BRDCSTXMT: val = statsp->brdcstxmt; break; case MAC_STAT_NORCVBUF: val = statsp->norcvbuf; break; case MAC_STAT_IERRORS: val = statsp->ierrors; break; case MAC_STAT_NOXMTBUF: val = statsp->noxmtbuf; break; case MAC_STAT_OERRORS: val = statsp->oerrors; break; case MAC_STAT_COLLISIONS: break; case MAC_STAT_RBYTES: val = statsp->rbytes; break; case MAC_STAT_IPACKETS: val = statsp->ipackets; break; case MAC_STAT_OBYTES: val = 
statsp->obytes; break; case MAC_STAT_OPACKETS: val = statsp->opackets; break; /* stats not relevant to ldc, return 0 */ case MAC_STAT_IFSPEED: case ETHER_STAT_ALIGN_ERRORS: case ETHER_STAT_FCS_ERRORS: case ETHER_STAT_FIRST_COLLISIONS: case ETHER_STAT_MULTI_COLLISIONS: case ETHER_STAT_DEFER_XMTS: case ETHER_STAT_TX_LATE_COLLISIONS: case ETHER_STAT_EX_COLLISIONS: case ETHER_STAT_MACXMT_ERRORS: case ETHER_STAT_CARRIER_ERRORS: case ETHER_STAT_TOOLONG_ERRORS: case ETHER_STAT_XCVR_ADDR: case ETHER_STAT_XCVR_ID: case ETHER_STAT_XCVR_INUSE: case ETHER_STAT_CAP_1000FDX: case ETHER_STAT_CAP_1000HDX: case ETHER_STAT_CAP_100FDX: case ETHER_STAT_CAP_100HDX: case ETHER_STAT_CAP_10FDX: case ETHER_STAT_CAP_10HDX: case ETHER_STAT_CAP_ASMPAUSE: case ETHER_STAT_CAP_PAUSE: case ETHER_STAT_CAP_AUTONEG: case ETHER_STAT_ADV_CAP_1000FDX: case ETHER_STAT_ADV_CAP_1000HDX: case ETHER_STAT_ADV_CAP_100FDX: case ETHER_STAT_ADV_CAP_100HDX: case ETHER_STAT_ADV_CAP_10FDX: case ETHER_STAT_ADV_CAP_10HDX: case ETHER_STAT_ADV_CAP_ASMPAUSE: case ETHER_STAT_ADV_CAP_PAUSE: case ETHER_STAT_ADV_CAP_AUTONEG: case ETHER_STAT_LP_CAP_1000FDX: case ETHER_STAT_LP_CAP_1000HDX: case ETHER_STAT_LP_CAP_100FDX: case ETHER_STAT_LP_CAP_100HDX: case ETHER_STAT_LP_CAP_10FDX: case ETHER_STAT_LP_CAP_10HDX: case ETHER_STAT_LP_CAP_ASMPAUSE: case ETHER_STAT_LP_CAP_PAUSE: case ETHER_STAT_LP_CAP_AUTONEG: case ETHER_STAT_LINK_ASMPAUSE: case ETHER_STAT_LINK_PAUSE: case ETHER_STAT_LINK_AUTONEG: case ETHER_STAT_LINK_DUPLEX: default: val = 0; break; } return (val); } /* * LDC channel is UP, start handshake process with peer. */ static void vgen_handle_evt_up(vgen_ldc_t *ldcp) { vgen_t *vgenp = LDC_TO_VGEN(ldcp); DBG1(vgenp, ldcp, "enter\n"); ASSERT(MUTEX_HELD(&ldcp->cblock)); if (ldcp->portp != vgenp->vsw_portp) { /* * As the channel is up, use this port from now on. 
*/ (void) atomic_swap_32(&ldcp->portp->use_vsw_port, B_FALSE); } /* Initialize local session id */ ldcp->local_sid = ddi_get_lbolt(); /* clear peer session id */ ldcp->peer_sid = 0; /* Initiate Handshake process with peer ldc endpoint */ (void) vgen_handshake(vh_nextphase(ldcp)); DBG1(vgenp, ldcp, "exit\n"); } /* * LDC channel is Reset, terminate connection with peer and try to * bring the channel up again. */ int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller) { if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) { ASSERT(MUTEX_HELD(&ldcp->cblock)); } /* Set the flag to indicate reset is in progress */ if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) { /* another thread is already in the process of resetting */ return (EBUSY); } if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) { mutex_exit(&ldcp->cblock); } (void) vgen_process_reset(ldcp, VGEN_FLAG_EVT_RESET); if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) { mutex_enter(&ldcp->cblock); } return (0); } /* Interrupt handler for the channel */ static uint_t vgen_ldc_cb(uint64_t event, caddr_t arg) { _NOTE(ARGUNUSED(event)) vgen_ldc_t *ldcp; vgen_t *vgenp; ldc_status_t istatus; vgen_stats_t *statsp; uint_t ret = LDC_SUCCESS; ldcp = (vgen_ldc_t *)arg; vgenp = LDC_TO_VGEN(ldcp); statsp = &ldcp->stats; DBG1(vgenp, ldcp, "enter\n"); mutex_enter(&ldcp->cblock); statsp->callbacks++; if ((ldcp->ldc_status == LDC_INIT) || (ldcp->ldc_handle == NULL)) { DWARN(vgenp, ldcp, "status(%d) is LDC_INIT\n", ldcp->ldc_status); mutex_exit(&ldcp->cblock); return (LDC_SUCCESS); } /* * NOTE: not using switch() as event could be triggered by * a state change and a read request. Also the ordering of the * check for the event types is deliberate. 
*/ if (event & LDC_EVT_UP) { if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { DWARN(vgenp, ldcp, "ldc_status err\n"); /* status couldn't be determined */ ret = LDC_FAILURE; goto ldc_cb_ret; } ldcp->ldc_status = istatus; if (ldcp->ldc_status != LDC_UP) { DWARN(vgenp, ldcp, "LDC_EVT_UP received " " but ldc status is not UP(0x%x)\n", ldcp->ldc_status); /* spurious interrupt, return success */ goto ldc_cb_ret; } DWARN(vgenp, ldcp, "event(%lx) UP, status(%d)\n", event, ldcp->ldc_status); vgen_handle_evt_up(ldcp); ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); } /* Handle RESET/DOWN before READ event */ if (event & (LDC_EVT_RESET | LDC_EVT_DOWN)) { if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { DWARN(vgenp, ldcp, "ldc_status error\n"); /* status couldn't be determined */ ret = LDC_FAILURE; goto ldc_cb_ret; } ldcp->ldc_status = istatus; DWARN(vgenp, ldcp, "event(%lx) RESET/DOWN, status(%d)\n", event, ldcp->ldc_status); (void) vgen_handle_evt_reset(ldcp, VGEN_LDC_CB); /* * As the channel is down/reset, ignore READ event * but print a debug warning message. */ if (event & LDC_EVT_READ) { DWARN(vgenp, ldcp, "LDC_EVT_READ set along with RESET/DOWN\n"); event &= ~LDC_EVT_READ; } } if (event & LDC_EVT_READ) { DBG2(vgenp, ldcp, "event(%lx) READ, status(%d)\n", event, ldcp->ldc_status); ASSERT((event & (LDC_EVT_RESET | LDC_EVT_DOWN)) == 0); if (ldcp->msg_thread != NULL) { /* * If the receive thread is enabled, then * wakeup the receive thread to process the * LDC messages. 
*/ mutex_exit(&ldcp->cblock); mutex_enter(&ldcp->msg_thr_lock); if (!(ldcp->msg_thr_flags & VGEN_WTHR_DATARCVD)) { ldcp->msg_thr_flags |= VGEN_WTHR_DATARCVD; cv_signal(&ldcp->msg_thr_cv); } mutex_exit(&ldcp->msg_thr_lock); mutex_enter(&ldcp->cblock); } else { (void) vgen_handle_evt_read(ldcp, VGEN_LDC_CB); } } ldc_cb_ret: mutex_exit(&ldcp->cblock); DBG1(vgenp, ldcp, "exit\n"); return (ret); } int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller) { int rv; uint64_t *ldcmsg; size_t msglen; vgen_t *vgenp = LDC_TO_VGEN(ldcp); vio_msg_tag_t *tagp; ldc_status_t istatus; boolean_t has_data; DBG1(vgenp, ldcp, "enter\n"); if (caller == VGEN_LDC_CB) { ASSERT(MUTEX_HELD(&ldcp->cblock)); } else if (caller == VGEN_MSG_THR) { mutex_enter(&ldcp->cblock); } else { return (EINVAL); } ldcmsg = ldcp->ldcmsg; vgen_evtread: do { msglen = ldcp->msglen; rv = ldc_read(ldcp->ldc_handle, (caddr_t)ldcmsg, &msglen); if (rv != 0) { DWARN(vgenp, ldcp, "ldc_read() failed " "rv(%d) len(%d)\n", rv, msglen); if (rv == ECONNRESET) goto vgen_evtread_error; break; } if (msglen == 0) { DBG2(vgenp, ldcp, "ldc_read NODATA"); break; } DBG2(vgenp, ldcp, "ldc_read msglen(%d)", msglen); tagp = (vio_msg_tag_t *)ldcmsg; if (ldcp->peer_sid) { /* * check sid only after we have received peer's sid * in the version negotiate msg. */ #ifdef DEBUG if (vgen_inject_error(ldcp, VGEN_ERR_HSID)) { /* simulate bad sid condition */ tagp->vio_sid = 0; vgen_inject_err_flag &= ~(VGEN_ERR_HSID); } #endif rv = vgen_check_sid(ldcp, tagp); if (rv != VGEN_SUCCESS) { /* * If sid mismatch is detected, * reset the channel. 
*/ DWARN(vgenp, ldcp, "vgen_check_sid() failed\n"); goto vgen_evtread_error; } } switch (tagp->vio_msgtype) { case VIO_TYPE_CTRL: rv = vgen_handle_ctrlmsg(ldcp, tagp); if (rv != 0) { DWARN(vgenp, ldcp, "vgen_handle_ctrlmsg()" " failed rv(%d)\n", rv); } break; case VIO_TYPE_DATA: rv = vgen_handle_datamsg(ldcp, tagp, msglen); if (rv != 0) { DWARN(vgenp, ldcp, "vgen_handle_datamsg()" " failed rv(%d)\n", rv); } break; case VIO_TYPE_ERR: vgen_handle_errmsg(ldcp, tagp); break; default: DWARN(vgenp, ldcp, "Unknown VIO_TYPE(%x)\n", tagp->vio_msgtype); break; } /* * If an error is encountered, stop processing and * handle the error. */ if (rv != 0) { goto vgen_evtread_error; } } while (msglen); /* check once more before exiting */ rv = ldc_chkq(ldcp->ldc_handle, &has_data); if ((rv == 0) && (has_data == B_TRUE)) { DTRACE_PROBE1(vgen_chkq, vgen_ldc_t *, ldcp); goto vgen_evtread; } vgen_evtread_error: if (rv != 0) { /* * We handle the error and then return the error value. If we * are running in the context of the msg worker, the error * tells the worker thread to exit, as the channel would have * been reset. 
*/ if (rv == ECONNRESET) { if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { DWARN(vgenp, ldcp, "ldc_status err\n"); } else { ldcp->ldc_status = istatus; } (void) vgen_handle_evt_reset(ldcp, caller); } else { DWARN(vgenp, ldcp, "Calling vgen_ldc_reset()...\n"); (void) vgen_ldc_reset(ldcp, caller); } } if (caller == VGEN_MSG_THR) { mutex_exit(&ldcp->cblock); } DBG1(vgenp, ldcp, "exit\n"); return (rv); } /* vgen handshake functions */ /* change the hphase for the channel to the next phase */ static vgen_ldc_t * vh_nextphase(vgen_ldc_t *ldcp) { if (ldcp->hphase == VH_PHASE4) { ldcp->hphase = VH_DONE; } else { ldcp->hphase++; } return (ldcp); } /* send version negotiate message to the peer over ldc */ static int vgen_send_version_negotiate(vgen_ldc_t *ldcp) { vgen_t *vgenp = LDC_TO_VGEN(ldcp); vio_ver_msg_t vermsg; vio_msg_tag_t *tagp = &vermsg.tag; int rv; bzero(&vermsg, sizeof (vermsg)); tagp->vio_msgtype = VIO_TYPE_CTRL; tagp->vio_subtype = VIO_SUBTYPE_INFO; tagp->vio_subtype_env = VIO_VER_INFO; tagp->vio_sid = ldcp->local_sid; /* get version msg payload from ldcp->local */ vermsg.ver_major = ldcp->local_hparams.ver_major; vermsg.ver_minor = ldcp->local_hparams.ver_minor; vermsg.dev_class = ldcp->local_hparams.dev_class; rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vermsg), B_FALSE); if (rv != VGEN_SUCCESS) { DWARN(vgenp, ldcp, "vgen_sendmsg failed\n"); return (rv); } ldcp->hstate |= VER_INFO_SENT; DBG2(vgenp, ldcp, "VER_INFO_SENT ver(%d,%d)\n", vermsg.ver_major, vermsg.ver_minor); return (VGEN_SUCCESS); } /* send attr info message to the peer over ldc */ static int vgen_send_attr_info(vgen_ldc_t *ldcp) { vgen_t *vgenp = LDC_TO_VGEN(ldcp); vnet_attr_msg_t attrmsg; vio_msg_tag_t *tagp = &attrmsg.tag; int rv; bzero(&attrmsg, sizeof (attrmsg)); tagp->vio_msgtype = VIO_TYPE_CTRL; tagp->vio_subtype = VIO_SUBTYPE_INFO; tagp->vio_subtype_env = VIO_ATTR_INFO; tagp->vio_sid = ldcp->local_sid; /* get attr msg payload from ldcp->local */ attrmsg.mtu = 
ldcp->local_hparams.mtu; attrmsg.addr = ldcp->local_hparams.addr; attrmsg.addr_type = ldcp->local_hparams.addr_type; attrmsg.xfer_mode = ldcp->local_hparams.xfer_mode; attrmsg.ack_freq = ldcp->local_hparams.ack_freq; attrmsg.physlink_update = ldcp->local_hparams.physlink_update; attrmsg.options = ldcp->local_hparams.dring_mode; rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (attrmsg), B_FALSE); if (rv != VGEN_SUCCESS) { DWARN(vgenp, ldcp, "vgen_sendmsg failed\n"); return (rv); } ldcp->hstate |= ATTR_INFO_SENT; DBG2(vgenp, ldcp, "ATTR_INFO_SENT\n"); return (VGEN_SUCCESS); } /* * Send descriptor ring register message to the peer over ldc. * Invoked in RxDringData mode. */ static int vgen_send_rx_dring_reg(vgen_ldc_t *ldcp) { vgen_t *vgenp = LDC_TO_VGEN(ldcp); vio_dring_reg_msg_t *msg; vio_dring_reg_ext_msg_t *emsg; int rv; uint8_t *buf; uint_t msgsize; msgsize = VNET_DRING_REG_EXT_MSG_SIZE(ldcp->rx_data_ncookies); msg = kmem_zalloc(msgsize, KM_SLEEP); /* Initialize the common part of dring reg msg */ vgen_init_dring_reg_msg(ldcp, msg, VIO_RX_DRING_DATA); /* skip over dring cookies at the tail of common section */ buf = (uint8_t *)msg->cookie; ASSERT(msg->ncookies == 1); buf += (msg->ncookies * sizeof (ldc_mem_cookie_t)); /* Now setup the extended part, specific to RxDringData mode */ emsg = (vio_dring_reg_ext_msg_t *)buf; /* copy data_ncookies in the msg */ emsg->data_ncookies = ldcp->rx_data_ncookies; /* copy data area size in the msg */ emsg->data_area_size = ldcp->rx_data_sz; /* copy data area cookies in the msg */ bcopy(ldcp->rx_data_cookie, (ldc_mem_cookie_t *)emsg->data_cookie, sizeof (ldc_mem_cookie_t) * ldcp->rx_data_ncookies); rv = vgen_sendmsg(ldcp, (caddr_t)msg, msgsize, B_FALSE); if (rv != VGEN_SUCCESS) { DWARN(vgenp, ldcp, "vgen_sendmsg failed\n"); kmem_free(msg, msgsize); return (rv); } ldcp->hstate |= DRING_INFO_SENT; DBG2(vgenp, ldcp, "DRING_INFO_SENT \n"); kmem_free(msg, msgsize); return (VGEN_SUCCESS); } /* * Send descriptor ring register message to 
the peer over ldc. * Invoked in TxDring mode. */ static int vgen_send_tx_dring_reg(vgen_ldc_t *ldcp) { vgen_t *vgenp = LDC_TO_VGEN(ldcp); vio_dring_reg_msg_t msg; int rv; bzero(&msg, sizeof (msg)); /* * Initialize only the common part of dring reg msg in TxDring mode. */ vgen_init_dring_reg_msg(ldcp, &msg, VIO_TX_DRING); rv = vgen_sendmsg(ldcp, (caddr_t)&msg, sizeof (msg), B_FALSE); if (rv != VGEN_SUCCESS) { DWARN(vgenp, ldcp, "vgen_sendmsg failed\n"); return (rv); } ldcp->hstate |= DRING_INFO_SENT; DBG2(vgenp, ldcp, "DRING_INFO_SENT \n"); return (VGEN_SUCCESS); } static int vgen_send_rdx_info(vgen_ldc_t *ldcp) { vgen_t *vgenp = LDC_TO_VGEN(ldcp); vio_rdx_msg_t rdxmsg; vio_msg_tag_t *tagp = &rdxmsg.tag; int rv; bzero(&rdxmsg, sizeof (rdxmsg)); tagp->vio_msgtype = VIO_TYPE_CTRL; tagp->vio_subtype = VIO_SUBTYPE_INFO; tagp->vio_subtype_env = VIO_RDX; tagp->vio_sid = ldcp->local_sid; rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (rdxmsg), B_FALSE); if (rv != VGEN_SUCCESS) { DWARN(vgenp, ldcp, "vgen_sendmsg failed\n"); return (rv); } ldcp->hstate |= RDX_INFO_SENT; DBG2(vgenp, ldcp, "RDX_INFO_SENT\n"); return (VGEN_SUCCESS); } /* send multicast addr info message to vsw */ static int vgen_send_mcast_info(vgen_ldc_t *ldcp) { vnet_mcast_msg_t mcastmsg; vnet_mcast_msg_t *msgp; vio_msg_tag_t *tagp; vgen_t *vgenp; struct ether_addr *mca; int rv; int i; uint32_t size; uint32_t mccount; uint32_t n; msgp = &mcastmsg; tagp = &msgp->tag; vgenp = LDC_TO_VGEN(ldcp); mccount = vgenp->mccount; i = 0; do { tagp->vio_msgtype = VIO_TYPE_CTRL; tagp->vio_subtype = VIO_SUBTYPE_INFO; tagp->vio_subtype_env = VNET_MCAST_INFO; tagp->vio_sid = ldcp->local_sid; n = ((mccount >= VNET_NUM_MCAST) ? 
VNET_NUM_MCAST : mccount); size = n * sizeof (struct ether_addr); mca = &(vgenp->mctab[i]); bcopy(mca, (msgp->mca), size); msgp->set = B_TRUE; msgp->count = n; rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE); if (rv != VGEN_SUCCESS) { DWARN(vgenp, ldcp, "vgen_sendmsg err(%d)\n", rv); return (rv); } mccount -= n; i += n; } while (mccount); return (VGEN_SUCCESS); } /* * vgen_dds_rx -- post DDS messages to vnet. */ static int vgen_dds_rx(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) { vio_dds_msg_t *dmsg = (vio_dds_msg_t *)tagp; vgen_t *vgenp = LDC_TO_VGEN(ldcp); if (dmsg->dds_class != DDS_VNET_NIU) { DWARN(vgenp, ldcp, "Unknown DDS class, dropping"); return (EBADMSG); } vnet_dds_rx(vgenp->vnetp, dmsg); return (0); } /* * vgen_dds_tx -- an interface called by vnet to send DDS messages. */ int vgen_dds_tx(void *arg, void *msg) { vgen_t *vgenp = arg; vio_dds_msg_t *dmsg = msg; vgen_portlist_t *plistp = &vgenp->vgenports; vgen_ldc_t *ldcp; int rv = EIO; READ_ENTER(&plistp->rwlock); ldcp = vgenp->vsw_portp->ldcp; if ((ldcp == NULL) || (ldcp->hphase != VH_DONE)) { goto vgen_dsend_exit; } dmsg->tag.vio_sid = ldcp->local_sid; rv = vgen_sendmsg(ldcp, (caddr_t)dmsg, sizeof (vio_dds_msg_t), B_FALSE); if (rv != VGEN_SUCCESS) { rv = EIO; } else { rv = 0; } vgen_dsend_exit: RW_EXIT(&plistp->rwlock); return (rv); } /* Initiate Phase 2 of handshake */ static int vgen_handshake_phase2(vgen_ldc_t *ldcp) { int rv; #ifdef DEBUG if (vgen_inject_error(ldcp, VGEN_ERR_HSTATE)) { /* simulate out of state condition */ vgen_inject_err_flag &= ~(VGEN_ERR_HSTATE); rv = vgen_send_rdx_info(ldcp); return (rv); } if (vgen_inject_error(ldcp, VGEN_ERR_HTIMEOUT)) { /* simulate timeout condition */ vgen_inject_err_flag &= ~(VGEN_ERR_HTIMEOUT); return (VGEN_SUCCESS); } #endif rv = vgen_send_attr_info(ldcp); if (rv != VGEN_SUCCESS) { return (rv); } return (VGEN_SUCCESS); } static int vgen_handshake_phase3(vgen_ldc_t *ldcp) { int rv; vgen_hparams_t *lp = &ldcp->local_hparams; vgen_t *vgenp = 
LDC_TO_VGEN(ldcp);
	vgen_stats_t		*statsp = &ldcp->stats;

	/* dring mode has been negotiated in attr phase; save in stats */
	statsp->dring_mode = lp->dring_mode;

	if (lp->dring_mode == VIO_RX_DRING_DATA) {	/* RxDringData mode */
		ldcp->rx_dringdata = vgen_handle_dringdata_shm;
		ldcp->tx_dringdata = vgen_dringsend_shm;
		if (!VGEN_PRI_ETH_DEFINED(vgenp)) {
			/*
			 * If priority frames are not in use, we don't need a
			 * separate wrapper function for 'tx', so we set it to
			 * 'tx_dringdata'. If priority frames are configured,
			 * we leave the 'tx' pointer as is (initialized in
			 * vgen_set_vnet_proto_ops()).
			 */
			ldcp->tx = ldcp->tx_dringdata;
		}
	} else {					/* TxDring mode */
		/*
		 * Start the msg worker thread for this channel; it is
		 * created before the dring so it exists even if dring
		 * creation below fails.
		 */
		ldcp->msg_thread = thread_create(NULL,
		    2 * DEFAULTSTKSZ, vgen_ldc_msg_worker, ldcp, 0,
		    &p0, TS_RUN, maxclsyspri);
	}

	rv = vgen_create_dring(ldcp);
	if (rv != VGEN_SUCCESS) {
		return (rv);
	}

	/* update local dring_info params */
	if (lp->dring_mode == VIO_RX_DRING_DATA) {
		/* RxDringData: we export our receive ring to the peer */
		bcopy(&(ldcp->rx_dring_cookie),
		    &(ldcp->local_hparams.dring_cookie),
		    sizeof (ldc_mem_cookie_t));
		ldcp->local_hparams.dring_ncookies = ldcp->rx_dring_ncookies;
		ldcp->local_hparams.num_desc = ldcp->num_rxds;
		ldcp->local_hparams.desc_size =
		    sizeof (vnet_rx_dringdata_desc_t);
		rv = vgen_send_rx_dring_reg(ldcp);
	} else {
		/* TxDring: we export our transmit ring to the peer */
		bcopy(&(ldcp->tx_dring_cookie),
		    &(ldcp->local_hparams.dring_cookie),
		    sizeof (ldc_mem_cookie_t));
		ldcp->local_hparams.dring_ncookies = ldcp->tx_dring_ncookies;
		ldcp->local_hparams.num_desc = ldcp->num_txds;
		ldcp->local_hparams.desc_size = sizeof (vnet_public_desc_t);
		rv = vgen_send_tx_dring_reg(ldcp);
	}

	if (rv != VGEN_SUCCESS) {
		return (rv);
	}

	return (VGEN_SUCCESS);
}

/*
 * Set vnet-protocol-version dependent functions based on version.
 *
 * Fills in the local handshake parameters (dring_mode, physlink_update,
 * mtu, xfer_mode) and the tx/rx function pointers of the channel according
 * to the protocol version tested with the VGEN_VER_* macros.
 */
static void
vgen_set_vnet_proto_ops(vgen_ldc_t *ldcp)
{
	vgen_hparams_t	*lp = &ldcp->local_hparams;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);

	/*
	 * Setup the appropriate dring data processing routine and any
	 * associated thread based on the version.
	 *
	 * In versions < 1.6, we only support TxDring mode. In this mode, the
	 * msg worker thread processes all types of VIO msgs (ctrl and data).
	 *
	 * In versions >= 1.6, we also support RxDringData mode. In this mode,
	 * all msgs including dring data messages are handled directly by the
	 * callback (intr) thread. The dring data msgs (msgtype: VIO_TYPE_DATA,
	 * subtype: VIO_SUBTYPE_INFO, subtype_env: VIO_DRING_DATA) can also be
	 * disabled while the polling thread is active, in which case the
	 * polling thread processes the rcv descriptor ring.
	 *
	 * However, for versions >= 1.6, we can force to only use TxDring mode.
	 * This could happen if RxDringData mode has been disabled (see
	 * vgen_dring_mode) on this guest or on the peer guest. This info is
	 * determined as part of attr exchange phase of handshake. Hence, we
	 * setup these pointers for v1.6 after attr msg phase completes during
	 * handshake.
	 */
	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {	/* Ver >= 1.6 */
		/*
		 * Set data dring mode for vgen_send_attr_info().
		 */
		if (vgen_dring_mode == VIO_RX_DRING_DATA) {
			/* offer both modes to the peer */
			lp->dring_mode = (VIO_RX_DRING_DATA | VIO_TX_DRING);
		} else {
			lp->dring_mode = VIO_TX_DRING;
		}
	} else {				/* Ver <= 1.5 */
		lp->dring_mode = VIO_TX_DRING;
	}

	if (VGEN_VER_GTEQ(ldcp, 1, 5)) {
		vgen_port_t	*portp = ldcp->portp;
		vnet_t		*vnetp = vgenp->vnetp;
		/*
		 * If the version negotiated with vswitch is >= 1.5 (link
		 * status update support), set the required bits in our
		 * attributes if this vnet device has been configured to get
		 * physical link state updates.
		 */
		if (portp == vgenp->vsw_portp && vnetp->pls_update == B_TRUE) {
			lp->physlink_update = PHYSLINK_UPDATE_STATE;
		} else {
			lp->physlink_update = PHYSLINK_UPDATE_NONE;
		}
	}

	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
		/*
		 * If the version negotiated with peer is >= 1.4(Jumbo Frame
		 * Support), set the mtu in our attributes to max_frame_size.
		 */
		lp->mtu = vgenp->max_frame_size;
	} else  if (VGEN_VER_EQ(ldcp, 1, 3)) {
		/*
		 * If the version negotiated with peer is == 1.3 (Vlan Tag
		 * Support) set the attr.mtu to ETHERMAX + VLAN_TAGSZ.
		 */
		lp->mtu = ETHERMAX + VLAN_TAGSZ;
	} else {
		vgen_port_t	*portp = ldcp->portp;
		vnet_t		*vnetp = vgenp->vnetp;
		/*
		 * Pre-1.3 peers expect max frame size of ETHERMAX.
		 * We can negotiate that size with those peers provided the
		 * following conditions are true:
		 * - Only pvid is defined for our peer and there are no vids.
		 * - pvids are equal.
		 * If the above conditions are true, then we can send/recv only
		 * untagged frames of max size ETHERMAX.
		 *
		 * When the conditions do not hold, lp->mtu is left at
		 * whatever value it already had.
		 */
		if (portp->nvids == 0 && portp->pvid == vnetp->pvid) {
			lp->mtu = ETHERMAX;
		}
	}

	if (VGEN_VER_GTEQ(ldcp, 1, 2)) {	/* Versions >= 1.2 */
		/*
		 * Starting v1.2 we support priority frames; so set the
		 * dring processing routines and xfer modes based on the
		 * version. Note that the dring routines could be changed after
		 * attribute handshake phase for versions >= 1.6 (See
		 * vgen_handshake_phase3())
		 */
		ldcp->tx_dringdata = vgen_dringsend;
		ldcp->rx_dringdata = vgen_handle_dringdata;

		if (VGEN_PRI_ETH_DEFINED(vgenp)) {
			/*
			 * Enable priority routines and pkt mode only if
			 * at least one pri-eth-type is specified in MD.
			 */
			ldcp->tx = vgen_ldcsend;
			ldcp->rx_pktdata = vgen_handle_pkt_data;

			/* set xfer mode for vgen_send_attr_info() */
			lp->xfer_mode = VIO_PKT_MODE | VIO_DRING_MODE_V1_2;
		} else {
			/* No priority eth types defined in MD */
			ldcp->tx = ldcp->tx_dringdata;
			ldcp->rx_pktdata = vgen_handle_pkt_data_nop;

			/* Set xfer mode for vgen_send_attr_info() */
			lp->xfer_mode = VIO_DRING_MODE_V1_2;
		}
	} else { /* Versions prior to 1.2  */
		vgen_reset_vnet_proto_ops(ldcp);
	}
}

/*
 * Reset vnet-protocol-version dependent functions to pre-v1.2.
*/ static void vgen_reset_vnet_proto_ops(vgen_ldc_t *ldcp) { vgen_hparams_t *lp = &ldcp->local_hparams; ldcp->tx = ldcp->tx_dringdata = vgen_dringsend; ldcp->rx_dringdata = vgen_handle_dringdata; ldcp->rx_pktdata = vgen_handle_pkt_data_nop; /* set xfer mode for vgen_send_attr_info() */ lp->xfer_mode = VIO_DRING_MODE_V1_0; } static void vgen_vlan_unaware_port_reset(vgen_port_t *portp) { vgen_ldc_t *ldcp = portp->ldcp; vgen_t *vgenp = portp->vgenp; vnet_t *vnetp = vgenp->vnetp; boolean_t need_reset = B_FALSE; mutex_enter(&ldcp->cblock); /* * If the peer is vlan_unaware(ver < 1.3), reset channel and terminate * the connection. See comments in vgen_set_vnet_proto_ops(). */ if (ldcp->hphase == VH_DONE && VGEN_VER_LT(ldcp, 1, 3) && (portp->nvids != 0 || portp->pvid != vnetp->pvid)) { need_reset = B_TRUE; } mutex_exit(&ldcp->cblock); if (need_reset == B_TRUE) { (void) vgen_ldc_reset(ldcp, VGEN_OTHER); } } static void vgen_port_reset(vgen_port_t *portp) { (void) vgen_ldc_reset(portp->ldcp, VGEN_OTHER); } static void vgen_reset_vlan_unaware_ports(vgen_t *vgenp) { vgen_port_t *portp; vgen_portlist_t *plistp; plistp = &(vgenp->vgenports); READ_ENTER(&plistp->rwlock); for (portp = plistp->headp; portp != NULL; portp = portp->nextp) { vgen_vlan_unaware_port_reset(portp); } RW_EXIT(&plistp->rwlock); } static void vgen_reset_vsw_port(vgen_t *vgenp) { vgen_port_t *portp; if ((portp = vgenp->vsw_portp) != NULL) { vgen_port_reset(portp); } } static void vgen_setup_handshake_params(vgen_ldc_t *ldcp) { vgen_t *vgenp = LDC_TO_VGEN(ldcp); /* * clear local handshake params and initialize. 
*/ bzero(&(ldcp->local_hparams), sizeof (ldcp->local_hparams)); /* set version to the highest version supported */ ldcp->local_hparams.ver_major = ldcp->vgen_versions[0].ver_major; ldcp->local_hparams.ver_minor = ldcp->vgen_versions[0].ver_minor; ldcp->local_hparams.dev_class = VDEV_NETWORK; /* set attr_info params */ ldcp->local_hparams.mtu = vgenp->max_frame_size; ldcp->local_hparams.addr = vnet_macaddr_strtoul(vgenp->macaddr); ldcp->local_hparams.addr_type = ADDR_TYPE_MAC; ldcp->local_hparams.xfer_mode = VIO_DRING_MODE_V1_0; ldcp->local_hparams.ack_freq = 0; /* don't need acks */ ldcp->local_hparams.physlink_update = PHYSLINK_UPDATE_NONE; /* reset protocol version specific function pointers */ vgen_reset_vnet_proto_ops(ldcp); ldcp->local_hparams.dring_ident = 0; ldcp->local_hparams.dring_ready = B_FALSE; /* clear peer_hparams */ bzero(&(ldcp->peer_hparams), sizeof (ldcp->peer_hparams)); ldcp->peer_hparams.dring_ready = B_FALSE; } /* * Process Channel Reset. We tear down the resources (timers, threads, * descriptor rings etc) associated with the channel and reinitialize the * channel based on the flags. * * Arguments: * ldcp: The channel being processed. * * flags: * VGEN_FLAG_EVT_RESET: * A ECONNRESET error occured while doing ldc operations such as * ldc_read() or ldc_write(); the channel is already reset and it * needs to be handled. * VGEN_FLAG_NEED_LDCRESET: * Some other errors occured and the error handling code needs to * explicitly reset the channel and restart handshake with the * peer. The error could be either in ldc operations or other * parts of the code such as timeouts or mdeg events etc. * VGEN_FLAG_UNINIT: * The channel is being torn down; no need to bring up the channel * after resetting. 
*/
static int
vgen_process_reset(vgen_ldc_t *ldcp, int flags)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vgen_port_t	*portp = ldcp->portp;
	vgen_hparams_t  *lp = &ldcp->local_hparams;
	boolean_t	is_vsw_port = B_FALSE;
	boolean_t	link_update = B_FALSE;
	ldc_status_t	istatus;
	int		rv;
	uint_t		retries = 0;
	timeout_id_t	htid = 0;
	timeout_id_t	wd_tid = 0;

	if (portp == vgenp->vsw_portp) { /* vswitch port ? */
		is_vsw_port = B_TRUE;
	}

	/*
	 * Report that the channel is being reset; it ensures that any HybridIO
	 * configuration is torn down before we reset the channel if it is not
	 * already reset (flags == VGEN_FLAG_NEED_LDCRESET).
	 */
	if (is_vsw_port == B_TRUE) {
		vio_net_report_err_t rep_err = portp->vcb.vio_net_report_err;
		rep_err(portp->vhp, VIO_NET_RES_DOWN);
	}

	/*
	 * Re-entry point: a failed handshake restart below jumps back here
	 * with 'flags' updated to describe the new failure.
	 * NOTE(review): the locals htid/wd_tid are not cleared on re-entry,
	 * so a value captured on an earlier pass can be passed to
	 * untimeout() again -- confirm this is harmless.
	 */
again:
	mutex_enter(&ldcp->cblock);

	/* Clear hstate and hphase */
	ldcp->hstate = 0;
	ldcp->hphase = VH_PHASE0;
	if (flags == VGEN_FLAG_NEED_LDCRESET || flags == VGEN_FLAG_UNINIT) {
		DWARN(vgenp, ldcp, "Doing Channel Reset...\n");
		(void) ldc_down(ldcp->ldc_handle);
		(void) ldc_status(ldcp->ldc_handle, &istatus);
		DWARN(vgenp, ldcp, "Reset Done, ldc_status(%d)\n", istatus);
		ldcp->ldc_status = istatus;

		if (flags == VGEN_FLAG_UNINIT) {
			/* disable further callbacks */
			rv = ldc_set_cb_mode(ldcp->ldc_handle, LDC_CB_DISABLE);
			if (rv != 0) {
				DWARN(vgenp, ldcp,
				    "ldc_set_cb_mode failed\n");
			}
		}

	} else {
		/* flags == VGEN_FLAG_EVT_RESET */
		DWARN(vgenp, ldcp, "ldc status(%d)\n", ldcp->ldc_status);
	}

	/*
	 * As the connection is now reset, mark the channel
	 * link_state as 'down' and notify the stack if needed.
	 */
	if (ldcp->link_state != LINK_STATE_DOWN) {
		ldcp->link_state = LINK_STATE_DOWN;

		if (is_vsw_port == B_TRUE) { /* vswitch port ? */
			/*
			 * As the channel link is down, mark physical link also
			 * as down. After the channel comes back up and
			 * handshake completes, we will get an update on the
			 * physlink state from vswitch (if this device has been
			 * configured to get phys link updates).
			 */
			vgenp->phys_link_state = LINK_STATE_DOWN;
			link_update = B_TRUE;

		}
	}

	/*
	 * Capture and clear the pending timeout ids while cblock is held;
	 * the timeouts themselves are cancelled after the lock is dropped.
	 */
	if (ldcp->htid != 0) {
		htid = ldcp->htid;
		ldcp->htid = 0;
	}

	if (ldcp->wd_tid != 0) {
		wd_tid = ldcp->wd_tid;
		ldcp->wd_tid = 0;
	}

	mutex_exit(&ldcp->cblock);

	/* Update link state to the stack */
	if (link_update == B_TRUE) {
		vgen_link_update(vgenp, ldcp->link_state);
	}

	/*
	 * As the channel is being reset, redirect traffic to the peer through
	 * vswitch, until the channel becomes ready to be used again.
	 */
	if (is_vsw_port == B_FALSE && vgenp->vsw_portp != NULL) {
		(void) atomic_swap_32(&portp->use_vsw_port, B_TRUE);
	}

	/* Cancel handshake watchdog timeout */
	if (htid) {
		(void) untimeout(htid);
	}

	/* Cancel transmit watchdog timeout */
	if (wd_tid) {
		(void) untimeout(wd_tid);
	}

	/*
	 * Stop the msg worker thread; skipped when we are running in the
	 * worker thread itself.
	 */
	if (lp->dring_mode == VIO_TX_DRING && curthread != ldcp->msg_thread) {
		vgen_stop_msg_thread(ldcp);
	}

	/* Grab all locks while we tear down tx/rx resources */
	LDC_LOCK(ldcp);

	/* Destroy the local dring which is exported to the peer */
	vgen_destroy_dring(ldcp);

	/* Unmap the remote dring which is imported from the peer */
	vgen_unmap_dring(ldcp);

	/*
	 * Bring up the channel and restart handshake
	 * only if the channel is not being torn down.
	 */
	if (flags != VGEN_FLAG_UNINIT) {

		/* Setup handshake parameters to restart a new handshake */
		vgen_setup_handshake_params(ldcp);

		/* Bring the channel up */
		vgen_ldc_up(ldcp);

		if (ldc_status(ldcp->ldc_handle, &istatus) != 0) {
			DWARN(vgenp, ldcp, "ldc_status err\n");
		} else {
			ldcp->ldc_status = istatus;
		}

		/* If the channel is UP, start handshake */
		if (ldcp->ldc_status == LDC_UP) {

			if (is_vsw_port == B_FALSE) {
				/*
				 * Channel is up; use this port from now on.
				 */
				(void) atomic_swap_32(&portp->use_vsw_port,
				    B_FALSE);
			}

			/* Initialize local session id */
			ldcp->local_sid = ddi_get_lbolt();

			/* clear peer session id */
			ldcp->peer_sid = 0;

			/*
			 * Initiate Handshake process with peer ldc endpoint by
			 * sending version info vio message. If that fails we
			 * go back to the top of this function to process the
			 * error again. Note that we can be in this loop for
			 * 'vgen_ldc_max_resets' times, after which the channel
			 * is not brought up.
			 */
			/*
			 * Only cblock remains held across vgen_handshake();
			 * it is released immediately afterwards.
			 */
			mutex_exit(&ldcp->tclock);
			mutex_exit(&ldcp->txlock);
			mutex_exit(&ldcp->wrlock);
			mutex_exit(&ldcp->rxlock);
			rv = vgen_handshake(vh_nextphase(ldcp));
			mutex_exit(&ldcp->cblock);
			if (rv != 0) {
				if (rv == ECONNRESET) {
					flags = VGEN_FLAG_EVT_RESET;
				} else {
					flags = VGEN_FLAG_NEED_LDCRESET;
				}

				/*
				 * We still hold 'reset_in_progress'; so we can
				 * just loop back to the top to restart error
				 * processing.
				 */
				goto again;
			}
		} else {
			LDC_UNLOCK(ldcp);
		}

	} else {	/* flags == VGEN_FLAG_UNINIT */

		/* Close the channel - retry on EAGAIN */
		while ((rv = ldc_close(ldcp->ldc_handle)) == EAGAIN) {
			if (++retries > vgen_ldccl_retries) {
				break;
			}
			drv_usecwait(VGEN_LDC_CLOSE_DELAY);
		}
		if (rv != 0) {
			cmn_err(CE_NOTE,
			    "!vnet%d: Error(%d) closing the channel(0x%lx)\n",
			    vgenp->instance, rv, ldcp->ldc_id);
		}

		/* return the channel to its pre-vgen_ldc_init() state */
		ldcp->ldc_reset_count = 0;
		ldcp->ldc_status = LDC_INIT;
		ldcp->flags &= ~(CHANNEL_STARTED);

		LDC_UNLOCK(ldcp);
	}

	/* Done processing channel reset; clear the atomic flag */
	ldcp->reset_in_progress = 0;
	return (0);
}

/*
 * Initiate handshake with the peer by sending various messages
 * based on the handshake-phase that the channel is currently in.
 */
static int
vgen_handshake(vgen_ldc_t *ldcp)
{
	uint32_t	hphase = ldcp->hphase;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	int		rv = 0;
	timeout_id_t	htid;

	switch (hphase) {

	case VH_PHASE1:

		/*
		 * start timer, for entire handshake process, turn this timer
		 * off if all phases of handshake complete successfully and
		 * hphase goes to VH_DONE(below) or channel is reset due to
		 * errors or vgen_ldc_uninit() is invoked(vgen_stop).
*/ ASSERT(ldcp->htid == 0); ldcp->htid = timeout(vgen_hwatchdog, (caddr_t)ldcp, drv_usectohz(vgen_hwd_interval * MICROSEC)); /* Phase 1 involves negotiating the version */ rv = vgen_send_version_negotiate(ldcp); break; case VH_PHASE2: rv = vgen_handshake_phase2(ldcp); break; case VH_PHASE3: rv = vgen_handshake_phase3(ldcp); break; case VH_PHASE4: rv = vgen_send_rdx_info(ldcp); break; case VH_DONE: ldcp->ldc_reset_count = 0; DBG1(vgenp, ldcp, "Handshake Done\n"); /* * The channel is up and handshake is done successfully. Now we * can mark the channel link_state as 'up'. We also notify the * stack if the channel is connected to vswitch. */ ldcp->link_state = LINK_STATE_UP; if (ldcp->portp == vgenp->vsw_portp) { /* * If this channel(port) is connected to vsw, * need to sync multicast table with vsw. */ rv = vgen_send_mcast_info(ldcp); if (rv != VGEN_SUCCESS) break; if (vgenp->pls_negotiated == B_FALSE) { /* * We haven't negotiated with vswitch to get * physical link state updates. We can update * update the stack at this point as the * channel to vswitch is up and the handshake * is done successfully. * * If we have negotiated to get physical link * state updates, then we won't notify the * the stack here; we do that as soon as * vswitch sends us the initial phys link state * (see vgen_handle_physlink_info()). */ mutex_exit(&ldcp->cblock); vgen_link_update(vgenp, ldcp->link_state); mutex_enter(&ldcp->cblock); } } if (ldcp->htid != 0) { htid = ldcp->htid; ldcp->htid = 0; mutex_exit(&ldcp->cblock); (void) untimeout(htid); mutex_enter(&ldcp->cblock); } /* * Check if mac layer should be notified to restart * transmissions. This can happen if the channel got * reset and while tx_blocked is set. 
*/ mutex_enter(&ldcp->tclock); if (ldcp->tx_blocked) { vio_net_tx_update_t vtx_update = ldcp->portp->vcb.vio_net_tx_update; ldcp->tx_blocked = B_FALSE; vtx_update(ldcp->portp->vhp); } mutex_exit(&ldcp->tclock); /* start transmit watchdog timer */ ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp, drv_usectohz(vgen_txwd_interval * 1000)); break; default: break; } return (rv); } /* * Check if the current handshake phase has completed successfully and * return the status. */ static int vgen_handshake_done(vgen_ldc_t *ldcp) { vgen_t *vgenp = LDC_TO_VGEN(ldcp); uint32_t hphase = ldcp->hphase; int status = 0; switch (hphase) { case VH_PHASE1: /* * Phase1 is done, if version negotiation * completed successfully. */ status = ((ldcp->hstate & VER_NEGOTIATED) == VER_NEGOTIATED); break; case VH_PHASE2: /* * Phase 2 is done, if attr info * has been exchanged successfully. */ status = ((ldcp->hstate & ATTR_INFO_EXCHANGED) == ATTR_INFO_EXCHANGED); break; case VH_PHASE3: /* * Phase 3 is done, if dring registration * has been exchanged successfully. */ status = ((ldcp->hstate & DRING_INFO_EXCHANGED) == DRING_INFO_EXCHANGED); break; case VH_PHASE4: /* Phase 4 is done, if rdx msg has been exchanged */ status = ((ldcp->hstate & RDX_EXCHANGED) == RDX_EXCHANGED); break; default: break; } if (status == 0) { return (VGEN_FAILURE); } DBG2(vgenp, ldcp, "PHASE(%d)\n", hphase); return (VGEN_SUCCESS); } /* * Link State Update Notes: * The link state of the channel connected to vswitch is reported as the link * state of the vnet device, by default. If the channel is down or reset, then * the link state is marked 'down'. If the channel is 'up' *and* handshake * between the vnet and vswitch is successful, then the link state is marked * 'up'. If physical network link state is desired, then the vnet device must * be configured to get physical link updates and the 'linkprop' property * in the virtual-device MD node indicates this. 
As part of attribute exchange
 * the vnet device negotiates with the vswitch to obtain physical link state
 * updates. If it successfully negotiates, vswitch sends an initial physlink
 * msg once the handshake is done and further whenever the physical link state
 * changes. Currently we don't have mac layer interfaces to report two distinct
 * link states - virtual and physical. Thus, if the vnet has been configured to
 * get physical link updates, then the link status will be reported as 'up'
 * only when both the virtual and physical links are up.
 */
static void
vgen_link_update(vgen_t *vgenp, link_state_t link_state)
{
	/* Thin wrapper: hand the new state to the vnet layer. */
	vnet_link_update(vgenp->vnetp, link_state);
}

/*
 * Handle a version info msg from the peer or an ACK/NACK from the peer
 * to a version info msg that we sent.
 *
 * Returns VGEN_SUCCESS when the message was processed, VGEN_FAILURE when
 * negotiation failed or the message arrived in the wrong phase, EINVAL for a
 * version INFO received outside VH_PHASE1, or the error returned by
 * vgen_sendmsg()/vgen_handshake()/vgen_send_version_negotiate().
 * Note that the received message buffer (tagp) is modified in place and
 * reused as the reply.
 */
static int
vgen_handle_version_negotiate(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t		*vgenp;
	vio_ver_msg_t	*vermsg = (vio_ver_msg_t *)tagp;
	int		ack = 0;
	int		failed = 0;
	int		idx;
	vgen_ver_t	*versions = ldcp->vgen_versions;
	int		rv = 0;

	vgenp = LDC_TO_VGEN(ldcp);
	DBG1(vgenp, ldcp, "enter\n");
	switch (tagp->vio_subtype) {
	case VIO_SUBTYPE_INFO:

		/*  Cache sid of peer if this is the first time */
		if (ldcp->peer_sid == 0) {
			DBG2(vgenp, ldcp, "Caching peer_sid(%x)\n",
			    tagp->vio_sid);
			ldcp->peer_sid = tagp->vio_sid;
		}

		if (ldcp->hphase != VH_PHASE1) {
			/*
			 * If we are not already in VH_PHASE1, reset to
			 * pre-handshake state, and initiate handshake
			 * to the peer too.
			 *
			 * NOTE(review): this function only returns EINVAL
			 * here; the reset described above is presumably done
			 * by the caller on a non-zero return - confirm.
			 */
			return (EINVAL);
		}

		ldcp->hstate |= VER_INFO_RCVD;

		/* save peer's requested values */
		ldcp->peer_hparams.ver_major = vermsg->ver_major;
		ldcp->peer_hparams.ver_minor = vermsg->ver_minor;
		ldcp->peer_hparams.dev_class = vermsg->dev_class;

		if ((vermsg->dev_class != VDEV_NETWORK) &&
		    (vermsg->dev_class != VDEV_NETWORK_SWITCH)) {
			/* unsupported dev_class, send NACK */

			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");

			tagp->vio_subtype = VIO_SUBTYPE_NACK;
			tagp->vio_sid = ldcp->local_sid;
			/* send reply msg back to peer */
			rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
			    sizeof (*vermsg), B_FALSE);
			if (rv != VGEN_SUCCESS) {
				return (rv);
			}
			return (VGEN_FAILURE);
		}

		DBG2(vgenp, ldcp, "VER_INFO_RCVD, ver(%d,%d)\n",
		    vermsg->ver_major, vermsg->ver_minor);

		/*
		 * Walk our version table from the first entry, stopping at
		 * the first entry whose major number is <= the peer's.
		 * NOTE(review): this assumes versions[] is ordered from
		 * highest to lowest major version - confirm at its definition.
		 */
		idx = 0;

		for (;;) {

			if (vermsg->ver_major > versions[idx].ver_major) {

				/* nack with next lower version */
				tagp->vio_subtype = VIO_SUBTYPE_NACK;
				vermsg->ver_major = versions[idx].ver_major;
				vermsg->ver_minor = versions[idx].ver_minor;
				break;
			}

			if (vermsg->ver_major == versions[idx].ver_major) {

				/* major version match - ACK version */
				tagp->vio_subtype = VIO_SUBTYPE_ACK;
				ack = 1;

				/*
				 * lower minor version to the one this endpt
				 * supports, if necessary
				 */
				if (vermsg->ver_minor >
				    versions[idx].ver_minor) {
					vermsg->ver_minor =
					    versions[idx].ver_minor;
					ldcp->peer_hparams.ver_minor =
					    versions[idx].ver_minor;
				}
				break;
			}

			idx++;

			if (idx == VGEN_NUM_VER) {

				/* no version match - send NACK */
				tagp->vio_subtype = VIO_SUBTYPE_NACK;
				vermsg->ver_major = 0;
				vermsg->ver_minor = 0;
				failed = 1;
				break;
			}

		}

		tagp->vio_sid = ldcp->local_sid;

		/* send reply msg back to peer */
		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*vermsg),
		    B_FALSE);
		if (rv != VGEN_SUCCESS) {
			return (rv);
		}

		/* The ACK_SENT bit is recorded only after the send succeeded */
		if (ack) {
			ldcp->hstate |= VER_ACK_SENT;
			DBG2(vgenp, ldcp, "VER_ACK_SENT, ver(%d,%d) \n",
			    vermsg->ver_major, vermsg->ver_minor);
		}
		if (failed) {
			DWARN(vgenp, ldcp, "Negotiation Failed\n");
			return (VGEN_FAILURE);
		}
		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {

			/*  VER_ACK_SENT and VER_ACK_RCVD */

			/* local and peer versions match? */
			ASSERT((ldcp->local_hparams.ver_major ==
			    ldcp->peer_hparams.ver_major) &&
			    (ldcp->local_hparams.ver_minor ==
			    ldcp->peer_hparams.ver_minor));

			vgen_set_vnet_proto_ops(ldcp);

			/* move to the next phase */
			rv = vgen_handshake(vh_nextphase(ldcp));
			if (rv != 0) {
				return (rv);
			}
		}

		break;

	case VIO_SUBTYPE_ACK:

		if (ldcp->hphase != VH_PHASE1) {
			/*  This should not happen. */
			DWARN(vgenp, ldcp, "Invalid Phase(%u)\n", ldcp->hphase);
			return (VGEN_FAILURE);
		}

		/* SUCCESS - we have agreed on a version */
		ldcp->local_hparams.ver_major = vermsg->ver_major;
		ldcp->local_hparams.ver_minor = vermsg->ver_minor;
		ldcp->hstate |= VER_ACK_RCVD;

		DBG2(vgenp, ldcp, "VER_ACK_RCVD, ver(%d,%d) \n",
		    vermsg->ver_major, vermsg->ver_minor);

		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {

			/*  VER_ACK_SENT and VER_ACK_RCVD */

			/* local and peer versions match? */
			ASSERT((ldcp->local_hparams.ver_major ==
			    ldcp->peer_hparams.ver_major) &&
			    (ldcp->local_hparams.ver_minor ==
			    ldcp->peer_hparams.ver_minor));

			vgen_set_vnet_proto_ops(ldcp);

			/* move to the next phase */
			rv = vgen_handshake(vh_nextphase(ldcp));
			if (rv != 0) {
				return (rv);
			}
		}
		break;

	case VIO_SUBTYPE_NACK:

		if (ldcp->hphase != VH_PHASE1) {
			/*  This should not happen.  */
			DWARN(vgenp, ldcp, "VER_NACK_RCVD Invalid "
			"Phase(%u)\n", ldcp->hphase);
			return (VGEN_FAILURE);
		}

		DBG2(vgenp, ldcp, "VER_NACK_RCVD next ver(%d,%d)\n",
		    vermsg->ver_major, vermsg->ver_minor);

		/* check if version in NACK is zero */
		if (vermsg->ver_major == 0 && vermsg->ver_minor == 0) {
			/*
			 * Version Negotiation has failed.
			 */
			DWARN(vgenp, ldcp, "Version Negotiation Failed\n");
			return (VGEN_FAILURE);
		}

		/*
		 * Pick the first entry in our table whose major number is
		 * <= the one suggested in the NACK and retry with it. Note
		 * that in both branches our own minor number for that entry
		 * is used, not the minor carried in the NACK.
		 */
		idx = 0;

		for (;;) {

			if (vermsg->ver_major > versions[idx].ver_major) {
				/* select next lower version */

				ldcp->local_hparams.ver_major =
				    versions[idx].ver_major;
				ldcp->local_hparams.ver_minor =
				    versions[idx].ver_minor;
				break;
			}

			if (vermsg->ver_major == versions[idx].ver_major) {
				/* major version match */

				ldcp->local_hparams.ver_major =
				    versions[idx].ver_major;

				ldcp->local_hparams.ver_minor =
				    versions[idx].ver_minor;
				break;
			}

			idx++;

			if (idx == VGEN_NUM_VER) {
				/*
				 * no version match.
				 * Version Negotiation has failed.
				 */
				DWARN(vgenp, ldcp,
				    "Version Negotiation Failed\n");
				return (VGEN_FAILURE);
			}

		}

		rv = vgen_send_version_negotiate(ldcp);
		if (rv != VGEN_SUCCESS) {
			return (rv);
		}

		break;
	}

	DBG1(vgenp, ldcp, "exit\n");
	return (VGEN_SUCCESS);
}

/*
 * Validate an attribute INFO message from the peer and compute the values to
 * be sent back in the reply.
 *
 * The peer's values are saved in ldcp->peer_hparams. For protocol versions
 * that negotiate them, msg->options (dring mode) and msg->mtu are overwritten
 * in place with the negotiated values so the caller can reuse the buffer as
 * the ACK. Returns VGEN_SUCCESS if the attributes are acceptable and
 * VGEN_FAILURE otherwise; sending the ACK/NACK is left to the caller.
 */
static int
vgen_handle_attr_info(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t	*lp = &ldcp->local_hparams;
	vgen_hparams_t	*rp = &ldcp->peer_hparams;
	uint32_t	mtu;
	uint8_t		dring_mode;

	ldcp->hstate |= ATTR_INFO_RCVD;

	/* save peer's values */
	rp->mtu = msg->mtu;
	rp->addr = msg->addr;
	rp->addr_type = msg->addr_type;
	rp->xfer_mode = msg->xfer_mode;
	rp->ack_freq = msg->ack_freq;
	rp->dring_mode = msg->options;

	/*
	 * Process address type, ack frequency and transfer mode attributes.
	 */
	if ((msg->addr_type != ADDR_TYPE_MAC) ||
	    (msg->ack_freq > 64) ||
	    (msg->xfer_mode != lp->xfer_mode)) {
		return (VGEN_FAILURE);
	}

	/*
	 * Process dring mode attribute.
	 */
	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
		/*
		 * Versions >= 1.6:
		 * Though we are operating in v1.6 mode, it is possible that
		 * RxDringData mode has been disabled either on this guest or
		 * on the peer guest. If so, we revert to pre v1.6 behavior of
		 * TxDring mode. But this must be agreed upon in both
		 * directions of attr exchange. We first determine the mode
		 * that can be negotiated.
		 */
		if ((msg->options & VIO_RX_DRING_DATA) != 0 &&
		    vgen_dring_mode == VIO_RX_DRING_DATA) {
			/*
			 * We are capable of handling RxDringData AND the peer
			 * is also capable of it; we enable RxDringData mode on
			 * this channel.
			 */
			dring_mode = VIO_RX_DRING_DATA;
		} else if ((msg->options & VIO_TX_DRING) != 0) {
			/*
			 * If the peer is capable of TxDring mode, we
			 * negotiate TxDring mode on this channel.
			 */
			dring_mode = VIO_TX_DRING;
		} else {
			/*
			 * We support only VIO_TX_DRING and VIO_RX_DRING_DATA
			 * modes. We don't support VIO_RX_DRING mode.
			 */
			return (VGEN_FAILURE);
		}

		/*
		 * If we have received an ack for the attr info that we sent,
		 * then check if the dring mode matches what the peer had ack'd
		 * (saved in local hparams). If they don't match, we fail the
		 * handshake.
		 *
		 * Note: the comparison below uses the raw msg->options value
		 * from the peer, not the dring_mode computed above.
		 */
		if (ldcp->hstate & ATTR_ACK_RCVD) {
			if (msg->options != lp->dring_mode) {
				/* send NACK */
				return (VGEN_FAILURE);
			}
		} else {
			/*
			 * Save the negotiated dring mode in our attr
			 * parameters, so it gets sent in the attr info from us
			 * to the peer.
			 */
			lp->dring_mode = dring_mode;
		}

		/* save the negotiated dring mode in the msg to be replied */
		msg->options = dring_mode;
	}

	/*
	 * Process MTU attribute.
	 */
	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
		/*
		 * Versions >= 1.4:
		 * Validate mtu of the peer is at least ETHERMAX. Then, the mtu
		 * is negotiated down to the minimum of our mtu and peer's mtu.
		 */
		if (msg->mtu < ETHERMAX) {
			return (VGEN_FAILURE);
		}

		mtu = MIN(msg->mtu, vgenp->max_frame_size);

		/*
		 * If we have received an ack for the attr info
		 * that we sent, then check if the mtu computed
		 * above matches the mtu that the peer had ack'd
		 * (saved in local hparams). If they don't
		 * match, we fail the handshake.
		 */
		if (ldcp->hstate & ATTR_ACK_RCVD) {
			if (mtu != lp->mtu) {
				/* send NACK */
				return (VGEN_FAILURE);
			}
		} else {
			/*
			 * Save the mtu computed above in our
			 * attr parameters, so it gets sent in
			 * the attr info from us to the peer.
			 */
			lp->mtu = mtu;
		}

		/* save the MIN mtu in the msg to be replied */
		msg->mtu = mtu;

	} else {
		/* versions < 1.4, mtu must match */
		if (msg->mtu != lp->mtu) {
			return (VGEN_FAILURE);
		}
	}

	return (VGEN_SUCCESS);
}

/*
 * Validate an attribute ACK from the peer against what we sent (and, if we
 * have already ACKed the peer's attr info, against what was negotiated then).
 *
 * May update lp->dring_mode, lp->mtu and vgenp->pls_negotiated. Returns
 * VGEN_SUCCESS if the ACK is consistent, VGEN_FAILURE otherwise.
 */
static int
vgen_handle_attr_ack(vgen_ldc_t *ldcp, vnet_attr_msg_t *msg)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t	*lp = &ldcp->local_hparams;

	/*
	 * Process dring mode attribute.
	 */
	if (VGEN_VER_GTEQ(ldcp, 1, 6)) {
		/*
		 * Versions >= 1.6:
		 * The ack msg sent by the peer contains the negotiated dring
		 * mode between our capability (that we had sent in our attr
		 * info) and the peer's capability.
		 */
		if (ldcp->hstate & ATTR_ACK_SENT) {
			/*
			 * If we have sent an ack for the attr info msg from
			 * the peer, check if the dring mode that was
			 * negotiated then (saved in local hparams) matches the
			 * mode that the peer has ack'd. If they don't match,
			 * we fail the handshake.
			 */
			if (lp->dring_mode != msg->options) {
				return (VGEN_FAILURE);
			}
		} else {
			if ((msg->options & lp->dring_mode) == 0) {
				/*
				 * Peer ack'd with a mode that we don't
				 * support; we fail the handshake.
				 */
				return (VGEN_FAILURE);
			}
			if ((msg->options & (VIO_TX_DRING|VIO_RX_DRING_DATA))
			    == (VIO_TX_DRING|VIO_RX_DRING_DATA)) {
				/*
				 * Peer must ack with only one negotiated mode.
				 * Otherwise fail handshake.
				 */
				return (VGEN_FAILURE);
			}

			/*
			 * Save the negotiated mode, so we can validate it when
			 * we receive attr info from the peer.
			 */
			lp->dring_mode = msg->options;
		}
	}

	/*
	 * Process Physical Link Update attribute.
	 */
	if (VGEN_VER_GTEQ(ldcp, 1, 5) &&
	    ldcp->portp == vgenp->vsw_portp) {
		/*
		 * Versions >= 1.5:
		 * If the vnet device has been configured to get
		 * physical link state updates, check the corresponding
		 * bits in the ack msg, if the peer is vswitch.
		 */
		if (((lp->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
		    PHYSLINK_UPDATE_STATE) &&
		    ((msg->physlink_update & PHYSLINK_UPDATE_STATE_MASK) ==
		    PHYSLINK_UPDATE_STATE_ACK)) {
			vgenp->pls_negotiated = B_TRUE;
		} else {
			vgenp->pls_negotiated = B_FALSE;
		}
	}

	/*
	 * Process MTU attribute.
	 */
	if (VGEN_VER_GTEQ(ldcp, 1, 4)) {
		/*
		 * Versions >= 1.4:
		 * The ack msg sent by the peer contains the minimum of
		 * our mtu (that we had sent in our attr info) and the
		 * peer's mtu.
		 *
		 * If we have sent an ack for the attr info msg from
		 * the peer, check if the mtu that was computed then
		 * (saved in local hparams) matches the mtu that the
		 * peer has ack'd. If they don't match, we fail the
		 * handshake.
		 */
		if (ldcp->hstate & ATTR_ACK_SENT) {
			if (lp->mtu != msg->mtu) {
				return (VGEN_FAILURE);
			}
		} else {
			/*
			 * If the mtu ack'd by the peer is > our mtu
			 * fail handshake. Otherwise, save the mtu, so
			 * we can validate it when we receive attr info
			 * from our peer.
			 */
			if (msg->mtu > lp->mtu) {
				return (VGEN_FAILURE);
			}
			/* Always true here, given the check just above. */
			if (msg->mtu <= lp->mtu) {
				lp->mtu = msg->mtu;
			}
		}
	}

	return (VGEN_SUCCESS);
}

/*
 * Handle an attribute info msg from the peer or an ACK/NACK from the peer
 * to an attr info msg that we sent.
*/ static int vgen_handle_attr_msg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) { vgen_t *vgenp = LDC_TO_VGEN(ldcp); vnet_attr_msg_t *msg = (vnet_attr_msg_t *)tagp; int rv = 0; DBG1(vgenp, ldcp, "enter\n"); if (ldcp->hphase != VH_PHASE2) { DWARN(vgenp, ldcp, "Rcvd ATTR_INFO subtype(%d)," " Invalid Phase(%u)\n", tagp->vio_subtype, ldcp->hphase); return (VGEN_FAILURE); } switch (tagp->vio_subtype) { case VIO_SUBTYPE_INFO: rv = vgen_handle_attr_info(ldcp, msg); if (rv == VGEN_SUCCESS) { tagp->vio_subtype = VIO_SUBTYPE_ACK; } else { tagp->vio_subtype = VIO_SUBTYPE_NACK; } tagp->vio_sid = ldcp->local_sid; /* send reply msg back to peer */ rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msg), B_FALSE); if (rv != VGEN_SUCCESS) { return (rv); } if (tagp->vio_subtype == VIO_SUBTYPE_NACK) { DWARN(vgenp, ldcp, "ATTR_NACK_SENT"); break; } ldcp->hstate |= ATTR_ACK_SENT; DBG2(vgenp, ldcp, "ATTR_ACK_SENT \n"); if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { rv = vgen_handshake(vh_nextphase(ldcp)); if (rv != 0) { return (rv); } } break; case VIO_SUBTYPE_ACK: rv = vgen_handle_attr_ack(ldcp, msg); if (rv == VGEN_FAILURE) { break; } ldcp->hstate |= ATTR_ACK_RCVD; DBG2(vgenp, ldcp, "ATTR_ACK_RCVD \n"); if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) { rv = vgen_handshake(vh_nextphase(ldcp)); if (rv != 0) { return (rv); } } break; case VIO_SUBTYPE_NACK: DBG2(vgenp, ldcp, "ATTR_NACK_RCVD \n"); return (VGEN_FAILURE); } DBG1(vgenp, ldcp, "exit\n"); return (VGEN_SUCCESS); } static int vgen_handle_dring_reg_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) { int rv = 0; vgen_t *vgenp = LDC_TO_VGEN(ldcp); vgen_hparams_t *lp = &ldcp->local_hparams; DBG2(vgenp, ldcp, "DRING_INFO_RCVD"); ldcp->hstate |= DRING_INFO_RCVD; if (VGEN_VER_GTEQ(ldcp, 1, 6) && (lp->dring_mode != ((vio_dring_reg_msg_t *)tagp)->options)) { /* * The earlier version of Solaris vnet driver doesn't set the * option (VIO_TX_DRING in its case) correctly in its dring reg * message. 
We workaround that here by doing the check only * for versions >= v1.6. */ DWARN(vgenp, ldcp, "Rcvd dring reg option (%d), negotiated mode (%d)\n", ((vio_dring_reg_msg_t *)tagp)->options, lp->dring_mode); return (VGEN_FAILURE); } /* * Map dring exported by the peer. */ rv = vgen_map_dring(ldcp, (void *)tagp); if (rv != VGEN_SUCCESS) { return (rv); } /* * Map data buffers exported by the peer if we are in RxDringData mode. */ if (lp->dring_mode == VIO_RX_DRING_DATA) { rv = vgen_map_data(ldcp, (void *)tagp); if (rv != VGEN_SUCCESS) { vgen_unmap_dring(ldcp); return (rv); } } if (ldcp->peer_hparams.dring_ready == B_FALSE) { ldcp->peer_hparams.dring_ready = B_TRUE; } return (VGEN_SUCCESS); } static int vgen_handle_dring_reg_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) { vgen_t *vgenp = LDC_TO_VGEN(ldcp); vgen_hparams_t *lp = &ldcp->local_hparams; DBG2(vgenp, ldcp, "DRING_ACK_RCVD"); ldcp->hstate |= DRING_ACK_RCVD; if (lp->dring_ready) { return (VGEN_SUCCESS); } /* save dring_ident acked by peer */ lp->dring_ident = ((vio_dring_reg_msg_t *)tagp)->dring_ident; /* local dring is now ready */ lp->dring_ready = B_TRUE; return (VGEN_SUCCESS); } /* * Handle a descriptor ring register msg from the peer or an ACK/NACK from * the peer to a dring register msg that we sent. 
*/
static int
vgen_handle_dring_reg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_hparams_t	*local = &ldcp->local_hparams;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	int		reply_len;
	int		status = 0;

	DBG1(vgenp, ldcp, "enter\n");

	/* dring_info can be rcvd in any of the phases after Phase1 */
	if (ldcp->hphase < VH_PHASE2) {
		DWARN(vgenp, ldcp,
		    "Rcvd DRING_INFO Subtype (%d), Invalid Phase(%u)\n",
		    tagp->vio_subtype, ldcp->hphase);
		return (VGEN_FAILURE);
	}

	switch (tagp->vio_subtype) {

	case VIO_SUBTYPE_INFO:
		/*
		 * Process the peer's registration and reuse the received
		 * buffer as the ACK/NACK reply.
		 */
		status = vgen_handle_dring_reg_info(ldcp, tagp);
		tagp->vio_subtype = (status == VGEN_SUCCESS) ?
		    VIO_SUBTYPE_ACK : VIO_SUBTYPE_NACK;
		tagp->vio_sid = ldcp->local_sid;

		/* The reply size depends on the negotiated dring mode. */
		if (local->dring_mode == VIO_RX_DRING_DATA) {
			reply_len =
			    VNET_DRING_REG_EXT_MSG_SIZE(ldcp->tx_data_ncookies);
		} else {
			reply_len = sizeof (vio_dring_reg_msg_t);
		}

		status = vgen_sendmsg(ldcp, (caddr_t)tagp, reply_len, B_FALSE);
		if (status != VGEN_SUCCESS) {
			return (status);
		}

		if (tagp->vio_subtype == VIO_SUBTYPE_NACK) {
			DWARN(vgenp, ldcp, "DRING_NACK_SENT");
			return (VGEN_FAILURE);
		}

		ldcp->hstate |= DRING_ACK_SENT;
		DBG2(vgenp, ldcp, "DRING_ACK_SENT");

		/* Advance to the next phase once both directions are done. */
		if (vgen_handshake_done(ldcp) != VGEN_SUCCESS) {
			break;
		}
		status = vgen_handshake(vh_nextphase(ldcp));
		if (status != 0) {
			return (status);
		}
		break;

	case VIO_SUBTYPE_ACK:
		status = vgen_handle_dring_reg_ack(ldcp, tagp);
		if (status == VGEN_FAILURE) {
			return (status);
		}

		if (vgen_handshake_done(ldcp) != VGEN_SUCCESS) {
			break;
		}
		status = vgen_handshake(vh_nextphase(ldcp));
		if (status != 0) {
			return (status);
		}
		break;

	case VIO_SUBTYPE_NACK:
		DWARN(vgenp, ldcp, "DRING_NACK_RCVD");
		return (VGEN_FAILURE);
	}

	DBG1(vgenp, ldcp, "exit\n");
	return (VGEN_SUCCESS);
}

/*
 * Handle a rdx info msg from the peer or an ACK/NACK
 * from the peer to a rdx info msg that we sent.
*/
static int
vgen_handle_rdx_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	int	rv = 0;
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);

	DBG1(vgenp, ldcp, "enter\n");
	/* RDX messages are accepted only in phase 4 of the handshake. */
	if (ldcp->hphase != VH_PHASE4) {
		DWARN(vgenp, ldcp,
		    "Rcvd RDX_INFO Subtype (%d), Invalid Phase(%u)\n",
		    tagp->vio_subtype, ldcp->hphase);
		return (VGEN_FAILURE);
	}
	switch (tagp->vio_subtype) {
	case VIO_SUBTYPE_INFO:

		DBG2(vgenp, ldcp, "RDX_INFO_RCVD \n");
		ldcp->hstate |= RDX_INFO_RCVD;

		/* An RDX INFO is always ACKed; the buffer is reused. */
		tagp->vio_subtype = VIO_SUBTYPE_ACK;
		tagp->vio_sid = ldcp->local_sid;
		/* send reply msg back to peer */
		rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (vio_rdx_msg_t),
		    B_FALSE);
		if (rv != VGEN_SUCCESS) {
			return (rv);
		}

		ldcp->hstate |= RDX_ACK_SENT;
		DBG2(vgenp, ldcp, "RDX_ACK_SENT \n");

		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
			rv = vgen_handshake(vh_nextphase(ldcp));
			if (rv != 0) {
				return (rv);
			}
		}

		break;

	case VIO_SUBTYPE_ACK:

		ldcp->hstate |= RDX_ACK_RCVD;

		DBG2(vgenp, ldcp, "RDX_ACK_RCVD \n");

		if (vgen_handshake_done(ldcp) == VGEN_SUCCESS) {
			rv = vgen_handshake(vh_nextphase(ldcp));
			if (rv != 0) {
				return (rv);
			}
		}
		break;

	case VIO_SUBTYPE_NACK:

		DBG2(vgenp, ldcp, "RDX_NACK_RCVD \n");
		return (VGEN_FAILURE);
	}
	DBG1(vgenp, ldcp, "exit\n");
	return (VGEN_SUCCESS);
}

/*
 * Handle ACK/NACK from vsw to a set multicast msg that we sent.
 *
 * On a NACK to an "add" request, every address listed in the message is
 * removed from the local multicast table (vgenp->mctab). Always returns
 * VGEN_SUCCESS.
 */
static int
vgen_handle_mcast_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vnet_mcast_msg_t	*msgp = (vnet_mcast_msg_t *)tagp;
	struct ether_addr	*addrp;
	int			count;
	int			i;

	DBG1(vgenp, ldcp, "enter\n");
	switch (tagp->vio_subtype) {

	case VIO_SUBTYPE_INFO:

		/* vnet shouldn't recv set mcast msg, only vsw handles it */
		DWARN(vgenp, ldcp, "rcvd SET_MCAST_INFO \n");
		break;

	case VIO_SUBTYPE_ACK:

		/* success adding/removing multicast addr */
		DBG1(vgenp, ldcp, "rcvd SET_MCAST_ACK \n");
		break;

	case VIO_SUBTYPE_NACK:

		DWARN(vgenp, ldcp, "rcvd SET_MCAST_NACK \n");
		if (!(msgp->set)) {
			/* multicast remove request failed */
			break;
		}

		/*
		 * multicast add request failed
		 *
		 * NOTE(review): msgp->count comes from the peer and is not
		 * range-checked here against the size of msgp->mca[] -
		 * confirm that it is validated elsewhere.
		 */
		for (count = 0; count < msgp->count; count++) {
			addrp = &(msgp->mca[count]);

			/* delete address from the table */
			for (i = 0; i < vgenp->mccount; i++) {
				if (ether_cmp(addrp,
				    &(vgenp->mctab[i])) == 0) {
					/*
					 * Fill the hole with the last entry
					 * and shrink the table by one; entry
					 * order is not preserved.
					 */
					if (vgenp->mccount > 1) {
						int t = vgenp->mccount - 1;
						vgenp->mctab[i] =
						    vgenp->mctab[t];
					}
					vgenp->mccount--;
					break;
				}
			}
		}
		break;

	}
	DBG1(vgenp, ldcp, "exit\n");

	return (VGEN_SUCCESS);
}

/*
 * Physical link information message from the peer. Only vswitch should send
 * us this message; if the vnet device has been configured to get physical link
 * state updates. Note that we must have already negotiated this with the
 * vswitch during attribute exchange phase of handshake.
 *
 * Returns VGEN_SUCCESS, or the error from vgen_sendmsg() if the ACK could not
 * be sent. ldcp->cblock is dropped and re-acquired around the link update
 * callout, so the caller is expected to hold it on entry.
 */
static int
vgen_handle_physlink_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vnet_physlink_msg_t	*msgp = (vnet_physlink_msg_t *)tagp;
	link_state_t		link_state;
	int			rv;

	if (ldcp->portp != vgenp->vsw_portp) {
		/*
		 * drop the message and don't process; as we should
		 * receive physlink_info message from only vswitch.
		 */
		return (VGEN_SUCCESS);
	}

	if (vgenp->pls_negotiated == B_FALSE) {
		/*
		 * drop the message and don't process; as we should receive
		 * physlink_info message only if physlink update is enabled for
		 * the device and negotiated with vswitch.
		 */
		return (VGEN_SUCCESS);
	}

	switch (tagp->vio_subtype) {

	case VIO_SUBTYPE_INFO:

		if ((msgp->physlink_info & VNET_PHYSLINK_STATE_MASK) ==
		    VNET_PHYSLINK_STATE_UP) {
			link_state = LINK_STATE_UP;
		} else {
			link_state = LINK_STATE_DOWN;
		}

		/* Notify the stack only when the state actually changed. */
		if (vgenp->phys_link_state != link_state) {
			vgenp->phys_link_state = link_state;
			mutex_exit(&ldcp->cblock);

			/* Now update the stack */
			vgen_link_update(vgenp, link_state);

			mutex_enter(&ldcp->cblock);
		}

		tagp->vio_subtype = VIO_SUBTYPE_ACK;
		tagp->vio_sid = ldcp->local_sid;

		/* send reply msg back to peer */
		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
		    sizeof (vnet_physlink_msg_t), B_FALSE);
		if (rv != VGEN_SUCCESS) {
			return (rv);
		}
		break;

	case VIO_SUBTYPE_ACK:

		/* vnet shouldn't recv physlink acks */
		DWARN(vgenp, ldcp, "rcvd PHYSLINK_ACK \n");
		break;

	case VIO_SUBTYPE_NACK:

		/* vnet shouldn't recv physlink nacks */
		DWARN(vgenp, ldcp, "rcvd PHYSLINK_NACK \n");
		break;

	}
	DBG1(vgenp, ldcp, "exit\n");

	return (VGEN_SUCCESS);
}

/*
 * handler for control messages received from the peer ldc end-point
 *
 * Dispatches on tagp->vio_subtype_env to the per-message handler and returns
 * that handler's status; message types not listed return 0.
 */
static int
vgen_handle_ctrlmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	int	rv = 0;
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);

	DBG1(vgenp, ldcp, "enter\n");
	switch (tagp->vio_subtype_env) {

	case VIO_VER_INFO:
		rv = vgen_handle_version_negotiate(ldcp, tagp);
		break;

	case VIO_ATTR_INFO:
		rv = vgen_handle_attr_msg(ldcp, tagp);
		break;

	case VIO_DRING_REG:
		rv = vgen_handle_dring_reg(ldcp, tagp);
		break;

	case VIO_RDX:
		rv = vgen_handle_rdx_info(ldcp, tagp);
		break;

	case VNET_MCAST_INFO:
		rv = vgen_handle_mcast_info(ldcp, tagp);
		break;

	case VIO_DDS_INFO:
		/*
		 * If we are in the process of resetting the vswitch channel,
		 * drop the dds message. A new handshake will be initiated
		 * when the channel comes back up after the reset and dds
		 * negotiation can then continue.
*/ if (ldcp->reset_in_progress == 1) { break; } rv = vgen_dds_rx(ldcp, tagp); break; case VNET_PHYSLINK_INFO: rv = vgen_handle_physlink_info(ldcp, tagp); break; } DBG1(vgenp, ldcp, "exit rv(%d)\n", rv); return (rv); } /* handler for error messages received from the peer ldc end-point */ static void vgen_handle_errmsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp) { _NOTE(ARGUNUSED(ldcp, tagp)) } /* * This function handles raw pkt data messages received over the channel. * Currently, only priority-eth-type frames are received through this mechanism. * In this case, the frame(data) is present within the message itself which * is copied into an mblk before sending it up the stack. */ void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen) { vgen_ldc_t *ldcp = (vgen_ldc_t *)arg1; vio_raw_data_msg_t *pkt = (vio_raw_data_msg_t *)arg2; uint32_t size; mblk_t *mp; vio_mblk_t *vmp; vio_net_rx_cb_t vrx_cb = NULL; vgen_t *vgenp = LDC_TO_VGEN(ldcp); vgen_stats_t *statsp = &ldcp->stats; vgen_hparams_t *lp = &ldcp->local_hparams; uint_t dring_mode = lp->dring_mode; ASSERT(MUTEX_HELD(&ldcp->cblock)); mutex_exit(&ldcp->cblock); size = msglen - VIO_PKT_DATA_HDRSIZE; if (size < ETHERMIN || size > lp->mtu) { (void) atomic_inc_32(&statsp->rx_pri_fail); mutex_enter(&ldcp->cblock); return; } vmp = vio_multipool_allocb(&ldcp->vmp, size); if (vmp == NULL) { mp = allocb(size, BPRI_MED); if (mp == NULL) { (void) atomic_inc_32(&statsp->rx_pri_fail); DWARN(vgenp, ldcp, "allocb failure, " "unable to process priority frame\n"); mutex_enter(&ldcp->cblock); return; } } else { mp = vmp->mp; } /* copy the frame from the payload of raw data msg into the mblk */ bcopy(pkt->data, mp->b_rptr, size); mp->b_wptr = mp->b_rptr + size; if (vmp != NULL) { vmp->state = VIO_MBLK_HAS_DATA; } /* update stats */ (void) atomic_inc_64(&statsp->rx_pri_packets); (void) atomic_add_64(&statsp->rx_pri_bytes, size); /* * If polling is currently enabled, add the packet to the priority * packets list and return. 
It will be picked up by the polling thread. */ if (dring_mode == VIO_RX_DRING_DATA) { mutex_enter(&ldcp->rxlock); } else { mutex_enter(&ldcp->pollq_lock); } if (ldcp->polling_on == B_TRUE) { if (ldcp->rx_pri_tail != NULL) { ldcp->rx_pri_tail->b_next = mp; } else { ldcp->rx_pri_head = ldcp->rx_pri_tail = mp; } } else { vrx_cb = ldcp->portp->vcb.vio_net_rx_cb; } if (dring_mode == VIO_RX_DRING_DATA) { mutex_exit(&ldcp->rxlock); } else { mutex_exit(&ldcp->pollq_lock); } if (vrx_cb != NULL) { vrx_cb(ldcp->portp->vhp, mp); } mutex_enter(&ldcp->cblock); } /* * dummy pkt data handler function for vnet protocol version 1.0 */ static void vgen_handle_pkt_data_nop(void *arg1, void *arg2, uint32_t msglen) { _NOTE(ARGUNUSED(arg1, arg2, msglen)) } /* handler for data messages received from the peer ldc end-point */ static int vgen_handle_datamsg(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t msglen) { int rv = 0; vgen_t *vgenp = LDC_TO_VGEN(ldcp); vgen_hparams_t *lp = &ldcp->local_hparams; DBG1(vgenp, ldcp, "enter\n"); if (ldcp->hphase != VH_DONE) { return (0); } /* * We check the data msg seqnum. This is needed only in TxDring mode. 
*/
	if (lp->dring_mode == VIO_TX_DRING &&
	    tagp->vio_subtype == VIO_SUBTYPE_INFO) {
		rv = vgen_check_datamsg_seq(ldcp, tagp);
		if (rv != 0) {
			return (rv);
		}
	}

	/*
	 * Dispatch through the per-channel receive handlers. The raw packet
	 * handler returns no status, so rv stays 0 for VIO_PKT_DATA.
	 */
	switch (tagp->vio_subtype_env) {
	case VIO_DRING_DATA:
		rv = ldcp->rx_dringdata((void *)ldcp, (void *)tagp);
		break;

	case VIO_PKT_DATA:
		ldcp->rx_pktdata((void *)ldcp, (void *)tagp, msglen);
		break;
	default:
		break;
	}

	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
	return (rv);
}

/*
 * Reset the channel on behalf of 'caller'.
 *
 * Returns EBUSY if another thread already owns the reset
 * (reset_in_progress was non-zero), otherwise the result of
 * vgen_process_reset(). Callers of type VGEN_LDC_CB and VGEN_MSG_THR must
 * hold ldcp->cblock; it is dropped around the reset and re-acquired before
 * returning. Other callers must not hold cblock, since
 * vgen_process_reset() acquires it.
 */
static int
vgen_ldc_reset(vgen_ldc_t *ldcp, vgen_caller_t caller)
{
	int	rv;

	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
		ASSERT(MUTEX_HELD(&ldcp->cblock));
	}

	/* Set the flag to indicate reset is in progress */
	if (atomic_cas_uint(&ldcp->reset_in_progress, 0, 1) != 0) {
		/* another thread is already in the process of resetting */
		return (EBUSY);
	}

	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
		mutex_exit(&ldcp->cblock);
	}

	rv = vgen_process_reset(ldcp, VGEN_FLAG_NEED_LDCRESET);

	if (caller == VGEN_LDC_CB || caller == VGEN_MSG_THR) {
		mutex_enter(&ldcp->cblock);
	}

	return (rv);
}

/*
 * Try to bring the channel up, retrying while ldc_up() returns EWOULDBLOCK.
 * Must be called with ldcp->cblock held. Nothing is returned; the caller
 * has to query the channel status to learn the outcome.
 */
static void
vgen_ldc_up(vgen_ldc_t *ldcp)
{
	int		rv;
	uint32_t	retries = 0;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);

	ASSERT(MUTEX_HELD(&ldcp->cblock));

	/*
	 * If the channel has been reset max # of times, without successfully
	 * completing handshake, stop and do not bring the channel up.
	 */
	if (ldcp->ldc_reset_count == vgen_ldc_max_resets) {
		cmn_err(CE_WARN, "!vnet%d: exceeded number of permitted"
		    " handshake attempts (%d) on channel %ld",
		    vgenp->instance, vgen_ldc_max_resets, ldcp->ldc_id);
		return;
	}
	ldcp->ldc_reset_count++;

	/*
	 * Busy-wait between attempts. The retry bound is checked after each
	 * attempt, so ldc_up() is called at most vgen_ldcup_retries + 1 times.
	 */
	do {
		rv = ldc_up(ldcp->ldc_handle);
		if ((rv != 0) && (rv == EWOULDBLOCK)) {
			drv_usecwait(VGEN_LDC_UP_DELAY);
		}
		if (retries++ >= vgen_ldcup_retries)
			break;
	} while (rv == EWOULDBLOCK);

	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_up err rv(%d)\n", rv);
	}
}

/*
 * Turn polled mode off for the port's channel (arg is a vgen_port_t *).
 * Always returns 0.
 */
int
vgen_enable_intr(void *arg)
{
	uint32_t		end_ix;
	vio_dring_msg_t		msg;
	vgen_port_t		*portp = (vgen_port_t *)arg;
	vgen_ldc_t		*ldcp = portp->ldcp;
	vgen_hparams_t		*lp = &ldcp->local_hparams;

	if (lp->dring_mode == VIO_RX_DRING_DATA) {
		mutex_enter(&ldcp->rxlock);

		ldcp->polling_on = B_FALSE;
		/*
		 * We send a stopped message to peer (sender) as we are turning
		 * off polled mode. This effectively restarts data interrupts
		 * by allowing the peer to send further dring data msgs to us.
		 */
		end_ix = ldcp->next_rxi;
		DECR_RXI(end_ix, ldcp);
		/*
		 * NOTE(review): only dring_ident is set in 'msg' here; the
		 * remaining fields are presumably filled in by
		 * vgen_send_dringack_shm() - confirm.
		 */
		msg.dring_ident = ldcp->peer_hparams.dring_ident;
		(void) vgen_send_dringack_shm(ldcp, (vio_msg_tag_t *)&msg,
		    VNET_START_IDX_UNSPEC, end_ix, VIO_DP_STOPPED);

		mutex_exit(&ldcp->rxlock);
	} else {
		mutex_enter(&ldcp->pollq_lock);
		ldcp->polling_on = B_FALSE;
		mutex_exit(&ldcp->pollq_lock);
	}

	return (0);
}

/*
 * Turn polled mode on for the port's channel (arg is a vgen_port_t *).
 * The flag is set under rxlock in RxDringData mode and under pollq_lock
 * otherwise. Always returns 0.
 */
int
vgen_disable_intr(void *arg)
{
	vgen_port_t		*portp = (vgen_port_t *)arg;
	vgen_ldc_t		*ldcp = portp->ldcp;
	vgen_hparams_t		*lp = &ldcp->local_hparams;

	if (lp->dring_mode == VIO_RX_DRING_DATA) {
		mutex_enter(&ldcp->rxlock);
		ldcp->polling_on = B_TRUE;
		mutex_exit(&ldcp->rxlock);
	} else {
		mutex_enter(&ldcp->pollq_lock);
		ldcp->polling_on = B_TRUE;
		mutex_exit(&ldcp->pollq_lock);
	}

	return (0);
}

/*
 * Poll entry point: pick up received packets for the port (arg is a
 * vgen_port_t *), delegating to the receive routine that matches the
 * negotiated dring mode. bytes_to_pickup is passed through unchanged.
 */
mblk_t *
vgen_rx_poll(void *arg, int bytes_to_pickup)
{
	vgen_port_t		*portp = (vgen_port_t *)arg;
	vgen_ldc_t		*ldcp = portp->ldcp;
	vgen_hparams_t		*lp = &ldcp->local_hparams;
	mblk_t			*mp = NULL;

	if (lp->dring_mode == VIO_RX_DRING_DATA) {
		mp = vgen_poll_rcv_shm(ldcp, bytes_to_pickup);
	} else {
		mp = vgen_poll_rcv(ldcp, bytes_to_pickup);
	}

	return (mp);
}

/*
 * transmit watchdog timeout handler
 *
 * Runs as a timeout(9F) callback with arg being the channel. If transmit has
 * been blocked for longer than vgen_txwd_timeout (a value of 0 disables the
 * check), the channel is reset; otherwise the timer is re-armed.
 */
static void
vgen_tx_watchdog(void *arg)
{
	vgen_ldc_t	*ldcp;
	vgen_t		*vgenp;
	int		rv;
	boolean_t	tx_blocked;
	clock_t		tx_blocked_lbolt;

	ldcp = (vgen_ldc_t *)arg;
	vgenp = LDC_TO_VGEN(ldcp);

	/* Snapshot of the flow-control state, read without taking a lock. */
	tx_blocked = ldcp->tx_blocked;
	tx_blocked_lbolt = ldcp->tx_blocked_lbolt;

	if (vgen_txwd_timeout &&
	    (tx_blocked == B_TRUE) &&
	    ((ddi_get_lbolt() - tx_blocked_lbolt) >
	    drv_usectohz(vgen_txwd_timeout * 1000))) {
		/*
		 * Something is wrong; the peer is not picking up the packets
		 * in the transmit dring. We now go ahead and reset the channel
		 * to break out of this condition.
		 */
		DWARN(vgenp, ldcp, "transmit timeout lbolt(%lx), "
		    "tx_blocked_lbolt(%lx)\n",
		    ddi_get_lbolt(), tx_blocked_lbolt);

#ifdef DEBUG
		if (vgen_inject_error(ldcp, VGEN_ERR_TXTIMEOUT)) {
			/* tx timeout triggered for debugging */
			vgen_inject_err_flag &= ~(VGEN_ERR_TXTIMEOUT);
		}
#endif

		/*
		 * Clear tid before invoking vgen_ldc_reset(). Otherwise,
		 * it will result in a deadlock when vgen_process_reset() tries
		 * to untimeout() on seeing a non-zero tid, but it is being
		 * invoked by the timer itself in this case.
		 */
		mutex_enter(&ldcp->cblock);
		if (ldcp->wd_tid == 0) {
			/* Cancelled by vgen_process_reset() */
			mutex_exit(&ldcp->cblock);
			return;
		}
		ldcp->wd_tid = 0;
		mutex_exit(&ldcp->cblock);

		/*
		 * Now reset the channel.
		 */
		rv = vgen_ldc_reset(ldcp, VGEN_OTHER);
		if (rv == 0) {
			/*
			 * We have successfully reset the channel. If we are
			 * in tx flow controlled state, clear it now and enable
			 * transmit in the upper layer.
			 */
			if (ldcp->tx_blocked) {
				vio_net_tx_update_t vtx_update =
				    ldcp->portp->vcb.vio_net_tx_update;

				ldcp->tx_blocked = B_FALSE;
				vtx_update(ldcp->portp->vhp);
			}
		}

		/*
		 * Channel has been reset by us or some other thread is already
		 * in the process of resetting. In either case, we return
		 * without restarting the timer. When handshake completes and
		 * the channel is ready for data transmit/receive we start a
		 * new watchdog timer.
		 */
		return;
	}

	/* No goto in the visible code targets this label. */
restart_timer:
	/* Restart the timer */
	mutex_enter(&ldcp->cblock);
	if (ldcp->wd_tid == 0) {
		/* Cancelled by vgen_process_reset() */
		mutex_exit(&ldcp->cblock);
		return;
	}
	ldcp->wd_tid = timeout(vgen_tx_watchdog, (caddr_t)ldcp,
	    drv_usectohz(vgen_txwd_interval * 1000));
	mutex_exit(&ldcp->cblock);
}

/*
 * Handshake watchdog timeout handler
 *
 * Runs as a timeout(9F) callback when the handshake has not completed in
 * time. The timer id is cleared under cblock before the reset, so that the
 * reset path does not try to untimeout() the timer it is running from.
 */
static void
vgen_hwatchdog(void *arg)
{
	vgen_ldc_t	*ldcp = (vgen_ldc_t *)arg;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);

	DWARN(vgenp, ldcp, "handshake timeout phase(%x) state(%x)\n",
	    ldcp->hphase, ldcp->hstate);

	mutex_enter(&ldcp->cblock);
	if (ldcp->htid == 0) {
		/* Cancelled by vgen_process_reset() */
		mutex_exit(&ldcp->cblock);
		return;
	}
	ldcp->htid = 0;
	mutex_exit(&ldcp->cblock);

	/*
	 * Something is wrong; handshake with the peer seems to be hung. We now
	 * go ahead and reset the channel to break out of this condition.
	 */
	(void) vgen_ldc_reset(ldcp, VGEN_OTHER);
}

/*
 * Check if the session id in the received message is valid, i.e. equal to
 * the peer session id cached in ldcp->peer_sid. Returns VGEN_SUCCESS on a
 * match and VGEN_FAILURE otherwise.
 */
static int
vgen_check_sid(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vgen_t	*vgenp = LDC_TO_VGEN(ldcp);

	if (tagp->vio_sid != ldcp->peer_sid) {
		DWARN(vgenp, ldcp, "sid mismatch: expected(%x), rcvd(%x)\n",
		    ldcp->peer_sid, tagp->vio_sid);
		return (VGEN_FAILURE);
	}
	else
		return (VGEN_SUCCESS);
}

/*
 * Initialize the common part of dring registration
 * message; used in both TxDring and RxDringData modes.
*/ static void vgen_init_dring_reg_msg(vgen_ldc_t *ldcp, vio_dring_reg_msg_t *msg, uint8_t option) { vio_msg_tag_t *tagp; tagp = &msg->tag; tagp->vio_msgtype = VIO_TYPE_CTRL; tagp->vio_subtype = VIO_SUBTYPE_INFO; tagp->vio_subtype_env = VIO_DRING_REG; tagp->vio_sid = ldcp->local_sid; /* get dring info msg payload from ldcp->local */ bcopy(&(ldcp->local_hparams.dring_cookie), &(msg->cookie[0]), sizeof (ldc_mem_cookie_t)); msg->ncookies = ldcp->local_hparams.dring_ncookies; msg->num_descriptors = ldcp->local_hparams.num_desc; msg->descriptor_size = ldcp->local_hparams.desc_size; msg->options = option; /* * dring_ident is set to 0. After mapping the dring, peer sets this * value and sends it in the ack, which is saved in * vgen_handle_dring_reg(). */ msg->dring_ident = 0; } #if DEBUG /* * Print debug messages - set to 0xf to enable all msgs */ void vgen_debug_printf(const char *fname, vgen_t *vgenp, vgen_ldc_t *ldcp, const char *fmt, ...) { char buf[256]; char *bufp = buf; va_list ap; if ((vgenp != NULL) && (vgenp->vnetp != NULL)) { (void) sprintf(bufp, "vnet%d:", ((vnet_t *)(vgenp->vnetp))->instance); bufp += strlen(bufp); } if (ldcp != NULL) { (void) sprintf(bufp, "ldc(%ld):", ldcp->ldc_id); bufp += strlen(bufp); } (void) sprintf(bufp, "%s: ", fname); bufp += strlen(bufp); va_start(ap, fmt); (void) vsprintf(bufp, fmt, ap); va_end(ap); if ((ldcp == NULL) ||(vgendbg_ldcid == -1) || (vgendbg_ldcid == ldcp->ldc_id)) { cmn_err(CE_CONT, "%s\n", buf); } } #endif #ifdef VNET_IOC_DEBUG static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp) { struct iocblk *iocp; vgen_port_t *portp; enum ioc_reply { IOC_INVAL = -1, /* bad, NAK with EINVAL */ IOC_ACK /* OK, just send ACK */ } status; int rv; iocp = (struct iocblk *)(uintptr_t)mp->b_rptr; iocp->ioc_error = 0; portp = (vgen_port_t *)arg; if (portp == NULL) { status = IOC_INVAL; goto vgen_ioc_exit; } mutex_enter(&portp->lock); switch (iocp->ioc_cmd) { case VNET_FORCE_LINK_DOWN: case VNET_FORCE_LINK_UP: rv = 
vgen_force_link_state(portp, iocp->ioc_cmd); (rv == 0) ? (status = IOC_ACK) : (status = IOC_INVAL); break; default: status = IOC_INVAL; break; } mutex_exit(&portp->lock); vgen_ioc_exit: switch (status) { default: case IOC_INVAL: /* Error, reply with a NAK and EINVAL error */ miocnak(q, mp, 0, EINVAL); break; case IOC_ACK: /* OK, reply with an ACK */ miocack(q, mp, 0, 0); break; } } static int vgen_force_link_state(vgen_port_t *portp, int cmd) { ldc_status_t istatus; int rv; vgen_ldc_t *ldcp = portp->ldcp; vgen_t *vgenp = portp->vgenp; mutex_enter(&ldcp->cblock); switch (cmd) { case VNET_FORCE_LINK_DOWN: (void) ldc_down(ldcp->ldc_handle); ldcp->link_down_forced = B_TRUE; break; case VNET_FORCE_LINK_UP: vgen_ldc_up(ldcp); ldcp->link_down_forced = B_FALSE; if (ldc_status(ldcp->ldc_handle, &istatus) != 0) { DWARN(vgenp, ldcp, "ldc_status err\n"); } else { ldcp->ldc_status = istatus; } /* if channel is already UP - restart handshake */ if (ldcp->ldc_status == LDC_UP) { vgen_handle_evt_up(ldcp); } break; } mutex_exit(&ldcp->cblock); return (0); } #else static void vgen_ioctl(void *arg, queue_t *q, mblk_t *mp) { vgen_port_t *portp; portp = (vgen_port_t *)arg; if (portp == NULL) { miocnak(q, mp, 0, EINVAL); return; } miocnak(q, mp, 0, ENOTSUP); } #endif