/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * This header file contains the basic data structures which the * virtual switch (vsw) uses to communicate with its clients and * the outside world. * * The virtual switch reads the machine description (MD) to * determine how many port_t structures to create (each port_t * can support communications to a single network device). The * port_t's are maintained in a linked list. * * Each port in turn contains a number of logical domain channels * (ldc's) which are inter domain communications channels which * are used for passing small messages between the domains. Their * may be an unlimited number of channels associated with each port, * though most devices only use a single channel. * * The ldc is a bi-directional channel, which is divided up into * two directional 'lanes', one outbound from the switch to the * virtual network device, the other inbound to the switch. * Depending on the type of device each lane may have seperate * communication paramaters (such as mtu etc). * * For those network clients which use descriptor rings the * rings are associated with the appropriate lane. I.e. rings * which the switch exports are associated with the outbound lanes * while those which the network clients are exporting to the switch * are associated with the inbound lane. * * In diagram form the data structures look as follows: * * vsw instance * | * +----->port_t----->port_t----->port_t-----> * | * +--->ldc_t--->ldc_t--->ldc_t---> * | * +--->lane_t (inbound) * | | * | +--->dring--->dring---> * | * +--->lane_t (outbound) * | * +--->dring--->dring---> * */ #ifndef _VSW_H #define _VSW_H #pragma ident "%Z%%M% %I% %E% SMI" #ifdef __cplusplus extern "C" { #endif #include #include #include #include /* * Default message type. */ typedef struct def_msg { uint64_t data[8]; } def_msg_t; /* * Currently only support one major/minor pair. */ #define VSW_NUM_VER 1 typedef struct ver_sup { uint32_t ver_major:16, ver_minor:16; } ver_sup_t; /* * Only support ETHER mtu at moment. */ #define VSW_MTU ETHERMAX /* * Lane states. */ #define VSW_LANE_INACTIV 0x0 /* No params set for lane */ #define VSW_VER_INFO_SENT 0x1 /* Version # sent to peer */ #define VSW_VER_INFO_RECV 0x2 /* Version # recv from peer */ #define VSW_VER_ACK_RECV 0x4 #define VSW_VER_ACK_SENT 0x8 #define VSW_VER_NACK_RECV 0x10 #define VSW_VER_NACK_SENT 0x20 #define VSW_ATTR_INFO_SENT 0x40 /* Attributes sent to peer */ #define VSW_ATTR_INFO_RECV 0x80 /* Peer attributes received */ #define VSW_ATTR_ACK_SENT 0x100 #define VSW_ATTR_ACK_RECV 0x200 #define VSW_ATTR_NACK_SENT 0x400 #define VSW_ATTR_NACK_RECV 0x800 #define VSW_DRING_INFO_SENT 0x1000 /* Dring info sent to peer */ #define VSW_DRING_INFO_RECV 0x2000 /* Dring info received */ #define VSW_DRING_ACK_SENT 0x4000 #define VSW_DRING_ACK_RECV 0x8000 #define VSW_DRING_NACK_SENT 0x10000 #define VSW_DRING_NACK_RECV 0x20000 #define VSW_RDX_INFO_SENT 0x40000 /* RDX sent to peer */ #define VSW_RDX_INFO_RECV 0x80000 /* RDX received from peer */ #define VSW_RDX_ACK_SENT 0x100000 #define VSW_RDX_ACK_RECV 0x200000 #define VSW_RDX_NACK_SENT 0x400000 #define VSW_RDX_NACK_RECV 0x800000 #define VSW_MCST_INFO_SENT 0x1000000 #define VSW_MCST_INFO_RECV 0x2000000 #define VSW_MCST_ACK_SENT 0x4000000 #define VSW_MCST_ACK_RECV 0x8000000 #define VSW_MCST_NACK_SENT 0x10000000 #define VSW_MCST_NACK_RECV 0x20000000 #define VSW_LANE_ACTIVE 0x40000000 /* Lane open to xmit data */ /* Handshake milestones */ #define VSW_MILESTONE0 0x1 /* ver info exchanged */ #define VSW_MILESTONE1 0x2 /* attribute exchanged */ #define VSW_MILESTONE2 0x4 /* dring info exchanged */ #define VSW_MILESTONE3 0x8 /* rdx exchanged */ #define VSW_MILESTONE4 0x10 /* handshake complete */ /* * Lane direction (relative to ourselves). */ #define INBOUND 0x1 #define OUTBOUND 0x2 /* Peer session id received */ #define VSW_PEER_SESSION 0x1 /* * Maximum number of consecutive reads of data from channel */ #define VSW_MAX_CHAN_READ 50 /* * Currently only support one ldc per port. */ #define VSW_PORT_MAX_LDCS 1 /* max # of ldcs per port */ /* * Used for port add/deletion. */ #define VSW_PORT_UPDATED 0x1 #define LDC_TX_SUCCESS 0 /* ldc transmit success */ #define LDC_TX_FAILURE 1 /* ldc transmit failure */ #define LDC_TX_NORESOURCES 2 /* out of descriptors */ /* ID of the source of a frame being switched */ #define VSW_PHYSDEV 1 /* physical device associated */ #define VSW_VNETPORT 2 /* port connected to vnet (over ldc) */ #define VSW_LOCALDEV 4 /* vsw configured as an eth interface */ /* * Descriptor ring info * * Each descriptor element has a pre-allocated data buffer * associated with it, into which data being transmitted is * copied. By pre-allocating we speed up the copying process. * The buffer is re-used once the peer has indicated that it is * finished with the descriptor. */ #define VSW_RING_NUM_EL 512 /* Num of entries in ring */ #define VSW_RING_EL_DATA_SZ 2048 /* Size of data section (bytes) */ #define VSW_PRIV_SIZE sizeof (vnet_private_desc_t) #define VSW_PUB_SIZE sizeof (vnet_public_desc_t) #define VSW_MAX_COOKIES ((ETHERMTU >> MMU_PAGESHIFT) + 2) /* * LDC pkt tranfer MTU */ #define VSW_LDC_MTU sizeof (def_msg_t) /* * Size and number of mblks to be created in free pool. */ #define VSW_MBLK_SIZE 2048 #define VSW_NUM_MBLKS 1024 /* * Private descriptor */ typedef struct vsw_private_desc { /* * Below lock must be held when accessing the state of * a descriptor on either the private or public sections * of the ring. */ kmutex_t dstate_lock; uint64_t dstate; vnet_public_desc_t *descp; ldc_mem_handle_t memhandle; void *datap; uint64_t datalen; uint64_t ncookies; ldc_mem_cookie_t memcookie[VSW_MAX_COOKIES]; int bound; } vsw_private_desc_t; /* * Descriptor ring structure */ typedef struct dring_info { struct dring_info *next; /* next ring in chain */ kmutex_t dlock; uint32_t num_descriptors; uint32_t descriptor_size; uint32_t options; uint32_t ncookies; ldc_mem_cookie_t cookie[1]; ldc_dring_handle_t handle; uint64_t ident; /* identifier sent to peer */ uint64_t end_idx; /* last idx processed */ int64_t last_ack_recv; kmutex_t restart_lock; boolean_t restart_reqd; /* send restart msg */ /* * base address of private and public portions of the * ring (where appropriate), and data block. */ void *pub_addr; /* base of public section */ void *priv_addr; /* base of private section */ void *data_addr; /* base of data section */ size_t data_sz; /* size of data section */ } dring_info_t; /* * Each ldc connection is comprised of two lanes, incoming * from a peer, and outgoing to that peer. Each lane shares * common ldc parameters and also has private lane-specific * parameters. */ typedef struct lane { uint64_t lstate; /* Lane state */ uint32_t ver_major:16, /* Version major number */ ver_minor:16; /* Version minor number */ kmutex_t seq_lock; uint64_t seq_num; /* Sequence number */ uint64_t mtu; /* ETHERMTU */ uint64_t addr; /* Unique physical address */ uint8_t addr_type; /* Only MAC address at moment */ uint8_t xfer_mode; /* Dring or Pkt based */ uint8_t ack_freq; /* Only non zero for Pkt based xfer */ krwlock_t dlistrw; /* Lock for dring list */ dring_info_t *dringp; /* List of drings for this lane */ } lane_t; /* channel drain states */ #define VSW_LDC_INIT 0x1 /* Initial non-drain state */ #define VSW_LDC_DRAINING 0x2 /* Channel draining */ /* ldc information associated with a vsw-port */ typedef struct vsw_ldc { struct vsw_ldc *ldc_next; /* next ldc in the list */ struct vsw_port *ldc_port; /* associated port */ struct vsw *ldc_vswp; /* associated vsw */ kmutex_t ldc_cblock; /* sync callback processing */ kmutex_t ldc_txlock; /* sync transmits */ uint64_t ldc_id; /* channel number */ ldc_handle_t ldc_handle; /* channel handle */ kmutex_t drain_cv_lock; kcondvar_t drain_cv; /* channel draining */ int drain_state; uint32_t hphase; /* handshake phase */ int hcnt; /* # handshake attempts */ kmutex_t status_lock; ldc_status_t ldc_status; /* channel status */ uint8_t reset_active; /* reset flag */ uint64_t local_session; /* Our session id */ uint64_t peer_session; /* Our peers session id */ uint8_t session_status; /* Session recv'd, sent */ kmutex_t hss_lock; uint32_t hss_id; /* Handshake session id */ uint64_t next_ident; /* Next dring ident # to use */ lane_t lane_in; /* Inbound lane */ lane_t lane_out; /* Outbound lane */ uint8_t dev_class; /* Peer device class */ vio_mblk_pool_t *rxh; /* Receive pool handle */ } vsw_ldc_t; /* list of ldcs per port */ typedef struct vsw_ldc_list { vsw_ldc_t *head; /* head of the list */ krwlock_t lockrw; /* sync access(rw) to the list */ int num_ldcs; /* number of ldcs in the list */ } vsw_ldc_list_t; /* multicast addresses port is interested in */ typedef struct mcst_addr { struct mcst_addr *nextp; uint64_t addr; } mcst_addr_t; /* Port detach states */ #define VSW_PORT_INIT 0x1 /* Initial non-detach state */ #define VSW_PORT_DETACHING 0x2 /* In process of being detached */ #define VSW_PORT_DETACHABLE 0x4 /* Safe to detach */ #define VSW_ADDR_UNSET 0x0 /* Addr not set */ #define VSW_ADDR_HW 0x1 /* Addr programmed in HW */ #define VSW_ADDR_PROMISC 0x2 /* Card in promisc to see addr */ /* port information associated with a vsw */ typedef struct vsw_port { int p_instance; /* port instance */ struct vsw_port *p_next; /* next port in the list */ struct vsw *p_vswp; /* associated vsw */ vsw_ldc_list_t p_ldclist; /* list of ldcs for this port */ kmutex_t tx_lock; /* transmit lock */ int (*transmit)(vsw_ldc_t *, mblk_t *); int state; /* port state */ kmutex_t state_lock; kcondvar_t state_cv; int ref_cnt; /* # of active references */ kmutex_t ref_lock; kcondvar_t ref_cv; kmutex_t mca_lock; /* multicast lock */ mcst_addr_t *mcap; /* list of multicast addrs */ mac_addr_slot_t addr_slot; /* Unicast address slot */ int addr_set; /* Addr set where */ /* * mac address of the port & connected device */ struct ether_addr p_macaddr; } vsw_port_t; /* list of ports per vsw */ typedef struct vsw_port_list { vsw_port_t *head; /* head of the list */ krwlock_t lockrw; /* sync access(rw) to the list */ int num_ports; /* number of ports in the list */ } vsw_port_list_t; /* * Taskq control message */ typedef struct vsw_ctrl_task { vsw_ldc_t *ldcp; def_msg_t pktp; uint32_t hss_id; } vsw_ctrl_task_t; /* * State of connection to peer. Some of these states * can be mapped to LDC events as follows: * * VSW_CONN_RESET -> LDC_RESET_EVT * VSW_CONN_UP -> LDC_UP_EVT */ #define VSW_CONN_UP 0x1 /* Connection come up */ #define VSW_CONN_RESET 0x2 /* Connection reset */ #define VSW_CONN_RESTART 0x4 /* Restarting handshake on connection */ typedef struct vsw_conn_evt { uint16_t evt; /* Connection event */ vsw_ldc_t *ldcp; } vsw_conn_evt_t; /* * Vsw queue -- largely modeled after squeue * * VSW_QUEUE_RUNNING, vqueue thread for queue is running. * VSW_QUEUE_DRAINED, vqueue thread has drained current work and is exiting. * VSW_QUEUE_STOP, request for the vqueue thread to stop. * VSW_QUEUE_STOPPED, vqueue thread is not running. */ #define VSW_QUEUE_RUNNING 0x01 #define VSW_QUEUE_DRAINED 0x02 #define VSW_QUEUE_STOP 0x04 #define VSW_QUEUE_STOPPED 0x08 typedef struct vsw_queue_s { kmutex_t vq_lock; /* Lock, before using any member. */ kcondvar_t vq_cv; /* Async threads block on. */ uint32_t vq_state; /* State flags. */ mblk_t *vq_first; /* First mblk chain or NULL. */ mblk_t *vq_last; /* Last mblk chain. */ processorid_t vq_bind; /* Process to bind to */ kthread_t *vq_worker; /* Queue's thread */ } vsw_queue_t; /* * VSW MAC Ring Resources. * MAC Ring resource is composed of this state structure and * a kernel thread to perform the processing of the ring. */ typedef struct vsw_mac_ring_s { uint32_t ring_state; mac_blank_t ring_blank; void *ring_arg; vsw_queue_t *ring_vqp; struct vsw *ring_vswp; } vsw_mac_ring_t; /* * Maximum Ring Resources. */ #define VSW_MAC_RX_RINGS 0x40 /* * States for entry in ring table. */ #define VSW_MAC_RING_FREE 1 #define VSW_MAC_RING_INUSE 2 /* * Number of hash chains in the multicast forwarding database. */ #define VSW_NCHAINS 8 /* * State of interface if switch plumbed as network device. */ #define VSW_IF_REG 0x1 /* interface was registered */ #define VSW_IF_UP 0x2 /* Interface UP */ #define VSW_IF_PROMISC 0x4 /* Interface in promiscious mode */ #define VSW_U_P(state) \ (state == (VSW_IF_UP | VSW_IF_PROMISC)) /* * Switching modes. */ #define VSW_LAYER2 0x1 /* Layer 2 - MAC switching */ #define VSW_LAYER2_PROMISC 0x2 /* Layer 2 + promisc mode */ #define VSW_LAYER3 0x4 /* Layer 3 - IP switching */ #define NUM_SMODES 3 /* number of switching modes */ /* * Bits indicating which properties we've read from MD or physical device. */ #define VSW_MD_PHYSNAME 0x1 #define VSW_MD_MACADDR 0x2 #define VSW_DEV_MACADDR 0x4 #define VSW_MD_SMODE 0x8 /* * vsw instance state information. */ typedef struct vsw { int instance; /* instance # */ dev_info_t *dip; /* associated dev_info */ struct vsw *next; /* next in list */ char physname[LIFNAMSIZ]; /* phys-dev */ uint8_t smode[NUM_SMODES]; /* switching mode */ int smode_idx; /* curr pos in smode array */ int smode_num; /* # of modes specified */ uint8_t mdprops; /* bitmask of props found */ vsw_port_list_t plist; /* associated ports */ ddi_taskq_t *taskq_p; /* VIO ctrl msg taskq */ mod_hash_t *fdb; /* forwarding database */ mod_hash_t *mfdb; /* multicast FDB */ krwlock_t mfdbrw; /* rwlock for mFDB */ vio_mblk_pool_t *rxh; /* Receive pool handle */ void (*vsw_switch_frame) (struct vsw *, mblk_t *, int, vsw_port_t *, mac_resource_handle_t); /* mac layer */ kmutex_t mac_lock; /* protect fields below */ mac_handle_t mh; mac_rx_handle_t mrh; multiaddress_capab_t maddr; /* Multiple uni addr capable */ const mac_txinfo_t *txinfo; /* MAC tx routine */ boolean_t mstarted; /* Mac Started? */ boolean_t mresources; /* Mac Resources cb? */ /* * MAC Ring Resources. */ kmutex_t mac_ring_lock; /* Lock for the table. */ uint32_t mac_ring_tbl_sz; vsw_mac_ring_t *mac_ring_tbl; /* Mac ring table. */ kmutex_t hw_lock; /* sync access to HW */ boolean_t recfg_reqd; /* Reconfig of addrs needed */ int promisc_cnt; /* Machine Description updates */ mdeg_node_spec_t *inst_spec; mdeg_handle_t mdeg_hdl; mdeg_handle_t mdeg_port_hdl; /* if configured as an ethernet interface */ mac_handle_t if_mh; /* MAC handle */ struct ether_addr if_addr; /* interface address */ krwlock_t if_lockrw; uint8_t if_state; /* interface state */ mac_addr_slot_t addr_slot; /* Unicast address slot */ int addr_set; /* Addr set where */ /* multicast addresses when configured as eth interface */ kmutex_t mca_lock; /* multicast lock */ mcst_addr_t *mcap; /* list of multicast addrs */ } vsw_t; /* * Ethernet broadcast address definition. */ static struct ether_addr etherbroadcastaddr = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; #define IS_BROADCAST(ehp) \ (ether_cmp(&ehp->ether_dhost, ðerbroadcastaddr) == 0) #define IS_MULTICAST(ehp) \ ((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1) #define READ_ENTER(x) rw_enter(x, RW_READER) #define WRITE_ENTER(x) rw_enter(x, RW_WRITER) #define RW_EXIT(x) rw_exit(x) #ifdef __cplusplus } #endif #endif /* _VSW_H */