1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * This header file contains the basic data structures which the 29 * virtual switch (vsw) uses to communicate with vnet clients. 30 * 31 * The virtual switch reads the machine description (MD) to 32 * determine how many port_t structures to create (each port_t 33 * can support communications to a single network device). The 34 * port_t's are maintained in a linked list. 35 * 36 * Each port in turn contains a number of logical domain channels 37 * (ldc's) which are inter domain communications channels which 38 * are used for passing small messages between the domains. Their 39 * may be an unlimited number of channels associated with each port, 40 * though most devices only use a single channel. 41 * 42 * The ldc is a bi-directional channel, which is divided up into 43 * two directional 'lanes', one outbound from the switch to the 44 * virtual network device, the other inbound to the switch. 45 * Depending on the type of device each lane may have seperate 46 * communication paramaters (such as mtu etc). 47 * 48 * For those network clients which use descriptor rings the 49 * rings are associated with the appropriate lane. I.e. rings 50 * which the switch exports are associated with the outbound lanes 51 * while those which the network clients are exporting to the switch 52 * are associated with the inbound lane. 53 * 54 * In diagram form the data structures look as follows: 55 * 56 * vsw instance 57 * | 58 * +----->port_t----->port_t----->port_t-----> 59 * | 60 * +--->ldc_t--->ldc_t--->ldc_t---> 61 * | 62 * +--->lane_t (inbound) 63 * | | 64 * | +--->dring--->dring---> 65 * | 66 * +--->lane_t (outbound) 67 * | 68 * +--->dring--->dring---> 69 * 70 */ 71 72 #ifndef _VSW_LDC_H 73 #define _VSW_LDC_H 74 75 #ifdef __cplusplus 76 extern "C" { 77 #endif 78 79 /* 80 * Default message type. 81 */ 82 typedef struct def_msg { 83 uint64_t data[8]; 84 } def_msg_t; 85 86 /* 87 * Currently only support one major/minor pair. 88 */ 89 #define VSW_NUM_VER 1 90 91 typedef struct ver_sup { 92 uint16_t ver_major; /* major version number */ 93 uint16_t ver_minor; /* minor version number */ 94 } ver_sup_t; 95 96 /* 97 * Lane states. 98 */ 99 #define VSW_LANE_INACTIV 0x0 /* No params set for lane */ 100 101 #define VSW_VER_INFO_SENT 0x1 /* Version # sent to peer */ 102 #define VSW_VER_INFO_RECV 0x2 /* Version # recv from peer */ 103 #define VSW_VER_ACK_RECV 0x4 104 #define VSW_VER_ACK_SENT 0x8 105 #define VSW_VER_NACK_RECV 0x10 106 #define VSW_VER_NACK_SENT 0x20 107 108 #define VSW_ATTR_INFO_SENT 0x40 /* Attributes sent to peer */ 109 #define VSW_ATTR_INFO_RECV 0x80 /* Peer attributes received */ 110 #define VSW_ATTR_ACK_SENT 0x100 111 #define VSW_ATTR_ACK_RECV 0x200 112 #define VSW_ATTR_NACK_SENT 0x400 113 #define VSW_ATTR_NACK_RECV 0x800 114 115 #define VSW_DRING_INFO_SENT 0x1000 /* Dring info sent to peer */ 116 #define VSW_DRING_INFO_RECV 0x2000 /* Dring info received */ 117 #define VSW_DRING_ACK_SENT 0x4000 118 #define VSW_DRING_ACK_RECV 0x8000 119 #define VSW_DRING_NACK_SENT 0x10000 120 #define VSW_DRING_NACK_RECV 0x20000 121 122 #define VSW_RDX_INFO_SENT 0x40000 /* RDX sent to peer */ 123 #define VSW_RDX_INFO_RECV 0x80000 /* RDX received from peer */ 124 #define VSW_RDX_ACK_SENT 0x100000 125 #define VSW_RDX_ACK_RECV 0x200000 126 #define VSW_RDX_NACK_SENT 0x400000 127 #define VSW_RDX_NACK_RECV 0x800000 128 129 #define VSW_MCST_INFO_SENT 0x1000000 130 #define VSW_MCST_INFO_RECV 0x2000000 131 #define VSW_MCST_ACK_SENT 0x4000000 132 #define VSW_MCST_ACK_RECV 0x8000000 133 #define VSW_MCST_NACK_SENT 0x10000000 134 #define VSW_MCST_NACK_RECV 0x20000000 135 136 #define VSW_LANE_ACTIVE 0x40000000 /* Lane open to xmit data */ 137 138 /* Handshake milestones */ 139 #define VSW_MILESTONE0 0x1 /* ver info exchanged */ 140 #define VSW_MILESTONE1 0x2 /* attribute exchanged */ 141 #define VSW_MILESTONE2 0x4 /* dring info exchanged */ 142 #define VSW_MILESTONE3 0x8 /* rdx exchanged */ 143 #define VSW_MILESTONE4 0x10 /* handshake complete */ 144 145 /* 146 * Lane direction (relative to ourselves). 147 */ 148 #define INBOUND 0x1 149 #define OUTBOUND 0x2 150 151 /* Peer session id received */ 152 #define VSW_PEER_SESSION 0x1 153 154 /* 155 * Maximum number of consecutive reads of data from channel 156 */ 157 #define VSW_MAX_CHAN_READ 50 158 159 /* 160 * Currently only support one ldc per port. 161 */ 162 #define VSW_PORT_MAX_LDCS 1 /* max # of ldcs per port */ 163 164 /* 165 * Used for port add/deletion. 166 */ 167 #define VSW_PORT_UPDATED 0x1 168 169 #define LDC_TX_SUCCESS 0 /* ldc transmit success */ 170 #define LDC_TX_FAILURE 1 /* ldc transmit failure */ 171 #define LDC_TX_NORESOURCES 2 /* out of descriptors */ 172 173 /* 174 * Descriptor ring info 175 * 176 * Each descriptor element has a pre-allocated data buffer 177 * associated with it, into which data being transmitted is 178 * copied. By pre-allocating we speed up the copying process. 179 * The buffer is re-used once the peer has indicated that it is 180 * finished with the descriptor. 181 */ 182 #define VSW_RING_EL_DATA_SZ 2048 /* Size of data section (bytes) */ 183 #define VSW_PRIV_SIZE sizeof (vnet_private_desc_t) 184 #define VSW_PUB_SIZE sizeof (vnet_public_desc_t) 185 186 #define VSW_MAX_COOKIES ((ETHERMTU >> MMU_PAGESHIFT) + 2) 187 188 /* 189 * LDC pkt tranfer MTU 190 */ 191 #define VSW_LDC_MTU sizeof (def_msg_t) 192 193 /* 194 * Size of the mblk in each mblk pool. 195 */ 196 #define VSW_MBLK_SZ_128 128 197 #define VSW_MBLK_SZ_256 256 198 #define VSW_MBLK_SZ_2048 2048 199 200 /* 201 * Number of mblks in each mblk pool. 202 */ 203 #define VSW_NUM_MBLKS 1024 204 205 /* 206 * Private descriptor 207 */ 208 typedef struct vsw_private_desc { 209 /* 210 * Below lock must be held when accessing the state of 211 * a descriptor on either the private or public sections 212 * of the ring. 213 */ 214 kmutex_t dstate_lock; 215 uint64_t dstate; 216 vnet_public_desc_t *descp; 217 ldc_mem_handle_t memhandle; 218 void *datap; 219 uint64_t datalen; 220 uint64_t ncookies; 221 ldc_mem_cookie_t memcookie[VSW_MAX_COOKIES]; 222 int bound; 223 } vsw_private_desc_t; 224 225 /* 226 * Descriptor ring structure 227 */ 228 typedef struct dring_info { 229 struct dring_info *next; /* next ring in chain */ 230 kmutex_t dlock; 231 uint32_t num_descriptors; 232 uint32_t descriptor_size; 233 uint32_t options; 234 uint32_t ncookies; 235 ldc_mem_cookie_t cookie[1]; 236 237 ldc_dring_handle_t handle; 238 uint64_t ident; /* identifier sent to peer */ 239 uint64_t end_idx; /* last idx processed */ 240 int64_t last_ack_recv; 241 242 kmutex_t restart_lock; 243 boolean_t restart_reqd; /* send restart msg */ 244 245 /* 246 * base address of private and public portions of the 247 * ring (where appropriate), and data block. 248 */ 249 void *pub_addr; /* base of public section */ 250 void *priv_addr; /* base of private section */ 251 void *data_addr; /* base of data section */ 252 size_t data_sz; /* size of data section */ 253 size_t desc_data_sz; /* size of descr data blk */ 254 uint8_t dring_mtype; /* dring mem map type */ 255 } dring_info_t; 256 257 /* 258 * Each ldc connection is comprised of two lanes, incoming 259 * from a peer, and outgoing to that peer. Each lane shares 260 * common ldc parameters and also has private lane-specific 261 * parameters. 262 */ 263 typedef struct lane { 264 uint64_t lstate; /* Lane state */ 265 uint16_t ver_major; /* Version major number */ 266 uint16_t ver_minor; /* Version minor number */ 267 uint64_t seq_num; /* Sequence number */ 268 uint64_t mtu; /* ETHERMTU */ 269 uint64_t addr; /* Unique physical address */ 270 uint8_t addr_type; /* Only MAC address at moment */ 271 uint8_t xfer_mode; /* Dring or Pkt based */ 272 uint8_t ack_freq; /* Only non zero for Pkt based xfer */ 273 krwlock_t dlistrw; /* Lock for dring list */ 274 dring_info_t *dringp; /* List of drings for this lane */ 275 } lane_t; 276 277 /* channel drain states */ 278 #define VSW_LDC_INIT 0x1 /* Initial non-drain state */ 279 #define VSW_LDC_DRAINING 0x2 /* Channel draining */ 280 281 /* 282 * vnet-protocol-version dependent function prototypes. 283 */ 284 typedef int (*vsw_ldctx_t) (void *, mblk_t *, mblk_t *, uint32_t); 285 typedef void (*vsw_ldcrx_pktdata_t) (void *, void *, uint32_t); 286 287 /* ldc information associated with a vsw-port */ 288 typedef struct vsw_ldc { 289 struct vsw_ldc *ldc_next; /* next ldc in the list */ 290 struct vsw_port *ldc_port; /* associated port */ 291 struct vsw *ldc_vswp; /* associated vsw */ 292 kmutex_t ldc_cblock; /* sync callback processing */ 293 kmutex_t ldc_txlock; /* sync transmits */ 294 kmutex_t ldc_rxlock; /* sync rx */ 295 uint64_t ldc_id; /* channel number */ 296 ldc_handle_t ldc_handle; /* channel handle */ 297 kmutex_t drain_cv_lock; 298 kcondvar_t drain_cv; /* channel draining */ 299 int drain_state; 300 uint32_t hphase; /* handshake phase */ 301 int hcnt; /* # handshake attempts */ 302 kmutex_t status_lock; 303 ldc_status_t ldc_status; /* channel status */ 304 uint8_t reset_active; /* reset flag */ 305 uint64_t local_session; /* Our session id */ 306 uint64_t peer_session; /* Our peers session id */ 307 uint8_t session_status; /* Session recv'd, sent */ 308 uint32_t hss_id; /* Handshake session id */ 309 uint64_t next_ident; /* Next dring ident # to use */ 310 lane_t lane_in; /* Inbound lane */ 311 lane_t lane_out; /* Outbound lane */ 312 uint8_t dev_class; /* Peer device class */ 313 vio_multi_pool_t vmp; /* Receive mblk pools */ 314 uint32_t max_rxpool_size; /* max size of rxpool in use */ 315 uint64_t *ldcmsg; /* msg buffer for ldc_read() */ 316 uint64_t msglen; /* size of ldcmsg */ 317 318 /* tx thread fields */ 319 kthread_t *tx_thread; /* tx thread */ 320 uint32_t tx_thr_flags; /* tx thread flags */ 321 kmutex_t tx_thr_lock; /* lock for tx thread */ 322 kcondvar_t tx_thr_cv; /* cond.var for tx thread */ 323 mblk_t *tx_mhead; /* tx mblks head */ 324 mblk_t *tx_mtail; /* tx mblks tail */ 325 uint32_t tx_cnt; /* # of pkts queued for tx */ 326 327 /* receive thread fields */ 328 kthread_t *rx_thread; /* receive thread */ 329 uint32_t rx_thr_flags; /* receive thread flags */ 330 kmutex_t rx_thr_lock; /* lock for receive thread */ 331 kcondvar_t rx_thr_cv; /* cond.var for recv thread */ 332 333 vsw_ldctx_t tx; /* transmit function */ 334 vsw_ldcrx_pktdata_t rx_pktdata; /* process rx raw data msg */ 335 336 /* channel statistics */ 337 vgen_stats_t ldc_stats; /* channel statistics */ 338 kstat_t *ksp; /* channel kstats */ 339 } vsw_ldc_t; 340 341 /* worker thread flags */ 342 #define VSW_WTHR_RUNNING 0x01 /* worker thread running */ 343 #define VSW_WTHR_DATARCVD 0x02 /* data received */ 344 #define VSW_WTHR_STOP 0x04 /* stop worker thread request */ 345 346 /* list of ldcs per port */ 347 typedef struct vsw_ldc_list { 348 vsw_ldc_t *head; /* head of the list */ 349 krwlock_t lockrw; /* sync access(rw) to the list */ 350 } vsw_ldc_list_t; 351 352 /* multicast addresses port is interested in */ 353 typedef struct mcst_addr { 354 struct mcst_addr *nextp; 355 struct ether_addr mca; /* multicast address */ 356 uint64_t addr; /* mcast addr converted to hash key */ 357 boolean_t mac_added; /* added into physical device */ 358 } mcst_addr_t; 359 360 /* Port detach states */ 361 #define VSW_PORT_INIT 0x1 /* Initial non-detach state */ 362 #define VSW_PORT_DETACHING 0x2 /* In process of being detached */ 363 #define VSW_PORT_DETACHABLE 0x4 /* Safe to detach */ 364 365 #define VSW_ADDR_UNSET 0x0 /* Addr not set */ 366 #define VSW_ADDR_HW 0x1 /* Addr programmed in HW */ 367 #define VSW_ADDR_PROMISC 0x2 /* Card in promisc to see addr */ 368 369 /* port information associated with a vsw */ 370 typedef struct vsw_port { 371 int p_instance; /* port instance */ 372 struct vsw_port *p_next; /* next port in the list */ 373 struct vsw *p_vswp; /* associated vsw */ 374 int num_ldcs; /* # of ldcs in the port */ 375 uint64_t *ldc_ids; /* ldc ids */ 376 vsw_ldc_list_t p_ldclist; /* list of ldcs for this port */ 377 378 kmutex_t tx_lock; /* transmit lock */ 379 int (*transmit)(vsw_ldc_t *, mblk_t *); 380 381 int state; /* port state */ 382 kmutex_t state_lock; 383 kcondvar_t state_cv; 384 385 kmutex_t mca_lock; /* multicast lock */ 386 mcst_addr_t *mcap; /* list of multicast addrs */ 387 388 mac_addr_slot_t addr_slot; /* Unicast address slot */ 389 int addr_set; /* Addr set where */ 390 391 /* 392 * mac address of the port & connected device 393 */ 394 struct ether_addr p_macaddr; 395 uint16_t pvid; /* port vlan id (untagged) */ 396 uint16_t *vids; /* vlan ids (tagged) */ 397 uint16_t nvids; /* # of vids */ 398 uint32_t vids_size; /* size alloc'd for vids list */ 399 mod_hash_t *vlan_hashp; /* vlan hash table */ 400 uint32_t vlan_nchains; /* # of vlan hash chains */ 401 402 /* HybridIO related info */ 403 uint32_t p_hio_enabled; /* Hybrid mode enabled? */ 404 uint32_t p_hio_capable; /* Port capable of HIO */ 405 } vsw_port_t; 406 407 /* list of ports per vsw */ 408 typedef struct vsw_port_list { 409 vsw_port_t *head; /* head of the list */ 410 krwlock_t lockrw; /* sync access(rw) to the list */ 411 int num_ports; /* number of ports in the list */ 412 } vsw_port_list_t; 413 414 /* 415 * Taskq control message 416 */ 417 typedef struct vsw_ctrl_task { 418 vsw_ldc_t *ldcp; 419 def_msg_t pktp; 420 uint32_t hss_id; 421 } vsw_ctrl_task_t; 422 423 /* 424 * State of connection to peer. Some of these states 425 * can be mapped to LDC events as follows: 426 * 427 * VSW_CONN_RESET -> LDC_RESET_EVT 428 * VSW_CONN_UP -> LDC_UP_EVT 429 */ 430 #define VSW_CONN_UP 0x1 /* Connection come up */ 431 #define VSW_CONN_RESET 0x2 /* Connection reset */ 432 #define VSW_CONN_RESTART 0x4 /* Restarting handshake on connection */ 433 434 typedef struct vsw_conn_evt { 435 uint16_t evt; /* Connection event */ 436 vsw_ldc_t *ldcp; 437 } vsw_conn_evt_t; 438 439 /* 440 * Ethernet broadcast address definition. 441 */ 442 static struct ether_addr etherbroadcastaddr = { 443 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 444 }; 445 446 #define IS_BROADCAST(ehp) \ 447 (ether_cmp(&ehp->ether_dhost, ðerbroadcastaddr) == 0) 448 #define IS_MULTICAST(ehp) \ 449 ((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1) 450 451 #define READ_ENTER(x) rw_enter(x, RW_READER) 452 #define WRITE_ENTER(x) rw_enter(x, RW_WRITER) 453 #define RW_EXIT(x) rw_exit(x) 454 455 #define VSW_PORT_REFHOLD(portp) atomic_inc_32(&((portp)->ref_cnt)) 456 #define VSW_PORT_REFRELE(portp) atomic_dec_32(&((portp)->ref_cnt)) 457 458 #ifdef __cplusplus 459 } 460 #endif 461 462 #endif /* _VSW_LDC_H */ 463