1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * This header file contains the basic data structures which the 29 * virtual switch (vsw) uses to communicate with vnet clients. 30 * 31 * The virtual switch reads the machine description (MD) to 32 * determine how many port_t structures to create (each port_t 33 * can support communications to a single network device). The 34 * port_t's are maintained in a linked list. 35 * 36 * Each port in turn contains a number of logical domain channels 37 * (ldc's) which are inter domain communications channels which 38 * are used for passing small messages between the domains. There 39 * may be any number of channels associated with each port, though 40 * currently most devices only have a single channel. The current 41 * implementation provides support for only one channel per port. 42 * 43 * The ldc is a bi-directional channel, which is divided up into 44 * two directional 'lanes', one outbound from the switch to the 45 * virtual network device, the other inbound to the switch. 46 * Depending on the type of device each lane may have seperate 47 * communication paramaters (such as mtu etc). 48 * 49 * For those network clients which use descriptor rings the 50 * rings are associated with the appropriate lane. I.e. rings 51 * which the switch exports are associated with the outbound lanes 52 * while those which the network clients are exporting to the switch 53 * are associated with the inbound lane. 54 * 55 * In diagram form the data structures look as follows: 56 * 57 * vsw instance 58 * | 59 * +----->port_t----->port_t----->port_t-----> 60 * | 61 * +--->ldc_t 62 * | 63 * +--->lane_t (inbound) 64 * | | 65 * | +--->dring 66 * | 67 * +--->lane_t (outbound) 68 * | 69 * +--->dring 70 * 71 */ 72 73 #ifndef _VSW_LDC_H 74 #define _VSW_LDC_H 75 76 #ifdef __cplusplus 77 extern "C" { 78 #endif 79 80 /* 81 * LDC pkt tranfer MTU - largest msg size used 82 */ 83 #define VSW_LDC_MTU 64 84 85 #define VSW_DEF_MSG_WORDS \ 86 (VNET_DRING_REG_EXT_MSG_SIZE_MAX / sizeof (uint64_t)) 87 88 /* 89 * Default message type. 90 */ 91 typedef struct def_msg { 92 uint64_t data[VSW_DEF_MSG_WORDS]; 93 } def_msg_t; 94 95 /* 96 * Currently only support one major/minor pair. 97 */ 98 #define VSW_NUM_VER 1 99 100 typedef struct ver_sup { 101 uint16_t ver_major; /* major version number */ 102 uint16_t ver_minor; /* minor version number */ 103 } ver_sup_t; 104 105 /* 106 * Lane states. 107 */ 108 #define VSW_LANE_INACTIV 0x0 /* No params set for lane */ 109 110 #define VSW_VER_INFO_SENT 0x1 /* Version # sent to peer */ 111 #define VSW_VER_INFO_RECV 0x2 /* Version # recv from peer */ 112 #define VSW_VER_ACK_RECV 0x4 113 #define VSW_VER_ACK_SENT 0x8 114 #define VSW_VER_NACK_RECV 0x10 115 #define VSW_VER_NACK_SENT 0x20 116 117 #define VSW_ATTR_INFO_SENT 0x40 /* Attributes sent to peer */ 118 #define VSW_ATTR_INFO_RECV 0x80 /* Peer attributes received */ 119 #define VSW_ATTR_ACK_SENT 0x100 120 #define VSW_ATTR_ACK_RECV 0x200 121 #define VSW_ATTR_NACK_SENT 0x400 122 #define VSW_ATTR_NACK_RECV 0x800 123 124 #define VSW_DRING_INFO_SENT 0x1000 /* Dring info sent to peer */ 125 #define VSW_DRING_INFO_RECV 0x2000 /* Dring info received */ 126 #define VSW_DRING_ACK_SENT 0x4000 127 #define VSW_DRING_ACK_RECV 0x8000 128 #define VSW_DRING_NACK_SENT 0x10000 129 #define VSW_DRING_NACK_RECV 0x20000 130 131 #define VSW_RDX_INFO_SENT 0x40000 /* RDX sent to peer */ 132 #define VSW_RDX_INFO_RECV 0x80000 /* RDX received from peer */ 133 #define VSW_RDX_ACK_SENT 0x100000 134 #define VSW_RDX_ACK_RECV 0x200000 135 #define VSW_RDX_NACK_SENT 0x400000 136 #define VSW_RDX_NACK_RECV 0x800000 137 138 #define VSW_MCST_INFO_SENT 0x1000000 139 #define VSW_MCST_INFO_RECV 0x2000000 140 #define VSW_MCST_ACK_SENT 0x4000000 141 #define VSW_MCST_ACK_RECV 0x8000000 142 #define VSW_MCST_NACK_SENT 0x10000000 143 #define VSW_MCST_NACK_RECV 0x20000000 144 145 #define VSW_LANE_ACTIVE 0x40000000 /* Lane open to xmit data */ 146 147 /* Handshake milestones */ 148 #define VSW_MILESTONE0 0x1 /* ver info exchanged */ 149 #define VSW_MILESTONE1 0x2 /* attribute exchanged */ 150 #define VSW_MILESTONE2 0x4 /* dring info exchanged */ 151 #define VSW_MILESTONE3 0x8 /* rdx exchanged */ 152 #define VSW_MILESTONE4 0x10 /* handshake complete */ 153 154 /* 155 * Lane direction (relative to ourselves). 156 */ 157 #define INBOUND 0x1 158 #define OUTBOUND 0x2 159 160 /* Peer session id received */ 161 #define VSW_PEER_SESSION 0x1 162 163 /* 164 * Maximum number of consecutive reads of data from channel 165 */ 166 #define VSW_MAX_CHAN_READ 50 167 168 /* 169 * Currently only support one ldc per port. 170 */ 171 #define VSW_PORT_MAX_LDCS 1 /* max # of ldcs per port */ 172 173 /* 174 * Used for port add/deletion. 175 */ 176 #define VSW_PORT_UPDATED 0x1 177 178 #define LDC_TX_SUCCESS 0 /* ldc transmit success */ 179 #define LDC_TX_FAILURE 1 /* ldc transmit failure */ 180 #define LDC_TX_NORESOURCES 2 /* out of descriptors */ 181 182 /* 183 * Descriptor ring info 184 * 185 * Each descriptor element has a pre-allocated data buffer 186 * associated with it, into which data being transmitted is 187 * copied. By pre-allocating we speed up the copying process. 188 * The buffer is re-used once the peer has indicated that it is 189 * finished with the descriptor. 190 */ 191 #define VSW_RING_EL_DATA_SZ 2048 /* Size of data section (bytes) */ 192 #define VSW_PRIV_SIZE sizeof (vnet_private_desc_t) 193 194 #define VSW_MAX_COOKIES ((ETHERMTU >> MMU_PAGESHIFT) + 2) 195 196 /* 197 * Size of the mblk in each mblk pool. 198 */ 199 #define VSW_MBLK_SZ_128 128 200 #define VSW_MBLK_SZ_256 256 201 #define VSW_MBLK_SZ_2048 2048 202 203 /* 204 * Number of mblks in each mblk pool. 205 */ 206 #define VSW_NUM_MBLKS 1024 207 208 /* increment recv index */ 209 #define INCR_DESC_INDEX(dp, i) \ 210 ((i) = (((i) + 1) & ((dp)->num_descriptors - 1))) 211 212 /* decrement recv index */ 213 #define DECR_DESC_INDEX(dp, i) \ 214 ((i) = (((i) - 1) & ((dp)->num_descriptors - 1))) 215 216 #define INCR_TXI INCR_DESC_INDEX 217 #define DECR_TXI DECR_DESC_INDEX 218 #define INCR_RXI INCR_DESC_INDEX 219 #define DECR_RXI DECR_DESC_INDEX 220 221 /* bounds check rx index */ 222 #define CHECK_DESC_INDEX(dp, i) \ 223 (((i) >= 0) && ((i) < (dp)->num_descriptors)) 224 225 #define CHECK_RXI CHECK_DESC_INDEX 226 #define CHECK_TXI CHECK_DESC_INDEX 227 228 /* 229 * Private descriptor 230 */ 231 typedef struct vsw_private_desc { 232 /* 233 * Below lock must be held when accessing the state of 234 * a descriptor on either the private or public sections 235 * of the ring. 236 */ 237 kmutex_t dstate_lock; 238 uint64_t dstate; 239 vnet_public_desc_t *descp; 240 ldc_mem_handle_t memhandle; 241 void *datap; 242 uint64_t datalen; 243 uint64_t ncookies; 244 ldc_mem_cookie_t memcookie[VSW_MAX_COOKIES]; 245 int bound; 246 } vsw_private_desc_t; 247 248 /* 249 * Descriptor ring structure 250 */ 251 typedef struct dring_info { 252 kmutex_t dlock; /* sync access */ 253 uint32_t num_descriptors; /* # of descriptors */ 254 uint32_t descriptor_size; /* size of descriptor */ 255 uint32_t options; /* dring options (mode) */ 256 ldc_dring_handle_t dring_handle; /* dring LDC handle */ 257 uint32_t dring_ncookies; /* # of dring cookies */ 258 ldc_mem_cookie_t dring_cookie[1]; /* LDC cookie of dring */ 259 ldc_mem_handle_t data_handle; /* data area LDC handle */ 260 uint32_t data_ncookies; /* # of data area cookies */ 261 ldc_mem_cookie_t *data_cookie; /* data area LDC cookies */ 262 uint64_t ident; /* identifier sent to peer */ 263 uint64_t end_idx; /* last idx processed */ 264 int64_t last_ack_recv; /* last ack received */ 265 kmutex_t txlock; /* protect tx desc alloc */ 266 uint32_t next_txi; /* next tx descriptor index */ 267 uint32_t next_rxi; /* next expected recv index */ 268 kmutex_t restart_lock; /* protect restart_reqd */ 269 boolean_t restart_reqd; /* send restart msg */ 270 void *pub_addr; /* base of public section */ 271 void *priv_addr; /* base of private section */ 272 void *data_addr; /* base of data section */ 273 size_t data_sz; /* size of data section */ 274 size_t desc_data_sz; /* size of descr data blk */ 275 uint8_t dring_mtype; /* dring mem map type */ 276 uint32_t num_bufs; /* # of buffers */ 277 vio_mblk_pool_t *rx_vmp; /* rx mblk pool */ 278 vio_mblk_t **rxdp_to_vmp; /* descr to buf map tbl */ 279 } dring_info_t; 280 281 /* 282 * Each ldc connection is comprised of two lanes, incoming 283 * from a peer, and outgoing to that peer. Each lane shares 284 * common ldc parameters and also has private lane-specific 285 * parameters. 286 */ 287 typedef struct lane { 288 uint64_t lstate; /* Lane state */ 289 uint16_t ver_major; /* Version major number */ 290 uint16_t ver_minor; /* Version minor number */ 291 uint64_t seq_num; /* Sequence number */ 292 uint64_t mtu; /* ETHERMTU */ 293 uint64_t addr; /* Unique physical address */ 294 uint8_t addr_type; /* Only MAC address at moment */ 295 uint8_t xfer_mode; /* Dring or Pkt based */ 296 uint8_t ack_freq; /* Only non zero for Pkt based xfer */ 297 uint32_t physlink_update; /* physlink updates */ 298 uint8_t dring_mode; /* Descriptor ring mode */ 299 dring_info_t *dringp; /* List of drings for this lane */ 300 } lane_t; 301 302 /* channel drain states */ 303 #define VSW_LDC_INIT 0x1 /* Initial non-drain state */ 304 #define VSW_LDC_DRAINING 0x2 /* Channel draining */ 305 306 /* 307 * vnet-protocol-version dependent function prototypes. 308 */ 309 typedef int (*vsw_ldctx_t) (void *, mblk_t *, mblk_t *, uint32_t); 310 typedef void (*vsw_ldcrx_pktdata_t) (void *, void *, uint32_t); 311 typedef void (*vsw_ldcrx_dringdata_t) (void *, void *); 312 313 /* ldc information associated with a vsw-port */ 314 typedef struct vsw_ldc { 315 struct vsw_ldc *ldc_next; /* next ldc in the list */ 316 struct vsw_port *ldc_port; /* associated port */ 317 struct vsw *ldc_vswp; /* associated vsw */ 318 kmutex_t ldc_cblock; /* sync callback processing */ 319 kmutex_t ldc_txlock; /* sync transmits */ 320 kmutex_t ldc_rxlock; /* sync rx */ 321 uint64_t ldc_id; /* channel number */ 322 ldc_handle_t ldc_handle; /* channel handle */ 323 kmutex_t drain_cv_lock; 324 kcondvar_t drain_cv; /* channel draining */ 325 int drain_state; 326 uint32_t hphase; /* handshake phase */ 327 int hcnt; /* # handshake attempts */ 328 kmutex_t status_lock; 329 ldc_status_t ldc_status; /* channel status */ 330 uint8_t reset_active; /* reset flag */ 331 uint64_t local_session; /* Our session id */ 332 uint64_t peer_session; /* Our peers session id */ 333 uint8_t session_status; /* Session recv'd, sent */ 334 uint32_t hss_id; /* Handshake session id */ 335 uint64_t next_ident; /* Next dring ident # to use */ 336 lane_t lane_in; /* Inbound lane */ 337 lane_t lane_out; /* Outbound lane */ 338 uint8_t dev_class; /* Peer device class */ 339 boolean_t pls_negotiated; /* phys link state update ? */ 340 vio_multi_pool_t vmp; /* Receive mblk pools */ 341 uint32_t max_rxpool_size; /* max size of rxpool in use */ 342 uint64_t *ldcmsg; /* msg buffer for ldc_read() */ 343 uint64_t msglen; /* size of ldcmsg */ 344 uint32_t dringdata_msgid; /* msgid in RxDringData mode */ 345 346 /* tx thread fields */ 347 kthread_t *tx_thread; /* tx thread */ 348 uint32_t tx_thr_flags; /* tx thread flags */ 349 kmutex_t tx_thr_lock; /* lock for tx thread */ 350 kcondvar_t tx_thr_cv; /* cond.var for tx thread */ 351 mblk_t *tx_mhead; /* tx mblks head */ 352 mblk_t *tx_mtail; /* tx mblks tail */ 353 uint32_t tx_cnt; /* # of pkts queued for tx */ 354 355 /* message thread fields */ 356 kthread_t *msg_thread; /* message thread */ 357 uint32_t msg_thr_flags; /* message thread flags */ 358 kmutex_t msg_thr_lock; /* lock for message thread */ 359 kcondvar_t msg_thr_cv; /* cond.var for msg thread */ 360 361 /* receive thread fields */ 362 kthread_t *rcv_thread; /* receive thread */ 363 uint32_t rcv_thr_flags; /* receive thread flags */ 364 kmutex_t rcv_thr_lock; /* lock for receive thread */ 365 kcondvar_t rcv_thr_cv; /* cond.var for recv thread */ 366 367 vsw_ldctx_t tx; /* transmit function */ 368 vsw_ldcrx_pktdata_t rx_pktdata; /* process raw data msg */ 369 vsw_ldcrx_dringdata_t rx_dringdata; /* process dring data msg */ 370 371 /* channel statistics */ 372 vgen_stats_t ldc_stats; /* channel statistics */ 373 kstat_t *ksp; /* channel kstats */ 374 } vsw_ldc_t; 375 376 /* worker thread flags */ 377 #define VSW_WTHR_DATARCVD 0x01 /* data received */ 378 #define VSW_WTHR_STOP 0x02 /* stop worker thread request */ 379 380 /* multicast addresses port is interested in */ 381 typedef struct mcst_addr { 382 struct mcst_addr *nextp; 383 struct ether_addr mca; /* multicast address */ 384 uint64_t addr; /* mcast addr converted to hash key */ 385 boolean_t mac_added; /* added into physical device */ 386 } mcst_addr_t; 387 388 /* Port detach states */ 389 #define VSW_PORT_INIT 0x1 /* Initial non-detach state */ 390 #define VSW_PORT_DETACHING 0x2 /* In process of being detached */ 391 #define VSW_PORT_DETACHABLE 0x4 /* Safe to detach */ 392 393 /* port information associated with a vsw */ 394 typedef struct vsw_port { 395 int p_instance; /* port instance */ 396 struct vsw_port *p_next; /* next port in the list */ 397 struct vsw *p_vswp; /* associated vsw */ 398 int num_ldcs; /* # of ldcs in the port */ 399 uint64_t *ldc_ids; /* ldc ids */ 400 vsw_ldc_t *ldcp; /* ldc for this port */ 401 402 kmutex_t tx_lock; /* transmit lock */ 403 int (*transmit)(vsw_ldc_t *, mblk_t *); 404 405 int state; /* port state */ 406 kmutex_t state_lock; 407 kcondvar_t state_cv; 408 409 krwlock_t maccl_rwlock; /* protect fields below */ 410 mac_client_handle_t p_mch; /* mac client handle */ 411 mac_unicast_handle_t p_muh; /* mac unicast handle */ 412 413 kmutex_t mca_lock; /* multicast lock */ 414 mcst_addr_t *mcap; /* list of multicast addrs */ 415 416 boolean_t addr_set; /* Addr set where */ 417 418 /* 419 * mac address of the port & connected device 420 */ 421 struct ether_addr p_macaddr; 422 uint16_t pvid; /* port vlan id (untagged) */ 423 struct vsw_vlanid *vids; /* vlan ids (tagged) */ 424 uint16_t nvids; /* # of vids */ 425 mod_hash_t *vlan_hashp; /* vlan hash table */ 426 uint32_t vlan_nchains; /* # of vlan hash chains */ 427 428 /* HybridIO related info */ 429 uint32_t p_hio_enabled; /* Hybrid mode enabled? */ 430 uint32_t p_hio_capable; /* Port capable of HIO */ 431 432 /* bandwidth limit */ 433 uint64_t p_bandwidth; /* bandwidth limit */ 434 } vsw_port_t; 435 436 /* list of ports per vsw */ 437 typedef struct vsw_port_list { 438 vsw_port_t *head; /* head of the list */ 439 krwlock_t lockrw; /* sync access(rw) to the list */ 440 int num_ports; /* number of ports in the list */ 441 } vsw_port_list_t; 442 443 /* 444 * Taskq control message 445 */ 446 typedef struct vsw_ctrl_task { 447 vsw_ldc_t *ldcp; 448 def_msg_t pktp; 449 uint32_t hss_id; 450 } vsw_ctrl_task_t; 451 452 /* 453 * State of connection to peer. Some of these states 454 * can be mapped to LDC events as follows: 455 * 456 * VSW_CONN_RESET -> LDC_RESET_EVT 457 * VSW_CONN_UP -> LDC_UP_EVT 458 */ 459 #define VSW_CONN_UP 0x1 /* Connection come up */ 460 #define VSW_CONN_RESET 0x2 /* Connection reset */ 461 #define VSW_CONN_RESTART 0x4 /* Restarting handshake on connection */ 462 463 typedef struct vsw_conn_evt { 464 uint16_t evt; /* Connection event */ 465 vsw_ldc_t *ldcp; 466 } vsw_conn_evt_t; 467 468 /* 469 * Ethernet broadcast address definition. 470 */ 471 static struct ether_addr etherbroadcastaddr = { 472 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 473 }; 474 475 #define IS_BROADCAST(ehp) \ 476 (bcmp(&ehp->ether_dhost, ðerbroadcastaddr, ETHERADDRL) == 0) 477 #define IS_MULTICAST(ehp) \ 478 ((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1) 479 480 #define READ_ENTER(x) rw_enter(x, RW_READER) 481 #define WRITE_ENTER(x) rw_enter(x, RW_WRITER) 482 #define RW_EXIT(x) rw_exit(x) 483 484 #define VSW_PORT_REFHOLD(portp) atomic_inc_32(&((portp)->ref_cnt)) 485 #define VSW_PORT_REFRELE(portp) atomic_dec_32(&((portp)->ref_cnt)) 486 487 #ifdef __cplusplus 488 } 489 #endif 490 491 #endif /* _VSW_LDC_H */ 492