/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * This header file contains the basic data structures which the
 * virtual switch (vsw) uses to communicate with vnet clients.
 *
 * The virtual switch reads the machine description (MD) to
 * determine how many port_t structures to create (each port_t
 * can support communications to a single network device). The
 * port_t's are maintained in a linked list.
 *
 * Each port in turn contains a number of logical domain channels
 * (ldc's) which are inter domain communications channels which
 * are used for passing small messages between the domains. There
 * may be any number of channels associated with each port, though
 * currently most devices only have a single channel. The current
 * implementation provides support for only one channel per port.
 *
 * The ldc is a bi-directional channel, which is divided up into
 * two directional 'lanes', one outbound from the switch to the
 * virtual network device, the other inbound to the switch.
45 * Depending on the type of device each lane may have seperate 46 * communication paramaters (such as mtu etc). 47 * 48 * For those network clients which use descriptor rings the 49 * rings are associated with the appropriate lane. I.e. rings 50 * which the switch exports are associated with the outbound lanes 51 * while those which the network clients are exporting to the switch 52 * are associated with the inbound lane. 53 * 54 * In diagram form the data structures look as follows: 55 * 56 * vsw instance 57 * | 58 * +----->port_t----->port_t----->port_t-----> 59 * | 60 * +--->ldc_t 61 * | 62 * +--->lane_t (inbound) 63 * | | 64 * | +--->dring 65 * | 66 * +--->lane_t (outbound) 67 * | 68 * +--->dring 69 * 70 */ 71 72 #ifndef _VSW_LDC_H 73 #define _VSW_LDC_H 74 75 #ifdef __cplusplus 76 extern "C" { 77 #endif 78 79 /* 80 * LDC pkt tranfer MTU - largest msg size used 81 */ 82 #define VSW_LDC_MTU 64 83 84 #define VSW_DEF_MSG_WORDS \ 85 (VNET_DRING_REG_EXT_MSG_SIZE_MAX / sizeof (uint64_t)) 86 87 /* 88 * Default message type. 89 */ 90 typedef struct def_msg { 91 uint64_t data[VSW_DEF_MSG_WORDS]; 92 } def_msg_t; 93 94 /* 95 * Currently only support one major/minor pair. 96 */ 97 #define VSW_NUM_VER 1 98 99 typedef struct ver_sup { 100 uint16_t ver_major; /* major version number */ 101 uint16_t ver_minor; /* minor version number */ 102 } ver_sup_t; 103 104 /* 105 * Lane states. 
106 */ 107 #define VSW_LANE_INACTIV 0x0 /* No params set for lane */ 108 109 #define VSW_VER_INFO_SENT 0x1 /* Version # sent to peer */ 110 #define VSW_VER_INFO_RECV 0x2 /* Version # recv from peer */ 111 #define VSW_VER_ACK_RECV 0x4 112 #define VSW_VER_ACK_SENT 0x8 113 #define VSW_VER_NACK_RECV 0x10 114 #define VSW_VER_NACK_SENT 0x20 115 116 #define VSW_ATTR_INFO_SENT 0x40 /* Attributes sent to peer */ 117 #define VSW_ATTR_INFO_RECV 0x80 /* Peer attributes received */ 118 #define VSW_ATTR_ACK_SENT 0x100 119 #define VSW_ATTR_ACK_RECV 0x200 120 #define VSW_ATTR_NACK_SENT 0x400 121 #define VSW_ATTR_NACK_RECV 0x800 122 123 #define VSW_DRING_INFO_SENT 0x1000 /* Dring info sent to peer */ 124 #define VSW_DRING_INFO_RECV 0x2000 /* Dring info received */ 125 #define VSW_DRING_ACK_SENT 0x4000 126 #define VSW_DRING_ACK_RECV 0x8000 127 #define VSW_DRING_NACK_SENT 0x10000 128 #define VSW_DRING_NACK_RECV 0x20000 129 130 #define VSW_RDX_INFO_SENT 0x40000 /* RDX sent to peer */ 131 #define VSW_RDX_INFO_RECV 0x80000 /* RDX received from peer */ 132 #define VSW_RDX_ACK_SENT 0x100000 133 #define VSW_RDX_ACK_RECV 0x200000 134 #define VSW_RDX_NACK_SENT 0x400000 135 #define VSW_RDX_NACK_RECV 0x800000 136 137 #define VSW_MCST_INFO_SENT 0x1000000 138 #define VSW_MCST_INFO_RECV 0x2000000 139 #define VSW_MCST_ACK_SENT 0x4000000 140 #define VSW_MCST_ACK_RECV 0x8000000 141 #define VSW_MCST_NACK_SENT 0x10000000 142 #define VSW_MCST_NACK_RECV 0x20000000 143 144 #define VSW_LANE_ACTIVE 0x40000000 /* Lane open to xmit data */ 145 146 /* Handshake milestones */ 147 #define VSW_MILESTONE0 0x1 /* ver info exchanged */ 148 #define VSW_MILESTONE1 0x2 /* attribute exchanged */ 149 #define VSW_MILESTONE2 0x4 /* dring info exchanged */ 150 #define VSW_MILESTONE3 0x8 /* rdx exchanged */ 151 #define VSW_MILESTONE4 0x10 /* handshake complete */ 152 153 /* 154 * Lane direction (relative to ourselves). 
155 */ 156 #define INBOUND 0x1 157 #define OUTBOUND 0x2 158 159 /* Peer session id received */ 160 #define VSW_PEER_SESSION 0x1 161 162 /* 163 * Maximum number of consecutive reads of data from channel 164 */ 165 #define VSW_MAX_CHAN_READ 50 166 167 /* 168 * Currently only support one ldc per port. 169 */ 170 #define VSW_PORT_MAX_LDCS 1 /* max # of ldcs per port */ 171 172 /* 173 * Used for port add/deletion. 174 */ 175 #define VSW_PORT_UPDATED 0x1 176 177 #define LDC_TX_SUCCESS 0 /* ldc transmit success */ 178 #define LDC_TX_FAILURE 1 /* ldc transmit failure */ 179 #define LDC_TX_NORESOURCES 2 /* out of descriptors */ 180 181 /* 182 * Descriptor ring info 183 * 184 * Each descriptor element has a pre-allocated data buffer 185 * associated with it, into which data being transmitted is 186 * copied. By pre-allocating we speed up the copying process. 187 * The buffer is re-used once the peer has indicated that it is 188 * finished with the descriptor. 189 */ 190 #define VSW_RING_EL_DATA_SZ 2048 /* Size of data section (bytes) */ 191 #define VSW_PRIV_SIZE sizeof (vnet_private_desc_t) 192 193 #define VSW_MAX_COOKIES ((ETHERMTU >> MMU_PAGESHIFT) + 2) 194 195 /* 196 * Size of the mblk in each mblk pool. 197 */ 198 #define VSW_MBLK_SZ_128 128 199 #define VSW_MBLK_SZ_256 256 200 #define VSW_MBLK_SZ_2048 2048 201 202 /* 203 * Number of mblks in each mblk pool. 
204 */ 205 #define VSW_NUM_MBLKS 1024 206 207 /* 208 * Number of rcv buffers in RxDringData mode 209 */ 210 #define VSW_RXDRING_NRBUFS (vsw_num_descriptors * vsw_nrbufs_factor) 211 212 /* increment recv index */ 213 #define INCR_DESC_INDEX(dp, i) \ 214 ((i) = (((i) + 1) & ((dp)->num_descriptors - 1))) 215 216 /* decrement recv index */ 217 #define DECR_DESC_INDEX(dp, i) \ 218 ((i) = (((i) - 1) & ((dp)->num_descriptors - 1))) 219 220 #define INCR_TXI INCR_DESC_INDEX 221 #define DECR_TXI DECR_DESC_INDEX 222 #define INCR_RXI INCR_DESC_INDEX 223 #define DECR_RXI DECR_DESC_INDEX 224 225 /* bounds check rx index */ 226 #define CHECK_DESC_INDEX(dp, i) \ 227 (((i) >= 0) && ((i) < (dp)->num_descriptors)) 228 229 #define CHECK_RXI CHECK_DESC_INDEX 230 #define CHECK_TXI CHECK_DESC_INDEX 231 232 /* 233 * Private descriptor 234 */ 235 typedef struct vsw_private_desc { 236 /* 237 * Below lock must be held when accessing the state of 238 * a descriptor on either the private or public sections 239 * of the ring. 
240 */ 241 kmutex_t dstate_lock; 242 uint64_t dstate; 243 vnet_public_desc_t *descp; 244 ldc_mem_handle_t memhandle; 245 void *datap; 246 uint64_t datalen; 247 uint64_t ncookies; 248 ldc_mem_cookie_t memcookie[VSW_MAX_COOKIES]; 249 int bound; 250 } vsw_private_desc_t; 251 252 /* 253 * Descriptor ring structure 254 */ 255 typedef struct dring_info { 256 kmutex_t dlock; /* sync access */ 257 uint32_t num_descriptors; /* # of descriptors */ 258 uint32_t descriptor_size; /* size of descriptor */ 259 uint32_t options; /* dring options (mode) */ 260 ldc_dring_handle_t dring_handle; /* dring LDC handle */ 261 uint32_t dring_ncookies; /* # of dring cookies */ 262 ldc_mem_cookie_t dring_cookie[1]; /* LDC cookie of dring */ 263 ldc_mem_handle_t data_handle; /* data area LDC handle */ 264 uint32_t data_ncookies; /* # of data area cookies */ 265 ldc_mem_cookie_t *data_cookie; /* data area LDC cookies */ 266 uint64_t ident; /* identifier sent to peer */ 267 uint64_t end_idx; /* last idx processed */ 268 int64_t last_ack_recv; /* last ack received */ 269 kmutex_t txlock; /* protect tx desc alloc */ 270 uint32_t next_txi; /* next tx descriptor index */ 271 uint32_t next_rxi; /* next expected recv index */ 272 kmutex_t restart_lock; /* protect restart_reqd */ 273 boolean_t restart_reqd; /* send restart msg */ 274 uint32_t restart_peer_txi; /* index to restart peer */ 275 void *pub_addr; /* base of public section */ 276 void *priv_addr; /* base of private section */ 277 void *data_addr; /* base of data section */ 278 size_t data_sz; /* size of data section */ 279 size_t desc_data_sz; /* size of descr data blk */ 280 uint8_t dring_mtype; /* dring mem map type */ 281 uint32_t num_bufs; /* # of buffers */ 282 vio_mblk_pool_t *rx_vmp; /* rx mblk pool */ 283 vio_mblk_t **rxdp_to_vmp; /* descr to buf map tbl */ 284 } dring_info_t; 285 286 /* 287 * Each ldc connection is comprised of two lanes, incoming 288 * from a peer, and outgoing to that peer. 
Each lane shares 289 * common ldc parameters and also has private lane-specific 290 * parameters. 291 */ 292 typedef struct lane { 293 uint64_t lstate; /* Lane state */ 294 uint16_t ver_major; /* Version major number */ 295 uint16_t ver_minor; /* Version minor number */ 296 uint64_t seq_num; /* Sequence number */ 297 uint64_t mtu; /* ETHERMTU */ 298 uint64_t addr; /* Unique physical address */ 299 uint8_t addr_type; /* Only MAC address at moment */ 300 uint8_t xfer_mode; /* Dring or Pkt based */ 301 uint8_t ack_freq; /* Only non zero for Pkt based xfer */ 302 uint32_t physlink_update; /* physlink updates */ 303 uint8_t dring_mode; /* Descriptor ring mode */ 304 dring_info_t *dringp; /* List of drings for this lane */ 305 } lane_t; 306 307 /* channel drain states */ 308 #define VSW_LDC_INIT 0x1 /* Initial non-drain state */ 309 #define VSW_LDC_DRAINING 0x2 /* Channel draining */ 310 311 /* 312 * vnet-protocol-version dependent function prototypes. 313 */ 314 typedef int (*vsw_ldctx_t) (void *, mblk_t *, mblk_t *, uint32_t); 315 typedef void (*vsw_ldcrx_pktdata_t) (void *, void *, uint32_t); 316 typedef void (*vsw_ldcrx_dringdata_t) (void *, void *); 317 318 /* ldc information associated with a vsw-port */ 319 typedef struct vsw_ldc { 320 struct vsw_ldc *ldc_next; /* next ldc in the list */ 321 struct vsw_port *ldc_port; /* associated port */ 322 struct vsw *ldc_vswp; /* associated vsw */ 323 kmutex_t ldc_cblock; /* sync callback processing */ 324 kmutex_t ldc_txlock; /* sync transmits */ 325 kmutex_t ldc_rxlock; /* sync rx */ 326 uint64_t ldc_id; /* channel number */ 327 ldc_handle_t ldc_handle; /* channel handle */ 328 kmutex_t drain_cv_lock; 329 kcondvar_t drain_cv; /* channel draining */ 330 int drain_state; 331 uint32_t hphase; /* handshake phase */ 332 int hcnt; /* # handshake attempts */ 333 kmutex_t status_lock; 334 ldc_status_t ldc_status; /* channel status */ 335 uint8_t reset_active; /* reset flag */ 336 uint64_t local_session; /* Our session id */ 337 
uint64_t peer_session; /* Our peers session id */ 338 uint8_t session_status; /* Session recv'd, sent */ 339 uint32_t hss_id; /* Handshake session id */ 340 uint64_t next_ident; /* Next dring ident # to use */ 341 lane_t lane_in; /* Inbound lane */ 342 lane_t lane_out; /* Outbound lane */ 343 uint8_t dev_class; /* Peer device class */ 344 boolean_t pls_negotiated; /* phys link state update ? */ 345 vio_multi_pool_t vmp; /* Receive mblk pools */ 346 uint32_t max_rxpool_size; /* max size of rxpool in use */ 347 uint64_t *ldcmsg; /* msg buffer for ldc_read() */ 348 uint64_t msglen; /* size of ldcmsg */ 349 uint32_t dringdata_msgid; /* msgid in RxDringData mode */ 350 351 /* tx thread fields */ 352 kthread_t *tx_thread; /* tx thread */ 353 uint32_t tx_thr_flags; /* tx thread flags */ 354 kmutex_t tx_thr_lock; /* lock for tx thread */ 355 kcondvar_t tx_thr_cv; /* cond.var for tx thread */ 356 mblk_t *tx_mhead; /* tx mblks head */ 357 mblk_t *tx_mtail; /* tx mblks tail */ 358 uint32_t tx_cnt; /* # of pkts queued for tx */ 359 360 /* message thread fields */ 361 kthread_t *msg_thread; /* message thread */ 362 uint32_t msg_thr_flags; /* message thread flags */ 363 kmutex_t msg_thr_lock; /* lock for message thread */ 364 kcondvar_t msg_thr_cv; /* cond.var for msg thread */ 365 366 /* receive thread fields */ 367 kthread_t *rcv_thread; /* receive thread */ 368 uint32_t rcv_thr_flags; /* receive thread flags */ 369 kmutex_t rcv_thr_lock; /* lock for receive thread */ 370 kcondvar_t rcv_thr_cv; /* cond.var for recv thread */ 371 372 vsw_ldctx_t tx; /* transmit function */ 373 vsw_ldcrx_pktdata_t rx_pktdata; /* process raw data msg */ 374 vsw_ldcrx_dringdata_t rx_dringdata; /* process dring data msg */ 375 376 /* channel statistics */ 377 vgen_stats_t ldc_stats; /* channel statistics */ 378 kstat_t *ksp; /* channel kstats */ 379 } vsw_ldc_t; 380 381 /* worker thread flags */ 382 #define VSW_WTHR_DATARCVD 0x01 /* data received */ 383 #define VSW_WTHR_STOP 0x02 /* stop worker 
thread request */ 384 385 /* multicast addresses port is interested in */ 386 typedef struct mcst_addr { 387 struct mcst_addr *nextp; 388 struct ether_addr mca; /* multicast address */ 389 uint64_t addr; /* mcast addr converted to hash key */ 390 boolean_t mac_added; /* added into physical device */ 391 } mcst_addr_t; 392 393 /* Port detach states */ 394 #define VSW_PORT_INIT 0x1 /* Initial non-detach state */ 395 #define VSW_PORT_DETACHING 0x2 /* In process of being detached */ 396 #define VSW_PORT_DETACHABLE 0x4 /* Safe to detach */ 397 398 /* port information associated with a vsw */ 399 typedef struct vsw_port { 400 int p_instance; /* port instance */ 401 struct vsw_port *p_next; /* next port in the list */ 402 struct vsw *p_vswp; /* associated vsw */ 403 int num_ldcs; /* # of ldcs in the port */ 404 uint64_t *ldc_ids; /* ldc ids */ 405 vsw_ldc_t *ldcp; /* ldc for this port */ 406 407 kmutex_t tx_lock; /* transmit lock */ 408 int (*transmit)(vsw_ldc_t *, mblk_t *); 409 410 int state; /* port state */ 411 kmutex_t state_lock; 412 kcondvar_t state_cv; 413 414 krwlock_t maccl_rwlock; /* protect fields below */ 415 mac_client_handle_t p_mch; /* mac client handle */ 416 mac_unicast_handle_t p_muh; /* mac unicast handle */ 417 418 kmutex_t mca_lock; /* multicast lock */ 419 mcst_addr_t *mcap; /* list of multicast addrs */ 420 421 boolean_t addr_set; /* Addr set where */ 422 423 /* 424 * mac address of the port & connected device 425 */ 426 struct ether_addr p_macaddr; 427 uint16_t pvid; /* port vlan id (untagged) */ 428 struct vsw_vlanid *vids; /* vlan ids (tagged) */ 429 uint16_t nvids; /* # of vids */ 430 mod_hash_t *vlan_hashp; /* vlan hash table */ 431 uint32_t vlan_nchains; /* # of vlan hash chains */ 432 433 /* HybridIO related info */ 434 uint32_t p_hio_enabled; /* Hybrid mode enabled? 
*/ 435 uint32_t p_hio_capable; /* Port capable of HIO */ 436 437 /* bandwidth limit */ 438 uint64_t p_bandwidth; /* bandwidth limit */ 439 } vsw_port_t; 440 441 /* list of ports per vsw */ 442 typedef struct vsw_port_list { 443 vsw_port_t *head; /* head of the list */ 444 krwlock_t lockrw; /* sync access(rw) to the list */ 445 int num_ports; /* number of ports in the list */ 446 } vsw_port_list_t; 447 448 /* 449 * Taskq control message 450 */ 451 typedef struct vsw_ctrl_task { 452 vsw_ldc_t *ldcp; 453 def_msg_t pktp; 454 uint32_t hss_id; 455 } vsw_ctrl_task_t; 456 457 /* 458 * State of connection to peer. Some of these states 459 * can be mapped to LDC events as follows: 460 * 461 * VSW_CONN_RESET -> LDC_RESET_EVT 462 * VSW_CONN_UP -> LDC_UP_EVT 463 */ 464 #define VSW_CONN_UP 0x1 /* Connection come up */ 465 #define VSW_CONN_RESET 0x2 /* Connection reset */ 466 #define VSW_CONN_RESTART 0x4 /* Restarting handshake on connection */ 467 468 typedef struct vsw_conn_evt { 469 uint16_t evt; /* Connection event */ 470 vsw_ldc_t *ldcp; 471 } vsw_conn_evt_t; 472 473 /* 474 * Ethernet broadcast address definition. 475 */ 476 static struct ether_addr etherbroadcastaddr = { 477 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 478 }; 479 480 #define IS_BROADCAST(ehp) \ 481 (bcmp(&ehp->ether_dhost, ðerbroadcastaddr, ETHERADDRL) == 0) 482 #define IS_MULTICAST(ehp) \ 483 ((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1) 484 485 #define READ_ENTER(x) rw_enter(x, RW_READER) 486 #define WRITE_ENTER(x) rw_enter(x, RW_WRITER) 487 #define RW_EXIT(x) rw_exit(x) 488 489 #define VSW_PORT_REFHOLD(portp) atomic_inc_32(&((portp)->ref_cnt)) 490 #define VSW_PORT_REFRELE(portp) atomic_dec_32(&((portp)->ref_cnt)) 491 492 #ifdef __cplusplus 493 } 494 #endif 495 496 #endif /* _VSW_LDC_H */ 497