1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * This header file contains the basic data structures which the 28 * virtual switch (vsw) uses to communicate with vnet clients. 29 * 30 * The virtual switch reads the machine description (MD) to 31 * determine how many port_t structures to create (each port_t 32 * can support communications to a single network device). The 33 * port_t's are maintained in a linked list. 34 * 35 * Each port in turn contains a number of logical domain channels 36 * (ldc's) which are inter domain communications channels which 37 * are used for passing small messages between the domains. There 38 * may be any number of channels associated with each port, though 39 * currently most devices only have a single channel. The current 40 * implementation provides support for only one channel per port. 41 * 42 * The ldc is a bi-directional channel, which is divided up into 43 * two directional 'lanes', one outbound from the switch to the 44 * virtual network device, the other inbound to the switch. 45 * Depending on the type of device each lane may have seperate 46 * communication paramaters (such as mtu etc). 47 * 48 * For those network clients which use descriptor rings the 49 * rings are associated with the appropriate lane. I.e. rings 50 * which the switch exports are associated with the outbound lanes 51 * while those which the network clients are exporting to the switch 52 * are associated with the inbound lane. 53 * 54 * In diagram form the data structures look as follows: 55 * 56 * vsw instance 57 * | 58 * +----->port_t----->port_t----->port_t-----> 59 * | 60 * +--->ldc_t 61 * | 62 * +--->lane_t (inbound) 63 * | | 64 * | +--->dring 65 * | 66 * +--->lane_t (outbound) 67 * | 68 * +--->dring 69 * 70 */ 71 72 #ifndef _VSW_LDC_H 73 #define _VSW_LDC_H 74 75 #ifdef __cplusplus 76 extern "C" { 77 #endif 78 79 /* 80 * LDC pkt tranfer MTU - largest msg size used 81 */ 82 #define VSW_LDC_MTU 64 83 84 #define VSW_DEF_MSG_WORDS \ 85 (VNET_DRING_REG_EXT_MSG_SIZE_MAX / sizeof (uint64_t)) 86 87 /* 88 * Default message type. 89 */ 90 typedef struct def_msg { 91 uint64_t data[VSW_DEF_MSG_WORDS]; 92 } def_msg_t; 93 94 /* 95 * Currently only support one major/minor pair. 96 */ 97 #define VSW_NUM_VER 1 98 99 typedef struct ver_sup { 100 uint16_t ver_major; /* major version number */ 101 uint16_t ver_minor; /* minor version number */ 102 } ver_sup_t; 103 104 /* 105 * Lane states. 106 */ 107 #define VSW_LANE_INACTIV 0x0 /* No params set for lane */ 108 109 #define VSW_VER_INFO_SENT 0x1 /* Version # sent to peer */ 110 #define VSW_VER_INFO_RECV 0x2 /* Version # recv from peer */ 111 #define VSW_VER_ACK_RECV 0x4 112 #define VSW_VER_ACK_SENT 0x8 113 #define VSW_VER_NACK_RECV 0x10 114 #define VSW_VER_NACK_SENT 0x20 115 116 #define VSW_ATTR_INFO_SENT 0x40 /* Attributes sent to peer */ 117 #define VSW_ATTR_INFO_RECV 0x80 /* Peer attributes received */ 118 #define VSW_ATTR_ACK_SENT 0x100 119 #define VSW_ATTR_ACK_RECV 0x200 120 #define VSW_ATTR_NACK_SENT 0x400 121 #define VSW_ATTR_NACK_RECV 0x800 122 123 #define VSW_DRING_INFO_SENT 0x1000 /* Dring info sent to peer */ 124 #define VSW_DRING_INFO_RECV 0x2000 /* Dring info received */ 125 #define VSW_DRING_ACK_SENT 0x4000 126 #define VSW_DRING_ACK_RECV 0x8000 127 #define VSW_DRING_NACK_SENT 0x10000 128 #define VSW_DRING_NACK_RECV 0x20000 129 130 #define VSW_RDX_INFO_SENT 0x40000 /* RDX sent to peer */ 131 #define VSW_RDX_INFO_RECV 0x80000 /* RDX received from peer */ 132 #define VSW_RDX_ACK_SENT 0x100000 133 #define VSW_RDX_ACK_RECV 0x200000 134 #define VSW_RDX_NACK_SENT 0x400000 135 #define VSW_RDX_NACK_RECV 0x800000 136 137 #define VSW_MCST_INFO_SENT 0x1000000 138 #define VSW_MCST_INFO_RECV 0x2000000 139 #define VSW_MCST_ACK_SENT 0x4000000 140 #define VSW_MCST_ACK_RECV 0x8000000 141 #define VSW_MCST_NACK_SENT 0x10000000 142 #define VSW_MCST_NACK_RECV 0x20000000 143 144 #define VSW_LANE_ACTIVE 0x40000000 /* Lane open to xmit data */ 145 146 /* Handshake milestones */ 147 #define VSW_MILESTONE0 0x1 /* ver info exchanged */ 148 #define VSW_MILESTONE1 0x2 /* attribute exchanged */ 149 #define VSW_MILESTONE2 0x4 /* dring info exchanged */ 150 #define VSW_MILESTONE3 0x8 /* rdx exchanged */ 151 #define VSW_MILESTONE4 0x10 /* handshake complete */ 152 153 /* 154 * Lane direction (relative to ourselves). 155 */ 156 #define INBOUND 0x1 157 #define OUTBOUND 0x2 158 159 /* Peer session id received */ 160 #define VSW_PEER_SESSION 0x1 161 162 /* 163 * Maximum number of consecutive reads of data from channel 164 */ 165 #define VSW_MAX_CHAN_READ 50 166 167 /* 168 * Currently only support one ldc per port. 169 */ 170 #define VSW_PORT_MAX_LDCS 1 /* max # of ldcs per port */ 171 172 /* 173 * Used for port add/deletion. 174 */ 175 #define VSW_PORT_UPDATED 0x1 176 177 #define LDC_TX_SUCCESS 0 /* ldc transmit success */ 178 #define LDC_TX_FAILURE 1 /* ldc transmit failure */ 179 #define LDC_TX_NORESOURCES 2 /* out of descriptors */ 180 181 /* 182 * Descriptor ring info 183 * 184 * Each descriptor element has a pre-allocated data buffer 185 * associated with it, into which data being transmitted is 186 * copied. By pre-allocating we speed up the copying process. 187 * The buffer is re-used once the peer has indicated that it is 188 * finished with the descriptor. 189 */ 190 #define VSW_RING_EL_DATA_SZ 2048 /* Size of data section (bytes) */ 191 #define VSW_PRIV_SIZE sizeof (vnet_private_desc_t) 192 193 #define VSW_MAX_COOKIES ((ETHERMTU >> MMU_PAGESHIFT) + 2) 194 195 /* 196 * Size of the mblk in each mblk pool. 197 */ 198 #define VSW_MBLK_SZ_128 128 199 #define VSW_MBLK_SZ_256 256 200 #define VSW_MBLK_SZ_2048 2048 201 202 /* 203 * Number of mblks in each mblk pool. 204 */ 205 #define VSW_NUM_MBLKS 1024 206 207 /* increment recv index */ 208 #define INCR_DESC_INDEX(dp, i) \ 209 ((i) = (((i) + 1) & ((dp)->num_descriptors - 1))) 210 211 /* decrement recv index */ 212 #define DECR_DESC_INDEX(dp, i) \ 213 ((i) = (((i) - 1) & ((dp)->num_descriptors - 1))) 214 215 #define INCR_TXI INCR_DESC_INDEX 216 #define DECR_TXI DECR_DESC_INDEX 217 #define INCR_RXI INCR_DESC_INDEX 218 #define DECR_RXI DECR_DESC_INDEX 219 220 /* bounds check rx index */ 221 #define CHECK_DESC_INDEX(dp, i) \ 222 (((i) >= 0) && ((i) < (dp)->num_descriptors)) 223 224 #define CHECK_RXI CHECK_DESC_INDEX 225 #define CHECK_TXI CHECK_DESC_INDEX 226 227 /* 228 * Private descriptor 229 */ 230 typedef struct vsw_private_desc { 231 /* 232 * Below lock must be held when accessing the state of 233 * a descriptor on either the private or public sections 234 * of the ring. 235 */ 236 kmutex_t dstate_lock; 237 uint64_t dstate; 238 vnet_public_desc_t *descp; 239 ldc_mem_handle_t memhandle; 240 void *datap; 241 uint64_t datalen; 242 uint64_t ncookies; 243 ldc_mem_cookie_t memcookie[VSW_MAX_COOKIES]; 244 int bound; 245 } vsw_private_desc_t; 246 247 /* 248 * Descriptor ring structure 249 */ 250 typedef struct dring_info { 251 kmutex_t dlock; /* sync access */ 252 uint32_t num_descriptors; /* # of descriptors */ 253 uint32_t descriptor_size; /* size of descriptor */ 254 uint32_t options; /* dring options (mode) */ 255 ldc_dring_handle_t dring_handle; /* dring LDC handle */ 256 uint32_t dring_ncookies; /* # of dring cookies */ 257 ldc_mem_cookie_t dring_cookie[1]; /* LDC cookie of dring */ 258 ldc_mem_handle_t data_handle; /* data area LDC handle */ 259 uint32_t data_ncookies; /* # of data area cookies */ 260 ldc_mem_cookie_t *data_cookie; /* data area LDC cookies */ 261 uint64_t ident; /* identifier sent to peer */ 262 uint64_t end_idx; /* last idx processed */ 263 int64_t last_ack_recv; /* last ack received */ 264 kmutex_t txlock; /* protect tx desc alloc */ 265 uint32_t next_txi; /* next tx descriptor index */ 266 uint32_t next_rxi; /* next expected recv index */ 267 kmutex_t restart_lock; /* protect restart_reqd */ 268 boolean_t restart_reqd; /* send restart msg */ 269 uint32_t restart_peer_txi; /* index to restart peer */ 270 void *pub_addr; /* base of public section */ 271 void *priv_addr; /* base of private section */ 272 void *data_addr; /* base of data section */ 273 size_t data_sz; /* size of data section */ 274 size_t desc_data_sz; /* size of descr data blk */ 275 uint8_t dring_mtype; /* dring mem map type */ 276 uint32_t num_bufs; /* # of buffers */ 277 vio_mblk_pool_t *rx_vmp; /* rx mblk pool */ 278 vio_mblk_t **rxdp_to_vmp; /* descr to buf map tbl */ 279 } dring_info_t; 280 281 /* 282 * Each ldc connection is comprised of two lanes, incoming 283 * from a peer, and outgoing to that peer. Each lane shares 284 * common ldc parameters and also has private lane-specific 285 * parameters. 286 */ 287 typedef struct lane { 288 uint64_t lstate; /* Lane state */ 289 uint16_t ver_major; /* Version major number */ 290 uint16_t ver_minor; /* Version minor number */ 291 uint64_t seq_num; /* Sequence number */ 292 uint64_t mtu; /* ETHERMTU */ 293 uint64_t addr; /* Unique physical address */ 294 uint8_t addr_type; /* Only MAC address at moment */ 295 uint8_t xfer_mode; /* Dring or Pkt based */ 296 uint8_t ack_freq; /* Only non zero for Pkt based xfer */ 297 uint32_t physlink_update; /* physlink updates */ 298 uint8_t dring_mode; /* Descriptor ring mode */ 299 dring_info_t *dringp; /* List of drings for this lane */ 300 } lane_t; 301 302 /* channel drain states */ 303 #define VSW_LDC_INIT 0x1 /* Initial non-drain state */ 304 #define VSW_LDC_DRAINING 0x2 /* Channel draining */ 305 306 /* 307 * vnet-protocol-version dependent function prototypes. 308 */ 309 typedef int (*vsw_ldctx_t) (void *, mblk_t *, mblk_t *, uint32_t); 310 typedef void (*vsw_ldcrx_pktdata_t) (void *, void *, uint32_t); 311 typedef void (*vsw_ldcrx_dringdata_t) (void *, void *); 312 313 /* ldc information associated with a vsw-port */ 314 typedef struct vsw_ldc { 315 struct vsw_ldc *ldc_next; /* next ldc in the list */ 316 struct vsw_port *ldc_port; /* associated port */ 317 struct vsw *ldc_vswp; /* associated vsw */ 318 kmutex_t ldc_cblock; /* sync callback processing */ 319 kmutex_t ldc_txlock; /* sync transmits */ 320 kmutex_t ldc_rxlock; /* sync rx */ 321 uint64_t ldc_id; /* channel number */ 322 ldc_handle_t ldc_handle; /* channel handle */ 323 kmutex_t drain_cv_lock; 324 kcondvar_t drain_cv; /* channel draining */ 325 int drain_state; 326 uint32_t hphase; /* handshake phase */ 327 int hcnt; /* # handshake attempts */ 328 kmutex_t status_lock; 329 ldc_status_t ldc_status; /* channel status */ 330 uint8_t reset_active; /* reset flag */ 331 uint64_t local_session; /* Our session id */ 332 uint64_t peer_session; /* Our peers session id */ 333 uint8_t session_status; /* Session recv'd, sent */ 334 uint32_t hss_id; /* Handshake session id */ 335 uint64_t next_ident; /* Next dring ident # to use */ 336 lane_t lane_in; /* Inbound lane */ 337 lane_t lane_out; /* Outbound lane */ 338 uint8_t dev_class; /* Peer device class */ 339 boolean_t pls_negotiated; /* phys link state update ? */ 340 vio_multi_pool_t vmp; /* Receive mblk pools */ 341 uint32_t max_rxpool_size; /* max size of rxpool in use */ 342 uint64_t *ldcmsg; /* msg buffer for ldc_read() */ 343 uint64_t msglen; /* size of ldcmsg */ 344 uint32_t dringdata_msgid; /* msgid in RxDringData mode */ 345 346 /* tx thread fields */ 347 kthread_t *tx_thread; /* tx thread */ 348 uint32_t tx_thr_flags; /* tx thread flags */ 349 kmutex_t tx_thr_lock; /* lock for tx thread */ 350 kcondvar_t tx_thr_cv; /* cond.var for tx thread */ 351 mblk_t *tx_mhead; /* tx mblks head */ 352 mblk_t *tx_mtail; /* tx mblks tail */ 353 uint32_t tx_cnt; /* # of pkts queued for tx */ 354 355 /* message thread fields */ 356 kthread_t *msg_thread; /* message thread */ 357 uint32_t msg_thr_flags; /* message thread flags */ 358 kmutex_t msg_thr_lock; /* lock for message thread */ 359 kcondvar_t msg_thr_cv; /* cond.var for msg thread */ 360 361 /* receive thread fields */ 362 kthread_t *rcv_thread; /* receive thread */ 363 uint32_t rcv_thr_flags; /* receive thread flags */ 364 kmutex_t rcv_thr_lock; /* lock for receive thread */ 365 kcondvar_t rcv_thr_cv; /* cond.var for recv thread */ 366 367 vsw_ldctx_t tx; /* transmit function */ 368 vsw_ldcrx_pktdata_t rx_pktdata; /* process raw data msg */ 369 vsw_ldcrx_dringdata_t rx_dringdata; /* process dring data msg */ 370 371 /* channel statistics */ 372 vgen_stats_t ldc_stats; /* channel statistics */ 373 kstat_t *ksp; /* channel kstats */ 374 } vsw_ldc_t; 375 376 /* worker thread flags */ 377 #define VSW_WTHR_DATARCVD 0x01 /* data received */ 378 #define VSW_WTHR_STOP 0x02 /* stop worker thread request */ 379 380 /* multicast addresses port is interested in */ 381 typedef struct mcst_addr { 382 struct mcst_addr *nextp; 383 struct ether_addr mca; /* multicast address */ 384 uint64_t addr; /* mcast addr converted to hash key */ 385 boolean_t mac_added; /* added into physical device */ 386 } mcst_addr_t; 387 388 /* Port detach states */ 389 #define VSW_PORT_INIT 0x1 /* Initial non-detach state */ 390 #define VSW_PORT_DETACHING 0x2 /* In process of being detached */ 391 #define VSW_PORT_DETACHABLE 0x4 /* Safe to detach */ 392 393 /* port information associated with a vsw */ 394 typedef struct vsw_port { 395 int p_instance; /* port instance */ 396 struct vsw_port *p_next; /* next port in the list */ 397 struct vsw *p_vswp; /* associated vsw */ 398 int num_ldcs; /* # of ldcs in the port */ 399 uint64_t *ldc_ids; /* ldc ids */ 400 vsw_ldc_t *ldcp; /* ldc for this port */ 401 402 kmutex_t tx_lock; /* transmit lock */ 403 int (*transmit)(vsw_ldc_t *, mblk_t *); 404 405 int state; /* port state */ 406 kmutex_t state_lock; 407 kcondvar_t state_cv; 408 409 krwlock_t maccl_rwlock; /* protect fields below */ 410 mac_client_handle_t p_mch; /* mac client handle */ 411 mac_unicast_handle_t p_muh; /* mac unicast handle */ 412 413 kmutex_t mca_lock; /* multicast lock */ 414 mcst_addr_t *mcap; /* list of multicast addrs */ 415 416 boolean_t addr_set; /* Addr set where */ 417 418 /* 419 * mac address of the port & connected device 420 */ 421 struct ether_addr p_macaddr; 422 uint16_t pvid; /* port vlan id (untagged) */ 423 struct vsw_vlanid *vids; /* vlan ids (tagged) */ 424 uint16_t nvids; /* # of vids */ 425 mod_hash_t *vlan_hashp; /* vlan hash table */ 426 uint32_t vlan_nchains; /* # of vlan hash chains */ 427 428 /* HybridIO related info */ 429 uint32_t p_hio_enabled; /* Hybrid mode enabled? */ 430 uint32_t p_hio_capable; /* Port capable of HIO */ 431 432 /* bandwidth limit */ 433 uint64_t p_bandwidth; /* bandwidth limit */ 434 } vsw_port_t; 435 436 /* list of ports per vsw */ 437 typedef struct vsw_port_list { 438 vsw_port_t *head; /* head of the list */ 439 krwlock_t lockrw; /* sync access(rw) to the list */ 440 int num_ports; /* number of ports in the list */ 441 } vsw_port_list_t; 442 443 /* 444 * Taskq control message 445 */ 446 typedef struct vsw_ctrl_task { 447 vsw_ldc_t *ldcp; 448 def_msg_t pktp; 449 uint32_t hss_id; 450 } vsw_ctrl_task_t; 451 452 /* 453 * State of connection to peer. Some of these states 454 * can be mapped to LDC events as follows: 455 * 456 * VSW_CONN_RESET -> LDC_RESET_EVT 457 * VSW_CONN_UP -> LDC_UP_EVT 458 */ 459 #define VSW_CONN_UP 0x1 /* Connection come up */ 460 #define VSW_CONN_RESET 0x2 /* Connection reset */ 461 #define VSW_CONN_RESTART 0x4 /* Restarting handshake on connection */ 462 463 typedef struct vsw_conn_evt { 464 uint16_t evt; /* Connection event */ 465 vsw_ldc_t *ldcp; 466 } vsw_conn_evt_t; 467 468 /* 469 * Ethernet broadcast address definition. 470 */ 471 static struct ether_addr etherbroadcastaddr = { 472 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 473 }; 474 475 #define IS_BROADCAST(ehp) \ 476 (bcmp(&ehp->ether_dhost, ðerbroadcastaddr, ETHERADDRL) == 0) 477 #define IS_MULTICAST(ehp) \ 478 ((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1) 479 480 #define READ_ENTER(x) rw_enter(x, RW_READER) 481 #define WRITE_ENTER(x) rw_enter(x, RW_WRITER) 482 #define RW_EXIT(x) rw_exit(x) 483 484 #define VSW_PORT_REFHOLD(portp) atomic_inc_32(&((portp)->ref_cnt)) 485 #define VSW_PORT_REFRELE(portp) atomic_dec_32(&((portp)->ref_cnt)) 486 487 #ifdef __cplusplus 488 } 489 #endif 490 491 #endif /* _VSW_LDC_H */ 492