/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * This header file contains the basic data structures which the
 * virtual switch (vsw) uses to communicate with its clients and
 * the outside world.
 *
 * The virtual switch reads the machine description (MD) to
 * determine how many port_t structures to create (each port_t
 * can support communications to a single network device). The
 * port_t's are maintained in a linked list.
 *
 * Each port in turn contains a number of logical domain channels
 * (ldc's) which are inter domain communications channels which
 * are used for passing small messages between the domains. There
 * may be an unlimited number of channels associated with each port,
 * though most devices only use a single channel.
 *
 * The ldc is a bi-directional channel, which is divided up into
 * two directional 'lanes', one outbound from the switch to the
 * virtual network device, the other inbound to the switch.
46 * Depending on the type of device each lane may have seperate 47 * communication paramaters (such as mtu etc). 48 * 49 * For those network clients which use descriptor rings the 50 * rings are associated with the appropriate lane. I.e. rings 51 * which the switch exports are associated with the outbound lanes 52 * while those which the network clients are exporting to the switch 53 * are associated with the inbound lane. 54 * 55 * In diagram form the data structures look as follows: 56 * 57 * vsw instance 58 * | 59 * +----->port_t----->port_t----->port_t-----> 60 * | 61 * +--->ldc_t--->ldc_t--->ldc_t---> 62 * | 63 * +--->lane_t (inbound) 64 * | | 65 * | +--->dring--->dring---> 66 * | 67 * +--->lane_t (outbound) 68 * | 69 * +--->dring--->dring---> 70 * 71 */ 72 73 #ifndef _VSW_H 74 #define _VSW_H 75 76 #pragma ident "%Z%%M% %I% %E% SMI" 77 78 #ifdef __cplusplus 79 extern "C" { 80 #endif 81 82 #include <sys/vio_mailbox.h> 83 #include <sys/vnet_common.h> 84 #include <sys/ethernet.h> 85 #include <sys/vio_util.h> 86 87 /* 88 * Default message type. 89 */ 90 typedef struct def_msg { 91 uint64_t data[8]; 92 } def_msg_t; 93 94 /* 95 * Currently only support one major/minor pair. 96 */ 97 #define VSW_NUM_VER 1 98 99 typedef struct ver_sup { 100 uint32_t ver_major:16, 101 ver_minor:16; 102 } ver_sup_t; 103 104 /* 105 * Only support ETHER mtu at moment. 106 */ 107 #define VSW_MTU ETHERMAX 108 109 /* 110 * Lane states. 
111 */ 112 #define VSW_LANE_INACTIV 0x0 /* No params set for lane */ 113 114 #define VSW_VER_INFO_SENT 0x1 /* Version # sent to peer */ 115 #define VSW_VER_INFO_RECV 0x2 /* Version # recv from peer */ 116 #define VSW_VER_ACK_RECV 0x4 117 #define VSW_VER_ACK_SENT 0x8 118 #define VSW_VER_NACK_RECV 0x10 119 #define VSW_VER_NACK_SENT 0x20 120 121 #define VSW_ATTR_INFO_SENT 0x40 /* Attributes sent to peer */ 122 #define VSW_ATTR_INFO_RECV 0x80 /* Peer attributes received */ 123 #define VSW_ATTR_ACK_SENT 0x100 124 #define VSW_ATTR_ACK_RECV 0x200 125 #define VSW_ATTR_NACK_SENT 0x400 126 #define VSW_ATTR_NACK_RECV 0x800 127 128 #define VSW_DRING_INFO_SENT 0x1000 /* Dring info sent to peer */ 129 #define VSW_DRING_INFO_RECV 0x2000 /* Dring info received */ 130 #define VSW_DRING_ACK_SENT 0x4000 131 #define VSW_DRING_ACK_RECV 0x8000 132 #define VSW_DRING_NACK_SENT 0x10000 133 #define VSW_DRING_NACK_RECV 0x20000 134 135 #define VSW_RDX_INFO_SENT 0x40000 /* RDX sent to peer */ 136 #define VSW_RDX_INFO_RECV 0x80000 /* RDX received from peer */ 137 #define VSW_RDX_ACK_SENT 0x100000 138 #define VSW_RDX_ACK_RECV 0x200000 139 #define VSW_RDX_NACK_SENT 0x400000 140 #define VSW_RDX_NACK_RECV 0x800000 141 142 #define VSW_MCST_INFO_SENT 0x1000000 143 #define VSW_MCST_INFO_RECV 0x2000000 144 #define VSW_MCST_ACK_SENT 0x4000000 145 #define VSW_MCST_ACK_RECV 0x8000000 146 #define VSW_MCST_NACK_SENT 0x10000000 147 #define VSW_MCST_NACK_RECV 0x20000000 148 149 #define VSW_LANE_ACTIVE 0x40000000 /* Lane open to xmit data */ 150 151 /* Handshake milestones */ 152 #define VSW_MILESTONE0 0x1 /* ver info exchanged */ 153 #define VSW_MILESTONE1 0x2 /* attribute exchanged */ 154 #define VSW_MILESTONE2 0x4 /* dring info exchanged */ 155 #define VSW_MILESTONE3 0x8 /* rdx exchanged */ 156 #define VSW_MILESTONE4 0x10 /* handshake complete */ 157 158 /* 159 * Lane direction (relative to ourselves). 
160 */ 161 #define INBOUND 0x1 162 #define OUTBOUND 0x2 163 164 /* Peer session id received */ 165 #define VSW_PEER_SESSION 0x1 166 167 /* 168 * Maximum number of consecutive reads of data from channel 169 */ 170 #define VSW_MAX_CHAN_READ 50 171 172 /* 173 * Currently only support one ldc per port. 174 */ 175 #define VSW_PORT_MAX_LDCS 1 /* max # of ldcs per port */ 176 177 /* 178 * Used for port add/deletion. 179 */ 180 #define VSW_PORT_UPDATED 0x1 181 182 #define LDC_TX_SUCCESS 0 /* ldc transmit success */ 183 #define LDC_TX_FAILURE 1 /* ldc transmit failure */ 184 #define LDC_TX_NORESOURCES 2 /* out of descriptors */ 185 186 /* ID of the source of a frame being switched */ 187 #define VSW_PHYSDEV 1 /* physical device associated */ 188 #define VSW_VNETPORT 2 /* port connected to vnet (over ldc) */ 189 #define VSW_LOCALDEV 4 /* vsw configured as an eth interface */ 190 191 /* 192 * Descriptor ring info 193 * 194 * Each descriptor element has a pre-allocated data buffer 195 * associated with it, into which data being transmitted is 196 * copied. By pre-allocating we speed up the copying process. 197 * The buffer is re-used once the peer has indicated that it is 198 * finished with the descriptor. 199 */ 200 #define VSW_RING_NUM_EL 512 /* Num of entries in ring */ 201 #define VSW_RING_EL_DATA_SZ 2048 /* Size of data section (bytes) */ 202 #define VSW_PRIV_SIZE sizeof (vnet_private_desc_t) 203 #define VSW_PUB_SIZE sizeof (vnet_public_desc_t) 204 205 #define VSW_MAX_COOKIES ((ETHERMTU >> MMU_PAGESHIFT) + 2) 206 207 /* 208 * LDC pkt tranfer MTU 209 */ 210 #define VSW_LDC_MTU sizeof (def_msg_t) 211 212 /* 213 * Size and number of mblks to be created in free pool. 214 */ 215 #define VSW_MBLK_SIZE 2048 216 #define VSW_NUM_MBLKS 1024 217 218 /* 219 * Private descriptor 220 */ 221 typedef struct vsw_private_desc { 222 /* 223 * Below lock must be held when accessing the state of 224 * a descriptor on either the private or public sections 225 * of the ring. 
226 */ 227 kmutex_t dstate_lock; 228 uint64_t dstate; 229 vnet_public_desc_t *descp; 230 ldc_mem_handle_t memhandle; 231 void *datap; 232 uint64_t datalen; 233 uint64_t ncookies; 234 ldc_mem_cookie_t memcookie[VSW_MAX_COOKIES]; 235 int bound; 236 } vsw_private_desc_t; 237 238 /* 239 * Descriptor ring structure 240 */ 241 typedef struct dring_info { 242 struct dring_info *next; /* next ring in chain */ 243 kmutex_t dlock; 244 uint32_t num_descriptors; 245 uint32_t descriptor_size; 246 uint32_t options; 247 uint32_t ncookies; 248 ldc_mem_cookie_t cookie[1]; 249 250 ldc_dring_handle_t handle; 251 uint64_t ident; /* identifier sent to peer */ 252 uint64_t end_idx; /* last idx processed */ 253 int64_t last_ack_recv; 254 255 kmutex_t restart_lock; 256 boolean_t restart_reqd; /* send restart msg */ 257 258 /* 259 * base address of private and public portions of the 260 * ring (where appropriate), and data block. 261 */ 262 void *pub_addr; /* base of public section */ 263 void *priv_addr; /* base of private section */ 264 void *data_addr; /* base of data section */ 265 size_t data_sz; /* size of data section */ 266 } dring_info_t; 267 268 /* 269 * Each ldc connection is comprised of two lanes, incoming 270 * from a peer, and outgoing to that peer. Each lane shares 271 * common ldc parameters and also has private lane-specific 272 * parameters. 
273 */ 274 typedef struct lane { 275 uint64_t lstate; /* Lane state */ 276 uint32_t ver_major:16, /* Version major number */ 277 ver_minor:16; /* Version minor number */ 278 kmutex_t seq_lock; 279 uint64_t seq_num; /* Sequence number */ 280 uint64_t mtu; /* ETHERMTU */ 281 uint64_t addr; /* Unique physical address */ 282 uint8_t addr_type; /* Only MAC address at moment */ 283 uint8_t xfer_mode; /* Dring or Pkt based */ 284 uint8_t ack_freq; /* Only non zero for Pkt based xfer */ 285 krwlock_t dlistrw; /* Lock for dring list */ 286 dring_info_t *dringp; /* List of drings for this lane */ 287 } lane_t; 288 289 /* channel drain states */ 290 #define VSW_LDC_INIT 0x1 /* Initial non-drain state */ 291 #define VSW_LDC_DRAINING 0x2 /* Channel draining */ 292 293 /* ldc information associated with a vsw-port */ 294 typedef struct vsw_ldc { 295 struct vsw_ldc *ldc_next; /* next ldc in the list */ 296 struct vsw_port *ldc_port; /* associated port */ 297 struct vsw *ldc_vswp; /* associated vsw */ 298 kmutex_t ldc_cblock; /* sync callback processing */ 299 kmutex_t ldc_txlock; /* sync transmits */ 300 uint64_t ldc_id; /* channel number */ 301 ldc_handle_t ldc_handle; /* channel handle */ 302 kmutex_t drain_cv_lock; 303 kcondvar_t drain_cv; /* channel draining */ 304 int drain_state; 305 uint32_t hphase; /* handshake phase */ 306 int hcnt; /* # handshake attempts */ 307 kmutex_t status_lock; 308 ldc_status_t ldc_status; /* channel status */ 309 uint8_t reset_active; /* reset flag */ 310 uint64_t local_session; /* Our session id */ 311 uint64_t peer_session; /* Our peers session id */ 312 uint8_t session_status; /* Session recv'd, sent */ 313 kmutex_t hss_lock; 314 uint32_t hss_id; /* Handshake session id */ 315 uint64_t next_ident; /* Next dring ident # to use */ 316 lane_t lane_in; /* Inbound lane */ 317 lane_t lane_out; /* Outbound lane */ 318 uint8_t dev_class; /* Peer device class */ 319 vio_mblk_pool_t *rxh; /* Receive pool handle */ 320 } vsw_ldc_t; 321 322 /* list of 
ldcs per port */ 323 typedef struct vsw_ldc_list { 324 vsw_ldc_t *head; /* head of the list */ 325 krwlock_t lockrw; /* sync access(rw) to the list */ 326 int num_ldcs; /* number of ldcs in the list */ 327 } vsw_ldc_list_t; 328 329 /* multicast addresses port is interested in */ 330 typedef struct mcst_addr { 331 struct mcst_addr *nextp; 332 struct ether_addr mca; /* multicast address */ 333 uint64_t addr; /* mcast addr converted to hash key */ 334 boolean_t mac_added; /* added into physical device */ 335 } mcst_addr_t; 336 337 /* Port detach states */ 338 #define VSW_PORT_INIT 0x1 /* Initial non-detach state */ 339 #define VSW_PORT_DETACHING 0x2 /* In process of being detached */ 340 #define VSW_PORT_DETACHABLE 0x4 /* Safe to detach */ 341 342 #define VSW_ADDR_UNSET 0x0 /* Addr not set */ 343 #define VSW_ADDR_HW 0x1 /* Addr programmed in HW */ 344 #define VSW_ADDR_PROMISC 0x2 /* Card in promisc to see addr */ 345 346 /* port information associated with a vsw */ 347 typedef struct vsw_port { 348 int p_instance; /* port instance */ 349 struct vsw_port *p_next; /* next port in the list */ 350 struct vsw *p_vswp; /* associated vsw */ 351 vsw_ldc_list_t p_ldclist; /* list of ldcs for this port */ 352 353 kmutex_t tx_lock; /* transmit lock */ 354 int (*transmit)(vsw_ldc_t *, mblk_t *); 355 356 int state; /* port state */ 357 kmutex_t state_lock; 358 kcondvar_t state_cv; 359 360 int ref_cnt; /* # of active references */ 361 kmutex_t ref_lock; 362 kcondvar_t ref_cv; 363 364 kmutex_t mca_lock; /* multicast lock */ 365 mcst_addr_t *mcap; /* list of multicast addrs */ 366 367 mac_addr_slot_t addr_slot; /* Unicast address slot */ 368 int addr_set; /* Addr set where */ 369 370 /* 371 * mac address of the port & connected device 372 */ 373 struct ether_addr p_macaddr; 374 } vsw_port_t; 375 376 /* list of ports per vsw */ 377 typedef struct vsw_port_list { 378 vsw_port_t *head; /* head of the list */ 379 krwlock_t lockrw; /* sync access(rw) to the list */ 380 int num_ports; /* 
number of ports in the list */ 381 } vsw_port_list_t; 382 383 /* 384 * Taskq control message 385 */ 386 typedef struct vsw_ctrl_task { 387 vsw_ldc_t *ldcp; 388 def_msg_t pktp; 389 uint32_t hss_id; 390 } vsw_ctrl_task_t; 391 392 /* 393 * State of connection to peer. Some of these states 394 * can be mapped to LDC events as follows: 395 * 396 * VSW_CONN_RESET -> LDC_RESET_EVT 397 * VSW_CONN_UP -> LDC_UP_EVT 398 */ 399 #define VSW_CONN_UP 0x1 /* Connection come up */ 400 #define VSW_CONN_RESET 0x2 /* Connection reset */ 401 #define VSW_CONN_RESTART 0x4 /* Restarting handshake on connection */ 402 403 typedef struct vsw_conn_evt { 404 uint16_t evt; /* Connection event */ 405 vsw_ldc_t *ldcp; 406 } vsw_conn_evt_t; 407 408 /* 409 * Vsw queue -- largely modeled after squeue 410 * 411 * VSW_QUEUE_RUNNING, vqueue thread for queue is running. 412 * VSW_QUEUE_DRAINED, vqueue thread has drained current work and is exiting. 413 * VSW_QUEUE_STOP, request for the vqueue thread to stop. 414 * VSW_QUEUE_STOPPED, vqueue thread is not running. 415 */ 416 #define VSW_QUEUE_RUNNING 0x01 417 #define VSW_QUEUE_DRAINED 0x02 418 #define VSW_QUEUE_STOP 0x04 419 #define VSW_QUEUE_STOPPED 0x08 420 421 typedef struct vsw_queue_s { 422 kmutex_t vq_lock; /* Lock, before using any member. */ 423 kcondvar_t vq_cv; /* Async threads block on. */ 424 uint32_t vq_state; /* State flags. */ 425 426 mblk_t *vq_first; /* First mblk chain or NULL. */ 427 mblk_t *vq_last; /* Last mblk chain. */ 428 429 processorid_t vq_bind; /* Process to bind to */ 430 kthread_t *vq_worker; /* Queue's thread */ 431 } vsw_queue_t; 432 433 /* 434 * VSW MAC Ring Resources. 435 * MAC Ring resource is composed of this state structure and 436 * a kernel thread to perform the processing of the ring. 
437 */ 438 typedef struct vsw_mac_ring_s { 439 uint32_t ring_state; 440 441 mac_blank_t ring_blank; 442 void *ring_arg; 443 444 vsw_queue_t *ring_vqp; 445 struct vsw *ring_vswp; 446 } vsw_mac_ring_t; 447 448 /* 449 * Maximum Ring Resources. 450 */ 451 #define VSW_MAC_RX_RINGS 0x40 452 453 /* 454 * States for entry in ring table. 455 */ 456 #define VSW_MAC_RING_FREE 1 457 #define VSW_MAC_RING_INUSE 2 458 459 /* 460 * Number of hash chains in the multicast forwarding database. 461 */ 462 #define VSW_NCHAINS 8 463 464 /* 465 * State of interface if switch plumbed as network device. 466 */ 467 #define VSW_IF_REG 0x1 /* interface was registered */ 468 #define VSW_IF_UP 0x2 /* Interface UP */ 469 #define VSW_IF_PROMISC 0x4 /* Interface in promiscious mode */ 470 471 #define VSW_U_P(state) \ 472 (state == (VSW_IF_UP | VSW_IF_PROMISC)) 473 474 /* 475 * Switching modes. 476 */ 477 #define VSW_LAYER2 0x1 /* Layer 2 - MAC switching */ 478 #define VSW_LAYER2_PROMISC 0x2 /* Layer 2 + promisc mode */ 479 #define VSW_LAYER3 0x4 /* Layer 3 - IP switching */ 480 481 #define NUM_SMODES 3 /* number of switching modes */ 482 483 /* 484 * vsw instance state information. 
485 */ 486 typedef struct vsw { 487 int instance; /* instance # */ 488 dev_info_t *dip; /* associated dev_info */ 489 uint64_t regprop; /* "reg" property */ 490 struct vsw *next; /* next in list */ 491 char physname[LIFNAMSIZ]; /* phys-dev */ 492 uint8_t smode[NUM_SMODES]; /* switching mode */ 493 int smode_idx; /* curr pos in smode array */ 494 int smode_num; /* # of modes specified */ 495 kmutex_t swtmout_lock; /* setup switching tmout lock */ 496 boolean_t swtmout_enabled; /* setup switching tmout on */ 497 timeout_id_t swtmout_id; /* setup switching tmout id */ 498 uint32_t switching_setup_done; /* setup switching done */ 499 int mac_open_retries; /* mac_open() retry count */ 500 vsw_port_list_t plist; /* associated ports */ 501 ddi_taskq_t *taskq_p; /* VIO ctrl msg taskq */ 502 mod_hash_t *fdb; /* forwarding database */ 503 504 mod_hash_t *mfdb; /* multicast FDB */ 505 krwlock_t mfdbrw; /* rwlock for mFDB */ 506 507 vio_mblk_pool_t *rxh; /* Receive pool handle */ 508 void (*vsw_switch_frame) 509 (struct vsw *, mblk_t *, int, 510 vsw_port_t *, mac_resource_handle_t); 511 512 /* mac layer */ 513 kmutex_t mac_lock; /* protect fields below */ 514 mac_handle_t mh; 515 mac_rx_handle_t mrh; 516 multiaddress_capab_t maddr; /* Multiple uni addr capable */ 517 const mac_txinfo_t *txinfo; /* MAC tx routine */ 518 boolean_t mstarted; /* Mac Started? */ 519 boolean_t mresources; /* Mac Resources cb? */ 520 521 /* 522 * MAC Ring Resources. 523 */ 524 kmutex_t mac_ring_lock; /* Lock for the table. */ 525 uint32_t mac_ring_tbl_sz; 526 vsw_mac_ring_t *mac_ring_tbl; /* Mac ring table. 
*/ 527 528 kmutex_t hw_lock; /* sync access to HW */ 529 boolean_t recfg_reqd; /* Reconfig of addrs needed */ 530 int promisc_cnt; 531 532 /* Machine Description updates */ 533 mdeg_node_spec_t *inst_spec; 534 mdeg_handle_t mdeg_hdl; 535 mdeg_handle_t mdeg_port_hdl; 536 537 /* if configured as an ethernet interface */ 538 mac_handle_t if_mh; /* MAC handle */ 539 struct ether_addr if_addr; /* interface address */ 540 krwlock_t if_lockrw; 541 uint8_t if_state; /* interface state */ 542 543 mac_addr_slot_t addr_slot; /* Unicast address slot */ 544 int addr_set; /* Addr set where */ 545 546 /* multicast addresses when configured as eth interface */ 547 kmutex_t mca_lock; /* multicast lock */ 548 mcst_addr_t *mcap; /* list of multicast addrs */ 549 } vsw_t; 550 551 552 /* 553 * Ethernet broadcast address definition. 554 */ 555 static struct ether_addr etherbroadcastaddr = { 556 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 557 }; 558 559 #define IS_BROADCAST(ehp) \ 560 (ether_cmp(&ehp->ether_dhost, ðerbroadcastaddr) == 0) 561 #define IS_MULTICAST(ehp) \ 562 ((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1) 563 564 #define READ_ENTER(x) rw_enter(x, RW_READER) 565 #define WRITE_ENTER(x) rw_enter(x, RW_WRITER) 566 #define RW_EXIT(x) rw_exit(x) 567 568 #ifdef __cplusplus 569 } 570 #endif 571 572 #endif /* _VSW_H */ 573