1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * This header file contains the basic data structures which the 29 * virtual switch (vsw) uses to communicate with its clients and 30 * the outside world. 31 * 32 * The virtual switch reads the machine description (MD) to 33 * determine how many port_t structures to create (each port_t 34 * can support communications to a single network device). The 35 * port_t's are maintained in a linked list. 36 * 37 * Each port in turn contains a number of logical domain channels 38 * (ldc's) which are inter domain communications channels which 39 * are used for passing small messages between the domains. Their 40 * may be an unlimited number of channels associated with each port, 41 * though most devices only use a single channel. 42 * 43 * The ldc is a bi-directional channel, which is divided up into 44 * two directional 'lanes', one outbound from the switch to the 45 * virtual network device, the other inbound to the switch. 46 * Depending on the type of device each lane may have seperate 47 * communication paramaters (such as mtu etc). 48 * 49 * For those network clients which use descriptor rings the 50 * rings are associated with the appropriate lane. I.e. rings 51 * which the switch exports are associated with the outbound lanes 52 * while those which the network clients are exporting to the switch 53 * are associated with the inbound lane. 54 * 55 * In diagram form the data structures look as follows: 56 * 57 * vsw instance 58 * | 59 * +----->port_t----->port_t----->port_t-----> 60 * | 61 * +--->ldc_t--->ldc_t--->ldc_t---> 62 * | 63 * +--->lane_t (inbound) 64 * | | 65 * | +--->dring--->dring---> 66 * | 67 * +--->lane_t (outbound) 68 * | 69 * +--->dring--->dring---> 70 * 71 */ 72 73 #ifndef _VSW_H 74 #define _VSW_H 75 76 #pragma ident "%Z%%M% %I% %E% SMI" 77 78 #ifdef __cplusplus 79 extern "C" { 80 #endif 81 82 #include <sys/vio_mailbox.h> 83 #include <sys/vnet_common.h> 84 #include <sys/ethernet.h> 85 86 /* 87 * Default message type. 88 */ 89 typedef struct def_msg { 90 uint64_t data[8]; 91 } def_msg_t; 92 93 /* 94 * Currently only support one major/minor pair. 95 */ 96 #define VSW_NUM_VER 1 97 98 typedef struct ver_sup { 99 uint32_t ver_major:16, 100 ver_minor:16; 101 } ver_sup_t; 102 103 /* 104 * Only support ETHER mtu at moment. 105 */ 106 #define VSW_MTU ETHERMAX 107 108 /* 109 * Lane states. 110 */ 111 #define VSW_LANE_INACTIV 0x0 /* No params set for lane */ 112 113 #define VSW_VER_INFO_SENT 0x1 /* Version # sent to peer */ 114 #define VSW_VER_INFO_RECV 0x2 /* Version # recv from peer */ 115 #define VSW_VER_ACK_RECV 0x4 116 #define VSW_VER_ACK_SENT 0x8 117 #define VSW_VER_NACK_RECV 0x10 118 #define VSW_VER_NACK_SENT 0x20 119 120 #define VSW_ATTR_INFO_SENT 0x40 /* Attributes sent to peer */ 121 #define VSW_ATTR_INFO_RECV 0x80 /* Peer attributes received */ 122 #define VSW_ATTR_ACK_SENT 0x100 123 #define VSW_ATTR_ACK_RECV 0x200 124 #define VSW_ATTR_NACK_SENT 0x400 125 #define VSW_ATTR_NACK_RECV 0x800 126 127 #define VSW_DRING_INFO_SENT 0x1000 /* Dring info sent to peer */ 128 #define VSW_DRING_INFO_RECV 0x2000 /* Dring info received */ 129 #define VSW_DRING_ACK_SENT 0x4000 130 #define VSW_DRING_ACK_RECV 0x8000 131 #define VSW_DRING_NACK_SENT 0x10000 132 #define VSW_DRING_NACK_RECV 0x20000 133 134 #define VSW_RDX_INFO_SENT 0x40000 /* RDX sent to peer */ 135 #define VSW_RDX_INFO_RECV 0x80000 /* RDX received from peer */ 136 #define VSW_RDX_ACK_SENT 0x100000 137 #define VSW_RDX_ACK_RECV 0x200000 138 #define VSW_RDX_NACK_SENT 0x400000 139 #define VSW_RDX_NACK_RECV 0x800000 140 141 #define VSW_MCST_INFO_SENT 0x1000000 142 #define VSW_MCST_INFO_RECV 0x2000000 143 #define VSW_MCST_ACK_SENT 0x4000000 144 #define VSW_MCST_ACK_RECV 0x8000000 145 #define VSW_MCST_NACK_SENT 0x10000000 146 #define VSW_MCST_NACK_RECV 0x20000000 147 148 #define VSW_LANE_ACTIVE 0x40000000 /* Lane open to xmit data */ 149 150 /* Handshake milestones */ 151 #define VSW_MILESTONE0 0x1 /* ver info exchanged */ 152 #define VSW_MILESTONE1 0x2 /* attribute exchanged */ 153 #define VSW_MILESTONE2 0x4 /* dring info exchanged */ 154 #define VSW_MILESTONE3 0x8 /* rdx exchanged */ 155 #define VSW_MILESTONE4 0x10 /* handshake complete */ 156 157 /* 158 * Lane direction (relative to ourselves). 159 */ 160 #define INBOUND 0x1 161 #define OUTBOUND 0x2 162 163 /* Peer session id received */ 164 #define VSW_PEER_SESSION 0x1 165 166 /* 167 * Maximum number of consecutive reads of data from channel 168 */ 169 #define VSW_MAX_CHAN_READ 50 170 171 /* 172 * LDC queue length 173 */ 174 #define VSW_LDC_QLEN 1024 175 176 /* 177 * Currently only support one ldc per port. 178 */ 179 #define VSW_PORT_MAX_LDCS 1 /* max # of ldcs per port */ 180 181 /* 182 * Used for port add/deletion. 183 */ 184 #define VSW_PORT_UPDATED 0x1 185 186 #define LDC_TX_SUCCESS 0 /* ldc transmit success */ 187 #define LDC_TX_FAILURE 1 /* ldc transmit failure */ 188 #define LDC_TX_NORESOURCES 2 /* out of descriptors */ 189 190 /* ID of the source of a frame being switched */ 191 #define VSW_PHYSDEV 1 /* physical device associated */ 192 #define VSW_VNETPORT 2 /* port connected to vnet (over ldc) */ 193 #define VSW_LOCALDEV 4 /* vsw configured as an eth interface */ 194 195 /* 196 * Descriptor ring info 197 * 198 * Each descriptor element has a pre-allocated data buffer 199 * associated with it, into which data being transmitted is 200 * copied. By pre-allocating we speed up the copying process. 201 * The buffer is re-used once the peer has indicated that it is 202 * finished with the descriptor. 203 */ 204 #define VSW_RING_NUM_EL 512 /* Num of entries in ring */ 205 #define VSW_RING_EL_DATA_SZ 2048 /* Size of data section (bytes) */ 206 #define VSW_PRIV_SIZE sizeof (vnet_private_desc_t) 207 #define VSW_PUB_SIZE sizeof (vnet_public_desc_t) 208 209 #define VSW_MAX_COOKIES ((ETHERMTU >> MMU_PAGESHIFT) + 2) 210 211 /* 212 * Private descriptor 213 */ 214 typedef struct vsw_private_desc { 215 uint64_t dstate; 216 vnet_public_desc_t *descp; 217 ldc_mem_handle_t memhandle; 218 void *datap; 219 uint64_t datalen; 220 uint64_t ncookies; 221 ldc_mem_cookie_t memcookie[VSW_MAX_COOKIES]; 222 int bound; 223 } vsw_private_desc_t; 224 225 /* 226 * Descriptor ring structure 227 */ 228 typedef struct dring_info { 229 struct dring_info *next; /* next ring in chain */ 230 kmutex_t dlock; 231 uint32_t num_descriptors; 232 uint32_t descriptor_size; 233 uint32_t options; 234 uint32_t ncookies; 235 ldc_mem_cookie_t cookie[1]; 236 237 ldc_dring_handle_t handle; 238 uint64_t ident; /* identifier sent to peer */ 239 uint64_t end_idx; /* last idx processed */ 240 241 /* 242 * base address of private and public portions of the 243 * ring (where appropriate), and data block. 244 */ 245 void *pub_addr; /* base of public section */ 246 void *priv_addr; /* base of private section */ 247 void *data_addr; /* base of data section */ 248 size_t data_sz; /* size of data section */ 249 } dring_info_t; 250 251 /* 252 * Each ldc connection is comprised of two lanes, incoming 253 * from a peer, and outgoing to that peer. Each lane shares 254 * common ldc parameters and also has private lane-specific 255 * parameters. 256 */ 257 typedef struct lane { 258 uint64_t lstate; /* Lane state */ 259 uint32_t ver_major:16, /* Version major number */ 260 ver_minor:16; /* Version minor number */ 261 uint64_t seq_num; /* Sequence number */ 262 uint64_t mtu; /* ETHERMTU */ 263 uint64_t addr; /* Unique physical address */ 264 uint8_t addr_type; /* Only MAC address at moment */ 265 uint8_t xfer_mode; /* Dring or Pkt based */ 266 uint8_t ack_freq; /* Only non zero for Pkt based xfer */ 267 dring_info_t *dringp; /* List of drings for this lane */ 268 } lane_t; 269 270 /* channel drain states */ 271 #define VSW_LDC_INIT 0x1 /* Initial non-drain state */ 272 #define VSW_LDC_DRAINING 0x2 /* Channel draining */ 273 274 /* ldc information associated with a vsw-port */ 275 typedef struct vsw_ldc { 276 struct vsw_ldc *ldc_next; /* next ldc in the list */ 277 struct vsw_port *ldc_port; /* associated port */ 278 struct vsw *ldc_vswp; /* associated vsw */ 279 kmutex_t ldc_cblock; /* sync callback processing */ 280 kmutex_t ldc_txlock; /* sync transmits */ 281 uint64_t ldc_id; /* channel number */ 282 ldc_handle_t ldc_handle; /* channel handle */ 283 kmutex_t drain_cv_lock; 284 kcondvar_t drain_cv; /* channel draining */ 285 int drain_state; 286 uint32_t hphase; /* handshake phase */ 287 int hcnt; /* # handshake attempts */ 288 ldc_status_t ldc_status; /* channel status */ 289 uint64_t local_session; /* Our session id */ 290 uint64_t peer_session; /* Our peers session id */ 291 uint8_t session_status; /* Session recv'd, sent */ 292 kmutex_t hss_lock; 293 uint32_t hss_id; /* Handshake session id */ 294 uint64_t next_ident; /* Next dring ident # to use */ 295 lane_t lane_in; /* Inbound lane */ 296 lane_t lane_out; /* Outbound lane */ 297 uint8_t dev_class; /* Peer device class */ 298 } vsw_ldc_t; 299 300 /* list of ldcs per port */ 301 typedef struct vsw_ldc_list { 302 vsw_ldc_t *head; /* head of the list */ 303 krwlock_t lockrw; /* sync access(rw) to the list */ 304 int num_ldcs; /* number of ldcs in the list */ 305 } vsw_ldc_list_t; 306 307 /* multicast addresses port is interested in */ 308 typedef struct mcst_addr { 309 struct mcst_addr *nextp; 310 uint64_t addr; 311 } mcst_addr_t; 312 313 /* Port detach states */ 314 #define VSW_PORT_INIT 0x1 /* Initial non-detach state */ 315 #define VSW_PORT_DETACHING 0x2 /* In process of being detached */ 316 #define VSW_PORT_DETACHABLE 0x4 /* Safe to detach */ 317 318 /* port information associated with a vsw */ 319 typedef struct vsw_port { 320 int p_instance; /* port instance */ 321 struct vsw_port *p_next; /* next port in the list */ 322 struct vsw *p_vswp; /* associated vsw */ 323 vsw_ldc_list_t p_ldclist; /* list of ldcs for this port */ 324 325 kmutex_t tx_lock; /* transmit lock */ 326 int (*transmit)(vsw_ldc_t *, mblk_t *); 327 328 int state; /* port state */ 329 kmutex_t state_lock; 330 kcondvar_t state_cv; 331 332 int ref_cnt; /* # of active references */ 333 kmutex_t ref_lock; 334 kcondvar_t ref_cv; 335 336 kmutex_t mca_lock; /* multicast lock */ 337 mcst_addr_t *mcap; /* list of multicast addrs */ 338 339 /* 340 * mac address of the port & connected device 341 */ 342 struct ether_addr p_macaddr; 343 } vsw_port_t; 344 345 /* list of ports per vsw */ 346 typedef struct vsw_port_list { 347 vsw_port_t *head; /* head of the list */ 348 krwlock_t lockrw; /* sync access(rw) to the list */ 349 int num_ports; /* number of ports in the list */ 350 } vsw_port_list_t; 351 352 /* 353 * Taskq control message 354 */ 355 typedef struct vsw_ctrl_task { 356 vsw_ldc_t *ldcp; 357 def_msg_t pktp; 358 uint32_t hss_id; 359 } vsw_ctrl_task_t; 360 361 /* 362 * Number of hash chains in the multicast forwarding database. 363 */ 364 #define VSW_NCHAINS 8 365 366 /* 367 * State of interface if switch plumbed as network device. 368 */ 369 #define VSW_IF_UP 0x1 /* Interface UP */ 370 #define VSW_IF_PROMISC 0x2 /* Interface in promiscious mode */ 371 372 #define VSW_U_P(state) \ 373 (state == (VSW_IF_UP | VSW_IF_PROMISC)) 374 375 /* 376 * Switching modes. 377 */ 378 #define VSW_LAYER2 0x1 /* Layer 2 - MAC switching */ 379 #define VSW_LAYER2_PROMISC 0x2 /* Layer 2 + promisc mode */ 380 #define VSW_LAYER3 0x4 /* Layer 3 - IP switching */ 381 382 #define NUM_SMODES 3 /* number of switching modes */ 383 384 /* 385 * Bits indicating which properties we've read from MD. 386 */ 387 #define VSW_MD_PHYSNAME 0x1 388 #define VSW_MD_MACADDR 0x2 389 #define VSW_MD_SMODE 0x4 390 391 /* 392 * vsw instance state information. 393 */ 394 typedef struct vsw { 395 int instance; /* instance # */ 396 dev_info_t *dip; /* associated dev_info */ 397 struct vsw *next; /* next in list */ 398 char physname[LIFNAMSIZ]; /* phys-dev */ 399 uint8_t smode[NUM_SMODES]; /* switching mode */ 400 int smode_idx; /* curr pos in smode array */ 401 uint8_t mdprops; /* bitmask of props found */ 402 vsw_port_list_t plist; /* associated ports */ 403 ddi_taskq_t *taskq_p; /* VIO ctrl msg taskq */ 404 mod_hash_t *fdb; /* forwarding database */ 405 406 mod_hash_t *mfdb; /* multicast FDB */ 407 krwlock_t mfdbrw; /* rwlock for mFDB */ 408 409 /* mac layer */ 410 mac_handle_t mh; 411 mac_rx_handle_t mrh; 412 mac_notify_handle_t mnh; 413 const mac_txinfo_t *txinfo; /* MAC tx routine */ 414 415 /* Initial promisc setting of interface */ 416 boolean_t init_promisc; 417 418 /* Machine Description updates */ 419 mdeg_node_spec_t *inst_spec; 420 mdeg_handle_t mdeg_hdl; 421 422 /* if configured as an ethernet interface */ 423 mac_t *if_macp; /* MAC structure */ 424 mac_resource_handle_t if_mrh; 425 struct ether_addr if_addr; /* interface address */ 426 krwlock_t if_lockrw; 427 uint8_t if_state; /* interface state */ 428 429 /* multicast addresses when configured as eth interface */ 430 kmutex_t mca_lock; /* multicast lock */ 431 mcst_addr_t *mcap; /* list of multicast addrs */ 432 } vsw_t; 433 434 435 /* 436 * Ethernet broadcast address definition. 437 */ 438 static struct ether_addr etherbroadcastaddr = { 439 0xff, 0xff, 0xff, 0xff, 0xff, 0xff 440 }; 441 442 #define IS_BROADCAST(ehp) \ 443 (ether_cmp(&ehp->ether_dhost, ðerbroadcastaddr) == 0) 444 #define IS_MULTICAST(ehp) \ 445 ((ehp->ether_dhost.ether_addr_octet[0] & 01) == 1) 446 447 #define READ_ENTER(x) rw_enter(x, RW_READER) 448 #define WRITE_ENTER(x) rw_enter(x, RW_WRITER) 449 #define RW_EXIT(x) rw_exit(x) 450 451 #ifdef __cplusplus 452 } 453 #endif 454 455 #endif /* _VSW_H */ 456