1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/errno.h> 28 #include <sys/debug.h> 29 #include <sys/time.h> 30 #include <sys/sysmacros.h> 31 #include <sys/systm.h> 32 #include <sys/user.h> 33 #include <sys/stropts.h> 34 #include <sys/stream.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #include <sys/cmn_err.h> 38 #include <sys/cpu.h> 39 #include <sys/kmem.h> 40 #include <sys/conf.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/ksynch.h> 44 #include <sys/stat.h> 45 #include <sys/kstat.h> 46 #include <sys/vtrace.h> 47 #include <sys/strsun.h> 48 #include <sys/dlpi.h> 49 #include <sys/ethernet.h> 50 #include <net/if.h> 51 #include <sys/varargs.h> 52 #include <sys/machsystm.h> 53 #include <sys/modctl.h> 54 #include <sys/modhash.h> 55 #include <sys/mac_provider.h> 56 #include <sys/mac_ether.h> 57 #include <sys/taskq.h> 58 #include <sys/note.h> 59 #include <sys/mach_descrip.h> 60 #include <sys/mac_provider.h> 61 #include <sys/mdeg.h> 62 #include <sys/ldc.h> 63 #include <sys/vsw_fdb.h> 64 #include <sys/vsw.h> 65 #include <sys/vio_mailbox.h> 66 #include <sys/vnet_mailbox.h> 67 #include <sys/vnet_common.h> 68 #include <sys/vio_util.h> 69 #include <sys/sdt.h> 70 #include <sys/atomic.h> 71 #include <sys/callb.h> 72 #include <sys/vlan.h> 73 74 /* 75 * Function prototypes. 76 */ 77 static int vsw_attach(dev_info_t *, ddi_attach_cmd_t); 78 static int vsw_detach(dev_info_t *, ddi_detach_cmd_t); 79 static int vsw_unattach(vsw_t *vswp); 80 static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *); 81 static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *); 82 void vsw_destroy_rxpools(void *); 83 84 /* MDEG routines */ 85 static int vsw_mdeg_register(vsw_t *vswp); 86 static void vsw_mdeg_unregister(vsw_t *vswp); 87 static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *); 88 static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *); 89 static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t); 90 static int vsw_read_mdprops(vsw_t *vswp); 91 static void vsw_vlan_read_ids(void *arg, int type, md_t *mdp, 92 mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp, 93 uint16_t *nvidsp, uint16_t *default_idp); 94 static void vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp, 95 mde_cookie_t node, uint64_t *bw); 96 static int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 97 md_t *mdp, mde_cookie_t *node); 98 static void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, 99 mde_cookie_t node); 100 static void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 101 uint32_t *mtu); 102 static int vsw_mtu_update(vsw_t *vswp, uint32_t mtu); 103 static void vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 104 boolean_t *pls); 105 static void vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 106 uint64_t *bw); 107 static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t); 108 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 109 static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1, 110 vsw_vlanid_t *vids2, int nvids); 111 112 /* Mac driver related routines */ 113 static int vsw_mac_register(vsw_t *); 114 static int vsw_mac_unregister(vsw_t *); 115 static int vsw_m_stat(void *, uint_t, uint64_t *); 116 static void vsw_m_stop(void *arg); 117 static int vsw_m_start(void *arg); 118 static int vsw_m_unicst(void *arg, const uint8_t *); 119 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *); 120 static int vsw_m_promisc(void *arg, boolean_t); 121 static mblk_t *vsw_m_tx(void *arg, mblk_t *); 122 void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state); 123 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 124 mblk_t *mp, vsw_macrx_flags_t flags); 125 void vsw_physlink_state_update(vsw_t *vswp); 126 127 /* 128 * Functions imported from other files. 129 */ 130 extern void vsw_setup_switching_thread(void *arg); 131 extern int vsw_setup_switching_start(vsw_t *vswp); 132 extern void vsw_setup_switching_stop(vsw_t *vswp); 133 extern int vsw_setup_switching(vsw_t *); 134 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, 135 vsw_port_t *port, mac_resource_handle_t mrh); 136 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 137 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 138 extern void vsw_del_mcst_vsw(vsw_t *); 139 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 140 extern void vsw_detach_ports(vsw_t *vswp); 141 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 142 extern int vsw_port_detach(vsw_t *vswp, int p_instance); 143 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 144 md_t *prev_mdp, mde_cookie_t prev_mdex); 145 extern int vsw_port_attach(vsw_port_t *port); 146 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 147 extern int vsw_mac_open(vsw_t *vswp); 148 extern void vsw_mac_close(vsw_t *vswp); 149 extern void vsw_mac_cleanup_ports(vsw_t *vswp); 150 extern void vsw_unset_addrs(vsw_t *vswp); 151 extern void vsw_setup_switching_post_process(vsw_t *vswp); 152 extern void vsw_create_vlans(void *arg, int type); 153 extern void vsw_destroy_vlans(void *arg, int type); 154 extern void vsw_vlan_add_ids(void *arg, int type); 155 extern void vsw_vlan_remove_ids(void *arg, int type); 156 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 157 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, 158 mblk_t **npt); 159 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 160 extern void vsw_hio_cleanup(vsw_t *vswp); 161 extern void vsw_hio_start_ports(vsw_t *vswp); 162 extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled); 163 extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int); 164 extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int); 165 extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid, 166 vsw_vlanid_t *new_vids, int new_nvids); 167 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type); 168 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type); 169 extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans, 170 uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids); 171 extern void vsw_reset_ports(vsw_t *vswp); 172 extern void vsw_port_reset(vsw_port_t *portp); 173 extern void vsw_physlink_update_ports(vsw_t *vswp); 174 extern void vsw_update_bandwidth(vsw_t *vswp, vsw_port_t *port, int type, 175 uint64_t maxbw); 176 177 /* 178 * Internal tunables. 179 */ 180 int vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */ 181 int vsw_wretries = 100; /* # of write attempts */ 182 int vsw_setup_switching_delay = 3; /* setup sw timeout interval in sec */ 183 int vsw_mac_open_retries = 300; /* max # of mac_open() retries */ 184 /* 300*3 = 900sec(15min) of max tmout */ 185 int vsw_ldc_tx_delay = 5; /* delay(ticks) for tx retries */ 186 int vsw_ldc_tx_retries = 10; /* # of ldc tx retries */ 187 int vsw_ldc_retries = 5; /* # of ldc_close() retries */ 188 int vsw_ldc_delay = 1000; /* 1 ms delay for ldc_close() */ 189 boolean_t vsw_ldc_rxthr_enabled = B_TRUE; /* LDC Rx thread enabled */ 190 boolean_t vsw_ldc_txthr_enabled = B_TRUE; /* LDC Tx thread enabled */ 191 int vsw_rxpool_cleanup_delay = 100000; /* 100ms */ 192 193 194 uint32_t vsw_fdb_nchains = 8; /* # of chains in fdb hash table */ 195 uint32_t vsw_vlan_nchains = 4; /* # of chains in vlan id hash table */ 196 uint32_t vsw_ethermtu = 1500; /* mtu of the device */ 197 198 /* delay in usec to wait for all references on a fdb entry to be dropped */ 199 uint32_t vsw_fdbe_refcnt_delay = 10; 200 201 /* 202 * Default vlan id. This is only used internally when the "default-vlan-id" 203 * property is not present in the MD device node. Therefore, this should not be 204 * used as a tunable; if this value is changed, the corresponding variable 205 * should be updated to the same value in all vnets connected to this vsw. 206 */ 207 uint16_t vsw_default_vlan_id = 1; 208 209 /* 210 * Workaround for a version handshake bug in obp's vnet. 211 * If vsw initiates version negotiation starting from the highest version, 212 * obp sends a nack and terminates version handshake. To workaround 213 * this, we do not initiate version handshake when the channel comes up. 214 * Instead, we wait for the peer to send its version info msg and go through 215 * the version protocol exchange. If we successfully negotiate a version, 216 * before sending the ack, we send our version info msg to the peer 217 * using the <major,minor> version that we are about to ack. 218 */ 219 boolean_t vsw_obp_ver_proto_workaround = B_TRUE; 220 221 /* 222 * In the absence of "priority-ether-types" property in MD, the following 223 * internal tunable can be set to specify a single priority ethertype. 224 */ 225 uint64_t vsw_pri_eth_type = 0; 226 227 /* 228 * Number of transmit priority buffers that are preallocated per device. 229 * This number is chosen to be a small value to throttle transmission 230 * of priority packets. Note: Must be a power of 2 for vio_create_mblks(). 231 */ 232 uint32_t vsw_pri_tx_nmblks = 64; 233 234 /* 235 * Number of RARP packets sent to announce macaddr to the physical switch, 236 * after vsw's physical device is changed dynamically or after a guest (client 237 * vnet) is live migrated in. 238 */ 239 uint32_t vsw_publish_macaddr_count = 3; 240 241 /* 242 * Enable/disable HybridIO 243 */ 244 boolean_t vsw_hio_enabled = B_TRUE; 245 246 /* 247 * Max retries for HybridIO cleanup 248 */ 249 int vsw_hio_max_cleanup_retries = 10; 250 251 /* 252 * 10ms delay for HybridIO cleanup 253 */ 254 int vsw_hio_cleanup_delay = 10000; 255 256 /* 257 * Descriptor ring modes of LDC data transfer: 258 * 259 * 1) TxDring mode: 260 * In versions < v1.6 of VIO Protocol, we support only TxDring mode. In this 261 * mode, we create a transmit descriptor ring and export it to the peer through 262 * dring registration process of handshake. The descriptor ring is exported 263 * using LDC shared memory. Each descriptor is associated with a data buffer. 264 * The data buffer is also exported over LDC and the cookies for this data 265 * buffer are provided in the descriptor. The peer maps this ring as its 266 * receive ring. Similarly, the peer exports a transmit descriptor ring which 267 * is mapped by this device as its receive ring. In this mode, in a given data 268 * transfer direction, the transmitter copies the data to the exported data 269 * buffer (owned by itself), bound to the descriptor. The receiver uses the LDC 270 * cookies specified in the descriptor to copy the data into the receiving 271 * guest through the hypervisor (ldc_mem_copy()). 272 * 273 * 2) RxDringData mode: 274 * In versions >= v1.6 of VIO Protocol, we also support RxDringData mode. In 275 * this mode, we create a receive descriptor ring and export it to the peer 276 * through dring registration process of handshake. In addition, we export a 277 * receive buffer area and provide that information also in the dring 278 * registration message. The descriptor ring and the data buffer area are 279 * exported using LDC shared memory. Each descriptor is associated with a data 280 * buffer in the data buffer area and the offset of the specific data buffer 281 * within this area is specified in the descriptor. The peer maps this ring 282 * along with the data buffer area as its transmit ring. Similarly, the peer 283 * exports a receive ring which is mapped by this device as its transmit ring, 284 * along with its buffer area. In this mode, in a given data transfer 285 * direction, the transmitter copies the data to the data buffer offset 286 * specified in the descriptor. The receiver simply picks up the data buffer 287 * (owned by itself) without any copy operation into the receiving guest. 288 * 289 * We provide a tunable to enable RxDringData mode for versions >= v1.6 of VIO 290 * Protocol. By default, this tunable is set to 1 (VIO_TX_DRING). To enable 291 * RxDringData mode set this tunable to 4 (VIO_RX_DRING_DATA). This enables us 292 * to negotiate RxDringData mode with peers that support versions >= v1.6. For 293 * peers that support version < v1.6, we continue to operate in TxDring mode 294 * with them though the tunable is enabled. 295 */ 296 uint8_t vsw_dring_mode = VIO_TX_DRING; 297 298 /* 299 * Number of descriptors; must be power of 2. 300 */ 301 uint32_t vsw_num_descriptors = VSW_NUM_DESCRIPTORS; 302 303 /* 304 * In RxDringData mode, # of buffers is determined by multiplying the # of 305 * descriptors with the factor below. Note that the factor must be > 1; i.e, 306 * the # of buffers must always be > # of descriptors. This is needed because, 307 * while the shared memory buffers are sent up the stack on the receiver, the 308 * sender needs additional buffers that can be used for further transmits. 309 * See vsw_setup_rx_dring() for details. 310 */ 311 uint32_t vsw_nrbufs_factor = 2; 312 313 /* 314 * Delay when rx descr not ready; used in both dring modes. 315 */ 316 int vsw_recv_delay = 0; 317 318 /* 319 * Retry when rx descr not ready; used in both dring modes. 320 */ 321 int vsw_recv_retries = 5; 322 323 /* 324 * Max number of mblks received in one receive operation. 325 */ 326 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6); 327 328 /* 329 * Internal tunables for receive buffer pools, that is, the size and number of 330 * mblks for each pool. At least 3 sizes must be specified if these are used. 331 * The sizes must be specified in increasing order. Non-zero value of the first 332 * size will be used as a hint to use these values instead of the algorithm 333 * that determines the sizes based on MTU. Used in TxDring mode only. 334 */ 335 uint32_t vsw_mblk_size1 = 0; 336 uint32_t vsw_mblk_size2 = 0; 337 uint32_t vsw_mblk_size3 = 0; 338 uint32_t vsw_mblk_size4 = 0; 339 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS; /* number of mblks for pool1 */ 340 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS; /* number of mblks for pool2 */ 341 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS; /* number of mblks for pool3 */ 342 uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS; /* number of mblks for pool4 */ 343 344 /* 345 * Set this to non-zero to enable additional internal receive buffer pools 346 * based on the MTU of the device for better performance at the cost of more 347 * memory consumption. This is turned off by default, to use allocb(9F) for 348 * receive buffer allocations of sizes > 2K. 349 */ 350 boolean_t vsw_jumbo_rxpools = B_FALSE; 351 352 /* 353 * vsw_max_tx_qcount is the maximum # of packets that can be queued 354 * before the tx worker thread begins processing the queue. Its value 355 * is chosen to be 4x the default length of tx descriptor ring. 356 */ 357 uint32_t vsw_max_tx_qcount = 4 * VSW_NUM_DESCRIPTORS; 358 359 /* 360 * MAC callbacks 361 */ 362 static mac_callbacks_t vsw_m_callbacks = { 363 0, 364 vsw_m_stat, 365 vsw_m_start, 366 vsw_m_stop, 367 vsw_m_promisc, 368 vsw_m_multicst, 369 vsw_m_unicst, 370 vsw_m_tx 371 }; 372 373 static struct cb_ops vsw_cb_ops = { 374 nulldev, /* cb_open */ 375 nulldev, /* cb_close */ 376 nodev, /* cb_strategy */ 377 nodev, /* cb_print */ 378 nodev, /* cb_dump */ 379 nodev, /* cb_read */ 380 nodev, /* cb_write */ 381 nodev, /* cb_ioctl */ 382 nodev, /* cb_devmap */ 383 nodev, /* cb_mmap */ 384 nodev, /* cb_segmap */ 385 nochpoll, /* cb_chpoll */ 386 ddi_prop_op, /* cb_prop_op */ 387 NULL, /* cb_stream */ 388 D_MP, /* cb_flag */ 389 CB_REV, /* rev */ 390 nodev, /* int (*cb_aread)() */ 391 nodev /* int (*cb_awrite)() */ 392 }; 393 394 static struct dev_ops vsw_ops = { 395 DEVO_REV, /* devo_rev */ 396 0, /* devo_refcnt */ 397 NULL, /* devo_getinfo */ 398 nulldev, /* devo_identify */ 399 nulldev, /* devo_probe */ 400 vsw_attach, /* devo_attach */ 401 vsw_detach, /* devo_detach */ 402 nodev, /* devo_reset */ 403 &vsw_cb_ops, /* devo_cb_ops */ 404 (struct bus_ops *)NULL, /* devo_bus_ops */ 405 ddi_power /* devo_power */ 406 }; 407 408 extern struct mod_ops mod_driverops; 409 static struct modldrv vswmodldrv = { 410 &mod_driverops, 411 "sun4v Virtual Switch", 412 &vsw_ops, 413 }; 414 415 #define LDC_ENTER_LOCK(ldcp) \ 416 mutex_enter(&((ldcp)->ldc_cblock));\ 417 mutex_enter(&((ldcp)->ldc_rxlock));\ 418 mutex_enter(&((ldcp)->ldc_txlock)); 419 #define LDC_EXIT_LOCK(ldcp) \ 420 mutex_exit(&((ldcp)->ldc_txlock));\ 421 mutex_exit(&((ldcp)->ldc_rxlock));\ 422 mutex_exit(&((ldcp)->ldc_cblock)); 423 424 /* Driver soft state ptr */ 425 static void *vsw_state; 426 427 /* 428 * Linked list of "vsw_t" structures - one per instance. 429 */ 430 vsw_t *vsw_head = NULL; 431 krwlock_t vsw_rw; 432 433 /* 434 * Property names 435 */ 436 static char vdev_propname[] = "virtual-device"; 437 static char vsw_propname[] = "virtual-network-switch"; 438 static char physdev_propname[] = "vsw-phys-dev"; 439 static char smode_propname[] = "vsw-switch-mode"; 440 static char macaddr_propname[] = "local-mac-address"; 441 static char remaddr_propname[] = "remote-mac-address"; 442 static char ldcids_propname[] = "ldc-ids"; 443 static char chan_propname[] = "channel-endpoint"; 444 static char id_propname[] = "id"; 445 static char reg_propname[] = "reg"; 446 static char pri_types_propname[] = "priority-ether-types"; 447 static char vsw_pvid_propname[] = "port-vlan-id"; 448 static char vsw_vid_propname[] = "vlan-id"; 449 static char vsw_dvid_propname[] = "default-vlan-id"; 450 static char port_pvid_propname[] = "remote-port-vlan-id"; 451 static char port_vid_propname[] = "remote-vlan-id"; 452 static char hybrid_propname[] = "hybrid"; 453 static char vsw_mtu_propname[] = "mtu"; 454 static char vsw_linkprop_propname[] = "linkprop"; 455 static char vsw_maxbw_propname[] = "maxbw"; 456 static char port_maxbw_propname[] = "maxbw"; 457 458 /* 459 * Matching criteria passed to the MDEG to register interest 460 * in changes to 'virtual-device-port' nodes identified by their 461 * 'id' property. 462 */ 463 static md_prop_match_t vport_prop_match[] = { 464 { MDET_PROP_VAL, "id" }, 465 { MDET_LIST_END, NULL } 466 }; 467 468 static mdeg_node_match_t vport_match = { "virtual-device-port", 469 vport_prop_match }; 470 471 /* 472 * Matching criteria passed to the MDEG to register interest 473 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified 474 * by their 'name' and 'cfg-handle' properties. 475 */ 476 static md_prop_match_t vdev_prop_match[] = { 477 { MDET_PROP_STR, "name" }, 478 { MDET_PROP_VAL, "cfg-handle" }, 479 { MDET_LIST_END, NULL } 480 }; 481 482 static mdeg_node_match_t vdev_match = { "virtual-device", 483 vdev_prop_match }; 484 485 486 /* 487 * Specification of an MD node passed to the MDEG to filter any 488 * 'vport' nodes that do not belong to the specified node. This 489 * template is copied for each vsw instance and filled in with 490 * the appropriate 'cfg-handle' value before being passed to the MDEG. 491 */ 492 static mdeg_prop_spec_t vsw_prop_template[] = { 493 { MDET_PROP_STR, "name", vsw_propname }, 494 { MDET_PROP_VAL, "cfg-handle", NULL }, 495 { MDET_LIST_END, NULL, NULL } 496 }; 497 498 #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 499 500 #ifdef DEBUG 501 /* 502 * Print debug messages - set to 0x1f to enable all msgs 503 * or 0x0 to turn all off. 504 */ 505 int vswdbg = 0x0; 506 507 /* 508 * debug levels: 509 * 0x01: Function entry/exit tracing 510 * 0x02: Internal function messages 511 * 0x04: Verbose internal messages 512 * 0x08: Warning messages 513 * 0x10: Error messages 514 */ 515 516 void 517 vswdebug(vsw_t *vswp, const char *fmt, ...) 518 { 519 char buf[512]; 520 va_list ap; 521 522 va_start(ap, fmt); 523 (void) vsprintf(buf, fmt, ap); 524 va_end(ap); 525 526 if (vswp == NULL) 527 cmn_err(CE_CONT, "%s\n", buf); 528 else 529 cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); 530 } 531 532 #endif /* DEBUG */ 533 534 static struct modlinkage modlinkage = { 535 MODREV_1, 536 &vswmodldrv, 537 NULL 538 }; 539 540 int 541 _init(void) 542 { 543 int status; 544 545 rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); 546 547 status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); 548 if (status != 0) { 549 return (status); 550 } 551 552 mac_init_ops(&vsw_ops, DRV_NAME); 553 status = mod_install(&modlinkage); 554 if (status != 0) { 555 ddi_soft_state_fini(&vsw_state); 556 } 557 return (status); 558 } 559 560 int 561 _fini(void) 562 { 563 int status; 564 565 status = mod_remove(&modlinkage); 566 if (status != 0) 567 return (status); 568 mac_fini_ops(&vsw_ops); 569 ddi_soft_state_fini(&vsw_state); 570 571 rw_destroy(&vsw_rw); 572 573 return (status); 574 } 575 576 int 577 _info(struct modinfo *modinfop) 578 { 579 return (mod_info(&modlinkage, modinfop)); 580 } 581 582 static int 583 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 584 { 585 vsw_t *vswp; 586 int instance; 587 char hashname[MAXNAMELEN]; 588 char qname[TASKQ_NAMELEN]; 589 vsw_attach_progress_t progress = PROG_init; 590 int rv; 591 592 switch (cmd) { 593 case DDI_ATTACH: 594 break; 595 case DDI_RESUME: 596 /* nothing to do for this non-device */ 597 return (DDI_SUCCESS); 598 case DDI_PM_RESUME: 599 default: 600 return (DDI_FAILURE); 601 } 602 603 instance = ddi_get_instance(dip); 604 if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { 605 DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); 606 return (DDI_FAILURE); 607 } 608 vswp = ddi_get_soft_state(vsw_state, instance); 609 610 if (vswp == NULL) { 611 DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance); 612 goto vsw_attach_fail; 613 } 614 615 vswp->dip = dip; 616 vswp->instance = instance; 617 vswp->phys_link_state = LINK_STATE_UNKNOWN; 618 ddi_set_driver_private(dip, (caddr_t)vswp); 619 620 mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL); 621 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); 622 mutex_init(&vswp->sw_thr_lock, NULL, MUTEX_DRIVER, NULL); 623 cv_init(&vswp->sw_thr_cv, NULL, CV_DRIVER, NULL); 624 rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL); 625 rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); 626 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); 627 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); 628 629 progress |= PROG_locks; 630 631 rv = vsw_read_mdprops(vswp); 632 if (rv != 0) 633 goto vsw_attach_fail; 634 635 progress |= PROG_readmd; 636 637 /* setup the unicast forwarding database */ 638 (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", 639 vswp->instance); 640 D2(vswp, "creating unicast hash table (%s)...", hashname); 641 vswp->fdb_nchains = vsw_fdb_nchains; 642 vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains, 643 mod_hash_null_valdtor, sizeof (void *)); 644 vsw_create_vlans((void *)vswp, VSW_LOCALDEV); 645 progress |= PROG_fdb; 646 647 /* setup the multicast fowarding database */ 648 (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", 649 vswp->instance); 650 D2(vswp, "creating multicast hash table %s)...", hashname); 651 vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains, 652 mod_hash_null_valdtor, sizeof (void *)); 653 654 progress |= PROG_mfdb; 655 656 /* 657 * Create the taskq which will process all the VIO 658 * control messages. 659 */ 660 (void) snprintf(qname, TASKQ_NAMELEN, "taskq%d", vswp->instance); 661 if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, 662 TASKQ_DEFAULTPRI, 0)) == NULL) { 663 cmn_err(CE_WARN, "!vsw%d: Unable to create task queue", 664 vswp->instance); 665 goto vsw_attach_fail; 666 } 667 668 progress |= PROG_taskq; 669 670 (void) snprintf(qname, TASKQ_NAMELEN, "rxpool_taskq%d", 671 vswp->instance); 672 if ((vswp->rxp_taskq = ddi_taskq_create(vswp->dip, qname, 1, 673 TASKQ_DEFAULTPRI, 0)) == NULL) { 674 cmn_err(CE_WARN, "!vsw%d: Unable to create rxp task queue", 675 vswp->instance); 676 goto vsw_attach_fail; 677 } 678 679 progress |= PROG_rxp_taskq; 680 681 /* prevent auto-detaching */ 682 if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip, 683 DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) { 684 cmn_err(CE_NOTE, "!Unable to set \"%s\" property for " 685 "instance %u", DDI_NO_AUTODETACH, instance); 686 } 687 688 /* 689 * The null switching function is set to avoid panic until 690 * switch mode is setup. 691 */ 692 vswp->vsw_switch_frame = vsw_switch_frame_nop; 693 694 /* 695 * Setup the required switching mode, based on the mdprops that we read 696 * earlier. We start a thread to do this, to avoid calling mac_open() 697 * directly from attach(). 698 */ 699 rv = vsw_setup_switching_start(vswp); 700 if (rv != 0) { 701 goto vsw_attach_fail; 702 } 703 704 progress |= PROG_swmode; 705 706 /* Register with mac layer as a provider */ 707 rv = vsw_mac_register(vswp); 708 if (rv != 0) 709 goto vsw_attach_fail; 710 711 progress |= PROG_macreg; 712 713 /* 714 * Now we have everything setup, register an interest in 715 * specific MD nodes. 716 * 717 * The callback is invoked in 2 cases, firstly if upon mdeg 718 * registration there are existing nodes which match our specified 719 * criteria, and secondly if the MD is changed (and again, there 720 * are nodes which we are interested in present within it. Note 721 * that our callback will be invoked even if our specified nodes 722 * have not actually changed). 723 * 724 */ 725 rv = vsw_mdeg_register(vswp); 726 if (rv != 0) 727 goto vsw_attach_fail; 728 729 progress |= PROG_mdreg; 730 731 vswp->attach_progress = progress; 732 733 WRITE_ENTER(&vsw_rw); 734 vswp->next = vsw_head; 735 vsw_head = vswp; 736 RW_EXIT(&vsw_rw); 737 738 ddi_report_dev(vswp->dip); 739 return (DDI_SUCCESS); 740 741 vsw_attach_fail: 742 DERR(NULL, "vsw_attach: failed"); 743 744 vswp->attach_progress = progress; 745 (void) vsw_unattach(vswp); 746 ddi_soft_state_free(vsw_state, instance); 747 return (DDI_FAILURE); 748 } 749 750 static int 751 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 752 { 753 vsw_t **vswpp, *vswp; 754 int instance; 755 756 instance = ddi_get_instance(dip); 757 vswp = ddi_get_soft_state(vsw_state, instance); 758 759 if (vswp == NULL) { 760 return (DDI_FAILURE); 761 } 762 763 switch (cmd) { 764 case DDI_DETACH: 765 break; 766 case DDI_SUSPEND: 767 case DDI_PM_SUSPEND: 768 default: 769 return (DDI_FAILURE); 770 } 771 772 D2(vswp, "detaching instance %d", instance); 773 774 if (vsw_unattach(vswp) != 0) { 775 return (DDI_FAILURE); 776 } 777 778 ddi_remove_minor_node(dip, NULL); 779 780 WRITE_ENTER(&vsw_rw); 781 for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) { 782 if (*vswpp == vswp) { 783 *vswpp = vswp->next; 784 break; 785 } 786 } 787 RW_EXIT(&vsw_rw); 788 789 ddi_soft_state_free(vsw_state, instance); 790 791 return (DDI_SUCCESS); 792 } 793 794 /* 795 * Common routine to handle vsw_attach() failure and vsw_detach(). Note that 796 * the only reason this function could fail is if mac_unregister() fails. 797 * Otherwise, this function must ensure that all resources are freed and return 798 * success. 799 */ 800 static int 801 vsw_unattach(vsw_t *vswp) 802 { 803 vsw_attach_progress_t progress; 804 805 progress = vswp->attach_progress; 806 807 /* 808 * Unregister from the gldv3 subsystem. This can fail, in particular 809 * if there are still any open references to this mac device; in which 810 * case we just return failure without continuing to detach further. 811 */ 812 if (progress & PROG_macreg) { 813 if (vsw_mac_unregister(vswp) != 0) { 814 cmn_err(CE_WARN, "!vsw%d: Unable to detach from " 815 "MAC layer", vswp->instance); 816 return (1); 817 } 818 progress &= ~PROG_macreg; 819 } 820 821 /* 822 * Now that we have unregistered from gldv3, we must finish all other 823 * steps and successfully return from this function; otherwise we will 824 * end up leaving the device in a broken/unusable state. 825 * 826 * If we have registered with mdeg, unregister now to stop further 827 * callbacks to this vsw device and/or its ports. Then, detach any 828 * existing ports. 829 */ 830 if (progress & PROG_mdreg) { 831 vsw_mdeg_unregister(vswp); 832 vsw_detach_ports(vswp); 833 progress &= ~PROG_mdreg; 834 } 835 836 /* 837 * If we have started a thread to setup the switching mode, stop it, if 838 * it is still running. If it has finished setting up the switching 839 * mode, then we need to clean up some additional things if we are 840 * running in L2 mode: first free up any hybrid resources; then stop 841 * and close the underlying physical device. Note that we would have 842 * already released all per mac_client resources (ucast, mcast addrs, 843 * hio-shares etc) as all the ports are detached and if the vsw device 844 * itself was in use as an interface, it has been unplumbed (otherwise 845 * mac_unregister() above would fail). 846 */ 847 if (progress & PROG_swmode) { 848 849 vsw_setup_switching_stop(vswp); 850 851 if (vswp->hio_capable == B_TRUE) { 852 vsw_hio_cleanup(vswp); 853 vswp->hio_capable = B_FALSE; 854 } 855 856 mutex_enter(&vswp->mac_lock); 857 vsw_mac_close(vswp); 858 mutex_exit(&vswp->mac_lock); 859 860 progress &= ~PROG_swmode; 861 } 862 863 /* 864 * We now destroy the taskq used to clean up rx mblk pools that 865 * couldn't be destroyed when the ports/channels were detached. 866 * We implicitly wait for those tasks to complete in 867 * ddi_taskq_destroy(). 868 */ 869 if (progress & PROG_rxp_taskq) { 870 ddi_taskq_destroy(vswp->rxp_taskq); 871 progress &= ~PROG_rxp_taskq; 872 } 873 874 /* 875 * By now any pending tasks have finished and the underlying 876 * ldc's have been destroyed, so its safe to delete the control 877 * message taskq. 878 */ 879 if (progress & PROG_taskq) { 880 ddi_taskq_destroy(vswp->taskq_p); 881 progress &= ~PROG_taskq; 882 } 883 884 /* Destroy the multicast hash table */ 885 if (progress & PROG_mfdb) { 886 mod_hash_destroy_hash(vswp->mfdb); 887 progress &= ~PROG_mfdb; 888 } 889 890 /* Destroy the vlan hash table and fdb */ 891 if (progress & PROG_fdb) { 892 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 893 mod_hash_destroy_hash(vswp->fdb_hashp); 894 progress &= ~PROG_fdb; 895 } 896 897 if (progress & PROG_readmd) { 898 if (VSW_PRI_ETH_DEFINED(vswp)) { 899 kmem_free(vswp->pri_types, 900 sizeof (uint16_t) * vswp->pri_num_types); 901 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 902 } 903 progress &= ~PROG_readmd; 904 } 905 906 if (progress & PROG_locks) { 907 rw_destroy(&vswp->plist.lockrw); 908 rw_destroy(&vswp->mfdbrw); 909 rw_destroy(&vswp->if_lockrw); 910 rw_destroy(&vswp->maccl_rwlock); 911 cv_destroy(&vswp->sw_thr_cv); 912 mutex_destroy(&vswp->sw_thr_lock); 913 mutex_destroy(&vswp->mca_lock); 914 mutex_destroy(&vswp->mac_lock); 915 progress &= ~PROG_locks; 916 } 917 918 vswp->attach_progress = progress; 919 920 return (0); 921 } 922 923 void 924 vsw_destroy_rxpools(void *arg) 925 { 926 vio_mblk_pool_t *poolp = (vio_mblk_pool_t *)arg; 927 vio_mblk_pool_t *npoolp; 928 929 while (poolp != NULL) { 930 npoolp = poolp->nextp; 931 while (vio_destroy_mblks(poolp) != 0) { 932 delay(drv_usectohz(vsw_rxpool_cleanup_delay)); 933 } 934 poolp = npoolp; 935 } 936 } 937 938 /* 939 * Get the value of the "vsw-phys-dev" property in the specified 940 * node. This property is the name of the physical device that 941 * the virtual switch will use to talk to the outside world. 942 * 943 * Note it is valid for this property to be NULL (but the property 944 * itself must exist). Callers of this routine should verify that 945 * the value returned is what they expected (i.e. either NULL or non NULL). 946 * 947 * On success returns value of the property in region pointed to by 948 * the 'name' argument, and with return value of 0. Otherwise returns 1. 949 */ 950 static int 951 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name) 952 { 953 int len = 0; 954 int instance; 955 char *physname = NULL; 956 char *dev; 957 const char *dev_name; 958 char myname[MAXNAMELEN]; 959 960 dev_name = ddi_driver_name(vswp->dip); 961 instance = ddi_get_instance(vswp->dip); 962 (void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance); 963 964 if (md_get_prop_data(mdp, node, physdev_propname, 965 (uint8_t **)(&physname), &len) != 0) { 966 cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical " 967 "device(s) from MD", vswp->instance); 968 return (1); 969 } else if ((strlen(physname) + 1) > LIFNAMSIZ) { 970 cmn_err(CE_WARN, "!vsw%d: %s is too long a device name", 971 vswp->instance, physname); 972 return (1); 973 } else if (strcmp(myname, physname) == 0) { 974 /* 975 * Prevent the vswitch from opening itself as the 976 * network device. 977 */ 978 cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name", 979 vswp->instance, physname); 980 return (1); 981 } else { 982 (void) strncpy(name, physname, strlen(physname) + 1); 983 D2(vswp, "%s: using first device specified (%s)", 984 __func__, physname); 985 } 986 987 #ifdef DEBUG 988 /* 989 * As a temporary measure to aid testing we check to see if there 990 * is a vsw.conf file present. If there is we use the value of the 991 * vsw_physname property in the file as the name of the physical 992 * device, overriding the value from the MD. 993 * 994 * There may be multiple devices listed, but for the moment 995 * we just use the first one. 996 */ 997 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0, 998 "vsw_physname", &dev) == DDI_PROP_SUCCESS) { 999 if ((strlen(dev) + 1) > LIFNAMSIZ) { 1000 cmn_err(CE_WARN, "vsw%d: %s is too long a device name", 1001 vswp->instance, dev); 1002 ddi_prop_free(dev); 1003 return (1); 1004 } else { 1005 cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from " 1006 "config file", vswp->instance, dev); 1007 1008 (void) strncpy(name, dev, strlen(dev) + 1); 1009 } 1010 1011 ddi_prop_free(dev); 1012 } 1013 #endif 1014 1015 return (0); 1016 } 1017 1018 /* 1019 * Read the 'vsw-switch-mode' property from the specified MD node. 1020 * 1021 * Returns 0 on success, otherwise returns 1. 1022 */ 1023 static int 1024 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode) 1025 { 1026 int len = 0; 1027 char *smode = NULL; 1028 char *curr_mode = NULL; 1029 1030 D1(vswp, "%s: enter", __func__); 1031 1032 /* 1033 * Get the switch-mode property. The modes are listed in 1034 * decreasing order of preference, i.e. prefered mode is 1035 * first item in list. 1036 */ 1037 len = 0; 1038 if (md_get_prop_data(mdp, node, smode_propname, 1039 (uint8_t **)(&smode), &len) != 0) { 1040 /* 1041 * Unable to get switch-mode property from MD, nothing 1042 * more we can do. 1043 */ 1044 cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property" 1045 " from the MD", vswp->instance); 1046 return (1); 1047 } 1048 1049 curr_mode = smode; 1050 /* 1051 * Modes of operation: 1052 * 'switched' - layer 2 switching, underlying HW in 1053 * programmed mode. 1054 * 'promiscuous' - layer 2 switching, underlying HW in 1055 * promiscuous mode. 1056 * 'routed' - layer 3 (i.e. IP) routing, underlying HW 1057 * in non-promiscuous mode. 1058 */ 1059 while (curr_mode < (smode + len)) { 1060 D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode); 1061 if (strcmp(curr_mode, "switched") == 0) { 1062 *mode = VSW_LAYER2; 1063 } else if (strcmp(curr_mode, "promiscuous") == 0) { 1064 *mode = VSW_LAYER2 | VSW_LAYER2_PROMISC; 1065 } else if (strcmp(curr_mode, "routed") == 0) { 1066 *mode = VSW_LAYER3; 1067 } else { 1068 cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, " 1069 "setting to default switched mode", 1070 vswp->instance, curr_mode); 1071 *mode = VSW_LAYER2; 1072 } 1073 curr_mode += strlen(curr_mode) + 1; 1074 } 1075 1076 D2(vswp, "%s: %d mode", __func__, *mode); 1077 1078 D1(vswp, "%s: exit", __func__); 1079 1080 return (0); 1081 } 1082 1083 /* 1084 * Register with the MAC layer as a network device, so we 1085 * can be plumbed if necessary. 1086 */ 1087 static int 1088 vsw_mac_register(vsw_t *vswp) 1089 { 1090 mac_register_t *macp; 1091 int rv; 1092 1093 D1(vswp, "%s: enter", __func__); 1094 1095 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 1096 return (EINVAL); 1097 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1098 macp->m_driver = vswp; 1099 macp->m_dip = vswp->dip; 1100 macp->m_src_addr = (uint8_t *)&vswp->if_addr; 1101 macp->m_callbacks = &vsw_m_callbacks; 1102 macp->m_min_sdu = 0; 1103 macp->m_max_sdu = vswp->mtu; 1104 macp->m_margin = VLAN_TAGSZ; 1105 rv = mac_register(macp, &vswp->if_mh); 1106 mac_free(macp); 1107 if (rv != 0) { 1108 /* 1109 * Treat this as a non-fatal error as we may be 1110 * able to operate in some other mode. 1111 */ 1112 cmn_err(CE_NOTE, "!vsw%d: Unable to register as " 1113 "a provider with MAC layer", vswp->instance); 1114 return (rv); 1115 } 1116 1117 vswp->if_state |= VSW_IF_REG; 1118 1119 D1(vswp, "%s: exit", __func__); 1120 1121 return (rv); 1122 } 1123 1124 static int 1125 vsw_mac_unregister(vsw_t *vswp) 1126 { 1127 int rv = 0; 1128 1129 D1(vswp, "%s: enter", __func__); 1130 1131 WRITE_ENTER(&vswp->if_lockrw); 1132 1133 if (vswp->if_state & VSW_IF_REG) { 1134 rv = mac_unregister(vswp->if_mh); 1135 if (rv != 0) { 1136 DWARN(vswp, "%s: unable to unregister from MAC " 1137 "framework", __func__); 1138 1139 RW_EXIT(&vswp->if_lockrw); 1140 D1(vswp, "%s: fail exit", __func__); 1141 return (rv); 1142 } 1143 1144 /* mark i/f as down and unregistered */ 1145 vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG); 1146 } 1147 RW_EXIT(&vswp->if_lockrw); 1148 1149 D1(vswp, "%s: exit", __func__); 1150 1151 return (rv); 1152 } 1153 1154 static int 1155 vsw_m_stat(void *arg, uint_t stat, uint64_t *val) 1156 { 1157 vsw_t *vswp = (vsw_t *)arg; 1158 1159 D1(vswp, "%s: enter", __func__); 1160 1161 mutex_enter(&vswp->mac_lock); 1162 if (vswp->mh == NULL) { 1163 mutex_exit(&vswp->mac_lock); 1164 return (EINVAL); 1165 } 1166 1167 /* return stats from underlying device */ 1168 *val = mac_stat_get(vswp->mh, stat); 1169 1170 mutex_exit(&vswp->mac_lock); 1171 1172 return (0); 1173 } 1174 1175 static void 1176 vsw_m_stop(void *arg) 1177 { 1178 vsw_t *vswp = (vsw_t *)arg; 1179 1180 D1(vswp, "%s: enter", __func__); 1181 1182 WRITE_ENTER(&vswp->if_lockrw); 1183 vswp->if_state &= ~VSW_IF_UP; 1184 RW_EXIT(&vswp->if_lockrw); 1185 1186 /* Cleanup and close the mac client */ 1187 vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV); 1188 1189 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1190 } 1191 1192 static int 1193 vsw_m_start(void *arg) 1194 { 1195 int rv; 1196 vsw_t *vswp = (vsw_t *)arg; 1197 1198 D1(vswp, "%s: enter", __func__); 1199 1200 WRITE_ENTER(&vswp->if_lockrw); 1201 1202 vswp->if_state |= VSW_IF_UP; 1203 1204 if (vswp->switching_setup_done == B_FALSE) { 1205 /* 1206 * If the switching mode has not been setup yet, just 1207 * return. The unicast address will be programmed 1208 * after the physical device is successfully setup by the 1209 * timeout handler. 1210 */ 1211 RW_EXIT(&vswp->if_lockrw); 1212 return (0); 1213 } 1214 1215 /* if in layer2 mode, program unicast address. */ 1216 if (vswp->mh != NULL) { 1217 /* Init a mac client and program addresses */ 1218 rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV); 1219 if (rv != 0) { 1220 cmn_err(CE_NOTE, 1221 "!vsw%d: failed to program interface " 1222 "unicast address\n", vswp->instance); 1223 } 1224 } 1225 1226 RW_EXIT(&vswp->if_lockrw); 1227 1228 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1229 return (0); 1230 } 1231 1232 /* 1233 * Change the local interface address. 1234 * 1235 * Note: we don't support this entry point. The local 1236 * mac address of the switch can only be changed via its 1237 * MD node properties. 1238 */ 1239 static int 1240 vsw_m_unicst(void *arg, const uint8_t *macaddr) 1241 { 1242 _NOTE(ARGUNUSED(arg, macaddr)) 1243 1244 return (DDI_FAILURE); 1245 } 1246 1247 static int 1248 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 1249 { 1250 vsw_t *vswp = (vsw_t *)arg; 1251 mcst_addr_t *mcst_p = NULL; 1252 uint64_t addr = 0x0; 1253 int i, ret = 0; 1254 1255 D1(vswp, "%s: enter", __func__); 1256 1257 /* 1258 * Convert address into form that can be used 1259 * as hash table key. 1260 */ 1261 for (i = 0; i < ETHERADDRL; i++) { 1262 addr = (addr << 8) | mca[i]; 1263 } 1264 1265 D2(vswp, "%s: addr = 0x%llx", __func__, addr); 1266 1267 if (add) { 1268 D2(vswp, "%s: adding multicast", __func__); 1269 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1270 /* 1271 * Update the list of multicast addresses 1272 * contained within the vsw_t structure to 1273 * include this new one. 1274 */ 1275 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP); 1276 if (mcst_p == NULL) { 1277 DERR(vswp, "%s unable to alloc mem", __func__); 1278 (void) vsw_del_mcst(vswp, 1279 VSW_LOCALDEV, addr, NULL); 1280 return (1); 1281 } 1282 mcst_p->addr = addr; 1283 ether_copy(mca, &mcst_p->mca); 1284 1285 /* 1286 * Call into the underlying driver to program the 1287 * address into HW. 1288 */ 1289 ret = vsw_mac_multicast_add(vswp, NULL, mcst_p, 1290 VSW_LOCALDEV); 1291 if (ret != 0) { 1292 (void) vsw_del_mcst(vswp, 1293 VSW_LOCALDEV, addr, NULL); 1294 kmem_free(mcst_p, sizeof (*mcst_p)); 1295 return (ret); 1296 } 1297 1298 mutex_enter(&vswp->mca_lock); 1299 mcst_p->nextp = vswp->mcap; 1300 vswp->mcap = mcst_p; 1301 mutex_exit(&vswp->mca_lock); 1302 } else { 1303 cmn_err(CE_WARN, "!vsw%d: unable to add multicast " 1304 "address", vswp->instance); 1305 } 1306 return (ret); 1307 } 1308 1309 D2(vswp, "%s: removing multicast", __func__); 1310 /* 1311 * Remove the address from the hash table.. 1312 */ 1313 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1314 1315 /* 1316 * ..and then from the list maintained in the 1317 * vsw_t structure. 1318 */ 1319 mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr); 1320 ASSERT(mcst_p != NULL); 1321 1322 vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV); 1323 kmem_free(mcst_p, sizeof (*mcst_p)); 1324 } 1325 1326 D1(vswp, "%s: exit", __func__); 1327 1328 return (0); 1329 } 1330 1331 static int 1332 vsw_m_promisc(void *arg, boolean_t on) 1333 { 1334 vsw_t *vswp = (vsw_t *)arg; 1335 1336 D1(vswp, "%s: enter", __func__); 1337 1338 WRITE_ENTER(&vswp->if_lockrw); 1339 if (on) 1340 vswp->if_state |= VSW_IF_PROMISC; 1341 else 1342 vswp->if_state &= ~VSW_IF_PROMISC; 1343 RW_EXIT(&vswp->if_lockrw); 1344 1345 D1(vswp, "%s: exit", __func__); 1346 1347 return (0); 1348 } 1349 1350 static mblk_t * 1351 vsw_m_tx(void *arg, mblk_t *mp) 1352 { 1353 vsw_t *vswp = (vsw_t *)arg; 1354 1355 D1(vswp, "%s: enter", __func__); 1356 1357 mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp); 1358 1359 if (mp == NULL) { 1360 return (NULL); 1361 } 1362 1363 vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL); 1364 1365 D1(vswp, "%s: exit", __func__); 1366 1367 return (NULL); 1368 } 1369 1370 /* 1371 * Register for machine description (MD) updates. 1372 * 1373 * Returns 0 on success, 1 on failure. 1374 */ 1375 static int 1376 vsw_mdeg_register(vsw_t *vswp) 1377 { 1378 mdeg_prop_spec_t *pspecp; 1379 mdeg_node_spec_t *inst_specp; 1380 mdeg_handle_t mdeg_hdl, mdeg_port_hdl; 1381 size_t templatesz; 1382 int rv; 1383 1384 D1(vswp, "%s: enter", __func__); 1385 1386 /* 1387 * Allocate and initialize a per-instance copy 1388 * of the global property spec array that will 1389 * uniquely identify this vsw instance. 1390 */ 1391 templatesz = sizeof (vsw_prop_template); 1392 pspecp = kmem_zalloc(templatesz, KM_SLEEP); 1393 1394 bcopy(vsw_prop_template, pspecp, templatesz); 1395 1396 VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop); 1397 1398 /* initialize the complete prop spec structure */ 1399 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 1400 inst_specp->namep = "virtual-device"; 1401 inst_specp->specp = pspecp; 1402 1403 D2(vswp, "%s: instance %d registering with mdeg", __func__, 1404 vswp->regprop); 1405 /* 1406 * Register an interest in 'virtual-device' nodes with a 1407 * 'name' property of 'virtual-network-switch' 1408 */ 1409 rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb, 1410 (void *)vswp, &mdeg_hdl); 1411 if (rv != MDEG_SUCCESS) { 1412 DERR(vswp, "%s: mdeg_register failed (%d) for vsw node", 1413 __func__, rv); 1414 goto mdeg_reg_fail; 1415 } 1416 1417 /* 1418 * Register an interest in 'vsw-port' nodes. 1419 */ 1420 rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb, 1421 (void *)vswp, &mdeg_port_hdl); 1422 if (rv != MDEG_SUCCESS) { 1423 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv); 1424 (void) mdeg_unregister(mdeg_hdl); 1425 goto mdeg_reg_fail; 1426 } 1427 1428 /* save off data that will be needed later */ 1429 vswp->inst_spec = inst_specp; 1430 vswp->mdeg_hdl = mdeg_hdl; 1431 vswp->mdeg_port_hdl = mdeg_port_hdl; 1432 1433 D1(vswp, "%s: exit", __func__); 1434 return (0); 1435 1436 mdeg_reg_fail: 1437 cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks", 1438 vswp->instance); 1439 kmem_free(pspecp, templatesz); 1440 kmem_free(inst_specp, sizeof (mdeg_node_spec_t)); 1441 1442 vswp->mdeg_hdl = NULL; 1443 vswp->mdeg_port_hdl = NULL; 1444 1445 return (1); 1446 } 1447 1448 static void 1449 vsw_mdeg_unregister(vsw_t *vswp) 1450 { 1451 D1(vswp, "vsw_mdeg_unregister: enter"); 1452 1453 if (vswp->mdeg_hdl != NULL) 1454 (void) mdeg_unregister(vswp->mdeg_hdl); 1455 1456 if (vswp->mdeg_port_hdl != NULL) 1457 (void) mdeg_unregister(vswp->mdeg_port_hdl); 1458 1459 if (vswp->inst_spec != NULL) { 1460 if (vswp->inst_spec->specp != NULL) { 1461 (void) kmem_free(vswp->inst_spec->specp, 1462 sizeof (vsw_prop_template)); 1463 vswp->inst_spec->specp = NULL; 1464 } 1465 1466 (void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t)); 1467 vswp->inst_spec = NULL; 1468 } 1469 1470 D1(vswp, "vsw_mdeg_unregister: exit"); 1471 } 1472 1473 /* 1474 * Mdeg callback invoked for the vsw node itself. 1475 */ 1476 static int 1477 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1478 { 1479 vsw_t *vswp; 1480 md_t *mdp; 1481 mde_cookie_t node; 1482 uint64_t inst; 1483 char *node_name = NULL; 1484 1485 if (resp == NULL) 1486 return (MDEG_FAILURE); 1487 1488 vswp = (vsw_t *)cb_argp; 1489 1490 D1(vswp, "%s: added %d : removed %d : curr matched %d" 1491 " : prev matched %d", __func__, resp->added.nelem, 1492 resp->removed.nelem, resp->match_curr.nelem, 1493 resp->match_prev.nelem); 1494 1495 /* 1496 * We get an initial callback for this node as 'added' 1497 * after registering with mdeg. Note that we would have 1498 * already gathered information about this vsw node by 1499 * walking MD earlier during attach (in vsw_read_mdprops()). 1500 * So, there is a window where the properties of this 1501 * node might have changed when we get this initial 'added' 1502 * callback. We handle this as if an update occured 1503 * and invoke the same function which handles updates to 1504 * the properties of this vsw-node if any. 1505 * 1506 * A non-zero 'match' value indicates that the MD has been 1507 * updated and that a virtual-network-switch node is 1508 * present which may or may not have been updated. It is 1509 * up to the clients to examine their own nodes and 1510 * determine if they have changed. 1511 */ 1512 if (resp->added.nelem != 0) { 1513 1514 if (resp->added.nelem != 1) { 1515 cmn_err(CE_NOTE, "!vsw%d: number of nodes added " 1516 "invalid: %d\n", vswp->instance, resp->added.nelem); 1517 return (MDEG_FAILURE); 1518 } 1519 1520 mdp = resp->added.mdp; 1521 node = resp->added.mdep[0]; 1522 1523 } else if (resp->match_curr.nelem != 0) { 1524 1525 if (resp->match_curr.nelem != 1) { 1526 cmn_err(CE_NOTE, "!vsw%d: number of nodes updated " 1527 "invalid: %d\n", vswp->instance, 1528 resp->match_curr.nelem); 1529 return (MDEG_FAILURE); 1530 } 1531 1532 mdp = resp->match_curr.mdp; 1533 node = resp->match_curr.mdep[0]; 1534 1535 } else { 1536 return (MDEG_FAILURE); 1537 } 1538 1539 /* Validate name and instance */ 1540 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { 1541 DERR(vswp, "%s: unable to get node name\n", __func__); 1542 return (MDEG_FAILURE); 1543 } 1544 1545 /* is this a virtual-network-switch? */ 1546 if (strcmp(node_name, vsw_propname) != 0) { 1547 DERR(vswp, "%s: Invalid node name: %s\n", 1548 __func__, node_name); 1549 return (MDEG_FAILURE); 1550 } 1551 1552 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { 1553 DERR(vswp, "%s: prop(cfg-handle) not found\n", 1554 __func__); 1555 return (MDEG_FAILURE); 1556 } 1557 1558 /* is this the right instance of vsw? */ 1559 if (inst != vswp->regprop) { 1560 DERR(vswp, "%s: Invalid cfg-handle: %lx\n", 1561 __func__, inst); 1562 return (MDEG_FAILURE); 1563 } 1564 1565 vsw_update_md_prop(vswp, mdp, node); 1566 1567 return (MDEG_SUCCESS); 1568 } 1569 1570 /* 1571 * Mdeg callback invoked for changes to the vsw-port nodes 1572 * under the vsw node. 1573 */ 1574 static int 1575 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1576 { 1577 vsw_t *vswp; 1578 int idx; 1579 md_t *mdp; 1580 mde_cookie_t node; 1581 uint64_t inst; 1582 int rv; 1583 1584 if ((resp == NULL) || (cb_argp == NULL)) 1585 return (MDEG_FAILURE); 1586 1587 vswp = (vsw_t *)cb_argp; 1588 1589 D2(vswp, "%s: added %d : removed %d : curr matched %d" 1590 " : prev matched %d", __func__, resp->added.nelem, 1591 resp->removed.nelem, resp->match_curr.nelem, 1592 resp->match_prev.nelem); 1593 1594 /* process added ports */ 1595 for (idx = 0; idx < resp->added.nelem; idx++) { 1596 mdp = resp->added.mdp; 1597 node = resp->added.mdep[idx]; 1598 1599 D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); 1600 1601 if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) { 1602 cmn_err(CE_WARN, "!vsw%d: Unable to add new port " 1603 "(0x%lx), err=%d", vswp->instance, node, rv); 1604 } 1605 } 1606 1607 /* process removed ports */ 1608 for (idx = 0; idx < resp->removed.nelem; idx++) { 1609 mdp = resp->removed.mdp; 1610 node = resp->removed.mdep[idx]; 1611 1612 if (md_get_prop_val(mdp, node, id_propname, &inst)) { 1613 DERR(vswp, "%s: prop(%s) not found in port(%d)", 1614 __func__, id_propname, idx); 1615 continue; 1616 } 1617 1618 D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); 1619 1620 if (vsw_port_detach(vswp, inst) != 0) { 1621 cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld", 1622 vswp->instance, inst); 1623 } 1624 } 1625 1626 for (idx = 0; idx < resp->match_curr.nelem; idx++) { 1627 (void) vsw_port_update(vswp, resp->match_curr.mdp, 1628 resp->match_curr.mdep[idx], 1629 resp->match_prev.mdp, 1630 resp->match_prev.mdep[idx]); 1631 } 1632 1633 D1(vswp, "%s: exit", __func__); 1634 1635 return (MDEG_SUCCESS); 1636 } 1637 1638 /* 1639 * Scan the machine description for this instance of vsw 1640 * and read its properties. Called only from vsw_attach(). 1641 * Returns: 0 on success, 1 on failure. 1642 */ 1643 static int 1644 vsw_read_mdprops(vsw_t *vswp) 1645 { 1646 md_t *mdp = NULL; 1647 mde_cookie_t rootnode; 1648 mde_cookie_t *listp = NULL; 1649 uint64_t inst; 1650 uint64_t cfgh; 1651 char *name; 1652 int rv = 1; 1653 int num_nodes = 0; 1654 int num_devs = 0; 1655 int listsz = 0; 1656 int i; 1657 1658 /* 1659 * In each 'virtual-device' node in the MD there is a 1660 * 'cfg-handle' property which is the MD's concept of 1661 * an instance number (this may be completely different from 1662 * the device drivers instance #). OBP reads that value and 1663 * stores it in the 'reg' property of the appropriate node in 1664 * the device tree. We first read this reg property and use this 1665 * to compare against the 'cfg-handle' property of vsw nodes 1666 * in MD to get to this specific vsw instance and then read 1667 * other properties that we are interested in. 1668 * We also cache the value of 'reg' property and use it later 1669 * to register callbacks with mdeg (see vsw_mdeg_register()) 1670 */ 1671 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 1672 DDI_PROP_DONTPASS, reg_propname, -1); 1673 if (inst == -1) { 1674 cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from " 1675 "OBP device tree", vswp->instance, reg_propname); 1676 return (rv); 1677 } 1678 1679 vswp->regprop = inst; 1680 1681 if ((mdp = md_get_handle()) == NULL) { 1682 DWARN(vswp, "%s: cannot init MD\n", __func__); 1683 return (rv); 1684 } 1685 1686 num_nodes = md_node_count(mdp); 1687 ASSERT(num_nodes > 0); 1688 1689 listsz = num_nodes * sizeof (mde_cookie_t); 1690 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 1691 1692 rootnode = md_root_node(mdp); 1693 1694 /* search for all "virtual_device" nodes */ 1695 num_devs = md_scan_dag(mdp, rootnode, 1696 md_find_name(mdp, vdev_propname), 1697 md_find_name(mdp, "fwd"), listp); 1698 if (num_devs <= 0) { 1699 DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs); 1700 goto vsw_readmd_exit; 1701 } 1702 1703 /* 1704 * Now loop through the list of virtual-devices looking for 1705 * devices with name "virtual-network-switch" and for each 1706 * such device compare its instance with what we have from 1707 * the 'reg' property to find the right node in MD and then 1708 * read all its properties. 1709 */ 1710 for (i = 0; i < num_devs; i++) { 1711 1712 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 1713 DWARN(vswp, "%s: name property not found\n", 1714 __func__); 1715 goto vsw_readmd_exit; 1716 } 1717 1718 /* is this a virtual-network-switch? */ 1719 if (strcmp(name, vsw_propname) != 0) 1720 continue; 1721 1722 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 1723 DWARN(vswp, "%s: cfg-handle property not found\n", 1724 __func__); 1725 goto vsw_readmd_exit; 1726 } 1727 1728 /* is this the required instance of vsw? */ 1729 if (inst != cfgh) 1730 continue; 1731 1732 /* now read all properties of this vsw instance */ 1733 rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]); 1734 break; 1735 } 1736 1737 vsw_readmd_exit: 1738 1739 kmem_free(listp, listsz); 1740 (void) md_fini_handle(mdp); 1741 return (rv); 1742 } 1743 1744 /* 1745 * Read the initial start-of-day values from the specified MD node. 1746 */ 1747 static int 1748 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1749 { 1750 uint64_t macaddr = 0; 1751 1752 D1(vswp, "%s: enter", __func__); 1753 1754 if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) { 1755 return (1); 1756 } 1757 1758 /* mac address for vswitch device itself */ 1759 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1760 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1761 vswp->instance); 1762 return (1); 1763 } 1764 1765 vsw_save_lmacaddr(vswp, macaddr); 1766 1767 if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) { 1768 DWARN(vswp, "%s: Unable to read %s property from MD, " 1769 "defaulting to 'switched' mode", 1770 __func__, smode_propname); 1771 1772 vswp->smode = VSW_LAYER2; 1773 } 1774 1775 /* 1776 * Read the 'linkprop' property to know if this 1777 * vsw device wants to get physical link updates. 1778 */ 1779 vsw_linkprop_read(vswp, mdp, node, &vswp->pls_update); 1780 1781 /* read mtu */ 1782 vsw_mtu_read(vswp, mdp, node, &vswp->mtu); 1783 if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) { 1784 vswp->mtu = ETHERMTU; 1785 } 1786 vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) + 1787 VLAN_TAGSZ; 1788 1789 /* read vlan id properties of this vsw instance */ 1790 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid, 1791 &vswp->vids, &vswp->nvids, &vswp->default_vlan_id); 1792 1793 /* read priority-ether-types */ 1794 vsw_read_pri_eth_types(vswp, mdp, node); 1795 1796 /* read bandwidth property of this vsw instance */ 1797 vsw_bandwidth_read(vswp, mdp, node, &vswp->bandwidth); 1798 1799 D1(vswp, "%s: exit", __func__); 1800 return (0); 1801 } 1802 1803 /* 1804 * Read vlan id properties of the given MD node. 1805 * Arguments: 1806 * arg: device argument(vsw device or a port) 1807 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port) 1808 * mdp: machine description 1809 * node: md node cookie 1810 * 1811 * Returns: 1812 * pvidp: port-vlan-id of the node 1813 * vidspp: list of vlan-ids of the node 1814 * nvidsp: # of vlan-ids in the list 1815 * default_idp: default-vlan-id of the node(if node is vsw device) 1816 */ 1817 static void 1818 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node, 1819 uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp, 1820 uint16_t *default_idp) 1821 { 1822 vsw_t *vswp; 1823 vsw_port_t *portp; 1824 char *pvid_propname; 1825 char *vid_propname; 1826 uint_t nvids = 0; 1827 uint32_t vids_size; 1828 int rv; 1829 int i; 1830 uint64_t *data; 1831 uint64_t val; 1832 int size; 1833 int inst; 1834 1835 if (type == VSW_LOCALDEV) { 1836 1837 vswp = (vsw_t *)arg; 1838 pvid_propname = vsw_pvid_propname; 1839 vid_propname = vsw_vid_propname; 1840 inst = vswp->instance; 1841 1842 } else if (type == VSW_VNETPORT) { 1843 1844 portp = (vsw_port_t *)arg; 1845 vswp = portp->p_vswp; 1846 pvid_propname = port_pvid_propname; 1847 vid_propname = port_vid_propname; 1848 inst = portp->p_instance; 1849 1850 } else { 1851 return; 1852 } 1853 1854 if (type == VSW_LOCALDEV && default_idp != NULL) { 1855 rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val); 1856 if (rv != 0) { 1857 DWARN(vswp, "%s: prop(%s) not found", __func__, 1858 vsw_dvid_propname); 1859 1860 *default_idp = vsw_default_vlan_id; 1861 } else { 1862 *default_idp = val & 0xFFF; 1863 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1864 vsw_dvid_propname, inst, *default_idp); 1865 } 1866 } 1867 1868 rv = md_get_prop_val(mdp, node, pvid_propname, &val); 1869 if (rv != 0) { 1870 DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname); 1871 *pvidp = vsw_default_vlan_id; 1872 } else { 1873 1874 *pvidp = val & 0xFFF; 1875 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1876 pvid_propname, inst, *pvidp); 1877 } 1878 1879 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data, 1880 &size); 1881 if (rv != 0) { 1882 D2(vswp, "%s: prop(%s) not found", __func__, vid_propname); 1883 size = 0; 1884 } else { 1885 size /= sizeof (uint64_t); 1886 } 1887 nvids = size; 1888 1889 if (nvids != 0) { 1890 D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst); 1891 vids_size = sizeof (vsw_vlanid_t) * nvids; 1892 *vidspp = kmem_zalloc(vids_size, KM_SLEEP); 1893 for (i = 0; i < nvids; i++) { 1894 (*vidspp)[i].vl_vid = data[i] & 0xFFFF; 1895 (*vidspp)[i].vl_set = B_FALSE; 1896 D2(vswp, " %d ", (*vidspp)[i].vl_vid); 1897 } 1898 D2(vswp, "\n"); 1899 } 1900 1901 *nvidsp = nvids; 1902 } 1903 1904 static void 1905 vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp, mde_cookie_t node, 1906 uint64_t *bw) 1907 { 1908 int rv; 1909 uint64_t val; 1910 vsw_t *vswp; 1911 1912 vswp = portp->p_vswp; 1913 1914 rv = md_get_prop_val(mdp, node, port_maxbw_propname, &val); 1915 1916 if (rv != 0) { 1917 *bw = 0; 1918 D3(vswp, "%s: prop(%s) not found\n", __func__, 1919 port_maxbw_propname); 1920 } else { 1921 *bw = val; 1922 D3(vswp, "%s: %s nodes found", __func__, port_maxbw_propname); 1923 } 1924 } 1925 1926 /* 1927 * This function reads "priority-ether-types" property from md. This property 1928 * is used to enable support for priority frames. Applications which need 1929 * guaranteed and timely delivery of certain high priority frames to/from 1930 * a vnet or vsw within ldoms, should configure this property by providing 1931 * the ether type(s) for which the priority facility is needed. 1932 * Normal data frames are delivered over a ldc channel using the descriptor 1933 * ring mechanism which is constrained by factors such as descriptor ring size, 1934 * the rate at which the ring is processed at the peer ldc end point, etc. 1935 * The priority mechanism provides an Out-Of-Band path to send/receive frames 1936 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the 1937 * descriptor ring path and enables a more reliable and timely delivery of 1938 * frames to the peer. 1939 */ 1940 static void 1941 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1942 { 1943 int rv; 1944 uint16_t *types; 1945 uint64_t *data; 1946 int size; 1947 int i; 1948 size_t mblk_sz; 1949 1950 rv = md_get_prop_data(mdp, node, pri_types_propname, 1951 (uint8_t **)&data, &size); 1952 if (rv != 0) { 1953 /* 1954 * Property may not exist if we are running pre-ldoms1.1 f/w. 1955 * Check if 'vsw_pri_eth_type' has been set in that case. 1956 */ 1957 if (vsw_pri_eth_type != 0) { 1958 size = sizeof (vsw_pri_eth_type); 1959 data = &vsw_pri_eth_type; 1960 } else { 1961 D3(vswp, "%s: prop(%s) not found", __func__, 1962 pri_types_propname); 1963 size = 0; 1964 } 1965 } 1966 1967 if (size == 0) { 1968 vswp->pri_num_types = 0; 1969 return; 1970 } 1971 1972 /* 1973 * we have some priority-ether-types defined; 1974 * allocate a table of these types and also 1975 * allocate a pool of mblks to transmit these 1976 * priority packets. 1977 */ 1978 size /= sizeof (uint64_t); 1979 vswp->pri_num_types = size; 1980 vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP); 1981 for (i = 0, types = vswp->pri_types; i < size; i++) { 1982 types[i] = data[i] & 0xFFFF; 1983 } 1984 mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7; 1985 (void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, NULL, 1986 &vswp->pri_tx_vmp); 1987 } 1988 1989 static void 1990 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu) 1991 { 1992 int rv; 1993 int inst; 1994 uint64_t val; 1995 char *mtu_propname; 1996 1997 mtu_propname = vsw_mtu_propname; 1998 inst = vswp->instance; 1999 2000 rv = md_get_prop_val(mdp, node, mtu_propname, &val); 2001 if (rv != 0) { 2002 D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname); 2003 *mtu = vsw_ethermtu; 2004 } else { 2005 2006 *mtu = val & 0xFFFF; 2007 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 2008 mtu_propname, inst, *mtu); 2009 } 2010 } 2011 2012 /* 2013 * Update the mtu of the vsw device. We first check if the device has been 2014 * plumbed and if so fail the mtu update. Otherwise, we continue to update the 2015 * new mtu and reset all ports to initiate handshake re-negotiation with peers 2016 * using the new mtu. 2017 */ 2018 static int 2019 vsw_mtu_update(vsw_t *vswp, uint32_t mtu) 2020 { 2021 int rv; 2022 2023 WRITE_ENTER(&vswp->if_lockrw); 2024 2025 if (vswp->if_state & VSW_IF_UP) { 2026 2027 RW_EXIT(&vswp->if_lockrw); 2028 2029 cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update" 2030 " as the device is plumbed\n", vswp->instance); 2031 return (EBUSY); 2032 2033 } else { 2034 2035 D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n", 2036 __func__, vswp->mtu, mtu); 2037 2038 vswp->mtu = mtu; 2039 vswp->max_frame_size = vswp->mtu + 2040 sizeof (struct ether_header) + VLAN_TAGSZ; 2041 2042 rv = mac_maxsdu_update(vswp->if_mh, mtu); 2043 if (rv != 0) { 2044 cmn_err(CE_NOTE, 2045 "!vsw%d: Unable to update mtu with mac" 2046 " layer\n", vswp->instance); 2047 } 2048 2049 RW_EXIT(&vswp->if_lockrw); 2050 2051 /* Reset ports to renegotiate with the new mtu */ 2052 vsw_reset_ports(vswp); 2053 2054 } 2055 2056 return (0); 2057 } 2058 2059 static void 2060 vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 2061 boolean_t *pls) 2062 { 2063 int rv; 2064 uint64_t val; 2065 char *linkpropname; 2066 2067 linkpropname = vsw_linkprop_propname; 2068 2069 rv = md_get_prop_val(mdp, node, linkpropname, &val); 2070 if (rv != 0) { 2071 D3(vswp, "%s: prop(%s) not found", __func__, linkpropname); 2072 *pls = B_FALSE; 2073 } else { 2074 2075 *pls = (val & 0x1) ? B_TRUE : B_FALSE; 2076 D2(vswp, "%s: %s(%d): (%d)\n", __func__, linkpropname, 2077 vswp->instance, *pls); 2078 } 2079 } 2080 2081 void 2082 vsw_mac_link_update(vsw_t *vswp, link_state_t link_state) 2083 { 2084 READ_ENTER(&vswp->if_lockrw); 2085 2086 if (vswp->if_state & VSW_IF_REG) { 2087 mac_link_update(vswp->if_mh, link_state); 2088 } 2089 2090 RW_EXIT(&vswp->if_lockrw); 2091 } 2092 2093 void 2094 vsw_physlink_state_update(vsw_t *vswp) 2095 { 2096 if (vswp->pls_update == B_TRUE) { 2097 vsw_mac_link_update(vswp, vswp->phys_link_state); 2098 } 2099 vsw_physlink_update_ports(vswp); 2100 } 2101 2102 static void 2103 vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint64_t *bw) 2104 { 2105 /* read the vsw bandwidth from md */ 2106 int rv; 2107 uint64_t val; 2108 2109 rv = md_get_prop_val(mdp, node, vsw_maxbw_propname, &val); 2110 if (rv != 0) { 2111 *bw = 0; 2112 D3(vswp, "%s: prop(%s) not found", __func__, 2113 vsw_maxbw_propname); 2114 } else { 2115 *bw = val; 2116 D3(vswp, "%s: %s(%d): (%ld)\n", __func__, 2117 vsw_maxbw_propname, vswp->instance, *bw); 2118 } 2119 } 2120 2121 /* 2122 * Check to see if the relevant properties in the specified node have 2123 * changed, and if so take the appropriate action. 2124 * 2125 * If any of the properties are missing or invalid we don't take 2126 * any action, as this function should only be invoked when modifications 2127 * have been made to what we assume is a working configuration, which 2128 * we leave active. 2129 * 2130 * Note it is legal for this routine to be invoked even if none of the 2131 * properties in the port node within the MD have actually changed. 2132 */ 2133 static void 2134 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 2135 { 2136 char physname[LIFNAMSIZ]; 2137 char drv[LIFNAMSIZ]; 2138 uint_t ddi_instance; 2139 uint8_t new_smode; 2140 int i; 2141 uint64_t macaddr = 0; 2142 enum {MD_init = 0x1, 2143 MD_physname = 0x2, 2144 MD_macaddr = 0x4, 2145 MD_smode = 0x8, 2146 MD_vlans = 0x10, 2147 MD_mtu = 0x20, 2148 MD_pls = 0x40, 2149 MD_bw = 0x80} updated; 2150 int rv; 2151 uint16_t pvid; 2152 vsw_vlanid_t *vids; 2153 uint16_t nvids; 2154 uint32_t mtu; 2155 boolean_t pls_update; 2156 uint64_t maxbw; 2157 2158 updated = MD_init; 2159 2160 D1(vswp, "%s: enter", __func__); 2161 2162 /* 2163 * Check if name of physical device in MD has changed. 2164 */ 2165 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { 2166 /* 2167 * Do basic sanity check on new device name/instance, 2168 * if its non NULL. It is valid for the device name to 2169 * have changed from a non NULL to a NULL value, i.e. 2170 * the vsw is being changed to 'routed' mode. 2171 */ 2172 if ((strlen(physname) != 0) && 2173 (ddi_parse(physname, drv, 2174 &ddi_instance) != DDI_SUCCESS)) { 2175 cmn_err(CE_WARN, "!vsw%d: physical device %s is not" 2176 " a valid device name/instance", 2177 vswp->instance, physname); 2178 goto fail_reconf; 2179 } 2180 2181 if (strcmp(physname, vswp->physname)) { 2182 D2(vswp, "%s: device name changed from %s to %s", 2183 __func__, vswp->physname, physname); 2184 2185 updated |= MD_physname; 2186 } else { 2187 D2(vswp, "%s: device name unchanged at %s", 2188 __func__, vswp->physname); 2189 } 2190 } else { 2191 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " 2192 "device from updated MD.", vswp->instance); 2193 goto fail_reconf; 2194 } 2195 2196 /* 2197 * Check if MAC address has changed. 2198 */ 2199 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 2200 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 2201 vswp->instance); 2202 goto fail_reconf; 2203 } else { 2204 uint64_t maddr = macaddr; 2205 READ_ENTER(&vswp->if_lockrw); 2206 for (i = ETHERADDRL - 1; i >= 0; i--) { 2207 if (vswp->if_addr.ether_addr_octet[i] 2208 != (macaddr & 0xFF)) { 2209 D2(vswp, "%s: octet[%d] 0x%x != 0x%x", 2210 __func__, i, 2211 vswp->if_addr.ether_addr_octet[i], 2212 (macaddr & 0xFF)); 2213 updated |= MD_macaddr; 2214 macaddr = maddr; 2215 break; 2216 } 2217 macaddr >>= 8; 2218 } 2219 RW_EXIT(&vswp->if_lockrw); 2220 if (updated & MD_macaddr) { 2221 vsw_save_lmacaddr(vswp, macaddr); 2222 } 2223 } 2224 2225 /* 2226 * Check if switching modes have changed. 2227 */ 2228 if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) { 2229 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", 2230 vswp->instance, smode_propname); 2231 goto fail_reconf; 2232 } else { 2233 if (new_smode != vswp->smode) { 2234 D2(vswp, "%s: switching mode changed from %d to %d", 2235 __func__, vswp->smode, new_smode); 2236 2237 updated |= MD_smode; 2238 } 2239 } 2240 2241 /* Read the vlan ids */ 2242 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids, 2243 &nvids, NULL); 2244 2245 /* Determine if there are any vlan id updates */ 2246 if ((pvid != vswp->pvid) || /* pvid changed? */ 2247 (nvids != vswp->nvids) || /* # of vids changed? */ 2248 ((nvids != 0) && (vswp->nvids != 0) && /* vids changed? */ 2249 !vsw_cmp_vids(vids, vswp->vids, nvids))) { 2250 updated |= MD_vlans; 2251 } 2252 2253 /* Read mtu */ 2254 vsw_mtu_read(vswp, mdp, node, &mtu); 2255 if (mtu != vswp->mtu) { 2256 if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) { 2257 updated |= MD_mtu; 2258 } else { 2259 cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update" 2260 " as the specified value:%d is invalid\n", 2261 vswp->instance, mtu); 2262 } 2263 } 2264 2265 /* 2266 * Read the 'linkprop' property. 2267 */ 2268 vsw_linkprop_read(vswp, mdp, node, &pls_update); 2269 if (pls_update != vswp->pls_update) { 2270 updated |= MD_pls; 2271 } 2272 2273 /* Read bandwidth */ 2274 vsw_bandwidth_read(vswp, mdp, node, &maxbw); 2275 if (maxbw != vswp->bandwidth) { 2276 if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) { 2277 updated |= MD_bw; 2278 } else { 2279 cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth" 2280 " update as the specified value:%ld is invalid\n", 2281 vswp->instance, maxbw); 2282 } 2283 } 2284 2285 /* 2286 * Now make any changes which are needed... 2287 */ 2288 if (updated & MD_pls) { 2289 2290 /* save the updated property. */ 2291 vswp->pls_update = pls_update; 2292 2293 if (pls_update == B_FALSE) { 2294 /* 2295 * Phys link state update is now disabled for this vsw 2296 * interface. If we had previously reported a link-down 2297 * to the stack, undo that by sending a link-up. 2298 */ 2299 if (vswp->phys_link_state == LINK_STATE_DOWN) { 2300 vsw_mac_link_update(vswp, LINK_STATE_UP); 2301 } 2302 } else { 2303 /* 2304 * Phys link state update is now enabled. Send up an 2305 * update based on the current phys link state. 2306 */ 2307 if (vswp->smode & VSW_LAYER2) { 2308 vsw_mac_link_update(vswp, 2309 vswp->phys_link_state); 2310 } 2311 } 2312 2313 } 2314 2315 if (updated & (MD_physname | MD_smode | MD_mtu)) { 2316 2317 /* 2318 * Stop any pending thread to setup switching mode. 2319 */ 2320 vsw_setup_switching_stop(vswp); 2321 2322 /* Cleanup HybridIO */ 2323 vsw_hio_cleanup(vswp); 2324 2325 /* 2326 * Remove unicst, mcst addrs of vsw interface 2327 * and ports from the physdev. This also closes 2328 * the corresponding mac clients. 2329 */ 2330 vsw_unset_addrs(vswp); 2331 2332 /* 2333 * Stop, detach and close the old device.. 2334 */ 2335 mutex_enter(&vswp->mac_lock); 2336 vsw_mac_close(vswp); 2337 mutex_exit(&vswp->mac_lock); 2338 2339 /* 2340 * Update phys name. 2341 */ 2342 if (updated & MD_physname) { 2343 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", 2344 vswp->instance, vswp->physname, physname); 2345 (void) strncpy(vswp->physname, 2346 physname, strlen(physname) + 1); 2347 } 2348 2349 /* 2350 * Update array with the new switch mode values. 2351 */ 2352 if (updated & MD_smode) { 2353 vswp->smode = new_smode; 2354 } 2355 2356 /* Update mtu */ 2357 if (updated & MD_mtu) { 2358 rv = vsw_mtu_update(vswp, mtu); 2359 if (rv != 0) { 2360 goto fail_update; 2361 } 2362 } 2363 2364 /* 2365 * ..and attach, start the new device. 2366 */ 2367 rv = vsw_setup_switching(vswp); 2368 if (rv == EAGAIN) { 2369 /* 2370 * Unable to setup switching mode. 2371 * As the error is EAGAIN, schedule a thread to retry 2372 * and return. Programming addresses of ports and 2373 * vsw interface will be done by the thread when the 2374 * switching setup completes successfully. 2375 */ 2376 if (vsw_setup_switching_start(vswp) != 0) { 2377 goto fail_update; 2378 } 2379 return; 2380 2381 } else if (rv) { 2382 goto fail_update; 2383 } 2384 2385 vsw_setup_switching_post_process(vswp); 2386 } else if (updated & MD_macaddr) { 2387 /* 2388 * We enter here if only MD_macaddr is exclusively updated. 2389 * If MD_physname and/or MD_smode are also updated, then 2390 * as part of that, we would have implicitly processed 2391 * MD_macaddr update (above). 2392 */ 2393 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", 2394 vswp->instance, macaddr); 2395 2396 READ_ENTER(&vswp->if_lockrw); 2397 if (vswp->if_state & VSW_IF_UP) { 2398 /* reconfigure with new address */ 2399 vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0); 2400 2401 /* 2402 * Notify the MAC layer of the changed address. 2403 */ 2404 mac_unicst_update(vswp->if_mh, 2405 (uint8_t *)&vswp->if_addr); 2406 2407 } 2408 RW_EXIT(&vswp->if_lockrw); 2409 2410 } 2411 2412 if (updated & MD_vlans) { 2413 /* Remove existing vlan ids from the hash table. */ 2414 vsw_vlan_remove_ids(vswp, VSW_LOCALDEV); 2415 2416 if (vswp->if_state & VSW_IF_UP) { 2417 vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids); 2418 } else { 2419 if (vswp->nvids != 0) { 2420 kmem_free(vswp->vids, 2421 sizeof (vsw_vlanid_t) * vswp->nvids); 2422 } 2423 vswp->vids = vids; 2424 vswp->nvids = nvids; 2425 vswp->pvid = pvid; 2426 } 2427 2428 /* add these new vlan ids into hash table */ 2429 vsw_vlan_add_ids(vswp, VSW_LOCALDEV); 2430 } else { 2431 if (nvids != 0) { 2432 kmem_free(vids, sizeof (vsw_vlanid_t) * nvids); 2433 } 2434 } 2435 2436 if (updated & MD_bw) { 2437 vsw_update_bandwidth(vswp, NULL, VSW_LOCALDEV, maxbw); 2438 } 2439 2440 return; 2441 2442 fail_reconf: 2443 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); 2444 return; 2445 2446 fail_update: 2447 cmn_err(CE_WARN, "!vsw%d: re-configuration failed", 2448 vswp->instance); 2449 } 2450 2451 /* 2452 * Read the port's md properties. 2453 */ 2454 static int 2455 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 2456 md_t *mdp, mde_cookie_t *node) 2457 { 2458 uint64_t ldc_id; 2459 uint8_t *addrp; 2460 int i, addrsz; 2461 int num_nodes = 0, nchan = 0; 2462 int listsz = 0; 2463 mde_cookie_t *listp = NULL; 2464 struct ether_addr ea; 2465 uint64_t macaddr; 2466 uint64_t inst = 0; 2467 uint64_t val; 2468 2469 if (md_get_prop_val(mdp, *node, id_propname, &inst)) { 2470 DWARN(vswp, "%s: prop(%s) not found", __func__, 2471 id_propname); 2472 return (1); 2473 } 2474 2475 /* 2476 * Find the channel endpoint node(s) (which should be under this 2477 * port node) which contain the channel id(s). 2478 */ 2479 if ((num_nodes = md_node_count(mdp)) <= 0) { 2480 DERR(vswp, "%s: invalid number of nodes found (%d)", 2481 __func__, num_nodes); 2482 return (1); 2483 } 2484 2485 D2(vswp, "%s: %d nodes found", __func__, num_nodes); 2486 2487 /* allocate enough space for node list */ 2488 listsz = num_nodes * sizeof (mde_cookie_t); 2489 listp = kmem_zalloc(listsz, KM_SLEEP); 2490 2491 nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname), 2492 md_find_name(mdp, "fwd"), listp); 2493 2494 if (nchan <= 0) { 2495 DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname); 2496 kmem_free(listp, listsz); 2497 return (1); 2498 } 2499 2500 D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname); 2501 2502 /* use property from first node found */ 2503 if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) { 2504 DWARN(vswp, "%s: prop(%s) not found\n", __func__, 2505 id_propname); 2506 kmem_free(listp, listsz); 2507 return (1); 2508 } 2509 2510 /* don't need list any more */ 2511 kmem_free(listp, listsz); 2512 2513 D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id); 2514 2515 /* read mac-address property */ 2516 if (md_get_prop_data(mdp, *node, remaddr_propname, 2517 &addrp, &addrsz)) { 2518 DWARN(vswp, "%s: prop(%s) not found", 2519 __func__, remaddr_propname); 2520 return (1); 2521 } 2522 2523 if (addrsz < ETHERADDRL) { 2524 DWARN(vswp, "%s: invalid address size", __func__); 2525 return (1); 2526 } 2527 2528 macaddr = *((uint64_t *)addrp); 2529 D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr); 2530 2531 for (i = ETHERADDRL - 1; i >= 0; i--) { 2532 ea.ether_addr_octet[i] = macaddr & 0xFF; 2533 macaddr >>= 8; 2534 } 2535 2536 /* now update all properties into the port */ 2537 portp->p_vswp = vswp; 2538 portp->p_instance = inst; 2539 portp->addr_set = B_FALSE; 2540 ether_copy(&ea, &portp->p_macaddr); 2541 if (nchan > VSW_PORT_MAX_LDCS) { 2542 D2(vswp, "%s: using first of %d ldc ids", 2543 __func__, nchan); 2544 nchan = VSW_PORT_MAX_LDCS; 2545 } 2546 portp->num_ldcs = nchan; 2547 portp->ldc_ids = 2548 kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP); 2549 bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan); 2550 2551 /* read vlan id properties of this port node */ 2552 vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid, 2553 &portp->vids, &portp->nvids, NULL); 2554 2555 /* Check if hybrid property is present */ 2556 if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) { 2557 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2558 portp->p_hio_enabled = B_TRUE; 2559 } else { 2560 portp->p_hio_enabled = B_FALSE; 2561 } 2562 /* 2563 * Port hio capability determined after version 2564 * negotiation, i.e., when we know the peer is HybridIO capable. 2565 */ 2566 portp->p_hio_capable = B_FALSE; 2567 2568 /* Read bandwidth of this port */ 2569 vsw_port_read_bandwidth(portp, mdp, *node, &portp->p_bandwidth); 2570 2571 return (0); 2572 } 2573 2574 /* 2575 * Add a new port to the system. 2576 * 2577 * Returns 0 on success, 1 on failure. 2578 */ 2579 int 2580 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) 2581 { 2582 vsw_port_t *portp; 2583 int rv; 2584 2585 portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP); 2586 2587 rv = vsw_port_read_props(portp, vswp, mdp, node); 2588 if (rv != 0) { 2589 kmem_free(portp, sizeof (*portp)); 2590 return (1); 2591 } 2592 2593 rv = vsw_port_attach(portp); 2594 if (rv != 0) { 2595 DERR(vswp, "%s: failed to attach port", __func__); 2596 return (1); 2597 } 2598 2599 return (0); 2600 } 2601 2602 static int 2603 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 2604 md_t *prev_mdp, mde_cookie_t prev_mdex) 2605 { 2606 uint64_t cport_num; 2607 uint64_t pport_num; 2608 vsw_port_list_t *plistp; 2609 vsw_port_t *portp; 2610 uint16_t pvid; 2611 vsw_vlanid_t *vids; 2612 uint16_t nvids; 2613 uint64_t val; 2614 boolean_t hio_enabled = B_FALSE; 2615 uint64_t maxbw; 2616 enum {P_MD_init = 0x1, 2617 P_MD_vlans = 0x2, 2618 P_MD_hio = 0x4, 2619 P_MD_maxbw = 0x8} updated; 2620 2621 updated = P_MD_init; 2622 2623 /* 2624 * For now, we get port updates only if vlan ids changed. 2625 * We read the port num and do some sanity check. 2626 */ 2627 if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) { 2628 return (1); 2629 } 2630 2631 if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) { 2632 return (1); 2633 } 2634 if (cport_num != pport_num) 2635 return (1); 2636 2637 plistp = &(vswp->plist); 2638 2639 READ_ENTER(&plistp->lockrw); 2640 2641 portp = vsw_lookup_port(vswp, cport_num); 2642 if (portp == NULL) { 2643 RW_EXIT(&plistp->lockrw); 2644 return (1); 2645 } 2646 2647 /* Read the vlan ids */ 2648 vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid, 2649 &vids, &nvids, NULL); 2650 2651 /* Determine if there are any vlan id updates */ 2652 if ((pvid != portp->pvid) || /* pvid changed? */ 2653 (nvids != portp->nvids) || /* # of vids changed? */ 2654 ((nvids != 0) && (portp->nvids != 0) && /* vids changed? */ 2655 !vsw_cmp_vids(vids, portp->vids, nvids))) { 2656 updated |= P_MD_vlans; 2657 } 2658 2659 /* Check if hybrid property is present */ 2660 if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) { 2661 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2662 hio_enabled = B_TRUE; 2663 } 2664 2665 if (portp->p_hio_enabled != hio_enabled) { 2666 updated |= P_MD_hio; 2667 } 2668 2669 /* Check if maxbw property is present */ 2670 vsw_port_read_bandwidth(portp, curr_mdp, curr_mdex, &maxbw); 2671 if (maxbw != portp->p_bandwidth) { 2672 if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) { 2673 updated |= P_MD_maxbw; 2674 } else { 2675 cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth" 2676 " update for port %d as the specified value:%ld" 2677 " is invalid\n", 2678 vswp->instance, portp->p_instance, maxbw); 2679 } 2680 } 2681 2682 if (updated & P_MD_vlans) { 2683 /* Remove existing vlan ids from the hash table. */ 2684 vsw_vlan_remove_ids(portp, VSW_VNETPORT); 2685 2686 /* Reconfigure vlans with network device */ 2687 vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids); 2688 2689 /* add these new vlan ids into hash table */ 2690 vsw_vlan_add_ids(portp, VSW_VNETPORT); 2691 2692 /* reset the port if it is vlan unaware (ver < 1.3) */ 2693 vsw_vlan_unaware_port_reset(portp); 2694 } 2695 2696 if (updated & P_MD_hio) { 2697 vsw_hio_port_update(portp, hio_enabled); 2698 } 2699 2700 if (updated & P_MD_maxbw) { 2701 vsw_update_bandwidth(NULL, portp, VSW_VNETPORT, maxbw); 2702 } 2703 2704 RW_EXIT(&plistp->lockrw); 2705 2706 return (0); 2707 } 2708 2709 /* 2710 * vsw_mac_rx -- A common function to send packets to the interface. 2711 * By default this function check if the interface is UP or not, the 2712 * rest of the behaviour depends on the flags as below: 2713 * 2714 * VSW_MACRX_PROMISC -- Check if the promisc mode set or not. 2715 * VSW_MACRX_COPYMSG -- Make a copy of the message(s). 2716 * VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack. 2717 */ 2718 void 2719 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 2720 mblk_t *mp, vsw_macrx_flags_t flags) 2721 { 2722 mblk_t *mpt; 2723 2724 D1(vswp, "%s:enter\n", __func__); 2725 READ_ENTER(&vswp->if_lockrw); 2726 /* Check if the interface is up */ 2727 if (!(vswp->if_state & VSW_IF_UP)) { 2728 RW_EXIT(&vswp->if_lockrw); 2729 /* Free messages only if FREEMSG flag specified */ 2730 if (flags & VSW_MACRX_FREEMSG) { 2731 freemsgchain(mp); 2732 } 2733 D1(vswp, "%s:exit\n", __func__); 2734 return; 2735 } 2736 /* 2737 * If PROMISC flag is passed, then check if 2738 * the interface is in the PROMISC mode. 2739 * If not, drop the messages. 2740 */ 2741 if (flags & VSW_MACRX_PROMISC) { 2742 if (!(vswp->if_state & VSW_IF_PROMISC)) { 2743 RW_EXIT(&vswp->if_lockrw); 2744 /* Free messages only if FREEMSG flag specified */ 2745 if (flags & VSW_MACRX_FREEMSG) { 2746 freemsgchain(mp); 2747 } 2748 D1(vswp, "%s:exit\n", __func__); 2749 return; 2750 } 2751 } 2752 RW_EXIT(&vswp->if_lockrw); 2753 /* 2754 * If COPYMSG flag is passed, then make a copy 2755 * of the message chain and send up the copy. 2756 */ 2757 if (flags & VSW_MACRX_COPYMSG) { 2758 mp = copymsgchain(mp); 2759 if (mp == NULL) { 2760 D1(vswp, "%s:exit\n", __func__); 2761 return; 2762 } 2763 } 2764 2765 D2(vswp, "%s: sending up stack", __func__); 2766 2767 mpt = NULL; 2768 (void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt); 2769 if (mp != NULL) { 2770 mac_rx(vswp->if_mh, mrh, mp); 2771 } 2772 D1(vswp, "%s:exit\n", __func__); 2773 } 2774 2775 /* copy mac address of vsw into soft state structure */ 2776 static void 2777 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr) 2778 { 2779 int i; 2780 2781 WRITE_ENTER(&vswp->if_lockrw); 2782 for (i = ETHERADDRL - 1; i >= 0; i--) { 2783 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF; 2784 macaddr >>= 8; 2785 } 2786 RW_EXIT(&vswp->if_lockrw); 2787 } 2788 2789 /* Compare VLAN ids, array size expected to be same. */ 2790 static boolean_t 2791 vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids) 2792 { 2793 int i, j; 2794 uint16_t vid; 2795 2796 for (i = 0; i < nvids; i++) { 2797 vid = vids1[i].vl_vid; 2798 for (j = 0; j < nvids; j++) { 2799 if (vid == vids2[i].vl_vid) 2800 break; 2801 } 2802 if (j == nvids) { 2803 return (B_FALSE); 2804 } 2805 } 2806 return (B_TRUE); 2807 } 2808