1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/debug.h> 30 #include <sys/time.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/user.h> 34 #include <sys/stropts.h> 35 #include <sys/stream.h> 36 #include <sys/strlog.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/kmem.h> 41 #include <sys/conf.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/ksynch.h> 45 #include <sys/stat.h> 46 #include <sys/kstat.h> 47 #include <sys/vtrace.h> 48 #include <sys/strsun.h> 49 #include <sys/dlpi.h> 50 #include <sys/ethernet.h> 51 #include <net/if.h> 52 #include <sys/varargs.h> 53 #include <sys/machsystm.h> 54 #include <sys/modctl.h> 55 #include <sys/modhash.h> 56 #include <sys/mac_provider.h> 57 #include <sys/mac_ether.h> 58 #include <sys/taskq.h> 59 #include <sys/note.h> 60 #include <sys/mach_descrip.h> 61 #include <sys/mac_provider.h> 62 #include <sys/mdeg.h> 63 #include <sys/ldc.h> 64 #include <sys/vsw_fdb.h> 
65 #include <sys/vsw.h> 66 #include <sys/vio_mailbox.h> 67 #include <sys/vnet_mailbox.h> 68 #include <sys/vnet_common.h> 69 #include <sys/vio_util.h> 70 #include <sys/sdt.h> 71 #include <sys/atomic.h> 72 #include <sys/callb.h> 73 #include <sys/vlan.h> 74 75 /* 76 * Function prototypes. 77 */ 78 static int vsw_attach(dev_info_t *, ddi_attach_cmd_t); 79 static int vsw_detach(dev_info_t *, ddi_detach_cmd_t); 80 static int vsw_unattach(vsw_t *vswp); 81 static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *); 82 static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *); 83 void vsw_destroy_rxpools(void *); 84 85 /* MDEG routines */ 86 static int vsw_mdeg_register(vsw_t *vswp); 87 static void vsw_mdeg_unregister(vsw_t *vswp); 88 static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *); 89 static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *); 90 static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t); 91 static int vsw_read_mdprops(vsw_t *vswp); 92 static void vsw_vlan_read_ids(void *arg, int type, md_t *mdp, 93 mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp, 94 uint16_t *nvidsp, uint16_t *default_idp); 95 static void vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp, 96 mde_cookie_t node, uint64_t *bw); 97 static int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 98 md_t *mdp, mde_cookie_t *node); 99 static void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, 100 mde_cookie_t node); 101 static void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 102 uint32_t *mtu); 103 static int vsw_mtu_update(vsw_t *vswp, uint32_t mtu); 104 static void vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 105 boolean_t *pls); 106 static void vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 107 uint64_t *bw); 108 static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t); 109 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 110 static boolean_t vsw_cmp_vids(vsw_vlanid_t 
*vids1, 111 vsw_vlanid_t *vids2, int nvids); 112 113 /* Mac driver related routines */ 114 static int vsw_mac_register(vsw_t *); 115 static int vsw_mac_unregister(vsw_t *); 116 static int vsw_m_stat(void *, uint_t, uint64_t *); 117 static void vsw_m_stop(void *arg); 118 static int vsw_m_start(void *arg); 119 static int vsw_m_unicst(void *arg, const uint8_t *); 120 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *); 121 static int vsw_m_promisc(void *arg, boolean_t); 122 static mblk_t *vsw_m_tx(void *arg, mblk_t *); 123 void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state); 124 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 125 mblk_t *mp, vsw_macrx_flags_t flags); 126 void vsw_physlink_state_update(vsw_t *vswp); 127 128 /* 129 * Functions imported from other files. 130 */ 131 extern void vsw_setup_switching_thread(void *arg); 132 extern int vsw_setup_switching_start(vsw_t *vswp); 133 extern void vsw_setup_switching_stop(vsw_t *vswp); 134 extern int vsw_setup_switching(vsw_t *); 135 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, 136 vsw_port_t *port, mac_resource_handle_t mrh); 137 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 138 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 139 extern void vsw_del_mcst_vsw(vsw_t *); 140 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 141 extern void vsw_detach_ports(vsw_t *vswp); 142 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 143 extern int vsw_port_detach(vsw_t *vswp, int p_instance); 144 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 145 md_t *prev_mdp, mde_cookie_t prev_mdex); 146 extern int vsw_port_attach(vsw_port_t *port); 147 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 148 extern int vsw_mac_open(vsw_t *vswp); 149 extern void vsw_mac_close(vsw_t *vswp); 150 extern void vsw_mac_cleanup_ports(vsw_t *vswp); 151 extern void 
vsw_unset_addrs(vsw_t *vswp); 152 extern void vsw_setup_switching_post_process(vsw_t *vswp); 153 extern void vsw_create_vlans(void *arg, int type); 154 extern void vsw_destroy_vlans(void *arg, int type); 155 extern void vsw_vlan_add_ids(void *arg, int type); 156 extern void vsw_vlan_remove_ids(void *arg, int type); 157 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 158 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, 159 mblk_t **npt); 160 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 161 extern void vsw_hio_cleanup(vsw_t *vswp); 162 extern void vsw_hio_start_ports(vsw_t *vswp); 163 extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled); 164 extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int); 165 extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int); 166 extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid, 167 vsw_vlanid_t *new_vids, int new_nvids); 168 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type); 169 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type); 170 extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans, 171 uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids); 172 extern void vsw_reset_ports(vsw_t *vswp); 173 extern void vsw_port_reset(vsw_port_t *portp); 174 extern void vsw_physlink_update_ports(vsw_t *vswp); 175 extern void vsw_update_bandwidth(vsw_t *vswp, vsw_port_t *port, int type, 176 uint64_t maxbw); 177 178 /* 179 * Internal tunables. 
180 */ 181 int vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */ 182 int vsw_wretries = 100; /* # of write attempts */ 183 int vsw_setup_switching_delay = 3; /* setup sw timeout interval in sec */ 184 int vsw_mac_open_retries = 300; /* max # of mac_open() retries */ 185 /* 300*3 = 900sec(15min) of max tmout */ 186 int vsw_ldc_tx_delay = 5; /* delay(ticks) for tx retries */ 187 int vsw_ldc_tx_retries = 10; /* # of ldc tx retries */ 188 int vsw_ldc_retries = 5; /* # of ldc_close() retries */ 189 int vsw_ldc_delay = 1000; /* 1 ms delay for ldc_close() */ 190 boolean_t vsw_ldc_rxthr_enabled = B_TRUE; /* LDC Rx thread enabled */ 191 boolean_t vsw_ldc_txthr_enabled = B_TRUE; /* LDC Tx thread enabled */ 192 int vsw_rxpool_cleanup_delay = 100000; /* 100ms */ 193 194 195 uint32_t vsw_fdb_nchains = 8; /* # of chains in fdb hash table */ 196 uint32_t vsw_vlan_nchains = 4; /* # of chains in vlan id hash table */ 197 uint32_t vsw_ethermtu = 1500; /* mtu of the device */ 198 199 /* delay in usec to wait for all references on a fdb entry to be dropped */ 200 uint32_t vsw_fdbe_refcnt_delay = 10; 201 202 /* 203 * Default vlan id. This is only used internally when the "default-vlan-id" 204 * property is not present in the MD device node. Therefore, this should not be 205 * used as a tunable; if this value is changed, the corresponding variable 206 * should be updated to the same value in all vnets connected to this vsw. 207 */ 208 uint16_t vsw_default_vlan_id = 1; 209 210 /* 211 * Workaround for a version handshake bug in obp's vnet. 212 * If vsw initiates version negotiation starting from the highest version, 213 * obp sends a nack and terminates version handshake. To workaround 214 * this, we do not initiate version handshake when the channel comes up. 215 * Instead, we wait for the peer to send its version info msg and go through 216 * the version protocol exchange. 
If we successfully negotiate a version, 217 * before sending the ack, we send our version info msg to the peer 218 * using the <major,minor> version that we are about to ack. 219 */ 220 boolean_t vsw_obp_ver_proto_workaround = B_TRUE; 221 222 /* 223 * In the absence of "priority-ether-types" property in MD, the following 224 * internal tunable can be set to specify a single priority ethertype. 225 */ 226 uint64_t vsw_pri_eth_type = 0; 227 228 /* 229 * Number of transmit priority buffers that are preallocated per device. 230 * This number is chosen to be a small value to throttle transmission 231 * of priority packets. Note: Must be a power of 2 for vio_create_mblks(). 232 */ 233 uint32_t vsw_pri_tx_nmblks = 64; 234 235 /* 236 * Number of RARP packets sent to announce macaddr to the physical switch, 237 * after vsw's physical device is changed dynamically or after a guest (client 238 * vnet) is live migrated in. 239 */ 240 uint32_t vsw_publish_macaddr_count = 3; 241 242 /* 243 * Enable/disable HybridIO 244 */ 245 boolean_t vsw_hio_enabled = B_TRUE; 246 247 /* 248 * Max retries for HybridIO cleanup 249 */ 250 int vsw_hio_max_cleanup_retries = 10; 251 252 /* 253 * 10ms delay for HybridIO cleanup 254 */ 255 int vsw_hio_cleanup_delay = 10000; 256 257 /* 258 * Descriptor ring modes of LDC data transfer: 259 * 260 * 1) TxDring mode: 261 * In versions < v1.6 of VIO Protocol, we support only TxDring mode. In this 262 * mode, we create a transmit descriptor ring and export it to the peer through 263 * dring registration process of handshake. The descriptor ring is exported 264 * using LDC shared memory. Each descriptor is associated with a data buffer. 265 * The data buffer is also exported over LDC and the cookies for this data 266 * buffer are provided in the descriptor. The peer maps this ring as its 267 * receive ring. Similarly, the peer exports a transmit descriptor ring which 268 * is mapped by this device as its receive ring. 
In this mode, in a given data 269 * transfer direction, the transmitter copies the data to the exported data 270 * buffer (owned by itself), bound to the descriptor. The receiver uses the LDC 271 * cookies specified in the descriptor to copy the data into the receiving 272 * guest through the hypervisor (ldc_mem_copy()). 273 * 274 * 2) RxDringData mode: 275 * In versions >= v1.6 of VIO Protocol, we also support RxDringData mode. In 276 * this mode, we create a receive descriptor ring and export it to the peer 277 * through dring registration process of handshake. In addition, we export a 278 * receive buffer area and provide that information also in the dring 279 * registration message. The descriptor ring and the data buffer area are 280 * exported using LDC shared memory. Each descriptor is associated with a data 281 * buffer in the data buffer area and the offset of the specific data buffer 282 * within this area is specified in the descriptor. The peer maps this ring 283 * along with the data buffer area as its transmit ring. Similarly, the peer 284 * exports a receive ring which is mapped by this device as its transmit ring, 285 * along with its buffer area. In this mode, in a given data transfer 286 * direction, the transmitter copies the data to the data buffer offset 287 * specified in the descriptor. The receiver simply picks up the data buffer 288 * (owned by itself) without any copy operation into the receiving guest. 289 * 290 * We provide a tunable to enable RxDringData mode for versions >= v1.6 of VIO 291 * Protocol. By default, this tunable is set to 1 (VIO_TX_DRING). To enable 292 * RxDringData mode set this tunable to 4 (VIO_RX_DRING_DATA). This enables us 293 * to negotiate RxDringData mode with peers that support versions >= v1.6. For 294 * peers that support version < v1.6, we continue to operate in TxDring mode 295 * with them though the tunable is enabled. 
296 */ 297 uint8_t vsw_dring_mode = VIO_TX_DRING; 298 299 /* 300 * Number of descriptors; must be power of 2. 301 */ 302 uint32_t vsw_num_descriptors = VSW_NUM_DESCRIPTORS; 303 304 /* 305 * In RxDringData mode, # of buffers is determined by multiplying the # of 306 * descriptors with the factor below. Note that the factor must be > 1; i.e, 307 * the # of buffers must always be > # of descriptors. This is needed because, 308 * while the shared memory buffers are sent up the stack on the receiver, the 309 * sender needs additional buffers that can be used for further transmits. 310 * See vsw_setup_rx_dring() for details. 311 */ 312 uint32_t vsw_nrbufs_factor = 2; 313 314 /* 315 * Delay when rx descr not ready; used in both dring modes. 316 */ 317 int vsw_recv_delay = 0; 318 319 /* 320 * Retry when rx descr not ready; used in both dring modes. 321 */ 322 int vsw_recv_retries = 5; 323 324 /* 325 * Max number of mblks received in one receive operation. 326 */ 327 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6); 328 329 /* 330 * Internal tunables for receive buffer pools, that is, the size and number of 331 * mblks for each pool. At least 3 sizes must be specified if these are used. 332 * The sizes must be specified in increasing order. Non-zero value of the first 333 * size will be used as a hint to use these values instead of the algorithm 334 * that determines the sizes based on MTU. Used in TxDring mode only. 
335 */ 336 uint32_t vsw_mblk_size1 = 0; 337 uint32_t vsw_mblk_size2 = 0; 338 uint32_t vsw_mblk_size3 = 0; 339 uint32_t vsw_mblk_size4 = 0; 340 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS; /* number of mblks for pool1 */ 341 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS; /* number of mblks for pool2 */ 342 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS; /* number of mblks for pool3 */ 343 uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS; /* number of mblks for pool4 */ 344 345 /* 346 * Set this to non-zero to enable additional internal receive buffer pools 347 * based on the MTU of the device for better performance at the cost of more 348 * memory consumption. This is turned off by default, to use allocb(9F) for 349 * receive buffer allocations of sizes > 2K. 350 */ 351 boolean_t vsw_jumbo_rxpools = B_FALSE; 352 353 /* 354 * vsw_max_tx_qcount is the maximum # of packets that can be queued 355 * before the tx worker thread begins processing the queue. Its value 356 * is chosen to be 4x the default length of tx descriptor ring. 
 */
uint32_t vsw_max_tx_qcount = 4 * VSW_NUM_DESCRIPTORS;

/*
 * MAC callbacks. This table is handed to the MAC layer through the
 * m_callbacks member of the mac_register_t filled in by vsw_mac_register().
 */
static	mac_callbacks_t	vsw_m_callbacks = {
	0,
	vsw_m_stat,
	vsw_m_start,
	vsw_m_stop,
	vsw_m_promisc,
	vsw_m_multicst,
	vsw_m_unicst,
	vsw_m_tx
};

/*
 * Character/block entry points. The driver exposes no data path through the
 * device node: open and close are no-ops (nulldev) and every other entry
 * point is nodev.
 */
static struct	cb_ops	vsw_cb_ops = {
	nulldev,			/* cb_open */
	nulldev,			/* cb_close */
	nodev,				/* cb_strategy */
	nodev,				/* cb_print */
	nodev,				/* cb_dump */
	nodev,				/* cb_read */
	nodev,				/* cb_write */
	nodev,				/* cb_ioctl */
	nodev,				/* cb_devmap */
	nodev,				/* cb_mmap */
	nodev,				/* cb_segmap */
	nochpoll,			/* cb_chpoll */
	ddi_prop_op,			/* cb_prop_op */
	NULL,				/* cb_stream */
	D_MP,				/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};

/*
 * Device operations. The same structure is passed to mac_init_ops() and
 * mac_fini_ops() in _init()/_fini() and is referenced by vswmodldrv below.
 */
static struct	dev_ops	vsw_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vsw_attach,		/* devo_attach */
	vsw_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&vsw_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	ddi_power		/* devo_power */
};

/* Loadable-driver linkage element; referenced from modlinkage. */
extern	struct	mod_ops	mod_driverops;
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch",
	&vsw_ops,
};

/*
 * Acquire/release all three per-channel locks. The acquisition order is
 * ldc_cblock -> ldc_rxlock -> ldc_txlock and the release order is the exact
 * reverse.
 *
 * NOTE(review): both macros expand to several statements and already carry a
 * trailing semicolon, so they must not be used as the sole body of an
 * unbraced if/else/loop.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
				mutex_enter(&((ldcp)->ldc_cblock));\
				mutex_enter(&((ldcp)->ldc_rxlock));\
				mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
				mutex_exit(&((ldcp)->ldc_txlock));\
				mutex_exit(&((ldcp)->ldc_rxlock));\
				mutex_exit(&((ldcp)->ldc_cblock));

/* Driver soft state ptr; handle used with the ddi_soft_state_*() routines */
static void	*vsw_state;

/*
 * Linked list of "vsw_t" structures - one per instance.
 * vsw_attach() inserts at the head and vsw_detach() unlinks, both while
 * holding vsw_rw as writer.
 */
vsw_t		*vsw_head = NULL;
krwlock_t	vsw_rw;

/*
 * Property names
 */
static char vdev_propname[] = "virtual-device";
static char vsw_propname[] = "virtual-network-switch";
static char physdev_propname[] = "vsw-phys-dev";
static char smode_propname[] = "vsw-switch-mode";
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";
static char pri_types_propname[] = "priority-ether-types";
static char vsw_pvid_propname[] = "port-vlan-id";
static char vsw_vid_propname[] = "vlan-id";
static char vsw_dvid_propname[] = "default-vlan-id";
static char port_pvid_propname[] = "remote-port-vlan-id";
static char port_vid_propname[] = "remote-vlan-id";
static char hybrid_propname[] = "hybrid";
static char vsw_mtu_propname[] = "mtu";
static char vsw_linkprop_propname[] = "linkprop";
/* the switch-level and port-level bandwidth properties share one name */
static char vsw_maxbw_propname[] = "maxbw";
static char port_maxbw_propname[] = "maxbw";

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device-port' nodes identified by their
 * 'id' property.
 */
static md_prop_match_t vport_prop_match[] = {
	{ MDET_PROP_VAL,    "id"   },
	{ MDET_LIST_END,    NULL    }
};

static mdeg_node_match_t vport_match = { "virtual-device-port",
						vport_prop_match };

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
 * by their 'name' and 'cfg-handle' properties.
476 */ 477 static md_prop_match_t vdev_prop_match[] = { 478 { MDET_PROP_STR, "name" }, 479 { MDET_PROP_VAL, "cfg-handle" }, 480 { MDET_LIST_END, NULL } 481 }; 482 483 static mdeg_node_match_t vdev_match = { "virtual-device", 484 vdev_prop_match }; 485 486 487 /* 488 * Specification of an MD node passed to the MDEG to filter any 489 * 'vport' nodes that do not belong to the specified node. This 490 * template is copied for each vsw instance and filled in with 491 * the appropriate 'cfg-handle' value before being passed to the MDEG. 492 */ 493 static mdeg_prop_spec_t vsw_prop_template[] = { 494 { MDET_PROP_STR, "name", vsw_propname }, 495 { MDET_PROP_VAL, "cfg-handle", NULL }, 496 { MDET_LIST_END, NULL, NULL } 497 }; 498 499 #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 500 501 #ifdef DEBUG 502 /* 503 * Print debug messages - set to 0x1f to enable all msgs 504 * or 0x0 to turn all off. 505 */ 506 int vswdbg = 0x0; 507 508 /* 509 * debug levels: 510 * 0x01: Function entry/exit tracing 511 * 0x02: Internal function messages 512 * 0x04: Verbose internal messages 513 * 0x08: Warning messages 514 * 0x10: Error messages 515 */ 516 517 void 518 vswdebug(vsw_t *vswp, const char *fmt, ...) 
519 { 520 char buf[512]; 521 va_list ap; 522 523 va_start(ap, fmt); 524 (void) vsprintf(buf, fmt, ap); 525 va_end(ap); 526 527 if (vswp == NULL) 528 cmn_err(CE_CONT, "%s\n", buf); 529 else 530 cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); 531 } 532 533 #endif /* DEBUG */ 534 535 static struct modlinkage modlinkage = { 536 MODREV_1, 537 &vswmodldrv, 538 NULL 539 }; 540 541 int 542 _init(void) 543 { 544 int status; 545 546 rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); 547 548 status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); 549 if (status != 0) { 550 return (status); 551 } 552 553 mac_init_ops(&vsw_ops, DRV_NAME); 554 status = mod_install(&modlinkage); 555 if (status != 0) { 556 ddi_soft_state_fini(&vsw_state); 557 } 558 return (status); 559 } 560 561 int 562 _fini(void) 563 { 564 int status; 565 566 status = mod_remove(&modlinkage); 567 if (status != 0) 568 return (status); 569 mac_fini_ops(&vsw_ops); 570 ddi_soft_state_fini(&vsw_state); 571 572 rw_destroy(&vsw_rw); 573 574 return (status); 575 } 576 577 int 578 _info(struct modinfo *modinfop) 579 { 580 return (mod_info(&modlinkage, modinfop)); 581 } 582 583 static int 584 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 585 { 586 vsw_t *vswp; 587 int instance; 588 char hashname[MAXNAMELEN]; 589 char qname[TASKQ_NAMELEN]; 590 vsw_attach_progress_t progress = PROG_init; 591 int rv; 592 593 switch (cmd) { 594 case DDI_ATTACH: 595 break; 596 case DDI_RESUME: 597 /* nothing to do for this non-device */ 598 return (DDI_SUCCESS); 599 case DDI_PM_RESUME: 600 default: 601 return (DDI_FAILURE); 602 } 603 604 instance = ddi_get_instance(dip); 605 if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { 606 DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); 607 return (DDI_FAILURE); 608 } 609 vswp = ddi_get_soft_state(vsw_state, instance); 610 611 if (vswp == NULL) { 612 DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance); 613 goto vsw_attach_fail; 614 } 615 616 vswp->dip = dip; 617 
vswp->instance = instance; 618 vswp->phys_link_state = LINK_STATE_UNKNOWN; 619 ddi_set_driver_private(dip, (caddr_t)vswp); 620 621 mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL); 622 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); 623 mutex_init(&vswp->sw_thr_lock, NULL, MUTEX_DRIVER, NULL); 624 cv_init(&vswp->sw_thr_cv, NULL, CV_DRIVER, NULL); 625 rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL); 626 rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); 627 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); 628 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); 629 630 progress |= PROG_locks; 631 632 rv = vsw_read_mdprops(vswp); 633 if (rv != 0) 634 goto vsw_attach_fail; 635 636 progress |= PROG_readmd; 637 638 /* setup the unicast forwarding database */ 639 (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", 640 vswp->instance); 641 D2(vswp, "creating unicast hash table (%s)...", hashname); 642 vswp->fdb_nchains = vsw_fdb_nchains; 643 vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains, 644 mod_hash_null_valdtor, sizeof (void *)); 645 vsw_create_vlans((void *)vswp, VSW_LOCALDEV); 646 progress |= PROG_fdb; 647 648 /* setup the multicast fowarding database */ 649 (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", 650 vswp->instance); 651 D2(vswp, "creating multicast hash table %s)...", hashname); 652 vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains, 653 mod_hash_null_valdtor, sizeof (void *)); 654 655 progress |= PROG_mfdb; 656 657 /* 658 * Create the taskq which will process all the VIO 659 * control messages. 
660 */ 661 (void) snprintf(qname, TASKQ_NAMELEN, "taskq%d", vswp->instance); 662 if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, 663 TASKQ_DEFAULTPRI, 0)) == NULL) { 664 cmn_err(CE_WARN, "!vsw%d: Unable to create task queue", 665 vswp->instance); 666 goto vsw_attach_fail; 667 } 668 669 progress |= PROG_taskq; 670 671 (void) snprintf(qname, TASKQ_NAMELEN, "rxpool_taskq%d", 672 vswp->instance); 673 if ((vswp->rxp_taskq = ddi_taskq_create(vswp->dip, qname, 1, 674 TASKQ_DEFAULTPRI, 0)) == NULL) { 675 cmn_err(CE_WARN, "!vsw%d: Unable to create rxp task queue", 676 vswp->instance); 677 goto vsw_attach_fail; 678 } 679 680 progress |= PROG_rxp_taskq; 681 682 /* prevent auto-detaching */ 683 if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip, 684 DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) { 685 cmn_err(CE_NOTE, "!Unable to set \"%s\" property for " 686 "instance %u", DDI_NO_AUTODETACH, instance); 687 } 688 689 /* 690 * The null switching function is set to avoid panic until 691 * switch mode is setup. 692 */ 693 vswp->vsw_switch_frame = vsw_switch_frame_nop; 694 695 /* 696 * Setup the required switching mode, based on the mdprops that we read 697 * earlier. We start a thread to do this, to avoid calling mac_open() 698 * directly from attach(). 699 */ 700 rv = vsw_setup_switching_start(vswp); 701 if (rv != 0) { 702 goto vsw_attach_fail; 703 } 704 705 progress |= PROG_swmode; 706 707 /* Register with mac layer as a provider */ 708 rv = vsw_mac_register(vswp); 709 if (rv != 0) 710 goto vsw_attach_fail; 711 712 progress |= PROG_macreg; 713 714 /* 715 * Now we have everything setup, register an interest in 716 * specific MD nodes. 717 * 718 * The callback is invoked in 2 cases, firstly if upon mdeg 719 * registration there are existing nodes which match our specified 720 * criteria, and secondly if the MD is changed (and again, there 721 * are nodes which we are interested in present within it. 
Note 722 * that our callback will be invoked even if our specified nodes 723 * have not actually changed). 724 * 725 */ 726 rv = vsw_mdeg_register(vswp); 727 if (rv != 0) 728 goto vsw_attach_fail; 729 730 progress |= PROG_mdreg; 731 732 vswp->attach_progress = progress; 733 734 WRITE_ENTER(&vsw_rw); 735 vswp->next = vsw_head; 736 vsw_head = vswp; 737 RW_EXIT(&vsw_rw); 738 739 ddi_report_dev(vswp->dip); 740 return (DDI_SUCCESS); 741 742 vsw_attach_fail: 743 DERR(NULL, "vsw_attach: failed"); 744 745 vswp->attach_progress = progress; 746 (void) vsw_unattach(vswp); 747 ddi_soft_state_free(vsw_state, instance); 748 return (DDI_FAILURE); 749 } 750 751 static int 752 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 753 { 754 vsw_t **vswpp, *vswp; 755 int instance; 756 757 instance = ddi_get_instance(dip); 758 vswp = ddi_get_soft_state(vsw_state, instance); 759 760 if (vswp == NULL) { 761 return (DDI_FAILURE); 762 } 763 764 switch (cmd) { 765 case DDI_DETACH: 766 break; 767 case DDI_SUSPEND: 768 case DDI_PM_SUSPEND: 769 default: 770 return (DDI_FAILURE); 771 } 772 773 D2(vswp, "detaching instance %d", instance); 774 775 if (vsw_unattach(vswp) != 0) { 776 return (DDI_FAILURE); 777 } 778 779 ddi_remove_minor_node(dip, NULL); 780 781 WRITE_ENTER(&vsw_rw); 782 for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) { 783 if (*vswpp == vswp) { 784 *vswpp = vswp->next; 785 break; 786 } 787 } 788 RW_EXIT(&vsw_rw); 789 790 ddi_soft_state_free(vsw_state, instance); 791 792 return (DDI_SUCCESS); 793 } 794 795 /* 796 * Common routine to handle vsw_attach() failure and vsw_detach(). Note that 797 * the only reason this function could fail is if mac_unregister() fails. 798 * Otherwise, this function must ensure that all resources are freed and return 799 * success. 800 */ 801 static int 802 vsw_unattach(vsw_t *vswp) 803 { 804 vsw_attach_progress_t progress; 805 806 progress = vswp->attach_progress; 807 808 /* 809 * Unregister from the gldv3 subsystem. 
This can fail, in particular 810 * if there are still any open references to this mac device; in which 811 * case we just return failure without continuing to detach further. 812 */ 813 if (progress & PROG_macreg) { 814 if (vsw_mac_unregister(vswp) != 0) { 815 cmn_err(CE_WARN, "!vsw%d: Unable to detach from " 816 "MAC layer", vswp->instance); 817 return (1); 818 } 819 progress &= ~PROG_macreg; 820 } 821 822 /* 823 * Now that we have unregistered from gldv3, we must finish all other 824 * steps and successfully return from this function; otherwise we will 825 * end up leaving the device in a broken/unusable state. 826 * 827 * If we have registered with mdeg, unregister now to stop further 828 * callbacks to this vsw device and/or its ports. Then, detach any 829 * existing ports. 830 */ 831 if (progress & PROG_mdreg) { 832 vsw_mdeg_unregister(vswp); 833 vsw_detach_ports(vswp); 834 progress &= ~PROG_mdreg; 835 } 836 837 /* 838 * If we have started a thread to setup the switching mode, stop it, if 839 * it is still running. If it has finished setting up the switching 840 * mode, then we need to clean up some additional things if we are 841 * running in L2 mode: first free up any hybrid resources; then stop 842 * and close the underlying physical device. Note that we would have 843 * already released all per mac_client resources (ucast, mcast addrs, 844 * hio-shares etc) as all the ports are detached and if the vsw device 845 * itself was in use as an interface, it has been unplumbed (otherwise 846 * mac_unregister() above would fail). 
847 */ 848 if (progress & PROG_swmode) { 849 850 vsw_setup_switching_stop(vswp); 851 852 if (vswp->hio_capable == B_TRUE) { 853 vsw_hio_cleanup(vswp); 854 vswp->hio_capable = B_FALSE; 855 } 856 857 mutex_enter(&vswp->mac_lock); 858 vsw_mac_close(vswp); 859 mutex_exit(&vswp->mac_lock); 860 861 progress &= ~PROG_swmode; 862 } 863 864 /* 865 * We now destroy the taskq used to clean up rx mblk pools that 866 * couldn't be destroyed when the ports/channels were detached. 867 * We implicitly wait for those tasks to complete in 868 * ddi_taskq_destroy(). 869 */ 870 if (progress & PROG_rxp_taskq) { 871 ddi_taskq_destroy(vswp->rxp_taskq); 872 progress &= ~PROG_rxp_taskq; 873 } 874 875 /* 876 * By now any pending tasks have finished and the underlying 877 * ldc's have been destroyed, so its safe to delete the control 878 * message taskq. 879 */ 880 if (progress & PROG_taskq) { 881 ddi_taskq_destroy(vswp->taskq_p); 882 progress &= ~PROG_taskq; 883 } 884 885 /* Destroy the multicast hash table */ 886 if (progress & PROG_mfdb) { 887 mod_hash_destroy_hash(vswp->mfdb); 888 progress &= ~PROG_mfdb; 889 } 890 891 /* Destroy the vlan hash table and fdb */ 892 if (progress & PROG_fdb) { 893 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 894 mod_hash_destroy_hash(vswp->fdb_hashp); 895 progress &= ~PROG_fdb; 896 } 897 898 if (progress & PROG_readmd) { 899 if (VSW_PRI_ETH_DEFINED(vswp)) { 900 kmem_free(vswp->pri_types, 901 sizeof (uint16_t) * vswp->pri_num_types); 902 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 903 } 904 progress &= ~PROG_readmd; 905 } 906 907 if (progress & PROG_locks) { 908 rw_destroy(&vswp->plist.lockrw); 909 rw_destroy(&vswp->mfdbrw); 910 rw_destroy(&vswp->if_lockrw); 911 rw_destroy(&vswp->maccl_rwlock); 912 cv_destroy(&vswp->sw_thr_cv); 913 mutex_destroy(&vswp->sw_thr_lock); 914 mutex_destroy(&vswp->mca_lock); 915 mutex_destroy(&vswp->mac_lock); 916 progress &= ~PROG_locks; 917 } 918 919 vswp->attach_progress = progress; 920 921 return (0); 922 } 923 924 void 925 
vsw_destroy_rxpools(void *arg) 926 { 927 vio_mblk_pool_t *poolp = (vio_mblk_pool_t *)arg; 928 vio_mblk_pool_t *npoolp; 929 930 while (poolp != NULL) { 931 npoolp = poolp->nextp; 932 while (vio_destroy_mblks(poolp) != 0) { 933 drv_usecwait(vsw_rxpool_cleanup_delay); 934 } 935 poolp = npoolp; 936 } 937 } 938 939 /* 940 * Get the value of the "vsw-phys-dev" property in the specified 941 * node. This property is the name of the physical device that 942 * the virtual switch will use to talk to the outside world. 943 * 944 * Note it is valid for this property to be NULL (but the property 945 * itself must exist). Callers of this routine should verify that 946 * the value returned is what they expected (i.e. either NULL or non NULL). 947 * 948 * On success returns value of the property in region pointed to by 949 * the 'name' argument, and with return value of 0. Otherwise returns 1. 950 */ 951 static int 952 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name) 953 { 954 int len = 0; 955 int instance; 956 char *physname = NULL; 957 char *dev; 958 const char *dev_name; 959 char myname[MAXNAMELEN]; 960 961 dev_name = ddi_driver_name(vswp->dip); 962 instance = ddi_get_instance(vswp->dip); 963 (void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance); 964 965 if (md_get_prop_data(mdp, node, physdev_propname, 966 (uint8_t **)(&physname), &len) != 0) { 967 cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical " 968 "device(s) from MD", vswp->instance); 969 return (1); 970 } else if ((strlen(physname) + 1) > LIFNAMSIZ) { 971 cmn_err(CE_WARN, "!vsw%d: %s is too long a device name", 972 vswp->instance, physname); 973 return (1); 974 } else if (strcmp(myname, physname) == 0) { 975 /* 976 * Prevent the vswitch from opening itself as the 977 * network device. 
978 */ 979 cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name", 980 vswp->instance, physname); 981 return (1); 982 } else { 983 (void) strncpy(name, physname, strlen(physname) + 1); 984 D2(vswp, "%s: using first device specified (%s)", 985 __func__, physname); 986 } 987 988 #ifdef DEBUG 989 /* 990 * As a temporary measure to aid testing we check to see if there 991 * is a vsw.conf file present. If there is we use the value of the 992 * vsw_physname property in the file as the name of the physical 993 * device, overriding the value from the MD. 994 * 995 * There may be multiple devices listed, but for the moment 996 * we just use the first one. 997 */ 998 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0, 999 "vsw_physname", &dev) == DDI_PROP_SUCCESS) { 1000 if ((strlen(dev) + 1) > LIFNAMSIZ) { 1001 cmn_err(CE_WARN, "vsw%d: %s is too long a device name", 1002 vswp->instance, dev); 1003 ddi_prop_free(dev); 1004 return (1); 1005 } else { 1006 cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from " 1007 "config file", vswp->instance, dev); 1008 1009 (void) strncpy(name, dev, strlen(dev) + 1); 1010 } 1011 1012 ddi_prop_free(dev); 1013 } 1014 #endif 1015 1016 return (0); 1017 } 1018 1019 /* 1020 * Read the 'vsw-switch-mode' property from the specified MD node. 1021 * 1022 * Returns 0 on success, otherwise returns 1. 1023 */ 1024 static int 1025 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode) 1026 { 1027 int len = 0; 1028 char *smode = NULL; 1029 char *curr_mode = NULL; 1030 1031 D1(vswp, "%s: enter", __func__); 1032 1033 /* 1034 * Get the switch-mode property. The modes are listed in 1035 * decreasing order of preference, i.e. prefered mode is 1036 * first item in list. 1037 */ 1038 len = 0; 1039 if (md_get_prop_data(mdp, node, smode_propname, 1040 (uint8_t **)(&smode), &len) != 0) { 1041 /* 1042 * Unable to get switch-mode property from MD, nothing 1043 * more we can do. 
1044 */ 1045 cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property" 1046 " from the MD", vswp->instance); 1047 return (1); 1048 } 1049 1050 curr_mode = smode; 1051 /* 1052 * Modes of operation: 1053 * 'switched' - layer 2 switching, underlying HW in 1054 * programmed mode. 1055 * 'promiscuous' - layer 2 switching, underlying HW in 1056 * promiscuous mode. 1057 * 'routed' - layer 3 (i.e. IP) routing, underlying HW 1058 * in non-promiscuous mode. 1059 */ 1060 while (curr_mode < (smode + len)) { 1061 D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode); 1062 if (strcmp(curr_mode, "switched") == 0) { 1063 *mode = VSW_LAYER2; 1064 } else if (strcmp(curr_mode, "promiscuous") == 0) { 1065 *mode = VSW_LAYER2 | VSW_LAYER2_PROMISC; 1066 } else if (strcmp(curr_mode, "routed") == 0) { 1067 *mode = VSW_LAYER3; 1068 } else { 1069 cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, " 1070 "setting to default switched mode", 1071 vswp->instance, curr_mode); 1072 *mode = VSW_LAYER2; 1073 } 1074 curr_mode += strlen(curr_mode) + 1; 1075 } 1076 1077 D2(vswp, "%s: %d mode", __func__, *mode); 1078 1079 D1(vswp, "%s: exit", __func__); 1080 1081 return (0); 1082 } 1083 1084 /* 1085 * Register with the MAC layer as a network device, so we 1086 * can be plumbed if necessary. 1087 */ 1088 static int 1089 vsw_mac_register(vsw_t *vswp) 1090 { 1091 mac_register_t *macp; 1092 int rv; 1093 1094 D1(vswp, "%s: enter", __func__); 1095 1096 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 1097 return (EINVAL); 1098 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1099 macp->m_driver = vswp; 1100 macp->m_dip = vswp->dip; 1101 macp->m_src_addr = (uint8_t *)&vswp->if_addr; 1102 macp->m_callbacks = &vsw_m_callbacks; 1103 macp->m_min_sdu = 0; 1104 macp->m_max_sdu = vswp->mtu; 1105 macp->m_margin = VLAN_TAGSZ; 1106 rv = mac_register(macp, &vswp->if_mh); 1107 mac_free(macp); 1108 if (rv != 0) { 1109 /* 1110 * Treat this as a non-fatal error as we may be 1111 * able to operate in some other mode. 
1112 */ 1113 cmn_err(CE_NOTE, "!vsw%d: Unable to register as " 1114 "a provider with MAC layer", vswp->instance); 1115 return (rv); 1116 } 1117 1118 vswp->if_state |= VSW_IF_REG; 1119 1120 D1(vswp, "%s: exit", __func__); 1121 1122 return (rv); 1123 } 1124 1125 static int 1126 vsw_mac_unregister(vsw_t *vswp) 1127 { 1128 int rv = 0; 1129 1130 D1(vswp, "%s: enter", __func__); 1131 1132 WRITE_ENTER(&vswp->if_lockrw); 1133 1134 if (vswp->if_state & VSW_IF_REG) { 1135 rv = mac_unregister(vswp->if_mh); 1136 if (rv != 0) { 1137 DWARN(vswp, "%s: unable to unregister from MAC " 1138 "framework", __func__); 1139 1140 RW_EXIT(&vswp->if_lockrw); 1141 D1(vswp, "%s: fail exit", __func__); 1142 return (rv); 1143 } 1144 1145 /* mark i/f as down and unregistered */ 1146 vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG); 1147 } 1148 RW_EXIT(&vswp->if_lockrw); 1149 1150 D1(vswp, "%s: exit", __func__); 1151 1152 return (rv); 1153 } 1154 1155 static int 1156 vsw_m_stat(void *arg, uint_t stat, uint64_t *val) 1157 { 1158 vsw_t *vswp = (vsw_t *)arg; 1159 1160 D1(vswp, "%s: enter", __func__); 1161 1162 mutex_enter(&vswp->mac_lock); 1163 if (vswp->mh == NULL) { 1164 mutex_exit(&vswp->mac_lock); 1165 return (EINVAL); 1166 } 1167 1168 /* return stats from underlying device */ 1169 *val = mac_stat_get(vswp->mh, stat); 1170 1171 mutex_exit(&vswp->mac_lock); 1172 1173 return (0); 1174 } 1175 1176 static void 1177 vsw_m_stop(void *arg) 1178 { 1179 vsw_t *vswp = (vsw_t *)arg; 1180 1181 D1(vswp, "%s: enter", __func__); 1182 1183 WRITE_ENTER(&vswp->if_lockrw); 1184 vswp->if_state &= ~VSW_IF_UP; 1185 RW_EXIT(&vswp->if_lockrw); 1186 1187 /* Cleanup and close the mac client */ 1188 vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV); 1189 1190 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1191 } 1192 1193 static int 1194 vsw_m_start(void *arg) 1195 { 1196 int rv; 1197 vsw_t *vswp = (vsw_t *)arg; 1198 1199 D1(vswp, "%s: enter", __func__); 1200 1201 WRITE_ENTER(&vswp->if_lockrw); 1202 1203 
vswp->if_state |= VSW_IF_UP; 1204 1205 if (vswp->switching_setup_done == B_FALSE) { 1206 /* 1207 * If the switching mode has not been setup yet, just 1208 * return. The unicast address will be programmed 1209 * after the physical device is successfully setup by the 1210 * timeout handler. 1211 */ 1212 RW_EXIT(&vswp->if_lockrw); 1213 return (0); 1214 } 1215 1216 /* if in layer2 mode, program unicast address. */ 1217 if (vswp->mh != NULL) { 1218 /* Init a mac client and program addresses */ 1219 rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV); 1220 if (rv != 0) { 1221 cmn_err(CE_NOTE, 1222 "!vsw%d: failed to program interface " 1223 "unicast address\n", vswp->instance); 1224 } 1225 } 1226 1227 RW_EXIT(&vswp->if_lockrw); 1228 1229 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1230 return (0); 1231 } 1232 1233 /* 1234 * Change the local interface address. 1235 * 1236 * Note: we don't support this entry point. The local 1237 * mac address of the switch can only be changed via its 1238 * MD node properties. 1239 */ 1240 static int 1241 vsw_m_unicst(void *arg, const uint8_t *macaddr) 1242 { 1243 _NOTE(ARGUNUSED(arg, macaddr)) 1244 1245 return (DDI_FAILURE); 1246 } 1247 1248 static int 1249 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 1250 { 1251 vsw_t *vswp = (vsw_t *)arg; 1252 mcst_addr_t *mcst_p = NULL; 1253 uint64_t addr = 0x0; 1254 int i, ret = 0; 1255 1256 D1(vswp, "%s: enter", __func__); 1257 1258 /* 1259 * Convert address into form that can be used 1260 * as hash table key. 1261 */ 1262 for (i = 0; i < ETHERADDRL; i++) { 1263 addr = (addr << 8) | mca[i]; 1264 } 1265 1266 D2(vswp, "%s: addr = 0x%llx", __func__, addr); 1267 1268 if (add) { 1269 D2(vswp, "%s: adding multicast", __func__); 1270 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1271 /* 1272 * Update the list of multicast addresses 1273 * contained within the vsw_t structure to 1274 * include this new one. 
1275 */ 1276 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP); 1277 if (mcst_p == NULL) { 1278 DERR(vswp, "%s unable to alloc mem", __func__); 1279 (void) vsw_del_mcst(vswp, 1280 VSW_LOCALDEV, addr, NULL); 1281 return (1); 1282 } 1283 mcst_p->addr = addr; 1284 ether_copy(mca, &mcst_p->mca); 1285 1286 /* 1287 * Call into the underlying driver to program the 1288 * address into HW. 1289 */ 1290 ret = vsw_mac_multicast_add(vswp, NULL, mcst_p, 1291 VSW_LOCALDEV); 1292 if (ret != 0) { 1293 (void) vsw_del_mcst(vswp, 1294 VSW_LOCALDEV, addr, NULL); 1295 kmem_free(mcst_p, sizeof (*mcst_p)); 1296 return (ret); 1297 } 1298 1299 mutex_enter(&vswp->mca_lock); 1300 mcst_p->nextp = vswp->mcap; 1301 vswp->mcap = mcst_p; 1302 mutex_exit(&vswp->mca_lock); 1303 } else { 1304 cmn_err(CE_WARN, "!vsw%d: unable to add multicast " 1305 "address", vswp->instance); 1306 } 1307 return (ret); 1308 } 1309 1310 D2(vswp, "%s: removing multicast", __func__); 1311 /* 1312 * Remove the address from the hash table.. 1313 */ 1314 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1315 1316 /* 1317 * ..and then from the list maintained in the 1318 * vsw_t structure. 
1319 */ 1320 mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr); 1321 ASSERT(mcst_p != NULL); 1322 1323 vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV); 1324 kmem_free(mcst_p, sizeof (*mcst_p)); 1325 } 1326 1327 D1(vswp, "%s: exit", __func__); 1328 1329 return (0); 1330 } 1331 1332 static int 1333 vsw_m_promisc(void *arg, boolean_t on) 1334 { 1335 vsw_t *vswp = (vsw_t *)arg; 1336 1337 D1(vswp, "%s: enter", __func__); 1338 1339 WRITE_ENTER(&vswp->if_lockrw); 1340 if (on) 1341 vswp->if_state |= VSW_IF_PROMISC; 1342 else 1343 vswp->if_state &= ~VSW_IF_PROMISC; 1344 RW_EXIT(&vswp->if_lockrw); 1345 1346 D1(vswp, "%s: exit", __func__); 1347 1348 return (0); 1349 } 1350 1351 static mblk_t * 1352 vsw_m_tx(void *arg, mblk_t *mp) 1353 { 1354 vsw_t *vswp = (vsw_t *)arg; 1355 1356 D1(vswp, "%s: enter", __func__); 1357 1358 mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp); 1359 1360 if (mp == NULL) { 1361 return (NULL); 1362 } 1363 1364 vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL); 1365 1366 D1(vswp, "%s: exit", __func__); 1367 1368 return (NULL); 1369 } 1370 1371 /* 1372 * Register for machine description (MD) updates. 1373 * 1374 * Returns 0 on success, 1 on failure. 1375 */ 1376 static int 1377 vsw_mdeg_register(vsw_t *vswp) 1378 { 1379 mdeg_prop_spec_t *pspecp; 1380 mdeg_node_spec_t *inst_specp; 1381 mdeg_handle_t mdeg_hdl, mdeg_port_hdl; 1382 size_t templatesz; 1383 int rv; 1384 1385 D1(vswp, "%s: enter", __func__); 1386 1387 /* 1388 * Allocate and initialize a per-instance copy 1389 * of the global property spec array that will 1390 * uniquely identify this vsw instance. 
1391 */ 1392 templatesz = sizeof (vsw_prop_template); 1393 pspecp = kmem_zalloc(templatesz, KM_SLEEP); 1394 1395 bcopy(vsw_prop_template, pspecp, templatesz); 1396 1397 VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop); 1398 1399 /* initialize the complete prop spec structure */ 1400 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 1401 inst_specp->namep = "virtual-device"; 1402 inst_specp->specp = pspecp; 1403 1404 D2(vswp, "%s: instance %d registering with mdeg", __func__, 1405 vswp->regprop); 1406 /* 1407 * Register an interest in 'virtual-device' nodes with a 1408 * 'name' property of 'virtual-network-switch' 1409 */ 1410 rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb, 1411 (void *)vswp, &mdeg_hdl); 1412 if (rv != MDEG_SUCCESS) { 1413 DERR(vswp, "%s: mdeg_register failed (%d) for vsw node", 1414 __func__, rv); 1415 goto mdeg_reg_fail; 1416 } 1417 1418 /* 1419 * Register an interest in 'vsw-port' nodes. 1420 */ 1421 rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb, 1422 (void *)vswp, &mdeg_port_hdl); 1423 if (rv != MDEG_SUCCESS) { 1424 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv); 1425 (void) mdeg_unregister(mdeg_hdl); 1426 goto mdeg_reg_fail; 1427 } 1428 1429 /* save off data that will be needed later */ 1430 vswp->inst_spec = inst_specp; 1431 vswp->mdeg_hdl = mdeg_hdl; 1432 vswp->mdeg_port_hdl = mdeg_port_hdl; 1433 1434 D1(vswp, "%s: exit", __func__); 1435 return (0); 1436 1437 mdeg_reg_fail: 1438 cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks", 1439 vswp->instance); 1440 kmem_free(pspecp, templatesz); 1441 kmem_free(inst_specp, sizeof (mdeg_node_spec_t)); 1442 1443 vswp->mdeg_hdl = NULL; 1444 vswp->mdeg_port_hdl = NULL; 1445 1446 return (1); 1447 } 1448 1449 static void 1450 vsw_mdeg_unregister(vsw_t *vswp) 1451 { 1452 D1(vswp, "vsw_mdeg_unregister: enter"); 1453 1454 if (vswp->mdeg_hdl != NULL) 1455 (void) mdeg_unregister(vswp->mdeg_hdl); 1456 1457 if (vswp->mdeg_port_hdl != NULL) 1458 (void) 
mdeg_unregister(vswp->mdeg_port_hdl); 1459 1460 if (vswp->inst_spec != NULL) { 1461 if (vswp->inst_spec->specp != NULL) { 1462 (void) kmem_free(vswp->inst_spec->specp, 1463 sizeof (vsw_prop_template)); 1464 vswp->inst_spec->specp = NULL; 1465 } 1466 1467 (void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t)); 1468 vswp->inst_spec = NULL; 1469 } 1470 1471 D1(vswp, "vsw_mdeg_unregister: exit"); 1472 } 1473 1474 /* 1475 * Mdeg callback invoked for the vsw node itself. 1476 */ 1477 static int 1478 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1479 { 1480 vsw_t *vswp; 1481 md_t *mdp; 1482 mde_cookie_t node; 1483 uint64_t inst; 1484 char *node_name = NULL; 1485 1486 if (resp == NULL) 1487 return (MDEG_FAILURE); 1488 1489 vswp = (vsw_t *)cb_argp; 1490 1491 D1(vswp, "%s: added %d : removed %d : curr matched %d" 1492 " : prev matched %d", __func__, resp->added.nelem, 1493 resp->removed.nelem, resp->match_curr.nelem, 1494 resp->match_prev.nelem); 1495 1496 /* 1497 * We get an initial callback for this node as 'added' 1498 * after registering with mdeg. Note that we would have 1499 * already gathered information about this vsw node by 1500 * walking MD earlier during attach (in vsw_read_mdprops()). 1501 * So, there is a window where the properties of this 1502 * node might have changed when we get this initial 'added' 1503 * callback. We handle this as if an update occured 1504 * and invoke the same function which handles updates to 1505 * the properties of this vsw-node if any. 1506 * 1507 * A non-zero 'match' value indicates that the MD has been 1508 * updated and that a virtual-network-switch node is 1509 * present which may or may not have been updated. It is 1510 * up to the clients to examine their own nodes and 1511 * determine if they have changed. 
1512 */ 1513 if (resp->added.nelem != 0) { 1514 1515 if (resp->added.nelem != 1) { 1516 cmn_err(CE_NOTE, "!vsw%d: number of nodes added " 1517 "invalid: %d\n", vswp->instance, resp->added.nelem); 1518 return (MDEG_FAILURE); 1519 } 1520 1521 mdp = resp->added.mdp; 1522 node = resp->added.mdep[0]; 1523 1524 } else if (resp->match_curr.nelem != 0) { 1525 1526 if (resp->match_curr.nelem != 1) { 1527 cmn_err(CE_NOTE, "!vsw%d: number of nodes updated " 1528 "invalid: %d\n", vswp->instance, 1529 resp->match_curr.nelem); 1530 return (MDEG_FAILURE); 1531 } 1532 1533 mdp = resp->match_curr.mdp; 1534 node = resp->match_curr.mdep[0]; 1535 1536 } else { 1537 return (MDEG_FAILURE); 1538 } 1539 1540 /* Validate name and instance */ 1541 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { 1542 DERR(vswp, "%s: unable to get node name\n", __func__); 1543 return (MDEG_FAILURE); 1544 } 1545 1546 /* is this a virtual-network-switch? */ 1547 if (strcmp(node_name, vsw_propname) != 0) { 1548 DERR(vswp, "%s: Invalid node name: %s\n", 1549 __func__, node_name); 1550 return (MDEG_FAILURE); 1551 } 1552 1553 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { 1554 DERR(vswp, "%s: prop(cfg-handle) not found\n", 1555 __func__); 1556 return (MDEG_FAILURE); 1557 } 1558 1559 /* is this the right instance of vsw? */ 1560 if (inst != vswp->regprop) { 1561 DERR(vswp, "%s: Invalid cfg-handle: %lx\n", 1562 __func__, inst); 1563 return (MDEG_FAILURE); 1564 } 1565 1566 vsw_update_md_prop(vswp, mdp, node); 1567 1568 return (MDEG_SUCCESS); 1569 } 1570 1571 /* 1572 * Mdeg callback invoked for changes to the vsw-port nodes 1573 * under the vsw node. 
1574 */ 1575 static int 1576 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1577 { 1578 vsw_t *vswp; 1579 int idx; 1580 md_t *mdp; 1581 mde_cookie_t node; 1582 uint64_t inst; 1583 int rv; 1584 1585 if ((resp == NULL) || (cb_argp == NULL)) 1586 return (MDEG_FAILURE); 1587 1588 vswp = (vsw_t *)cb_argp; 1589 1590 D2(vswp, "%s: added %d : removed %d : curr matched %d" 1591 " : prev matched %d", __func__, resp->added.nelem, 1592 resp->removed.nelem, resp->match_curr.nelem, 1593 resp->match_prev.nelem); 1594 1595 /* process added ports */ 1596 for (idx = 0; idx < resp->added.nelem; idx++) { 1597 mdp = resp->added.mdp; 1598 node = resp->added.mdep[idx]; 1599 1600 D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); 1601 1602 if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) { 1603 cmn_err(CE_WARN, "!vsw%d: Unable to add new port " 1604 "(0x%lx), err=%d", vswp->instance, node, rv); 1605 } 1606 } 1607 1608 /* process removed ports */ 1609 for (idx = 0; idx < resp->removed.nelem; idx++) { 1610 mdp = resp->removed.mdp; 1611 node = resp->removed.mdep[idx]; 1612 1613 if (md_get_prop_val(mdp, node, id_propname, &inst)) { 1614 DERR(vswp, "%s: prop(%s) not found in port(%d)", 1615 __func__, id_propname, idx); 1616 continue; 1617 } 1618 1619 D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); 1620 1621 if (vsw_port_detach(vswp, inst) != 0) { 1622 cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld", 1623 vswp->instance, inst); 1624 } 1625 } 1626 1627 for (idx = 0; idx < resp->match_curr.nelem; idx++) { 1628 (void) vsw_port_update(vswp, resp->match_curr.mdp, 1629 resp->match_curr.mdep[idx], 1630 resp->match_prev.mdp, 1631 resp->match_prev.mdep[idx]); 1632 } 1633 1634 D1(vswp, "%s: exit", __func__); 1635 1636 return (MDEG_SUCCESS); 1637 } 1638 1639 /* 1640 * Scan the machine description for this instance of vsw 1641 * and read its properties. Called only from vsw_attach(). 1642 * Returns: 0 on success, 1 on failure. 
1643 */ 1644 static int 1645 vsw_read_mdprops(vsw_t *vswp) 1646 { 1647 md_t *mdp = NULL; 1648 mde_cookie_t rootnode; 1649 mde_cookie_t *listp = NULL; 1650 uint64_t inst; 1651 uint64_t cfgh; 1652 char *name; 1653 int rv = 1; 1654 int num_nodes = 0; 1655 int num_devs = 0; 1656 int listsz = 0; 1657 int i; 1658 1659 /* 1660 * In each 'virtual-device' node in the MD there is a 1661 * 'cfg-handle' property which is the MD's concept of 1662 * an instance number (this may be completely different from 1663 * the device drivers instance #). OBP reads that value and 1664 * stores it in the 'reg' property of the appropriate node in 1665 * the device tree. We first read this reg property and use this 1666 * to compare against the 'cfg-handle' property of vsw nodes 1667 * in MD to get to this specific vsw instance and then read 1668 * other properties that we are interested in. 1669 * We also cache the value of 'reg' property and use it later 1670 * to register callbacks with mdeg (see vsw_mdeg_register()) 1671 */ 1672 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 1673 DDI_PROP_DONTPASS, reg_propname, -1); 1674 if (inst == -1) { 1675 cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from " 1676 "OBP device tree", vswp->instance, reg_propname); 1677 return (rv); 1678 } 1679 1680 vswp->regprop = inst; 1681 1682 if ((mdp = md_get_handle()) == NULL) { 1683 DWARN(vswp, "%s: cannot init MD\n", __func__); 1684 return (rv); 1685 } 1686 1687 num_nodes = md_node_count(mdp); 1688 ASSERT(num_nodes > 0); 1689 1690 listsz = num_nodes * sizeof (mde_cookie_t); 1691 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 1692 1693 rootnode = md_root_node(mdp); 1694 1695 /* search for all "virtual_device" nodes */ 1696 num_devs = md_scan_dag(mdp, rootnode, 1697 md_find_name(mdp, vdev_propname), 1698 md_find_name(mdp, "fwd"), listp); 1699 if (num_devs <= 0) { 1700 DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs); 1701 goto vsw_readmd_exit; 1702 } 1703 1704 /* 1705 * Now loop 
through the list of virtual-devices looking for 1706 * devices with name "virtual-network-switch" and for each 1707 * such device compare its instance with what we have from 1708 * the 'reg' property to find the right node in MD and then 1709 * read all its properties. 1710 */ 1711 for (i = 0; i < num_devs; i++) { 1712 1713 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 1714 DWARN(vswp, "%s: name property not found\n", 1715 __func__); 1716 goto vsw_readmd_exit; 1717 } 1718 1719 /* is this a virtual-network-switch? */ 1720 if (strcmp(name, vsw_propname) != 0) 1721 continue; 1722 1723 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 1724 DWARN(vswp, "%s: cfg-handle property not found\n", 1725 __func__); 1726 goto vsw_readmd_exit; 1727 } 1728 1729 /* is this the required instance of vsw? */ 1730 if (inst != cfgh) 1731 continue; 1732 1733 /* now read all properties of this vsw instance */ 1734 rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]); 1735 break; 1736 } 1737 1738 vsw_readmd_exit: 1739 1740 kmem_free(listp, listsz); 1741 (void) md_fini_handle(mdp); 1742 return (rv); 1743 } 1744 1745 /* 1746 * Read the initial start-of-day values from the specified MD node. 
1747 */ 1748 static int 1749 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1750 { 1751 uint64_t macaddr = 0; 1752 1753 D1(vswp, "%s: enter", __func__); 1754 1755 if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) { 1756 return (1); 1757 } 1758 1759 /* mac address for vswitch device itself */ 1760 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1761 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1762 vswp->instance); 1763 return (1); 1764 } 1765 1766 vsw_save_lmacaddr(vswp, macaddr); 1767 1768 if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) { 1769 DWARN(vswp, "%s: Unable to read %s property from MD, " 1770 "defaulting to 'switched' mode", 1771 __func__, smode_propname); 1772 1773 vswp->smode = VSW_LAYER2; 1774 } 1775 1776 /* 1777 * Read the 'linkprop' property to know if this 1778 * vsw device wants to get physical link updates. 1779 */ 1780 vsw_linkprop_read(vswp, mdp, node, &vswp->pls_update); 1781 1782 /* read mtu */ 1783 vsw_mtu_read(vswp, mdp, node, &vswp->mtu); 1784 if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) { 1785 vswp->mtu = ETHERMTU; 1786 } 1787 vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) + 1788 VLAN_TAGSZ; 1789 1790 /* read vlan id properties of this vsw instance */ 1791 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid, 1792 &vswp->vids, &vswp->nvids, &vswp->default_vlan_id); 1793 1794 /* read priority-ether-types */ 1795 vsw_read_pri_eth_types(vswp, mdp, node); 1796 1797 /* read bandwidth property of this vsw instance */ 1798 vsw_bandwidth_read(vswp, mdp, node, &vswp->bandwidth); 1799 1800 D1(vswp, "%s: exit", __func__); 1801 return (0); 1802 } 1803 1804 /* 1805 * Read vlan id properties of the given MD node. 
1806 * Arguments: 1807 * arg: device argument(vsw device or a port) 1808 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port) 1809 * mdp: machine description 1810 * node: md node cookie 1811 * 1812 * Returns: 1813 * pvidp: port-vlan-id of the node 1814 * vidspp: list of vlan-ids of the node 1815 * nvidsp: # of vlan-ids in the list 1816 * default_idp: default-vlan-id of the node(if node is vsw device) 1817 */ 1818 static void 1819 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node, 1820 uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp, 1821 uint16_t *default_idp) 1822 { 1823 vsw_t *vswp; 1824 vsw_port_t *portp; 1825 char *pvid_propname; 1826 char *vid_propname; 1827 uint_t nvids = 0; 1828 uint32_t vids_size; 1829 int rv; 1830 int i; 1831 uint64_t *data; 1832 uint64_t val; 1833 int size; 1834 int inst; 1835 1836 if (type == VSW_LOCALDEV) { 1837 1838 vswp = (vsw_t *)arg; 1839 pvid_propname = vsw_pvid_propname; 1840 vid_propname = vsw_vid_propname; 1841 inst = vswp->instance; 1842 1843 } else if (type == VSW_VNETPORT) { 1844 1845 portp = (vsw_port_t *)arg; 1846 vswp = portp->p_vswp; 1847 pvid_propname = port_pvid_propname; 1848 vid_propname = port_vid_propname; 1849 inst = portp->p_instance; 1850 1851 } else { 1852 return; 1853 } 1854 1855 if (type == VSW_LOCALDEV && default_idp != NULL) { 1856 rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val); 1857 if (rv != 0) { 1858 DWARN(vswp, "%s: prop(%s) not found", __func__, 1859 vsw_dvid_propname); 1860 1861 *default_idp = vsw_default_vlan_id; 1862 } else { 1863 *default_idp = val & 0xFFF; 1864 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1865 vsw_dvid_propname, inst, *default_idp); 1866 } 1867 } 1868 1869 rv = md_get_prop_val(mdp, node, pvid_propname, &val); 1870 if (rv != 0) { 1871 DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname); 1872 *pvidp = vsw_default_vlan_id; 1873 } else { 1874 1875 *pvidp = val & 0xFFF; 1876 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1877 
pvid_propname, inst, *pvidp); 1878 } 1879 1880 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data, 1881 &size); 1882 if (rv != 0) { 1883 D2(vswp, "%s: prop(%s) not found", __func__, vid_propname); 1884 size = 0; 1885 } else { 1886 size /= sizeof (uint64_t); 1887 } 1888 nvids = size; 1889 1890 if (nvids != 0) { 1891 D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst); 1892 vids_size = sizeof (vsw_vlanid_t) * nvids; 1893 *vidspp = kmem_zalloc(vids_size, KM_SLEEP); 1894 for (i = 0; i < nvids; i++) { 1895 (*vidspp)[i].vl_vid = data[i] & 0xFFFF; 1896 (*vidspp)[i].vl_set = B_FALSE; 1897 D2(vswp, " %d ", (*vidspp)[i].vl_vid); 1898 } 1899 D2(vswp, "\n"); 1900 } 1901 1902 *nvidsp = nvids; 1903 } 1904 1905 static void 1906 vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp, mde_cookie_t node, 1907 uint64_t *bw) 1908 { 1909 int rv; 1910 uint64_t val; 1911 vsw_t *vswp; 1912 1913 vswp = portp->p_vswp; 1914 1915 rv = md_get_prop_val(mdp, node, port_maxbw_propname, &val); 1916 1917 if (rv != 0) { 1918 *bw = 0; 1919 D3(vswp, "%s: prop(%s) not found\n", __func__, 1920 port_maxbw_propname); 1921 } else { 1922 *bw = val; 1923 D3(vswp, "%s: %s nodes found", __func__, port_maxbw_propname); 1924 } 1925 } 1926 1927 /* 1928 * This function reads "priority-ether-types" property from md. This property 1929 * is used to enable support for priority frames. Applications which need 1930 * guaranteed and timely delivery of certain high priority frames to/from 1931 * a vnet or vsw within ldoms, should configure this property by providing 1932 * the ether type(s) for which the priority facility is needed. 1933 * Normal data frames are delivered over a ldc channel using the descriptor 1934 * ring mechanism which is constrained by factors such as descriptor ring size, 1935 * the rate at which the ring is processed at the peer ldc end point, etc. 
1936 * The priority mechanism provides an Out-Of-Band path to send/receive frames 1937 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the 1938 * descriptor ring path and enables a more reliable and timely delivery of 1939 * frames to the peer. 1940 */ 1941 static void 1942 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1943 { 1944 int rv; 1945 uint16_t *types; 1946 uint64_t *data; 1947 int size; 1948 int i; 1949 size_t mblk_sz; 1950 1951 rv = md_get_prop_data(mdp, node, pri_types_propname, 1952 (uint8_t **)&data, &size); 1953 if (rv != 0) { 1954 /* 1955 * Property may not exist if we are running pre-ldoms1.1 f/w. 1956 * Check if 'vsw_pri_eth_type' has been set in that case. 1957 */ 1958 if (vsw_pri_eth_type != 0) { 1959 size = sizeof (vsw_pri_eth_type); 1960 data = &vsw_pri_eth_type; 1961 } else { 1962 D3(vswp, "%s: prop(%s) not found", __func__, 1963 pri_types_propname); 1964 size = 0; 1965 } 1966 } 1967 1968 if (size == 0) { 1969 vswp->pri_num_types = 0; 1970 return; 1971 } 1972 1973 /* 1974 * we have some priority-ether-types defined; 1975 * allocate a table of these types and also 1976 * allocate a pool of mblks to transmit these 1977 * priority packets. 
1978 */ 1979 size /= sizeof (uint64_t); 1980 vswp->pri_num_types = size; 1981 vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP); 1982 for (i = 0, types = vswp->pri_types; i < size; i++) { 1983 types[i] = data[i] & 0xFFFF; 1984 } 1985 mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7; 1986 (void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, NULL, 1987 &vswp->pri_tx_vmp); 1988 } 1989 1990 static void 1991 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu) 1992 { 1993 int rv; 1994 int inst; 1995 uint64_t val; 1996 char *mtu_propname; 1997 1998 mtu_propname = vsw_mtu_propname; 1999 inst = vswp->instance; 2000 2001 rv = md_get_prop_val(mdp, node, mtu_propname, &val); 2002 if (rv != 0) { 2003 D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname); 2004 *mtu = vsw_ethermtu; 2005 } else { 2006 2007 *mtu = val & 0xFFFF; 2008 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 2009 mtu_propname, inst, *mtu); 2010 } 2011 } 2012 2013 /* 2014 * Update the mtu of the vsw device. We first check if the device has been 2015 * plumbed and if so fail the mtu update. Otherwise, we continue to update the 2016 * new mtu and reset all ports to initiate handshake re-negotiation with peers 2017 * using the new mtu. 
2018 */ 2019 static int 2020 vsw_mtu_update(vsw_t *vswp, uint32_t mtu) 2021 { 2022 int rv; 2023 2024 WRITE_ENTER(&vswp->if_lockrw); 2025 2026 if (vswp->if_state & VSW_IF_UP) { 2027 2028 RW_EXIT(&vswp->if_lockrw); 2029 2030 cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update" 2031 " as the device is plumbed\n", vswp->instance); 2032 return (EBUSY); 2033 2034 } else { 2035 2036 D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n", 2037 __func__, vswp->mtu, mtu); 2038 2039 vswp->mtu = mtu; 2040 vswp->max_frame_size = vswp->mtu + 2041 sizeof (struct ether_header) + VLAN_TAGSZ; 2042 2043 rv = mac_maxsdu_update(vswp->if_mh, mtu); 2044 if (rv != 0) { 2045 cmn_err(CE_NOTE, 2046 "!vsw%d: Unable to update mtu with mac" 2047 " layer\n", vswp->instance); 2048 } 2049 2050 RW_EXIT(&vswp->if_lockrw); 2051 2052 /* Reset ports to renegotiate with the new mtu */ 2053 vsw_reset_ports(vswp); 2054 2055 } 2056 2057 return (0); 2058 } 2059 2060 static void 2061 vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 2062 boolean_t *pls) 2063 { 2064 int rv; 2065 uint64_t val; 2066 char *linkpropname; 2067 2068 linkpropname = vsw_linkprop_propname; 2069 2070 rv = md_get_prop_val(mdp, node, linkpropname, &val); 2071 if (rv != 0) { 2072 D3(vswp, "%s: prop(%s) not found", __func__, linkpropname); 2073 *pls = B_FALSE; 2074 } else { 2075 2076 *pls = (val & 0x1) ? 
B_TRUE : B_FALSE; 2077 D2(vswp, "%s: %s(%d): (%d)\n", __func__, linkpropname, 2078 vswp->instance, *pls); 2079 } 2080 } 2081 2082 void 2083 vsw_mac_link_update(vsw_t *vswp, link_state_t link_state) 2084 { 2085 READ_ENTER(&vswp->if_lockrw); 2086 2087 if (vswp->if_state & VSW_IF_REG) { 2088 mac_link_update(vswp->if_mh, link_state); 2089 } 2090 2091 RW_EXIT(&vswp->if_lockrw); 2092 } 2093 2094 void 2095 vsw_physlink_state_update(vsw_t *vswp) 2096 { 2097 if (vswp->pls_update == B_TRUE) { 2098 vsw_mac_link_update(vswp, vswp->phys_link_state); 2099 } 2100 vsw_physlink_update_ports(vswp); 2101 } 2102 2103 static void 2104 vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint64_t *bw) 2105 { 2106 /* read the vsw bandwidth from md */ 2107 int rv; 2108 uint64_t val; 2109 2110 rv = md_get_prop_val(mdp, node, vsw_maxbw_propname, &val); 2111 if (rv != 0) { 2112 *bw = 0; 2113 D3(vswp, "%s: prop(%s) not found", __func__, 2114 vsw_maxbw_propname); 2115 } else { 2116 *bw = val; 2117 D3(vswp, "%s: %s(%d): (%ld)\n", __func__, 2118 vsw_maxbw_propname, vswp->instance, *bw); 2119 } 2120 } 2121 2122 /* 2123 * Check to see if the relevant properties in the specified node have 2124 * changed, and if so take the appropriate action. 2125 * 2126 * If any of the properties are missing or invalid we don't take 2127 * any action, as this function should only be invoked when modifications 2128 * have been made to what we assume is a working configuration, which 2129 * we leave active. 2130 * 2131 * Note it is legal for this routine to be invoked even if none of the 2132 * properties in the port node within the MD have actually changed. 
2133 */ 2134 static void 2135 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 2136 { 2137 char physname[LIFNAMSIZ]; 2138 char drv[LIFNAMSIZ]; 2139 uint_t ddi_instance; 2140 uint8_t new_smode; 2141 int i; 2142 uint64_t macaddr = 0; 2143 enum {MD_init = 0x1, 2144 MD_physname = 0x2, 2145 MD_macaddr = 0x4, 2146 MD_smode = 0x8, 2147 MD_vlans = 0x10, 2148 MD_mtu = 0x20, 2149 MD_pls = 0x40, 2150 MD_bw = 0x80} updated; 2151 int rv; 2152 uint16_t pvid; 2153 vsw_vlanid_t *vids; 2154 uint16_t nvids; 2155 uint32_t mtu; 2156 boolean_t pls_update; 2157 uint64_t maxbw; 2158 2159 updated = MD_init; 2160 2161 D1(vswp, "%s: enter", __func__); 2162 2163 /* 2164 * Check if name of physical device in MD has changed. 2165 */ 2166 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { 2167 /* 2168 * Do basic sanity check on new device name/instance, 2169 * if its non NULL. It is valid for the device name to 2170 * have changed from a non NULL to a NULL value, i.e. 2171 * the vsw is being changed to 'routed' mode. 2172 */ 2173 if ((strlen(physname) != 0) && 2174 (ddi_parse(physname, drv, 2175 &ddi_instance) != DDI_SUCCESS)) { 2176 cmn_err(CE_WARN, "!vsw%d: physical device %s is not" 2177 " a valid device name/instance", 2178 vswp->instance, physname); 2179 goto fail_reconf; 2180 } 2181 2182 if (strcmp(physname, vswp->physname)) { 2183 D2(vswp, "%s: device name changed from %s to %s", 2184 __func__, vswp->physname, physname); 2185 2186 updated |= MD_physname; 2187 } else { 2188 D2(vswp, "%s: device name unchanged at %s", 2189 __func__, vswp->physname); 2190 } 2191 } else { 2192 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " 2193 "device from updated MD.", vswp->instance); 2194 goto fail_reconf; 2195 } 2196 2197 /* 2198 * Check if MAC address has changed. 
2199 */ 2200 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 2201 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 2202 vswp->instance); 2203 goto fail_reconf; 2204 } else { 2205 uint64_t maddr = macaddr; 2206 READ_ENTER(&vswp->if_lockrw); 2207 for (i = ETHERADDRL - 1; i >= 0; i--) { 2208 if (vswp->if_addr.ether_addr_octet[i] 2209 != (macaddr & 0xFF)) { 2210 D2(vswp, "%s: octet[%d] 0x%x != 0x%x", 2211 __func__, i, 2212 vswp->if_addr.ether_addr_octet[i], 2213 (macaddr & 0xFF)); 2214 updated |= MD_macaddr; 2215 macaddr = maddr; 2216 break; 2217 } 2218 macaddr >>= 8; 2219 } 2220 RW_EXIT(&vswp->if_lockrw); 2221 if (updated & MD_macaddr) { 2222 vsw_save_lmacaddr(vswp, macaddr); 2223 } 2224 } 2225 2226 /* 2227 * Check if switching modes have changed. 2228 */ 2229 if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) { 2230 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", 2231 vswp->instance, smode_propname); 2232 goto fail_reconf; 2233 } else { 2234 if (new_smode != vswp->smode) { 2235 D2(vswp, "%s: switching mode changed from %d to %d", 2236 __func__, vswp->smode, new_smode); 2237 2238 updated |= MD_smode; 2239 } 2240 } 2241 2242 /* Read the vlan ids */ 2243 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids, 2244 &nvids, NULL); 2245 2246 /* Determine if there are any vlan id updates */ 2247 if ((pvid != vswp->pvid) || /* pvid changed? */ 2248 (nvids != vswp->nvids) || /* # of vids changed? */ 2249 ((nvids != 0) && (vswp->nvids != 0) && /* vids changed? */ 2250 !vsw_cmp_vids(vids, vswp->vids, nvids))) { 2251 updated |= MD_vlans; 2252 } 2253 2254 /* Read mtu */ 2255 vsw_mtu_read(vswp, mdp, node, &mtu); 2256 if (mtu != vswp->mtu) { 2257 if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) { 2258 updated |= MD_mtu; 2259 } else { 2260 cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update" 2261 " as the specified value:%d is invalid\n", 2262 vswp->instance, mtu); 2263 } 2264 } 2265 2266 /* 2267 * Read the 'linkprop' property. 
2268 */ 2269 vsw_linkprop_read(vswp, mdp, node, &pls_update); 2270 if (pls_update != vswp->pls_update) { 2271 updated |= MD_pls; 2272 } 2273 2274 /* Read bandwidth */ 2275 vsw_bandwidth_read(vswp, mdp, node, &maxbw); 2276 if (maxbw != vswp->bandwidth) { 2277 if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) { 2278 updated |= MD_bw; 2279 } else { 2280 cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth" 2281 " update as the specified value:%ld is invalid\n", 2282 vswp->instance, maxbw); 2283 } 2284 } 2285 2286 /* 2287 * Now make any changes which are needed... 2288 */ 2289 if (updated & MD_pls) { 2290 2291 /* save the updated property. */ 2292 vswp->pls_update = pls_update; 2293 2294 if (pls_update == B_FALSE) { 2295 /* 2296 * Phys link state update is now disabled for this vsw 2297 * interface. If we had previously reported a link-down 2298 * to the stack, undo that by sending a link-up. 2299 */ 2300 if (vswp->phys_link_state == LINK_STATE_DOWN) { 2301 vsw_mac_link_update(vswp, LINK_STATE_UP); 2302 } 2303 } else { 2304 /* 2305 * Phys link state update is now enabled. Send up an 2306 * update based on the current phys link state. 2307 */ 2308 if (vswp->smode & VSW_LAYER2) { 2309 vsw_mac_link_update(vswp, 2310 vswp->phys_link_state); 2311 } 2312 } 2313 2314 } 2315 2316 if (updated & (MD_physname | MD_smode | MD_mtu)) { 2317 2318 /* 2319 * Stop any pending thread to setup switching mode. 2320 */ 2321 vsw_setup_switching_stop(vswp); 2322 2323 /* Cleanup HybridIO */ 2324 vsw_hio_cleanup(vswp); 2325 2326 /* 2327 * Remove unicst, mcst addrs of vsw interface 2328 * and ports from the physdev. This also closes 2329 * the corresponding mac clients. 2330 */ 2331 vsw_unset_addrs(vswp); 2332 2333 /* 2334 * Stop, detach and close the old device.. 2335 */ 2336 mutex_enter(&vswp->mac_lock); 2337 vsw_mac_close(vswp); 2338 mutex_exit(&vswp->mac_lock); 2339 2340 /* 2341 * Update phys name. 
2342 */ 2343 if (updated & MD_physname) { 2344 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", 2345 vswp->instance, vswp->physname, physname); 2346 (void) strncpy(vswp->physname, 2347 physname, strlen(physname) + 1); 2348 } 2349 2350 /* 2351 * Update array with the new switch mode values. 2352 */ 2353 if (updated & MD_smode) { 2354 vswp->smode = new_smode; 2355 } 2356 2357 /* Update mtu */ 2358 if (updated & MD_mtu) { 2359 rv = vsw_mtu_update(vswp, mtu); 2360 if (rv != 0) { 2361 goto fail_update; 2362 } 2363 } 2364 2365 /* 2366 * ..and attach, start the new device. 2367 */ 2368 rv = vsw_setup_switching(vswp); 2369 if (rv == EAGAIN) { 2370 /* 2371 * Unable to setup switching mode. 2372 * As the error is EAGAIN, schedule a thread to retry 2373 * and return. Programming addresses of ports and 2374 * vsw interface will be done by the thread when the 2375 * switching setup completes successfully. 2376 */ 2377 if (vsw_setup_switching_start(vswp) != 0) { 2378 goto fail_update; 2379 } 2380 return; 2381 2382 } else if (rv) { 2383 goto fail_update; 2384 } 2385 2386 vsw_setup_switching_post_process(vswp); 2387 } else if (updated & MD_macaddr) { 2388 /* 2389 * We enter here if only MD_macaddr is exclusively updated. 2390 * If MD_physname and/or MD_smode are also updated, then 2391 * as part of that, we would have implicitly processed 2392 * MD_macaddr update (above). 2393 */ 2394 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", 2395 vswp->instance, macaddr); 2396 2397 READ_ENTER(&vswp->if_lockrw); 2398 if (vswp->if_state & VSW_IF_UP) { 2399 /* reconfigure with new address */ 2400 vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0); 2401 2402 /* 2403 * Notify the MAC layer of the changed address. 2404 */ 2405 mac_unicst_update(vswp->if_mh, 2406 (uint8_t *)&vswp->if_addr); 2407 2408 } 2409 RW_EXIT(&vswp->if_lockrw); 2410 2411 } 2412 2413 if (updated & MD_vlans) { 2414 /* Remove existing vlan ids from the hash table. 
*/ 2415 vsw_vlan_remove_ids(vswp, VSW_LOCALDEV); 2416 2417 if (vswp->if_state & VSW_IF_UP) { 2418 vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids); 2419 } else { 2420 if (vswp->nvids != 0) { 2421 kmem_free(vswp->vids, 2422 sizeof (vsw_vlanid_t) * vswp->nvids); 2423 } 2424 vswp->vids = vids; 2425 vswp->nvids = nvids; 2426 vswp->pvid = pvid; 2427 } 2428 2429 /* add these new vlan ids into hash table */ 2430 vsw_vlan_add_ids(vswp, VSW_LOCALDEV); 2431 } else { 2432 if (nvids != 0) { 2433 kmem_free(vids, sizeof (vsw_vlanid_t) * nvids); 2434 } 2435 } 2436 2437 if (updated & MD_bw) { 2438 vsw_update_bandwidth(vswp, NULL, VSW_LOCALDEV, maxbw); 2439 } 2440 2441 return; 2442 2443 fail_reconf: 2444 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); 2445 return; 2446 2447 fail_update: 2448 cmn_err(CE_WARN, "!vsw%d: re-configuration failed", 2449 vswp->instance); 2450 } 2451 2452 /* 2453 * Read the port's md properties. 2454 */ 2455 static int 2456 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 2457 md_t *mdp, mde_cookie_t *node) 2458 { 2459 uint64_t ldc_id; 2460 uint8_t *addrp; 2461 int i, addrsz; 2462 int num_nodes = 0, nchan = 0; 2463 int listsz = 0; 2464 mde_cookie_t *listp = NULL; 2465 struct ether_addr ea; 2466 uint64_t macaddr; 2467 uint64_t inst = 0; 2468 uint64_t val; 2469 2470 if (md_get_prop_val(mdp, *node, id_propname, &inst)) { 2471 DWARN(vswp, "%s: prop(%s) not found", __func__, 2472 id_propname); 2473 return (1); 2474 } 2475 2476 /* 2477 * Find the channel endpoint node(s) (which should be under this 2478 * port node) which contain the channel id(s). 
2479 */ 2480 if ((num_nodes = md_node_count(mdp)) <= 0) { 2481 DERR(vswp, "%s: invalid number of nodes found (%d)", 2482 __func__, num_nodes); 2483 return (1); 2484 } 2485 2486 D2(vswp, "%s: %d nodes found", __func__, num_nodes); 2487 2488 /* allocate enough space for node list */ 2489 listsz = num_nodes * sizeof (mde_cookie_t); 2490 listp = kmem_zalloc(listsz, KM_SLEEP); 2491 2492 nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname), 2493 md_find_name(mdp, "fwd"), listp); 2494 2495 if (nchan <= 0) { 2496 DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname); 2497 kmem_free(listp, listsz); 2498 return (1); 2499 } 2500 2501 D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname); 2502 2503 /* use property from first node found */ 2504 if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) { 2505 DWARN(vswp, "%s: prop(%s) not found\n", __func__, 2506 id_propname); 2507 kmem_free(listp, listsz); 2508 return (1); 2509 } 2510 2511 /* don't need list any more */ 2512 kmem_free(listp, listsz); 2513 2514 D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id); 2515 2516 /* read mac-address property */ 2517 if (md_get_prop_data(mdp, *node, remaddr_propname, 2518 &addrp, &addrsz)) { 2519 DWARN(vswp, "%s: prop(%s) not found", 2520 __func__, remaddr_propname); 2521 return (1); 2522 } 2523 2524 if (addrsz < ETHERADDRL) { 2525 DWARN(vswp, "%s: invalid address size", __func__); 2526 return (1); 2527 } 2528 2529 macaddr = *((uint64_t *)addrp); 2530 D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr); 2531 2532 for (i = ETHERADDRL - 1; i >= 0; i--) { 2533 ea.ether_addr_octet[i] = macaddr & 0xFF; 2534 macaddr >>= 8; 2535 } 2536 2537 /* now update all properties into the port */ 2538 portp->p_vswp = vswp; 2539 portp->p_instance = inst; 2540 portp->addr_set = B_FALSE; 2541 ether_copy(&ea, &portp->p_macaddr); 2542 if (nchan > VSW_PORT_MAX_LDCS) { 2543 D2(vswp, "%s: using first of %d ldc ids", 2544 __func__, nchan); 2545 nchan = VSW_PORT_MAX_LDCS; 2546 
} 2547 portp->num_ldcs = nchan; 2548 portp->ldc_ids = 2549 kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP); 2550 bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan); 2551 2552 /* read vlan id properties of this port node */ 2553 vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid, 2554 &portp->vids, &portp->nvids, NULL); 2555 2556 /* Check if hybrid property is present */ 2557 if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) { 2558 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2559 portp->p_hio_enabled = B_TRUE; 2560 } else { 2561 portp->p_hio_enabled = B_FALSE; 2562 } 2563 /* 2564 * Port hio capability determined after version 2565 * negotiation, i.e., when we know the peer is HybridIO capable. 2566 */ 2567 portp->p_hio_capable = B_FALSE; 2568 2569 /* Read bandwidth of this port */ 2570 vsw_port_read_bandwidth(portp, mdp, *node, &portp->p_bandwidth); 2571 2572 return (0); 2573 } 2574 2575 /* 2576 * Add a new port to the system. 2577 * 2578 * Returns 0 on success, 1 on failure. 
2579 */ 2580 int 2581 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) 2582 { 2583 vsw_port_t *portp; 2584 int rv; 2585 2586 portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP); 2587 2588 rv = vsw_port_read_props(portp, vswp, mdp, node); 2589 if (rv != 0) { 2590 kmem_free(portp, sizeof (*portp)); 2591 return (1); 2592 } 2593 2594 rv = vsw_port_attach(portp); 2595 if (rv != 0) { 2596 DERR(vswp, "%s: failed to attach port", __func__); 2597 return (1); 2598 } 2599 2600 return (0); 2601 } 2602 2603 static int 2604 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 2605 md_t *prev_mdp, mde_cookie_t prev_mdex) 2606 { 2607 uint64_t cport_num; 2608 uint64_t pport_num; 2609 vsw_port_list_t *plistp; 2610 vsw_port_t *portp; 2611 uint16_t pvid; 2612 vsw_vlanid_t *vids; 2613 uint16_t nvids; 2614 uint64_t val; 2615 boolean_t hio_enabled = B_FALSE; 2616 uint64_t maxbw; 2617 enum {P_MD_init = 0x1, 2618 P_MD_vlans = 0x2, 2619 P_MD_hio = 0x4, 2620 P_MD_maxbw = 0x8} updated; 2621 2622 updated = P_MD_init; 2623 2624 /* 2625 * For now, we get port updates only if vlan ids changed. 2626 * We read the port num and do some sanity check. 2627 */ 2628 if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) { 2629 return (1); 2630 } 2631 2632 if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) { 2633 return (1); 2634 } 2635 if (cport_num != pport_num) 2636 return (1); 2637 2638 plistp = &(vswp->plist); 2639 2640 READ_ENTER(&plistp->lockrw); 2641 2642 portp = vsw_lookup_port(vswp, cport_num); 2643 if (portp == NULL) { 2644 RW_EXIT(&plistp->lockrw); 2645 return (1); 2646 } 2647 2648 /* Read the vlan ids */ 2649 vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid, 2650 &vids, &nvids, NULL); 2651 2652 /* Determine if there are any vlan id updates */ 2653 if ((pvid != portp->pvid) || /* pvid changed? */ 2654 (nvids != portp->nvids) || /* # of vids changed? */ 2655 ((nvids != 0) && (portp->nvids != 0) && /* vids changed? 
*/ 2656 !vsw_cmp_vids(vids, portp->vids, nvids))) { 2657 updated |= P_MD_vlans; 2658 } 2659 2660 /* Check if hybrid property is present */ 2661 if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) { 2662 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2663 hio_enabled = B_TRUE; 2664 } 2665 2666 if (portp->p_hio_enabled != hio_enabled) { 2667 updated |= P_MD_hio; 2668 } 2669 2670 /* Check if maxbw property is present */ 2671 vsw_port_read_bandwidth(portp, curr_mdp, curr_mdex, &maxbw); 2672 if (maxbw != portp->p_bandwidth) { 2673 if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) { 2674 updated |= P_MD_maxbw; 2675 } else { 2676 cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth" 2677 " update for port %d as the specified value:%ld" 2678 " is invalid\n", 2679 vswp->instance, portp->p_instance, maxbw); 2680 } 2681 } 2682 2683 if (updated & P_MD_vlans) { 2684 /* Remove existing vlan ids from the hash table. */ 2685 vsw_vlan_remove_ids(portp, VSW_VNETPORT); 2686 2687 /* Reconfigure vlans with network device */ 2688 vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids); 2689 2690 /* add these new vlan ids into hash table */ 2691 vsw_vlan_add_ids(portp, VSW_VNETPORT); 2692 2693 /* reset the port if it is vlan unaware (ver < 1.3) */ 2694 vsw_vlan_unaware_port_reset(portp); 2695 } 2696 2697 if (updated & P_MD_hio) { 2698 vsw_hio_port_update(portp, hio_enabled); 2699 } 2700 2701 if (updated & P_MD_maxbw) { 2702 vsw_update_bandwidth(NULL, portp, VSW_VNETPORT, maxbw); 2703 } 2704 2705 RW_EXIT(&plistp->lockrw); 2706 2707 return (0); 2708 } 2709 2710 /* 2711 * vsw_mac_rx -- A common function to send packets to the interface. 2712 * By default this function check if the interface is UP or not, the 2713 * rest of the behaviour depends on the flags as below: 2714 * 2715 * VSW_MACRX_PROMISC -- Check if the promisc mode set or not. 2716 * VSW_MACRX_COPYMSG -- Make a copy of the message(s). 
2717 * VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack. 2718 */ 2719 void 2720 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 2721 mblk_t *mp, vsw_macrx_flags_t flags) 2722 { 2723 mblk_t *mpt; 2724 2725 D1(vswp, "%s:enter\n", __func__); 2726 READ_ENTER(&vswp->if_lockrw); 2727 /* Check if the interface is up */ 2728 if (!(vswp->if_state & VSW_IF_UP)) { 2729 RW_EXIT(&vswp->if_lockrw); 2730 /* Free messages only if FREEMSG flag specified */ 2731 if (flags & VSW_MACRX_FREEMSG) { 2732 freemsgchain(mp); 2733 } 2734 D1(vswp, "%s:exit\n", __func__); 2735 return; 2736 } 2737 /* 2738 * If PROMISC flag is passed, then check if 2739 * the interface is in the PROMISC mode. 2740 * If not, drop the messages. 2741 */ 2742 if (flags & VSW_MACRX_PROMISC) { 2743 if (!(vswp->if_state & VSW_IF_PROMISC)) { 2744 RW_EXIT(&vswp->if_lockrw); 2745 /* Free messages only if FREEMSG flag specified */ 2746 if (flags & VSW_MACRX_FREEMSG) { 2747 freemsgchain(mp); 2748 } 2749 D1(vswp, "%s:exit\n", __func__); 2750 return; 2751 } 2752 } 2753 RW_EXIT(&vswp->if_lockrw); 2754 /* 2755 * If COPYMSG flag is passed, then make a copy 2756 * of the message chain and send up the copy. 
2757 */ 2758 if (flags & VSW_MACRX_COPYMSG) { 2759 mp = copymsgchain(mp); 2760 if (mp == NULL) { 2761 D1(vswp, "%s:exit\n", __func__); 2762 return; 2763 } 2764 } 2765 2766 D2(vswp, "%s: sending up stack", __func__); 2767 2768 mpt = NULL; 2769 (void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt); 2770 if (mp != NULL) { 2771 mac_rx(vswp->if_mh, mrh, mp); 2772 } 2773 D1(vswp, "%s:exit\n", __func__); 2774 } 2775 2776 /* copy mac address of vsw into soft state structure */ 2777 static void 2778 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr) 2779 { 2780 int i; 2781 2782 WRITE_ENTER(&vswp->if_lockrw); 2783 for (i = ETHERADDRL - 1; i >= 0; i--) { 2784 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF; 2785 macaddr >>= 8; 2786 } 2787 RW_EXIT(&vswp->if_lockrw); 2788 } 2789 2790 /* Compare VLAN ids, array size expected to be same. */ 2791 static boolean_t 2792 vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids) 2793 { 2794 int i, j; 2795 uint16_t vid; 2796 2797 for (i = 0; i < nvids; i++) { 2798 vid = vids1[i].vl_vid; 2799 for (j = 0; j < nvids; j++) { 2800 if (vid == vids2[i].vl_vid) 2801 break; 2802 } 2803 if (j == nvids) { 2804 return (B_FALSE); 2805 } 2806 } 2807 return (B_TRUE); 2808 } 2809