1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/errno.h> 28 #include <sys/debug.h> 29 #include <sys/time.h> 30 #include <sys/sysmacros.h> 31 #include <sys/systm.h> 32 #include <sys/user.h> 33 #include <sys/stropts.h> 34 #include <sys/stream.h> 35 #include <sys/strlog.h> 36 #include <sys/strsubr.h> 37 #include <sys/cmn_err.h> 38 #include <sys/cpu.h> 39 #include <sys/kmem.h> 40 #include <sys/conf.h> 41 #include <sys/ddi.h> 42 #include <sys/sunddi.h> 43 #include <sys/ksynch.h> 44 #include <sys/stat.h> 45 #include <sys/kstat.h> 46 #include <sys/vtrace.h> 47 #include <sys/strsun.h> 48 #include <sys/dlpi.h> 49 #include <sys/ethernet.h> 50 #include <net/if.h> 51 #include <sys/varargs.h> 52 #include <sys/machsystm.h> 53 #include <sys/modctl.h> 54 #include <sys/modhash.h> 55 #include <sys/mac_provider.h> 56 #include <sys/mac_ether.h> 57 #include <sys/taskq.h> 58 #include <sys/note.h> 59 #include <sys/mach_descrip.h> 60 #include <sys/mac_provider.h> 61 #include <sys/mdeg.h> 62 #include <sys/ldc.h> 63 #include <sys/vsw_fdb.h> 64 #include <sys/vsw.h> 65 #include <sys/vio_mailbox.h> 66 #include <sys/vnet_mailbox.h> 67 #include <sys/vnet_common.h> 68 #include <sys/vio_util.h> 69 #include <sys/sdt.h> 70 #include <sys/atomic.h> 71 #include <sys/callb.h> 72 #include <sys/vlan.h> 73 74 /* 75 * Function prototypes. 76 */ 77 static int vsw_attach(dev_info_t *, ddi_attach_cmd_t); 78 static int vsw_detach(dev_info_t *, ddi_detach_cmd_t); 79 static int vsw_unattach(vsw_t *vswp); 80 static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *); 81 static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *); 82 void vsw_destroy_rxpools(void *); 83 84 /* MDEG routines */ 85 static int vsw_mdeg_register(vsw_t *vswp); 86 static void vsw_mdeg_unregister(vsw_t *vswp); 87 static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *); 88 static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *); 89 static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t); 90 static int vsw_read_mdprops(vsw_t *vswp); 91 static void vsw_vlan_read_ids(void *arg, int type, md_t *mdp, 92 mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp, 93 uint16_t *nvidsp, uint16_t *default_idp); 94 static void vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp, 95 mde_cookie_t node, uint64_t *bw); 96 static int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 97 md_t *mdp, mde_cookie_t *node); 98 static void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, 99 mde_cookie_t node); 100 static void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 101 uint32_t *mtu); 102 static int vsw_mtu_update(vsw_t *vswp, uint32_t mtu); 103 static void vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 104 boolean_t *pls); 105 static void vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 106 uint64_t *bw); 107 static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t); 108 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 109 static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1, 110 vsw_vlanid_t *vids2, int nvids); 111 112 /* Mac driver related routines */ 113 static int vsw_mac_register(vsw_t *); 114 static int vsw_mac_unregister(vsw_t *); 115 static int vsw_m_stat(void *, uint_t, uint64_t *); 116 static void vsw_m_stop(void *arg); 117 static int vsw_m_start(void *arg); 118 static int vsw_m_unicst(void *arg, const uint8_t *); 119 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *); 120 static int vsw_m_promisc(void *arg, boolean_t); 121 static mblk_t *vsw_m_tx(void *arg, mblk_t *); 122 void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state); 123 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 124 mblk_t *mp, vsw_macrx_flags_t flags); 125 void vsw_physlink_state_update(vsw_t *vswp); 126 127 /* 128 * Functions imported from other files. 129 */ 130 extern void vsw_setup_switching_thread(void *arg); 131 extern int vsw_setup_switching_start(vsw_t *vswp); 132 extern void vsw_setup_switching_stop(vsw_t *vswp); 133 extern int vsw_setup_switching(vsw_t *); 134 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, 135 vsw_port_t *port, mac_resource_handle_t mrh); 136 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 137 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 138 extern void vsw_del_mcst_vsw(vsw_t *); 139 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 140 extern void vsw_detach_ports(vsw_t *vswp); 141 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 142 extern int vsw_port_detach(vsw_t *vswp, int p_instance); 143 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 144 md_t *prev_mdp, mde_cookie_t prev_mdex); 145 extern int vsw_port_attach(vsw_port_t *port); 146 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 147 extern int vsw_mac_open(vsw_t *vswp); 148 extern void vsw_mac_close(vsw_t *vswp); 149 extern void vsw_mac_cleanup_ports(vsw_t *vswp); 150 extern void vsw_unset_addrs(vsw_t *vswp); 151 extern void vsw_setup_switching_post_process(vsw_t *vswp); 152 extern void vsw_create_vlans(void *arg, int type); 153 extern void vsw_destroy_vlans(void *arg, int type); 154 extern void vsw_vlan_add_ids(void *arg, int type); 155 extern void vsw_vlan_remove_ids(void *arg, int type); 156 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 157 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, 158 mblk_t **npt); 159 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 160 extern void vsw_hio_cleanup(vsw_t *vswp); 161 extern void vsw_hio_start_ports(vsw_t *vswp); 162 extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled); 163 extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int); 164 extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int); 165 extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid, 166 vsw_vlanid_t *new_vids, int new_nvids); 167 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type); 168 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type); 169 extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans, 170 uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids); 171 extern void vsw_reset_ports(vsw_t *vswp); 172 extern void vsw_port_reset(vsw_port_t *portp); 173 extern void vsw_physlink_update_ports(vsw_t *vswp); 174 extern void vsw_update_bandwidth(vsw_t *vswp, vsw_port_t *port, int type, 175 uint64_t maxbw); 176 177 /* 178 * Internal tunables. 179 */ 180 int vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */ 181 int vsw_wretries = 100; /* # of write attempts */ 182 int vsw_setup_switching_delay = 3; /* setup sw timeout interval in sec */ 183 int vsw_mac_open_retries = 300; /* max # of mac_open() retries */ 184 /* 300*3 = 900sec(15min) of max tmout */ 185 int vsw_ldc_tx_delay = 5; /* delay(ticks) for tx retries */ 186 int vsw_ldc_tx_retries = 10; /* # of ldc tx retries */ 187 int vsw_ldc_retries = 5; /* # of ldc_close() retries */ 188 int vsw_ldc_delay = 1000; /* 1 ms delay for ldc_close() */ 189 boolean_t vsw_ldc_rxthr_enabled = B_TRUE; /* LDC Rx thread enabled */ 190 boolean_t vsw_ldc_txthr_enabled = B_TRUE; /* LDC Tx thread enabled */ 191 int vsw_rxpool_cleanup_delay = 100000; /* 100ms */ 192 193 194 uint32_t vsw_fdb_nchains = 8; /* # of chains in fdb hash table */ 195 uint32_t vsw_vlan_nchains = 4; /* # of chains in vlan id hash table */ 196 uint32_t vsw_ethermtu = 1500; /* mtu of the device */ 197 198 /* delay in usec to wait for all references on a fdb entry to be dropped */ 199 uint32_t vsw_fdbe_refcnt_delay = 10; 200 201 /* 202 * Default vlan id. This is only used internally when the "default-vlan-id" 203 * property is not present in the MD device node. Therefore, this should not be 204 * used as a tunable; if this value is changed, the corresponding variable 205 * should be updated to the same value in all vnets connected to this vsw. 206 */ 207 uint16_t vsw_default_vlan_id = 1; 208 209 /* 210 * Workaround for a version handshake bug in obp's vnet. 211 * If vsw initiates version negotiation starting from the highest version, 212 * obp sends a nack and terminates version handshake. To workaround 213 * this, we do not initiate version handshake when the channel comes up. 214 * Instead, we wait for the peer to send its version info msg and go through 215 * the version protocol exchange. If we successfully negotiate a version, 216 * before sending the ack, we send our version info msg to the peer 217 * using the <major,minor> version that we are about to ack. 218 */ 219 boolean_t vsw_obp_ver_proto_workaround = B_TRUE; 220 221 /* 222 * In the absence of "priority-ether-types" property in MD, the following 223 * internal tunable can be set to specify a single priority ethertype. 224 */ 225 uint64_t vsw_pri_eth_type = 0; 226 227 /* 228 * Number of transmit priority buffers that are preallocated per device. 229 * This number is chosen to be a small value to throttle transmission 230 * of priority packets. Note: Must be a power of 2 for vio_create_mblks(). 231 */ 232 uint32_t vsw_pri_tx_nmblks = 64; 233 234 /* 235 * Number of RARP packets sent to announce macaddr to the physical switch, 236 * after vsw's physical device is changed dynamically or after a guest (client 237 * vnet) is live migrated in. 238 */ 239 uint32_t vsw_publish_macaddr_count = 3; 240 241 /* 242 * Enable/disable HybridIO 243 */ 244 boolean_t vsw_hio_enabled = B_TRUE; 245 246 /* 247 * Max retries for HybridIO cleanup 248 */ 249 int vsw_hio_max_cleanup_retries = 10; 250 251 /* 252 * 10ms delay for HybridIO cleanup 253 */ 254 int vsw_hio_cleanup_delay = 10000; 255 256 /* 257 * Descriptor ring modes of LDC data transfer: 258 * 259 * 1) TxDring mode: 260 * In versions < v1.6 of VIO Protocol, we support only TxDring mode. In this 261 * mode, we create a transmit descriptor ring and export it to the peer through 262 * dring registration process of handshake. The descriptor ring is exported 263 * using LDC shared memory. Each descriptor is associated with a data buffer. 264 * The data buffer is also exported over LDC and the cookies for this data 265 * buffer are provided in the descriptor. The peer maps this ring as its 266 * receive ring. Similarly, the peer exports a transmit descriptor ring which 267 * is mapped by this device as its receive ring. In this mode, in a given data 268 * transfer direction, the transmitter copies the data to the exported data 269 * buffer (owned by itself), bound to the descriptor. The receiver uses the LDC 270 * cookies specified in the descriptor to copy the data into the receiving 271 * guest through the hypervisor (ldc_mem_copy()). 272 * 273 * 2) RxDringData mode: 274 * In versions >= v1.6 of VIO Protocol, we also support RxDringData mode. In 275 * this mode, we create a receive descriptor ring and export it to the peer 276 * through dring registration process of handshake. In addition, we export a 277 * receive buffer area and provide that information also in the dring 278 * registration message. The descriptor ring and the data buffer area are 279 * exported using LDC shared memory. Each descriptor is associated with a data 280 * buffer in the data buffer area and the offset of the specific data buffer 281 * within this area is specified in the descriptor. The peer maps this ring 282 * along with the data buffer area as its transmit ring. Similarly, the peer 283 * exports a receive ring which is mapped by this device as its transmit ring, 284 * along with its buffer area. In this mode, in a given data transfer 285 * direction, the transmitter copies the data to the data buffer offset 286 * specified in the descriptor. The receiver simply picks up the data buffer 287 * (owned by itself) without any copy operation into the receiving guest. 288 * 289 * We enable RxDringData mode during handshake negotiations if LDC supports 290 * mapping in large areas of shared memory(see ldc_is_viotsb_configured() API), 291 * which is required to support RxDringData mode. 292 */ 293 294 /* 295 * Number of descriptors; must be power of 2. 296 */ 297 uint32_t vsw_num_descriptors = VSW_NUM_DESCRIPTORS; 298 299 /* 300 * In RxDringData mode, # of buffers is determined by multiplying the # of 301 * descriptors with the factor below. Note that the factor must be > 1; i.e, 302 * the # of buffers must always be > # of descriptors. This is needed because, 303 * while the shared memory buffers are sent up the stack on the receiver, the 304 * sender needs additional buffers that can be used for further transmits. 305 * See vsw_setup_rx_dring() for details. 306 */ 307 uint32_t vsw_nrbufs_factor = 2; 308 309 /* 310 * Delay when rx descr not ready; used in both dring modes. 311 */ 312 int vsw_recv_delay = 0; 313 314 /* 315 * Retry when rx descr not ready; used in both dring modes. 316 */ 317 int vsw_recv_retries = 5; 318 319 /* 320 * Max number of mblks received in one receive operation. 321 */ 322 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6); 323 324 /* 325 * Internal tunables for receive buffer pools, that is, the size and number of 326 * mblks for each pool. At least 3 sizes must be specified if these are used. 327 * The sizes must be specified in increasing order. Non-zero value of the first 328 * size will be used as a hint to use these values instead of the algorithm 329 * that determines the sizes based on MTU. Used in TxDring mode only. 330 */ 331 uint32_t vsw_mblk_size1 = 0; 332 uint32_t vsw_mblk_size2 = 0; 333 uint32_t vsw_mblk_size3 = 0; 334 uint32_t vsw_mblk_size4 = 0; 335 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS; /* number of mblks for pool1 */ 336 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS; /* number of mblks for pool2 */ 337 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS; /* number of mblks for pool3 */ 338 uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS; /* number of mblks for pool4 */ 339 340 /* 341 * Set this to non-zero to enable additional internal receive buffer pools 342 * based on the MTU of the device for better performance at the cost of more 343 * memory consumption. This is turned off by default, to use allocb(9F) for 344 * receive buffer allocations of sizes > 2K. 345 */ 346 boolean_t vsw_jumbo_rxpools = B_FALSE; 347 348 /* 349 * vsw_max_tx_qcount is the maximum # of packets that can be queued 350 * before the tx worker thread begins processing the queue. Its value 351 * is chosen to be 4x the default length of tx descriptor ring. 352 */ 353 uint32_t vsw_max_tx_qcount = 4 * VSW_NUM_DESCRIPTORS; 354 355 /* 356 * MAC callbacks 357 */ 358 static mac_callbacks_t vsw_m_callbacks = { 359 0, 360 vsw_m_stat, 361 vsw_m_start, 362 vsw_m_stop, 363 vsw_m_promisc, 364 vsw_m_multicst, 365 vsw_m_unicst, 366 vsw_m_tx 367 }; 368 369 static struct cb_ops vsw_cb_ops = { 370 nulldev, /* cb_open */ 371 nulldev, /* cb_close */ 372 nodev, /* cb_strategy */ 373 nodev, /* cb_print */ 374 nodev, /* cb_dump */ 375 nodev, /* cb_read */ 376 nodev, /* cb_write */ 377 nodev, /* cb_ioctl */ 378 nodev, /* cb_devmap */ 379 nodev, /* cb_mmap */ 380 nodev, /* cb_segmap */ 381 nochpoll, /* cb_chpoll */ 382 ddi_prop_op, /* cb_prop_op */ 383 NULL, /* cb_stream */ 384 D_MP, /* cb_flag */ 385 CB_REV, /* rev */ 386 nodev, /* int (*cb_aread)() */ 387 nodev /* int (*cb_awrite)() */ 388 }; 389 390 static struct dev_ops vsw_ops = { 391 DEVO_REV, /* devo_rev */ 392 0, /* devo_refcnt */ 393 NULL, /* devo_getinfo */ 394 nulldev, /* devo_identify */ 395 nulldev, /* devo_probe */ 396 vsw_attach, /* devo_attach */ 397 vsw_detach, /* devo_detach */ 398 nodev, /* devo_reset */ 399 &vsw_cb_ops, /* devo_cb_ops */ 400 (struct bus_ops *)NULL, /* devo_bus_ops */ 401 ddi_power /* devo_power */ 402 }; 403 404 extern struct mod_ops mod_driverops; 405 static struct modldrv vswmodldrv = { 406 &mod_driverops, 407 "sun4v Virtual Switch", 408 &vsw_ops, 409 }; 410 411 #define LDC_ENTER_LOCK(ldcp) \ 412 mutex_enter(&((ldcp)->ldc_cblock));\ 413 mutex_enter(&((ldcp)->ldc_rxlock));\ 414 mutex_enter(&((ldcp)->ldc_txlock)); 415 #define LDC_EXIT_LOCK(ldcp) \ 416 mutex_exit(&((ldcp)->ldc_txlock));\ 417 mutex_exit(&((ldcp)->ldc_rxlock));\ 418 mutex_exit(&((ldcp)->ldc_cblock)); 419 420 /* Driver soft state ptr */ 421 static void *vsw_state; 422 423 /* 424 * Linked list of "vsw_t" structures - one per instance. 425 */ 426 vsw_t *vsw_head = NULL; 427 krwlock_t vsw_rw; 428 429 /* 430 * Property names 431 */ 432 static char vdev_propname[] = "virtual-device"; 433 static char vsw_propname[] = "virtual-network-switch"; 434 static char physdev_propname[] = "vsw-phys-dev"; 435 static char smode_propname[] = "vsw-switch-mode"; 436 static char macaddr_propname[] = "local-mac-address"; 437 static char remaddr_propname[] = "remote-mac-address"; 438 static char ldcids_propname[] = "ldc-ids"; 439 static char chan_propname[] = "channel-endpoint"; 440 static char id_propname[] = "id"; 441 static char reg_propname[] = "reg"; 442 static char pri_types_propname[] = "priority-ether-types"; 443 static char vsw_pvid_propname[] = "port-vlan-id"; 444 static char vsw_vid_propname[] = "vlan-id"; 445 static char vsw_dvid_propname[] = "default-vlan-id"; 446 static char port_pvid_propname[] = "remote-port-vlan-id"; 447 static char port_vid_propname[] = "remote-vlan-id"; 448 static char hybrid_propname[] = "hybrid"; 449 static char vsw_mtu_propname[] = "mtu"; 450 static char vsw_linkprop_propname[] = "linkprop"; 451 static char vsw_maxbw_propname[] = "maxbw"; 452 static char port_maxbw_propname[] = "maxbw"; 453 454 /* 455 * Matching criteria passed to the MDEG to register interest 456 * in changes to 'virtual-device-port' nodes identified by their 457 * 'id' property. 458 */ 459 static md_prop_match_t vport_prop_match[] = { 460 { MDET_PROP_VAL, "id" }, 461 { MDET_LIST_END, NULL } 462 }; 463 464 static mdeg_node_match_t vport_match = { "virtual-device-port", 465 vport_prop_match }; 466 467 /* 468 * Matching criteria passed to the MDEG to register interest 469 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified 470 * by their 'name' and 'cfg-handle' properties. 471 */ 472 static md_prop_match_t vdev_prop_match[] = { 473 { MDET_PROP_STR, "name" }, 474 { MDET_PROP_VAL, "cfg-handle" }, 475 { MDET_LIST_END, NULL } 476 }; 477 478 static mdeg_node_match_t vdev_match = { "virtual-device", 479 vdev_prop_match }; 480 481 482 /* 483 * Specification of an MD node passed to the MDEG to filter any 484 * 'vport' nodes that do not belong to the specified node. This 485 * template is copied for each vsw instance and filled in with 486 * the appropriate 'cfg-handle' value before being passed to the MDEG. 487 */ 488 static mdeg_prop_spec_t vsw_prop_template[] = { 489 { MDET_PROP_STR, "name", vsw_propname }, 490 { MDET_PROP_VAL, "cfg-handle", NULL }, 491 { MDET_LIST_END, NULL, NULL } 492 }; 493 494 #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 495 496 #ifdef DEBUG 497 /* 498 * Print debug messages - set to 0x1f to enable all msgs 499 * or 0x0 to turn all off. 500 */ 501 int vswdbg = 0x0; 502 503 /* 504 * debug levels: 505 * 0x01: Function entry/exit tracing 506 * 0x02: Internal function messages 507 * 0x04: Verbose internal messages 508 * 0x08: Warning messages 509 * 0x10: Error messages 510 */ 511 512 void 513 vswdebug(vsw_t *vswp, const char *fmt, ...) 514 { 515 char buf[512]; 516 va_list ap; 517 518 va_start(ap, fmt); 519 (void) vsprintf(buf, fmt, ap); 520 va_end(ap); 521 522 if (vswp == NULL) 523 cmn_err(CE_CONT, "%s\n", buf); 524 else 525 cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); 526 } 527 528 #endif /* DEBUG */ 529 530 static struct modlinkage modlinkage = { 531 MODREV_1, 532 &vswmodldrv, 533 NULL 534 }; 535 536 int 537 _init(void) 538 { 539 int status; 540 541 rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); 542 543 status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); 544 if (status != 0) { 545 return (status); 546 } 547 548 mac_init_ops(&vsw_ops, DRV_NAME); 549 status = mod_install(&modlinkage); 550 if (status != 0) { 551 ddi_soft_state_fini(&vsw_state); 552 } 553 return (status); 554 } 555 556 int 557 _fini(void) 558 { 559 int status; 560 561 status = mod_remove(&modlinkage); 562 if (status != 0) 563 return (status); 564 mac_fini_ops(&vsw_ops); 565 ddi_soft_state_fini(&vsw_state); 566 567 rw_destroy(&vsw_rw); 568 569 return (status); 570 } 571 572 int 573 _info(struct modinfo *modinfop) 574 { 575 return (mod_info(&modlinkage, modinfop)); 576 } 577 578 static int 579 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 580 { 581 vsw_t *vswp; 582 int instance; 583 char hashname[MAXNAMELEN]; 584 char qname[TASKQ_NAMELEN]; 585 vsw_attach_progress_t progress = PROG_init; 586 int rv; 587 588 switch (cmd) { 589 case DDI_ATTACH: 590 break; 591 case DDI_RESUME: 592 /* nothing to do for this non-device */ 593 return (DDI_SUCCESS); 594 case DDI_PM_RESUME: 595 default: 596 return (DDI_FAILURE); 597 } 598 599 instance = ddi_get_instance(dip); 600 if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { 601 DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); 602 return (DDI_FAILURE); 603 } 604 vswp = ddi_get_soft_state(vsw_state, instance); 605 606 if (vswp == NULL) { 607 DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance); 608 goto vsw_attach_fail; 609 } 610 611 vswp->dip = dip; 612 vswp->instance = instance; 613 vswp->phys_link_state = LINK_STATE_UNKNOWN; 614 ddi_set_driver_private(dip, (caddr_t)vswp); 615 616 mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL); 617 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); 618 mutex_init(&vswp->sw_thr_lock, NULL, MUTEX_DRIVER, NULL); 619 cv_init(&vswp->sw_thr_cv, NULL, CV_DRIVER, NULL); 620 rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL); 621 rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); 622 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); 623 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); 624 625 progress |= PROG_locks; 626 627 rv = vsw_read_mdprops(vswp); 628 if (rv != 0) 629 goto vsw_attach_fail; 630 631 progress |= PROG_readmd; 632 633 /* setup the unicast forwarding database */ 634 (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", 635 vswp->instance); 636 D2(vswp, "creating unicast hash table (%s)...", hashname); 637 vswp->fdb_nchains = vsw_fdb_nchains; 638 vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains, 639 mod_hash_null_valdtor, sizeof (void *)); 640 vsw_create_vlans((void *)vswp, VSW_LOCALDEV); 641 progress |= PROG_fdb; 642 643 /* setup the multicast fowarding database */ 644 (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", 645 vswp->instance); 646 D2(vswp, "creating multicast hash table %s)...", hashname); 647 vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains, 648 mod_hash_null_valdtor, sizeof (void *)); 649 650 progress |= PROG_mfdb; 651 652 /* 653 * Create the taskq which will process all the VIO 654 * control messages. 655 */ 656 (void) snprintf(qname, TASKQ_NAMELEN, "taskq%d", vswp->instance); 657 if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, 658 TASKQ_DEFAULTPRI, 0)) == NULL) { 659 cmn_err(CE_WARN, "!vsw%d: Unable to create task queue", 660 vswp->instance); 661 goto vsw_attach_fail; 662 } 663 664 progress |= PROG_taskq; 665 666 (void) snprintf(qname, TASKQ_NAMELEN, "rxpool_taskq%d", 667 vswp->instance); 668 if ((vswp->rxp_taskq = ddi_taskq_create(vswp->dip, qname, 1, 669 TASKQ_DEFAULTPRI, 0)) == NULL) { 670 cmn_err(CE_WARN, "!vsw%d: Unable to create rxp task queue", 671 vswp->instance); 672 goto vsw_attach_fail; 673 } 674 675 progress |= PROG_rxp_taskq; 676 677 /* prevent auto-detaching */ 678 if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip, 679 DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) { 680 cmn_err(CE_NOTE, "!Unable to set \"%s\" property for " 681 "instance %u", DDI_NO_AUTODETACH, instance); 682 } 683 684 /* 685 * The null switching function is set to avoid panic until 686 * switch mode is setup. 687 */ 688 vswp->vsw_switch_frame = vsw_switch_frame_nop; 689 690 /* 691 * Setup the required switching mode, based on the mdprops that we read 692 * earlier. We start a thread to do this, to avoid calling mac_open() 693 * directly from attach(). 694 */ 695 rv = vsw_setup_switching_start(vswp); 696 if (rv != 0) { 697 goto vsw_attach_fail; 698 } 699 700 progress |= PROG_swmode; 701 702 /* Register with mac layer as a provider */ 703 rv = vsw_mac_register(vswp); 704 if (rv != 0) 705 goto vsw_attach_fail; 706 707 progress |= PROG_macreg; 708 709 /* 710 * Now we have everything setup, register an interest in 711 * specific MD nodes. 712 * 713 * The callback is invoked in 2 cases, firstly if upon mdeg 714 * registration there are existing nodes which match our specified 715 * criteria, and secondly if the MD is changed (and again, there 716 * are nodes which we are interested in present within it. Note 717 * that our callback will be invoked even if our specified nodes 718 * have not actually changed). 719 * 720 */ 721 rv = vsw_mdeg_register(vswp); 722 if (rv != 0) 723 goto vsw_attach_fail; 724 725 progress |= PROG_mdreg; 726 727 vswp->attach_progress = progress; 728 729 WRITE_ENTER(&vsw_rw); 730 vswp->next = vsw_head; 731 vsw_head = vswp; 732 RW_EXIT(&vsw_rw); 733 734 ddi_report_dev(vswp->dip); 735 return (DDI_SUCCESS); 736 737 vsw_attach_fail: 738 DERR(NULL, "vsw_attach: failed"); 739 740 vswp->attach_progress = progress; 741 (void) vsw_unattach(vswp); 742 ddi_soft_state_free(vsw_state, instance); 743 return (DDI_FAILURE); 744 } 745 746 static int 747 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 748 { 749 vsw_t **vswpp, *vswp; 750 int instance; 751 752 instance = ddi_get_instance(dip); 753 vswp = ddi_get_soft_state(vsw_state, instance); 754 755 if (vswp == NULL) { 756 return (DDI_FAILURE); 757 } 758 759 switch (cmd) { 760 case DDI_DETACH: 761 break; 762 case DDI_SUSPEND: 763 case DDI_PM_SUSPEND: 764 default: 765 return (DDI_FAILURE); 766 } 767 768 D2(vswp, "detaching instance %d", instance); 769 770 if (vsw_unattach(vswp) != 0) { 771 return (DDI_FAILURE); 772 } 773 774 ddi_remove_minor_node(dip, NULL); 775 776 WRITE_ENTER(&vsw_rw); 777 for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) { 778 if (*vswpp == vswp) { 779 *vswpp = vswp->next; 780 break; 781 } 782 } 783 RW_EXIT(&vsw_rw); 784 785 ddi_soft_state_free(vsw_state, instance); 786 787 return (DDI_SUCCESS); 788 } 789 790 /* 791 * Common routine to handle vsw_attach() failure and vsw_detach(). Note that 792 * the only reason this function could fail is if mac_unregister() fails. 793 * Otherwise, this function must ensure that all resources are freed and return 794 * success. 795 */ 796 static int 797 vsw_unattach(vsw_t *vswp) 798 { 799 vsw_attach_progress_t progress; 800 801 progress = vswp->attach_progress; 802 803 /* 804 * Unregister from the gldv3 subsystem. This can fail, in particular 805 * if there are still any open references to this mac device; in which 806 * case we just return failure without continuing to detach further. 807 */ 808 if (progress & PROG_macreg) { 809 if (vsw_mac_unregister(vswp) != 0) { 810 cmn_err(CE_WARN, "!vsw%d: Unable to detach from " 811 "MAC layer", vswp->instance); 812 return (1); 813 } 814 progress &= ~PROG_macreg; 815 } 816 817 /* 818 * Now that we have unregistered from gldv3, we must finish all other 819 * steps and successfully return from this function; otherwise we will 820 * end up leaving the device in a broken/unusable state. 821 * 822 * If we have registered with mdeg, unregister now to stop further 823 * callbacks to this vsw device and/or its ports. Then, detach any 824 * existing ports. 825 */ 826 if (progress & PROG_mdreg) { 827 vsw_mdeg_unregister(vswp); 828 vsw_detach_ports(vswp); 829 progress &= ~PROG_mdreg; 830 } 831 832 /* 833 * If we have started a thread to setup the switching mode, stop it, if 834 * it is still running. If it has finished setting up the switching 835 * mode, then we need to clean up some additional things if we are 836 * running in L2 mode: first free up any hybrid resources; then stop 837 * and close the underlying physical device. Note that we would have 838 * already released all per mac_client resources (ucast, mcast addrs, 839 * hio-shares etc) as all the ports are detached and if the vsw device 840 * itself was in use as an interface, it has been unplumbed (otherwise 841 * mac_unregister() above would fail). 842 */ 843 if (progress & PROG_swmode) { 844 845 vsw_setup_switching_stop(vswp); 846 847 if (vswp->hio_capable == B_TRUE) { 848 vsw_hio_cleanup(vswp); 849 vswp->hio_capable = B_FALSE; 850 } 851 852 mutex_enter(&vswp->mac_lock); 853 vsw_mac_close(vswp); 854 mutex_exit(&vswp->mac_lock); 855 856 progress &= ~PROG_swmode; 857 } 858 859 /* 860 * We now destroy the taskq used to clean up rx mblk pools that 861 * couldn't be destroyed when the ports/channels were detached. 862 * We implicitly wait for those tasks to complete in 863 * ddi_taskq_destroy(). 864 */ 865 if (progress & PROG_rxp_taskq) { 866 ddi_taskq_destroy(vswp->rxp_taskq); 867 progress &= ~PROG_rxp_taskq; 868 } 869 870 /* 871 * By now any pending tasks have finished and the underlying 872 * ldc's have been destroyed, so its safe to delete the control 873 * message taskq. 874 */ 875 if (progress & PROG_taskq) { 876 ddi_taskq_destroy(vswp->taskq_p); 877 progress &= ~PROG_taskq; 878 } 879 880 /* Destroy the multicast hash table */ 881 if (progress & PROG_mfdb) { 882 mod_hash_destroy_hash(vswp->mfdb); 883 progress &= ~PROG_mfdb; 884 } 885 886 /* Destroy the vlan hash table and fdb */ 887 if (progress & PROG_fdb) { 888 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 889 mod_hash_destroy_hash(vswp->fdb_hashp); 890 progress &= ~PROG_fdb; 891 } 892 893 if (progress & PROG_readmd) { 894 if (VSW_PRI_ETH_DEFINED(vswp)) { 895 kmem_free(vswp->pri_types, 896 sizeof (uint16_t) * vswp->pri_num_types); 897 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 898 } 899 progress &= ~PROG_readmd; 900 } 901 902 if (progress & PROG_locks) { 903 rw_destroy(&vswp->plist.lockrw); 904 rw_destroy(&vswp->mfdbrw); 905 rw_destroy(&vswp->if_lockrw); 906 rw_destroy(&vswp->maccl_rwlock); 907 cv_destroy(&vswp->sw_thr_cv); 908 mutex_destroy(&vswp->sw_thr_lock); 909 mutex_destroy(&vswp->mca_lock); 910 mutex_destroy(&vswp->mac_lock); 911 progress &= ~PROG_locks; 912 } 913 914 vswp->attach_progress = progress; 915 916 return (0); 917 } 918 919 void 920 vsw_destroy_rxpools(void *arg) 921 { 922 vio_mblk_pool_t *poolp = (vio_mblk_pool_t *)arg; 923 vio_mblk_pool_t *npoolp; 924 925 while (poolp != NULL) { 926 npoolp = poolp->nextp; 927 while (vio_destroy_mblks(poolp) != 0) { 928 delay(drv_usectohz(vsw_rxpool_cleanup_delay)); 929 } 930 poolp = npoolp; 931 } 932 } 933 934 /* 935 * Get the value of the "vsw-phys-dev" property in the specified 936 * node. This property is the name of the physical device that 937 * the virtual switch will use to talk to the outside world. 938 * 939 * Note it is valid for this property to be NULL (but the property 940 * itself must exist). Callers of this routine should verify that 941 * the value returned is what they expected (i.e. either NULL or non NULL). 942 * 943 * On success returns value of the property in region pointed to by 944 * the 'name' argument, and with return value of 0. Otherwise returns 1. 945 */ 946 static int 947 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name) 948 { 949 int len = 0; 950 int instance; 951 char *physname = NULL; 952 char *dev; 953 const char *dev_name; 954 char myname[MAXNAMELEN]; 955 956 dev_name = ddi_driver_name(vswp->dip); 957 instance = ddi_get_instance(vswp->dip); 958 (void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance); 959 960 if (md_get_prop_data(mdp, node, physdev_propname, 961 (uint8_t **)(&physname), &len) != 0) { 962 cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical " 963 "device(s) from MD", vswp->instance); 964 return (1); 965 } else if ((strlen(physname) + 1) > LIFNAMSIZ) { 966 cmn_err(CE_WARN, "!vsw%d: %s is too long a device name", 967 vswp->instance, physname); 968 return (1); 969 } else if (strcmp(myname, physname) == 0) { 970 /* 971 * Prevent the vswitch from opening itself as the 972 * network device. 973 */ 974 cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name", 975 vswp->instance, physname); 976 return (1); 977 } else { 978 (void) strncpy(name, physname, strlen(physname) + 1); 979 D2(vswp, "%s: using first device specified (%s)", 980 __func__, physname); 981 } 982 983 #ifdef DEBUG 984 /* 985 * As a temporary measure to aid testing we check to see if there 986 * is a vsw.conf file present. If there is we use the value of the 987 * vsw_physname property in the file as the name of the physical 988 * device, overriding the value from the MD. 989 * 990 * There may be multiple devices listed, but for the moment 991 * we just use the first one. 992 */ 993 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0, 994 "vsw_physname", &dev) == DDI_PROP_SUCCESS) { 995 if ((strlen(dev) + 1) > LIFNAMSIZ) { 996 cmn_err(CE_WARN, "vsw%d: %s is too long a device name", 997 vswp->instance, dev); 998 ddi_prop_free(dev); 999 return (1); 1000 } else { 1001 cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from " 1002 "config file", vswp->instance, dev); 1003 1004 (void) strncpy(name, dev, strlen(dev) + 1); 1005 } 1006 1007 ddi_prop_free(dev); 1008 } 1009 #endif 1010 1011 return (0); 1012 } 1013 1014 /* 1015 * Read the 'vsw-switch-mode' property from the specified MD node. 1016 * 1017 * Returns 0 on success, otherwise returns 1. 1018 */ 1019 static int 1020 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode) 1021 { 1022 int len = 0; 1023 char *smode = NULL; 1024 char *curr_mode = NULL; 1025 1026 D1(vswp, "%s: enter", __func__); 1027 1028 /* 1029 * Get the switch-mode property. The modes are listed in 1030 * decreasing order of preference, i.e. prefered mode is 1031 * first item in list. 1032 */ 1033 len = 0; 1034 if (md_get_prop_data(mdp, node, smode_propname, 1035 (uint8_t **)(&smode), &len) != 0) { 1036 /* 1037 * Unable to get switch-mode property from MD, nothing 1038 * more we can do. 1039 */ 1040 cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property" 1041 " from the MD", vswp->instance); 1042 return (1); 1043 } 1044 1045 curr_mode = smode; 1046 /* 1047 * Modes of operation: 1048 * 'switched' - layer 2 switching, underlying HW in 1049 * programmed mode. 1050 * 'promiscuous' - layer 2 switching, underlying HW in 1051 * promiscuous mode. 1052 * 'routed' - layer 3 (i.e. IP) routing, underlying HW 1053 * in non-promiscuous mode. 1054 */ 1055 while (curr_mode < (smode + len)) { 1056 D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode); 1057 if (strcmp(curr_mode, "switched") == 0) { 1058 *mode = VSW_LAYER2; 1059 } else if (strcmp(curr_mode, "promiscuous") == 0) { 1060 *mode = VSW_LAYER2 | VSW_LAYER2_PROMISC; 1061 } else if (strcmp(curr_mode, "routed") == 0) { 1062 *mode = VSW_LAYER3; 1063 } else { 1064 cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, " 1065 "setting to default switched mode", 1066 vswp->instance, curr_mode); 1067 *mode = VSW_LAYER2; 1068 } 1069 curr_mode += strlen(curr_mode) + 1; 1070 } 1071 1072 D2(vswp, "%s: %d mode", __func__, *mode); 1073 1074 D1(vswp, "%s: exit", __func__); 1075 1076 return (0); 1077 } 1078 1079 /* 1080 * Register with the MAC layer as a network device, so we 1081 * can be plumbed if necessary. 1082 */ 1083 static int 1084 vsw_mac_register(vsw_t *vswp) 1085 { 1086 mac_register_t *macp; 1087 int rv; 1088 1089 D1(vswp, "%s: enter", __func__); 1090 1091 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 1092 return (EINVAL); 1093 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1094 macp->m_driver = vswp; 1095 macp->m_dip = vswp->dip; 1096 macp->m_src_addr = (uint8_t *)&vswp->if_addr; 1097 macp->m_callbacks = &vsw_m_callbacks; 1098 macp->m_min_sdu = 0; 1099 macp->m_max_sdu = vswp->mtu; 1100 macp->m_margin = VLAN_TAGSZ; 1101 rv = mac_register(macp, &vswp->if_mh); 1102 mac_free(macp); 1103 if (rv != 0) { 1104 /* 1105 * Treat this as a non-fatal error as we may be 1106 * able to operate in some other mode. 1107 */ 1108 cmn_err(CE_NOTE, "!vsw%d: Unable to register as " 1109 "a provider with MAC layer", vswp->instance); 1110 return (rv); 1111 } 1112 1113 vswp->if_state |= VSW_IF_REG; 1114 1115 D1(vswp, "%s: exit", __func__); 1116 1117 return (rv); 1118 } 1119 1120 static int 1121 vsw_mac_unregister(vsw_t *vswp) 1122 { 1123 int rv = 0; 1124 1125 D1(vswp, "%s: enter", __func__); 1126 1127 WRITE_ENTER(&vswp->if_lockrw); 1128 1129 if (vswp->if_state & VSW_IF_REG) { 1130 rv = mac_unregister(vswp->if_mh); 1131 if (rv != 0) { 1132 DWARN(vswp, "%s: unable to unregister from MAC " 1133 "framework", __func__); 1134 1135 RW_EXIT(&vswp->if_lockrw); 1136 D1(vswp, "%s: fail exit", __func__); 1137 return (rv); 1138 } 1139 1140 /* mark i/f as down and unregistered */ 1141 vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG); 1142 } 1143 RW_EXIT(&vswp->if_lockrw); 1144 1145 D1(vswp, "%s: exit", __func__); 1146 1147 return (rv); 1148 } 1149 1150 static int 1151 vsw_m_stat(void *arg, uint_t stat, uint64_t *val) 1152 { 1153 vsw_t *vswp = (vsw_t *)arg; 1154 1155 D1(vswp, "%s: enter", __func__); 1156 1157 mutex_enter(&vswp->mac_lock); 1158 if (vswp->mh == NULL) { 1159 mutex_exit(&vswp->mac_lock); 1160 return (EINVAL); 1161 } 1162 1163 /* return stats from underlying device */ 1164 *val = mac_stat_get(vswp->mh, stat); 1165 1166 mutex_exit(&vswp->mac_lock); 1167 1168 return (0); 1169 } 1170 1171 static void 1172 vsw_m_stop(void *arg) 1173 { 1174 vsw_t *vswp = (vsw_t *)arg; 1175 1176 D1(vswp, "%s: enter", __func__); 1177 1178 WRITE_ENTER(&vswp->if_lockrw); 1179 vswp->if_state &= ~VSW_IF_UP; 1180 RW_EXIT(&vswp->if_lockrw); 1181 1182 /* Cleanup and close the mac client */ 1183 vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV); 1184 1185 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1186 } 1187 1188 static int 1189 vsw_m_start(void *arg) 1190 { 1191 int rv; 1192 vsw_t *vswp = (vsw_t *)arg; 1193 1194 D1(vswp, "%s: enter", __func__); 1195 1196 WRITE_ENTER(&vswp->if_lockrw); 1197 1198 vswp->if_state |= VSW_IF_UP; 1199 1200 if (vswp->switching_setup_done == B_FALSE) { 1201 /* 1202 * If the switching mode has not been setup yet, just 1203 * return. The unicast address will be programmed 1204 * after the physical device is successfully setup by the 1205 * timeout handler. 1206 */ 1207 RW_EXIT(&vswp->if_lockrw); 1208 return (0); 1209 } 1210 1211 /* if in layer2 mode, program unicast address. */ 1212 if (vswp->mh != NULL) { 1213 /* Init a mac client and program addresses */ 1214 rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV); 1215 if (rv != 0) { 1216 cmn_err(CE_NOTE, 1217 "!vsw%d: failed to program interface " 1218 "unicast address\n", vswp->instance); 1219 } 1220 } 1221 1222 RW_EXIT(&vswp->if_lockrw); 1223 1224 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1225 return (0); 1226 } 1227 1228 /* 1229 * Change the local interface address. 1230 * 1231 * Note: we don't support this entry point. The local 1232 * mac address of the switch can only be changed via its 1233 * MD node properties. 1234 */ 1235 static int 1236 vsw_m_unicst(void *arg, const uint8_t *macaddr) 1237 { 1238 _NOTE(ARGUNUSED(arg, macaddr)) 1239 1240 return (DDI_FAILURE); 1241 } 1242 1243 static int 1244 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 1245 { 1246 vsw_t *vswp = (vsw_t *)arg; 1247 mcst_addr_t *mcst_p = NULL; 1248 uint64_t addr = 0x0; 1249 int i, ret = 0; 1250 1251 D1(vswp, "%s: enter", __func__); 1252 1253 /* 1254 * Convert address into form that can be used 1255 * as hash table key. 1256 */ 1257 for (i = 0; i < ETHERADDRL; i++) { 1258 addr = (addr << 8) | mca[i]; 1259 } 1260 1261 D2(vswp, "%s: addr = 0x%llx", __func__, addr); 1262 1263 if (add) { 1264 D2(vswp, "%s: adding multicast", __func__); 1265 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1266 /* 1267 * Update the list of multicast addresses 1268 * contained within the vsw_t structure to 1269 * include this new one. 1270 */ 1271 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP); 1272 if (mcst_p == NULL) { 1273 DERR(vswp, "%s unable to alloc mem", __func__); 1274 (void) vsw_del_mcst(vswp, 1275 VSW_LOCALDEV, addr, NULL); 1276 return (1); 1277 } 1278 mcst_p->addr = addr; 1279 ether_copy(mca, &mcst_p->mca); 1280 1281 /* 1282 * Call into the underlying driver to program the 1283 * address into HW. 1284 */ 1285 ret = vsw_mac_multicast_add(vswp, NULL, mcst_p, 1286 VSW_LOCALDEV); 1287 if (ret != 0) { 1288 (void) vsw_del_mcst(vswp, 1289 VSW_LOCALDEV, addr, NULL); 1290 kmem_free(mcst_p, sizeof (*mcst_p)); 1291 return (ret); 1292 } 1293 1294 mutex_enter(&vswp->mca_lock); 1295 mcst_p->nextp = vswp->mcap; 1296 vswp->mcap = mcst_p; 1297 mutex_exit(&vswp->mca_lock); 1298 } else { 1299 cmn_err(CE_WARN, "!vsw%d: unable to add multicast " 1300 "address", vswp->instance); 1301 } 1302 return (ret); 1303 } 1304 1305 D2(vswp, "%s: removing multicast", __func__); 1306 /* 1307 * Remove the address from the hash table.. 1308 */ 1309 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1310 1311 /* 1312 * ..and then from the list maintained in the 1313 * vsw_t structure. 1314 */ 1315 mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr); 1316 ASSERT(mcst_p != NULL); 1317 1318 vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV); 1319 kmem_free(mcst_p, sizeof (*mcst_p)); 1320 } 1321 1322 D1(vswp, "%s: exit", __func__); 1323 1324 return (0); 1325 } 1326 1327 static int 1328 vsw_m_promisc(void *arg, boolean_t on) 1329 { 1330 vsw_t *vswp = (vsw_t *)arg; 1331 1332 D1(vswp, "%s: enter", __func__); 1333 1334 WRITE_ENTER(&vswp->if_lockrw); 1335 if (on) 1336 vswp->if_state |= VSW_IF_PROMISC; 1337 else 1338 vswp->if_state &= ~VSW_IF_PROMISC; 1339 RW_EXIT(&vswp->if_lockrw); 1340 1341 D1(vswp, "%s: exit", __func__); 1342 1343 return (0); 1344 } 1345 1346 static mblk_t * 1347 vsw_m_tx(void *arg, mblk_t *mp) 1348 { 1349 vsw_t *vswp = (vsw_t *)arg; 1350 1351 D1(vswp, "%s: enter", __func__); 1352 1353 mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp); 1354 1355 if (mp == NULL) { 1356 return (NULL); 1357 } 1358 1359 vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL); 1360 1361 D1(vswp, "%s: exit", __func__); 1362 1363 return (NULL); 1364 } 1365 1366 /* 1367 * Register for machine description (MD) updates. 1368 * 1369 * Returns 0 on success, 1 on failure. 1370 */ 1371 static int 1372 vsw_mdeg_register(vsw_t *vswp) 1373 { 1374 mdeg_prop_spec_t *pspecp; 1375 mdeg_node_spec_t *inst_specp; 1376 mdeg_handle_t mdeg_hdl, mdeg_port_hdl; 1377 size_t templatesz; 1378 int rv; 1379 1380 D1(vswp, "%s: enter", __func__); 1381 1382 /* 1383 * Allocate and initialize a per-instance copy 1384 * of the global property spec array that will 1385 * uniquely identify this vsw instance. 1386 */ 1387 templatesz = sizeof (vsw_prop_template); 1388 pspecp = kmem_zalloc(templatesz, KM_SLEEP); 1389 1390 bcopy(vsw_prop_template, pspecp, templatesz); 1391 1392 VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop); 1393 1394 /* initialize the complete prop spec structure */ 1395 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 1396 inst_specp->namep = "virtual-device"; 1397 inst_specp->specp = pspecp; 1398 1399 D2(vswp, "%s: instance %d registering with mdeg", __func__, 1400 vswp->regprop); 1401 /* 1402 * Register an interest in 'virtual-device' nodes with a 1403 * 'name' property of 'virtual-network-switch' 1404 */ 1405 rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb, 1406 (void *)vswp, &mdeg_hdl); 1407 if (rv != MDEG_SUCCESS) { 1408 DERR(vswp, "%s: mdeg_register failed (%d) for vsw node", 1409 __func__, rv); 1410 goto mdeg_reg_fail; 1411 } 1412 1413 /* 1414 * Register an interest in 'vsw-port' nodes. 1415 */ 1416 rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb, 1417 (void *)vswp, &mdeg_port_hdl); 1418 if (rv != MDEG_SUCCESS) { 1419 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv); 1420 (void) mdeg_unregister(mdeg_hdl); 1421 goto mdeg_reg_fail; 1422 } 1423 1424 /* save off data that will be needed later */ 1425 vswp->inst_spec = inst_specp; 1426 vswp->mdeg_hdl = mdeg_hdl; 1427 vswp->mdeg_port_hdl = mdeg_port_hdl; 1428 1429 D1(vswp, "%s: exit", __func__); 1430 return (0); 1431 1432 mdeg_reg_fail: 1433 cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks", 1434 vswp->instance); 1435 kmem_free(pspecp, templatesz); 1436 kmem_free(inst_specp, sizeof (mdeg_node_spec_t)); 1437 1438 vswp->mdeg_hdl = 0; 1439 vswp->mdeg_port_hdl = 0; 1440 1441 return (1); 1442 } 1443 1444 static void 1445 vsw_mdeg_unregister(vsw_t *vswp) 1446 { 1447 D1(vswp, "vsw_mdeg_unregister: enter"); 1448 1449 if (vswp->mdeg_hdl != 0) 1450 (void) mdeg_unregister(vswp->mdeg_hdl); 1451 1452 if (vswp->mdeg_port_hdl != 0) 1453 (void) mdeg_unregister(vswp->mdeg_port_hdl); 1454 1455 if (vswp->inst_spec != NULL) { 1456 if (vswp->inst_spec->specp != NULL) { 1457 (void) kmem_free(vswp->inst_spec->specp, 1458 sizeof (vsw_prop_template)); 1459 vswp->inst_spec->specp = NULL; 1460 } 1461 1462 (void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t)); 1463 vswp->inst_spec = NULL; 1464 } 1465 1466 D1(vswp, "vsw_mdeg_unregister: exit"); 1467 } 1468 1469 /* 1470 * Mdeg callback invoked for the vsw node itself. 1471 */ 1472 static int 1473 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1474 { 1475 vsw_t *vswp; 1476 md_t *mdp; 1477 mde_cookie_t node; 1478 uint64_t inst; 1479 char *node_name = NULL; 1480 1481 if (resp == NULL) 1482 return (MDEG_FAILURE); 1483 1484 vswp = (vsw_t *)cb_argp; 1485 1486 D1(vswp, "%s: added %d : removed %d : curr matched %d" 1487 " : prev matched %d", __func__, resp->added.nelem, 1488 resp->removed.nelem, resp->match_curr.nelem, 1489 resp->match_prev.nelem); 1490 1491 /* 1492 * We get an initial callback for this node as 'added' 1493 * after registering with mdeg. Note that we would have 1494 * already gathered information about this vsw node by 1495 * walking MD earlier during attach (in vsw_read_mdprops()). 1496 * So, there is a window where the properties of this 1497 * node might have changed when we get this initial 'added' 1498 * callback. We handle this as if an update occured 1499 * and invoke the same function which handles updates to 1500 * the properties of this vsw-node if any. 1501 * 1502 * A non-zero 'match' value indicates that the MD has been 1503 * updated and that a virtual-network-switch node is 1504 * present which may or may not have been updated. It is 1505 * up to the clients to examine their own nodes and 1506 * determine if they have changed. 1507 */ 1508 if (resp->added.nelem != 0) { 1509 1510 if (resp->added.nelem != 1) { 1511 cmn_err(CE_NOTE, "!vsw%d: number of nodes added " 1512 "invalid: %d\n", vswp->instance, resp->added.nelem); 1513 return (MDEG_FAILURE); 1514 } 1515 1516 mdp = resp->added.mdp; 1517 node = resp->added.mdep[0]; 1518 1519 } else if (resp->match_curr.nelem != 0) { 1520 1521 if (resp->match_curr.nelem != 1) { 1522 cmn_err(CE_NOTE, "!vsw%d: number of nodes updated " 1523 "invalid: %d\n", vswp->instance, 1524 resp->match_curr.nelem); 1525 return (MDEG_FAILURE); 1526 } 1527 1528 mdp = resp->match_curr.mdp; 1529 node = resp->match_curr.mdep[0]; 1530 1531 } else { 1532 return (MDEG_FAILURE); 1533 } 1534 1535 /* Validate name and instance */ 1536 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { 1537 DERR(vswp, "%s: unable to get node name\n", __func__); 1538 return (MDEG_FAILURE); 1539 } 1540 1541 /* is this a virtual-network-switch? */ 1542 if (strcmp(node_name, vsw_propname) != 0) { 1543 DERR(vswp, "%s: Invalid node name: %s\n", 1544 __func__, node_name); 1545 return (MDEG_FAILURE); 1546 } 1547 1548 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { 1549 DERR(vswp, "%s: prop(cfg-handle) not found\n", 1550 __func__); 1551 return (MDEG_FAILURE); 1552 } 1553 1554 /* is this the right instance of vsw? */ 1555 if (inst != vswp->regprop) { 1556 DERR(vswp, "%s: Invalid cfg-handle: %lx\n", 1557 __func__, inst); 1558 return (MDEG_FAILURE); 1559 } 1560 1561 vsw_update_md_prop(vswp, mdp, node); 1562 1563 return (MDEG_SUCCESS); 1564 } 1565 1566 /* 1567 * Mdeg callback invoked for changes to the vsw-port nodes 1568 * under the vsw node. 1569 */ 1570 static int 1571 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1572 { 1573 vsw_t *vswp; 1574 int idx; 1575 md_t *mdp; 1576 mde_cookie_t node; 1577 uint64_t inst; 1578 int rv; 1579 1580 if ((resp == NULL) || (cb_argp == NULL)) 1581 return (MDEG_FAILURE); 1582 1583 vswp = (vsw_t *)cb_argp; 1584 1585 D2(vswp, "%s: added %d : removed %d : curr matched %d" 1586 " : prev matched %d", __func__, resp->added.nelem, 1587 resp->removed.nelem, resp->match_curr.nelem, 1588 resp->match_prev.nelem); 1589 1590 /* process added ports */ 1591 for (idx = 0; idx < resp->added.nelem; idx++) { 1592 mdp = resp->added.mdp; 1593 node = resp->added.mdep[idx]; 1594 1595 D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); 1596 1597 if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) { 1598 cmn_err(CE_WARN, "!vsw%d: Unable to add new port " 1599 "(0x%lx), err=%d", vswp->instance, node, rv); 1600 } 1601 } 1602 1603 /* process removed ports */ 1604 for (idx = 0; idx < resp->removed.nelem; idx++) { 1605 mdp = resp->removed.mdp; 1606 node = resp->removed.mdep[idx]; 1607 1608 if (md_get_prop_val(mdp, node, id_propname, &inst)) { 1609 DERR(vswp, "%s: prop(%s) not found in port(%d)", 1610 __func__, id_propname, idx); 1611 continue; 1612 } 1613 1614 D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); 1615 1616 if (vsw_port_detach(vswp, inst) != 0) { 1617 cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld", 1618 vswp->instance, inst); 1619 } 1620 } 1621 1622 for (idx = 0; idx < resp->match_curr.nelem; idx++) { 1623 (void) vsw_port_update(vswp, resp->match_curr.mdp, 1624 resp->match_curr.mdep[idx], 1625 resp->match_prev.mdp, 1626 resp->match_prev.mdep[idx]); 1627 } 1628 1629 D1(vswp, "%s: exit", __func__); 1630 1631 return (MDEG_SUCCESS); 1632 } 1633 1634 /* 1635 * Scan the machine description for this instance of vsw 1636 * and read its properties. Called only from vsw_attach(). 1637 * Returns: 0 on success, 1 on failure. 1638 */ 1639 static int 1640 vsw_read_mdprops(vsw_t *vswp) 1641 { 1642 md_t *mdp = NULL; 1643 mde_cookie_t rootnode; 1644 mde_cookie_t *listp = NULL; 1645 uint64_t inst; 1646 uint64_t cfgh; 1647 char *name; 1648 int rv = 1; 1649 int num_nodes = 0; 1650 int num_devs = 0; 1651 int listsz = 0; 1652 int i; 1653 1654 /* 1655 * In each 'virtual-device' node in the MD there is a 1656 * 'cfg-handle' property which is the MD's concept of 1657 * an instance number (this may be completely different from 1658 * the device drivers instance #). OBP reads that value and 1659 * stores it in the 'reg' property of the appropriate node in 1660 * the device tree. We first read this reg property and use this 1661 * to compare against the 'cfg-handle' property of vsw nodes 1662 * in MD to get to this specific vsw instance and then read 1663 * other properties that we are interested in. 1664 * We also cache the value of 'reg' property and use it later 1665 * to register callbacks with mdeg (see vsw_mdeg_register()) 1666 */ 1667 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 1668 DDI_PROP_DONTPASS, reg_propname, -1); 1669 if (inst == -1) { 1670 cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from " 1671 "OBP device tree", vswp->instance, reg_propname); 1672 return (rv); 1673 } 1674 1675 vswp->regprop = inst; 1676 1677 if ((mdp = md_get_handle()) == NULL) { 1678 DWARN(vswp, "%s: cannot init MD\n", __func__); 1679 return (rv); 1680 } 1681 1682 num_nodes = md_node_count(mdp); 1683 ASSERT(num_nodes > 0); 1684 1685 listsz = num_nodes * sizeof (mde_cookie_t); 1686 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 1687 1688 rootnode = md_root_node(mdp); 1689 1690 /* search for all "virtual_device" nodes */ 1691 num_devs = md_scan_dag(mdp, rootnode, 1692 md_find_name(mdp, vdev_propname), 1693 md_find_name(mdp, "fwd"), listp); 1694 if (num_devs <= 0) { 1695 DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs); 1696 goto vsw_readmd_exit; 1697 } 1698 1699 /* 1700 * Now loop through the list of virtual-devices looking for 1701 * devices with name "virtual-network-switch" and for each 1702 * such device compare its instance with what we have from 1703 * the 'reg' property to find the right node in MD and then 1704 * read all its properties. 1705 */ 1706 for (i = 0; i < num_devs; i++) { 1707 1708 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 1709 DWARN(vswp, "%s: name property not found\n", 1710 __func__); 1711 goto vsw_readmd_exit; 1712 } 1713 1714 /* is this a virtual-network-switch? */ 1715 if (strcmp(name, vsw_propname) != 0) 1716 continue; 1717 1718 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 1719 DWARN(vswp, "%s: cfg-handle property not found\n", 1720 __func__); 1721 goto vsw_readmd_exit; 1722 } 1723 1724 /* is this the required instance of vsw? */ 1725 if (inst != cfgh) 1726 continue; 1727 1728 /* now read all properties of this vsw instance */ 1729 rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]); 1730 break; 1731 } 1732 1733 vsw_readmd_exit: 1734 1735 kmem_free(listp, listsz); 1736 (void) md_fini_handle(mdp); 1737 return (rv); 1738 } 1739 1740 /* 1741 * Read the initial start-of-day values from the specified MD node. 1742 */ 1743 static int 1744 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1745 { 1746 uint64_t macaddr = 0; 1747 1748 D1(vswp, "%s: enter", __func__); 1749 1750 if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) { 1751 return (1); 1752 } 1753 1754 /* mac address for vswitch device itself */ 1755 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1756 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1757 vswp->instance); 1758 return (1); 1759 } 1760 1761 vsw_save_lmacaddr(vswp, macaddr); 1762 1763 if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) { 1764 DWARN(vswp, "%s: Unable to read %s property from MD, " 1765 "defaulting to 'switched' mode", 1766 __func__, smode_propname); 1767 1768 vswp->smode = VSW_LAYER2; 1769 } 1770 1771 /* 1772 * Read the 'linkprop' property to know if this 1773 * vsw device wants to get physical link updates. 1774 */ 1775 vsw_linkprop_read(vswp, mdp, node, &vswp->pls_update); 1776 1777 /* read mtu */ 1778 vsw_mtu_read(vswp, mdp, node, &vswp->mtu); 1779 if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) { 1780 vswp->mtu = ETHERMTU; 1781 } 1782 vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) + 1783 VLAN_TAGSZ; 1784 1785 /* read vlan id properties of this vsw instance */ 1786 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid, 1787 &vswp->vids, &vswp->nvids, &vswp->default_vlan_id); 1788 1789 /* read priority-ether-types */ 1790 vsw_read_pri_eth_types(vswp, mdp, node); 1791 1792 /* read bandwidth property of this vsw instance */ 1793 vsw_bandwidth_read(vswp, mdp, node, &vswp->bandwidth); 1794 1795 D1(vswp, "%s: exit", __func__); 1796 return (0); 1797 } 1798 1799 /* 1800 * Read vlan id properties of the given MD node. 1801 * Arguments: 1802 * arg: device argument(vsw device or a port) 1803 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port) 1804 * mdp: machine description 1805 * node: md node cookie 1806 * 1807 * Returns: 1808 * pvidp: port-vlan-id of the node 1809 * vidspp: list of vlan-ids of the node 1810 * nvidsp: # of vlan-ids in the list 1811 * default_idp: default-vlan-id of the node(if node is vsw device) 1812 */ 1813 static void 1814 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node, 1815 uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp, 1816 uint16_t *default_idp) 1817 { 1818 vsw_t *vswp; 1819 vsw_port_t *portp; 1820 char *pvid_propname; 1821 char *vid_propname; 1822 uint_t nvids = 0; 1823 uint32_t vids_size; 1824 int rv; 1825 int i; 1826 uint64_t *data; 1827 uint64_t val; 1828 int size; 1829 int inst; 1830 1831 if (type == VSW_LOCALDEV) { 1832 1833 vswp = (vsw_t *)arg; 1834 pvid_propname = vsw_pvid_propname; 1835 vid_propname = vsw_vid_propname; 1836 inst = vswp->instance; 1837 1838 } else if (type == VSW_VNETPORT) { 1839 1840 portp = (vsw_port_t *)arg; 1841 vswp = portp->p_vswp; 1842 pvid_propname = port_pvid_propname; 1843 vid_propname = port_vid_propname; 1844 inst = portp->p_instance; 1845 1846 } else { 1847 return; 1848 } 1849 1850 if (type == VSW_LOCALDEV && default_idp != NULL) { 1851 rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val); 1852 if (rv != 0) { 1853 DWARN(vswp, "%s: prop(%s) not found", __func__, 1854 vsw_dvid_propname); 1855 1856 *default_idp = vsw_default_vlan_id; 1857 } else { 1858 *default_idp = val & 0xFFF; 1859 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1860 vsw_dvid_propname, inst, *default_idp); 1861 } 1862 } 1863 1864 rv = md_get_prop_val(mdp, node, pvid_propname, &val); 1865 if (rv != 0) { 1866 DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname); 1867 *pvidp = vsw_default_vlan_id; 1868 } else { 1869 1870 *pvidp = val & 0xFFF; 1871 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1872 pvid_propname, inst, *pvidp); 1873 } 1874 1875 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data, 1876 &size); 1877 if (rv != 0) { 1878 D2(vswp, "%s: prop(%s) not found", __func__, vid_propname); 1879 size = 0; 1880 } else { 1881 size /= sizeof (uint64_t); 1882 } 1883 nvids = size; 1884 1885 if (nvids != 0) { 1886 D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst); 1887 vids_size = sizeof (vsw_vlanid_t) * nvids; 1888 *vidspp = kmem_zalloc(vids_size, KM_SLEEP); 1889 for (i = 0; i < nvids; i++) { 1890 (*vidspp)[i].vl_vid = data[i] & 0xFFFF; 1891 (*vidspp)[i].vl_set = B_FALSE; 1892 D2(vswp, " %d ", (*vidspp)[i].vl_vid); 1893 } 1894 D2(vswp, "\n"); 1895 } 1896 1897 *nvidsp = nvids; 1898 } 1899 1900 static void 1901 vsw_port_read_bandwidth(vsw_port_t *portp, md_t *mdp, mde_cookie_t node, 1902 uint64_t *bw) 1903 { 1904 int rv; 1905 uint64_t val; 1906 vsw_t *vswp; 1907 1908 vswp = portp->p_vswp; 1909 1910 rv = md_get_prop_val(mdp, node, port_maxbw_propname, &val); 1911 1912 if (rv != 0) { 1913 *bw = 0; 1914 D3(vswp, "%s: prop(%s) not found\n", __func__, 1915 port_maxbw_propname); 1916 } else { 1917 *bw = val; 1918 D3(vswp, "%s: %s nodes found", __func__, port_maxbw_propname); 1919 } 1920 } 1921 1922 /* 1923 * This function reads "priority-ether-types" property from md. This property 1924 * is used to enable support for priority frames. Applications which need 1925 * guaranteed and timely delivery of certain high priority frames to/from 1926 * a vnet or vsw within ldoms, should configure this property by providing 1927 * the ether type(s) for which the priority facility is needed. 1928 * Normal data frames are delivered over a ldc channel using the descriptor 1929 * ring mechanism which is constrained by factors such as descriptor ring size, 1930 * the rate at which the ring is processed at the peer ldc end point, etc. 1931 * The priority mechanism provides an Out-Of-Band path to send/receive frames 1932 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the 1933 * descriptor ring path and enables a more reliable and timely delivery of 1934 * frames to the peer. 1935 */ 1936 static void 1937 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1938 { 1939 int rv; 1940 uint16_t *types; 1941 uint64_t *data; 1942 int size; 1943 int i; 1944 size_t mblk_sz; 1945 1946 rv = md_get_prop_data(mdp, node, pri_types_propname, 1947 (uint8_t **)&data, &size); 1948 if (rv != 0) { 1949 /* 1950 * Property may not exist if we are running pre-ldoms1.1 f/w. 1951 * Check if 'vsw_pri_eth_type' has been set in that case. 1952 */ 1953 if (vsw_pri_eth_type != 0) { 1954 size = sizeof (vsw_pri_eth_type); 1955 data = &vsw_pri_eth_type; 1956 } else { 1957 D3(vswp, "%s: prop(%s) not found", __func__, 1958 pri_types_propname); 1959 size = 0; 1960 } 1961 } 1962 1963 if (size == 0) { 1964 vswp->pri_num_types = 0; 1965 return; 1966 } 1967 1968 /* 1969 * we have some priority-ether-types defined; 1970 * allocate a table of these types and also 1971 * allocate a pool of mblks to transmit these 1972 * priority packets. 1973 */ 1974 size /= sizeof (uint64_t); 1975 vswp->pri_num_types = size; 1976 vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP); 1977 for (i = 0, types = vswp->pri_types; i < size; i++) { 1978 types[i] = data[i] & 0xFFFF; 1979 } 1980 mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7; 1981 (void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, NULL, 1982 &vswp->pri_tx_vmp); 1983 } 1984 1985 static void 1986 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu) 1987 { 1988 int rv; 1989 int inst; 1990 uint64_t val; 1991 char *mtu_propname; 1992 1993 mtu_propname = vsw_mtu_propname; 1994 inst = vswp->instance; 1995 1996 rv = md_get_prop_val(mdp, node, mtu_propname, &val); 1997 if (rv != 0) { 1998 D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname); 1999 *mtu = vsw_ethermtu; 2000 } else { 2001 2002 *mtu = val & 0xFFFF; 2003 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 2004 mtu_propname, inst, *mtu); 2005 } 2006 } 2007 2008 /* 2009 * Update the mtu of the vsw device. We first check if the device has been 2010 * plumbed and if so fail the mtu update. Otherwise, we continue to update the 2011 * new mtu and reset all ports to initiate handshake re-negotiation with peers 2012 * using the new mtu. 2013 */ 2014 static int 2015 vsw_mtu_update(vsw_t *vswp, uint32_t mtu) 2016 { 2017 int rv; 2018 2019 WRITE_ENTER(&vswp->if_lockrw); 2020 2021 if (vswp->if_state & VSW_IF_UP) { 2022 2023 RW_EXIT(&vswp->if_lockrw); 2024 2025 cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update" 2026 " as the device is plumbed\n", vswp->instance); 2027 return (EBUSY); 2028 2029 } else { 2030 2031 D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n", 2032 __func__, vswp->mtu, mtu); 2033 2034 vswp->mtu = mtu; 2035 vswp->max_frame_size = vswp->mtu + 2036 sizeof (struct ether_header) + VLAN_TAGSZ; 2037 2038 rv = mac_maxsdu_update(vswp->if_mh, mtu); 2039 if (rv != 0) { 2040 cmn_err(CE_NOTE, 2041 "!vsw%d: Unable to update mtu with mac" 2042 " layer\n", vswp->instance); 2043 } 2044 2045 RW_EXIT(&vswp->if_lockrw); 2046 2047 /* Reset ports to renegotiate with the new mtu */ 2048 vsw_reset_ports(vswp); 2049 2050 } 2051 2052 return (0); 2053 } 2054 2055 static void 2056 vsw_linkprop_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 2057 boolean_t *pls) 2058 { 2059 int rv; 2060 uint64_t val; 2061 char *linkpropname; 2062 2063 linkpropname = vsw_linkprop_propname; 2064 2065 rv = md_get_prop_val(mdp, node, linkpropname, &val); 2066 if (rv != 0) { 2067 D3(vswp, "%s: prop(%s) not found", __func__, linkpropname); 2068 *pls = B_FALSE; 2069 } else { 2070 2071 *pls = (val & 0x1) ? B_TRUE : B_FALSE; 2072 D2(vswp, "%s: %s(%d): (%d)\n", __func__, linkpropname, 2073 vswp->instance, *pls); 2074 } 2075 } 2076 2077 void 2078 vsw_mac_link_update(vsw_t *vswp, link_state_t link_state) 2079 { 2080 READ_ENTER(&vswp->if_lockrw); 2081 2082 if (vswp->if_state & VSW_IF_REG) { 2083 mac_link_update(vswp->if_mh, link_state); 2084 } 2085 2086 RW_EXIT(&vswp->if_lockrw); 2087 } 2088 2089 void 2090 vsw_physlink_state_update(vsw_t *vswp) 2091 { 2092 if (vswp->pls_update == B_TRUE) { 2093 vsw_mac_link_update(vswp, vswp->phys_link_state); 2094 } 2095 vsw_physlink_update_ports(vswp); 2096 } 2097 2098 static void 2099 vsw_bandwidth_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint64_t *bw) 2100 { 2101 /* read the vsw bandwidth from md */ 2102 int rv; 2103 uint64_t val; 2104 2105 rv = md_get_prop_val(mdp, node, vsw_maxbw_propname, &val); 2106 if (rv != 0) { 2107 *bw = 0; 2108 D3(vswp, "%s: prop(%s) not found", __func__, 2109 vsw_maxbw_propname); 2110 } else { 2111 *bw = val; 2112 D3(vswp, "%s: %s(%d): (%ld)\n", __func__, 2113 vsw_maxbw_propname, vswp->instance, *bw); 2114 } 2115 } 2116 2117 /* 2118 * Check to see if the relevant properties in the specified node have 2119 * changed, and if so take the appropriate action. 2120 * 2121 * If any of the properties are missing or invalid we don't take 2122 * any action, as this function should only be invoked when modifications 2123 * have been made to what we assume is a working configuration, which 2124 * we leave active. 2125 * 2126 * Note it is legal for this routine to be invoked even if none of the 2127 * properties in the port node within the MD have actually changed. 2128 */ 2129 static void 2130 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 2131 { 2132 char physname[LIFNAMSIZ]; 2133 char drv[LIFNAMSIZ]; 2134 uint_t ddi_instance; 2135 uint8_t new_smode; 2136 int i; 2137 uint64_t macaddr = 0; 2138 enum {MD_init = 0x1, 2139 MD_physname = 0x2, 2140 MD_macaddr = 0x4, 2141 MD_smode = 0x8, 2142 MD_vlans = 0x10, 2143 MD_mtu = 0x20, 2144 MD_pls = 0x40, 2145 MD_bw = 0x80} updated; 2146 int rv; 2147 uint16_t pvid; 2148 vsw_vlanid_t *vids; 2149 uint16_t nvids; 2150 uint32_t mtu; 2151 boolean_t pls_update; 2152 uint64_t maxbw; 2153 2154 updated = MD_init; 2155 2156 D1(vswp, "%s: enter", __func__); 2157 2158 /* 2159 * Check if name of physical device in MD has changed. 2160 */ 2161 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { 2162 /* 2163 * Do basic sanity check on new device name/instance, 2164 * if its non NULL. It is valid for the device name to 2165 * have changed from a non NULL to a NULL value, i.e. 2166 * the vsw is being changed to 'routed' mode. 2167 */ 2168 if ((strlen(physname) != 0) && 2169 (ddi_parse(physname, drv, 2170 &ddi_instance) != DDI_SUCCESS)) { 2171 cmn_err(CE_WARN, "!vsw%d: physical device %s is not" 2172 " a valid device name/instance", 2173 vswp->instance, physname); 2174 goto fail_reconf; 2175 } 2176 2177 if (strcmp(physname, vswp->physname)) { 2178 D2(vswp, "%s: device name changed from %s to %s", 2179 __func__, vswp->physname, physname); 2180 2181 updated |= MD_physname; 2182 } else { 2183 D2(vswp, "%s: device name unchanged at %s", 2184 __func__, vswp->physname); 2185 } 2186 } else { 2187 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " 2188 "device from updated MD.", vswp->instance); 2189 goto fail_reconf; 2190 } 2191 2192 /* 2193 * Check if MAC address has changed. 2194 */ 2195 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 2196 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 2197 vswp->instance); 2198 goto fail_reconf; 2199 } else { 2200 uint64_t maddr = macaddr; 2201 READ_ENTER(&vswp->if_lockrw); 2202 for (i = ETHERADDRL - 1; i >= 0; i--) { 2203 if (vswp->if_addr.ether_addr_octet[i] 2204 != (macaddr & 0xFF)) { 2205 D2(vswp, "%s: octet[%d] 0x%x != 0x%x", 2206 __func__, i, 2207 vswp->if_addr.ether_addr_octet[i], 2208 (macaddr & 0xFF)); 2209 updated |= MD_macaddr; 2210 macaddr = maddr; 2211 break; 2212 } 2213 macaddr >>= 8; 2214 } 2215 RW_EXIT(&vswp->if_lockrw); 2216 if (updated & MD_macaddr) { 2217 vsw_save_lmacaddr(vswp, macaddr); 2218 } 2219 } 2220 2221 /* 2222 * Check if switching modes have changed. 2223 */ 2224 if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) { 2225 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", 2226 vswp->instance, smode_propname); 2227 goto fail_reconf; 2228 } else { 2229 if (new_smode != vswp->smode) { 2230 D2(vswp, "%s: switching mode changed from %d to %d", 2231 __func__, vswp->smode, new_smode); 2232 2233 updated |= MD_smode; 2234 } 2235 } 2236 2237 /* Read the vlan ids */ 2238 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids, 2239 &nvids, NULL); 2240 2241 /* Determine if there are any vlan id updates */ 2242 if ((pvid != vswp->pvid) || /* pvid changed? */ 2243 (nvids != vswp->nvids) || /* # of vids changed? */ 2244 ((nvids != 0) && (vswp->nvids != 0) && /* vids changed? */ 2245 !vsw_cmp_vids(vids, vswp->vids, nvids))) { 2246 updated |= MD_vlans; 2247 } 2248 2249 /* Read mtu */ 2250 vsw_mtu_read(vswp, mdp, node, &mtu); 2251 if (mtu != vswp->mtu) { 2252 if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) { 2253 updated |= MD_mtu; 2254 } else { 2255 cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update" 2256 " as the specified value:%d is invalid\n", 2257 vswp->instance, mtu); 2258 } 2259 } 2260 2261 /* 2262 * Read the 'linkprop' property. 2263 */ 2264 vsw_linkprop_read(vswp, mdp, node, &pls_update); 2265 if (pls_update != vswp->pls_update) { 2266 updated |= MD_pls; 2267 } 2268 2269 /* Read bandwidth */ 2270 vsw_bandwidth_read(vswp, mdp, node, &maxbw); 2271 if (maxbw != vswp->bandwidth) { 2272 if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) { 2273 updated |= MD_bw; 2274 } else { 2275 cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth" 2276 " update as the specified value:%ld is invalid\n", 2277 vswp->instance, maxbw); 2278 } 2279 } 2280 2281 /* 2282 * Now make any changes which are needed... 2283 */ 2284 if (updated & MD_pls) { 2285 2286 /* save the updated property. */ 2287 vswp->pls_update = pls_update; 2288 2289 if (pls_update == B_FALSE) { 2290 /* 2291 * Phys link state update is now disabled for this vsw 2292 * interface. If we had previously reported a link-down 2293 * to the stack, undo that by sending a link-up. 2294 */ 2295 if (vswp->phys_link_state == LINK_STATE_DOWN) { 2296 vsw_mac_link_update(vswp, LINK_STATE_UP); 2297 } 2298 } else { 2299 /* 2300 * Phys link state update is now enabled. Send up an 2301 * update based on the current phys link state. 2302 */ 2303 if (vswp->smode & VSW_LAYER2) { 2304 vsw_mac_link_update(vswp, 2305 vswp->phys_link_state); 2306 } 2307 } 2308 2309 } 2310 2311 if (updated & (MD_physname | MD_smode | MD_mtu)) { 2312 2313 /* 2314 * Stop any pending thread to setup switching mode. 2315 */ 2316 vsw_setup_switching_stop(vswp); 2317 2318 /* Cleanup HybridIO */ 2319 vsw_hio_cleanup(vswp); 2320 2321 /* 2322 * Remove unicst, mcst addrs of vsw interface 2323 * and ports from the physdev. This also closes 2324 * the corresponding mac clients. 2325 */ 2326 vsw_unset_addrs(vswp); 2327 2328 /* 2329 * Stop, detach and close the old device.. 2330 */ 2331 mutex_enter(&vswp->mac_lock); 2332 vsw_mac_close(vswp); 2333 mutex_exit(&vswp->mac_lock); 2334 2335 /* 2336 * Update phys name. 2337 */ 2338 if (updated & MD_physname) { 2339 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", 2340 vswp->instance, vswp->physname, physname); 2341 (void) strncpy(vswp->physname, 2342 physname, strlen(physname) + 1); 2343 } 2344 2345 /* 2346 * Update array with the new switch mode values. 2347 */ 2348 if (updated & MD_smode) { 2349 vswp->smode = new_smode; 2350 } 2351 2352 /* Update mtu */ 2353 if (updated & MD_mtu) { 2354 rv = vsw_mtu_update(vswp, mtu); 2355 if (rv != 0) { 2356 goto fail_update; 2357 } 2358 } 2359 2360 /* 2361 * ..and attach, start the new device. 2362 */ 2363 rv = vsw_setup_switching(vswp); 2364 if (rv == EAGAIN) { 2365 /* 2366 * Unable to setup switching mode. 2367 * As the error is EAGAIN, schedule a thread to retry 2368 * and return. Programming addresses of ports and 2369 * vsw interface will be done by the thread when the 2370 * switching setup completes successfully. 2371 */ 2372 if (vsw_setup_switching_start(vswp) != 0) { 2373 goto fail_update; 2374 } 2375 return; 2376 2377 } else if (rv) { 2378 goto fail_update; 2379 } 2380 2381 vsw_setup_switching_post_process(vswp); 2382 } else if (updated & MD_macaddr) { 2383 /* 2384 * We enter here if only MD_macaddr is exclusively updated. 2385 * If MD_physname and/or MD_smode are also updated, then 2386 * as part of that, we would have implicitly processed 2387 * MD_macaddr update (above). 2388 */ 2389 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", 2390 vswp->instance, macaddr); 2391 2392 READ_ENTER(&vswp->if_lockrw); 2393 if (vswp->if_state & VSW_IF_UP) { 2394 /* reconfigure with new address */ 2395 vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0); 2396 2397 /* 2398 * Notify the MAC layer of the changed address. 2399 */ 2400 mac_unicst_update(vswp->if_mh, 2401 (uint8_t *)&vswp->if_addr); 2402 2403 } 2404 RW_EXIT(&vswp->if_lockrw); 2405 2406 } 2407 2408 if (updated & MD_vlans) { 2409 /* Remove existing vlan ids from the hash table. */ 2410 vsw_vlan_remove_ids(vswp, VSW_LOCALDEV); 2411 2412 if (vswp->if_state & VSW_IF_UP) { 2413 vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids); 2414 } else { 2415 if (vswp->nvids != 0) { 2416 kmem_free(vswp->vids, 2417 sizeof (vsw_vlanid_t) * vswp->nvids); 2418 } 2419 vswp->vids = vids; 2420 vswp->nvids = nvids; 2421 vswp->pvid = pvid; 2422 } 2423 2424 /* add these new vlan ids into hash table */ 2425 vsw_vlan_add_ids(vswp, VSW_LOCALDEV); 2426 } else { 2427 if (nvids != 0) { 2428 kmem_free(vids, sizeof (vsw_vlanid_t) * nvids); 2429 } 2430 } 2431 2432 if (updated & MD_bw) { 2433 vsw_update_bandwidth(vswp, NULL, VSW_LOCALDEV, maxbw); 2434 } 2435 2436 return; 2437 2438 fail_reconf: 2439 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); 2440 return; 2441 2442 fail_update: 2443 cmn_err(CE_WARN, "!vsw%d: re-configuration failed", 2444 vswp->instance); 2445 } 2446 2447 /* 2448 * Read the port's md properties. 2449 */ 2450 static int 2451 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 2452 md_t *mdp, mde_cookie_t *node) 2453 { 2454 uint64_t ldc_id; 2455 uint8_t *addrp; 2456 int i, addrsz; 2457 int num_nodes = 0, nchan = 0; 2458 int listsz = 0; 2459 mde_cookie_t *listp = NULL; 2460 struct ether_addr ea; 2461 uint64_t macaddr; 2462 uint64_t inst = 0; 2463 uint64_t val; 2464 2465 if (md_get_prop_val(mdp, *node, id_propname, &inst)) { 2466 DWARN(vswp, "%s: prop(%s) not found", __func__, 2467 id_propname); 2468 return (1); 2469 } 2470 2471 /* 2472 * Find the channel endpoint node(s) (which should be under this 2473 * port node) which contain the channel id(s). 2474 */ 2475 if ((num_nodes = md_node_count(mdp)) <= 0) { 2476 DERR(vswp, "%s: invalid number of nodes found (%d)", 2477 __func__, num_nodes); 2478 return (1); 2479 } 2480 2481 D2(vswp, "%s: %d nodes found", __func__, num_nodes); 2482 2483 /* allocate enough space for node list */ 2484 listsz = num_nodes * sizeof (mde_cookie_t); 2485 listp = kmem_zalloc(listsz, KM_SLEEP); 2486 2487 nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname), 2488 md_find_name(mdp, "fwd"), listp); 2489 2490 if (nchan <= 0) { 2491 DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname); 2492 kmem_free(listp, listsz); 2493 return (1); 2494 } 2495 2496 D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname); 2497 2498 /* use property from first node found */ 2499 if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) { 2500 DWARN(vswp, "%s: prop(%s) not found\n", __func__, 2501 id_propname); 2502 kmem_free(listp, listsz); 2503 return (1); 2504 } 2505 2506 /* don't need list any more */ 2507 kmem_free(listp, listsz); 2508 2509 D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id); 2510 2511 /* read mac-address property */ 2512 if (md_get_prop_data(mdp, *node, remaddr_propname, 2513 &addrp, &addrsz)) { 2514 DWARN(vswp, "%s: prop(%s) not found", 2515 __func__, remaddr_propname); 2516 return (1); 2517 } 2518 2519 if (addrsz < ETHERADDRL) { 2520 DWARN(vswp, "%s: invalid address size", __func__); 2521 return (1); 2522 } 2523 2524 macaddr = *((uint64_t *)addrp); 2525 D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr); 2526 2527 for (i = ETHERADDRL - 1; i >= 0; i--) { 2528 ea.ether_addr_octet[i] = macaddr & 0xFF; 2529 macaddr >>= 8; 2530 } 2531 2532 /* now update all properties into the port */ 2533 portp->p_vswp = vswp; 2534 portp->p_instance = inst; 2535 portp->addr_set = B_FALSE; 2536 ether_copy(&ea, &portp->p_macaddr); 2537 if (nchan > VSW_PORT_MAX_LDCS) { 2538 D2(vswp, "%s: using first of %d ldc ids", 2539 __func__, nchan); 2540 nchan = VSW_PORT_MAX_LDCS; 2541 } 2542 portp->num_ldcs = nchan; 2543 portp->ldc_ids = 2544 kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP); 2545 bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan); 2546 2547 /* read vlan id properties of this port node */ 2548 vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid, 2549 &portp->vids, &portp->nvids, NULL); 2550 2551 /* Check if hybrid property is present */ 2552 if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) { 2553 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2554 portp->p_hio_enabled = B_TRUE; 2555 } else { 2556 portp->p_hio_enabled = B_FALSE; 2557 } 2558 /* 2559 * Port hio capability determined after version 2560 * negotiation, i.e., when we know the peer is HybridIO capable. 2561 */ 2562 portp->p_hio_capable = B_FALSE; 2563 2564 /* Read bandwidth of this port */ 2565 vsw_port_read_bandwidth(portp, mdp, *node, &portp->p_bandwidth); 2566 2567 return (0); 2568 } 2569 2570 /* 2571 * Add a new port to the system. 2572 * 2573 * Returns 0 on success, 1 on failure. 2574 */ 2575 int 2576 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) 2577 { 2578 vsw_port_t *portp; 2579 int rv; 2580 2581 portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP); 2582 2583 rv = vsw_port_read_props(portp, vswp, mdp, node); 2584 if (rv != 0) { 2585 kmem_free(portp, sizeof (*portp)); 2586 return (1); 2587 } 2588 2589 rv = vsw_port_attach(portp); 2590 if (rv != 0) { 2591 DERR(vswp, "%s: failed to attach port", __func__); 2592 return (1); 2593 } 2594 2595 return (0); 2596 } 2597 2598 static int 2599 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 2600 md_t *prev_mdp, mde_cookie_t prev_mdex) 2601 { 2602 uint64_t cport_num; 2603 uint64_t pport_num; 2604 vsw_port_list_t *plistp; 2605 vsw_port_t *portp; 2606 uint16_t pvid; 2607 vsw_vlanid_t *vids; 2608 uint16_t nvids; 2609 uint64_t val; 2610 boolean_t hio_enabled = B_FALSE; 2611 uint64_t maxbw; 2612 enum {P_MD_init = 0x1, 2613 P_MD_vlans = 0x2, 2614 P_MD_hio = 0x4, 2615 P_MD_maxbw = 0x8} updated; 2616 2617 updated = P_MD_init; 2618 2619 /* 2620 * For now, we get port updates only if vlan ids changed. 2621 * We read the port num and do some sanity check. 2622 */ 2623 if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) { 2624 return (1); 2625 } 2626 2627 if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) { 2628 return (1); 2629 } 2630 if (cport_num != pport_num) 2631 return (1); 2632 2633 plistp = &(vswp->plist); 2634 2635 READ_ENTER(&plistp->lockrw); 2636 2637 portp = vsw_lookup_port(vswp, cport_num); 2638 if (portp == NULL) { 2639 RW_EXIT(&plistp->lockrw); 2640 return (1); 2641 } 2642 2643 /* Read the vlan ids */ 2644 vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid, 2645 &vids, &nvids, NULL); 2646 2647 /* Determine if there are any vlan id updates */ 2648 if ((pvid != portp->pvid) || /* pvid changed? */ 2649 (nvids != portp->nvids) || /* # of vids changed? */ 2650 ((nvids != 0) && (portp->nvids != 0) && /* vids changed? */ 2651 !vsw_cmp_vids(vids, portp->vids, nvids))) { 2652 updated |= P_MD_vlans; 2653 } 2654 2655 /* Check if hybrid property is present */ 2656 if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) { 2657 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2658 hio_enabled = B_TRUE; 2659 } 2660 2661 if (portp->p_hio_enabled != hio_enabled) { 2662 updated |= P_MD_hio; 2663 } 2664 2665 /* Check if maxbw property is present */ 2666 vsw_port_read_bandwidth(portp, curr_mdp, curr_mdex, &maxbw); 2667 if (maxbw != portp->p_bandwidth) { 2668 if (maxbw >= MRP_MAXBW_MINVAL || maxbw == 0) { 2669 updated |= P_MD_maxbw; 2670 } else { 2671 cmn_err(CE_NOTE, "!vsw%d: Unable to process bandwidth" 2672 " update for port %d as the specified value:%ld" 2673 " is invalid\n", 2674 vswp->instance, portp->p_instance, maxbw); 2675 } 2676 } 2677 2678 if (updated & P_MD_vlans) { 2679 /* Remove existing vlan ids from the hash table. */ 2680 vsw_vlan_remove_ids(portp, VSW_VNETPORT); 2681 2682 /* Reconfigure vlans with network device */ 2683 vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids); 2684 2685 /* add these new vlan ids into hash table */ 2686 vsw_vlan_add_ids(portp, VSW_VNETPORT); 2687 2688 /* reset the port if it is vlan unaware (ver < 1.3) */ 2689 vsw_vlan_unaware_port_reset(portp); 2690 } 2691 2692 if (updated & P_MD_hio) { 2693 vsw_hio_port_update(portp, hio_enabled); 2694 } 2695 2696 if (updated & P_MD_maxbw) { 2697 vsw_update_bandwidth(NULL, portp, VSW_VNETPORT, maxbw); 2698 } 2699 2700 RW_EXIT(&plistp->lockrw); 2701 2702 return (0); 2703 } 2704 2705 /* 2706 * vsw_mac_rx -- A common function to send packets to the interface. 2707 * By default this function check if the interface is UP or not, the 2708 * rest of the behaviour depends on the flags as below: 2709 * 2710 * VSW_MACRX_PROMISC -- Check if the promisc mode set or not. 2711 * VSW_MACRX_COPYMSG -- Make a copy of the message(s). 2712 * VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack. 2713 */ 2714 void 2715 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 2716 mblk_t *mp, vsw_macrx_flags_t flags) 2717 { 2718 mblk_t *mpt; 2719 2720 D1(vswp, "%s:enter\n", __func__); 2721 READ_ENTER(&vswp->if_lockrw); 2722 /* Check if the interface is up */ 2723 if (!(vswp->if_state & VSW_IF_UP)) { 2724 RW_EXIT(&vswp->if_lockrw); 2725 /* Free messages only if FREEMSG flag specified */ 2726 if (flags & VSW_MACRX_FREEMSG) { 2727 freemsgchain(mp); 2728 } 2729 D1(vswp, "%s:exit\n", __func__); 2730 return; 2731 } 2732 /* 2733 * If PROMISC flag is passed, then check if 2734 * the interface is in the PROMISC mode. 2735 * If not, drop the messages. 2736 */ 2737 if (flags & VSW_MACRX_PROMISC) { 2738 if (!(vswp->if_state & VSW_IF_PROMISC)) { 2739 RW_EXIT(&vswp->if_lockrw); 2740 /* Free messages only if FREEMSG flag specified */ 2741 if (flags & VSW_MACRX_FREEMSG) { 2742 freemsgchain(mp); 2743 } 2744 D1(vswp, "%s:exit\n", __func__); 2745 return; 2746 } 2747 } 2748 RW_EXIT(&vswp->if_lockrw); 2749 /* 2750 * If COPYMSG flag is passed, then make a copy 2751 * of the message chain and send up the copy. 2752 */ 2753 if (flags & VSW_MACRX_COPYMSG) { 2754 mp = copymsgchain(mp); 2755 if (mp == NULL) { 2756 D1(vswp, "%s:exit\n", __func__); 2757 return; 2758 } 2759 } 2760 2761 D2(vswp, "%s: sending up stack", __func__); 2762 2763 mpt = NULL; 2764 (void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt); 2765 if (mp != NULL) { 2766 mac_rx(vswp->if_mh, mrh, mp); 2767 } 2768 D1(vswp, "%s:exit\n", __func__); 2769 } 2770 2771 /* copy mac address of vsw into soft state structure */ 2772 static void 2773 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr) 2774 { 2775 int i; 2776 2777 WRITE_ENTER(&vswp->if_lockrw); 2778 for (i = ETHERADDRL - 1; i >= 0; i--) { 2779 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF; 2780 macaddr >>= 8; 2781 } 2782 RW_EXIT(&vswp->if_lockrw); 2783 } 2784 2785 /* Compare VLAN ids, array size expected to be same. */ 2786 static boolean_t 2787 vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids) 2788 { 2789 int i, j; 2790 uint16_t vid; 2791 2792 for (i = 0; i < nvids; i++) { 2793 vid = vids1[i].vl_vid; 2794 for (j = 0; j < nvids; j++) { 2795 if (vid == vids2[i].vl_vid) 2796 break; 2797 } 2798 if (j == nvids) { 2799 return (B_FALSE); 2800 } 2801 } 2802 return (B_TRUE); 2803 } 2804