1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/ldc.h> 66 
#include <sys/vsw_fdb.h> 67 #include <sys/vsw.h> 68 #include <sys/vio_mailbox.h> 69 #include <sys/vnet_mailbox.h> 70 #include <sys/vnet_common.h> 71 #include <sys/vio_util.h> 72 #include <sys/sdt.h> 73 #include <sys/atomic.h> 74 #include <sys/callb.h> 75 #include <sys/vlan.h> 76 77 /* 78 * Function prototypes. 79 */ 80 static int vsw_attach(dev_info_t *, ddi_attach_cmd_t); 81 static int vsw_detach(dev_info_t *, ddi_detach_cmd_t); 82 static int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 83 static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *); 84 static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *); 85 86 /* MDEG routines */ 87 static int vsw_mdeg_register(vsw_t *vswp); 88 static void vsw_mdeg_unregister(vsw_t *vswp); 89 static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *); 90 static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *); 91 static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t); 92 static int vsw_read_mdprops(vsw_t *vswp); 93 static void vsw_vlan_read_ids(void *arg, int type, md_t *mdp, 94 mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp, 95 uint16_t *nvidsp, uint16_t *default_idp); 96 static int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 97 md_t *mdp, mde_cookie_t *node); 98 static void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, 99 mde_cookie_t node); 100 static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t); 101 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 102 103 /* Mac driver related routines */ 104 static int vsw_mac_register(vsw_t *); 105 static int vsw_mac_unregister(vsw_t *); 106 static int vsw_m_stat(void *, uint_t, uint64_t *); 107 static void vsw_m_stop(void *arg); 108 static int vsw_m_start(void *arg); 109 static int vsw_m_unicst(void *arg, const uint8_t *); 110 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *); 111 static int vsw_m_promisc(void *arg, boolean_t); 112 static mblk_t *vsw_m_tx(void *arg, 
mblk_t *); 113 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 114 mblk_t *mp, vsw_macrx_flags_t flags); 115 116 /* 117 * Functions imported from other files. 118 */ 119 extern void vsw_setup_switching_timeout(void *arg); 120 extern void vsw_stop_switching_timeout(vsw_t *vswp); 121 extern int vsw_setup_switching(vsw_t *); 122 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, 123 vsw_port_t *port, mac_resource_handle_t mrh); 124 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 125 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 126 extern void vsw_del_mcst_vsw(vsw_t *); 127 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 128 extern int vsw_detach_ports(vsw_t *vswp); 129 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 130 extern int vsw_port_detach(vsw_t *vswp, int p_instance); 131 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 132 md_t *prev_mdp, mde_cookie_t prev_mdex); 133 extern int vsw_port_attach(vsw_port_t *port); 134 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 135 extern int vsw_mac_attach(vsw_t *vswp); 136 extern void vsw_mac_detach(vsw_t *vswp); 137 extern int vsw_mac_open(vsw_t *vswp); 138 extern void vsw_mac_close(vsw_t *vswp); 139 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int); 140 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int); 141 extern void vsw_reconfig_hw(vsw_t *); 142 extern void vsw_unset_addrs(vsw_t *vswp); 143 extern void vsw_set_addrs(vsw_t *vswp); 144 extern void vsw_create_vlans(void *arg, int type); 145 extern void vsw_destroy_vlans(void *arg, int type); 146 extern void vsw_vlan_add_ids(void *arg, int type); 147 extern void vsw_vlan_remove_ids(void *arg, int type); 148 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 149 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, 150 mblk_t **npt); 151 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int 
type, mblk_t *mp); 152 extern void vsw_hio_cleanup(vsw_t *vswp); 153 extern void vsw_hio_start_ports(vsw_t *vswp); 154 void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled); 155 156 /* 157 * Internal tunables. 158 */ 159 int vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */ 160 int vsw_wretries = 100; /* # of write attempts */ 161 int vsw_desc_delay = 0; /* delay in us */ 162 int vsw_read_attempts = 5; /* # of reads of descriptor */ 163 int vsw_setup_switching_delay = 3; /* setup sw timeout interval in sec */ 164 int vsw_mac_open_retries = 300; /* max # of mac_open() retries */ 165 /* 300*3 = 900sec(15min) of max tmout */ 166 int vsw_ldc_tx_delay = 5; /* delay(ticks) for tx retries */ 167 int vsw_ldc_tx_retries = 10; /* # of ldc tx retries */ 168 boolean_t vsw_ldc_rxthr_enabled = B_TRUE; /* LDC Rx thread enabled */ 169 boolean_t vsw_ldc_txthr_enabled = B_TRUE; /* LDC Tx thread enabled */ 170 171 uint32_t vsw_fdb_nchains = 8; /* # of chains in fdb hash table */ 172 uint32_t vsw_vlan_nchains = 4; /* # of chains in vlan id hash table */ 173 uint32_t vsw_ethermtu = 1500; /* mtu of the device */ 174 175 /* sw timeout for boot delay only, in milliseconds */ 176 int vsw_setup_switching_boot_delay = 100 * MILLISEC; 177 178 /* delay in usec to wait for all references on a fdb entry to be dropped */ 179 uint32_t vsw_fdbe_refcnt_delay = 10; 180 181 /* 182 * Default vlan id. This is only used internally when the "default-vlan-id" 183 * property is not present in the MD device node. Therefore, this should not be 184 * used as a tunable; if this value is changed, the corresponding variable 185 * should be updated to the same value in all vnets connected to this vsw. 186 */ 187 uint16_t vsw_default_vlan_id = 1; 188 189 /* 190 * Workaround for a version handshake bug in obp's vnet. 191 * If vsw initiates version negotiation starting from the highest version, 192 * obp sends a nack and terminates version handshake. 
To work around 193 * this, we do not initiate version handshake when the channel comes up. 194 * Instead, we wait for the peer to send its version info msg and go through 195 * the version protocol exchange. If we successfully negotiate a version, 196 * before sending the ack, we send our version info msg to the peer 197 * using the <major,minor> version that we are about to ack. 198 */ 199 boolean_t vsw_obp_ver_proto_workaround = B_TRUE; 200 201 /* 202 * In the absence of "priority-ether-types" property in MD, the following 203 * internal tunable can be set to specify a single priority ethertype. 204 */ 205 uint64_t vsw_pri_eth_type = 0; 206 207 /* 208 * Number of transmit priority buffers that are preallocated per device. 209 * This number is chosen to be a small value to throttle transmission 210 * of priority packets. Note: Must be a power of 2 for vio_create_mblks(). 211 */ 212 uint32_t vsw_pri_tx_nmblks = 64; 213 214 /* 215 * Number of RARP packets sent to announce macaddr to the physical switch, 216 * after vsw's physical device is changed dynamically or after a guest (client 217 * vnet) is live migrated in. 218 */ 219 uint32_t vsw_publish_macaddr_count = 3; 220 221 boolean_t vsw_hio_enabled = B_TRUE; /* Enable/disable HybridIO */ 222 int vsw_hio_max_cleanup_retries = 10; /* Max retries for HybridIO cleanup */ 223 int vsw_hio_cleanup_delay = 10000; /* 10ms */ 224 225 /* 226 * External tunables. 227 */ 228 /* 229 * Enable/disable thread per ring. This is a mode selection 230 * that is done at vsw driver attach time. 231 */ 232 boolean_t vsw_multi_ring_enable = B_FALSE; 233 int vsw_mac_rx_rings = VSW_MAC_RX_RINGS; 234 235 /* Number of transmit descriptors - must be power of 2 */ 236 uint32_t vsw_ntxds = VSW_RING_NUM_EL; 237 238 /* 239 * Max number of mblks received in one receive operation. 240 */ 241 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6); 242 243 /* 244 * Tunables for three different pools, that is, the size and 245 * number of mblks for each pool. 
246 */ 247 uint32_t vsw_mblk_size1 = VSW_MBLK_SZ_128; /* size=128 for pool1 */ 248 uint32_t vsw_mblk_size2 = VSW_MBLK_SZ_256; /* size=256 for pool2 */ 249 uint32_t vsw_mblk_size3 = VSW_MBLK_SZ_2048; /* size=2048 for pool3 */ 250 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS; /* number of mblks for pool1 */ 251 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS; /* number of mblks for pool2 */ 252 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS; /* number of mblks for pool3 */ 253 254 /* 255 * vsw_max_tx_qcount is the maximum # of packets that can be queued 256 * before the tx worker thread begins processing the queue. Its value 257 * is chosen to be 4x the default length of tx descriptor ring. 258 */ 259 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL; 260 261 /* 262 * MAC callbacks 263 */ 264 static mac_callbacks_t vsw_m_callbacks = { 265 0, 266 vsw_m_stat, 267 vsw_m_start, 268 vsw_m_stop, 269 vsw_m_promisc, 270 vsw_m_multicst, 271 vsw_m_unicst, 272 vsw_m_tx, 273 NULL, 274 NULL, 275 NULL 276 }; 277 278 static struct cb_ops vsw_cb_ops = { 279 nulldev, /* cb_open */ 280 nulldev, /* cb_close */ 281 nodev, /* cb_strategy */ 282 nodev, /* cb_print */ 283 nodev, /* cb_dump */ 284 nodev, /* cb_read */ 285 nodev, /* cb_write */ 286 nodev, /* cb_ioctl */ 287 nodev, /* cb_devmap */ 288 nodev, /* cb_mmap */ 289 nodev, /* cb_segmap */ 290 nochpoll, /* cb_chpoll */ 291 ddi_prop_op, /* cb_prop_op */ 292 NULL, /* cb_stream */ 293 D_MP, /* cb_flag */ 294 CB_REV, /* rev */ 295 nodev, /* int (*cb_aread)() */ 296 nodev /* int (*cb_awrite)() */ 297 }; 298 299 static struct dev_ops vsw_ops = { 300 DEVO_REV, /* devo_rev */ 301 0, /* devo_refcnt */ 302 vsw_getinfo, /* devo_getinfo */ 303 nulldev, /* devo_identify */ 304 nulldev, /* devo_probe */ 305 vsw_attach, /* devo_attach */ 306 vsw_detach, /* devo_detach */ 307 nodev, /* devo_reset */ 308 &vsw_cb_ops, /* devo_cb_ops */ 309 (struct bus_ops *)NULL, /* devo_bus_ops */ 310 ddi_power /* devo_power */ 311 }; 312 313 extern struct mod_ops mod_driverops; 314 
static struct modldrv vswmodldrv = { 315 &mod_driverops, 316 "sun4v Virtual Switch", 317 &vsw_ops, 318 }; 319 320 #define LDC_ENTER_LOCK(ldcp) \ 321 mutex_enter(&((ldcp)->ldc_cblock));\ 322 mutex_enter(&((ldcp)->ldc_rxlock));\ 323 mutex_enter(&((ldcp)->ldc_txlock)); 324 #define LDC_EXIT_LOCK(ldcp) \ 325 mutex_exit(&((ldcp)->ldc_txlock));\ 326 mutex_exit(&((ldcp)->ldc_rxlock));\ 327 mutex_exit(&((ldcp)->ldc_cblock)); 328 329 /* Driver soft state ptr */ 330 static void *vsw_state; 331 332 /* 333 * Linked list of "vsw_t" structures - one per instance. 334 */ 335 vsw_t *vsw_head = NULL; 336 krwlock_t vsw_rw; 337 338 /* 339 * Property names 340 */ 341 static char vdev_propname[] = "virtual-device"; 342 static char vsw_propname[] = "virtual-network-switch"; 343 static char physdev_propname[] = "vsw-phys-dev"; 344 static char smode_propname[] = "vsw-switch-mode"; 345 static char macaddr_propname[] = "local-mac-address"; 346 static char remaddr_propname[] = "remote-mac-address"; 347 static char ldcids_propname[] = "ldc-ids"; 348 static char chan_propname[] = "channel-endpoint"; 349 static char id_propname[] = "id"; 350 static char reg_propname[] = "reg"; 351 static char pri_types_propname[] = "priority-ether-types"; 352 static char vsw_pvid_propname[] = "port-vlan-id"; 353 static char vsw_vid_propname[] = "vlan-id"; 354 static char vsw_dvid_propname[] = "default-vlan-id"; 355 static char port_pvid_propname[] = "remote-port-vlan-id"; 356 static char port_vid_propname[] = "remote-vlan-id"; 357 static char hybrid_propname[] = "hybrid"; 358 359 /* 360 * Matching criteria passed to the MDEG to register interest 361 * in changes to 'virtual-device-port' nodes identified by their 362 * 'id' property. 
363 */ 364 static md_prop_match_t vport_prop_match[] = { 365 { MDET_PROP_VAL, "id" }, 366 { MDET_LIST_END, NULL } 367 }; 368 369 static mdeg_node_match_t vport_match = { "virtual-device-port", 370 vport_prop_match }; 371 372 /* 373 * Matching criteria passed to the MDEG to register interest 374 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified 375 * by their 'name' and 'cfg-handle' properties. 376 */ 377 static md_prop_match_t vdev_prop_match[] = { 378 { MDET_PROP_STR, "name" }, 379 { MDET_PROP_VAL, "cfg-handle" }, 380 { MDET_LIST_END, NULL } 381 }; 382 383 static mdeg_node_match_t vdev_match = { "virtual-device", 384 vdev_prop_match }; 385 386 387 /* 388 * Specification of an MD node passed to the MDEG to filter any 389 * 'vport' nodes that do not belong to the specified node. This 390 * template is copied for each vsw instance and filled in with 391 * the appropriate 'cfg-handle' value before being passed to the MDEG. 392 */ 393 static mdeg_prop_spec_t vsw_prop_template[] = { 394 { MDET_PROP_STR, "name", vsw_propname }, 395 { MDET_PROP_VAL, "cfg-handle", NULL }, 396 { MDET_LIST_END, NULL, NULL } 397 }; 398 399 #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 400 401 #ifdef DEBUG 402 /* 403 * Print debug messages - set to 0x1f to enable all msgs 404 * or 0x0 to turn all off. 405 */ 406 int vswdbg = 0x0; 407 408 /* 409 * debug levels: 410 * 0x01: Function entry/exit tracing 411 * 0x02: Internal function messages 412 * 0x04: Verbose internal messages 413 * 0x08: Warning messages 414 * 0x10: Error messages 415 */ 416 417 void 418 vswdebug(vsw_t *vswp, const char *fmt, ...) 
419 { 420 char buf[512]; 421 va_list ap; 422 423 va_start(ap, fmt); 424 (void) vsprintf(buf, fmt, ap); 425 va_end(ap); 426 427 if (vswp == NULL) 428 cmn_err(CE_CONT, "%s\n", buf); 429 else 430 cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); 431 } 432 433 #endif /* DEBUG */ 434 435 static struct modlinkage modlinkage = { 436 MODREV_1, 437 &vswmodldrv, 438 NULL 439 }; 440 441 int 442 _init(void) 443 { 444 int status; 445 446 rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); 447 448 status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); 449 if (status != 0) { 450 return (status); 451 } 452 453 mac_init_ops(&vsw_ops, DRV_NAME); 454 status = mod_install(&modlinkage); 455 if (status != 0) { 456 ddi_soft_state_fini(&vsw_state); 457 } 458 return (status); 459 } 460 461 int 462 _fini(void) 463 { 464 int status; 465 466 status = mod_remove(&modlinkage); 467 if (status != 0) 468 return (status); 469 mac_fini_ops(&vsw_ops); 470 ddi_soft_state_fini(&vsw_state); 471 472 rw_destroy(&vsw_rw); 473 474 return (status); 475 } 476 477 int 478 _info(struct modinfo *modinfop) 479 { 480 return (mod_info(&modlinkage, modinfop)); 481 } 482 483 static int 484 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 485 { 486 vsw_t *vswp; 487 int instance; 488 char hashname[MAXNAMELEN]; 489 char qname[TASKQ_NAMELEN]; 490 enum { PROG_init = 0x00, 491 PROG_locks = 0x01, 492 PROG_readmd = 0x02, 493 PROG_fdb = 0x04, 494 PROG_mfdb = 0x08, 495 PROG_taskq = 0x10, 496 PROG_swmode = 0x20, 497 PROG_macreg = 0x40, 498 PROG_mdreg = 0x80} 499 progress; 500 501 progress = PROG_init; 502 int rv; 503 504 switch (cmd) { 505 case DDI_ATTACH: 506 break; 507 case DDI_RESUME: 508 /* nothing to do for this non-device */ 509 return (DDI_SUCCESS); 510 case DDI_PM_RESUME: 511 default: 512 return (DDI_FAILURE); 513 } 514 515 instance = ddi_get_instance(dip); 516 if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { 517 DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); 518 return (DDI_FAILURE); 519 } 
520 vswp = ddi_get_soft_state(vsw_state, instance); 521 522 if (vswp == NULL) { 523 DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance); 524 goto vsw_attach_fail; 525 } 526 527 vswp->dip = dip; 528 vswp->instance = instance; 529 ddi_set_driver_private(dip, (caddr_t)vswp); 530 531 mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL); 532 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); 533 mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL); 534 rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); 535 rw_init(&vswp->mac_rwlock, NULL, RW_DRIVER, NULL); 536 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); 537 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); 538 539 progress |= PROG_locks; 540 541 rv = vsw_read_mdprops(vswp); 542 if (rv != 0) 543 goto vsw_attach_fail; 544 545 progress |= PROG_readmd; 546 547 /* setup the unicast forwarding database */ 548 (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", 549 vswp->instance); 550 D2(vswp, "creating unicast hash table (%s)...", hashname); 551 vswp->fdb_nchains = vsw_fdb_nchains; 552 vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains, 553 mod_hash_null_valdtor, sizeof (void *)); 554 vsw_create_vlans((void *)vswp, VSW_LOCALDEV); 555 progress |= PROG_fdb; 556 557 /* setup the multicast fowarding database */ 558 (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", 559 vswp->instance); 560 D2(vswp, "creating multicast hash table %s)...", hashname); 561 vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains, 562 mod_hash_null_valdtor, sizeof (void *)); 563 564 progress |= PROG_mfdb; 565 566 /* 567 * Create the taskq which will process all the VIO 568 * control messages. 
569 */ 570 (void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance); 571 if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, 572 TASKQ_DEFAULTPRI, 0)) == NULL) { 573 cmn_err(CE_WARN, "!vsw%d: Unable to create task queue", 574 vswp->instance); 575 goto vsw_attach_fail; 576 } 577 578 progress |= PROG_taskq; 579 580 /* prevent auto-detaching */ 581 if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip, 582 DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) { 583 cmn_err(CE_NOTE, "!Unable to set \"%s\" property for " 584 "instance %u", DDI_NO_AUTODETACH, instance); 585 } 586 587 /* 588 * The null switching function is set to avoid panic until 589 * switch mode is setup. 590 */ 591 vswp->vsw_switch_frame = vsw_switch_frame_nop; 592 593 /* 594 * Setup the required switching mode, 595 * based on the mdprops that we read earlier. 596 * schedule a short timeout (0.1 sec) for the first time 597 * setup and avoid calling mac_open() directly here, 598 * others are regular timeout 3 secs. 599 */ 600 mutex_enter(&vswp->swtmout_lock); 601 602 vswp->swtmout_enabled = B_TRUE; 603 vswp->swtmout_id = timeout(vsw_setup_switching_timeout, vswp, 604 drv_usectohz(vsw_setup_switching_boot_delay)); 605 606 mutex_exit(&vswp->swtmout_lock); 607 608 progress |= PROG_swmode; 609 610 /* Register with mac layer as a provider */ 611 rv = vsw_mac_register(vswp); 612 if (rv != 0) 613 goto vsw_attach_fail; 614 615 progress |= PROG_macreg; 616 617 /* 618 * Now we have everything setup, register an interest in 619 * specific MD nodes. 620 * 621 * The callback is invoked in 2 cases, firstly if upon mdeg 622 * registration there are existing nodes which match our specified 623 * criteria, and secondly if the MD is changed (and again, there 624 * are nodes which we are interested in present within it. Note 625 * that our callback will be invoked even if our specified nodes 626 * have not actually changed). 
627 * 628 */ 629 rv = vsw_mdeg_register(vswp); 630 if (rv != 0) 631 goto vsw_attach_fail; 632 633 progress |= PROG_mdreg; 634 635 WRITE_ENTER(&vsw_rw); 636 vswp->next = vsw_head; 637 vsw_head = vswp; 638 RW_EXIT(&vsw_rw); 639 640 ddi_report_dev(vswp->dip); 641 return (DDI_SUCCESS); 642 643 vsw_attach_fail: 644 DERR(NULL, "vsw_attach: failed"); 645 646 if (progress & PROG_mdreg) { 647 vsw_mdeg_unregister(vswp); 648 (void) vsw_detach_ports(vswp); 649 } 650 651 if (progress & PROG_macreg) 652 (void) vsw_mac_unregister(vswp); 653 654 if (progress & PROG_swmode) { 655 vsw_stop_switching_timeout(vswp); 656 vsw_hio_cleanup(vswp); 657 WRITE_ENTER(&vswp->mac_rwlock); 658 vsw_mac_detach(vswp); 659 vsw_mac_close(vswp); 660 RW_EXIT(&vswp->mac_rwlock); 661 } 662 663 if (progress & PROG_taskq) 664 ddi_taskq_destroy(vswp->taskq_p); 665 666 if (progress & PROG_mfdb) 667 mod_hash_destroy_hash(vswp->mfdb); 668 669 if (progress & PROG_fdb) { 670 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 671 mod_hash_destroy_hash(vswp->fdb_hashp); 672 } 673 674 if (progress & PROG_readmd) { 675 if (VSW_PRI_ETH_DEFINED(vswp)) { 676 kmem_free(vswp->pri_types, 677 sizeof (uint16_t) * vswp->pri_num_types); 678 } 679 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 680 } 681 682 if (progress & PROG_locks) { 683 rw_destroy(&vswp->plist.lockrw); 684 rw_destroy(&vswp->mfdbrw); 685 rw_destroy(&vswp->mac_rwlock); 686 rw_destroy(&vswp->if_lockrw); 687 mutex_destroy(&vswp->swtmout_lock); 688 mutex_destroy(&vswp->mca_lock); 689 mutex_destroy(&vswp->hw_lock); 690 } 691 692 ddi_soft_state_free(vsw_state, instance); 693 return (DDI_FAILURE); 694 } 695 696 static int 697 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 698 { 699 vio_mblk_pool_t *poolp, *npoolp; 700 vsw_t **vswpp, *vswp; 701 int instance; 702 703 instance = ddi_get_instance(dip); 704 vswp = ddi_get_soft_state(vsw_state, instance); 705 706 if (vswp == NULL) { 707 return (DDI_FAILURE); 708 } 709 710 switch (cmd) { 711 case DDI_DETACH: 712 break; 713 case 
DDI_SUSPEND: 714 case DDI_PM_SUSPEND: 715 default: 716 return (DDI_FAILURE); 717 } 718 719 D2(vswp, "detaching instance %d", instance); 720 721 /* Stop any pending timeout to setup switching mode. */ 722 vsw_stop_switching_timeout(vswp); 723 724 if (vswp->if_state & VSW_IF_REG) { 725 if (vsw_mac_unregister(vswp) != 0) { 726 cmn_err(CE_WARN, "!vsw%d: Unable to detach from " 727 "MAC layer", vswp->instance); 728 return (DDI_FAILURE); 729 } 730 } 731 732 vsw_mdeg_unregister(vswp); 733 734 /* remove mac layer callback */ 735 WRITE_ENTER(&vswp->mac_rwlock); 736 if ((vswp->mh != NULL) && (vswp->mrh != NULL)) { 737 mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE); 738 vswp->mrh = NULL; 739 } 740 RW_EXIT(&vswp->mac_rwlock); 741 742 if (vsw_detach_ports(vswp) != 0) { 743 cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports", 744 vswp->instance); 745 return (DDI_FAILURE); 746 } 747 748 rw_destroy(&vswp->if_lockrw); 749 750 /* cleanup HybridIO */ 751 vsw_hio_cleanup(vswp); 752 753 mutex_destroy(&vswp->hw_lock); 754 755 /* 756 * Now that the ports have been deleted, stop and close 757 * the physical device. 758 */ 759 WRITE_ENTER(&vswp->mac_rwlock); 760 761 vsw_mac_detach(vswp); 762 vsw_mac_close(vswp); 763 764 RW_EXIT(&vswp->mac_rwlock); 765 766 rw_destroy(&vswp->mac_rwlock); 767 mutex_destroy(&vswp->swtmout_lock); 768 769 /* 770 * Destroy any free pools that may still exist. 771 */ 772 poolp = vswp->rxh; 773 while (poolp != NULL) { 774 npoolp = vswp->rxh = poolp->nextp; 775 if (vio_destroy_mblks(poolp) != 0) { 776 vswp->rxh = poolp; 777 return (DDI_FAILURE); 778 } 779 poolp = npoolp; 780 } 781 782 /* 783 * Remove this instance from any entries it may be on in 784 * the hash table by using the list of addresses maintained 785 * in the vsw_t structure. 
786 */ 787 vsw_del_mcst_vsw(vswp); 788 789 vswp->mcap = NULL; 790 mutex_destroy(&vswp->mca_lock); 791 792 /* 793 * By now any pending tasks have finished and the underlying 794 * ldc's have been destroyed, so its safe to delete the control 795 * message taskq. 796 */ 797 if (vswp->taskq_p != NULL) 798 ddi_taskq_destroy(vswp->taskq_p); 799 800 /* 801 * At this stage all the data pointers in the hash table 802 * should be NULL, as all the ports have been removed and will 803 * have deleted themselves from the port lists which the data 804 * pointers point to. Hence we can destroy the table using the 805 * default destructors. 806 */ 807 D2(vswp, "vsw_detach: destroying hash tables.."); 808 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 809 mod_hash_destroy_hash(vswp->fdb_hashp); 810 vswp->fdb_hashp = NULL; 811 812 WRITE_ENTER(&vswp->mfdbrw); 813 mod_hash_destroy_hash(vswp->mfdb); 814 vswp->mfdb = NULL; 815 RW_EXIT(&vswp->mfdbrw); 816 rw_destroy(&vswp->mfdbrw); 817 818 /* free pri_types table */ 819 if (VSW_PRI_ETH_DEFINED(vswp)) { 820 kmem_free(vswp->pri_types, 821 sizeof (uint16_t) * vswp->pri_num_types); 822 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 823 } 824 825 ddi_remove_minor_node(dip, NULL); 826 827 rw_destroy(&vswp->plist.lockrw); 828 WRITE_ENTER(&vsw_rw); 829 for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) { 830 if (*vswpp == vswp) { 831 *vswpp = vswp->next; 832 break; 833 } 834 } 835 RW_EXIT(&vsw_rw); 836 ddi_soft_state_free(vsw_state, instance); 837 838 return (DDI_SUCCESS); 839 } 840 841 static int 842 vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 843 { 844 _NOTE(ARGUNUSED(dip)) 845 846 vsw_t *vswp = NULL; 847 dev_t dev = (dev_t)arg; 848 int instance; 849 850 instance = getminor(dev); 851 852 switch (infocmd) { 853 case DDI_INFO_DEVT2DEVINFO: 854 if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) { 855 *result = NULL; 856 return (DDI_FAILURE); 857 } 858 *result = vswp->dip; 859 return (DDI_SUCCESS); 860 861 
case DDI_INFO_DEVT2INSTANCE: 862 *result = (void *)(uintptr_t)instance; 863 return (DDI_SUCCESS); 864 865 default: 866 *result = NULL; 867 return (DDI_FAILURE); 868 } 869 } 870 871 /* 872 * Get the value of the "vsw-phys-dev" property in the specified 873 * node. This property is the name of the physical device that 874 * the virtual switch will use to talk to the outside world. 875 * 876 * Note it is valid for this property to be NULL (but the property 877 * itself must exist). Callers of this routine should verify that 878 * the value returned is what they expected (i.e. either NULL or non NULL). 879 * 880 * On success returns value of the property in region pointed to by 881 * the 'name' argument, and with return value of 0. Otherwise returns 1. 882 */ 883 static int 884 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name) 885 { 886 int len = 0; 887 int instance; 888 char *physname = NULL; 889 char *dev; 890 const char *dev_name; 891 char myname[MAXNAMELEN]; 892 893 dev_name = ddi_driver_name(vswp->dip); 894 instance = ddi_get_instance(vswp->dip); 895 (void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance); 896 897 if (md_get_prop_data(mdp, node, physdev_propname, 898 (uint8_t **)(&physname), &len) != 0) { 899 cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical " 900 "device(s) from MD", vswp->instance); 901 return (1); 902 } else if ((strlen(physname) + 1) > LIFNAMSIZ) { 903 cmn_err(CE_WARN, "!vsw%d: %s is too long a device name", 904 vswp->instance, physname); 905 return (1); 906 } else if (strcmp(myname, physname) == 0) { 907 /* 908 * Prevent the vswitch from opening itself as the 909 * network device. 
910 */ 911 cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name", 912 vswp->instance, physname); 913 return (1); 914 } else { 915 (void) strncpy(name, physname, strlen(physname) + 1); 916 D2(vswp, "%s: using first device specified (%s)", 917 __func__, physname); 918 } 919 920 #ifdef DEBUG 921 /* 922 * As a temporary measure to aid testing we check to see if there 923 * is a vsw.conf file present. If there is we use the value of the 924 * vsw_physname property in the file as the name of the physical 925 * device, overriding the value from the MD. 926 * 927 * There may be multiple devices listed, but for the moment 928 * we just use the first one. 929 */ 930 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0, 931 "vsw_physname", &dev) == DDI_PROP_SUCCESS) { 932 if ((strlen(dev) + 1) > LIFNAMSIZ) { 933 cmn_err(CE_WARN, "vsw%d: %s is too long a device name", 934 vswp->instance, dev); 935 ddi_prop_free(dev); 936 return (1); 937 } else { 938 cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from " 939 "config file", vswp->instance, dev); 940 941 (void) strncpy(name, dev, strlen(dev) + 1); 942 } 943 944 ddi_prop_free(dev); 945 } 946 #endif 947 948 return (0); 949 } 950 951 /* 952 * Read the 'vsw-switch-mode' property from the specified MD node. 953 * 954 * Returns 0 on success and the number of modes found in 'found', 955 * otherwise returns 1. 956 */ 957 static int 958 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 959 uint8_t *modes, int *found) 960 { 961 int len = 0; 962 int smode_num = 0; 963 char *smode = NULL; 964 char *curr_mode = NULL; 965 966 D1(vswp, "%s: enter", __func__); 967 968 /* 969 * Get the switch-mode property. The modes are listed in 970 * decreasing order of preference, i.e. prefered mode is 971 * first item in list. 972 */ 973 len = 0; 974 smode_num = 0; 975 if (md_get_prop_data(mdp, node, smode_propname, 976 (uint8_t **)(&smode), &len) != 0) { 977 /* 978 * Unable to get switch-mode property from MD, nothing 979 * more we can do. 
980 */ 981 cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property" 982 " from the MD", vswp->instance); 983 *found = 0; 984 return (1); 985 } 986 987 curr_mode = smode; 988 /* 989 * Modes of operation: 990 * 'switched' - layer 2 switching, underlying HW in 991 * programmed mode. 992 * 'promiscuous' - layer 2 switching, underlying HW in 993 * promiscuous mode. 994 * 'routed' - layer 3 (i.e. IP) routing, underlying HW 995 * in non-promiscuous mode. 996 */ 997 while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) { 998 D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode); 999 if (strcmp(curr_mode, "switched") == 0) { 1000 modes[smode_num++] = VSW_LAYER2; 1001 } else if (strcmp(curr_mode, "promiscuous") == 0) { 1002 modes[smode_num++] = VSW_LAYER2_PROMISC; 1003 } else if (strcmp(curr_mode, "routed") == 0) { 1004 modes[smode_num++] = VSW_LAYER3; 1005 } else { 1006 DWARN(vswp, "%s: Unknown switch mode %s, " 1007 "setting to default 'switched' mode", 1008 __func__, curr_mode); 1009 modes[smode_num++] = VSW_LAYER2; 1010 } 1011 curr_mode += strlen(curr_mode) + 1; 1012 } 1013 *found = smode_num; 1014 1015 D2(vswp, "%s: %d modes found", __func__, smode_num); 1016 1017 D1(vswp, "%s: exit", __func__); 1018 1019 return (0); 1020 } 1021 1022 /* 1023 * Register with the MAC layer as a network device, so we 1024 * can be plumbed if necessary. 
1025 */ 1026 static int 1027 vsw_mac_register(vsw_t *vswp) 1028 { 1029 mac_register_t *macp; 1030 int rv; 1031 1032 D1(vswp, "%s: enter", __func__); 1033 1034 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 1035 return (EINVAL); 1036 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1037 macp->m_driver = vswp; 1038 macp->m_dip = vswp->dip; 1039 macp->m_src_addr = (uint8_t *)&vswp->if_addr; 1040 macp->m_callbacks = &vsw_m_callbacks; 1041 macp->m_min_sdu = 0; 1042 macp->m_max_sdu = vsw_ethermtu; 1043 macp->m_margin = VLAN_TAGSZ; 1044 rv = mac_register(macp, &vswp->if_mh); 1045 mac_free(macp); 1046 if (rv != 0) { 1047 /* 1048 * Treat this as a non-fatal error as we may be 1049 * able to operate in some other mode. 1050 */ 1051 cmn_err(CE_NOTE, "!vsw%d: Unable to register as " 1052 "a provider with MAC layer", vswp->instance); 1053 return (rv); 1054 } 1055 1056 vswp->if_state |= VSW_IF_REG; 1057 1058 vswp->max_frame_size = vsw_ethermtu + sizeof (struct ether_header) 1059 + VLAN_TAGSZ; 1060 1061 D1(vswp, "%s: exit", __func__); 1062 1063 return (rv); 1064 } 1065 1066 static int 1067 vsw_mac_unregister(vsw_t *vswp) 1068 { 1069 int rv = 0; 1070 1071 D1(vswp, "%s: enter", __func__); 1072 1073 WRITE_ENTER(&vswp->if_lockrw); 1074 1075 if (vswp->if_state & VSW_IF_REG) { 1076 rv = mac_unregister(vswp->if_mh); 1077 if (rv != 0) { 1078 DWARN(vswp, "%s: unable to unregister from MAC " 1079 "framework", __func__); 1080 1081 RW_EXIT(&vswp->if_lockrw); 1082 D1(vswp, "%s: fail exit", __func__); 1083 return (rv); 1084 } 1085 1086 /* mark i/f as down and unregistered */ 1087 vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG); 1088 } 1089 RW_EXIT(&vswp->if_lockrw); 1090 1091 D1(vswp, "%s: exit", __func__); 1092 1093 return (rv); 1094 } 1095 1096 static int 1097 vsw_m_stat(void *arg, uint_t stat, uint64_t *val) 1098 { 1099 vsw_t *vswp = (vsw_t *)arg; 1100 1101 D1(vswp, "%s: enter", __func__); 1102 1103 WRITE_ENTER(&vswp->mac_rwlock); 1104 if (vswp->mh == NULL) { 1105 RW_EXIT(&vswp->mac_rwlock); 1106 
return (EINVAL); 1107 } 1108 1109 /* return stats from underlying device */ 1110 *val = mac_stat_get(vswp->mh, stat); 1111 1112 RW_EXIT(&vswp->mac_rwlock); 1113 1114 return (0); 1115 } 1116 1117 static void 1118 vsw_m_stop(void *arg) 1119 { 1120 vsw_t *vswp = (vsw_t *)arg; 1121 1122 D1(vswp, "%s: enter", __func__); 1123 1124 WRITE_ENTER(&vswp->if_lockrw); 1125 vswp->if_state &= ~VSW_IF_UP; 1126 RW_EXIT(&vswp->if_lockrw); 1127 1128 mutex_enter(&vswp->hw_lock); 1129 1130 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 1131 1132 if (vswp->recfg_reqd) 1133 vsw_reconfig_hw(vswp); 1134 1135 mutex_exit(&vswp->hw_lock); 1136 1137 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1138 } 1139 1140 static int 1141 vsw_m_start(void *arg) 1142 { 1143 vsw_t *vswp = (vsw_t *)arg; 1144 1145 D1(vswp, "%s: enter", __func__); 1146 1147 WRITE_ENTER(&vswp->if_lockrw); 1148 1149 vswp->if_state |= VSW_IF_UP; 1150 1151 if (vswp->switching_setup_done == B_FALSE) { 1152 /* 1153 * If the switching mode has not been setup yet, just 1154 * return. The unicast address will be programmed 1155 * after the physical device is successfully setup by the 1156 * timeout handler. 1157 */ 1158 RW_EXIT(&vswp->if_lockrw); 1159 return (0); 1160 } 1161 1162 /* if in layer2 mode, program unicast address. */ 1163 if (vswp->mh != NULL) { 1164 mutex_enter(&vswp->hw_lock); 1165 (void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 1166 mutex_exit(&vswp->hw_lock); 1167 } 1168 1169 RW_EXIT(&vswp->if_lockrw); 1170 1171 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1172 return (0); 1173 } 1174 1175 /* 1176 * Change the local interface address. 1177 * 1178 * Note: we don't support this entry point. The local 1179 * mac address of the switch can only be changed via its 1180 * MD node properties. 
1181 */ 1182 static int 1183 vsw_m_unicst(void *arg, const uint8_t *macaddr) 1184 { 1185 _NOTE(ARGUNUSED(arg, macaddr)) 1186 1187 return (DDI_FAILURE); 1188 } 1189 1190 static int 1191 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 1192 { 1193 vsw_t *vswp = (vsw_t *)arg; 1194 mcst_addr_t *mcst_p = NULL; 1195 uint64_t addr = 0x0; 1196 int i, ret = 0; 1197 1198 D1(vswp, "%s: enter", __func__); 1199 1200 /* 1201 * Convert address into form that can be used 1202 * as hash table key. 1203 */ 1204 for (i = 0; i < ETHERADDRL; i++) { 1205 addr = (addr << 8) | mca[i]; 1206 } 1207 1208 D2(vswp, "%s: addr = 0x%llx", __func__, addr); 1209 1210 if (add) { 1211 D2(vswp, "%s: adding multicast", __func__); 1212 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1213 /* 1214 * Update the list of multicast addresses 1215 * contained within the vsw_t structure to 1216 * include this new one. 1217 */ 1218 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP); 1219 if (mcst_p == NULL) { 1220 DERR(vswp, "%s unable to alloc mem", __func__); 1221 (void) vsw_del_mcst(vswp, 1222 VSW_LOCALDEV, addr, NULL); 1223 return (1); 1224 } 1225 mcst_p->addr = addr; 1226 ether_copy(mca, &mcst_p->mca); 1227 1228 /* 1229 * Call into the underlying driver to program the 1230 * address into HW. 
1231 */ 1232 WRITE_ENTER(&vswp->mac_rwlock); 1233 if (vswp->mh != NULL) { 1234 ret = mac_multicst_add(vswp->mh, mca); 1235 if (ret != 0) { 1236 cmn_err(CE_NOTE, "!vsw%d: unable to " 1237 "add multicast address", 1238 vswp->instance); 1239 RW_EXIT(&vswp->mac_rwlock); 1240 (void) vsw_del_mcst(vswp, 1241 VSW_LOCALDEV, addr, NULL); 1242 kmem_free(mcst_p, sizeof (*mcst_p)); 1243 return (ret); 1244 } 1245 mcst_p->mac_added = B_TRUE; 1246 } 1247 RW_EXIT(&vswp->mac_rwlock); 1248 1249 mutex_enter(&vswp->mca_lock); 1250 mcst_p->nextp = vswp->mcap; 1251 vswp->mcap = mcst_p; 1252 mutex_exit(&vswp->mca_lock); 1253 } else { 1254 cmn_err(CE_NOTE, "!vsw%d: unable to add multicast " 1255 "address", vswp->instance); 1256 } 1257 return (ret); 1258 } 1259 1260 D2(vswp, "%s: removing multicast", __func__); 1261 /* 1262 * Remove the address from the hash table.. 1263 */ 1264 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1265 1266 /* 1267 * ..and then from the list maintained in the 1268 * vsw_t structure. 
1269 */ 1270 mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr); 1271 ASSERT(mcst_p != NULL); 1272 1273 WRITE_ENTER(&vswp->mac_rwlock); 1274 if (vswp->mh != NULL && mcst_p->mac_added) { 1275 (void) mac_multicst_remove(vswp->mh, mca); 1276 mcst_p->mac_added = B_FALSE; 1277 } 1278 RW_EXIT(&vswp->mac_rwlock); 1279 kmem_free(mcst_p, sizeof (*mcst_p)); 1280 } 1281 1282 D1(vswp, "%s: exit", __func__); 1283 1284 return (0); 1285 } 1286 1287 static int 1288 vsw_m_promisc(void *arg, boolean_t on) 1289 { 1290 vsw_t *vswp = (vsw_t *)arg; 1291 1292 D1(vswp, "%s: enter", __func__); 1293 1294 WRITE_ENTER(&vswp->if_lockrw); 1295 if (on) 1296 vswp->if_state |= VSW_IF_PROMISC; 1297 else 1298 vswp->if_state &= ~VSW_IF_PROMISC; 1299 RW_EXIT(&vswp->if_lockrw); 1300 1301 D1(vswp, "%s: exit", __func__); 1302 1303 return (0); 1304 } 1305 1306 static mblk_t * 1307 vsw_m_tx(void *arg, mblk_t *mp) 1308 { 1309 vsw_t *vswp = (vsw_t *)arg; 1310 1311 D1(vswp, "%s: enter", __func__); 1312 1313 mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp); 1314 1315 if (mp == NULL) { 1316 return (NULL); 1317 } 1318 1319 vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL); 1320 1321 D1(vswp, "%s: exit", __func__); 1322 1323 return (NULL); 1324 } 1325 1326 /* 1327 * Register for machine description (MD) updates. 1328 * 1329 * Returns 0 on success, 1 on failure. 1330 */ 1331 static int 1332 vsw_mdeg_register(vsw_t *vswp) 1333 { 1334 mdeg_prop_spec_t *pspecp; 1335 mdeg_node_spec_t *inst_specp; 1336 mdeg_handle_t mdeg_hdl, mdeg_port_hdl; 1337 size_t templatesz; 1338 int rv; 1339 1340 D1(vswp, "%s: enter", __func__); 1341 1342 /* 1343 * Allocate and initialize a per-instance copy 1344 * of the global property spec array that will 1345 * uniquely identify this vsw instance. 
1346 */ 1347 templatesz = sizeof (vsw_prop_template); 1348 pspecp = kmem_zalloc(templatesz, KM_SLEEP); 1349 1350 bcopy(vsw_prop_template, pspecp, templatesz); 1351 1352 VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop); 1353 1354 /* initialize the complete prop spec structure */ 1355 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 1356 inst_specp->namep = "virtual-device"; 1357 inst_specp->specp = pspecp; 1358 1359 D2(vswp, "%s: instance %d registering with mdeg", __func__, 1360 vswp->regprop); 1361 /* 1362 * Register an interest in 'virtual-device' nodes with a 1363 * 'name' property of 'virtual-network-switch' 1364 */ 1365 rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb, 1366 (void *)vswp, &mdeg_hdl); 1367 if (rv != MDEG_SUCCESS) { 1368 DERR(vswp, "%s: mdeg_register failed (%d) for vsw node", 1369 __func__, rv); 1370 goto mdeg_reg_fail; 1371 } 1372 1373 /* 1374 * Register an interest in 'vsw-port' nodes. 1375 */ 1376 rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb, 1377 (void *)vswp, &mdeg_port_hdl); 1378 if (rv != MDEG_SUCCESS) { 1379 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv); 1380 (void) mdeg_unregister(mdeg_hdl); 1381 goto mdeg_reg_fail; 1382 } 1383 1384 /* save off data that will be needed later */ 1385 vswp->inst_spec = inst_specp; 1386 vswp->mdeg_hdl = mdeg_hdl; 1387 vswp->mdeg_port_hdl = mdeg_port_hdl; 1388 1389 D1(vswp, "%s: exit", __func__); 1390 return (0); 1391 1392 mdeg_reg_fail: 1393 cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks", 1394 vswp->instance); 1395 kmem_free(pspecp, templatesz); 1396 kmem_free(inst_specp, sizeof (mdeg_node_spec_t)); 1397 1398 vswp->mdeg_hdl = NULL; 1399 vswp->mdeg_port_hdl = NULL; 1400 1401 return (1); 1402 } 1403 1404 static void 1405 vsw_mdeg_unregister(vsw_t *vswp) 1406 { 1407 D1(vswp, "vsw_mdeg_unregister: enter"); 1408 1409 if (vswp->mdeg_hdl != NULL) 1410 (void) mdeg_unregister(vswp->mdeg_hdl); 1411 1412 if (vswp->mdeg_port_hdl != NULL) 1413 (void) 
mdeg_unregister(vswp->mdeg_port_hdl); 1414 1415 if (vswp->inst_spec != NULL) { 1416 if (vswp->inst_spec->specp != NULL) { 1417 (void) kmem_free(vswp->inst_spec->specp, 1418 sizeof (vsw_prop_template)); 1419 vswp->inst_spec->specp = NULL; 1420 } 1421 1422 (void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t)); 1423 vswp->inst_spec = NULL; 1424 } 1425 1426 D1(vswp, "vsw_mdeg_unregister: exit"); 1427 } 1428 1429 /* 1430 * Mdeg callback invoked for the vsw node itself. 1431 */ 1432 static int 1433 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1434 { 1435 vsw_t *vswp; 1436 md_t *mdp; 1437 mde_cookie_t node; 1438 uint64_t inst; 1439 char *node_name = NULL; 1440 1441 if (resp == NULL) 1442 return (MDEG_FAILURE); 1443 1444 vswp = (vsw_t *)cb_argp; 1445 1446 D1(vswp, "%s: added %d : removed %d : curr matched %d" 1447 " : prev matched %d", __func__, resp->added.nelem, 1448 resp->removed.nelem, resp->match_curr.nelem, 1449 resp->match_prev.nelem); 1450 1451 /* 1452 * We get an initial callback for this node as 'added' 1453 * after registering with mdeg. Note that we would have 1454 * already gathered information about this vsw node by 1455 * walking MD earlier during attach (in vsw_read_mdprops()). 1456 * So, there is a window where the properties of this 1457 * node might have changed when we get this initial 'added' 1458 * callback. We handle this as if an update occured 1459 * and invoke the same function which handles updates to 1460 * the properties of this vsw-node if any. 1461 * 1462 * A non-zero 'match' value indicates that the MD has been 1463 * updated and that a virtual-network-switch node is 1464 * present which may or may not have been updated. It is 1465 * up to the clients to examine their own nodes and 1466 * determine if they have changed. 
1467 */ 1468 if (resp->added.nelem != 0) { 1469 1470 if (resp->added.nelem != 1) { 1471 cmn_err(CE_NOTE, "!vsw%d: number of nodes added " 1472 "invalid: %d\n", vswp->instance, resp->added.nelem); 1473 return (MDEG_FAILURE); 1474 } 1475 1476 mdp = resp->added.mdp; 1477 node = resp->added.mdep[0]; 1478 1479 } else if (resp->match_curr.nelem != 0) { 1480 1481 if (resp->match_curr.nelem != 1) { 1482 cmn_err(CE_NOTE, "!vsw%d: number of nodes updated " 1483 "invalid: %d\n", vswp->instance, 1484 resp->match_curr.nelem); 1485 return (MDEG_FAILURE); 1486 } 1487 1488 mdp = resp->match_curr.mdp; 1489 node = resp->match_curr.mdep[0]; 1490 1491 } else { 1492 return (MDEG_FAILURE); 1493 } 1494 1495 /* Validate name and instance */ 1496 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { 1497 DERR(vswp, "%s: unable to get node name\n", __func__); 1498 return (MDEG_FAILURE); 1499 } 1500 1501 /* is this a virtual-network-switch? */ 1502 if (strcmp(node_name, vsw_propname) != 0) { 1503 DERR(vswp, "%s: Invalid node name: %s\n", 1504 __func__, node_name); 1505 return (MDEG_FAILURE); 1506 } 1507 1508 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { 1509 DERR(vswp, "%s: prop(cfg-handle) not found\n", 1510 __func__); 1511 return (MDEG_FAILURE); 1512 } 1513 1514 /* is this the right instance of vsw? */ 1515 if (inst != vswp->regprop) { 1516 DERR(vswp, "%s: Invalid cfg-handle: %lx\n", 1517 __func__, inst); 1518 return (MDEG_FAILURE); 1519 } 1520 1521 vsw_update_md_prop(vswp, mdp, node); 1522 1523 return (MDEG_SUCCESS); 1524 } 1525 1526 /* 1527 * Mdeg callback invoked for changes to the vsw-port nodes 1528 * under the vsw node. 
1529 */ 1530 static int 1531 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1532 { 1533 vsw_t *vswp; 1534 int idx; 1535 md_t *mdp; 1536 mde_cookie_t node; 1537 uint64_t inst; 1538 int rv; 1539 1540 if ((resp == NULL) || (cb_argp == NULL)) 1541 return (MDEG_FAILURE); 1542 1543 vswp = (vsw_t *)cb_argp; 1544 1545 D2(vswp, "%s: added %d : removed %d : curr matched %d" 1546 " : prev matched %d", __func__, resp->added.nelem, 1547 resp->removed.nelem, resp->match_curr.nelem, 1548 resp->match_prev.nelem); 1549 1550 /* process added ports */ 1551 for (idx = 0; idx < resp->added.nelem; idx++) { 1552 mdp = resp->added.mdp; 1553 node = resp->added.mdep[idx]; 1554 1555 D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); 1556 1557 if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) { 1558 cmn_err(CE_WARN, "!vsw%d: Unable to add new port " 1559 "(0x%lx), err=%d", vswp->instance, node, rv); 1560 } 1561 } 1562 1563 /* process removed ports */ 1564 for (idx = 0; idx < resp->removed.nelem; idx++) { 1565 mdp = resp->removed.mdp; 1566 node = resp->removed.mdep[idx]; 1567 1568 if (md_get_prop_val(mdp, node, id_propname, &inst)) { 1569 DERR(vswp, "%s: prop(%s) not found in port(%d)", 1570 __func__, id_propname, idx); 1571 continue; 1572 } 1573 1574 D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); 1575 1576 if (vsw_port_detach(vswp, inst) != 0) { 1577 cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld", 1578 vswp->instance, inst); 1579 } 1580 } 1581 1582 for (idx = 0; idx < resp->match_curr.nelem; idx++) { 1583 (void) vsw_port_update(vswp, resp->match_curr.mdp, 1584 resp->match_curr.mdep[idx], 1585 resp->match_prev.mdp, 1586 resp->match_prev.mdep[idx]); 1587 } 1588 1589 D1(vswp, "%s: exit", __func__); 1590 1591 return (MDEG_SUCCESS); 1592 } 1593 1594 /* 1595 * Scan the machine description for this instance of vsw 1596 * and read its properties. Called only from vsw_attach(). 1597 * Returns: 0 on success, 1 on failure. 
1598 */ 1599 static int 1600 vsw_read_mdprops(vsw_t *vswp) 1601 { 1602 md_t *mdp = NULL; 1603 mde_cookie_t rootnode; 1604 mde_cookie_t *listp = NULL; 1605 uint64_t inst; 1606 uint64_t cfgh; 1607 char *name; 1608 int rv = 1; 1609 int num_nodes = 0; 1610 int num_devs = 0; 1611 int listsz = 0; 1612 int i; 1613 1614 /* 1615 * In each 'virtual-device' node in the MD there is a 1616 * 'cfg-handle' property which is the MD's concept of 1617 * an instance number (this may be completely different from 1618 * the device drivers instance #). OBP reads that value and 1619 * stores it in the 'reg' property of the appropriate node in 1620 * the device tree. We first read this reg property and use this 1621 * to compare against the 'cfg-handle' property of vsw nodes 1622 * in MD to get to this specific vsw instance and then read 1623 * other properties that we are interested in. 1624 * We also cache the value of 'reg' property and use it later 1625 * to register callbacks with mdeg (see vsw_mdeg_register()) 1626 */ 1627 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 1628 DDI_PROP_DONTPASS, reg_propname, -1); 1629 if (inst == -1) { 1630 cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from " 1631 "OBP device tree", vswp->instance, reg_propname); 1632 return (rv); 1633 } 1634 1635 vswp->regprop = inst; 1636 1637 if ((mdp = md_get_handle()) == NULL) { 1638 DWARN(vswp, "%s: cannot init MD\n", __func__); 1639 return (rv); 1640 } 1641 1642 num_nodes = md_node_count(mdp); 1643 ASSERT(num_nodes > 0); 1644 1645 listsz = num_nodes * sizeof (mde_cookie_t); 1646 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 1647 1648 rootnode = md_root_node(mdp); 1649 1650 /* search for all "virtual_device" nodes */ 1651 num_devs = md_scan_dag(mdp, rootnode, 1652 md_find_name(mdp, vdev_propname), 1653 md_find_name(mdp, "fwd"), listp); 1654 if (num_devs <= 0) { 1655 DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs); 1656 goto vsw_readmd_exit; 1657 } 1658 1659 /* 1660 * Now loop 
through the list of virtual-devices looking for 1661 * devices with name "virtual-network-switch" and for each 1662 * such device compare its instance with what we have from 1663 * the 'reg' property to find the right node in MD and then 1664 * read all its properties. 1665 */ 1666 for (i = 0; i < num_devs; i++) { 1667 1668 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 1669 DWARN(vswp, "%s: name property not found\n", 1670 __func__); 1671 goto vsw_readmd_exit; 1672 } 1673 1674 /* is this a virtual-network-switch? */ 1675 if (strcmp(name, vsw_propname) != 0) 1676 continue; 1677 1678 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 1679 DWARN(vswp, "%s: cfg-handle property not found\n", 1680 __func__); 1681 goto vsw_readmd_exit; 1682 } 1683 1684 /* is this the required instance of vsw? */ 1685 if (inst != cfgh) 1686 continue; 1687 1688 /* now read all properties of this vsw instance */ 1689 rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]); 1690 break; 1691 } 1692 1693 vsw_readmd_exit: 1694 1695 kmem_free(listp, listsz); 1696 (void) md_fini_handle(mdp); 1697 return (rv); 1698 } 1699 1700 /* 1701 * Read the initial start-of-day values from the specified MD node. 
1702 */ 1703 static int 1704 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1705 { 1706 int i; 1707 uint64_t macaddr = 0; 1708 1709 D1(vswp, "%s: enter", __func__); 1710 1711 if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) { 1712 return (1); 1713 } 1714 1715 /* mac address for vswitch device itself */ 1716 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1717 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1718 vswp->instance); 1719 return (1); 1720 } 1721 1722 vsw_save_lmacaddr(vswp, macaddr); 1723 1724 if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) { 1725 DWARN(vswp, "%s: Unable to read %s property from MD, " 1726 "defaulting to 'switched' mode", 1727 __func__, smode_propname); 1728 1729 for (i = 0; i < NUM_SMODES; i++) 1730 vswp->smode[i] = VSW_LAYER2; 1731 1732 vswp->smode_num = NUM_SMODES; 1733 } else { 1734 ASSERT(vswp->smode_num != 0); 1735 } 1736 1737 /* read vlan id properties of this vsw instance */ 1738 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid, 1739 &vswp->vids, &vswp->nvids, &vswp->default_vlan_id); 1740 1741 /* read priority-ether-types */ 1742 vsw_read_pri_eth_types(vswp, mdp, node); 1743 1744 D1(vswp, "%s: exit", __func__); 1745 return (0); 1746 } 1747 1748 /* 1749 * Read vlan id properties of the given MD node. 
1750 * Arguments: 1751 * arg: device argument(vsw device or a port) 1752 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port) 1753 * mdp: machine description 1754 * node: md node cookie 1755 * 1756 * Returns: 1757 * pvidp: port-vlan-id of the node 1758 * vidspp: list of vlan-ids of the node 1759 * nvidsp: # of vlan-ids in the list 1760 * default_idp: default-vlan-id of the node(if node is vsw device) 1761 */ 1762 static void 1763 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node, 1764 uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp, 1765 uint16_t *default_idp) 1766 { 1767 vsw_t *vswp; 1768 vsw_port_t *portp; 1769 char *pvid_propname; 1770 char *vid_propname; 1771 uint_t nvids = 0; 1772 uint32_t vids_size; 1773 int rv; 1774 int i; 1775 uint64_t *data; 1776 uint64_t val; 1777 int size; 1778 int inst; 1779 1780 if (type == VSW_LOCALDEV) { 1781 1782 vswp = (vsw_t *)arg; 1783 pvid_propname = vsw_pvid_propname; 1784 vid_propname = vsw_vid_propname; 1785 inst = vswp->instance; 1786 1787 } else if (type == VSW_VNETPORT) { 1788 1789 portp = (vsw_port_t *)arg; 1790 vswp = portp->p_vswp; 1791 pvid_propname = port_pvid_propname; 1792 vid_propname = port_vid_propname; 1793 inst = portp->p_instance; 1794 1795 } else { 1796 return; 1797 } 1798 1799 if (type == VSW_LOCALDEV && default_idp != NULL) { 1800 rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val); 1801 if (rv != 0) { 1802 DWARN(vswp, "%s: prop(%s) not found", __func__, 1803 vsw_dvid_propname); 1804 1805 *default_idp = vsw_default_vlan_id; 1806 } else { 1807 *default_idp = val & 0xFFF; 1808 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1809 vsw_dvid_propname, inst, *default_idp); 1810 } 1811 } 1812 1813 rv = md_get_prop_val(mdp, node, pvid_propname, &val); 1814 if (rv != 0) { 1815 DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname); 1816 *pvidp = vsw_default_vlan_id; 1817 } else { 1818 1819 *pvidp = val & 0xFFF; 1820 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1821 
pvid_propname, inst, *pvidp); 1822 } 1823 1824 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data, 1825 &size); 1826 if (rv != 0) { 1827 D2(vswp, "%s: prop(%s) not found", __func__, vid_propname); 1828 size = 0; 1829 } else { 1830 size /= sizeof (uint64_t); 1831 } 1832 nvids = size; 1833 1834 if (nvids != 0) { 1835 D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst); 1836 vids_size = sizeof (uint16_t) * nvids; 1837 *vidspp = kmem_zalloc(vids_size, KM_SLEEP); 1838 for (i = 0; i < nvids; i++) { 1839 (*vidspp)[i] = data[i] & 0xFFFF; 1840 D2(vswp, " %d ", (*vidspp)[i]); 1841 } 1842 D2(vswp, "\n"); 1843 } 1844 1845 *nvidsp = nvids; 1846 } 1847 1848 /* 1849 * This function reads "priority-ether-types" property from md. This property 1850 * is used to enable support for priority frames. Applications which need 1851 * guaranteed and timely delivery of certain high priority frames to/from 1852 * a vnet or vsw within ldoms, should configure this property by providing 1853 * the ether type(s) for which the priority facility is needed. 1854 * Normal data frames are delivered over a ldc channel using the descriptor 1855 * ring mechanism which is constrained by factors such as descriptor ring size, 1856 * the rate at which the ring is processed at the peer ldc end point, etc. 1857 * The priority mechanism provides an Out-Of-Band path to send/receive frames 1858 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the 1859 * descriptor ring path and enables a more reliable and timely delivery of 1860 * frames to the peer. 1861 */ 1862 static void 1863 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1864 { 1865 int rv; 1866 uint16_t *types; 1867 uint64_t *data; 1868 int size; 1869 int i; 1870 size_t mblk_sz; 1871 1872 rv = md_get_prop_data(mdp, node, pri_types_propname, 1873 (uint8_t **)&data, &size); 1874 if (rv != 0) { 1875 /* 1876 * Property may not exist if we are running pre-ldoms1.1 f/w. 
1877 * Check if 'vsw_pri_eth_type' has been set in that case. 1878 */ 1879 if (vsw_pri_eth_type != 0) { 1880 size = sizeof (vsw_pri_eth_type); 1881 data = &vsw_pri_eth_type; 1882 } else { 1883 D3(vswp, "%s: prop(%s) not found", __func__, 1884 pri_types_propname); 1885 size = 0; 1886 } 1887 } 1888 1889 if (size == 0) { 1890 vswp->pri_num_types = 0; 1891 return; 1892 } 1893 1894 /* 1895 * we have some priority-ether-types defined; 1896 * allocate a table of these types and also 1897 * allocate a pool of mblks to transmit these 1898 * priority packets. 1899 */ 1900 size /= sizeof (uint64_t); 1901 vswp->pri_num_types = size; 1902 vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP); 1903 for (i = 0, types = vswp->pri_types; i < size; i++) { 1904 types[i] = data[i] & 0xFFFF; 1905 } 1906 mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7; 1907 (void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp); 1908 } 1909 1910 /* 1911 * Check to see if the relevant properties in the specified node have 1912 * changed, and if so take the appropriate action. 1913 * 1914 * If any of the properties are missing or invalid we don't take 1915 * any action, as this function should only be invoked when modifications 1916 * have been made to what we assume is a working configuration, which 1917 * we leave active. 1918 * 1919 * Note it is legal for this routine to be invoked even if none of the 1920 * properties in the port node within the MD have actually changed. 
1921 */ 1922 static void 1923 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1924 { 1925 char physname[LIFNAMSIZ]; 1926 char drv[LIFNAMSIZ]; 1927 uint_t ddi_instance; 1928 uint8_t new_smode[NUM_SMODES]; 1929 int i, smode_num = 0; 1930 uint64_t macaddr = 0; 1931 enum {MD_init = 0x1, 1932 MD_physname = 0x2, 1933 MD_macaddr = 0x4, 1934 MD_smode = 0x8, 1935 MD_vlans = 0x10} updated; 1936 int rv; 1937 uint16_t pvid; 1938 uint16_t *vids; 1939 uint16_t nvids; 1940 1941 updated = MD_init; 1942 1943 D1(vswp, "%s: enter", __func__); 1944 1945 /* 1946 * Check if name of physical device in MD has changed. 1947 */ 1948 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { 1949 /* 1950 * Do basic sanity check on new device name/instance, 1951 * if its non NULL. It is valid for the device name to 1952 * have changed from a non NULL to a NULL value, i.e. 1953 * the vsw is being changed to 'routed' mode. 1954 */ 1955 if ((strlen(physname) != 0) && 1956 (ddi_parse(physname, drv, 1957 &ddi_instance) != DDI_SUCCESS)) { 1958 cmn_err(CE_WARN, "!vsw%d: physical device %s is not" 1959 " a valid device name/instance", 1960 vswp->instance, physname); 1961 goto fail_reconf; 1962 } 1963 1964 if (strcmp(physname, vswp->physname)) { 1965 D2(vswp, "%s: device name changed from %s to %s", 1966 __func__, vswp->physname, physname); 1967 1968 updated |= MD_physname; 1969 } else { 1970 D2(vswp, "%s: device name unchanged at %s", 1971 __func__, vswp->physname); 1972 } 1973 } else { 1974 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " 1975 "device from updated MD.", vswp->instance); 1976 goto fail_reconf; 1977 } 1978 1979 /* 1980 * Check if MAC address has changed. 
1981 */ 1982 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1983 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1984 vswp->instance); 1985 goto fail_reconf; 1986 } else { 1987 uint64_t maddr = macaddr; 1988 READ_ENTER(&vswp->if_lockrw); 1989 for (i = ETHERADDRL - 1; i >= 0; i--) { 1990 if (vswp->if_addr.ether_addr_octet[i] 1991 != (macaddr & 0xFF)) { 1992 D2(vswp, "%s: octet[%d] 0x%x != 0x%x", 1993 __func__, i, 1994 vswp->if_addr.ether_addr_octet[i], 1995 (macaddr & 0xFF)); 1996 updated |= MD_macaddr; 1997 macaddr = maddr; 1998 break; 1999 } 2000 macaddr >>= 8; 2001 } 2002 RW_EXIT(&vswp->if_lockrw); 2003 if (updated & MD_macaddr) { 2004 vsw_save_lmacaddr(vswp, macaddr); 2005 } 2006 } 2007 2008 /* 2009 * Check if switching modes have changed. 2010 */ 2011 if (vsw_get_md_smodes(vswp, mdp, node, 2012 new_smode, &smode_num)) { 2013 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", 2014 vswp->instance, smode_propname); 2015 goto fail_reconf; 2016 } else { 2017 ASSERT(smode_num != 0); 2018 if (smode_num != vswp->smode_num) { 2019 D2(vswp, "%s: number of modes changed from %d to %d", 2020 __func__, vswp->smode_num, smode_num); 2021 } 2022 2023 for (i = 0; i < smode_num; i++) { 2024 if (new_smode[i] != vswp->smode[i]) { 2025 D2(vswp, "%s: mode changed from %d to %d", 2026 __func__, vswp->smode[i], new_smode[i]); 2027 updated |= MD_smode; 2028 break; 2029 } 2030 } 2031 } 2032 2033 /* Read the vlan ids */ 2034 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids, 2035 &nvids, NULL); 2036 2037 /* Determine if there are any vlan id updates */ 2038 if ((pvid != vswp->pvid) || /* pvid changed? */ 2039 (nvids != vswp->nvids) || /* # of vids changed? */ 2040 ((nvids != 0) && (vswp->nvids != 0) && /* vids changed? */ 2041 bcmp(vids, vswp->vids, sizeof (uint16_t) * nvids))) { 2042 updated |= MD_vlans; 2043 } 2044 2045 /* 2046 * Now make any changes which are needed... 
2047 */ 2048 2049 if (updated & (MD_physname | MD_smode)) { 2050 2051 /* 2052 * Stop any pending timeout to setup switching mode. 2053 */ 2054 vsw_stop_switching_timeout(vswp); 2055 2056 /* Cleanup HybridIO */ 2057 vsw_hio_cleanup(vswp); 2058 2059 /* 2060 * Remove unicst, mcst addrs of vsw interface 2061 * and ports from the physdev. 2062 */ 2063 vsw_unset_addrs(vswp); 2064 2065 /* 2066 * Stop, detach and close the old device.. 2067 */ 2068 WRITE_ENTER(&vswp->mac_rwlock); 2069 2070 vsw_mac_detach(vswp); 2071 vsw_mac_close(vswp); 2072 2073 RW_EXIT(&vswp->mac_rwlock); 2074 2075 /* 2076 * Update phys name. 2077 */ 2078 if (updated & MD_physname) { 2079 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", 2080 vswp->instance, vswp->physname, physname); 2081 (void) strncpy(vswp->physname, 2082 physname, strlen(physname) + 1); 2083 } 2084 2085 /* 2086 * Update array with the new switch mode values. 2087 */ 2088 if (updated & MD_smode) { 2089 for (i = 0; i < smode_num; i++) 2090 vswp->smode[i] = new_smode[i]; 2091 2092 vswp->smode_num = smode_num; 2093 vswp->smode_idx = 0; 2094 } 2095 2096 /* 2097 * ..and attach, start the new device. 2098 */ 2099 rv = vsw_setup_switching(vswp); 2100 if (rv == EAGAIN) { 2101 /* 2102 * Unable to setup switching mode. 2103 * As the error is EAGAIN, schedule a timeout to retry 2104 * and return. Programming addresses of ports and 2105 * vsw interface will be done when the timeout handler 2106 * completes successfully. 2107 */ 2108 mutex_enter(&vswp->swtmout_lock); 2109 2110 vswp->swtmout_enabled = B_TRUE; 2111 vswp->swtmout_id = 2112 timeout(vsw_setup_switching_timeout, vswp, 2113 (vsw_setup_switching_delay * 2114 drv_usectohz(MICROSEC))); 2115 2116 mutex_exit(&vswp->swtmout_lock); 2117 2118 return; 2119 2120 } else if (rv) { 2121 goto fail_update; 2122 } 2123 2124 /* 2125 * program unicst, mcst addrs of vsw interface 2126 * and ports in the physdev. 
2127 */ 2128 vsw_set_addrs(vswp); 2129 2130 /* Start HIO for ports that have already connected */ 2131 vsw_hio_start_ports(vswp); 2132 2133 } else if (updated & MD_macaddr) { 2134 /* 2135 * We enter here if only MD_macaddr is exclusively updated. 2136 * If MD_physname and/or MD_smode are also updated, then 2137 * as part of that, we would have implicitly processed 2138 * MD_macaddr update (above). 2139 */ 2140 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", 2141 vswp->instance, macaddr); 2142 2143 READ_ENTER(&vswp->if_lockrw); 2144 if (vswp->if_state & VSW_IF_UP) { 2145 2146 mutex_enter(&vswp->hw_lock); 2147 /* 2148 * Remove old mac address of vsw interface 2149 * from the physdev 2150 */ 2151 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 2152 /* 2153 * Program new mac address of vsw interface 2154 * in the physdev 2155 */ 2156 rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 2157 mutex_exit(&vswp->hw_lock); 2158 if (rv != 0) { 2159 cmn_err(CE_NOTE, 2160 "!vsw%d: failed to program interface " 2161 "unicast address\n", vswp->instance); 2162 } 2163 /* 2164 * Notify the MAC layer of the changed address. 2165 */ 2166 mac_unicst_update(vswp->if_mh, 2167 (uint8_t *)&vswp->if_addr); 2168 2169 } 2170 RW_EXIT(&vswp->if_lockrw); 2171 2172 } 2173 2174 if (updated & MD_vlans) { 2175 /* Remove existing vlan ids from the hash table. 
*/ 2176 vsw_vlan_remove_ids(vswp, VSW_LOCALDEV); 2177 2178 /* save the new vlan ids */ 2179 vswp->pvid = pvid; 2180 if (vswp->nvids != 0) { 2181 kmem_free(vswp->vids, sizeof (uint16_t) * vswp->nvids); 2182 vswp->nvids = 0; 2183 } 2184 if (nvids != 0) { 2185 vswp->nvids = nvids; 2186 vswp->vids = vids; 2187 } 2188 2189 /* add these new vlan ids into hash table */ 2190 vsw_vlan_add_ids(vswp, VSW_LOCALDEV); 2191 } else { 2192 if (nvids != 0) { 2193 kmem_free(vids, sizeof (uint16_t) * nvids); 2194 } 2195 } 2196 2197 return; 2198 2199 fail_reconf: 2200 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); 2201 return; 2202 2203 fail_update: 2204 cmn_err(CE_WARN, "!vsw%d: re-configuration failed", 2205 vswp->instance); 2206 } 2207 2208 /* 2209 * Read the port's md properties. 2210 */ 2211 static int 2212 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 2213 md_t *mdp, mde_cookie_t *node) 2214 { 2215 uint64_t ldc_id; 2216 uint8_t *addrp; 2217 int i, addrsz; 2218 int num_nodes = 0, nchan = 0; 2219 int listsz = 0; 2220 mde_cookie_t *listp = NULL; 2221 struct ether_addr ea; 2222 uint64_t macaddr; 2223 uint64_t inst = 0; 2224 uint64_t val; 2225 2226 if (md_get_prop_val(mdp, *node, id_propname, &inst)) { 2227 DWARN(vswp, "%s: prop(%s) not found", __func__, 2228 id_propname); 2229 return (1); 2230 } 2231 2232 /* 2233 * Find the channel endpoint node(s) (which should be under this 2234 * port node) which contain the channel id(s). 
2235 */ 2236 if ((num_nodes = md_node_count(mdp)) <= 0) { 2237 DERR(vswp, "%s: invalid number of nodes found (%d)", 2238 __func__, num_nodes); 2239 return (1); 2240 } 2241 2242 D2(vswp, "%s: %d nodes found", __func__, num_nodes); 2243 2244 /* allocate enough space for node list */ 2245 listsz = num_nodes * sizeof (mde_cookie_t); 2246 listp = kmem_zalloc(listsz, KM_SLEEP); 2247 2248 nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname), 2249 md_find_name(mdp, "fwd"), listp); 2250 2251 if (nchan <= 0) { 2252 DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname); 2253 kmem_free(listp, listsz); 2254 return (1); 2255 } 2256 2257 D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname); 2258 2259 /* use property from first node found */ 2260 if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) { 2261 DWARN(vswp, "%s: prop(%s) not found\n", __func__, 2262 id_propname); 2263 kmem_free(listp, listsz); 2264 return (1); 2265 } 2266 2267 /* don't need list any more */ 2268 kmem_free(listp, listsz); 2269 2270 D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id); 2271 2272 /* read mac-address property */ 2273 if (md_get_prop_data(mdp, *node, remaddr_propname, 2274 &addrp, &addrsz)) { 2275 DWARN(vswp, "%s: prop(%s) not found", 2276 __func__, remaddr_propname); 2277 return (1); 2278 } 2279 2280 if (addrsz < ETHERADDRL) { 2281 DWARN(vswp, "%s: invalid address size", __func__); 2282 return (1); 2283 } 2284 2285 macaddr = *((uint64_t *)addrp); 2286 D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr); 2287 2288 for (i = ETHERADDRL - 1; i >= 0; i--) { 2289 ea.ether_addr_octet[i] = macaddr & 0xFF; 2290 macaddr >>= 8; 2291 } 2292 2293 /* now update all properties into the port */ 2294 portp->p_vswp = vswp; 2295 portp->p_instance = inst; 2296 portp->addr_set = VSW_ADDR_UNSET; 2297 ether_copy(&ea, &portp->p_macaddr); 2298 if (nchan > VSW_PORT_MAX_LDCS) { 2299 D2(vswp, "%s: using first of %d ldc ids", 2300 __func__, nchan); 2301 nchan = 
VSW_PORT_MAX_LDCS; 2302 } 2303 portp->num_ldcs = nchan; 2304 portp->ldc_ids = 2305 kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP); 2306 bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan); 2307 2308 /* read vlan id properties of this port node */ 2309 vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid, 2310 &portp->vids, &portp->nvids, NULL); 2311 2312 /* Check if hybrid property is present */ 2313 if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) { 2314 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2315 portp->p_hio_enabled = B_TRUE; 2316 } else { 2317 portp->p_hio_enabled = B_FALSE; 2318 } 2319 /* 2320 * Port hio capability determined after version 2321 * negotiation, i.e., when we know the peer is HybridIO capable. 2322 */ 2323 portp->p_hio_capable = B_FALSE; 2324 return (0); 2325 } 2326 2327 /* 2328 * Add a new port to the system. 2329 * 2330 * Returns 0 on success, 1 on failure. 2331 */ 2332 int 2333 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) 2334 { 2335 vsw_port_t *portp; 2336 int rv; 2337 2338 portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP); 2339 2340 rv = vsw_port_read_props(portp, vswp, mdp, node); 2341 if (rv != 0) { 2342 kmem_free(portp, sizeof (*portp)); 2343 return (1); 2344 } 2345 2346 rv = vsw_port_attach(portp); 2347 if (rv != 0) { 2348 DERR(vswp, "%s: failed to attach port", __func__); 2349 return (1); 2350 } 2351 2352 return (0); 2353 } 2354 2355 static int 2356 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 2357 md_t *prev_mdp, mde_cookie_t prev_mdex) 2358 { 2359 uint64_t cport_num; 2360 uint64_t pport_num; 2361 vsw_port_list_t *plistp; 2362 vsw_port_t *portp; 2363 boolean_t updated_vlans = B_FALSE; 2364 uint16_t pvid; 2365 uint16_t *vids; 2366 uint16_t nvids; 2367 uint64_t val; 2368 boolean_t hio_enabled = B_FALSE; 2369 2370 /* 2371 * For now, we get port updates only if vlan ids changed. 2372 * We read the port num and do some sanity check. 
2373 */ 2374 if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) { 2375 return (1); 2376 } 2377 2378 if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) { 2379 return (1); 2380 } 2381 if (cport_num != pport_num) 2382 return (1); 2383 2384 plistp = &(vswp->plist); 2385 2386 READ_ENTER(&plistp->lockrw); 2387 2388 portp = vsw_lookup_port(vswp, cport_num); 2389 if (portp == NULL) { 2390 RW_EXIT(&plistp->lockrw); 2391 return (1); 2392 } 2393 2394 /* Read the vlan ids */ 2395 vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid, 2396 &vids, &nvids, NULL); 2397 2398 /* Determine if there are any vlan id updates */ 2399 if ((pvid != portp->pvid) || /* pvid changed? */ 2400 (nvids != portp->nvids) || /* # of vids changed? */ 2401 ((nvids != 0) && (portp->nvids != 0) && /* vids changed? */ 2402 bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) { 2403 updated_vlans = B_TRUE; 2404 } 2405 2406 if (updated_vlans == B_TRUE) { 2407 2408 /* Remove existing vlan ids from the hash table. 
*/ 2409 vsw_vlan_remove_ids(portp, VSW_VNETPORT); 2410 2411 /* save the new vlan ids */ 2412 portp->pvid = pvid; 2413 if (portp->nvids != 0) { 2414 kmem_free(portp->vids, 2415 sizeof (uint16_t) * portp->nvids); 2416 portp->nvids = 0; 2417 } 2418 if (nvids != 0) { 2419 portp->vids = kmem_zalloc(sizeof (uint16_t) * 2420 nvids, KM_SLEEP); 2421 bcopy(vids, portp->vids, sizeof (uint16_t) * nvids); 2422 portp->nvids = nvids; 2423 kmem_free(vids, sizeof (uint16_t) * nvids); 2424 } 2425 2426 /* add these new vlan ids into hash table */ 2427 vsw_vlan_add_ids(portp, VSW_VNETPORT); 2428 2429 /* reset the port if it is vlan unaware (ver < 1.3) */ 2430 vsw_vlan_unaware_port_reset(portp); 2431 } 2432 2433 /* Check if hybrid property is present */ 2434 if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) { 2435 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2436 hio_enabled = B_TRUE; 2437 } 2438 2439 if (portp->p_hio_enabled != hio_enabled) { 2440 vsw_hio_port_update(portp, hio_enabled); 2441 } 2442 2443 RW_EXIT(&plistp->lockrw); 2444 2445 return (0); 2446 } 2447 2448 /* 2449 * vsw_mac_rx -- A common function to send packets to the interface. 2450 * By default this function check if the interface is UP or not, the 2451 * rest of the behaviour depends on the flags as below: 2452 * 2453 * VSW_MACRX_PROMISC -- Check if the promisc mode set or not. 2454 * VSW_MACRX_COPYMSG -- Make a copy of the message(s). 2455 * VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack. 
2456 */ 2457 void 2458 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 2459 mblk_t *mp, vsw_macrx_flags_t flags) 2460 { 2461 mblk_t *mpt; 2462 2463 D1(vswp, "%s:enter\n", __func__); 2464 READ_ENTER(&vswp->if_lockrw); 2465 /* Check if the interface is up */ 2466 if (!(vswp->if_state & VSW_IF_UP)) { 2467 RW_EXIT(&vswp->if_lockrw); 2468 /* Free messages only if FREEMSG flag specified */ 2469 if (flags & VSW_MACRX_FREEMSG) { 2470 freemsgchain(mp); 2471 } 2472 D1(vswp, "%s:exit\n", __func__); 2473 return; 2474 } 2475 /* 2476 * If PROMISC flag is passed, then check if 2477 * the interface is in the PROMISC mode. 2478 * If not, drop the messages. 2479 */ 2480 if (flags & VSW_MACRX_PROMISC) { 2481 if (!(vswp->if_state & VSW_IF_PROMISC)) { 2482 RW_EXIT(&vswp->if_lockrw); 2483 /* Free messages only if FREEMSG flag specified */ 2484 if (flags & VSW_MACRX_FREEMSG) { 2485 freemsgchain(mp); 2486 } 2487 D1(vswp, "%s:exit\n", __func__); 2488 return; 2489 } 2490 } 2491 RW_EXIT(&vswp->if_lockrw); 2492 /* 2493 * If COPYMSG flag is passed, then make a copy 2494 * of the message chain and send up the copy. 2495 */ 2496 if (flags & VSW_MACRX_COPYMSG) { 2497 mp = copymsgchain(mp); 2498 if (mp == NULL) { 2499 D1(vswp, "%s:exit\n", __func__); 2500 return; 2501 } 2502 } 2503 2504 D2(vswp, "%s: sending up stack", __func__); 2505 2506 mpt = NULL; 2507 (void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt); 2508 if (mp != NULL) { 2509 mac_rx(vswp->if_mh, mrh, mp); 2510 } 2511 D1(vswp, "%s:exit\n", __func__); 2512 } 2513 2514 /* copy mac address of vsw into soft state structure */ 2515 static void 2516 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr) 2517 { 2518 int i; 2519 2520 WRITE_ENTER(&vswp->if_lockrw); 2521 for (i = ETHERADDRL - 1; i >= 0; i--) { 2522 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF; 2523 macaddr >>= 8; 2524 } 2525 RW_EXIT(&vswp->if_lockrw); 2526 } 2527