1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/ldc.h> 66 #include <sys/vsw_fdb.h> 67 #include <sys/vsw.h> 68 #include <sys/vio_mailbox.h> 69 #include <sys/vnet_mailbox.h> 70 #include <sys/vnet_common.h> 71 #include <sys/vio_util.h> 72 #include <sys/sdt.h> 73 #include <sys/atomic.h> 74 #include <sys/callb.h> 75 #include <sys/vlan.h> 76 77 /* 78 * Function prototypes. 79 */ 80 static int vsw_attach(dev_info_t *, ddi_attach_cmd_t); 81 static int vsw_detach(dev_info_t *, ddi_detach_cmd_t); 82 static int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 83 static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *); 84 static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *); 85 86 /* MDEG routines */ 87 static int vsw_mdeg_register(vsw_t *vswp); 88 static void vsw_mdeg_unregister(vsw_t *vswp); 89 static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *); 90 static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *); 91 static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t); 92 static int vsw_read_mdprops(vsw_t *vswp); 93 static void vsw_vlan_read_ids(void *arg, int type, md_t *mdp, 94 mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp, 95 uint16_t *nvidsp, uint16_t *default_idp); 96 static int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 97 md_t *mdp, mde_cookie_t *node); 98 static void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, 99 mde_cookie_t node); 100 static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t); 101 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 102 103 /* Mac driver related routines */ 104 static int vsw_mac_register(vsw_t *); 105 static int vsw_mac_unregister(vsw_t *); 106 static int vsw_m_stat(void *, uint_t, uint64_t *); 107 static void vsw_m_stop(void *arg); 108 static int vsw_m_start(void *arg); 109 static int vsw_m_unicst(void *arg, const uint8_t *); 110 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *); 111 static int vsw_m_promisc(void *arg, boolean_t); 112 static mblk_t *vsw_m_tx(void *arg, mblk_t *); 113 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 114 mblk_t *mp, vsw_macrx_flags_t flags); 115 116 /* 117 * Functions imported from other files. 118 */ 119 extern void vsw_setup_switching_timeout(void *arg); 120 extern void vsw_stop_switching_timeout(vsw_t *vswp); 121 extern int vsw_setup_switching(vsw_t *); 122 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, 123 vsw_port_t *port, mac_resource_handle_t mrh); 124 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 125 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 126 extern void vsw_del_mcst_vsw(vsw_t *); 127 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 128 extern int vsw_detach_ports(vsw_t *vswp); 129 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 130 extern int vsw_port_detach(vsw_t *vswp, int p_instance); 131 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 132 md_t *prev_mdp, mde_cookie_t prev_mdex); 133 extern int vsw_port_attach(vsw_port_t *port); 134 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 135 extern int vsw_mac_attach(vsw_t *vswp); 136 extern void vsw_mac_detach(vsw_t *vswp); 137 extern int vsw_mac_open(vsw_t *vswp); 138 extern void vsw_mac_close(vsw_t *vswp); 139 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int); 140 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int); 141 extern void vsw_reconfig_hw(vsw_t *); 142 extern void vsw_unset_addrs(vsw_t *vswp); 143 extern void vsw_set_addrs(vsw_t *vswp); 144 extern void vsw_create_vlans(void *arg, int type); 145 extern void vsw_destroy_vlans(void *arg, int type); 146 extern void vsw_vlan_add_ids(void *arg, int type); 147 extern void vsw_vlan_remove_ids(void *arg, int type); 148 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 149 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, 150 mblk_t **npt); 151 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 152 extern void vsw_hio_cleanup(vsw_t *vswp); 153 extern void vsw_hio_start_ports(vsw_t *vswp); 154 void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled); 155 156 /* 157 * Internal tunables. 158 */ 159 int vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */ 160 int vsw_wretries = 100; /* # of write attempts */ 161 int vsw_desc_delay = 0; /* delay in us */ 162 int vsw_read_attempts = 5; /* # of reads of descriptor */ 163 int vsw_mac_open_retries = 20; /* max # of mac_open() retries */ 164 int vsw_setup_switching_delay = 3; /* setup sw timeout interval in sec */ 165 int vsw_ldc_tx_delay = 5; /* delay(ticks) for tx retries */ 166 int vsw_ldc_tx_retries = 10; /* # of ldc tx retries */ 167 boolean_t vsw_ldc_rxthr_enabled = B_TRUE; /* LDC Rx thread enabled */ 168 boolean_t vsw_ldc_txthr_enabled = B_TRUE; /* LDC Tx thread enabled */ 169 170 uint32_t vsw_fdb_nchains = 8; /* # of chains in fdb hash table */ 171 uint32_t vsw_vlan_nchains = 4; /* # of chains in vlan id hash table */ 172 uint32_t vsw_ethermtu = 1500; /* mtu of the device */ 173 174 /* sw timeout for boot delay only, in milliseconds */ 175 int vsw_setup_switching_boot_delay = 100 * MILLISEC; 176 177 /* delay in usec to wait for all references on a fdb entry to be dropped */ 178 uint32_t vsw_fdbe_refcnt_delay = 10; 179 180 /* 181 * Default vlan id. This is only used internally when the "default-vlan-id" 182 * property is not present in the MD device node. Therefore, this should not be 183 * used as a tunable; if this value is changed, the corresponding variable 184 * should be updated to the same value in all vnets connected to this vsw. 185 */ 186 uint16_t vsw_default_vlan_id = 1; 187 188 /* 189 * Workaround for a version handshake bug in obp's vnet. 190 * If vsw initiates version negotiation starting from the highest version, 191 * obp sends a nack and terminates version handshake. To workaround 192 * this, we do not initiate version handshake when the channel comes up. 193 * Instead, we wait for the peer to send its version info msg and go through 194 * the version protocol exchange. If we successfully negotiate a version, 195 * before sending the ack, we send our version info msg to the peer 196 * using the <major,minor> version that we are about to ack. 197 */ 198 boolean_t vsw_obp_ver_proto_workaround = B_TRUE; 199 200 /* 201 * In the absence of "priority-ether-types" property in MD, the following 202 * internal tunable can be set to specify a single priority ethertype. 203 */ 204 uint64_t vsw_pri_eth_type = 0; 205 206 /* 207 * Number of transmit priority buffers that are preallocated per device. 208 * This number is chosen to be a small value to throttle transmission 209 * of priority packets. Note: Must be a power of 2 for vio_create_mblks(). 210 */ 211 uint32_t vsw_pri_tx_nmblks = 64; 212 213 boolean_t vsw_hio_enabled = B_TRUE; /* Enable/disable HybridIO */ 214 int vsw_hio_max_cleanup_retries = 10; /* Max retries for HybridIO cleanp */ 215 int vsw_hio_cleanup_delay = 10000; /* 10ms */ 216 217 /* 218 * External tunables. 219 */ 220 /* 221 * Enable/disable thread per ring. This is a mode selection 222 * that is done a vsw driver attach time. 223 */ 224 boolean_t vsw_multi_ring_enable = B_FALSE; 225 int vsw_mac_rx_rings = VSW_MAC_RX_RINGS; 226 227 /* Number of transmit descriptors - must be power of 2 */ 228 uint32_t vsw_ntxds = VSW_RING_NUM_EL; 229 230 /* 231 * Max number of mblks received in one receive operation. 232 */ 233 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6); 234 235 /* 236 * Tunables for three different pools, that is, the size and 237 * number of mblks for each pool. 238 */ 239 uint32_t vsw_mblk_size1 = VSW_MBLK_SZ_128; /* size=128 for pool1 */ 240 uint32_t vsw_mblk_size2 = VSW_MBLK_SZ_256; /* size=256 for pool2 */ 241 uint32_t vsw_mblk_size3 = VSW_MBLK_SZ_2048; /* size=2048 for pool3 */ 242 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS; /* number of mblks for pool1 */ 243 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS; /* number of mblks for pool2 */ 244 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS; /* number of mblks for pool3 */ 245 246 /* 247 * vsw_max_tx_qcount is the maximum # of packets that can be queued 248 * before the tx worker thread begins processing the queue. Its value 249 * is chosen to be 4x the default length of tx descriptor ring. 250 */ 251 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL; 252 253 /* 254 * MAC callbacks 255 */ 256 static mac_callbacks_t vsw_m_callbacks = { 257 0, 258 vsw_m_stat, 259 vsw_m_start, 260 vsw_m_stop, 261 vsw_m_promisc, 262 vsw_m_multicst, 263 vsw_m_unicst, 264 vsw_m_tx, 265 NULL, 266 NULL, 267 NULL 268 }; 269 270 static struct cb_ops vsw_cb_ops = { 271 nulldev, /* cb_open */ 272 nulldev, /* cb_close */ 273 nodev, /* cb_strategy */ 274 nodev, /* cb_print */ 275 nodev, /* cb_dump */ 276 nodev, /* cb_read */ 277 nodev, /* cb_write */ 278 nodev, /* cb_ioctl */ 279 nodev, /* cb_devmap */ 280 nodev, /* cb_mmap */ 281 nodev, /* cb_segmap */ 282 nochpoll, /* cb_chpoll */ 283 ddi_prop_op, /* cb_prop_op */ 284 NULL, /* cb_stream */ 285 D_MP, /* cb_flag */ 286 CB_REV, /* rev */ 287 nodev, /* int (*cb_aread)() */ 288 nodev /* int (*cb_awrite)() */ 289 }; 290 291 static struct dev_ops vsw_ops = { 292 DEVO_REV, /* devo_rev */ 293 0, /* devo_refcnt */ 294 vsw_getinfo, /* devo_getinfo */ 295 nulldev, /* devo_identify */ 296 nulldev, /* devo_probe */ 297 vsw_attach, /* devo_attach */ 298 vsw_detach, /* devo_detach */ 299 nodev, /* devo_reset */ 300 &vsw_cb_ops, /* devo_cb_ops */ 301 (struct bus_ops *)NULL, /* devo_bus_ops */ 302 ddi_power /* devo_power */ 303 }; 304 305 extern struct mod_ops mod_driverops; 306 static struct modldrv vswmodldrv = { 307 &mod_driverops, 308 "sun4v Virtual Switch", 309 &vsw_ops, 310 }; 311 312 #define LDC_ENTER_LOCK(ldcp) \ 313 mutex_enter(&((ldcp)->ldc_cblock));\ 314 mutex_enter(&((ldcp)->ldc_rxlock));\ 315 mutex_enter(&((ldcp)->ldc_txlock)); 316 #define LDC_EXIT_LOCK(ldcp) \ 317 mutex_exit(&((ldcp)->ldc_txlock));\ 318 mutex_exit(&((ldcp)->ldc_rxlock));\ 319 mutex_exit(&((ldcp)->ldc_cblock)); 320 321 /* Driver soft state ptr */ 322 static void *vsw_state; 323 324 /* 325 * Linked list of "vsw_t" structures - one per instance. 326 */ 327 vsw_t *vsw_head = NULL; 328 krwlock_t vsw_rw; 329 330 /* 331 * Property names 332 */ 333 static char vdev_propname[] = "virtual-device"; 334 static char vsw_propname[] = "virtual-network-switch"; 335 static char physdev_propname[] = "vsw-phys-dev"; 336 static char smode_propname[] = "vsw-switch-mode"; 337 static char macaddr_propname[] = "local-mac-address"; 338 static char remaddr_propname[] = "remote-mac-address"; 339 static char ldcids_propname[] = "ldc-ids"; 340 static char chan_propname[] = "channel-endpoint"; 341 static char id_propname[] = "id"; 342 static char reg_propname[] = "reg"; 343 static char pri_types_propname[] = "priority-ether-types"; 344 static char vsw_pvid_propname[] = "port-vlan-id"; 345 static char vsw_vid_propname[] = "vlan-id"; 346 static char vsw_dvid_propname[] = "default-vlan-id"; 347 static char port_pvid_propname[] = "remote-port-vlan-id"; 348 static char port_vid_propname[] = "remote-vlan-id"; 349 static char hybrid_propname[] = "hybrid"; 350 351 /* 352 * Matching criteria passed to the MDEG to register interest 353 * in changes to 'virtual-device-port' nodes identified by their 354 * 'id' property. 355 */ 356 static md_prop_match_t vport_prop_match[] = { 357 { MDET_PROP_VAL, "id" }, 358 { MDET_LIST_END, NULL } 359 }; 360 361 static mdeg_node_match_t vport_match = { "virtual-device-port", 362 vport_prop_match }; 363 364 /* 365 * Matching criteria passed to the MDEG to register interest 366 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified 367 * by their 'name' and 'cfg-handle' properties. 368 */ 369 static md_prop_match_t vdev_prop_match[] = { 370 { MDET_PROP_STR, "name" }, 371 { MDET_PROP_VAL, "cfg-handle" }, 372 { MDET_LIST_END, NULL } 373 }; 374 375 static mdeg_node_match_t vdev_match = { "virtual-device", 376 vdev_prop_match }; 377 378 379 /* 380 * Specification of an MD node passed to the MDEG to filter any 381 * 'vport' nodes that do not belong to the specified node. This 382 * template is copied for each vsw instance and filled in with 383 * the appropriate 'cfg-handle' value before being passed to the MDEG. 384 */ 385 static mdeg_prop_spec_t vsw_prop_template[] = { 386 { MDET_PROP_STR, "name", vsw_propname }, 387 { MDET_PROP_VAL, "cfg-handle", NULL }, 388 { MDET_LIST_END, NULL, NULL } 389 }; 390 391 #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 392 393 #ifdef DEBUG 394 /* 395 * Print debug messages - set to 0x1f to enable all msgs 396 * or 0x0 to turn all off. 397 */ 398 int vswdbg = 0x0; 399 400 /* 401 * debug levels: 402 * 0x01: Function entry/exit tracing 403 * 0x02: Internal function messages 404 * 0x04: Verbose internal messages 405 * 0x08: Warning messages 406 * 0x10: Error messages 407 */ 408 409 void 410 vswdebug(vsw_t *vswp, const char *fmt, ...) 411 { 412 char buf[512]; 413 va_list ap; 414 415 va_start(ap, fmt); 416 (void) vsprintf(buf, fmt, ap); 417 va_end(ap); 418 419 if (vswp == NULL) 420 cmn_err(CE_CONT, "%s\n", buf); 421 else 422 cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); 423 } 424 425 #endif /* DEBUG */ 426 427 static struct modlinkage modlinkage = { 428 MODREV_1, 429 &vswmodldrv, 430 NULL 431 }; 432 433 int 434 _init(void) 435 { 436 int status; 437 438 rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); 439 440 status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); 441 if (status != 0) { 442 return (status); 443 } 444 445 mac_init_ops(&vsw_ops, DRV_NAME); 446 status = mod_install(&modlinkage); 447 if (status != 0) { 448 ddi_soft_state_fini(&vsw_state); 449 } 450 return (status); 451 } 452 453 int 454 _fini(void) 455 { 456 int status; 457 458 status = mod_remove(&modlinkage); 459 if (status != 0) 460 return (status); 461 mac_fini_ops(&vsw_ops); 462 ddi_soft_state_fini(&vsw_state); 463 464 rw_destroy(&vsw_rw); 465 466 return (status); 467 } 468 469 int 470 _info(struct modinfo *modinfop) 471 { 472 return (mod_info(&modlinkage, modinfop)); 473 } 474 475 static int 476 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 477 { 478 vsw_t *vswp; 479 int instance; 480 char hashname[MAXNAMELEN]; 481 char qname[TASKQ_NAMELEN]; 482 enum { PROG_init = 0x00, 483 PROG_locks = 0x01, 484 PROG_readmd = 0x02, 485 PROG_fdb = 0x04, 486 PROG_mfdb = 0x08, 487 PROG_taskq = 0x10, 488 PROG_swmode = 0x20, 489 PROG_macreg = 0x40, 490 PROG_mdreg = 0x80} 491 progress; 492 493 progress = PROG_init; 494 int rv; 495 496 switch (cmd) { 497 case DDI_ATTACH: 498 break; 499 case DDI_RESUME: 500 /* nothing to do for this non-device */ 501 return (DDI_SUCCESS); 502 case DDI_PM_RESUME: 503 default: 504 return (DDI_FAILURE); 505 } 506 507 instance = ddi_get_instance(dip); 508 if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { 509 DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); 510 return (DDI_FAILURE); 511 } 512 vswp = ddi_get_soft_state(vsw_state, instance); 513 514 if (vswp == NULL) { 515 DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance); 516 goto vsw_attach_fail; 517 } 518 519 vswp->dip = dip; 520 vswp->instance = instance; 521 ddi_set_driver_private(dip, (caddr_t)vswp); 522 523 mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL); 524 mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL); 525 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); 526 mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL); 527 rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); 528 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); 529 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); 530 531 progress |= PROG_locks; 532 533 rv = vsw_read_mdprops(vswp); 534 if (rv != 0) 535 goto vsw_attach_fail; 536 537 progress |= PROG_readmd; 538 539 /* setup the unicast forwarding database */ 540 (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", 541 vswp->instance); 542 D2(vswp, "creating unicast hash table (%s)...", hashname); 543 vswp->fdb_nchains = vsw_fdb_nchains; 544 vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains, 545 mod_hash_null_valdtor, sizeof (void *)); 546 vsw_create_vlans((void *)vswp, VSW_LOCALDEV); 547 progress |= PROG_fdb; 548 549 /* setup the multicast fowarding database */ 550 (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", 551 vswp->instance); 552 D2(vswp, "creating multicast hash table %s)...", hashname); 553 vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains, 554 mod_hash_null_valdtor, sizeof (void *)); 555 556 progress |= PROG_mfdb; 557 558 /* 559 * Create the taskq which will process all the VIO 560 * control messages. 561 */ 562 (void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance); 563 if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, 564 TASKQ_DEFAULTPRI, 0)) == NULL) { 565 cmn_err(CE_WARN, "!vsw%d: Unable to create task queue", 566 vswp->instance); 567 goto vsw_attach_fail; 568 } 569 570 progress |= PROG_taskq; 571 572 /* prevent auto-detaching */ 573 if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip, 574 DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) { 575 cmn_err(CE_NOTE, "!Unable to set \"%s\" property for " 576 "instance %u", DDI_NO_AUTODETACH, instance); 577 } 578 579 /* 580 * The null switching function is set to avoid panic until 581 * switch mode is setup. 582 */ 583 vswp->vsw_switch_frame = vsw_switch_frame_nop; 584 585 /* 586 * Setup the required switching mode, 587 * based on the mdprops that we read earlier. 588 * schedule a short timeout (0.1 sec) for the first time 589 * setup and avoid calling mac_open() directly here, 590 * others are regular timeout 3 secs. 591 */ 592 mutex_enter(&vswp->swtmout_lock); 593 594 vswp->swtmout_enabled = B_TRUE; 595 vswp->swtmout_id = timeout(vsw_setup_switching_timeout, vswp, 596 drv_usectohz(vsw_setup_switching_boot_delay)); 597 598 mutex_exit(&vswp->swtmout_lock); 599 600 progress |= PROG_swmode; 601 602 /* Register with mac layer as a provider */ 603 rv = vsw_mac_register(vswp); 604 if (rv != 0) 605 goto vsw_attach_fail; 606 607 progress |= PROG_macreg; 608 609 /* 610 * Now we have everything setup, register an interest in 611 * specific MD nodes. 612 * 613 * The callback is invoked in 2 cases, firstly if upon mdeg 614 * registration there are existing nodes which match our specified 615 * criteria, and secondly if the MD is changed (and again, there 616 * are nodes which we are interested in present within it. Note 617 * that our callback will be invoked even if our specified nodes 618 * have not actually changed). 619 * 620 */ 621 rv = vsw_mdeg_register(vswp); 622 if (rv != 0) 623 goto vsw_attach_fail; 624 625 progress |= PROG_mdreg; 626 627 WRITE_ENTER(&vsw_rw); 628 vswp->next = vsw_head; 629 vsw_head = vswp; 630 RW_EXIT(&vsw_rw); 631 632 ddi_report_dev(vswp->dip); 633 return (DDI_SUCCESS); 634 635 vsw_attach_fail: 636 DERR(NULL, "vsw_attach: failed"); 637 638 if (progress & PROG_mdreg) { 639 vsw_mdeg_unregister(vswp); 640 (void) vsw_detach_ports(vswp); 641 } 642 643 if (progress & PROG_macreg) 644 (void) vsw_mac_unregister(vswp); 645 646 if (progress & PROG_swmode) { 647 vsw_stop_switching_timeout(vswp); 648 vsw_hio_cleanup(vswp); 649 mutex_enter(&vswp->mac_lock); 650 vsw_mac_detach(vswp); 651 vsw_mac_close(vswp); 652 mutex_exit(&vswp->mac_lock); 653 } 654 655 if (progress & PROG_taskq) 656 ddi_taskq_destroy(vswp->taskq_p); 657 658 if (progress & PROG_mfdb) 659 mod_hash_destroy_hash(vswp->mfdb); 660 661 if (progress & PROG_fdb) { 662 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 663 mod_hash_destroy_hash(vswp->fdb_hashp); 664 } 665 666 if (progress & PROG_readmd) { 667 if (VSW_PRI_ETH_DEFINED(vswp)) { 668 kmem_free(vswp->pri_types, 669 sizeof (uint16_t) * vswp->pri_num_types); 670 } 671 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 672 } 673 674 if (progress & PROG_locks) { 675 rw_destroy(&vswp->plist.lockrw); 676 rw_destroy(&vswp->mfdbrw); 677 rw_destroy(&vswp->if_lockrw); 678 mutex_destroy(&vswp->swtmout_lock); 679 mutex_destroy(&vswp->mca_lock); 680 mutex_destroy(&vswp->mac_lock); 681 mutex_destroy(&vswp->hw_lock); 682 } 683 684 ddi_soft_state_free(vsw_state, instance); 685 return (DDI_FAILURE); 686 } 687 688 static int 689 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 690 { 691 vio_mblk_pool_t *poolp, *npoolp; 692 vsw_t **vswpp, *vswp; 693 int instance; 694 695 instance = ddi_get_instance(dip); 696 vswp = ddi_get_soft_state(vsw_state, instance); 697 698 if (vswp == NULL) { 699 return (DDI_FAILURE); 700 } 701 702 switch (cmd) { 703 case DDI_DETACH: 704 break; 705 case DDI_SUSPEND: 706 case DDI_PM_SUSPEND: 707 default: 708 return (DDI_FAILURE); 709 } 710 711 D2(vswp, "detaching instance %d", instance); 712 713 /* Stop any pending timeout to setup switching mode. */ 714 vsw_stop_switching_timeout(vswp); 715 716 if (vswp->if_state & VSW_IF_REG) { 717 if (vsw_mac_unregister(vswp) != 0) { 718 cmn_err(CE_WARN, "!vsw%d: Unable to detach from " 719 "MAC layer", vswp->instance); 720 return (DDI_FAILURE); 721 } 722 } 723 724 vsw_mdeg_unregister(vswp); 725 726 /* remove mac layer callback */ 727 mutex_enter(&vswp->mac_lock); 728 if ((vswp->mh != NULL) && (vswp->mrh != NULL)) { 729 mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE); 730 vswp->mrh = NULL; 731 } 732 mutex_exit(&vswp->mac_lock); 733 734 if (vsw_detach_ports(vswp) != 0) { 735 cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports", 736 vswp->instance); 737 return (DDI_FAILURE); 738 } 739 740 rw_destroy(&vswp->if_lockrw); 741 742 /* cleanup HybridIO */ 743 vsw_hio_cleanup(vswp); 744 745 mutex_destroy(&vswp->hw_lock); 746 747 /* 748 * Now that the ports have been deleted, stop and close 749 * the physical device. 750 */ 751 mutex_enter(&vswp->mac_lock); 752 753 vsw_mac_detach(vswp); 754 vsw_mac_close(vswp); 755 756 mutex_exit(&vswp->mac_lock); 757 758 mutex_destroy(&vswp->mac_lock); 759 mutex_destroy(&vswp->swtmout_lock); 760 761 /* 762 * Destroy any free pools that may still exist. 763 */ 764 poolp = vswp->rxh; 765 while (poolp != NULL) { 766 npoolp = vswp->rxh = poolp->nextp; 767 if (vio_destroy_mblks(poolp) != 0) { 768 vswp->rxh = poolp; 769 return (DDI_FAILURE); 770 } 771 poolp = npoolp; 772 } 773 774 /* 775 * Remove this instance from any entries it may be on in 776 * the hash table by using the list of addresses maintained 777 * in the vsw_t structure. 778 */ 779 vsw_del_mcst_vsw(vswp); 780 781 vswp->mcap = NULL; 782 mutex_destroy(&vswp->mca_lock); 783 784 /* 785 * By now any pending tasks have finished and the underlying 786 * ldc's have been destroyed, so its safe to delete the control 787 * message taskq. 788 */ 789 if (vswp->taskq_p != NULL) 790 ddi_taskq_destroy(vswp->taskq_p); 791 792 /* 793 * At this stage all the data pointers in the hash table 794 * should be NULL, as all the ports have been removed and will 795 * have deleted themselves from the port lists which the data 796 * pointers point to. Hence we can destroy the table using the 797 * default destructors. 798 */ 799 D2(vswp, "vsw_detach: destroying hash tables.."); 800 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 801 mod_hash_destroy_hash(vswp->fdb_hashp); 802 vswp->fdb_hashp = NULL; 803 804 WRITE_ENTER(&vswp->mfdbrw); 805 mod_hash_destroy_hash(vswp->mfdb); 806 vswp->mfdb = NULL; 807 RW_EXIT(&vswp->mfdbrw); 808 rw_destroy(&vswp->mfdbrw); 809 810 /* free pri_types table */ 811 if (VSW_PRI_ETH_DEFINED(vswp)) { 812 kmem_free(vswp->pri_types, 813 sizeof (uint16_t) * vswp->pri_num_types); 814 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 815 } 816 817 ddi_remove_minor_node(dip, NULL); 818 819 rw_destroy(&vswp->plist.lockrw); 820 WRITE_ENTER(&vsw_rw); 821 for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) { 822 if (*vswpp == vswp) { 823 *vswpp = vswp->next; 824 break; 825 } 826 } 827 RW_EXIT(&vsw_rw); 828 ddi_soft_state_free(vsw_state, instance); 829 830 return (DDI_SUCCESS); 831 } 832 833 static int 834 vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 835 { 836 _NOTE(ARGUNUSED(dip)) 837 838 vsw_t *vswp = NULL; 839 dev_t dev = (dev_t)arg; 840 int instance; 841 842 instance = getminor(dev); 843 844 switch (infocmd) { 845 case DDI_INFO_DEVT2DEVINFO: 846 if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) { 847 *result = NULL; 848 return (DDI_FAILURE); 849 } 850 *result = vswp->dip; 851 return (DDI_SUCCESS); 852 853 case DDI_INFO_DEVT2INSTANCE: 854 *result = (void *)(uintptr_t)instance; 855 return (DDI_SUCCESS); 856 857 default: 858 *result = NULL; 859 return (DDI_FAILURE); 860 } 861 } 862 863 /* 864 * Get the value of the "vsw-phys-dev" property in the specified 865 * node. This property is the name of the physical device that 866 * the virtual switch will use to talk to the outside world. 867 * 868 * Note it is valid for this property to be NULL (but the property 869 * itself must exist). Callers of this routine should verify that 870 * the value returned is what they expected (i.e. either NULL or non NULL). 871 * 872 * On success returns value of the property in region pointed to by 873 * the 'name' argument, and with return value of 0. Otherwise returns 1. 874 */ 875 static int 876 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name) 877 { 878 int len = 0; 879 int instance; 880 char *physname = NULL; 881 char *dev; 882 const char *dev_name; 883 char myname[MAXNAMELEN]; 884 885 dev_name = ddi_driver_name(vswp->dip); 886 instance = ddi_get_instance(vswp->dip); 887 (void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance); 888 889 if (md_get_prop_data(mdp, node, physdev_propname, 890 (uint8_t **)(&physname), &len) != 0) { 891 cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical " 892 "device(s) from MD", vswp->instance); 893 return (1); 894 } else if ((strlen(physname) + 1) > LIFNAMSIZ) { 895 cmn_err(CE_WARN, "!vsw%d: %s is too long a device name", 896 vswp->instance, physname); 897 return (1); 898 } else if (strcmp(myname, physname) == 0) { 899 /* 900 * Prevent the vswitch from opening itself as the 901 * network device. 902 */ 903 cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name", 904 vswp->instance, physname); 905 return (1); 906 } else { 907 (void) strncpy(name, physname, strlen(physname) + 1); 908 D2(vswp, "%s: using first device specified (%s)", 909 __func__, physname); 910 } 911 912 #ifdef DEBUG 913 /* 914 * As a temporary measure to aid testing we check to see if there 915 * is a vsw.conf file present. If there is we use the value of the 916 * vsw_physname property in the file as the name of the physical 917 * device, overriding the value from the MD. 918 * 919 * There may be multiple devices listed, but for the moment 920 * we just use the first one. 921 */ 922 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0, 923 "vsw_physname", &dev) == DDI_PROP_SUCCESS) { 924 if ((strlen(dev) + 1) > LIFNAMSIZ) { 925 cmn_err(CE_WARN, "vsw%d: %s is too long a device name", 926 vswp->instance, dev); 927 ddi_prop_free(dev); 928 return (1); 929 } else { 930 cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from " 931 "config file", vswp->instance, dev); 932 933 (void) strncpy(name, dev, strlen(dev) + 1); 934 } 935 936 ddi_prop_free(dev); 937 } 938 #endif 939 940 return (0); 941 } 942 943 /* 944 * Read the 'vsw-switch-mode' property from the specified MD node. 945 * 946 * Returns 0 on success and the number of modes found in 'found', 947 * otherwise returns 1. 948 */ 949 static int 950 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 951 uint8_t *modes, int *found) 952 { 953 int len = 0; 954 int smode_num = 0; 955 char *smode = NULL; 956 char *curr_mode = NULL; 957 958 D1(vswp, "%s: enter", __func__); 959 960 /* 961 * Get the switch-mode property. The modes are listed in 962 * decreasing order of preference, i.e. prefered mode is 963 * first item in list. 964 */ 965 len = 0; 966 smode_num = 0; 967 if (md_get_prop_data(mdp, node, smode_propname, 968 (uint8_t **)(&smode), &len) != 0) { 969 /* 970 * Unable to get switch-mode property from MD, nothing 971 * more we can do. 972 */ 973 cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property" 974 " from the MD", vswp->instance); 975 *found = 0; 976 return (1); 977 } 978 979 curr_mode = smode; 980 /* 981 * Modes of operation: 982 * 'switched' - layer 2 switching, underlying HW in 983 * programmed mode. 984 * 'promiscuous' - layer 2 switching, underlying HW in 985 * promiscuous mode. 986 * 'routed' - layer 3 (i.e. IP) routing, underlying HW 987 * in non-promiscuous mode. 988 */ 989 while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) { 990 D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode); 991 if (strcmp(curr_mode, "switched") == 0) { 992 modes[smode_num++] = VSW_LAYER2; 993 } else if (strcmp(curr_mode, "promiscuous") == 0) { 994 modes[smode_num++] = VSW_LAYER2_PROMISC; 995 } else if (strcmp(curr_mode, "routed") == 0) { 996 modes[smode_num++] = VSW_LAYER3; 997 } else { 998 DWARN(vswp, "%s: Unknown switch mode %s, " 999 "setting to default 'switched' mode", 1000 __func__, curr_mode); 1001 modes[smode_num++] = VSW_LAYER2; 1002 } 1003 curr_mode += strlen(curr_mode) + 1; 1004 } 1005 *found = smode_num; 1006 1007 D2(vswp, "%s: %d modes found", __func__, smode_num); 1008 1009 D1(vswp, "%s: exit", __func__); 1010 1011 return (0); 1012 } 1013 1014 /* 1015 * Register with the MAC layer as a network device, so we 1016 * can be plumbed if necessary. 1017 */ 1018 static int 1019 vsw_mac_register(vsw_t *vswp) 1020 { 1021 mac_register_t *macp; 1022 int rv; 1023 1024 D1(vswp, "%s: enter", __func__); 1025 1026 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 1027 return (EINVAL); 1028 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1029 macp->m_driver = vswp; 1030 macp->m_dip = vswp->dip; 1031 macp->m_src_addr = (uint8_t *)&vswp->if_addr; 1032 macp->m_callbacks = &vsw_m_callbacks; 1033 macp->m_min_sdu = 0; 1034 macp->m_max_sdu = vsw_ethermtu; 1035 macp->m_margin = VLAN_TAGSZ; 1036 rv = mac_register(macp, &vswp->if_mh); 1037 mac_free(macp); 1038 if (rv != 0) { 1039 /* 1040 * Treat this as a non-fatal error as we may be 1041 * able to operate in some other mode. 1042 */ 1043 cmn_err(CE_NOTE, "!vsw%d: Unable to register as " 1044 "a provider with MAC layer", vswp->instance); 1045 return (rv); 1046 } 1047 1048 vswp->if_state |= VSW_IF_REG; 1049 1050 vswp->max_frame_size = vsw_ethermtu + sizeof (struct ether_header) 1051 + VLAN_TAGSZ; 1052 1053 D1(vswp, "%s: exit", __func__); 1054 1055 return (rv); 1056 } 1057 1058 static int 1059 vsw_mac_unregister(vsw_t *vswp) 1060 { 1061 int rv = 0; 1062 1063 D1(vswp, "%s: enter", __func__); 1064 1065 WRITE_ENTER(&vswp->if_lockrw); 1066 1067 if (vswp->if_state & VSW_IF_REG) { 1068 rv = mac_unregister(vswp->if_mh); 1069 if (rv != 0) { 1070 DWARN(vswp, "%s: unable to unregister from MAC " 1071 "framework", __func__); 1072 1073 RW_EXIT(&vswp->if_lockrw); 1074 D1(vswp, "%s: fail exit", __func__); 1075 return (rv); 1076 } 1077 1078 /* mark i/f as down and unregistered */ 1079 vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG); 1080 } 1081 RW_EXIT(&vswp->if_lockrw); 1082 1083 D1(vswp, "%s: exit", __func__); 1084 1085 return (rv); 1086 } 1087 1088 static int 1089 vsw_m_stat(void *arg, uint_t stat, uint64_t *val) 1090 { 1091 vsw_t *vswp = (vsw_t *)arg; 1092 1093 D1(vswp, "%s: enter", __func__); 1094 1095 mutex_enter(&vswp->mac_lock); 1096 if (vswp->mh == NULL) { 1097 mutex_exit(&vswp->mac_lock); 1098 return (EINVAL); 1099 } 1100 1101 /* return stats from underlying device */ 1102 *val = mac_stat_get(vswp->mh, stat); 1103 1104 mutex_exit(&vswp->mac_lock); 1105 1106 return (0); 1107 } 1108 1109 static void 1110 vsw_m_stop(void *arg) 1111 { 1112 vsw_t *vswp = (vsw_t *)arg; 1113 1114 D1(vswp, "%s: enter", __func__); 1115 1116 WRITE_ENTER(&vswp->if_lockrw); 1117 vswp->if_state &= ~VSW_IF_UP; 1118 RW_EXIT(&vswp->if_lockrw); 1119 1120 mutex_enter(&vswp->hw_lock); 1121 1122 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 1123 1124 if (vswp->recfg_reqd) 1125 vsw_reconfig_hw(vswp); 1126 1127 mutex_exit(&vswp->hw_lock); 1128 1129 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1130 } 1131 1132 static int 1133 vsw_m_start(void *arg) 1134 { 1135 vsw_t *vswp = (vsw_t *)arg; 1136 1137 D1(vswp, "%s: enter", __func__); 1138 1139 WRITE_ENTER(&vswp->if_lockrw); 1140 1141 vswp->if_state |= VSW_IF_UP; 1142 1143 if (vswp->switching_setup_done == B_FALSE) { 1144 /* 1145 * If the switching mode has not been setup yet, just 1146 * return. The unicast address will be programmed 1147 * after the physical device is successfully setup by the 1148 * timeout handler. 1149 */ 1150 RW_EXIT(&vswp->if_lockrw); 1151 return (0); 1152 } 1153 1154 /* if in layer2 mode, program unicast address. */ 1155 if (vswp->mh != NULL) { 1156 mutex_enter(&vswp->hw_lock); 1157 (void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 1158 mutex_exit(&vswp->hw_lock); 1159 } 1160 1161 RW_EXIT(&vswp->if_lockrw); 1162 1163 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1164 return (0); 1165 } 1166 1167 /* 1168 * Change the local interface address. 1169 * 1170 * Note: we don't support this entry point. The local 1171 * mac address of the switch can only be changed via its 1172 * MD node properties. 1173 */ 1174 static int 1175 vsw_m_unicst(void *arg, const uint8_t *macaddr) 1176 { 1177 _NOTE(ARGUNUSED(arg, macaddr)) 1178 1179 return (DDI_FAILURE); 1180 } 1181 1182 static int 1183 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 1184 { 1185 vsw_t *vswp = (vsw_t *)arg; 1186 mcst_addr_t *mcst_p = NULL; 1187 uint64_t addr = 0x0; 1188 int i, ret = 0; 1189 1190 D1(vswp, "%s: enter", __func__); 1191 1192 /* 1193 * Convert address into form that can be used 1194 * as hash table key. 1195 */ 1196 for (i = 0; i < ETHERADDRL; i++) { 1197 addr = (addr << 8) | mca[i]; 1198 } 1199 1200 D2(vswp, "%s: addr = 0x%llx", __func__, addr); 1201 1202 if (add) { 1203 D2(vswp, "%s: adding multicast", __func__); 1204 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1205 /* 1206 * Update the list of multicast addresses 1207 * contained within the vsw_t structure to 1208 * include this new one. 1209 */ 1210 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP); 1211 if (mcst_p == NULL) { 1212 DERR(vswp, "%s unable to alloc mem", __func__); 1213 (void) vsw_del_mcst(vswp, 1214 VSW_LOCALDEV, addr, NULL); 1215 return (1); 1216 } 1217 mcst_p->addr = addr; 1218 ether_copy(mca, &mcst_p->mca); 1219 1220 /* 1221 * Call into the underlying driver to program the 1222 * address into HW. 1223 */ 1224 mutex_enter(&vswp->mac_lock); 1225 if (vswp->mh != NULL) { 1226 ret = mac_multicst_add(vswp->mh, mca); 1227 if (ret != 0) { 1228 cmn_err(CE_NOTE, "!vsw%d: unable to " 1229 "add multicast address", 1230 vswp->instance); 1231 mutex_exit(&vswp->mac_lock); 1232 (void) vsw_del_mcst(vswp, 1233 VSW_LOCALDEV, addr, NULL); 1234 kmem_free(mcst_p, sizeof (*mcst_p)); 1235 return (ret); 1236 } 1237 mcst_p->mac_added = B_TRUE; 1238 } 1239 mutex_exit(&vswp->mac_lock); 1240 1241 mutex_enter(&vswp->mca_lock); 1242 mcst_p->nextp = vswp->mcap; 1243 vswp->mcap = mcst_p; 1244 mutex_exit(&vswp->mca_lock); 1245 } else { 1246 cmn_err(CE_NOTE, "!vsw%d: unable to add multicast " 1247 "address", vswp->instance); 1248 } 1249 return (ret); 1250 } 1251 1252 D2(vswp, "%s: removing multicast", __func__); 1253 /* 1254 * Remove the address from the hash table.. 1255 */ 1256 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1257 1258 /* 1259 * ..and then from the list maintained in the 1260 * vsw_t structure. 1261 */ 1262 mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr); 1263 ASSERT(mcst_p != NULL); 1264 1265 mutex_enter(&vswp->mac_lock); 1266 if (vswp->mh != NULL && mcst_p->mac_added) { 1267 (void) mac_multicst_remove(vswp->mh, mca); 1268 mcst_p->mac_added = B_FALSE; 1269 } 1270 mutex_exit(&vswp->mac_lock); 1271 kmem_free(mcst_p, sizeof (*mcst_p)); 1272 } 1273 1274 D1(vswp, "%s: exit", __func__); 1275 1276 return (0); 1277 } 1278 1279 static int 1280 vsw_m_promisc(void *arg, boolean_t on) 1281 { 1282 vsw_t *vswp = (vsw_t *)arg; 1283 1284 D1(vswp, "%s: enter", __func__); 1285 1286 WRITE_ENTER(&vswp->if_lockrw); 1287 if (on) 1288 vswp->if_state |= VSW_IF_PROMISC; 1289 else 1290 vswp->if_state &= ~VSW_IF_PROMISC; 1291 RW_EXIT(&vswp->if_lockrw); 1292 1293 D1(vswp, "%s: exit", __func__); 1294 1295 return (0); 1296 } 1297 1298 static mblk_t * 1299 vsw_m_tx(void *arg, mblk_t *mp) 1300 { 1301 vsw_t *vswp = (vsw_t *)arg; 1302 1303 D1(vswp, "%s: enter", __func__); 1304 1305 mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp); 1306 1307 if (mp == NULL) { 1308 return (NULL); 1309 } 1310 1311 vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL); 1312 1313 D1(vswp, "%s: exit", __func__); 1314 1315 return (NULL); 1316 } 1317 1318 /* 1319 * Register for machine description (MD) updates. 1320 * 1321 * Returns 0 on success, 1 on failure. 1322 */ 1323 static int 1324 vsw_mdeg_register(vsw_t *vswp) 1325 { 1326 mdeg_prop_spec_t *pspecp; 1327 mdeg_node_spec_t *inst_specp; 1328 mdeg_handle_t mdeg_hdl, mdeg_port_hdl; 1329 size_t templatesz; 1330 int rv; 1331 1332 D1(vswp, "%s: enter", __func__); 1333 1334 /* 1335 * Allocate and initialize a per-instance copy 1336 * of the global property spec array that will 1337 * uniquely identify this vsw instance. 1338 */ 1339 templatesz = sizeof (vsw_prop_template); 1340 pspecp = kmem_zalloc(templatesz, KM_SLEEP); 1341 1342 bcopy(vsw_prop_template, pspecp, templatesz); 1343 1344 VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop); 1345 1346 /* initialize the complete prop spec structure */ 1347 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 1348 inst_specp->namep = "virtual-device"; 1349 inst_specp->specp = pspecp; 1350 1351 D2(vswp, "%s: instance %d registering with mdeg", __func__, 1352 vswp->regprop); 1353 /* 1354 * Register an interest in 'virtual-device' nodes with a 1355 * 'name' property of 'virtual-network-switch' 1356 */ 1357 rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb, 1358 (void *)vswp, &mdeg_hdl); 1359 if (rv != MDEG_SUCCESS) { 1360 DERR(vswp, "%s: mdeg_register failed (%d) for vsw node", 1361 __func__, rv); 1362 goto mdeg_reg_fail; 1363 } 1364 1365 /* 1366 * Register an interest in 'vsw-port' nodes. 1367 */ 1368 rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb, 1369 (void *)vswp, &mdeg_port_hdl); 1370 if (rv != MDEG_SUCCESS) { 1371 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv); 1372 (void) mdeg_unregister(mdeg_hdl); 1373 goto mdeg_reg_fail; 1374 } 1375 1376 /* save off data that will be needed later */ 1377 vswp->inst_spec = inst_specp; 1378 vswp->mdeg_hdl = mdeg_hdl; 1379 vswp->mdeg_port_hdl = mdeg_port_hdl; 1380 1381 D1(vswp, "%s: exit", __func__); 1382 return (0); 1383 1384 mdeg_reg_fail: 1385 cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks", 1386 vswp->instance); 1387 kmem_free(pspecp, templatesz); 1388 kmem_free(inst_specp, sizeof (mdeg_node_spec_t)); 1389 1390 vswp->mdeg_hdl = NULL; 1391 vswp->mdeg_port_hdl = NULL; 1392 1393 return (1); 1394 } 1395 1396 static void 1397 vsw_mdeg_unregister(vsw_t *vswp) 1398 { 1399 D1(vswp, "vsw_mdeg_unregister: enter"); 1400 1401 if (vswp->mdeg_hdl != NULL) 1402 (void) mdeg_unregister(vswp->mdeg_hdl); 1403 1404 if (vswp->mdeg_port_hdl != NULL) 1405 (void) mdeg_unregister(vswp->mdeg_port_hdl); 1406 1407 if (vswp->inst_spec != NULL) { 1408 if (vswp->inst_spec->specp != NULL) { 1409 (void) kmem_free(vswp->inst_spec->specp, 1410 sizeof (vsw_prop_template)); 1411 vswp->inst_spec->specp = NULL; 1412 } 1413 1414 (void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t)); 1415 vswp->inst_spec = NULL; 1416 } 1417 1418 D1(vswp, "vsw_mdeg_unregister: exit"); 1419 } 1420 1421 /* 1422 * Mdeg callback invoked for the vsw node itself. 1423 */ 1424 static int 1425 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1426 { 1427 vsw_t *vswp; 1428 md_t *mdp; 1429 mde_cookie_t node; 1430 uint64_t inst; 1431 char *node_name = NULL; 1432 1433 if (resp == NULL) 1434 return (MDEG_FAILURE); 1435 1436 vswp = (vsw_t *)cb_argp; 1437 1438 D1(vswp, "%s: added %d : removed %d : curr matched %d" 1439 " : prev matched %d", __func__, resp->added.nelem, 1440 resp->removed.nelem, resp->match_curr.nelem, 1441 resp->match_prev.nelem); 1442 1443 /* 1444 * We get an initial callback for this node as 'added' 1445 * after registering with mdeg. Note that we would have 1446 * already gathered information about this vsw node by 1447 * walking MD earlier during attach (in vsw_read_mdprops()). 1448 * So, there is a window where the properties of this 1449 * node might have changed when we get this initial 'added' 1450 * callback. We handle this as if an update occured 1451 * and invoke the same function which handles updates to 1452 * the properties of this vsw-node if any. 1453 * 1454 * A non-zero 'match' value indicates that the MD has been 1455 * updated and that a virtual-network-switch node is 1456 * present which may or may not have been updated. It is 1457 * up to the clients to examine their own nodes and 1458 * determine if they have changed. 1459 */ 1460 if (resp->added.nelem != 0) { 1461 1462 if (resp->added.nelem != 1) { 1463 cmn_err(CE_NOTE, "!vsw%d: number of nodes added " 1464 "invalid: %d\n", vswp->instance, resp->added.nelem); 1465 return (MDEG_FAILURE); 1466 } 1467 1468 mdp = resp->added.mdp; 1469 node = resp->added.mdep[0]; 1470 1471 } else if (resp->match_curr.nelem != 0) { 1472 1473 if (resp->match_curr.nelem != 1) { 1474 cmn_err(CE_NOTE, "!vsw%d: number of nodes updated " 1475 "invalid: %d\n", vswp->instance, 1476 resp->match_curr.nelem); 1477 return (MDEG_FAILURE); 1478 } 1479 1480 mdp = resp->match_curr.mdp; 1481 node = resp->match_curr.mdep[0]; 1482 1483 } else { 1484 return (MDEG_FAILURE); 1485 } 1486 1487 /* Validate name and instance */ 1488 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { 1489 DERR(vswp, "%s: unable to get node name\n", __func__); 1490 return (MDEG_FAILURE); 1491 } 1492 1493 /* is this a virtual-network-switch? */ 1494 if (strcmp(node_name, vsw_propname) != 0) { 1495 DERR(vswp, "%s: Invalid node name: %s\n", 1496 __func__, node_name); 1497 return (MDEG_FAILURE); 1498 } 1499 1500 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { 1501 DERR(vswp, "%s: prop(cfg-handle) not found\n", 1502 __func__); 1503 return (MDEG_FAILURE); 1504 } 1505 1506 /* is this the right instance of vsw? */ 1507 if (inst != vswp->regprop) { 1508 DERR(vswp, "%s: Invalid cfg-handle: %lx\n", 1509 __func__, inst); 1510 return (MDEG_FAILURE); 1511 } 1512 1513 vsw_update_md_prop(vswp, mdp, node); 1514 1515 return (MDEG_SUCCESS); 1516 } 1517 1518 /* 1519 * Mdeg callback invoked for changes to the vsw-port nodes 1520 * under the vsw node. 1521 */ 1522 static int 1523 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1524 { 1525 vsw_t *vswp; 1526 int idx; 1527 md_t *mdp; 1528 mde_cookie_t node; 1529 uint64_t inst; 1530 int rv; 1531 1532 if ((resp == NULL) || (cb_argp == NULL)) 1533 return (MDEG_FAILURE); 1534 1535 vswp = (vsw_t *)cb_argp; 1536 1537 D2(vswp, "%s: added %d : removed %d : curr matched %d" 1538 " : prev matched %d", __func__, resp->added.nelem, 1539 resp->removed.nelem, resp->match_curr.nelem, 1540 resp->match_prev.nelem); 1541 1542 /* process added ports */ 1543 for (idx = 0; idx < resp->added.nelem; idx++) { 1544 mdp = resp->added.mdp; 1545 node = resp->added.mdep[idx]; 1546 1547 D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); 1548 1549 if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) { 1550 cmn_err(CE_WARN, "!vsw%d: Unable to add new port " 1551 "(0x%lx), err=%d", vswp->instance, node, rv); 1552 } 1553 } 1554 1555 /* process removed ports */ 1556 for (idx = 0; idx < resp->removed.nelem; idx++) { 1557 mdp = resp->removed.mdp; 1558 node = resp->removed.mdep[idx]; 1559 1560 if (md_get_prop_val(mdp, node, id_propname, &inst)) { 1561 DERR(vswp, "%s: prop(%s) not found in port(%d)", 1562 __func__, id_propname, idx); 1563 continue; 1564 } 1565 1566 D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); 1567 1568 if (vsw_port_detach(vswp, inst) != 0) { 1569 cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld", 1570 vswp->instance, inst); 1571 } 1572 } 1573 1574 for (idx = 0; idx < resp->match_curr.nelem; idx++) { 1575 (void) vsw_port_update(vswp, resp->match_curr.mdp, 1576 resp->match_curr.mdep[idx], 1577 resp->match_prev.mdp, 1578 resp->match_prev.mdep[idx]); 1579 } 1580 1581 D1(vswp, "%s: exit", __func__); 1582 1583 return (MDEG_SUCCESS); 1584 } 1585 1586 /* 1587 * Scan the machine description for this instance of vsw 1588 * and read its properties. Called only from vsw_attach(). 1589 * Returns: 0 on success, 1 on failure. 1590 */ 1591 static int 1592 vsw_read_mdprops(vsw_t *vswp) 1593 { 1594 md_t *mdp = NULL; 1595 mde_cookie_t rootnode; 1596 mde_cookie_t *listp = NULL; 1597 uint64_t inst; 1598 uint64_t cfgh; 1599 char *name; 1600 int rv = 1; 1601 int num_nodes = 0; 1602 int num_devs = 0; 1603 int listsz = 0; 1604 int i; 1605 1606 /* 1607 * In each 'virtual-device' node in the MD there is a 1608 * 'cfg-handle' property which is the MD's concept of 1609 * an instance number (this may be completely different from 1610 * the device drivers instance #). OBP reads that value and 1611 * stores it in the 'reg' property of the appropriate node in 1612 * the device tree. We first read this reg property and use this 1613 * to compare against the 'cfg-handle' property of vsw nodes 1614 * in MD to get to this specific vsw instance and then read 1615 * other properties that we are interested in. 1616 * We also cache the value of 'reg' property and use it later 1617 * to register callbacks with mdeg (see vsw_mdeg_register()) 1618 */ 1619 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 1620 DDI_PROP_DONTPASS, reg_propname, -1); 1621 if (inst == -1) { 1622 cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from " 1623 "OBP device tree", vswp->instance, reg_propname); 1624 return (rv); 1625 } 1626 1627 vswp->regprop = inst; 1628 1629 if ((mdp = md_get_handle()) == NULL) { 1630 DWARN(vswp, "%s: cannot init MD\n", __func__); 1631 return (rv); 1632 } 1633 1634 num_nodes = md_node_count(mdp); 1635 ASSERT(num_nodes > 0); 1636 1637 listsz = num_nodes * sizeof (mde_cookie_t); 1638 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 1639 1640 rootnode = md_root_node(mdp); 1641 1642 /* search for all "virtual_device" nodes */ 1643 num_devs = md_scan_dag(mdp, rootnode, 1644 md_find_name(mdp, vdev_propname), 1645 md_find_name(mdp, "fwd"), listp); 1646 if (num_devs <= 0) { 1647 DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs); 1648 goto vsw_readmd_exit; 1649 } 1650 1651 /* 1652 * Now loop through the list of virtual-devices looking for 1653 * devices with name "virtual-network-switch" and for each 1654 * such device compare its instance with what we have from 1655 * the 'reg' property to find the right node in MD and then 1656 * read all its properties. 1657 */ 1658 for (i = 0; i < num_devs; i++) { 1659 1660 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 1661 DWARN(vswp, "%s: name property not found\n", 1662 __func__); 1663 goto vsw_readmd_exit; 1664 } 1665 1666 /* is this a virtual-network-switch? */ 1667 if (strcmp(name, vsw_propname) != 0) 1668 continue; 1669 1670 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 1671 DWARN(vswp, "%s: cfg-handle property not found\n", 1672 __func__); 1673 goto vsw_readmd_exit; 1674 } 1675 1676 /* is this the required instance of vsw? */ 1677 if (inst != cfgh) 1678 continue; 1679 1680 /* now read all properties of this vsw instance */ 1681 rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]); 1682 break; 1683 } 1684 1685 vsw_readmd_exit: 1686 1687 kmem_free(listp, listsz); 1688 (void) md_fini_handle(mdp); 1689 return (rv); 1690 } 1691 1692 /* 1693 * Read the initial start-of-day values from the specified MD node. 1694 */ 1695 static int 1696 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1697 { 1698 int i; 1699 uint64_t macaddr = 0; 1700 1701 D1(vswp, "%s: enter", __func__); 1702 1703 if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) { 1704 return (1); 1705 } 1706 1707 /* mac address for vswitch device itself */ 1708 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1709 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1710 vswp->instance); 1711 return (1); 1712 } 1713 1714 vsw_save_lmacaddr(vswp, macaddr); 1715 1716 if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) { 1717 DWARN(vswp, "%s: Unable to read %s property from MD, " 1718 "defaulting to 'switched' mode", 1719 __func__, smode_propname); 1720 1721 for (i = 0; i < NUM_SMODES; i++) 1722 vswp->smode[i] = VSW_LAYER2; 1723 1724 vswp->smode_num = NUM_SMODES; 1725 } else { 1726 ASSERT(vswp->smode_num != 0); 1727 } 1728 1729 /* read vlan id properties of this vsw instance */ 1730 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid, 1731 &vswp->vids, &vswp->nvids, &vswp->default_vlan_id); 1732 1733 /* read priority-ether-types */ 1734 vsw_read_pri_eth_types(vswp, mdp, node); 1735 1736 D1(vswp, "%s: exit", __func__); 1737 return (0); 1738 } 1739 1740 /* 1741 * Read vlan id properties of the given MD node. 1742 * Arguments: 1743 * arg: device argument(vsw device or a port) 1744 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port) 1745 * mdp: machine description 1746 * node: md node cookie 1747 * 1748 * Returns: 1749 * pvidp: port-vlan-id of the node 1750 * vidspp: list of vlan-ids of the node 1751 * nvidsp: # of vlan-ids in the list 1752 * default_idp: default-vlan-id of the node(if node is vsw device) 1753 */ 1754 static void 1755 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node, 1756 uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp, 1757 uint16_t *default_idp) 1758 { 1759 vsw_t *vswp; 1760 vsw_port_t *portp; 1761 char *pvid_propname; 1762 char *vid_propname; 1763 uint_t nvids = 0; 1764 uint32_t vids_size; 1765 int rv; 1766 int i; 1767 uint64_t *data; 1768 uint64_t val; 1769 int size; 1770 int inst; 1771 1772 if (type == VSW_LOCALDEV) { 1773 1774 vswp = (vsw_t *)arg; 1775 pvid_propname = vsw_pvid_propname; 1776 vid_propname = vsw_vid_propname; 1777 inst = vswp->instance; 1778 1779 } else if (type == VSW_VNETPORT) { 1780 1781 portp = (vsw_port_t *)arg; 1782 vswp = portp->p_vswp; 1783 pvid_propname = port_pvid_propname; 1784 vid_propname = port_vid_propname; 1785 inst = portp->p_instance; 1786 1787 } else { 1788 return; 1789 } 1790 1791 if (type == VSW_LOCALDEV && default_idp != NULL) { 1792 rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val); 1793 if (rv != 0) { 1794 DWARN(vswp, "%s: prop(%s) not found", __func__, 1795 vsw_dvid_propname); 1796 1797 *default_idp = vsw_default_vlan_id; 1798 } else { 1799 *default_idp = val & 0xFFF; 1800 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1801 vsw_dvid_propname, inst, *default_idp); 1802 } 1803 } 1804 1805 rv = md_get_prop_val(mdp, node, pvid_propname, &val); 1806 if (rv != 0) { 1807 DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname); 1808 *pvidp = vsw_default_vlan_id; 1809 } else { 1810 1811 *pvidp = val & 0xFFF; 1812 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1813 pvid_propname, inst, *pvidp); 1814 } 1815 1816 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data, 1817 &size); 1818 if (rv != 0) { 1819 D2(vswp, "%s: prop(%s) not found", __func__, vid_propname); 1820 size = 0; 1821 } else { 1822 size /= sizeof (uint64_t); 1823 } 1824 nvids = size; 1825 1826 if (nvids != 0) { 1827 D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst); 1828 vids_size = sizeof (uint16_t) * nvids; 1829 *vidspp = kmem_zalloc(vids_size, KM_SLEEP); 1830 for (i = 0; i < nvids; i++) { 1831 (*vidspp)[i] = data[i] & 0xFFFF; 1832 D2(vswp, " %d ", (*vidspp)[i]); 1833 } 1834 D2(vswp, "\n"); 1835 } 1836 1837 *nvidsp = nvids; 1838 } 1839 1840 /* 1841 * This function reads "priority-ether-types" property from md. This property 1842 * is used to enable support for priority frames. Applications which need 1843 * guaranteed and timely delivery of certain high priority frames to/from 1844 * a vnet or vsw within ldoms, should configure this property by providing 1845 * the ether type(s) for which the priority facility is needed. 1846 * Normal data frames are delivered over a ldc channel using the descriptor 1847 * ring mechanism which is constrained by factors such as descriptor ring size, 1848 * the rate at which the ring is processed at the peer ldc end point, etc. 1849 * The priority mechanism provides an Out-Of-Band path to send/receive frames 1850 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the 1851 * descriptor ring path and enables a more reliable and timely delivery of 1852 * frames to the peer. 1853 */ 1854 static void 1855 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1856 { 1857 int rv; 1858 uint16_t *types; 1859 uint64_t *data; 1860 int size; 1861 int i; 1862 size_t mblk_sz; 1863 1864 rv = md_get_prop_data(mdp, node, pri_types_propname, 1865 (uint8_t **)&data, &size); 1866 if (rv != 0) { 1867 /* 1868 * Property may not exist if we are running pre-ldoms1.1 f/w. 1869 * Check if 'vsw_pri_eth_type' has been set in that case. 1870 */ 1871 if (vsw_pri_eth_type != 0) { 1872 size = sizeof (vsw_pri_eth_type); 1873 data = &vsw_pri_eth_type; 1874 } else { 1875 D3(vswp, "%s: prop(%s) not found", __func__, 1876 pri_types_propname); 1877 size = 0; 1878 } 1879 } 1880 1881 if (size == 0) { 1882 vswp->pri_num_types = 0; 1883 return; 1884 } 1885 1886 /* 1887 * we have some priority-ether-types defined; 1888 * allocate a table of these types and also 1889 * allocate a pool of mblks to transmit these 1890 * priority packets. 1891 */ 1892 size /= sizeof (uint64_t); 1893 vswp->pri_num_types = size; 1894 vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP); 1895 for (i = 0, types = vswp->pri_types; i < size; i++) { 1896 types[i] = data[i] & 0xFFFF; 1897 } 1898 mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7; 1899 (void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp); 1900 } 1901 1902 /* 1903 * Check to see if the relevant properties in the specified node have 1904 * changed, and if so take the appropriate action. 1905 * 1906 * If any of the properties are missing or invalid we don't take 1907 * any action, as this function should only be invoked when modifications 1908 * have been made to what we assume is a working configuration, which 1909 * we leave active. 1910 * 1911 * Note it is legal for this routine to be invoked even if none of the 1912 * properties in the port node within the MD have actually changed. 1913 */ 1914 static void 1915 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1916 { 1917 char physname[LIFNAMSIZ]; 1918 char drv[LIFNAMSIZ]; 1919 uint_t ddi_instance; 1920 uint8_t new_smode[NUM_SMODES]; 1921 int i, smode_num = 0; 1922 uint64_t macaddr = 0; 1923 enum {MD_init = 0x1, 1924 MD_physname = 0x2, 1925 MD_macaddr = 0x4, 1926 MD_smode = 0x8, 1927 MD_vlans = 0x10} updated; 1928 int rv; 1929 uint16_t pvid; 1930 uint16_t *vids; 1931 uint16_t nvids; 1932 1933 updated = MD_init; 1934 1935 D1(vswp, "%s: enter", __func__); 1936 1937 /* 1938 * Check if name of physical device in MD has changed. 1939 */ 1940 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { 1941 /* 1942 * Do basic sanity check on new device name/instance, 1943 * if its non NULL. It is valid for the device name to 1944 * have changed from a non NULL to a NULL value, i.e. 1945 * the vsw is being changed to 'routed' mode. 1946 */ 1947 if ((strlen(physname) != 0) && 1948 (ddi_parse(physname, drv, 1949 &ddi_instance) != DDI_SUCCESS)) { 1950 cmn_err(CE_WARN, "!vsw%d: physical device %s is not" 1951 " a valid device name/instance", 1952 vswp->instance, physname); 1953 goto fail_reconf; 1954 } 1955 1956 if (strcmp(physname, vswp->physname)) { 1957 D2(vswp, "%s: device name changed from %s to %s", 1958 __func__, vswp->physname, physname); 1959 1960 updated |= MD_physname; 1961 } else { 1962 D2(vswp, "%s: device name unchanged at %s", 1963 __func__, vswp->physname); 1964 } 1965 } else { 1966 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " 1967 "device from updated MD.", vswp->instance); 1968 goto fail_reconf; 1969 } 1970 1971 /* 1972 * Check if MAC address has changed. 1973 */ 1974 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1975 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1976 vswp->instance); 1977 goto fail_reconf; 1978 } else { 1979 uint64_t maddr = macaddr; 1980 READ_ENTER(&vswp->if_lockrw); 1981 for (i = ETHERADDRL - 1; i >= 0; i--) { 1982 if (vswp->if_addr.ether_addr_octet[i] 1983 != (macaddr & 0xFF)) { 1984 D2(vswp, "%s: octet[%d] 0x%x != 0x%x", 1985 __func__, i, 1986 vswp->if_addr.ether_addr_octet[i], 1987 (macaddr & 0xFF)); 1988 updated |= MD_macaddr; 1989 macaddr = maddr; 1990 break; 1991 } 1992 macaddr >>= 8; 1993 } 1994 RW_EXIT(&vswp->if_lockrw); 1995 if (updated & MD_macaddr) { 1996 vsw_save_lmacaddr(vswp, macaddr); 1997 } 1998 } 1999 2000 /* 2001 * Check if switching modes have changed. 2002 */ 2003 if (vsw_get_md_smodes(vswp, mdp, node, 2004 new_smode, &smode_num)) { 2005 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", 2006 vswp->instance, smode_propname); 2007 goto fail_reconf; 2008 } else { 2009 ASSERT(smode_num != 0); 2010 if (smode_num != vswp->smode_num) { 2011 D2(vswp, "%s: number of modes changed from %d to %d", 2012 __func__, vswp->smode_num, smode_num); 2013 } 2014 2015 for (i = 0; i < smode_num; i++) { 2016 if (new_smode[i] != vswp->smode[i]) { 2017 D2(vswp, "%s: mode changed from %d to %d", 2018 __func__, vswp->smode[i], new_smode[i]); 2019 updated |= MD_smode; 2020 break; 2021 } 2022 } 2023 } 2024 2025 /* Read the vlan ids */ 2026 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids, 2027 &nvids, NULL); 2028 2029 /* Determine if there are any vlan id updates */ 2030 if ((pvid != vswp->pvid) || /* pvid changed? */ 2031 (nvids != vswp->nvids) || /* # of vids changed? */ 2032 ((nvids != 0) && (vswp->nvids != 0) && /* vids changed? */ 2033 bcmp(vids, vswp->vids, sizeof (uint16_t) * nvids))) { 2034 updated |= MD_vlans; 2035 } 2036 2037 /* 2038 * Now make any changes which are needed... 2039 */ 2040 2041 if (updated & (MD_physname | MD_smode)) { 2042 2043 /* 2044 * Stop any pending timeout to setup switching mode. 2045 */ 2046 vsw_stop_switching_timeout(vswp); 2047 2048 /* Cleanup HybridIO */ 2049 vsw_hio_cleanup(vswp); 2050 2051 /* 2052 * Remove unicst, mcst addrs of vsw interface 2053 * and ports from the physdev. 2054 */ 2055 vsw_unset_addrs(vswp); 2056 2057 /* 2058 * Stop, detach and close the old device.. 2059 */ 2060 mutex_enter(&vswp->mac_lock); 2061 2062 vsw_mac_detach(vswp); 2063 vsw_mac_close(vswp); 2064 2065 mutex_exit(&vswp->mac_lock); 2066 2067 /* 2068 * Update phys name. 2069 */ 2070 if (updated & MD_physname) { 2071 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", 2072 vswp->instance, vswp->physname, physname); 2073 (void) strncpy(vswp->physname, 2074 physname, strlen(physname) + 1); 2075 } 2076 2077 /* 2078 * Update array with the new switch mode values. 2079 */ 2080 if (updated & MD_smode) { 2081 for (i = 0; i < smode_num; i++) 2082 vswp->smode[i] = new_smode[i]; 2083 2084 vswp->smode_num = smode_num; 2085 vswp->smode_idx = 0; 2086 } 2087 2088 /* 2089 * ..and attach, start the new device. 2090 */ 2091 rv = vsw_setup_switching(vswp); 2092 if (rv == EAGAIN) { 2093 /* 2094 * Unable to setup switching mode. 2095 * As the error is EAGAIN, schedule a timeout to retry 2096 * and return. Programming addresses of ports and 2097 * vsw interface will be done when the timeout handler 2098 * completes successfully. 2099 */ 2100 mutex_enter(&vswp->swtmout_lock); 2101 2102 vswp->swtmout_enabled = B_TRUE; 2103 vswp->swtmout_id = 2104 timeout(vsw_setup_switching_timeout, vswp, 2105 (vsw_setup_switching_delay * 2106 drv_usectohz(MICROSEC))); 2107 2108 mutex_exit(&vswp->swtmout_lock); 2109 2110 return; 2111 2112 } else if (rv) { 2113 goto fail_update; 2114 } 2115 2116 /* 2117 * program unicst, mcst addrs of vsw interface 2118 * and ports in the physdev. 2119 */ 2120 vsw_set_addrs(vswp); 2121 2122 /* Start HIO for ports that have already connected */ 2123 vsw_hio_start_ports(vswp); 2124 2125 } else if (updated & MD_macaddr) { 2126 /* 2127 * We enter here if only MD_macaddr is exclusively updated. 2128 * If MD_physname and/or MD_smode are also updated, then 2129 * as part of that, we would have implicitly processed 2130 * MD_macaddr update (above). 2131 */ 2132 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", 2133 vswp->instance, macaddr); 2134 2135 READ_ENTER(&vswp->if_lockrw); 2136 if (vswp->if_state & VSW_IF_UP) { 2137 2138 mutex_enter(&vswp->hw_lock); 2139 /* 2140 * Remove old mac address of vsw interface 2141 * from the physdev 2142 */ 2143 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 2144 /* 2145 * Program new mac address of vsw interface 2146 * in the physdev 2147 */ 2148 rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 2149 mutex_exit(&vswp->hw_lock); 2150 if (rv != 0) { 2151 cmn_err(CE_NOTE, 2152 "!vsw%d: failed to program interface " 2153 "unicast address\n", vswp->instance); 2154 } 2155 /* 2156 * Notify the MAC layer of the changed address. 2157 */ 2158 mac_unicst_update(vswp->if_mh, 2159 (uint8_t *)&vswp->if_addr); 2160 2161 } 2162 RW_EXIT(&vswp->if_lockrw); 2163 2164 } 2165 2166 if (updated & MD_vlans) { 2167 /* Remove existing vlan ids from the hash table. */ 2168 vsw_vlan_remove_ids(vswp, VSW_LOCALDEV); 2169 2170 /* save the new vlan ids */ 2171 vswp->pvid = pvid; 2172 if (vswp->nvids != 0) { 2173 kmem_free(vswp->vids, sizeof (uint16_t) * vswp->nvids); 2174 vswp->nvids = 0; 2175 } 2176 if (nvids != 0) { 2177 vswp->nvids = nvids; 2178 vswp->vids = vids; 2179 } 2180 2181 /* add these new vlan ids into hash table */ 2182 vsw_vlan_add_ids(vswp, VSW_LOCALDEV); 2183 } else { 2184 if (nvids != 0) { 2185 kmem_free(vids, sizeof (uint16_t) * nvids); 2186 } 2187 } 2188 2189 return; 2190 2191 fail_reconf: 2192 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); 2193 return; 2194 2195 fail_update: 2196 cmn_err(CE_WARN, "!vsw%d: re-configuration failed", 2197 vswp->instance); 2198 } 2199 2200 /* 2201 * Read the port's md properties. 2202 */ 2203 static int 2204 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 2205 md_t *mdp, mde_cookie_t *node) 2206 { 2207 uint64_t ldc_id; 2208 uint8_t *addrp; 2209 int i, addrsz; 2210 int num_nodes = 0, nchan = 0; 2211 int listsz = 0; 2212 mde_cookie_t *listp = NULL; 2213 struct ether_addr ea; 2214 uint64_t macaddr; 2215 uint64_t inst = 0; 2216 uint64_t val; 2217 2218 if (md_get_prop_val(mdp, *node, id_propname, &inst)) { 2219 DWARN(vswp, "%s: prop(%s) not found", __func__, 2220 id_propname); 2221 return (1); 2222 } 2223 2224 /* 2225 * Find the channel endpoint node(s) (which should be under this 2226 * port node) which contain the channel id(s). 2227 */ 2228 if ((num_nodes = md_node_count(mdp)) <= 0) { 2229 DERR(vswp, "%s: invalid number of nodes found (%d)", 2230 __func__, num_nodes); 2231 return (1); 2232 } 2233 2234 D2(vswp, "%s: %d nodes found", __func__, num_nodes); 2235 2236 /* allocate enough space for node list */ 2237 listsz = num_nodes * sizeof (mde_cookie_t); 2238 listp = kmem_zalloc(listsz, KM_SLEEP); 2239 2240 nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname), 2241 md_find_name(mdp, "fwd"), listp); 2242 2243 if (nchan <= 0) { 2244 DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname); 2245 kmem_free(listp, listsz); 2246 return (1); 2247 } 2248 2249 D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname); 2250 2251 /* use property from first node found */ 2252 if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) { 2253 DWARN(vswp, "%s: prop(%s) not found\n", __func__, 2254 id_propname); 2255 kmem_free(listp, listsz); 2256 return (1); 2257 } 2258 2259 /* don't need list any more */ 2260 kmem_free(listp, listsz); 2261 2262 D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id); 2263 2264 /* read mac-address property */ 2265 if (md_get_prop_data(mdp, *node, remaddr_propname, 2266 &addrp, &addrsz)) { 2267 DWARN(vswp, "%s: prop(%s) not found", 2268 __func__, remaddr_propname); 2269 return (1); 2270 } 2271 2272 if (addrsz < ETHERADDRL) { 2273 DWARN(vswp, "%s: invalid address size", __func__); 2274 return (1); 2275 } 2276 2277 macaddr = *((uint64_t *)addrp); 2278 D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr); 2279 2280 for (i = ETHERADDRL - 1; i >= 0; i--) { 2281 ea.ether_addr_octet[i] = macaddr & 0xFF; 2282 macaddr >>= 8; 2283 } 2284 2285 /* now update all properties into the port */ 2286 portp->p_vswp = vswp; 2287 portp->p_instance = inst; 2288 portp->addr_set = VSW_ADDR_UNSET; 2289 ether_copy(&ea, &portp->p_macaddr); 2290 if (nchan > VSW_PORT_MAX_LDCS) { 2291 D2(vswp, "%s: using first of %d ldc ids", 2292 __func__, nchan); 2293 nchan = VSW_PORT_MAX_LDCS; 2294 } 2295 portp->num_ldcs = nchan; 2296 portp->ldc_ids = 2297 kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP); 2298 bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan); 2299 2300 /* read vlan id properties of this port node */ 2301 vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid, 2302 &portp->vids, &portp->nvids, NULL); 2303 2304 /* Check if hybrid property is present */ 2305 if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) { 2306 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2307 portp->p_hio_enabled = B_TRUE; 2308 } else { 2309 portp->p_hio_enabled = B_FALSE; 2310 } 2311 /* 2312 * Port hio capability determined after version 2313 * negotiation, i.e., when we know the peer is HybridIO capable. 2314 */ 2315 portp->p_hio_capable = B_FALSE; 2316 return (0); 2317 } 2318 2319 /* 2320 * Add a new port to the system. 2321 * 2322 * Returns 0 on success, 1 on failure. 2323 */ 2324 int 2325 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) 2326 { 2327 vsw_port_t *portp; 2328 int rv; 2329 2330 portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP); 2331 2332 rv = vsw_port_read_props(portp, vswp, mdp, node); 2333 if (rv != 0) { 2334 kmem_free(portp, sizeof (*portp)); 2335 return (1); 2336 } 2337 2338 rv = vsw_port_attach(portp); 2339 if (rv != 0) { 2340 DERR(vswp, "%s: failed to attach port", __func__); 2341 return (1); 2342 } 2343 2344 return (0); 2345 } 2346 2347 static int 2348 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 2349 md_t *prev_mdp, mde_cookie_t prev_mdex) 2350 { 2351 uint64_t cport_num; 2352 uint64_t pport_num; 2353 vsw_port_list_t *plistp; 2354 vsw_port_t *portp; 2355 boolean_t updated_vlans = B_FALSE; 2356 uint16_t pvid; 2357 uint16_t *vids; 2358 uint16_t nvids; 2359 uint64_t val; 2360 boolean_t hio_enabled = B_FALSE; 2361 2362 /* 2363 * For now, we get port updates only if vlan ids changed. 2364 * We read the port num and do some sanity check. 2365 */ 2366 if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) { 2367 return (1); 2368 } 2369 2370 if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) { 2371 return (1); 2372 } 2373 if (cport_num != pport_num) 2374 return (1); 2375 2376 plistp = &(vswp->plist); 2377 2378 READ_ENTER(&plistp->lockrw); 2379 2380 portp = vsw_lookup_port(vswp, cport_num); 2381 if (portp == NULL) { 2382 RW_EXIT(&plistp->lockrw); 2383 return (1); 2384 } 2385 2386 /* Read the vlan ids */ 2387 vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid, 2388 &vids, &nvids, NULL); 2389 2390 /* Determine if there are any vlan id updates */ 2391 if ((pvid != portp->pvid) || /* pvid changed? */ 2392 (nvids != portp->nvids) || /* # of vids changed? */ 2393 ((nvids != 0) && (portp->nvids != 0) && /* vids changed? */ 2394 bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) { 2395 updated_vlans = B_TRUE; 2396 } 2397 2398 if (updated_vlans == B_TRUE) { 2399 2400 /* Remove existing vlan ids from the hash table. */ 2401 vsw_vlan_remove_ids(portp, VSW_VNETPORT); 2402 2403 /* save the new vlan ids */ 2404 portp->pvid = pvid; 2405 if (portp->nvids != 0) { 2406 kmem_free(portp->vids, 2407 sizeof (uint16_t) * portp->nvids); 2408 portp->nvids = 0; 2409 } 2410 if (nvids != 0) { 2411 portp->vids = kmem_zalloc(sizeof (uint16_t) * 2412 nvids, KM_SLEEP); 2413 bcopy(vids, portp->vids, sizeof (uint16_t) * nvids); 2414 portp->nvids = nvids; 2415 kmem_free(vids, sizeof (uint16_t) * nvids); 2416 } 2417 2418 /* add these new vlan ids into hash table */ 2419 vsw_vlan_add_ids(portp, VSW_VNETPORT); 2420 2421 /* reset the port if it is vlan unaware (ver < 1.3) */ 2422 vsw_vlan_unaware_port_reset(portp); 2423 } 2424 2425 /* Check if hybrid property is present */ 2426 if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) { 2427 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2428 hio_enabled = B_TRUE; 2429 } 2430 2431 if (portp->p_hio_enabled != hio_enabled) { 2432 vsw_hio_port_update(portp, hio_enabled); 2433 } 2434 2435 RW_EXIT(&plistp->lockrw); 2436 2437 return (0); 2438 } 2439 2440 /* 2441 * vsw_mac_rx -- A common function to send packets to the interface. 2442 * By default this function check if the interface is UP or not, the 2443 * rest of the behaviour depends on the flags as below: 2444 * 2445 * VSW_MACRX_PROMISC -- Check if the promisc mode set or not. 2446 * VSW_MACRX_COPYMSG -- Make a copy of the message(s). 2447 * VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack. 2448 */ 2449 void 2450 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 2451 mblk_t *mp, vsw_macrx_flags_t flags) 2452 { 2453 mblk_t *mpt; 2454 2455 D1(vswp, "%s:enter\n", __func__); 2456 READ_ENTER(&vswp->if_lockrw); 2457 /* Check if the interface is up */ 2458 if (!(vswp->if_state & VSW_IF_UP)) { 2459 RW_EXIT(&vswp->if_lockrw); 2460 /* Free messages only if FREEMSG flag specified */ 2461 if (flags & VSW_MACRX_FREEMSG) { 2462 freemsgchain(mp); 2463 } 2464 D1(vswp, "%s:exit\n", __func__); 2465 return; 2466 } 2467 /* 2468 * If PROMISC flag is passed, then check if 2469 * the interface is in the PROMISC mode. 2470 * If not, drop the messages. 2471 */ 2472 if (flags & VSW_MACRX_PROMISC) { 2473 if (!(vswp->if_state & VSW_IF_PROMISC)) { 2474 RW_EXIT(&vswp->if_lockrw); 2475 /* Free messages only if FREEMSG flag specified */ 2476 if (flags & VSW_MACRX_FREEMSG) { 2477 freemsgchain(mp); 2478 } 2479 D1(vswp, "%s:exit\n", __func__); 2480 return; 2481 } 2482 } 2483 RW_EXIT(&vswp->if_lockrw); 2484 /* 2485 * If COPYMSG flag is passed, then make a copy 2486 * of the message chain and send up the copy. 2487 */ 2488 if (flags & VSW_MACRX_COPYMSG) { 2489 mp = copymsgchain(mp); 2490 if (mp == NULL) { 2491 D1(vswp, "%s:exit\n", __func__); 2492 return; 2493 } 2494 } 2495 2496 D2(vswp, "%s: sending up stack", __func__); 2497 2498 mpt = NULL; 2499 (void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt); 2500 if (mp != NULL) { 2501 mac_rx(vswp->if_mh, mrh, mp); 2502 } 2503 D1(vswp, "%s:exit\n", __func__); 2504 } 2505 2506 /* copy mac address of vsw into soft state structure */ 2507 static void 2508 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr) 2509 { 2510 int i; 2511 2512 WRITE_ENTER(&vswp->if_lockrw); 2513 for (i = ETHERADDRL - 1; i >= 0; i--) { 2514 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF; 2515 macaddr >>= 8; 2516 } 2517 RW_EXIT(&vswp->if_lockrw); 2518 } 2519