1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/ldc.h> 66 
#include <sys/vsw_fdb.h> 67 #include <sys/vsw.h> 68 #include <sys/vio_mailbox.h> 69 #include <sys/vnet_mailbox.h> 70 #include <sys/vnet_common.h> 71 #include <sys/vio_util.h> 72 #include <sys/sdt.h> 73 #include <sys/atomic.h> 74 #include <sys/callb.h> 75 #include <sys/vlan.h> 76 77 /* 78 * Function prototypes. 79 */ 80 static int vsw_attach(dev_info_t *, ddi_attach_cmd_t); 81 static int vsw_detach(dev_info_t *, ddi_detach_cmd_t); 82 static int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 83 static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *); 84 static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *); 85 86 /* MDEG routines */ 87 static int vsw_mdeg_register(vsw_t *vswp); 88 static void vsw_mdeg_unregister(vsw_t *vswp); 89 static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *); 90 static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *); 91 static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t); 92 static int vsw_read_mdprops(vsw_t *vswp); 93 static void vsw_vlan_read_ids(void *arg, int type, md_t *mdp, 94 mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp, 95 uint16_t *nvidsp, uint16_t *default_idp); 96 static int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 97 md_t *mdp, mde_cookie_t *node); 98 static void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, 99 mde_cookie_t node); 100 static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t); 101 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 102 103 /* Mac driver related routines */ 104 static int vsw_mac_register(vsw_t *); 105 static int vsw_mac_unregister(vsw_t *); 106 static int vsw_m_stat(void *, uint_t, uint64_t *); 107 static void vsw_m_stop(void *arg); 108 static int vsw_m_start(void *arg); 109 static int vsw_m_unicst(void *arg, const uint8_t *); 110 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *); 111 static int vsw_m_promisc(void *arg, boolean_t); 112 static mblk_t *vsw_m_tx(void *arg, 
mblk_t *); 113 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 114 mblk_t *mp, vsw_macrx_flags_t flags); 115 116 /* 117 * Functions imported from other files. 118 */ 119 extern void vsw_setup_switching_timeout(void *arg); 120 extern void vsw_stop_switching_timeout(vsw_t *vswp); 121 extern int vsw_setup_switching(vsw_t *); 122 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, 123 vsw_port_t *port, mac_resource_handle_t mrh); 124 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 125 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 126 extern void vsw_del_mcst_vsw(vsw_t *); 127 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 128 extern int vsw_detach_ports(vsw_t *vswp); 129 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 130 extern int vsw_port_detach(vsw_t *vswp, int p_instance); 131 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 132 md_t *prev_mdp, mde_cookie_t prev_mdex); 133 extern int vsw_port_attach(vsw_port_t *port); 134 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 135 extern int vsw_mac_attach(vsw_t *vswp); 136 extern void vsw_mac_detach(vsw_t *vswp); 137 extern int vsw_mac_open(vsw_t *vswp); 138 extern void vsw_mac_close(vsw_t *vswp); 139 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int); 140 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int); 141 extern void vsw_reconfig_hw(vsw_t *); 142 extern void vsw_unset_addrs(vsw_t *vswp); 143 extern void vsw_set_addrs(vsw_t *vswp); 144 extern void vsw_create_vlans(void *arg, int type); 145 extern void vsw_destroy_vlans(void *arg, int type); 146 extern void vsw_vlan_add_ids(void *arg, int type); 147 extern void vsw_vlan_remove_ids(void *arg, int type); 148 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 149 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, 150 mblk_t **npt); 151 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int 
type, mblk_t *mp); 152 extern void vsw_hio_cleanup(vsw_t *vswp); 153 extern void vsw_hio_start_ports(vsw_t *vswp); 154 void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled); 155 156 /* 157 * Internal tunables. 158 */ 159 int vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */ 160 int vsw_wretries = 100; /* # of write attempts */ 161 int vsw_desc_delay = 0; /* delay in us */ 162 int vsw_read_attempts = 5; /* # of reads of descriptor */ 163 int vsw_setup_switching_delay = 3; /* setup sw timeout interval in sec */ 164 int vsw_mac_open_retries = 300; /* max # of mac_open() retries */ 165 /* 300*3 = 900sec(15min) of max tmout */ 166 int vsw_ldc_tx_delay = 5; /* delay(ticks) for tx retries */ 167 int vsw_ldc_tx_retries = 10; /* # of ldc tx retries */ 168 boolean_t vsw_ldc_rxthr_enabled = B_TRUE; /* LDC Rx thread enabled */ 169 boolean_t vsw_ldc_txthr_enabled = B_TRUE; /* LDC Tx thread enabled */ 170 171 uint32_t vsw_fdb_nchains = 8; /* # of chains in fdb hash table */ 172 uint32_t vsw_vlan_nchains = 4; /* # of chains in vlan id hash table */ 173 uint32_t vsw_ethermtu = 1500; /* mtu of the device */ 174 175 /* sw timeout for boot delay only, in milliseconds */ 176 int vsw_setup_switching_boot_delay = 100 * MILLISEC; 177 178 /* delay in usec to wait for all references on a fdb entry to be dropped */ 179 uint32_t vsw_fdbe_refcnt_delay = 10; 180 181 /* 182 * Default vlan id. This is only used internally when the "default-vlan-id" 183 * property is not present in the MD device node. Therefore, this should not be 184 * used as a tunable; if this value is changed, the corresponding variable 185 * should be updated to the same value in all vnets connected to this vsw. 186 */ 187 uint16_t vsw_default_vlan_id = 1; 188 189 /* 190 * Workaround for a version handshake bug in obp's vnet. 191 * If vsw initiates version negotiation starting from the highest version, 192 * obp sends a nack and terminates version handshake. 
To workaround 193 * this, we do not initiate version handshake when the channel comes up. 194 * Instead, we wait for the peer to send its version info msg and go through 195 * the version protocol exchange. If we successfully negotiate a version, 196 * before sending the ack, we send our version info msg to the peer 197 * using the <major,minor> version that we are about to ack. 198 */ 199 boolean_t vsw_obp_ver_proto_workaround = B_TRUE; 200 201 /* 202 * In the absence of "priority-ether-types" property in MD, the following 203 * internal tunable can be set to specify a single priority ethertype. 204 */ 205 uint64_t vsw_pri_eth_type = 0; 206 207 /* 208 * Number of transmit priority buffers that are preallocated per device. 209 * This number is chosen to be a small value to throttle transmission 210 * of priority packets. Note: Must be a power of 2 for vio_create_mblks(). 211 */ 212 uint32_t vsw_pri_tx_nmblks = 64; 213 214 boolean_t vsw_hio_enabled = B_TRUE; /* Enable/disable HybridIO */ 215 int vsw_hio_max_cleanup_retries = 10; /* Max retries for HybridIO cleanp */ 216 int vsw_hio_cleanup_delay = 10000; /* 10ms */ 217 218 /* 219 * External tunables. 220 */ 221 /* 222 * Enable/disable thread per ring. This is a mode selection 223 * that is done a vsw driver attach time. 224 */ 225 boolean_t vsw_multi_ring_enable = B_FALSE; 226 int vsw_mac_rx_rings = VSW_MAC_RX_RINGS; 227 228 /* Number of transmit descriptors - must be power of 2 */ 229 uint32_t vsw_ntxds = VSW_RING_NUM_EL; 230 231 /* 232 * Max number of mblks received in one receive operation. 233 */ 234 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6); 235 236 /* 237 * Tunables for three different pools, that is, the size and 238 * number of mblks for each pool. 
239 */ 240 uint32_t vsw_mblk_size1 = VSW_MBLK_SZ_128; /* size=128 for pool1 */ 241 uint32_t vsw_mblk_size2 = VSW_MBLK_SZ_256; /* size=256 for pool2 */ 242 uint32_t vsw_mblk_size3 = VSW_MBLK_SZ_2048; /* size=2048 for pool3 */ 243 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS; /* number of mblks for pool1 */ 244 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS; /* number of mblks for pool2 */ 245 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS; /* number of mblks for pool3 */ 246 247 /* 248 * vsw_max_tx_qcount is the maximum # of packets that can be queued 249 * before the tx worker thread begins processing the queue. Its value 250 * is chosen to be 4x the default length of tx descriptor ring. 251 */ 252 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL; 253 254 /* 255 * MAC callbacks 256 */ 257 static mac_callbacks_t vsw_m_callbacks = { 258 0, 259 vsw_m_stat, 260 vsw_m_start, 261 vsw_m_stop, 262 vsw_m_promisc, 263 vsw_m_multicst, 264 vsw_m_unicst, 265 vsw_m_tx, 266 NULL, 267 NULL, 268 NULL 269 }; 270 271 static struct cb_ops vsw_cb_ops = { 272 nulldev, /* cb_open */ 273 nulldev, /* cb_close */ 274 nodev, /* cb_strategy */ 275 nodev, /* cb_print */ 276 nodev, /* cb_dump */ 277 nodev, /* cb_read */ 278 nodev, /* cb_write */ 279 nodev, /* cb_ioctl */ 280 nodev, /* cb_devmap */ 281 nodev, /* cb_mmap */ 282 nodev, /* cb_segmap */ 283 nochpoll, /* cb_chpoll */ 284 ddi_prop_op, /* cb_prop_op */ 285 NULL, /* cb_stream */ 286 D_MP, /* cb_flag */ 287 CB_REV, /* rev */ 288 nodev, /* int (*cb_aread)() */ 289 nodev /* int (*cb_awrite)() */ 290 }; 291 292 static struct dev_ops vsw_ops = { 293 DEVO_REV, /* devo_rev */ 294 0, /* devo_refcnt */ 295 vsw_getinfo, /* devo_getinfo */ 296 nulldev, /* devo_identify */ 297 nulldev, /* devo_probe */ 298 vsw_attach, /* devo_attach */ 299 vsw_detach, /* devo_detach */ 300 nodev, /* devo_reset */ 301 &vsw_cb_ops, /* devo_cb_ops */ 302 (struct bus_ops *)NULL, /* devo_bus_ops */ 303 ddi_power /* devo_power */ 304 }; 305 306 extern struct mod_ops mod_driverops; 307 
static struct modldrv vswmodldrv = { 308 &mod_driverops, 309 "sun4v Virtual Switch", 310 &vsw_ops, 311 }; 312 313 #define LDC_ENTER_LOCK(ldcp) \ 314 mutex_enter(&((ldcp)->ldc_cblock));\ 315 mutex_enter(&((ldcp)->ldc_rxlock));\ 316 mutex_enter(&((ldcp)->ldc_txlock)); 317 #define LDC_EXIT_LOCK(ldcp) \ 318 mutex_exit(&((ldcp)->ldc_txlock));\ 319 mutex_exit(&((ldcp)->ldc_rxlock));\ 320 mutex_exit(&((ldcp)->ldc_cblock)); 321 322 /* Driver soft state ptr */ 323 static void *vsw_state; 324 325 /* 326 * Linked list of "vsw_t" structures - one per instance. 327 */ 328 vsw_t *vsw_head = NULL; 329 krwlock_t vsw_rw; 330 331 /* 332 * Property names 333 */ 334 static char vdev_propname[] = "virtual-device"; 335 static char vsw_propname[] = "virtual-network-switch"; 336 static char physdev_propname[] = "vsw-phys-dev"; 337 static char smode_propname[] = "vsw-switch-mode"; 338 static char macaddr_propname[] = "local-mac-address"; 339 static char remaddr_propname[] = "remote-mac-address"; 340 static char ldcids_propname[] = "ldc-ids"; 341 static char chan_propname[] = "channel-endpoint"; 342 static char id_propname[] = "id"; 343 static char reg_propname[] = "reg"; 344 static char pri_types_propname[] = "priority-ether-types"; 345 static char vsw_pvid_propname[] = "port-vlan-id"; 346 static char vsw_vid_propname[] = "vlan-id"; 347 static char vsw_dvid_propname[] = "default-vlan-id"; 348 static char port_pvid_propname[] = "remote-port-vlan-id"; 349 static char port_vid_propname[] = "remote-vlan-id"; 350 static char hybrid_propname[] = "hybrid"; 351 352 /* 353 * Matching criteria passed to the MDEG to register interest 354 * in changes to 'virtual-device-port' nodes identified by their 355 * 'id' property. 
356 */ 357 static md_prop_match_t vport_prop_match[] = { 358 { MDET_PROP_VAL, "id" }, 359 { MDET_LIST_END, NULL } 360 }; 361 362 static mdeg_node_match_t vport_match = { "virtual-device-port", 363 vport_prop_match }; 364 365 /* 366 * Matching criteria passed to the MDEG to register interest 367 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified 368 * by their 'name' and 'cfg-handle' properties. 369 */ 370 static md_prop_match_t vdev_prop_match[] = { 371 { MDET_PROP_STR, "name" }, 372 { MDET_PROP_VAL, "cfg-handle" }, 373 { MDET_LIST_END, NULL } 374 }; 375 376 static mdeg_node_match_t vdev_match = { "virtual-device", 377 vdev_prop_match }; 378 379 380 /* 381 * Specification of an MD node passed to the MDEG to filter any 382 * 'vport' nodes that do not belong to the specified node. This 383 * template is copied for each vsw instance and filled in with 384 * the appropriate 'cfg-handle' value before being passed to the MDEG. 385 */ 386 static mdeg_prop_spec_t vsw_prop_template[] = { 387 { MDET_PROP_STR, "name", vsw_propname }, 388 { MDET_PROP_VAL, "cfg-handle", NULL }, 389 { MDET_LIST_END, NULL, NULL } 390 }; 391 392 #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 393 394 #ifdef DEBUG 395 /* 396 * Print debug messages - set to 0x1f to enable all msgs 397 * or 0x0 to turn all off. 398 */ 399 int vswdbg = 0x0; 400 401 /* 402 * debug levels: 403 * 0x01: Function entry/exit tracing 404 * 0x02: Internal function messages 405 * 0x04: Verbose internal messages 406 * 0x08: Warning messages 407 * 0x10: Error messages 408 */ 409 410 void 411 vswdebug(vsw_t *vswp, const char *fmt, ...) 
412 { 413 char buf[512]; 414 va_list ap; 415 416 va_start(ap, fmt); 417 (void) vsprintf(buf, fmt, ap); 418 va_end(ap); 419 420 if (vswp == NULL) 421 cmn_err(CE_CONT, "%s\n", buf); 422 else 423 cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); 424 } 425 426 #endif /* DEBUG */ 427 428 static struct modlinkage modlinkage = { 429 MODREV_1, 430 &vswmodldrv, 431 NULL 432 }; 433 434 int 435 _init(void) 436 { 437 int status; 438 439 rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); 440 441 status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); 442 if (status != 0) { 443 return (status); 444 } 445 446 mac_init_ops(&vsw_ops, DRV_NAME); 447 status = mod_install(&modlinkage); 448 if (status != 0) { 449 ddi_soft_state_fini(&vsw_state); 450 } 451 return (status); 452 } 453 454 int 455 _fini(void) 456 { 457 int status; 458 459 status = mod_remove(&modlinkage); 460 if (status != 0) 461 return (status); 462 mac_fini_ops(&vsw_ops); 463 ddi_soft_state_fini(&vsw_state); 464 465 rw_destroy(&vsw_rw); 466 467 return (status); 468 } 469 470 int 471 _info(struct modinfo *modinfop) 472 { 473 return (mod_info(&modlinkage, modinfop)); 474 } 475 476 static int 477 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 478 { 479 vsw_t *vswp; 480 int instance; 481 char hashname[MAXNAMELEN]; 482 char qname[TASKQ_NAMELEN]; 483 enum { PROG_init = 0x00, 484 PROG_locks = 0x01, 485 PROG_readmd = 0x02, 486 PROG_fdb = 0x04, 487 PROG_mfdb = 0x08, 488 PROG_taskq = 0x10, 489 PROG_swmode = 0x20, 490 PROG_macreg = 0x40, 491 PROG_mdreg = 0x80} 492 progress; 493 494 progress = PROG_init; 495 int rv; 496 497 switch (cmd) { 498 case DDI_ATTACH: 499 break; 500 case DDI_RESUME: 501 /* nothing to do for this non-device */ 502 return (DDI_SUCCESS); 503 case DDI_PM_RESUME: 504 default: 505 return (DDI_FAILURE); 506 } 507 508 instance = ddi_get_instance(dip); 509 if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { 510 DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); 511 return (DDI_FAILURE); 512 } 
513 vswp = ddi_get_soft_state(vsw_state, instance); 514 515 if (vswp == NULL) { 516 DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance); 517 goto vsw_attach_fail; 518 } 519 520 vswp->dip = dip; 521 vswp->instance = instance; 522 ddi_set_driver_private(dip, (caddr_t)vswp); 523 524 mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL); 525 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); 526 mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL); 527 rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); 528 rw_init(&vswp->mac_rwlock, NULL, RW_DRIVER, NULL); 529 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); 530 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); 531 532 progress |= PROG_locks; 533 534 rv = vsw_read_mdprops(vswp); 535 if (rv != 0) 536 goto vsw_attach_fail; 537 538 progress |= PROG_readmd; 539 540 /* setup the unicast forwarding database */ 541 (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", 542 vswp->instance); 543 D2(vswp, "creating unicast hash table (%s)...", hashname); 544 vswp->fdb_nchains = vsw_fdb_nchains; 545 vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains, 546 mod_hash_null_valdtor, sizeof (void *)); 547 vsw_create_vlans((void *)vswp, VSW_LOCALDEV); 548 progress |= PROG_fdb; 549 550 /* setup the multicast fowarding database */ 551 (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", 552 vswp->instance); 553 D2(vswp, "creating multicast hash table %s)...", hashname); 554 vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains, 555 mod_hash_null_valdtor, sizeof (void *)); 556 557 progress |= PROG_mfdb; 558 559 /* 560 * Create the taskq which will process all the VIO 561 * control messages. 
562 */ 563 (void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance); 564 if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, 565 TASKQ_DEFAULTPRI, 0)) == NULL) { 566 cmn_err(CE_WARN, "!vsw%d: Unable to create task queue", 567 vswp->instance); 568 goto vsw_attach_fail; 569 } 570 571 progress |= PROG_taskq; 572 573 /* prevent auto-detaching */ 574 if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip, 575 DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) { 576 cmn_err(CE_NOTE, "!Unable to set \"%s\" property for " 577 "instance %u", DDI_NO_AUTODETACH, instance); 578 } 579 580 /* 581 * The null switching function is set to avoid panic until 582 * switch mode is setup. 583 */ 584 vswp->vsw_switch_frame = vsw_switch_frame_nop; 585 586 /* 587 * Setup the required switching mode, 588 * based on the mdprops that we read earlier. 589 * schedule a short timeout (0.1 sec) for the first time 590 * setup and avoid calling mac_open() directly here, 591 * others are regular timeout 3 secs. 592 */ 593 mutex_enter(&vswp->swtmout_lock); 594 595 vswp->swtmout_enabled = B_TRUE; 596 vswp->swtmout_id = timeout(vsw_setup_switching_timeout, vswp, 597 drv_usectohz(vsw_setup_switching_boot_delay)); 598 599 mutex_exit(&vswp->swtmout_lock); 600 601 progress |= PROG_swmode; 602 603 /* Register with mac layer as a provider */ 604 rv = vsw_mac_register(vswp); 605 if (rv != 0) 606 goto vsw_attach_fail; 607 608 progress |= PROG_macreg; 609 610 /* 611 * Now we have everything setup, register an interest in 612 * specific MD nodes. 613 * 614 * The callback is invoked in 2 cases, firstly if upon mdeg 615 * registration there are existing nodes which match our specified 616 * criteria, and secondly if the MD is changed (and again, there 617 * are nodes which we are interested in present within it. Note 618 * that our callback will be invoked even if our specified nodes 619 * have not actually changed). 
620 * 621 */ 622 rv = vsw_mdeg_register(vswp); 623 if (rv != 0) 624 goto vsw_attach_fail; 625 626 progress |= PROG_mdreg; 627 628 WRITE_ENTER(&vsw_rw); 629 vswp->next = vsw_head; 630 vsw_head = vswp; 631 RW_EXIT(&vsw_rw); 632 633 ddi_report_dev(vswp->dip); 634 return (DDI_SUCCESS); 635 636 vsw_attach_fail: 637 DERR(NULL, "vsw_attach: failed"); 638 639 if (progress & PROG_mdreg) { 640 vsw_mdeg_unregister(vswp); 641 (void) vsw_detach_ports(vswp); 642 } 643 644 if (progress & PROG_macreg) 645 (void) vsw_mac_unregister(vswp); 646 647 if (progress & PROG_swmode) { 648 vsw_stop_switching_timeout(vswp); 649 vsw_hio_cleanup(vswp); 650 WRITE_ENTER(&vswp->mac_rwlock); 651 vsw_mac_detach(vswp); 652 vsw_mac_close(vswp); 653 RW_EXIT(&vswp->mac_rwlock); 654 } 655 656 if (progress & PROG_taskq) 657 ddi_taskq_destroy(vswp->taskq_p); 658 659 if (progress & PROG_mfdb) 660 mod_hash_destroy_hash(vswp->mfdb); 661 662 if (progress & PROG_fdb) { 663 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 664 mod_hash_destroy_hash(vswp->fdb_hashp); 665 } 666 667 if (progress & PROG_readmd) { 668 if (VSW_PRI_ETH_DEFINED(vswp)) { 669 kmem_free(vswp->pri_types, 670 sizeof (uint16_t) * vswp->pri_num_types); 671 } 672 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 673 } 674 675 if (progress & PROG_locks) { 676 rw_destroy(&vswp->plist.lockrw); 677 rw_destroy(&vswp->mfdbrw); 678 rw_destroy(&vswp->mac_rwlock); 679 rw_destroy(&vswp->if_lockrw); 680 mutex_destroy(&vswp->swtmout_lock); 681 mutex_destroy(&vswp->mca_lock); 682 mutex_destroy(&vswp->hw_lock); 683 } 684 685 ddi_soft_state_free(vsw_state, instance); 686 return (DDI_FAILURE); 687 } 688 689 static int 690 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 691 { 692 vio_mblk_pool_t *poolp, *npoolp; 693 vsw_t **vswpp, *vswp; 694 int instance; 695 696 instance = ddi_get_instance(dip); 697 vswp = ddi_get_soft_state(vsw_state, instance); 698 699 if (vswp == NULL) { 700 return (DDI_FAILURE); 701 } 702 703 switch (cmd) { 704 case DDI_DETACH: 705 break; 706 case 
DDI_SUSPEND:
	case DDI_PM_SUSPEND:
	default:
		return (DDI_FAILURE);
	}

	D2(vswp, "detaching instance %d", instance);

	/* Stop any pending timeout to setup switching mode. */
	vsw_stop_switching_timeout(vswp);

	/* Nothing has been torn down yet, so failing here is harmless. */
	if (vswp->if_state & VSW_IF_REG) {
		if (vsw_mac_unregister(vswp) != 0) {
			cmn_err(CE_WARN, "!vsw%d: Unable to detach from "
			    "MAC layer", vswp->instance);
			return (DDI_FAILURE);
		}
	}

	vsw_mdeg_unregister(vswp);

	/* remove mac layer callback */
	WRITE_ENTER(&vswp->mac_rwlock);
	if ((vswp->mh != NULL) && (vswp->mrh != NULL)) {
		mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE);
		vswp->mrh = NULL;
	}
	RW_EXIT(&vswp->mac_rwlock);

	/*
	 * NOTE(review): this failure return happens after the MAC and MDEG
	 * registrations above have already been dropped, and nothing here
	 * re-establishes them. Confirm that a partially detached instance
	 * is acceptable to the framework.
	 */
	if (vsw_detach_ports(vswp) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports",
		    vswp->instance);
		return (DDI_FAILURE);
	}

	rw_destroy(&vswp->if_lockrw);

	/* cleanup HybridIO */
	vsw_hio_cleanup(vswp);

	mutex_destroy(&vswp->hw_lock);

	/*
	 * Now that the ports have been deleted, stop and close
	 * the physical device.
	 */
	WRITE_ENTER(&vswp->mac_rwlock);

	vsw_mac_detach(vswp);
	vsw_mac_close(vswp);

	RW_EXIT(&vswp->mac_rwlock);

	rw_destroy(&vswp->mac_rwlock);
	mutex_destroy(&vswp->swtmout_lock);

	/*
	 * Destroy any free pools that may still exist. The list head is
	 * advanced before each destroy attempt and restored if the attempt
	 * fails, so vswp->rxh always names the first pool still alive.
	 *
	 * NOTE(review): by the time this loop can return DDI_FAILURE,
	 * if_lockrw, hw_lock, mac_rwlock and swtmout_lock have already
	 * been destroyed above. A later detach attempt would go through
	 * WRITE_ENTER(&vswp->mac_rwlock) again on a destroyed lock.
	 */
	poolp = vswp->rxh;
	while (poolp != NULL) {
		npoolp = vswp->rxh = poolp->nextp;
		if (vio_destroy_mblks(poolp) != 0) {
			vswp->rxh = poolp;
			return (DDI_FAILURE);
		}
		poolp = npoolp;
	}

	/*
	 * Remove this instance from any entries it may be on in
	 * the hash table by using the list of addresses maintained
	 * in the vsw_t structure.
	 */
	vsw_del_mcst_vsw(vswp);

	vswp->mcap = NULL;
	mutex_destroy(&vswp->mca_lock);

	/*
	 * By now any pending tasks have finished and the underlying
	 * ldc's have been destroyed, so it's safe to delete the control
	 * message taskq.
	 */
	if (vswp->taskq_p != NULL)
		ddi_taskq_destroy(vswp->taskq_p);

	/*
	 * At this stage all the data pointers in the hash table
	 * should be NULL, as all the ports have been removed and will
	 * have deleted themselves from the port lists which the data
	 * pointers point to. Hence we can destroy the table using the
	 * default destructors.
	 */
	D2(vswp, "vsw_detach: destroying hash tables..");
	vsw_destroy_vlans(vswp, VSW_LOCALDEV);
	mod_hash_destroy_hash(vswp->fdb_hashp);
	vswp->fdb_hashp = NULL;

	WRITE_ENTER(&vswp->mfdbrw);
	mod_hash_destroy_hash(vswp->mfdb);
	vswp->mfdb = NULL;
	RW_EXIT(&vswp->mfdbrw);
	rw_destroy(&vswp->mfdbrw);

	/* free pri_types table and the priority tx mblk pool */
	if (VSW_PRI_ETH_DEFINED(vswp)) {
		kmem_free(vswp->pri_types,
		    sizeof (uint16_t) * vswp->pri_num_types);
		(void) vio_destroy_mblks(vswp->pri_tx_vmp);
	}

	ddi_remove_minor_node(dip, NULL);

	rw_destroy(&vswp->plist.lockrw);

	/* unlink this instance from the global list of vsw_t structures */
	WRITE_ENTER(&vsw_rw);
	for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) {
		if (*vswpp == vswp) {
			*vswpp = vswp->next;
			break;
		}
	}
	RW_EXIT(&vsw_rw);
	ddi_soft_state_free(vsw_state, instance);

	return (DDI_SUCCESS);
}

/*
 * getinfo entry point.
 *
 * The minor number of the dev_t passed in 'arg' is used as the
 * instance number. DDI_INFO_DEVT2DEVINFO yields the dev_info pointer
 * saved in the instance's soft state (failing when no soft state
 * exists), DDI_INFO_DEVT2INSTANCE yields the instance number itself;
 * anything else fails with *result set to NULL.
 */
static int
vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	_NOTE(ARGUNUSED(dip))

	vsw_t	*vswp = NULL;
	dev_t	dev = (dev_t)arg;
	int	instance;

	instance = getminor(dev);

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) {
			*result = NULL;
			return (DDI_FAILURE);
		}
		*result = vswp->dip;
		return (DDI_SUCCESS);

case DDI_INFO_DEVT2INSTANCE: 855 *result = (void *)(uintptr_t)instance; 856 return (DDI_SUCCESS); 857 858 default: 859 *result = NULL; 860 return (DDI_FAILURE); 861 } 862 } 863 864 /* 865 * Get the value of the "vsw-phys-dev" property in the specified 866 * node. This property is the name of the physical device that 867 * the virtual switch will use to talk to the outside world. 868 * 869 * Note it is valid for this property to be NULL (but the property 870 * itself must exist). Callers of this routine should verify that 871 * the value returned is what they expected (i.e. either NULL or non NULL). 872 * 873 * On success returns value of the property in region pointed to by 874 * the 'name' argument, and with return value of 0. Otherwise returns 1. 875 */ 876 static int 877 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name) 878 { 879 int len = 0; 880 int instance; 881 char *physname = NULL; 882 char *dev; 883 const char *dev_name; 884 char myname[MAXNAMELEN]; 885 886 dev_name = ddi_driver_name(vswp->dip); 887 instance = ddi_get_instance(vswp->dip); 888 (void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance); 889 890 if (md_get_prop_data(mdp, node, physdev_propname, 891 (uint8_t **)(&physname), &len) != 0) { 892 cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical " 893 "device(s) from MD", vswp->instance); 894 return (1); 895 } else if ((strlen(physname) + 1) > LIFNAMSIZ) { 896 cmn_err(CE_WARN, "!vsw%d: %s is too long a device name", 897 vswp->instance, physname); 898 return (1); 899 } else if (strcmp(myname, physname) == 0) { 900 /* 901 * Prevent the vswitch from opening itself as the 902 * network device. 
903 */ 904 cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name", 905 vswp->instance, physname); 906 return (1); 907 } else { 908 (void) strncpy(name, physname, strlen(physname) + 1); 909 D2(vswp, "%s: using first device specified (%s)", 910 __func__, physname); 911 } 912 913 #ifdef DEBUG 914 /* 915 * As a temporary measure to aid testing we check to see if there 916 * is a vsw.conf file present. If there is we use the value of the 917 * vsw_physname property in the file as the name of the physical 918 * device, overriding the value from the MD. 919 * 920 * There may be multiple devices listed, but for the moment 921 * we just use the first one. 922 */ 923 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0, 924 "vsw_physname", &dev) == DDI_PROP_SUCCESS) { 925 if ((strlen(dev) + 1) > LIFNAMSIZ) { 926 cmn_err(CE_WARN, "vsw%d: %s is too long a device name", 927 vswp->instance, dev); 928 ddi_prop_free(dev); 929 return (1); 930 } else { 931 cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from " 932 "config file", vswp->instance, dev); 933 934 (void) strncpy(name, dev, strlen(dev) + 1); 935 } 936 937 ddi_prop_free(dev); 938 } 939 #endif 940 941 return (0); 942 } 943 944 /* 945 * Read the 'vsw-switch-mode' property from the specified MD node. 946 * 947 * Returns 0 on success and the number of modes found in 'found', 948 * otherwise returns 1. 949 */ 950 static int 951 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 952 uint8_t *modes, int *found) 953 { 954 int len = 0; 955 int smode_num = 0; 956 char *smode = NULL; 957 char *curr_mode = NULL; 958 959 D1(vswp, "%s: enter", __func__); 960 961 /* 962 * Get the switch-mode property. The modes are listed in 963 * decreasing order of preference, i.e. prefered mode is 964 * first item in list. 965 */ 966 len = 0; 967 smode_num = 0; 968 if (md_get_prop_data(mdp, node, smode_propname, 969 (uint8_t **)(&smode), &len) != 0) { 970 /* 971 * Unable to get switch-mode property from MD, nothing 972 * more we can do. 
973 */ 974 cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property" 975 " from the MD", vswp->instance); 976 *found = 0; 977 return (1); 978 } 979 980 curr_mode = smode; 981 /* 982 * Modes of operation: 983 * 'switched' - layer 2 switching, underlying HW in 984 * programmed mode. 985 * 'promiscuous' - layer 2 switching, underlying HW in 986 * promiscuous mode. 987 * 'routed' - layer 3 (i.e. IP) routing, underlying HW 988 * in non-promiscuous mode. 989 */ 990 while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) { 991 D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode); 992 if (strcmp(curr_mode, "switched") == 0) { 993 modes[smode_num++] = VSW_LAYER2; 994 } else if (strcmp(curr_mode, "promiscuous") == 0) { 995 modes[smode_num++] = VSW_LAYER2_PROMISC; 996 } else if (strcmp(curr_mode, "routed") == 0) { 997 modes[smode_num++] = VSW_LAYER3; 998 } else { 999 DWARN(vswp, "%s: Unknown switch mode %s, " 1000 "setting to default 'switched' mode", 1001 __func__, curr_mode); 1002 modes[smode_num++] = VSW_LAYER2; 1003 } 1004 curr_mode += strlen(curr_mode) + 1; 1005 } 1006 *found = smode_num; 1007 1008 D2(vswp, "%s: %d modes found", __func__, smode_num); 1009 1010 D1(vswp, "%s: exit", __func__); 1011 1012 return (0); 1013 } 1014 1015 /* 1016 * Register with the MAC layer as a network device, so we 1017 * can be plumbed if necessary. 
1018 */ 1019 static int 1020 vsw_mac_register(vsw_t *vswp) 1021 { 1022 mac_register_t *macp; 1023 int rv; 1024 1025 D1(vswp, "%s: enter", __func__); 1026 1027 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 1028 return (EINVAL); 1029 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1030 macp->m_driver = vswp; 1031 macp->m_dip = vswp->dip; 1032 macp->m_src_addr = (uint8_t *)&vswp->if_addr; 1033 macp->m_callbacks = &vsw_m_callbacks; 1034 macp->m_min_sdu = 0; 1035 macp->m_max_sdu = vsw_ethermtu; 1036 macp->m_margin = VLAN_TAGSZ; 1037 rv = mac_register(macp, &vswp->if_mh); 1038 mac_free(macp); 1039 if (rv != 0) { 1040 /* 1041 * Treat this as a non-fatal error as we may be 1042 * able to operate in some other mode. 1043 */ 1044 cmn_err(CE_NOTE, "!vsw%d: Unable to register as " 1045 "a provider with MAC layer", vswp->instance); 1046 return (rv); 1047 } 1048 1049 vswp->if_state |= VSW_IF_REG; 1050 1051 vswp->max_frame_size = vsw_ethermtu + sizeof (struct ether_header) 1052 + VLAN_TAGSZ; 1053 1054 D1(vswp, "%s: exit", __func__); 1055 1056 return (rv); 1057 } 1058 1059 static int 1060 vsw_mac_unregister(vsw_t *vswp) 1061 { 1062 int rv = 0; 1063 1064 D1(vswp, "%s: enter", __func__); 1065 1066 WRITE_ENTER(&vswp->if_lockrw); 1067 1068 if (vswp->if_state & VSW_IF_REG) { 1069 rv = mac_unregister(vswp->if_mh); 1070 if (rv != 0) { 1071 DWARN(vswp, "%s: unable to unregister from MAC " 1072 "framework", __func__); 1073 1074 RW_EXIT(&vswp->if_lockrw); 1075 D1(vswp, "%s: fail exit", __func__); 1076 return (rv); 1077 } 1078 1079 /* mark i/f as down and unregistered */ 1080 vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG); 1081 } 1082 RW_EXIT(&vswp->if_lockrw); 1083 1084 D1(vswp, "%s: exit", __func__); 1085 1086 return (rv); 1087 } 1088 1089 static int 1090 vsw_m_stat(void *arg, uint_t stat, uint64_t *val) 1091 { 1092 vsw_t *vswp = (vsw_t *)arg; 1093 1094 D1(vswp, "%s: enter", __func__); 1095 1096 WRITE_ENTER(&vswp->mac_rwlock); 1097 if (vswp->mh == NULL) { 1098 RW_EXIT(&vswp->mac_rwlock); 1099 
return (EINVAL); 1100 } 1101 1102 /* return stats from underlying device */ 1103 *val = mac_stat_get(vswp->mh, stat); 1104 1105 RW_EXIT(&vswp->mac_rwlock); 1106 1107 return (0); 1108 } 1109 1110 static void 1111 vsw_m_stop(void *arg) 1112 { 1113 vsw_t *vswp = (vsw_t *)arg; 1114 1115 D1(vswp, "%s: enter", __func__); 1116 1117 WRITE_ENTER(&vswp->if_lockrw); 1118 vswp->if_state &= ~VSW_IF_UP; 1119 RW_EXIT(&vswp->if_lockrw); 1120 1121 mutex_enter(&vswp->hw_lock); 1122 1123 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 1124 1125 if (vswp->recfg_reqd) 1126 vsw_reconfig_hw(vswp); 1127 1128 mutex_exit(&vswp->hw_lock); 1129 1130 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1131 } 1132 1133 static int 1134 vsw_m_start(void *arg) 1135 { 1136 vsw_t *vswp = (vsw_t *)arg; 1137 1138 D1(vswp, "%s: enter", __func__); 1139 1140 WRITE_ENTER(&vswp->if_lockrw); 1141 1142 vswp->if_state |= VSW_IF_UP; 1143 1144 if (vswp->switching_setup_done == B_FALSE) { 1145 /* 1146 * If the switching mode has not been setup yet, just 1147 * return. The unicast address will be programmed 1148 * after the physical device is successfully setup by the 1149 * timeout handler. 1150 */ 1151 RW_EXIT(&vswp->if_lockrw); 1152 return (0); 1153 } 1154 1155 /* if in layer2 mode, program unicast address. */ 1156 if (vswp->mh != NULL) { 1157 mutex_enter(&vswp->hw_lock); 1158 (void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 1159 mutex_exit(&vswp->hw_lock); 1160 } 1161 1162 RW_EXIT(&vswp->if_lockrw); 1163 1164 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1165 return (0); 1166 } 1167 1168 /* 1169 * Change the local interface address. 1170 * 1171 * Note: we don't support this entry point. The local 1172 * mac address of the switch can only be changed via its 1173 * MD node properties. 
1174 */ 1175 static int 1176 vsw_m_unicst(void *arg, const uint8_t *macaddr) 1177 { 1178 _NOTE(ARGUNUSED(arg, macaddr)) 1179 1180 return (DDI_FAILURE); 1181 } 1182 1183 static int 1184 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 1185 { 1186 vsw_t *vswp = (vsw_t *)arg; 1187 mcst_addr_t *mcst_p = NULL; 1188 uint64_t addr = 0x0; 1189 int i, ret = 0; 1190 1191 D1(vswp, "%s: enter", __func__); 1192 1193 /* 1194 * Convert address into form that can be used 1195 * as hash table key. 1196 */ 1197 for (i = 0; i < ETHERADDRL; i++) { 1198 addr = (addr << 8) | mca[i]; 1199 } 1200 1201 D2(vswp, "%s: addr = 0x%llx", __func__, addr); 1202 1203 if (add) { 1204 D2(vswp, "%s: adding multicast", __func__); 1205 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1206 /* 1207 * Update the list of multicast addresses 1208 * contained within the vsw_t structure to 1209 * include this new one. 1210 */ 1211 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP); 1212 if (mcst_p == NULL) { 1213 DERR(vswp, "%s unable to alloc mem", __func__); 1214 (void) vsw_del_mcst(vswp, 1215 VSW_LOCALDEV, addr, NULL); 1216 return (1); 1217 } 1218 mcst_p->addr = addr; 1219 ether_copy(mca, &mcst_p->mca); 1220 1221 /* 1222 * Call into the underlying driver to program the 1223 * address into HW. 
1224 */ 1225 WRITE_ENTER(&vswp->mac_rwlock); 1226 if (vswp->mh != NULL) { 1227 ret = mac_multicst_add(vswp->mh, mca); 1228 if (ret != 0) { 1229 cmn_err(CE_NOTE, "!vsw%d: unable to " 1230 "add multicast address", 1231 vswp->instance); 1232 RW_EXIT(&vswp->mac_rwlock); 1233 (void) vsw_del_mcst(vswp, 1234 VSW_LOCALDEV, addr, NULL); 1235 kmem_free(mcst_p, sizeof (*mcst_p)); 1236 return (ret); 1237 } 1238 mcst_p->mac_added = B_TRUE; 1239 } 1240 RW_EXIT(&vswp->mac_rwlock); 1241 1242 mutex_enter(&vswp->mca_lock); 1243 mcst_p->nextp = vswp->mcap; 1244 vswp->mcap = mcst_p; 1245 mutex_exit(&vswp->mca_lock); 1246 } else { 1247 cmn_err(CE_NOTE, "!vsw%d: unable to add multicast " 1248 "address", vswp->instance); 1249 } 1250 return (ret); 1251 } 1252 1253 D2(vswp, "%s: removing multicast", __func__); 1254 /* 1255 * Remove the address from the hash table.. 1256 */ 1257 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1258 1259 /* 1260 * ..and then from the list maintained in the 1261 * vsw_t structure. 
1262 */ 1263 mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr); 1264 ASSERT(mcst_p != NULL); 1265 1266 WRITE_ENTER(&vswp->mac_rwlock); 1267 if (vswp->mh != NULL && mcst_p->mac_added) { 1268 (void) mac_multicst_remove(vswp->mh, mca); 1269 mcst_p->mac_added = B_FALSE; 1270 } 1271 RW_EXIT(&vswp->mac_rwlock); 1272 kmem_free(mcst_p, sizeof (*mcst_p)); 1273 } 1274 1275 D1(vswp, "%s: exit", __func__); 1276 1277 return (0); 1278 } 1279 1280 static int 1281 vsw_m_promisc(void *arg, boolean_t on) 1282 { 1283 vsw_t *vswp = (vsw_t *)arg; 1284 1285 D1(vswp, "%s: enter", __func__); 1286 1287 WRITE_ENTER(&vswp->if_lockrw); 1288 if (on) 1289 vswp->if_state |= VSW_IF_PROMISC; 1290 else 1291 vswp->if_state &= ~VSW_IF_PROMISC; 1292 RW_EXIT(&vswp->if_lockrw); 1293 1294 D1(vswp, "%s: exit", __func__); 1295 1296 return (0); 1297 } 1298 1299 static mblk_t * 1300 vsw_m_tx(void *arg, mblk_t *mp) 1301 { 1302 vsw_t *vswp = (vsw_t *)arg; 1303 1304 D1(vswp, "%s: enter", __func__); 1305 1306 mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp); 1307 1308 if (mp == NULL) { 1309 return (NULL); 1310 } 1311 1312 vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL); 1313 1314 D1(vswp, "%s: exit", __func__); 1315 1316 return (NULL); 1317 } 1318 1319 /* 1320 * Register for machine description (MD) updates. 1321 * 1322 * Returns 0 on success, 1 on failure. 1323 */ 1324 static int 1325 vsw_mdeg_register(vsw_t *vswp) 1326 { 1327 mdeg_prop_spec_t *pspecp; 1328 mdeg_node_spec_t *inst_specp; 1329 mdeg_handle_t mdeg_hdl, mdeg_port_hdl; 1330 size_t templatesz; 1331 int rv; 1332 1333 D1(vswp, "%s: enter", __func__); 1334 1335 /* 1336 * Allocate and initialize a per-instance copy 1337 * of the global property spec array that will 1338 * uniquely identify this vsw instance. 
1339 */ 1340 templatesz = sizeof (vsw_prop_template); 1341 pspecp = kmem_zalloc(templatesz, KM_SLEEP); 1342 1343 bcopy(vsw_prop_template, pspecp, templatesz); 1344 1345 VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop); 1346 1347 /* initialize the complete prop spec structure */ 1348 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 1349 inst_specp->namep = "virtual-device"; 1350 inst_specp->specp = pspecp; 1351 1352 D2(vswp, "%s: instance %d registering with mdeg", __func__, 1353 vswp->regprop); 1354 /* 1355 * Register an interest in 'virtual-device' nodes with a 1356 * 'name' property of 'virtual-network-switch' 1357 */ 1358 rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb, 1359 (void *)vswp, &mdeg_hdl); 1360 if (rv != MDEG_SUCCESS) { 1361 DERR(vswp, "%s: mdeg_register failed (%d) for vsw node", 1362 __func__, rv); 1363 goto mdeg_reg_fail; 1364 } 1365 1366 /* 1367 * Register an interest in 'vsw-port' nodes. 1368 */ 1369 rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb, 1370 (void *)vswp, &mdeg_port_hdl); 1371 if (rv != MDEG_SUCCESS) { 1372 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv); 1373 (void) mdeg_unregister(mdeg_hdl); 1374 goto mdeg_reg_fail; 1375 } 1376 1377 /* save off data that will be needed later */ 1378 vswp->inst_spec = inst_specp; 1379 vswp->mdeg_hdl = mdeg_hdl; 1380 vswp->mdeg_port_hdl = mdeg_port_hdl; 1381 1382 D1(vswp, "%s: exit", __func__); 1383 return (0); 1384 1385 mdeg_reg_fail: 1386 cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks", 1387 vswp->instance); 1388 kmem_free(pspecp, templatesz); 1389 kmem_free(inst_specp, sizeof (mdeg_node_spec_t)); 1390 1391 vswp->mdeg_hdl = NULL; 1392 vswp->mdeg_port_hdl = NULL; 1393 1394 return (1); 1395 } 1396 1397 static void 1398 vsw_mdeg_unregister(vsw_t *vswp) 1399 { 1400 D1(vswp, "vsw_mdeg_unregister: enter"); 1401 1402 if (vswp->mdeg_hdl != NULL) 1403 (void) mdeg_unregister(vswp->mdeg_hdl); 1404 1405 if (vswp->mdeg_port_hdl != NULL) 1406 (void) 
mdeg_unregister(vswp->mdeg_port_hdl); 1407 1408 if (vswp->inst_spec != NULL) { 1409 if (vswp->inst_spec->specp != NULL) { 1410 (void) kmem_free(vswp->inst_spec->specp, 1411 sizeof (vsw_prop_template)); 1412 vswp->inst_spec->specp = NULL; 1413 } 1414 1415 (void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t)); 1416 vswp->inst_spec = NULL; 1417 } 1418 1419 D1(vswp, "vsw_mdeg_unregister: exit"); 1420 } 1421 1422 /* 1423 * Mdeg callback invoked for the vsw node itself. 1424 */ 1425 static int 1426 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1427 { 1428 vsw_t *vswp; 1429 md_t *mdp; 1430 mde_cookie_t node; 1431 uint64_t inst; 1432 char *node_name = NULL; 1433 1434 if (resp == NULL) 1435 return (MDEG_FAILURE); 1436 1437 vswp = (vsw_t *)cb_argp; 1438 1439 D1(vswp, "%s: added %d : removed %d : curr matched %d" 1440 " : prev matched %d", __func__, resp->added.nelem, 1441 resp->removed.nelem, resp->match_curr.nelem, 1442 resp->match_prev.nelem); 1443 1444 /* 1445 * We get an initial callback for this node as 'added' 1446 * after registering with mdeg. Note that we would have 1447 * already gathered information about this vsw node by 1448 * walking MD earlier during attach (in vsw_read_mdprops()). 1449 * So, there is a window where the properties of this 1450 * node might have changed when we get this initial 'added' 1451 * callback. We handle this as if an update occured 1452 * and invoke the same function which handles updates to 1453 * the properties of this vsw-node if any. 1454 * 1455 * A non-zero 'match' value indicates that the MD has been 1456 * updated and that a virtual-network-switch node is 1457 * present which may or may not have been updated. It is 1458 * up to the clients to examine their own nodes and 1459 * determine if they have changed. 
1460 */ 1461 if (resp->added.nelem != 0) { 1462 1463 if (resp->added.nelem != 1) { 1464 cmn_err(CE_NOTE, "!vsw%d: number of nodes added " 1465 "invalid: %d\n", vswp->instance, resp->added.nelem); 1466 return (MDEG_FAILURE); 1467 } 1468 1469 mdp = resp->added.mdp; 1470 node = resp->added.mdep[0]; 1471 1472 } else if (resp->match_curr.nelem != 0) { 1473 1474 if (resp->match_curr.nelem != 1) { 1475 cmn_err(CE_NOTE, "!vsw%d: number of nodes updated " 1476 "invalid: %d\n", vswp->instance, 1477 resp->match_curr.nelem); 1478 return (MDEG_FAILURE); 1479 } 1480 1481 mdp = resp->match_curr.mdp; 1482 node = resp->match_curr.mdep[0]; 1483 1484 } else { 1485 return (MDEG_FAILURE); 1486 } 1487 1488 /* Validate name and instance */ 1489 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { 1490 DERR(vswp, "%s: unable to get node name\n", __func__); 1491 return (MDEG_FAILURE); 1492 } 1493 1494 /* is this a virtual-network-switch? */ 1495 if (strcmp(node_name, vsw_propname) != 0) { 1496 DERR(vswp, "%s: Invalid node name: %s\n", 1497 __func__, node_name); 1498 return (MDEG_FAILURE); 1499 } 1500 1501 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { 1502 DERR(vswp, "%s: prop(cfg-handle) not found\n", 1503 __func__); 1504 return (MDEG_FAILURE); 1505 } 1506 1507 /* is this the right instance of vsw? */ 1508 if (inst != vswp->regprop) { 1509 DERR(vswp, "%s: Invalid cfg-handle: %lx\n", 1510 __func__, inst); 1511 return (MDEG_FAILURE); 1512 } 1513 1514 vsw_update_md_prop(vswp, mdp, node); 1515 1516 return (MDEG_SUCCESS); 1517 } 1518 1519 /* 1520 * Mdeg callback invoked for changes to the vsw-port nodes 1521 * under the vsw node. 
1522 */ 1523 static int 1524 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1525 { 1526 vsw_t *vswp; 1527 int idx; 1528 md_t *mdp; 1529 mde_cookie_t node; 1530 uint64_t inst; 1531 int rv; 1532 1533 if ((resp == NULL) || (cb_argp == NULL)) 1534 return (MDEG_FAILURE); 1535 1536 vswp = (vsw_t *)cb_argp; 1537 1538 D2(vswp, "%s: added %d : removed %d : curr matched %d" 1539 " : prev matched %d", __func__, resp->added.nelem, 1540 resp->removed.nelem, resp->match_curr.nelem, 1541 resp->match_prev.nelem); 1542 1543 /* process added ports */ 1544 for (idx = 0; idx < resp->added.nelem; idx++) { 1545 mdp = resp->added.mdp; 1546 node = resp->added.mdep[idx]; 1547 1548 D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); 1549 1550 if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) { 1551 cmn_err(CE_WARN, "!vsw%d: Unable to add new port " 1552 "(0x%lx), err=%d", vswp->instance, node, rv); 1553 } 1554 } 1555 1556 /* process removed ports */ 1557 for (idx = 0; idx < resp->removed.nelem; idx++) { 1558 mdp = resp->removed.mdp; 1559 node = resp->removed.mdep[idx]; 1560 1561 if (md_get_prop_val(mdp, node, id_propname, &inst)) { 1562 DERR(vswp, "%s: prop(%s) not found in port(%d)", 1563 __func__, id_propname, idx); 1564 continue; 1565 } 1566 1567 D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); 1568 1569 if (vsw_port_detach(vswp, inst) != 0) { 1570 cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld", 1571 vswp->instance, inst); 1572 } 1573 } 1574 1575 for (idx = 0; idx < resp->match_curr.nelem; idx++) { 1576 (void) vsw_port_update(vswp, resp->match_curr.mdp, 1577 resp->match_curr.mdep[idx], 1578 resp->match_prev.mdp, 1579 resp->match_prev.mdep[idx]); 1580 } 1581 1582 D1(vswp, "%s: exit", __func__); 1583 1584 return (MDEG_SUCCESS); 1585 } 1586 1587 /* 1588 * Scan the machine description for this instance of vsw 1589 * and read its properties. Called only from vsw_attach(). 1590 * Returns: 0 on success, 1 on failure. 
1591 */ 1592 static int 1593 vsw_read_mdprops(vsw_t *vswp) 1594 { 1595 md_t *mdp = NULL; 1596 mde_cookie_t rootnode; 1597 mde_cookie_t *listp = NULL; 1598 uint64_t inst; 1599 uint64_t cfgh; 1600 char *name; 1601 int rv = 1; 1602 int num_nodes = 0; 1603 int num_devs = 0; 1604 int listsz = 0; 1605 int i; 1606 1607 /* 1608 * In each 'virtual-device' node in the MD there is a 1609 * 'cfg-handle' property which is the MD's concept of 1610 * an instance number (this may be completely different from 1611 * the device drivers instance #). OBP reads that value and 1612 * stores it in the 'reg' property of the appropriate node in 1613 * the device tree. We first read this reg property and use this 1614 * to compare against the 'cfg-handle' property of vsw nodes 1615 * in MD to get to this specific vsw instance and then read 1616 * other properties that we are interested in. 1617 * We also cache the value of 'reg' property and use it later 1618 * to register callbacks with mdeg (see vsw_mdeg_register()) 1619 */ 1620 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 1621 DDI_PROP_DONTPASS, reg_propname, -1); 1622 if (inst == -1) { 1623 cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from " 1624 "OBP device tree", vswp->instance, reg_propname); 1625 return (rv); 1626 } 1627 1628 vswp->regprop = inst; 1629 1630 if ((mdp = md_get_handle()) == NULL) { 1631 DWARN(vswp, "%s: cannot init MD\n", __func__); 1632 return (rv); 1633 } 1634 1635 num_nodes = md_node_count(mdp); 1636 ASSERT(num_nodes > 0); 1637 1638 listsz = num_nodes * sizeof (mde_cookie_t); 1639 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 1640 1641 rootnode = md_root_node(mdp); 1642 1643 /* search for all "virtual_device" nodes */ 1644 num_devs = md_scan_dag(mdp, rootnode, 1645 md_find_name(mdp, vdev_propname), 1646 md_find_name(mdp, "fwd"), listp); 1647 if (num_devs <= 0) { 1648 DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs); 1649 goto vsw_readmd_exit; 1650 } 1651 1652 /* 1653 * Now loop 
through the list of virtual-devices looking for 1654 * devices with name "virtual-network-switch" and for each 1655 * such device compare its instance with what we have from 1656 * the 'reg' property to find the right node in MD and then 1657 * read all its properties. 1658 */ 1659 for (i = 0; i < num_devs; i++) { 1660 1661 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 1662 DWARN(vswp, "%s: name property not found\n", 1663 __func__); 1664 goto vsw_readmd_exit; 1665 } 1666 1667 /* is this a virtual-network-switch? */ 1668 if (strcmp(name, vsw_propname) != 0) 1669 continue; 1670 1671 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 1672 DWARN(vswp, "%s: cfg-handle property not found\n", 1673 __func__); 1674 goto vsw_readmd_exit; 1675 } 1676 1677 /* is this the required instance of vsw? */ 1678 if (inst != cfgh) 1679 continue; 1680 1681 /* now read all properties of this vsw instance */ 1682 rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]); 1683 break; 1684 } 1685 1686 vsw_readmd_exit: 1687 1688 kmem_free(listp, listsz); 1689 (void) md_fini_handle(mdp); 1690 return (rv); 1691 } 1692 1693 /* 1694 * Read the initial start-of-day values from the specified MD node. 
1695 */ 1696 static int 1697 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1698 { 1699 int i; 1700 uint64_t macaddr = 0; 1701 1702 D1(vswp, "%s: enter", __func__); 1703 1704 if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) { 1705 return (1); 1706 } 1707 1708 /* mac address for vswitch device itself */ 1709 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1710 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1711 vswp->instance); 1712 return (1); 1713 } 1714 1715 vsw_save_lmacaddr(vswp, macaddr); 1716 1717 if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) { 1718 DWARN(vswp, "%s: Unable to read %s property from MD, " 1719 "defaulting to 'switched' mode", 1720 __func__, smode_propname); 1721 1722 for (i = 0; i < NUM_SMODES; i++) 1723 vswp->smode[i] = VSW_LAYER2; 1724 1725 vswp->smode_num = NUM_SMODES; 1726 } else { 1727 ASSERT(vswp->smode_num != 0); 1728 } 1729 1730 /* read vlan id properties of this vsw instance */ 1731 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid, 1732 &vswp->vids, &vswp->nvids, &vswp->default_vlan_id); 1733 1734 /* read priority-ether-types */ 1735 vsw_read_pri_eth_types(vswp, mdp, node); 1736 1737 D1(vswp, "%s: exit", __func__); 1738 return (0); 1739 } 1740 1741 /* 1742 * Read vlan id properties of the given MD node. 
1743 * Arguments: 1744 * arg: device argument(vsw device or a port) 1745 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port) 1746 * mdp: machine description 1747 * node: md node cookie 1748 * 1749 * Returns: 1750 * pvidp: port-vlan-id of the node 1751 * vidspp: list of vlan-ids of the node 1752 * nvidsp: # of vlan-ids in the list 1753 * default_idp: default-vlan-id of the node(if node is vsw device) 1754 */ 1755 static void 1756 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node, 1757 uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp, 1758 uint16_t *default_idp) 1759 { 1760 vsw_t *vswp; 1761 vsw_port_t *portp; 1762 char *pvid_propname; 1763 char *vid_propname; 1764 uint_t nvids = 0; 1765 uint32_t vids_size; 1766 int rv; 1767 int i; 1768 uint64_t *data; 1769 uint64_t val; 1770 int size; 1771 int inst; 1772 1773 if (type == VSW_LOCALDEV) { 1774 1775 vswp = (vsw_t *)arg; 1776 pvid_propname = vsw_pvid_propname; 1777 vid_propname = vsw_vid_propname; 1778 inst = vswp->instance; 1779 1780 } else if (type == VSW_VNETPORT) { 1781 1782 portp = (vsw_port_t *)arg; 1783 vswp = portp->p_vswp; 1784 pvid_propname = port_pvid_propname; 1785 vid_propname = port_vid_propname; 1786 inst = portp->p_instance; 1787 1788 } else { 1789 return; 1790 } 1791 1792 if (type == VSW_LOCALDEV && default_idp != NULL) { 1793 rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val); 1794 if (rv != 0) { 1795 DWARN(vswp, "%s: prop(%s) not found", __func__, 1796 vsw_dvid_propname); 1797 1798 *default_idp = vsw_default_vlan_id; 1799 } else { 1800 *default_idp = val & 0xFFF; 1801 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1802 vsw_dvid_propname, inst, *default_idp); 1803 } 1804 } 1805 1806 rv = md_get_prop_val(mdp, node, pvid_propname, &val); 1807 if (rv != 0) { 1808 DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname); 1809 *pvidp = vsw_default_vlan_id; 1810 } else { 1811 1812 *pvidp = val & 0xFFF; 1813 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1814 
pvid_propname, inst, *pvidp); 1815 } 1816 1817 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data, 1818 &size); 1819 if (rv != 0) { 1820 D2(vswp, "%s: prop(%s) not found", __func__, vid_propname); 1821 size = 0; 1822 } else { 1823 size /= sizeof (uint64_t); 1824 } 1825 nvids = size; 1826 1827 if (nvids != 0) { 1828 D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst); 1829 vids_size = sizeof (uint16_t) * nvids; 1830 *vidspp = kmem_zalloc(vids_size, KM_SLEEP); 1831 for (i = 0; i < nvids; i++) { 1832 (*vidspp)[i] = data[i] & 0xFFFF; 1833 D2(vswp, " %d ", (*vidspp)[i]); 1834 } 1835 D2(vswp, "\n"); 1836 } 1837 1838 *nvidsp = nvids; 1839 } 1840 1841 /* 1842 * This function reads "priority-ether-types" property from md. This property 1843 * is used to enable support for priority frames. Applications which need 1844 * guaranteed and timely delivery of certain high priority frames to/from 1845 * a vnet or vsw within ldoms, should configure this property by providing 1846 * the ether type(s) for which the priority facility is needed. 1847 * Normal data frames are delivered over a ldc channel using the descriptor 1848 * ring mechanism which is constrained by factors such as descriptor ring size, 1849 * the rate at which the ring is processed at the peer ldc end point, etc. 1850 * The priority mechanism provides an Out-Of-Band path to send/receive frames 1851 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the 1852 * descriptor ring path and enables a more reliable and timely delivery of 1853 * frames to the peer. 1854 */ 1855 static void 1856 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1857 { 1858 int rv; 1859 uint16_t *types; 1860 uint64_t *data; 1861 int size; 1862 int i; 1863 size_t mblk_sz; 1864 1865 rv = md_get_prop_data(mdp, node, pri_types_propname, 1866 (uint8_t **)&data, &size); 1867 if (rv != 0) { 1868 /* 1869 * Property may not exist if we are running pre-ldoms1.1 f/w. 
1870 * Check if 'vsw_pri_eth_type' has been set in that case. 1871 */ 1872 if (vsw_pri_eth_type != 0) { 1873 size = sizeof (vsw_pri_eth_type); 1874 data = &vsw_pri_eth_type; 1875 } else { 1876 D3(vswp, "%s: prop(%s) not found", __func__, 1877 pri_types_propname); 1878 size = 0; 1879 } 1880 } 1881 1882 if (size == 0) { 1883 vswp->pri_num_types = 0; 1884 return; 1885 } 1886 1887 /* 1888 * we have some priority-ether-types defined; 1889 * allocate a table of these types and also 1890 * allocate a pool of mblks to transmit these 1891 * priority packets. 1892 */ 1893 size /= sizeof (uint64_t); 1894 vswp->pri_num_types = size; 1895 vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP); 1896 for (i = 0, types = vswp->pri_types; i < size; i++) { 1897 types[i] = data[i] & 0xFFFF; 1898 } 1899 mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7; 1900 (void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp); 1901 } 1902 1903 /* 1904 * Check to see if the relevant properties in the specified node have 1905 * changed, and if so take the appropriate action. 1906 * 1907 * If any of the properties are missing or invalid we don't take 1908 * any action, as this function should only be invoked when modifications 1909 * have been made to what we assume is a working configuration, which 1910 * we leave active. 1911 * 1912 * Note it is legal for this routine to be invoked even if none of the 1913 * properties in the port node within the MD have actually changed. 
1914 */ 1915 static void 1916 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1917 { 1918 char physname[LIFNAMSIZ]; 1919 char drv[LIFNAMSIZ]; 1920 uint_t ddi_instance; 1921 uint8_t new_smode[NUM_SMODES]; 1922 int i, smode_num = 0; 1923 uint64_t macaddr = 0; 1924 enum {MD_init = 0x1, 1925 MD_physname = 0x2, 1926 MD_macaddr = 0x4, 1927 MD_smode = 0x8, 1928 MD_vlans = 0x10} updated; 1929 int rv; 1930 uint16_t pvid; 1931 uint16_t *vids; 1932 uint16_t nvids; 1933 1934 updated = MD_init; 1935 1936 D1(vswp, "%s: enter", __func__); 1937 1938 /* 1939 * Check if name of physical device in MD has changed. 1940 */ 1941 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { 1942 /* 1943 * Do basic sanity check on new device name/instance, 1944 * if its non NULL. It is valid for the device name to 1945 * have changed from a non NULL to a NULL value, i.e. 1946 * the vsw is being changed to 'routed' mode. 1947 */ 1948 if ((strlen(physname) != 0) && 1949 (ddi_parse(physname, drv, 1950 &ddi_instance) != DDI_SUCCESS)) { 1951 cmn_err(CE_WARN, "!vsw%d: physical device %s is not" 1952 " a valid device name/instance", 1953 vswp->instance, physname); 1954 goto fail_reconf; 1955 } 1956 1957 if (strcmp(physname, vswp->physname)) { 1958 D2(vswp, "%s: device name changed from %s to %s", 1959 __func__, vswp->physname, physname); 1960 1961 updated |= MD_physname; 1962 } else { 1963 D2(vswp, "%s: device name unchanged at %s", 1964 __func__, vswp->physname); 1965 } 1966 } else { 1967 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " 1968 "device from updated MD.", vswp->instance); 1969 goto fail_reconf; 1970 } 1971 1972 /* 1973 * Check if MAC address has changed. 
1974 */ 1975 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1976 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1977 vswp->instance); 1978 goto fail_reconf; 1979 } else { 1980 uint64_t maddr = macaddr; 1981 READ_ENTER(&vswp->if_lockrw); 1982 for (i = ETHERADDRL - 1; i >= 0; i--) { 1983 if (vswp->if_addr.ether_addr_octet[i] 1984 != (macaddr & 0xFF)) { 1985 D2(vswp, "%s: octet[%d] 0x%x != 0x%x", 1986 __func__, i, 1987 vswp->if_addr.ether_addr_octet[i], 1988 (macaddr & 0xFF)); 1989 updated |= MD_macaddr; 1990 macaddr = maddr; 1991 break; 1992 } 1993 macaddr >>= 8; 1994 } 1995 RW_EXIT(&vswp->if_lockrw); 1996 if (updated & MD_macaddr) { 1997 vsw_save_lmacaddr(vswp, macaddr); 1998 } 1999 } 2000 2001 /* 2002 * Check if switching modes have changed. 2003 */ 2004 if (vsw_get_md_smodes(vswp, mdp, node, 2005 new_smode, &smode_num)) { 2006 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", 2007 vswp->instance, smode_propname); 2008 goto fail_reconf; 2009 } else { 2010 ASSERT(smode_num != 0); 2011 if (smode_num != vswp->smode_num) { 2012 D2(vswp, "%s: number of modes changed from %d to %d", 2013 __func__, vswp->smode_num, smode_num); 2014 } 2015 2016 for (i = 0; i < smode_num; i++) { 2017 if (new_smode[i] != vswp->smode[i]) { 2018 D2(vswp, "%s: mode changed from %d to %d", 2019 __func__, vswp->smode[i], new_smode[i]); 2020 updated |= MD_smode; 2021 break; 2022 } 2023 } 2024 } 2025 2026 /* Read the vlan ids */ 2027 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids, 2028 &nvids, NULL); 2029 2030 /* Determine if there are any vlan id updates */ 2031 if ((pvid != vswp->pvid) || /* pvid changed? */ 2032 (nvids != vswp->nvids) || /* # of vids changed? */ 2033 ((nvids != 0) && (vswp->nvids != 0) && /* vids changed? */ 2034 bcmp(vids, vswp->vids, sizeof (uint16_t) * nvids))) { 2035 updated |= MD_vlans; 2036 } 2037 2038 /* 2039 * Now make any changes which are needed... 
2040 */ 2041 2042 if (updated & (MD_physname | MD_smode)) { 2043 2044 /* 2045 * Stop any pending timeout to setup switching mode. 2046 */ 2047 vsw_stop_switching_timeout(vswp); 2048 2049 /* Cleanup HybridIO */ 2050 vsw_hio_cleanup(vswp); 2051 2052 /* 2053 * Remove unicst, mcst addrs of vsw interface 2054 * and ports from the physdev. 2055 */ 2056 vsw_unset_addrs(vswp); 2057 2058 /* 2059 * Stop, detach and close the old device.. 2060 */ 2061 WRITE_ENTER(&vswp->mac_rwlock); 2062 2063 vsw_mac_detach(vswp); 2064 vsw_mac_close(vswp); 2065 2066 RW_EXIT(&vswp->mac_rwlock); 2067 2068 /* 2069 * Update phys name. 2070 */ 2071 if (updated & MD_physname) { 2072 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", 2073 vswp->instance, vswp->physname, physname); 2074 (void) strncpy(vswp->physname, 2075 physname, strlen(physname) + 1); 2076 } 2077 2078 /* 2079 * Update array with the new switch mode values. 2080 */ 2081 if (updated & MD_smode) { 2082 for (i = 0; i < smode_num; i++) 2083 vswp->smode[i] = new_smode[i]; 2084 2085 vswp->smode_num = smode_num; 2086 vswp->smode_idx = 0; 2087 } 2088 2089 /* 2090 * ..and attach, start the new device. 2091 */ 2092 rv = vsw_setup_switching(vswp); 2093 if (rv == EAGAIN) { 2094 /* 2095 * Unable to setup switching mode. 2096 * As the error is EAGAIN, schedule a timeout to retry 2097 * and return. Programming addresses of ports and 2098 * vsw interface will be done when the timeout handler 2099 * completes successfully. 2100 */ 2101 mutex_enter(&vswp->swtmout_lock); 2102 2103 vswp->swtmout_enabled = B_TRUE; 2104 vswp->swtmout_id = 2105 timeout(vsw_setup_switching_timeout, vswp, 2106 (vsw_setup_switching_delay * 2107 drv_usectohz(MICROSEC))); 2108 2109 mutex_exit(&vswp->swtmout_lock); 2110 2111 return; 2112 2113 } else if (rv) { 2114 goto fail_update; 2115 } 2116 2117 /* 2118 * program unicst, mcst addrs of vsw interface 2119 * and ports in the physdev. 
2120 */ 2121 vsw_set_addrs(vswp); 2122 2123 /* Start HIO for ports that have already connected */ 2124 vsw_hio_start_ports(vswp); 2125 2126 } else if (updated & MD_macaddr) { 2127 /* 2128 * We enter here if only MD_macaddr is exclusively updated. 2129 * If MD_physname and/or MD_smode are also updated, then 2130 * as part of that, we would have implicitly processed 2131 * MD_macaddr update (above). 2132 */ 2133 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", 2134 vswp->instance, macaddr); 2135 2136 READ_ENTER(&vswp->if_lockrw); 2137 if (vswp->if_state & VSW_IF_UP) { 2138 2139 mutex_enter(&vswp->hw_lock); 2140 /* 2141 * Remove old mac address of vsw interface 2142 * from the physdev 2143 */ 2144 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 2145 /* 2146 * Program new mac address of vsw interface 2147 * in the physdev 2148 */ 2149 rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 2150 mutex_exit(&vswp->hw_lock); 2151 if (rv != 0) { 2152 cmn_err(CE_NOTE, 2153 "!vsw%d: failed to program interface " 2154 "unicast address\n", vswp->instance); 2155 } 2156 /* 2157 * Notify the MAC layer of the changed address. 2158 */ 2159 mac_unicst_update(vswp->if_mh, 2160 (uint8_t *)&vswp->if_addr); 2161 2162 } 2163 RW_EXIT(&vswp->if_lockrw); 2164 2165 } 2166 2167 if (updated & MD_vlans) { 2168 /* Remove existing vlan ids from the hash table. 
*/ 2169 vsw_vlan_remove_ids(vswp, VSW_LOCALDEV); 2170 2171 /* save the new vlan ids */ 2172 vswp->pvid = pvid; 2173 if (vswp->nvids != 0) { 2174 kmem_free(vswp->vids, sizeof (uint16_t) * vswp->nvids); 2175 vswp->nvids = 0; 2176 } 2177 if (nvids != 0) { 2178 vswp->nvids = nvids; 2179 vswp->vids = vids; 2180 } 2181 2182 /* add these new vlan ids into hash table */ 2183 vsw_vlan_add_ids(vswp, VSW_LOCALDEV); 2184 } else { 2185 if (nvids != 0) { 2186 kmem_free(vids, sizeof (uint16_t) * nvids); 2187 } 2188 } 2189 2190 return; 2191 2192 fail_reconf: 2193 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); 2194 return; 2195 2196 fail_update: 2197 cmn_err(CE_WARN, "!vsw%d: re-configuration failed", 2198 vswp->instance); 2199 } 2200 2201 /* 2202 * Read the port's md properties. 2203 */ 2204 static int 2205 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 2206 md_t *mdp, mde_cookie_t *node) 2207 { 2208 uint64_t ldc_id; 2209 uint8_t *addrp; 2210 int i, addrsz; 2211 int num_nodes = 0, nchan = 0; 2212 int listsz = 0; 2213 mde_cookie_t *listp = NULL; 2214 struct ether_addr ea; 2215 uint64_t macaddr; 2216 uint64_t inst = 0; 2217 uint64_t val; 2218 2219 if (md_get_prop_val(mdp, *node, id_propname, &inst)) { 2220 DWARN(vswp, "%s: prop(%s) not found", __func__, 2221 id_propname); 2222 return (1); 2223 } 2224 2225 /* 2226 * Find the channel endpoint node(s) (which should be under this 2227 * port node) which contain the channel id(s). 
2228 */ 2229 if ((num_nodes = md_node_count(mdp)) <= 0) { 2230 DERR(vswp, "%s: invalid number of nodes found (%d)", 2231 __func__, num_nodes); 2232 return (1); 2233 } 2234 2235 D2(vswp, "%s: %d nodes found", __func__, num_nodes); 2236 2237 /* allocate enough space for node list */ 2238 listsz = num_nodes * sizeof (mde_cookie_t); 2239 listp = kmem_zalloc(listsz, KM_SLEEP); 2240 2241 nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname), 2242 md_find_name(mdp, "fwd"), listp); 2243 2244 if (nchan <= 0) { 2245 DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname); 2246 kmem_free(listp, listsz); 2247 return (1); 2248 } 2249 2250 D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname); 2251 2252 /* use property from first node found */ 2253 if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) { 2254 DWARN(vswp, "%s: prop(%s) not found\n", __func__, 2255 id_propname); 2256 kmem_free(listp, listsz); 2257 return (1); 2258 } 2259 2260 /* don't need list any more */ 2261 kmem_free(listp, listsz); 2262 2263 D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id); 2264 2265 /* read mac-address property */ 2266 if (md_get_prop_data(mdp, *node, remaddr_propname, 2267 &addrp, &addrsz)) { 2268 DWARN(vswp, "%s: prop(%s) not found", 2269 __func__, remaddr_propname); 2270 return (1); 2271 } 2272 2273 if (addrsz < ETHERADDRL) { 2274 DWARN(vswp, "%s: invalid address size", __func__); 2275 return (1); 2276 } 2277 2278 macaddr = *((uint64_t *)addrp); 2279 D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr); 2280 2281 for (i = ETHERADDRL - 1; i >= 0; i--) { 2282 ea.ether_addr_octet[i] = macaddr & 0xFF; 2283 macaddr >>= 8; 2284 } 2285 2286 /* now update all properties into the port */ 2287 portp->p_vswp = vswp; 2288 portp->p_instance = inst; 2289 portp->addr_set = VSW_ADDR_UNSET; 2290 ether_copy(&ea, &portp->p_macaddr); 2291 if (nchan > VSW_PORT_MAX_LDCS) { 2292 D2(vswp, "%s: using first of %d ldc ids", 2293 __func__, nchan); 2294 nchan = 
VSW_PORT_MAX_LDCS; 2295 } 2296 portp->num_ldcs = nchan; 2297 portp->ldc_ids = 2298 kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP); 2299 bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan); 2300 2301 /* read vlan id properties of this port node */ 2302 vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid, 2303 &portp->vids, &portp->nvids, NULL); 2304 2305 /* Check if hybrid property is present */ 2306 if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) { 2307 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2308 portp->p_hio_enabled = B_TRUE; 2309 } else { 2310 portp->p_hio_enabled = B_FALSE; 2311 } 2312 /* 2313 * Port hio capability determined after version 2314 * negotiation, i.e., when we know the peer is HybridIO capable. 2315 */ 2316 portp->p_hio_capable = B_FALSE; 2317 return (0); 2318 } 2319 2320 /* 2321 * Add a new port to the system. 2322 * 2323 * Returns 0 on success, 1 on failure. 2324 */ 2325 int 2326 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) 2327 { 2328 vsw_port_t *portp; 2329 int rv; 2330 2331 portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP); 2332 2333 rv = vsw_port_read_props(portp, vswp, mdp, node); 2334 if (rv != 0) { 2335 kmem_free(portp, sizeof (*portp)); 2336 return (1); 2337 } 2338 2339 rv = vsw_port_attach(portp); 2340 if (rv != 0) { 2341 DERR(vswp, "%s: failed to attach port", __func__); 2342 return (1); 2343 } 2344 2345 return (0); 2346 } 2347 2348 static int 2349 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 2350 md_t *prev_mdp, mde_cookie_t prev_mdex) 2351 { 2352 uint64_t cport_num; 2353 uint64_t pport_num; 2354 vsw_port_list_t *plistp; 2355 vsw_port_t *portp; 2356 boolean_t updated_vlans = B_FALSE; 2357 uint16_t pvid; 2358 uint16_t *vids; 2359 uint16_t nvids; 2360 uint64_t val; 2361 boolean_t hio_enabled = B_FALSE; 2362 2363 /* 2364 * For now, we get port updates only if vlan ids changed. 2365 * We read the port num and do some sanity check. 
2366 */ 2367 if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) { 2368 return (1); 2369 } 2370 2371 if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) { 2372 return (1); 2373 } 2374 if (cport_num != pport_num) 2375 return (1); 2376 2377 plistp = &(vswp->plist); 2378 2379 READ_ENTER(&plistp->lockrw); 2380 2381 portp = vsw_lookup_port(vswp, cport_num); 2382 if (portp == NULL) { 2383 RW_EXIT(&plistp->lockrw); 2384 return (1); 2385 } 2386 2387 /* Read the vlan ids */ 2388 vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid, 2389 &vids, &nvids, NULL); 2390 2391 /* Determine if there are any vlan id updates */ 2392 if ((pvid != portp->pvid) || /* pvid changed? */ 2393 (nvids != portp->nvids) || /* # of vids changed? */ 2394 ((nvids != 0) && (portp->nvids != 0) && /* vids changed? */ 2395 bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) { 2396 updated_vlans = B_TRUE; 2397 } 2398 2399 if (updated_vlans == B_TRUE) { 2400 2401 /* Remove existing vlan ids from the hash table. 
*/ 2402 vsw_vlan_remove_ids(portp, VSW_VNETPORT); 2403 2404 /* save the new vlan ids */ 2405 portp->pvid = pvid; 2406 if (portp->nvids != 0) { 2407 kmem_free(portp->vids, 2408 sizeof (uint16_t) * portp->nvids); 2409 portp->nvids = 0; 2410 } 2411 if (nvids != 0) { 2412 portp->vids = kmem_zalloc(sizeof (uint16_t) * 2413 nvids, KM_SLEEP); 2414 bcopy(vids, portp->vids, sizeof (uint16_t) * nvids); 2415 portp->nvids = nvids; 2416 kmem_free(vids, sizeof (uint16_t) * nvids); 2417 } 2418 2419 /* add these new vlan ids into hash table */ 2420 vsw_vlan_add_ids(portp, VSW_VNETPORT); 2421 2422 /* reset the port if it is vlan unaware (ver < 1.3) */ 2423 vsw_vlan_unaware_port_reset(portp); 2424 } 2425 2426 /* Check if hybrid property is present */ 2427 if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) { 2428 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2429 hio_enabled = B_TRUE; 2430 } 2431 2432 if (portp->p_hio_enabled != hio_enabled) { 2433 vsw_hio_port_update(portp, hio_enabled); 2434 } 2435 2436 RW_EXIT(&plistp->lockrw); 2437 2438 return (0); 2439 } 2440 2441 /* 2442 * vsw_mac_rx -- A common function to send packets to the interface. 2443 * By default this function check if the interface is UP or not, the 2444 * rest of the behaviour depends on the flags as below: 2445 * 2446 * VSW_MACRX_PROMISC -- Check if the promisc mode set or not. 2447 * VSW_MACRX_COPYMSG -- Make a copy of the message(s). 2448 * VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack. 
2449 */ 2450 void 2451 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 2452 mblk_t *mp, vsw_macrx_flags_t flags) 2453 { 2454 mblk_t *mpt; 2455 2456 D1(vswp, "%s:enter\n", __func__); 2457 READ_ENTER(&vswp->if_lockrw); 2458 /* Check if the interface is up */ 2459 if (!(vswp->if_state & VSW_IF_UP)) { 2460 RW_EXIT(&vswp->if_lockrw); 2461 /* Free messages only if FREEMSG flag specified */ 2462 if (flags & VSW_MACRX_FREEMSG) { 2463 freemsgchain(mp); 2464 } 2465 D1(vswp, "%s:exit\n", __func__); 2466 return; 2467 } 2468 /* 2469 * If PROMISC flag is passed, then check if 2470 * the interface is in the PROMISC mode. 2471 * If not, drop the messages. 2472 */ 2473 if (flags & VSW_MACRX_PROMISC) { 2474 if (!(vswp->if_state & VSW_IF_PROMISC)) { 2475 RW_EXIT(&vswp->if_lockrw); 2476 /* Free messages only if FREEMSG flag specified */ 2477 if (flags & VSW_MACRX_FREEMSG) { 2478 freemsgchain(mp); 2479 } 2480 D1(vswp, "%s:exit\n", __func__); 2481 return; 2482 } 2483 } 2484 RW_EXIT(&vswp->if_lockrw); 2485 /* 2486 * If COPYMSG flag is passed, then make a copy 2487 * of the message chain and send up the copy. 2488 */ 2489 if (flags & VSW_MACRX_COPYMSG) { 2490 mp = copymsgchain(mp); 2491 if (mp == NULL) { 2492 D1(vswp, "%s:exit\n", __func__); 2493 return; 2494 } 2495 } 2496 2497 D2(vswp, "%s: sending up stack", __func__); 2498 2499 mpt = NULL; 2500 (void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt); 2501 if (mp != NULL) { 2502 mac_rx(vswp->if_mh, mrh, mp); 2503 } 2504 D1(vswp, "%s:exit\n", __func__); 2505 } 2506 2507 /* copy mac address of vsw into soft state structure */ 2508 static void 2509 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr) 2510 { 2511 int i; 2512 2513 WRITE_ENTER(&vswp->if_lockrw); 2514 for (i = ETHERADDRL - 1; i >= 0; i--) { 2515 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF; 2516 macaddr >>= 8; 2517 } 2518 RW_EXIT(&vswp->if_lockrw); 2519 } 2520