1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/ldc.h> 66 
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/callb.h>
#include <sys/vlan.h>

/*
 * Function prototypes.
 */
static int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
static int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
static int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *);

/* MDEG routines */
static int vsw_mdeg_register(vsw_t *vswp);
static void vsw_mdeg_unregister(vsw_t *vswp);
static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
static int vsw_read_mdprops(vsw_t *vswp);
static void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
	mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp,
	uint16_t *nvidsp, uint16_t *default_idp);
static int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
	md_t *mdp, mde_cookie_t *node);
static void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
	mde_cookie_t node);
static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);

/* Mac driver related routines */
static int vsw_mac_register(vsw_t *);
static int vsw_mac_unregister(vsw_t *);
static int vsw_m_stat(void *, uint_t, uint64_t *);
static void vsw_m_stop(void *arg);
static int vsw_m_start(void *arg);
static int vsw_m_unicst(void *arg, const uint8_t *);
static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
static int vsw_m_promisc(void *arg, boolean_t);
static mblk_t *vsw_m_tx(void *arg, mblk_t *);
void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
    mblk_t *mp, vsw_macrx_flags_t flags);

/*
 * Functions imported from other files.
 */
extern void vsw_setup_switching_timeout(void *arg);
extern void vsw_stop_switching_timeout(vsw_t *vswp);
extern int vsw_setup_switching(vsw_t *);
extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
extern void vsw_del_mcst_vsw(vsw_t *);
extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
extern int vsw_detach_ports(vsw_t *vswp);
extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
extern int vsw_port_detach(vsw_t *vswp, int p_instance);
static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
	md_t *prev_mdp, mde_cookie_t prev_mdex);
extern int vsw_port_attach(vsw_port_t *port);
extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
extern int vsw_mac_attach(vsw_t *vswp);
extern void vsw_mac_detach(vsw_t *vswp);
extern int vsw_mac_open(vsw_t *vswp);
extern void vsw_mac_close(vsw_t *vswp);
extern int vsw_set_hw(vsw_t *, vsw_port_t *, int);
extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int);
extern void vsw_reconfig_hw(vsw_t *);
extern void vsw_unset_addrs(vsw_t *vswp);
extern void vsw_set_addrs(vsw_t *vswp);
extern void vsw_create_vlans(void *arg, int type);
extern void vsw_destroy_vlans(void *arg, int type);
extern void vsw_vlan_add_ids(void *arg, int type);
extern void vsw_vlan_remove_ids(void *arg, int type);
extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
	mblk_t **npt);
extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
extern void vsw_hio_cleanup(vsw_t *vswp);
extern void vsw_hio_start_ports(vsw_t *vswp);
void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);

/*
 * Internal tunables.
 */
int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
int	vsw_wretries = 100;		/* # of write attempts */
int	vsw_desc_delay = 0;		/* delay in us */
int	vsw_read_attempts = 5;		/* # of reads of descriptor */
int	vsw_mac_open_retries = 20;	/* max # of mac_open() retries */
int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */

uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */

/* delay in usec to wait for all references on a fdb entry to be dropped */
uint32_t vsw_fdbe_refcnt_delay = 10;

/*
 * Default vlan id. This is only used internally when the "default-vlan-id"
 * property is not present in the MD device node. Therefore, this should not be
 * used as a tunable; if this value is changed, the corresponding variable
 * should be updated to the same value in all vnets connected to this vsw.
 */
uint16_t	vsw_default_vlan_id = 1;

/*
 * Workaround for a version handshake bug in obp's vnet.
 * If vsw initiates version negotiation starting from the highest version,
 * obp sends a nack and terminates version handshake. To work around
 * this, we do not initiate version handshake when the channel comes up.
 * Instead, we wait for the peer to send its version info msg and go through
 * the version protocol exchange. If we successfully negotiate a version,
 * before sending the ack, we send our version info msg to the peer
 * using the <major,minor> version that we are about to ack.
 */
boolean_t vsw_obp_ver_proto_workaround = B_TRUE;

/*
 * In the absence of "priority-ether-types" property in MD, the following
 * internal tunable can be set to specify a single priority ethertype.
 */
uint64_t vsw_pri_eth_type = 0;

/*
 * Number of transmit priority buffers that are preallocated per device.
 * This number is chosen to be a small value to throttle transmission
 * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
 */
uint32_t vsw_pri_tx_nmblks = 64;

boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanup */
int vsw_hio_cleanup_delay = 10000;	/* 10ms */

/*
 * External tunables.
 */
/*
 * Enable/disable thread per ring. This is a mode selection
 * that is done at vsw driver attach time.
 */
boolean_t vsw_multi_ring_enable = B_FALSE;
int vsw_mac_rx_rings = VSW_MAC_RX_RINGS;

/* Number of transmit descriptors - must be power of 2 */
uint32_t vsw_ntxds = VSW_RING_NUM_EL;

/*
 * Max number of mblks received in one receive operation.
 */
uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);

/*
 * Tunables for three different pools, that is, the size and
 * number of mblks for each pool.
 */
uint32_t vsw_mblk_size1 = VSW_MBLK_SZ_128;	/* size=128 for pool1 */
uint32_t vsw_mblk_size2 = VSW_MBLK_SZ_256;	/* size=256 for pool2 */
uint32_t vsw_mblk_size3 = VSW_MBLK_SZ_2048;	/* size=2048 for pool3 */
uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */

/*
 * vsw_max_tx_qcount is the maximum # of packets that can be queued
 * before the tx worker thread begins processing the queue. Its value
 * is chosen to be 4x the default length of tx descriptor ring.
 */
uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;

/*
 * MAC callbacks
 *
 * Entry points handed to the MAC layer through m_callbacks in
 * vsw_mac_register(). The trailing NULL slots are entry points this
 * driver does not supply.
 */
static	mac_callbacks_t	vsw_m_callbacks = {
	0,
	vsw_m_stat,
	vsw_m_start,
	vsw_m_stop,
	vsw_m_promisc,
	vsw_m_multicst,
	vsw_m_unicst,
	vsw_m_tx,
	NULL,
	NULL,
	NULL
};

/*
 * Character/block entry points. Open and close are no-ops (nulldev);
 * all data entry points are rejected (nodev).
 */
static	struct	cb_ops	vsw_cb_ops = {
	nulldev,			/* cb_open */
	nulldev,			/* cb_close */
	nodev,				/* cb_strategy */
	nodev,				/* cb_print */
	nodev,				/* cb_dump */
	nodev,				/* cb_read */
	nodev,				/* cb_write */
	nodev,				/* cb_ioctl */
	nodev,				/* cb_devmap */
	nodev,				/* cb_mmap */
	nodev,				/* cb_segmap */
	nochpoll,			/* cb_chpoll */
	ddi_prop_op,			/* cb_prop_op */
	NULL,				/* cb_stream */
	D_MP,				/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};

/* Device operations; wires up the attach/detach/getinfo routines below. */
static	struct	dev_ops	vsw_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	vsw_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vsw_attach,		/* devo_attach */
	vsw_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&vsw_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	ddi_power		/* devo_power */
};

extern	struct	mod_ops	mod_driverops;
static struct modldrv vswmodldrv = { 302 &mod_driverops, 303 "sun4v Virtual Switch", 304 &vsw_ops, 305 }; 306 307 #define LDC_ENTER_LOCK(ldcp) \ 308 mutex_enter(&((ldcp)->ldc_cblock));\ 309 mutex_enter(&((ldcp)->ldc_rxlock));\ 310 mutex_enter(&((ldcp)->ldc_txlock)); 311 #define LDC_EXIT_LOCK(ldcp) \ 312 mutex_exit(&((ldcp)->ldc_txlock));\ 313 mutex_exit(&((ldcp)->ldc_rxlock));\ 314 mutex_exit(&((ldcp)->ldc_cblock)); 315 316 /* Driver soft state ptr */ 317 static void *vsw_state; 318 319 /* 320 * Linked list of "vsw_t" structures - one per instance. 321 */ 322 vsw_t *vsw_head = NULL; 323 krwlock_t vsw_rw; 324 325 /* 326 * Property names 327 */ 328 static char vdev_propname[] = "virtual-device"; 329 static char vsw_propname[] = "virtual-network-switch"; 330 static char physdev_propname[] = "vsw-phys-dev"; 331 static char smode_propname[] = "vsw-switch-mode"; 332 static char macaddr_propname[] = "local-mac-address"; 333 static char remaddr_propname[] = "remote-mac-address"; 334 static char ldcids_propname[] = "ldc-ids"; 335 static char chan_propname[] = "channel-endpoint"; 336 static char id_propname[] = "id"; 337 static char reg_propname[] = "reg"; 338 static char pri_types_propname[] = "priority-ether-types"; 339 static char vsw_pvid_propname[] = "port-vlan-id"; 340 static char vsw_vid_propname[] = "vlan-id"; 341 static char vsw_dvid_propname[] = "default-vlan-id"; 342 static char port_pvid_propname[] = "remote-port-vlan-id"; 343 static char port_vid_propname[] = "remote-vlan-id"; 344 static char hybrid_propname[] = "hybrid"; 345 346 /* 347 * Matching criteria passed to the MDEG to register interest 348 * in changes to 'virtual-device-port' nodes identified by their 349 * 'id' property. 
350 */ 351 static md_prop_match_t vport_prop_match[] = { 352 { MDET_PROP_VAL, "id" }, 353 { MDET_LIST_END, NULL } 354 }; 355 356 static mdeg_node_match_t vport_match = { "virtual-device-port", 357 vport_prop_match }; 358 359 /* 360 * Matching criteria passed to the MDEG to register interest 361 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified 362 * by their 'name' and 'cfg-handle' properties. 363 */ 364 static md_prop_match_t vdev_prop_match[] = { 365 { MDET_PROP_STR, "name" }, 366 { MDET_PROP_VAL, "cfg-handle" }, 367 { MDET_LIST_END, NULL } 368 }; 369 370 static mdeg_node_match_t vdev_match = { "virtual-device", 371 vdev_prop_match }; 372 373 374 /* 375 * Specification of an MD node passed to the MDEG to filter any 376 * 'vport' nodes that do not belong to the specified node. This 377 * template is copied for each vsw instance and filled in with 378 * the appropriate 'cfg-handle' value before being passed to the MDEG. 379 */ 380 static mdeg_prop_spec_t vsw_prop_template[] = { 381 { MDET_PROP_STR, "name", vsw_propname }, 382 { MDET_PROP_VAL, "cfg-handle", NULL }, 383 { MDET_LIST_END, NULL, NULL } 384 }; 385 386 #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 387 388 #ifdef DEBUG 389 /* 390 * Print debug messages - set to 0x1f to enable all msgs 391 * or 0x0 to turn all off. 392 */ 393 int vswdbg = 0x0; 394 395 /* 396 * debug levels: 397 * 0x01: Function entry/exit tracing 398 * 0x02: Internal function messages 399 * 0x04: Verbose internal messages 400 * 0x08: Warning messages 401 * 0x10: Error messages 402 */ 403 404 void 405 vswdebug(vsw_t *vswp, const char *fmt, ...) 
406 { 407 char buf[512]; 408 va_list ap; 409 410 va_start(ap, fmt); 411 (void) vsprintf(buf, fmt, ap); 412 va_end(ap); 413 414 if (vswp == NULL) 415 cmn_err(CE_CONT, "%s\n", buf); 416 else 417 cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); 418 } 419 420 #endif /* DEBUG */ 421 422 static struct modlinkage modlinkage = { 423 MODREV_1, 424 &vswmodldrv, 425 NULL 426 }; 427 428 int 429 _init(void) 430 { 431 int status; 432 433 rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); 434 435 status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); 436 if (status != 0) { 437 return (status); 438 } 439 440 mac_init_ops(&vsw_ops, DRV_NAME); 441 status = mod_install(&modlinkage); 442 if (status != 0) { 443 ddi_soft_state_fini(&vsw_state); 444 } 445 return (status); 446 } 447 448 int 449 _fini(void) 450 { 451 int status; 452 453 status = mod_remove(&modlinkage); 454 if (status != 0) 455 return (status); 456 mac_fini_ops(&vsw_ops); 457 ddi_soft_state_fini(&vsw_state); 458 459 rw_destroy(&vsw_rw); 460 461 return (status); 462 } 463 464 int 465 _info(struct modinfo *modinfop) 466 { 467 return (mod_info(&modlinkage, modinfop)); 468 } 469 470 static int 471 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 472 { 473 vsw_t *vswp; 474 int instance; 475 char hashname[MAXNAMELEN]; 476 char qname[TASKQ_NAMELEN]; 477 enum { PROG_init = 0x00, 478 PROG_locks = 0x01, 479 PROG_readmd = 0x02, 480 PROG_fdb = 0x04, 481 PROG_mfdb = 0x08, 482 PROG_taskq = 0x10, 483 PROG_swmode = 0x20, 484 PROG_macreg = 0x40, 485 PROG_mdreg = 0x80} 486 progress; 487 488 progress = PROG_init; 489 int rv; 490 491 switch (cmd) { 492 case DDI_ATTACH: 493 break; 494 case DDI_RESUME: 495 /* nothing to do for this non-device */ 496 return (DDI_SUCCESS); 497 case DDI_PM_RESUME: 498 default: 499 return (DDI_FAILURE); 500 } 501 502 instance = ddi_get_instance(dip); 503 if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { 504 DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); 505 return (DDI_FAILURE); 506 } 
507 vswp = ddi_get_soft_state(vsw_state, instance); 508 509 if (vswp == NULL) { 510 DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance); 511 goto vsw_attach_fail; 512 } 513 514 vswp->dip = dip; 515 vswp->instance = instance; 516 ddi_set_driver_private(dip, (caddr_t)vswp); 517 518 mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL); 519 mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL); 520 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); 521 mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL); 522 rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); 523 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); 524 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); 525 526 progress |= PROG_locks; 527 528 rv = vsw_read_mdprops(vswp); 529 if (rv != 0) 530 goto vsw_attach_fail; 531 532 progress |= PROG_readmd; 533 534 /* setup the unicast forwarding database */ 535 (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", 536 vswp->instance); 537 D2(vswp, "creating unicast hash table (%s)...", hashname); 538 vswp->fdb_nchains = vsw_fdb_nchains; 539 vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains, 540 mod_hash_null_valdtor, sizeof (void *)); 541 vsw_create_vlans((void *)vswp, VSW_LOCALDEV); 542 progress |= PROG_fdb; 543 544 /* setup the multicast fowarding database */ 545 (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", 546 vswp->instance); 547 D2(vswp, "creating multicast hash table %s)...", hashname); 548 vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains, 549 mod_hash_null_valdtor, sizeof (void *)); 550 551 progress |= PROG_mfdb; 552 553 /* 554 * Create the taskq which will process all the VIO 555 * control messages. 
556 */ 557 (void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance); 558 if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, 559 TASKQ_DEFAULTPRI, 0)) == NULL) { 560 cmn_err(CE_WARN, "!vsw%d: Unable to create task queue", 561 vswp->instance); 562 goto vsw_attach_fail; 563 } 564 565 progress |= PROG_taskq; 566 567 /* prevent auto-detaching */ 568 if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip, 569 DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) { 570 cmn_err(CE_NOTE, "!Unable to set \"%s\" property for " 571 "instance %u", DDI_NO_AUTODETACH, instance); 572 } 573 574 /* 575 * Setup the required switching mode, 576 * based on the mdprops that we read earlier. 577 */ 578 rv = vsw_setup_switching(vswp); 579 if (rv == EAGAIN) { 580 /* 581 * Unable to setup switching mode; 582 * as the error is EAGAIN, schedule a timeout to retry. 583 */ 584 mutex_enter(&vswp->swtmout_lock); 585 586 vswp->swtmout_enabled = B_TRUE; 587 vswp->swtmout_id = 588 timeout(vsw_setup_switching_timeout, vswp, 589 (vsw_setup_switching_delay * drv_usectohz(MICROSEC))); 590 591 mutex_exit(&vswp->swtmout_lock); 592 } else if (rv != 0) { 593 goto vsw_attach_fail; 594 } 595 596 progress |= PROG_swmode; 597 598 /* Register with mac layer as a provider */ 599 rv = vsw_mac_register(vswp); 600 if (rv != 0) 601 goto vsw_attach_fail; 602 603 progress |= PROG_macreg; 604 605 /* 606 * Now we have everything setup, register an interest in 607 * specific MD nodes. 608 * 609 * The callback is invoked in 2 cases, firstly if upon mdeg 610 * registration there are existing nodes which match our specified 611 * criteria, and secondly if the MD is changed (and again, there 612 * are nodes which we are interested in present within it. Note 613 * that our callback will be invoked even if our specified nodes 614 * have not actually changed). 
615 * 616 */ 617 rv = vsw_mdeg_register(vswp); 618 if (rv != 0) 619 goto vsw_attach_fail; 620 621 progress |= PROG_mdreg; 622 623 WRITE_ENTER(&vsw_rw); 624 vswp->next = vsw_head; 625 vsw_head = vswp; 626 RW_EXIT(&vsw_rw); 627 628 ddi_report_dev(vswp->dip); 629 return (DDI_SUCCESS); 630 631 vsw_attach_fail: 632 DERR(NULL, "vsw_attach: failed"); 633 634 if (progress & PROG_mdreg) { 635 vsw_mdeg_unregister(vswp); 636 (void) vsw_detach_ports(vswp); 637 } 638 639 if (progress & PROG_macreg) 640 (void) vsw_mac_unregister(vswp); 641 642 if (progress & PROG_swmode) { 643 vsw_stop_switching_timeout(vswp); 644 vsw_hio_cleanup(vswp); 645 mutex_enter(&vswp->mac_lock); 646 vsw_mac_detach(vswp); 647 vsw_mac_close(vswp); 648 mutex_exit(&vswp->mac_lock); 649 } 650 651 if (progress & PROG_taskq) 652 ddi_taskq_destroy(vswp->taskq_p); 653 654 if (progress & PROG_mfdb) 655 mod_hash_destroy_hash(vswp->mfdb); 656 657 if (progress & PROG_fdb) { 658 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 659 mod_hash_destroy_hash(vswp->fdb_hashp); 660 } 661 662 if (progress & PROG_readmd) { 663 if (VSW_PRI_ETH_DEFINED(vswp)) { 664 kmem_free(vswp->pri_types, 665 sizeof (uint16_t) * vswp->pri_num_types); 666 } 667 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 668 } 669 670 if (progress & PROG_locks) { 671 rw_destroy(&vswp->plist.lockrw); 672 rw_destroy(&vswp->mfdbrw); 673 rw_destroy(&vswp->if_lockrw); 674 mutex_destroy(&vswp->swtmout_lock); 675 mutex_destroy(&vswp->mca_lock); 676 mutex_destroy(&vswp->mac_lock); 677 mutex_destroy(&vswp->hw_lock); 678 } 679 680 ddi_soft_state_free(vsw_state, instance); 681 return (DDI_FAILURE); 682 } 683 684 static int 685 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 686 { 687 vio_mblk_pool_t *poolp, *npoolp; 688 vsw_t **vswpp, *vswp; 689 int instance; 690 691 instance = ddi_get_instance(dip); 692 vswp = ddi_get_soft_state(vsw_state, instance); 693 694 if (vswp == NULL) { 695 return (DDI_FAILURE); 696 } 697 698 switch (cmd) { 699 case DDI_DETACH: 700 break; 701 case 
DDI_SUSPEND: 702 case DDI_PM_SUSPEND: 703 default: 704 return (DDI_FAILURE); 705 } 706 707 D2(vswp, "detaching instance %d", instance); 708 709 /* Stop any pending timeout to setup switching mode. */ 710 vsw_stop_switching_timeout(vswp); 711 712 if (vswp->if_state & VSW_IF_REG) { 713 if (vsw_mac_unregister(vswp) != 0) { 714 cmn_err(CE_WARN, "!vsw%d: Unable to detach from " 715 "MAC layer", vswp->instance); 716 return (DDI_FAILURE); 717 } 718 } 719 720 vsw_mdeg_unregister(vswp); 721 722 /* remove mac layer callback */ 723 mutex_enter(&vswp->mac_lock); 724 if ((vswp->mh != NULL) && (vswp->mrh != NULL)) { 725 mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE); 726 vswp->mrh = NULL; 727 } 728 mutex_exit(&vswp->mac_lock); 729 730 if (vsw_detach_ports(vswp) != 0) { 731 cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports", 732 vswp->instance); 733 return (DDI_FAILURE); 734 } 735 736 rw_destroy(&vswp->if_lockrw); 737 738 /* cleanup HybridIO */ 739 vsw_hio_cleanup(vswp); 740 741 mutex_destroy(&vswp->hw_lock); 742 743 /* 744 * Now that the ports have been deleted, stop and close 745 * the physical device. 746 */ 747 mutex_enter(&vswp->mac_lock); 748 749 vsw_mac_detach(vswp); 750 vsw_mac_close(vswp); 751 752 mutex_exit(&vswp->mac_lock); 753 754 mutex_destroy(&vswp->mac_lock); 755 mutex_destroy(&vswp->swtmout_lock); 756 757 /* 758 * Destroy any free pools that may still exist. 759 */ 760 poolp = vswp->rxh; 761 while (poolp != NULL) { 762 npoolp = vswp->rxh = poolp->nextp; 763 if (vio_destroy_mblks(poolp) != 0) { 764 vswp->rxh = poolp; 765 return (DDI_FAILURE); 766 } 767 poolp = npoolp; 768 } 769 770 /* 771 * Remove this instance from any entries it may be on in 772 * the hash table by using the list of addresses maintained 773 * in the vsw_t structure. 
774 */ 775 vsw_del_mcst_vsw(vswp); 776 777 vswp->mcap = NULL; 778 mutex_destroy(&vswp->mca_lock); 779 780 /* 781 * By now any pending tasks have finished and the underlying 782 * ldc's have been destroyed, so its safe to delete the control 783 * message taskq. 784 */ 785 if (vswp->taskq_p != NULL) 786 ddi_taskq_destroy(vswp->taskq_p); 787 788 /* 789 * At this stage all the data pointers in the hash table 790 * should be NULL, as all the ports have been removed and will 791 * have deleted themselves from the port lists which the data 792 * pointers point to. Hence we can destroy the table using the 793 * default destructors. 794 */ 795 D2(vswp, "vsw_detach: destroying hash tables.."); 796 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 797 mod_hash_destroy_hash(vswp->fdb_hashp); 798 vswp->fdb_hashp = NULL; 799 800 WRITE_ENTER(&vswp->mfdbrw); 801 mod_hash_destroy_hash(vswp->mfdb); 802 vswp->mfdb = NULL; 803 RW_EXIT(&vswp->mfdbrw); 804 rw_destroy(&vswp->mfdbrw); 805 806 /* free pri_types table */ 807 if (VSW_PRI_ETH_DEFINED(vswp)) { 808 kmem_free(vswp->pri_types, 809 sizeof (uint16_t) * vswp->pri_num_types); 810 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 811 } 812 813 ddi_remove_minor_node(dip, NULL); 814 815 rw_destroy(&vswp->plist.lockrw); 816 WRITE_ENTER(&vsw_rw); 817 for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) { 818 if (*vswpp == vswp) { 819 *vswpp = vswp->next; 820 break; 821 } 822 } 823 RW_EXIT(&vsw_rw); 824 ddi_soft_state_free(vsw_state, instance); 825 826 return (DDI_SUCCESS); 827 } 828 829 static int 830 vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 831 { 832 _NOTE(ARGUNUSED(dip)) 833 834 vsw_t *vswp = NULL; 835 dev_t dev = (dev_t)arg; 836 int instance; 837 838 instance = getminor(dev); 839 840 switch (infocmd) { 841 case DDI_INFO_DEVT2DEVINFO: 842 if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) { 843 *result = NULL; 844 return (DDI_FAILURE); 845 } 846 *result = vswp->dip; 847 return (DDI_SUCCESS); 848 849 
case DDI_INFO_DEVT2INSTANCE: 850 *result = (void *)(uintptr_t)instance; 851 return (DDI_SUCCESS); 852 853 default: 854 *result = NULL; 855 return (DDI_FAILURE); 856 } 857 } 858 859 /* 860 * Get the value of the "vsw-phys-dev" property in the specified 861 * node. This property is the name of the physical device that 862 * the virtual switch will use to talk to the outside world. 863 * 864 * Note it is valid for this property to be NULL (but the property 865 * itself must exist). Callers of this routine should verify that 866 * the value returned is what they expected (i.e. either NULL or non NULL). 867 * 868 * On success returns value of the property in region pointed to by 869 * the 'name' argument, and with return value of 0. Otherwise returns 1. 870 */ 871 static int 872 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name) 873 { 874 int len = 0; 875 int instance; 876 char *physname = NULL; 877 char *dev; 878 const char *dev_name; 879 char myname[MAXNAMELEN]; 880 881 dev_name = ddi_driver_name(vswp->dip); 882 instance = ddi_get_instance(vswp->dip); 883 (void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance); 884 885 if (md_get_prop_data(mdp, node, physdev_propname, 886 (uint8_t **)(&physname), &len) != 0) { 887 cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical " 888 "device(s) from MD", vswp->instance); 889 return (1); 890 } else if ((strlen(physname) + 1) > LIFNAMSIZ) { 891 cmn_err(CE_WARN, "!vsw%d: %s is too long a device name", 892 vswp->instance, physname); 893 return (1); 894 } else if (strcmp(myname, physname) == 0) { 895 /* 896 * Prevent the vswitch from opening itself as the 897 * network device. 
898 */ 899 cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name", 900 vswp->instance, physname); 901 return (1); 902 } else { 903 (void) strncpy(name, physname, strlen(physname) + 1); 904 D2(vswp, "%s: using first device specified (%s)", 905 __func__, physname); 906 } 907 908 #ifdef DEBUG 909 /* 910 * As a temporary measure to aid testing we check to see if there 911 * is a vsw.conf file present. If there is we use the value of the 912 * vsw_physname property in the file as the name of the physical 913 * device, overriding the value from the MD. 914 * 915 * There may be multiple devices listed, but for the moment 916 * we just use the first one. 917 */ 918 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0, 919 "vsw_physname", &dev) == DDI_PROP_SUCCESS) { 920 if ((strlen(dev) + 1) > LIFNAMSIZ) { 921 cmn_err(CE_WARN, "vsw%d: %s is too long a device name", 922 vswp->instance, dev); 923 ddi_prop_free(dev); 924 return (1); 925 } else { 926 cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from " 927 "config file", vswp->instance, dev); 928 929 (void) strncpy(name, dev, strlen(dev) + 1); 930 } 931 932 ddi_prop_free(dev); 933 } 934 #endif 935 936 return (0); 937 } 938 939 /* 940 * Read the 'vsw-switch-mode' property from the specified MD node. 941 * 942 * Returns 0 on success and the number of modes found in 'found', 943 * otherwise returns 1. 944 */ 945 static int 946 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 947 uint8_t *modes, int *found) 948 { 949 int len = 0; 950 int smode_num = 0; 951 char *smode = NULL; 952 char *curr_mode = NULL; 953 954 D1(vswp, "%s: enter", __func__); 955 956 /* 957 * Get the switch-mode property. The modes are listed in 958 * decreasing order of preference, i.e. prefered mode is 959 * first item in list. 960 */ 961 len = 0; 962 smode_num = 0; 963 if (md_get_prop_data(mdp, node, smode_propname, 964 (uint8_t **)(&smode), &len) != 0) { 965 /* 966 * Unable to get switch-mode property from MD, nothing 967 * more we can do. 
968 */ 969 cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property" 970 " from the MD", vswp->instance); 971 *found = 0; 972 return (1); 973 } 974 975 curr_mode = smode; 976 /* 977 * Modes of operation: 978 * 'switched' - layer 2 switching, underlying HW in 979 * programmed mode. 980 * 'promiscuous' - layer 2 switching, underlying HW in 981 * promiscuous mode. 982 * 'routed' - layer 3 (i.e. IP) routing, underlying HW 983 * in non-promiscuous mode. 984 */ 985 while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) { 986 D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode); 987 if (strcmp(curr_mode, "switched") == 0) { 988 modes[smode_num++] = VSW_LAYER2; 989 } else if (strcmp(curr_mode, "promiscuous") == 0) { 990 modes[smode_num++] = VSW_LAYER2_PROMISC; 991 } else if (strcmp(curr_mode, "routed") == 0) { 992 modes[smode_num++] = VSW_LAYER3; 993 } else { 994 DWARN(vswp, "%s: Unknown switch mode %s, " 995 "setting to default 'switched' mode", 996 __func__, curr_mode); 997 modes[smode_num++] = VSW_LAYER2; 998 } 999 curr_mode += strlen(curr_mode) + 1; 1000 } 1001 *found = smode_num; 1002 1003 D2(vswp, "%s: %d modes found", __func__, smode_num); 1004 1005 D1(vswp, "%s: exit", __func__); 1006 1007 return (0); 1008 } 1009 1010 /* 1011 * Register with the MAC layer as a network device, so we 1012 * can be plumbed if necessary. 
1013 */ 1014 static int 1015 vsw_mac_register(vsw_t *vswp) 1016 { 1017 mac_register_t *macp; 1018 int rv; 1019 1020 D1(vswp, "%s: enter", __func__); 1021 1022 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 1023 return (EINVAL); 1024 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1025 macp->m_driver = vswp; 1026 macp->m_dip = vswp->dip; 1027 macp->m_src_addr = (uint8_t *)&vswp->if_addr; 1028 macp->m_callbacks = &vsw_m_callbacks; 1029 macp->m_min_sdu = 0; 1030 macp->m_max_sdu = vsw_ethermtu; 1031 macp->m_margin = VLAN_TAGSZ; 1032 rv = mac_register(macp, &vswp->if_mh); 1033 mac_free(macp); 1034 if (rv != 0) { 1035 /* 1036 * Treat this as a non-fatal error as we may be 1037 * able to operate in some other mode. 1038 */ 1039 cmn_err(CE_NOTE, "!vsw%d: Unable to register as " 1040 "a provider with MAC layer", vswp->instance); 1041 return (rv); 1042 } 1043 1044 vswp->if_state |= VSW_IF_REG; 1045 1046 vswp->max_frame_size = vsw_ethermtu + sizeof (struct ether_header) 1047 + VLAN_TAGSZ; 1048 1049 D1(vswp, "%s: exit", __func__); 1050 1051 return (rv); 1052 } 1053 1054 static int 1055 vsw_mac_unregister(vsw_t *vswp) 1056 { 1057 int rv = 0; 1058 1059 D1(vswp, "%s: enter", __func__); 1060 1061 WRITE_ENTER(&vswp->if_lockrw); 1062 1063 if (vswp->if_state & VSW_IF_REG) { 1064 rv = mac_unregister(vswp->if_mh); 1065 if (rv != 0) { 1066 DWARN(vswp, "%s: unable to unregister from MAC " 1067 "framework", __func__); 1068 1069 RW_EXIT(&vswp->if_lockrw); 1070 D1(vswp, "%s: fail exit", __func__); 1071 return (rv); 1072 } 1073 1074 /* mark i/f as down and unregistered */ 1075 vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG); 1076 } 1077 RW_EXIT(&vswp->if_lockrw); 1078 1079 D1(vswp, "%s: exit", __func__); 1080 1081 return (rv); 1082 } 1083 1084 static int 1085 vsw_m_stat(void *arg, uint_t stat, uint64_t *val) 1086 { 1087 vsw_t *vswp = (vsw_t *)arg; 1088 1089 D1(vswp, "%s: enter", __func__); 1090 1091 mutex_enter(&vswp->mac_lock); 1092 if (vswp->mh == NULL) { 1093 mutex_exit(&vswp->mac_lock); 1094 
return (EINVAL); 1095 } 1096 1097 /* return stats from underlying device */ 1098 *val = mac_stat_get(vswp->mh, stat); 1099 1100 mutex_exit(&vswp->mac_lock); 1101 1102 return (0); 1103 } 1104 1105 static void 1106 vsw_m_stop(void *arg) 1107 { 1108 vsw_t *vswp = (vsw_t *)arg; 1109 1110 D1(vswp, "%s: enter", __func__); 1111 1112 WRITE_ENTER(&vswp->if_lockrw); 1113 vswp->if_state &= ~VSW_IF_UP; 1114 RW_EXIT(&vswp->if_lockrw); 1115 1116 mutex_enter(&vswp->hw_lock); 1117 1118 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 1119 1120 if (vswp->recfg_reqd) 1121 vsw_reconfig_hw(vswp); 1122 1123 mutex_exit(&vswp->hw_lock); 1124 1125 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1126 } 1127 1128 static int 1129 vsw_m_start(void *arg) 1130 { 1131 vsw_t *vswp = (vsw_t *)arg; 1132 1133 D1(vswp, "%s: enter", __func__); 1134 1135 WRITE_ENTER(&vswp->if_lockrw); 1136 1137 vswp->if_state |= VSW_IF_UP; 1138 1139 if (vswp->switching_setup_done == B_FALSE) { 1140 /* 1141 * If the switching mode has not been setup yet, just 1142 * return. The unicast address will be programmed 1143 * after the physical device is successfully setup by the 1144 * timeout handler. 1145 */ 1146 RW_EXIT(&vswp->if_lockrw); 1147 return (0); 1148 } 1149 1150 /* if in layer2 mode, program unicast address. */ 1151 if (vswp->mh != NULL) { 1152 mutex_enter(&vswp->hw_lock); 1153 (void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 1154 mutex_exit(&vswp->hw_lock); 1155 } 1156 1157 RW_EXIT(&vswp->if_lockrw); 1158 1159 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1160 return (0); 1161 } 1162 1163 /* 1164 * Change the local interface address. 1165 * 1166 * Note: we don't support this entry point. The local 1167 * mac address of the switch can only be changed via its 1168 * MD node properties. 
1169 */ 1170 static int 1171 vsw_m_unicst(void *arg, const uint8_t *macaddr) 1172 { 1173 _NOTE(ARGUNUSED(arg, macaddr)) 1174 1175 return (DDI_FAILURE); 1176 } 1177 1178 static int 1179 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 1180 { 1181 vsw_t *vswp = (vsw_t *)arg; 1182 mcst_addr_t *mcst_p = NULL; 1183 uint64_t addr = 0x0; 1184 int i, ret = 0; 1185 1186 D1(vswp, "%s: enter", __func__); 1187 1188 /* 1189 * Convert address into form that can be used 1190 * as hash table key. 1191 */ 1192 for (i = 0; i < ETHERADDRL; i++) { 1193 addr = (addr << 8) | mca[i]; 1194 } 1195 1196 D2(vswp, "%s: addr = 0x%llx", __func__, addr); 1197 1198 if (add) { 1199 D2(vswp, "%s: adding multicast", __func__); 1200 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1201 /* 1202 * Update the list of multicast addresses 1203 * contained within the vsw_t structure to 1204 * include this new one. 1205 */ 1206 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP); 1207 if (mcst_p == NULL) { 1208 DERR(vswp, "%s unable to alloc mem", __func__); 1209 (void) vsw_del_mcst(vswp, 1210 VSW_LOCALDEV, addr, NULL); 1211 return (1); 1212 } 1213 mcst_p->addr = addr; 1214 ether_copy(mca, &mcst_p->mca); 1215 1216 /* 1217 * Call into the underlying driver to program the 1218 * address into HW. 
1219 */ 1220 mutex_enter(&vswp->mac_lock); 1221 if (vswp->mh != NULL) { 1222 ret = mac_multicst_add(vswp->mh, mca); 1223 if (ret != 0) { 1224 cmn_err(CE_NOTE, "!vsw%d: unable to " 1225 "add multicast address", 1226 vswp->instance); 1227 mutex_exit(&vswp->mac_lock); 1228 (void) vsw_del_mcst(vswp, 1229 VSW_LOCALDEV, addr, NULL); 1230 kmem_free(mcst_p, sizeof (*mcst_p)); 1231 return (ret); 1232 } 1233 mcst_p->mac_added = B_TRUE; 1234 } 1235 mutex_exit(&vswp->mac_lock); 1236 1237 mutex_enter(&vswp->mca_lock); 1238 mcst_p->nextp = vswp->mcap; 1239 vswp->mcap = mcst_p; 1240 mutex_exit(&vswp->mca_lock); 1241 } else { 1242 cmn_err(CE_NOTE, "!vsw%d: unable to add multicast " 1243 "address", vswp->instance); 1244 } 1245 return (ret); 1246 } 1247 1248 D2(vswp, "%s: removing multicast", __func__); 1249 /* 1250 * Remove the address from the hash table.. 1251 */ 1252 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1253 1254 /* 1255 * ..and then from the list maintained in the 1256 * vsw_t structure. 
1257 */ 1258 mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr); 1259 ASSERT(mcst_p != NULL); 1260 1261 mutex_enter(&vswp->mac_lock); 1262 if (vswp->mh != NULL && mcst_p->mac_added) { 1263 (void) mac_multicst_remove(vswp->mh, mca); 1264 mcst_p->mac_added = B_FALSE; 1265 } 1266 mutex_exit(&vswp->mac_lock); 1267 kmem_free(mcst_p, sizeof (*mcst_p)); 1268 } 1269 1270 D1(vswp, "%s: exit", __func__); 1271 1272 return (0); 1273 } 1274 1275 static int 1276 vsw_m_promisc(void *arg, boolean_t on) 1277 { 1278 vsw_t *vswp = (vsw_t *)arg; 1279 1280 D1(vswp, "%s: enter", __func__); 1281 1282 WRITE_ENTER(&vswp->if_lockrw); 1283 if (on) 1284 vswp->if_state |= VSW_IF_PROMISC; 1285 else 1286 vswp->if_state &= ~VSW_IF_PROMISC; 1287 RW_EXIT(&vswp->if_lockrw); 1288 1289 D1(vswp, "%s: exit", __func__); 1290 1291 return (0); 1292 } 1293 1294 static mblk_t * 1295 vsw_m_tx(void *arg, mblk_t *mp) 1296 { 1297 vsw_t *vswp = (vsw_t *)arg; 1298 1299 D1(vswp, "%s: enter", __func__); 1300 1301 mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp); 1302 1303 if (mp == NULL) { 1304 return (NULL); 1305 } 1306 1307 vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL); 1308 1309 D1(vswp, "%s: exit", __func__); 1310 1311 return (NULL); 1312 } 1313 1314 /* 1315 * Register for machine description (MD) updates. 1316 * 1317 * Returns 0 on success, 1 on failure. 1318 */ 1319 static int 1320 vsw_mdeg_register(vsw_t *vswp) 1321 { 1322 mdeg_prop_spec_t *pspecp; 1323 mdeg_node_spec_t *inst_specp; 1324 mdeg_handle_t mdeg_hdl, mdeg_port_hdl; 1325 size_t templatesz; 1326 int rv; 1327 1328 D1(vswp, "%s: enter", __func__); 1329 1330 /* 1331 * Allocate and initialize a per-instance copy 1332 * of the global property spec array that will 1333 * uniquely identify this vsw instance. 
1334 */ 1335 templatesz = sizeof (vsw_prop_template); 1336 pspecp = kmem_zalloc(templatesz, KM_SLEEP); 1337 1338 bcopy(vsw_prop_template, pspecp, templatesz); 1339 1340 VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop); 1341 1342 /* initialize the complete prop spec structure */ 1343 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 1344 inst_specp->namep = "virtual-device"; 1345 inst_specp->specp = pspecp; 1346 1347 D2(vswp, "%s: instance %d registering with mdeg", __func__, 1348 vswp->regprop); 1349 /* 1350 * Register an interest in 'virtual-device' nodes with a 1351 * 'name' property of 'virtual-network-switch' 1352 */ 1353 rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb, 1354 (void *)vswp, &mdeg_hdl); 1355 if (rv != MDEG_SUCCESS) { 1356 DERR(vswp, "%s: mdeg_register failed (%d) for vsw node", 1357 __func__, rv); 1358 goto mdeg_reg_fail; 1359 } 1360 1361 /* 1362 * Register an interest in 'vsw-port' nodes. 1363 */ 1364 rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb, 1365 (void *)vswp, &mdeg_port_hdl); 1366 if (rv != MDEG_SUCCESS) { 1367 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv); 1368 (void) mdeg_unregister(mdeg_hdl); 1369 goto mdeg_reg_fail; 1370 } 1371 1372 /* save off data that will be needed later */ 1373 vswp->inst_spec = inst_specp; 1374 vswp->mdeg_hdl = mdeg_hdl; 1375 vswp->mdeg_port_hdl = mdeg_port_hdl; 1376 1377 D1(vswp, "%s: exit", __func__); 1378 return (0); 1379 1380 mdeg_reg_fail: 1381 cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks", 1382 vswp->instance); 1383 kmem_free(pspecp, templatesz); 1384 kmem_free(inst_specp, sizeof (mdeg_node_spec_t)); 1385 1386 vswp->mdeg_hdl = NULL; 1387 vswp->mdeg_port_hdl = NULL; 1388 1389 return (1); 1390 } 1391 1392 static void 1393 vsw_mdeg_unregister(vsw_t *vswp) 1394 { 1395 D1(vswp, "vsw_mdeg_unregister: enter"); 1396 1397 if (vswp->mdeg_hdl != NULL) 1398 (void) mdeg_unregister(vswp->mdeg_hdl); 1399 1400 if (vswp->mdeg_port_hdl != NULL) 1401 (void) 
mdeg_unregister(vswp->mdeg_port_hdl); 1402 1403 if (vswp->inst_spec != NULL) { 1404 if (vswp->inst_spec->specp != NULL) { 1405 (void) kmem_free(vswp->inst_spec->specp, 1406 sizeof (vsw_prop_template)); 1407 vswp->inst_spec->specp = NULL; 1408 } 1409 1410 (void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t)); 1411 vswp->inst_spec = NULL; 1412 } 1413 1414 D1(vswp, "vsw_mdeg_unregister: exit"); 1415 } 1416 1417 /* 1418 * Mdeg callback invoked for the vsw node itself. 1419 */ 1420 static int 1421 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1422 { 1423 vsw_t *vswp; 1424 md_t *mdp; 1425 mde_cookie_t node; 1426 uint64_t inst; 1427 char *node_name = NULL; 1428 1429 if (resp == NULL) 1430 return (MDEG_FAILURE); 1431 1432 vswp = (vsw_t *)cb_argp; 1433 1434 D1(vswp, "%s: added %d : removed %d : curr matched %d" 1435 " : prev matched %d", __func__, resp->added.nelem, 1436 resp->removed.nelem, resp->match_curr.nelem, 1437 resp->match_prev.nelem); 1438 1439 /* 1440 * We get an initial callback for this node as 'added' 1441 * after registering with mdeg. Note that we would have 1442 * already gathered information about this vsw node by 1443 * walking MD earlier during attach (in vsw_read_mdprops()). 1444 * So, there is a window where the properties of this 1445 * node might have changed when we get this initial 'added' 1446 * callback. We handle this as if an update occured 1447 * and invoke the same function which handles updates to 1448 * the properties of this vsw-node if any. 1449 * 1450 * A non-zero 'match' value indicates that the MD has been 1451 * updated and that a virtual-network-switch node is 1452 * present which may or may not have been updated. It is 1453 * up to the clients to examine their own nodes and 1454 * determine if they have changed. 
1455 */ 1456 if (resp->added.nelem != 0) { 1457 1458 if (resp->added.nelem != 1) { 1459 cmn_err(CE_NOTE, "!vsw%d: number of nodes added " 1460 "invalid: %d\n", vswp->instance, resp->added.nelem); 1461 return (MDEG_FAILURE); 1462 } 1463 1464 mdp = resp->added.mdp; 1465 node = resp->added.mdep[0]; 1466 1467 } else if (resp->match_curr.nelem != 0) { 1468 1469 if (resp->match_curr.nelem != 1) { 1470 cmn_err(CE_NOTE, "!vsw%d: number of nodes updated " 1471 "invalid: %d\n", vswp->instance, 1472 resp->match_curr.nelem); 1473 return (MDEG_FAILURE); 1474 } 1475 1476 mdp = resp->match_curr.mdp; 1477 node = resp->match_curr.mdep[0]; 1478 1479 } else { 1480 return (MDEG_FAILURE); 1481 } 1482 1483 /* Validate name and instance */ 1484 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { 1485 DERR(vswp, "%s: unable to get node name\n", __func__); 1486 return (MDEG_FAILURE); 1487 } 1488 1489 /* is this a virtual-network-switch? */ 1490 if (strcmp(node_name, vsw_propname) != 0) { 1491 DERR(vswp, "%s: Invalid node name: %s\n", 1492 __func__, node_name); 1493 return (MDEG_FAILURE); 1494 } 1495 1496 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { 1497 DERR(vswp, "%s: prop(cfg-handle) not found\n", 1498 __func__); 1499 return (MDEG_FAILURE); 1500 } 1501 1502 /* is this the right instance of vsw? */ 1503 if (inst != vswp->regprop) { 1504 DERR(vswp, "%s: Invalid cfg-handle: %lx\n", 1505 __func__, inst); 1506 return (MDEG_FAILURE); 1507 } 1508 1509 vsw_update_md_prop(vswp, mdp, node); 1510 1511 return (MDEG_SUCCESS); 1512 } 1513 1514 /* 1515 * Mdeg callback invoked for changes to the vsw-port nodes 1516 * under the vsw node. 
1517 */ 1518 static int 1519 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1520 { 1521 vsw_t *vswp; 1522 int idx; 1523 md_t *mdp; 1524 mde_cookie_t node; 1525 uint64_t inst; 1526 int rv; 1527 1528 if ((resp == NULL) || (cb_argp == NULL)) 1529 return (MDEG_FAILURE); 1530 1531 vswp = (vsw_t *)cb_argp; 1532 1533 D2(vswp, "%s: added %d : removed %d : curr matched %d" 1534 " : prev matched %d", __func__, resp->added.nelem, 1535 resp->removed.nelem, resp->match_curr.nelem, 1536 resp->match_prev.nelem); 1537 1538 /* process added ports */ 1539 for (idx = 0; idx < resp->added.nelem; idx++) { 1540 mdp = resp->added.mdp; 1541 node = resp->added.mdep[idx]; 1542 1543 D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); 1544 1545 if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) { 1546 cmn_err(CE_WARN, "!vsw%d: Unable to add new port " 1547 "(0x%lx), err=%d", vswp->instance, node, rv); 1548 } 1549 } 1550 1551 /* process removed ports */ 1552 for (idx = 0; idx < resp->removed.nelem; idx++) { 1553 mdp = resp->removed.mdp; 1554 node = resp->removed.mdep[idx]; 1555 1556 if (md_get_prop_val(mdp, node, id_propname, &inst)) { 1557 DERR(vswp, "%s: prop(%s) not found in port(%d)", 1558 __func__, id_propname, idx); 1559 continue; 1560 } 1561 1562 D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); 1563 1564 if (vsw_port_detach(vswp, inst) != 0) { 1565 cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld", 1566 vswp->instance, inst); 1567 } 1568 } 1569 1570 for (idx = 0; idx < resp->match_curr.nelem; idx++) { 1571 (void) vsw_port_update(vswp, resp->match_curr.mdp, 1572 resp->match_curr.mdep[idx], 1573 resp->match_prev.mdp, 1574 resp->match_prev.mdep[idx]); 1575 } 1576 1577 D1(vswp, "%s: exit", __func__); 1578 1579 return (MDEG_SUCCESS); 1580 } 1581 1582 /* 1583 * Scan the machine description for this instance of vsw 1584 * and read its properties. Called only from vsw_attach(). 1585 * Returns: 0 on success, 1 on failure. 
1586 */ 1587 static int 1588 vsw_read_mdprops(vsw_t *vswp) 1589 { 1590 md_t *mdp = NULL; 1591 mde_cookie_t rootnode; 1592 mde_cookie_t *listp = NULL; 1593 uint64_t inst; 1594 uint64_t cfgh; 1595 char *name; 1596 int rv = 1; 1597 int num_nodes = 0; 1598 int num_devs = 0; 1599 int listsz = 0; 1600 int i; 1601 1602 /* 1603 * In each 'virtual-device' node in the MD there is a 1604 * 'cfg-handle' property which is the MD's concept of 1605 * an instance number (this may be completely different from 1606 * the device drivers instance #). OBP reads that value and 1607 * stores it in the 'reg' property of the appropriate node in 1608 * the device tree. We first read this reg property and use this 1609 * to compare against the 'cfg-handle' property of vsw nodes 1610 * in MD to get to this specific vsw instance and then read 1611 * other properties that we are interested in. 1612 * We also cache the value of 'reg' property and use it later 1613 * to register callbacks with mdeg (see vsw_mdeg_register()) 1614 */ 1615 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 1616 DDI_PROP_DONTPASS, reg_propname, -1); 1617 if (inst == -1) { 1618 cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from " 1619 "OBP device tree", vswp->instance, reg_propname); 1620 return (rv); 1621 } 1622 1623 vswp->regprop = inst; 1624 1625 if ((mdp = md_get_handle()) == NULL) { 1626 DWARN(vswp, "%s: cannot init MD\n", __func__); 1627 return (rv); 1628 } 1629 1630 num_nodes = md_node_count(mdp); 1631 ASSERT(num_nodes > 0); 1632 1633 listsz = num_nodes * sizeof (mde_cookie_t); 1634 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 1635 1636 rootnode = md_root_node(mdp); 1637 1638 /* search for all "virtual_device" nodes */ 1639 num_devs = md_scan_dag(mdp, rootnode, 1640 md_find_name(mdp, vdev_propname), 1641 md_find_name(mdp, "fwd"), listp); 1642 if (num_devs <= 0) { 1643 DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs); 1644 goto vsw_readmd_exit; 1645 } 1646 1647 /* 1648 * Now loop 
through the list of virtual-devices looking for 1649 * devices with name "virtual-network-switch" and for each 1650 * such device compare its instance with what we have from 1651 * the 'reg' property to find the right node in MD and then 1652 * read all its properties. 1653 */ 1654 for (i = 0; i < num_devs; i++) { 1655 1656 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 1657 DWARN(vswp, "%s: name property not found\n", 1658 __func__); 1659 goto vsw_readmd_exit; 1660 } 1661 1662 /* is this a virtual-network-switch? */ 1663 if (strcmp(name, vsw_propname) != 0) 1664 continue; 1665 1666 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 1667 DWARN(vswp, "%s: cfg-handle property not found\n", 1668 __func__); 1669 goto vsw_readmd_exit; 1670 } 1671 1672 /* is this the required instance of vsw? */ 1673 if (inst != cfgh) 1674 continue; 1675 1676 /* now read all properties of this vsw instance */ 1677 rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]); 1678 break; 1679 } 1680 1681 vsw_readmd_exit: 1682 1683 kmem_free(listp, listsz); 1684 (void) md_fini_handle(mdp); 1685 return (rv); 1686 } 1687 1688 /* 1689 * Read the initial start-of-day values from the specified MD node. 
1690 */ 1691 static int 1692 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1693 { 1694 int i; 1695 uint64_t macaddr = 0; 1696 1697 D1(vswp, "%s: enter", __func__); 1698 1699 if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) { 1700 return (1); 1701 } 1702 1703 /* mac address for vswitch device itself */ 1704 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1705 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1706 vswp->instance); 1707 return (1); 1708 } 1709 1710 vsw_save_lmacaddr(vswp, macaddr); 1711 1712 if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) { 1713 DWARN(vswp, "%s: Unable to read %s property from MD, " 1714 "defaulting to 'switched' mode", 1715 __func__, smode_propname); 1716 1717 for (i = 0; i < NUM_SMODES; i++) 1718 vswp->smode[i] = VSW_LAYER2; 1719 1720 vswp->smode_num = NUM_SMODES; 1721 } else { 1722 ASSERT(vswp->smode_num != 0); 1723 } 1724 1725 /* read vlan id properties of this vsw instance */ 1726 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid, 1727 &vswp->vids, &vswp->nvids, &vswp->default_vlan_id); 1728 1729 /* read priority-ether-types */ 1730 vsw_read_pri_eth_types(vswp, mdp, node); 1731 1732 D1(vswp, "%s: exit", __func__); 1733 return (0); 1734 } 1735 1736 /* 1737 * Read vlan id properties of the given MD node. 
1738 * Arguments: 1739 * arg: device argument(vsw device or a port) 1740 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port) 1741 * mdp: machine description 1742 * node: md node cookie 1743 * 1744 * Returns: 1745 * pvidp: port-vlan-id of the node 1746 * vidspp: list of vlan-ids of the node 1747 * nvidsp: # of vlan-ids in the list 1748 * default_idp: default-vlan-id of the node(if node is vsw device) 1749 */ 1750 static void 1751 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node, 1752 uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp, 1753 uint16_t *default_idp) 1754 { 1755 vsw_t *vswp; 1756 vsw_port_t *portp; 1757 char *pvid_propname; 1758 char *vid_propname; 1759 uint_t nvids = 0; 1760 uint32_t vids_size; 1761 int rv; 1762 int i; 1763 uint64_t *data; 1764 uint64_t val; 1765 int size; 1766 int inst; 1767 1768 if (type == VSW_LOCALDEV) { 1769 1770 vswp = (vsw_t *)arg; 1771 pvid_propname = vsw_pvid_propname; 1772 vid_propname = vsw_vid_propname; 1773 inst = vswp->instance; 1774 1775 } else if (type == VSW_VNETPORT) { 1776 1777 portp = (vsw_port_t *)arg; 1778 vswp = portp->p_vswp; 1779 pvid_propname = port_pvid_propname; 1780 vid_propname = port_vid_propname; 1781 inst = portp->p_instance; 1782 1783 } else { 1784 return; 1785 } 1786 1787 if (type == VSW_LOCALDEV && default_idp != NULL) { 1788 rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val); 1789 if (rv != 0) { 1790 DWARN(vswp, "%s: prop(%s) not found", __func__, 1791 vsw_dvid_propname); 1792 1793 *default_idp = vsw_default_vlan_id; 1794 } else { 1795 *default_idp = val & 0xFFF; 1796 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1797 vsw_dvid_propname, inst, *default_idp); 1798 } 1799 } 1800 1801 rv = md_get_prop_val(mdp, node, pvid_propname, &val); 1802 if (rv != 0) { 1803 DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname); 1804 *pvidp = vsw_default_vlan_id; 1805 } else { 1806 1807 *pvidp = val & 0xFFF; 1808 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1809 
pvid_propname, inst, *pvidp); 1810 } 1811 1812 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data, 1813 &size); 1814 if (rv != 0) { 1815 D2(vswp, "%s: prop(%s) not found", __func__, vid_propname); 1816 size = 0; 1817 } else { 1818 size /= sizeof (uint64_t); 1819 } 1820 nvids = size; 1821 1822 if (nvids != 0) { 1823 D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst); 1824 vids_size = sizeof (uint16_t) * nvids; 1825 *vidspp = kmem_zalloc(vids_size, KM_SLEEP); 1826 for (i = 0; i < nvids; i++) { 1827 (*vidspp)[i] = data[i] & 0xFFFF; 1828 D2(vswp, " %d ", (*vidspp)[i]); 1829 } 1830 D2(vswp, "\n"); 1831 } 1832 1833 *nvidsp = nvids; 1834 } 1835 1836 /* 1837 * This function reads "priority-ether-types" property from md. This property 1838 * is used to enable support for priority frames. Applications which need 1839 * guaranteed and timely delivery of certain high priority frames to/from 1840 * a vnet or vsw within ldoms, should configure this property by providing 1841 * the ether type(s) for which the priority facility is needed. 1842 * Normal data frames are delivered over a ldc channel using the descriptor 1843 * ring mechanism which is constrained by factors such as descriptor ring size, 1844 * the rate at which the ring is processed at the peer ldc end point, etc. 1845 * The priority mechanism provides an Out-Of-Band path to send/receive frames 1846 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the 1847 * descriptor ring path and enables a more reliable and timely delivery of 1848 * frames to the peer. 1849 */ 1850 static void 1851 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1852 { 1853 int rv; 1854 uint16_t *types; 1855 uint64_t *data; 1856 int size; 1857 int i; 1858 size_t mblk_sz; 1859 1860 rv = md_get_prop_data(mdp, node, pri_types_propname, 1861 (uint8_t **)&data, &size); 1862 if (rv != 0) { 1863 /* 1864 * Property may not exist if we are running pre-ldoms1.1 f/w. 
1865 * Check if 'vsw_pri_eth_type' has been set in that case. 1866 */ 1867 if (vsw_pri_eth_type != 0) { 1868 size = sizeof (vsw_pri_eth_type); 1869 data = &vsw_pri_eth_type; 1870 } else { 1871 D3(vswp, "%s: prop(%s) not found", __func__, 1872 pri_types_propname); 1873 size = 0; 1874 } 1875 } 1876 1877 if (size == 0) { 1878 vswp->pri_num_types = 0; 1879 return; 1880 } 1881 1882 /* 1883 * we have some priority-ether-types defined; 1884 * allocate a table of these types and also 1885 * allocate a pool of mblks to transmit these 1886 * priority packets. 1887 */ 1888 size /= sizeof (uint64_t); 1889 vswp->pri_num_types = size; 1890 vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP); 1891 for (i = 0, types = vswp->pri_types; i < size; i++) { 1892 types[i] = data[i] & 0xFFFF; 1893 } 1894 mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7; 1895 (void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp); 1896 } 1897 1898 /* 1899 * Check to see if the relevant properties in the specified node have 1900 * changed, and if so take the appropriate action. 1901 * 1902 * If any of the properties are missing or invalid we don't take 1903 * any action, as this function should only be invoked when modifications 1904 * have been made to what we assume is a working configuration, which 1905 * we leave active. 1906 * 1907 * Note it is legal for this routine to be invoked even if none of the 1908 * properties in the port node within the MD have actually changed. 
1909 */ 1910 static void 1911 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1912 { 1913 char physname[LIFNAMSIZ]; 1914 char drv[LIFNAMSIZ]; 1915 uint_t ddi_instance; 1916 uint8_t new_smode[NUM_SMODES]; 1917 int i, smode_num = 0; 1918 uint64_t macaddr = 0; 1919 enum {MD_init = 0x1, 1920 MD_physname = 0x2, 1921 MD_macaddr = 0x4, 1922 MD_smode = 0x8, 1923 MD_vlans = 0x10} updated; 1924 int rv; 1925 uint16_t pvid; 1926 uint16_t *vids; 1927 uint16_t nvids; 1928 1929 updated = MD_init; 1930 1931 D1(vswp, "%s: enter", __func__); 1932 1933 /* 1934 * Check if name of physical device in MD has changed. 1935 */ 1936 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { 1937 /* 1938 * Do basic sanity check on new device name/instance, 1939 * if its non NULL. It is valid for the device name to 1940 * have changed from a non NULL to a NULL value, i.e. 1941 * the vsw is being changed to 'routed' mode. 1942 */ 1943 if ((strlen(physname) != 0) && 1944 (ddi_parse(physname, drv, 1945 &ddi_instance) != DDI_SUCCESS)) { 1946 cmn_err(CE_WARN, "!vsw%d: physical device %s is not" 1947 " a valid device name/instance", 1948 vswp->instance, physname); 1949 goto fail_reconf; 1950 } 1951 1952 if (strcmp(physname, vswp->physname)) { 1953 D2(vswp, "%s: device name changed from %s to %s", 1954 __func__, vswp->physname, physname); 1955 1956 updated |= MD_physname; 1957 } else { 1958 D2(vswp, "%s: device name unchanged at %s", 1959 __func__, vswp->physname); 1960 } 1961 } else { 1962 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " 1963 "device from updated MD.", vswp->instance); 1964 goto fail_reconf; 1965 } 1966 1967 /* 1968 * Check if MAC address has changed. 
1969 */ 1970 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1971 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1972 vswp->instance); 1973 goto fail_reconf; 1974 } else { 1975 uint64_t maddr = macaddr; 1976 READ_ENTER(&vswp->if_lockrw); 1977 for (i = ETHERADDRL - 1; i >= 0; i--) { 1978 if (vswp->if_addr.ether_addr_octet[i] 1979 != (macaddr & 0xFF)) { 1980 D2(vswp, "%s: octet[%d] 0x%x != 0x%x", 1981 __func__, i, 1982 vswp->if_addr.ether_addr_octet[i], 1983 (macaddr & 0xFF)); 1984 updated |= MD_macaddr; 1985 macaddr = maddr; 1986 break; 1987 } 1988 macaddr >>= 8; 1989 } 1990 RW_EXIT(&vswp->if_lockrw); 1991 if (updated & MD_macaddr) { 1992 vsw_save_lmacaddr(vswp, macaddr); 1993 } 1994 } 1995 1996 /* 1997 * Check if switching modes have changed. 1998 */ 1999 if (vsw_get_md_smodes(vswp, mdp, node, 2000 new_smode, &smode_num)) { 2001 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", 2002 vswp->instance, smode_propname); 2003 goto fail_reconf; 2004 } else { 2005 ASSERT(smode_num != 0); 2006 if (smode_num != vswp->smode_num) { 2007 D2(vswp, "%s: number of modes changed from %d to %d", 2008 __func__, vswp->smode_num, smode_num); 2009 } 2010 2011 for (i = 0; i < smode_num; i++) { 2012 if (new_smode[i] != vswp->smode[i]) { 2013 D2(vswp, "%s: mode changed from %d to %d", 2014 __func__, vswp->smode[i], new_smode[i]); 2015 updated |= MD_smode; 2016 break; 2017 } 2018 } 2019 } 2020 2021 /* Read the vlan ids */ 2022 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids, 2023 &nvids, NULL); 2024 2025 /* Determine if there are any vlan id updates */ 2026 if ((pvid != vswp->pvid) || /* pvid changed? */ 2027 (nvids != vswp->nvids) || /* # of vids changed? */ 2028 ((nvids != 0) && (vswp->nvids != 0) && /* vids changed? */ 2029 bcmp(vids, vswp->vids, sizeof (uint16_t) * nvids))) { 2030 updated |= MD_vlans; 2031 } 2032 2033 /* 2034 * Now make any changes which are needed... 
2035 */ 2036 2037 if (updated & (MD_physname | MD_smode)) { 2038 2039 /* 2040 * Stop any pending timeout to setup switching mode. 2041 */ 2042 vsw_stop_switching_timeout(vswp); 2043 2044 /* Cleanup HybridIO */ 2045 vsw_hio_cleanup(vswp); 2046 2047 /* 2048 * Remove unicst, mcst addrs of vsw interface 2049 * and ports from the physdev. 2050 */ 2051 vsw_unset_addrs(vswp); 2052 2053 /* 2054 * Stop, detach and close the old device.. 2055 */ 2056 mutex_enter(&vswp->mac_lock); 2057 2058 vsw_mac_detach(vswp); 2059 vsw_mac_close(vswp); 2060 2061 mutex_exit(&vswp->mac_lock); 2062 2063 /* 2064 * Update phys name. 2065 */ 2066 if (updated & MD_physname) { 2067 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", 2068 vswp->instance, vswp->physname, physname); 2069 (void) strncpy(vswp->physname, 2070 physname, strlen(physname) + 1); 2071 } 2072 2073 /* 2074 * Update array with the new switch mode values. 2075 */ 2076 if (updated & MD_smode) { 2077 for (i = 0; i < smode_num; i++) 2078 vswp->smode[i] = new_smode[i]; 2079 2080 vswp->smode_num = smode_num; 2081 vswp->smode_idx = 0; 2082 } 2083 2084 /* 2085 * ..and attach, start the new device. 2086 */ 2087 rv = vsw_setup_switching(vswp); 2088 if (rv == EAGAIN) { 2089 /* 2090 * Unable to setup switching mode. 2091 * As the error is EAGAIN, schedule a timeout to retry 2092 * and return. Programming addresses of ports and 2093 * vsw interface will be done when the timeout handler 2094 * completes successfully. 2095 */ 2096 mutex_enter(&vswp->swtmout_lock); 2097 2098 vswp->swtmout_enabled = B_TRUE; 2099 vswp->swtmout_id = 2100 timeout(vsw_setup_switching_timeout, vswp, 2101 (vsw_setup_switching_delay * 2102 drv_usectohz(MICROSEC))); 2103 2104 mutex_exit(&vswp->swtmout_lock); 2105 2106 return; 2107 2108 } else if (rv) { 2109 goto fail_update; 2110 } 2111 2112 /* 2113 * program unicst, mcst addrs of vsw interface 2114 * and ports in the physdev. 
2115 */ 2116 vsw_set_addrs(vswp); 2117 2118 /* Start HIO for ports that have already connected */ 2119 vsw_hio_start_ports(vswp); 2120 2121 } else if (updated & MD_macaddr) { 2122 /* 2123 * We enter here if only MD_macaddr is exclusively updated. 2124 * If MD_physname and/or MD_smode are also updated, then 2125 * as part of that, we would have implicitly processed 2126 * MD_macaddr update (above). 2127 */ 2128 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", 2129 vswp->instance, macaddr); 2130 2131 READ_ENTER(&vswp->if_lockrw); 2132 if (vswp->if_state & VSW_IF_UP) { 2133 2134 mutex_enter(&vswp->hw_lock); 2135 /* 2136 * Remove old mac address of vsw interface 2137 * from the physdev 2138 */ 2139 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 2140 /* 2141 * Program new mac address of vsw interface 2142 * in the physdev 2143 */ 2144 rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 2145 mutex_exit(&vswp->hw_lock); 2146 if (rv != 0) { 2147 cmn_err(CE_NOTE, 2148 "!vsw%d: failed to program interface " 2149 "unicast address\n", vswp->instance); 2150 } 2151 /* 2152 * Notify the MAC layer of the changed address. 2153 */ 2154 mac_unicst_update(vswp->if_mh, 2155 (uint8_t *)&vswp->if_addr); 2156 2157 } 2158 RW_EXIT(&vswp->if_lockrw); 2159 2160 } 2161 2162 if (updated & MD_vlans) { 2163 /* Remove existing vlan ids from the hash table. 
*/ 2164 vsw_vlan_remove_ids(vswp, VSW_LOCALDEV); 2165 2166 /* save the new vlan ids */ 2167 vswp->pvid = pvid; 2168 if (vswp->nvids != 0) { 2169 kmem_free(vswp->vids, sizeof (uint16_t) * vswp->nvids); 2170 vswp->nvids = 0; 2171 } 2172 if (nvids != 0) { 2173 vswp->nvids = nvids; 2174 vswp->vids = vids; 2175 } 2176 2177 /* add these new vlan ids into hash table */ 2178 vsw_vlan_add_ids(vswp, VSW_LOCALDEV); 2179 } else { 2180 if (nvids != 0) { 2181 kmem_free(vids, sizeof (uint16_t) * nvids); 2182 } 2183 } 2184 2185 return; 2186 2187 fail_reconf: 2188 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); 2189 return; 2190 2191 fail_update: 2192 cmn_err(CE_WARN, "!vsw%d: re-configuration failed", 2193 vswp->instance); 2194 } 2195 2196 /* 2197 * Read the port's md properties. 2198 */ 2199 static int 2200 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 2201 md_t *mdp, mde_cookie_t *node) 2202 { 2203 uint64_t ldc_id; 2204 uint8_t *addrp; 2205 int i, addrsz; 2206 int num_nodes = 0, nchan = 0; 2207 int listsz = 0; 2208 mde_cookie_t *listp = NULL; 2209 struct ether_addr ea; 2210 uint64_t macaddr; 2211 uint64_t inst = 0; 2212 uint64_t val; 2213 2214 if (md_get_prop_val(mdp, *node, id_propname, &inst)) { 2215 DWARN(vswp, "%s: prop(%s) not found", __func__, 2216 id_propname); 2217 return (1); 2218 } 2219 2220 /* 2221 * Find the channel endpoint node(s) (which should be under this 2222 * port node) which contain the channel id(s). 
2223 */ 2224 if ((num_nodes = md_node_count(mdp)) <= 0) { 2225 DERR(vswp, "%s: invalid number of nodes found (%d)", 2226 __func__, num_nodes); 2227 return (1); 2228 } 2229 2230 D2(vswp, "%s: %d nodes found", __func__, num_nodes); 2231 2232 /* allocate enough space for node list */ 2233 listsz = num_nodes * sizeof (mde_cookie_t); 2234 listp = kmem_zalloc(listsz, KM_SLEEP); 2235 2236 nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname), 2237 md_find_name(mdp, "fwd"), listp); 2238 2239 if (nchan <= 0) { 2240 DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname); 2241 kmem_free(listp, listsz); 2242 return (1); 2243 } 2244 2245 D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname); 2246 2247 /* use property from first node found */ 2248 if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) { 2249 DWARN(vswp, "%s: prop(%s) not found\n", __func__, 2250 id_propname); 2251 kmem_free(listp, listsz); 2252 return (1); 2253 } 2254 2255 /* don't need list any more */ 2256 kmem_free(listp, listsz); 2257 2258 D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id); 2259 2260 /* read mac-address property */ 2261 if (md_get_prop_data(mdp, *node, remaddr_propname, 2262 &addrp, &addrsz)) { 2263 DWARN(vswp, "%s: prop(%s) not found", 2264 __func__, remaddr_propname); 2265 return (1); 2266 } 2267 2268 if (addrsz < ETHERADDRL) { 2269 DWARN(vswp, "%s: invalid address size", __func__); 2270 return (1); 2271 } 2272 2273 macaddr = *((uint64_t *)addrp); 2274 D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr); 2275 2276 for (i = ETHERADDRL - 1; i >= 0; i--) { 2277 ea.ether_addr_octet[i] = macaddr & 0xFF; 2278 macaddr >>= 8; 2279 } 2280 2281 /* now update all properties into the port */ 2282 portp->p_vswp = vswp; 2283 portp->p_instance = inst; 2284 portp->addr_set = VSW_ADDR_UNSET; 2285 ether_copy(&ea, &portp->p_macaddr); 2286 if (nchan > VSW_PORT_MAX_LDCS) { 2287 D2(vswp, "%s: using first of %d ldc ids", 2288 __func__, nchan); 2289 nchan = 
VSW_PORT_MAX_LDCS; 2290 } 2291 portp->num_ldcs = nchan; 2292 portp->ldc_ids = 2293 kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP); 2294 bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan); 2295 2296 /* read vlan id properties of this port node */ 2297 vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid, 2298 &portp->vids, &portp->nvids, NULL); 2299 2300 /* Check if hybrid property is present */ 2301 if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) { 2302 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2303 portp->p_hio_enabled = B_TRUE; 2304 } else { 2305 portp->p_hio_enabled = B_FALSE; 2306 } 2307 /* 2308 * Port hio capability determined after version 2309 * negotiation, i.e., when we know the peer is HybridIO capable. 2310 */ 2311 portp->p_hio_capable = B_FALSE; 2312 return (0); 2313 } 2314 2315 /* 2316 * Add a new port to the system. 2317 * 2318 * Returns 0 on success, 1 on failure. 2319 */ 2320 int 2321 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) 2322 { 2323 vsw_port_t *portp; 2324 int rv; 2325 2326 portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP); 2327 2328 rv = vsw_port_read_props(portp, vswp, mdp, node); 2329 if (rv != 0) { 2330 kmem_free(portp, sizeof (*portp)); 2331 return (1); 2332 } 2333 2334 rv = vsw_port_attach(portp); 2335 if (rv != 0) { 2336 DERR(vswp, "%s: failed to attach port", __func__); 2337 return (1); 2338 } 2339 2340 return (0); 2341 } 2342 2343 static int 2344 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 2345 md_t *prev_mdp, mde_cookie_t prev_mdex) 2346 { 2347 uint64_t cport_num; 2348 uint64_t pport_num; 2349 vsw_port_list_t *plistp; 2350 vsw_port_t *portp; 2351 boolean_t updated_vlans = B_FALSE; 2352 uint16_t pvid; 2353 uint16_t *vids; 2354 uint16_t nvids; 2355 uint64_t val; 2356 boolean_t hio_enabled = B_FALSE; 2357 2358 /* 2359 * For now, we get port updates only if vlan ids changed. 2360 * We read the port num and do some sanity check. 
2361 */ 2362 if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) { 2363 return (1); 2364 } 2365 2366 if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) { 2367 return (1); 2368 } 2369 if (cport_num != pport_num) 2370 return (1); 2371 2372 plistp = &(vswp->plist); 2373 2374 READ_ENTER(&plistp->lockrw); 2375 2376 portp = vsw_lookup_port(vswp, cport_num); 2377 if (portp == NULL) { 2378 RW_EXIT(&plistp->lockrw); 2379 return (1); 2380 } 2381 2382 /* Read the vlan ids */ 2383 vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid, 2384 &vids, &nvids, NULL); 2385 2386 /* Determine if there are any vlan id updates */ 2387 if ((pvid != portp->pvid) || /* pvid changed? */ 2388 (nvids != portp->nvids) || /* # of vids changed? */ 2389 ((nvids != 0) && (portp->nvids != 0) && /* vids changed? */ 2390 bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) { 2391 updated_vlans = B_TRUE; 2392 } 2393 2394 if (updated_vlans == B_TRUE) { 2395 2396 /* Remove existing vlan ids from the hash table. 
*/ 2397 vsw_vlan_remove_ids(portp, VSW_VNETPORT); 2398 2399 /* save the new vlan ids */ 2400 portp->pvid = pvid; 2401 if (portp->nvids != 0) { 2402 kmem_free(portp->vids, 2403 sizeof (uint16_t) * portp->nvids); 2404 portp->nvids = 0; 2405 } 2406 if (nvids != 0) { 2407 portp->vids = kmem_zalloc(sizeof (uint16_t) * 2408 nvids, KM_SLEEP); 2409 bcopy(vids, portp->vids, sizeof (uint16_t) * nvids); 2410 portp->nvids = nvids; 2411 kmem_free(vids, sizeof (uint16_t) * nvids); 2412 } 2413 2414 /* add these new vlan ids into hash table */ 2415 vsw_vlan_add_ids(portp, VSW_VNETPORT); 2416 2417 /* reset the port if it is vlan unaware (ver < 1.3) */ 2418 vsw_vlan_unaware_port_reset(portp); 2419 } 2420 2421 /* Check if hybrid property is present */ 2422 if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) { 2423 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2424 hio_enabled = B_TRUE; 2425 } 2426 2427 if (portp->p_hio_enabled != hio_enabled) { 2428 vsw_hio_port_update(portp, hio_enabled); 2429 } 2430 2431 RW_EXIT(&plistp->lockrw); 2432 2433 return (0); 2434 } 2435 2436 /* 2437 * vsw_mac_rx -- A common function to send packets to the interface. 2438 * By default this function check if the interface is UP or not, the 2439 * rest of the behaviour depends on the flags as below: 2440 * 2441 * VSW_MACRX_PROMISC -- Check if the promisc mode set or not. 2442 * VSW_MACRX_COPYMSG -- Make a copy of the message(s). 2443 * VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack. 
2444 */ 2445 void 2446 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 2447 mblk_t *mp, vsw_macrx_flags_t flags) 2448 { 2449 mblk_t *mpt; 2450 2451 D1(vswp, "%s:enter\n", __func__); 2452 READ_ENTER(&vswp->if_lockrw); 2453 /* Check if the interface is up */ 2454 if (!(vswp->if_state & VSW_IF_UP)) { 2455 RW_EXIT(&vswp->if_lockrw); 2456 /* Free messages only if FREEMSG flag specified */ 2457 if (flags & VSW_MACRX_FREEMSG) { 2458 freemsgchain(mp); 2459 } 2460 D1(vswp, "%s:exit\n", __func__); 2461 return; 2462 } 2463 /* 2464 * If PROMISC flag is passed, then check if 2465 * the interface is in the PROMISC mode. 2466 * If not, drop the messages. 2467 */ 2468 if (flags & VSW_MACRX_PROMISC) { 2469 if (!(vswp->if_state & VSW_IF_PROMISC)) { 2470 RW_EXIT(&vswp->if_lockrw); 2471 /* Free messages only if FREEMSG flag specified */ 2472 if (flags & VSW_MACRX_FREEMSG) { 2473 freemsgchain(mp); 2474 } 2475 D1(vswp, "%s:exit\n", __func__); 2476 return; 2477 } 2478 } 2479 RW_EXIT(&vswp->if_lockrw); 2480 /* 2481 * If COPYMSG flag is passed, then make a copy 2482 * of the message chain and send up the copy. 2483 */ 2484 if (flags & VSW_MACRX_COPYMSG) { 2485 mp = copymsgchain(mp); 2486 if (mp == NULL) { 2487 D1(vswp, "%s:exit\n", __func__); 2488 return; 2489 } 2490 } 2491 2492 D2(vswp, "%s: sending up stack", __func__); 2493 2494 mpt = NULL; 2495 (void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt); 2496 if (mp != NULL) { 2497 mac_rx(vswp->if_mh, mrh, mp); 2498 } 2499 D1(vswp, "%s:exit\n", __func__); 2500 } 2501 2502 /* copy mac address of vsw into soft state structure */ 2503 static void 2504 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr) 2505 { 2506 int i; 2507 2508 WRITE_ENTER(&vswp->if_lockrw); 2509 for (i = ETHERADDRL - 1; i >= 0; i--) { 2510 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF; 2511 macaddr >>= 8; 2512 } 2513 RW_EXIT(&vswp->if_lockrw); 2514 } 2515