1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/ldc.h> 66 #include <sys/vsw_fdb.h> 67 #include <sys/vsw.h> 68 #include <sys/vio_mailbox.h> 69 #include <sys/vnet_mailbox.h> 70 #include <sys/vnet_common.h> 71 #include <sys/vio_util.h> 72 #include <sys/sdt.h> 73 #include <sys/atomic.h> 74 #include <sys/callb.h> 75 #include <sys/vlan.h> 76 77 /* 78 * Function prototypes. 79 */ 80 static int vsw_attach(dev_info_t *, ddi_attach_cmd_t); 81 static int vsw_detach(dev_info_t *, ddi_detach_cmd_t); 82 static int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 83 static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *); 84 static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *); 85 86 /* MDEG routines */ 87 static int vsw_mdeg_register(vsw_t *vswp); 88 static void vsw_mdeg_unregister(vsw_t *vswp); 89 static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *); 90 static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *); 91 static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t); 92 static int vsw_read_mdprops(vsw_t *vswp); 93 static void vsw_vlan_read_ids(void *arg, int type, md_t *mdp, 94 mde_cookie_t node, uint16_t *pvidp, uint16_t **vidspp, 95 uint16_t *nvidsp, uint16_t *default_idp); 96 static int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 97 md_t *mdp, mde_cookie_t *node); 98 static void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, 99 mde_cookie_t node); 100 static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t); 101 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 102 103 /* Mac driver related routines */ 104 static int vsw_mac_register(vsw_t *); 105 static int vsw_mac_unregister(vsw_t *); 106 static int vsw_m_stat(void *, uint_t, uint64_t *); 107 static void vsw_m_stop(void *arg); 108 static int vsw_m_start(void *arg); 109 static int vsw_m_unicst(void *arg, const uint8_t *); 110 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *); 111 static int vsw_m_promisc(void *arg, boolean_t); 112 static mblk_t *vsw_m_tx(void *arg, mblk_t *); 113 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 114 mblk_t *mp, vsw_macrx_flags_t flags); 115 116 /* 117 * Functions imported from other files. 118 */ 119 extern void vsw_setup_switching_timeout(void *arg); 120 extern void vsw_stop_switching_timeout(vsw_t *vswp); 121 extern int vsw_setup_switching(vsw_t *); 122 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 123 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 124 extern void vsw_del_mcst_vsw(vsw_t *); 125 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 126 extern int vsw_detach_ports(vsw_t *vswp); 127 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 128 extern int vsw_port_detach(vsw_t *vswp, int p_instance); 129 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 130 md_t *prev_mdp, mde_cookie_t prev_mdex); 131 extern int vsw_port_attach(vsw_port_t *port); 132 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 133 extern int vsw_mac_attach(vsw_t *vswp); 134 extern void vsw_mac_detach(vsw_t *vswp); 135 extern int vsw_mac_open(vsw_t *vswp); 136 extern void vsw_mac_close(vsw_t *vswp); 137 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int); 138 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int); 139 extern void vsw_reconfig_hw(vsw_t *); 140 extern void vsw_unset_addrs(vsw_t *vswp); 141 extern void vsw_set_addrs(vsw_t *vswp); 142 extern void vsw_create_vlans(void *arg, int type); 143 extern void vsw_destroy_vlans(void *arg, int type); 144 extern void vsw_vlan_add_ids(void *arg, int type); 145 extern void vsw_vlan_remove_ids(void *arg, int type); 146 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 147 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, 148 mblk_t **npt); 149 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 150 151 /* 152 * Internal tunables. 153 */ 154 int vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */ 155 int vsw_wretries = 100; /* # of write attempts */ 156 int vsw_desc_delay = 0; /* delay in us */ 157 int vsw_read_attempts = 5; /* # of reads of descriptor */ 158 int vsw_mac_open_retries = 20; /* max # of mac_open() retries */ 159 int vsw_setup_switching_delay = 3; /* setup sw timeout interval in sec */ 160 int vsw_ldc_tx_delay = 5; /* delay(ticks) for tx retries */ 161 int vsw_ldc_tx_retries = 10; /* # of ldc tx retries */ 162 boolean_t vsw_ldc_rxthr_enabled = B_TRUE; /* LDC Rx thread enabled */ 163 boolean_t vsw_ldc_txthr_enabled = B_TRUE; /* LDC Tx thread enabled */ 164 165 uint32_t vsw_fdb_nchains = 8; /* # of chains in fdb hash table */ 166 uint32_t vsw_vlan_nchains = 4; /* # of chains in vlan id hash table */ 167 uint32_t vsw_ethermtu = 1500; /* mtu of the device */ 168 169 /* delay in usec to wait for all references on a fdb entry to be dropped */ 170 uint32_t vsw_fdbe_refcnt_delay = 10; 171 172 /* 173 * Default vlan id. This is only used internally when the "default-vlan-id" 174 * property is not present in the MD device node. Therefore, this should not be 175 * used as a tunable; if this value is changed, the corresponding variable 176 * should be updated to the same value in all vnets connected to this vsw. 177 */ 178 uint16_t vsw_default_vlan_id = 1; 179 180 /* 181 * Workaround for a version handshake bug in obp's vnet. 182 * If vsw initiates version negotiation starting from the highest version, 183 * obp sends a nack and terminates version handshake. To workaround 184 * this, we do not initiate version handshake when the channel comes up. 185 * Instead, we wait for the peer to send its version info msg and go through 186 * the version protocol exchange. If we successfully negotiate a version, 187 * before sending the ack, we send our version info msg to the peer 188 * using the <major,minor> version that we are about to ack. 189 */ 190 boolean_t vsw_obp_ver_proto_workaround = B_TRUE; 191 192 /* 193 * In the absence of "priority-ether-types" property in MD, the following 194 * internal tunable can be set to specify a single priority ethertype. 195 */ 196 uint64_t vsw_pri_eth_type = 0; 197 198 /* 199 * Number of transmit priority buffers that are preallocated per device. 200 * This number is chosen to be a small value to throttle transmission 201 * of priority packets. Note: Must be a power of 2 for vio_create_mblks(). 202 */ 203 uint32_t vsw_pri_tx_nmblks = 64; 204 205 /* 206 * External tunables. 207 */ 208 /* 209 * Enable/disable thread per ring. This is a mode selection 210 * that is done a vsw driver attach time. 211 */ 212 boolean_t vsw_multi_ring_enable = B_FALSE; 213 int vsw_mac_rx_rings = VSW_MAC_RX_RINGS; 214 215 /* Number of transmit descriptors - must be power of 2 */ 216 uint32_t vsw_ntxds = VSW_RING_NUM_EL; 217 218 /* 219 * Max number of mblks received in one receive operation. 220 */ 221 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6); 222 223 /* 224 * Tunables for three different pools, that is, the size and 225 * number of mblks for each pool. 226 */ 227 uint32_t vsw_mblk_size1 = VSW_MBLK_SZ_128; /* size=128 for pool1 */ 228 uint32_t vsw_mblk_size2 = VSW_MBLK_SZ_256; /* size=256 for pool2 */ 229 uint32_t vsw_mblk_size3 = VSW_MBLK_SZ_2048; /* size=2048 for pool3 */ 230 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS; /* number of mblks for pool1 */ 231 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS; /* number of mblks for pool2 */ 232 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS; /* number of mblks for pool3 */ 233 234 /* 235 * vsw_max_tx_qcount is the maximum # of packets that can be queued 236 * before the tx worker thread begins processing the queue. Its value 237 * is chosen to be 4x the default length of tx descriptor ring. 238 */ 239 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL; 240 241 /* 242 * MAC callbacks 243 */ 244 static mac_callbacks_t vsw_m_callbacks = { 245 0, 246 vsw_m_stat, 247 vsw_m_start, 248 vsw_m_stop, 249 vsw_m_promisc, 250 vsw_m_multicst, 251 vsw_m_unicst, 252 vsw_m_tx, 253 NULL, 254 NULL, 255 NULL 256 }; 257 258 static struct cb_ops vsw_cb_ops = { 259 nulldev, /* cb_open */ 260 nulldev, /* cb_close */ 261 nodev, /* cb_strategy */ 262 nodev, /* cb_print */ 263 nodev, /* cb_dump */ 264 nodev, /* cb_read */ 265 nodev, /* cb_write */ 266 nodev, /* cb_ioctl */ 267 nodev, /* cb_devmap */ 268 nodev, /* cb_mmap */ 269 nodev, /* cb_segmap */ 270 nochpoll, /* cb_chpoll */ 271 ddi_prop_op, /* cb_prop_op */ 272 NULL, /* cb_stream */ 273 D_MP, /* cb_flag */ 274 CB_REV, /* rev */ 275 nodev, /* int (*cb_aread)() */ 276 nodev /* int (*cb_awrite)() */ 277 }; 278 279 static struct dev_ops vsw_ops = { 280 DEVO_REV, /* devo_rev */ 281 0, /* devo_refcnt */ 282 vsw_getinfo, /* devo_getinfo */ 283 nulldev, /* devo_identify */ 284 nulldev, /* devo_probe */ 285 vsw_attach, /* devo_attach */ 286 vsw_detach, /* devo_detach */ 287 nodev, /* devo_reset */ 288 &vsw_cb_ops, /* devo_cb_ops */ 289 (struct bus_ops *)NULL, /* devo_bus_ops */ 290 ddi_power /* devo_power */ 291 }; 292 293 extern struct mod_ops mod_driverops; 294 static struct modldrv vswmodldrv = { 295 &mod_driverops, 296 "sun4v Virtual Switch", 297 &vsw_ops, 298 }; 299 300 #define LDC_ENTER_LOCK(ldcp) \ 301 mutex_enter(&((ldcp)->ldc_cblock));\ 302 mutex_enter(&((ldcp)->ldc_rxlock));\ 303 mutex_enter(&((ldcp)->ldc_txlock)); 304 #define LDC_EXIT_LOCK(ldcp) \ 305 mutex_exit(&((ldcp)->ldc_txlock));\ 306 mutex_exit(&((ldcp)->ldc_rxlock));\ 307 mutex_exit(&((ldcp)->ldc_cblock)); 308 309 /* Driver soft state ptr */ 310 static void *vsw_state; 311 312 /* 313 * Linked list of "vsw_t" structures - one per instance. 314 */ 315 vsw_t *vsw_head = NULL; 316 krwlock_t vsw_rw; 317 318 /* 319 * Property names 320 */ 321 static char vdev_propname[] = "virtual-device"; 322 static char vsw_propname[] = "virtual-network-switch"; 323 static char physdev_propname[] = "vsw-phys-dev"; 324 static char smode_propname[] = "vsw-switch-mode"; 325 static char macaddr_propname[] = "local-mac-address"; 326 static char remaddr_propname[] = "remote-mac-address"; 327 static char ldcids_propname[] = "ldc-ids"; 328 static char chan_propname[] = "channel-endpoint"; 329 static char id_propname[] = "id"; 330 static char reg_propname[] = "reg"; 331 static char pri_types_propname[] = "priority-ether-types"; 332 static char vsw_pvid_propname[] = "port-vlan-id"; 333 static char vsw_vid_propname[] = "vlan-id"; 334 static char vsw_dvid_propname[] = "default-vlan-id"; 335 static char port_pvid_propname[] = "remote-port-vlan-id"; 336 static char port_vid_propname[] = "remote-vlan-id"; 337 338 /* 339 * Matching criteria passed to the MDEG to register interest 340 * in changes to 'virtual-device-port' nodes identified by their 341 * 'id' property. 342 */ 343 static md_prop_match_t vport_prop_match[] = { 344 { MDET_PROP_VAL, "id" }, 345 { MDET_LIST_END, NULL } 346 }; 347 348 static mdeg_node_match_t vport_match = { "virtual-device-port", 349 vport_prop_match }; 350 351 /* 352 * Matching criteria passed to the MDEG to register interest 353 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified 354 * by their 'name' and 'cfg-handle' properties. 355 */ 356 static md_prop_match_t vdev_prop_match[] = { 357 { MDET_PROP_STR, "name" }, 358 { MDET_PROP_VAL, "cfg-handle" }, 359 { MDET_LIST_END, NULL } 360 }; 361 362 static mdeg_node_match_t vdev_match = { "virtual-device", 363 vdev_prop_match }; 364 365 366 /* 367 * Specification of an MD node passed to the MDEG to filter any 368 * 'vport' nodes that do not belong to the specified node. This 369 * template is copied for each vsw instance and filled in with 370 * the appropriate 'cfg-handle' value before being passed to the MDEG. 371 */ 372 static mdeg_prop_spec_t vsw_prop_template[] = { 373 { MDET_PROP_STR, "name", vsw_propname }, 374 { MDET_PROP_VAL, "cfg-handle", NULL }, 375 { MDET_LIST_END, NULL, NULL } 376 }; 377 378 #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 379 380 #ifdef DEBUG 381 /* 382 * Print debug messages - set to 0x1f to enable all msgs 383 * or 0x0 to turn all off. 384 */ 385 int vswdbg = 0x0; 386 387 /* 388 * debug levels: 389 * 0x01: Function entry/exit tracing 390 * 0x02: Internal function messages 391 * 0x04: Verbose internal messages 392 * 0x08: Warning messages 393 * 0x10: Error messages 394 */ 395 396 void 397 vswdebug(vsw_t *vswp, const char *fmt, ...) 398 { 399 char buf[512]; 400 va_list ap; 401 402 va_start(ap, fmt); 403 (void) vsprintf(buf, fmt, ap); 404 va_end(ap); 405 406 if (vswp == NULL) 407 cmn_err(CE_CONT, "%s\n", buf); 408 else 409 cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); 410 } 411 412 #endif /* DEBUG */ 413 414 static struct modlinkage modlinkage = { 415 MODREV_1, 416 &vswmodldrv, 417 NULL 418 }; 419 420 int 421 _init(void) 422 { 423 int status; 424 425 rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); 426 427 status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); 428 if (status != 0) { 429 return (status); 430 } 431 432 mac_init_ops(&vsw_ops, DRV_NAME); 433 status = mod_install(&modlinkage); 434 if (status != 0) { 435 ddi_soft_state_fini(&vsw_state); 436 } 437 return (status); 438 } 439 440 int 441 _fini(void) 442 { 443 int status; 444 445 status = mod_remove(&modlinkage); 446 if (status != 0) 447 return (status); 448 mac_fini_ops(&vsw_ops); 449 ddi_soft_state_fini(&vsw_state); 450 451 rw_destroy(&vsw_rw); 452 453 return (status); 454 } 455 456 int 457 _info(struct modinfo *modinfop) 458 { 459 return (mod_info(&modlinkage, modinfop)); 460 } 461 462 static int 463 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 464 { 465 vsw_t *vswp; 466 int instance; 467 char hashname[MAXNAMELEN]; 468 char qname[TASKQ_NAMELEN]; 469 enum { PROG_init = 0x00, 470 PROG_locks = 0x01, 471 PROG_readmd = 0x02, 472 PROG_fdb = 0x04, 473 PROG_mfdb = 0x08, 474 PROG_taskq = 0x10, 475 PROG_swmode = 0x20, 476 PROG_macreg = 0x40, 477 PROG_mdreg = 0x80} 478 progress; 479 480 progress = PROG_init; 481 int rv; 482 483 switch (cmd) { 484 case DDI_ATTACH: 485 break; 486 case DDI_RESUME: 487 /* nothing to do for this non-device */ 488 return (DDI_SUCCESS); 489 case DDI_PM_RESUME: 490 default: 491 return (DDI_FAILURE); 492 } 493 494 instance = ddi_get_instance(dip); 495 if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { 496 DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); 497 return (DDI_FAILURE); 498 } 499 vswp = ddi_get_soft_state(vsw_state, instance); 500 501 if (vswp == NULL) { 502 DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance); 503 goto vsw_attach_fail; 504 } 505 506 vswp->dip = dip; 507 vswp->instance = instance; 508 ddi_set_driver_private(dip, (caddr_t)vswp); 509 510 mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL); 511 mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL); 512 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); 513 mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL); 514 rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); 515 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); 516 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); 517 518 progress |= PROG_locks; 519 520 rv = vsw_read_mdprops(vswp); 521 if (rv != 0) 522 goto vsw_attach_fail; 523 524 progress |= PROG_readmd; 525 526 /* setup the unicast forwarding database */ 527 (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", 528 vswp->instance); 529 D2(vswp, "creating unicast hash table (%s)...", hashname); 530 vswp->fdb_nchains = vsw_fdb_nchains; 531 vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains, 532 mod_hash_null_valdtor, sizeof (void *)); 533 vsw_create_vlans((void *)vswp, VSW_LOCALDEV); 534 progress |= PROG_fdb; 535 536 /* setup the multicast fowarding database */ 537 (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", 538 vswp->instance); 539 D2(vswp, "creating multicast hash table %s)...", hashname); 540 vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains, 541 mod_hash_null_valdtor, sizeof (void *)); 542 543 progress |= PROG_mfdb; 544 545 /* 546 * Create the taskq which will process all the VIO 547 * control messages. 548 */ 549 (void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance); 550 if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, 551 TASKQ_DEFAULTPRI, 0)) == NULL) { 552 cmn_err(CE_WARN, "!vsw%d: Unable to create task queue", 553 vswp->instance); 554 goto vsw_attach_fail; 555 } 556 557 progress |= PROG_taskq; 558 559 /* prevent auto-detaching */ 560 if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip, 561 DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) { 562 cmn_err(CE_NOTE, "!Unable to set \"%s\" property for " 563 "instance %u", DDI_NO_AUTODETACH, instance); 564 } 565 566 /* 567 * Setup the required switching mode, 568 * based on the mdprops that we read earlier. 569 */ 570 rv = vsw_setup_switching(vswp); 571 if (rv == EAGAIN) { 572 /* 573 * Unable to setup switching mode; 574 * as the error is EAGAIN, schedule a timeout to retry. 575 */ 576 mutex_enter(&vswp->swtmout_lock); 577 578 vswp->swtmout_enabled = B_TRUE; 579 vswp->swtmout_id = 580 timeout(vsw_setup_switching_timeout, vswp, 581 (vsw_setup_switching_delay * drv_usectohz(MICROSEC))); 582 583 mutex_exit(&vswp->swtmout_lock); 584 } else if (rv != 0) { 585 goto vsw_attach_fail; 586 } 587 588 progress |= PROG_swmode; 589 590 /* Register with mac layer as a provider */ 591 rv = vsw_mac_register(vswp); 592 if (rv != 0) 593 goto vsw_attach_fail; 594 595 progress |= PROG_macreg; 596 597 /* 598 * Now we have everything setup, register an interest in 599 * specific MD nodes. 600 * 601 * The callback is invoked in 2 cases, firstly if upon mdeg 602 * registration there are existing nodes which match our specified 603 * criteria, and secondly if the MD is changed (and again, there 604 * are nodes which we are interested in present within it. Note 605 * that our callback will be invoked even if our specified nodes 606 * have not actually changed). 607 * 608 */ 609 rv = vsw_mdeg_register(vswp); 610 if (rv != 0) 611 goto vsw_attach_fail; 612 613 progress |= PROG_mdreg; 614 615 WRITE_ENTER(&vsw_rw); 616 vswp->next = vsw_head; 617 vsw_head = vswp; 618 RW_EXIT(&vsw_rw); 619 620 ddi_report_dev(vswp->dip); 621 return (DDI_SUCCESS); 622 623 vsw_attach_fail: 624 DERR(NULL, "vsw_attach: failed"); 625 626 if (progress & PROG_mdreg) { 627 vsw_mdeg_unregister(vswp); 628 (void) vsw_detach_ports(vswp); 629 } 630 631 if (progress & PROG_macreg) 632 (void) vsw_mac_unregister(vswp); 633 634 if (progress & PROG_swmode) { 635 vsw_stop_switching_timeout(vswp); 636 mutex_enter(&vswp->mac_lock); 637 vsw_mac_detach(vswp); 638 vsw_mac_close(vswp); 639 mutex_exit(&vswp->mac_lock); 640 } 641 642 if (progress & PROG_taskq) 643 ddi_taskq_destroy(vswp->taskq_p); 644 645 if (progress & PROG_mfdb) 646 mod_hash_destroy_hash(vswp->mfdb); 647 648 if (progress & PROG_fdb) { 649 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 650 mod_hash_destroy_hash(vswp->fdb_hashp); 651 } 652 653 if (progress & PROG_readmd) { 654 if (VSW_PRI_ETH_DEFINED(vswp)) { 655 kmem_free(vswp->pri_types, 656 sizeof (uint16_t) * vswp->pri_num_types); 657 } 658 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 659 } 660 661 if (progress & PROG_locks) { 662 rw_destroy(&vswp->plist.lockrw); 663 rw_destroy(&vswp->mfdbrw); 664 rw_destroy(&vswp->if_lockrw); 665 mutex_destroy(&vswp->swtmout_lock); 666 mutex_destroy(&vswp->mca_lock); 667 mutex_destroy(&vswp->mac_lock); 668 mutex_destroy(&vswp->hw_lock); 669 } 670 671 ddi_soft_state_free(vsw_state, instance); 672 return (DDI_FAILURE); 673 } 674 675 static int 676 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 677 { 678 vio_mblk_pool_t *poolp, *npoolp; 679 vsw_t **vswpp, *vswp; 680 int instance; 681 682 instance = ddi_get_instance(dip); 683 vswp = ddi_get_soft_state(vsw_state, instance); 684 685 if (vswp == NULL) { 686 return (DDI_FAILURE); 687 } 688 689 switch (cmd) { 690 case DDI_DETACH: 691 break; 692 case DDI_SUSPEND: 693 case DDI_PM_SUSPEND: 694 default: 695 return (DDI_FAILURE); 696 } 697 698 D2(vswp, "detaching instance %d", instance); 699 700 /* Stop any pending timeout to setup switching mode. */ 701 vsw_stop_switching_timeout(vswp); 702 703 if (vswp->if_state & VSW_IF_REG) { 704 if (vsw_mac_unregister(vswp) != 0) { 705 cmn_err(CE_WARN, "!vsw%d: Unable to detach from " 706 "MAC layer", vswp->instance); 707 return (DDI_FAILURE); 708 } 709 } 710 711 vsw_mdeg_unregister(vswp); 712 713 /* remove mac layer callback */ 714 mutex_enter(&vswp->mac_lock); 715 if ((vswp->mh != NULL) && (vswp->mrh != NULL)) { 716 mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE); 717 vswp->mrh = NULL; 718 } 719 mutex_exit(&vswp->mac_lock); 720 721 if (vsw_detach_ports(vswp) != 0) { 722 cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports", 723 vswp->instance); 724 return (DDI_FAILURE); 725 } 726 727 rw_destroy(&vswp->if_lockrw); 728 729 mutex_destroy(&vswp->hw_lock); 730 731 /* 732 * Now that the ports have been deleted, stop and close 733 * the physical device. 734 */ 735 mutex_enter(&vswp->mac_lock); 736 737 vsw_mac_detach(vswp); 738 vsw_mac_close(vswp); 739 740 mutex_exit(&vswp->mac_lock); 741 742 mutex_destroy(&vswp->mac_lock); 743 mutex_destroy(&vswp->swtmout_lock); 744 745 /* 746 * Destroy any free pools that may still exist. 747 */ 748 poolp = vswp->rxh; 749 while (poolp != NULL) { 750 npoolp = vswp->rxh = poolp->nextp; 751 if (vio_destroy_mblks(poolp) != 0) { 752 vswp->rxh = poolp; 753 return (DDI_FAILURE); 754 } 755 poolp = npoolp; 756 } 757 758 /* 759 * Remove this instance from any entries it may be on in 760 * the hash table by using the list of addresses maintained 761 * in the vsw_t structure. 762 */ 763 vsw_del_mcst_vsw(vswp); 764 765 vswp->mcap = NULL; 766 mutex_destroy(&vswp->mca_lock); 767 768 /* 769 * By now any pending tasks have finished and the underlying 770 * ldc's have been destroyed, so its safe to delete the control 771 * message taskq. 772 */ 773 if (vswp->taskq_p != NULL) 774 ddi_taskq_destroy(vswp->taskq_p); 775 776 /* 777 * At this stage all the data pointers in the hash table 778 * should be NULL, as all the ports have been removed and will 779 * have deleted themselves from the port lists which the data 780 * pointers point to. Hence we can destroy the table using the 781 * default destructors. 782 */ 783 D2(vswp, "vsw_detach: destroying hash tables.."); 784 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 785 mod_hash_destroy_hash(vswp->fdb_hashp); 786 vswp->fdb_hashp = NULL; 787 788 WRITE_ENTER(&vswp->mfdbrw); 789 mod_hash_destroy_hash(vswp->mfdb); 790 vswp->mfdb = NULL; 791 RW_EXIT(&vswp->mfdbrw); 792 rw_destroy(&vswp->mfdbrw); 793 794 /* free pri_types table */ 795 if (VSW_PRI_ETH_DEFINED(vswp)) { 796 kmem_free(vswp->pri_types, 797 sizeof (uint16_t) * vswp->pri_num_types); 798 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 799 } 800 801 ddi_remove_minor_node(dip, NULL); 802 803 rw_destroy(&vswp->plist.lockrw); 804 WRITE_ENTER(&vsw_rw); 805 for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) { 806 if (*vswpp == vswp) { 807 *vswpp = vswp->next; 808 break; 809 } 810 } 811 RW_EXIT(&vsw_rw); 812 ddi_soft_state_free(vsw_state, instance); 813 814 return (DDI_SUCCESS); 815 } 816 817 static int 818 vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 819 { 820 _NOTE(ARGUNUSED(dip)) 821 822 vsw_t *vswp = NULL; 823 dev_t dev = (dev_t)arg; 824 int instance; 825 826 instance = getminor(dev); 827 828 switch (infocmd) { 829 case DDI_INFO_DEVT2DEVINFO: 830 if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) { 831 *result = NULL; 832 return (DDI_FAILURE); 833 } 834 *result = vswp->dip; 835 return (DDI_SUCCESS); 836 837 case DDI_INFO_DEVT2INSTANCE: 838 *result = (void *)(uintptr_t)instance; 839 return (DDI_SUCCESS); 840 841 default: 842 *result = NULL; 843 return (DDI_FAILURE); 844 } 845 } 846 847 /* 848 * Get the value of the "vsw-phys-dev" property in the specified 849 * node. This property is the name of the physical device that 850 * the virtual switch will use to talk to the outside world. 851 * 852 * Note it is valid for this property to be NULL (but the property 853 * itself must exist). Callers of this routine should verify that 854 * the value returned is what they expected (i.e. either NULL or non NULL). 855 * 856 * On success returns value of the property in region pointed to by 857 * the 'name' argument, and with return value of 0. Otherwise returns 1. 858 */ 859 static int 860 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name) 861 { 862 int len = 0; 863 int instance; 864 char *physname = NULL; 865 char *dev; 866 const char *dev_name; 867 char myname[MAXNAMELEN]; 868 869 dev_name = ddi_driver_name(vswp->dip); 870 instance = ddi_get_instance(vswp->dip); 871 (void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance); 872 873 if (md_get_prop_data(mdp, node, physdev_propname, 874 (uint8_t **)(&physname), &len) != 0) { 875 cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical " 876 "device(s) from MD", vswp->instance); 877 return (1); 878 } else if ((strlen(physname) + 1) > LIFNAMSIZ) { 879 cmn_err(CE_WARN, "!vsw%d: %s is too long a device name", 880 vswp->instance, physname); 881 return (1); 882 } else if (strcmp(myname, physname) == 0) { 883 /* 884 * Prevent the vswitch from opening itself as the 885 * network device. 886 */ 887 cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name", 888 vswp->instance, physname); 889 return (1); 890 } else { 891 (void) strncpy(name, physname, strlen(physname) + 1); 892 D2(vswp, "%s: using first device specified (%s)", 893 __func__, physname); 894 } 895 896 #ifdef DEBUG 897 /* 898 * As a temporary measure to aid testing we check to see if there 899 * is a vsw.conf file present. If there is we use the value of the 900 * vsw_physname property in the file as the name of the physical 901 * device, overriding the value from the MD. 902 * 903 * There may be multiple devices listed, but for the moment 904 * we just use the first one. 905 */ 906 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0, 907 "vsw_physname", &dev) == DDI_PROP_SUCCESS) { 908 if ((strlen(dev) + 1) > LIFNAMSIZ) { 909 cmn_err(CE_WARN, "vsw%d: %s is too long a device name", 910 vswp->instance, dev); 911 ddi_prop_free(dev); 912 return (1); 913 } else { 914 cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from " 915 "config file", vswp->instance, dev); 916 917 (void) strncpy(name, dev, strlen(dev) + 1); 918 } 919 920 ddi_prop_free(dev); 921 } 922 #endif 923 924 return (0); 925 } 926 927 /* 928 * Read the 'vsw-switch-mode' property from the specified MD node. 929 * 930 * Returns 0 on success and the number of modes found in 'found', 931 * otherwise returns 1. 932 */ 933 static int 934 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 935 uint8_t *modes, int *found) 936 { 937 int len = 0; 938 int smode_num = 0; 939 char *smode = NULL; 940 char *curr_mode = NULL; 941 942 D1(vswp, "%s: enter", __func__); 943 944 /* 945 * Get the switch-mode property. The modes are listed in 946 * decreasing order of preference, i.e. prefered mode is 947 * first item in list. 948 */ 949 len = 0; 950 smode_num = 0; 951 if (md_get_prop_data(mdp, node, smode_propname, 952 (uint8_t **)(&smode), &len) != 0) { 953 /* 954 * Unable to get switch-mode property from MD, nothing 955 * more we can do. 956 */ 957 cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property" 958 " from the MD", vswp->instance); 959 *found = 0; 960 return (1); 961 } 962 963 curr_mode = smode; 964 /* 965 * Modes of operation: 966 * 'switched' - layer 2 switching, underlying HW in 967 * programmed mode. 968 * 'promiscuous' - layer 2 switching, underlying HW in 969 * promiscuous mode. 970 * 'routed' - layer 3 (i.e. IP) routing, underlying HW 971 * in non-promiscuous mode. 972 */ 973 while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) { 974 D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode); 975 if (strcmp(curr_mode, "switched") == 0) { 976 modes[smode_num++] = VSW_LAYER2; 977 } else if (strcmp(curr_mode, "promiscuous") == 0) { 978 modes[smode_num++] = VSW_LAYER2_PROMISC; 979 } else if (strcmp(curr_mode, "routed") == 0) { 980 modes[smode_num++] = VSW_LAYER3; 981 } else { 982 DWARN(vswp, "%s: Unknown switch mode %s, " 983 "setting to default 'switched' mode", 984 __func__, curr_mode); 985 modes[smode_num++] = VSW_LAYER2; 986 } 987 curr_mode += strlen(curr_mode) + 1; 988 } 989 *found = smode_num; 990 991 D2(vswp, "%s: %d modes found", __func__, smode_num); 992 993 D1(vswp, "%s: exit", __func__); 994 995 return (0); 996 } 997 998 /* 999 * Register with the MAC layer as a network device, so we 1000 * can be plumbed if necessary. 1001 */ 1002 static int 1003 vsw_mac_register(vsw_t *vswp) 1004 { 1005 mac_register_t *macp; 1006 int rv; 1007 1008 D1(vswp, "%s: enter", __func__); 1009 1010 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 1011 return (EINVAL); 1012 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1013 macp->m_driver = vswp; 1014 macp->m_dip = vswp->dip; 1015 macp->m_src_addr = (uint8_t *)&vswp->if_addr; 1016 macp->m_callbacks = &vsw_m_callbacks; 1017 macp->m_min_sdu = 0; 1018 macp->m_max_sdu = vsw_ethermtu; 1019 macp->m_margin = VLAN_TAGSZ; 1020 rv = mac_register(macp, &vswp->if_mh); 1021 mac_free(macp); 1022 if (rv != 0) { 1023 /* 1024 * Treat this as a non-fatal error as we may be 1025 * able to operate in some other mode. 1026 */ 1027 cmn_err(CE_NOTE, "!vsw%d: Unable to register as " 1028 "a provider with MAC layer", vswp->instance); 1029 return (rv); 1030 } 1031 1032 vswp->if_state |= VSW_IF_REG; 1033 1034 vswp->max_frame_size = vsw_ethermtu + sizeof (struct ether_header) 1035 + VLAN_TAGSZ; 1036 1037 D1(vswp, "%s: exit", __func__); 1038 1039 return (rv); 1040 } 1041 1042 static int 1043 vsw_mac_unregister(vsw_t *vswp) 1044 { 1045 int rv = 0; 1046 1047 D1(vswp, "%s: enter", __func__); 1048 1049 WRITE_ENTER(&vswp->if_lockrw); 1050 1051 if (vswp->if_state & VSW_IF_REG) { 1052 rv = mac_unregister(vswp->if_mh); 1053 if (rv != 0) { 1054 DWARN(vswp, "%s: unable to unregister from MAC " 1055 "framework", __func__); 1056 1057 RW_EXIT(&vswp->if_lockrw); 1058 D1(vswp, "%s: fail exit", __func__); 1059 return (rv); 1060 } 1061 1062 /* mark i/f as down and unregistered */ 1063 vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG); 1064 } 1065 RW_EXIT(&vswp->if_lockrw); 1066 1067 D1(vswp, "%s: exit", __func__); 1068 1069 return (rv); 1070 } 1071 1072 static int 1073 vsw_m_stat(void *arg, uint_t stat, uint64_t *val) 1074 { 1075 vsw_t *vswp = (vsw_t *)arg; 1076 1077 D1(vswp, "%s: enter", __func__); 1078 1079 mutex_enter(&vswp->mac_lock); 1080 if (vswp->mh == NULL) { 1081 mutex_exit(&vswp->mac_lock); 1082 return (EINVAL); 1083 } 1084 1085 /* return stats from underlying device */ 1086 *val = mac_stat_get(vswp->mh, stat); 1087 1088 mutex_exit(&vswp->mac_lock); 1089 1090 return (0); 1091 } 1092 1093 static void 1094 vsw_m_stop(void *arg) 1095 { 1096 vsw_t *vswp = (vsw_t *)arg; 1097 1098 D1(vswp, "%s: enter", __func__); 1099 1100 WRITE_ENTER(&vswp->if_lockrw); 1101 vswp->if_state &= ~VSW_IF_UP; 1102 RW_EXIT(&vswp->if_lockrw); 1103 1104 mutex_enter(&vswp->hw_lock); 1105 1106 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 1107 1108 if (vswp->recfg_reqd) 1109 vsw_reconfig_hw(vswp); 1110 1111 mutex_exit(&vswp->hw_lock); 1112 1113 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1114 } 1115 1116 static int 1117 vsw_m_start(void *arg) 1118 { 1119 vsw_t *vswp = (vsw_t *)arg; 1120 1121 D1(vswp, "%s: enter", __func__); 1122 1123 WRITE_ENTER(&vswp->if_lockrw); 1124 1125 vswp->if_state |= VSW_IF_UP; 1126 1127 if (vswp->switching_setup_done == B_FALSE) { 1128 /* 1129 * If the switching mode has not been setup yet, just 1130 * return. The unicast address will be programmed 1131 * after the physical device is successfully setup by the 1132 * timeout handler. 1133 */ 1134 RW_EXIT(&vswp->if_lockrw); 1135 return (0); 1136 } 1137 1138 /* if in layer2 mode, program unicast address. */ 1139 if (vswp->mh != NULL) { 1140 mutex_enter(&vswp->hw_lock); 1141 (void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 1142 mutex_exit(&vswp->hw_lock); 1143 } 1144 1145 RW_EXIT(&vswp->if_lockrw); 1146 1147 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1148 return (0); 1149 } 1150 1151 /* 1152 * Change the local interface address. 1153 * 1154 * Note: we don't support this entry point. The local 1155 * mac address of the switch can only be changed via its 1156 * MD node properties. 1157 */ 1158 static int 1159 vsw_m_unicst(void *arg, const uint8_t *macaddr) 1160 { 1161 _NOTE(ARGUNUSED(arg, macaddr)) 1162 1163 return (DDI_FAILURE); 1164 } 1165 1166 static int 1167 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 1168 { 1169 vsw_t *vswp = (vsw_t *)arg; 1170 mcst_addr_t *mcst_p = NULL; 1171 uint64_t addr = 0x0; 1172 int i, ret = 0; 1173 1174 D1(vswp, "%s: enter", __func__); 1175 1176 /* 1177 * Convert address into form that can be used 1178 * as hash table key. 1179 */ 1180 for (i = 0; i < ETHERADDRL; i++) { 1181 addr = (addr << 8) | mca[i]; 1182 } 1183 1184 D2(vswp, "%s: addr = 0x%llx", __func__, addr); 1185 1186 if (add) { 1187 D2(vswp, "%s: adding multicast", __func__); 1188 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1189 /* 1190 * Update the list of multicast addresses 1191 * contained within the vsw_t structure to 1192 * include this new one. 1193 */ 1194 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP); 1195 if (mcst_p == NULL) { 1196 DERR(vswp, "%s unable to alloc mem", __func__); 1197 (void) vsw_del_mcst(vswp, 1198 VSW_LOCALDEV, addr, NULL); 1199 return (1); 1200 } 1201 mcst_p->addr = addr; 1202 ether_copy(mca, &mcst_p->mca); 1203 1204 /* 1205 * Call into the underlying driver to program the 1206 * address into HW. 1207 */ 1208 mutex_enter(&vswp->mac_lock); 1209 if (vswp->mh != NULL) { 1210 ret = mac_multicst_add(vswp->mh, mca); 1211 if (ret != 0) { 1212 cmn_err(CE_NOTE, "!vsw%d: unable to " 1213 "add multicast address", 1214 vswp->instance); 1215 mutex_exit(&vswp->mac_lock); 1216 (void) vsw_del_mcst(vswp, 1217 VSW_LOCALDEV, addr, NULL); 1218 kmem_free(mcst_p, sizeof (*mcst_p)); 1219 return (ret); 1220 } 1221 mcst_p->mac_added = B_TRUE; 1222 } 1223 mutex_exit(&vswp->mac_lock); 1224 1225 mutex_enter(&vswp->mca_lock); 1226 mcst_p->nextp = vswp->mcap; 1227 vswp->mcap = mcst_p; 1228 mutex_exit(&vswp->mca_lock); 1229 } else { 1230 cmn_err(CE_NOTE, "!vsw%d: unable to add multicast " 1231 "address", vswp->instance); 1232 } 1233 return (ret); 1234 } 1235 1236 D2(vswp, "%s: removing multicast", __func__); 1237 /* 1238 * Remove the address from the hash table.. 1239 */ 1240 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1241 1242 /* 1243 * ..and then from the list maintained in the 1244 * vsw_t structure. 1245 */ 1246 mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr); 1247 ASSERT(mcst_p != NULL); 1248 1249 mutex_enter(&vswp->mac_lock); 1250 if (vswp->mh != NULL && mcst_p->mac_added) { 1251 (void) mac_multicst_remove(vswp->mh, mca); 1252 mcst_p->mac_added = B_FALSE; 1253 } 1254 mutex_exit(&vswp->mac_lock); 1255 kmem_free(mcst_p, sizeof (*mcst_p)); 1256 } 1257 1258 D1(vswp, "%s: exit", __func__); 1259 1260 return (0); 1261 } 1262 1263 static int 1264 vsw_m_promisc(void *arg, boolean_t on) 1265 { 1266 vsw_t *vswp = (vsw_t *)arg; 1267 1268 D1(vswp, "%s: enter", __func__); 1269 1270 WRITE_ENTER(&vswp->if_lockrw); 1271 if (on) 1272 vswp->if_state |= VSW_IF_PROMISC; 1273 else 1274 vswp->if_state &= ~VSW_IF_PROMISC; 1275 RW_EXIT(&vswp->if_lockrw); 1276 1277 D1(vswp, "%s: exit", __func__); 1278 1279 return (0); 1280 } 1281 1282 static mblk_t * 1283 vsw_m_tx(void *arg, mblk_t *mp) 1284 { 1285 vsw_t *vswp = (vsw_t *)arg; 1286 1287 D1(vswp, "%s: enter", __func__); 1288 1289 mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp); 1290 1291 if (mp == NULL) { 1292 return (NULL); 1293 } 1294 1295 vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL); 1296 1297 D1(vswp, "%s: exit", __func__); 1298 1299 return (NULL); 1300 } 1301 1302 /* 1303 * Register for machine description (MD) updates. 1304 * 1305 * Returns 0 on success, 1 on failure. 1306 */ 1307 static int 1308 vsw_mdeg_register(vsw_t *vswp) 1309 { 1310 mdeg_prop_spec_t *pspecp; 1311 mdeg_node_spec_t *inst_specp; 1312 mdeg_handle_t mdeg_hdl, mdeg_port_hdl; 1313 size_t templatesz; 1314 int rv; 1315 1316 D1(vswp, "%s: enter", __func__); 1317 1318 /* 1319 * Allocate and initialize a per-instance copy 1320 * of the global property spec array that will 1321 * uniquely identify this vsw instance. 1322 */ 1323 templatesz = sizeof (vsw_prop_template); 1324 pspecp = kmem_zalloc(templatesz, KM_SLEEP); 1325 1326 bcopy(vsw_prop_template, pspecp, templatesz); 1327 1328 VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop); 1329 1330 /* initialize the complete prop spec structure */ 1331 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 1332 inst_specp->namep = "virtual-device"; 1333 inst_specp->specp = pspecp; 1334 1335 D2(vswp, "%s: instance %d registering with mdeg", __func__, 1336 vswp->regprop); 1337 /* 1338 * Register an interest in 'virtual-device' nodes with a 1339 * 'name' property of 'virtual-network-switch' 1340 */ 1341 rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb, 1342 (void *)vswp, &mdeg_hdl); 1343 if (rv != MDEG_SUCCESS) { 1344 DERR(vswp, "%s: mdeg_register failed (%d) for vsw node", 1345 __func__, rv); 1346 goto mdeg_reg_fail; 1347 } 1348 1349 /* 1350 * Register an interest in 'vsw-port' nodes. 1351 */ 1352 rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb, 1353 (void *)vswp, &mdeg_port_hdl); 1354 if (rv != MDEG_SUCCESS) { 1355 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv); 1356 (void) mdeg_unregister(mdeg_hdl); 1357 goto mdeg_reg_fail; 1358 } 1359 1360 /* save off data that will be needed later */ 1361 vswp->inst_spec = inst_specp; 1362 vswp->mdeg_hdl = mdeg_hdl; 1363 vswp->mdeg_port_hdl = mdeg_port_hdl; 1364 1365 D1(vswp, "%s: exit", __func__); 1366 return (0); 1367 1368 mdeg_reg_fail: 1369 cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks", 1370 vswp->instance); 1371 kmem_free(pspecp, templatesz); 1372 kmem_free(inst_specp, sizeof (mdeg_node_spec_t)); 1373 1374 vswp->mdeg_hdl = NULL; 1375 vswp->mdeg_port_hdl = NULL; 1376 1377 return (1); 1378 } 1379 1380 static void 1381 vsw_mdeg_unregister(vsw_t *vswp) 1382 { 1383 D1(vswp, "vsw_mdeg_unregister: enter"); 1384 1385 if (vswp->mdeg_hdl != NULL) 1386 (void) mdeg_unregister(vswp->mdeg_hdl); 1387 1388 if (vswp->mdeg_port_hdl != NULL) 1389 (void) mdeg_unregister(vswp->mdeg_port_hdl); 1390 1391 if (vswp->inst_spec != NULL) { 1392 if (vswp->inst_spec->specp != NULL) { 1393 (void) kmem_free(vswp->inst_spec->specp, 1394 sizeof (vsw_prop_template)); 1395 vswp->inst_spec->specp = NULL; 1396 } 1397 1398 (void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t)); 1399 vswp->inst_spec = NULL; 1400 } 1401 1402 D1(vswp, "vsw_mdeg_unregister: exit"); 1403 } 1404 1405 /* 1406 * Mdeg callback invoked for the vsw node itself. 1407 */ 1408 static int 1409 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1410 { 1411 vsw_t *vswp; 1412 md_t *mdp; 1413 mde_cookie_t node; 1414 uint64_t inst; 1415 char *node_name = NULL; 1416 1417 if (resp == NULL) 1418 return (MDEG_FAILURE); 1419 1420 vswp = (vsw_t *)cb_argp; 1421 1422 D1(vswp, "%s: added %d : removed %d : curr matched %d" 1423 " : prev matched %d", __func__, resp->added.nelem, 1424 resp->removed.nelem, resp->match_curr.nelem, 1425 resp->match_prev.nelem); 1426 1427 /* 1428 * We get an initial callback for this node as 'added' 1429 * after registering with mdeg. Note that we would have 1430 * already gathered information about this vsw node by 1431 * walking MD earlier during attach (in vsw_read_mdprops()). 1432 * So, there is a window where the properties of this 1433 * node might have changed when we get this initial 'added' 1434 * callback. We handle this as if an update occured 1435 * and invoke the same function which handles updates to 1436 * the properties of this vsw-node if any. 1437 * 1438 * A non-zero 'match' value indicates that the MD has been 1439 * updated and that a virtual-network-switch node is 1440 * present which may or may not have been updated. It is 1441 * up to the clients to examine their own nodes and 1442 * determine if they have changed. 1443 */ 1444 if (resp->added.nelem != 0) { 1445 1446 if (resp->added.nelem != 1) { 1447 cmn_err(CE_NOTE, "!vsw%d: number of nodes added " 1448 "invalid: %d\n", vswp->instance, resp->added.nelem); 1449 return (MDEG_FAILURE); 1450 } 1451 1452 mdp = resp->added.mdp; 1453 node = resp->added.mdep[0]; 1454 1455 } else if (resp->match_curr.nelem != 0) { 1456 1457 if (resp->match_curr.nelem != 1) { 1458 cmn_err(CE_NOTE, "!vsw%d: number of nodes updated " 1459 "invalid: %d\n", vswp->instance, 1460 resp->match_curr.nelem); 1461 return (MDEG_FAILURE); 1462 } 1463 1464 mdp = resp->match_curr.mdp; 1465 node = resp->match_curr.mdep[0]; 1466 1467 } else { 1468 return (MDEG_FAILURE); 1469 } 1470 1471 /* Validate name and instance */ 1472 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { 1473 DERR(vswp, "%s: unable to get node name\n", __func__); 1474 return (MDEG_FAILURE); 1475 } 1476 1477 /* is this a virtual-network-switch? */ 1478 if (strcmp(node_name, vsw_propname) != 0) { 1479 DERR(vswp, "%s: Invalid node name: %s\n", 1480 __func__, node_name); 1481 return (MDEG_FAILURE); 1482 } 1483 1484 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { 1485 DERR(vswp, "%s: prop(cfg-handle) not found\n", 1486 __func__); 1487 return (MDEG_FAILURE); 1488 } 1489 1490 /* is this the right instance of vsw? */ 1491 if (inst != vswp->regprop) { 1492 DERR(vswp, "%s: Invalid cfg-handle: %lx\n", 1493 __func__, inst); 1494 return (MDEG_FAILURE); 1495 } 1496 1497 vsw_update_md_prop(vswp, mdp, node); 1498 1499 return (MDEG_SUCCESS); 1500 } 1501 1502 /* 1503 * Mdeg callback invoked for changes to the vsw-port nodes 1504 * under the vsw node. 1505 */ 1506 static int 1507 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1508 { 1509 vsw_t *vswp; 1510 int idx; 1511 md_t *mdp; 1512 mde_cookie_t node; 1513 uint64_t inst; 1514 int rv; 1515 1516 if ((resp == NULL) || (cb_argp == NULL)) 1517 return (MDEG_FAILURE); 1518 1519 vswp = (vsw_t *)cb_argp; 1520 1521 D2(vswp, "%s: added %d : removed %d : curr matched %d" 1522 " : prev matched %d", __func__, resp->added.nelem, 1523 resp->removed.nelem, resp->match_curr.nelem, 1524 resp->match_prev.nelem); 1525 1526 /* process added ports */ 1527 for (idx = 0; idx < resp->added.nelem; idx++) { 1528 mdp = resp->added.mdp; 1529 node = resp->added.mdep[idx]; 1530 1531 D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); 1532 1533 if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) { 1534 cmn_err(CE_WARN, "!vsw%d: Unable to add new port " 1535 "(0x%lx), err=%d", vswp->instance, node, rv); 1536 } 1537 } 1538 1539 /* process removed ports */ 1540 for (idx = 0; idx < resp->removed.nelem; idx++) { 1541 mdp = resp->removed.mdp; 1542 node = resp->removed.mdep[idx]; 1543 1544 if (md_get_prop_val(mdp, node, id_propname, &inst)) { 1545 DERR(vswp, "%s: prop(%s) not found in port(%d)", 1546 __func__, id_propname, idx); 1547 continue; 1548 } 1549 1550 D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); 1551 1552 if (vsw_port_detach(vswp, inst) != 0) { 1553 cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld", 1554 vswp->instance, inst); 1555 } 1556 } 1557 1558 for (idx = 0; idx < resp->match_curr.nelem; idx++) { 1559 (void) vsw_port_update(vswp, resp->match_curr.mdp, 1560 resp->match_curr.mdep[idx], 1561 resp->match_prev.mdp, 1562 resp->match_prev.mdep[idx]); 1563 } 1564 1565 D1(vswp, "%s: exit", __func__); 1566 1567 return (MDEG_SUCCESS); 1568 } 1569 1570 /* 1571 * Scan the machine description for this instance of vsw 1572 * and read its properties. Called only from vsw_attach(). 1573 * Returns: 0 on success, 1 on failure. 1574 */ 1575 static int 1576 vsw_read_mdprops(vsw_t *vswp) 1577 { 1578 md_t *mdp = NULL; 1579 mde_cookie_t rootnode; 1580 mde_cookie_t *listp = NULL; 1581 uint64_t inst; 1582 uint64_t cfgh; 1583 char *name; 1584 int rv = 1; 1585 int num_nodes = 0; 1586 int num_devs = 0; 1587 int listsz = 0; 1588 int i; 1589 1590 /* 1591 * In each 'virtual-device' node in the MD there is a 1592 * 'cfg-handle' property which is the MD's concept of 1593 * an instance number (this may be completely different from 1594 * the device drivers instance #). OBP reads that value and 1595 * stores it in the 'reg' property of the appropriate node in 1596 * the device tree. We first read this reg property and use this 1597 * to compare against the 'cfg-handle' property of vsw nodes 1598 * in MD to get to this specific vsw instance and then read 1599 * other properties that we are interested in. 1600 * We also cache the value of 'reg' property and use it later 1601 * to register callbacks with mdeg (see vsw_mdeg_register()) 1602 */ 1603 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 1604 DDI_PROP_DONTPASS, reg_propname, -1); 1605 if (inst == -1) { 1606 cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from " 1607 "OBP device tree", vswp->instance, reg_propname); 1608 return (rv); 1609 } 1610 1611 vswp->regprop = inst; 1612 1613 if ((mdp = md_get_handle()) == NULL) { 1614 DWARN(vswp, "%s: cannot init MD\n", __func__); 1615 return (rv); 1616 } 1617 1618 num_nodes = md_node_count(mdp); 1619 ASSERT(num_nodes > 0); 1620 1621 listsz = num_nodes * sizeof (mde_cookie_t); 1622 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 1623 1624 rootnode = md_root_node(mdp); 1625 1626 /* search for all "virtual_device" nodes */ 1627 num_devs = md_scan_dag(mdp, rootnode, 1628 md_find_name(mdp, vdev_propname), 1629 md_find_name(mdp, "fwd"), listp); 1630 if (num_devs <= 0) { 1631 DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs); 1632 goto vsw_readmd_exit; 1633 } 1634 1635 /* 1636 * Now loop through the list of virtual-devices looking for 1637 * devices with name "virtual-network-switch" and for each 1638 * such device compare its instance with what we have from 1639 * the 'reg' property to find the right node in MD and then 1640 * read all its properties. 1641 */ 1642 for (i = 0; i < num_devs; i++) { 1643 1644 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 1645 DWARN(vswp, "%s: name property not found\n", 1646 __func__); 1647 goto vsw_readmd_exit; 1648 } 1649 1650 /* is this a virtual-network-switch? */ 1651 if (strcmp(name, vsw_propname) != 0) 1652 continue; 1653 1654 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 1655 DWARN(vswp, "%s: cfg-handle property not found\n", 1656 __func__); 1657 goto vsw_readmd_exit; 1658 } 1659 1660 /* is this the required instance of vsw? */ 1661 if (inst != cfgh) 1662 continue; 1663 1664 /* now read all properties of this vsw instance */ 1665 rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]); 1666 break; 1667 } 1668 1669 vsw_readmd_exit: 1670 1671 kmem_free(listp, listsz); 1672 (void) md_fini_handle(mdp); 1673 return (rv); 1674 } 1675 1676 /* 1677 * Read the initial start-of-day values from the specified MD node. 1678 */ 1679 static int 1680 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1681 { 1682 int i; 1683 uint64_t macaddr = 0; 1684 1685 D1(vswp, "%s: enter", __func__); 1686 1687 if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) { 1688 return (1); 1689 } 1690 1691 /* mac address for vswitch device itself */ 1692 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1693 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1694 vswp->instance); 1695 return (1); 1696 } 1697 1698 vsw_save_lmacaddr(vswp, macaddr); 1699 1700 if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) { 1701 DWARN(vswp, "%s: Unable to read %s property from MD, " 1702 "defaulting to 'switched' mode", 1703 __func__, smode_propname); 1704 1705 for (i = 0; i < NUM_SMODES; i++) 1706 vswp->smode[i] = VSW_LAYER2; 1707 1708 vswp->smode_num = NUM_SMODES; 1709 } else { 1710 ASSERT(vswp->smode_num != 0); 1711 } 1712 1713 /* read vlan id properties of this vsw instance */ 1714 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid, 1715 &vswp->vids, &vswp->nvids, &vswp->default_vlan_id); 1716 1717 /* read priority-ether-types */ 1718 vsw_read_pri_eth_types(vswp, mdp, node); 1719 1720 D1(vswp, "%s: exit", __func__); 1721 return (0); 1722 } 1723 1724 /* 1725 * Read vlan id properties of the given MD node. 1726 * Arguments: 1727 * arg: device argument(vsw device or a port) 1728 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port) 1729 * mdp: machine description 1730 * node: md node cookie 1731 * 1732 * Returns: 1733 * pvidp: port-vlan-id of the node 1734 * vidspp: list of vlan-ids of the node 1735 * nvidsp: # of vlan-ids in the list 1736 * default_idp: default-vlan-id of the node(if node is vsw device) 1737 */ 1738 static void 1739 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node, 1740 uint16_t *pvidp, uint16_t **vidspp, uint16_t *nvidsp, 1741 uint16_t *default_idp) 1742 { 1743 vsw_t *vswp; 1744 vsw_port_t *portp; 1745 char *pvid_propname; 1746 char *vid_propname; 1747 uint_t nvids = 0; 1748 uint32_t vids_size; 1749 int rv; 1750 int i; 1751 uint64_t *data; 1752 uint64_t val; 1753 int size; 1754 int inst; 1755 1756 if (type == VSW_LOCALDEV) { 1757 1758 vswp = (vsw_t *)arg; 1759 pvid_propname = vsw_pvid_propname; 1760 vid_propname = vsw_vid_propname; 1761 inst = vswp->instance; 1762 1763 } else if (type == VSW_VNETPORT) { 1764 1765 portp = (vsw_port_t *)arg; 1766 vswp = portp->p_vswp; 1767 pvid_propname = port_pvid_propname; 1768 vid_propname = port_vid_propname; 1769 inst = portp->p_instance; 1770 1771 } else { 1772 return; 1773 } 1774 1775 if (type == VSW_LOCALDEV && default_idp != NULL) { 1776 rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val); 1777 if (rv != 0) { 1778 DWARN(vswp, "%s: prop(%s) not found", __func__, 1779 vsw_dvid_propname); 1780 1781 *default_idp = vsw_default_vlan_id; 1782 } else { 1783 *default_idp = val & 0xFFF; 1784 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1785 vsw_dvid_propname, inst, *default_idp); 1786 } 1787 } 1788 1789 rv = md_get_prop_val(mdp, node, pvid_propname, &val); 1790 if (rv != 0) { 1791 DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname); 1792 *pvidp = vsw_default_vlan_id; 1793 } else { 1794 1795 *pvidp = val & 0xFFF; 1796 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1797 pvid_propname, inst, *pvidp); 1798 } 1799 1800 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data, 1801 &size); 1802 if (rv != 0) { 1803 D2(vswp, "%s: prop(%s) not found", __func__, vid_propname); 1804 size = 0; 1805 } else { 1806 size /= sizeof (uint64_t); 1807 } 1808 nvids = size; 1809 1810 if (nvids != 0) { 1811 D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst); 1812 vids_size = sizeof (uint16_t) * nvids; 1813 *vidspp = kmem_zalloc(vids_size, KM_SLEEP); 1814 for (i = 0; i < nvids; i++) { 1815 (*vidspp)[i] = data[i] & 0xFFFF; 1816 D2(vswp, " %d ", (*vidspp)[i]); 1817 } 1818 D2(vswp, "\n"); 1819 } 1820 1821 *nvidsp = nvids; 1822 } 1823 1824 /* 1825 * This function reads "priority-ether-types" property from md. This property 1826 * is used to enable support for priority frames. Applications which need 1827 * guaranteed and timely delivery of certain high priority frames to/from 1828 * a vnet or vsw within ldoms, should configure this property by providing 1829 * the ether type(s) for which the priority facility is needed. 1830 * Normal data frames are delivered over a ldc channel using the descriptor 1831 * ring mechanism which is constrained by factors such as descriptor ring size, 1832 * the rate at which the ring is processed at the peer ldc end point, etc. 1833 * The priority mechanism provides an Out-Of-Band path to send/receive frames 1834 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the 1835 * descriptor ring path and enables a more reliable and timely delivery of 1836 * frames to the peer. 1837 */ 1838 static void 1839 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1840 { 1841 int rv; 1842 uint16_t *types; 1843 uint64_t *data; 1844 int size; 1845 int i; 1846 size_t mblk_sz; 1847 1848 rv = md_get_prop_data(mdp, node, pri_types_propname, 1849 (uint8_t **)&data, &size); 1850 if (rv != 0) { 1851 /* 1852 * Property may not exist if we are running pre-ldoms1.1 f/w. 1853 * Check if 'vsw_pri_eth_type' has been set in that case. 1854 */ 1855 if (vsw_pri_eth_type != 0) { 1856 size = sizeof (vsw_pri_eth_type); 1857 data = &vsw_pri_eth_type; 1858 } else { 1859 D3(vswp, "%s: prop(%s) not found", __func__, 1860 pri_types_propname); 1861 size = 0; 1862 } 1863 } 1864 1865 if (size == 0) { 1866 vswp->pri_num_types = 0; 1867 return; 1868 } 1869 1870 /* 1871 * we have some priority-ether-types defined; 1872 * allocate a table of these types and also 1873 * allocate a pool of mblks to transmit these 1874 * priority packets. 1875 */ 1876 size /= sizeof (uint64_t); 1877 vswp->pri_num_types = size; 1878 vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP); 1879 for (i = 0, types = vswp->pri_types; i < size; i++) { 1880 types[i] = data[i] & 0xFFFF; 1881 } 1882 mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7; 1883 (void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp); 1884 } 1885 1886 /* 1887 * Check to see if the relevant properties in the specified node have 1888 * changed, and if so take the appropriate action. 1889 * 1890 * If any of the properties are missing or invalid we don't take 1891 * any action, as this function should only be invoked when modifications 1892 * have been made to what we assume is a working configuration, which 1893 * we leave active. 1894 * 1895 * Note it is legal for this routine to be invoked even if none of the 1896 * properties in the port node within the MD have actually changed. 1897 */ 1898 static void 1899 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1900 { 1901 char physname[LIFNAMSIZ]; 1902 char drv[LIFNAMSIZ]; 1903 uint_t ddi_instance; 1904 uint8_t new_smode[NUM_SMODES]; 1905 int i, smode_num = 0; 1906 uint64_t macaddr = 0; 1907 enum {MD_init = 0x1, 1908 MD_physname = 0x2, 1909 MD_macaddr = 0x4, 1910 MD_smode = 0x8, 1911 MD_vlans = 0x10} updated; 1912 int rv; 1913 uint16_t pvid; 1914 uint16_t *vids; 1915 uint16_t nvids; 1916 1917 updated = MD_init; 1918 1919 D1(vswp, "%s: enter", __func__); 1920 1921 /* 1922 * Check if name of physical device in MD has changed. 1923 */ 1924 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { 1925 /* 1926 * Do basic sanity check on new device name/instance, 1927 * if its non NULL. It is valid for the device name to 1928 * have changed from a non NULL to a NULL value, i.e. 1929 * the vsw is being changed to 'routed' mode. 1930 */ 1931 if ((strlen(physname) != 0) && 1932 (ddi_parse(physname, drv, 1933 &ddi_instance) != DDI_SUCCESS)) { 1934 cmn_err(CE_WARN, "!vsw%d: physical device %s is not" 1935 " a valid device name/instance", 1936 vswp->instance, physname); 1937 goto fail_reconf; 1938 } 1939 1940 if (strcmp(physname, vswp->physname)) { 1941 D2(vswp, "%s: device name changed from %s to %s", 1942 __func__, vswp->physname, physname); 1943 1944 updated |= MD_physname; 1945 } else { 1946 D2(vswp, "%s: device name unchanged at %s", 1947 __func__, vswp->physname); 1948 } 1949 } else { 1950 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " 1951 "device from updated MD.", vswp->instance); 1952 goto fail_reconf; 1953 } 1954 1955 /* 1956 * Check if MAC address has changed. 1957 */ 1958 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1959 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1960 vswp->instance); 1961 goto fail_reconf; 1962 } else { 1963 uint64_t maddr = macaddr; 1964 READ_ENTER(&vswp->if_lockrw); 1965 for (i = ETHERADDRL - 1; i >= 0; i--) { 1966 if (vswp->if_addr.ether_addr_octet[i] 1967 != (macaddr & 0xFF)) { 1968 D2(vswp, "%s: octet[%d] 0x%x != 0x%x", 1969 __func__, i, 1970 vswp->if_addr.ether_addr_octet[i], 1971 (macaddr & 0xFF)); 1972 updated |= MD_macaddr; 1973 macaddr = maddr; 1974 break; 1975 } 1976 macaddr >>= 8; 1977 } 1978 RW_EXIT(&vswp->if_lockrw); 1979 if (updated & MD_macaddr) { 1980 vsw_save_lmacaddr(vswp, macaddr); 1981 } 1982 } 1983 1984 /* 1985 * Check if switching modes have changed. 1986 */ 1987 if (vsw_get_md_smodes(vswp, mdp, node, 1988 new_smode, &smode_num)) { 1989 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", 1990 vswp->instance, smode_propname); 1991 goto fail_reconf; 1992 } else { 1993 ASSERT(smode_num != 0); 1994 if (smode_num != vswp->smode_num) { 1995 D2(vswp, "%s: number of modes changed from %d to %d", 1996 __func__, vswp->smode_num, smode_num); 1997 } 1998 1999 for (i = 0; i < smode_num; i++) { 2000 if (new_smode[i] != vswp->smode[i]) { 2001 D2(vswp, "%s: mode changed from %d to %d", 2002 __func__, vswp->smode[i], new_smode[i]); 2003 updated |= MD_smode; 2004 break; 2005 } 2006 } 2007 } 2008 2009 /* Read the vlan ids */ 2010 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids, 2011 &nvids, NULL); 2012 2013 /* Determine if there are any vlan id updates */ 2014 if ((pvid != vswp->pvid) || /* pvid changed? */ 2015 (nvids != vswp->nvids) || /* # of vids changed? */ 2016 ((nvids != 0) && (vswp->nvids != 0) && /* vids changed? */ 2017 bcmp(vids, vswp->vids, sizeof (uint16_t) * nvids))) { 2018 updated |= MD_vlans; 2019 } 2020 2021 /* 2022 * Now make any changes which are needed... 2023 */ 2024 2025 if (updated & (MD_physname | MD_smode)) { 2026 2027 /* 2028 * Stop any pending timeout to setup switching mode. 2029 */ 2030 vsw_stop_switching_timeout(vswp); 2031 2032 /* 2033 * Remove unicst, mcst addrs of vsw interface 2034 * and ports from the physdev. 2035 */ 2036 vsw_unset_addrs(vswp); 2037 2038 /* 2039 * Stop, detach and close the old device.. 2040 */ 2041 mutex_enter(&vswp->mac_lock); 2042 2043 vsw_mac_detach(vswp); 2044 vsw_mac_close(vswp); 2045 2046 mutex_exit(&vswp->mac_lock); 2047 2048 /* 2049 * Update phys name. 2050 */ 2051 if (updated & MD_physname) { 2052 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", 2053 vswp->instance, vswp->physname, physname); 2054 (void) strncpy(vswp->physname, 2055 physname, strlen(physname) + 1); 2056 } 2057 2058 /* 2059 * Update array with the new switch mode values. 2060 */ 2061 if (updated & MD_smode) { 2062 for (i = 0; i < smode_num; i++) 2063 vswp->smode[i] = new_smode[i]; 2064 2065 vswp->smode_num = smode_num; 2066 vswp->smode_idx = 0; 2067 } 2068 2069 /* 2070 * ..and attach, start the new device. 2071 */ 2072 rv = vsw_setup_switching(vswp); 2073 if (rv == EAGAIN) { 2074 /* 2075 * Unable to setup switching mode. 2076 * As the error is EAGAIN, schedule a timeout to retry 2077 * and return. Programming addresses of ports and 2078 * vsw interface will be done when the timeout handler 2079 * completes successfully. 2080 */ 2081 mutex_enter(&vswp->swtmout_lock); 2082 2083 vswp->swtmout_enabled = B_TRUE; 2084 vswp->swtmout_id = 2085 timeout(vsw_setup_switching_timeout, vswp, 2086 (vsw_setup_switching_delay * 2087 drv_usectohz(MICROSEC))); 2088 2089 mutex_exit(&vswp->swtmout_lock); 2090 2091 return; 2092 2093 } else if (rv) { 2094 goto fail_update; 2095 } 2096 2097 /* 2098 * program unicst, mcst addrs of vsw interface 2099 * and ports in the physdev. 2100 */ 2101 vsw_set_addrs(vswp); 2102 2103 } else if (updated & MD_macaddr) { 2104 /* 2105 * We enter here if only MD_macaddr is exclusively updated. 2106 * If MD_physname and/or MD_smode are also updated, then 2107 * as part of that, we would have implicitly processed 2108 * MD_macaddr update (above). 2109 */ 2110 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", 2111 vswp->instance, macaddr); 2112 2113 READ_ENTER(&vswp->if_lockrw); 2114 if (vswp->if_state & VSW_IF_UP) { 2115 2116 mutex_enter(&vswp->hw_lock); 2117 /* 2118 * Remove old mac address of vsw interface 2119 * from the physdev 2120 */ 2121 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 2122 /* 2123 * Program new mac address of vsw interface 2124 * in the physdev 2125 */ 2126 rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 2127 mutex_exit(&vswp->hw_lock); 2128 if (rv != 0) { 2129 cmn_err(CE_NOTE, 2130 "!vsw%d: failed to program interface " 2131 "unicast address\n", vswp->instance); 2132 } 2133 /* 2134 * Notify the MAC layer of the changed address. 2135 */ 2136 mac_unicst_update(vswp->if_mh, 2137 (uint8_t *)&vswp->if_addr); 2138 2139 } 2140 RW_EXIT(&vswp->if_lockrw); 2141 2142 } 2143 2144 if (updated & MD_vlans) { 2145 /* Remove existing vlan ids from the hash table. */ 2146 vsw_vlan_remove_ids(vswp, VSW_LOCALDEV); 2147 2148 /* save the new vlan ids */ 2149 vswp->pvid = pvid; 2150 if (vswp->nvids != 0) { 2151 kmem_free(vswp->vids, sizeof (uint16_t) * vswp->nvids); 2152 vswp->nvids = 0; 2153 } 2154 if (nvids != 0) { 2155 vswp->nvids = nvids; 2156 vswp->vids = vids; 2157 } 2158 2159 /* add these new vlan ids into hash table */ 2160 vsw_vlan_add_ids(vswp, VSW_LOCALDEV); 2161 } else { 2162 if (nvids != 0) { 2163 kmem_free(vids, sizeof (uint16_t) * nvids); 2164 } 2165 } 2166 2167 return; 2168 2169 fail_reconf: 2170 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); 2171 return; 2172 2173 fail_update: 2174 cmn_err(CE_WARN, "!vsw%d: re-configuration failed", 2175 vswp->instance); 2176 } 2177 2178 /* 2179 * Read the port's md properties. 2180 */ 2181 static int 2182 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 2183 md_t *mdp, mde_cookie_t *node) 2184 { 2185 uint64_t ldc_id; 2186 uint8_t *addrp; 2187 int i, addrsz; 2188 int num_nodes = 0, nchan = 0; 2189 int listsz = 0; 2190 mde_cookie_t *listp = NULL; 2191 struct ether_addr ea; 2192 uint64_t macaddr; 2193 uint64_t inst = 0; 2194 2195 if (md_get_prop_val(mdp, *node, id_propname, &inst)) { 2196 DWARN(vswp, "%s: prop(%s) not found", __func__, 2197 id_propname); 2198 return (1); 2199 } 2200 2201 /* 2202 * Find the channel endpoint node(s) (which should be under this 2203 * port node) which contain the channel id(s). 2204 */ 2205 if ((num_nodes = md_node_count(mdp)) <= 0) { 2206 DERR(vswp, "%s: invalid number of nodes found (%d)", 2207 __func__, num_nodes); 2208 return (1); 2209 } 2210 2211 D2(vswp, "%s: %d nodes found", __func__, num_nodes); 2212 2213 /* allocate enough space for node list */ 2214 listsz = num_nodes * sizeof (mde_cookie_t); 2215 listp = kmem_zalloc(listsz, KM_SLEEP); 2216 2217 nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname), 2218 md_find_name(mdp, "fwd"), listp); 2219 2220 if (nchan <= 0) { 2221 DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname); 2222 kmem_free(listp, listsz); 2223 return (1); 2224 } 2225 2226 D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname); 2227 2228 /* use property from first node found */ 2229 if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) { 2230 DWARN(vswp, "%s: prop(%s) not found\n", __func__, 2231 id_propname); 2232 kmem_free(listp, listsz); 2233 return (1); 2234 } 2235 2236 /* don't need list any more */ 2237 kmem_free(listp, listsz); 2238 2239 D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id); 2240 2241 /* read mac-address property */ 2242 if (md_get_prop_data(mdp, *node, remaddr_propname, 2243 &addrp, &addrsz)) { 2244 DWARN(vswp, "%s: prop(%s) not found", 2245 __func__, remaddr_propname); 2246 return (1); 2247 } 2248 2249 if (addrsz < ETHERADDRL) { 2250 DWARN(vswp, "%s: invalid address size", __func__); 2251 return (1); 2252 } 2253 2254 macaddr = *((uint64_t *)addrp); 2255 D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr); 2256 2257 for (i = ETHERADDRL - 1; i >= 0; i--) { 2258 ea.ether_addr_octet[i] = macaddr & 0xFF; 2259 macaddr >>= 8; 2260 } 2261 2262 /* now update all properties into the port */ 2263 portp->p_vswp = vswp; 2264 portp->p_instance = inst; 2265 portp->addr_set = VSW_ADDR_UNSET; 2266 ether_copy(&ea, &portp->p_macaddr); 2267 if (nchan > VSW_PORT_MAX_LDCS) { 2268 D2(vswp, "%s: using first of %d ldc ids", 2269 __func__, nchan); 2270 nchan = VSW_PORT_MAX_LDCS; 2271 } 2272 portp->num_ldcs = nchan; 2273 portp->ldc_ids = 2274 kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP); 2275 bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan); 2276 2277 /* read vlan id properties of this port node */ 2278 vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid, 2279 &portp->vids, &portp->nvids, NULL); 2280 2281 return (0); 2282 } 2283 2284 /* 2285 * Add a new port to the system. 2286 * 2287 * Returns 0 on success, 1 on failure. 2288 */ 2289 int 2290 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) 2291 { 2292 vsw_port_t *portp; 2293 int rv; 2294 2295 portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP); 2296 2297 rv = vsw_port_read_props(portp, vswp, mdp, node); 2298 if (rv != 0) { 2299 kmem_free(portp, sizeof (*portp)); 2300 return (1); 2301 } 2302 2303 rv = vsw_port_attach(portp); 2304 if (rv != 0) { 2305 DERR(vswp, "%s: failed to attach port", __func__); 2306 return (1); 2307 } 2308 2309 return (0); 2310 } 2311 2312 static int 2313 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 2314 md_t *prev_mdp, mde_cookie_t prev_mdex) 2315 { 2316 uint64_t cport_num; 2317 uint64_t pport_num; 2318 vsw_port_list_t *plistp; 2319 vsw_port_t *portp; 2320 boolean_t updated_vlans = B_FALSE; 2321 uint16_t pvid; 2322 uint16_t *vids; 2323 uint16_t nvids; 2324 2325 /* 2326 * For now, we get port updates only if vlan ids changed. 2327 * We read the port num and do some sanity check. 2328 */ 2329 if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) { 2330 return (1); 2331 } 2332 2333 if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) { 2334 return (1); 2335 } 2336 if (cport_num != pport_num) 2337 return (1); 2338 2339 plistp = &(vswp->plist); 2340 2341 READ_ENTER(&plistp->lockrw); 2342 2343 portp = vsw_lookup_port(vswp, cport_num); 2344 if (portp == NULL) { 2345 RW_EXIT(&plistp->lockrw); 2346 return (1); 2347 } 2348 2349 /* Read the vlan ids */ 2350 vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid, 2351 &vids, &nvids, NULL); 2352 2353 /* Determine if there are any vlan id updates */ 2354 if ((pvid != portp->pvid) || /* pvid changed? */ 2355 (nvids != portp->nvids) || /* # of vids changed? */ 2356 ((nvids != 0) && (portp->nvids != 0) && /* vids changed? */ 2357 bcmp(vids, portp->vids, sizeof (uint16_t) * nvids))) { 2358 updated_vlans = B_TRUE; 2359 } 2360 2361 if (updated_vlans == B_FALSE) { 2362 RW_EXIT(&plistp->lockrw); 2363 return (1); 2364 } 2365 2366 /* Remove existing vlan ids from the hash table. */ 2367 vsw_vlan_remove_ids(portp, VSW_VNETPORT); 2368 2369 /* save the new vlan ids */ 2370 portp->pvid = pvid; 2371 if (portp->nvids != 0) { 2372 kmem_free(portp->vids, sizeof (uint16_t) * portp->nvids); 2373 portp->nvids = 0; 2374 } 2375 if (nvids != 0) { 2376 portp->vids = kmem_zalloc(sizeof (uint16_t) * nvids, KM_SLEEP); 2377 bcopy(vids, portp->vids, sizeof (uint16_t) * nvids); 2378 portp->nvids = nvids; 2379 kmem_free(vids, sizeof (uint16_t) * nvids); 2380 } 2381 2382 /* add these new vlan ids into hash table */ 2383 vsw_vlan_add_ids(portp, VSW_VNETPORT); 2384 2385 /* reset the port if it is vlan unaware (ver < 1.3) */ 2386 vsw_vlan_unaware_port_reset(portp); 2387 2388 RW_EXIT(&plistp->lockrw); 2389 2390 return (0); 2391 } 2392 2393 /* 2394 * vsw_mac_rx -- A common function to send packets to the interface. 2395 * By default this function check if the interface is UP or not, the 2396 * rest of the behaviour depends on the flags as below: 2397 * 2398 * VSW_MACRX_PROMISC -- Check if the promisc mode set or not. 2399 * VSW_MACRX_COPYMSG -- Make a copy of the message(s). 2400 * VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack. 2401 */ 2402 void 2403 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 2404 mblk_t *mp, vsw_macrx_flags_t flags) 2405 { 2406 mblk_t *mpt; 2407 2408 D1(vswp, "%s:enter\n", __func__); 2409 READ_ENTER(&vswp->if_lockrw); 2410 /* Check if the interface is up */ 2411 if (!(vswp->if_state & VSW_IF_UP)) { 2412 RW_EXIT(&vswp->if_lockrw); 2413 /* Free messages only if FREEMSG flag specified */ 2414 if (flags & VSW_MACRX_FREEMSG) { 2415 freemsgchain(mp); 2416 } 2417 D1(vswp, "%s:exit\n", __func__); 2418 return; 2419 } 2420 /* 2421 * If PROMISC flag is passed, then check if 2422 * the interface is in the PROMISC mode. 2423 * If not, drop the messages. 2424 */ 2425 if (flags & VSW_MACRX_PROMISC) { 2426 if (!(vswp->if_state & VSW_IF_PROMISC)) { 2427 RW_EXIT(&vswp->if_lockrw); 2428 /* Free messages only if FREEMSG flag specified */ 2429 if (flags & VSW_MACRX_FREEMSG) { 2430 freemsgchain(mp); 2431 } 2432 D1(vswp, "%s:exit\n", __func__); 2433 return; 2434 } 2435 } 2436 RW_EXIT(&vswp->if_lockrw); 2437 /* 2438 * If COPYMSG flag is passed, then make a copy 2439 * of the message chain and send up the copy. 2440 */ 2441 if (flags & VSW_MACRX_COPYMSG) { 2442 mp = copymsgchain(mp); 2443 if (mp == NULL) { 2444 D1(vswp, "%s:exit\n", __func__); 2445 return; 2446 } 2447 } 2448 2449 D2(vswp, "%s: sending up stack", __func__); 2450 2451 mpt = NULL; 2452 (void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt); 2453 if (mp != NULL) { 2454 mac_rx(vswp->if_mh, mrh, mp); 2455 } 2456 D1(vswp, "%s:exit\n", __func__); 2457 } 2458 2459 /* copy mac address of vsw into soft state structure */ 2460 static void 2461 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr) 2462 { 2463 int i; 2464 2465 WRITE_ENTER(&vswp->if_lockrw); 2466 for (i = ETHERADDRL - 1; i >= 0; i--) { 2467 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF; 2468 macaddr >>= 8; 2469 } 2470 RW_EXIT(&vswp->if_lockrw); 2471 } 2472