1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/debug.h> 30 #include <sys/time.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/user.h> 34 #include <sys/stropts.h> 35 #include <sys/stream.h> 36 #include <sys/strlog.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/kmem.h> 41 #include <sys/conf.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/ksynch.h> 45 #include <sys/stat.h> 46 #include <sys/kstat.h> 47 #include <sys/vtrace.h> 48 #include <sys/strsun.h> 49 #include <sys/dlpi.h> 50 #include <sys/ethernet.h> 51 #include <net/if.h> 52 #include <sys/varargs.h> 53 #include <sys/machsystm.h> 54 #include <sys/modctl.h> 55 #include <sys/modhash.h> 56 #include <sys/mac_provider.h> 57 #include <sys/mac_ether.h> 58 #include <sys/taskq.h> 59 #include <sys/note.h> 60 #include <sys/mach_descrip.h> 61 #include <sys/mac_provider.h> 62 #include <sys/mdeg.h> 63 #include <sys/ldc.h> 64 #include <sys/vsw_fdb.h> 
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/callb.h>
#include <sys/vlan.h>

/*
 * Function prototypes.
 */
static	int vsw_attach(dev_info_t *, ddi_attach_cmd_t);
static	int vsw_detach(dev_info_t *, ddi_detach_cmd_t);
static	int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *);
static	int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *);

/* MDEG routines */
static	int vsw_mdeg_register(vsw_t *vswp);
static	void vsw_mdeg_unregister(vsw_t *vswp);
static	int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *);
static	int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *);
static	int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t);
static	int vsw_read_mdprops(vsw_t *vswp);
static	void vsw_vlan_read_ids(void *arg, int type, md_t *mdp,
	mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp,
	uint16_t *nvidsp, uint16_t *default_idp);
static	int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
	md_t *mdp, mde_cookie_t *node);
static	void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp,
	mde_cookie_t node);
static	void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node,
	uint32_t *mtu);
static	int vsw_mtu_update(vsw_t *vswp, uint32_t mtu);
static	void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t);
static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr);
static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1,
	vsw_vlanid_t *vids2, int nvids);

/* Mac driver related routines (entry points used in vsw_m_callbacks) */
static int vsw_mac_register(vsw_t *);
static int vsw_mac_unregister(vsw_t *);
static int vsw_m_stat(void *, uint_t, uint64_t *);
static void vsw_m_stop(void *arg);
static int vsw_m_start(void *arg);
static int vsw_m_unicst(void *arg, const uint8_t *);
static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *);
static int vsw_m_promisc(void *arg, boolean_t);
static mblk_t *vsw_m_tx(void *arg, mblk_t *);
void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
    mblk_t *mp, vsw_macrx_flags_t flags);

/*
 * Functions imported from other files.
 */
extern void vsw_setup_switching_timeout(void *arg);
extern void vsw_stop_switching_timeout(vsw_t *vswp);
extern int vsw_setup_switching(vsw_t *);
extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port, mac_resource_handle_t mrh);
extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
extern void vsw_del_mcst_vsw(vsw_t *);
extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr);
extern int vsw_detach_ports(vsw_t *vswp);
extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node);
extern int vsw_port_detach(vsw_t *vswp, int p_instance);
/* Note: declared static here, i.e. defined in this file, not imported. */
static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
	md_t *prev_mdp, mde_cookie_t prev_mdex);
extern	int vsw_port_attach(vsw_port_t *port);
extern	vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance);
extern int vsw_mac_open(vsw_t *vswp);
extern void vsw_mac_close(vsw_t *vswp);
extern void vsw_mac_cleanup_ports(vsw_t *vswp);
extern void vsw_unset_addrs(vsw_t *vswp);
extern void vsw_setup_layer2_post_process(vsw_t *vswp);
extern void vsw_create_vlans(void *arg, int type);
extern void vsw_destroy_vlans(void *arg, int type);
extern void vsw_vlan_add_ids(void *arg, int type);
extern void vsw_vlan_remove_ids(void *arg, int type);
extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp);
extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np,
	mblk_t **npt);
extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
extern void vsw_hio_cleanup(vsw_t *vswp);
extern void vsw_hio_start_ports(vsw_t *vswp);
extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);
extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int);
extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid,
    vsw_vlanid_t *new_vids, int new_nvids);
extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type);
extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type);
extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans,
    uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids);
extern void vsw_reset_ports(vsw_t *vswp);
extern void vsw_port_reset(vsw_port_t *portp);
/* Redundant: vsw_hio_port_update() is already declared a few lines above. */
void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled);

/*
 * Internal tunables.
 */
int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
int	vsw_wretries = 100;		/* # of write attempts */
int	vsw_desc_delay = 0;		/* delay in us */
int	vsw_read_attempts = 5;		/* # of reads of descriptor */
int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
int	vsw_mac_open_retries = 300;	/* max # of mac_open() retries */
					/* 300*3 = 900sec(15min) of max tmout */
int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */

uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */

/*
 * sw timeout for boot delay only, in milliseconds; vsw_attach() passes this
 * value to drv_usectohz() when scheduling the first switching-setup timeout.
 */
int vsw_setup_switching_boot_delay = 100 * MILLISEC;

/* delay in usec to wait for all references on a fdb entry to be dropped */
uint32_t vsw_fdbe_refcnt_delay = 10;

/*
 * Default vlan id. This is only used internally when the "default-vlan-id"
 * property is not present in the MD device node. Therefore, this should not be
 * used as a tunable; if this value is changed, the corresponding variable
 * should be updated to the same value in all vnets connected to this vsw.
 */
uint16_t	vsw_default_vlan_id = 1;

/*
 * Workaround for a version handshake bug in obp's vnet.
 * If vsw initiates version negotiation starting from the highest version,
 * obp sends a nack and terminates version handshake. To workaround
 * this, we do not initiate version handshake when the channel comes up.
 * Instead, we wait for the peer to send its version info msg and go through
 * the version protocol exchange. If we successfully negotiate a version,
 * before sending the ack, we send our version info msg to the peer
 * using the <major,minor> version that we are about to ack.
 */
boolean_t vsw_obp_ver_proto_workaround = B_TRUE;

/*
 * In the absence of "priority-ether-types" property in MD, the following
 * internal tunable can be set to specify a single priority ethertype.
 */
uint64_t vsw_pri_eth_type = 0;

/*
 * Number of transmit priority buffers that are preallocated per device.
 * This number is chosen to be a small value to throttle transmission
 * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
 */
uint32_t vsw_pri_tx_nmblks = 64;

/*
 * Number of RARP packets sent to announce macaddr to the physical switch,
 * after vsw's physical device is changed dynamically or after a guest (client
 * vnet) is live migrated in.
 */
uint32_t vsw_publish_macaddr_count = 3;

boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanup */
int vsw_hio_cleanup_delay = 10000;	/* 10ms */

/* Number of transmit descriptors -  must be power of 2 */
uint32_t vsw_ntxds = VSW_RING_NUM_EL;

/*
 * Max number of mblks received in one receive operation.
 * The initializer is a constant expression (60% of VSW_NUM_MBLKS) that is
 * converted to uint32_t.
 */
uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);

/*
 * Internal tunables for receive buffer pools, that is,  the size and number of
 * mblks for each pool. At least 3 sizes must be specified if these are used.
 * The sizes must be specified in increasing order. Non-zero value of the first
 * size will be used as a hint to use these values instead of the algorithm
 * that determines the sizes based on MTU.
 */
uint32_t vsw_mblk_size1 = 0;
uint32_t vsw_mblk_size2 = 0;
uint32_t vsw_mblk_size3 = 0;
uint32_t vsw_mblk_size4 = 0;
uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS;	/* number of mblks for pool4 */

/*
 * Set this to non-zero to enable additional internal receive buffer pools
 * based on the MTU of the device for better performance at the cost of more
 * memory consumption. This is turned off by default, to use allocb(9F) for
 * receive buffer allocations of sizes > 2K.
 */
boolean_t vsw_jumbo_rxpools = B_FALSE;

/*
 * vsw_max_tx_qcount is the maximum # of packets that can be queued
 * before the tx worker thread begins processing the queue. Its value
 * is chosen to be 4x the default length of tx descriptor ring.
 */
uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL;

/*
 * MAC callbacks
 *
 * Positional initializer; this table is handed to the MAC layer through
 * macp->m_callbacks in vsw_mac_register().
 */
static	mac_callbacks_t	vsw_m_callbacks = {
	0,
	vsw_m_stat,
	vsw_m_start,
	vsw_m_stop,
	vsw_m_promisc,
	vsw_m_multicst,
	vsw_m_unicst,
	vsw_m_tx,
	NULL,
	NULL,
	NULL
};

static	struct	cb_ops	vsw_cb_ops = {
	nulldev,			/* cb_open */
	nulldev,			/* cb_close */
	nodev,				/* cb_strategy */
	nodev,				/* cb_print */
	nodev,				/* cb_dump */
	nodev,				/* cb_read */
	nodev,				/* cb_write */
	nodev,				/* cb_ioctl */
	nodev,				/* cb_devmap */
	nodev,				/* cb_mmap */
	nodev,				/* cb_segmap */
	nochpoll,			/* cb_chpoll */
	ddi_prop_op,			/* cb_prop_op */
	NULL,				/* cb_stream */
	D_MP,				/* cb_flag */
	CB_REV,				/* rev */
	nodev,				/* int (*cb_aread)() */
	nodev				/* int (*cb_awrite)() */
};

static	struct	dev_ops	vsw_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	vsw_attach,		/* devo_attach */
	vsw_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&vsw_cb_ops,		/* devo_cb_ops */
	(struct bus_ops *)NULL,	/* devo_bus_ops */
	ddi_power		/* devo_power */
};

extern	struct	mod_ops	mod_driverops;
static struct modldrv vswmodldrv = {
	&mod_driverops,
	"sun4v Virtual Switch",
	&vsw_ops,
};

/*
 * Take / drop all three per-channel locks. LDC_ENTER_LOCK acquires
 * ldc_cblock, then ldc_rxlock, then ldc_txlock; LDC_EXIT_LOCK releases them
 * in the reverse order. Each macro expands to several statements, so it must
 * not be used as the unbraced body of an if/else/loop.
 */
#define	LDC_ENTER_LOCK(ldcp)	\
				mutex_enter(&((ldcp)->ldc_cblock));\
				mutex_enter(&((ldcp)->ldc_rxlock));\
				mutex_enter(&((ldcp)->ldc_txlock));
#define	LDC_EXIT_LOCK(ldcp)	\
				mutex_exit(&((ldcp)->ldc_txlock));\
				mutex_exit(&((ldcp)->ldc_rxlock));\
				mutex_exit(&((ldcp)->ldc_cblock));

/* Driver soft state ptr */
static void	*vsw_state;

/*
 * Linked list of "vsw_t" structures - one per instance.
 * vsw_attach() and vsw_detach() modify the list while holding vsw_rw
 * as writer.
 */
vsw_t		*vsw_head = NULL;
krwlock_t	vsw_rw;

/*
 * Property names
 */
static char vdev_propname[] = "virtual-device";
static char vsw_propname[] = "virtual-network-switch";
static char physdev_propname[] = "vsw-phys-dev";
static char smode_propname[] = "vsw-switch-mode";
static char macaddr_propname[] = "local-mac-address";
static char remaddr_propname[] = "remote-mac-address";
static char ldcids_propname[] = "ldc-ids";
static char chan_propname[] = "channel-endpoint";
static char id_propname[] = "id";
static char reg_propname[] = "reg";
static char pri_types_propname[] = "priority-ether-types";
static char vsw_pvid_propname[] = "port-vlan-id";
static char vsw_vid_propname[] = "vlan-id";
static char vsw_dvid_propname[] = "default-vlan-id";
static char port_pvid_propname[] = "remote-port-vlan-id";
static char port_vid_propname[] = "remote-vlan-id";
static char hybrid_propname[] = "hybrid";
static char vsw_mtu_propname[] = "mtu";

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device-port' nodes identified by their
 * 'id' property.
 */
static md_prop_match_t vport_prop_match[] = {
	{ MDET_PROP_VAL,    "id"   },
	{ MDET_LIST_END,    NULL    }
};

static mdeg_node_match_t vport_match = { "virtual-device-port",
						vport_prop_match };

/*
 * Matching criteria passed to the MDEG to register interest
 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified
 * by their 'name' and 'cfg-handle' properties.
389 */ 390 static md_prop_match_t vdev_prop_match[] = { 391 { MDET_PROP_STR, "name" }, 392 { MDET_PROP_VAL, "cfg-handle" }, 393 { MDET_LIST_END, NULL } 394 }; 395 396 static mdeg_node_match_t vdev_match = { "virtual-device", 397 vdev_prop_match }; 398 399 400 /* 401 * Specification of an MD node passed to the MDEG to filter any 402 * 'vport' nodes that do not belong to the specified node. This 403 * template is copied for each vsw instance and filled in with 404 * the appropriate 'cfg-handle' value before being passed to the MDEG. 405 */ 406 static mdeg_prop_spec_t vsw_prop_template[] = { 407 { MDET_PROP_STR, "name", vsw_propname }, 408 { MDET_PROP_VAL, "cfg-handle", NULL }, 409 { MDET_LIST_END, NULL, NULL } 410 }; 411 412 #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 413 414 #ifdef DEBUG 415 /* 416 * Print debug messages - set to 0x1f to enable all msgs 417 * or 0x0 to turn all off. 418 */ 419 int vswdbg = 0x0; 420 421 /* 422 * debug levels: 423 * 0x01: Function entry/exit tracing 424 * 0x02: Internal function messages 425 * 0x04: Verbose internal messages 426 * 0x08: Warning messages 427 * 0x10: Error messages 428 */ 429 430 void 431 vswdebug(vsw_t *vswp, const char *fmt, ...) 
432 { 433 char buf[512]; 434 va_list ap; 435 436 va_start(ap, fmt); 437 (void) vsprintf(buf, fmt, ap); 438 va_end(ap); 439 440 if (vswp == NULL) 441 cmn_err(CE_CONT, "%s\n", buf); 442 else 443 cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); 444 } 445 446 #endif /* DEBUG */ 447 448 static struct modlinkage modlinkage = { 449 MODREV_1, 450 &vswmodldrv, 451 NULL 452 }; 453 454 int 455 _init(void) 456 { 457 int status; 458 459 rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); 460 461 status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); 462 if (status != 0) { 463 return (status); 464 } 465 466 mac_init_ops(&vsw_ops, DRV_NAME); 467 status = mod_install(&modlinkage); 468 if (status != 0) { 469 ddi_soft_state_fini(&vsw_state); 470 } 471 return (status); 472 } 473 474 int 475 _fini(void) 476 { 477 int status; 478 479 status = mod_remove(&modlinkage); 480 if (status != 0) 481 return (status); 482 mac_fini_ops(&vsw_ops); 483 ddi_soft_state_fini(&vsw_state); 484 485 rw_destroy(&vsw_rw); 486 487 return (status); 488 } 489 490 int 491 _info(struct modinfo *modinfop) 492 { 493 return (mod_info(&modlinkage, modinfop)); 494 } 495 496 static int 497 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 498 { 499 vsw_t *vswp; 500 int instance; 501 char hashname[MAXNAMELEN]; 502 char qname[TASKQ_NAMELEN]; 503 enum { PROG_init = 0x00, 504 PROG_locks = 0x01, 505 PROG_readmd = 0x02, 506 PROG_fdb = 0x04, 507 PROG_mfdb = 0x08, 508 PROG_taskq = 0x10, 509 PROG_swmode = 0x20, 510 PROG_macreg = 0x40, 511 PROG_mdreg = 0x80} 512 progress; 513 514 progress = PROG_init; 515 int rv; 516 517 switch (cmd) { 518 case DDI_ATTACH: 519 break; 520 case DDI_RESUME: 521 /* nothing to do for this non-device */ 522 return (DDI_SUCCESS); 523 case DDI_PM_RESUME: 524 default: 525 return (DDI_FAILURE); 526 } 527 528 instance = ddi_get_instance(dip); 529 if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { 530 DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); 531 return (DDI_FAILURE); 532 } 
533 vswp = ddi_get_soft_state(vsw_state, instance); 534 535 if (vswp == NULL) { 536 DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance); 537 goto vsw_attach_fail; 538 } 539 540 vswp->dip = dip; 541 vswp->instance = instance; 542 ddi_set_driver_private(dip, (caddr_t)vswp); 543 544 mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL); 545 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); 546 mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL); 547 rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL); 548 rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); 549 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); 550 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); 551 552 progress |= PROG_locks; 553 554 rv = vsw_read_mdprops(vswp); 555 if (rv != 0) 556 goto vsw_attach_fail; 557 558 progress |= PROG_readmd; 559 560 /* setup the unicast forwarding database */ 561 (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", 562 vswp->instance); 563 D2(vswp, "creating unicast hash table (%s)...", hashname); 564 vswp->fdb_nchains = vsw_fdb_nchains; 565 vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains, 566 mod_hash_null_valdtor, sizeof (void *)); 567 vsw_create_vlans((void *)vswp, VSW_LOCALDEV); 568 progress |= PROG_fdb; 569 570 /* setup the multicast fowarding database */ 571 (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", 572 vswp->instance); 573 D2(vswp, "creating multicast hash table %s)...", hashname); 574 vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains, 575 mod_hash_null_valdtor, sizeof (void *)); 576 577 progress |= PROG_mfdb; 578 579 /* 580 * Create the taskq which will process all the VIO 581 * control messages. 
582 */ 583 (void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance); 584 if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, 585 TASKQ_DEFAULTPRI, 0)) == NULL) { 586 cmn_err(CE_WARN, "!vsw%d: Unable to create task queue", 587 vswp->instance); 588 goto vsw_attach_fail; 589 } 590 591 progress |= PROG_taskq; 592 593 /* prevent auto-detaching */ 594 if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip, 595 DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) { 596 cmn_err(CE_NOTE, "!Unable to set \"%s\" property for " 597 "instance %u", DDI_NO_AUTODETACH, instance); 598 } 599 600 /* 601 * The null switching function is set to avoid panic until 602 * switch mode is setup. 603 */ 604 vswp->vsw_switch_frame = vsw_switch_frame_nop; 605 606 /* 607 * Setup the required switching mode, 608 * based on the mdprops that we read earlier. 609 * schedule a short timeout (0.1 sec) for the first time 610 * setup and avoid calling mac_open() directly here, 611 * others are regular timeout 3 secs. 612 */ 613 mutex_enter(&vswp->swtmout_lock); 614 615 vswp->swtmout_enabled = B_TRUE; 616 vswp->swtmout_id = timeout(vsw_setup_switching_timeout, vswp, 617 drv_usectohz(vsw_setup_switching_boot_delay)); 618 619 mutex_exit(&vswp->swtmout_lock); 620 621 progress |= PROG_swmode; 622 623 /* Register with mac layer as a provider */ 624 rv = vsw_mac_register(vswp); 625 if (rv != 0) 626 goto vsw_attach_fail; 627 628 progress |= PROG_macreg; 629 630 /* 631 * Now we have everything setup, register an interest in 632 * specific MD nodes. 633 * 634 * The callback is invoked in 2 cases, firstly if upon mdeg 635 * registration there are existing nodes which match our specified 636 * criteria, and secondly if the MD is changed (and again, there 637 * are nodes which we are interested in present within it. Note 638 * that our callback will be invoked even if our specified nodes 639 * have not actually changed). 
640 * 641 */ 642 rv = vsw_mdeg_register(vswp); 643 if (rv != 0) 644 goto vsw_attach_fail; 645 646 progress |= PROG_mdreg; 647 648 WRITE_ENTER(&vsw_rw); 649 vswp->next = vsw_head; 650 vsw_head = vswp; 651 RW_EXIT(&vsw_rw); 652 653 ddi_report_dev(vswp->dip); 654 return (DDI_SUCCESS); 655 656 vsw_attach_fail: 657 DERR(NULL, "vsw_attach: failed"); 658 659 if (progress & PROG_mdreg) { 660 vsw_mdeg_unregister(vswp); 661 (void) vsw_detach_ports(vswp); 662 } 663 664 if (progress & PROG_macreg) 665 (void) vsw_mac_unregister(vswp); 666 667 if (progress & PROG_swmode) { 668 vsw_stop_switching_timeout(vswp); 669 vsw_hio_cleanup(vswp); 670 mutex_enter(&vswp->mac_lock); 671 vsw_mac_close(vswp); 672 mutex_exit(&vswp->mac_lock); 673 } 674 675 if (progress & PROG_taskq) 676 ddi_taskq_destroy(vswp->taskq_p); 677 678 if (progress & PROG_mfdb) 679 mod_hash_destroy_hash(vswp->mfdb); 680 681 if (progress & PROG_fdb) { 682 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 683 mod_hash_destroy_hash(vswp->fdb_hashp); 684 } 685 686 if (progress & PROG_readmd) { 687 if (VSW_PRI_ETH_DEFINED(vswp)) { 688 kmem_free(vswp->pri_types, 689 sizeof (uint16_t) * vswp->pri_num_types); 690 } 691 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 692 } 693 694 if (progress & PROG_locks) { 695 rw_destroy(&vswp->plist.lockrw); 696 rw_destroy(&vswp->mfdbrw); 697 rw_destroy(&vswp->if_lockrw); 698 rw_destroy(&vswp->maccl_rwlock); 699 mutex_destroy(&vswp->swtmout_lock); 700 mutex_destroy(&vswp->mca_lock); 701 mutex_destroy(&vswp->mac_lock); 702 } 703 704 ddi_soft_state_free(vsw_state, instance); 705 return (DDI_FAILURE); 706 } 707 708 static int 709 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 710 { 711 vio_mblk_pool_t *poolp, *npoolp; 712 vsw_t **vswpp, *vswp; 713 int instance; 714 715 instance = ddi_get_instance(dip); 716 vswp = ddi_get_soft_state(vsw_state, instance); 717 718 if (vswp == NULL) { 719 return (DDI_FAILURE); 720 } 721 722 switch (cmd) { 723 case DDI_DETACH: 724 break; 725 case DDI_SUSPEND: 726 case 
DDI_PM_SUSPEND: 727 default: 728 return (DDI_FAILURE); 729 } 730 731 D2(vswp, "detaching instance %d", instance); 732 733 /* Stop any pending timeout to setup switching mode. */ 734 vsw_stop_switching_timeout(vswp); 735 736 /* Cleanup the interface's mac client */ 737 vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV); 738 739 if (vswp->if_state & VSW_IF_REG) { 740 if (vsw_mac_unregister(vswp) != 0) { 741 cmn_err(CE_WARN, "!vsw%d: Unable to detach from " 742 "MAC layer", vswp->instance); 743 return (DDI_FAILURE); 744 } 745 } 746 747 vsw_mdeg_unregister(vswp); 748 749 /* cleanup HybridIO */ 750 vsw_hio_cleanup(vswp); 751 752 if (vsw_detach_ports(vswp) != 0) { 753 cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports", 754 vswp->instance); 755 return (DDI_FAILURE); 756 } 757 758 rw_destroy(&vswp->if_lockrw); 759 760 vsw_mac_cleanup_ports(vswp); 761 762 /* 763 * Now that the ports have been deleted, stop and close 764 * the physical device. 765 */ 766 mutex_enter(&vswp->mac_lock); 767 vsw_mac_close(vswp); 768 mutex_exit(&vswp->mac_lock); 769 770 mutex_destroy(&vswp->mac_lock); 771 mutex_destroy(&vswp->swtmout_lock); 772 rw_destroy(&vswp->maccl_rwlock); 773 774 /* 775 * Destroy any free pools that may still exist. 776 */ 777 poolp = vswp->rxh; 778 while (poolp != NULL) { 779 npoolp = vswp->rxh = poolp->nextp; 780 if (vio_destroy_mblks(poolp) != 0) { 781 vswp->rxh = poolp; 782 return (DDI_FAILURE); 783 } 784 poolp = npoolp; 785 } 786 787 /* 788 * Remove this instance from any entries it may be on in 789 * the hash table by using the list of addresses maintained 790 * in the vsw_t structure. 791 */ 792 vsw_del_mcst_vsw(vswp); 793 794 vswp->mcap = NULL; 795 mutex_destroy(&vswp->mca_lock); 796 797 /* 798 * By now any pending tasks have finished and the underlying 799 * ldc's have been destroyed, so its safe to delete the control 800 * message taskq. 
801 */ 802 if (vswp->taskq_p != NULL) 803 ddi_taskq_destroy(vswp->taskq_p); 804 805 /* 806 * At this stage all the data pointers in the hash table 807 * should be NULL, as all the ports have been removed and will 808 * have deleted themselves from the port lists which the data 809 * pointers point to. Hence we can destroy the table using the 810 * default destructors. 811 */ 812 D2(vswp, "vsw_detach: destroying hash tables.."); 813 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 814 mod_hash_destroy_hash(vswp->fdb_hashp); 815 vswp->fdb_hashp = NULL; 816 817 WRITE_ENTER(&vswp->mfdbrw); 818 mod_hash_destroy_hash(vswp->mfdb); 819 vswp->mfdb = NULL; 820 RW_EXIT(&vswp->mfdbrw); 821 rw_destroy(&vswp->mfdbrw); 822 823 /* free pri_types table */ 824 if (VSW_PRI_ETH_DEFINED(vswp)) { 825 kmem_free(vswp->pri_types, 826 sizeof (uint16_t) * vswp->pri_num_types); 827 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 828 } 829 830 ddi_remove_minor_node(dip, NULL); 831 832 rw_destroy(&vswp->plist.lockrw); 833 WRITE_ENTER(&vsw_rw); 834 for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) { 835 if (*vswpp == vswp) { 836 *vswpp = vswp->next; 837 break; 838 } 839 } 840 RW_EXIT(&vsw_rw); 841 ddi_soft_state_free(vsw_state, instance); 842 843 return (DDI_SUCCESS); 844 } 845 846 /* 847 * Get the value of the "vsw-phys-dev" property in the specified 848 * node. This property is the name of the physical device that 849 * the virtual switch will use to talk to the outside world. 850 * 851 * Note it is valid for this property to be NULL (but the property 852 * itself must exist). Callers of this routine should verify that 853 * the value returned is what they expected (i.e. either NULL or non NULL). 854 * 855 * On success returns value of the property in region pointed to by 856 * the 'name' argument, and with return value of 0. Otherwise returns 1. 
857 */ 858 static int 859 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name) 860 { 861 int len = 0; 862 int instance; 863 char *physname = NULL; 864 char *dev; 865 const char *dev_name; 866 char myname[MAXNAMELEN]; 867 868 dev_name = ddi_driver_name(vswp->dip); 869 instance = ddi_get_instance(vswp->dip); 870 (void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance); 871 872 if (md_get_prop_data(mdp, node, physdev_propname, 873 (uint8_t **)(&physname), &len) != 0) { 874 cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical " 875 "device(s) from MD", vswp->instance); 876 return (1); 877 } else if ((strlen(physname) + 1) > LIFNAMSIZ) { 878 cmn_err(CE_WARN, "!vsw%d: %s is too long a device name", 879 vswp->instance, physname); 880 return (1); 881 } else if (strcmp(myname, physname) == 0) { 882 /* 883 * Prevent the vswitch from opening itself as the 884 * network device. 885 */ 886 cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name", 887 vswp->instance, physname); 888 return (1); 889 } else { 890 (void) strncpy(name, physname, strlen(physname) + 1); 891 D2(vswp, "%s: using first device specified (%s)", 892 __func__, physname); 893 } 894 895 #ifdef DEBUG 896 /* 897 * As a temporary measure to aid testing we check to see if there 898 * is a vsw.conf file present. If there is we use the value of the 899 * vsw_physname property in the file as the name of the physical 900 * device, overriding the value from the MD. 901 * 902 * There may be multiple devices listed, but for the moment 903 * we just use the first one. 
904 */ 905 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0, 906 "vsw_physname", &dev) == DDI_PROP_SUCCESS) { 907 if ((strlen(dev) + 1) > LIFNAMSIZ) { 908 cmn_err(CE_WARN, "vsw%d: %s is too long a device name", 909 vswp->instance, dev); 910 ddi_prop_free(dev); 911 return (1); 912 } else { 913 cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from " 914 "config file", vswp->instance, dev); 915 916 (void) strncpy(name, dev, strlen(dev) + 1); 917 } 918 919 ddi_prop_free(dev); 920 } 921 #endif 922 923 return (0); 924 } 925 926 /* 927 * Read the 'vsw-switch-mode' property from the specified MD node. 928 * 929 * Returns 0 on success, otherwise returns 1. 930 */ 931 static int 932 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode) 933 { 934 int len = 0; 935 char *smode = NULL; 936 char *curr_mode = NULL; 937 938 D1(vswp, "%s: enter", __func__); 939 940 /* 941 * Get the switch-mode property. The modes are listed in 942 * decreasing order of preference, i.e. prefered mode is 943 * first item in list. 944 */ 945 len = 0; 946 if (md_get_prop_data(mdp, node, smode_propname, 947 (uint8_t **)(&smode), &len) != 0) { 948 /* 949 * Unable to get switch-mode property from MD, nothing 950 * more we can do. 951 */ 952 cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property" 953 " from the MD", vswp->instance); 954 return (1); 955 } 956 957 curr_mode = smode; 958 /* 959 * Modes of operation: 960 * 'switched' - layer 2 switching, underlying HW in 961 * programmed mode. 962 * 'promiscuous' - layer 2 switching, underlying HW in 963 * promiscuous mode. 964 * 'routed' - layer 3 (i.e. IP) routing, underlying HW 965 * in non-promiscuous mode. 
966 */ 967 while (curr_mode < (smode + len)) { 968 D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode); 969 if (strcmp(curr_mode, "switched") == 0) { 970 *mode = VSW_LAYER2; 971 } else if (strcmp(curr_mode, "promiscuous") == 0) { 972 *mode = VSW_LAYER2 | VSW_LAYER2_PROMISC; 973 } else if (strcmp(curr_mode, "routed") == 0) { 974 *mode = VSW_LAYER3; 975 } else { 976 cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, " 977 "setting to default switched mode", 978 vswp->instance, curr_mode); 979 *mode = VSW_LAYER2; 980 } 981 curr_mode += strlen(curr_mode) + 1; 982 } 983 984 D2(vswp, "%s: %d mode", __func__, *mode); 985 986 D1(vswp, "%s: exit", __func__); 987 988 return (0); 989 } 990 991 /* 992 * Register with the MAC layer as a network device, so we 993 * can be plumbed if necessary. 994 */ 995 static int 996 vsw_mac_register(vsw_t *vswp) 997 { 998 mac_register_t *macp; 999 int rv; 1000 1001 D1(vswp, "%s: enter", __func__); 1002 1003 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 1004 return (EINVAL); 1005 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1006 macp->m_driver = vswp; 1007 macp->m_dip = vswp->dip; 1008 macp->m_src_addr = (uint8_t *)&vswp->if_addr; 1009 macp->m_callbacks = &vsw_m_callbacks; 1010 macp->m_min_sdu = 0; 1011 macp->m_max_sdu = vswp->mtu; 1012 macp->m_margin = VLAN_TAGSZ; 1013 rv = mac_register(macp, &vswp->if_mh); 1014 mac_free(macp); 1015 if (rv != 0) { 1016 /* 1017 * Treat this as a non-fatal error as we may be 1018 * able to operate in some other mode. 
1019 */ 1020 cmn_err(CE_NOTE, "!vsw%d: Unable to register as " 1021 "a provider with MAC layer", vswp->instance); 1022 return (rv); 1023 } 1024 1025 vswp->if_state |= VSW_IF_REG; 1026 1027 D1(vswp, "%s: exit", __func__); 1028 1029 return (rv); 1030 } 1031 1032 static int 1033 vsw_mac_unregister(vsw_t *vswp) 1034 { 1035 int rv = 0; 1036 1037 D1(vswp, "%s: enter", __func__); 1038 1039 WRITE_ENTER(&vswp->if_lockrw); 1040 1041 if (vswp->if_state & VSW_IF_REG) { 1042 rv = mac_unregister(vswp->if_mh); 1043 if (rv != 0) { 1044 DWARN(vswp, "%s: unable to unregister from MAC " 1045 "framework", __func__); 1046 1047 RW_EXIT(&vswp->if_lockrw); 1048 D1(vswp, "%s: fail exit", __func__); 1049 return (rv); 1050 } 1051 1052 /* mark i/f as down and unregistered */ 1053 vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG); 1054 } 1055 RW_EXIT(&vswp->if_lockrw); 1056 1057 D1(vswp, "%s: exit", __func__); 1058 1059 return (rv); 1060 } 1061 1062 static int 1063 vsw_m_stat(void *arg, uint_t stat, uint64_t *val) 1064 { 1065 vsw_t *vswp = (vsw_t *)arg; 1066 1067 D1(vswp, "%s: enter", __func__); 1068 1069 mutex_enter(&vswp->mac_lock); 1070 if (vswp->mh == NULL) { 1071 mutex_exit(&vswp->mac_lock); 1072 return (EINVAL); 1073 } 1074 1075 /* return stats from underlying device */ 1076 *val = mac_stat_get(vswp->mh, stat); 1077 1078 mutex_exit(&vswp->mac_lock); 1079 1080 return (0); 1081 } 1082 1083 static void 1084 vsw_m_stop(void *arg) 1085 { 1086 vsw_t *vswp = (vsw_t *)arg; 1087 1088 D1(vswp, "%s: enter", __func__); 1089 1090 WRITE_ENTER(&vswp->if_lockrw); 1091 vswp->if_state &= ~VSW_IF_UP; 1092 RW_EXIT(&vswp->if_lockrw); 1093 1094 /* Cleanup and close the mac client */ 1095 vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV); 1096 1097 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1098 } 1099 1100 static int 1101 vsw_m_start(void *arg) 1102 { 1103 int rv; 1104 vsw_t *vswp = (vsw_t *)arg; 1105 1106 D1(vswp, "%s: enter", __func__); 1107 1108 WRITE_ENTER(&vswp->if_lockrw); 1109 1110 
vswp->if_state |= VSW_IF_UP; 1111 1112 if (vswp->switching_setup_done == B_FALSE) { 1113 /* 1114 * If the switching mode has not been setup yet, just 1115 * return. The unicast address will be programmed 1116 * after the physical device is successfully setup by the 1117 * timeout handler. 1118 */ 1119 RW_EXIT(&vswp->if_lockrw); 1120 return (0); 1121 } 1122 1123 /* if in layer2 mode, program unicast address. */ 1124 if (vswp->mh != NULL) { 1125 /* Init a mac client and program addresses */ 1126 rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV); 1127 if (rv != 0) { 1128 cmn_err(CE_NOTE, 1129 "!vsw%d: failed to program interface " 1130 "unicast address\n", vswp->instance); 1131 } 1132 } 1133 1134 RW_EXIT(&vswp->if_lockrw); 1135 1136 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1137 return (0); 1138 } 1139 1140 /* 1141 * Change the local interface address. 1142 * 1143 * Note: we don't support this entry point. The local 1144 * mac address of the switch can only be changed via its 1145 * MD node properties. 1146 */ 1147 static int 1148 vsw_m_unicst(void *arg, const uint8_t *macaddr) 1149 { 1150 _NOTE(ARGUNUSED(arg, macaddr)) 1151 1152 return (DDI_FAILURE); 1153 } 1154 1155 static int 1156 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 1157 { 1158 vsw_t *vswp = (vsw_t *)arg; 1159 mcst_addr_t *mcst_p = NULL; 1160 uint64_t addr = 0x0; 1161 int i, ret = 0; 1162 1163 D1(vswp, "%s: enter", __func__); 1164 1165 /* 1166 * Convert address into form that can be used 1167 * as hash table key. 1168 */ 1169 for (i = 0; i < ETHERADDRL; i++) { 1170 addr = (addr << 8) | mca[i]; 1171 } 1172 1173 D2(vswp, "%s: addr = 0x%llx", __func__, addr); 1174 1175 if (add) { 1176 D2(vswp, "%s: adding multicast", __func__); 1177 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1178 /* 1179 * Update the list of multicast addresses 1180 * contained within the vsw_t structure to 1181 * include this new one. 
1182 */ 1183 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP); 1184 if (mcst_p == NULL) { 1185 DERR(vswp, "%s unable to alloc mem", __func__); 1186 (void) vsw_del_mcst(vswp, 1187 VSW_LOCALDEV, addr, NULL); 1188 return (1); 1189 } 1190 mcst_p->addr = addr; 1191 ether_copy(mca, &mcst_p->mca); 1192 1193 /* 1194 * Call into the underlying driver to program the 1195 * address into HW. 1196 */ 1197 ret = vsw_mac_multicast_add(vswp, NULL, mcst_p, 1198 VSW_LOCALDEV); 1199 if (ret != 0) { 1200 (void) vsw_del_mcst(vswp, 1201 VSW_LOCALDEV, addr, NULL); 1202 kmem_free(mcst_p, sizeof (*mcst_p)); 1203 return (ret); 1204 } 1205 1206 mutex_enter(&vswp->mca_lock); 1207 mcst_p->nextp = vswp->mcap; 1208 vswp->mcap = mcst_p; 1209 mutex_exit(&vswp->mca_lock); 1210 } else { 1211 cmn_err(CE_WARN, "!vsw%d: unable to add multicast " 1212 "address", vswp->instance); 1213 } 1214 return (ret); 1215 } 1216 1217 D2(vswp, "%s: removing multicast", __func__); 1218 /* 1219 * Remove the address from the hash table.. 1220 */ 1221 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1222 1223 /* 1224 * ..and then from the list maintained in the 1225 * vsw_t structure. 
1226 */ 1227 mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr); 1228 ASSERT(mcst_p != NULL); 1229 1230 vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV); 1231 kmem_free(mcst_p, sizeof (*mcst_p)); 1232 } 1233 1234 D1(vswp, "%s: exit", __func__); 1235 1236 return (0); 1237 } 1238 1239 static int 1240 vsw_m_promisc(void *arg, boolean_t on) 1241 { 1242 vsw_t *vswp = (vsw_t *)arg; 1243 1244 D1(vswp, "%s: enter", __func__); 1245 1246 WRITE_ENTER(&vswp->if_lockrw); 1247 if (on) 1248 vswp->if_state |= VSW_IF_PROMISC; 1249 else 1250 vswp->if_state &= ~VSW_IF_PROMISC; 1251 RW_EXIT(&vswp->if_lockrw); 1252 1253 D1(vswp, "%s: exit", __func__); 1254 1255 return (0); 1256 } 1257 1258 static mblk_t * 1259 vsw_m_tx(void *arg, mblk_t *mp) 1260 { 1261 vsw_t *vswp = (vsw_t *)arg; 1262 1263 D1(vswp, "%s: enter", __func__); 1264 1265 mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp); 1266 1267 if (mp == NULL) { 1268 return (NULL); 1269 } 1270 1271 vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL); 1272 1273 D1(vswp, "%s: exit", __func__); 1274 1275 return (NULL); 1276 } 1277 1278 /* 1279 * Register for machine description (MD) updates. 1280 * 1281 * Returns 0 on success, 1 on failure. 1282 */ 1283 static int 1284 vsw_mdeg_register(vsw_t *vswp) 1285 { 1286 mdeg_prop_spec_t *pspecp; 1287 mdeg_node_spec_t *inst_specp; 1288 mdeg_handle_t mdeg_hdl, mdeg_port_hdl; 1289 size_t templatesz; 1290 int rv; 1291 1292 D1(vswp, "%s: enter", __func__); 1293 1294 /* 1295 * Allocate and initialize a per-instance copy 1296 * of the global property spec array that will 1297 * uniquely identify this vsw instance. 
1298 */ 1299 templatesz = sizeof (vsw_prop_template); 1300 pspecp = kmem_zalloc(templatesz, KM_SLEEP); 1301 1302 bcopy(vsw_prop_template, pspecp, templatesz); 1303 1304 VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop); 1305 1306 /* initialize the complete prop spec structure */ 1307 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 1308 inst_specp->namep = "virtual-device"; 1309 inst_specp->specp = pspecp; 1310 1311 D2(vswp, "%s: instance %d registering with mdeg", __func__, 1312 vswp->regprop); 1313 /* 1314 * Register an interest in 'virtual-device' nodes with a 1315 * 'name' property of 'virtual-network-switch' 1316 */ 1317 rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb, 1318 (void *)vswp, &mdeg_hdl); 1319 if (rv != MDEG_SUCCESS) { 1320 DERR(vswp, "%s: mdeg_register failed (%d) for vsw node", 1321 __func__, rv); 1322 goto mdeg_reg_fail; 1323 } 1324 1325 /* 1326 * Register an interest in 'vsw-port' nodes. 1327 */ 1328 rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb, 1329 (void *)vswp, &mdeg_port_hdl); 1330 if (rv != MDEG_SUCCESS) { 1331 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv); 1332 (void) mdeg_unregister(mdeg_hdl); 1333 goto mdeg_reg_fail; 1334 } 1335 1336 /* save off data that will be needed later */ 1337 vswp->inst_spec = inst_specp; 1338 vswp->mdeg_hdl = mdeg_hdl; 1339 vswp->mdeg_port_hdl = mdeg_port_hdl; 1340 1341 D1(vswp, "%s: exit", __func__); 1342 return (0); 1343 1344 mdeg_reg_fail: 1345 cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks", 1346 vswp->instance); 1347 kmem_free(pspecp, templatesz); 1348 kmem_free(inst_specp, sizeof (mdeg_node_spec_t)); 1349 1350 vswp->mdeg_hdl = NULL; 1351 vswp->mdeg_port_hdl = NULL; 1352 1353 return (1); 1354 } 1355 1356 static void 1357 vsw_mdeg_unregister(vsw_t *vswp) 1358 { 1359 D1(vswp, "vsw_mdeg_unregister: enter"); 1360 1361 if (vswp->mdeg_hdl != NULL) 1362 (void) mdeg_unregister(vswp->mdeg_hdl); 1363 1364 if (vswp->mdeg_port_hdl != NULL) 1365 (void) 
mdeg_unregister(vswp->mdeg_port_hdl); 1366 1367 if (vswp->inst_spec != NULL) { 1368 if (vswp->inst_spec->specp != NULL) { 1369 (void) kmem_free(vswp->inst_spec->specp, 1370 sizeof (vsw_prop_template)); 1371 vswp->inst_spec->specp = NULL; 1372 } 1373 1374 (void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t)); 1375 vswp->inst_spec = NULL; 1376 } 1377 1378 D1(vswp, "vsw_mdeg_unregister: exit"); 1379 } 1380 1381 /* 1382 * Mdeg callback invoked for the vsw node itself. 1383 */ 1384 static int 1385 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1386 { 1387 vsw_t *vswp; 1388 md_t *mdp; 1389 mde_cookie_t node; 1390 uint64_t inst; 1391 char *node_name = NULL; 1392 1393 if (resp == NULL) 1394 return (MDEG_FAILURE); 1395 1396 vswp = (vsw_t *)cb_argp; 1397 1398 D1(vswp, "%s: added %d : removed %d : curr matched %d" 1399 " : prev matched %d", __func__, resp->added.nelem, 1400 resp->removed.nelem, resp->match_curr.nelem, 1401 resp->match_prev.nelem); 1402 1403 /* 1404 * We get an initial callback for this node as 'added' 1405 * after registering with mdeg. Note that we would have 1406 * already gathered information about this vsw node by 1407 * walking MD earlier during attach (in vsw_read_mdprops()). 1408 * So, there is a window where the properties of this 1409 * node might have changed when we get this initial 'added' 1410 * callback. We handle this as if an update occured 1411 * and invoke the same function which handles updates to 1412 * the properties of this vsw-node if any. 1413 * 1414 * A non-zero 'match' value indicates that the MD has been 1415 * updated and that a virtual-network-switch node is 1416 * present which may or may not have been updated. It is 1417 * up to the clients to examine their own nodes and 1418 * determine if they have changed. 
1419 */ 1420 if (resp->added.nelem != 0) { 1421 1422 if (resp->added.nelem != 1) { 1423 cmn_err(CE_NOTE, "!vsw%d: number of nodes added " 1424 "invalid: %d\n", vswp->instance, resp->added.nelem); 1425 return (MDEG_FAILURE); 1426 } 1427 1428 mdp = resp->added.mdp; 1429 node = resp->added.mdep[0]; 1430 1431 } else if (resp->match_curr.nelem != 0) { 1432 1433 if (resp->match_curr.nelem != 1) { 1434 cmn_err(CE_NOTE, "!vsw%d: number of nodes updated " 1435 "invalid: %d\n", vswp->instance, 1436 resp->match_curr.nelem); 1437 return (MDEG_FAILURE); 1438 } 1439 1440 mdp = resp->match_curr.mdp; 1441 node = resp->match_curr.mdep[0]; 1442 1443 } else { 1444 return (MDEG_FAILURE); 1445 } 1446 1447 /* Validate name and instance */ 1448 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { 1449 DERR(vswp, "%s: unable to get node name\n", __func__); 1450 return (MDEG_FAILURE); 1451 } 1452 1453 /* is this a virtual-network-switch? */ 1454 if (strcmp(node_name, vsw_propname) != 0) { 1455 DERR(vswp, "%s: Invalid node name: %s\n", 1456 __func__, node_name); 1457 return (MDEG_FAILURE); 1458 } 1459 1460 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { 1461 DERR(vswp, "%s: prop(cfg-handle) not found\n", 1462 __func__); 1463 return (MDEG_FAILURE); 1464 } 1465 1466 /* is this the right instance of vsw? */ 1467 if (inst != vswp->regprop) { 1468 DERR(vswp, "%s: Invalid cfg-handle: %lx\n", 1469 __func__, inst); 1470 return (MDEG_FAILURE); 1471 } 1472 1473 vsw_update_md_prop(vswp, mdp, node); 1474 1475 return (MDEG_SUCCESS); 1476 } 1477 1478 /* 1479 * Mdeg callback invoked for changes to the vsw-port nodes 1480 * under the vsw node. 
1481 */ 1482 static int 1483 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1484 { 1485 vsw_t *vswp; 1486 int idx; 1487 md_t *mdp; 1488 mde_cookie_t node; 1489 uint64_t inst; 1490 int rv; 1491 1492 if ((resp == NULL) || (cb_argp == NULL)) 1493 return (MDEG_FAILURE); 1494 1495 vswp = (vsw_t *)cb_argp; 1496 1497 D2(vswp, "%s: added %d : removed %d : curr matched %d" 1498 " : prev matched %d", __func__, resp->added.nelem, 1499 resp->removed.nelem, resp->match_curr.nelem, 1500 resp->match_prev.nelem); 1501 1502 /* process added ports */ 1503 for (idx = 0; idx < resp->added.nelem; idx++) { 1504 mdp = resp->added.mdp; 1505 node = resp->added.mdep[idx]; 1506 1507 D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); 1508 1509 if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) { 1510 cmn_err(CE_WARN, "!vsw%d: Unable to add new port " 1511 "(0x%lx), err=%d", vswp->instance, node, rv); 1512 } 1513 } 1514 1515 /* process removed ports */ 1516 for (idx = 0; idx < resp->removed.nelem; idx++) { 1517 mdp = resp->removed.mdp; 1518 node = resp->removed.mdep[idx]; 1519 1520 if (md_get_prop_val(mdp, node, id_propname, &inst)) { 1521 DERR(vswp, "%s: prop(%s) not found in port(%d)", 1522 __func__, id_propname, idx); 1523 continue; 1524 } 1525 1526 D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); 1527 1528 if (vsw_port_detach(vswp, inst) != 0) { 1529 cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld", 1530 vswp->instance, inst); 1531 } 1532 } 1533 1534 for (idx = 0; idx < resp->match_curr.nelem; idx++) { 1535 (void) vsw_port_update(vswp, resp->match_curr.mdp, 1536 resp->match_curr.mdep[idx], 1537 resp->match_prev.mdp, 1538 resp->match_prev.mdep[idx]); 1539 } 1540 1541 D1(vswp, "%s: exit", __func__); 1542 1543 return (MDEG_SUCCESS); 1544 } 1545 1546 /* 1547 * Scan the machine description for this instance of vsw 1548 * and read its properties. Called only from vsw_attach(). 1549 * Returns: 0 on success, 1 on failure. 
1550 */ 1551 static int 1552 vsw_read_mdprops(vsw_t *vswp) 1553 { 1554 md_t *mdp = NULL; 1555 mde_cookie_t rootnode; 1556 mde_cookie_t *listp = NULL; 1557 uint64_t inst; 1558 uint64_t cfgh; 1559 char *name; 1560 int rv = 1; 1561 int num_nodes = 0; 1562 int num_devs = 0; 1563 int listsz = 0; 1564 int i; 1565 1566 /* 1567 * In each 'virtual-device' node in the MD there is a 1568 * 'cfg-handle' property which is the MD's concept of 1569 * an instance number (this may be completely different from 1570 * the device drivers instance #). OBP reads that value and 1571 * stores it in the 'reg' property of the appropriate node in 1572 * the device tree. We first read this reg property and use this 1573 * to compare against the 'cfg-handle' property of vsw nodes 1574 * in MD to get to this specific vsw instance and then read 1575 * other properties that we are interested in. 1576 * We also cache the value of 'reg' property and use it later 1577 * to register callbacks with mdeg (see vsw_mdeg_register()) 1578 */ 1579 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 1580 DDI_PROP_DONTPASS, reg_propname, -1); 1581 if (inst == -1) { 1582 cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from " 1583 "OBP device tree", vswp->instance, reg_propname); 1584 return (rv); 1585 } 1586 1587 vswp->regprop = inst; 1588 1589 if ((mdp = md_get_handle()) == NULL) { 1590 DWARN(vswp, "%s: cannot init MD\n", __func__); 1591 return (rv); 1592 } 1593 1594 num_nodes = md_node_count(mdp); 1595 ASSERT(num_nodes > 0); 1596 1597 listsz = num_nodes * sizeof (mde_cookie_t); 1598 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 1599 1600 rootnode = md_root_node(mdp); 1601 1602 /* search for all "virtual_device" nodes */ 1603 num_devs = md_scan_dag(mdp, rootnode, 1604 md_find_name(mdp, vdev_propname), 1605 md_find_name(mdp, "fwd"), listp); 1606 if (num_devs <= 0) { 1607 DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs); 1608 goto vsw_readmd_exit; 1609 } 1610 1611 /* 1612 * Now loop 
through the list of virtual-devices looking for 1613 * devices with name "virtual-network-switch" and for each 1614 * such device compare its instance with what we have from 1615 * the 'reg' property to find the right node in MD and then 1616 * read all its properties. 1617 */ 1618 for (i = 0; i < num_devs; i++) { 1619 1620 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 1621 DWARN(vswp, "%s: name property not found\n", 1622 __func__); 1623 goto vsw_readmd_exit; 1624 } 1625 1626 /* is this a virtual-network-switch? */ 1627 if (strcmp(name, vsw_propname) != 0) 1628 continue; 1629 1630 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 1631 DWARN(vswp, "%s: cfg-handle property not found\n", 1632 __func__); 1633 goto vsw_readmd_exit; 1634 } 1635 1636 /* is this the required instance of vsw? */ 1637 if (inst != cfgh) 1638 continue; 1639 1640 /* now read all properties of this vsw instance */ 1641 rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]); 1642 break; 1643 } 1644 1645 vsw_readmd_exit: 1646 1647 kmem_free(listp, listsz); 1648 (void) md_fini_handle(mdp); 1649 return (rv); 1650 } 1651 1652 /* 1653 * Read the initial start-of-day values from the specified MD node. 
1654 */ 1655 static int 1656 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1657 { 1658 uint64_t macaddr = 0; 1659 1660 D1(vswp, "%s: enter", __func__); 1661 1662 if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) { 1663 return (1); 1664 } 1665 1666 /* mac address for vswitch device itself */ 1667 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1668 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1669 vswp->instance); 1670 return (1); 1671 } 1672 1673 vsw_save_lmacaddr(vswp, macaddr); 1674 1675 if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) { 1676 DWARN(vswp, "%s: Unable to read %s property from MD, " 1677 "defaulting to 'switched' mode", 1678 __func__, smode_propname); 1679 1680 vswp->smode = VSW_LAYER2; 1681 } 1682 1683 /* read mtu */ 1684 vsw_mtu_read(vswp, mdp, node, &vswp->mtu); 1685 if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) { 1686 vswp->mtu = ETHERMTU; 1687 } 1688 vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) + 1689 VLAN_TAGSZ; 1690 1691 /* read vlan id properties of this vsw instance */ 1692 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid, 1693 &vswp->vids, &vswp->nvids, &vswp->default_vlan_id); 1694 1695 /* read priority-ether-types */ 1696 vsw_read_pri_eth_types(vswp, mdp, node); 1697 1698 D1(vswp, "%s: exit", __func__); 1699 return (0); 1700 } 1701 1702 /* 1703 * Read vlan id properties of the given MD node. 
1704 * Arguments: 1705 * arg: device argument(vsw device or a port) 1706 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port) 1707 * mdp: machine description 1708 * node: md node cookie 1709 * 1710 * Returns: 1711 * pvidp: port-vlan-id of the node 1712 * vidspp: list of vlan-ids of the node 1713 * nvidsp: # of vlan-ids in the list 1714 * default_idp: default-vlan-id of the node(if node is vsw device) 1715 */ 1716 static void 1717 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node, 1718 uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp, 1719 uint16_t *default_idp) 1720 { 1721 vsw_t *vswp; 1722 vsw_port_t *portp; 1723 char *pvid_propname; 1724 char *vid_propname; 1725 uint_t nvids = 0; 1726 uint32_t vids_size; 1727 int rv; 1728 int i; 1729 uint64_t *data; 1730 uint64_t val; 1731 int size; 1732 int inst; 1733 1734 if (type == VSW_LOCALDEV) { 1735 1736 vswp = (vsw_t *)arg; 1737 pvid_propname = vsw_pvid_propname; 1738 vid_propname = vsw_vid_propname; 1739 inst = vswp->instance; 1740 1741 } else if (type == VSW_VNETPORT) { 1742 1743 portp = (vsw_port_t *)arg; 1744 vswp = portp->p_vswp; 1745 pvid_propname = port_pvid_propname; 1746 vid_propname = port_vid_propname; 1747 inst = portp->p_instance; 1748 1749 } else { 1750 return; 1751 } 1752 1753 if (type == VSW_LOCALDEV && default_idp != NULL) { 1754 rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val); 1755 if (rv != 0) { 1756 DWARN(vswp, "%s: prop(%s) not found", __func__, 1757 vsw_dvid_propname); 1758 1759 *default_idp = vsw_default_vlan_id; 1760 } else { 1761 *default_idp = val & 0xFFF; 1762 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1763 vsw_dvid_propname, inst, *default_idp); 1764 } 1765 } 1766 1767 rv = md_get_prop_val(mdp, node, pvid_propname, &val); 1768 if (rv != 0) { 1769 DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname); 1770 *pvidp = vsw_default_vlan_id; 1771 } else { 1772 1773 *pvidp = val & 0xFFF; 1774 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1775 
pvid_propname, inst, *pvidp); 1776 } 1777 1778 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data, 1779 &size); 1780 if (rv != 0) { 1781 D2(vswp, "%s: prop(%s) not found", __func__, vid_propname); 1782 size = 0; 1783 } else { 1784 size /= sizeof (uint64_t); 1785 } 1786 nvids = size; 1787 1788 if (nvids != 0) { 1789 D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst); 1790 vids_size = sizeof (vsw_vlanid_t) * nvids; 1791 *vidspp = kmem_zalloc(vids_size, KM_SLEEP); 1792 for (i = 0; i < nvids; i++) { 1793 (*vidspp)[i].vl_vid = data[i] & 0xFFFF; 1794 (*vidspp)[i].vl_set = B_FALSE; 1795 D2(vswp, " %d ", (*vidspp)[i].vl_vid); 1796 } 1797 D2(vswp, "\n"); 1798 } 1799 1800 *nvidsp = nvids; 1801 } 1802 1803 /* 1804 * This function reads "priority-ether-types" property from md. This property 1805 * is used to enable support for priority frames. Applications which need 1806 * guaranteed and timely delivery of certain high priority frames to/from 1807 * a vnet or vsw within ldoms, should configure this property by providing 1808 * the ether type(s) for which the priority facility is needed. 1809 * Normal data frames are delivered over a ldc channel using the descriptor 1810 * ring mechanism which is constrained by factors such as descriptor ring size, 1811 * the rate at which the ring is processed at the peer ldc end point, etc. 1812 * The priority mechanism provides an Out-Of-Band path to send/receive frames 1813 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the 1814 * descriptor ring path and enables a more reliable and timely delivery of 1815 * frames to the peer. 
1816 */ 1817 static void 1818 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1819 { 1820 int rv; 1821 uint16_t *types; 1822 uint64_t *data; 1823 int size; 1824 int i; 1825 size_t mblk_sz; 1826 1827 rv = md_get_prop_data(mdp, node, pri_types_propname, 1828 (uint8_t **)&data, &size); 1829 if (rv != 0) { 1830 /* 1831 * Property may not exist if we are running pre-ldoms1.1 f/w. 1832 * Check if 'vsw_pri_eth_type' has been set in that case. 1833 */ 1834 if (vsw_pri_eth_type != 0) { 1835 size = sizeof (vsw_pri_eth_type); 1836 data = &vsw_pri_eth_type; 1837 } else { 1838 D3(vswp, "%s: prop(%s) not found", __func__, 1839 pri_types_propname); 1840 size = 0; 1841 } 1842 } 1843 1844 if (size == 0) { 1845 vswp->pri_num_types = 0; 1846 return; 1847 } 1848 1849 /* 1850 * we have some priority-ether-types defined; 1851 * allocate a table of these types and also 1852 * allocate a pool of mblks to transmit these 1853 * priority packets. 1854 */ 1855 size /= sizeof (uint64_t); 1856 vswp->pri_num_types = size; 1857 vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP); 1858 for (i = 0, types = vswp->pri_types; i < size; i++) { 1859 types[i] = data[i] & 0xFFFF; 1860 } 1861 mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7; 1862 (void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp); 1863 } 1864 1865 static void 1866 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu) 1867 { 1868 int rv; 1869 int inst; 1870 uint64_t val; 1871 char *mtu_propname; 1872 1873 mtu_propname = vsw_mtu_propname; 1874 inst = vswp->instance; 1875 1876 rv = md_get_prop_val(mdp, node, mtu_propname, &val); 1877 if (rv != 0) { 1878 D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname); 1879 *mtu = vsw_ethermtu; 1880 } else { 1881 1882 *mtu = val & 0xFFFF; 1883 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1884 mtu_propname, inst, *mtu); 1885 } 1886 } 1887 1888 /* 1889 * Update the mtu of the vsw device. 
We first check if the device has been 1890 * plumbed and if so fail the mtu update. Otherwise, we continue to update the 1891 * new mtu and reset all ports to initiate handshake re-negotiation with peers 1892 * using the new mtu. 1893 */ 1894 static int 1895 vsw_mtu_update(vsw_t *vswp, uint32_t mtu) 1896 { 1897 int rv; 1898 1899 WRITE_ENTER(&vswp->if_lockrw); 1900 1901 if (vswp->if_state & VSW_IF_UP) { 1902 1903 RW_EXIT(&vswp->if_lockrw); 1904 1905 cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update" 1906 " as the device is plumbed\n", vswp->instance); 1907 return (EBUSY); 1908 1909 } else { 1910 1911 D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n", 1912 __func__, vswp->mtu, mtu); 1913 1914 vswp->mtu = mtu; 1915 vswp->max_frame_size = vswp->mtu + 1916 sizeof (struct ether_header) + VLAN_TAGSZ; 1917 1918 rv = mac_maxsdu_update(vswp->if_mh, mtu); 1919 if (rv != 0) { 1920 cmn_err(CE_NOTE, 1921 "!vsw%d: Unable to update mtu with mac" 1922 " layer\n", vswp->instance); 1923 } 1924 1925 RW_EXIT(&vswp->if_lockrw); 1926 1927 /* Reset ports to renegotiate with the new mtu */ 1928 vsw_reset_ports(vswp); 1929 1930 } 1931 1932 return (0); 1933 } 1934 1935 /* 1936 * Check to see if the relevant properties in the specified node have 1937 * changed, and if so take the appropriate action. 1938 * 1939 * If any of the properties are missing or invalid we don't take 1940 * any action, as this function should only be invoked when modifications 1941 * have been made to what we assume is a working configuration, which 1942 * we leave active. 1943 * 1944 * Note it is legal for this routine to be invoked even if none of the 1945 * properties in the port node within the MD have actually changed. 
1946 */ 1947 static void 1948 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1949 { 1950 char physname[LIFNAMSIZ]; 1951 char drv[LIFNAMSIZ]; 1952 uint_t ddi_instance; 1953 uint8_t new_smode; 1954 int i; 1955 uint64_t macaddr = 0; 1956 enum {MD_init = 0x1, 1957 MD_physname = 0x2, 1958 MD_macaddr = 0x4, 1959 MD_smode = 0x8, 1960 MD_vlans = 0x10, 1961 MD_mtu = 0x20} updated; 1962 int rv; 1963 uint16_t pvid; 1964 vsw_vlanid_t *vids; 1965 uint16_t nvids; 1966 uint32_t mtu; 1967 1968 updated = MD_init; 1969 1970 D1(vswp, "%s: enter", __func__); 1971 1972 /* 1973 * Check if name of physical device in MD has changed. 1974 */ 1975 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { 1976 /* 1977 * Do basic sanity check on new device name/instance, 1978 * if its non NULL. It is valid for the device name to 1979 * have changed from a non NULL to a NULL value, i.e. 1980 * the vsw is being changed to 'routed' mode. 1981 */ 1982 if ((strlen(physname) != 0) && 1983 (ddi_parse(physname, drv, 1984 &ddi_instance) != DDI_SUCCESS)) { 1985 cmn_err(CE_WARN, "!vsw%d: physical device %s is not" 1986 " a valid device name/instance", 1987 vswp->instance, physname); 1988 goto fail_reconf; 1989 } 1990 1991 if (strcmp(physname, vswp->physname)) { 1992 D2(vswp, "%s: device name changed from %s to %s", 1993 __func__, vswp->physname, physname); 1994 1995 updated |= MD_physname; 1996 } else { 1997 D2(vswp, "%s: device name unchanged at %s", 1998 __func__, vswp->physname); 1999 } 2000 } else { 2001 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " 2002 "device from updated MD.", vswp->instance); 2003 goto fail_reconf; 2004 } 2005 2006 /* 2007 * Check if MAC address has changed. 
2008 */ 2009 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 2010 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 2011 vswp->instance); 2012 goto fail_reconf; 2013 } else { 2014 uint64_t maddr = macaddr; 2015 READ_ENTER(&vswp->if_lockrw); 2016 for (i = ETHERADDRL - 1; i >= 0; i--) { 2017 if (vswp->if_addr.ether_addr_octet[i] 2018 != (macaddr & 0xFF)) { 2019 D2(vswp, "%s: octet[%d] 0x%x != 0x%x", 2020 __func__, i, 2021 vswp->if_addr.ether_addr_octet[i], 2022 (macaddr & 0xFF)); 2023 updated |= MD_macaddr; 2024 macaddr = maddr; 2025 break; 2026 } 2027 macaddr >>= 8; 2028 } 2029 RW_EXIT(&vswp->if_lockrw); 2030 if (updated & MD_macaddr) { 2031 vsw_save_lmacaddr(vswp, macaddr); 2032 } 2033 } 2034 2035 /* 2036 * Check if switching modes have changed. 2037 */ 2038 if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) { 2039 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", 2040 vswp->instance, smode_propname); 2041 goto fail_reconf; 2042 } else { 2043 if (new_smode != vswp->smode) { 2044 D2(vswp, "%s: switching mode changed from %d to %d", 2045 __func__, vswp->smode, new_smode); 2046 2047 updated |= MD_smode; 2048 } 2049 } 2050 2051 /* Read the vlan ids */ 2052 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids, 2053 &nvids, NULL); 2054 2055 /* Determine if there are any vlan id updates */ 2056 if ((pvid != vswp->pvid) || /* pvid changed? */ 2057 (nvids != vswp->nvids) || /* # of vids changed? */ 2058 ((nvids != 0) && (vswp->nvids != 0) && /* vids changed? 
*/ 2059 !vsw_cmp_vids(vids, vswp->vids, nvids))) { 2060 updated |= MD_vlans; 2061 } 2062 2063 /* Read mtu */ 2064 vsw_mtu_read(vswp, mdp, node, &mtu); 2065 if (mtu != vswp->mtu) { 2066 if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) { 2067 updated |= MD_mtu; 2068 } else { 2069 cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update" 2070 " as the specified value:%d is invalid\n", 2071 vswp->instance, mtu); 2072 } 2073 } 2074 2075 /* 2076 * Now make any changes which are needed... 2077 */ 2078 2079 if (updated & (MD_physname | MD_smode | MD_mtu)) { 2080 2081 /* 2082 * Stop any pending timeout to setup switching mode. 2083 */ 2084 vsw_stop_switching_timeout(vswp); 2085 2086 /* Cleanup HybridIO */ 2087 vsw_hio_cleanup(vswp); 2088 2089 /* 2090 * Remove unicst, mcst addrs of vsw interface 2091 * and ports from the physdev. This also closes 2092 * the corresponding mac clients. 2093 */ 2094 vsw_unset_addrs(vswp); 2095 2096 /* 2097 * Stop, detach and close the old device.. 2098 */ 2099 mutex_enter(&vswp->mac_lock); 2100 vsw_mac_close(vswp); 2101 mutex_exit(&vswp->mac_lock); 2102 2103 /* 2104 * Update phys name. 2105 */ 2106 if (updated & MD_physname) { 2107 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", 2108 vswp->instance, vswp->physname, physname); 2109 (void) strncpy(vswp->physname, 2110 physname, strlen(physname) + 1); 2111 } 2112 2113 /* 2114 * Update array with the new switch mode values. 2115 */ 2116 if (updated & MD_smode) { 2117 vswp->smode = new_smode; 2118 } 2119 2120 /* Update mtu */ 2121 if (updated & MD_mtu) { 2122 rv = vsw_mtu_update(vswp, mtu); 2123 if (rv != 0) { 2124 goto fail_update; 2125 } 2126 } 2127 2128 /* 2129 * ..and attach, start the new device. 2130 */ 2131 rv = vsw_setup_switching(vswp); 2132 if (rv == EAGAIN) { 2133 /* 2134 * Unable to setup switching mode. 2135 * As the error is EAGAIN, schedule a timeout to retry 2136 * and return. 
Programming addresses of ports and 2137 * vsw interface will be done when the timeout handler 2138 * completes successfully. 2139 */ 2140 mutex_enter(&vswp->swtmout_lock); 2141 2142 vswp->swtmout_enabled = B_TRUE; 2143 vswp->swtmout_id = 2144 timeout(vsw_setup_switching_timeout, vswp, 2145 (vsw_setup_switching_delay * 2146 drv_usectohz(MICROSEC))); 2147 2148 mutex_exit(&vswp->swtmout_lock); 2149 2150 return; 2151 2152 } else if (rv) { 2153 goto fail_update; 2154 } 2155 2156 vsw_setup_layer2_post_process(vswp); 2157 } else if (updated & MD_macaddr) { 2158 /* 2159 * We enter here if only MD_macaddr is exclusively updated. 2160 * If MD_physname and/or MD_smode are also updated, then 2161 * as part of that, we would have implicitly processed 2162 * MD_macaddr update (above). 2163 */ 2164 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", 2165 vswp->instance, macaddr); 2166 2167 READ_ENTER(&vswp->if_lockrw); 2168 if (vswp->if_state & VSW_IF_UP) { 2169 /* reconfigure with new address */ 2170 vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0); 2171 2172 /* 2173 * Notify the MAC layer of the changed address. 2174 */ 2175 mac_unicst_update(vswp->if_mh, 2176 (uint8_t *)&vswp->if_addr); 2177 2178 } 2179 RW_EXIT(&vswp->if_lockrw); 2180 2181 } 2182 2183 if (updated & MD_vlans) { 2184 /* Remove existing vlan ids from the hash table. 
*/ 2185 vsw_vlan_remove_ids(vswp, VSW_LOCALDEV); 2186 2187 if (vswp->if_state & VSW_IF_UP) { 2188 vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids); 2189 } else { 2190 if (vswp->nvids != 0) { 2191 kmem_free(vswp->vids, 2192 sizeof (vsw_vlanid_t) * vswp->nvids); 2193 } 2194 vswp->vids = vids; 2195 vswp->nvids = nvids; 2196 vswp->pvid = pvid; 2197 } 2198 2199 /* add these new vlan ids into hash table */ 2200 vsw_vlan_add_ids(vswp, VSW_LOCALDEV); 2201 } else { 2202 if (nvids != 0) { 2203 kmem_free(vids, sizeof (vsw_vlanid_t) * nvids); 2204 } 2205 } 2206 2207 return; 2208 2209 fail_reconf: 2210 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); 2211 return; 2212 2213 fail_update: 2214 cmn_err(CE_WARN, "!vsw%d: re-configuration failed", 2215 vswp->instance); 2216 } 2217 2218 /* 2219 * Read the port's md properties. 2220 */ 2221 static int 2222 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 2223 md_t *mdp, mde_cookie_t *node) 2224 { 2225 uint64_t ldc_id; 2226 uint8_t *addrp; 2227 int i, addrsz; 2228 int num_nodes = 0, nchan = 0; 2229 int listsz = 0; 2230 mde_cookie_t *listp = NULL; 2231 struct ether_addr ea; 2232 uint64_t macaddr; 2233 uint64_t inst = 0; 2234 uint64_t val; 2235 2236 if (md_get_prop_val(mdp, *node, id_propname, &inst)) { 2237 DWARN(vswp, "%s: prop(%s) not found", __func__, 2238 id_propname); 2239 return (1); 2240 } 2241 2242 /* 2243 * Find the channel endpoint node(s) (which should be under this 2244 * port node) which contain the channel id(s). 
2245 */ 2246 if ((num_nodes = md_node_count(mdp)) <= 0) { 2247 DERR(vswp, "%s: invalid number of nodes found (%d)", 2248 __func__, num_nodes); 2249 return (1); 2250 } 2251 2252 D2(vswp, "%s: %d nodes found", __func__, num_nodes); 2253 2254 /* allocate enough space for node list */ 2255 listsz = num_nodes * sizeof (mde_cookie_t); 2256 listp = kmem_zalloc(listsz, KM_SLEEP); 2257 2258 nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname), 2259 md_find_name(mdp, "fwd"), listp); 2260 2261 if (nchan <= 0) { 2262 DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname); 2263 kmem_free(listp, listsz); 2264 return (1); 2265 } 2266 2267 D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname); 2268 2269 /* use property from first node found */ 2270 if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) { 2271 DWARN(vswp, "%s: prop(%s) not found\n", __func__, 2272 id_propname); 2273 kmem_free(listp, listsz); 2274 return (1); 2275 } 2276 2277 /* don't need list any more */ 2278 kmem_free(listp, listsz); 2279 2280 D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id); 2281 2282 /* read mac-address property */ 2283 if (md_get_prop_data(mdp, *node, remaddr_propname, 2284 &addrp, &addrsz)) { 2285 DWARN(vswp, "%s: prop(%s) not found", 2286 __func__, remaddr_propname); 2287 return (1); 2288 } 2289 2290 if (addrsz < ETHERADDRL) { 2291 DWARN(vswp, "%s: invalid address size", __func__); 2292 return (1); 2293 } 2294 2295 macaddr = *((uint64_t *)addrp); 2296 D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr); 2297 2298 for (i = ETHERADDRL - 1; i >= 0; i--) { 2299 ea.ether_addr_octet[i] = macaddr & 0xFF; 2300 macaddr >>= 8; 2301 } 2302 2303 /* now update all properties into the port */ 2304 portp->p_vswp = vswp; 2305 portp->p_instance = inst; 2306 portp->addr_set = B_FALSE; 2307 ether_copy(&ea, &portp->p_macaddr); 2308 if (nchan > VSW_PORT_MAX_LDCS) { 2309 D2(vswp, "%s: using first of %d ldc ids", 2310 __func__, nchan); 2311 nchan = VSW_PORT_MAX_LDCS; 2312 
} 2313 portp->num_ldcs = nchan; 2314 portp->ldc_ids = 2315 kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP); 2316 bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan); 2317 2318 /* read vlan id properties of this port node */ 2319 vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid, 2320 &portp->vids, &portp->nvids, NULL); 2321 2322 /* Check if hybrid property is present */ 2323 if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) { 2324 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2325 portp->p_hio_enabled = B_TRUE; 2326 } else { 2327 portp->p_hio_enabled = B_FALSE; 2328 } 2329 /* 2330 * Port hio capability determined after version 2331 * negotiation, i.e., when we know the peer is HybridIO capable. 2332 */ 2333 portp->p_hio_capable = B_FALSE; 2334 return (0); 2335 } 2336 2337 /* 2338 * Add a new port to the system. 2339 * 2340 * Returns 0 on success, 1 on failure. 2341 */ 2342 int 2343 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) 2344 { 2345 vsw_port_t *portp; 2346 int rv; 2347 2348 portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP); 2349 2350 rv = vsw_port_read_props(portp, vswp, mdp, node); 2351 if (rv != 0) { 2352 kmem_free(portp, sizeof (*portp)); 2353 return (1); 2354 } 2355 2356 rv = vsw_port_attach(portp); 2357 if (rv != 0) { 2358 DERR(vswp, "%s: failed to attach port", __func__); 2359 return (1); 2360 } 2361 2362 return (0); 2363 } 2364 2365 static int 2366 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 2367 md_t *prev_mdp, mde_cookie_t prev_mdex) 2368 { 2369 uint64_t cport_num; 2370 uint64_t pport_num; 2371 vsw_port_list_t *plistp; 2372 vsw_port_t *portp; 2373 boolean_t updated_vlans = B_FALSE; 2374 uint16_t pvid; 2375 vsw_vlanid_t *vids; 2376 uint16_t nvids; 2377 uint64_t val; 2378 boolean_t hio_enabled = B_FALSE; 2379 2380 /* 2381 * For now, we get port updates only if vlan ids changed. 2382 * We read the port num and do some sanity check. 
2383 */ 2384 if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) { 2385 return (1); 2386 } 2387 2388 if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) { 2389 return (1); 2390 } 2391 if (cport_num != pport_num) 2392 return (1); 2393 2394 plistp = &(vswp->plist); 2395 2396 READ_ENTER(&plistp->lockrw); 2397 2398 portp = vsw_lookup_port(vswp, cport_num); 2399 if (portp == NULL) { 2400 RW_EXIT(&plistp->lockrw); 2401 return (1); 2402 } 2403 2404 /* Read the vlan ids */ 2405 vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid, 2406 &vids, &nvids, NULL); 2407 2408 /* Determine if there are any vlan id updates */ 2409 if ((pvid != portp->pvid) || /* pvid changed? */ 2410 (nvids != portp->nvids) || /* # of vids changed? */ 2411 ((nvids != 0) && (portp->nvids != 0) && /* vids changed? */ 2412 !vsw_cmp_vids(vids, portp->vids, nvids))) { 2413 updated_vlans = B_TRUE; 2414 } 2415 2416 if (updated_vlans == B_TRUE) { 2417 2418 /* Remove existing vlan ids from the hash table. */ 2419 vsw_vlan_remove_ids(portp, VSW_VNETPORT); 2420 2421 /* Reconfigure vlans with network device */ 2422 vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids); 2423 2424 /* add these new vlan ids into hash table */ 2425 vsw_vlan_add_ids(portp, VSW_VNETPORT); 2426 2427 /* reset the port if it is vlan unaware (ver < 1.3) */ 2428 vsw_vlan_unaware_port_reset(portp); 2429 } 2430 2431 /* Check if hybrid property is present */ 2432 if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) { 2433 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2434 hio_enabled = B_TRUE; 2435 } 2436 2437 if (portp->p_hio_enabled != hio_enabled) { 2438 vsw_hio_port_update(portp, hio_enabled); 2439 } 2440 2441 RW_EXIT(&plistp->lockrw); 2442 2443 return (0); 2444 } 2445 2446 /* 2447 * vsw_mac_rx -- A common function to send packets to the interface. 
2448 * By default this function check if the interface is UP or not, the 2449 * rest of the behaviour depends on the flags as below: 2450 * 2451 * VSW_MACRX_PROMISC -- Check if the promisc mode set or not. 2452 * VSW_MACRX_COPYMSG -- Make a copy of the message(s). 2453 * VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack. 2454 */ 2455 void 2456 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 2457 mblk_t *mp, vsw_macrx_flags_t flags) 2458 { 2459 mblk_t *mpt; 2460 2461 D1(vswp, "%s:enter\n", __func__); 2462 READ_ENTER(&vswp->if_lockrw); 2463 /* Check if the interface is up */ 2464 if (!(vswp->if_state & VSW_IF_UP)) { 2465 RW_EXIT(&vswp->if_lockrw); 2466 /* Free messages only if FREEMSG flag specified */ 2467 if (flags & VSW_MACRX_FREEMSG) { 2468 freemsgchain(mp); 2469 } 2470 D1(vswp, "%s:exit\n", __func__); 2471 return; 2472 } 2473 /* 2474 * If PROMISC flag is passed, then check if 2475 * the interface is in the PROMISC mode. 2476 * If not, drop the messages. 2477 */ 2478 if (flags & VSW_MACRX_PROMISC) { 2479 if (!(vswp->if_state & VSW_IF_PROMISC)) { 2480 RW_EXIT(&vswp->if_lockrw); 2481 /* Free messages only if FREEMSG flag specified */ 2482 if (flags & VSW_MACRX_FREEMSG) { 2483 freemsgchain(mp); 2484 } 2485 D1(vswp, "%s:exit\n", __func__); 2486 return; 2487 } 2488 } 2489 RW_EXIT(&vswp->if_lockrw); 2490 /* 2491 * If COPYMSG flag is passed, then make a copy 2492 * of the message chain and send up the copy. 
2493 */ 2494 if (flags & VSW_MACRX_COPYMSG) { 2495 mp = copymsgchain(mp); 2496 if (mp == NULL) { 2497 D1(vswp, "%s:exit\n", __func__); 2498 return; 2499 } 2500 } 2501 2502 D2(vswp, "%s: sending up stack", __func__); 2503 2504 mpt = NULL; 2505 (void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt); 2506 if (mp != NULL) { 2507 mac_rx(vswp->if_mh, mrh, mp); 2508 } 2509 D1(vswp, "%s:exit\n", __func__); 2510 } 2511 2512 /* copy mac address of vsw into soft state structure */ 2513 static void 2514 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr) 2515 { 2516 int i; 2517 2518 WRITE_ENTER(&vswp->if_lockrw); 2519 for (i = ETHERADDRL - 1; i >= 0; i--) { 2520 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF; 2521 macaddr >>= 8; 2522 } 2523 RW_EXIT(&vswp->if_lockrw); 2524 } 2525 2526 /* Compare VLAN ids, array size expected to be same. */ 2527 static boolean_t 2528 vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids) 2529 { 2530 int i, j; 2531 uint16_t vid; 2532 2533 for (i = 0; i < nvids; i++) { 2534 vid = vids1[i].vl_vid; 2535 for (j = 0; j < nvids; j++) { 2536 if (vid == vids2[i].vl_vid) 2537 break; 2538 } 2539 if (j == nvids) { 2540 return (B_FALSE); 2541 } 2542 } 2543 return (B_TRUE); 2544 } 2545