1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/debug.h> 30 #include <sys/time.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/user.h> 34 #include <sys/stropts.h> 35 #include <sys/stream.h> 36 #include <sys/strlog.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/kmem.h> 41 #include <sys/conf.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/ksynch.h> 45 #include <sys/stat.h> 46 #include <sys/kstat.h> 47 #include <sys/vtrace.h> 48 #include <sys/strsun.h> 49 #include <sys/dlpi.h> 50 #include <sys/ethernet.h> 51 #include <net/if.h> 52 #include <sys/varargs.h> 53 #include <sys/machsystm.h> 54 #include <sys/modctl.h> 55 #include <sys/modhash.h> 56 #include <sys/mac_provider.h> 57 #include <sys/mac_ether.h> 58 #include <sys/taskq.h> 59 #include <sys/note.h> 60 #include <sys/mach_descrip.h> 61 #include <sys/mac_provider.h> 62 #include <sys/mdeg.h> 63 #include <sys/ldc.h> 64 #include <sys/vsw_fdb.h> 
65 #include <sys/vsw.h> 66 #include <sys/vio_mailbox.h> 67 #include <sys/vnet_mailbox.h> 68 #include <sys/vnet_common.h> 69 #include <sys/vio_util.h> 70 #include <sys/sdt.h> 71 #include <sys/atomic.h> 72 #include <sys/callb.h> 73 #include <sys/vlan.h> 74 75 /* 76 * Function prototypes. 77 */ 78 static int vsw_attach(dev_info_t *, ddi_attach_cmd_t); 79 static int vsw_detach(dev_info_t *, ddi_detach_cmd_t); 80 static int vsw_unattach(vsw_t *vswp); 81 static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *); 82 static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *); 83 static int vsw_mod_cleanup(void); 84 85 /* MDEG routines */ 86 static int vsw_mdeg_register(vsw_t *vswp); 87 static void vsw_mdeg_unregister(vsw_t *vswp); 88 static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *); 89 static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *); 90 static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t); 91 static int vsw_read_mdprops(vsw_t *vswp); 92 static void vsw_vlan_read_ids(void *arg, int type, md_t *mdp, 93 mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp, 94 uint16_t *nvidsp, uint16_t *default_idp); 95 static int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 96 md_t *mdp, mde_cookie_t *node); 97 static void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, 98 mde_cookie_t node); 99 static void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 100 uint32_t *mtu); 101 static int vsw_mtu_update(vsw_t *vswp, uint32_t mtu); 102 static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t); 103 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 104 static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1, 105 vsw_vlanid_t *vids2, int nvids); 106 107 /* Mac driver related routines */ 108 static int vsw_mac_register(vsw_t *); 109 static int vsw_mac_unregister(vsw_t *); 110 static int vsw_m_stat(void *, uint_t, uint64_t *); 111 static void vsw_m_stop(void *arg); 112 static int vsw_m_start(void *arg); 
113 static int vsw_m_unicst(void *arg, const uint8_t *); 114 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *); 115 static int vsw_m_promisc(void *arg, boolean_t); 116 static mblk_t *vsw_m_tx(void *arg, mblk_t *); 117 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 118 mblk_t *mp, vsw_macrx_flags_t flags); 119 120 /* 121 * Functions imported from other files. 122 */ 123 extern void vsw_setup_switching_thread(void *arg); 124 extern int vsw_setup_switching_start(vsw_t *vswp); 125 extern void vsw_setup_switching_stop(vsw_t *vswp); 126 extern int vsw_setup_switching(vsw_t *); 127 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, 128 vsw_port_t *port, mac_resource_handle_t mrh); 129 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 130 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 131 extern void vsw_del_mcst_vsw(vsw_t *); 132 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 133 extern void vsw_detach_ports(vsw_t *vswp); 134 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 135 extern int vsw_port_detach(vsw_t *vswp, int p_instance); 136 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 137 md_t *prev_mdp, mde_cookie_t prev_mdex); 138 extern int vsw_port_attach(vsw_port_t *port); 139 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 140 extern int vsw_mac_open(vsw_t *vswp); 141 extern void vsw_mac_close(vsw_t *vswp); 142 extern void vsw_mac_cleanup_ports(vsw_t *vswp); 143 extern void vsw_unset_addrs(vsw_t *vswp); 144 extern void vsw_setup_layer2_post_process(vsw_t *vswp); 145 extern void vsw_create_vlans(void *arg, int type); 146 extern void vsw_destroy_vlans(void *arg, int type); 147 extern void vsw_vlan_add_ids(void *arg, int type); 148 extern void vsw_vlan_remove_ids(void *arg, int type); 149 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 150 extern uint32_t vsw_vlan_frame_untag(void *arg, int 
type, mblk_t **np, 151 mblk_t **npt); 152 extern mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 153 extern void vsw_hio_cleanup(vsw_t *vswp); 154 extern void vsw_hio_start_ports(vsw_t *vswp); 155 extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled); 156 extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int); 157 extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int); 158 extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid, 159 vsw_vlanid_t *new_vids, int new_nvids); 160 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type); 161 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type); 162 extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans, 163 uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids); 164 extern void vsw_reset_ports(vsw_t *vswp); 165 extern void vsw_port_reset(vsw_port_t *portp); 166 void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled); 167 168 /* 169 * Internal tunables. 
170 */ 171 int vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */ 172 int vsw_wretries = 100; /* # of write attempts */ 173 int vsw_desc_delay = 0; /* delay in us */ 174 int vsw_read_attempts = 5; /* # of reads of descriptor */ 175 int vsw_setup_switching_delay = 3; /* setup sw timeout interval in sec */ 176 int vsw_mac_open_retries = 300; /* max # of mac_open() retries */ 177 /* 300*3 = 900sec(15min) of max tmout */ 178 int vsw_ldc_tx_delay = 5; /* delay(ticks) for tx retries */ 179 int vsw_ldc_tx_retries = 10; /* # of ldc tx retries */ 180 int vsw_ldc_retries = 5; /* # of ldc_close() retries */ 181 int vsw_ldc_delay = 1000; /* 1 ms delay for ldc_close() */ 182 boolean_t vsw_ldc_rxthr_enabled = B_TRUE; /* LDC Rx thread enabled */ 183 boolean_t vsw_ldc_txthr_enabled = B_TRUE; /* LDC Tx thread enabled */ 184 185 uint32_t vsw_fdb_nchains = 8; /* # of chains in fdb hash table */ 186 uint32_t vsw_vlan_nchains = 4; /* # of chains in vlan id hash table */ 187 uint32_t vsw_ethermtu = 1500; /* mtu of the device */ 188 189 /* delay in usec to wait for all references on a fdb entry to be dropped */ 190 uint32_t vsw_fdbe_refcnt_delay = 10; 191 192 /* 193 * Default vlan id. This is only used internally when the "default-vlan-id" 194 * property is not present in the MD device node. Therefore, this should not be 195 * used as a tunable; if this value is changed, the corresponding variable 196 * should be updated to the same value in all vnets connected to this vsw. 197 */ 198 uint16_t vsw_default_vlan_id = 1; 199 200 /* 201 * Workaround for a version handshake bug in obp's vnet. 202 * If vsw initiates version negotiation starting from the highest version, 203 * obp sends a nack and terminates version handshake. To work around 204 * this, we do not initiate version handshake when the channel comes up. 205 * Instead, we wait for the peer to send its version info msg and go through 206 * the version protocol exchange.
If we successfully negotiate a version, 207 * before sending the ack, we send our version info msg to the peer 208 * using the <major,minor> version that we are about to ack. 209 */ 210 boolean_t vsw_obp_ver_proto_workaround = B_TRUE; 211 212 /* 213 * In the absence of "priority-ether-types" property in MD, the following 214 * internal tunable can be set to specify a single priority ethertype. 215 */ 216 uint64_t vsw_pri_eth_type = 0; 217 218 /* 219 * Number of transmit priority buffers that are preallocated per device. 220 * This number is chosen to be a small value to throttle transmission 221 * of priority packets. Note: Must be a power of 2 for vio_create_mblks(). 222 */ 223 uint32_t vsw_pri_tx_nmblks = 64; 224 225 /* 226 * Number of RARP packets sent to announce macaddr to the physical switch, 227 * after vsw's physical device is changed dynamically or after a guest (client 228 * vnet) is live migrated in. 229 */ 230 uint32_t vsw_publish_macaddr_count = 3; 231 232 boolean_t vsw_hio_enabled = B_TRUE; /* Enable/disable HybridIO */ 233 int vsw_hio_max_cleanup_retries = 10; /* Max retries for HybridIO cleanup */ 234 int vsw_hio_cleanup_delay = 10000; /* 10ms */ 235 236 /* Number of transmit descriptors - must be power of 2 */ 237 uint32_t vsw_ntxds = VSW_RING_NUM_EL; 238 239 /* 240 * Max number of mblks received in one receive operation. 241 * The initializer is a floating-point constant expression (60% of VSW_NUM_MBLKS); it is converted to uint32_t, dropping any fraction. */ 242 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6); 243 244 /* 245 * Internal tunables for receive buffer pools, that is, the size and number of 246 * mblks for each pool. At least 3 sizes must be specified if these are used. 247 * The sizes must be specified in increasing order. Non-zero value of the first 248 * size will be used as a hint to use these values instead of the algorithm 249 * that determines the sizes based on MTU.
250 */ 251 uint32_t vsw_mblk_size1 = 0; /* 0: pool sizes are derived from the MTU instead */ 252 uint32_t vsw_mblk_size2 = 0; 253 uint32_t vsw_mblk_size3 = 0; 254 uint32_t vsw_mblk_size4 = 0; 255 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS; /* number of mblks for pool1 */ 256 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS; /* number of mblks for pool2 */ 257 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS; /* number of mblks for pool3 */ 258 uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS; /* number of mblks for pool4 */ 259 260 /* 261 * Set this to non-zero to enable additional internal receive buffer pools 262 * based on the MTU of the device for better performance at the cost of more 263 * memory consumption. This is turned off by default, to use allocb(9F) for 264 * receive buffer allocations of sizes > 2K. 265 */ 266 boolean_t vsw_jumbo_rxpools = B_FALSE; 267 268 /* 269 * vsw_max_tx_qcount is the maximum # of packets that can be queued 270 * before the tx worker thread begins processing the queue. Its value 271 * is chosen to be 4x the default length of tx descriptor ring.
272 */ 273 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL; 274 275 /* 276 * MAC callbacks, handed to the MAC layer through m_callbacks in vsw_mac_register(). 277 */ 278 static mac_callbacks_t vsw_m_callbacks = { 279 0, 280 vsw_m_stat, 281 vsw_m_start, 282 vsw_m_stop, 283 vsw_m_promisc, 284 vsw_m_multicst, 285 vsw_m_unicst, 286 vsw_m_tx, 287 NULL, 288 NULL, 289 NULL 290 }; 291 /* Character/block entry points: open and close are no-ops (nulldev), every data entry point is rejected (nodev). */ 292 static struct cb_ops vsw_cb_ops = { 293 nulldev, /* cb_open */ 294 nulldev, /* cb_close */ 295 nodev, /* cb_strategy */ 296 nodev, /* cb_print */ 297 nodev, /* cb_dump */ 298 nodev, /* cb_read */ 299 nodev, /* cb_write */ 300 nodev, /* cb_ioctl */ 301 nodev, /* cb_devmap */ 302 nodev, /* cb_mmap */ 303 nodev, /* cb_segmap */ 304 nochpoll, /* cb_chpoll */ 305 ddi_prop_op, /* cb_prop_op */ 306 NULL, /* cb_stream */ 307 D_MP, /* cb_flag */ 308 CB_REV, /* rev */ 309 nodev, /* int (*cb_aread)() */ 310 nodev /* int (*cb_awrite)() */ 311 }; 312 /* Device operations; also passed to mac_init_ops()/mac_fini_ops() in _init()/_fini(). */ 313 static struct dev_ops vsw_ops = { 314 DEVO_REV, /* devo_rev */ 315 0, /* devo_refcnt */ 316 NULL, /* devo_getinfo */ 317 nulldev, /* devo_identify */ 318 nulldev, /* devo_probe */ 319 vsw_attach, /* devo_attach */ 320 vsw_detach, /* devo_detach */ 321 nodev, /* devo_reset */ 322 &vsw_cb_ops, /* devo_cb_ops */ 323 (struct bus_ops *)NULL, /* devo_bus_ops */ 324 ddi_power /* devo_power */ 325 }; 326 327 extern struct mod_ops mod_driverops; 328 static struct modldrv vswmodldrv = { 329 &mod_driverops, 330 "sun4v Virtual Switch", 331 &vsw_ops, 332 }; 333 /* Lock helpers for an ldc: take ldc_cblock, then ldc_rxlock, then ldc_txlock; release in the reverse order. Each macro expands to three statements and already ends in ';', so brace it when it is the body of an if/else or a loop. */ 334 #define LDC_ENTER_LOCK(ldcp) \ 335 mutex_enter(&((ldcp)->ldc_cblock));\ 336 mutex_enter(&((ldcp)->ldc_rxlock));\ 337 mutex_enter(&((ldcp)->ldc_txlock)); 338 #define LDC_EXIT_LOCK(ldcp) \ 339 mutex_exit(&((ldcp)->ldc_txlock));\ 340 mutex_exit(&((ldcp)->ldc_rxlock));\ 341 mutex_exit(&((ldcp)->ldc_cblock)); 342 343 /* Driver soft state ptr */ 344 static void *vsw_state; 345 346 /* 347 * Linked list of "vsw_t" structures - one per instance.
348 */ 349 vsw_t *vsw_head = NULL; 350 vio_mblk_pool_t *vsw_rx_poolp = NULL; /* rx mblk pools vsw_unattach() could not destroy; retried in vsw_mod_cleanup() */ 351 krwlock_t vsw_rw; /* held as writer around updates to vsw_head and vsw_rx_poolp */ 352 353 /* 354 * Names of the machine description (MD) properties used by this driver. 355 */ 356 static char vdev_propname[] = "virtual-device"; 357 static char vsw_propname[] = "virtual-network-switch"; 358 static char physdev_propname[] = "vsw-phys-dev"; 359 static char smode_propname[] = "vsw-switch-mode"; 360 static char macaddr_propname[] = "local-mac-address"; 361 static char remaddr_propname[] = "remote-mac-address"; 362 static char ldcids_propname[] = "ldc-ids"; 363 static char chan_propname[] = "channel-endpoint"; 364 static char id_propname[] = "id"; 365 static char reg_propname[] = "reg"; 366 static char pri_types_propname[] = "priority-ether-types"; 367 static char vsw_pvid_propname[] = "port-vlan-id"; 368 static char vsw_vid_propname[] = "vlan-id"; 369 static char vsw_dvid_propname[] = "default-vlan-id"; 370 static char port_pvid_propname[] = "remote-port-vlan-id"; 371 static char port_vid_propname[] = "remote-vlan-id"; 372 static char hybrid_propname[] = "hybrid"; 373 static char vsw_mtu_propname[] = "mtu"; 374 375 /* 376 * Matching criteria passed to the MDEG to register interest 377 * in changes to 'virtual-device-port' nodes identified by their 378 * 'id' property. 379 */ 380 static md_prop_match_t vport_prop_match[] = { 381 { MDET_PROP_VAL, "id" }, 382 { MDET_LIST_END, NULL } 383 }; 384 385 static mdeg_node_match_t vport_match = { "virtual-device-port", 386 vport_prop_match }; 387 388 /* 389 * Matching criteria passed to the MDEG to register interest 390 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified 391 * by their 'name' and 'cfg-handle' properties.
392 */ 393 static md_prop_match_t vdev_prop_match[] = { 394 { MDET_PROP_STR, "name" }, 395 { MDET_PROP_VAL, "cfg-handle" }, 396 { MDET_LIST_END, NULL } 397 }; 398 399 static mdeg_node_match_t vdev_match = { "virtual-device", 400 vdev_prop_match }; 401 402 403 /* 404 * Specification of an MD node passed to the MDEG to filter any 405 * 'vport' nodes that do not belong to the specified node. This 406 * template is copied for each vsw instance and filled in with 407 * the appropriate 'cfg-handle' value before being passed to the MDEG. 408 */ 409 static mdeg_prop_spec_t vsw_prop_template[] = { 410 { MDET_PROP_STR, "name", vsw_propname }, 411 { MDET_PROP_VAL, "cfg-handle", NULL }, 412 { MDET_LIST_END, NULL, NULL } 413 }; 414 415 #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 416 417 #ifdef DEBUG 418 /* 419 * Print debug messages - set to 0x1f to enable all msgs 420 * or 0x0 to turn all off. 421 */ 422 int vswdbg = 0x0; 423 424 /* 425 * debug levels: 426 * 0x01: Function entry/exit tracing 427 * 0x02: Internal function messages 428 * 0x04: Verbose internal messages 429 * 0x08: Warning messages 430 * 0x10: Error messages 431 */ 432 433 void 434 vswdebug(vsw_t *vswp, const char *fmt, ...) 
435 { 436 char buf[512]; 437 va_list ap; 438 439 va_start(ap, fmt); 440 (void) vsprintf(buf, fmt, ap); 441 va_end(ap); 442 443 if (vswp == NULL) 444 cmn_err(CE_CONT, "%s\n", buf); 445 else 446 cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); 447 } 448 449 #endif /* DEBUG */ 450 451 static struct modlinkage modlinkage = { 452 MODREV_1, 453 &vswmodldrv, 454 NULL 455 }; 456 457 int 458 _init(void) 459 { 460 int status; 461 462 rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); 463 464 status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); 465 if (status != 0) { 466 return (status); 467 } 468 469 mac_init_ops(&vsw_ops, DRV_NAME); 470 status = mod_install(&modlinkage); 471 if (status != 0) { 472 ddi_soft_state_fini(&vsw_state); 473 } 474 return (status); 475 } 476 477 int 478 _fini(void) 479 { 480 int status; 481 482 status = vsw_mod_cleanup(); 483 if (status != 0) 484 return (status); 485 486 status = mod_remove(&modlinkage); 487 if (status != 0) 488 return (status); 489 mac_fini_ops(&vsw_ops); 490 ddi_soft_state_fini(&vsw_state); 491 492 rw_destroy(&vsw_rw); 493 494 return (status); 495 } 496 497 int 498 _info(struct modinfo *modinfop) 499 { 500 return (mod_info(&modlinkage, modinfop)); 501 } 502 503 static int 504 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 505 { 506 vsw_t *vswp; 507 int instance; 508 char hashname[MAXNAMELEN]; 509 char qname[TASKQ_NAMELEN]; 510 vsw_attach_progress_t progress = PROG_init; 511 int rv; 512 513 switch (cmd) { 514 case DDI_ATTACH: 515 break; 516 case DDI_RESUME: 517 /* nothing to do for this non-device */ 518 return (DDI_SUCCESS); 519 case DDI_PM_RESUME: 520 default: 521 return (DDI_FAILURE); 522 } 523 524 instance = ddi_get_instance(dip); 525 if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { 526 DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); 527 return (DDI_FAILURE); 528 } 529 vswp = ddi_get_soft_state(vsw_state, instance); 530 531 if (vswp == NULL) { 532 DERR(NULL, "vsw%d: ddi_get_soft_state failed", 
instance); 533 goto vsw_attach_fail; 534 } 535 536 vswp->dip = dip; 537 vswp->instance = instance; 538 ddi_set_driver_private(dip, (caddr_t)vswp); 539 540 mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL); 541 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); 542 mutex_init(&vswp->sw_thr_lock, NULL, MUTEX_DRIVER, NULL); 543 cv_init(&vswp->sw_thr_cv, NULL, CV_DRIVER, NULL); 544 rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL); 545 rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); 546 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); 547 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); 548 549 progress |= PROG_locks; 550 551 rv = vsw_read_mdprops(vswp); 552 if (rv != 0) 553 goto vsw_attach_fail; 554 555 progress |= PROG_readmd; 556 557 /* setup the unicast forwarding database */ 558 (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", 559 vswp->instance); 560 D2(vswp, "creating unicast hash table (%s)...", hashname); 561 vswp->fdb_nchains = vsw_fdb_nchains; 562 vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains, 563 mod_hash_null_valdtor, sizeof (void *)); 564 vsw_create_vlans((void *)vswp, VSW_LOCALDEV); 565 progress |= PROG_fdb; 566 567 /* setup the multicast fowarding database */ 568 (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", 569 vswp->instance); 570 D2(vswp, "creating multicast hash table %s)...", hashname); 571 vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains, 572 mod_hash_null_valdtor, sizeof (void *)); 573 574 progress |= PROG_mfdb; 575 576 /* 577 * Create the taskq which will process all the VIO 578 * control messages. 
579 */ 580 (void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance); 581 if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, 582 TASKQ_DEFAULTPRI, 0)) == NULL) { 583 cmn_err(CE_WARN, "!vsw%d: Unable to create task queue", 584 vswp->instance); 585 goto vsw_attach_fail; 586 } 587 588 progress |= PROG_taskq; 589 590 /* prevent auto-detaching */ 591 if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip, 592 DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) { 593 cmn_err(CE_NOTE, "!Unable to set \"%s\" property for " 594 "instance %u", DDI_NO_AUTODETACH, instance); 595 } 596 597 /* 598 * The null switching function is set to avoid panic until 599 * switch mode is setup. 600 */ 601 vswp->vsw_switch_frame = vsw_switch_frame_nop; 602 603 /* 604 * Setup the required switching mode, based on the mdprops that we read 605 * earlier. We start a thread to do this, to avoid calling mac_open() 606 * directly from attach(). 607 */ 608 rv = vsw_setup_switching_start(vswp); 609 if (rv != 0) { 610 goto vsw_attach_fail; 611 } 612 613 progress |= PROG_swmode; 614 615 /* Register with mac layer as a provider */ 616 rv = vsw_mac_register(vswp); 617 if (rv != 0) 618 goto vsw_attach_fail; 619 620 progress |= PROG_macreg; 621 622 /* 623 * Now we have everything setup, register an interest in 624 * specific MD nodes. 625 * 626 * The callback is invoked in 2 cases, firstly if upon mdeg 627 * registration there are existing nodes which match our specified 628 * criteria, and secondly if the MD is changed (and again, there 629 * are nodes which we are interested in present within it. Note 630 * that our callback will be invoked even if our specified nodes 631 * have not actually changed). 
632 * 633 */ 634 rv = vsw_mdeg_register(vswp); 635 if (rv != 0) 636 goto vsw_attach_fail; 637 638 progress |= PROG_mdreg; 639 640 vswp->attach_progress = progress; 641 642 WRITE_ENTER(&vsw_rw); 643 vswp->next = vsw_head; 644 vsw_head = vswp; 645 RW_EXIT(&vsw_rw); 646 647 ddi_report_dev(vswp->dip); 648 return (DDI_SUCCESS); 649 650 vsw_attach_fail: 651 DERR(NULL, "vsw_attach: failed"); 652 653 vswp->attach_progress = progress; 654 (void) vsw_unattach(vswp); 655 ddi_soft_state_free(vsw_state, instance); 656 return (DDI_FAILURE); 657 } 658 659 static int 660 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 661 { 662 vsw_t **vswpp, *vswp; 663 int instance; 664 665 instance = ddi_get_instance(dip); 666 vswp = ddi_get_soft_state(vsw_state, instance); 667 668 if (vswp == NULL) { 669 return (DDI_FAILURE); 670 } 671 672 switch (cmd) { 673 case DDI_DETACH: 674 break; 675 case DDI_SUSPEND: 676 case DDI_PM_SUSPEND: 677 default: 678 return (DDI_FAILURE); 679 } 680 681 D2(vswp, "detaching instance %d", instance); 682 683 if (vsw_unattach(vswp) != 0) { 684 return (DDI_FAILURE); 685 } 686 687 ddi_remove_minor_node(dip, NULL); 688 689 WRITE_ENTER(&vsw_rw); 690 for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) { 691 if (*vswpp == vswp) { 692 *vswpp = vswp->next; 693 break; 694 } 695 } 696 RW_EXIT(&vsw_rw); 697 698 ddi_soft_state_free(vsw_state, instance); 699 700 return (DDI_SUCCESS); 701 } 702 703 /* 704 * Common routine to handle vsw_attach() failure and vsw_detach(). Note that 705 * the only reason this function could fail is if mac_unregister() fails. 706 * Otherwise, this function must ensure that all resources are freed and return 707 * success. 708 */ 709 static int 710 vsw_unattach(vsw_t *vswp) 711 { 712 vio_mblk_pool_t *poolp, *npoolp; 713 vsw_attach_progress_t progress; 714 715 progress = vswp->attach_progress; 716 717 /* 718 * Unregister from the gldv3 subsystem. 
This can fail, in particular 719 * if there are still any open references to this mac device; in which 720 * case we just return failure without continuing to detach further. 721 */ 722 if (progress & PROG_macreg) { 723 if (vsw_mac_unregister(vswp) != 0) { 724 cmn_err(CE_WARN, "!vsw%d: Unable to detach from " 725 "MAC layer", vswp->instance); 726 return (1); 727 } 728 progress &= ~PROG_macreg; 729 } 730 731 /* 732 * Now that we have unregistered from gldv3, we must finish all other 733 * steps and successfully return from this function; otherwise we will 734 * end up leaving the device in a broken/unusable state. 735 * 736 * If we have registered with mdeg, unregister now to stop further 737 * callbacks to this vsw device and/or its ports. Then, detach any 738 * existing ports. 739 */ 740 if (progress & PROG_mdreg) { 741 vsw_mdeg_unregister(vswp); 742 vsw_detach_ports(vswp); 743 744 /* 745 * At this point, we attempt to free receive mblk pools that 746 * couldn't be destroyed when the ports were detached; if this 747 * attempt also fails, we hook up the pool(s) to the module so 748 * they can be cleaned up in _fini(). 749 */ 750 poolp = vswp->rxh; 751 while (poolp != NULL) { 752 npoolp = vswp->rxh = poolp->nextp; 753 if (vio_destroy_mblks(poolp) != 0) { 754 WRITE_ENTER(&vsw_rw); 755 poolp->nextp = vsw_rx_poolp; 756 vsw_rx_poolp = poolp; 757 RW_EXIT(&vsw_rw); 758 } 759 poolp = npoolp; 760 } 761 progress &= ~PROG_mdreg; 762 } 763 764 /* 765 * If we have started a thread to setup the switching mode, stop it, if 766 * it is still running. If it has finished setting up the switching 767 * mode, then we need to clean up some additional things if we are 768 * running in L2 mode: first free up any hybrid resources; then stop 769 * and close the underlying physical device. 
Note that we would have 770 * already released all per mac_client resources (ucast, mcast addrs, 771 * hio-shares etc) as all the ports are detached and if the vsw device 772 * itself was in use as an interface, it has been unplumbed (otherwise 773 * mac_unregister() above would fail). 774 */ 775 if (progress & PROG_swmode) { 776 777 vsw_setup_switching_stop(vswp); 778 779 if (vswp->hio_capable == B_TRUE) { 780 vsw_hio_cleanup(vswp); 781 vswp->hio_capable = B_FALSE; 782 } 783 784 mutex_enter(&vswp->mac_lock); 785 vsw_mac_close(vswp); 786 mutex_exit(&vswp->mac_lock); 787 788 progress &= ~PROG_swmode; 789 } 790 791 /* 792 * By now any pending tasks have finished and the underlying 793 * ldc's have been destroyed, so its safe to delete the control 794 * message taskq. 795 */ 796 if (progress & PROG_taskq) { 797 ddi_taskq_destroy(vswp->taskq_p); 798 progress &= ~PROG_taskq; 799 } 800 801 /* Destroy the multicast hash table */ 802 if (progress & PROG_mfdb) { 803 mod_hash_destroy_hash(vswp->mfdb); 804 progress &= ~PROG_mfdb; 805 } 806 807 /* Destroy the vlan hash table and fdb */ 808 if (progress & PROG_fdb) { 809 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 810 mod_hash_destroy_hash(vswp->fdb_hashp); 811 progress &= ~PROG_fdb; 812 } 813 814 if (progress & PROG_readmd) { 815 if (VSW_PRI_ETH_DEFINED(vswp)) { 816 kmem_free(vswp->pri_types, 817 sizeof (uint16_t) * vswp->pri_num_types); 818 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 819 } 820 progress &= ~PROG_readmd; 821 } 822 823 if (progress & PROG_locks) { 824 rw_destroy(&vswp->plist.lockrw); 825 rw_destroy(&vswp->mfdbrw); 826 rw_destroy(&vswp->if_lockrw); 827 rw_destroy(&vswp->maccl_rwlock); 828 cv_destroy(&vswp->sw_thr_cv); 829 mutex_destroy(&vswp->sw_thr_lock); 830 mutex_destroy(&vswp->mca_lock); 831 mutex_destroy(&vswp->mac_lock); 832 progress &= ~PROG_locks; 833 } 834 835 vswp->attach_progress = progress; 836 837 return (0); 838 } 839 840 /* 841 * one time cleanup. 
842 */ 843 static int 844 vsw_mod_cleanup(void) 845 { 846 vio_mblk_pool_t *poolp, *npoolp; 847 848 /* 849 * If any rx mblk pools are still in use, return 850 * error and stop the module from unloading. 851 */ 852 WRITE_ENTER(&vsw_rw); 853 poolp = vsw_rx_poolp; 854 while (poolp != NULL) { 855 npoolp = vsw_rx_poolp = poolp->nextp; 856 if (vio_destroy_mblks(poolp) != 0) { 857 vsw_rx_poolp = poolp; 858 RW_EXIT(&vsw_rw); 859 return (EBUSY); 860 } 861 poolp = npoolp; 862 } 863 RW_EXIT(&vsw_rw); 864 865 return (0); 866 } 867 868 /* 869 * Get the value of the "vsw-phys-dev" property in the specified 870 * node. This property is the name of the physical device that 871 * the virtual switch will use to talk to the outside world. 872 * 873 * Note it is valid for this property to be NULL (but the property 874 * itself must exist). Callers of this routine should verify that 875 * the value returned is what they expected (i.e. either NULL or non NULL). 876 * 877 * On success returns value of the property in region pointed to by 878 * the 'name' argument, and with return value of 0. Otherwise returns 1. 
879 */ 880 static int 881 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name) 882 { 883 int len = 0; 884 int instance; 885 char *physname = NULL; 886 char *dev; 887 const char *dev_name; 888 char myname[MAXNAMELEN]; 889 890 dev_name = ddi_driver_name(vswp->dip); 891 instance = ddi_get_instance(vswp->dip); 892 (void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance); 893 894 if (md_get_prop_data(mdp, node, physdev_propname, 895 (uint8_t **)(&physname), &len) != 0) { 896 cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical " 897 "device(s) from MD", vswp->instance); 898 return (1); 899 } else if ((strlen(physname) + 1) > LIFNAMSIZ) { 900 cmn_err(CE_WARN, "!vsw%d: %s is too long a device name", 901 vswp->instance, physname); 902 return (1); 903 } else if (strcmp(myname, physname) == 0) { 904 /* 905 * Prevent the vswitch from opening itself as the 906 * network device. 907 */ 908 cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name", 909 vswp->instance, physname); 910 return (1); 911 } else { 912 (void) strncpy(name, physname, strlen(physname) + 1); 913 D2(vswp, "%s: using first device specified (%s)", 914 __func__, physname); 915 } 916 917 #ifdef DEBUG 918 /* 919 * As a temporary measure to aid testing we check to see if there 920 * is a vsw.conf file present. If there is we use the value of the 921 * vsw_physname property in the file as the name of the physical 922 * device, overriding the value from the MD. 923 * 924 * There may be multiple devices listed, but for the moment 925 * we just use the first one. 
926 */ 927 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0, 928 "vsw_physname", &dev) == DDI_PROP_SUCCESS) { 929 if ((strlen(dev) + 1) > LIFNAMSIZ) { 930 cmn_err(CE_WARN, "vsw%d: %s is too long a device name", 931 vswp->instance, dev); 932 ddi_prop_free(dev); 933 return (1); 934 } else { 935 cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from " 936 "config file", vswp->instance, dev); 937 938 (void) strncpy(name, dev, strlen(dev) + 1); 939 } 940 941 ddi_prop_free(dev); 942 } 943 #endif 944 945 return (0); 946 } 947 948 /* 949 * Read the 'vsw-switch-mode' property from the specified MD node. 950 * 951 * Returns 0 on success, otherwise returns 1. 952 */ 953 static int 954 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode) 955 { 956 int len = 0; 957 char *smode = NULL; 958 char *curr_mode = NULL; 959 960 D1(vswp, "%s: enter", __func__); 961 962 /* 963 * Get the switch-mode property. The modes are listed in 964 * decreasing order of preference, i.e. prefered mode is 965 * first item in list. 966 */ 967 len = 0; 968 if (md_get_prop_data(mdp, node, smode_propname, 969 (uint8_t **)(&smode), &len) != 0) { 970 /* 971 * Unable to get switch-mode property from MD, nothing 972 * more we can do. 973 */ 974 cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property" 975 " from the MD", vswp->instance); 976 return (1); 977 } 978 979 curr_mode = smode; 980 /* 981 * Modes of operation: 982 * 'switched' - layer 2 switching, underlying HW in 983 * programmed mode. 984 * 'promiscuous' - layer 2 switching, underlying HW in 985 * promiscuous mode. 986 * 'routed' - layer 3 (i.e. IP) routing, underlying HW 987 * in non-promiscuous mode. 
988 */ 989 while (curr_mode < (smode + len)) { 990 D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode); 991 if (strcmp(curr_mode, "switched") == 0) { 992 *mode = VSW_LAYER2; 993 } else if (strcmp(curr_mode, "promiscuous") == 0) { 994 *mode = VSW_LAYER2 | VSW_LAYER2_PROMISC; 995 } else if (strcmp(curr_mode, "routed") == 0) { 996 *mode = VSW_LAYER3; 997 } else { 998 cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, " 999 "setting to default switched mode", 1000 vswp->instance, curr_mode); 1001 *mode = VSW_LAYER2; 1002 } 1003 curr_mode += strlen(curr_mode) + 1; 1004 } 1005 1006 D2(vswp, "%s: %d mode", __func__, *mode); 1007 1008 D1(vswp, "%s: exit", __func__); 1009 1010 return (0); 1011 } 1012 1013 /* 1014 * Register with the MAC layer as a network device, so we 1015 * can be plumbed if necessary. 1016 */ 1017 static int 1018 vsw_mac_register(vsw_t *vswp) 1019 { 1020 mac_register_t *macp; 1021 int rv; 1022 1023 D1(vswp, "%s: enter", __func__); 1024 1025 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 1026 return (EINVAL); 1027 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1028 macp->m_driver = vswp; 1029 macp->m_dip = vswp->dip; 1030 macp->m_src_addr = (uint8_t *)&vswp->if_addr; 1031 macp->m_callbacks = &vsw_m_callbacks; 1032 macp->m_min_sdu = 0; 1033 macp->m_max_sdu = vswp->mtu; 1034 macp->m_margin = VLAN_TAGSZ; 1035 rv = mac_register(macp, &vswp->if_mh); 1036 mac_free(macp); 1037 if (rv != 0) { 1038 /* 1039 * Treat this as a non-fatal error as we may be 1040 * able to operate in some other mode. 
1041 */ 1042 cmn_err(CE_NOTE, "!vsw%d: Unable to register as " 1043 "a provider with MAC layer", vswp->instance); 1044 return (rv); 1045 } 1046 1047 vswp->if_state |= VSW_IF_REG; 1048 1049 D1(vswp, "%s: exit", __func__); 1050 1051 return (rv); 1052 } 1053 1054 static int 1055 vsw_mac_unregister(vsw_t *vswp) 1056 { 1057 int rv = 0; 1058 1059 D1(vswp, "%s: enter", __func__); 1060 1061 WRITE_ENTER(&vswp->if_lockrw); 1062 1063 if (vswp->if_state & VSW_IF_REG) { 1064 rv = mac_unregister(vswp->if_mh); 1065 if (rv != 0) { 1066 DWARN(vswp, "%s: unable to unregister from MAC " 1067 "framework", __func__); 1068 1069 RW_EXIT(&vswp->if_lockrw); 1070 D1(vswp, "%s: fail exit", __func__); 1071 return (rv); 1072 } 1073 1074 /* mark i/f as down and unregistered */ 1075 vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG); 1076 } 1077 RW_EXIT(&vswp->if_lockrw); 1078 1079 D1(vswp, "%s: exit", __func__); 1080 1081 return (rv); 1082 } 1083 1084 static int 1085 vsw_m_stat(void *arg, uint_t stat, uint64_t *val) 1086 { 1087 vsw_t *vswp = (vsw_t *)arg; 1088 1089 D1(vswp, "%s: enter", __func__); 1090 1091 mutex_enter(&vswp->mac_lock); 1092 if (vswp->mh == NULL) { 1093 mutex_exit(&vswp->mac_lock); 1094 return (EINVAL); 1095 } 1096 1097 /* return stats from underlying device */ 1098 *val = mac_stat_get(vswp->mh, stat); 1099 1100 mutex_exit(&vswp->mac_lock); 1101 1102 return (0); 1103 } 1104 1105 static void 1106 vsw_m_stop(void *arg) 1107 { 1108 vsw_t *vswp = (vsw_t *)arg; 1109 1110 D1(vswp, "%s: enter", __func__); 1111 1112 WRITE_ENTER(&vswp->if_lockrw); 1113 vswp->if_state &= ~VSW_IF_UP; 1114 RW_EXIT(&vswp->if_lockrw); 1115 1116 /* Cleanup and close the mac client */ 1117 vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV); 1118 1119 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1120 } 1121 1122 static int 1123 vsw_m_start(void *arg) 1124 { 1125 int rv; 1126 vsw_t *vswp = (vsw_t *)arg; 1127 1128 D1(vswp, "%s: enter", __func__); 1129 1130 WRITE_ENTER(&vswp->if_lockrw); 1131 1132 
vswp->if_state |= VSW_IF_UP; 1133 1134 if (vswp->switching_setup_done == B_FALSE) { 1135 /* 1136 * If the switching mode has not been setup yet, just 1137 * return. The unicast address will be programmed 1138 * after the physical device is successfully setup by the 1139 * timeout handler. 1140 */ 1141 RW_EXIT(&vswp->if_lockrw); 1142 return (0); 1143 } 1144 1145 /* if in layer2 mode, program unicast address. */ 1146 if (vswp->mh != NULL) { 1147 /* Init a mac client and program addresses */ 1148 rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV); 1149 if (rv != 0) { 1150 cmn_err(CE_NOTE, 1151 "!vsw%d: failed to program interface " 1152 "unicast address\n", vswp->instance); 1153 } 1154 } 1155 1156 RW_EXIT(&vswp->if_lockrw); 1157 1158 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1159 return (0); 1160 } 1161 1162 /* 1163 * Change the local interface address. 1164 * 1165 * Note: we don't support this entry point. The local 1166 * mac address of the switch can only be changed via its 1167 * MD node properties. 1168 */ 1169 static int 1170 vsw_m_unicst(void *arg, const uint8_t *macaddr) 1171 { 1172 _NOTE(ARGUNUSED(arg, macaddr)) 1173 1174 return (DDI_FAILURE); 1175 } 1176 1177 static int 1178 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 1179 { 1180 vsw_t *vswp = (vsw_t *)arg; 1181 mcst_addr_t *mcst_p = NULL; 1182 uint64_t addr = 0x0; 1183 int i, ret = 0; 1184 1185 D1(vswp, "%s: enter", __func__); 1186 1187 /* 1188 * Convert address into form that can be used 1189 * as hash table key. 1190 */ 1191 for (i = 0; i < ETHERADDRL; i++) { 1192 addr = (addr << 8) | mca[i]; 1193 } 1194 1195 D2(vswp, "%s: addr = 0x%llx", __func__, addr); 1196 1197 if (add) { 1198 D2(vswp, "%s: adding multicast", __func__); 1199 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1200 /* 1201 * Update the list of multicast addresses 1202 * contained within the vsw_t structure to 1203 * include this new one. 
1204 */ 1205 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP); 1206 if (mcst_p == NULL) { 1207 DERR(vswp, "%s unable to alloc mem", __func__); 1208 (void) vsw_del_mcst(vswp, 1209 VSW_LOCALDEV, addr, NULL); 1210 return (1); 1211 } 1212 mcst_p->addr = addr; 1213 ether_copy(mca, &mcst_p->mca); 1214 1215 /* 1216 * Call into the underlying driver to program the 1217 * address into HW. 1218 */ 1219 ret = vsw_mac_multicast_add(vswp, NULL, mcst_p, 1220 VSW_LOCALDEV); 1221 if (ret != 0) { 1222 (void) vsw_del_mcst(vswp, 1223 VSW_LOCALDEV, addr, NULL); 1224 kmem_free(mcst_p, sizeof (*mcst_p)); 1225 return (ret); 1226 } 1227 1228 mutex_enter(&vswp->mca_lock); 1229 mcst_p->nextp = vswp->mcap; 1230 vswp->mcap = mcst_p; 1231 mutex_exit(&vswp->mca_lock); 1232 } else { 1233 cmn_err(CE_WARN, "!vsw%d: unable to add multicast " 1234 "address", vswp->instance); 1235 } 1236 return (ret); 1237 } 1238 1239 D2(vswp, "%s: removing multicast", __func__); 1240 /* 1241 * Remove the address from the hash table.. 1242 */ 1243 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1244 1245 /* 1246 * ..and then from the list maintained in the 1247 * vsw_t structure. 
1248 */ 1249 mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr); 1250 ASSERT(mcst_p != NULL); 1251 1252 vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV); 1253 kmem_free(mcst_p, sizeof (*mcst_p)); 1254 } 1255 1256 D1(vswp, "%s: exit", __func__); 1257 1258 return (0); 1259 } 1260 1261 static int 1262 vsw_m_promisc(void *arg, boolean_t on) 1263 { 1264 vsw_t *vswp = (vsw_t *)arg; 1265 1266 D1(vswp, "%s: enter", __func__); 1267 1268 WRITE_ENTER(&vswp->if_lockrw); 1269 if (on) 1270 vswp->if_state |= VSW_IF_PROMISC; 1271 else 1272 vswp->if_state &= ~VSW_IF_PROMISC; 1273 RW_EXIT(&vswp->if_lockrw); 1274 1275 D1(vswp, "%s: exit", __func__); 1276 1277 return (0); 1278 } 1279 1280 static mblk_t * 1281 vsw_m_tx(void *arg, mblk_t *mp) 1282 { 1283 vsw_t *vswp = (vsw_t *)arg; 1284 1285 D1(vswp, "%s: enter", __func__); 1286 1287 mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp); 1288 1289 if (mp == NULL) { 1290 return (NULL); 1291 } 1292 1293 vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL); 1294 1295 D1(vswp, "%s: exit", __func__); 1296 1297 return (NULL); 1298 } 1299 1300 /* 1301 * Register for machine description (MD) updates. 1302 * 1303 * Returns 0 on success, 1 on failure. 1304 */ 1305 static int 1306 vsw_mdeg_register(vsw_t *vswp) 1307 { 1308 mdeg_prop_spec_t *pspecp; 1309 mdeg_node_spec_t *inst_specp; 1310 mdeg_handle_t mdeg_hdl, mdeg_port_hdl; 1311 size_t templatesz; 1312 int rv; 1313 1314 D1(vswp, "%s: enter", __func__); 1315 1316 /* 1317 * Allocate and initialize a per-instance copy 1318 * of the global property spec array that will 1319 * uniquely identify this vsw instance. 
1320 */ 1321 templatesz = sizeof (vsw_prop_template); 1322 pspecp = kmem_zalloc(templatesz, KM_SLEEP); 1323 1324 bcopy(vsw_prop_template, pspecp, templatesz); 1325 1326 VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop); 1327 1328 /* initialize the complete prop spec structure */ 1329 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 1330 inst_specp->namep = "virtual-device"; 1331 inst_specp->specp = pspecp; 1332 1333 D2(vswp, "%s: instance %d registering with mdeg", __func__, 1334 vswp->regprop); 1335 /* 1336 * Register an interest in 'virtual-device' nodes with a 1337 * 'name' property of 'virtual-network-switch' 1338 */ 1339 rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb, 1340 (void *)vswp, &mdeg_hdl); 1341 if (rv != MDEG_SUCCESS) { 1342 DERR(vswp, "%s: mdeg_register failed (%d) for vsw node", 1343 __func__, rv); 1344 goto mdeg_reg_fail; 1345 } 1346 1347 /* 1348 * Register an interest in 'vsw-port' nodes. 1349 */ 1350 rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb, 1351 (void *)vswp, &mdeg_port_hdl); 1352 if (rv != MDEG_SUCCESS) { 1353 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv); 1354 (void) mdeg_unregister(mdeg_hdl); 1355 goto mdeg_reg_fail; 1356 } 1357 1358 /* save off data that will be needed later */ 1359 vswp->inst_spec = inst_specp; 1360 vswp->mdeg_hdl = mdeg_hdl; 1361 vswp->mdeg_port_hdl = mdeg_port_hdl; 1362 1363 D1(vswp, "%s: exit", __func__); 1364 return (0); 1365 1366 mdeg_reg_fail: 1367 cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks", 1368 vswp->instance); 1369 kmem_free(pspecp, templatesz); 1370 kmem_free(inst_specp, sizeof (mdeg_node_spec_t)); 1371 1372 vswp->mdeg_hdl = NULL; 1373 vswp->mdeg_port_hdl = NULL; 1374 1375 return (1); 1376 } 1377 1378 static void 1379 vsw_mdeg_unregister(vsw_t *vswp) 1380 { 1381 D1(vswp, "vsw_mdeg_unregister: enter"); 1382 1383 if (vswp->mdeg_hdl != NULL) 1384 (void) mdeg_unregister(vswp->mdeg_hdl); 1385 1386 if (vswp->mdeg_port_hdl != NULL) 1387 (void) 
mdeg_unregister(vswp->mdeg_port_hdl); 1388 1389 if (vswp->inst_spec != NULL) { 1390 if (vswp->inst_spec->specp != NULL) { 1391 (void) kmem_free(vswp->inst_spec->specp, 1392 sizeof (vsw_prop_template)); 1393 vswp->inst_spec->specp = NULL; 1394 } 1395 1396 (void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t)); 1397 vswp->inst_spec = NULL; 1398 } 1399 1400 D1(vswp, "vsw_mdeg_unregister: exit"); 1401 } 1402 1403 /* 1404 * Mdeg callback invoked for the vsw node itself. 1405 */ 1406 static int 1407 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1408 { 1409 vsw_t *vswp; 1410 md_t *mdp; 1411 mde_cookie_t node; 1412 uint64_t inst; 1413 char *node_name = NULL; 1414 1415 if (resp == NULL) 1416 return (MDEG_FAILURE); 1417 1418 vswp = (vsw_t *)cb_argp; 1419 1420 D1(vswp, "%s: added %d : removed %d : curr matched %d" 1421 " : prev matched %d", __func__, resp->added.nelem, 1422 resp->removed.nelem, resp->match_curr.nelem, 1423 resp->match_prev.nelem); 1424 1425 /* 1426 * We get an initial callback for this node as 'added' 1427 * after registering with mdeg. Note that we would have 1428 * already gathered information about this vsw node by 1429 * walking MD earlier during attach (in vsw_read_mdprops()). 1430 * So, there is a window where the properties of this 1431 * node might have changed when we get this initial 'added' 1432 * callback. We handle this as if an update occured 1433 * and invoke the same function which handles updates to 1434 * the properties of this vsw-node if any. 1435 * 1436 * A non-zero 'match' value indicates that the MD has been 1437 * updated and that a virtual-network-switch node is 1438 * present which may or may not have been updated. It is 1439 * up to the clients to examine their own nodes and 1440 * determine if they have changed. 
1441 */ 1442 if (resp->added.nelem != 0) { 1443 1444 if (resp->added.nelem != 1) { 1445 cmn_err(CE_NOTE, "!vsw%d: number of nodes added " 1446 "invalid: %d\n", vswp->instance, resp->added.nelem); 1447 return (MDEG_FAILURE); 1448 } 1449 1450 mdp = resp->added.mdp; 1451 node = resp->added.mdep[0]; 1452 1453 } else if (resp->match_curr.nelem != 0) { 1454 1455 if (resp->match_curr.nelem != 1) { 1456 cmn_err(CE_NOTE, "!vsw%d: number of nodes updated " 1457 "invalid: %d\n", vswp->instance, 1458 resp->match_curr.nelem); 1459 return (MDEG_FAILURE); 1460 } 1461 1462 mdp = resp->match_curr.mdp; 1463 node = resp->match_curr.mdep[0]; 1464 1465 } else { 1466 return (MDEG_FAILURE); 1467 } 1468 1469 /* Validate name and instance */ 1470 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { 1471 DERR(vswp, "%s: unable to get node name\n", __func__); 1472 return (MDEG_FAILURE); 1473 } 1474 1475 /* is this a virtual-network-switch? */ 1476 if (strcmp(node_name, vsw_propname) != 0) { 1477 DERR(vswp, "%s: Invalid node name: %s\n", 1478 __func__, node_name); 1479 return (MDEG_FAILURE); 1480 } 1481 1482 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { 1483 DERR(vswp, "%s: prop(cfg-handle) not found\n", 1484 __func__); 1485 return (MDEG_FAILURE); 1486 } 1487 1488 /* is this the right instance of vsw? */ 1489 if (inst != vswp->regprop) { 1490 DERR(vswp, "%s: Invalid cfg-handle: %lx\n", 1491 __func__, inst); 1492 return (MDEG_FAILURE); 1493 } 1494 1495 vsw_update_md_prop(vswp, mdp, node); 1496 1497 return (MDEG_SUCCESS); 1498 } 1499 1500 /* 1501 * Mdeg callback invoked for changes to the vsw-port nodes 1502 * under the vsw node. 
1503 */ 1504 static int 1505 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1506 { 1507 vsw_t *vswp; 1508 int idx; 1509 md_t *mdp; 1510 mde_cookie_t node; 1511 uint64_t inst; 1512 int rv; 1513 1514 if ((resp == NULL) || (cb_argp == NULL)) 1515 return (MDEG_FAILURE); 1516 1517 vswp = (vsw_t *)cb_argp; 1518 1519 D2(vswp, "%s: added %d : removed %d : curr matched %d" 1520 " : prev matched %d", __func__, resp->added.nelem, 1521 resp->removed.nelem, resp->match_curr.nelem, 1522 resp->match_prev.nelem); 1523 1524 /* process added ports */ 1525 for (idx = 0; idx < resp->added.nelem; idx++) { 1526 mdp = resp->added.mdp; 1527 node = resp->added.mdep[idx]; 1528 1529 D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); 1530 1531 if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) { 1532 cmn_err(CE_WARN, "!vsw%d: Unable to add new port " 1533 "(0x%lx), err=%d", vswp->instance, node, rv); 1534 } 1535 } 1536 1537 /* process removed ports */ 1538 for (idx = 0; idx < resp->removed.nelem; idx++) { 1539 mdp = resp->removed.mdp; 1540 node = resp->removed.mdep[idx]; 1541 1542 if (md_get_prop_val(mdp, node, id_propname, &inst)) { 1543 DERR(vswp, "%s: prop(%s) not found in port(%d)", 1544 __func__, id_propname, idx); 1545 continue; 1546 } 1547 1548 D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); 1549 1550 if (vsw_port_detach(vswp, inst) != 0) { 1551 cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld", 1552 vswp->instance, inst); 1553 } 1554 } 1555 1556 for (idx = 0; idx < resp->match_curr.nelem; idx++) { 1557 (void) vsw_port_update(vswp, resp->match_curr.mdp, 1558 resp->match_curr.mdep[idx], 1559 resp->match_prev.mdp, 1560 resp->match_prev.mdep[idx]); 1561 } 1562 1563 D1(vswp, "%s: exit", __func__); 1564 1565 return (MDEG_SUCCESS); 1566 } 1567 1568 /* 1569 * Scan the machine description for this instance of vsw 1570 * and read its properties. Called only from vsw_attach(). 1571 * Returns: 0 on success, 1 on failure. 
1572 */ 1573 static int 1574 vsw_read_mdprops(vsw_t *vswp) 1575 { 1576 md_t *mdp = NULL; 1577 mde_cookie_t rootnode; 1578 mde_cookie_t *listp = NULL; 1579 uint64_t inst; 1580 uint64_t cfgh; 1581 char *name; 1582 int rv = 1; 1583 int num_nodes = 0; 1584 int num_devs = 0; 1585 int listsz = 0; 1586 int i; 1587 1588 /* 1589 * In each 'virtual-device' node in the MD there is a 1590 * 'cfg-handle' property which is the MD's concept of 1591 * an instance number (this may be completely different from 1592 * the device drivers instance #). OBP reads that value and 1593 * stores it in the 'reg' property of the appropriate node in 1594 * the device tree. We first read this reg property and use this 1595 * to compare against the 'cfg-handle' property of vsw nodes 1596 * in MD to get to this specific vsw instance and then read 1597 * other properties that we are interested in. 1598 * We also cache the value of 'reg' property and use it later 1599 * to register callbacks with mdeg (see vsw_mdeg_register()) 1600 */ 1601 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 1602 DDI_PROP_DONTPASS, reg_propname, -1); 1603 if (inst == -1) { 1604 cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from " 1605 "OBP device tree", vswp->instance, reg_propname); 1606 return (rv); 1607 } 1608 1609 vswp->regprop = inst; 1610 1611 if ((mdp = md_get_handle()) == NULL) { 1612 DWARN(vswp, "%s: cannot init MD\n", __func__); 1613 return (rv); 1614 } 1615 1616 num_nodes = md_node_count(mdp); 1617 ASSERT(num_nodes > 0); 1618 1619 listsz = num_nodes * sizeof (mde_cookie_t); 1620 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 1621 1622 rootnode = md_root_node(mdp); 1623 1624 /* search for all "virtual_device" nodes */ 1625 num_devs = md_scan_dag(mdp, rootnode, 1626 md_find_name(mdp, vdev_propname), 1627 md_find_name(mdp, "fwd"), listp); 1628 if (num_devs <= 0) { 1629 DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs); 1630 goto vsw_readmd_exit; 1631 } 1632 1633 /* 1634 * Now loop 
through the list of virtual-devices looking for 1635 * devices with name "virtual-network-switch" and for each 1636 * such device compare its instance with what we have from 1637 * the 'reg' property to find the right node in MD and then 1638 * read all its properties. 1639 */ 1640 for (i = 0; i < num_devs; i++) { 1641 1642 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 1643 DWARN(vswp, "%s: name property not found\n", 1644 __func__); 1645 goto vsw_readmd_exit; 1646 } 1647 1648 /* is this a virtual-network-switch? */ 1649 if (strcmp(name, vsw_propname) != 0) 1650 continue; 1651 1652 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 1653 DWARN(vswp, "%s: cfg-handle property not found\n", 1654 __func__); 1655 goto vsw_readmd_exit; 1656 } 1657 1658 /* is this the required instance of vsw? */ 1659 if (inst != cfgh) 1660 continue; 1661 1662 /* now read all properties of this vsw instance */ 1663 rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]); 1664 break; 1665 } 1666 1667 vsw_readmd_exit: 1668 1669 kmem_free(listp, listsz); 1670 (void) md_fini_handle(mdp); 1671 return (rv); 1672 } 1673 1674 /* 1675 * Read the initial start-of-day values from the specified MD node. 
1676 */ 1677 static int 1678 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1679 { 1680 uint64_t macaddr = 0; 1681 1682 D1(vswp, "%s: enter", __func__); 1683 1684 if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) { 1685 return (1); 1686 } 1687 1688 /* mac address for vswitch device itself */ 1689 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1690 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1691 vswp->instance); 1692 return (1); 1693 } 1694 1695 vsw_save_lmacaddr(vswp, macaddr); 1696 1697 if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) { 1698 DWARN(vswp, "%s: Unable to read %s property from MD, " 1699 "defaulting to 'switched' mode", 1700 __func__, smode_propname); 1701 1702 vswp->smode = VSW_LAYER2; 1703 } 1704 1705 /* read mtu */ 1706 vsw_mtu_read(vswp, mdp, node, &vswp->mtu); 1707 if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) { 1708 vswp->mtu = ETHERMTU; 1709 } 1710 vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) + 1711 VLAN_TAGSZ; 1712 1713 /* read vlan id properties of this vsw instance */ 1714 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid, 1715 &vswp->vids, &vswp->nvids, &vswp->default_vlan_id); 1716 1717 /* read priority-ether-types */ 1718 vsw_read_pri_eth_types(vswp, mdp, node); 1719 1720 D1(vswp, "%s: exit", __func__); 1721 return (0); 1722 } 1723 1724 /* 1725 * Read vlan id properties of the given MD node. 
1726 * Arguments: 1727 * arg: device argument(vsw device or a port) 1728 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port) 1729 * mdp: machine description 1730 * node: md node cookie 1731 * 1732 * Returns: 1733 * pvidp: port-vlan-id of the node 1734 * vidspp: list of vlan-ids of the node 1735 * nvidsp: # of vlan-ids in the list 1736 * default_idp: default-vlan-id of the node(if node is vsw device) 1737 */ 1738 static void 1739 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node, 1740 uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp, 1741 uint16_t *default_idp) 1742 { 1743 vsw_t *vswp; 1744 vsw_port_t *portp; 1745 char *pvid_propname; 1746 char *vid_propname; 1747 uint_t nvids = 0; 1748 uint32_t vids_size; 1749 int rv; 1750 int i; 1751 uint64_t *data; 1752 uint64_t val; 1753 int size; 1754 int inst; 1755 1756 if (type == VSW_LOCALDEV) { 1757 1758 vswp = (vsw_t *)arg; 1759 pvid_propname = vsw_pvid_propname; 1760 vid_propname = vsw_vid_propname; 1761 inst = vswp->instance; 1762 1763 } else if (type == VSW_VNETPORT) { 1764 1765 portp = (vsw_port_t *)arg; 1766 vswp = portp->p_vswp; 1767 pvid_propname = port_pvid_propname; 1768 vid_propname = port_vid_propname; 1769 inst = portp->p_instance; 1770 1771 } else { 1772 return; 1773 } 1774 1775 if (type == VSW_LOCALDEV && default_idp != NULL) { 1776 rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val); 1777 if (rv != 0) { 1778 DWARN(vswp, "%s: prop(%s) not found", __func__, 1779 vsw_dvid_propname); 1780 1781 *default_idp = vsw_default_vlan_id; 1782 } else { 1783 *default_idp = val & 0xFFF; 1784 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1785 vsw_dvid_propname, inst, *default_idp); 1786 } 1787 } 1788 1789 rv = md_get_prop_val(mdp, node, pvid_propname, &val); 1790 if (rv != 0) { 1791 DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname); 1792 *pvidp = vsw_default_vlan_id; 1793 } else { 1794 1795 *pvidp = val & 0xFFF; 1796 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1797 
pvid_propname, inst, *pvidp); 1798 } 1799 1800 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data, 1801 &size); 1802 if (rv != 0) { 1803 D2(vswp, "%s: prop(%s) not found", __func__, vid_propname); 1804 size = 0; 1805 } else { 1806 size /= sizeof (uint64_t); 1807 } 1808 nvids = size; 1809 1810 if (nvids != 0) { 1811 D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst); 1812 vids_size = sizeof (vsw_vlanid_t) * nvids; 1813 *vidspp = kmem_zalloc(vids_size, KM_SLEEP); 1814 for (i = 0; i < nvids; i++) { 1815 (*vidspp)[i].vl_vid = data[i] & 0xFFFF; 1816 (*vidspp)[i].vl_set = B_FALSE; 1817 D2(vswp, " %d ", (*vidspp)[i].vl_vid); 1818 } 1819 D2(vswp, "\n"); 1820 } 1821 1822 *nvidsp = nvids; 1823 } 1824 1825 /* 1826 * This function reads "priority-ether-types" property from md. This property 1827 * is used to enable support for priority frames. Applications which need 1828 * guaranteed and timely delivery of certain high priority frames to/from 1829 * a vnet or vsw within ldoms, should configure this property by providing 1830 * the ether type(s) for which the priority facility is needed. 1831 * Normal data frames are delivered over a ldc channel using the descriptor 1832 * ring mechanism which is constrained by factors such as descriptor ring size, 1833 * the rate at which the ring is processed at the peer ldc end point, etc. 1834 * The priority mechanism provides an Out-Of-Band path to send/receive frames 1835 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the 1836 * descriptor ring path and enables a more reliable and timely delivery of 1837 * frames to the peer. 
1838 */ 1839 static void 1840 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1841 { 1842 int rv; 1843 uint16_t *types; 1844 uint64_t *data; 1845 int size; 1846 int i; 1847 size_t mblk_sz; 1848 1849 rv = md_get_prop_data(mdp, node, pri_types_propname, 1850 (uint8_t **)&data, &size); 1851 if (rv != 0) { 1852 /* 1853 * Property may not exist if we are running pre-ldoms1.1 f/w. 1854 * Check if 'vsw_pri_eth_type' has been set in that case. 1855 */ 1856 if (vsw_pri_eth_type != 0) { 1857 size = sizeof (vsw_pri_eth_type); 1858 data = &vsw_pri_eth_type; 1859 } else { 1860 D3(vswp, "%s: prop(%s) not found", __func__, 1861 pri_types_propname); 1862 size = 0; 1863 } 1864 } 1865 1866 if (size == 0) { 1867 vswp->pri_num_types = 0; 1868 return; 1869 } 1870 1871 /* 1872 * we have some priority-ether-types defined; 1873 * allocate a table of these types and also 1874 * allocate a pool of mblks to transmit these 1875 * priority packets. 1876 */ 1877 size /= sizeof (uint64_t); 1878 vswp->pri_num_types = size; 1879 vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP); 1880 for (i = 0, types = vswp->pri_types; i < size; i++) { 1881 types[i] = data[i] & 0xFFFF; 1882 } 1883 mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7; 1884 (void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp); 1885 } 1886 1887 static void 1888 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu) 1889 { 1890 int rv; 1891 int inst; 1892 uint64_t val; 1893 char *mtu_propname; 1894 1895 mtu_propname = vsw_mtu_propname; 1896 inst = vswp->instance; 1897 1898 rv = md_get_prop_val(mdp, node, mtu_propname, &val); 1899 if (rv != 0) { 1900 D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname); 1901 *mtu = vsw_ethermtu; 1902 } else { 1903 1904 *mtu = val & 0xFFFF; 1905 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1906 mtu_propname, inst, *mtu); 1907 } 1908 } 1909 1910 /* 1911 * Update the mtu of the vsw device. 
We first check if the device has been
 * plumbed and if so fail the mtu update. Otherwise, we continue to update the
 * new mtu and reset all ports to initiate handshake re-negotiation with peers
 * using the new mtu.
 *
 * Returns EBUSY if the device is plumbed, otherwise 0. A failure to
 * update the mtu with the mac layer is only logged.
 */
static int
vsw_mtu_update(vsw_t *vswp, uint32_t mtu)
{
	int	err;

	WRITE_ENTER(&vswp->if_lockrw);

	/* the mtu cannot be changed underneath a plumbed interface */
	if ((vswp->if_state & VSW_IF_UP) != 0) {
		RW_EXIT(&vswp->if_lockrw);

		cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update"
		    " as the device is plumbed\n", vswp->instance);
		return (EBUSY);
	}

	D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n",
	    __func__, vswp->mtu, mtu);

	vswp->mtu = mtu;
	vswp->max_frame_size = vswp->mtu +
	    sizeof (struct ether_header) + VLAN_TAGSZ;

	err = mac_maxsdu_update(vswp->if_mh, mtu);
	if (err != 0) {
		cmn_err(CE_NOTE,
		    "!vsw%d: Unable to update mtu with mac"
		    " layer\n", vswp->instance);
	}

	RW_EXIT(&vswp->if_lockrw);

	/* Reset ports to renegotiate with the new mtu */
	vsw_reset_ports(vswp);

	return (0);
}

/*
 * Check to see if the relevant properties in the specified node have
 * changed, and if so take the appropriate action.
 *
 * If any of the properties are missing or invalid we don't take
 * any action, as this function should only be invoked when modifications
 * have been made to what we assume is a working configuration, which
 * we leave active.
 *
 * Note it is legal for this routine to be invoked even if none of the
 * properties in the port node within the MD have actually changed.
1968 */ 1969 static void 1970 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1971 { 1972 char physname[LIFNAMSIZ]; 1973 char drv[LIFNAMSIZ]; 1974 uint_t ddi_instance; 1975 uint8_t new_smode; 1976 int i; 1977 uint64_t macaddr = 0; 1978 enum {MD_init = 0x1, 1979 MD_physname = 0x2, 1980 MD_macaddr = 0x4, 1981 MD_smode = 0x8, 1982 MD_vlans = 0x10, 1983 MD_mtu = 0x20} updated; 1984 int rv; 1985 uint16_t pvid; 1986 vsw_vlanid_t *vids; 1987 uint16_t nvids; 1988 uint32_t mtu; 1989 1990 updated = MD_init; 1991 1992 D1(vswp, "%s: enter", __func__); 1993 1994 /* 1995 * Check if name of physical device in MD has changed. 1996 */ 1997 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { 1998 /* 1999 * Do basic sanity check on new device name/instance, 2000 * if its non NULL. It is valid for the device name to 2001 * have changed from a non NULL to a NULL value, i.e. 2002 * the vsw is being changed to 'routed' mode. 2003 */ 2004 if ((strlen(physname) != 0) && 2005 (ddi_parse(physname, drv, 2006 &ddi_instance) != DDI_SUCCESS)) { 2007 cmn_err(CE_WARN, "!vsw%d: physical device %s is not" 2008 " a valid device name/instance", 2009 vswp->instance, physname); 2010 goto fail_reconf; 2011 } 2012 2013 if (strcmp(physname, vswp->physname)) { 2014 D2(vswp, "%s: device name changed from %s to %s", 2015 __func__, vswp->physname, physname); 2016 2017 updated |= MD_physname; 2018 } else { 2019 D2(vswp, "%s: device name unchanged at %s", 2020 __func__, vswp->physname); 2021 } 2022 } else { 2023 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " 2024 "device from updated MD.", vswp->instance); 2025 goto fail_reconf; 2026 } 2027 2028 /* 2029 * Check if MAC address has changed. 
2030 */ 2031 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 2032 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 2033 vswp->instance); 2034 goto fail_reconf; 2035 } else { 2036 uint64_t maddr = macaddr; 2037 READ_ENTER(&vswp->if_lockrw); 2038 for (i = ETHERADDRL - 1; i >= 0; i--) { 2039 if (vswp->if_addr.ether_addr_octet[i] 2040 != (macaddr & 0xFF)) { 2041 D2(vswp, "%s: octet[%d] 0x%x != 0x%x", 2042 __func__, i, 2043 vswp->if_addr.ether_addr_octet[i], 2044 (macaddr & 0xFF)); 2045 updated |= MD_macaddr; 2046 macaddr = maddr; 2047 break; 2048 } 2049 macaddr >>= 8; 2050 } 2051 RW_EXIT(&vswp->if_lockrw); 2052 if (updated & MD_macaddr) { 2053 vsw_save_lmacaddr(vswp, macaddr); 2054 } 2055 } 2056 2057 /* 2058 * Check if switching modes have changed. 2059 */ 2060 if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) { 2061 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", 2062 vswp->instance, smode_propname); 2063 goto fail_reconf; 2064 } else { 2065 if (new_smode != vswp->smode) { 2066 D2(vswp, "%s: switching mode changed from %d to %d", 2067 __func__, vswp->smode, new_smode); 2068 2069 updated |= MD_smode; 2070 } 2071 } 2072 2073 /* Read the vlan ids */ 2074 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids, 2075 &nvids, NULL); 2076 2077 /* Determine if there are any vlan id updates */ 2078 if ((pvid != vswp->pvid) || /* pvid changed? */ 2079 (nvids != vswp->nvids) || /* # of vids changed? */ 2080 ((nvids != 0) && (vswp->nvids != 0) && /* vids changed? 
*/ 2081 !vsw_cmp_vids(vids, vswp->vids, nvids))) { 2082 updated |= MD_vlans; 2083 } 2084 2085 /* Read mtu */ 2086 vsw_mtu_read(vswp, mdp, node, &mtu); 2087 if (mtu != vswp->mtu) { 2088 if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) { 2089 updated |= MD_mtu; 2090 } else { 2091 cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update" 2092 " as the specified value:%d is invalid\n", 2093 vswp->instance, mtu); 2094 } 2095 } 2096 2097 /* 2098 * Now make any changes which are needed... 2099 */ 2100 2101 if (updated & (MD_physname | MD_smode | MD_mtu)) { 2102 2103 /* 2104 * Stop any pending thread to setup switching mode. 2105 */ 2106 vsw_setup_switching_stop(vswp); 2107 2108 /* Cleanup HybridIO */ 2109 vsw_hio_cleanup(vswp); 2110 2111 /* 2112 * Remove unicst, mcst addrs of vsw interface 2113 * and ports from the physdev. This also closes 2114 * the corresponding mac clients. 2115 */ 2116 vsw_unset_addrs(vswp); 2117 2118 /* 2119 * Stop, detach and close the old device.. 2120 */ 2121 mutex_enter(&vswp->mac_lock); 2122 vsw_mac_close(vswp); 2123 mutex_exit(&vswp->mac_lock); 2124 2125 /* 2126 * Update phys name. 2127 */ 2128 if (updated & MD_physname) { 2129 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", 2130 vswp->instance, vswp->physname, physname); 2131 (void) strncpy(vswp->physname, 2132 physname, strlen(physname) + 1); 2133 } 2134 2135 /* 2136 * Update array with the new switch mode values. 2137 */ 2138 if (updated & MD_smode) { 2139 vswp->smode = new_smode; 2140 } 2141 2142 /* Update mtu */ 2143 if (updated & MD_mtu) { 2144 rv = vsw_mtu_update(vswp, mtu); 2145 if (rv != 0) { 2146 goto fail_update; 2147 } 2148 } 2149 2150 /* 2151 * ..and attach, start the new device. 2152 */ 2153 rv = vsw_setup_switching(vswp); 2154 if (rv == EAGAIN) { 2155 /* 2156 * Unable to setup switching mode. 2157 * As the error is EAGAIN, schedule a thread to retry 2158 * and return. 
Programming addresses of ports and 2159 * vsw interface will be done by the thread when the 2160 * switching setup completes successfully. 2161 */ 2162 if (vsw_setup_switching_start(vswp) != 0) { 2163 goto fail_update; 2164 } 2165 return; 2166 2167 } else if (rv) { 2168 goto fail_update; 2169 } 2170 2171 vsw_setup_layer2_post_process(vswp); 2172 } else if (updated & MD_macaddr) { 2173 /* 2174 * We enter here if only MD_macaddr is exclusively updated. 2175 * If MD_physname and/or MD_smode are also updated, then 2176 * as part of that, we would have implicitly processed 2177 * MD_macaddr update (above). 2178 */ 2179 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", 2180 vswp->instance, macaddr); 2181 2182 READ_ENTER(&vswp->if_lockrw); 2183 if (vswp->if_state & VSW_IF_UP) { 2184 /* reconfigure with new address */ 2185 vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0); 2186 2187 /* 2188 * Notify the MAC layer of the changed address. 2189 */ 2190 mac_unicst_update(vswp->if_mh, 2191 (uint8_t *)&vswp->if_addr); 2192 2193 } 2194 RW_EXIT(&vswp->if_lockrw); 2195 2196 } 2197 2198 if (updated & MD_vlans) { 2199 /* Remove existing vlan ids from the hash table. 
*/ 2200 vsw_vlan_remove_ids(vswp, VSW_LOCALDEV); 2201 2202 if (vswp->if_state & VSW_IF_UP) { 2203 vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids); 2204 } else { 2205 if (vswp->nvids != 0) { 2206 kmem_free(vswp->vids, 2207 sizeof (vsw_vlanid_t) * vswp->nvids); 2208 } 2209 vswp->vids = vids; 2210 vswp->nvids = nvids; 2211 vswp->pvid = pvid; 2212 } 2213 2214 /* add these new vlan ids into hash table */ 2215 vsw_vlan_add_ids(vswp, VSW_LOCALDEV); 2216 } else { 2217 if (nvids != 0) { 2218 kmem_free(vids, sizeof (vsw_vlanid_t) * nvids); 2219 } 2220 } 2221 2222 return; 2223 2224 fail_reconf: 2225 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); 2226 return; 2227 2228 fail_update: 2229 cmn_err(CE_WARN, "!vsw%d: re-configuration failed", 2230 vswp->instance); 2231 } 2232 2233 /* 2234 * Read the port's md properties. 2235 */ 2236 static int 2237 vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 2238 md_t *mdp, mde_cookie_t *node) 2239 { 2240 uint64_t ldc_id; 2241 uint8_t *addrp; 2242 int i, addrsz; 2243 int num_nodes = 0, nchan = 0; 2244 int listsz = 0; 2245 mde_cookie_t *listp = NULL; 2246 struct ether_addr ea; 2247 uint64_t macaddr; 2248 uint64_t inst = 0; 2249 uint64_t val; 2250 2251 if (md_get_prop_val(mdp, *node, id_propname, &inst)) { 2252 DWARN(vswp, "%s: prop(%s) not found", __func__, 2253 id_propname); 2254 return (1); 2255 } 2256 2257 /* 2258 * Find the channel endpoint node(s) (which should be under this 2259 * port node) which contain the channel id(s). 
2260 */ 2261 if ((num_nodes = md_node_count(mdp)) <= 0) { 2262 DERR(vswp, "%s: invalid number of nodes found (%d)", 2263 __func__, num_nodes); 2264 return (1); 2265 } 2266 2267 D2(vswp, "%s: %d nodes found", __func__, num_nodes); 2268 2269 /* allocate enough space for node list */ 2270 listsz = num_nodes * sizeof (mde_cookie_t); 2271 listp = kmem_zalloc(listsz, KM_SLEEP); 2272 2273 nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname), 2274 md_find_name(mdp, "fwd"), listp); 2275 2276 if (nchan <= 0) { 2277 DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname); 2278 kmem_free(listp, listsz); 2279 return (1); 2280 } 2281 2282 D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname); 2283 2284 /* use property from first node found */ 2285 if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) { 2286 DWARN(vswp, "%s: prop(%s) not found\n", __func__, 2287 id_propname); 2288 kmem_free(listp, listsz); 2289 return (1); 2290 } 2291 2292 /* don't need list any more */ 2293 kmem_free(listp, listsz); 2294 2295 D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id); 2296 2297 /* read mac-address property */ 2298 if (md_get_prop_data(mdp, *node, remaddr_propname, 2299 &addrp, &addrsz)) { 2300 DWARN(vswp, "%s: prop(%s) not found", 2301 __func__, remaddr_propname); 2302 return (1); 2303 } 2304 2305 if (addrsz < ETHERADDRL) { 2306 DWARN(vswp, "%s: invalid address size", __func__); 2307 return (1); 2308 } 2309 2310 macaddr = *((uint64_t *)addrp); 2311 D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr); 2312 2313 for (i = ETHERADDRL - 1; i >= 0; i--) { 2314 ea.ether_addr_octet[i] = macaddr & 0xFF; 2315 macaddr >>= 8; 2316 } 2317 2318 /* now update all properties into the port */ 2319 portp->p_vswp = vswp; 2320 portp->p_instance = inst; 2321 portp->addr_set = B_FALSE; 2322 ether_copy(&ea, &portp->p_macaddr); 2323 if (nchan > VSW_PORT_MAX_LDCS) { 2324 D2(vswp, "%s: using first of %d ldc ids", 2325 __func__, nchan); 2326 nchan = VSW_PORT_MAX_LDCS; 2327 
} 2328 portp->num_ldcs = nchan; 2329 portp->ldc_ids = 2330 kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP); 2331 bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan); 2332 2333 /* read vlan id properties of this port node */ 2334 vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid, 2335 &portp->vids, &portp->nvids, NULL); 2336 2337 /* Check if hybrid property is present */ 2338 if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) { 2339 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2340 portp->p_hio_enabled = B_TRUE; 2341 } else { 2342 portp->p_hio_enabled = B_FALSE; 2343 } 2344 /* 2345 * Port hio capability determined after version 2346 * negotiation, i.e., when we know the peer is HybridIO capable. 2347 */ 2348 portp->p_hio_capable = B_FALSE; 2349 return (0); 2350 } 2351 2352 /* 2353 * Add a new port to the system. 2354 * 2355 * Returns 0 on success, 1 on failure. 2356 */ 2357 int 2358 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) 2359 { 2360 vsw_port_t *portp; 2361 int rv; 2362 2363 portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP); 2364 2365 rv = vsw_port_read_props(portp, vswp, mdp, node); 2366 if (rv != 0) { 2367 kmem_free(portp, sizeof (*portp)); 2368 return (1); 2369 } 2370 2371 rv = vsw_port_attach(portp); 2372 if (rv != 0) { 2373 DERR(vswp, "%s: failed to attach port", __func__); 2374 return (1); 2375 } 2376 2377 return (0); 2378 } 2379 2380 static int 2381 vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 2382 md_t *prev_mdp, mde_cookie_t prev_mdex) 2383 { 2384 uint64_t cport_num; 2385 uint64_t pport_num; 2386 vsw_port_list_t *plistp; 2387 vsw_port_t *portp; 2388 boolean_t updated_vlans = B_FALSE; 2389 uint16_t pvid; 2390 vsw_vlanid_t *vids; 2391 uint16_t nvids; 2392 uint64_t val; 2393 boolean_t hio_enabled = B_FALSE; 2394 2395 /* 2396 * For now, we get port updates only if vlan ids changed. 2397 * We read the port num and do some sanity check. 
2398 */ 2399 if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) { 2400 return (1); 2401 } 2402 2403 if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) { 2404 return (1); 2405 } 2406 if (cport_num != pport_num) 2407 return (1); 2408 2409 plistp = &(vswp->plist); 2410 2411 READ_ENTER(&plistp->lockrw); 2412 2413 portp = vsw_lookup_port(vswp, cport_num); 2414 if (portp == NULL) { 2415 RW_EXIT(&plistp->lockrw); 2416 return (1); 2417 } 2418 2419 /* Read the vlan ids */ 2420 vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid, 2421 &vids, &nvids, NULL); 2422 2423 /* Determine if there are any vlan id updates */ 2424 if ((pvid != portp->pvid) || /* pvid changed? */ 2425 (nvids != portp->nvids) || /* # of vids changed? */ 2426 ((nvids != 0) && (portp->nvids != 0) && /* vids changed? */ 2427 !vsw_cmp_vids(vids, portp->vids, nvids))) { 2428 updated_vlans = B_TRUE; 2429 } 2430 2431 if (updated_vlans == B_TRUE) { 2432 2433 /* Remove existing vlan ids from the hash table. */ 2434 vsw_vlan_remove_ids(portp, VSW_VNETPORT); 2435 2436 /* Reconfigure vlans with network device */ 2437 vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids); 2438 2439 /* add these new vlan ids into hash table */ 2440 vsw_vlan_add_ids(portp, VSW_VNETPORT); 2441 2442 /* reset the port if it is vlan unaware (ver < 1.3) */ 2443 vsw_vlan_unaware_port_reset(portp); 2444 } 2445 2446 /* Check if hybrid property is present */ 2447 if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) { 2448 D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname); 2449 hio_enabled = B_TRUE; 2450 } 2451 2452 if (portp->p_hio_enabled != hio_enabled) { 2453 vsw_hio_port_update(portp, hio_enabled); 2454 } 2455 2456 RW_EXIT(&plistp->lockrw); 2457 2458 return (0); 2459 } 2460 2461 /* 2462 * vsw_mac_rx -- A common function to send packets to the interface. 
2463 * By default this function check if the interface is UP or not, the 2464 * rest of the behaviour depends on the flags as below: 2465 * 2466 * VSW_MACRX_PROMISC -- Check if the promisc mode set or not. 2467 * VSW_MACRX_COPYMSG -- Make a copy of the message(s). 2468 * VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack. 2469 */ 2470 void 2471 vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 2472 mblk_t *mp, vsw_macrx_flags_t flags) 2473 { 2474 mblk_t *mpt; 2475 2476 D1(vswp, "%s:enter\n", __func__); 2477 READ_ENTER(&vswp->if_lockrw); 2478 /* Check if the interface is up */ 2479 if (!(vswp->if_state & VSW_IF_UP)) { 2480 RW_EXIT(&vswp->if_lockrw); 2481 /* Free messages only if FREEMSG flag specified */ 2482 if (flags & VSW_MACRX_FREEMSG) { 2483 freemsgchain(mp); 2484 } 2485 D1(vswp, "%s:exit\n", __func__); 2486 return; 2487 } 2488 /* 2489 * If PROMISC flag is passed, then check if 2490 * the interface is in the PROMISC mode. 2491 * If not, drop the messages. 2492 */ 2493 if (flags & VSW_MACRX_PROMISC) { 2494 if (!(vswp->if_state & VSW_IF_PROMISC)) { 2495 RW_EXIT(&vswp->if_lockrw); 2496 /* Free messages only if FREEMSG flag specified */ 2497 if (flags & VSW_MACRX_FREEMSG) { 2498 freemsgchain(mp); 2499 } 2500 D1(vswp, "%s:exit\n", __func__); 2501 return; 2502 } 2503 } 2504 RW_EXIT(&vswp->if_lockrw); 2505 /* 2506 * If COPYMSG flag is passed, then make a copy 2507 * of the message chain and send up the copy. 
2508 */ 2509 if (flags & VSW_MACRX_COPYMSG) { 2510 mp = copymsgchain(mp); 2511 if (mp == NULL) { 2512 D1(vswp, "%s:exit\n", __func__); 2513 return; 2514 } 2515 } 2516 2517 D2(vswp, "%s: sending up stack", __func__); 2518 2519 mpt = NULL; 2520 (void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt); 2521 if (mp != NULL) { 2522 mac_rx(vswp->if_mh, mrh, mp); 2523 } 2524 D1(vswp, "%s:exit\n", __func__); 2525 } 2526 2527 /* copy mac address of vsw into soft state structure */ 2528 static void 2529 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr) 2530 { 2531 int i; 2532 2533 WRITE_ENTER(&vswp->if_lockrw); 2534 for (i = ETHERADDRL - 1; i >= 0; i--) { 2535 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF; 2536 macaddr >>= 8; 2537 } 2538 RW_EXIT(&vswp->if_lockrw); 2539 } 2540 2541 /* Compare VLAN ids, array size expected to be same. */ 2542 static boolean_t 2543 vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids) 2544 { 2545 int i, j; 2546 uint16_t vid; 2547 2548 for (i = 0; i < nvids; i++) { 2549 vid = vids1[i].vl_vid; 2550 for (j = 0; j < nvids; j++) { 2551 if (vid == vids2[i].vl_vid) 2552 break; 2553 } 2554 if (j == nvids) { 2555 return (B_FALSE); 2556 } 2557 } 2558 return (B_TRUE); 2559 } 2560