1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/debug.h> 30 #include <sys/time.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/user.h> 34 #include <sys/stropts.h> 35 #include <sys/stream.h> 36 #include <sys/strlog.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/kmem.h> 41 #include <sys/conf.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/ksynch.h> 45 #include <sys/stat.h> 46 #include <sys/kstat.h> 47 #include <sys/vtrace.h> 48 #include <sys/strsun.h> 49 #include <sys/dlpi.h> 50 #include <sys/ethernet.h> 51 #include <net/if.h> 52 #include <sys/varargs.h> 53 #include <sys/machsystm.h> 54 #include <sys/modctl.h> 55 #include <sys/modhash.h> 56 #include <sys/mac_provider.h> 57 #include <sys/mac_ether.h> 58 #include <sys/taskq.h> 59 #include <sys/note.h> 60 #include <sys/mach_descrip.h> 61 #include <sys/mac_provider.h> 62 #include <sys/mdeg.h> 63 #include <sys/ldc.h> 64 #include <sys/vsw_fdb.h> 
65 #include <sys/vsw.h> 66 #include <sys/vio_mailbox.h> 67 #include <sys/vnet_mailbox.h> 68 #include <sys/vnet_common.h> 69 #include <sys/vio_util.h> 70 #include <sys/sdt.h> 71 #include <sys/atomic.h> 72 #include <sys/callb.h> 73 #include <sys/vlan.h> 74 75 /* 76 * Function prototypes. 77 */ 78 static int vsw_attach(dev_info_t *, ddi_attach_cmd_t); 79 static int vsw_detach(dev_info_t *, ddi_detach_cmd_t); 80 static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *); 81 static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *); 82 83 /* MDEG routines */ 84 static int vsw_mdeg_register(vsw_t *vswp); 85 static void vsw_mdeg_unregister(vsw_t *vswp); 86 static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *); 87 static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *); 88 static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t); 89 static int vsw_read_mdprops(vsw_t *vswp); 90 static void vsw_vlan_read_ids(void *arg, int type, md_t *mdp, 91 mde_cookie_t node, uint16_t *pvidp, vsw_vlanid_t **vidspp, 92 uint16_t *nvidsp, uint16_t *default_idp); 93 static int vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp, 94 md_t *mdp, mde_cookie_t *node); 95 static void vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, 96 mde_cookie_t node); 97 static void vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 98 uint32_t *mtu); 99 static int vsw_mtu_update(vsw_t *vswp, uint32_t mtu); 100 static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t); 101 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 102 static boolean_t vsw_cmp_vids(vsw_vlanid_t *vids1, 103 vsw_vlanid_t *vids2, int nvids); 104 105 /* Mac driver related routines */ 106 static int vsw_mac_register(vsw_t *); 107 static int vsw_mac_unregister(vsw_t *); 108 static int vsw_m_stat(void *, uint_t, uint64_t *); 109 static void vsw_m_stop(void *arg); 110 static int vsw_m_start(void *arg); 111 static int vsw_m_unicst(void *arg, const uint8_t *); 112 static int 
vsw_m_multicst(void *arg, boolean_t, const uint8_t *); 113 static int vsw_m_promisc(void *arg, boolean_t); 114 static mblk_t *vsw_m_tx(void *arg, mblk_t *); 115 void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 116 mblk_t *mp, vsw_macrx_flags_t flags); 117 118 /* 119 * Functions imported from other files. 120 */ 121 extern void vsw_setup_switching_thread(void *arg); 122 extern int vsw_setup_switching_start(vsw_t *vswp); 123 extern void vsw_setup_switching_stop(vsw_t *vswp); 124 extern int vsw_setup_switching(vsw_t *); 125 extern void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, 126 vsw_port_t *port, mac_resource_handle_t mrh); 127 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 128 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 129 extern void vsw_del_mcst_vsw(vsw_t *); 130 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 131 extern int vsw_detach_ports(vsw_t *vswp); 132 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 133 extern int vsw_port_detach(vsw_t *vswp, int p_instance); 134 static int vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex, 135 md_t *prev_mdp, mde_cookie_t prev_mdex); 136 extern int vsw_port_attach(vsw_port_t *port); 137 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 138 extern int vsw_mac_open(vsw_t *vswp); 139 extern void vsw_mac_close(vsw_t *vswp); 140 extern void vsw_mac_cleanup_ports(vsw_t *vswp); 141 extern void vsw_unset_addrs(vsw_t *vswp); 142 extern void vsw_setup_layer2_post_process(vsw_t *vswp); 143 extern void vsw_create_vlans(void *arg, int type); 144 extern void vsw_destroy_vlans(void *arg, int type); 145 extern void vsw_vlan_add_ids(void *arg, int type); 146 extern void vsw_vlan_remove_ids(void *arg, int type); 147 extern void vsw_vlan_unaware_port_reset(vsw_port_t *portp); 148 extern uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, 149 mblk_t **npt); 150 extern mblk_t 
*vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 151 extern void vsw_hio_cleanup(vsw_t *vswp); 152 extern void vsw_hio_start_ports(vsw_t *vswp); 153 extern void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled); 154 extern int vsw_mac_multicast_add(vsw_t *, vsw_port_t *, mcst_addr_t *, int); 155 extern void vsw_mac_multicast_remove(vsw_t *, vsw_port_t *, mcst_addr_t *, int); 156 extern void vsw_mac_port_reconfig_vlans(vsw_port_t *portp, uint16_t new_pvid, 157 vsw_vlanid_t *new_vids, int new_nvids); 158 extern int vsw_mac_client_init(vsw_t *vswp, vsw_port_t *port, int type); 159 extern void vsw_mac_client_cleanup(vsw_t *vswp, vsw_port_t *port, int type); 160 extern void vsw_if_mac_reconfig(vsw_t *vswp, boolean_t update_vlans, 161 uint16_t new_pvid, vsw_vlanid_t *new_vids, int new_nvids); 162 extern void vsw_reset_ports(vsw_t *vswp); 163 extern void vsw_port_reset(vsw_port_t *portp); 164 void vsw_hio_port_update(vsw_port_t *portp, boolean_t hio_enabled); 165 166 /* 167 * Internal tunables. 
 */
int	vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */
int	vsw_wretries = 100;		/* # of write attempts */
int	vsw_desc_delay = 0;		/* delay in us */
int	vsw_read_attempts = 5;		/* # of reads of descriptor */
int	vsw_setup_switching_delay = 3;	/* setup sw timeout interval in sec */
int	vsw_mac_open_retries = 300;	/* max # of mac_open() retries */
					/* 300*3 = 900sec(15min) of max tmout */
int	vsw_ldc_tx_delay = 5;		/* delay(ticks) for tx retries */
int	vsw_ldc_tx_retries = 10;	/* # of ldc tx retries */
boolean_t vsw_ldc_rxthr_enabled = B_TRUE;	/* LDC Rx thread enabled */
boolean_t vsw_ldc_txthr_enabled = B_TRUE;	/* LDC Tx thread enabled */

uint32_t	vsw_fdb_nchains = 8;	/* # of chains in fdb hash table */
uint32_t	vsw_vlan_nchains = 4;	/* # of chains in vlan id hash table */
uint32_t	vsw_ethermtu = 1500;	/* mtu of the device */

/* delay in usec to wait for all references on a fdb entry to be dropped */
uint32_t vsw_fdbe_refcnt_delay = 10;

/*
 * Default vlan id. This is only used internally when the "default-vlan-id"
 * property is not present in the MD device node. Therefore, this should not be
 * used as a tunable; if this value is changed, the corresponding variable
 * should be updated to the same value in all vnets connected to this vsw.
 */
uint16_t	vsw_default_vlan_id = 1;

/*
 * Workaround for a version handshake bug in obp's vnet.
 * If vsw initiates version negotiation starting from the highest version,
 * obp sends a nack and terminates version handshake. To work around
 * this, we do not initiate version handshake when the channel comes up.
 * Instead, we wait for the peer to send its version info msg and go through
 * the version protocol exchange. If we successfully negotiate a version,
 * before sending the ack, we send our version info msg to the peer
 * using the <major,minor> version that we are about to ack.
 */
boolean_t vsw_obp_ver_proto_workaround = B_TRUE;

/*
 * In the absence of "priority-ether-types" property in MD, the following
 * internal tunable can be set to specify a single priority ethertype.
 */
uint64_t vsw_pri_eth_type = 0;

/*
 * Number of transmit priority buffers that are preallocated per device.
 * This number is chosen to be a small value to throttle transmission
 * of priority packets. Note: Must be a power of 2 for vio_create_mblks().
 */
uint32_t vsw_pri_tx_nmblks = 64;

/*
 * Number of RARP packets sent to announce macaddr to the physical switch,
 * after vsw's physical device is changed dynamically or after a guest (client
 * vnet) is live migrated in.
 */
uint32_t vsw_publish_macaddr_count = 3;

boolean_t vsw_hio_enabled = B_TRUE;	/* Enable/disable HybridIO */
int vsw_hio_max_cleanup_retries = 10;	/* Max retries for HybridIO cleanup */
int vsw_hio_cleanup_delay = 10000;	/* 10ms */

/* Number of transmit descriptors - must be power of 2 */
uint32_t vsw_ntxds = VSW_RING_NUM_EL;

/*
 * Max number of mblks received in one receive operation.
 *
 * NOTE(review): the initializer multiplies by the floating-point constant
 * 0.6; the product is converted to uint32_t when the variable is
 * initialized, so any fractional part is dropped.
 */
uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6);

/*
 * Internal tunables for receive buffer pools, that is, the size and number of
 * mblks for each pool. At least 3 sizes must be specified if these are used.
 * The sizes must be specified in increasing order. Non-zero value of the first
 * size will be used as a hint to use these values instead of the algorithm
 * that determines the sizes based on MTU.
 */
uint32_t vsw_mblk_size1 = 0;
uint32_t vsw_mblk_size2 = 0;
uint32_t vsw_mblk_size3 = 0;
uint32_t vsw_mblk_size4 = 0;
uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS;	/* number of mblks for pool1 */
uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS;	/* number of mblks for pool2 */
uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS;	/* number of mblks for pool3 */
uint32_t vsw_num_mblks4 = VSW_NUM_MBLKS;	/* number of mblks for pool4 */

/*
 * Set this to non-zero to enable additional internal receive buffer pools
 * based on the MTU of the device for better performance at the cost of more
 * memory consumption. This is turned off by default, to use allocb(9F) for
 * receive buffer allocations of sizes > 2K.
 */
boolean_t vsw_jumbo_rxpools = B_FALSE;

/*
 * vsw_max_tx_qcount is the maximum # of packets that can be queued
 * before the tx worker thread begins processing the queue. Its value
 * is chosen to be 4x the default length of tx descriptor ring.
268 */ 269 uint32_t vsw_max_tx_qcount = 4 * VSW_RING_NUM_EL; 270 271 /* 272 * MAC callbacks 273 */ 274 static mac_callbacks_t vsw_m_callbacks = { 275 0, 276 vsw_m_stat, 277 vsw_m_start, 278 vsw_m_stop, 279 vsw_m_promisc, 280 vsw_m_multicst, 281 vsw_m_unicst, 282 vsw_m_tx, 283 NULL, 284 NULL, 285 NULL 286 }; 287 288 static struct cb_ops vsw_cb_ops = { 289 nulldev, /* cb_open */ 290 nulldev, /* cb_close */ 291 nodev, /* cb_strategy */ 292 nodev, /* cb_print */ 293 nodev, /* cb_dump */ 294 nodev, /* cb_read */ 295 nodev, /* cb_write */ 296 nodev, /* cb_ioctl */ 297 nodev, /* cb_devmap */ 298 nodev, /* cb_mmap */ 299 nodev, /* cb_segmap */ 300 nochpoll, /* cb_chpoll */ 301 ddi_prop_op, /* cb_prop_op */ 302 NULL, /* cb_stream */ 303 D_MP, /* cb_flag */ 304 CB_REV, /* rev */ 305 nodev, /* int (*cb_aread)() */ 306 nodev /* int (*cb_awrite)() */ 307 }; 308 309 static struct dev_ops vsw_ops = { 310 DEVO_REV, /* devo_rev */ 311 0, /* devo_refcnt */ 312 NULL, /* devo_getinfo */ 313 nulldev, /* devo_identify */ 314 nulldev, /* devo_probe */ 315 vsw_attach, /* devo_attach */ 316 vsw_detach, /* devo_detach */ 317 nodev, /* devo_reset */ 318 &vsw_cb_ops, /* devo_cb_ops */ 319 (struct bus_ops *)NULL, /* devo_bus_ops */ 320 ddi_power /* devo_power */ 321 }; 322 323 extern struct mod_ops mod_driverops; 324 static struct modldrv vswmodldrv = { 325 &mod_driverops, 326 "sun4v Virtual Switch", 327 &vsw_ops, 328 }; 329 330 #define LDC_ENTER_LOCK(ldcp) \ 331 mutex_enter(&((ldcp)->ldc_cblock));\ 332 mutex_enter(&((ldcp)->ldc_rxlock));\ 333 mutex_enter(&((ldcp)->ldc_txlock)); 334 #define LDC_EXIT_LOCK(ldcp) \ 335 mutex_exit(&((ldcp)->ldc_txlock));\ 336 mutex_exit(&((ldcp)->ldc_rxlock));\ 337 mutex_exit(&((ldcp)->ldc_cblock)); 338 339 /* Driver soft state ptr */ 340 static void *vsw_state; 341 342 /* 343 * Linked list of "vsw_t" structures - one per instance. 
344 */ 345 vsw_t *vsw_head = NULL; 346 krwlock_t vsw_rw; 347 348 /* 349 * Property names 350 */ 351 static char vdev_propname[] = "virtual-device"; 352 static char vsw_propname[] = "virtual-network-switch"; 353 static char physdev_propname[] = "vsw-phys-dev"; 354 static char smode_propname[] = "vsw-switch-mode"; 355 static char macaddr_propname[] = "local-mac-address"; 356 static char remaddr_propname[] = "remote-mac-address"; 357 static char ldcids_propname[] = "ldc-ids"; 358 static char chan_propname[] = "channel-endpoint"; 359 static char id_propname[] = "id"; 360 static char reg_propname[] = "reg"; 361 static char pri_types_propname[] = "priority-ether-types"; 362 static char vsw_pvid_propname[] = "port-vlan-id"; 363 static char vsw_vid_propname[] = "vlan-id"; 364 static char vsw_dvid_propname[] = "default-vlan-id"; 365 static char port_pvid_propname[] = "remote-port-vlan-id"; 366 static char port_vid_propname[] = "remote-vlan-id"; 367 static char hybrid_propname[] = "hybrid"; 368 static char vsw_mtu_propname[] = "mtu"; 369 370 /* 371 * Matching criteria passed to the MDEG to register interest 372 * in changes to 'virtual-device-port' nodes identified by their 373 * 'id' property. 374 */ 375 static md_prop_match_t vport_prop_match[] = { 376 { MDET_PROP_VAL, "id" }, 377 { MDET_LIST_END, NULL } 378 }; 379 380 static mdeg_node_match_t vport_match = { "virtual-device-port", 381 vport_prop_match }; 382 383 /* 384 * Matching criteria passed to the MDEG to register interest 385 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified 386 * by their 'name' and 'cfg-handle' properties. 
387 */ 388 static md_prop_match_t vdev_prop_match[] = { 389 { MDET_PROP_STR, "name" }, 390 { MDET_PROP_VAL, "cfg-handle" }, 391 { MDET_LIST_END, NULL } 392 }; 393 394 static mdeg_node_match_t vdev_match = { "virtual-device", 395 vdev_prop_match }; 396 397 398 /* 399 * Specification of an MD node passed to the MDEG to filter any 400 * 'vport' nodes that do not belong to the specified node. This 401 * template is copied for each vsw instance and filled in with 402 * the appropriate 'cfg-handle' value before being passed to the MDEG. 403 */ 404 static mdeg_prop_spec_t vsw_prop_template[] = { 405 { MDET_PROP_STR, "name", vsw_propname }, 406 { MDET_PROP_VAL, "cfg-handle", NULL }, 407 { MDET_LIST_END, NULL, NULL } 408 }; 409 410 #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 411 412 #ifdef DEBUG 413 /* 414 * Print debug messages - set to 0x1f to enable all msgs 415 * or 0x0 to turn all off. 416 */ 417 int vswdbg = 0x0; 418 419 /* 420 * debug levels: 421 * 0x01: Function entry/exit tracing 422 * 0x02: Internal function messages 423 * 0x04: Verbose internal messages 424 * 0x08: Warning messages 425 * 0x10: Error messages 426 */ 427 428 void 429 vswdebug(vsw_t *vswp, const char *fmt, ...) 
430 { 431 char buf[512]; 432 va_list ap; 433 434 va_start(ap, fmt); 435 (void) vsprintf(buf, fmt, ap); 436 va_end(ap); 437 438 if (vswp == NULL) 439 cmn_err(CE_CONT, "%s\n", buf); 440 else 441 cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); 442 } 443 444 #endif /* DEBUG */ 445 446 static struct modlinkage modlinkage = { 447 MODREV_1, 448 &vswmodldrv, 449 NULL 450 }; 451 452 int 453 _init(void) 454 { 455 int status; 456 457 rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); 458 459 status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); 460 if (status != 0) { 461 return (status); 462 } 463 464 mac_init_ops(&vsw_ops, DRV_NAME); 465 status = mod_install(&modlinkage); 466 if (status != 0) { 467 ddi_soft_state_fini(&vsw_state); 468 } 469 return (status); 470 } 471 472 int 473 _fini(void) 474 { 475 int status; 476 477 status = mod_remove(&modlinkage); 478 if (status != 0) 479 return (status); 480 mac_fini_ops(&vsw_ops); 481 ddi_soft_state_fini(&vsw_state); 482 483 rw_destroy(&vsw_rw); 484 485 return (status); 486 } 487 488 int 489 _info(struct modinfo *modinfop) 490 { 491 return (mod_info(&modlinkage, modinfop)); 492 } 493 494 static int 495 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 496 { 497 vsw_t *vswp; 498 int instance; 499 char hashname[MAXNAMELEN]; 500 char qname[TASKQ_NAMELEN]; 501 enum { PROG_init = 0x00, 502 PROG_locks = 0x01, 503 PROG_readmd = 0x02, 504 PROG_fdb = 0x04, 505 PROG_mfdb = 0x08, 506 PROG_taskq = 0x10, 507 PROG_swmode = 0x20, 508 PROG_macreg = 0x40, 509 PROG_mdreg = 0x80} 510 progress; 511 512 progress = PROG_init; 513 int rv; 514 515 switch (cmd) { 516 case DDI_ATTACH: 517 break; 518 case DDI_RESUME: 519 /* nothing to do for this non-device */ 520 return (DDI_SUCCESS); 521 case DDI_PM_RESUME: 522 default: 523 return (DDI_FAILURE); 524 } 525 526 instance = ddi_get_instance(dip); 527 if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { 528 DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); 529 return (DDI_FAILURE); 530 } 
531 vswp = ddi_get_soft_state(vsw_state, instance); 532 533 if (vswp == NULL) { 534 DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance); 535 goto vsw_attach_fail; 536 } 537 538 vswp->dip = dip; 539 vswp->instance = instance; 540 ddi_set_driver_private(dip, (caddr_t)vswp); 541 542 mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL); 543 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); 544 mutex_init(&vswp->sw_thr_lock, NULL, MUTEX_DRIVER, NULL); 545 cv_init(&vswp->sw_thr_cv, NULL, CV_DRIVER, NULL); 546 rw_init(&vswp->maccl_rwlock, NULL, RW_DRIVER, NULL); 547 rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); 548 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); 549 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); 550 551 progress |= PROG_locks; 552 553 rv = vsw_read_mdprops(vswp); 554 if (rv != 0) 555 goto vsw_attach_fail; 556 557 progress |= PROG_readmd; 558 559 /* setup the unicast forwarding database */ 560 (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", 561 vswp->instance); 562 D2(vswp, "creating unicast hash table (%s)...", hashname); 563 vswp->fdb_nchains = vsw_fdb_nchains; 564 vswp->fdb_hashp = mod_hash_create_ptrhash(hashname, vswp->fdb_nchains, 565 mod_hash_null_valdtor, sizeof (void *)); 566 vsw_create_vlans((void *)vswp, VSW_LOCALDEV); 567 progress |= PROG_fdb; 568 569 /* setup the multicast fowarding database */ 570 (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", 571 vswp->instance); 572 D2(vswp, "creating multicast hash table %s)...", hashname); 573 vswp->mfdb = mod_hash_create_ptrhash(hashname, vsw_fdb_nchains, 574 mod_hash_null_valdtor, sizeof (void *)); 575 576 progress |= PROG_mfdb; 577 578 /* 579 * Create the taskq which will process all the VIO 580 * control messages. 
581 */ 582 (void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance); 583 if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, 584 TASKQ_DEFAULTPRI, 0)) == NULL) { 585 cmn_err(CE_WARN, "!vsw%d: Unable to create task queue", 586 vswp->instance); 587 goto vsw_attach_fail; 588 } 589 590 progress |= PROG_taskq; 591 592 /* prevent auto-detaching */ 593 if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip, 594 DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) { 595 cmn_err(CE_NOTE, "!Unable to set \"%s\" property for " 596 "instance %u", DDI_NO_AUTODETACH, instance); 597 } 598 599 /* 600 * The null switching function is set to avoid panic until 601 * switch mode is setup. 602 */ 603 vswp->vsw_switch_frame = vsw_switch_frame_nop; 604 605 /* 606 * Setup the required switching mode, based on the mdprops that we read 607 * earlier. We start a thread to do this, to avoid calling mac_open() 608 * directly from attach(). 609 */ 610 rv = vsw_setup_switching_start(vswp); 611 if (rv != 0) { 612 goto vsw_attach_fail; 613 } 614 615 progress |= PROG_swmode; 616 617 /* Register with mac layer as a provider */ 618 rv = vsw_mac_register(vswp); 619 if (rv != 0) 620 goto vsw_attach_fail; 621 622 progress |= PROG_macreg; 623 624 /* 625 * Now we have everything setup, register an interest in 626 * specific MD nodes. 627 * 628 * The callback is invoked in 2 cases, firstly if upon mdeg 629 * registration there are existing nodes which match our specified 630 * criteria, and secondly if the MD is changed (and again, there 631 * are nodes which we are interested in present within it. Note 632 * that our callback will be invoked even if our specified nodes 633 * have not actually changed). 
634 * 635 */ 636 rv = vsw_mdeg_register(vswp); 637 if (rv != 0) 638 goto vsw_attach_fail; 639 640 progress |= PROG_mdreg; 641 642 WRITE_ENTER(&vsw_rw); 643 vswp->next = vsw_head; 644 vsw_head = vswp; 645 RW_EXIT(&vsw_rw); 646 647 ddi_report_dev(vswp->dip); 648 return (DDI_SUCCESS); 649 650 vsw_attach_fail: 651 DERR(NULL, "vsw_attach: failed"); 652 653 if (progress & PROG_mdreg) { 654 vsw_mdeg_unregister(vswp); 655 (void) vsw_detach_ports(vswp); 656 } 657 658 if (progress & PROG_macreg) 659 (void) vsw_mac_unregister(vswp); 660 661 if (progress & PROG_swmode) { 662 vsw_setup_switching_stop(vswp); 663 vsw_hio_cleanup(vswp); 664 mutex_enter(&vswp->mac_lock); 665 vsw_mac_close(vswp); 666 mutex_exit(&vswp->mac_lock); 667 } 668 669 if (progress & PROG_taskq) 670 ddi_taskq_destroy(vswp->taskq_p); 671 672 if (progress & PROG_mfdb) 673 mod_hash_destroy_hash(vswp->mfdb); 674 675 if (progress & PROG_fdb) { 676 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 677 mod_hash_destroy_hash(vswp->fdb_hashp); 678 } 679 680 if (progress & PROG_readmd) { 681 if (VSW_PRI_ETH_DEFINED(vswp)) { 682 kmem_free(vswp->pri_types, 683 sizeof (uint16_t) * vswp->pri_num_types); 684 } 685 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 686 } 687 688 if (progress & PROG_locks) { 689 rw_destroy(&vswp->plist.lockrw); 690 rw_destroy(&vswp->mfdbrw); 691 rw_destroy(&vswp->if_lockrw); 692 rw_destroy(&vswp->maccl_rwlock); 693 cv_destroy(&vswp->sw_thr_cv); 694 mutex_destroy(&vswp->sw_thr_lock); 695 mutex_destroy(&vswp->mca_lock); 696 mutex_destroy(&vswp->mac_lock); 697 } 698 699 ddi_soft_state_free(vsw_state, instance); 700 return (DDI_FAILURE); 701 } 702 703 static int 704 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 705 { 706 vio_mblk_pool_t *poolp, *npoolp; 707 vsw_t **vswpp, *vswp; 708 int instance; 709 710 instance = ddi_get_instance(dip); 711 vswp = ddi_get_soft_state(vsw_state, instance); 712 713 if (vswp == NULL) { 714 return (DDI_FAILURE); 715 } 716 717 switch (cmd) { 718 case DDI_DETACH: 719 break; 720 case 
DDI_SUSPEND: 721 case DDI_PM_SUSPEND: 722 default: 723 return (DDI_FAILURE); 724 } 725 726 D2(vswp, "detaching instance %d", instance); 727 728 /* Stop any pending thread to setup switching mode. */ 729 vsw_setup_switching_stop(vswp); 730 731 /* Cleanup the interface's mac client */ 732 vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV); 733 734 if (vswp->if_state & VSW_IF_REG) { 735 if (vsw_mac_unregister(vswp) != 0) { 736 cmn_err(CE_WARN, "!vsw%d: Unable to detach from " 737 "MAC layer", vswp->instance); 738 return (DDI_FAILURE); 739 } 740 } 741 742 vsw_mdeg_unregister(vswp); 743 744 /* cleanup HybridIO */ 745 vsw_hio_cleanup(vswp); 746 747 if (vsw_detach_ports(vswp) != 0) { 748 cmn_err(CE_WARN, "!vsw%d: Unable to unconfigure ports", 749 vswp->instance); 750 return (DDI_FAILURE); 751 } 752 753 rw_destroy(&vswp->if_lockrw); 754 755 vsw_mac_cleanup_ports(vswp); 756 757 /* 758 * Now that the ports have been deleted, stop and close 759 * the physical device. 760 */ 761 mutex_enter(&vswp->mac_lock); 762 vsw_mac_close(vswp); 763 mutex_exit(&vswp->mac_lock); 764 765 mutex_destroy(&vswp->mac_lock); 766 cv_destroy(&vswp->sw_thr_cv); 767 mutex_destroy(&vswp->sw_thr_lock); 768 rw_destroy(&vswp->maccl_rwlock); 769 770 /* 771 * Destroy any free pools that may still exist. 772 */ 773 poolp = vswp->rxh; 774 while (poolp != NULL) { 775 npoolp = vswp->rxh = poolp->nextp; 776 if (vio_destroy_mblks(poolp) != 0) { 777 vswp->rxh = poolp; 778 return (DDI_FAILURE); 779 } 780 poolp = npoolp; 781 } 782 783 /* 784 * Remove this instance from any entries it may be on in 785 * the hash table by using the list of addresses maintained 786 * in the vsw_t structure. 787 */ 788 vsw_del_mcst_vsw(vswp); 789 790 vswp->mcap = NULL; 791 mutex_destroy(&vswp->mca_lock); 792 793 /* 794 * By now any pending tasks have finished and the underlying 795 * ldc's have been destroyed, so its safe to delete the control 796 * message taskq. 
797 */ 798 if (vswp->taskq_p != NULL) 799 ddi_taskq_destroy(vswp->taskq_p); 800 801 /* 802 * At this stage all the data pointers in the hash table 803 * should be NULL, as all the ports have been removed and will 804 * have deleted themselves from the port lists which the data 805 * pointers point to. Hence we can destroy the table using the 806 * default destructors. 807 */ 808 D2(vswp, "vsw_detach: destroying hash tables.."); 809 vsw_destroy_vlans(vswp, VSW_LOCALDEV); 810 mod_hash_destroy_hash(vswp->fdb_hashp); 811 vswp->fdb_hashp = NULL; 812 813 WRITE_ENTER(&vswp->mfdbrw); 814 mod_hash_destroy_hash(vswp->mfdb); 815 vswp->mfdb = NULL; 816 RW_EXIT(&vswp->mfdbrw); 817 rw_destroy(&vswp->mfdbrw); 818 819 /* free pri_types table */ 820 if (VSW_PRI_ETH_DEFINED(vswp)) { 821 kmem_free(vswp->pri_types, 822 sizeof (uint16_t) * vswp->pri_num_types); 823 (void) vio_destroy_mblks(vswp->pri_tx_vmp); 824 } 825 826 ddi_remove_minor_node(dip, NULL); 827 828 rw_destroy(&vswp->plist.lockrw); 829 WRITE_ENTER(&vsw_rw); 830 for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) { 831 if (*vswpp == vswp) { 832 *vswpp = vswp->next; 833 break; 834 } 835 } 836 RW_EXIT(&vsw_rw); 837 ddi_soft_state_free(vsw_state, instance); 838 839 return (DDI_SUCCESS); 840 } 841 842 /* 843 * Get the value of the "vsw-phys-dev" property in the specified 844 * node. This property is the name of the physical device that 845 * the virtual switch will use to talk to the outside world. 846 * 847 * Note it is valid for this property to be NULL (but the property 848 * itself must exist). Callers of this routine should verify that 849 * the value returned is what they expected (i.e. either NULL or non NULL). 850 * 851 * On success returns value of the property in region pointed to by 852 * the 'name' argument, and with return value of 0. Otherwise returns 1. 
853 */ 854 static int 855 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name) 856 { 857 int len = 0; 858 int instance; 859 char *physname = NULL; 860 char *dev; 861 const char *dev_name; 862 char myname[MAXNAMELEN]; 863 864 dev_name = ddi_driver_name(vswp->dip); 865 instance = ddi_get_instance(vswp->dip); 866 (void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance); 867 868 if (md_get_prop_data(mdp, node, physdev_propname, 869 (uint8_t **)(&physname), &len) != 0) { 870 cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical " 871 "device(s) from MD", vswp->instance); 872 return (1); 873 } else if ((strlen(physname) + 1) > LIFNAMSIZ) { 874 cmn_err(CE_WARN, "!vsw%d: %s is too long a device name", 875 vswp->instance, physname); 876 return (1); 877 } else if (strcmp(myname, physname) == 0) { 878 /* 879 * Prevent the vswitch from opening itself as the 880 * network device. 881 */ 882 cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name", 883 vswp->instance, physname); 884 return (1); 885 } else { 886 (void) strncpy(name, physname, strlen(physname) + 1); 887 D2(vswp, "%s: using first device specified (%s)", 888 __func__, physname); 889 } 890 891 #ifdef DEBUG 892 /* 893 * As a temporary measure to aid testing we check to see if there 894 * is a vsw.conf file present. If there is we use the value of the 895 * vsw_physname property in the file as the name of the physical 896 * device, overriding the value from the MD. 897 * 898 * There may be multiple devices listed, but for the moment 899 * we just use the first one. 
900 */ 901 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0, 902 "vsw_physname", &dev) == DDI_PROP_SUCCESS) { 903 if ((strlen(dev) + 1) > LIFNAMSIZ) { 904 cmn_err(CE_WARN, "vsw%d: %s is too long a device name", 905 vswp->instance, dev); 906 ddi_prop_free(dev); 907 return (1); 908 } else { 909 cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from " 910 "config file", vswp->instance, dev); 911 912 (void) strncpy(name, dev, strlen(dev) + 1); 913 } 914 915 ddi_prop_free(dev); 916 } 917 #endif 918 919 return (0); 920 } 921 922 /* 923 * Read the 'vsw-switch-mode' property from the specified MD node. 924 * 925 * Returns 0 on success, otherwise returns 1. 926 */ 927 static int 928 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint8_t *mode) 929 { 930 int len = 0; 931 char *smode = NULL; 932 char *curr_mode = NULL; 933 934 D1(vswp, "%s: enter", __func__); 935 936 /* 937 * Get the switch-mode property. The modes are listed in 938 * decreasing order of preference, i.e. prefered mode is 939 * first item in list. 940 */ 941 len = 0; 942 if (md_get_prop_data(mdp, node, smode_propname, 943 (uint8_t **)(&smode), &len) != 0) { 944 /* 945 * Unable to get switch-mode property from MD, nothing 946 * more we can do. 947 */ 948 cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property" 949 " from the MD", vswp->instance); 950 return (1); 951 } 952 953 curr_mode = smode; 954 /* 955 * Modes of operation: 956 * 'switched' - layer 2 switching, underlying HW in 957 * programmed mode. 958 * 'promiscuous' - layer 2 switching, underlying HW in 959 * promiscuous mode. 960 * 'routed' - layer 3 (i.e. IP) routing, underlying HW 961 * in non-promiscuous mode. 
962 */ 963 while (curr_mode < (smode + len)) { 964 D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode); 965 if (strcmp(curr_mode, "switched") == 0) { 966 *mode = VSW_LAYER2; 967 } else if (strcmp(curr_mode, "promiscuous") == 0) { 968 *mode = VSW_LAYER2 | VSW_LAYER2_PROMISC; 969 } else if (strcmp(curr_mode, "routed") == 0) { 970 *mode = VSW_LAYER3; 971 } else { 972 cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, " 973 "setting to default switched mode", 974 vswp->instance, curr_mode); 975 *mode = VSW_LAYER2; 976 } 977 curr_mode += strlen(curr_mode) + 1; 978 } 979 980 D2(vswp, "%s: %d mode", __func__, *mode); 981 982 D1(vswp, "%s: exit", __func__); 983 984 return (0); 985 } 986 987 /* 988 * Register with the MAC layer as a network device, so we 989 * can be plumbed if necessary. 990 */ 991 static int 992 vsw_mac_register(vsw_t *vswp) 993 { 994 mac_register_t *macp; 995 int rv; 996 997 D1(vswp, "%s: enter", __func__); 998 999 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 1000 return (EINVAL); 1001 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1002 macp->m_driver = vswp; 1003 macp->m_dip = vswp->dip; 1004 macp->m_src_addr = (uint8_t *)&vswp->if_addr; 1005 macp->m_callbacks = &vsw_m_callbacks; 1006 macp->m_min_sdu = 0; 1007 macp->m_max_sdu = vswp->mtu; 1008 macp->m_margin = VLAN_TAGSZ; 1009 rv = mac_register(macp, &vswp->if_mh); 1010 mac_free(macp); 1011 if (rv != 0) { 1012 /* 1013 * Treat this as a non-fatal error as we may be 1014 * able to operate in some other mode. 
1015 */ 1016 cmn_err(CE_NOTE, "!vsw%d: Unable to register as " 1017 "a provider with MAC layer", vswp->instance); 1018 return (rv); 1019 } 1020 1021 vswp->if_state |= VSW_IF_REG; 1022 1023 D1(vswp, "%s: exit", __func__); 1024 1025 return (rv); 1026 } 1027 1028 static int 1029 vsw_mac_unregister(vsw_t *vswp) 1030 { 1031 int rv = 0; 1032 1033 D1(vswp, "%s: enter", __func__); 1034 1035 WRITE_ENTER(&vswp->if_lockrw); 1036 1037 if (vswp->if_state & VSW_IF_REG) { 1038 rv = mac_unregister(vswp->if_mh); 1039 if (rv != 0) { 1040 DWARN(vswp, "%s: unable to unregister from MAC " 1041 "framework", __func__); 1042 1043 RW_EXIT(&vswp->if_lockrw); 1044 D1(vswp, "%s: fail exit", __func__); 1045 return (rv); 1046 } 1047 1048 /* mark i/f as down and unregistered */ 1049 vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG); 1050 } 1051 RW_EXIT(&vswp->if_lockrw); 1052 1053 D1(vswp, "%s: exit", __func__); 1054 1055 return (rv); 1056 } 1057 1058 static int 1059 vsw_m_stat(void *arg, uint_t stat, uint64_t *val) 1060 { 1061 vsw_t *vswp = (vsw_t *)arg; 1062 1063 D1(vswp, "%s: enter", __func__); 1064 1065 mutex_enter(&vswp->mac_lock); 1066 if (vswp->mh == NULL) { 1067 mutex_exit(&vswp->mac_lock); 1068 return (EINVAL); 1069 } 1070 1071 /* return stats from underlying device */ 1072 *val = mac_stat_get(vswp->mh, stat); 1073 1074 mutex_exit(&vswp->mac_lock); 1075 1076 return (0); 1077 } 1078 1079 static void 1080 vsw_m_stop(void *arg) 1081 { 1082 vsw_t *vswp = (vsw_t *)arg; 1083 1084 D1(vswp, "%s: enter", __func__); 1085 1086 WRITE_ENTER(&vswp->if_lockrw); 1087 vswp->if_state &= ~VSW_IF_UP; 1088 RW_EXIT(&vswp->if_lockrw); 1089 1090 /* Cleanup and close the mac client */ 1091 vsw_mac_client_cleanup(vswp, NULL, VSW_LOCALDEV); 1092 1093 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1094 } 1095 1096 static int 1097 vsw_m_start(void *arg) 1098 { 1099 int rv; 1100 vsw_t *vswp = (vsw_t *)arg; 1101 1102 D1(vswp, "%s: enter", __func__); 1103 1104 WRITE_ENTER(&vswp->if_lockrw); 1105 1106 
vswp->if_state |= VSW_IF_UP; 1107 1108 if (vswp->switching_setup_done == B_FALSE) { 1109 /* 1110 * If the switching mode has not been setup yet, just 1111 * return. The unicast address will be programmed 1112 * after the physical device is successfully setup by the 1113 * timeout handler. 1114 */ 1115 RW_EXIT(&vswp->if_lockrw); 1116 return (0); 1117 } 1118 1119 /* if in layer2 mode, program unicast address. */ 1120 if (vswp->mh != NULL) { 1121 /* Init a mac client and program addresses */ 1122 rv = vsw_mac_client_init(vswp, NULL, VSW_LOCALDEV); 1123 if (rv != 0) { 1124 cmn_err(CE_NOTE, 1125 "!vsw%d: failed to program interface " 1126 "unicast address\n", vswp->instance); 1127 } 1128 } 1129 1130 RW_EXIT(&vswp->if_lockrw); 1131 1132 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1133 return (0); 1134 } 1135 1136 /* 1137 * Change the local interface address. 1138 * 1139 * Note: we don't support this entry point. The local 1140 * mac address of the switch can only be changed via its 1141 * MD node properties. 1142 */ 1143 static int 1144 vsw_m_unicst(void *arg, const uint8_t *macaddr) 1145 { 1146 _NOTE(ARGUNUSED(arg, macaddr)) 1147 1148 return (DDI_FAILURE); 1149 } 1150 1151 static int 1152 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 1153 { 1154 vsw_t *vswp = (vsw_t *)arg; 1155 mcst_addr_t *mcst_p = NULL; 1156 uint64_t addr = 0x0; 1157 int i, ret = 0; 1158 1159 D1(vswp, "%s: enter", __func__); 1160 1161 /* 1162 * Convert address into form that can be used 1163 * as hash table key. 1164 */ 1165 for (i = 0; i < ETHERADDRL; i++) { 1166 addr = (addr << 8) | mca[i]; 1167 } 1168 1169 D2(vswp, "%s: addr = 0x%llx", __func__, addr); 1170 1171 if (add) { 1172 D2(vswp, "%s: adding multicast", __func__); 1173 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1174 /* 1175 * Update the list of multicast addresses 1176 * contained within the vsw_t structure to 1177 * include this new one. 
1178 */ 1179 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP); 1180 if (mcst_p == NULL) { 1181 DERR(vswp, "%s unable to alloc mem", __func__); 1182 (void) vsw_del_mcst(vswp, 1183 VSW_LOCALDEV, addr, NULL); 1184 return (1); 1185 } 1186 mcst_p->addr = addr; 1187 ether_copy(mca, &mcst_p->mca); 1188 1189 /* 1190 * Call into the underlying driver to program the 1191 * address into HW. 1192 */ 1193 ret = vsw_mac_multicast_add(vswp, NULL, mcst_p, 1194 VSW_LOCALDEV); 1195 if (ret != 0) { 1196 (void) vsw_del_mcst(vswp, 1197 VSW_LOCALDEV, addr, NULL); 1198 kmem_free(mcst_p, sizeof (*mcst_p)); 1199 return (ret); 1200 } 1201 1202 mutex_enter(&vswp->mca_lock); 1203 mcst_p->nextp = vswp->mcap; 1204 vswp->mcap = mcst_p; 1205 mutex_exit(&vswp->mca_lock); 1206 } else { 1207 cmn_err(CE_WARN, "!vsw%d: unable to add multicast " 1208 "address", vswp->instance); 1209 } 1210 return (ret); 1211 } 1212 1213 D2(vswp, "%s: removing multicast", __func__); 1214 /* 1215 * Remove the address from the hash table.. 1216 */ 1217 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1218 1219 /* 1220 * ..and then from the list maintained in the 1221 * vsw_t structure. 
1222 */ 1223 mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr); 1224 ASSERT(mcst_p != NULL); 1225 1226 vsw_mac_multicast_remove(vswp, NULL, mcst_p, VSW_LOCALDEV); 1227 kmem_free(mcst_p, sizeof (*mcst_p)); 1228 } 1229 1230 D1(vswp, "%s: exit", __func__); 1231 1232 return (0); 1233 } 1234 1235 static int 1236 vsw_m_promisc(void *arg, boolean_t on) 1237 { 1238 vsw_t *vswp = (vsw_t *)arg; 1239 1240 D1(vswp, "%s: enter", __func__); 1241 1242 WRITE_ENTER(&vswp->if_lockrw); 1243 if (on) 1244 vswp->if_state |= VSW_IF_PROMISC; 1245 else 1246 vswp->if_state &= ~VSW_IF_PROMISC; 1247 RW_EXIT(&vswp->if_lockrw); 1248 1249 D1(vswp, "%s: exit", __func__); 1250 1251 return (0); 1252 } 1253 1254 static mblk_t * 1255 vsw_m_tx(void *arg, mblk_t *mp) 1256 { 1257 vsw_t *vswp = (vsw_t *)arg; 1258 1259 D1(vswp, "%s: enter", __func__); 1260 1261 mp = vsw_vlan_frame_pretag(vswp, VSW_LOCALDEV, mp); 1262 1263 if (mp == NULL) { 1264 return (NULL); 1265 } 1266 1267 vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL); 1268 1269 D1(vswp, "%s: exit", __func__); 1270 1271 return (NULL); 1272 } 1273 1274 /* 1275 * Register for machine description (MD) updates. 1276 * 1277 * Returns 0 on success, 1 on failure. 1278 */ 1279 static int 1280 vsw_mdeg_register(vsw_t *vswp) 1281 { 1282 mdeg_prop_spec_t *pspecp; 1283 mdeg_node_spec_t *inst_specp; 1284 mdeg_handle_t mdeg_hdl, mdeg_port_hdl; 1285 size_t templatesz; 1286 int rv; 1287 1288 D1(vswp, "%s: enter", __func__); 1289 1290 /* 1291 * Allocate and initialize a per-instance copy 1292 * of the global property spec array that will 1293 * uniquely identify this vsw instance. 
1294 */ 1295 templatesz = sizeof (vsw_prop_template); 1296 pspecp = kmem_zalloc(templatesz, KM_SLEEP); 1297 1298 bcopy(vsw_prop_template, pspecp, templatesz); 1299 1300 VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop); 1301 1302 /* initialize the complete prop spec structure */ 1303 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 1304 inst_specp->namep = "virtual-device"; 1305 inst_specp->specp = pspecp; 1306 1307 D2(vswp, "%s: instance %d registering with mdeg", __func__, 1308 vswp->regprop); 1309 /* 1310 * Register an interest in 'virtual-device' nodes with a 1311 * 'name' property of 'virtual-network-switch' 1312 */ 1313 rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb, 1314 (void *)vswp, &mdeg_hdl); 1315 if (rv != MDEG_SUCCESS) { 1316 DERR(vswp, "%s: mdeg_register failed (%d) for vsw node", 1317 __func__, rv); 1318 goto mdeg_reg_fail; 1319 } 1320 1321 /* 1322 * Register an interest in 'vsw-port' nodes. 1323 */ 1324 rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb, 1325 (void *)vswp, &mdeg_port_hdl); 1326 if (rv != MDEG_SUCCESS) { 1327 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv); 1328 (void) mdeg_unregister(mdeg_hdl); 1329 goto mdeg_reg_fail; 1330 } 1331 1332 /* save off data that will be needed later */ 1333 vswp->inst_spec = inst_specp; 1334 vswp->mdeg_hdl = mdeg_hdl; 1335 vswp->mdeg_port_hdl = mdeg_port_hdl; 1336 1337 D1(vswp, "%s: exit", __func__); 1338 return (0); 1339 1340 mdeg_reg_fail: 1341 cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks", 1342 vswp->instance); 1343 kmem_free(pspecp, templatesz); 1344 kmem_free(inst_specp, sizeof (mdeg_node_spec_t)); 1345 1346 vswp->mdeg_hdl = NULL; 1347 vswp->mdeg_port_hdl = NULL; 1348 1349 return (1); 1350 } 1351 1352 static void 1353 vsw_mdeg_unregister(vsw_t *vswp) 1354 { 1355 D1(vswp, "vsw_mdeg_unregister: enter"); 1356 1357 if (vswp->mdeg_hdl != NULL) 1358 (void) mdeg_unregister(vswp->mdeg_hdl); 1359 1360 if (vswp->mdeg_port_hdl != NULL) 1361 (void) 
mdeg_unregister(vswp->mdeg_port_hdl); 1362 1363 if (vswp->inst_spec != NULL) { 1364 if (vswp->inst_spec->specp != NULL) { 1365 (void) kmem_free(vswp->inst_spec->specp, 1366 sizeof (vsw_prop_template)); 1367 vswp->inst_spec->specp = NULL; 1368 } 1369 1370 (void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t)); 1371 vswp->inst_spec = NULL; 1372 } 1373 1374 D1(vswp, "vsw_mdeg_unregister: exit"); 1375 } 1376 1377 /* 1378 * Mdeg callback invoked for the vsw node itself. 1379 */ 1380 static int 1381 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1382 { 1383 vsw_t *vswp; 1384 md_t *mdp; 1385 mde_cookie_t node; 1386 uint64_t inst; 1387 char *node_name = NULL; 1388 1389 if (resp == NULL) 1390 return (MDEG_FAILURE); 1391 1392 vswp = (vsw_t *)cb_argp; 1393 1394 D1(vswp, "%s: added %d : removed %d : curr matched %d" 1395 " : prev matched %d", __func__, resp->added.nelem, 1396 resp->removed.nelem, resp->match_curr.nelem, 1397 resp->match_prev.nelem); 1398 1399 /* 1400 * We get an initial callback for this node as 'added' 1401 * after registering with mdeg. Note that we would have 1402 * already gathered information about this vsw node by 1403 * walking MD earlier during attach (in vsw_read_mdprops()). 1404 * So, there is a window where the properties of this 1405 * node might have changed when we get this initial 'added' 1406 * callback. We handle this as if an update occured 1407 * and invoke the same function which handles updates to 1408 * the properties of this vsw-node if any. 1409 * 1410 * A non-zero 'match' value indicates that the MD has been 1411 * updated and that a virtual-network-switch node is 1412 * present which may or may not have been updated. It is 1413 * up to the clients to examine their own nodes and 1414 * determine if they have changed. 
1415 */ 1416 if (resp->added.nelem != 0) { 1417 1418 if (resp->added.nelem != 1) { 1419 cmn_err(CE_NOTE, "!vsw%d: number of nodes added " 1420 "invalid: %d\n", vswp->instance, resp->added.nelem); 1421 return (MDEG_FAILURE); 1422 } 1423 1424 mdp = resp->added.mdp; 1425 node = resp->added.mdep[0]; 1426 1427 } else if (resp->match_curr.nelem != 0) { 1428 1429 if (resp->match_curr.nelem != 1) { 1430 cmn_err(CE_NOTE, "!vsw%d: number of nodes updated " 1431 "invalid: %d\n", vswp->instance, 1432 resp->match_curr.nelem); 1433 return (MDEG_FAILURE); 1434 } 1435 1436 mdp = resp->match_curr.mdp; 1437 node = resp->match_curr.mdep[0]; 1438 1439 } else { 1440 return (MDEG_FAILURE); 1441 } 1442 1443 /* Validate name and instance */ 1444 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { 1445 DERR(vswp, "%s: unable to get node name\n", __func__); 1446 return (MDEG_FAILURE); 1447 } 1448 1449 /* is this a virtual-network-switch? */ 1450 if (strcmp(node_name, vsw_propname) != 0) { 1451 DERR(vswp, "%s: Invalid node name: %s\n", 1452 __func__, node_name); 1453 return (MDEG_FAILURE); 1454 } 1455 1456 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { 1457 DERR(vswp, "%s: prop(cfg-handle) not found\n", 1458 __func__); 1459 return (MDEG_FAILURE); 1460 } 1461 1462 /* is this the right instance of vsw? */ 1463 if (inst != vswp->regprop) { 1464 DERR(vswp, "%s: Invalid cfg-handle: %lx\n", 1465 __func__, inst); 1466 return (MDEG_FAILURE); 1467 } 1468 1469 vsw_update_md_prop(vswp, mdp, node); 1470 1471 return (MDEG_SUCCESS); 1472 } 1473 1474 /* 1475 * Mdeg callback invoked for changes to the vsw-port nodes 1476 * under the vsw node. 
1477 */ 1478 static int 1479 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1480 { 1481 vsw_t *vswp; 1482 int idx; 1483 md_t *mdp; 1484 mde_cookie_t node; 1485 uint64_t inst; 1486 int rv; 1487 1488 if ((resp == NULL) || (cb_argp == NULL)) 1489 return (MDEG_FAILURE); 1490 1491 vswp = (vsw_t *)cb_argp; 1492 1493 D2(vswp, "%s: added %d : removed %d : curr matched %d" 1494 " : prev matched %d", __func__, resp->added.nelem, 1495 resp->removed.nelem, resp->match_curr.nelem, 1496 resp->match_prev.nelem); 1497 1498 /* process added ports */ 1499 for (idx = 0; idx < resp->added.nelem; idx++) { 1500 mdp = resp->added.mdp; 1501 node = resp->added.mdep[idx]; 1502 1503 D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); 1504 1505 if ((rv = vsw_port_add(vswp, mdp, &node)) != 0) { 1506 cmn_err(CE_WARN, "!vsw%d: Unable to add new port " 1507 "(0x%lx), err=%d", vswp->instance, node, rv); 1508 } 1509 } 1510 1511 /* process removed ports */ 1512 for (idx = 0; idx < resp->removed.nelem; idx++) { 1513 mdp = resp->removed.mdp; 1514 node = resp->removed.mdep[idx]; 1515 1516 if (md_get_prop_val(mdp, node, id_propname, &inst)) { 1517 DERR(vswp, "%s: prop(%s) not found in port(%d)", 1518 __func__, id_propname, idx); 1519 continue; 1520 } 1521 1522 D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); 1523 1524 if (vsw_port_detach(vswp, inst) != 0) { 1525 cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld", 1526 vswp->instance, inst); 1527 } 1528 } 1529 1530 for (idx = 0; idx < resp->match_curr.nelem; idx++) { 1531 (void) vsw_port_update(vswp, resp->match_curr.mdp, 1532 resp->match_curr.mdep[idx], 1533 resp->match_prev.mdp, 1534 resp->match_prev.mdep[idx]); 1535 } 1536 1537 D1(vswp, "%s: exit", __func__); 1538 1539 return (MDEG_SUCCESS); 1540 } 1541 1542 /* 1543 * Scan the machine description for this instance of vsw 1544 * and read its properties. Called only from vsw_attach(). 1545 * Returns: 0 on success, 1 on failure. 
1546 */ 1547 static int 1548 vsw_read_mdprops(vsw_t *vswp) 1549 { 1550 md_t *mdp = NULL; 1551 mde_cookie_t rootnode; 1552 mde_cookie_t *listp = NULL; 1553 uint64_t inst; 1554 uint64_t cfgh; 1555 char *name; 1556 int rv = 1; 1557 int num_nodes = 0; 1558 int num_devs = 0; 1559 int listsz = 0; 1560 int i; 1561 1562 /* 1563 * In each 'virtual-device' node in the MD there is a 1564 * 'cfg-handle' property which is the MD's concept of 1565 * an instance number (this may be completely different from 1566 * the device drivers instance #). OBP reads that value and 1567 * stores it in the 'reg' property of the appropriate node in 1568 * the device tree. We first read this reg property and use this 1569 * to compare against the 'cfg-handle' property of vsw nodes 1570 * in MD to get to this specific vsw instance and then read 1571 * other properties that we are interested in. 1572 * We also cache the value of 'reg' property and use it later 1573 * to register callbacks with mdeg (see vsw_mdeg_register()) 1574 */ 1575 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 1576 DDI_PROP_DONTPASS, reg_propname, -1); 1577 if (inst == -1) { 1578 cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from " 1579 "OBP device tree", vswp->instance, reg_propname); 1580 return (rv); 1581 } 1582 1583 vswp->regprop = inst; 1584 1585 if ((mdp = md_get_handle()) == NULL) { 1586 DWARN(vswp, "%s: cannot init MD\n", __func__); 1587 return (rv); 1588 } 1589 1590 num_nodes = md_node_count(mdp); 1591 ASSERT(num_nodes > 0); 1592 1593 listsz = num_nodes * sizeof (mde_cookie_t); 1594 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 1595 1596 rootnode = md_root_node(mdp); 1597 1598 /* search for all "virtual_device" nodes */ 1599 num_devs = md_scan_dag(mdp, rootnode, 1600 md_find_name(mdp, vdev_propname), 1601 md_find_name(mdp, "fwd"), listp); 1602 if (num_devs <= 0) { 1603 DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs); 1604 goto vsw_readmd_exit; 1605 } 1606 1607 /* 1608 * Now loop 
through the list of virtual-devices looking for 1609 * devices with name "virtual-network-switch" and for each 1610 * such device compare its instance with what we have from 1611 * the 'reg' property to find the right node in MD and then 1612 * read all its properties. 1613 */ 1614 for (i = 0; i < num_devs; i++) { 1615 1616 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 1617 DWARN(vswp, "%s: name property not found\n", 1618 __func__); 1619 goto vsw_readmd_exit; 1620 } 1621 1622 /* is this a virtual-network-switch? */ 1623 if (strcmp(name, vsw_propname) != 0) 1624 continue; 1625 1626 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 1627 DWARN(vswp, "%s: cfg-handle property not found\n", 1628 __func__); 1629 goto vsw_readmd_exit; 1630 } 1631 1632 /* is this the required instance of vsw? */ 1633 if (inst != cfgh) 1634 continue; 1635 1636 /* now read all properties of this vsw instance */ 1637 rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]); 1638 break; 1639 } 1640 1641 vsw_readmd_exit: 1642 1643 kmem_free(listp, listsz); 1644 (void) md_fini_handle(mdp); 1645 return (rv); 1646 } 1647 1648 /* 1649 * Read the initial start-of-day values from the specified MD node. 
1650 */ 1651 static int 1652 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1653 { 1654 uint64_t macaddr = 0; 1655 1656 D1(vswp, "%s: enter", __func__); 1657 1658 if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) { 1659 return (1); 1660 } 1661 1662 /* mac address for vswitch device itself */ 1663 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1664 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1665 vswp->instance); 1666 return (1); 1667 } 1668 1669 vsw_save_lmacaddr(vswp, macaddr); 1670 1671 if (vsw_get_md_smodes(vswp, mdp, node, &vswp->smode)) { 1672 DWARN(vswp, "%s: Unable to read %s property from MD, " 1673 "defaulting to 'switched' mode", 1674 __func__, smode_propname); 1675 1676 vswp->smode = VSW_LAYER2; 1677 } 1678 1679 /* read mtu */ 1680 vsw_mtu_read(vswp, mdp, node, &vswp->mtu); 1681 if (vswp->mtu < ETHERMTU || vswp->mtu > VNET_MAX_MTU) { 1682 vswp->mtu = ETHERMTU; 1683 } 1684 vswp->max_frame_size = vswp->mtu + sizeof (struct ether_header) + 1685 VLAN_TAGSZ; 1686 1687 /* read vlan id properties of this vsw instance */ 1688 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &vswp->pvid, 1689 &vswp->vids, &vswp->nvids, &vswp->default_vlan_id); 1690 1691 /* read priority-ether-types */ 1692 vsw_read_pri_eth_types(vswp, mdp, node); 1693 1694 D1(vswp, "%s: exit", __func__); 1695 return (0); 1696 } 1697 1698 /* 1699 * Read vlan id properties of the given MD node. 
1700 * Arguments: 1701 * arg: device argument(vsw device or a port) 1702 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port) 1703 * mdp: machine description 1704 * node: md node cookie 1705 * 1706 * Returns: 1707 * pvidp: port-vlan-id of the node 1708 * vidspp: list of vlan-ids of the node 1709 * nvidsp: # of vlan-ids in the list 1710 * default_idp: default-vlan-id of the node(if node is vsw device) 1711 */ 1712 static void 1713 vsw_vlan_read_ids(void *arg, int type, md_t *mdp, mde_cookie_t node, 1714 uint16_t *pvidp, vsw_vlanid_t **vidspp, uint16_t *nvidsp, 1715 uint16_t *default_idp) 1716 { 1717 vsw_t *vswp; 1718 vsw_port_t *portp; 1719 char *pvid_propname; 1720 char *vid_propname; 1721 uint_t nvids = 0; 1722 uint32_t vids_size; 1723 int rv; 1724 int i; 1725 uint64_t *data; 1726 uint64_t val; 1727 int size; 1728 int inst; 1729 1730 if (type == VSW_LOCALDEV) { 1731 1732 vswp = (vsw_t *)arg; 1733 pvid_propname = vsw_pvid_propname; 1734 vid_propname = vsw_vid_propname; 1735 inst = vswp->instance; 1736 1737 } else if (type == VSW_VNETPORT) { 1738 1739 portp = (vsw_port_t *)arg; 1740 vswp = portp->p_vswp; 1741 pvid_propname = port_pvid_propname; 1742 vid_propname = port_vid_propname; 1743 inst = portp->p_instance; 1744 1745 } else { 1746 return; 1747 } 1748 1749 if (type == VSW_LOCALDEV && default_idp != NULL) { 1750 rv = md_get_prop_val(mdp, node, vsw_dvid_propname, &val); 1751 if (rv != 0) { 1752 DWARN(vswp, "%s: prop(%s) not found", __func__, 1753 vsw_dvid_propname); 1754 1755 *default_idp = vsw_default_vlan_id; 1756 } else { 1757 *default_idp = val & 0xFFF; 1758 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1759 vsw_dvid_propname, inst, *default_idp); 1760 } 1761 } 1762 1763 rv = md_get_prop_val(mdp, node, pvid_propname, &val); 1764 if (rv != 0) { 1765 DWARN(vswp, "%s: prop(%s) not found", __func__, pvid_propname); 1766 *pvidp = vsw_default_vlan_id; 1767 } else { 1768 1769 *pvidp = val & 0xFFF; 1770 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1771 
pvid_propname, inst, *pvidp); 1772 } 1773 1774 rv = md_get_prop_data(mdp, node, vid_propname, (uint8_t **)&data, 1775 &size); 1776 if (rv != 0) { 1777 D2(vswp, "%s: prop(%s) not found", __func__, vid_propname); 1778 size = 0; 1779 } else { 1780 size /= sizeof (uint64_t); 1781 } 1782 nvids = size; 1783 1784 if (nvids != 0) { 1785 D2(vswp, "%s: %s(%d): ", __func__, vid_propname, inst); 1786 vids_size = sizeof (vsw_vlanid_t) * nvids; 1787 *vidspp = kmem_zalloc(vids_size, KM_SLEEP); 1788 for (i = 0; i < nvids; i++) { 1789 (*vidspp)[i].vl_vid = data[i] & 0xFFFF; 1790 (*vidspp)[i].vl_set = B_FALSE; 1791 D2(vswp, " %d ", (*vidspp)[i].vl_vid); 1792 } 1793 D2(vswp, "\n"); 1794 } 1795 1796 *nvidsp = nvids; 1797 } 1798 1799 /* 1800 * This function reads "priority-ether-types" property from md. This property 1801 * is used to enable support for priority frames. Applications which need 1802 * guaranteed and timely delivery of certain high priority frames to/from 1803 * a vnet or vsw within ldoms, should configure this property by providing 1804 * the ether type(s) for which the priority facility is needed. 1805 * Normal data frames are delivered over a ldc channel using the descriptor 1806 * ring mechanism which is constrained by factors such as descriptor ring size, 1807 * the rate at which the ring is processed at the peer ldc end point, etc. 1808 * The priority mechanism provides an Out-Of-Band path to send/receive frames 1809 * as raw pkt data (VIO_PKT_DATA) messages over the channel, avoiding the 1810 * descriptor ring path and enables a more reliable and timely delivery of 1811 * frames to the peer. 
1812 */ 1813 static void 1814 vsw_read_pri_eth_types(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1815 { 1816 int rv; 1817 uint16_t *types; 1818 uint64_t *data; 1819 int size; 1820 int i; 1821 size_t mblk_sz; 1822 1823 rv = md_get_prop_data(mdp, node, pri_types_propname, 1824 (uint8_t **)&data, &size); 1825 if (rv != 0) { 1826 /* 1827 * Property may not exist if we are running pre-ldoms1.1 f/w. 1828 * Check if 'vsw_pri_eth_type' has been set in that case. 1829 */ 1830 if (vsw_pri_eth_type != 0) { 1831 size = sizeof (vsw_pri_eth_type); 1832 data = &vsw_pri_eth_type; 1833 } else { 1834 D3(vswp, "%s: prop(%s) not found", __func__, 1835 pri_types_propname); 1836 size = 0; 1837 } 1838 } 1839 1840 if (size == 0) { 1841 vswp->pri_num_types = 0; 1842 return; 1843 } 1844 1845 /* 1846 * we have some priority-ether-types defined; 1847 * allocate a table of these types and also 1848 * allocate a pool of mblks to transmit these 1849 * priority packets. 1850 */ 1851 size /= sizeof (uint64_t); 1852 vswp->pri_num_types = size; 1853 vswp->pri_types = kmem_zalloc(size * sizeof (uint16_t), KM_SLEEP); 1854 for (i = 0, types = vswp->pri_types; i < size; i++) { 1855 types[i] = data[i] & 0xFFFF; 1856 } 1857 mblk_sz = (VIO_PKT_DATA_HDRSIZE + ETHERMAX + 7) & ~7; 1858 (void) vio_create_mblks(vsw_pri_tx_nmblks, mblk_sz, &vswp->pri_tx_vmp); 1859 } 1860 1861 static void 1862 vsw_mtu_read(vsw_t *vswp, md_t *mdp, mde_cookie_t node, uint32_t *mtu) 1863 { 1864 int rv; 1865 int inst; 1866 uint64_t val; 1867 char *mtu_propname; 1868 1869 mtu_propname = vsw_mtu_propname; 1870 inst = vswp->instance; 1871 1872 rv = md_get_prop_val(mdp, node, mtu_propname, &val); 1873 if (rv != 0) { 1874 D3(vswp, "%s: prop(%s) not found", __func__, mtu_propname); 1875 *mtu = vsw_ethermtu; 1876 } else { 1877 1878 *mtu = val & 0xFFFF; 1879 D2(vswp, "%s: %s(%d): (%d)\n", __func__, 1880 mtu_propname, inst, *mtu); 1881 } 1882 } 1883 1884 /* 1885 * Update the mtu of the vsw device. 
We first check if the device has been 1886 * plumbed and if so fail the mtu update. Otherwise, we continue to update the 1887 * new mtu and reset all ports to initiate handshake re-negotiation with peers 1888 * using the new mtu. 1889 */ 1890 static int 1891 vsw_mtu_update(vsw_t *vswp, uint32_t mtu) 1892 { 1893 int rv; 1894 1895 WRITE_ENTER(&vswp->if_lockrw); 1896 1897 if (vswp->if_state & VSW_IF_UP) { 1898 1899 RW_EXIT(&vswp->if_lockrw); 1900 1901 cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update" 1902 " as the device is plumbed\n", vswp->instance); 1903 return (EBUSY); 1904 1905 } else { 1906 1907 D2(vswp, "%s: curr_mtu(%d) new_mtu(%d)\n", 1908 __func__, vswp->mtu, mtu); 1909 1910 vswp->mtu = mtu; 1911 vswp->max_frame_size = vswp->mtu + 1912 sizeof (struct ether_header) + VLAN_TAGSZ; 1913 1914 rv = mac_maxsdu_update(vswp->if_mh, mtu); 1915 if (rv != 0) { 1916 cmn_err(CE_NOTE, 1917 "!vsw%d: Unable to update mtu with mac" 1918 " layer\n", vswp->instance); 1919 } 1920 1921 RW_EXIT(&vswp->if_lockrw); 1922 1923 /* Reset ports to renegotiate with the new mtu */ 1924 vsw_reset_ports(vswp); 1925 1926 } 1927 1928 return (0); 1929 } 1930 1931 /* 1932 * Check to see if the relevant properties in the specified node have 1933 * changed, and if so take the appropriate action. 1934 * 1935 * If any of the properties are missing or invalid we don't take 1936 * any action, as this function should only be invoked when modifications 1937 * have been made to what we assume is a working configuration, which 1938 * we leave active. 1939 * 1940 * Note it is legal for this routine to be invoked even if none of the 1941 * properties in the port node within the MD have actually changed. 
1942 */ 1943 static void 1944 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1945 { 1946 char physname[LIFNAMSIZ]; 1947 char drv[LIFNAMSIZ]; 1948 uint_t ddi_instance; 1949 uint8_t new_smode; 1950 int i; 1951 uint64_t macaddr = 0; 1952 enum {MD_init = 0x1, 1953 MD_physname = 0x2, 1954 MD_macaddr = 0x4, 1955 MD_smode = 0x8, 1956 MD_vlans = 0x10, 1957 MD_mtu = 0x20} updated; 1958 int rv; 1959 uint16_t pvid; 1960 vsw_vlanid_t *vids; 1961 uint16_t nvids; 1962 uint32_t mtu; 1963 1964 updated = MD_init; 1965 1966 D1(vswp, "%s: enter", __func__); 1967 1968 /* 1969 * Check if name of physical device in MD has changed. 1970 */ 1971 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { 1972 /* 1973 * Do basic sanity check on new device name/instance, 1974 * if its non NULL. It is valid for the device name to 1975 * have changed from a non NULL to a NULL value, i.e. 1976 * the vsw is being changed to 'routed' mode. 1977 */ 1978 if ((strlen(physname) != 0) && 1979 (ddi_parse(physname, drv, 1980 &ddi_instance) != DDI_SUCCESS)) { 1981 cmn_err(CE_WARN, "!vsw%d: physical device %s is not" 1982 " a valid device name/instance", 1983 vswp->instance, physname); 1984 goto fail_reconf; 1985 } 1986 1987 if (strcmp(physname, vswp->physname)) { 1988 D2(vswp, "%s: device name changed from %s to %s", 1989 __func__, vswp->physname, physname); 1990 1991 updated |= MD_physname; 1992 } else { 1993 D2(vswp, "%s: device name unchanged at %s", 1994 __func__, vswp->physname); 1995 } 1996 } else { 1997 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " 1998 "device from updated MD.", vswp->instance); 1999 goto fail_reconf; 2000 } 2001 2002 /* 2003 * Check if MAC address has changed. 
2004 */ 2005 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 2006 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 2007 vswp->instance); 2008 goto fail_reconf; 2009 } else { 2010 uint64_t maddr = macaddr; 2011 READ_ENTER(&vswp->if_lockrw); 2012 for (i = ETHERADDRL - 1; i >= 0; i--) { 2013 if (vswp->if_addr.ether_addr_octet[i] 2014 != (macaddr & 0xFF)) { 2015 D2(vswp, "%s: octet[%d] 0x%x != 0x%x", 2016 __func__, i, 2017 vswp->if_addr.ether_addr_octet[i], 2018 (macaddr & 0xFF)); 2019 updated |= MD_macaddr; 2020 macaddr = maddr; 2021 break; 2022 } 2023 macaddr >>= 8; 2024 } 2025 RW_EXIT(&vswp->if_lockrw); 2026 if (updated & MD_macaddr) { 2027 vsw_save_lmacaddr(vswp, macaddr); 2028 } 2029 } 2030 2031 /* 2032 * Check if switching modes have changed. 2033 */ 2034 if (vsw_get_md_smodes(vswp, mdp, node, &new_smode)) { 2035 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", 2036 vswp->instance, smode_propname); 2037 goto fail_reconf; 2038 } else { 2039 if (new_smode != vswp->smode) { 2040 D2(vswp, "%s: switching mode changed from %d to %d", 2041 __func__, vswp->smode, new_smode); 2042 2043 updated |= MD_smode; 2044 } 2045 } 2046 2047 /* Read the vlan ids */ 2048 vsw_vlan_read_ids(vswp, VSW_LOCALDEV, mdp, node, &pvid, &vids, 2049 &nvids, NULL); 2050 2051 /* Determine if there are any vlan id updates */ 2052 if ((pvid != vswp->pvid) || /* pvid changed? */ 2053 (nvids != vswp->nvids) || /* # of vids changed? */ 2054 ((nvids != 0) && (vswp->nvids != 0) && /* vids changed? 
*/ 2055 !vsw_cmp_vids(vids, vswp->vids, nvids))) { 2056 updated |= MD_vlans; 2057 } 2058 2059 /* Read mtu */ 2060 vsw_mtu_read(vswp, mdp, node, &mtu); 2061 if (mtu != vswp->mtu) { 2062 if (mtu >= ETHERMTU && mtu <= VNET_MAX_MTU) { 2063 updated |= MD_mtu; 2064 } else { 2065 cmn_err(CE_NOTE, "!vsw%d: Unable to process mtu update" 2066 " as the specified value:%d is invalid\n", 2067 vswp->instance, mtu); 2068 } 2069 } 2070 2071 /* 2072 * Now make any changes which are needed... 2073 */ 2074 2075 if (updated & (MD_physname | MD_smode | MD_mtu)) { 2076 2077 /* 2078 * Stop any pending thread to setup switching mode. 2079 */ 2080 vsw_setup_switching_stop(vswp); 2081 2082 /* Cleanup HybridIO */ 2083 vsw_hio_cleanup(vswp); 2084 2085 /* 2086 * Remove unicst, mcst addrs of vsw interface 2087 * and ports from the physdev. This also closes 2088 * the corresponding mac clients. 2089 */ 2090 vsw_unset_addrs(vswp); 2091 2092 /* 2093 * Stop, detach and close the old device.. 2094 */ 2095 mutex_enter(&vswp->mac_lock); 2096 vsw_mac_close(vswp); 2097 mutex_exit(&vswp->mac_lock); 2098 2099 /* 2100 * Update phys name. 2101 */ 2102 if (updated & MD_physname) { 2103 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", 2104 vswp->instance, vswp->physname, physname); 2105 (void) strncpy(vswp->physname, 2106 physname, strlen(physname) + 1); 2107 } 2108 2109 /* 2110 * Update array with the new switch mode values. 2111 */ 2112 if (updated & MD_smode) { 2113 vswp->smode = new_smode; 2114 } 2115 2116 /* Update mtu */ 2117 if (updated & MD_mtu) { 2118 rv = vsw_mtu_update(vswp, mtu); 2119 if (rv != 0) { 2120 goto fail_update; 2121 } 2122 } 2123 2124 /* 2125 * ..and attach, start the new device. 2126 */ 2127 rv = vsw_setup_switching(vswp); 2128 if (rv == EAGAIN) { 2129 /* 2130 * Unable to setup switching mode. 2131 * As the error is EAGAIN, schedule a thread to retry 2132 * and return. 
Programming addresses of ports and 2133 * vsw interface will be done by the thread when the 2134 * switching setup completes successfully. 2135 */ 2136 if (vsw_setup_switching_start(vswp) != 0) { 2137 goto fail_update; 2138 } 2139 return; 2140 2141 } else if (rv) { 2142 goto fail_update; 2143 } 2144 2145 vsw_setup_layer2_post_process(vswp); 2146 } else if (updated & MD_macaddr) { 2147 /* 2148 * We enter here if only MD_macaddr is exclusively updated. 2149 * If MD_physname and/or MD_smode are also updated, then 2150 * as part of that, we would have implicitly processed 2151 * MD_macaddr update (above). 2152 */ 2153 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", 2154 vswp->instance, macaddr); 2155 2156 READ_ENTER(&vswp->if_lockrw); 2157 if (vswp->if_state & VSW_IF_UP) { 2158 /* reconfigure with new address */ 2159 vsw_if_mac_reconfig(vswp, B_FALSE, 0, NULL, 0); 2160 2161 /* 2162 * Notify the MAC layer of the changed address. 2163 */ 2164 mac_unicst_update(vswp->if_mh, 2165 (uint8_t *)&vswp->if_addr); 2166 2167 } 2168 RW_EXIT(&vswp->if_lockrw); 2169 2170 } 2171 2172 if (updated & MD_vlans) { 2173 /* Remove existing vlan ids from the hash table. 
*/ 2174 vsw_vlan_remove_ids(vswp, VSW_LOCALDEV); 2175 2176 if (vswp->if_state & VSW_IF_UP) { 2177 vsw_if_mac_reconfig(vswp, B_TRUE, pvid, vids, nvids); 2178 } else { 2179 if (vswp->nvids != 0) { 2180 kmem_free(vswp->vids, 2181 sizeof (vsw_vlanid_t) * vswp->nvids); 2182 } 2183 vswp->vids = vids; 2184 vswp->nvids = nvids; 2185 vswp->pvid = pvid; 2186 } 2187 2188 /* add these new vlan ids into hash table */ 2189 vsw_vlan_add_ids(vswp, VSW_LOCALDEV); 2190 } else { 2191 if (nvids != 0) { 2192 kmem_free(vids, sizeof (vsw_vlanid_t) * nvids); 2193 } 2194 } 2195 2196 return; 2197 2198 fail_reconf: 2199 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); 2200 return; 2201 2202 fail_update: 2203 cmn_err(CE_WARN, "!vsw%d: re-configuration failed", 2204 vswp->instance); 2205 }

/*
 * Read the port's md properties.
 *
 * Fills in portp from the machine description port node *node:
 *   - the port instance (id_propname on the port node),
 *   - the ldc id (id_propname on the first chan_propname node reachable
 *     from the port node via "fwd" arcs),
 *   - the remote mac address (remaddr_propname),
 *   - the vlan ids (via vsw_vlan_read_ids()),
 *   - whether the hybrid property is present.
 *
 * portp is only written once every mandatory property has been read, and
 * the temporary node list is released on every path, so a failure return
 * leaves nothing allocated by this function.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_port_read_props(vsw_port_t *portp, vsw_t *vswp,
    md_t *mdp, mde_cookie_t *node)
{
	uint64_t		ldc_id;
	uint8_t			*addrp;
	int			i, addrsz;
	int			num_nodes = 0, nchan = 0;
	int			listsz = 0;
	mde_cookie_t		*listp = NULL;
	struct ether_addr	ea;
	uint64_t		macaddr;
	uint64_t		inst = 0;
	uint64_t		val;

	if (md_get_prop_val(mdp, *node, id_propname, &inst)) {
		DWARN(vswp, "%s: prop(%s) not found", __func__,
		    id_propname);
		return (1);
	}

	/*
	 * Find the channel endpoint node(s) (which should be under this
	 * port node) which contain the channel id(s).
	 */
	if ((num_nodes = md_node_count(mdp)) <= 0) {
		DERR(vswp, "%s: invalid number of nodes found (%d)",
		    __func__, num_nodes);
		return (1);
	}

	D2(vswp, "%s: %d nodes found", __func__, num_nodes);

	/*
	 * Allocate enough space for the node list; it is sized for every
	 * node in the MD so the scan below cannot overflow it.
	 */
	listsz = num_nodes * sizeof (mde_cookie_t);
	listp = kmem_zalloc(listsz, KM_SLEEP);

	nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname),
	    md_find_name(mdp, "fwd"), listp);

	if (nchan <= 0) {
		DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname);
		kmem_free(listp, listsz);
		return (1);
	}

	D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname);

	/* use property from first node found */
	if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) {
		DWARN(vswp, "%s: prop(%s) not found\n", __func__,
		    id_propname);
		kmem_free(listp, listsz);
		return (1);
	}

	/* don't need list any more */
	kmem_free(listp, listsz);

	D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id);

	/* read mac-address property */
	if (md_get_prop_data(mdp, *node, remaddr_propname,
	    &addrp, &addrsz)) {
		DWARN(vswp, "%s: prop(%s) not found",
		    __func__, remaddr_propname);
		return (1);
	}

	if (addrsz < ETHERADDRL) {
		DWARN(vswp, "%s: invalid address size", __func__);
		return (1);
	}

	/*
	 * NOTE(review): this loads a full 8 bytes although only
	 * addrsz >= ETHERADDRL was checked above; presumably the property
	 * data is always at least 8 bytes and suitably aligned -- confirm.
	 */
	macaddr = *((uint64_t *)addrp);
	D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr);

	/*
	 * The address occupies the low-order ETHERADDRL bytes of the
	 * 64-bit value; peel them off least-significant first so that
	 * octet[0] ends up holding the most significant address byte.
	 */
	for (i = ETHERADDRL - 1; i >= 0; i--) {
		ea.ether_addr_octet[i] = macaddr & 0xFF;
		macaddr >>= 8;
	}

	/* now update all properties into the port */
	portp->p_vswp = vswp;
	portp->p_instance = inst;
	portp->addr_set = B_FALSE;
	ether_copy(&ea, &portp->p_macaddr);
	if (nchan > VSW_PORT_MAX_LDCS) {
		D2(vswp, "%s: using first of %d ldc ids",
		    __func__, nchan);
		nchan = VSW_PORT_MAX_LDCS;
	}
	portp->num_ldcs = nchan;
	portp->ldc_ids =
	    kmem_zalloc(sizeof (uint64_t) * nchan, KM_SLEEP);
	/*
	 * NOTE(review): the source of this copy is the single ldc_id read
	 * above, so copying nchan entries is only in bounds while
	 * VSW_PORT_MAX_LDCS is 1 -- confirm against its definition.
	 */
	bcopy(&ldc_id, (portp->ldc_ids), sizeof (uint64_t) * nchan);

	/* read vlan id properties of this port node */
	vsw_vlan_read_ids(portp, VSW_VNETPORT, mdp, *node, &portp->pvid,
	    &portp->vids, &portp->nvids, NULL);

	/* Check if hybrid property is present */
	if (md_get_prop_val(mdp, *node, hybrid_propname, &val) == 0) {
		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
		portp->p_hio_enabled = B_TRUE;
	} else {
		portp->p_hio_enabled = B_FALSE;
	}
	/*
	 * Port hio capability determined after version
	 * negotiation, i.e., when we know the peer is HybridIO capable.
	 */
	portp->p_hio_capable = B_FALSE;
	return (0);
}

/*
 * Add a new port to the system.
 *
 * Allocates a zeroed port structure, fills it in from the MD port node
 * and hands it to vsw_port_attach(). If the properties cannot be read
 * the port structure is freed here.
 *
 * Returns 0 on success, 1 on failure.
 */
int
vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node)
{
	vsw_port_t	*portp;
	int		rv;

	portp = kmem_zalloc(sizeof (vsw_port_t), KM_SLEEP);

	rv = vsw_port_read_props(portp, vswp, mdp, node);
	if (rv != 0) {
		kmem_free(portp, sizeof (*portp));
		return (1);
	}

	rv = vsw_port_attach(portp);
	if (rv != 0) {
		/*
		 * NOTE(review): portp is not released here; presumably
		 * vsw_port_attach() cleans up the port on failure --
		 * confirm, otherwise portp (and its ldc_ids/vids) leak.
		 */
		DERR(vswp, "%s: failed to attach port", __func__);
		return (1);
	}

	return (0);
}

/*
 * Process an MD update for an existing port.
 *
 * The current and previous MD nodes must carry the same port id and the
 * port must already be known to this vsw instance. If the port's vlan
 * ids (pvid, number of vids or the vids themselves) differ from what the
 * port currently holds, the vlan configuration is redone. The hybrid
 * property is then re-evaluated and pushed to the port if it changed.
 *
 * The port list lock is held as reader for the whole update.
 *
 * Returns 0 on success, 1 if the port could not be identified.
 */
static int
vsw_port_update(vsw_t *vswp, md_t *curr_mdp, mde_cookie_t curr_mdex,
    md_t *prev_mdp, mde_cookie_t prev_mdex)
{
	uint64_t	cport_num;
	uint64_t	pport_num;
	vsw_port_list_t	*plistp;
	vsw_port_t	*portp;
	boolean_t	updated_vlans = B_FALSE;
	uint16_t	pvid;
	vsw_vlanid_t	*vids;
	uint16_t	nvids;
	uint64_t	val;
	boolean_t	hio_enabled = B_FALSE;

	/*
	 * For now, we get port updates only if vlan ids changed.
	 * We read the port num and do some sanity check.
	 */
	if (md_get_prop_val(curr_mdp, curr_mdex, id_propname, &cport_num)) {
		return (1);
	}

	if (md_get_prop_val(prev_mdp, prev_mdex, id_propname, &pport_num)) {
		return (1);
	}
	if (cport_num != pport_num)
		return (1);

	plistp = &(vswp->plist);

	READ_ENTER(&plistp->lockrw);

	portp = vsw_lookup_port(vswp, cport_num);
	if (portp == NULL) {
		RW_EXIT(&plistp->lockrw);
		return (1);
	}

	/* Read the vlan ids */
	vsw_vlan_read_ids(portp, VSW_VNETPORT, curr_mdp, curr_mdex, &pvid,
	    &vids, &nvids, NULL);

	/* Determine if there are any vlan id updates */
	if ((pvid != portp->pvid) ||		/* pvid changed? */
	    (nvids != portp->nvids) ||		/* # of vids changed? */
	    ((nvids != 0) && (portp->nvids != 0) &&	/* vids changed? */
	    !vsw_cmp_vids(vids, portp->vids, nvids))) {
		updated_vlans = B_TRUE;
	}

	if (updated_vlans == B_TRUE) {

		/* Remove existing vlan ids from the hash table. */
		vsw_vlan_remove_ids(portp, VSW_VNETPORT);

		/* Reconfigure vlans with network device */
		vsw_mac_port_reconfig_vlans(portp, pvid, vids, nvids);

		/* add these new vlan ids into hash table */
		vsw_vlan_add_ids(portp, VSW_VNETPORT);

		/* reset the port if it is vlan unaware (ver < 1.3) */
		vsw_vlan_unaware_port_reset(portp);
	} else if (nvids != 0) {
		/*
		 * Nothing changed, so the vid array just read from the MD
		 * is not handed to anyone; release it here, the same way
		 * vsw_update_md_prop() does for the vsw interface's vids.
		 */
		kmem_free(vids, sizeof (vsw_vlanid_t) * nvids);
	}

	/* Check if hybrid property is present */
	if (md_get_prop_val(curr_mdp, curr_mdex, hybrid_propname, &val) == 0) {
		D1(vswp, "%s: prop(%s) found\n", __func__, hybrid_propname);
		hio_enabled = B_TRUE;
	}

	if (portp->p_hio_enabled != hio_enabled) {
		vsw_hio_port_update(portp, hio_enabled);
	}

	RW_EXIT(&plistp->lockrw);

	return (0);
}

/*
 * vsw_mac_rx -- A common function to send packets to the interface.
 * By default this function checks whether the interface is UP; the
 * rest of the behaviour depends on the flags as below:
 *
 *	VSW_MACRX_PROMISC -- Only deliver if the interface is in
 *			     promiscuous mode.
 *	VSW_MACRX_COPYMSG -- Deliver a copy of the message chain; the
 *			     chain passed in is left untouched.
 *	VSW_MACRX_FREEMSG -- Free the chain passed in if it is dropped
 *			     because the interface is down or not
 *			     promiscuous.
 *
 * Messages that are delivered have their vlan tag processed by
 * vsw_vlan_frame_untag() first and are then passed to mac_rx().
 */
void
vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
    mblk_t *mp, vsw_macrx_flags_t flags)
{
	mblk_t		*mpt;

	D1(vswp, "%s:enter\n", __func__);
	READ_ENTER(&vswp->if_lockrw);
	/* Check if the interface is up */
	if (!(vswp->if_state & VSW_IF_UP)) {
		RW_EXIT(&vswp->if_lockrw);
		/* Free messages only if FREEMSG flag specified */
		if (flags & VSW_MACRX_FREEMSG) {
			freemsgchain(mp);
		}
		D1(vswp, "%s:exit\n", __func__);
		return;
	}
	/*
	 * If PROMISC flag is passed, then check if
	 * the interface is in the PROMISC mode.
	 * If not, drop the messages.
	 */
	if (flags & VSW_MACRX_PROMISC) {
		if (!(vswp->if_state & VSW_IF_PROMISC)) {
			RW_EXIT(&vswp->if_lockrw);
			/* Free messages only if FREEMSG flag specified */
			if (flags & VSW_MACRX_FREEMSG) {
				freemsgchain(mp);
			}
			D1(vswp, "%s:exit\n", __func__);
			return;
		}
	}
	/* The interface state is only sampled; delivery runs unlocked. */
	RW_EXIT(&vswp->if_lockrw);
	/*
	 * If COPYMSG flag is passed, then make a copy
	 * of the message chain and send up the copy.
	 * If the copy cannot be made nothing is delivered.
	 */
	if (flags & VSW_MACRX_COPYMSG) {
		mp = copymsgchain(mp);
		if (mp == NULL) {
			D1(vswp, "%s:exit\n", __func__);
			return;
		}
	}

	D2(vswp, "%s: sending up stack", __func__);

	/* mpt only receives the chain tail; it is not needed afterwards */
	mpt = NULL;
	(void) vsw_vlan_frame_untag(vswp, VSW_LOCALDEV, &mp, &mpt);
	if (mp != NULL) {
		mac_rx(vswp->if_mh, mrh, mp);
	}
	D1(vswp, "%s:exit\n", __func__);
}

/*
 * Copy the mac address of vsw into the soft state structure.
 *
 * macaddr carries the address in its low-order ETHERADDRL bytes; the
 * most significant of those bytes is stored in octet[0]. The interface
 * lock is held as writer while if_addr is updated.
 */
static void
vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr)
{
	int	i;

	WRITE_ENTER(&vswp->if_lockrw);
	for (i = ETHERADDRL - 1; i >= 0; i--) {
		vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF;
		macaddr >>= 8;
	}
	RW_EXIT(&vswp->if_lockrw);
}

/*
 * Compare two VLAN id arrays; both are expected to hold nvids entries.
 *
 * The comparison is independent of ordering: B_TRUE is returned when
 * every id in vids1 is found somewhere in vids2, B_FALSE as soon as one
 * id of vids1 is missing from vids2. With nvids == 0 the result is
 * B_TRUE.
 */
static boolean_t
vsw_cmp_vids(vsw_vlanid_t *vids1, vsw_vlanid_t *vids2, int nvids)
{
	int		i, j;
	uint16_t	vid;

	for (i = 0; i < nvids; i++) {
		vid = vids1[i].vl_vid;
		/* search all of vids2 for this id */
		for (j = 0; j < nvids; j++) {
			if (vid == vids2[j].vl_vid)
				break;
		}
		if (j == nvids) {
			/* ran off the end: vid is not in vids2 */
			return (B_FALSE);
		}
	}
	return (B_TRUE);
}