1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/ldc.h> 66 #include <sys/vsw_fdb.h> 67 #include <sys/vsw.h> 68 #include <sys/vio_mailbox.h> 69 #include <sys/vnet_mailbox.h> 70 #include <sys/vnet_common.h> 71 #include <sys/vio_util.h> 72 #include <sys/sdt.h> 73 #include <sys/atomic.h> 74 #include <sys/callb.h> 75 76 /* 77 * Function prototypes. 78 */ 79 static int vsw_attach(dev_info_t *, ddi_attach_cmd_t); 80 static int vsw_detach(dev_info_t *, ddi_detach_cmd_t); 81 static int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 82 static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *); 83 static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *); 84 85 /* MDEG routines */ 86 static int vsw_mdeg_register(vsw_t *vswp); 87 static void vsw_mdeg_unregister(vsw_t *vswp); 88 static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *); 89 static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *); 90 static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t); 91 static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t); 92 static int vsw_read_mdprops(vsw_t *vswp); 93 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 94 95 /* Mac driver related routines */ 96 static int vsw_mac_register(vsw_t *); 97 static int vsw_mac_unregister(vsw_t *); 98 static int vsw_m_stat(void *, uint_t, uint64_t *); 99 static void vsw_m_stop(void *arg); 100 static int vsw_m_start(void *arg); 101 static int vsw_m_unicst(void *arg, const uint8_t *); 102 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *); 103 static int vsw_m_promisc(void *arg, boolean_t); 104 static mblk_t *vsw_m_tx(void *arg, mblk_t *); 105 static uint_t vsw_rx_softintr(caddr_t arg1, caddr_t arg2); 106 void vsw_mac_rx(vsw_t *vswp, int caller, mac_resource_handle_t mrh, 107 mblk_t *mp, mblk_t *mpt, vsw_macrx_flags_t flags); 108 109 /* 110 * Functions imported from other files. 111 */ 112 extern void vsw_setup_switching_timeout(void *arg); 113 extern void vsw_stop_switching_timeout(vsw_t *vswp); 114 extern int vsw_setup_switching(vsw_t *); 115 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 116 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 117 extern void vsw_del_mcst_vsw(vsw_t *); 118 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 119 extern int vsw_detach_ports(vsw_t *vswp); 120 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 121 extern int vsw_port_detach(vsw_t *vswp, int p_instance); 122 extern int vsw_port_attach(vsw_t *vswp, int p_instance, 123 uint64_t *ldcids, int nids, struct ether_addr *macaddr); 124 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 125 extern int vsw_mac_attach(vsw_t *vswp); 126 extern void vsw_mac_detach(vsw_t *vswp); 127 extern int vsw_mac_open(vsw_t *vswp); 128 extern void vsw_mac_close(vsw_t *vswp); 129 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int); 130 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int); 131 extern void vsw_reconfig_hw(vsw_t *); 132 extern void vsw_unset_addrs(vsw_t *vswp); 133 extern void vsw_set_addrs(vsw_t *vswp); 134 135 136 /* 137 * Internal tunables. 138 */ 139 int vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */ 140 int vsw_wretries = 100; /* # of write attempts */ 141 int vsw_desc_delay = 0; /* delay in us */ 142 int vsw_read_attempts = 5; /* # of reads of descriptor */ 143 int vsw_mac_open_retries = 20; /* max # of mac_open() retries */ 144 int vsw_setup_switching_delay = 3; /* setup sw timeout interval in sec */ 145 int vsw_ldc_tx_delay = 5; /* delay(ticks) for tx retries */ 146 int vsw_ldc_tx_retries = 10; /* # of ldc tx retries */ 147 int vsw_ldc_tx_max_failures = 40; /* Max ldc tx failures */ 148 boolean_t vsw_ldc_rxthr_enabled = B_TRUE; /* LDC Rx thread enabled */ 149 boolean_t vsw_ldc_txthr_enabled = B_TRUE; /* LDC Tx thread enabled */ 150 151 152 /* 153 * External tunables. 154 */ 155 /* 156 * Enable/disable thread per ring. This is a mode selection 157 * that is done a vsw driver attach time. 158 */ 159 boolean_t vsw_multi_ring_enable = B_FALSE; 160 int vsw_mac_rx_rings = VSW_MAC_RX_RINGS; 161 162 /* 163 * Max number of mblks received in one receive operation. 164 */ 165 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6); 166 167 /* 168 * Tunables for three different pools, that is, the size and 169 * number of mblks for each pool. 170 */ 171 uint32_t vsw_mblk_size1 = VSW_MBLK_SZ_128; /* size=128 for pool1 */ 172 uint32_t vsw_mblk_size2 = VSW_MBLK_SZ_256; /* size=256 for pool2 */ 173 uint32_t vsw_mblk_size3 = VSW_MBLK_SZ_2048; /* size=2048 for pool3 */ 174 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS; /* number of mblks for pool1 */ 175 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS; /* number of mblks for pool2 */ 176 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS; /* number of mblks for pool3 */ 177 178 /* 179 * MAC callbacks 180 */ 181 static mac_callbacks_t vsw_m_callbacks = { 182 0, 183 vsw_m_stat, 184 vsw_m_start, 185 vsw_m_stop, 186 vsw_m_promisc, 187 vsw_m_multicst, 188 vsw_m_unicst, 189 vsw_m_tx, 190 NULL, 191 NULL, 192 NULL 193 }; 194 195 static struct cb_ops vsw_cb_ops = { 196 nulldev, /* cb_open */ 197 nulldev, /* cb_close */ 198 nodev, /* cb_strategy */ 199 nodev, /* cb_print */ 200 nodev, /* cb_dump */ 201 nodev, /* cb_read */ 202 nodev, /* cb_write */ 203 nodev, /* cb_ioctl */ 204 nodev, /* cb_devmap */ 205 nodev, /* cb_mmap */ 206 nodev, /* cb_segmap */ 207 nochpoll, /* cb_chpoll */ 208 ddi_prop_op, /* cb_prop_op */ 209 NULL, /* cb_stream */ 210 D_MP, /* cb_flag */ 211 CB_REV, /* rev */ 212 nodev, /* int (*cb_aread)() */ 213 nodev /* int (*cb_awrite)() */ 214 }; 215 216 static struct dev_ops vsw_ops = { 217 DEVO_REV, /* devo_rev */ 218 0, /* devo_refcnt */ 219 vsw_getinfo, /* devo_getinfo */ 220 nulldev, /* devo_identify */ 221 nulldev, /* devo_probe */ 222 vsw_attach, /* devo_attach */ 223 vsw_detach, /* devo_detach */ 224 nodev, /* devo_reset */ 225 &vsw_cb_ops, /* devo_cb_ops */ 226 (struct bus_ops *)NULL, /* devo_bus_ops */ 227 ddi_power /* devo_power */ 228 }; 229 230 extern struct mod_ops mod_driverops; 231 static struct modldrv vswmodldrv = { 232 &mod_driverops, 233 "sun4v Virtual Switch", 234 &vsw_ops, 235 }; 236 237 #define LDC_ENTER_LOCK(ldcp) \ 238 mutex_enter(&((ldcp)->ldc_cblock));\ 239 mutex_enter(&((ldcp)->ldc_rxlock));\ 240 mutex_enter(&((ldcp)->ldc_txlock)); 241 #define LDC_EXIT_LOCK(ldcp) \ 242 mutex_exit(&((ldcp)->ldc_txlock));\ 243 mutex_exit(&((ldcp)->ldc_rxlock));\ 244 mutex_exit(&((ldcp)->ldc_cblock)); 245 246 /* Driver soft state ptr */ 247 static void *vsw_state; 248 249 /* 250 * Linked list of "vsw_t" structures - one per instance. 251 */ 252 vsw_t *vsw_head = NULL; 253 krwlock_t vsw_rw; 254 255 /* 256 * Property names 257 */ 258 static char vdev_propname[] = "virtual-device"; 259 static char vsw_propname[] = "virtual-network-switch"; 260 static char physdev_propname[] = "vsw-phys-dev"; 261 static char smode_propname[] = "vsw-switch-mode"; 262 static char macaddr_propname[] = "local-mac-address"; 263 static char remaddr_propname[] = "remote-mac-address"; 264 static char ldcids_propname[] = "ldc-ids"; 265 static char chan_propname[] = "channel-endpoint"; 266 static char id_propname[] = "id"; 267 static char reg_propname[] = "reg"; 268 269 /* 270 * Matching criteria passed to the MDEG to register interest 271 * in changes to 'virtual-device-port' nodes identified by their 272 * 'id' property. 273 */ 274 static md_prop_match_t vport_prop_match[] = { 275 { MDET_PROP_VAL, "id" }, 276 { MDET_LIST_END, NULL } 277 }; 278 279 static mdeg_node_match_t vport_match = { "virtual-device-port", 280 vport_prop_match }; 281 282 /* 283 * Matching criteria passed to the MDEG to register interest 284 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified 285 * by their 'name' and 'cfg-handle' properties. 286 */ 287 static md_prop_match_t vdev_prop_match[] = { 288 { MDET_PROP_STR, "name" }, 289 { MDET_PROP_VAL, "cfg-handle" }, 290 { MDET_LIST_END, NULL } 291 }; 292 293 static mdeg_node_match_t vdev_match = { "virtual-device", 294 vdev_prop_match }; 295 296 297 /* 298 * Specification of an MD node passed to the MDEG to filter any 299 * 'vport' nodes that do not belong to the specified node. This 300 * template is copied for each vsw instance and filled in with 301 * the appropriate 'cfg-handle' value before being passed to the MDEG. 302 */ 303 static mdeg_prop_spec_t vsw_prop_template[] = { 304 { MDET_PROP_STR, "name", vsw_propname }, 305 { MDET_PROP_VAL, "cfg-handle", NULL }, 306 { MDET_LIST_END, NULL, NULL } 307 }; 308 309 #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 310 311 #ifdef DEBUG 312 /* 313 * Print debug messages - set to 0x1f to enable all msgs 314 * or 0x0 to turn all off. 315 */ 316 int vswdbg = 0x0; 317 318 /* 319 * debug levels: 320 * 0x01: Function entry/exit tracing 321 * 0x02: Internal function messages 322 * 0x04: Verbose internal messages 323 * 0x08: Warning messages 324 * 0x10: Error messages 325 */ 326 327 void 328 vswdebug(vsw_t *vswp, const char *fmt, ...) 329 { 330 char buf[512]; 331 va_list ap; 332 333 va_start(ap, fmt); 334 (void) vsprintf(buf, fmt, ap); 335 va_end(ap); 336 337 if (vswp == NULL) 338 cmn_err(CE_CONT, "%s\n", buf); 339 else 340 cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); 341 } 342 343 #endif /* DEBUG */ 344 345 static struct modlinkage modlinkage = { 346 MODREV_1, 347 &vswmodldrv, 348 NULL 349 }; 350 351 int 352 _init(void) 353 { 354 int status; 355 356 rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); 357 358 status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); 359 if (status != 0) { 360 return (status); 361 } 362 363 mac_init_ops(&vsw_ops, DRV_NAME); 364 status = mod_install(&modlinkage); 365 if (status != 0) { 366 ddi_soft_state_fini(&vsw_state); 367 } 368 return (status); 369 } 370 371 int 372 _fini(void) 373 { 374 int status; 375 376 status = mod_remove(&modlinkage); 377 if (status != 0) 378 return (status); 379 mac_fini_ops(&vsw_ops); 380 ddi_soft_state_fini(&vsw_state); 381 382 rw_destroy(&vsw_rw); 383 384 return (status); 385 } 386 387 int 388 _info(struct modinfo *modinfop) 389 { 390 return (mod_info(&modlinkage, modinfop)); 391 } 392 393 static int 394 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 395 { 396 vsw_t *vswp; 397 int instance; 398 char hashname[MAXNAMELEN]; 399 char qname[TASKQ_NAMELEN]; 400 enum { PROG_init = 0x00, 401 PROG_locks = 0x01, 402 PROG_readmd = 0x02, 403 PROG_fdb = 0x04, 404 PROG_mfdb = 0x08, 405 PROG_taskq = 0x10, 406 PROG_rx_softint = 0x20, 407 PROG_swmode = 0x40, 408 PROG_macreg = 0x80, 409 PROG_mdreg = 0x100} 410 progress; 411 412 progress = PROG_init; 413 int rv; 414 415 switch (cmd) { 416 case DDI_ATTACH: 417 break; 418 case DDI_RESUME: 419 /* nothing to do for this non-device */ 420 return (DDI_SUCCESS); 421 case DDI_PM_RESUME: 422 default: 423 return (DDI_FAILURE); 424 } 425 426 instance = ddi_get_instance(dip); 427 if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { 428 DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); 429 return (DDI_FAILURE); 430 } 431 vswp = ddi_get_soft_state(vsw_state, instance); 432 433 if (vswp == NULL) { 434 DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance); 435 goto vsw_attach_fail; 436 } 437 438 vswp->dip = dip; 439 vswp->instance = instance; 440 ddi_set_driver_private(dip, (caddr_t)vswp); 441 442 mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL); 443 mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL); 444 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); 445 mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL); 446 rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); 447 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); 448 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); 449 450 progress |= PROG_locks; 451 452 rv = vsw_read_mdprops(vswp); 453 if (rv != 0) 454 goto vsw_attach_fail; 455 456 progress |= PROG_readmd; 457 458 /* setup the unicast forwarding database */ 459 (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", 460 vswp->instance); 461 D2(vswp, "creating unicast hash table (%s)...", hashname); 462 vswp->fdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS, 463 mod_hash_null_valdtor, sizeof (void *)); 464 465 progress |= PROG_fdb; 466 467 /* setup the multicast fowarding database */ 468 (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", 469 vswp->instance); 470 D2(vswp, "creating multicast hash table %s)...", hashname); 471 vswp->mfdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS, 472 mod_hash_null_valdtor, sizeof (void *)); 473 474 progress |= PROG_mfdb; 475 476 /* 477 * Create the taskq which will process all the VIO 478 * control messages. 479 */ 480 (void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance); 481 if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, 482 TASKQ_DEFAULTPRI, 0)) == NULL) { 483 cmn_err(CE_WARN, "!vsw%d: Unable to create task queue", 484 vswp->instance); 485 goto vsw_attach_fail; 486 } 487 488 progress |= PROG_taskq; 489 490 /* 491 * If LDC receive thread is enabled, then we need a 492 * soft-interrupt to deliver the packets to the upper layers. 493 * This applies only to the packets that need to be sent up 494 * the stack, but not to the packets that are sent out via 495 * the physical interface. 496 */ 497 if (vsw_ldc_rxthr_enabled) { 498 vswp->rx_mhead = vswp->rx_mtail = NULL; 499 vswp->soft_pri = PIL_4; 500 vswp->rx_softint = B_TRUE; 501 502 rv = ddi_intr_add_softint(vswp->dip, &vswp->soft_handle, 503 vswp->soft_pri, vsw_rx_softintr, (void *)vswp); 504 if (rv != DDI_SUCCESS) { 505 cmn_err(CE_WARN, "!vsw%d: add_softint failed rv(%d)", 506 vswp->instance, rv); 507 goto vsw_attach_fail; 508 } 509 510 /* 511 * Initialize the soft_lock with the same priority as 512 * the soft interrupt to protect from the soft interrupt. 513 */ 514 mutex_init(&vswp->soft_lock, NULL, MUTEX_DRIVER, 515 DDI_INTR_PRI(vswp->soft_pri)); 516 progress |= PROG_rx_softint; 517 } else { 518 vswp->rx_softint = B_FALSE; 519 } 520 521 /* prevent auto-detaching */ 522 if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip, 523 DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) { 524 cmn_err(CE_NOTE, "!Unable to set \"%s\" property for " 525 "instance %u", DDI_NO_AUTODETACH, instance); 526 } 527 528 /* 529 * Setup the required switching mode, 530 * based on the mdprops that we read earlier. 531 */ 532 rv = vsw_setup_switching(vswp); 533 if (rv == EAGAIN) { 534 /* 535 * Unable to setup switching mode; 536 * as the error is EAGAIN, schedule a timeout to retry. 537 */ 538 mutex_enter(&vswp->swtmout_lock); 539 540 vswp->swtmout_enabled = B_TRUE; 541 vswp->swtmout_id = 542 timeout(vsw_setup_switching_timeout, vswp, 543 (vsw_setup_switching_delay * drv_usectohz(MICROSEC))); 544 545 mutex_exit(&vswp->swtmout_lock); 546 } else if (rv != 0) { 547 goto vsw_attach_fail; 548 } 549 550 progress |= PROG_swmode; 551 552 /* Register with mac layer as a provider */ 553 rv = vsw_mac_register(vswp); 554 if (rv != 0) 555 goto vsw_attach_fail; 556 557 progress |= PROG_macreg; 558 559 /* 560 * Now we have everything setup, register an interest in 561 * specific MD nodes. 562 * 563 * The callback is invoked in 2 cases, firstly if upon mdeg 564 * registration there are existing nodes which match our specified 565 * criteria, and secondly if the MD is changed (and again, there 566 * are nodes which we are interested in present within it. Note 567 * that our callback will be invoked even if our specified nodes 568 * have not actually changed). 569 * 570 */ 571 rv = vsw_mdeg_register(vswp); 572 if (rv != 0) 573 goto vsw_attach_fail; 574 575 progress |= PROG_mdreg; 576 577 WRITE_ENTER(&vsw_rw); 578 vswp->next = vsw_head; 579 vsw_head = vswp; 580 RW_EXIT(&vsw_rw); 581 582 ddi_report_dev(vswp->dip); 583 return (DDI_SUCCESS); 584 585 vsw_attach_fail: 586 DERR(NULL, "vsw_attach: failed"); 587 588 if (progress & PROG_rx_softint) { 589 (void) ddi_intr_remove_softint(vswp->soft_handle); 590 mutex_destroy(&vswp->soft_lock); 591 } 592 593 if (progress & PROG_mdreg) { 594 vsw_mdeg_unregister(vswp); 595 (void) vsw_detach_ports(vswp); 596 } 597 598 if (progress & PROG_macreg) 599 (void) vsw_mac_unregister(vswp); 600 601 if (progress & PROG_swmode) { 602 vsw_stop_switching_timeout(vswp); 603 mutex_enter(&vswp->mac_lock); 604 vsw_mac_detach(vswp); 605 vsw_mac_close(vswp); 606 mutex_exit(&vswp->mac_lock); 607 } 608 609 if (progress & PROG_taskq) 610 ddi_taskq_destroy(vswp->taskq_p); 611 612 if (progress & PROG_mfdb) 613 mod_hash_destroy_hash(vswp->mfdb); 614 615 if (progress & PROG_fdb) 616 mod_hash_destroy_hash(vswp->fdb); 617 618 if (progress & PROG_locks) { 619 rw_destroy(&vswp->plist.lockrw); 620 rw_destroy(&vswp->mfdbrw); 621 rw_destroy(&vswp->if_lockrw); 622 mutex_destroy(&vswp->swtmout_lock); 623 mutex_destroy(&vswp->mca_lock); 624 mutex_destroy(&vswp->mac_lock); 625 mutex_destroy(&vswp->hw_lock); 626 } 627 628 ddi_soft_state_free(vsw_state, instance); 629 return (DDI_FAILURE); 630 } 631 632 static int 633 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 634 { 635 vio_mblk_pool_t *poolp, *npoolp; 636 vsw_t **vswpp, *vswp; 637 int instance; 638 639 instance = ddi_get_instance(dip); 640 vswp = ddi_get_soft_state(vsw_state, instance); 641 642 if (vswp == NULL) { 643 return (DDI_FAILURE); 644 } 645 646 switch (cmd) { 647 case DDI_DETACH: 648 break; 649 case DDI_SUSPEND: 650 case DDI_PM_SUSPEND: 651 default: 652 return (DDI_FAILURE); 653 } 654 655 D2(vswp, "detaching instance %d", instance); 656 657 /* Stop any pending timeout to setup switching mode. */ 658 vsw_stop_switching_timeout(vswp); 659 660 if (vswp->if_state & VSW_IF_REG) { 661 if (vsw_mac_unregister(vswp) != 0) { 662 cmn_err(CE_WARN, "!vsw%d: Unable to detach from " 663 "MAC layer", vswp->instance); 664 return (DDI_FAILURE); 665 } 666 } 667 668 /* 669 * Destroy/free up the receive thread related structures. 670 */ 671 if (vswp->rx_softint == B_TRUE) { 672 (void) ddi_intr_remove_softint(vswp->soft_handle); 673 mutex_destroy(&vswp->soft_lock); 674 if (vswp->rx_mhead != NULL) { 675 freemsgchain(vswp->rx_mhead); 676 vswp->rx_mhead = vswp->rx_mtail = NULL; 677 } 678 } 679 680 vsw_mdeg_unregister(vswp); 681 682 /* remove mac layer callback */ 683 mutex_enter(&vswp->mac_lock); 684 if ((vswp->mh != NULL) && (vswp->mrh != NULL)) { 685 mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE); 686 vswp->mrh = NULL; 687 } 688 mutex_exit(&vswp->mac_lock); 689 690 if (vsw_detach_ports(vswp) != 0) { 691 cmn_err(CE_WARN, "!vsw%d: Unable to detach ports", 692 vswp->instance); 693 return (DDI_FAILURE); 694 } 695 696 rw_destroy(&vswp->if_lockrw); 697 698 mutex_destroy(&vswp->hw_lock); 699 700 /* 701 * Now that the ports have been deleted, stop and close 702 * the physical device. 703 */ 704 mutex_enter(&vswp->mac_lock); 705 706 vsw_mac_detach(vswp); 707 vsw_mac_close(vswp); 708 709 mutex_exit(&vswp->mac_lock); 710 711 mutex_destroy(&vswp->mac_lock); 712 mutex_destroy(&vswp->swtmout_lock); 713 714 /* 715 * Destroy any free pools that may still exist. 716 */ 717 poolp = vswp->rxh; 718 while (poolp != NULL) { 719 npoolp = vswp->rxh = poolp->nextp; 720 if (vio_destroy_mblks(poolp) != 0) { 721 vswp->rxh = poolp; 722 return (DDI_FAILURE); 723 } 724 poolp = npoolp; 725 } 726 727 /* 728 * Remove this instance from any entries it may be on in 729 * the hash table by using the list of addresses maintained 730 * in the vsw_t structure. 731 */ 732 vsw_del_mcst_vsw(vswp); 733 734 vswp->mcap = NULL; 735 mutex_destroy(&vswp->mca_lock); 736 737 /* 738 * By now any pending tasks have finished and the underlying 739 * ldc's have been destroyed, so its safe to delete the control 740 * message taskq. 741 */ 742 if (vswp->taskq_p != NULL) 743 ddi_taskq_destroy(vswp->taskq_p); 744 745 /* 746 * At this stage all the data pointers in the hash table 747 * should be NULL, as all the ports have been removed and will 748 * have deleted themselves from the port lists which the data 749 * pointers point to. Hence we can destroy the table using the 750 * default destructors. 751 */ 752 D2(vswp, "vsw_detach: destroying hash tables.."); 753 mod_hash_destroy_hash(vswp->fdb); 754 vswp->fdb = NULL; 755 756 WRITE_ENTER(&vswp->mfdbrw); 757 mod_hash_destroy_hash(vswp->mfdb); 758 vswp->mfdb = NULL; 759 RW_EXIT(&vswp->mfdbrw); 760 rw_destroy(&vswp->mfdbrw); 761 762 ddi_remove_minor_node(dip, NULL); 763 764 rw_destroy(&vswp->plist.lockrw); 765 WRITE_ENTER(&vsw_rw); 766 for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) { 767 if (*vswpp == vswp) { 768 *vswpp = vswp->next; 769 break; 770 } 771 } 772 RW_EXIT(&vsw_rw); 773 ddi_soft_state_free(vsw_state, instance); 774 775 return (DDI_SUCCESS); 776 } 777 778 static int 779 vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 780 { 781 _NOTE(ARGUNUSED(dip)) 782 783 vsw_t *vswp = NULL; 784 dev_t dev = (dev_t)arg; 785 int instance; 786 787 instance = getminor(dev); 788 789 switch (infocmd) { 790 case DDI_INFO_DEVT2DEVINFO: 791 if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) { 792 *result = NULL; 793 return (DDI_FAILURE); 794 } 795 *result = vswp->dip; 796 return (DDI_SUCCESS); 797 798 case DDI_INFO_DEVT2INSTANCE: 799 *result = (void *)(uintptr_t)instance; 800 return (DDI_SUCCESS); 801 802 default: 803 *result = NULL; 804 return (DDI_FAILURE); 805 } 806 } 807 808 /* 809 * Get the value of the "vsw-phys-dev" property in the specified 810 * node. This property is the name of the physical device that 811 * the virtual switch will use to talk to the outside world. 812 * 813 * Note it is valid for this property to be NULL (but the property 814 * itself must exist). Callers of this routine should verify that 815 * the value returned is what they expected (i.e. either NULL or non NULL). 816 * 817 * On success returns value of the property in region pointed to by 818 * the 'name' argument, and with return value of 0. Otherwise returns 1. 819 */ 820 static int 821 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name) 822 { 823 int len = 0; 824 int instance; 825 char *physname = NULL; 826 char *dev; 827 const char *dev_name; 828 char myname[MAXNAMELEN]; 829 830 dev_name = ddi_driver_name(vswp->dip); 831 instance = ddi_get_instance(vswp->dip); 832 (void) snprintf(myname, MAXNAMELEN, "%s%d", dev_name, instance); 833 834 if (md_get_prop_data(mdp, node, physdev_propname, 835 (uint8_t **)(&physname), &len) != 0) { 836 cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical " 837 "device(s) from MD", vswp->instance); 838 return (1); 839 } else if ((strlen(physname) + 1) > LIFNAMSIZ) { 840 cmn_err(CE_WARN, "!vsw%d: %s is too long a device name", 841 vswp->instance, physname); 842 return (1); 843 } else if (strcmp(myname, physname) == 0) { 844 /* 845 * Prevent the vswitch from opening itself as the 846 * network device. 847 */ 848 cmn_err(CE_WARN, "!vsw%d: %s is an invalid device name", 849 vswp->instance, physname); 850 return (1); 851 } else { 852 (void) strncpy(name, physname, strlen(physname) + 1); 853 D2(vswp, "%s: using first device specified (%s)", 854 __func__, physname); 855 } 856 857 #ifdef DEBUG 858 /* 859 * As a temporary measure to aid testing we check to see if there 860 * is a vsw.conf file present. If there is we use the value of the 861 * vsw_physname property in the file as the name of the physical 862 * device, overriding the value from the MD. 863 * 864 * There may be multiple devices listed, but for the moment 865 * we just use the first one. 866 */ 867 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0, 868 "vsw_physname", &dev) == DDI_PROP_SUCCESS) { 869 if ((strlen(dev) + 1) > LIFNAMSIZ) { 870 cmn_err(CE_WARN, "vsw%d: %s is too long a device name", 871 vswp->instance, dev); 872 ddi_prop_free(dev); 873 return (1); 874 } else { 875 cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from " 876 "config file", vswp->instance, dev); 877 878 (void) strncpy(name, dev, strlen(dev) + 1); 879 } 880 881 ddi_prop_free(dev); 882 } 883 #endif 884 885 return (0); 886 } 887 888 /* 889 * Read the 'vsw-switch-mode' property from the specified MD node. 890 * 891 * Returns 0 on success and the number of modes found in 'found', 892 * otherwise returns 1. 893 */ 894 static int 895 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 896 uint8_t *modes, int *found) 897 { 898 int len = 0; 899 int smode_num = 0; 900 char *smode = NULL; 901 char *curr_mode = NULL; 902 903 D1(vswp, "%s: enter", __func__); 904 905 /* 906 * Get the switch-mode property. The modes are listed in 907 * decreasing order of preference, i.e. prefered mode is 908 * first item in list. 909 */ 910 len = 0; 911 smode_num = 0; 912 if (md_get_prop_data(mdp, node, smode_propname, 913 (uint8_t **)(&smode), &len) != 0) { 914 /* 915 * Unable to get switch-mode property from MD, nothing 916 * more we can do. 917 */ 918 cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property" 919 " from the MD", vswp->instance); 920 *found = 0; 921 return (1); 922 } 923 924 curr_mode = smode; 925 /* 926 * Modes of operation: 927 * 'switched' - layer 2 switching, underlying HW in 928 * programmed mode. 929 * 'promiscuous' - layer 2 switching, underlying HW in 930 * promiscuous mode. 931 * 'routed' - layer 3 (i.e. IP) routing, underlying HW 932 * in non-promiscuous mode. 933 */ 934 while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) { 935 D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode); 936 if (strcmp(curr_mode, "switched") == 0) { 937 modes[smode_num++] = VSW_LAYER2; 938 } else if (strcmp(curr_mode, "promiscuous") == 0) { 939 modes[smode_num++] = VSW_LAYER2_PROMISC; 940 } else if (strcmp(curr_mode, "routed") == 0) { 941 modes[smode_num++] = VSW_LAYER3; 942 } else { 943 cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, " 944 "setting to default switched mode", 945 vswp->instance, curr_mode); 946 modes[smode_num++] = VSW_LAYER2; 947 } 948 curr_mode += strlen(curr_mode) + 1; 949 } 950 *found = smode_num; 951 952 D2(vswp, "%s: %d modes found", __func__, smode_num); 953 954 D1(vswp, "%s: exit", __func__); 955 956 return (0); 957 } 958 959 /* 960 * Register with the MAC layer as a network device, so we 961 * can be plumbed if necessary. 962 */ 963 static int 964 vsw_mac_register(vsw_t *vswp) 965 { 966 mac_register_t *macp; 967 int rv; 968 969 D1(vswp, "%s: enter", __func__); 970 971 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 972 return (EINVAL); 973 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 974 macp->m_driver = vswp; 975 macp->m_dip = vswp->dip; 976 macp->m_src_addr = (uint8_t *)&vswp->if_addr; 977 macp->m_callbacks = &vsw_m_callbacks; 978 macp->m_min_sdu = 0; 979 macp->m_max_sdu = ETHERMTU; 980 rv = mac_register(macp, &vswp->if_mh); 981 mac_free(macp); 982 if (rv != 0) { 983 /* 984 * Treat this as a non-fatal error as we may be 985 * able to operate in some other mode. 986 */ 987 cmn_err(CE_NOTE, "!vsw%d: Unable to register as " 988 "a provider with MAC layer", vswp->instance); 989 return (rv); 990 } 991 992 vswp->if_state |= VSW_IF_REG; 993 994 D1(vswp, "%s: exit", __func__); 995 996 return (rv); 997 } 998 999 static int 1000 vsw_mac_unregister(vsw_t *vswp) 1001 { 1002 int rv = 0; 1003 1004 D1(vswp, "%s: enter", __func__); 1005 1006 WRITE_ENTER(&vswp->if_lockrw); 1007 1008 if (vswp->if_state & VSW_IF_REG) { 1009 rv = mac_unregister(vswp->if_mh); 1010 if (rv != 0) { 1011 DWARN(vswp, "%s: unable to unregister from MAC " 1012 "framework", __func__); 1013 1014 RW_EXIT(&vswp->if_lockrw); 1015 D1(vswp, "%s: fail exit", __func__); 1016 return (rv); 1017 } 1018 1019 /* mark i/f as down and unregistered */ 1020 vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG); 1021 } 1022 RW_EXIT(&vswp->if_lockrw); 1023 1024 D1(vswp, "%s: exit", __func__); 1025 1026 return (rv); 1027 } 1028 1029 static int 1030 vsw_m_stat(void *arg, uint_t stat, uint64_t *val) 1031 { 1032 vsw_t *vswp = (vsw_t *)arg; 1033 1034 D1(vswp, "%s: enter", __func__); 1035 1036 mutex_enter(&vswp->mac_lock); 1037 if (vswp->mh == NULL) { 1038 mutex_exit(&vswp->mac_lock); 1039 return (EINVAL); 1040 } 1041 1042 /* return stats from underlying device */ 1043 *val = mac_stat_get(vswp->mh, stat); 1044 1045 mutex_exit(&vswp->mac_lock); 1046 1047 return (0); 1048 } 1049 1050 static void 1051 vsw_m_stop(void *arg) 1052 { 1053 vsw_t *vswp = (vsw_t *)arg; 1054 1055 D1(vswp, "%s: enter", __func__); 1056 1057 WRITE_ENTER(&vswp->if_lockrw); 1058 vswp->if_state &= ~VSW_IF_UP; 1059 RW_EXIT(&vswp->if_lockrw); 1060 1061 mutex_enter(&vswp->hw_lock); 1062 1063 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 1064 1065 if (vswp->recfg_reqd) 1066 vsw_reconfig_hw(vswp); 1067 1068 mutex_exit(&vswp->hw_lock); 1069 1070 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1071 } 1072 1073 static int 1074 vsw_m_start(void *arg) 1075 { 1076 vsw_t *vswp = (vsw_t *)arg; 1077 1078 D1(vswp, "%s: enter", __func__); 1079 1080 WRITE_ENTER(&vswp->if_lockrw); 1081 1082 vswp->if_state |= VSW_IF_UP; 1083 1084 if (vswp->switching_setup_done == B_FALSE) { 1085 /* 1086 * If the switching mode has not been setup yet, just 1087 * return. The unicast address will be programmed 1088 * after the physical device is successfully setup by the 1089 * timeout handler. 1090 */ 1091 RW_EXIT(&vswp->if_lockrw); 1092 return (0); 1093 } 1094 1095 /* if in layer2 mode, program unicast address. */ 1096 if (vswp->mh != NULL) { 1097 mutex_enter(&vswp->hw_lock); 1098 (void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 1099 mutex_exit(&vswp->hw_lock); 1100 } 1101 1102 RW_EXIT(&vswp->if_lockrw); 1103 1104 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1105 return (0); 1106 } 1107 1108 /* 1109 * Change the local interface address. 1110 * 1111 * Note: we don't support this entry point. The local 1112 * mac address of the switch can only be changed via its 1113 * MD node properties. 1114 */ 1115 static int 1116 vsw_m_unicst(void *arg, const uint8_t *macaddr) 1117 { 1118 _NOTE(ARGUNUSED(arg, macaddr)) 1119 1120 return (DDI_FAILURE); 1121 } 1122 1123 static int 1124 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 1125 { 1126 vsw_t *vswp = (vsw_t *)arg; 1127 mcst_addr_t *mcst_p = NULL; 1128 uint64_t addr = 0x0; 1129 int i, ret = 0; 1130 1131 D1(vswp, "%s: enter", __func__); 1132 1133 /* 1134 * Convert address into form that can be used 1135 * as hash table key. 1136 */ 1137 for (i = 0; i < ETHERADDRL; i++) { 1138 addr = (addr << 8) | mca[i]; 1139 } 1140 1141 D2(vswp, "%s: addr = 0x%llx", __func__, addr); 1142 1143 if (add) { 1144 D2(vswp, "%s: adding multicast", __func__); 1145 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1146 /* 1147 * Update the list of multicast addresses 1148 * contained within the vsw_t structure to 1149 * include this new one. 1150 */ 1151 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP); 1152 if (mcst_p == NULL) { 1153 DERR(vswp, "%s unable to alloc mem", __func__); 1154 (void) vsw_del_mcst(vswp, 1155 VSW_LOCALDEV, addr, NULL); 1156 return (1); 1157 } 1158 mcst_p->addr = addr; 1159 ether_copy(mca, &mcst_p->mca); 1160 1161 /* 1162 * Call into the underlying driver to program the 1163 * address into HW. 1164 */ 1165 mutex_enter(&vswp->mac_lock); 1166 if (vswp->mh != NULL) { 1167 ret = mac_multicst_add(vswp->mh, mca); 1168 if (ret != 0) { 1169 cmn_err(CE_WARN, "!vsw%d: unable to " 1170 "add multicast address", 1171 vswp->instance); 1172 mutex_exit(&vswp->mac_lock); 1173 (void) vsw_del_mcst(vswp, 1174 VSW_LOCALDEV, addr, NULL); 1175 kmem_free(mcst_p, sizeof (*mcst_p)); 1176 return (ret); 1177 } 1178 mcst_p->mac_added = B_TRUE; 1179 } 1180 mutex_exit(&vswp->mac_lock); 1181 1182 mutex_enter(&vswp->mca_lock); 1183 mcst_p->nextp = vswp->mcap; 1184 vswp->mcap = mcst_p; 1185 mutex_exit(&vswp->mca_lock); 1186 } else { 1187 cmn_err(CE_WARN, "!vsw%d: unable to add multicast " 1188 "address", vswp->instance); 1189 } 1190 return (ret); 1191 } 1192 1193 D2(vswp, "%s: removing multicast", __func__); 1194 /* 1195 * Remove the address from the hash table.. 1196 */ 1197 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1198 1199 /* 1200 * ..and then from the list maintained in the 1201 * vsw_t structure. 1202 */ 1203 mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr); 1204 ASSERT(mcst_p != NULL); 1205 1206 mutex_enter(&vswp->mac_lock); 1207 if (vswp->mh != NULL && mcst_p->mac_added) { 1208 (void) mac_multicst_remove(vswp->mh, mca); 1209 mcst_p->mac_added = B_FALSE; 1210 } 1211 mutex_exit(&vswp->mac_lock); 1212 kmem_free(mcst_p, sizeof (*mcst_p)); 1213 } 1214 1215 D1(vswp, "%s: exit", __func__); 1216 1217 return (0); 1218 } 1219 1220 static int 1221 vsw_m_promisc(void *arg, boolean_t on) 1222 { 1223 vsw_t *vswp = (vsw_t *)arg; 1224 1225 D1(vswp, "%s: enter", __func__); 1226 1227 WRITE_ENTER(&vswp->if_lockrw); 1228 if (on) 1229 vswp->if_state |= VSW_IF_PROMISC; 1230 else 1231 vswp->if_state &= ~VSW_IF_PROMISC; 1232 RW_EXIT(&vswp->if_lockrw); 1233 1234 D1(vswp, "%s: exit", __func__); 1235 1236 return (0); 1237 } 1238 1239 static mblk_t * 1240 vsw_m_tx(void *arg, mblk_t *mp) 1241 { 1242 vsw_t *vswp = (vsw_t *)arg; 1243 1244 D1(vswp, "%s: enter", __func__); 1245 1246 vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL); 1247 1248 D1(vswp, "%s: exit", __func__); 1249 1250 return (NULL); 1251 } 1252 1253 /* 1254 * Register for machine description (MD) updates. 1255 * 1256 * Returns 0 on success, 1 on failure. 1257 */ 1258 static int 1259 vsw_mdeg_register(vsw_t *vswp) 1260 { 1261 mdeg_prop_spec_t *pspecp; 1262 mdeg_node_spec_t *inst_specp; 1263 mdeg_handle_t mdeg_hdl, mdeg_port_hdl; 1264 size_t templatesz; 1265 int rv; 1266 1267 D1(vswp, "%s: enter", __func__); 1268 1269 /* 1270 * Allocate and initialize a per-instance copy 1271 * of the global property spec array that will 1272 * uniquely identify this vsw instance. 1273 */ 1274 templatesz = sizeof (vsw_prop_template); 1275 pspecp = kmem_zalloc(templatesz, KM_SLEEP); 1276 1277 bcopy(vsw_prop_template, pspecp, templatesz); 1278 1279 VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop); 1280 1281 /* initialize the complete prop spec structure */ 1282 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 1283 inst_specp->namep = "virtual-device"; 1284 inst_specp->specp = pspecp; 1285 1286 D2(vswp, "%s: instance %d registering with mdeg", __func__, 1287 vswp->regprop); 1288 /* 1289 * Register an interest in 'virtual-device' nodes with a 1290 * 'name' property of 'virtual-network-switch' 1291 */ 1292 rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb, 1293 (void *)vswp, &mdeg_hdl); 1294 if (rv != MDEG_SUCCESS) { 1295 DERR(vswp, "%s: mdeg_register failed (%d) for vsw node", 1296 __func__, rv); 1297 goto mdeg_reg_fail; 1298 } 1299 1300 /* 1301 * Register an interest in 'vsw-port' nodes. 1302 */ 1303 rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb, 1304 (void *)vswp, &mdeg_port_hdl); 1305 if (rv != MDEG_SUCCESS) { 1306 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv); 1307 (void) mdeg_unregister(mdeg_hdl); 1308 goto mdeg_reg_fail; 1309 } 1310 1311 /* save off data that will be needed later */ 1312 vswp->inst_spec = inst_specp; 1313 vswp->mdeg_hdl = mdeg_hdl; 1314 vswp->mdeg_port_hdl = mdeg_port_hdl; 1315 1316 D1(vswp, "%s: exit", __func__); 1317 return (0); 1318 1319 mdeg_reg_fail: 1320 cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks", 1321 vswp->instance); 1322 kmem_free(pspecp, templatesz); 1323 kmem_free(inst_specp, sizeof (mdeg_node_spec_t)); 1324 1325 vswp->mdeg_hdl = NULL; 1326 vswp->mdeg_port_hdl = NULL; 1327 1328 return (1); 1329 } 1330 1331 static void 1332 vsw_mdeg_unregister(vsw_t *vswp) 1333 { 1334 D1(vswp, "vsw_mdeg_unregister: enter"); 1335 1336 if (vswp->mdeg_hdl != NULL) 1337 (void) mdeg_unregister(vswp->mdeg_hdl); 1338 1339 if (vswp->mdeg_port_hdl != NULL) 1340 (void) mdeg_unregister(vswp->mdeg_port_hdl); 1341 1342 if (vswp->inst_spec != NULL) { 1343 if (vswp->inst_spec->specp != NULL) { 1344 (void) kmem_free(vswp->inst_spec->specp, 1345 sizeof (vsw_prop_template)); 1346 vswp->inst_spec->specp = NULL; 1347 } 1348 1349 (void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t)); 1350 vswp->inst_spec = NULL; 1351 } 1352 1353 D1(vswp, "vsw_mdeg_unregister: exit"); 1354 } 1355 1356 /* 1357 * Mdeg callback invoked for the vsw node itself. 1358 */ 1359 static int 1360 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1361 { 1362 vsw_t *vswp; 1363 md_t *mdp; 1364 mde_cookie_t node; 1365 uint64_t inst; 1366 char *node_name = NULL; 1367 1368 if (resp == NULL) 1369 return (MDEG_FAILURE); 1370 1371 vswp = (vsw_t *)cb_argp; 1372 1373 D1(vswp, "%s: added %d : removed %d : curr matched %d" 1374 " : prev matched %d", __func__, resp->added.nelem, 1375 resp->removed.nelem, resp->match_curr.nelem, 1376 resp->match_prev.nelem); 1377 1378 /* 1379 * We get an initial callback for this node as 'added' 1380 * after registering with mdeg. Note that we would have 1381 * already gathered information about this vsw node by 1382 * walking MD earlier during attach (in vsw_read_mdprops()). 1383 * So, there is a window where the properties of this 1384 * node might have changed when we get this initial 'added' 1385 * callback. We handle this as if an update occured 1386 * and invoke the same function which handles updates to 1387 * the properties of this vsw-node if any. 1388 * 1389 * A non-zero 'match' value indicates that the MD has been 1390 * updated and that a virtual-network-switch node is 1391 * present which may or may not have been updated. It is 1392 * up to the clients to examine their own nodes and 1393 * determine if they have changed. 1394 */ 1395 if (resp->added.nelem != 0) { 1396 1397 if (resp->added.nelem != 1) { 1398 cmn_err(CE_NOTE, "!vsw%d: number of nodes added " 1399 "invalid: %d\n", vswp->instance, resp->added.nelem); 1400 return (MDEG_FAILURE); 1401 } 1402 1403 mdp = resp->added.mdp; 1404 node = resp->added.mdep[0]; 1405 1406 } else if (resp->match_curr.nelem != 0) { 1407 1408 if (resp->match_curr.nelem != 1) { 1409 cmn_err(CE_NOTE, "!vsw%d: number of nodes updated " 1410 "invalid: %d\n", vswp->instance, 1411 resp->match_curr.nelem); 1412 return (MDEG_FAILURE); 1413 } 1414 1415 mdp = resp->match_curr.mdp; 1416 node = resp->match_curr.mdep[0]; 1417 1418 } else { 1419 return (MDEG_FAILURE); 1420 } 1421 1422 /* Validate name and instance */ 1423 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { 1424 DERR(vswp, "%s: unable to get node name\n", __func__); 1425 return (MDEG_FAILURE); 1426 } 1427 1428 /* is this a virtual-network-switch? */ 1429 if (strcmp(node_name, vsw_propname) != 0) { 1430 DERR(vswp, "%s: Invalid node name: %s\n", 1431 __func__, node_name); 1432 return (MDEG_FAILURE); 1433 } 1434 1435 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { 1436 DERR(vswp, "%s: prop(cfg-handle) not found\n", 1437 __func__); 1438 return (MDEG_FAILURE); 1439 } 1440 1441 /* is this the right instance of vsw? */ 1442 if (inst != vswp->regprop) { 1443 DERR(vswp, "%s: Invalid cfg-handle: %lx\n", 1444 __func__, inst); 1445 return (MDEG_FAILURE); 1446 } 1447 1448 vsw_update_md_prop(vswp, mdp, node); 1449 1450 return (MDEG_SUCCESS); 1451 } 1452 1453 /* 1454 * Mdeg callback invoked for changes to the vsw-port nodes 1455 * under the vsw node. 1456 */ 1457 static int 1458 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1459 { 1460 vsw_t *vswp; 1461 int idx; 1462 md_t *mdp; 1463 mde_cookie_t node; 1464 uint64_t inst; 1465 1466 if ((resp == NULL) || (cb_argp == NULL)) 1467 return (MDEG_FAILURE); 1468 1469 vswp = (vsw_t *)cb_argp; 1470 1471 D2(vswp, "%s: added %d : removed %d : curr matched %d" 1472 " : prev matched %d", __func__, resp->added.nelem, 1473 resp->removed.nelem, resp->match_curr.nelem, 1474 resp->match_prev.nelem); 1475 1476 /* process added ports */ 1477 for (idx = 0; idx < resp->added.nelem; idx++) { 1478 mdp = resp->added.mdp; 1479 node = resp->added.mdep[idx]; 1480 1481 D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); 1482 1483 if (vsw_port_add(vswp, mdp, &node) != 0) { 1484 cmn_err(CE_WARN, "!vsw%d: Unable to add new port " 1485 "(0x%lx)", vswp->instance, node); 1486 } 1487 } 1488 1489 /* process removed ports */ 1490 for (idx = 0; idx < resp->removed.nelem; idx++) { 1491 mdp = resp->removed.mdp; 1492 node = resp->removed.mdep[idx]; 1493 1494 if (md_get_prop_val(mdp, node, id_propname, &inst)) { 1495 DERR(vswp, "%s: prop(%s) not found in port(%d)", 1496 __func__, id_propname, idx); 1497 continue; 1498 } 1499 1500 D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); 1501 1502 if (vsw_port_detach(vswp, inst) != 0) { 1503 cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld", 1504 vswp->instance, inst); 1505 } 1506 } 1507 1508 /* 1509 * Currently no support for updating already active ports. 1510 * So, ignore the match_curr and match_priv arrays for now. 1511 */ 1512 1513 D1(vswp, "%s: exit", __func__); 1514 1515 return (MDEG_SUCCESS); 1516 } 1517 1518 /* 1519 * Scan the machine description for this instance of vsw 1520 * and read its properties. Called only from vsw_attach(). 1521 * Returns: 0 on success, 1 on failure. 1522 */ 1523 static int 1524 vsw_read_mdprops(vsw_t *vswp) 1525 { 1526 md_t *mdp = NULL; 1527 mde_cookie_t rootnode; 1528 mde_cookie_t *listp = NULL; 1529 uint64_t inst; 1530 uint64_t cfgh; 1531 char *name; 1532 int rv = 1; 1533 int num_nodes = 0; 1534 int num_devs = 0; 1535 int listsz = 0; 1536 int i; 1537 1538 /* 1539 * In each 'virtual-device' node in the MD there is a 1540 * 'cfg-handle' property which is the MD's concept of 1541 * an instance number (this may be completely different from 1542 * the device drivers instance #). OBP reads that value and 1543 * stores it in the 'reg' property of the appropriate node in 1544 * the device tree. We first read this reg property and use this 1545 * to compare against the 'cfg-handle' property of vsw nodes 1546 * in MD to get to this specific vsw instance and then read 1547 * other properties that we are interested in. 1548 * We also cache the value of 'reg' property and use it later 1549 * to register callbacks with mdeg (see vsw_mdeg_register()) 1550 */ 1551 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 1552 DDI_PROP_DONTPASS, reg_propname, -1); 1553 if (inst == -1) { 1554 cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from " 1555 "OBP device tree", vswp->instance, reg_propname); 1556 return (rv); 1557 } 1558 1559 vswp->regprop = inst; 1560 1561 if ((mdp = md_get_handle()) == NULL) { 1562 DWARN(vswp, "%s: cannot init MD\n", __func__); 1563 return (rv); 1564 } 1565 1566 num_nodes = md_node_count(mdp); 1567 ASSERT(num_nodes > 0); 1568 1569 listsz = num_nodes * sizeof (mde_cookie_t); 1570 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 1571 1572 rootnode = md_root_node(mdp); 1573 1574 /* search for all "virtual_device" nodes */ 1575 num_devs = md_scan_dag(mdp, rootnode, 1576 md_find_name(mdp, vdev_propname), 1577 md_find_name(mdp, "fwd"), listp); 1578 if (num_devs <= 0) { 1579 DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs); 1580 goto vsw_readmd_exit; 1581 } 1582 1583 /* 1584 * Now loop through the list of virtual-devices looking for 1585 * devices with name "virtual-network-switch" and for each 1586 * such device compare its instance with what we have from 1587 * the 'reg' property to find the right node in MD and then 1588 * read all its properties. 1589 */ 1590 for (i = 0; i < num_devs; i++) { 1591 1592 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 1593 DWARN(vswp, "%s: name property not found\n", 1594 __func__); 1595 goto vsw_readmd_exit; 1596 } 1597 1598 /* is this a virtual-network-switch? */ 1599 if (strcmp(name, vsw_propname) != 0) 1600 continue; 1601 1602 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 1603 DWARN(vswp, "%s: cfg-handle property not found\n", 1604 __func__); 1605 goto vsw_readmd_exit; 1606 } 1607 1608 /* is this the required instance of vsw? */ 1609 if (inst != cfgh) 1610 continue; 1611 1612 /* now read all properties of this vsw instance */ 1613 rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]); 1614 break; 1615 } 1616 1617 vsw_readmd_exit: 1618 1619 kmem_free(listp, listsz); 1620 (void) md_fini_handle(mdp); 1621 return (rv); 1622 } 1623 1624 /* 1625 * Read the initial start-of-day values from the specified MD node. 1626 */ 1627 static int 1628 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1629 { 1630 int i; 1631 uint64_t macaddr = 0; 1632 1633 D1(vswp, "%s: enter", __func__); 1634 1635 if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) { 1636 return (1); 1637 } 1638 1639 /* mac address for vswitch device itself */ 1640 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1641 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1642 vswp->instance); 1643 return (1); 1644 } 1645 1646 vsw_save_lmacaddr(vswp, macaddr); 1647 1648 if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) { 1649 cmn_err(CE_WARN, "vsw%d: Unable to read %s property from " 1650 "MD, defaulting to programmed mode", vswp->instance, 1651 smode_propname); 1652 1653 for (i = 0; i < NUM_SMODES; i++) 1654 vswp->smode[i] = VSW_LAYER2; 1655 1656 vswp->smode_num = NUM_SMODES; 1657 } else { 1658 ASSERT(vswp->smode_num != 0); 1659 } 1660 1661 D1(vswp, "%s: exit", __func__); 1662 return (0); 1663 } 1664 1665 /* 1666 * Check to see if the relevant properties in the specified node have 1667 * changed, and if so take the appropriate action. 1668 * 1669 * If any of the properties are missing or invalid we don't take 1670 * any action, as this function should only be invoked when modifications 1671 * have been made to what we assume is a working configuration, which 1672 * we leave active. 1673 * 1674 * Note it is legal for this routine to be invoked even if none of the 1675 * properties in the port node within the MD have actually changed. 1676 */ 1677 static void 1678 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1679 { 1680 char physname[LIFNAMSIZ]; 1681 char drv[LIFNAMSIZ]; 1682 uint_t ddi_instance; 1683 uint8_t new_smode[NUM_SMODES]; 1684 int i, smode_num = 0; 1685 uint64_t macaddr = 0; 1686 enum {MD_init = 0x1, 1687 MD_physname = 0x2, 1688 MD_macaddr = 0x4, 1689 MD_smode = 0x8} updated; 1690 int rv; 1691 1692 updated = MD_init; 1693 1694 D1(vswp, "%s: enter", __func__); 1695 1696 /* 1697 * Check if name of physical device in MD has changed. 1698 */ 1699 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { 1700 /* 1701 * Do basic sanity check on new device name/instance, 1702 * if its non NULL. It is valid for the device name to 1703 * have changed from a non NULL to a NULL value, i.e. 1704 * the vsw is being changed to 'routed' mode. 1705 */ 1706 if ((strlen(physname) != 0) && 1707 (ddi_parse(physname, drv, 1708 &ddi_instance) != DDI_SUCCESS)) { 1709 cmn_err(CE_WARN, "!vsw%d: new device name %s is not" 1710 " a valid device name/instance", 1711 vswp->instance, physname); 1712 goto fail_reconf; 1713 } 1714 1715 if (strcmp(physname, vswp->physname)) { 1716 D2(vswp, "%s: device name changed from %s to %s", 1717 __func__, vswp->physname, physname); 1718 1719 updated |= MD_physname; 1720 } else { 1721 D2(vswp, "%s: device name unchanged at %s", 1722 __func__, vswp->physname); 1723 } 1724 } else { 1725 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " 1726 "device from updated MD.", vswp->instance); 1727 goto fail_reconf; 1728 } 1729 1730 /* 1731 * Check if MAC address has changed. 1732 */ 1733 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1734 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1735 vswp->instance); 1736 goto fail_reconf; 1737 } else { 1738 uint64_t maddr = macaddr; 1739 READ_ENTER(&vswp->if_lockrw); 1740 for (i = ETHERADDRL - 1; i >= 0; i--) { 1741 if (vswp->if_addr.ether_addr_octet[i] 1742 != (macaddr & 0xFF)) { 1743 D2(vswp, "%s: octet[%d] 0x%x != 0x%x", 1744 __func__, i, 1745 vswp->if_addr.ether_addr_octet[i], 1746 (macaddr & 0xFF)); 1747 updated |= MD_macaddr; 1748 macaddr = maddr; 1749 break; 1750 } 1751 macaddr >>= 8; 1752 } 1753 RW_EXIT(&vswp->if_lockrw); 1754 if (updated & MD_macaddr) { 1755 vsw_save_lmacaddr(vswp, macaddr); 1756 } 1757 } 1758 1759 /* 1760 * Check if switching modes have changed. 1761 */ 1762 if (vsw_get_md_smodes(vswp, mdp, node, 1763 new_smode, &smode_num)) { 1764 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", 1765 vswp->instance, smode_propname); 1766 goto fail_reconf; 1767 } else { 1768 ASSERT(smode_num != 0); 1769 if (smode_num != vswp->smode_num) { 1770 D2(vswp, "%s: number of modes changed from %d to %d", 1771 __func__, vswp->smode_num, smode_num); 1772 } 1773 1774 for (i = 0; i < smode_num; i++) { 1775 if (new_smode[i] != vswp->smode[i]) { 1776 D2(vswp, "%s: mode changed from %d to %d", 1777 __func__, vswp->smode[i], new_smode[i]); 1778 updated |= MD_smode; 1779 break; 1780 } 1781 } 1782 } 1783 1784 /* 1785 * Now make any changes which are needed... 1786 */ 1787 1788 if (updated & (MD_physname | MD_smode)) { 1789 1790 /* 1791 * Stop any pending timeout to setup switching mode. 1792 */ 1793 vsw_stop_switching_timeout(vswp); 1794 1795 /* 1796 * Remove unicst, mcst addrs of vsw interface 1797 * and ports from the physdev. 1798 */ 1799 vsw_unset_addrs(vswp); 1800 1801 /* 1802 * Stop, detach and close the old device.. 1803 */ 1804 mutex_enter(&vswp->mac_lock); 1805 1806 vsw_mac_detach(vswp); 1807 vsw_mac_close(vswp); 1808 1809 mutex_exit(&vswp->mac_lock); 1810 1811 /* 1812 * Update phys name. 1813 */ 1814 if (updated & MD_physname) { 1815 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", 1816 vswp->instance, vswp->physname, physname); 1817 (void) strncpy(vswp->physname, 1818 physname, strlen(physname) + 1); 1819 } 1820 1821 /* 1822 * Update array with the new switch mode values. 1823 */ 1824 if (updated & MD_smode) { 1825 for (i = 0; i < smode_num; i++) 1826 vswp->smode[i] = new_smode[i]; 1827 1828 vswp->smode_num = smode_num; 1829 vswp->smode_idx = 0; 1830 } 1831 1832 /* 1833 * ..and attach, start the new device. 1834 */ 1835 rv = vsw_setup_switching(vswp); 1836 if (rv == EAGAIN) { 1837 /* 1838 * Unable to setup switching mode. 1839 * As the error is EAGAIN, schedule a timeout to retry 1840 * and return. Programming addresses of ports and 1841 * vsw interface will be done when the timeout handler 1842 * completes successfully. 1843 */ 1844 mutex_enter(&vswp->swtmout_lock); 1845 1846 vswp->swtmout_enabled = B_TRUE; 1847 vswp->swtmout_id = 1848 timeout(vsw_setup_switching_timeout, vswp, 1849 (vsw_setup_switching_delay * 1850 drv_usectohz(MICROSEC))); 1851 1852 mutex_exit(&vswp->swtmout_lock); 1853 1854 return; 1855 1856 } else if (rv) { 1857 goto fail_update; 1858 } 1859 1860 /* 1861 * program unicst, mcst addrs of vsw interface 1862 * and ports in the physdev. 1863 */ 1864 vsw_set_addrs(vswp); 1865 1866 } else if (updated & MD_macaddr) { 1867 /* 1868 * We enter here if only MD_macaddr is exclusively updated. 1869 * If MD_physname and/or MD_smode are also updated, then 1870 * as part of that, we would have implicitly processed 1871 * MD_macaddr update (above). 1872 */ 1873 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", 1874 vswp->instance, macaddr); 1875 1876 READ_ENTER(&vswp->if_lockrw); 1877 if (vswp->if_state & VSW_IF_UP) { 1878 1879 mutex_enter(&vswp->hw_lock); 1880 /* 1881 * Remove old mac address of vsw interface 1882 * from the physdev 1883 */ 1884 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 1885 /* 1886 * Program new mac address of vsw interface 1887 * in the physdev 1888 */ 1889 rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 1890 mutex_exit(&vswp->hw_lock); 1891 if (rv != 0) { 1892 cmn_err(CE_NOTE, 1893 "!vsw%d: failed to program interface " 1894 "unicast address\n", vswp->instance); 1895 } 1896 /* 1897 * Notify the MAC layer of the changed address. 1898 */ 1899 mac_unicst_update(vswp->if_mh, 1900 (uint8_t *)&vswp->if_addr); 1901 1902 } 1903 RW_EXIT(&vswp->if_lockrw); 1904 1905 } 1906 1907 return; 1908 1909 fail_reconf: 1910 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); 1911 return; 1912 1913 fail_update: 1914 cmn_err(CE_WARN, "!vsw%d: update of configuration failed", 1915 vswp->instance); 1916 } 1917 1918 /* 1919 * Add a new port to the system. 1920 * 1921 * Returns 0 on success, 1 on failure. 1922 */ 1923 int 1924 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) 1925 { 1926 uint64_t ldc_id; 1927 uint8_t *addrp; 1928 int i, addrsz; 1929 int num_nodes = 0, nchan = 0; 1930 int listsz = 0; 1931 mde_cookie_t *listp = NULL; 1932 struct ether_addr ea; 1933 uint64_t macaddr; 1934 uint64_t inst = 0; 1935 vsw_port_t *port; 1936 1937 if (md_get_prop_val(mdp, *node, id_propname, &inst)) { 1938 DWARN(vswp, "%s: prop(%s) not found", __func__, 1939 id_propname); 1940 return (1); 1941 } 1942 1943 /* 1944 * Find the channel endpoint node(s) (which should be under this 1945 * port node) which contain the channel id(s). 1946 */ 1947 if ((num_nodes = md_node_count(mdp)) <= 0) { 1948 DERR(vswp, "%s: invalid number of nodes found (%d)", 1949 __func__, num_nodes); 1950 return (1); 1951 } 1952 1953 D2(vswp, "%s: %d nodes found", __func__, num_nodes); 1954 1955 /* allocate enough space for node list */ 1956 listsz = num_nodes * sizeof (mde_cookie_t); 1957 listp = kmem_zalloc(listsz, KM_SLEEP); 1958 1959 nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname), 1960 md_find_name(mdp, "fwd"), listp); 1961 1962 if (nchan <= 0) { 1963 DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname); 1964 kmem_free(listp, listsz); 1965 return (1); 1966 } 1967 1968 D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname); 1969 1970 /* use property from first node found */ 1971 if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) { 1972 DWARN(vswp, "%s: prop(%s) not found\n", __func__, 1973 id_propname); 1974 kmem_free(listp, listsz); 1975 return (1); 1976 } 1977 1978 /* don't need list any more */ 1979 kmem_free(listp, listsz); 1980 1981 D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id); 1982 1983 /* read mac-address property */ 1984 if (md_get_prop_data(mdp, *node, remaddr_propname, 1985 &addrp, &addrsz)) { 1986 DWARN(vswp, "%s: prop(%s) not found", 1987 __func__, remaddr_propname); 1988 return (1); 1989 } 1990 1991 if (addrsz < ETHERADDRL) { 1992 DWARN(vswp, "%s: invalid address size", __func__); 1993 return (1); 1994 } 1995 1996 macaddr = *((uint64_t *)addrp); 1997 D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr); 1998 1999 for (i = ETHERADDRL - 1; i >= 0; i--) { 2000 ea.ether_addr_octet[i] = macaddr & 0xFF; 2001 macaddr >>= 8; 2002 } 2003 2004 if (vsw_port_attach(vswp, (int)inst, &ldc_id, 1, &ea) != 0) { 2005 DERR(vswp, "%s: failed to attach port", __func__); 2006 return (1); 2007 } 2008 2009 port = vsw_lookup_port(vswp, (int)inst); 2010 2011 /* just successfuly created the port, so it should exist */ 2012 ASSERT(port != NULL); 2013 2014 return (0); 2015 } 2016 2017 /* 2018 * vsw_mac_rx -- A common function to send packets to the interface. 2019 * By default this function check if the interface is UP or not, the 2020 * rest of the behaviour depends on the flags as below: 2021 * 2022 * VSW_MACRX_PROMISC -- Check if the promisc mode set or not. 2023 * VSW_MACRX_COPYMSG -- Make a copy of the message(s). 2024 * VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack. 2025 */ 2026 void 2027 vsw_mac_rx(vsw_t *vswp, int caller, mac_resource_handle_t mrh, 2028 mblk_t *mp, mblk_t *mpt, vsw_macrx_flags_t flags) 2029 { 2030 int trigger = 0; 2031 2032 D1(vswp, "%s:enter\n", __func__); 2033 READ_ENTER(&vswp->if_lockrw); 2034 /* Check if the interface is up */ 2035 if (!(vswp->if_state & VSW_IF_UP)) { 2036 RW_EXIT(&vswp->if_lockrw); 2037 /* Free messages only if FREEMSG flag specified */ 2038 if (flags & VSW_MACRX_FREEMSG) { 2039 freemsgchain(mp); 2040 } 2041 D1(vswp, "%s:exit\n", __func__); 2042 return; 2043 } 2044 /* 2045 * If PROMISC flag is passed, then check if 2046 * the interface is in the PROMISC mode. 2047 * If not, drop the messages. 2048 */ 2049 if (flags & VSW_MACRX_PROMISC) { 2050 if (!(vswp->if_state & VSW_IF_PROMISC)) { 2051 RW_EXIT(&vswp->if_lockrw); 2052 /* Free messages only if FREEMSG flag specified */ 2053 if (flags & VSW_MACRX_FREEMSG) { 2054 freemsgchain(mp); 2055 } 2056 D1(vswp, "%s:exit\n", __func__); 2057 return; 2058 } 2059 } 2060 RW_EXIT(&vswp->if_lockrw); 2061 /* 2062 * If COPYMSG flag is passed, then make a copy 2063 * of the message chain and send up the copy. 2064 */ 2065 if (flags & VSW_MACRX_COPYMSG) { 2066 mp = copymsgchain(mp); 2067 if (mp) { 2068 mpt = mp; 2069 /* find the tail */ 2070 while (mpt->b_next != NULL) { 2071 mpt = mpt->b_next; 2072 } 2073 } else { 2074 D1(vswp, "%s:exit\n", __func__); 2075 return; 2076 } 2077 } 2078 2079 /* 2080 * If the softint is not enabled or the packets are 2081 * passed by the physical device, then the caller 2082 * is expected to be at the interrupt context. For 2083 * this case, mac_rx() directly. 2084 */ 2085 if ((vswp->rx_softint == B_FALSE) || (caller == VSW_PHYSDEV)) { 2086 ASSERT(servicing_interrupt()); 2087 D3(vswp, "%s: sending up stack", __func__); 2088 mac_rx(vswp->if_mh, mrh, mp); 2089 D1(vswp, "%s:exit\n", __func__); 2090 return; 2091 } 2092 2093 /* 2094 * Here we may not be at the interrupt context, so 2095 * queue the packets and trigger a softint to post 2096 * the packets up the stack. 2097 */ 2098 mutex_enter(&vswp->soft_lock); 2099 if (vswp->rx_mhead == NULL) { 2100 vswp->rx_mhead = mp; 2101 vswp->rx_mtail = mpt; 2102 trigger = 1; 2103 } else { 2104 vswp->rx_mtail->b_next = mp; 2105 vswp->rx_mtail = mpt; 2106 } 2107 mutex_exit(&vswp->soft_lock); 2108 if (trigger) { 2109 D3(vswp, "%s: triggering the softint", __func__); 2110 (void) ddi_intr_trigger_softint(vswp->soft_handle, NULL); 2111 } 2112 D1(vswp, "%s:exit\n", __func__); 2113 } 2114 2115 /* 2116 * vsw_rx_softintr -- vsw soft interrupt handler function. 2117 * Its job is to pickup the recieved packets that are queued 2118 * for the interface and send them up. 2119 * 2120 * NOTE: An interrupt handler is being used to handle the upper 2121 * layer(s) requirement to send up only at interrupt context. 2122 */ 2123 /* ARGSUSED */ 2124 static uint_t 2125 vsw_rx_softintr(caddr_t arg1, caddr_t arg2) 2126 { 2127 mblk_t *mp; 2128 vsw_t *vswp = (vsw_t *)arg1; 2129 2130 mutex_enter(&vswp->soft_lock); 2131 mp = vswp->rx_mhead; 2132 vswp->rx_mhead = vswp->rx_mtail = NULL; 2133 mutex_exit(&vswp->soft_lock); 2134 if (mp != NULL) { 2135 READ_ENTER(&vswp->if_lockrw); 2136 if (vswp->if_state & VSW_IF_UP) { 2137 RW_EXIT(&vswp->if_lockrw); 2138 mac_rx(vswp->if_mh, NULL, mp); 2139 } else { 2140 RW_EXIT(&vswp->if_lockrw); 2141 freemsgchain(mp); 2142 } 2143 } 2144 D1(vswp, "%s:exit\n", __func__); 2145 return (DDI_INTR_CLAIMED); 2146 } 2147 2148 /* copy mac address of vsw into soft state structure */ 2149 static void 2150 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr) 2151 { 2152 int i; 2153 2154 WRITE_ENTER(&vswp->if_lockrw); 2155 for (i = ETHERADDRL - 1; i >= 0; i--) { 2156 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF; 2157 macaddr >>= 8; 2158 } 2159 RW_EXIT(&vswp->if_lockrw); 2160 } 2161