1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/ldc.h> 66 #include <sys/vsw_fdb.h> 67 #include <sys/vsw.h> 68 #include <sys/vio_mailbox.h> 69 #include <sys/vnet_mailbox.h> 70 #include <sys/vnet_common.h> 71 #include <sys/vio_util.h> 72 #include <sys/sdt.h> 73 #include <sys/atomic.h> 74 #include <sys/callb.h> 75 76 /* 77 * Function prototypes. 78 */ 79 static int vsw_attach(dev_info_t *, ddi_attach_cmd_t); 80 static int vsw_detach(dev_info_t *, ddi_detach_cmd_t); 81 static int vsw_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 82 static int vsw_get_md_physname(vsw_t *, md_t *, mde_cookie_t, char *); 83 static int vsw_get_md_smodes(vsw_t *, md_t *, mde_cookie_t, uint8_t *, int *); 84 85 /* MDEG routines */ 86 static int vsw_mdeg_register(vsw_t *vswp); 87 static void vsw_mdeg_unregister(vsw_t *vswp); 88 static int vsw_mdeg_cb(void *cb_argp, mdeg_result_t *); 89 static int vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *); 90 static int vsw_get_initial_md_properties(vsw_t *vswp, md_t *, mde_cookie_t); 91 static void vsw_update_md_prop(vsw_t *, md_t *, mde_cookie_t); 92 static int vsw_read_mdprops(vsw_t *vswp); 93 static void vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr); 94 95 /* Mac driver related routines */ 96 static int vsw_mac_register(vsw_t *); 97 static int vsw_mac_unregister(vsw_t *); 98 static int vsw_m_stat(void *, uint_t, uint64_t *); 99 static void vsw_m_stop(void *arg); 100 static int vsw_m_start(void *arg); 101 static int vsw_m_unicst(void *arg, const uint8_t *); 102 static int vsw_m_multicst(void *arg, boolean_t, const uint8_t *); 103 static int vsw_m_promisc(void *arg, boolean_t); 104 static mblk_t *vsw_m_tx(void *arg, mblk_t *); 105 static uint_t vsw_rx_softintr(caddr_t arg1, caddr_t arg2); 106 void vsw_mac_rx(vsw_t *vswp, int caller, mac_resource_handle_t mrh, 107 mblk_t *mp, mblk_t *mpt, vsw_macrx_flags_t flags); 108 109 /* 110 * Functions imported from other files. 111 */ 112 extern void vsw_setup_switching_timeout(void *arg); 113 extern void vsw_stop_switching_timeout(vsw_t *vswp); 114 extern int vsw_setup_switching(vsw_t *); 115 extern int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 116 extern int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 117 extern void vsw_del_mcst_vsw(vsw_t *); 118 extern mcst_addr_t *vsw_del_addr(uint8_t devtype, void *arg, uint64_t addr); 119 extern int vsw_detach_ports(vsw_t *vswp); 120 extern int vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node); 121 extern int vsw_port_detach(vsw_t *vswp, int p_instance); 122 extern int vsw_port_attach(vsw_t *vswp, int p_instance, 123 uint64_t *ldcids, int nids, struct ether_addr *macaddr); 124 extern vsw_port_t *vsw_lookup_port(vsw_t *vswp, int p_instance); 125 extern int vsw_mac_attach(vsw_t *vswp); 126 extern void vsw_mac_detach(vsw_t *vswp); 127 extern int vsw_mac_open(vsw_t *vswp); 128 extern void vsw_mac_close(vsw_t *vswp); 129 extern int vsw_set_hw(vsw_t *, vsw_port_t *, int); 130 extern int vsw_unset_hw(vsw_t *, vsw_port_t *, int); 131 extern void vsw_reconfig_hw(vsw_t *); 132 extern void vsw_unset_addrs(vsw_t *vswp); 133 extern void vsw_set_addrs(vsw_t *vswp); 134 135 136 /* 137 * Internal tunables. 138 */ 139 int vsw_num_handshakes = VNET_NUM_HANDSHAKES; /* # of handshake attempts */ 140 int vsw_wretries = 100; /* # of write attempts */ 141 int vsw_desc_delay = 0; /* delay in us */ 142 int vsw_read_attempts = 5; /* # of reads of descriptor */ 143 int vsw_mac_open_retries = 20; /* max # of mac_open() retries */ 144 int vsw_setup_switching_delay = 3; /* setup sw timeout interval in sec */ 145 int vsw_ldc_tx_delay = 5; /* delay(ticks) for tx retries */ 146 int vsw_ldc_tx_retries = 10; /* # of ldc tx retries */ 147 int vsw_ldc_tx_max_failures = 40; /* Max ldc tx failures */ 148 boolean_t vsw_ldc_rxthr_enabled = B_TRUE; /* LDC Rx thread enabled */ 149 boolean_t vsw_ldc_txthr_enabled = B_TRUE; /* LDC Tx thread enabled */ 150 151 152 /* 153 * External tunables. 154 */ 155 /* 156 * Enable/disable thread per ring. This is a mode selection 157 * that is done a vsw driver attach time. 158 */ 159 boolean_t vsw_multi_ring_enable = B_FALSE; 160 int vsw_mac_rx_rings = VSW_MAC_RX_RINGS; 161 162 /* 163 * Max number of mblks received in one receive operation. 164 */ 165 uint32_t vsw_chain_len = (VSW_NUM_MBLKS * 0.6); 166 167 /* 168 * Tunables for three different pools, that is, the size and 169 * number of mblks for each pool. 170 */ 171 uint32_t vsw_mblk_size1 = VSW_MBLK_SZ_128; /* size=128 for pool1 */ 172 uint32_t vsw_mblk_size2 = VSW_MBLK_SZ_256; /* size=256 for pool2 */ 173 uint32_t vsw_mblk_size3 = VSW_MBLK_SZ_2048; /* size=2048 for pool3 */ 174 uint32_t vsw_num_mblks1 = VSW_NUM_MBLKS; /* number of mblks for pool1 */ 175 uint32_t vsw_num_mblks2 = VSW_NUM_MBLKS; /* number of mblks for pool2 */ 176 uint32_t vsw_num_mblks3 = VSW_NUM_MBLKS; /* number of mblks for pool3 */ 177 178 /* 179 * MAC callbacks 180 */ 181 static mac_callbacks_t vsw_m_callbacks = { 182 0, 183 vsw_m_stat, 184 vsw_m_start, 185 vsw_m_stop, 186 vsw_m_promisc, 187 vsw_m_multicst, 188 vsw_m_unicst, 189 vsw_m_tx, 190 NULL, 191 NULL, 192 NULL 193 }; 194 195 static struct cb_ops vsw_cb_ops = { 196 nulldev, /* cb_open */ 197 nulldev, /* cb_close */ 198 nodev, /* cb_strategy */ 199 nodev, /* cb_print */ 200 nodev, /* cb_dump */ 201 nodev, /* cb_read */ 202 nodev, /* cb_write */ 203 nodev, /* cb_ioctl */ 204 nodev, /* cb_devmap */ 205 nodev, /* cb_mmap */ 206 nodev, /* cb_segmap */ 207 nochpoll, /* cb_chpoll */ 208 ddi_prop_op, /* cb_prop_op */ 209 NULL, /* cb_stream */ 210 D_MP, /* cb_flag */ 211 CB_REV, /* rev */ 212 nodev, /* int (*cb_aread)() */ 213 nodev /* int (*cb_awrite)() */ 214 }; 215 216 static struct dev_ops vsw_ops = { 217 DEVO_REV, /* devo_rev */ 218 0, /* devo_refcnt */ 219 vsw_getinfo, /* devo_getinfo */ 220 nulldev, /* devo_identify */ 221 nulldev, /* devo_probe */ 222 vsw_attach, /* devo_attach */ 223 vsw_detach, /* devo_detach */ 224 nodev, /* devo_reset */ 225 &vsw_cb_ops, /* devo_cb_ops */ 226 (struct bus_ops *)NULL, /* devo_bus_ops */ 227 ddi_power /* devo_power */ 228 }; 229 230 extern struct mod_ops mod_driverops; 231 static struct modldrv vswmodldrv = { 232 &mod_driverops, 233 "sun4v Virtual Switch", 234 &vsw_ops, 235 }; 236 237 #define LDC_ENTER_LOCK(ldcp) \ 238 mutex_enter(&((ldcp)->ldc_cblock));\ 239 mutex_enter(&((ldcp)->ldc_rxlock));\ 240 mutex_enter(&((ldcp)->ldc_txlock)); 241 #define LDC_EXIT_LOCK(ldcp) \ 242 mutex_exit(&((ldcp)->ldc_txlock));\ 243 mutex_exit(&((ldcp)->ldc_rxlock));\ 244 mutex_exit(&((ldcp)->ldc_cblock)); 245 246 /* Driver soft state ptr */ 247 static void *vsw_state; 248 249 /* 250 * Linked list of "vsw_t" structures - one per instance. 251 */ 252 vsw_t *vsw_head = NULL; 253 krwlock_t vsw_rw; 254 255 /* 256 * Property names 257 */ 258 static char vdev_propname[] = "virtual-device"; 259 static char vsw_propname[] = "virtual-network-switch"; 260 static char physdev_propname[] = "vsw-phys-dev"; 261 static char smode_propname[] = "vsw-switch-mode"; 262 static char macaddr_propname[] = "local-mac-address"; 263 static char remaddr_propname[] = "remote-mac-address"; 264 static char ldcids_propname[] = "ldc-ids"; 265 static char chan_propname[] = "channel-endpoint"; 266 static char id_propname[] = "id"; 267 static char reg_propname[] = "reg"; 268 269 /* 270 * Matching criteria passed to the MDEG to register interest 271 * in changes to 'virtual-device-port' nodes identified by their 272 * 'id' property. 273 */ 274 static md_prop_match_t vport_prop_match[] = { 275 { MDET_PROP_VAL, "id" }, 276 { MDET_LIST_END, NULL } 277 }; 278 279 static mdeg_node_match_t vport_match = { "virtual-device-port", 280 vport_prop_match }; 281 282 /* 283 * Matching criteria passed to the MDEG to register interest 284 * in changes to 'virtual-device' nodes (i.e. vsw nodes) identified 285 * by their 'name' and 'cfg-handle' properties. 286 */ 287 static md_prop_match_t vdev_prop_match[] = { 288 { MDET_PROP_STR, "name" }, 289 { MDET_PROP_VAL, "cfg-handle" }, 290 { MDET_LIST_END, NULL } 291 }; 292 293 static mdeg_node_match_t vdev_match = { "virtual-device", 294 vdev_prop_match }; 295 296 297 /* 298 * Specification of an MD node passed to the MDEG to filter any 299 * 'vport' nodes that do not belong to the specified node. This 300 * template is copied for each vsw instance and filled in with 301 * the appropriate 'cfg-handle' value before being passed to the MDEG. 302 */ 303 static mdeg_prop_spec_t vsw_prop_template[] = { 304 { MDET_PROP_STR, "name", vsw_propname }, 305 { MDET_PROP_VAL, "cfg-handle", NULL }, 306 { MDET_LIST_END, NULL, NULL } 307 }; 308 309 #define VSW_SET_MDEG_PROP_INST(specp, val) (specp)[1].ps_val = (val); 310 311 #ifdef DEBUG 312 /* 313 * Print debug messages - set to 0x1f to enable all msgs 314 * or 0x0 to turn all off. 315 */ 316 int vswdbg = 0x0; 317 318 /* 319 * debug levels: 320 * 0x01: Function entry/exit tracing 321 * 0x02: Internal function messages 322 * 0x04: Verbose internal messages 323 * 0x08: Warning messages 324 * 0x10: Error messages 325 */ 326 327 void 328 vswdebug(vsw_t *vswp, const char *fmt, ...) 329 { 330 char buf[512]; 331 va_list ap; 332 333 va_start(ap, fmt); 334 (void) vsprintf(buf, fmt, ap); 335 va_end(ap); 336 337 if (vswp == NULL) 338 cmn_err(CE_CONT, "%s\n", buf); 339 else 340 cmn_err(CE_CONT, "vsw%d: %s\n", vswp->instance, buf); 341 } 342 343 #endif /* DEBUG */ 344 345 static struct modlinkage modlinkage = { 346 MODREV_1, 347 &vswmodldrv, 348 NULL 349 }; 350 351 int 352 _init(void) 353 { 354 int status; 355 356 rw_init(&vsw_rw, NULL, RW_DRIVER, NULL); 357 358 status = ddi_soft_state_init(&vsw_state, sizeof (vsw_t), 1); 359 if (status != 0) { 360 return (status); 361 } 362 363 mac_init_ops(&vsw_ops, DRV_NAME); 364 status = mod_install(&modlinkage); 365 if (status != 0) { 366 ddi_soft_state_fini(&vsw_state); 367 } 368 return (status); 369 } 370 371 int 372 _fini(void) 373 { 374 int status; 375 376 status = mod_remove(&modlinkage); 377 if (status != 0) 378 return (status); 379 mac_fini_ops(&vsw_ops); 380 ddi_soft_state_fini(&vsw_state); 381 382 rw_destroy(&vsw_rw); 383 384 return (status); 385 } 386 387 int 388 _info(struct modinfo *modinfop) 389 { 390 return (mod_info(&modlinkage, modinfop)); 391 } 392 393 static int 394 vsw_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 395 { 396 vsw_t *vswp; 397 int instance; 398 char hashname[MAXNAMELEN]; 399 char qname[TASKQ_NAMELEN]; 400 enum { PROG_init = 0x00, 401 PROG_locks = 0x01, 402 PROG_readmd = 0x02, 403 PROG_fdb = 0x04, 404 PROG_mfdb = 0x08, 405 PROG_taskq = 0x10, 406 PROG_rx_softint = 0x20, 407 PROG_swmode = 0x40, 408 PROG_macreg = 0x80, 409 PROG_mdreg = 0x100} 410 progress; 411 412 progress = PROG_init; 413 int rv; 414 415 switch (cmd) { 416 case DDI_ATTACH: 417 break; 418 case DDI_RESUME: 419 /* nothing to do for this non-device */ 420 return (DDI_SUCCESS); 421 case DDI_PM_RESUME: 422 default: 423 return (DDI_FAILURE); 424 } 425 426 instance = ddi_get_instance(dip); 427 if (ddi_soft_state_zalloc(vsw_state, instance) != DDI_SUCCESS) { 428 DERR(NULL, "vsw%d: ddi_soft_state_zalloc failed", instance); 429 return (DDI_FAILURE); 430 } 431 vswp = ddi_get_soft_state(vsw_state, instance); 432 433 if (vswp == NULL) { 434 DERR(NULL, "vsw%d: ddi_get_soft_state failed", instance); 435 goto vsw_attach_fail; 436 } 437 438 vswp->dip = dip; 439 vswp->instance = instance; 440 ddi_set_driver_private(dip, (caddr_t)vswp); 441 442 mutex_init(&vswp->hw_lock, NULL, MUTEX_DRIVER, NULL); 443 mutex_init(&vswp->mac_lock, NULL, MUTEX_DRIVER, NULL); 444 mutex_init(&vswp->mca_lock, NULL, MUTEX_DRIVER, NULL); 445 mutex_init(&vswp->swtmout_lock, NULL, MUTEX_DRIVER, NULL); 446 rw_init(&vswp->if_lockrw, NULL, RW_DRIVER, NULL); 447 rw_init(&vswp->mfdbrw, NULL, RW_DRIVER, NULL); 448 rw_init(&vswp->plist.lockrw, NULL, RW_DRIVER, NULL); 449 450 progress |= PROG_locks; 451 452 rv = vsw_read_mdprops(vswp); 453 if (rv != 0) 454 goto vsw_attach_fail; 455 456 progress |= PROG_readmd; 457 458 /* setup the unicast forwarding database */ 459 (void) snprintf(hashname, MAXNAMELEN, "vsw_unicst_table-%d", 460 vswp->instance); 461 D2(vswp, "creating unicast hash table (%s)...", hashname); 462 vswp->fdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS, 463 mod_hash_null_valdtor, sizeof (void *)); 464 465 progress |= PROG_fdb; 466 467 /* setup the multicast fowarding database */ 468 (void) snprintf(hashname, MAXNAMELEN, "vsw_mcst_table-%d", 469 vswp->instance); 470 D2(vswp, "creating multicast hash table %s)...", hashname); 471 vswp->mfdb = mod_hash_create_ptrhash(hashname, VSW_NCHAINS, 472 mod_hash_null_valdtor, sizeof (void *)); 473 474 progress |= PROG_mfdb; 475 476 /* 477 * Create the taskq which will process all the VIO 478 * control messages. 479 */ 480 (void) snprintf(qname, TASKQ_NAMELEN, "vsw_taskq%d", vswp->instance); 481 if ((vswp->taskq_p = ddi_taskq_create(vswp->dip, qname, 1, 482 TASKQ_DEFAULTPRI, 0)) == NULL) { 483 cmn_err(CE_WARN, "!vsw%d: Unable to create task queue", 484 vswp->instance); 485 goto vsw_attach_fail; 486 } 487 488 progress |= PROG_taskq; 489 490 /* 491 * If LDC receive thread is enabled, then we need a 492 * soft-interrupt to deliver the packets to the upper layers. 493 * This applies only to the packets that need to be sent up 494 * the stack, but not to the packets that are sent out via 495 * the physical interface. 496 */ 497 if (vsw_ldc_rxthr_enabled) { 498 vswp->rx_mhead = vswp->rx_mtail = NULL; 499 vswp->soft_pri = PIL_4; 500 vswp->rx_softint = B_TRUE; 501 502 rv = ddi_intr_add_softint(vswp->dip, &vswp->soft_handle, 503 vswp->soft_pri, vsw_rx_softintr, (void *)vswp); 504 if (rv != DDI_SUCCESS) { 505 cmn_err(CE_WARN, "!vsw%d: add_softint failed rv(%d)", 506 vswp->instance, rv); 507 goto vsw_attach_fail; 508 } 509 510 /* 511 * Initialize the soft_lock with the same priority as 512 * the soft interrupt to protect from the soft interrupt. 513 */ 514 mutex_init(&vswp->soft_lock, NULL, MUTEX_DRIVER, 515 DDI_INTR_PRI(vswp->soft_pri)); 516 progress |= PROG_rx_softint; 517 } else { 518 vswp->rx_softint = B_FALSE; 519 } 520 521 /* prevent auto-detaching */ 522 if (ddi_prop_update_int(DDI_DEV_T_NONE, vswp->dip, 523 DDI_NO_AUTODETACH, 1) != DDI_SUCCESS) { 524 cmn_err(CE_NOTE, "!Unable to set \"%s\" property for " 525 "instance %u", DDI_NO_AUTODETACH, instance); 526 } 527 528 /* 529 * Setup the required switching mode, 530 * based on the mdprops that we read earlier. 531 */ 532 rv = vsw_setup_switching(vswp); 533 if (rv == EAGAIN) { 534 /* 535 * Unable to setup switching mode; 536 * as the error is EAGAIN, schedule a timeout to retry. 537 */ 538 mutex_enter(&vswp->swtmout_lock); 539 540 vswp->swtmout_enabled = B_TRUE; 541 vswp->swtmout_id = 542 timeout(vsw_setup_switching_timeout, vswp, 543 (vsw_setup_switching_delay * drv_usectohz(MICROSEC))); 544 545 mutex_exit(&vswp->swtmout_lock); 546 } else if (rv != 0) { 547 goto vsw_attach_fail; 548 } 549 550 progress |= PROG_swmode; 551 552 /* Register with mac layer as a provider */ 553 rv = vsw_mac_register(vswp); 554 if (rv != 0) 555 goto vsw_attach_fail; 556 557 progress |= PROG_macreg; 558 559 /* 560 * Now we have everything setup, register an interest in 561 * specific MD nodes. 562 * 563 * The callback is invoked in 2 cases, firstly if upon mdeg 564 * registration there are existing nodes which match our specified 565 * criteria, and secondly if the MD is changed (and again, there 566 * are nodes which we are interested in present within it. Note 567 * that our callback will be invoked even if our specified nodes 568 * have not actually changed). 569 * 570 */ 571 rv = vsw_mdeg_register(vswp); 572 if (rv != 0) 573 goto vsw_attach_fail; 574 575 progress |= PROG_mdreg; 576 577 WRITE_ENTER(&vsw_rw); 578 vswp->next = vsw_head; 579 vsw_head = vswp; 580 RW_EXIT(&vsw_rw); 581 582 ddi_report_dev(vswp->dip); 583 return (DDI_SUCCESS); 584 585 vsw_attach_fail: 586 DERR(NULL, "vsw_attach: failed"); 587 588 if (progress & PROG_rx_softint) { 589 (void) ddi_intr_remove_softint(vswp->soft_handle); 590 mutex_destroy(&vswp->soft_lock); 591 } 592 593 if (progress & PROG_mdreg) { 594 vsw_mdeg_unregister(vswp); 595 (void) vsw_detach_ports(vswp); 596 } 597 598 if (progress & PROG_macreg) 599 (void) vsw_mac_unregister(vswp); 600 601 if (progress & PROG_swmode) { 602 vsw_stop_switching_timeout(vswp); 603 mutex_enter(&vswp->mac_lock); 604 vsw_mac_detach(vswp); 605 vsw_mac_close(vswp); 606 mutex_exit(&vswp->mac_lock); 607 } 608 609 if (progress & PROG_taskq) 610 ddi_taskq_destroy(vswp->taskq_p); 611 612 if (progress & PROG_mfdb) 613 mod_hash_destroy_hash(vswp->mfdb); 614 615 if (progress & PROG_fdb) 616 mod_hash_destroy_hash(vswp->fdb); 617 618 if (progress & PROG_locks) { 619 rw_destroy(&vswp->plist.lockrw); 620 rw_destroy(&vswp->mfdbrw); 621 rw_destroy(&vswp->if_lockrw); 622 mutex_destroy(&vswp->swtmout_lock); 623 mutex_destroy(&vswp->mca_lock); 624 mutex_destroy(&vswp->mac_lock); 625 mutex_destroy(&vswp->hw_lock); 626 } 627 628 ddi_soft_state_free(vsw_state, instance); 629 return (DDI_FAILURE); 630 } 631 632 static int 633 vsw_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 634 { 635 vio_mblk_pool_t *poolp, *npoolp; 636 vsw_t **vswpp, *vswp; 637 int instance; 638 639 instance = ddi_get_instance(dip); 640 vswp = ddi_get_soft_state(vsw_state, instance); 641 642 if (vswp == NULL) { 643 return (DDI_FAILURE); 644 } 645 646 switch (cmd) { 647 case DDI_DETACH: 648 break; 649 case DDI_SUSPEND: 650 case DDI_PM_SUSPEND: 651 default: 652 return (DDI_FAILURE); 653 } 654 655 D2(vswp, "detaching instance %d", instance); 656 657 /* Stop any pending timeout to setup switching mode. */ 658 vsw_stop_switching_timeout(vswp); 659 660 if (vswp->if_state & VSW_IF_REG) { 661 if (vsw_mac_unregister(vswp) != 0) { 662 cmn_err(CE_WARN, "!vsw%d: Unable to detach from " 663 "MAC layer", vswp->instance); 664 return (DDI_FAILURE); 665 } 666 } 667 668 /* 669 * Destroy/free up the receive thread related structures. 670 */ 671 if (vswp->rx_softint == B_TRUE) { 672 (void) ddi_intr_remove_softint(vswp->soft_handle); 673 mutex_destroy(&vswp->soft_lock); 674 if (vswp->rx_mhead != NULL) { 675 freemsgchain(vswp->rx_mhead); 676 vswp->rx_mhead = vswp->rx_mtail = NULL; 677 } 678 } 679 680 vsw_mdeg_unregister(vswp); 681 682 /* remove mac layer callback */ 683 mutex_enter(&vswp->mac_lock); 684 if ((vswp->mh != NULL) && (vswp->mrh != NULL)) { 685 mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE); 686 vswp->mrh = NULL; 687 } 688 mutex_exit(&vswp->mac_lock); 689 690 if (vsw_detach_ports(vswp) != 0) { 691 cmn_err(CE_WARN, "!vsw%d: Unable to detach ports", 692 vswp->instance); 693 return (DDI_FAILURE); 694 } 695 696 rw_destroy(&vswp->if_lockrw); 697 698 mutex_destroy(&vswp->hw_lock); 699 700 /* 701 * Now that the ports have been deleted, stop and close 702 * the physical device. 703 */ 704 mutex_enter(&vswp->mac_lock); 705 706 vsw_mac_detach(vswp); 707 vsw_mac_close(vswp); 708 709 mutex_exit(&vswp->mac_lock); 710 711 mutex_destroy(&vswp->mac_lock); 712 mutex_destroy(&vswp->swtmout_lock); 713 714 /* 715 * Destroy any free pools that may still exist. 716 */ 717 poolp = vswp->rxh; 718 while (poolp != NULL) { 719 npoolp = vswp->rxh = poolp->nextp; 720 if (vio_destroy_mblks(poolp) != 0) { 721 vswp->rxh = poolp; 722 return (DDI_FAILURE); 723 } 724 poolp = npoolp; 725 } 726 727 /* 728 * Remove this instance from any entries it may be on in 729 * the hash table by using the list of addresses maintained 730 * in the vsw_t structure. 731 */ 732 vsw_del_mcst_vsw(vswp); 733 734 vswp->mcap = NULL; 735 mutex_destroy(&vswp->mca_lock); 736 737 /* 738 * By now any pending tasks have finished and the underlying 739 * ldc's have been destroyed, so its safe to delete the control 740 * message taskq. 741 */ 742 if (vswp->taskq_p != NULL) 743 ddi_taskq_destroy(vswp->taskq_p); 744 745 /* 746 * At this stage all the data pointers in the hash table 747 * should be NULL, as all the ports have been removed and will 748 * have deleted themselves from the port lists which the data 749 * pointers point to. Hence we can destroy the table using the 750 * default destructors. 751 */ 752 D2(vswp, "vsw_detach: destroying hash tables.."); 753 mod_hash_destroy_hash(vswp->fdb); 754 vswp->fdb = NULL; 755 756 WRITE_ENTER(&vswp->mfdbrw); 757 mod_hash_destroy_hash(vswp->mfdb); 758 vswp->mfdb = NULL; 759 RW_EXIT(&vswp->mfdbrw); 760 rw_destroy(&vswp->mfdbrw); 761 762 ddi_remove_minor_node(dip, NULL); 763 764 rw_destroy(&vswp->plist.lockrw); 765 WRITE_ENTER(&vsw_rw); 766 for (vswpp = &vsw_head; *vswpp; vswpp = &(*vswpp)->next) { 767 if (*vswpp == vswp) { 768 *vswpp = vswp->next; 769 break; 770 } 771 } 772 RW_EXIT(&vsw_rw); 773 ddi_soft_state_free(vsw_state, instance); 774 775 return (DDI_SUCCESS); 776 } 777 778 static int 779 vsw_getinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 780 { 781 _NOTE(ARGUNUSED(dip)) 782 783 vsw_t *vswp = NULL; 784 dev_t dev = (dev_t)arg; 785 int instance; 786 787 instance = getminor(dev); 788 789 switch (infocmd) { 790 case DDI_INFO_DEVT2DEVINFO: 791 if ((vswp = ddi_get_soft_state(vsw_state, instance)) == NULL) { 792 *result = NULL; 793 return (DDI_FAILURE); 794 } 795 *result = vswp->dip; 796 return (DDI_SUCCESS); 797 798 case DDI_INFO_DEVT2INSTANCE: 799 *result = (void *)(uintptr_t)instance; 800 return (DDI_SUCCESS); 801 802 default: 803 *result = NULL; 804 return (DDI_FAILURE); 805 } 806 } 807 808 /* 809 * Get the value of the "vsw-phys-dev" property in the specified 810 * node. This property is the name of the physical device that 811 * the virtual switch will use to talk to the outside world. 812 * 813 * Note it is valid for this property to be NULL (but the property 814 * itself must exist). Callers of this routine should verify that 815 * the value returned is what they expected (i.e. either NULL or non NULL). 816 * 817 * On success returns value of the property in region pointed to by 818 * the 'name' argument, and with return value of 0. Otherwise returns 1. 819 */ 820 static int 821 vsw_get_md_physname(vsw_t *vswp, md_t *mdp, mde_cookie_t node, char *name) 822 { 823 int len = 0; 824 char *physname = NULL; 825 char *dev; 826 827 if (md_get_prop_data(mdp, node, physdev_propname, 828 (uint8_t **)(&physname), &len) != 0) { 829 cmn_err(CE_WARN, "!vsw%d: Unable to get name(s) of physical " 830 "device(s) from MD", vswp->instance); 831 return (1); 832 } else if ((strlen(physname) + 1) > LIFNAMSIZ) { 833 cmn_err(CE_WARN, "!vsw%d: %s is too long a device name", 834 vswp->instance, physname); 835 return (1); 836 } else { 837 (void) strncpy(name, physname, strlen(physname) + 1); 838 D2(vswp, "%s: using first device specified (%s)", 839 __func__, physname); 840 } 841 842 #ifdef DEBUG 843 /* 844 * As a temporary measure to aid testing we check to see if there 845 * is a vsw.conf file present. If there is we use the value of the 846 * vsw_physname property in the file as the name of the physical 847 * device, overriding the value from the MD. 848 * 849 * There may be multiple devices listed, but for the moment 850 * we just use the first one. 851 */ 852 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vswp->dip, 0, 853 "vsw_physname", &dev) == DDI_PROP_SUCCESS) { 854 if ((strlen(dev) + 1) > LIFNAMSIZ) { 855 cmn_err(CE_WARN, "vsw%d: %s is too long a device name", 856 vswp->instance, dev); 857 ddi_prop_free(dev); 858 return (1); 859 } else { 860 cmn_err(CE_NOTE, "vsw%d: Using device name (%s) from " 861 "config file", vswp->instance, dev); 862 863 (void) strncpy(name, dev, strlen(dev) + 1); 864 } 865 866 ddi_prop_free(dev); 867 } 868 #endif 869 870 return (0); 871 } 872 873 /* 874 * Read the 'vsw-switch-mode' property from the specified MD node. 875 * 876 * Returns 0 on success and the number of modes found in 'found', 877 * otherwise returns 1. 878 */ 879 static int 880 vsw_get_md_smodes(vsw_t *vswp, md_t *mdp, mde_cookie_t node, 881 uint8_t *modes, int *found) 882 { 883 int len = 0; 884 int smode_num = 0; 885 char *smode = NULL; 886 char *curr_mode = NULL; 887 888 D1(vswp, "%s: enter", __func__); 889 890 /* 891 * Get the switch-mode property. The modes are listed in 892 * decreasing order of preference, i.e. prefered mode is 893 * first item in list. 894 */ 895 len = 0; 896 smode_num = 0; 897 if (md_get_prop_data(mdp, node, smode_propname, 898 (uint8_t **)(&smode), &len) != 0) { 899 /* 900 * Unable to get switch-mode property from MD, nothing 901 * more we can do. 902 */ 903 cmn_err(CE_WARN, "!vsw%d: Unable to get switch mode property" 904 " from the MD", vswp->instance); 905 *found = 0; 906 return (1); 907 } 908 909 curr_mode = smode; 910 /* 911 * Modes of operation: 912 * 'switched' - layer 2 switching, underlying HW in 913 * programmed mode. 914 * 'promiscuous' - layer 2 switching, underlying HW in 915 * promiscuous mode. 916 * 'routed' - layer 3 (i.e. IP) routing, underlying HW 917 * in non-promiscuous mode. 918 */ 919 while ((curr_mode < (smode + len)) && (smode_num < NUM_SMODES)) { 920 D2(vswp, "%s: curr_mode = [%s]", __func__, curr_mode); 921 if (strcmp(curr_mode, "switched") == 0) { 922 modes[smode_num++] = VSW_LAYER2; 923 } else if (strcmp(curr_mode, "promiscuous") == 0) { 924 modes[smode_num++] = VSW_LAYER2_PROMISC; 925 } else if (strcmp(curr_mode, "routed") == 0) { 926 modes[smode_num++] = VSW_LAYER3; 927 } else { 928 cmn_err(CE_WARN, "!vsw%d: Unknown switch mode %s, " 929 "setting to default switched mode", 930 vswp->instance, curr_mode); 931 modes[smode_num++] = VSW_LAYER2; 932 } 933 curr_mode += strlen(curr_mode) + 1; 934 } 935 *found = smode_num; 936 937 D2(vswp, "%s: %d modes found", __func__, smode_num); 938 939 D1(vswp, "%s: exit", __func__); 940 941 return (0); 942 } 943 944 /* 945 * Register with the MAC layer as a network device, so we 946 * can be plumbed if necessary. 947 */ 948 static int 949 vsw_mac_register(vsw_t *vswp) 950 { 951 mac_register_t *macp; 952 int rv; 953 954 D1(vswp, "%s: enter", __func__); 955 956 if ((macp = mac_alloc(MAC_VERSION)) == NULL) 957 return (EINVAL); 958 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 959 macp->m_driver = vswp; 960 macp->m_dip = vswp->dip; 961 macp->m_src_addr = (uint8_t *)&vswp->if_addr; 962 macp->m_callbacks = &vsw_m_callbacks; 963 macp->m_min_sdu = 0; 964 macp->m_max_sdu = ETHERMTU; 965 rv = mac_register(macp, &vswp->if_mh); 966 mac_free(macp); 967 if (rv != 0) { 968 /* 969 * Treat this as a non-fatal error as we may be 970 * able to operate in some other mode. 971 */ 972 cmn_err(CE_NOTE, "!vsw%d: Unable to register as " 973 "a provider with MAC layer", vswp->instance); 974 return (rv); 975 } 976 977 vswp->if_state |= VSW_IF_REG; 978 979 D1(vswp, "%s: exit", __func__); 980 981 return (rv); 982 } 983 984 static int 985 vsw_mac_unregister(vsw_t *vswp) 986 { 987 int rv = 0; 988 989 D1(vswp, "%s: enter", __func__); 990 991 WRITE_ENTER(&vswp->if_lockrw); 992 993 if (vswp->if_state & VSW_IF_REG) { 994 rv = mac_unregister(vswp->if_mh); 995 if (rv != 0) { 996 DWARN(vswp, "%s: unable to unregister from MAC " 997 "framework", __func__); 998 999 RW_EXIT(&vswp->if_lockrw); 1000 D1(vswp, "%s: fail exit", __func__); 1001 return (rv); 1002 } 1003 1004 /* mark i/f as down and unregistered */ 1005 vswp->if_state &= ~(VSW_IF_UP | VSW_IF_REG); 1006 } 1007 RW_EXIT(&vswp->if_lockrw); 1008 1009 D1(vswp, "%s: exit", __func__); 1010 1011 return (rv); 1012 } 1013 1014 static int 1015 vsw_m_stat(void *arg, uint_t stat, uint64_t *val) 1016 { 1017 vsw_t *vswp = (vsw_t *)arg; 1018 1019 D1(vswp, "%s: enter", __func__); 1020 1021 mutex_enter(&vswp->mac_lock); 1022 if (vswp->mh == NULL) { 1023 mutex_exit(&vswp->mac_lock); 1024 return (EINVAL); 1025 } 1026 1027 /* return stats from underlying device */ 1028 *val = mac_stat_get(vswp->mh, stat); 1029 1030 mutex_exit(&vswp->mac_lock); 1031 1032 return (0); 1033 } 1034 1035 static void 1036 vsw_m_stop(void *arg) 1037 { 1038 vsw_t *vswp = (vsw_t *)arg; 1039 1040 D1(vswp, "%s: enter", __func__); 1041 1042 WRITE_ENTER(&vswp->if_lockrw); 1043 vswp->if_state &= ~VSW_IF_UP; 1044 RW_EXIT(&vswp->if_lockrw); 1045 1046 mutex_enter(&vswp->hw_lock); 1047 1048 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 1049 1050 if (vswp->recfg_reqd) 1051 vsw_reconfig_hw(vswp); 1052 1053 mutex_exit(&vswp->hw_lock); 1054 1055 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1056 } 1057 1058 static int 1059 vsw_m_start(void *arg) 1060 { 1061 vsw_t *vswp = (vsw_t *)arg; 1062 1063 D1(vswp, "%s: enter", __func__); 1064 1065 WRITE_ENTER(&vswp->if_lockrw); 1066 1067 vswp->if_state |= VSW_IF_UP; 1068 1069 if (vswp->switching_setup_done == B_FALSE) { 1070 /* 1071 * If the switching mode has not been setup yet, just 1072 * return. The unicast address will be programmed 1073 * after the physical device is successfully setup by the 1074 * timeout handler. 1075 */ 1076 RW_EXIT(&vswp->if_lockrw); 1077 return (0); 1078 } 1079 1080 /* if in layer2 mode, program unicast address. */ 1081 if (vswp->mh != NULL) { 1082 mutex_enter(&vswp->hw_lock); 1083 (void) vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 1084 mutex_exit(&vswp->hw_lock); 1085 } 1086 1087 RW_EXIT(&vswp->if_lockrw); 1088 1089 D1(vswp, "%s: exit (state = %d)", __func__, vswp->if_state); 1090 return (0); 1091 } 1092 1093 /* 1094 * Change the local interface address. 1095 * 1096 * Note: we don't support this entry point. The local 1097 * mac address of the switch can only be changed via its 1098 * MD node properties. 1099 */ 1100 static int 1101 vsw_m_unicst(void *arg, const uint8_t *macaddr) 1102 { 1103 _NOTE(ARGUNUSED(arg, macaddr)) 1104 1105 return (DDI_FAILURE); 1106 } 1107 1108 static int 1109 vsw_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 1110 { 1111 vsw_t *vswp = (vsw_t *)arg; 1112 mcst_addr_t *mcst_p = NULL; 1113 uint64_t addr = 0x0; 1114 int i, ret = 0; 1115 1116 D1(vswp, "%s: enter", __func__); 1117 1118 /* 1119 * Convert address into form that can be used 1120 * as hash table key. 1121 */ 1122 for (i = 0; i < ETHERADDRL; i++) { 1123 addr = (addr << 8) | mca[i]; 1124 } 1125 1126 D2(vswp, "%s: addr = 0x%llx", __func__, addr); 1127 1128 if (add) { 1129 D2(vswp, "%s: adding multicast", __func__); 1130 if (vsw_add_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1131 /* 1132 * Update the list of multicast addresses 1133 * contained within the vsw_t structure to 1134 * include this new one. 1135 */ 1136 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), KM_NOSLEEP); 1137 if (mcst_p == NULL) { 1138 DERR(vswp, "%s unable to alloc mem", __func__); 1139 (void) vsw_del_mcst(vswp, 1140 VSW_LOCALDEV, addr, NULL); 1141 return (1); 1142 } 1143 mcst_p->addr = addr; 1144 ether_copy(mca, &mcst_p->mca); 1145 1146 /* 1147 * Call into the underlying driver to program the 1148 * address into HW. 1149 */ 1150 mutex_enter(&vswp->mac_lock); 1151 if (vswp->mh != NULL) { 1152 ret = mac_multicst_add(vswp->mh, mca); 1153 if (ret != 0) { 1154 cmn_err(CE_WARN, "!vsw%d: unable to " 1155 "add multicast address", 1156 vswp->instance); 1157 mutex_exit(&vswp->mac_lock); 1158 (void) vsw_del_mcst(vswp, 1159 VSW_LOCALDEV, addr, NULL); 1160 kmem_free(mcst_p, sizeof (*mcst_p)); 1161 return (ret); 1162 } 1163 mcst_p->mac_added = B_TRUE; 1164 } 1165 mutex_exit(&vswp->mac_lock); 1166 1167 mutex_enter(&vswp->mca_lock); 1168 mcst_p->nextp = vswp->mcap; 1169 vswp->mcap = mcst_p; 1170 mutex_exit(&vswp->mca_lock); 1171 } else { 1172 cmn_err(CE_WARN, "!vsw%d: unable to add multicast " 1173 "address", vswp->instance); 1174 } 1175 return (ret); 1176 } 1177 1178 D2(vswp, "%s: removing multicast", __func__); 1179 /* 1180 * Remove the address from the hash table.. 1181 */ 1182 if (vsw_del_mcst(vswp, VSW_LOCALDEV, addr, NULL) == 0) { 1183 1184 /* 1185 * ..and then from the list maintained in the 1186 * vsw_t structure. 1187 */ 1188 mcst_p = vsw_del_addr(VSW_LOCALDEV, vswp, addr); 1189 ASSERT(mcst_p != NULL); 1190 1191 mutex_enter(&vswp->mac_lock); 1192 if (vswp->mh != NULL && mcst_p->mac_added) { 1193 (void) mac_multicst_remove(vswp->mh, mca); 1194 mcst_p->mac_added = B_FALSE; 1195 } 1196 mutex_exit(&vswp->mac_lock); 1197 kmem_free(mcst_p, sizeof (*mcst_p)); 1198 } 1199 1200 D1(vswp, "%s: exit", __func__); 1201 1202 return (0); 1203 } 1204 1205 static int 1206 vsw_m_promisc(void *arg, boolean_t on) 1207 { 1208 vsw_t *vswp = (vsw_t *)arg; 1209 1210 D1(vswp, "%s: enter", __func__); 1211 1212 WRITE_ENTER(&vswp->if_lockrw); 1213 if (on) 1214 vswp->if_state |= VSW_IF_PROMISC; 1215 else 1216 vswp->if_state &= ~VSW_IF_PROMISC; 1217 RW_EXIT(&vswp->if_lockrw); 1218 1219 D1(vswp, "%s: exit", __func__); 1220 1221 return (0); 1222 } 1223 1224 static mblk_t * 1225 vsw_m_tx(void *arg, mblk_t *mp) 1226 { 1227 vsw_t *vswp = (vsw_t *)arg; 1228 1229 D1(vswp, "%s: enter", __func__); 1230 1231 vswp->vsw_switch_frame(vswp, mp, VSW_LOCALDEV, NULL, NULL); 1232 1233 D1(vswp, "%s: exit", __func__); 1234 1235 return (NULL); 1236 } 1237 1238 /* 1239 * Register for machine description (MD) updates. 1240 * 1241 * Returns 0 on success, 1 on failure. 1242 */ 1243 static int 1244 vsw_mdeg_register(vsw_t *vswp) 1245 { 1246 mdeg_prop_spec_t *pspecp; 1247 mdeg_node_spec_t *inst_specp; 1248 mdeg_handle_t mdeg_hdl, mdeg_port_hdl; 1249 size_t templatesz; 1250 int rv; 1251 1252 D1(vswp, "%s: enter", __func__); 1253 1254 /* 1255 * Allocate and initialize a per-instance copy 1256 * of the global property spec array that will 1257 * uniquely identify this vsw instance. 1258 */ 1259 templatesz = sizeof (vsw_prop_template); 1260 pspecp = kmem_zalloc(templatesz, KM_SLEEP); 1261 1262 bcopy(vsw_prop_template, pspecp, templatesz); 1263 1264 VSW_SET_MDEG_PROP_INST(pspecp, vswp->regprop); 1265 1266 /* initialize the complete prop spec structure */ 1267 inst_specp = kmem_zalloc(sizeof (mdeg_node_spec_t), KM_SLEEP); 1268 inst_specp->namep = "virtual-device"; 1269 inst_specp->specp = pspecp; 1270 1271 D2(vswp, "%s: instance %d registering with mdeg", __func__, 1272 vswp->regprop); 1273 /* 1274 * Register an interest in 'virtual-device' nodes with a 1275 * 'name' property of 'virtual-network-switch' 1276 */ 1277 rv = mdeg_register(inst_specp, &vdev_match, vsw_mdeg_cb, 1278 (void *)vswp, &mdeg_hdl); 1279 if (rv != MDEG_SUCCESS) { 1280 DERR(vswp, "%s: mdeg_register failed (%d) for vsw node", 1281 __func__, rv); 1282 goto mdeg_reg_fail; 1283 } 1284 1285 /* 1286 * Register an interest in 'vsw-port' nodes. 1287 */ 1288 rv = mdeg_register(inst_specp, &vport_match, vsw_port_mdeg_cb, 1289 (void *)vswp, &mdeg_port_hdl); 1290 if (rv != MDEG_SUCCESS) { 1291 DERR(vswp, "%s: mdeg_register failed (%d)\n", __func__, rv); 1292 (void) mdeg_unregister(mdeg_hdl); 1293 goto mdeg_reg_fail; 1294 } 1295 1296 /* save off data that will be needed later */ 1297 vswp->inst_spec = inst_specp; 1298 vswp->mdeg_hdl = mdeg_hdl; 1299 vswp->mdeg_port_hdl = mdeg_port_hdl; 1300 1301 D1(vswp, "%s: exit", __func__); 1302 return (0); 1303 1304 mdeg_reg_fail: 1305 cmn_err(CE_WARN, "!vsw%d: Unable to register MDEG callbacks", 1306 vswp->instance); 1307 kmem_free(pspecp, templatesz); 1308 kmem_free(inst_specp, sizeof (mdeg_node_spec_t)); 1309 1310 vswp->mdeg_hdl = NULL; 1311 vswp->mdeg_port_hdl = NULL; 1312 1313 return (1); 1314 } 1315 1316 static void 1317 vsw_mdeg_unregister(vsw_t *vswp) 1318 { 1319 D1(vswp, "vsw_mdeg_unregister: enter"); 1320 1321 if (vswp->mdeg_hdl != NULL) 1322 (void) mdeg_unregister(vswp->mdeg_hdl); 1323 1324 if (vswp->mdeg_port_hdl != NULL) 1325 (void) mdeg_unregister(vswp->mdeg_port_hdl); 1326 1327 if (vswp->inst_spec != NULL) { 1328 if (vswp->inst_spec->specp != NULL) { 1329 (void) kmem_free(vswp->inst_spec->specp, 1330 sizeof (vsw_prop_template)); 1331 vswp->inst_spec->specp = NULL; 1332 } 1333 1334 (void) kmem_free(vswp->inst_spec, sizeof (mdeg_node_spec_t)); 1335 vswp->inst_spec = NULL; 1336 } 1337 1338 D1(vswp, "vsw_mdeg_unregister: exit"); 1339 } 1340 1341 /* 1342 * Mdeg callback invoked for the vsw node itself. 1343 */ 1344 static int 1345 vsw_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1346 { 1347 vsw_t *vswp; 1348 md_t *mdp; 1349 mde_cookie_t node; 1350 uint64_t inst; 1351 char *node_name = NULL; 1352 1353 if (resp == NULL) 1354 return (MDEG_FAILURE); 1355 1356 vswp = (vsw_t *)cb_argp; 1357 1358 D1(vswp, "%s: added %d : removed %d : curr matched %d" 1359 " : prev matched %d", __func__, resp->added.nelem, 1360 resp->removed.nelem, resp->match_curr.nelem, 1361 resp->match_prev.nelem); 1362 1363 /* 1364 * We get an initial callback for this node as 'added' 1365 * after registering with mdeg. Note that we would have 1366 * already gathered information about this vsw node by 1367 * walking MD earlier during attach (in vsw_read_mdprops()). 1368 * So, there is a window where the properties of this 1369 * node might have changed when we get this initial 'added' 1370 * callback. We handle this as if an update occured 1371 * and invoke the same function which handles updates to 1372 * the properties of this vsw-node if any. 1373 * 1374 * A non-zero 'match' value indicates that the MD has been 1375 * updated and that a virtual-network-switch node is 1376 * present which may or may not have been updated. It is 1377 * up to the clients to examine their own nodes and 1378 * determine if they have changed. 1379 */ 1380 if (resp->added.nelem != 0) { 1381 1382 if (resp->added.nelem != 1) { 1383 cmn_err(CE_NOTE, "!vsw%d: number of nodes added " 1384 "invalid: %d\n", vswp->instance, resp->added.nelem); 1385 return (MDEG_FAILURE); 1386 } 1387 1388 mdp = resp->added.mdp; 1389 node = resp->added.mdep[0]; 1390 1391 } else if (resp->match_curr.nelem != 0) { 1392 1393 if (resp->match_curr.nelem != 1) { 1394 cmn_err(CE_NOTE, "!vsw%d: number of nodes updated " 1395 "invalid: %d\n", vswp->instance, 1396 resp->match_curr.nelem); 1397 return (MDEG_FAILURE); 1398 } 1399 1400 mdp = resp->match_curr.mdp; 1401 node = resp->match_curr.mdep[0]; 1402 1403 } else { 1404 return (MDEG_FAILURE); 1405 } 1406 1407 /* Validate name and instance */ 1408 if (md_get_prop_str(mdp, node, "name", &node_name) != 0) { 1409 DERR(vswp, "%s: unable to get node name\n", __func__); 1410 return (MDEG_FAILURE); 1411 } 1412 1413 /* is this a virtual-network-switch? */ 1414 if (strcmp(node_name, vsw_propname) != 0) { 1415 DERR(vswp, "%s: Invalid node name: %s\n", 1416 __func__, node_name); 1417 return (MDEG_FAILURE); 1418 } 1419 1420 if (md_get_prop_val(mdp, node, "cfg-handle", &inst)) { 1421 DERR(vswp, "%s: prop(cfg-handle) not found\n", 1422 __func__); 1423 return (MDEG_FAILURE); 1424 } 1425 1426 /* is this the right instance of vsw? */ 1427 if (inst != vswp->regprop) { 1428 DERR(vswp, "%s: Invalid cfg-handle: %lx\n", 1429 __func__, inst); 1430 return (MDEG_FAILURE); 1431 } 1432 1433 vsw_update_md_prop(vswp, mdp, node); 1434 1435 return (MDEG_SUCCESS); 1436 } 1437 1438 /* 1439 * Mdeg callback invoked for changes to the vsw-port nodes 1440 * under the vsw node. 1441 */ 1442 static int 1443 vsw_port_mdeg_cb(void *cb_argp, mdeg_result_t *resp) 1444 { 1445 vsw_t *vswp; 1446 int idx; 1447 md_t *mdp; 1448 mde_cookie_t node; 1449 uint64_t inst; 1450 1451 if ((resp == NULL) || (cb_argp == NULL)) 1452 return (MDEG_FAILURE); 1453 1454 vswp = (vsw_t *)cb_argp; 1455 1456 D2(vswp, "%s: added %d : removed %d : curr matched %d" 1457 " : prev matched %d", __func__, resp->added.nelem, 1458 resp->removed.nelem, resp->match_curr.nelem, 1459 resp->match_prev.nelem); 1460 1461 /* process added ports */ 1462 for (idx = 0; idx < resp->added.nelem; idx++) { 1463 mdp = resp->added.mdp; 1464 node = resp->added.mdep[idx]; 1465 1466 D2(vswp, "%s: adding node(%d) 0x%lx", __func__, idx, node); 1467 1468 if (vsw_port_add(vswp, mdp, &node) != 0) { 1469 cmn_err(CE_WARN, "!vsw%d: Unable to add new port " 1470 "(0x%lx)", vswp->instance, node); 1471 } 1472 } 1473 1474 /* process removed ports */ 1475 for (idx = 0; idx < resp->removed.nelem; idx++) { 1476 mdp = resp->removed.mdp; 1477 node = resp->removed.mdep[idx]; 1478 1479 if (md_get_prop_val(mdp, node, id_propname, &inst)) { 1480 DERR(vswp, "%s: prop(%s) not found in port(%d)", 1481 __func__, id_propname, idx); 1482 continue; 1483 } 1484 1485 D2(vswp, "%s: removing node(%d) 0x%lx", __func__, idx, node); 1486 1487 if (vsw_port_detach(vswp, inst) != 0) { 1488 cmn_err(CE_WARN, "!vsw%d: Unable to remove port %ld", 1489 vswp->instance, inst); 1490 } 1491 } 1492 1493 /* 1494 * Currently no support for updating already active ports. 1495 * So, ignore the match_curr and match_priv arrays for now. 1496 */ 1497 1498 D1(vswp, "%s: exit", __func__); 1499 1500 return (MDEG_SUCCESS); 1501 } 1502 1503 /* 1504 * Scan the machine description for this instance of vsw 1505 * and read its properties. Called only from vsw_attach(). 1506 * Returns: 0 on success, 1 on failure. 1507 */ 1508 static int 1509 vsw_read_mdprops(vsw_t *vswp) 1510 { 1511 md_t *mdp = NULL; 1512 mde_cookie_t rootnode; 1513 mde_cookie_t *listp = NULL; 1514 uint64_t inst; 1515 uint64_t cfgh; 1516 char *name; 1517 int rv = 1; 1518 int num_nodes = 0; 1519 int num_devs = 0; 1520 int listsz = 0; 1521 int i; 1522 1523 /* 1524 * In each 'virtual-device' node in the MD there is a 1525 * 'cfg-handle' property which is the MD's concept of 1526 * an instance number (this may be completely different from 1527 * the device drivers instance #). OBP reads that value and 1528 * stores it in the 'reg' property of the appropriate node in 1529 * the device tree. We first read this reg property and use this 1530 * to compare against the 'cfg-handle' property of vsw nodes 1531 * in MD to get to this specific vsw instance and then read 1532 * other properties that we are interested in. 1533 * We also cache the value of 'reg' property and use it later 1534 * to register callbacks with mdeg (see vsw_mdeg_register()) 1535 */ 1536 inst = ddi_prop_get_int(DDI_DEV_T_ANY, vswp->dip, 1537 DDI_PROP_DONTPASS, reg_propname, -1); 1538 if (inst == -1) { 1539 cmn_err(CE_NOTE, "!vsw%d: Unable to read %s property from " 1540 "OBP device tree", vswp->instance, reg_propname); 1541 return (rv); 1542 } 1543 1544 vswp->regprop = inst; 1545 1546 if ((mdp = md_get_handle()) == NULL) { 1547 DWARN(vswp, "%s: cannot init MD\n", __func__); 1548 return (rv); 1549 } 1550 1551 num_nodes = md_node_count(mdp); 1552 ASSERT(num_nodes > 0); 1553 1554 listsz = num_nodes * sizeof (mde_cookie_t); 1555 listp = (mde_cookie_t *)kmem_zalloc(listsz, KM_SLEEP); 1556 1557 rootnode = md_root_node(mdp); 1558 1559 /* search for all "virtual_device" nodes */ 1560 num_devs = md_scan_dag(mdp, rootnode, 1561 md_find_name(mdp, vdev_propname), 1562 md_find_name(mdp, "fwd"), listp); 1563 if (num_devs <= 0) { 1564 DWARN(vswp, "%s: invalid num_devs:%d\n", __func__, num_devs); 1565 goto vsw_readmd_exit; 1566 } 1567 1568 /* 1569 * Now loop through the list of virtual-devices looking for 1570 * devices with name "virtual-network-switch" and for each 1571 * such device compare its instance with what we have from 1572 * the 'reg' property to find the right node in MD and then 1573 * read all its properties. 1574 */ 1575 for (i = 0; i < num_devs; i++) { 1576 1577 if (md_get_prop_str(mdp, listp[i], "name", &name) != 0) { 1578 DWARN(vswp, "%s: name property not found\n", 1579 __func__); 1580 goto vsw_readmd_exit; 1581 } 1582 1583 /* is this a virtual-network-switch? */ 1584 if (strcmp(name, vsw_propname) != 0) 1585 continue; 1586 1587 if (md_get_prop_val(mdp, listp[i], "cfg-handle", &cfgh) != 0) { 1588 DWARN(vswp, "%s: cfg-handle property not found\n", 1589 __func__); 1590 goto vsw_readmd_exit; 1591 } 1592 1593 /* is this the required instance of vsw? */ 1594 if (inst != cfgh) 1595 continue; 1596 1597 /* now read all properties of this vsw instance */ 1598 rv = vsw_get_initial_md_properties(vswp, mdp, listp[i]); 1599 break; 1600 } 1601 1602 vsw_readmd_exit: 1603 1604 kmem_free(listp, listsz); 1605 (void) md_fini_handle(mdp); 1606 return (rv); 1607 } 1608 1609 /* 1610 * Read the initial start-of-day values from the specified MD node. 1611 */ 1612 static int 1613 vsw_get_initial_md_properties(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1614 { 1615 int i; 1616 uint64_t macaddr = 0; 1617 1618 D1(vswp, "%s: enter", __func__); 1619 1620 if (vsw_get_md_physname(vswp, mdp, node, vswp->physname) != 0) { 1621 return (1); 1622 } 1623 1624 /* mac address for vswitch device itself */ 1625 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1626 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1627 vswp->instance); 1628 return (1); 1629 } 1630 1631 vsw_save_lmacaddr(vswp, macaddr); 1632 1633 if (vsw_get_md_smodes(vswp, mdp, node, vswp->smode, &vswp->smode_num)) { 1634 cmn_err(CE_WARN, "vsw%d: Unable to read %s property from " 1635 "MD, defaulting to programmed mode", vswp->instance, 1636 smode_propname); 1637 1638 for (i = 0; i < NUM_SMODES; i++) 1639 vswp->smode[i] = VSW_LAYER2; 1640 1641 vswp->smode_num = NUM_SMODES; 1642 } else { 1643 ASSERT(vswp->smode_num != 0); 1644 } 1645 1646 D1(vswp, "%s: exit", __func__); 1647 return (0); 1648 } 1649 1650 /* 1651 * Check to see if the relevant properties in the specified node have 1652 * changed, and if so take the appropriate action. 1653 * 1654 * If any of the properties are missing or invalid we don't take 1655 * any action, as this function should only be invoked when modifications 1656 * have been made to what we assume is a working configuration, which 1657 * we leave active. 1658 * 1659 * Note it is legal for this routine to be invoked even if none of the 1660 * properties in the port node within the MD have actually changed. 1661 */ 1662 static void 1663 vsw_update_md_prop(vsw_t *vswp, md_t *mdp, mde_cookie_t node) 1664 { 1665 char physname[LIFNAMSIZ]; 1666 char drv[LIFNAMSIZ]; 1667 uint_t ddi_instance; 1668 uint8_t new_smode[NUM_SMODES]; 1669 int i, smode_num = 0; 1670 uint64_t macaddr = 0; 1671 enum {MD_init = 0x1, 1672 MD_physname = 0x2, 1673 MD_macaddr = 0x4, 1674 MD_smode = 0x8} updated; 1675 int rv; 1676 1677 updated = MD_init; 1678 1679 D1(vswp, "%s: enter", __func__); 1680 1681 /* 1682 * Check if name of physical device in MD has changed. 1683 */ 1684 if (vsw_get_md_physname(vswp, mdp, node, (char *)&physname) == 0) { 1685 /* 1686 * Do basic sanity check on new device name/instance, 1687 * if its non NULL. It is valid for the device name to 1688 * have changed from a non NULL to a NULL value, i.e. 1689 * the vsw is being changed to 'routed' mode. 1690 */ 1691 if ((strlen(physname) != 0) && 1692 (ddi_parse(physname, drv, 1693 &ddi_instance) != DDI_SUCCESS)) { 1694 cmn_err(CE_WARN, "!vsw%d: new device name %s is not" 1695 " a valid device name/instance", 1696 vswp->instance, physname); 1697 goto fail_reconf; 1698 } 1699 1700 if (strcmp(physname, vswp->physname)) { 1701 D2(vswp, "%s: device name changed from %s to %s", 1702 __func__, vswp->physname, physname); 1703 1704 updated |= MD_physname; 1705 } else { 1706 D2(vswp, "%s: device name unchanged at %s", 1707 __func__, vswp->physname); 1708 } 1709 } else { 1710 cmn_err(CE_WARN, "!vsw%d: Unable to read name of physical " 1711 "device from updated MD.", vswp->instance); 1712 goto fail_reconf; 1713 } 1714 1715 /* 1716 * Check if MAC address has changed. 1717 */ 1718 if (md_get_prop_val(mdp, node, macaddr_propname, &macaddr) != 0) { 1719 cmn_err(CE_WARN, "!vsw%d: Unable to get MAC address from MD", 1720 vswp->instance); 1721 goto fail_reconf; 1722 } else { 1723 uint64_t maddr = macaddr; 1724 READ_ENTER(&vswp->if_lockrw); 1725 for (i = ETHERADDRL - 1; i >= 0; i--) { 1726 if (vswp->if_addr.ether_addr_octet[i] 1727 != (macaddr & 0xFF)) { 1728 D2(vswp, "%s: octet[%d] 0x%x != 0x%x", 1729 __func__, i, 1730 vswp->if_addr.ether_addr_octet[i], 1731 (macaddr & 0xFF)); 1732 updated |= MD_macaddr; 1733 macaddr = maddr; 1734 break; 1735 } 1736 macaddr >>= 8; 1737 } 1738 RW_EXIT(&vswp->if_lockrw); 1739 if (updated & MD_macaddr) { 1740 vsw_save_lmacaddr(vswp, macaddr); 1741 } 1742 } 1743 1744 /* 1745 * Check if switching modes have changed. 1746 */ 1747 if (vsw_get_md_smodes(vswp, mdp, node, 1748 new_smode, &smode_num)) { 1749 cmn_err(CE_WARN, "!vsw%d: Unable to read %s property from MD", 1750 vswp->instance, smode_propname); 1751 goto fail_reconf; 1752 } else { 1753 ASSERT(smode_num != 0); 1754 if (smode_num != vswp->smode_num) { 1755 D2(vswp, "%s: number of modes changed from %d to %d", 1756 __func__, vswp->smode_num, smode_num); 1757 } 1758 1759 for (i = 0; i < smode_num; i++) { 1760 if (new_smode[i] != vswp->smode[i]) { 1761 D2(vswp, "%s: mode changed from %d to %d", 1762 __func__, vswp->smode[i], new_smode[i]); 1763 updated |= MD_smode; 1764 break; 1765 } 1766 } 1767 } 1768 1769 /* 1770 * Now make any changes which are needed... 1771 */ 1772 1773 if (updated & (MD_physname | MD_smode)) { 1774 1775 /* 1776 * Stop any pending timeout to setup switching mode. 1777 */ 1778 vsw_stop_switching_timeout(vswp); 1779 1780 /* 1781 * Remove unicst, mcst addrs of vsw interface 1782 * and ports from the physdev. 1783 */ 1784 vsw_unset_addrs(vswp); 1785 1786 /* 1787 * Stop, detach and close the old device.. 1788 */ 1789 mutex_enter(&vswp->mac_lock); 1790 1791 vsw_mac_detach(vswp); 1792 vsw_mac_close(vswp); 1793 1794 mutex_exit(&vswp->mac_lock); 1795 1796 /* 1797 * Update phys name. 1798 */ 1799 if (updated & MD_physname) { 1800 cmn_err(CE_NOTE, "!vsw%d: changing from %s to %s", 1801 vswp->instance, vswp->physname, physname); 1802 (void) strncpy(vswp->physname, 1803 physname, strlen(physname) + 1); 1804 } 1805 1806 /* 1807 * Update array with the new switch mode values. 1808 */ 1809 if (updated & MD_smode) { 1810 for (i = 0; i < smode_num; i++) 1811 vswp->smode[i] = new_smode[i]; 1812 1813 vswp->smode_num = smode_num; 1814 vswp->smode_idx = 0; 1815 } 1816 1817 /* 1818 * ..and attach, start the new device. 1819 */ 1820 rv = vsw_setup_switching(vswp); 1821 if (rv == EAGAIN) { 1822 /* 1823 * Unable to setup switching mode. 1824 * As the error is EAGAIN, schedule a timeout to retry 1825 * and return. Programming addresses of ports and 1826 * vsw interface will be done when the timeout handler 1827 * completes successfully. 1828 */ 1829 mutex_enter(&vswp->swtmout_lock); 1830 1831 vswp->swtmout_enabled = B_TRUE; 1832 vswp->swtmout_id = 1833 timeout(vsw_setup_switching_timeout, vswp, 1834 (vsw_setup_switching_delay * 1835 drv_usectohz(MICROSEC))); 1836 1837 mutex_exit(&vswp->swtmout_lock); 1838 1839 return; 1840 1841 } else if (rv) { 1842 goto fail_update; 1843 } 1844 1845 /* 1846 * program unicst, mcst addrs of vsw interface 1847 * and ports in the physdev. 1848 */ 1849 vsw_set_addrs(vswp); 1850 1851 } else if (updated & MD_macaddr) { 1852 /* 1853 * We enter here if only MD_macaddr is exclusively updated. 1854 * If MD_physname and/or MD_smode are also updated, then 1855 * as part of that, we would have implicitly processed 1856 * MD_macaddr update (above). 1857 */ 1858 cmn_err(CE_NOTE, "!vsw%d: changing mac address to 0x%lx", 1859 vswp->instance, macaddr); 1860 1861 READ_ENTER(&vswp->if_lockrw); 1862 if (vswp->if_state & VSW_IF_UP) { 1863 1864 mutex_enter(&vswp->hw_lock); 1865 /* 1866 * Remove old mac address of vsw interface 1867 * from the physdev 1868 */ 1869 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 1870 /* 1871 * Program new mac address of vsw interface 1872 * in the physdev 1873 */ 1874 rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 1875 mutex_exit(&vswp->hw_lock); 1876 if (rv != 0) { 1877 cmn_err(CE_NOTE, 1878 "!vsw%d: failed to program interface " 1879 "unicast address\n", vswp->instance); 1880 } 1881 /* 1882 * Notify the MAC layer of the changed address. 1883 */ 1884 mac_unicst_update(vswp->if_mh, 1885 (uint8_t *)&vswp->if_addr); 1886 1887 } 1888 RW_EXIT(&vswp->if_lockrw); 1889 1890 } 1891 1892 return; 1893 1894 fail_reconf: 1895 cmn_err(CE_WARN, "!vsw%d: configuration unchanged", vswp->instance); 1896 return; 1897 1898 fail_update: 1899 cmn_err(CE_WARN, "!vsw%d: update of configuration failed", 1900 vswp->instance); 1901 } 1902 1903 /* 1904 * Add a new port to the system. 1905 * 1906 * Returns 0 on success, 1 on failure. 1907 */ 1908 int 1909 vsw_port_add(vsw_t *vswp, md_t *mdp, mde_cookie_t *node) 1910 { 1911 uint64_t ldc_id; 1912 uint8_t *addrp; 1913 int i, addrsz; 1914 int num_nodes = 0, nchan = 0; 1915 int listsz = 0; 1916 mde_cookie_t *listp = NULL; 1917 struct ether_addr ea; 1918 uint64_t macaddr; 1919 uint64_t inst = 0; 1920 vsw_port_t *port; 1921 1922 if (md_get_prop_val(mdp, *node, id_propname, &inst)) { 1923 DWARN(vswp, "%s: prop(%s) not found", __func__, 1924 id_propname); 1925 return (1); 1926 } 1927 1928 /* 1929 * Find the channel endpoint node(s) (which should be under this 1930 * port node) which contain the channel id(s). 1931 */ 1932 if ((num_nodes = md_node_count(mdp)) <= 0) { 1933 DERR(vswp, "%s: invalid number of nodes found (%d)", 1934 __func__, num_nodes); 1935 return (1); 1936 } 1937 1938 D2(vswp, "%s: %d nodes found", __func__, num_nodes); 1939 1940 /* allocate enough space for node list */ 1941 listsz = num_nodes * sizeof (mde_cookie_t); 1942 listp = kmem_zalloc(listsz, KM_SLEEP); 1943 1944 nchan = md_scan_dag(mdp, *node, md_find_name(mdp, chan_propname), 1945 md_find_name(mdp, "fwd"), listp); 1946 1947 if (nchan <= 0) { 1948 DWARN(vswp, "%s: no %s nodes found", __func__, chan_propname); 1949 kmem_free(listp, listsz); 1950 return (1); 1951 } 1952 1953 D2(vswp, "%s: %d %s nodes found", __func__, nchan, chan_propname); 1954 1955 /* use property from first node found */ 1956 if (md_get_prop_val(mdp, listp[0], id_propname, &ldc_id)) { 1957 DWARN(vswp, "%s: prop(%s) not found\n", __func__, 1958 id_propname); 1959 kmem_free(listp, listsz); 1960 return (1); 1961 } 1962 1963 /* don't need list any more */ 1964 kmem_free(listp, listsz); 1965 1966 D2(vswp, "%s: ldc_id 0x%llx", __func__, ldc_id); 1967 1968 /* read mac-address property */ 1969 if (md_get_prop_data(mdp, *node, remaddr_propname, 1970 &addrp, &addrsz)) { 1971 DWARN(vswp, "%s: prop(%s) not found", 1972 __func__, remaddr_propname); 1973 return (1); 1974 } 1975 1976 if (addrsz < ETHERADDRL) { 1977 DWARN(vswp, "%s: invalid address size", __func__); 1978 return (1); 1979 } 1980 1981 macaddr = *((uint64_t *)addrp); 1982 D2(vswp, "%s: remote mac address 0x%llx", __func__, macaddr); 1983 1984 for (i = ETHERADDRL - 1; i >= 0; i--) { 1985 ea.ether_addr_octet[i] = macaddr & 0xFF; 1986 macaddr >>= 8; 1987 } 1988 1989 if (vsw_port_attach(vswp, (int)inst, &ldc_id, 1, &ea) != 0) { 1990 DERR(vswp, "%s: failed to attach port", __func__); 1991 return (1); 1992 } 1993 1994 port = vsw_lookup_port(vswp, (int)inst); 1995 1996 /* just successfuly created the port, so it should exist */ 1997 ASSERT(port != NULL); 1998 1999 return (0); 2000 } 2001 2002 /* 2003 * vsw_mac_rx -- A common function to send packets to the interface. 2004 * By default this function check if the interface is UP or not, the 2005 * rest of the behaviour depends on the flags as below: 2006 * 2007 * VSW_MACRX_PROMISC -- Check if the promisc mode set or not. 2008 * VSW_MACRX_COPYMSG -- Make a copy of the message(s). 2009 * VSW_MACRX_FREEMSG -- Free if the messages cannot be sent up the stack. 2010 */ 2011 void 2012 vsw_mac_rx(vsw_t *vswp, int caller, mac_resource_handle_t mrh, 2013 mblk_t *mp, mblk_t *mpt, vsw_macrx_flags_t flags) 2014 { 2015 int trigger = 0; 2016 2017 D1(vswp, "%s:enter\n", __func__); 2018 READ_ENTER(&vswp->if_lockrw); 2019 /* Check if the interface is up */ 2020 if (!(vswp->if_state & VSW_IF_UP)) { 2021 RW_EXIT(&vswp->if_lockrw); 2022 /* Free messages only if FREEMSG flag specified */ 2023 if (flags & VSW_MACRX_FREEMSG) { 2024 freemsgchain(mp); 2025 } 2026 D1(vswp, "%s:exit\n", __func__); 2027 return; 2028 } 2029 /* 2030 * If PROMISC flag is passed, then check if 2031 * the interface is in the PROMISC mode. 2032 * If not, drop the messages. 2033 */ 2034 if (flags & VSW_MACRX_PROMISC) { 2035 if (!(vswp->if_state & VSW_IF_PROMISC)) { 2036 RW_EXIT(&vswp->if_lockrw); 2037 /* Free messages only if FREEMSG flag specified */ 2038 if (flags & VSW_MACRX_FREEMSG) { 2039 freemsgchain(mp); 2040 } 2041 D1(vswp, "%s:exit\n", __func__); 2042 return; 2043 } 2044 } 2045 RW_EXIT(&vswp->if_lockrw); 2046 /* 2047 * If COPYMSG flag is passed, then make a copy 2048 * of the message chain and send up the copy. 2049 */ 2050 if (flags & VSW_MACRX_COPYMSG) { 2051 mp = copymsgchain(mp); 2052 if (mp) { 2053 mpt = mp; 2054 /* find the tail */ 2055 while (mpt->b_next != NULL) { 2056 mpt = mpt->b_next; 2057 } 2058 } else { 2059 D1(vswp, "%s:exit\n", __func__); 2060 return; 2061 } 2062 } 2063 2064 /* 2065 * If the softint is not enabled or the packets are 2066 * passed by the physical device, then the caller 2067 * is expected to be at the interrupt context. For 2068 * this case, mac_rx() directly. 2069 */ 2070 if ((vswp->rx_softint == B_FALSE) || (caller == VSW_PHYSDEV)) { 2071 ASSERT(servicing_interrupt()); 2072 D3(vswp, "%s: sending up stack", __func__); 2073 mac_rx(vswp->if_mh, mrh, mp); 2074 D1(vswp, "%s:exit\n", __func__); 2075 return; 2076 } 2077 2078 /* 2079 * Here we may not be at the interrupt context, so 2080 * queue the packets and trigger a softint to post 2081 * the packets up the stack. 2082 */ 2083 mutex_enter(&vswp->soft_lock); 2084 if (vswp->rx_mhead == NULL) { 2085 vswp->rx_mhead = mp; 2086 vswp->rx_mtail = mpt; 2087 trigger = 1; 2088 } else { 2089 vswp->rx_mtail->b_next = mp; 2090 vswp->rx_mtail = mpt; 2091 } 2092 mutex_exit(&vswp->soft_lock); 2093 if (trigger) { 2094 D3(vswp, "%s: triggering the softint", __func__); 2095 (void) ddi_intr_trigger_softint(vswp->soft_handle, NULL); 2096 } 2097 D1(vswp, "%s:exit\n", __func__); 2098 } 2099 2100 /* 2101 * vsw_rx_softintr -- vsw soft interrupt handler function. 2102 * Its job is to pickup the recieved packets that are queued 2103 * for the interface and send them up. 2104 * 2105 * NOTE: An interrupt handler is being used to handle the upper 2106 * layer(s) requirement to send up only at interrupt context. 2107 */ 2108 /* ARGSUSED */ 2109 static uint_t 2110 vsw_rx_softintr(caddr_t arg1, caddr_t arg2) 2111 { 2112 mblk_t *mp; 2113 vsw_t *vswp = (vsw_t *)arg1; 2114 2115 mutex_enter(&vswp->soft_lock); 2116 mp = vswp->rx_mhead; 2117 vswp->rx_mhead = vswp->rx_mtail = NULL; 2118 mutex_exit(&vswp->soft_lock); 2119 if (mp != NULL) { 2120 READ_ENTER(&vswp->if_lockrw); 2121 if (vswp->if_state & VSW_IF_UP) { 2122 RW_EXIT(&vswp->if_lockrw); 2123 mac_rx(vswp->if_mh, NULL, mp); 2124 } else { 2125 RW_EXIT(&vswp->if_lockrw); 2126 freemsgchain(mp); 2127 } 2128 } 2129 D1(vswp, "%s:exit\n", __func__); 2130 return (DDI_INTR_CLAIMED); 2131 } 2132 2133 /* copy mac address of vsw into soft state structure */ 2134 static void 2135 vsw_save_lmacaddr(vsw_t *vswp, uint64_t macaddr) 2136 { 2137 int i; 2138 2139 WRITE_ENTER(&vswp->if_lockrw); 2140 for (i = ETHERADDRL - 1; i >= 0; i--) { 2141 vswp->if_addr.ether_addr_octet[i] = macaddr & 0xFF; 2142 macaddr >>= 8; 2143 } 2144 RW_EXIT(&vswp->if_lockrw); 2145 } 2146