1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/vsw.h> 66 67 
/* MAC Ring table functions. */ 68 static void vsw_mac_ring_tbl_init(vsw_t *vswp); 69 static void vsw_mac_ring_tbl_destroy(vsw_t *vswp); 70 static void vsw_queue_worker(vsw_mac_ring_t *rrp); 71 static void vsw_queue_stop(vsw_queue_t *vqp); 72 static vsw_queue_t *vsw_queue_create(); 73 static void vsw_queue_destroy(vsw_queue_t *vqp); 74 static void vsw_rx_queue_cb(void *, mac_resource_handle_t, mblk_t *); 75 static void vsw_rx_cb(void *, mac_resource_handle_t, mblk_t *); 76 77 /* MAC layer routines */ 78 static mac_resource_handle_t vsw_mac_ring_add_cb(void *arg, 79 mac_resource_t *mrp); 80 static int vsw_set_hw_addr(vsw_t *, mac_multi_addr_t *); 81 static int vsw_set_hw_promisc(vsw_t *, vsw_port_t *, int); 82 static int vsw_unset_hw_addr(vsw_t *, int); 83 static int vsw_unset_hw_promisc(vsw_t *, vsw_port_t *, int); 84 static int vsw_prog_if(vsw_t *); 85 86 /* Support functions */ 87 static int vsw_prog_ports(vsw_t *); 88 int vsw_set_hw(vsw_t *, vsw_port_t *, int); 89 int vsw_unset_hw(vsw_t *, vsw_port_t *, int); 90 void vsw_reconfig_hw(vsw_t *); 91 int vsw_mac_attach(vsw_t *vswp); 92 void vsw_mac_detach(vsw_t *vswp); 93 int vsw_mac_open(vsw_t *vswp); 94 void vsw_mac_close(vsw_t *vswp); 95 void vsw_unset_addrs(vsw_t *vswp); 96 void vsw_set_addrs(vsw_t *vswp); 97 int vsw_get_hw_maddr(vsw_t *); 98 mblk_t *vsw_tx_msg(vsw_t *, mblk_t *); 99 100 /* 101 * Tunables used in this file. 102 */ 103 extern int vsw_mac_open_retries; 104 extern boolean_t vsw_multi_ring_enable; 105 extern int vsw_mac_rx_rings; 106 107 /* 108 * Check to see if the card supports the setting of multiple unicst 109 * addresses. 110 * 111 * Returns 0 if card supports the programming of multiple unicast addresses, 112 * otherwise returns 1. 
 */
int
vsw_get_hw_maddr(vsw_t *vswp)
{
	D1(vswp, "%s: enter", __func__);

	ASSERT(MUTEX_HELD(&vswp->mac_lock));

	/* No underlying device currently open; nothing to query. */
	if (vswp->mh == NULL)
		return (1);

	/*
	 * Query the MAC layer for the multi-unicast-address capability;
	 * on success the result is cached in vswp->maddr for later use
	 * by vsw_set_hw_addr()/vsw_unset_hw_addr().
	 */
	if (!mac_capab_get(vswp->mh, MAC_CAPAB_MULTIADDRESS, &vswp->maddr)) {
		cmn_err(CE_WARN, "!vsw%d: device (%s) does not support "
		    "setting multiple unicast addresses", vswp->instance,
		    vswp->physname);
		return (1);
	}

	D2(vswp, "%s: %d addrs : %d free", __func__,
	    vswp->maddr.maddr_naddr, vswp->maddr.maddr_naddrfree);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Program unicast and multicast addresses of vsw interface and the ports
 * into the physical device.
 */
void
vsw_set_addrs(vsw_t *vswp)
{
	vsw_port_list_t	*plist = &vswp->plist;
	vsw_port_t	*port;
	mcst_addr_t	*mcap;
	int		rv;

	READ_ENTER(&vswp->if_lockrw);

	if (vswp->if_state & VSW_IF_UP) {

		/* program unicst addr of vsw interface in the physdev */
		if (vswp->addr_set == VSW_ADDR_UNSET) {
			mutex_enter(&vswp->hw_lock);
			rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV);
			mutex_exit(&vswp->hw_lock);
			if (rv != 0) {
				cmn_err(CE_NOTE,
				    "!vsw%d: failed to program interface "
				    "unicast address\n", vswp->instance);
			}
			/*
			 * Notify the MAC layer of the changed address.
			 */
			mac_unicst_update(vswp->if_mh,
			    (uint8_t *)&vswp->if_addr);
		}

		/* program mcast addrs of vsw interface in the physdev */
		mutex_enter(&vswp->mca_lock);
		mutex_enter(&vswp->mac_lock);
		for (mcap = vswp->mcap; mcap != NULL; mcap = mcap->nextp) {
			/* skip addresses already pushed into the device */
			if (mcap->mac_added)
				continue;
			rv = mac_multicst_add(vswp->mh, (uchar_t *)&mcap->mca);
			if (rv == 0) {
				mcap->mac_added = B_TRUE;
			} else {
				cmn_err(CE_WARN, "!vsw%d: unable to add "
				    "multicast address: %s\n", vswp->instance,
				    ether_sprintf((void *)&mcap->mca));
			}
		}
		mutex_exit(&vswp->mac_lock);
		mutex_exit(&vswp->mca_lock);

	}

	RW_EXIT(&vswp->if_lockrw);

	WRITE_ENTER(&plist->lockrw);

	/* program unicast address of ports in the physical device */
	mutex_enter(&vswp->hw_lock);
	for (port = plist->head; port != NULL; port = port->p_next) {
		if (port->addr_set != VSW_ADDR_UNSET) /* addr already set */
			continue;
		if (vsw_set_hw(vswp, port, VSW_VNETPORT)) {
			cmn_err(CE_NOTE,
			    "!vsw%d: port:%d failed to set unicast address\n",
			    vswp->instance, port->p_instance);
		}
	}
	mutex_exit(&vswp->hw_lock);

	/* program multicast addresses of ports in the physdev */
	for (port = plist->head; port != NULL; port = port->p_next) {
		mutex_enter(&port->mca_lock);
		mutex_enter(&vswp->mac_lock);
		for (mcap = port->mcap; mcap != NULL; mcap = mcap->nextp) {
			/* skip addresses already pushed into the device */
			if (mcap->mac_added)
				continue;
			rv = mac_multicst_add(vswp->mh, (uchar_t *)&mcap->mca);
			if (rv == 0) {
				mcap->mac_added = B_TRUE;
			} else {
				cmn_err(CE_WARN, "!vsw%d: unable to add "
				    "multicast address: %s\n", vswp->instance,
				    ether_sprintf((void *)&mcap->mca));
			}
		}
		mutex_exit(&vswp->mac_lock);
		mutex_exit(&port->mca_lock);
	}

	RW_EXIT(&plist->lockrw);
}

/*
 * Remove unicast and multicast addresses of vsw interface and the ports
 * from the physical device.
235 */ 236 void 237 vsw_unset_addrs(vsw_t *vswp) 238 { 239 vsw_port_list_t *plist = &vswp->plist; 240 vsw_port_t *port; 241 mcst_addr_t *mcap; 242 243 READ_ENTER(&vswp->if_lockrw); 244 245 if (vswp->if_state & VSW_IF_UP) { 246 247 /* 248 * Remove unicast addr of vsw interfce 249 * from current physdev 250 */ 251 mutex_enter(&vswp->hw_lock); 252 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 253 mutex_exit(&vswp->hw_lock); 254 255 /* 256 * Remove mcast addrs of vsw interface 257 * from current physdev 258 */ 259 mutex_enter(&vswp->mca_lock); 260 mutex_enter(&vswp->mac_lock); 261 for (mcap = vswp->mcap; mcap != NULL; mcap = mcap->nextp) { 262 if (!mcap->mac_added) 263 continue; 264 (void) mac_multicst_remove(vswp->mh, 265 (uchar_t *)&mcap->mca); 266 mcap->mac_added = B_FALSE; 267 } 268 mutex_exit(&vswp->mac_lock); 269 mutex_exit(&vswp->mca_lock); 270 271 } 272 273 RW_EXIT(&vswp->if_lockrw); 274 275 WRITE_ENTER(&plist->lockrw); 276 277 /* 278 * Remove unicast address of ports from the current physical device 279 */ 280 mutex_enter(&vswp->hw_lock); 281 for (port = plist->head; port != NULL; port = port->p_next) { 282 /* Remove address if was programmed into HW. */ 283 if (port->addr_set == VSW_ADDR_UNSET) 284 continue; 285 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT); 286 } 287 mutex_exit(&vswp->hw_lock); 288 289 /* Remove multicast addresses of ports from the current physdev */ 290 for (port = plist->head; port != NULL; port = port->p_next) { 291 mutex_enter(&port->mca_lock); 292 mutex_enter(&vswp->mac_lock); 293 for (mcap = port->mcap; mcap != NULL; mcap = mcap->nextp) { 294 if (!mcap->mac_added) 295 continue; 296 (void) mac_multicst_remove(vswp->mh, 297 (uchar_t *)&mcap->mca); 298 mcap->mac_added = B_FALSE; 299 } 300 mutex_exit(&vswp->mac_lock); 301 mutex_exit(&port->mca_lock); 302 } 303 304 RW_EXIT(&plist->lockrw); 305 } 306 307 /* 308 * Open the underlying physical device for access in layer2 mode. 
309 * Returns: 310 * 0 on success 311 * EAGAIN if mac_open() fails due to the device being not available yet. 312 * EIO on any other failures. 313 */ 314 int 315 vsw_mac_open(vsw_t *vswp) 316 { 317 char drv[LIFNAMSIZ]; 318 uint_t ddi_instance; 319 int rv; 320 321 ASSERT(MUTEX_HELD(&vswp->mac_lock)); 322 323 if (vswp->mh != NULL) { 324 /* already open */ 325 return (0); 326 } 327 328 if (vswp->mac_open_retries++ >= vsw_mac_open_retries) { 329 /* exceeded max retries */ 330 return (EIO); 331 } 332 333 if (ddi_parse(vswp->physname, drv, &ddi_instance) != DDI_SUCCESS) { 334 cmn_err(CE_WARN, "!vsw%d: invalid device name: %s", 335 vswp->instance, vswp->physname); 336 return (EIO); 337 } 338 339 /* 340 * Aggregation devices are special in that the device instance 341 * must be set to zero when they are being mac_open()'ed. 342 * 343 * The only way to determine if we are being passed an aggregated 344 * device is to check the device name. 345 */ 346 if (strcmp(drv, "aggr") == 0) { 347 ddi_instance = 0; 348 } 349 350 rv = mac_open(vswp->physname, ddi_instance, &vswp->mh); 351 if (rv != 0) { 352 /* 353 * If mac_open() failed and the error indicates that the 354 * device is not available yet, then, we return EAGAIN to 355 * indicate that it needs to be retried. 356 * For example, this may happen during boot up, as the 357 * required link aggregation groups(devices) have not been 358 * created yet. 359 */ 360 if (rv == ENOENT) { 361 return (EAGAIN); 362 } else { 363 cmn_err(CE_WARN, "vsw%d: mac_open %s failed rv:%x", 364 vswp->instance, vswp->physname, rv); 365 return (EIO); 366 } 367 } 368 369 vswp->mac_open_retries = 0; 370 371 return (0); 372 } 373 374 /* 375 * Close the underlying physical device. 
 */
void
vsw_mac_close(vsw_t *vswp)
{
	ASSERT(MUTEX_HELD(&vswp->mac_lock));

	/* mh is NULL if the device was never opened or already closed. */
	if (vswp->mh != NULL) {
		mac_close(vswp->mh);
		vswp->mh = NULL;
	}
}

/*
 * Link into the MAC layer to gain access to the services provided by
 * the underlying physical device driver (which should also have
 * registered with the MAC layer).
 *
 * Only when in layer 2 mode.
 */
int
vsw_mac_attach(vsw_t *vswp)
{
	D1(vswp, "%s: enter", __func__);

	ASSERT(vswp->mrh == NULL);
	ASSERT(vswp->mstarted == B_FALSE);
	ASSERT(vswp->mresources == B_FALSE);

	ASSERT(MUTEX_HELD(&vswp->mac_lock));

	ASSERT(vswp->mh != NULL);

	D2(vswp, "vsw_mac_attach: using device %s", vswp->physname);

	if (vsw_multi_ring_enable) {
		/*
		 * Initialize the ring table.
		 */
		vsw_mac_ring_tbl_init(vswp);

		/*
		 * Register our rx callback function.
		 */
		vswp->mrh = mac_rx_add(vswp->mh,
		    vsw_rx_queue_cb, (void *)vswp);
		ASSERT(vswp->mrh != NULL);

		/*
		 * Register our mac resource callback.
		 */
		mac_resource_set(vswp->mh, vsw_mac_ring_add_cb, (void *)vswp);
		vswp->mresources = B_TRUE;

		/*
		 * Get the ring resources available to us from
		 * the mac below us.
		 */
		mac_resources(vswp->mh);
	} else {
		/*
		 * Just register our rx callback function
		 */
		vswp->mrh = mac_rx_add(vswp->mh, vsw_rx_cb, (void *)vswp);
		ASSERT(vswp->mrh != NULL);
	}

	/* Get the MAC tx fn */
	vswp->txinfo = mac_tx_get(vswp->mh);

	/* start the interface */
	if (mac_start(vswp->mh) != 0) {
		cmn_err(CE_WARN, "!vsw%d: Could not start mac interface",
		    vswp->instance);
		goto mac_fail_exit;
	}

	vswp->mstarted = B_TRUE;

	D1(vswp, "%s: exit", __func__);
	return (0);

mac_fail_exit:
	/* Undo any partial setup performed above. */
	vsw_mac_detach(vswp);

	D1(vswp, "%s: exit", __func__);
	return (1);
}

/*
 * Undo vsw_mac_attach(): stop the interface, remove the rx and
 * resource callbacks and (in multi-ring mode) tear down the ring
 * table. Safe to call on a partially attached vswp.
 */
void
vsw_mac_detach(vsw_t *vswp)
{
	D1(vswp, "vsw_mac_detach: enter");

	ASSERT(vswp != NULL);
	ASSERT(MUTEX_HELD(&vswp->mac_lock));

	if (vsw_multi_ring_enable) {
		vsw_mac_ring_tbl_destroy(vswp);
	}

	if (vswp->mh != NULL) {
		if (vswp->mstarted)
			mac_stop(vswp->mh);
		if (vswp->mrh != NULL)
			mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE);
		if (vswp->mresources)
			mac_resource_set(vswp->mh, NULL, NULL);
	}

	vswp->mrh = NULL;
	vswp->txinfo = NULL;
	vswp->mstarted = B_FALSE;

	D1(vswp, "vsw_mac_detach: exit");
}

/*
 * Depending on the mode specified, the capabilites and capacity
 * of the underlying device setup the physical device.
 *
 * If in layer 3 mode, then do nothing.
 *
 * If in layer 2 programmed mode attempt to program the unicast address
 * associated with the port into the physical device. If this is not
 * possible due to resource exhaustion or simply because the device does
 * not support multiple unicast addresses then if required fallback onto
 * putting the card into promisc mode.
 *
 * If in promisc mode then simply set the card into promisc mode.
 *
 * Returns 0 success, 1 on failure.
 */
int
vsw_set_hw(vsw_t *vswp, vsw_port_t *port, int type)
{
	mac_multi_addr_t	mac_addr;
	int			err;

	D1(vswp, "%s: enter", __func__);

	ASSERT(MUTEX_HELD(&vswp->hw_lock));
	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));

	/* Layer 3: no HW programming required. */
	if (vswp->smode[vswp->smode_idx] == VSW_LAYER3)
		return (0);

	/* Promiscuous mode requested: just enable promisc on the card. */
	if (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC) {
		return (vsw_set_hw_promisc(vswp, port, type));
	}

	/*
	 * Attempt to program the unicast address into the HW.
	 */
	mac_addr.mma_addrlen = ETHERADDRL;
	if (type == VSW_VNETPORT) {
		ASSERT(port != NULL);
		ether_copy(&port->p_macaddr, &mac_addr.mma_addr);
	} else {
		ether_copy(&vswp->if_addr, &mac_addr.mma_addr);
	}

	/*
	 * NOTE(review): on failure this function actually returns the
	 * errno from vsw_set_hw_addr() (e.g. ENOSPC), not 1 as the
	 * header comment suggests -- callers only test for non-zero.
	 */
	err = vsw_set_hw_addr(vswp, &mac_addr);
	if (err == ENOSPC) {
		/*
		 * Mark that attempt should be made to re-config sometime
		 * in future if a port is deleted.
		 */
		vswp->recfg_reqd = B_TRUE;

		/*
		 * Only 1 mode specified, nothing more to do.
		 */
		if (vswp->smode_num == 1)
			return (err);

		/*
		 * If promiscuous was next mode specified try to
		 * set the card into that mode.
		 */
		if ((vswp->smode_idx <= (vswp->smode_num - 2)) &&
		    (vswp->smode[vswp->smode_idx + 1] ==
		    VSW_LAYER2_PROMISC)) {
			vswp->smode_idx += 1;
			return (vsw_set_hw_promisc(vswp, port, type));
		}
		return (err);
	}

	if (err != 0)
		return (err);

	/* Record where the address landed so it can be removed later. */
	if (type == VSW_VNETPORT) {
		port->addr_slot = mac_addr.mma_slot;
		port->addr_set = VSW_ADDR_HW;
	} else {
		vswp->addr_slot = mac_addr.mma_slot;
		vswp->addr_set = VSW_ADDR_HW;
	}

	D2(vswp, "programmed addr %s into slot %d "
	    "of device %s", ether_sprintf((void *)mac_addr.mma_addr),
	    mac_addr.mma_slot, vswp->physname);

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * If in layer 3 mode do nothing.
586 * 587 * If in layer 2 switched mode remove the address from the physical 588 * device. 589 * 590 * If in layer 2 promiscuous mode disable promisc mode. 591 * 592 * Returns 0 on success. 593 */ 594 int 595 vsw_unset_hw(vsw_t *vswp, vsw_port_t *port, int type) 596 { 597 mac_addr_slot_t slot; 598 int rv; 599 600 D1(vswp, "%s: enter", __func__); 601 602 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 603 604 if (vswp->smode[vswp->smode_idx] == VSW_LAYER3) 605 return (0); 606 607 switch (type) { 608 case VSW_VNETPORT: 609 ASSERT(port != NULL); 610 611 if (port->addr_set == VSW_ADDR_PROMISC) { 612 return (vsw_unset_hw_promisc(vswp, port, type)); 613 614 } else if (port->addr_set == VSW_ADDR_HW) { 615 slot = port->addr_slot; 616 if ((rv = vsw_unset_hw_addr(vswp, slot)) == 0) 617 port->addr_set = VSW_ADDR_UNSET; 618 } 619 620 break; 621 622 case VSW_LOCALDEV: 623 if (vswp->addr_set == VSW_ADDR_PROMISC) { 624 return (vsw_unset_hw_promisc(vswp, NULL, type)); 625 626 } else if (vswp->addr_set == VSW_ADDR_HW) { 627 slot = vswp->addr_slot; 628 if ((rv = vsw_unset_hw_addr(vswp, slot)) == 0) 629 vswp->addr_set = VSW_ADDR_UNSET; 630 } 631 632 break; 633 634 default: 635 /* should never happen */ 636 DERR(vswp, "%s: unknown type %d", __func__, type); 637 ASSERT(0); 638 return (1); 639 } 640 641 D1(vswp, "%s: exit", __func__); 642 return (rv); 643 } 644 645 /* 646 * Attempt to program a unicast address into HW. 647 * 648 * Returns 0 on sucess, 1 on failure. 649 */ 650 static int 651 vsw_set_hw_addr(vsw_t *vswp, mac_multi_addr_t *mac) 652 { 653 void *mah; 654 int rv = EINVAL; 655 656 D1(vswp, "%s: enter", __func__); 657 658 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 659 660 if (vswp->maddr.maddr_handle == NULL) 661 return (rv); 662 663 mah = vswp->maddr.maddr_handle; 664 665 rv = vswp->maddr.maddr_add(mah, mac); 666 667 if (rv == 0) 668 return (rv); 669 670 /* 671 * Its okay for the add to fail because we have exhausted 672 * all the resouces in the hardware device. 
Any other error 673 * we want to flag. 674 */ 675 if (rv != ENOSPC) { 676 cmn_err(CE_WARN, "!vsw%d: error programming " 677 "address %s into HW err (%d)", 678 vswp->instance, ether_sprintf((void *)mac->mma_addr), rv); 679 } 680 D1(vswp, "%s: exit", __func__); 681 return (rv); 682 } 683 684 /* 685 * Remove a unicast mac address which has previously been programmed 686 * into HW. 687 * 688 * Returns 0 on sucess, 1 on failure. 689 */ 690 static int 691 vsw_unset_hw_addr(vsw_t *vswp, int slot) 692 { 693 void *mah; 694 int rv; 695 696 D1(vswp, "%s: enter", __func__); 697 698 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 699 ASSERT(slot >= 0); 700 701 if (vswp->maddr.maddr_handle == NULL) 702 return (1); 703 704 mah = vswp->maddr.maddr_handle; 705 706 rv = vswp->maddr.maddr_remove(mah, slot); 707 if (rv != 0) { 708 cmn_err(CE_WARN, "!vsw%d: unable to remove address " 709 "from slot %d in device %s (err %d)", 710 vswp->instance, slot, vswp->physname, rv); 711 return (1); 712 } 713 714 D2(vswp, "removed addr from slot %d in device %s", 715 slot, vswp->physname); 716 717 D1(vswp, "%s: exit", __func__); 718 return (0); 719 } 720 721 /* 722 * Set network card into promisc mode. 723 * 724 * Returns 0 on success, 1 on failure. 
 */
static int
vsw_set_hw_promisc(vsw_t *vswp, vsw_port_t *port, int type)
{
	D1(vswp, "%s: enter", __func__);

	ASSERT(MUTEX_HELD(&vswp->hw_lock));
	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));

	mutex_enter(&vswp->mac_lock);
	if (vswp->mh == NULL) {
		mutex_exit(&vswp->mac_lock);
		return (1);
	}

	/*
	 * promisc_cnt reference-counts promisc users; only the first
	 * caller actually switches the device into promisc mode.
	 */
	if (vswp->promisc_cnt++ == 0) {
		if (mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC) != 0) {
			vswp->promisc_cnt--;
			mutex_exit(&vswp->mac_lock);
			return (1);
		}
		cmn_err(CE_NOTE, "!vsw%d: switching device %s into "
		    "promiscuous mode", vswp->instance, vswp->physname);
	}
	mutex_exit(&vswp->mac_lock);

	/* Remember promisc was used so vsw_unset_hw() undoes the right thing. */
	if (type == VSW_VNETPORT) {
		ASSERT(port != NULL);
		port->addr_set = VSW_ADDR_PROMISC;
	} else {
		vswp->addr_set = VSW_ADDR_PROMISC;
	}

	D1(vswp, "%s: exit", __func__);

	return (0);
}

/*
 * Turn off promiscuous mode on network card.
 *
 * Returns 0 on success, 1 on failure.
 */
static int
vsw_unset_hw_promisc(vsw_t *vswp, vsw_port_t *port, int type)
{
	vsw_port_list_t	*plist = &vswp->plist;

	D2(vswp, "%s: enter", __func__);

	ASSERT(MUTEX_HELD(&vswp->hw_lock));
	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));

	mutex_enter(&vswp->mac_lock);
	if (vswp->mh == NULL) {
		mutex_exit(&vswp->mac_lock);
		return (1);
	}

	/* Last promisc user gone: take the device out of promisc mode. */
	if (--vswp->promisc_cnt == 0) {
		if (mac_promisc_set(vswp->mh, B_FALSE, MAC_DEVPROMISC) != 0) {
			vswp->promisc_cnt++;
			mutex_exit(&vswp->mac_lock);
			return (1);
		}

		/*
		 * We are exiting promisc mode either because we were
		 * only in promisc mode because we had failed over from
		 * switched mode due to HW resource issues, or the user
		 * wanted the card in promisc mode for all the ports and
		 * the last port is now being deleted. Tweak the message
		 * accordingly.
		 */
		if (plist->num_ports != 0) {
			cmn_err(CE_NOTE, "!vsw%d: switching device %s back to "
			    "programmed mode", vswp->instance, vswp->physname);
		} else {
			cmn_err(CE_NOTE, "!vsw%d: switching device %s out of "
			    "promiscuous mode", vswp->instance, vswp->physname);
		}
	}
	mutex_exit(&vswp->mac_lock);

	if (type == VSW_VNETPORT) {
		ASSERT(port != NULL);
		ASSERT(port->addr_set == VSW_ADDR_PROMISC);
		port->addr_set = VSW_ADDR_UNSET;
	} else {
		ASSERT(vswp->addr_set == VSW_ADDR_PROMISC);
		vswp->addr_set = VSW_ADDR_UNSET;
	}

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Determine whether or not we are operating in our prefered
 * mode and if not whether the physical resources now allow us
 * to operate in it.
 *
 * If a port is being removed should only be invoked after port has been
 * removed from the port list.
 */
void
vsw_reconfig_hw(vsw_t *vswp)
{
	int		s_idx;

	D1(vswp, "%s: enter", __func__);

	ASSERT(MUTEX_HELD(&vswp->hw_lock));

	/* Device does not support multiple unicast addrs; nothing to do. */
	if (vswp->maddr.maddr_handle == NULL) {
		return;
	}

	/*
	 * If we are in layer 2 (i.e. switched) or would like to be
	 * in layer 2 then check if any ports or the vswitch itself
	 * need to be programmed into the HW.
	 *
	 * This can happen in two cases - switched was specified as
	 * the prefered mode of operation but we exhausted the HW
	 * resources and so failed over to the next specifed mode,
	 * or switched was the only mode specified so after HW
	 * resources were exhausted there was nothing more we
	 * could do.
	 */
	if (vswp->smode_idx > 0)
		s_idx = vswp->smode_idx - 1;
	else
		s_idx = vswp->smode_idx;

	if (vswp->smode[s_idx] != VSW_LAYER2) {
		return;
	}

	D2(vswp, "%s: attempting reconfig..", __func__);

	/*
	 * First, attempt to set the vswitch mac address into HW,
	 * if required.
	 */
	if (vsw_prog_if(vswp)) {
		return;
	}

	/*
	 * Next, attempt to set any ports which have not yet been
	 * programmed into HW.
	 */
	if (vsw_prog_ports(vswp)) {
		return;
	}

	/*
	 * By now we know that have programmed all desired ports etc
	 * into HW, so safe to mark reconfiguration as complete.
	 */
	vswp->recfg_reqd = B_FALSE;

	/* Everything fit: fall back into the (preferred) switched mode. */
	vswp->smode_idx = s_idx;

	D1(vswp, "%s: exit", __func__);
}

/*
 * Check to see if vsw itself is plumbed, and if so whether or not
 * its mac address should be written into HW.
 *
 * Returns 0 if could set address, or didn't have to set it.
 * Returns 1 if failed to set address.
 */
static int
vsw_prog_if(vsw_t *vswp)
{
	mac_multi_addr_t	addr;

	D1(vswp, "%s: enter", __func__);

	ASSERT(MUTEX_HELD(&vswp->hw_lock));

	READ_ENTER(&vswp->if_lockrw);
	if ((vswp->if_state & VSW_IF_UP) &&
	    (vswp->addr_set != VSW_ADDR_HW)) {

		addr.mma_addrlen = ETHERADDRL;
		ether_copy(&vswp->if_addr, &addr.mma_addr);

		if (vsw_set_hw_addr(vswp, &addr) != 0) {
			RW_EXIT(&vswp->if_lockrw);
			return (1);
		}

		vswp->addr_slot = addr.mma_slot;

		/*
		 * If previously when plumbed had had to place
		 * interface into promisc mode, now reverse that.
		 *
		 * Note that interface will only actually be set into
		 * non-promisc mode when last port/interface has been
		 * programmed into HW.
		 */
		if (vswp->addr_set == VSW_ADDR_PROMISC)
			(void) vsw_unset_hw_promisc(vswp, NULL, VSW_LOCALDEV);

		vswp->addr_set = VSW_ADDR_HW;
	}
	RW_EXIT(&vswp->if_lockrw);

	D1(vswp, "%s: exit", __func__);
	return (0);
}

/*
 * Scan the port list for any ports which have not yet been set
 * into HW. For those found attempt to program their mac addresses
 * into the physical device.
 *
 * Returns 0 if able to program all required ports (can be 0) into HW.
948 * Returns 1 if failed to set at least one mac address. 949 */ 950 static int 951 vsw_prog_ports(vsw_t *vswp) 952 { 953 mac_multi_addr_t addr; 954 vsw_port_list_t *plist = &vswp->plist; 955 vsw_port_t *tp; 956 int rv = 0; 957 958 D1(vswp, "%s: enter", __func__); 959 960 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 961 962 READ_ENTER(&plist->lockrw); 963 for (tp = plist->head; tp != NULL; tp = tp->p_next) { 964 if (tp->addr_set != VSW_ADDR_HW) { 965 addr.mma_addrlen = ETHERADDRL; 966 ether_copy(&tp->p_macaddr, &addr.mma_addr); 967 968 if (vsw_set_hw_addr(vswp, &addr) != 0) { 969 rv = 1; 970 break; 971 } 972 973 tp->addr_slot = addr.mma_slot; 974 975 /* 976 * If when this port had first attached we had 977 * had to place the interface into promisc mode, 978 * then now reverse that. 979 * 980 * Note that the interface will not actually 981 * change to non-promisc mode until all ports 982 * have been programmed. 983 */ 984 if (tp->addr_set == VSW_ADDR_PROMISC) 985 (void) vsw_unset_hw_promisc(vswp, 986 tp, VSW_VNETPORT); 987 988 tp->addr_set = VSW_ADDR_HW; 989 } 990 } 991 RW_EXIT(&plist->lockrw); 992 993 D1(vswp, "%s: exit", __func__); 994 return (rv); 995 } 996 997 static void 998 vsw_mac_ring_tbl_entry_init(vsw_t *vswp, vsw_mac_ring_t *ringp) 999 { 1000 ringp->ring_state = VSW_MAC_RING_FREE; 1001 ringp->ring_arg = NULL; 1002 ringp->ring_blank = NULL; 1003 ringp->ring_vqp = NULL; 1004 ringp->ring_vswp = vswp; 1005 } 1006 1007 static void 1008 vsw_mac_ring_tbl_init(vsw_t *vswp) 1009 { 1010 int i; 1011 1012 mutex_init(&vswp->mac_ring_lock, NULL, MUTEX_DRIVER, NULL); 1013 1014 vswp->mac_ring_tbl_sz = vsw_mac_rx_rings; 1015 vswp->mac_ring_tbl = 1016 kmem_alloc(vsw_mac_rx_rings * sizeof (vsw_mac_ring_t), KM_SLEEP); 1017 1018 for (i = 0; i < vswp->mac_ring_tbl_sz; i++) 1019 vsw_mac_ring_tbl_entry_init(vswp, &vswp->mac_ring_tbl[i]); 1020 } 1021 1022 static void 1023 vsw_mac_ring_tbl_destroy(vsw_t *vswp) 1024 { 1025 int i; 1026 vsw_mac_ring_t *ringp; 1027 1028 
mutex_enter(&vswp->mac_ring_lock); 1029 for (i = 0; i < vswp->mac_ring_tbl_sz; i++) { 1030 ringp = &vswp->mac_ring_tbl[i]; 1031 1032 if (ringp->ring_state != VSW_MAC_RING_FREE) { 1033 /* 1034 * Destroy the queue. 1035 */ 1036 vsw_queue_stop(ringp->ring_vqp); 1037 vsw_queue_destroy(ringp->ring_vqp); 1038 1039 /* 1040 * Re-initialize the structure. 1041 */ 1042 vsw_mac_ring_tbl_entry_init(vswp, ringp); 1043 } 1044 } 1045 mutex_exit(&vswp->mac_ring_lock); 1046 1047 mutex_destroy(&vswp->mac_ring_lock); 1048 kmem_free(vswp->mac_ring_tbl, 1049 vswp->mac_ring_tbl_sz * sizeof (vsw_mac_ring_t)); 1050 vswp->mac_ring_tbl_sz = 0; 1051 } 1052 1053 /* 1054 * Handle resource add callbacks from the driver below. 1055 */ 1056 static mac_resource_handle_t 1057 vsw_mac_ring_add_cb(void *arg, mac_resource_t *mrp) 1058 { 1059 vsw_t *vswp = (vsw_t *)arg; 1060 mac_rx_fifo_t *mrfp = (mac_rx_fifo_t *)mrp; 1061 vsw_mac_ring_t *ringp; 1062 vsw_queue_t *vqp; 1063 int i; 1064 1065 ASSERT(vswp != NULL); 1066 ASSERT(mrp != NULL); 1067 ASSERT(vswp->mac_ring_tbl != NULL); 1068 1069 D1(vswp, "%s: enter", __func__); 1070 1071 /* 1072 * Check to make sure we have the correct resource type. 1073 */ 1074 if (mrp->mr_type != MAC_RX_FIFO) 1075 return (NULL); 1076 1077 /* 1078 * Find a open entry in the ring table. 1079 */ 1080 mutex_enter(&vswp->mac_ring_lock); 1081 for (i = 0; i < vswp->mac_ring_tbl_sz; i++) { 1082 ringp = &vswp->mac_ring_tbl[i]; 1083 1084 /* 1085 * Check for an empty slot, if found, then setup queue 1086 * and thread. 1087 */ 1088 if (ringp->ring_state == VSW_MAC_RING_FREE) { 1089 /* 1090 * Create the queue for this ring. 1091 */ 1092 vqp = vsw_queue_create(); 1093 1094 /* 1095 * Initialize the ring data structure. 1096 */ 1097 ringp->ring_vqp = vqp; 1098 ringp->ring_arg = mrfp->mrf_arg; 1099 ringp->ring_blank = mrfp->mrf_blank; 1100 ringp->ring_state = VSW_MAC_RING_INUSE; 1101 1102 /* 1103 * Create the worker thread. 
1104 */ 1105 vqp->vq_worker = thread_create(NULL, 0, 1106 vsw_queue_worker, ringp, 0, &p0, 1107 TS_RUN, minclsyspri); 1108 if (vqp->vq_worker == NULL) { 1109 vsw_queue_destroy(vqp); 1110 vsw_mac_ring_tbl_entry_init(vswp, ringp); 1111 ringp = NULL; 1112 } 1113 1114 if (ringp != NULL) { 1115 /* 1116 * Make sure thread get's running state for 1117 * this ring. 1118 */ 1119 mutex_enter(&vqp->vq_lock); 1120 while ((vqp->vq_state != VSW_QUEUE_RUNNING) && 1121 (vqp->vq_state != VSW_QUEUE_DRAINED)) { 1122 cv_wait(&vqp->vq_cv, &vqp->vq_lock); 1123 } 1124 1125 /* 1126 * If the thread is not running, cleanup. 1127 */ 1128 if (vqp->vq_state == VSW_QUEUE_DRAINED) { 1129 vsw_queue_destroy(vqp); 1130 vsw_mac_ring_tbl_entry_init(vswp, 1131 ringp); 1132 ringp = NULL; 1133 } 1134 mutex_exit(&vqp->vq_lock); 1135 } 1136 1137 mutex_exit(&vswp->mac_ring_lock); 1138 D1(vswp, "%s: exit", __func__); 1139 return ((mac_resource_handle_t)ringp); 1140 } 1141 } 1142 mutex_exit(&vswp->mac_ring_lock); 1143 1144 /* 1145 * No slots in the ring table available. 
1146 */ 1147 D1(vswp, "%s: exit", __func__); 1148 return (NULL); 1149 } 1150 1151 static void 1152 vsw_queue_stop(vsw_queue_t *vqp) 1153 { 1154 mutex_enter(&vqp->vq_lock); 1155 1156 if (vqp->vq_state == VSW_QUEUE_RUNNING) { 1157 vqp->vq_state = VSW_QUEUE_STOP; 1158 cv_signal(&vqp->vq_cv); 1159 1160 while (vqp->vq_state != VSW_QUEUE_DRAINED) 1161 cv_wait(&vqp->vq_cv, &vqp->vq_lock); 1162 } 1163 1164 vqp->vq_state = VSW_QUEUE_STOPPED; 1165 1166 mutex_exit(&vqp->vq_lock); 1167 } 1168 1169 static vsw_queue_t * 1170 vsw_queue_create() 1171 { 1172 vsw_queue_t *vqp; 1173 1174 vqp = kmem_zalloc(sizeof (vsw_queue_t), KM_SLEEP); 1175 1176 mutex_init(&vqp->vq_lock, NULL, MUTEX_DRIVER, NULL); 1177 cv_init(&vqp->vq_cv, NULL, CV_DRIVER, NULL); 1178 vqp->vq_first = NULL; 1179 vqp->vq_last = NULL; 1180 vqp->vq_state = VSW_QUEUE_STOPPED; 1181 1182 return (vqp); 1183 } 1184 1185 static void 1186 vsw_queue_destroy(vsw_queue_t *vqp) 1187 { 1188 cv_destroy(&vqp->vq_cv); 1189 mutex_destroy(&vqp->vq_lock); 1190 kmem_free(vqp, sizeof (vsw_queue_t)); 1191 } 1192 1193 static void 1194 vsw_queue_worker(vsw_mac_ring_t *rrp) 1195 { 1196 mblk_t *mp; 1197 vsw_queue_t *vqp = rrp->ring_vqp; 1198 vsw_t *vswp = rrp->ring_vswp; 1199 1200 mutex_enter(&vqp->vq_lock); 1201 1202 ASSERT(vqp->vq_state == VSW_QUEUE_STOPPED); 1203 1204 /* 1205 * Set the state to running, since the thread is now active. 1206 */ 1207 vqp->vq_state = VSW_QUEUE_RUNNING; 1208 cv_signal(&vqp->vq_cv); 1209 1210 while (vqp->vq_state == VSW_QUEUE_RUNNING) { 1211 /* 1212 * Wait for work to do or the state has changed 1213 * to not running. 1214 */ 1215 while ((vqp->vq_state == VSW_QUEUE_RUNNING) && 1216 (vqp->vq_first == NULL)) { 1217 cv_wait(&vqp->vq_cv, &vqp->vq_lock); 1218 } 1219 1220 /* 1221 * Process packets that we received from the interface. 
1222 */ 1223 if (vqp->vq_first != NULL) { 1224 mp = vqp->vq_first; 1225 1226 vqp->vq_first = NULL; 1227 vqp->vq_last = NULL; 1228 1229 mutex_exit(&vqp->vq_lock); 1230 1231 /* switch the chain of packets received */ 1232 vswp->vsw_switch_frame(vswp, mp, 1233 VSW_PHYSDEV, NULL, NULL); 1234 1235 mutex_enter(&vqp->vq_lock); 1236 } 1237 } 1238 1239 /* 1240 * We are drained and signal we are done. 1241 */ 1242 vqp->vq_state = VSW_QUEUE_DRAINED; 1243 cv_signal(&vqp->vq_cv); 1244 1245 /* 1246 * Exit lock and drain the remaining packets. 1247 */ 1248 mutex_exit(&vqp->vq_lock); 1249 1250 /* 1251 * Exit the thread 1252 */ 1253 thread_exit(); 1254 } 1255 1256 /* 1257 * static void 1258 * vsw_rx_queue_cb() - Receive callback routine when 1259 * vsw_multi_ring_enable is non-zero. Queue the packets 1260 * to a packet queue for a worker thread to process. 1261 */ 1262 static void 1263 vsw_rx_queue_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp) 1264 { 1265 vsw_mac_ring_t *ringp = (vsw_mac_ring_t *)mrh; 1266 vsw_t *vswp = (vsw_t *)arg; 1267 vsw_queue_t *vqp; 1268 mblk_t *bp, *last; 1269 1270 ASSERT(mrh != NULL); 1271 ASSERT(vswp != NULL); 1272 ASSERT(mp != NULL); 1273 1274 D1(vswp, "%s: enter", __func__); 1275 1276 /* 1277 * Find the last element in the mblk chain. 1278 */ 1279 bp = mp; 1280 do { 1281 last = bp; 1282 bp = bp->b_next; 1283 } while (bp != NULL); 1284 1285 /* Get the queue for the packets */ 1286 vqp = ringp->ring_vqp; 1287 1288 /* 1289 * Grab the lock such we can queue the packets. 1290 */ 1291 mutex_enter(&vqp->vq_lock); 1292 1293 if (vqp->vq_state != VSW_QUEUE_RUNNING) { 1294 freemsg(mp); 1295 mutex_exit(&vqp->vq_lock); 1296 goto vsw_rx_queue_cb_exit; 1297 } 1298 1299 /* 1300 * Add the mblk chain to the queue. If there 1301 * is some mblks in the queue, then add the new 1302 * chain to the end. 
1303 */ 1304 if (vqp->vq_first == NULL) 1305 vqp->vq_first = mp; 1306 else 1307 vqp->vq_last->b_next = mp; 1308 1309 vqp->vq_last = last; 1310 1311 /* 1312 * Signal the worker thread that there is work to 1313 * do. 1314 */ 1315 cv_signal(&vqp->vq_cv); 1316 1317 /* 1318 * Let go of the lock and exit. 1319 */ 1320 mutex_exit(&vqp->vq_lock); 1321 1322 vsw_rx_queue_cb_exit: 1323 D1(vswp, "%s: exit", __func__); 1324 } 1325 1326 /* 1327 * receive callback routine. Invoked by MAC layer when there 1328 * are pkts being passed up from physical device. 1329 * 1330 * PERF: It may be more efficient when the card is in promisc 1331 * mode to check the dest address of the pkts here (against 1332 * the FDB) rather than checking later. Needs to be investigated. 1333 */ 1334 static void 1335 vsw_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp) 1336 { 1337 _NOTE(ARGUNUSED(mrh)) 1338 1339 vsw_t *vswp = (vsw_t *)arg; 1340 1341 ASSERT(vswp != NULL); 1342 1343 D1(vswp, "vsw_rx_cb: enter"); 1344 1345 /* switch the chain of packets received */ 1346 vswp->vsw_switch_frame(vswp, mp, VSW_PHYSDEV, NULL, NULL); 1347 1348 D1(vswp, "vsw_rx_cb: exit"); 1349 } 1350 1351 /* 1352 * Send a message out over the physical device via the MAC layer. 1353 * 1354 * Returns any mblks that it was unable to transmit. 1355 */ 1356 mblk_t * 1357 vsw_tx_msg(vsw_t *vswp, mblk_t *mp) 1358 { 1359 const mac_txinfo_t *mtp; 1360 1361 mutex_enter(&vswp->mac_lock); 1362 if ((vswp->mh == NULL) || (vswp->mstarted == B_FALSE)) { 1363 1364 DERR(vswp, "vsw_tx_msg: dropping pkts: no tx routine avail"); 1365 mutex_exit(&vswp->mac_lock); 1366 return (mp); 1367 } else { 1368 mtp = vswp->txinfo; 1369 mp = mtp->mt_fn(mtp->mt_arg, mp); 1370 } 1371 mutex_exit(&vswp->mac_lock); 1372 1373 return (mp); 1374 } 1375