1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/vsw.h> 66 67 /* MAC Ring table functions. */ 68 static void vsw_mac_ring_tbl_init(vsw_t *vswp); 69 static void vsw_mac_ring_tbl_destroy(vsw_t *vswp); 70 static void vsw_queue_worker(vsw_mac_ring_t *rrp); 71 static void vsw_queue_stop(vsw_queue_t *vqp); 72 static vsw_queue_t *vsw_queue_create(); 73 static void vsw_queue_destroy(vsw_queue_t *vqp); 74 static void vsw_rx_queue_cb(void *, mac_resource_handle_t, mblk_t *); 75 static void vsw_rx_cb(void *, mac_resource_handle_t, mblk_t *); 76 77 /* MAC layer routines */ 78 static mac_resource_handle_t vsw_mac_ring_add_cb(void *arg, 79 mac_resource_t *mrp); 80 static int vsw_set_hw_addr(vsw_t *, mac_multi_addr_t *); 81 static int vsw_set_hw_promisc(vsw_t *, vsw_port_t *, int); 82 static int vsw_unset_hw_addr(vsw_t *, int); 83 static int vsw_unset_hw_promisc(vsw_t *, vsw_port_t *, int); 84 static int vsw_prog_if(vsw_t *); 85 86 /* Support functions */ 87 static int vsw_prog_ports(vsw_t *); 88 int vsw_set_hw(vsw_t *, vsw_port_t *, int); 89 int vsw_unset_hw(vsw_t *, vsw_port_t *, int); 90 void vsw_reconfig_hw(vsw_t *); 91 int vsw_mac_attach(vsw_t *vswp); 92 void vsw_mac_detach(vsw_t *vswp); 93 int vsw_mac_open(vsw_t *vswp); 94 void vsw_mac_close(vsw_t *vswp); 95 void vsw_unset_addrs(vsw_t *vswp); 96 void vsw_set_addrs(vsw_t *vswp); 97 int vsw_get_hw_maddr(vsw_t *); 98 mblk_t *vsw_tx_msg(vsw_t *, mblk_t *); 99 100 /* 101 * Tunables used in this file. 102 */ 103 extern int vsw_mac_open_retries; 104 extern boolean_t vsw_multi_ring_enable; 105 extern int vsw_mac_rx_rings; 106 107 /* 108 * Check to see if the card supports the setting of multiple unicst 109 * addresses. 110 * 111 * Returns 0 if card supports the programming of multiple unicast addresses, 112 * otherwise returns 1. 113 */ 114 int 115 vsw_get_hw_maddr(vsw_t *vswp) 116 { 117 D1(vswp, "%s: enter", __func__); 118 119 ASSERT(MUTEX_HELD(&vswp->mac_lock)); 120 121 if (vswp->mh == NULL) 122 return (1); 123 124 if (!mac_capab_get(vswp->mh, MAC_CAPAB_MULTIADDRESS, &vswp->maddr)) { 125 cmn_err(CE_WARN, "!vsw%d: device (%s) does not support " 126 "setting multiple unicast addresses", vswp->instance, 127 vswp->physname); 128 return (1); 129 } 130 131 D2(vswp, "%s: %d addrs : %d free", __func__, 132 vswp->maddr.maddr_naddr, vswp->maddr.maddr_naddrfree); 133 134 D1(vswp, "%s: exit", __func__); 135 136 return (0); 137 } 138 139 /* 140 * Program unicast and multicast addresses of vsw interface and the ports 141 * into the physical device. 142 */ 143 void 144 vsw_set_addrs(vsw_t *vswp) 145 { 146 vsw_port_list_t *plist = &vswp->plist; 147 vsw_port_t *port; 148 mcst_addr_t *mcap; 149 int rv; 150 151 READ_ENTER(&vswp->if_lockrw); 152 153 if (vswp->if_state & VSW_IF_UP) { 154 155 /* program unicst addr of vsw interface in the physdev */ 156 if (vswp->addr_set == VSW_ADDR_UNSET) { 157 mutex_enter(&vswp->hw_lock); 158 rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 159 mutex_exit(&vswp->hw_lock); 160 if (rv != 0) { 161 cmn_err(CE_NOTE, 162 "!vsw%d: failed to program interface " 163 "unicast address\n", vswp->instance); 164 } 165 /* 166 * Notify the MAC layer of the changed address. 167 */ 168 mac_unicst_update(vswp->if_mh, 169 (uint8_t *)&vswp->if_addr); 170 } 171 172 /* program mcast addrs of vsw interface in the physdev */ 173 mutex_enter(&vswp->mca_lock); 174 mutex_enter(&vswp->mac_lock); 175 for (mcap = vswp->mcap; mcap != NULL; mcap = mcap->nextp) { 176 if (mcap->mac_added) 177 continue; 178 rv = mac_multicst_add(vswp->mh, (uchar_t *)&mcap->mca); 179 if (rv == 0) { 180 mcap->mac_added = B_TRUE; 181 } else { 182 cmn_err(CE_WARN, "!vsw%d: unable to add " 183 "multicast address: %s\n", vswp->instance, 184 ether_sprintf((void *)&mcap->mca)); 185 } 186 } 187 mutex_exit(&vswp->mac_lock); 188 mutex_exit(&vswp->mca_lock); 189 190 } 191 192 RW_EXIT(&vswp->if_lockrw); 193 194 WRITE_ENTER(&plist->lockrw); 195 196 /* program unicast address of ports in the physical device */ 197 mutex_enter(&vswp->hw_lock); 198 for (port = plist->head; port != NULL; port = port->p_next) { 199 if (port->addr_set != VSW_ADDR_UNSET) /* addr already set */ 200 continue; 201 if (vsw_set_hw(vswp, port, VSW_VNETPORT)) { 202 cmn_err(CE_NOTE, 203 "!vsw%d: port:%d failed to set unicast address\n", 204 vswp->instance, port->p_instance); 205 } 206 } 207 mutex_exit(&vswp->hw_lock); 208 209 /* program multicast addresses of ports in the physdev */ 210 for (port = plist->head; port != NULL; port = port->p_next) { 211 mutex_enter(&port->mca_lock); 212 mutex_enter(&vswp->mac_lock); 213 for (mcap = port->mcap; mcap != NULL; mcap = mcap->nextp) { 214 if (mcap->mac_added) 215 continue; 216 rv = mac_multicst_add(vswp->mh, (uchar_t *)&mcap->mca); 217 if (rv == 0) { 218 mcap->mac_added = B_TRUE; 219 } else { 220 cmn_err(CE_WARN, "!vsw%d: unable to add " 221 "multicast address: %s\n", vswp->instance, 222 ether_sprintf((void *)&mcap->mca)); 223 } 224 } 225 mutex_exit(&vswp->mac_lock); 226 mutex_exit(&port->mca_lock); 227 } 228 229 RW_EXIT(&plist->lockrw); 230 } 231 232 /* 233 * Remove unicast and multicast addresses of vsw interface and the ports 234 * from the physical device. 235 */ 236 void 237 vsw_unset_addrs(vsw_t *vswp) 238 { 239 vsw_port_list_t *plist = &vswp->plist; 240 vsw_port_t *port; 241 mcst_addr_t *mcap; 242 243 READ_ENTER(&vswp->if_lockrw); 244 245 if (vswp->if_state & VSW_IF_UP) { 246 247 /* 248 * Remove unicast addr of vsw interfce 249 * from current physdev 250 */ 251 mutex_enter(&vswp->hw_lock); 252 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 253 mutex_exit(&vswp->hw_lock); 254 255 /* 256 * Remove mcast addrs of vsw interface 257 * from current physdev 258 */ 259 mutex_enter(&vswp->mca_lock); 260 mutex_enter(&vswp->mac_lock); 261 for (mcap = vswp->mcap; mcap != NULL; mcap = mcap->nextp) { 262 if (!mcap->mac_added) 263 continue; 264 (void) mac_multicst_remove(vswp->mh, 265 (uchar_t *)&mcap->mca); 266 mcap->mac_added = B_FALSE; 267 } 268 mutex_exit(&vswp->mac_lock); 269 mutex_exit(&vswp->mca_lock); 270 271 } 272 273 RW_EXIT(&vswp->if_lockrw); 274 275 WRITE_ENTER(&plist->lockrw); 276 277 /* 278 * Remove unicast address of ports from the current physical device 279 */ 280 mutex_enter(&vswp->hw_lock); 281 for (port = plist->head; port != NULL; port = port->p_next) { 282 /* Remove address if was programmed into HW. */ 283 if (port->addr_set == VSW_ADDR_UNSET) 284 continue; 285 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT); 286 } 287 mutex_exit(&vswp->hw_lock); 288 289 /* Remove multicast addresses of ports from the current physdev */ 290 for (port = plist->head; port != NULL; port = port->p_next) { 291 mutex_enter(&port->mca_lock); 292 mutex_enter(&vswp->mac_lock); 293 for (mcap = port->mcap; mcap != NULL; mcap = mcap->nextp) { 294 if (!mcap->mac_added) 295 continue; 296 (void) mac_multicst_remove(vswp->mh, 297 (uchar_t *)&mcap->mca); 298 mcap->mac_added = B_FALSE; 299 } 300 mutex_exit(&vswp->mac_lock); 301 mutex_exit(&port->mca_lock); 302 } 303 304 RW_EXIT(&plist->lockrw); 305 } 306 307 /* 308 * Open the underlying physical device for access in layer2 mode. 309 * Returns: 310 * 0 on success 311 * EAGAIN if mac_open() fails due to the device being not available yet. 312 * EIO on any other failures. 313 */ 314 int 315 vsw_mac_open(vsw_t *vswp) 316 { 317 int rv; 318 319 ASSERT(MUTEX_HELD(&vswp->mac_lock)); 320 321 if (vswp->mh != NULL) { 322 /* already open */ 323 return (0); 324 } 325 326 if (vswp->mac_open_retries++ >= vsw_mac_open_retries) { 327 /* exceeded max retries */ 328 return (EIO); 329 } 330 331 rv = mac_open(vswp->physname, &vswp->mh); 332 if (rv != 0) { 333 /* 334 * If mac_open() failed and the error indicates that the 335 * device is not available yet, then, we return EAGAIN to 336 * indicate that it needs to be retried. 337 * For example, this may happen during boot up, as the 338 * required link aggregation groups(devices) have not been 339 * created yet. 340 */ 341 if (rv == ENOENT) { 342 return (EAGAIN); 343 } else { 344 cmn_err(CE_WARN, "vsw%d: mac_open %s failed rv:%x", 345 vswp->instance, vswp->physname, rv); 346 return (EIO); 347 } 348 } 349 350 vswp->mac_open_retries = 0; 351 352 return (0); 353 } 354 355 /* 356 * Close the underlying physical device. 357 */ 358 void 359 vsw_mac_close(vsw_t *vswp) 360 { 361 ASSERT(MUTEX_HELD(&vswp->mac_lock)); 362 363 if (vswp->mh != NULL) { 364 mac_close(vswp->mh); 365 vswp->mh = NULL; 366 } 367 } 368 369 /* 370 * Link into the MAC layer to gain access to the services provided by 371 * the underlying physical device driver (which should also have 372 * registered with the MAC layer). 373 * 374 * Only when in layer 2 mode. 375 */ 376 int 377 vsw_mac_attach(vsw_t *vswp) 378 { 379 D1(vswp, "%s: enter", __func__); 380 381 ASSERT(vswp->mrh == NULL); 382 ASSERT(vswp->mstarted == B_FALSE); 383 ASSERT(vswp->mresources == B_FALSE); 384 385 ASSERT(MUTEX_HELD(&vswp->mac_lock)); 386 387 ASSERT(vswp->mh != NULL); 388 389 D2(vswp, "vsw_mac_attach: using device %s", vswp->physname); 390 391 if (vsw_multi_ring_enable) { 392 /* 393 * Initialize the ring table. 394 */ 395 vsw_mac_ring_tbl_init(vswp); 396 397 /* 398 * Register our rx callback function. 399 */ 400 vswp->mrh = mac_rx_add(vswp->mh, 401 vsw_rx_queue_cb, (void *)vswp); 402 ASSERT(vswp->mrh != NULL); 403 404 /* 405 * Register our mac resource callback. 406 */ 407 mac_resource_set(vswp->mh, vsw_mac_ring_add_cb, (void *)vswp); 408 vswp->mresources = B_TRUE; 409 410 /* 411 * Get the ring resources available to us from 412 * the mac below us. 413 */ 414 mac_resources(vswp->mh); 415 } else { 416 /* 417 * Just register our rx callback function 418 */ 419 vswp->mrh = mac_rx_add(vswp->mh, vsw_rx_cb, (void *)vswp); 420 ASSERT(vswp->mrh != NULL); 421 } 422 423 /* Get the MAC tx fn */ 424 vswp->txinfo = mac_tx_get(vswp->mh); 425 426 /* start the interface */ 427 if (mac_start(vswp->mh) != 0) { 428 cmn_err(CE_WARN, "!vsw%d: Could not start mac interface", 429 vswp->instance); 430 goto mac_fail_exit; 431 } 432 433 vswp->mstarted = B_TRUE; 434 435 D1(vswp, "%s: exit", __func__); 436 return (0); 437 438 mac_fail_exit: 439 vsw_mac_detach(vswp); 440 441 D1(vswp, "%s: exit", __func__); 442 return (1); 443 } 444 445 void 446 vsw_mac_detach(vsw_t *vswp) 447 { 448 D1(vswp, "vsw_mac_detach: enter"); 449 450 ASSERT(vswp != NULL); 451 ASSERT(MUTEX_HELD(&vswp->mac_lock)); 452 453 if (vsw_multi_ring_enable) { 454 vsw_mac_ring_tbl_destroy(vswp); 455 } 456 457 if (vswp->mh != NULL) { 458 if (vswp->mstarted) 459 mac_stop(vswp->mh); 460 if (vswp->mrh != NULL) 461 mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE); 462 if (vswp->mresources) 463 mac_resource_set(vswp->mh, NULL, NULL); 464 } 465 466 vswp->mrh = NULL; 467 vswp->txinfo = NULL; 468 vswp->mstarted = B_FALSE; 469 470 D1(vswp, "vsw_mac_detach: exit"); 471 } 472 473 /* 474 * Depending on the mode specified, the capabilites and capacity 475 * of the underlying device setup the physical device. 476 * 477 * If in layer 3 mode, then do nothing. 478 * 479 * If in layer 2 programmed mode attempt to program the unicast address 480 * associated with the port into the physical device. If this is not 481 * possible due to resource exhaustion or simply because the device does 482 * not support multiple unicast addresses then if required fallback onto 483 * putting the card into promisc mode. 484 * 485 * If in promisc mode then simply set the card into promisc mode. 486 * 487 * Returns 0 success, 1 on failure. 488 */ 489 int 490 vsw_set_hw(vsw_t *vswp, vsw_port_t *port, int type) 491 { 492 mac_multi_addr_t mac_addr; 493 int err; 494 495 D1(vswp, "%s: enter", __func__); 496 497 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 498 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 499 500 if (vswp->smode[vswp->smode_idx] == VSW_LAYER3) 501 return (0); 502 503 if (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC) { 504 return (vsw_set_hw_promisc(vswp, port, type)); 505 } 506 507 /* 508 * Attempt to program the unicast address into the HW. 509 */ 510 mac_addr.mma_addrlen = ETHERADDRL; 511 if (type == VSW_VNETPORT) { 512 ASSERT(port != NULL); 513 ether_copy(&port->p_macaddr, &mac_addr.mma_addr); 514 } else { 515 ether_copy(&vswp->if_addr, &mac_addr.mma_addr); 516 } 517 518 err = vsw_set_hw_addr(vswp, &mac_addr); 519 if (err == ENOSPC) { 520 /* 521 * Mark that attempt should be made to re-config sometime 522 * in future if a port is deleted. 523 */ 524 vswp->recfg_reqd = B_TRUE; 525 526 /* 527 * Only 1 mode specified, nothing more to do. 528 */ 529 if (vswp->smode_num == 1) 530 return (err); 531 532 /* 533 * If promiscuous was next mode specified try to 534 * set the card into that mode. 535 */ 536 if ((vswp->smode_idx <= (vswp->smode_num - 2)) && 537 (vswp->smode[vswp->smode_idx + 1] == 538 VSW_LAYER2_PROMISC)) { 539 vswp->smode_idx += 1; 540 return (vsw_set_hw_promisc(vswp, port, type)); 541 } 542 return (err); 543 } 544 545 if (err != 0) 546 return (err); 547 548 if (type == VSW_VNETPORT) { 549 port->addr_slot = mac_addr.mma_slot; 550 port->addr_set = VSW_ADDR_HW; 551 } else { 552 vswp->addr_slot = mac_addr.mma_slot; 553 vswp->addr_set = VSW_ADDR_HW; 554 } 555 556 D2(vswp, "programmed addr %s into slot %d " 557 "of device %s", ether_sprintf((void *)mac_addr.mma_addr), 558 mac_addr.mma_slot, vswp->physname); 559 560 D1(vswp, "%s: exit", __func__); 561 562 return (0); 563 } 564 565 /* 566 * If in layer 3 mode do nothing. 567 * 568 * If in layer 2 switched mode remove the address from the physical 569 * device. 570 * 571 * If in layer 2 promiscuous mode disable promisc mode. 572 * 573 * Returns 0 on success. 574 */ 575 int 576 vsw_unset_hw(vsw_t *vswp, vsw_port_t *port, int type) 577 { 578 mac_addr_slot_t slot; 579 int rv; 580 581 D1(vswp, "%s: enter", __func__); 582 583 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 584 585 if (vswp->smode[vswp->smode_idx] == VSW_LAYER3) 586 return (0); 587 588 switch (type) { 589 case VSW_VNETPORT: 590 ASSERT(port != NULL); 591 592 if (port->addr_set == VSW_ADDR_PROMISC) { 593 return (vsw_unset_hw_promisc(vswp, port, type)); 594 595 } else if (port->addr_set == VSW_ADDR_HW) { 596 slot = port->addr_slot; 597 if ((rv = vsw_unset_hw_addr(vswp, slot)) == 0) 598 port->addr_set = VSW_ADDR_UNSET; 599 } 600 601 break; 602 603 case VSW_LOCALDEV: 604 if (vswp->addr_set == VSW_ADDR_PROMISC) { 605 return (vsw_unset_hw_promisc(vswp, NULL, type)); 606 607 } else if (vswp->addr_set == VSW_ADDR_HW) { 608 slot = vswp->addr_slot; 609 if ((rv = vsw_unset_hw_addr(vswp, slot)) == 0) 610 vswp->addr_set = VSW_ADDR_UNSET; 611 } 612 613 break; 614 615 default: 616 /* should never happen */ 617 DERR(vswp, "%s: unknown type %d", __func__, type); 618 ASSERT(0); 619 return (1); 620 } 621 622 D1(vswp, "%s: exit", __func__); 623 return (rv); 624 } 625 626 /* 627 * Attempt to program a unicast address into HW. 628 * 629 * Returns 0 on sucess, 1 on failure. 630 */ 631 static int 632 vsw_set_hw_addr(vsw_t *vswp, mac_multi_addr_t *mac) 633 { 634 void *mah; 635 int rv = EINVAL; 636 637 D1(vswp, "%s: enter", __func__); 638 639 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 640 641 if (vswp->maddr.maddr_handle == NULL) 642 return (rv); 643 644 mah = vswp->maddr.maddr_handle; 645 646 rv = vswp->maddr.maddr_add(mah, mac); 647 648 if (rv == 0) 649 return (rv); 650 651 /* 652 * Its okay for the add to fail because we have exhausted 653 * all the resouces in the hardware device. Any other error 654 * we want to flag. 655 */ 656 if (rv != ENOSPC) { 657 cmn_err(CE_WARN, "!vsw%d: error programming " 658 "address %s into HW err (%d)", 659 vswp->instance, ether_sprintf((void *)mac->mma_addr), rv); 660 } 661 D1(vswp, "%s: exit", __func__); 662 return (rv); 663 } 664 665 /* 666 * Remove a unicast mac address which has previously been programmed 667 * into HW. 668 * 669 * Returns 0 on sucess, 1 on failure. 670 */ 671 static int 672 vsw_unset_hw_addr(vsw_t *vswp, int slot) 673 { 674 void *mah; 675 int rv; 676 677 D1(vswp, "%s: enter", __func__); 678 679 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 680 ASSERT(slot >= 0); 681 682 if (vswp->maddr.maddr_handle == NULL) 683 return (1); 684 685 mah = vswp->maddr.maddr_handle; 686 687 rv = vswp->maddr.maddr_remove(mah, slot); 688 if (rv != 0) { 689 cmn_err(CE_WARN, "!vsw%d: unable to remove address " 690 "from slot %d in device %s (err %d)", 691 vswp->instance, slot, vswp->physname, rv); 692 return (1); 693 } 694 695 D2(vswp, "removed addr from slot %d in device %s", 696 slot, vswp->physname); 697 698 D1(vswp, "%s: exit", __func__); 699 return (0); 700 } 701 702 /* 703 * Set network card into promisc mode. 704 * 705 * Returns 0 on success, 1 on failure. 706 */ 707 static int 708 vsw_set_hw_promisc(vsw_t *vswp, vsw_port_t *port, int type) 709 { 710 D1(vswp, "%s: enter", __func__); 711 712 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 713 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 714 715 mutex_enter(&vswp->mac_lock); 716 if (vswp->mh == NULL) { 717 mutex_exit(&vswp->mac_lock); 718 return (1); 719 } 720 721 if (vswp->promisc_cnt++ == 0) { 722 if (mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC) != 0) { 723 vswp->promisc_cnt--; 724 mutex_exit(&vswp->mac_lock); 725 return (1); 726 } 727 cmn_err(CE_NOTE, "!vsw%d: switching device %s into " 728 "promiscuous mode", vswp->instance, vswp->physname); 729 } 730 mutex_exit(&vswp->mac_lock); 731 732 if (type == VSW_VNETPORT) { 733 ASSERT(port != NULL); 734 port->addr_set = VSW_ADDR_PROMISC; 735 } else { 736 vswp->addr_set = VSW_ADDR_PROMISC; 737 } 738 739 D1(vswp, "%s: exit", __func__); 740 741 return (0); 742 } 743 744 /* 745 * Turn off promiscuous mode on network card. 746 * 747 * Returns 0 on success, 1 on failure. 748 */ 749 static int 750 vsw_unset_hw_promisc(vsw_t *vswp, vsw_port_t *port, int type) 751 { 752 vsw_port_list_t *plist = &vswp->plist; 753 754 D2(vswp, "%s: enter", __func__); 755 756 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 757 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 758 759 mutex_enter(&vswp->mac_lock); 760 if (vswp->mh == NULL) { 761 mutex_exit(&vswp->mac_lock); 762 return (1); 763 } 764 765 if (--vswp->promisc_cnt == 0) { 766 if (mac_promisc_set(vswp->mh, B_FALSE, MAC_DEVPROMISC) != 0) { 767 vswp->promisc_cnt++; 768 mutex_exit(&vswp->mac_lock); 769 return (1); 770 } 771 772 /* 773 * We are exiting promisc mode either because we were 774 * only in promisc mode because we had failed over from 775 * switched mode due to HW resource issues, or the user 776 * wanted the card in promisc mode for all the ports and 777 * the last port is now being deleted. Tweak the message 778 * accordingly. 779 */ 780 if (plist->num_ports != 0) { 781 cmn_err(CE_NOTE, "!vsw%d: switching device %s back to " 782 "programmed mode", vswp->instance, vswp->physname); 783 } else { 784 cmn_err(CE_NOTE, "!vsw%d: switching device %s out of " 785 "promiscuous mode", vswp->instance, vswp->physname); 786 } 787 } 788 mutex_exit(&vswp->mac_lock); 789 790 if (type == VSW_VNETPORT) { 791 ASSERT(port != NULL); 792 ASSERT(port->addr_set == VSW_ADDR_PROMISC); 793 port->addr_set = VSW_ADDR_UNSET; 794 } else { 795 ASSERT(vswp->addr_set == VSW_ADDR_PROMISC); 796 vswp->addr_set = VSW_ADDR_UNSET; 797 } 798 799 D1(vswp, "%s: exit", __func__); 800 return (0); 801 } 802 803 /* 804 * Determine whether or not we are operating in our prefered 805 * mode and if not whether the physical resources now allow us 806 * to operate in it. 807 * 808 * If a port is being removed should only be invoked after port has been 809 * removed from the port list. 810 */ 811 void 812 vsw_reconfig_hw(vsw_t *vswp) 813 { 814 int s_idx; 815 816 D1(vswp, "%s: enter", __func__); 817 818 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 819 820 if (vswp->maddr.maddr_handle == NULL) { 821 return; 822 } 823 824 /* 825 * If we are in layer 2 (i.e. switched) or would like to be 826 * in layer 2 then check if any ports or the vswitch itself 827 * need to be programmed into the HW. 828 * 829 * This can happen in two cases - switched was specified as 830 * the prefered mode of operation but we exhausted the HW 831 * resources and so failed over to the next specifed mode, 832 * or switched was the only mode specified so after HW 833 * resources were exhausted there was nothing more we 834 * could do. 835 */ 836 if (vswp->smode_idx > 0) 837 s_idx = vswp->smode_idx - 1; 838 else 839 s_idx = vswp->smode_idx; 840 841 if (vswp->smode[s_idx] != VSW_LAYER2) { 842 return; 843 } 844 845 D2(vswp, "%s: attempting reconfig..", __func__); 846 847 /* 848 * First, attempt to set the vswitch mac address into HW, 849 * if required. 850 */ 851 if (vsw_prog_if(vswp)) { 852 return; 853 } 854 855 /* 856 * Next, attempt to set any ports which have not yet been 857 * programmed into HW. 858 */ 859 if (vsw_prog_ports(vswp)) { 860 return; 861 } 862 863 /* 864 * By now we know that have programmed all desired ports etc 865 * into HW, so safe to mark reconfiguration as complete. 866 */ 867 vswp->recfg_reqd = B_FALSE; 868 869 vswp->smode_idx = s_idx; 870 871 D1(vswp, "%s: exit", __func__); 872 } 873 874 /* 875 * Check to see if vsw itself is plumbed, and if so whether or not 876 * its mac address should be written into HW. 877 * 878 * Returns 0 if could set address, or didn't have to set it. 879 * Returns 1 if failed to set address. 880 */ 881 static int 882 vsw_prog_if(vsw_t *vswp) 883 { 884 mac_multi_addr_t addr; 885 886 D1(vswp, "%s: enter", __func__); 887 888 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 889 890 READ_ENTER(&vswp->if_lockrw); 891 if ((vswp->if_state & VSW_IF_UP) && 892 (vswp->addr_set != VSW_ADDR_HW)) { 893 894 addr.mma_addrlen = ETHERADDRL; 895 ether_copy(&vswp->if_addr, &addr.mma_addr); 896 897 if (vsw_set_hw_addr(vswp, &addr) != 0) { 898 RW_EXIT(&vswp->if_lockrw); 899 return (1); 900 } 901 902 vswp->addr_slot = addr.mma_slot; 903 904 /* 905 * If previously when plumbed had had to place 906 * interface into promisc mode, now reverse that. 907 * 908 * Note that interface will only actually be set into 909 * non-promisc mode when last port/interface has been 910 * programmed into HW. 911 */ 912 if (vswp->addr_set == VSW_ADDR_PROMISC) 913 (void) vsw_unset_hw_promisc(vswp, NULL, VSW_LOCALDEV); 914 915 vswp->addr_set = VSW_ADDR_HW; 916 } 917 RW_EXIT(&vswp->if_lockrw); 918 919 D1(vswp, "%s: exit", __func__); 920 return (0); 921 } 922 923 /* 924 * Scan the port list for any ports which have not yet been set 925 * into HW. For those found attempt to program their mac addresses 926 * into the physical device. 927 * 928 * Returns 0 if able to program all required ports (can be 0) into HW. 929 * Returns 1 if failed to set at least one mac address. 930 */ 931 static int 932 vsw_prog_ports(vsw_t *vswp) 933 { 934 mac_multi_addr_t addr; 935 vsw_port_list_t *plist = &vswp->plist; 936 vsw_port_t *tp; 937 int rv = 0; 938 939 D1(vswp, "%s: enter", __func__); 940 941 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 942 943 READ_ENTER(&plist->lockrw); 944 for (tp = plist->head; tp != NULL; tp = tp->p_next) { 945 if (tp->addr_set != VSW_ADDR_HW) { 946 addr.mma_addrlen = ETHERADDRL; 947 ether_copy(&tp->p_macaddr, &addr.mma_addr); 948 949 if (vsw_set_hw_addr(vswp, &addr) != 0) { 950 rv = 1; 951 break; 952 } 953 954 tp->addr_slot = addr.mma_slot; 955 956 /* 957 * If when this port had first attached we had 958 * had to place the interface into promisc mode, 959 * then now reverse that. 960 * 961 * Note that the interface will not actually 962 * change to non-promisc mode until all ports 963 * have been programmed. 964 */ 965 if (tp->addr_set == VSW_ADDR_PROMISC) 966 (void) vsw_unset_hw_promisc(vswp, 967 tp, VSW_VNETPORT); 968 969 tp->addr_set = VSW_ADDR_HW; 970 } 971 } 972 RW_EXIT(&plist->lockrw); 973 974 D1(vswp, "%s: exit", __func__); 975 return (rv); 976 } 977 978 static void 979 vsw_mac_ring_tbl_entry_init(vsw_t *vswp, vsw_mac_ring_t *ringp) 980 { 981 ringp->ring_state = VSW_MAC_RING_FREE; 982 ringp->ring_arg = NULL; 983 ringp->ring_blank = NULL; 984 ringp->ring_vqp = NULL; 985 ringp->ring_vswp = vswp; 986 } 987 988 static void 989 vsw_mac_ring_tbl_init(vsw_t *vswp) 990 { 991 int i; 992 993 mutex_init(&vswp->mac_ring_lock, NULL, MUTEX_DRIVER, NULL); 994 995 vswp->mac_ring_tbl_sz = vsw_mac_rx_rings; 996 vswp->mac_ring_tbl = 997 kmem_alloc(vsw_mac_rx_rings * sizeof (vsw_mac_ring_t), KM_SLEEP); 998 999 for (i = 0; i < vswp->mac_ring_tbl_sz; i++) 1000 vsw_mac_ring_tbl_entry_init(vswp, &vswp->mac_ring_tbl[i]); 1001 } 1002 1003 static void 1004 vsw_mac_ring_tbl_destroy(vsw_t *vswp) 1005 { 1006 int i; 1007 vsw_mac_ring_t *ringp; 1008 1009 mutex_enter(&vswp->mac_ring_lock); 1010 for (i = 0; i < vswp->mac_ring_tbl_sz; i++) { 1011 ringp = &vswp->mac_ring_tbl[i]; 1012 1013 if (ringp->ring_state != VSW_MAC_RING_FREE) { 1014 /* 1015 * Destroy the queue. 1016 */ 1017 vsw_queue_stop(ringp->ring_vqp); 1018 vsw_queue_destroy(ringp->ring_vqp); 1019 1020 /* 1021 * Re-initialize the structure. 1022 */ 1023 vsw_mac_ring_tbl_entry_init(vswp, ringp); 1024 } 1025 } 1026 mutex_exit(&vswp->mac_ring_lock); 1027 1028 mutex_destroy(&vswp->mac_ring_lock); 1029 kmem_free(vswp->mac_ring_tbl, 1030 vswp->mac_ring_tbl_sz * sizeof (vsw_mac_ring_t)); 1031 vswp->mac_ring_tbl_sz = 0; 1032 } 1033 1034 /* 1035 * Handle resource add callbacks from the driver below. 1036 */ 1037 static mac_resource_handle_t 1038 vsw_mac_ring_add_cb(void *arg, mac_resource_t *mrp) 1039 { 1040 vsw_t *vswp = (vsw_t *)arg; 1041 mac_rx_fifo_t *mrfp = (mac_rx_fifo_t *)mrp; 1042 vsw_mac_ring_t *ringp; 1043 vsw_queue_t *vqp; 1044 int i; 1045 1046 ASSERT(vswp != NULL); 1047 ASSERT(mrp != NULL); 1048 ASSERT(vswp->mac_ring_tbl != NULL); 1049 1050 D1(vswp, "%s: enter", __func__); 1051 1052 /* 1053 * Check to make sure we have the correct resource type. 1054 */ 1055 if (mrp->mr_type != MAC_RX_FIFO) 1056 return (NULL); 1057 1058 /* 1059 * Find a open entry in the ring table. 1060 */ 1061 mutex_enter(&vswp->mac_ring_lock); 1062 for (i = 0; i < vswp->mac_ring_tbl_sz; i++) { 1063 ringp = &vswp->mac_ring_tbl[i]; 1064 1065 /* 1066 * Check for an empty slot, if found, then setup queue 1067 * and thread. 1068 */ 1069 if (ringp->ring_state == VSW_MAC_RING_FREE) { 1070 /* 1071 * Create the queue for this ring. 1072 */ 1073 vqp = vsw_queue_create(); 1074 1075 /* 1076 * Initialize the ring data structure. 1077 */ 1078 ringp->ring_vqp = vqp; 1079 ringp->ring_arg = mrfp->mrf_arg; 1080 ringp->ring_blank = mrfp->mrf_blank; 1081 ringp->ring_state = VSW_MAC_RING_INUSE; 1082 1083 /* 1084 * Create the worker thread. 1085 */ 1086 vqp->vq_worker = thread_create(NULL, 0, 1087 vsw_queue_worker, ringp, 0, &p0, 1088 TS_RUN, minclsyspri); 1089 if (vqp->vq_worker == NULL) { 1090 vsw_queue_destroy(vqp); 1091 vsw_mac_ring_tbl_entry_init(vswp, ringp); 1092 ringp = NULL; 1093 } 1094 1095 if (ringp != NULL) { 1096 /* 1097 * Make sure thread get's running state for 1098 * this ring. 1099 */ 1100 mutex_enter(&vqp->vq_lock); 1101 while ((vqp->vq_state != VSW_QUEUE_RUNNING) && 1102 (vqp->vq_state != VSW_QUEUE_DRAINED)) { 1103 cv_wait(&vqp->vq_cv, &vqp->vq_lock); 1104 } 1105 1106 /* 1107 * If the thread is not running, cleanup. 1108 */ 1109 if (vqp->vq_state == VSW_QUEUE_DRAINED) { 1110 vsw_queue_destroy(vqp); 1111 vsw_mac_ring_tbl_entry_init(vswp, 1112 ringp); 1113 ringp = NULL; 1114 } 1115 mutex_exit(&vqp->vq_lock); 1116 } 1117 1118 mutex_exit(&vswp->mac_ring_lock); 1119 D1(vswp, "%s: exit", __func__); 1120 return ((mac_resource_handle_t)ringp); 1121 } 1122 } 1123 mutex_exit(&vswp->mac_ring_lock); 1124 1125 /* 1126 * No slots in the ring table available. 1127 */ 1128 D1(vswp, "%s: exit", __func__); 1129 return (NULL); 1130 } 1131 1132 static void 1133 vsw_queue_stop(vsw_queue_t *vqp) 1134 { 1135 mutex_enter(&vqp->vq_lock); 1136 1137 if (vqp->vq_state == VSW_QUEUE_RUNNING) { 1138 vqp->vq_state = VSW_QUEUE_STOP; 1139 cv_signal(&vqp->vq_cv); 1140 1141 while (vqp->vq_state != VSW_QUEUE_DRAINED) 1142 cv_wait(&vqp->vq_cv, &vqp->vq_lock); 1143 } 1144 1145 vqp->vq_state = VSW_QUEUE_STOPPED; 1146 1147 mutex_exit(&vqp->vq_lock); 1148 } 1149 1150 static vsw_queue_t * 1151 vsw_queue_create() 1152 { 1153 vsw_queue_t *vqp; 1154 1155 vqp = kmem_zalloc(sizeof (vsw_queue_t), KM_SLEEP); 1156 1157 mutex_init(&vqp->vq_lock, NULL, MUTEX_DRIVER, NULL); 1158 cv_init(&vqp->vq_cv, NULL, CV_DRIVER, NULL); 1159 vqp->vq_first = NULL; 1160 vqp->vq_last = NULL; 1161 vqp->vq_state = VSW_QUEUE_STOPPED; 1162 1163 return (vqp); 1164 } 1165 1166 static void 1167 vsw_queue_destroy(vsw_queue_t *vqp) 1168 { 1169 cv_destroy(&vqp->vq_cv); 1170 mutex_destroy(&vqp->vq_lock); 1171 kmem_free(vqp, sizeof (vsw_queue_t)); 1172 } 1173 1174 static void 1175 vsw_queue_worker(vsw_mac_ring_t *rrp) 1176 { 1177 mblk_t *mp; 1178 vsw_queue_t *vqp = rrp->ring_vqp; 1179 vsw_t *vswp = rrp->ring_vswp; 1180 1181 mutex_enter(&vqp->vq_lock); 1182 1183 ASSERT(vqp->vq_state == VSW_QUEUE_STOPPED); 1184 1185 /* 1186 * Set the state to running, since the thread is now active. 1187 */ 1188 vqp->vq_state = VSW_QUEUE_RUNNING; 1189 cv_signal(&vqp->vq_cv); 1190 1191 while (vqp->vq_state == VSW_QUEUE_RUNNING) { 1192 /* 1193 * Wait for work to do or the state has changed 1194 * to not running. 1195 */ 1196 while ((vqp->vq_state == VSW_QUEUE_RUNNING) && 1197 (vqp->vq_first == NULL)) { 1198 cv_wait(&vqp->vq_cv, &vqp->vq_lock); 1199 } 1200 1201 /* 1202 * Process packets that we received from the interface. 1203 */ 1204 if (vqp->vq_first != NULL) { 1205 mp = vqp->vq_first; 1206 1207 vqp->vq_first = NULL; 1208 vqp->vq_last = NULL; 1209 1210 mutex_exit(&vqp->vq_lock); 1211 1212 /* switch the chain of packets received */ 1213 vswp->vsw_switch_frame(vswp, mp, 1214 VSW_PHYSDEV, NULL, NULL); 1215 1216 mutex_enter(&vqp->vq_lock); 1217 } 1218 } 1219 1220 /* 1221 * We are drained and signal we are done. 1222 */ 1223 vqp->vq_state = VSW_QUEUE_DRAINED; 1224 cv_signal(&vqp->vq_cv); 1225 1226 /* 1227 * Exit lock and drain the remaining packets. 1228 */ 1229 mutex_exit(&vqp->vq_lock); 1230 1231 /* 1232 * Exit the thread 1233 */ 1234 thread_exit(); 1235 } 1236 1237 /* 1238 * static void 1239 * vsw_rx_queue_cb() - Receive callback routine when 1240 * vsw_multi_ring_enable is non-zero. Queue the packets 1241 * to a packet queue for a worker thread to process. 1242 */ 1243 static void 1244 vsw_rx_queue_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp) 1245 { 1246 vsw_mac_ring_t *ringp = (vsw_mac_ring_t *)mrh; 1247 vsw_t *vswp = (vsw_t *)arg; 1248 vsw_queue_t *vqp; 1249 mblk_t *bp, *last; 1250 1251 ASSERT(mrh != NULL); 1252 ASSERT(vswp != NULL); 1253 ASSERT(mp != NULL); 1254 1255 D1(vswp, "%s: enter", __func__); 1256 1257 /* 1258 * Find the last element in the mblk chain. 1259 */ 1260 bp = mp; 1261 do { 1262 last = bp; 1263 bp = bp->b_next; 1264 } while (bp != NULL); 1265 1266 /* Get the queue for the packets */ 1267 vqp = ringp->ring_vqp; 1268 1269 /* 1270 * Grab the lock such we can queue the packets. 1271 */ 1272 mutex_enter(&vqp->vq_lock); 1273 1274 if (vqp->vq_state != VSW_QUEUE_RUNNING) { 1275 freemsg(mp); 1276 mutex_exit(&vqp->vq_lock); 1277 goto vsw_rx_queue_cb_exit; 1278 } 1279 1280 /* 1281 * Add the mblk chain to the queue. If there 1282 * is some mblks in the queue, then add the new 1283 * chain to the end. 1284 */ 1285 if (vqp->vq_first == NULL) 1286 vqp->vq_first = mp; 1287 else 1288 vqp->vq_last->b_next = mp; 1289 1290 vqp->vq_last = last; 1291 1292 /* 1293 * Signal the worker thread that there is work to 1294 * do. 1295 */ 1296 cv_signal(&vqp->vq_cv); 1297 1298 /* 1299 * Let go of the lock and exit. 1300 */ 1301 mutex_exit(&vqp->vq_lock); 1302 1303 vsw_rx_queue_cb_exit: 1304 D1(vswp, "%s: exit", __func__); 1305 } 1306 1307 /* 1308 * receive callback routine. Invoked by MAC layer when there 1309 * are pkts being passed up from physical device. 1310 * 1311 * PERF: It may be more efficient when the card is in promisc 1312 * mode to check the dest address of the pkts here (against 1313 * the FDB) rather than checking later. Needs to be investigated. 1314 */ 1315 static void 1316 vsw_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp) 1317 { 1318 _NOTE(ARGUNUSED(mrh)) 1319 1320 vsw_t *vswp = (vsw_t *)arg; 1321 1322 ASSERT(vswp != NULL); 1323 1324 D1(vswp, "vsw_rx_cb: enter"); 1325 1326 /* switch the chain of packets received */ 1327 vswp->vsw_switch_frame(vswp, mp, VSW_PHYSDEV, NULL, NULL); 1328 1329 D1(vswp, "vsw_rx_cb: exit"); 1330 } 1331 1332 /* 1333 * Send a message out over the physical device via the MAC layer. 1334 * 1335 * Returns any mblks that it was unable to transmit. 1336 */ 1337 mblk_t * 1338 vsw_tx_msg(vsw_t *vswp, mblk_t *mp) 1339 { 1340 const mac_txinfo_t *mtp; 1341 1342 mutex_enter(&vswp->mac_lock); 1343 if ((vswp->mh == NULL) || (vswp->mstarted == B_FALSE)) { 1344 1345 DERR(vswp, "vsw_tx_msg: dropping pkts: no tx routine avail"); 1346 mutex_exit(&vswp->mac_lock); 1347 return (mp); 1348 } else { 1349 mtp = vswp->txinfo; 1350 mp = mtp->mt_fn(mtp->mt_arg, mp); 1351 } 1352 mutex_exit(&vswp->mac_lock); 1353 1354 return (mp); 1355 } 1356