1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/vsw.h> 66 67 /* MAC Ring table functions. */ 68 static void vsw_mac_ring_tbl_init(vsw_t *vswp); 69 static void vsw_mac_ring_tbl_destroy(vsw_t *vswp); 70 static void vsw_queue_worker(vsw_mac_ring_t *rrp); 71 static void vsw_queue_stop(vsw_queue_t *vqp); 72 static vsw_queue_t *vsw_queue_create(); 73 static void vsw_queue_destroy(vsw_queue_t *vqp); 74 static void vsw_rx_queue_cb(void *, mac_resource_handle_t, mblk_t *); 75 static void vsw_rx_cb(void *, mac_resource_handle_t, mblk_t *); 76 77 /* MAC layer routines */ 78 static mac_resource_handle_t vsw_mac_ring_add_cb(void *arg, 79 mac_resource_t *mrp); 80 static int vsw_set_hw_addr(vsw_t *, mac_multi_addr_t *); 81 static int vsw_set_hw_promisc(vsw_t *, vsw_port_t *, int); 82 static int vsw_unset_hw_addr(vsw_t *, int); 83 static int vsw_unset_hw_promisc(vsw_t *, vsw_port_t *, int); 84 static int vsw_prog_if(vsw_t *); 85 86 /* Support functions */ 87 static int vsw_prog_ports(vsw_t *); 88 int vsw_set_hw(vsw_t *, vsw_port_t *, int); 89 int vsw_unset_hw(vsw_t *, vsw_port_t *, int); 90 void vsw_reconfig_hw(vsw_t *); 91 int vsw_mac_attach(vsw_t *vswp); 92 void vsw_mac_detach(vsw_t *vswp); 93 int vsw_mac_open(vsw_t *vswp); 94 void vsw_mac_close(vsw_t *vswp); 95 void vsw_unset_addrs(vsw_t *vswp); 96 void vsw_set_addrs(vsw_t *vswp); 97 int vsw_get_hw_maddr(vsw_t *); 98 mblk_t *vsw_tx_msg(vsw_t *, mblk_t *); 99 100 /* 101 * Tunables used in this file. 102 */ 103 extern int vsw_mac_open_retries; 104 extern boolean_t vsw_multi_ring_enable; 105 extern int vsw_mac_rx_rings; 106 107 /* 108 * Check to see if the card supports the setting of multiple unicst 109 * addresses. 110 * 111 * Returns 0 if card supports the programming of multiple unicast addresses, 112 * otherwise returns 1. 113 */ 114 int 115 vsw_get_hw_maddr(vsw_t *vswp) 116 { 117 D1(vswp, "%s: enter", __func__); 118 119 ASSERT(MUTEX_HELD(&vswp->mac_lock)); 120 121 if (vswp->mh == NULL) 122 return (1); 123 124 if (!mac_capab_get(vswp->mh, MAC_CAPAB_MULTIADDRESS, &vswp->maddr)) { 125 cmn_err(CE_NOTE, "!vsw%d: device (%s) does not support " 126 "programming multiple addresses", vswp->instance, 127 vswp->physname); 128 return (1); 129 } 130 131 D2(vswp, "%s: %d addrs : %d free", __func__, 132 vswp->maddr.maddr_naddr, vswp->maddr.maddr_naddrfree); 133 134 D1(vswp, "%s: exit", __func__); 135 136 return (0); 137 } 138 139 /* 140 * Program unicast and multicast addresses of vsw interface and the ports 141 * into the physical device. 142 */ 143 void 144 vsw_set_addrs(vsw_t *vswp) 145 { 146 vsw_port_list_t *plist = &vswp->plist; 147 vsw_port_t *port; 148 mcst_addr_t *mcap; 149 int rv; 150 151 READ_ENTER(&vswp->if_lockrw); 152 153 if (vswp->if_state & VSW_IF_UP) { 154 155 /* program unicst addr of vsw interface in the physdev */ 156 if (vswp->addr_set == VSW_ADDR_UNSET) { 157 mutex_enter(&vswp->hw_lock); 158 rv = vsw_set_hw(vswp, NULL, VSW_LOCALDEV); 159 mutex_exit(&vswp->hw_lock); 160 if (rv != 0) { 161 cmn_err(CE_NOTE, 162 "!vsw%d: failed to program interface " 163 "unicast address\n", vswp->instance); 164 } 165 /* 166 * Notify the MAC layer of the changed address. 167 */ 168 mac_unicst_update(vswp->if_mh, 169 (uint8_t *)&vswp->if_addr); 170 } 171 172 /* program mcast addrs of vsw interface in the physdev */ 173 mutex_enter(&vswp->mca_lock); 174 mutex_enter(&vswp->mac_lock); 175 for (mcap = vswp->mcap; mcap != NULL; mcap = mcap->nextp) { 176 if (mcap->mac_added) 177 continue; 178 rv = mac_multicst_add(vswp->mh, (uchar_t *)&mcap->mca); 179 if (rv == 0) { 180 mcap->mac_added = B_TRUE; 181 } else { 182 cmn_err(CE_NOTE, "!vsw%d: unable to add " 183 "multicast address: %s\n", vswp->instance, 184 ether_sprintf((void *)&mcap->mca)); 185 } 186 } 187 mutex_exit(&vswp->mac_lock); 188 mutex_exit(&vswp->mca_lock); 189 190 } 191 192 RW_EXIT(&vswp->if_lockrw); 193 194 WRITE_ENTER(&plist->lockrw); 195 196 /* program unicast address of ports in the physical device */ 197 mutex_enter(&vswp->hw_lock); 198 for (port = plist->head; port != NULL; port = port->p_next) { 199 if (port->addr_set != VSW_ADDR_UNSET) /* addr already set */ 200 continue; 201 if (vsw_set_hw(vswp, port, VSW_VNETPORT)) { 202 cmn_err(CE_NOTE, 203 "!vsw%d: port:%d failed to set unicast address\n", 204 vswp->instance, port->p_instance); 205 } 206 } 207 mutex_exit(&vswp->hw_lock); 208 209 /* program multicast addresses of ports in the physdev */ 210 for (port = plist->head; port != NULL; port = port->p_next) { 211 mutex_enter(&port->mca_lock); 212 mutex_enter(&vswp->mac_lock); 213 for (mcap = port->mcap; mcap != NULL; mcap = mcap->nextp) { 214 if (mcap->mac_added) 215 continue; 216 rv = mac_multicst_add(vswp->mh, (uchar_t *)&mcap->mca); 217 if (rv == 0) { 218 mcap->mac_added = B_TRUE; 219 } else { 220 cmn_err(CE_NOTE, "!vsw%d: unable to add " 221 "multicast address: %s\n", vswp->instance, 222 ether_sprintf((void *)&mcap->mca)); 223 } 224 } 225 mutex_exit(&vswp->mac_lock); 226 mutex_exit(&port->mca_lock); 227 } 228 229 RW_EXIT(&plist->lockrw); 230 } 231 232 /* 233 * Remove unicast and multicast addresses of vsw interface and the ports 234 * from the physical device. 235 */ 236 void 237 vsw_unset_addrs(vsw_t *vswp) 238 { 239 vsw_port_list_t *plist = &vswp->plist; 240 vsw_port_t *port; 241 mcst_addr_t *mcap; 242 243 READ_ENTER(&vswp->if_lockrw); 244 245 if (vswp->if_state & VSW_IF_UP) { 246 247 /* 248 * Remove unicast addr of vsw interfce 249 * from current physdev 250 */ 251 mutex_enter(&vswp->hw_lock); 252 (void) vsw_unset_hw(vswp, NULL, VSW_LOCALDEV); 253 mutex_exit(&vswp->hw_lock); 254 255 /* 256 * Remove mcast addrs of vsw interface 257 * from current physdev 258 */ 259 mutex_enter(&vswp->mca_lock); 260 mutex_enter(&vswp->mac_lock); 261 for (mcap = vswp->mcap; mcap != NULL; mcap = mcap->nextp) { 262 if (!mcap->mac_added) 263 continue; 264 (void) mac_multicst_remove(vswp->mh, 265 (uchar_t *)&mcap->mca); 266 mcap->mac_added = B_FALSE; 267 } 268 mutex_exit(&vswp->mac_lock); 269 mutex_exit(&vswp->mca_lock); 270 271 } 272 273 RW_EXIT(&vswp->if_lockrw); 274 275 WRITE_ENTER(&plist->lockrw); 276 277 /* 278 * Remove unicast address of ports from the current physical device 279 */ 280 mutex_enter(&vswp->hw_lock); 281 for (port = plist->head; port != NULL; port = port->p_next) { 282 /* Remove address if was programmed into HW. */ 283 if (port->addr_set == VSW_ADDR_UNSET) 284 continue; 285 (void) vsw_unset_hw(vswp, port, VSW_VNETPORT); 286 } 287 mutex_exit(&vswp->hw_lock); 288 289 /* Remove multicast addresses of ports from the current physdev */ 290 for (port = plist->head; port != NULL; port = port->p_next) { 291 mutex_enter(&port->mca_lock); 292 mutex_enter(&vswp->mac_lock); 293 for (mcap = port->mcap; mcap != NULL; mcap = mcap->nextp) { 294 if (!mcap->mac_added) 295 continue; 296 (void) mac_multicst_remove(vswp->mh, 297 (uchar_t *)&mcap->mca); 298 mcap->mac_added = B_FALSE; 299 } 300 mutex_exit(&vswp->mac_lock); 301 mutex_exit(&port->mca_lock); 302 } 303 304 RW_EXIT(&plist->lockrw); 305 } 306 307 /* 308 * Open the underlying physical device for access in layer2 mode. 309 * Returns: 310 * 0 on success 311 * EAGAIN if mac_open() fails due to the device being not available yet. 312 * EIO on any other failures. 313 */ 314 int 315 vsw_mac_open(vsw_t *vswp) 316 { 317 int rv; 318 319 ASSERT(MUTEX_HELD(&vswp->mac_lock)); 320 321 if (vswp->mh != NULL) { 322 /* already open */ 323 return (0); 324 } 325 326 if (vswp->mac_open_retries++ >= vsw_mac_open_retries) { 327 /* exceeded max retries */ 328 return (EIO); 329 } 330 331 if ((rv = mac_open_by_linkname(vswp->physname, &vswp->mh)) != 0) { 332 /* 333 * If mac_open() failed and the error indicates that either 334 * the dlmgmtd door or the device is not available yet, we 335 * return EAGAIN to indicate that mac_open() needs to be 336 * retried. For example, this may happen during boot up, if 337 * the required link aggregation groups(devices) have not 338 * been created yet. 339 */ 340 if (rv == ENOENT || rv == EBADF) { 341 return (EAGAIN); 342 } else { 343 cmn_err(CE_WARN, "vsw%d: device (%s) open failed rv:%x", 344 vswp->instance, vswp->physname, rv); 345 return (EIO); 346 } 347 } 348 349 vswp->mac_open_retries = 0; 350 351 return (0); 352 } 353 354 /* 355 * Close the underlying physical device. 356 */ 357 void 358 vsw_mac_close(vsw_t *vswp) 359 { 360 ASSERT(MUTEX_HELD(&vswp->mac_lock)); 361 362 if (vswp->mh != NULL) { 363 mac_close(vswp->mh); 364 vswp->mh = NULL; 365 } 366 } 367 368 /* 369 * Link into the MAC layer to gain access to the services provided by 370 * the underlying physical device driver (which should also have 371 * registered with the MAC layer). 372 * 373 * Only when in layer 2 mode. 374 */ 375 int 376 vsw_mac_attach(vsw_t *vswp) 377 { 378 D1(vswp, "%s: enter", __func__); 379 380 ASSERT(vswp->mrh == NULL); 381 ASSERT(vswp->mstarted == B_FALSE); 382 ASSERT(vswp->mresources == B_FALSE); 383 384 ASSERT(MUTEX_HELD(&vswp->mac_lock)); 385 386 ASSERT(vswp->mh != NULL); 387 388 D2(vswp, "vsw_mac_attach: using device %s", vswp->physname); 389 390 if (vsw_multi_ring_enable) { 391 /* 392 * Initialize the ring table. 393 */ 394 vsw_mac_ring_tbl_init(vswp); 395 396 /* 397 * Register our rx callback function. 398 */ 399 vswp->mrh = mac_rx_add(vswp->mh, 400 vsw_rx_queue_cb, (void *)vswp); 401 ASSERT(vswp->mrh != NULL); 402 403 /* 404 * Register our mac resource callback. 405 */ 406 mac_resource_set(vswp->mh, vsw_mac_ring_add_cb, (void *)vswp); 407 vswp->mresources = B_TRUE; 408 409 /* 410 * Get the ring resources available to us from 411 * the mac below us. 412 */ 413 mac_resources(vswp->mh); 414 } else { 415 /* 416 * Just register our rx callback function 417 */ 418 vswp->mrh = mac_rx_add(vswp->mh, vsw_rx_cb, (void *)vswp); 419 ASSERT(vswp->mrh != NULL); 420 } 421 422 /* Get the MAC tx fn */ 423 vswp->txinfo = mac_tx_get(vswp->mh); 424 425 /* start the interface */ 426 if (mac_start(vswp->mh) != 0) { 427 cmn_err(CE_WARN, "!vsw%d: Could not start mac interface", 428 vswp->instance); 429 goto mac_fail_exit; 430 } 431 432 vswp->mstarted = B_TRUE; 433 434 D1(vswp, "%s: exit", __func__); 435 return (0); 436 437 mac_fail_exit: 438 vsw_mac_detach(vswp); 439 440 D1(vswp, "%s: exit", __func__); 441 return (1); 442 } 443 444 void 445 vsw_mac_detach(vsw_t *vswp) 446 { 447 D1(vswp, "vsw_mac_detach: enter"); 448 449 ASSERT(vswp != NULL); 450 ASSERT(MUTEX_HELD(&vswp->mac_lock)); 451 452 if (vsw_multi_ring_enable) { 453 vsw_mac_ring_tbl_destroy(vswp); 454 } 455 456 if (vswp->mh != NULL) { 457 if (vswp->mstarted) 458 mac_stop(vswp->mh); 459 if (vswp->mrh != NULL) 460 mac_rx_remove(vswp->mh, vswp->mrh, B_TRUE); 461 if (vswp->mresources) 462 mac_resource_set(vswp->mh, NULL, NULL); 463 } 464 465 vswp->mrh = NULL; 466 vswp->txinfo = NULL; 467 vswp->mstarted = B_FALSE; 468 469 D1(vswp, "vsw_mac_detach: exit"); 470 } 471 472 /* 473 * Depending on the mode specified, the capabilites and capacity 474 * of the underlying device setup the physical device. 475 * 476 * If in layer 3 mode, then do nothing. 477 * 478 * If in layer 2 programmed mode attempt to program the unicast address 479 * associated with the port into the physical device. If this is not 480 * possible due to resource exhaustion or simply because the device does 481 * not support multiple unicast addresses then if required fallback onto 482 * putting the card into promisc mode. 483 * 484 * If in promisc mode then simply set the card into promisc mode. 485 * 486 * Returns 0 success, 1 on failure. 487 */ 488 int 489 vsw_set_hw(vsw_t *vswp, vsw_port_t *port, int type) 490 { 491 mac_multi_addr_t mac_addr; 492 int err; 493 494 D1(vswp, "%s: enter", __func__); 495 496 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 497 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 498 499 if (vswp->smode[vswp->smode_idx] == VSW_LAYER3) 500 return (0); 501 502 if (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC) { 503 return (vsw_set_hw_promisc(vswp, port, type)); 504 } 505 506 /* 507 * Attempt to program the unicast address into the HW. 508 */ 509 mac_addr.mma_addrlen = ETHERADDRL; 510 if (type == VSW_VNETPORT) { 511 ASSERT(port != NULL); 512 ether_copy(&port->p_macaddr, &mac_addr.mma_addr); 513 } else { 514 ether_copy(&vswp->if_addr, &mac_addr.mma_addr); 515 } 516 517 err = vsw_set_hw_addr(vswp, &mac_addr); 518 if (err == ENOSPC) { 519 /* 520 * Mark that attempt should be made to re-config sometime 521 * in future if a port is deleted. 522 */ 523 vswp->recfg_reqd = B_TRUE; 524 525 /* 526 * Only 1 mode specified, nothing more to do. 527 */ 528 if (vswp->smode_num == 1) 529 return (err); 530 531 /* 532 * If promiscuous was next mode specified try to 533 * set the card into that mode. 534 */ 535 if ((vswp->smode_idx <= (vswp->smode_num - 2)) && 536 (vswp->smode[vswp->smode_idx + 1] == 537 VSW_LAYER2_PROMISC)) { 538 vswp->smode_idx += 1; 539 return (vsw_set_hw_promisc(vswp, port, type)); 540 } 541 return (err); 542 } 543 544 if (err != 0) 545 return (err); 546 547 if (type == VSW_VNETPORT) { 548 port->addr_slot = mac_addr.mma_slot; 549 port->addr_set = VSW_ADDR_HW; 550 } else { 551 vswp->addr_slot = mac_addr.mma_slot; 552 vswp->addr_set = VSW_ADDR_HW; 553 } 554 555 D2(vswp, "programmed addr %s into slot %d " 556 "of device %s", ether_sprintf((void *)mac_addr.mma_addr), 557 mac_addr.mma_slot, vswp->physname); 558 559 D1(vswp, "%s: exit", __func__); 560 561 return (0); 562 } 563 564 /* 565 * If in layer 3 mode do nothing. 566 * 567 * If in layer 2 switched mode remove the address from the physical 568 * device. 569 * 570 * If in layer 2 promiscuous mode disable promisc mode. 571 * 572 * Returns 0 on success. 573 */ 574 int 575 vsw_unset_hw(vsw_t *vswp, vsw_port_t *port, int type) 576 { 577 mac_addr_slot_t slot; 578 int rv; 579 580 D1(vswp, "%s: enter", __func__); 581 582 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 583 584 if (vswp->smode[vswp->smode_idx] == VSW_LAYER3) 585 return (0); 586 587 switch (type) { 588 case VSW_VNETPORT: 589 ASSERT(port != NULL); 590 591 if (port->addr_set == VSW_ADDR_PROMISC) { 592 return (vsw_unset_hw_promisc(vswp, port, type)); 593 594 } else if (port->addr_set == VSW_ADDR_HW) { 595 slot = port->addr_slot; 596 if ((rv = vsw_unset_hw_addr(vswp, slot)) == 0) 597 port->addr_set = VSW_ADDR_UNSET; 598 } 599 600 break; 601 602 case VSW_LOCALDEV: 603 if (vswp->addr_set == VSW_ADDR_PROMISC) { 604 return (vsw_unset_hw_promisc(vswp, NULL, type)); 605 606 } else if (vswp->addr_set == VSW_ADDR_HW) { 607 slot = vswp->addr_slot; 608 if ((rv = vsw_unset_hw_addr(vswp, slot)) == 0) 609 vswp->addr_set = VSW_ADDR_UNSET; 610 } 611 612 break; 613 614 default: 615 /* should never happen */ 616 DERR(vswp, "%s: unknown type %d", __func__, type); 617 ASSERT(0); 618 return (1); 619 } 620 621 D1(vswp, "%s: exit", __func__); 622 return (rv); 623 } 624 625 /* 626 * Attempt to program a unicast address into HW. 627 * 628 * Returns 0 on sucess, 1 on failure. 629 */ 630 static int 631 vsw_set_hw_addr(vsw_t *vswp, mac_multi_addr_t *mac) 632 { 633 void *mah; 634 int rv = EINVAL; 635 636 D1(vswp, "%s: enter", __func__); 637 638 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 639 640 if (vswp->maddr.maddr_handle == NULL) 641 return (rv); 642 643 mah = vswp->maddr.maddr_handle; 644 645 rv = vswp->maddr.maddr_add(mah, mac); 646 647 if (rv == 0) 648 return (rv); 649 650 /* 651 * Its okay for the add to fail because we have exhausted 652 * all the resouces in the hardware device. Any other error 653 * we want to flag. 654 */ 655 if (rv != ENOSPC) { 656 cmn_err(CE_NOTE, "!vsw%d: error programming " 657 "address %s into HW err (%d)", 658 vswp->instance, ether_sprintf((void *)mac->mma_addr), rv); 659 } 660 D1(vswp, "%s: exit", __func__); 661 return (rv); 662 } 663 664 /* 665 * Remove a unicast mac address which has previously been programmed 666 * into HW. 667 * 668 * Returns 0 on sucess, 1 on failure. 669 */ 670 static int 671 vsw_unset_hw_addr(vsw_t *vswp, int slot) 672 { 673 void *mah; 674 int rv; 675 676 D1(vswp, "%s: enter", __func__); 677 678 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 679 ASSERT(slot >= 0); 680 681 if (vswp->maddr.maddr_handle == NULL) 682 return (1); 683 684 mah = vswp->maddr.maddr_handle; 685 686 rv = vswp->maddr.maddr_remove(mah, slot); 687 if (rv != 0) { 688 DWARN(vswp, "%s: unable to remove address " 689 "from slot %d in device %s (err %d)", 690 __func__, slot, vswp->physname, rv); 691 return (1); 692 } 693 694 D2(vswp, "removed addr from slot %d in device %s", 695 slot, vswp->physname); 696 697 D1(vswp, "%s: exit", __func__); 698 return (0); 699 } 700 701 /* 702 * Set network card into promisc mode. 703 * 704 * Returns 0 on success, 1 on failure. 705 */ 706 static int 707 vsw_set_hw_promisc(vsw_t *vswp, vsw_port_t *port, int type) 708 { 709 D1(vswp, "%s: enter", __func__); 710 711 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 712 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 713 714 mutex_enter(&vswp->mac_lock); 715 if (vswp->mh == NULL) { 716 mutex_exit(&vswp->mac_lock); 717 return (1); 718 } 719 720 if (vswp->promisc_cnt++ == 0) { 721 if (mac_promisc_set(vswp->mh, B_TRUE, MAC_DEVPROMISC) != 0) { 722 vswp->promisc_cnt--; 723 mutex_exit(&vswp->mac_lock); 724 return (1); 725 } 726 cmn_err(CE_NOTE, "!vsw%d: switching device %s into " 727 "promiscuous mode", vswp->instance, vswp->physname); 728 } 729 mutex_exit(&vswp->mac_lock); 730 731 if (type == VSW_VNETPORT) { 732 ASSERT(port != NULL); 733 port->addr_set = VSW_ADDR_PROMISC; 734 } else { 735 vswp->addr_set = VSW_ADDR_PROMISC; 736 } 737 738 D1(vswp, "%s: exit", __func__); 739 740 return (0); 741 } 742 743 /* 744 * Turn off promiscuous mode on network card. 745 * 746 * Returns 0 on success, 1 on failure. 747 */ 748 static int 749 vsw_unset_hw_promisc(vsw_t *vswp, vsw_port_t *port, int type) 750 { 751 vsw_port_list_t *plist = &vswp->plist; 752 753 D2(vswp, "%s: enter", __func__); 754 755 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 756 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 757 758 mutex_enter(&vswp->mac_lock); 759 if (vswp->mh == NULL) { 760 mutex_exit(&vswp->mac_lock); 761 return (1); 762 } 763 764 if (--vswp->promisc_cnt == 0) { 765 if (mac_promisc_set(vswp->mh, B_FALSE, MAC_DEVPROMISC) != 0) { 766 vswp->promisc_cnt++; 767 mutex_exit(&vswp->mac_lock); 768 return (1); 769 } 770 771 /* 772 * We are exiting promisc mode either because we were 773 * only in promisc mode because we had failed over from 774 * switched mode due to HW resource issues, or the user 775 * wanted the card in promisc mode for all the ports and 776 * the last port is now being deleted. Tweak the message 777 * accordingly. 778 */ 779 if (plist->num_ports != 0) { 780 cmn_err(CE_NOTE, "!vsw%d: switching device %s back to " 781 "programmed mode", vswp->instance, vswp->physname); 782 } else { 783 cmn_err(CE_NOTE, "!vsw%d: switching device %s out of " 784 "promiscuous mode", vswp->instance, vswp->physname); 785 } 786 } 787 mutex_exit(&vswp->mac_lock); 788 789 if (type == VSW_VNETPORT) { 790 ASSERT(port != NULL); 791 ASSERT(port->addr_set == VSW_ADDR_PROMISC); 792 port->addr_set = VSW_ADDR_UNSET; 793 } else { 794 ASSERT(vswp->addr_set == VSW_ADDR_PROMISC); 795 vswp->addr_set = VSW_ADDR_UNSET; 796 } 797 798 D1(vswp, "%s: exit", __func__); 799 return (0); 800 } 801 802 /* 803 * Determine whether or not we are operating in our prefered 804 * mode and if not whether the physical resources now allow us 805 * to operate in it. 806 * 807 * If a port is being removed should only be invoked after port has been 808 * removed from the port list. 809 */ 810 void 811 vsw_reconfig_hw(vsw_t *vswp) 812 { 813 int s_idx; 814 815 D1(vswp, "%s: enter", __func__); 816 817 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 818 819 if (vswp->maddr.maddr_handle == NULL) { 820 return; 821 } 822 823 /* 824 * If we are in layer 2 (i.e. switched) or would like to be 825 * in layer 2 then check if any ports or the vswitch itself 826 * need to be programmed into the HW. 827 * 828 * This can happen in two cases - switched was specified as 829 * the prefered mode of operation but we exhausted the HW 830 * resources and so failed over to the next specifed mode, 831 * or switched was the only mode specified so after HW 832 * resources were exhausted there was nothing more we 833 * could do. 834 */ 835 if (vswp->smode_idx > 0) 836 s_idx = vswp->smode_idx - 1; 837 else 838 s_idx = vswp->smode_idx; 839 840 if (vswp->smode[s_idx] != VSW_LAYER2) { 841 return; 842 } 843 844 D2(vswp, "%s: attempting reconfig..", __func__); 845 846 /* 847 * First, attempt to set the vswitch mac address into HW, 848 * if required. 849 */ 850 if (vsw_prog_if(vswp)) { 851 return; 852 } 853 854 /* 855 * Next, attempt to set any ports which have not yet been 856 * programmed into HW. 857 */ 858 if (vsw_prog_ports(vswp)) { 859 return; 860 } 861 862 /* 863 * By now we know that have programmed all desired ports etc 864 * into HW, so safe to mark reconfiguration as complete. 865 */ 866 vswp->recfg_reqd = B_FALSE; 867 868 vswp->smode_idx = s_idx; 869 870 D1(vswp, "%s: exit", __func__); 871 } 872 873 /* 874 * Check to see if vsw itself is plumbed, and if so whether or not 875 * its mac address should be written into HW. 876 * 877 * Returns 0 if could set address, or didn't have to set it. 878 * Returns 1 if failed to set address. 879 */ 880 static int 881 vsw_prog_if(vsw_t *vswp) 882 { 883 mac_multi_addr_t addr; 884 885 D1(vswp, "%s: enter", __func__); 886 887 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 888 889 READ_ENTER(&vswp->if_lockrw); 890 if ((vswp->if_state & VSW_IF_UP) && 891 (vswp->addr_set != VSW_ADDR_HW)) { 892 893 addr.mma_addrlen = ETHERADDRL; 894 ether_copy(&vswp->if_addr, &addr.mma_addr); 895 896 if (vsw_set_hw_addr(vswp, &addr) != 0) { 897 RW_EXIT(&vswp->if_lockrw); 898 return (1); 899 } 900 901 vswp->addr_slot = addr.mma_slot; 902 903 /* 904 * If previously when plumbed had had to place 905 * interface into promisc mode, now reverse that. 906 * 907 * Note that interface will only actually be set into 908 * non-promisc mode when last port/interface has been 909 * programmed into HW. 910 */ 911 if (vswp->addr_set == VSW_ADDR_PROMISC) 912 (void) vsw_unset_hw_promisc(vswp, NULL, VSW_LOCALDEV); 913 914 vswp->addr_set = VSW_ADDR_HW; 915 } 916 RW_EXIT(&vswp->if_lockrw); 917 918 D1(vswp, "%s: exit", __func__); 919 return (0); 920 } 921 922 /* 923 * Scan the port list for any ports which have not yet been set 924 * into HW. For those found attempt to program their mac addresses 925 * into the physical device. 926 * 927 * Returns 0 if able to program all required ports (can be 0) into HW. 928 * Returns 1 if failed to set at least one mac address. 929 */ 930 static int 931 vsw_prog_ports(vsw_t *vswp) 932 { 933 mac_multi_addr_t addr; 934 vsw_port_list_t *plist = &vswp->plist; 935 vsw_port_t *tp; 936 int rv = 0; 937 938 D1(vswp, "%s: enter", __func__); 939 940 ASSERT(MUTEX_HELD(&vswp->hw_lock)); 941 942 READ_ENTER(&plist->lockrw); 943 for (tp = plist->head; tp != NULL; tp = tp->p_next) { 944 if (tp->addr_set != VSW_ADDR_HW) { 945 addr.mma_addrlen = ETHERADDRL; 946 ether_copy(&tp->p_macaddr, &addr.mma_addr); 947 948 if (vsw_set_hw_addr(vswp, &addr) != 0) { 949 rv = 1; 950 break; 951 } 952 953 tp->addr_slot = addr.mma_slot; 954 955 /* 956 * If when this port had first attached we had 957 * had to place the interface into promisc mode, 958 * then now reverse that. 959 * 960 * Note that the interface will not actually 961 * change to non-promisc mode until all ports 962 * have been programmed. 963 */ 964 if (tp->addr_set == VSW_ADDR_PROMISC) 965 (void) vsw_unset_hw_promisc(vswp, 966 tp, VSW_VNETPORT); 967 968 tp->addr_set = VSW_ADDR_HW; 969 } 970 } 971 RW_EXIT(&plist->lockrw); 972 973 D1(vswp, "%s: exit", __func__); 974 return (rv); 975 } 976 977 static void 978 vsw_mac_ring_tbl_entry_init(vsw_t *vswp, vsw_mac_ring_t *ringp) 979 { 980 ringp->ring_state = VSW_MAC_RING_FREE; 981 ringp->ring_arg = NULL; 982 ringp->ring_blank = NULL; 983 ringp->ring_vqp = NULL; 984 ringp->ring_vswp = vswp; 985 } 986 987 static void 988 vsw_mac_ring_tbl_init(vsw_t *vswp) 989 { 990 int i; 991 992 mutex_init(&vswp->mac_ring_lock, NULL, MUTEX_DRIVER, NULL); 993 994 vswp->mac_ring_tbl_sz = vsw_mac_rx_rings; 995 vswp->mac_ring_tbl = 996 kmem_alloc(vsw_mac_rx_rings * sizeof (vsw_mac_ring_t), KM_SLEEP); 997 998 for (i = 0; i < vswp->mac_ring_tbl_sz; i++) 999 vsw_mac_ring_tbl_entry_init(vswp, &vswp->mac_ring_tbl[i]); 1000 } 1001 1002 static void 1003 vsw_mac_ring_tbl_destroy(vsw_t *vswp) 1004 { 1005 int i; 1006 vsw_mac_ring_t *ringp; 1007 1008 mutex_enter(&vswp->mac_ring_lock); 1009 for (i = 0; i < vswp->mac_ring_tbl_sz; i++) { 1010 ringp = &vswp->mac_ring_tbl[i]; 1011 1012 if (ringp->ring_state != VSW_MAC_RING_FREE) { 1013 /* 1014 * Destroy the queue. 1015 */ 1016 vsw_queue_stop(ringp->ring_vqp); 1017 vsw_queue_destroy(ringp->ring_vqp); 1018 1019 /* 1020 * Re-initialize the structure. 1021 */ 1022 vsw_mac_ring_tbl_entry_init(vswp, ringp); 1023 } 1024 } 1025 mutex_exit(&vswp->mac_ring_lock); 1026 1027 mutex_destroy(&vswp->mac_ring_lock); 1028 kmem_free(vswp->mac_ring_tbl, 1029 vswp->mac_ring_tbl_sz * sizeof (vsw_mac_ring_t)); 1030 vswp->mac_ring_tbl_sz = 0; 1031 } 1032 1033 /* 1034 * Handle resource add callbacks from the driver below. 1035 */ 1036 static mac_resource_handle_t 1037 vsw_mac_ring_add_cb(void *arg, mac_resource_t *mrp) 1038 { 1039 vsw_t *vswp = (vsw_t *)arg; 1040 mac_rx_fifo_t *mrfp = (mac_rx_fifo_t *)mrp; 1041 vsw_mac_ring_t *ringp; 1042 vsw_queue_t *vqp; 1043 int i; 1044 1045 ASSERT(vswp != NULL); 1046 ASSERT(mrp != NULL); 1047 ASSERT(vswp->mac_ring_tbl != NULL); 1048 1049 D1(vswp, "%s: enter", __func__); 1050 1051 /* 1052 * Check to make sure we have the correct resource type. 1053 */ 1054 if (mrp->mr_type != MAC_RX_FIFO) 1055 return (NULL); 1056 1057 /* 1058 * Find a open entry in the ring table. 1059 */ 1060 mutex_enter(&vswp->mac_ring_lock); 1061 for (i = 0; i < vswp->mac_ring_tbl_sz; i++) { 1062 ringp = &vswp->mac_ring_tbl[i]; 1063 1064 /* 1065 * Check for an empty slot, if found, then setup queue 1066 * and thread. 1067 */ 1068 if (ringp->ring_state == VSW_MAC_RING_FREE) { 1069 /* 1070 * Create the queue for this ring. 1071 */ 1072 vqp = vsw_queue_create(); 1073 1074 /* 1075 * Initialize the ring data structure. 1076 */ 1077 ringp->ring_vqp = vqp; 1078 ringp->ring_arg = mrfp->mrf_arg; 1079 ringp->ring_blank = mrfp->mrf_blank; 1080 ringp->ring_state = VSW_MAC_RING_INUSE; 1081 1082 /* 1083 * Create the worker thread. 1084 */ 1085 vqp->vq_worker = thread_create(NULL, 0, 1086 vsw_queue_worker, ringp, 0, &p0, 1087 TS_RUN, minclsyspri); 1088 if (vqp->vq_worker == NULL) { 1089 vsw_queue_destroy(vqp); 1090 vsw_mac_ring_tbl_entry_init(vswp, ringp); 1091 ringp = NULL; 1092 } 1093 1094 if (ringp != NULL) { 1095 /* 1096 * Make sure thread get's running state for 1097 * this ring. 1098 */ 1099 mutex_enter(&vqp->vq_lock); 1100 while ((vqp->vq_state != VSW_QUEUE_RUNNING) && 1101 (vqp->vq_state != VSW_QUEUE_DRAINED)) { 1102 cv_wait(&vqp->vq_cv, &vqp->vq_lock); 1103 } 1104 1105 /* 1106 * If the thread is not running, cleanup. 1107 */ 1108 if (vqp->vq_state == VSW_QUEUE_DRAINED) { 1109 vsw_queue_destroy(vqp); 1110 vsw_mac_ring_tbl_entry_init(vswp, 1111 ringp); 1112 ringp = NULL; 1113 } 1114 mutex_exit(&vqp->vq_lock); 1115 } 1116 1117 mutex_exit(&vswp->mac_ring_lock); 1118 D1(vswp, "%s: exit", __func__); 1119 return ((mac_resource_handle_t)ringp); 1120 } 1121 } 1122 mutex_exit(&vswp->mac_ring_lock); 1123 1124 /* 1125 * No slots in the ring table available. 1126 */ 1127 D1(vswp, "%s: exit", __func__); 1128 return (NULL); 1129 } 1130 1131 static void 1132 vsw_queue_stop(vsw_queue_t *vqp) 1133 { 1134 mutex_enter(&vqp->vq_lock); 1135 1136 if (vqp->vq_state == VSW_QUEUE_RUNNING) { 1137 vqp->vq_state = VSW_QUEUE_STOP; 1138 cv_signal(&vqp->vq_cv); 1139 1140 while (vqp->vq_state != VSW_QUEUE_DRAINED) 1141 cv_wait(&vqp->vq_cv, &vqp->vq_lock); 1142 } 1143 1144 vqp->vq_state = VSW_QUEUE_STOPPED; 1145 1146 mutex_exit(&vqp->vq_lock); 1147 } 1148 1149 static vsw_queue_t * 1150 vsw_queue_create() 1151 { 1152 vsw_queue_t *vqp; 1153 1154 vqp = kmem_zalloc(sizeof (vsw_queue_t), KM_SLEEP); 1155 1156 mutex_init(&vqp->vq_lock, NULL, MUTEX_DRIVER, NULL); 1157 cv_init(&vqp->vq_cv, NULL, CV_DRIVER, NULL); 1158 vqp->vq_first = NULL; 1159 vqp->vq_last = NULL; 1160 vqp->vq_state = VSW_QUEUE_STOPPED; 1161 1162 return (vqp); 1163 } 1164 1165 static void 1166 vsw_queue_destroy(vsw_queue_t *vqp) 1167 { 1168 cv_destroy(&vqp->vq_cv); 1169 mutex_destroy(&vqp->vq_lock); 1170 kmem_free(vqp, sizeof (vsw_queue_t)); 1171 } 1172 1173 static void 1174 vsw_queue_worker(vsw_mac_ring_t *rrp) 1175 { 1176 mblk_t *mp; 1177 vsw_queue_t *vqp = rrp->ring_vqp; 1178 vsw_t *vswp = rrp->ring_vswp; 1179 1180 mutex_enter(&vqp->vq_lock); 1181 1182 ASSERT(vqp->vq_state == VSW_QUEUE_STOPPED); 1183 1184 /* 1185 * Set the state to running, since the thread is now active. 1186 */ 1187 vqp->vq_state = VSW_QUEUE_RUNNING; 1188 cv_signal(&vqp->vq_cv); 1189 1190 while (vqp->vq_state == VSW_QUEUE_RUNNING) { 1191 /* 1192 * Wait for work to do or the state has changed 1193 * to not running. 1194 */ 1195 while ((vqp->vq_state == VSW_QUEUE_RUNNING) && 1196 (vqp->vq_first == NULL)) { 1197 cv_wait(&vqp->vq_cv, &vqp->vq_lock); 1198 } 1199 1200 /* 1201 * Process packets that we received from the interface. 1202 */ 1203 if (vqp->vq_first != NULL) { 1204 mp = vqp->vq_first; 1205 1206 vqp->vq_first = NULL; 1207 vqp->vq_last = NULL; 1208 1209 mutex_exit(&vqp->vq_lock); 1210 1211 /* switch the chain of packets received */ 1212 vswp->vsw_switch_frame(vswp, mp, 1213 VSW_PHYSDEV, NULL, NULL); 1214 1215 mutex_enter(&vqp->vq_lock); 1216 } 1217 } 1218 1219 /* 1220 * We are drained and signal we are done. 1221 */ 1222 vqp->vq_state = VSW_QUEUE_DRAINED; 1223 cv_signal(&vqp->vq_cv); 1224 1225 /* 1226 * Exit lock and drain the remaining packets. 1227 */ 1228 mutex_exit(&vqp->vq_lock); 1229 1230 /* 1231 * Exit the thread 1232 */ 1233 thread_exit(); 1234 } 1235 1236 /* 1237 * static void 1238 * vsw_rx_queue_cb() - Receive callback routine when 1239 * vsw_multi_ring_enable is non-zero. Queue the packets 1240 * to a packet queue for a worker thread to process. 1241 */ 1242 static void 1243 vsw_rx_queue_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp) 1244 { 1245 vsw_mac_ring_t *ringp = (vsw_mac_ring_t *)mrh; 1246 vsw_t *vswp = (vsw_t *)arg; 1247 vsw_queue_t *vqp; 1248 mblk_t *bp, *last; 1249 1250 ASSERT(mrh != NULL); 1251 ASSERT(vswp != NULL); 1252 ASSERT(mp != NULL); 1253 1254 D1(vswp, "%s: enter", __func__); 1255 1256 /* 1257 * Find the last element in the mblk chain. 1258 */ 1259 bp = mp; 1260 do { 1261 last = bp; 1262 bp = bp->b_next; 1263 } while (bp != NULL); 1264 1265 /* Get the queue for the packets */ 1266 vqp = ringp->ring_vqp; 1267 1268 /* 1269 * Grab the lock such we can queue the packets. 1270 */ 1271 mutex_enter(&vqp->vq_lock); 1272 1273 if (vqp->vq_state != VSW_QUEUE_RUNNING) { 1274 freemsgchain(mp); 1275 mutex_exit(&vqp->vq_lock); 1276 goto vsw_rx_queue_cb_exit; 1277 } 1278 1279 /* 1280 * Add the mblk chain to the queue. If there 1281 * is some mblks in the queue, then add the new 1282 * chain to the end. 1283 */ 1284 if (vqp->vq_first == NULL) 1285 vqp->vq_first = mp; 1286 else 1287 vqp->vq_last->b_next = mp; 1288 1289 vqp->vq_last = last; 1290 1291 /* 1292 * Signal the worker thread that there is work to 1293 * do. 1294 */ 1295 cv_signal(&vqp->vq_cv); 1296 1297 /* 1298 * Let go of the lock and exit. 1299 */ 1300 mutex_exit(&vqp->vq_lock); 1301 1302 vsw_rx_queue_cb_exit: 1303 D1(vswp, "%s: exit", __func__); 1304 } 1305 1306 /* 1307 * receive callback routine. Invoked by MAC layer when there 1308 * are pkts being passed up from physical device. 1309 * 1310 * PERF: It may be more efficient when the card is in promisc 1311 * mode to check the dest address of the pkts here (against 1312 * the FDB) rather than checking later. Needs to be investigated. 1313 */ 1314 static void 1315 vsw_rx_cb(void *arg, mac_resource_handle_t mrh, mblk_t *mp) 1316 { 1317 _NOTE(ARGUNUSED(mrh)) 1318 1319 vsw_t *vswp = (vsw_t *)arg; 1320 1321 ASSERT(vswp != NULL); 1322 1323 D1(vswp, "vsw_rx_cb: enter"); 1324 1325 /* switch the chain of packets received */ 1326 vswp->vsw_switch_frame(vswp, mp, VSW_PHYSDEV, NULL, NULL); 1327 1328 D1(vswp, "vsw_rx_cb: exit"); 1329 } 1330 1331 /* 1332 * Send a message out over the physical device via the MAC layer. 1333 * 1334 * Returns any mblks that it was unable to transmit. 1335 */ 1336 mblk_t * 1337 vsw_tx_msg(vsw_t *vswp, mblk_t *mp) 1338 { 1339 const mac_txinfo_t *mtp; 1340 1341 mutex_enter(&vswp->mac_lock); 1342 if ((vswp->mh == NULL) || (vswp->mstarted == B_FALSE)) { 1343 1344 DERR(vswp, "vsw_tx_msg: dropping pkts: no tx routine avail"); 1345 mutex_exit(&vswp->mac_lock); 1346 return (mp); 1347 } else { 1348 mtp = vswp->txinfo; 1349 mp = mtp->mt_fn(mtp->mt_arg, mp); 1350 } 1351 mutex_exit(&vswp->mac_lock); 1352 1353 return (mp); 1354 } 1355