1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/debug.h> 30 #include <sys/time.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/user.h> 34 #include <sys/stropts.h> 35 #include <sys/stream.h> 36 #include <sys/strlog.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/kmem.h> 41 #include <sys/conf.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/ksynch.h> 45 #include <sys/stat.h> 46 #include <sys/kstat.h> 47 #include <sys/vtrace.h> 48 #include <sys/strsun.h> 49 #include <sys/dlpi.h> 50 #include <sys/ethernet.h> 51 #include <net/if.h> 52 #include <sys/varargs.h> 53 #include <sys/machsystm.h> 54 #include <sys/modctl.h> 55 #include <sys/modhash.h> 56 #include <sys/mac.h> 57 #include <sys/mac_ether.h> 58 #include <sys/taskq.h> 59 #include <sys/note.h> 60 #include <sys/mach_descrip.h> 61 #include <sys/mdeg.h> 62 #include <sys/ldc.h> 63 #include <sys/vsw_fdb.h> 64 #include <sys/vsw.h> 65 #include <sys/vio_mailbox.h> 66 #include <sys/vnet_mailbox.h> 67 #include <sys/vnet_common.h> 68 #include <sys/vio_util.h> 69 #include <sys/sdt.h> 70 #include <sys/atomic.h> 71 #include <sys/vlan.h> 72 73 /* Switching setup routines */ 74 void vsw_setup_switching_thread(void *arg); 75 int vsw_setup_switching_start(vsw_t *vswp); 76 void vsw_setup_switching_stop(vsw_t *vswp); 77 int vsw_setup_switching(vsw_t *); 78 void vsw_setup_switching_post_process(vsw_t *vswp); 79 void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, 80 vsw_port_t *port, mac_resource_handle_t mrh); 81 static int vsw_setup_layer2(vsw_t *); 82 static int vsw_setup_layer3(vsw_t *); 83 84 /* Switching/data transmit routines */ 85 static void vsw_switch_l2_frame_mac_client(vsw_t *vswp, mblk_t *mp, int caller, 86 vsw_port_t *port, mac_resource_handle_t); 87 static void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 88 vsw_port_t *port, mac_resource_handle_t); 89 static void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 90 vsw_port_t *port, mac_resource_handle_t); 91 static int vsw_forward_all(vsw_t *vswp, mblk_t *mp, 92 int caller, vsw_port_t *port); 93 static int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, 94 int caller, vsw_port_t *port); 95 96 /* VLAN routines */ 97 void vsw_create_vlans(void *arg, int type); 98 void vsw_destroy_vlans(void *arg, int type); 99 void vsw_vlan_add_ids(void *arg, int type); 100 void vsw_vlan_remove_ids(void *arg, int type); 101 static void vsw_vlan_create_hash(void *arg, int type); 102 static void vsw_vlan_destroy_hash(void *arg, int type); 103 boolean_t vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp, 104 uint16_t *vidp); 105 mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 106 uint32_t vsw_vlan_frames_untag(void *arg, int type, mblk_t **np, mblk_t **npt); 107 boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid); 108 109 /* Forwarding database (FDB) routines */ 110 void vsw_fdbe_add(vsw_t *vswp, void *port); 111 void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr); 112 static vsw_fdbe_t *vsw_fdbe_find(vsw_t *vswp, struct ether_addr *); 113 static void vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val); 114 115 int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *); 116 int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 117 int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 118 void vsw_del_mcst_vsw(vsw_t *); 119 120 /* Support functions */ 121 static mblk_t *vsw_dupmsgchain(mblk_t *mp); 122 static mblk_t *vsw_get_same_dest_list(struct ether_header *ehp, mblk_t **mpp); 123 124 125 /* 126 * Functions imported from other files. 127 */ 128 extern mblk_t *vsw_tx_msg(vsw_t *, mblk_t *, int, vsw_port_t *); 129 extern mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t); 130 extern int vsw_mac_open(vsw_t *vswp); 131 extern void vsw_mac_close(vsw_t *vswp); 132 extern void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 133 mblk_t *mp, vsw_macrx_flags_t flags); 134 extern void vsw_set_addrs(vsw_t *vswp); 135 extern int vsw_portsend(vsw_port_t *port, mblk_t *mp); 136 extern void vsw_hio_init(vsw_t *vswp); 137 extern void vsw_hio_start_ports(vsw_t *vswp); 138 extern int vsw_mac_multicast_add(vsw_t *vswp, vsw_port_t *port, 139 mcst_addr_t *mcst_p, int type); 140 extern void vsw_mac_multicast_remove(vsw_t *vswp, vsw_port_t *port, 141 mcst_addr_t *mcst_p, int type); 142 extern void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state); 143 extern void vsw_physlink_update_ports(vsw_t *vswp); 144 145 /* 146 * Tunables used in this file. 147 */ 148 extern int vsw_setup_switching_delay; 149 extern uint32_t vsw_vlan_nchains; 150 extern uint32_t vsw_fdbe_refcnt_delay; 151 152 #define VSW_FDBE_REFHOLD(p) \ 153 { \ 154 atomic_inc_32(&(p)->refcnt); \ 155 ASSERT((p)->refcnt != 0); \ 156 } 157 158 #define VSW_FDBE_REFRELE(p) \ 159 { \ 160 ASSERT((p)->refcnt != 0); \ 161 atomic_dec_32(&(p)->refcnt); \ 162 } 163 164 /* 165 * Thread to setup switching mode. This thread is created during vsw_attach() 166 * initially. It invokes vsw_setup_switching() and keeps retrying while the 167 * returned value is EAGAIN. The thread exits when the switching mode setup is 168 * done successfully or when the error returned is not EAGAIN. This thread may 169 * also get created from vsw_update_md_prop() if the switching mode needs to be 170 * updated. 171 */ 172 void 173 vsw_setup_switching_thread(void *arg) 174 { 175 callb_cpr_t cprinfo; 176 vsw_t *vswp = (vsw_t *)arg; 177 clock_t wait_time; 178 clock_t xwait; 179 clock_t wait_rv; 180 int rv; 181 182 /* wait time used on successive retries */ 183 xwait = drv_usectohz(vsw_setup_switching_delay * MICROSEC); 184 185 CALLB_CPR_INIT(&cprinfo, &vswp->sw_thr_lock, callb_generic_cpr, 186 "vsw_setup_sw_thread"); 187 188 mutex_enter(&vswp->sw_thr_lock); 189 190 while ((vswp->sw_thr_flags & VSW_SWTHR_STOP) == 0) { 191 192 CALLB_CPR_SAFE_BEGIN(&cprinfo); 193 194 /* Wait for sometime before (re)trying setup_switching() */ 195 wait_time = ddi_get_lbolt() + xwait; 196 while ((vswp->sw_thr_flags & VSW_SWTHR_STOP) == 0) { 197 wait_rv = cv_timedwait(&vswp->sw_thr_cv, 198 &vswp->sw_thr_lock, wait_time); 199 if (wait_rv == -1) { /* timed out */ 200 break; 201 } 202 } 203 204 CALLB_CPR_SAFE_END(&cprinfo, &vswp->sw_thr_lock) 205 206 if ((vswp->sw_thr_flags & VSW_SWTHR_STOP) != 0) { 207 /* 208 * If there is a stop request, process that first and 209 * exit the loop. Continue to hold the mutex which gets 210 * released in CALLB_CPR_EXIT(). 211 */ 212 break; 213 } 214 215 mutex_exit(&vswp->sw_thr_lock); 216 rv = vsw_setup_switching(vswp); 217 if (rv == 0) { 218 vsw_setup_switching_post_process(vswp); 219 } 220 mutex_enter(&vswp->sw_thr_lock); 221 if (rv != EAGAIN) { 222 break; 223 } 224 225 } 226 227 vswp->sw_thr_flags &= ~VSW_SWTHR_STOP; 228 vswp->sw_thread = NULL; 229 CALLB_CPR_EXIT(&cprinfo); 230 thread_exit(); 231 } 232 233 /* 234 * Create a thread to setup the switching mode. 235 * Returns 0 on success; 1 on failure. 236 */ 237 int 238 vsw_setup_switching_start(vsw_t *vswp) 239 { 240 mutex_enter(&vswp->sw_thr_lock); 241 242 vswp->sw_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 243 vsw_setup_switching_thread, vswp, 0, &p0, TS_RUN, minclsyspri); 244 245 if (vswp->sw_thread == NULL) { 246 mutex_exit(&vswp->sw_thr_lock); 247 return (1); 248 } 249 250 mutex_exit(&vswp->sw_thr_lock); 251 return (0); 252 } 253 254 /* 255 * Stop the thread to setup switching mode. 256 */ 257 void 258 vsw_setup_switching_stop(vsw_t *vswp) 259 { 260 kt_did_t tid = 0; 261 262 /* 263 * Signal the setup_switching thread to stop and wait until it stops. 264 */ 265 mutex_enter(&vswp->sw_thr_lock); 266 267 if (vswp->sw_thread != NULL) { 268 tid = vswp->sw_thread->t_did; 269 vswp->sw_thr_flags |= VSW_SWTHR_STOP; 270 cv_signal(&vswp->sw_thr_cv); 271 } 272 273 mutex_exit(&vswp->sw_thr_lock); 274 275 if (tid != 0) 276 thread_join(tid); 277 278 (void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE); 279 280 vswp->mac_open_retries = 0; 281 } 282 283 /* 284 * Setup the required switching mode. 285 * Returns: 286 * 0 on success. 287 * EAGAIN if retry is needed. 288 * 1 on all other failures. 289 */ 290 int 291 vsw_setup_switching(vsw_t *vswp) 292 { 293 int rv = 1; 294 295 D1(vswp, "%s: enter", __func__); 296 297 /* 298 * Select best switching mode. 299 * This is done as this routine can be called from the timeout 300 * handler to retry setting up a specific mode. Currently only 301 * the function which sets up layer2/promisc mode returns EAGAIN 302 * if the underlying network device is not available yet, causing 303 * retries. 304 */ 305 if (vswp->smode & VSW_LAYER2) { 306 rv = vsw_setup_layer2(vswp); 307 } else if (vswp->smode & VSW_LAYER3) { 308 rv = vsw_setup_layer3(vswp); 309 } else { 310 DERR(vswp, "unknown switch mode"); 311 rv = 1; 312 } 313 314 if (rv && (rv != EAGAIN)) { 315 cmn_err(CE_WARN, "!vsw%d: Unable to setup specified " 316 "switching mode", vswp->instance); 317 } else if (rv == 0) { 318 (void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE); 319 } 320 321 D2(vswp, "%s: Operating in mode %d", __func__, 322 vswp->smode); 323 324 D1(vswp, "%s: exit", __func__); 325 326 return (rv); 327 } 328 329 /* 330 * Setup for layer 2 switching. 331 * 332 * Returns: 333 * 0 on success. 334 * EAGAIN if retry is needed. 335 * EIO on all other failures. 336 */ 337 static int 338 vsw_setup_layer2(vsw_t *vswp) 339 { 340 int rv; 341 342 D1(vswp, "%s: enter", __func__); 343 344 /* 345 * Until the network device is successfully opened, 346 * set the switching to use vsw_switch_l2_frame. 347 */ 348 vswp->vsw_switch_frame = vsw_switch_l2_frame; 349 vswp->mac_cl_switching = B_FALSE; 350 351 rv = strlen(vswp->physname); 352 if (rv == 0) { 353 /* 354 * Physical device name is NULL, which is 355 * required for layer 2. 356 */ 357 cmn_err(CE_WARN, "!vsw%d: no network device name specified", 358 vswp->instance); 359 return (EIO); 360 } 361 362 mutex_enter(&vswp->mac_lock); 363 364 rv = vsw_mac_open(vswp); 365 if (rv != 0) { 366 if (rv != EAGAIN) { 367 cmn_err(CE_WARN, "!vsw%d: Unable to open network " 368 "device: %s\n", vswp->instance, vswp->physname); 369 } 370 mutex_exit(&vswp->mac_lock); 371 return (rv); 372 } 373 374 /* 375 * Now we can use the mac client switching, so set the switching 376 * function to use vsw_switch_l2_frame_mac_client(), which simply 377 * sends the packets to MAC layer for switching. 378 */ 379 vswp->vsw_switch_frame = vsw_switch_l2_frame_mac_client; 380 vswp->mac_cl_switching = B_TRUE; 381 382 D1(vswp, "%s: exit", __func__); 383 384 /* Initialize HybridIO related stuff */ 385 vsw_hio_init(vswp); 386 387 mutex_exit(&vswp->mac_lock); 388 return (0); 389 390 exit_error: 391 vsw_mac_close(vswp); 392 mutex_exit(&vswp->mac_lock); 393 return (EIO); 394 } 395 396 static int 397 vsw_setup_layer3(vsw_t *vswp) 398 { 399 D1(vswp, "%s: enter", __func__); 400 401 D2(vswp, "%s: operating in layer 3 mode", __func__); 402 vswp->vsw_switch_frame = vsw_switch_l3_frame; 403 404 D1(vswp, "%s: exit", __func__); 405 406 return (0); 407 } 408 409 /* ARGSUSED */ 410 void 411 vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *port, 412 mac_resource_handle_t mrh) 413 { 414 freemsgchain(mp); 415 } 416 417 /* 418 * Use mac client for layer 2 switching . 419 */ 420 static void 421 vsw_switch_l2_frame_mac_client(vsw_t *vswp, mblk_t *mp, int caller, 422 vsw_port_t *port, mac_resource_handle_t mrh) 423 { 424 _NOTE(ARGUNUSED(mrh)) 425 426 mblk_t *ret_m; 427 428 /* 429 * This switching function is expected to be called by 430 * the ports or the interface only. The packets from 431 * physical interface already switched. 432 */ 433 ASSERT((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV)); 434 435 if ((ret_m = vsw_tx_msg(vswp, mp, caller, port)) != NULL) { 436 DERR(vswp, "%s: drop mblks to " 437 "phys dev", __func__); 438 freemsgchain(ret_m); 439 } 440 } 441 442 /* 443 * Switch the given ethernet frame when operating in layer 2 mode. 444 * 445 * vswp: pointer to the vsw instance 446 * mp: pointer to chain of ethernet frame(s) to be switched 447 * caller: identifies the source of this frame as: 448 * 1. VSW_VNETPORT - a vsw port (connected to a vnet). 449 * 2. VSW_PHYSDEV - the physical ethernet device 450 * 3. VSW_LOCALDEV - vsw configured as a virtual interface 451 * arg: argument provided by the caller. 452 * 1. for VNETPORT - pointer to the corresponding vsw_port_t. 453 * 2. for PHYSDEV - NULL 454 * 3. for LOCALDEV - pointer to to this vsw_t(self) 455 */ 456 void 457 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 458 vsw_port_t *arg, mac_resource_handle_t mrh) 459 { 460 struct ether_header *ehp; 461 mblk_t *bp, *ret_m; 462 vsw_fdbe_t *fp; 463 464 D1(vswp, "%s: enter (caller %d)", __func__, caller); 465 466 /* 467 * PERF: rather than breaking up the chain here, scan it 468 * to find all mblks heading to same destination and then 469 * pass that sub-chain to the lower transmit functions. 470 */ 471 472 /* process the chain of packets */ 473 bp = mp; 474 while (bp) { 475 ehp = (struct ether_header *)bp->b_rptr; 476 mp = vsw_get_same_dest_list(ehp, &bp); 477 ASSERT(mp != NULL); 478 479 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 480 __func__, MBLKSIZE(mp), MBLKL(mp)); 481 482 if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) { 483 /* 484 * If destination is VSW_LOCALDEV (vsw as an eth 485 * interface) and if the device is up & running, 486 * send the packet up the stack on this host. 487 * If the virtual interface is down, drop the packet. 488 */ 489 if (caller != VSW_LOCALDEV) { 490 vsw_mac_rx(vswp, mrh, mp, VSW_MACRX_FREEMSG); 491 } else { 492 freemsgchain(mp); 493 } 494 continue; 495 } 496 497 /* 498 * Find fdb entry for the destination 499 * and hold a reference to it. 500 */ 501 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 502 if (fp != NULL) { 503 504 /* 505 * If plumbed and in promisc mode then copy msg 506 * and send up the stack. 507 */ 508 vsw_mac_rx(vswp, mrh, mp, 509 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 510 511 /* 512 * If the destination is in FDB, the packet 513 * should be forwarded to the correponding 514 * vsw_port (connected to a vnet device - 515 * VSW_VNETPORT) 516 */ 517 (void) vsw_portsend(fp->portp, mp); 518 519 /* Release the reference on the fdb entry */ 520 VSW_FDBE_REFRELE(fp); 521 } else { 522 /* 523 * Destination not in FDB. 524 * 525 * If the destination is broadcast or 526 * multicast forward the packet to all 527 * (VNETPORTs, PHYSDEV, LOCALDEV), 528 * except the caller. 529 */ 530 if (IS_BROADCAST(ehp)) { 531 D2(vswp, "%s: BROADCAST pkt", __func__); 532 (void) vsw_forward_all(vswp, mp, caller, arg); 533 } else if (IS_MULTICAST(ehp)) { 534 D2(vswp, "%s: MULTICAST pkt", __func__); 535 (void) vsw_forward_grp(vswp, mp, caller, arg); 536 } else { 537 /* 538 * If the destination is unicast, and came 539 * from either a logical network device or 540 * the switch itself when it is plumbed, then 541 * send it out on the physical device and also 542 * up the stack if the logical interface is 543 * in promiscious mode. 544 * 545 * NOTE: The assumption here is that if we 546 * cannot find the destination in our fdb, its 547 * a unicast address, and came from either a 548 * vnet or down the stack (when plumbed) it 549 * must be destinded for an ethernet device 550 * outside our ldoms. 551 */ 552 if (caller == VSW_VNETPORT) { 553 /* promisc check copy etc */ 554 vsw_mac_rx(vswp, mrh, mp, 555 VSW_MACRX_PROMISC | 556 VSW_MACRX_COPYMSG); 557 558 if ((ret_m = vsw_tx_msg(vswp, mp, 559 caller, arg)) != NULL) { 560 DERR(vswp, "%s: drop mblks to " 561 "phys dev", __func__); 562 freemsgchain(ret_m); 563 } 564 565 } else if (caller == VSW_PHYSDEV) { 566 /* 567 * Pkt seen because card in promisc 568 * mode. Send up stack if plumbed in 569 * promisc mode, else drop it. 570 */ 571 vsw_mac_rx(vswp, mrh, mp, 572 VSW_MACRX_PROMISC | 573 VSW_MACRX_FREEMSG); 574 575 } else if (caller == VSW_LOCALDEV) { 576 /* 577 * Pkt came down the stack, send out 578 * over physical device. 579 */ 580 if ((ret_m = vsw_tx_msg(vswp, mp, 581 caller, NULL)) != NULL) { 582 DERR(vswp, "%s: drop mblks to " 583 "phys dev", __func__); 584 freemsgchain(ret_m); 585 } 586 } 587 } 588 } 589 } 590 D1(vswp, "%s: exit\n", __func__); 591 } 592 593 /* 594 * Switch ethernet frame when in layer 3 mode (i.e. using IP 595 * layer to do the routing). 596 * 597 * There is a large amount of overlap between this function and 598 * vsw_switch_l2_frame. At some stage we need to revisit and refactor 599 * both these functions. 600 */ 601 void 602 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 603 vsw_port_t *arg, mac_resource_handle_t mrh) 604 { 605 struct ether_header *ehp; 606 mblk_t *bp = NULL; 607 vsw_fdbe_t *fp; 608 609 D1(vswp, "%s: enter (caller %d)", __func__, caller); 610 611 /* 612 * In layer 3 mode should only ever be switching packets 613 * between IP layer and vnet devices. So make sure thats 614 * who is invoking us. 615 */ 616 if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) { 617 DERR(vswp, "%s: unexpected caller (%d)", __func__, caller); 618 freemsgchain(mp); 619 return; 620 } 621 622 /* process the chain of packets */ 623 bp = mp; 624 while (bp) { 625 ehp = (struct ether_header *)bp->b_rptr; 626 mp = vsw_get_same_dest_list(ehp, &bp); 627 ASSERT(mp != NULL); 628 629 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 630 __func__, MBLKSIZE(mp), MBLKL(mp)); 631 632 /* 633 * Find fdb entry for the destination 634 * and hold a reference to it. 635 */ 636 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 637 if (fp != NULL) { 638 639 D2(vswp, "%s: sending to target port", __func__); 640 (void) vsw_portsend(fp->portp, mp); 641 642 /* Release the reference on the fdb entry */ 643 VSW_FDBE_REFRELE(fp); 644 } else { 645 /* 646 * Destination not in FDB 647 * 648 * If the destination is broadcast or 649 * multicast forward the packet to all 650 * (VNETPORTs, PHYSDEV, LOCALDEV), 651 * except the caller. 652 */ 653 if (IS_BROADCAST(ehp)) { 654 D2(vswp, "%s: BROADCAST pkt", __func__); 655 (void) vsw_forward_all(vswp, mp, caller, arg); 656 } else if (IS_MULTICAST(ehp)) { 657 D2(vswp, "%s: MULTICAST pkt", __func__); 658 (void) vsw_forward_grp(vswp, mp, caller, arg); 659 } else { 660 /* 661 * Unicast pkt from vnet that we don't have 662 * an FDB entry for, so must be destinded for 663 * the outside world. Attempt to send up to the 664 * IP layer to allow it to deal with it. 665 */ 666 if (caller == VSW_VNETPORT) { 667 vsw_mac_rx(vswp, mrh, 668 mp, VSW_MACRX_FREEMSG); 669 } 670 } 671 } 672 } 673 674 D1(vswp, "%s: exit", __func__); 675 } 676 677 /* 678 * Additional initializations that are needed for the specific switching mode. 679 */ 680 void 681 vsw_setup_switching_post_process(vsw_t *vswp) 682 { 683 link_state_t link_state = LINK_STATE_UP; 684 685 if (vswp->smode & VSW_LAYER2) { 686 /* 687 * Program unicst, mcst addrs of vsw 688 * interface and ports in the physdev. 689 */ 690 vsw_set_addrs(vswp); 691 692 /* Start HIO for ports that have already connected */ 693 vsw_hio_start_ports(vswp); 694 695 if (vswp->pls_update == B_TRUE) { 696 link_state = vswp->phys_link_state; 697 } 698 699 /* Update physical link info to any ports already connected */ 700 vsw_physlink_update_ports(vswp); 701 } 702 703 vsw_mac_link_update(vswp, link_state); 704 } 705 706 /* 707 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV), 708 * except the caller (port on which frame arrived). 709 */ 710 static int 711 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 712 { 713 vsw_port_list_t *plist = &vswp->plist; 714 vsw_port_t *portp; 715 mblk_t *nmp = NULL; 716 mblk_t *ret_m = NULL; 717 int skip_port = 0; 718 719 D1(vswp, "vsw_forward_all: enter\n"); 720 721 /* 722 * Broadcast message from inside ldoms so send to outside 723 * world if in either of layer 2 modes. 724 */ 725 if ((vswp->smode & VSW_LAYER2) && 726 ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) { 727 728 nmp = vsw_dupmsgchain(mp); 729 if (nmp) { 730 if ((ret_m = vsw_tx_msg(vswp, nmp, caller, arg)) 731 != NULL) { 732 DERR(vswp, "%s: dropping pkt(s) " 733 "consisting of %ld bytes of data for" 734 " physical device", __func__, MBLKL(ret_m)); 735 freemsgchain(ret_m); 736 } 737 } 738 } 739 740 if (caller == VSW_VNETPORT) 741 skip_port = 1; 742 743 /* 744 * Broadcast message from other vnet (layer 2 or 3) or outside 745 * world (layer 2 only), send up stack if plumbed. 746 */ 747 if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) { 748 vsw_mac_rx(vswp, NULL, mp, VSW_MACRX_COPYMSG); 749 } 750 751 /* send it to all VNETPORTs */ 752 READ_ENTER(&plist->lockrw); 753 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 754 D2(vswp, "vsw_forward_all: port %d", portp->p_instance); 755 /* 756 * Caution ! - don't reorder these two checks as arg 757 * will be NULL if the caller is PHYSDEV. skip_port is 758 * only set if caller is VNETPORT. 759 */ 760 if ((skip_port) && (portp == arg)) { 761 continue; 762 } else { 763 nmp = vsw_dupmsgchain(mp); 764 if (nmp) { 765 /* 766 * The plist->lockrw is protecting the 767 * portp from getting destroyed here. 768 * So, no ref_cnt is incremented here. 769 */ 770 (void) vsw_portsend(portp, nmp); 771 } else { 772 DERR(vswp, "vsw_forward_all: nmp NULL"); 773 } 774 } 775 } 776 RW_EXIT(&plist->lockrw); 777 778 freemsgchain(mp); 779 780 D1(vswp, "vsw_forward_all: exit\n"); 781 return (0); 782 } 783 784 /* 785 * Forward pkts to any devices or interfaces which have registered 786 * an interest in them (i.e. multicast groups). 787 */ 788 static int 789 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 790 { 791 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 792 mfdb_ent_t *entp = NULL; 793 mfdb_ent_t *tpp = NULL; 794 vsw_port_t *port; 795 uint64_t key = 0; 796 mblk_t *nmp = NULL; 797 mblk_t *ret_m = NULL; 798 boolean_t check_if = B_TRUE; 799 800 /* 801 * Convert address to hash table key 802 */ 803 KEY_HASH(key, &ehp->ether_dhost); 804 805 D1(vswp, "%s: key 0x%llx", __func__, key); 806 807 /* 808 * If pkt came from either a vnet or down the stack (if we are 809 * plumbed) and we are in layer 2 mode, then we send the pkt out 810 * over the physical adapter, and then check to see if any other 811 * vnets are interested in it. 812 */ 813 if ((vswp->smode & VSW_LAYER2) && 814 ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) { 815 nmp = vsw_dupmsgchain(mp); 816 if (nmp) { 817 if ((ret_m = vsw_tx_msg(vswp, nmp, caller, arg)) 818 != NULL) { 819 DERR(vswp, "%s: dropping pkt(s) consisting of " 820 "%ld bytes of data for physical device", 821 __func__, MBLKL(ret_m)); 822 freemsgchain(ret_m); 823 } 824 } 825 } 826 827 READ_ENTER(&vswp->mfdbrw); 828 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key, 829 (mod_hash_val_t *)&entp) != 0) { 830 D3(vswp, "%s: no table entry found for addr 0x%llx", 831 __func__, key); 832 } else { 833 /* 834 * Send to list of devices associated with this address... 835 */ 836 for (tpp = entp; tpp != NULL; tpp = tpp->nextp) { 837 838 /* dont send to ourselves */ 839 if ((caller == VSW_VNETPORT) && 840 (tpp->d_addr == (void *)arg)) { 841 port = (vsw_port_t *)tpp->d_addr; 842 D3(vswp, "%s: not sending to ourselves" 843 " : port %d", __func__, port->p_instance); 844 continue; 845 846 } else if ((caller == VSW_LOCALDEV) && 847 (tpp->d_type == VSW_LOCALDEV)) { 848 D2(vswp, "%s: not sending back up stack", 849 __func__); 850 continue; 851 } 852 853 if (tpp->d_type == VSW_VNETPORT) { 854 port = (vsw_port_t *)tpp->d_addr; 855 D3(vswp, "%s: sending to port %ld for addr " 856 "0x%llx", __func__, port->p_instance, key); 857 858 nmp = vsw_dupmsgchain(mp); 859 if (nmp) { 860 /* 861 * The vswp->mfdbrw is protecting the 862 * portp from getting destroyed here. 863 * So, no ref_cnt is incremented here. 864 */ 865 (void) vsw_portsend(port, nmp); 866 } 867 } else { 868 vsw_mac_rx(vswp, NULL, 869 mp, VSW_MACRX_COPYMSG); 870 D2(vswp, "%s: sending up stack" 871 " for addr 0x%llx", __func__, key); 872 check_if = B_FALSE; 873 } 874 } 875 } 876 877 RW_EXIT(&vswp->mfdbrw); 878 879 /* 880 * If the pkt came from either a vnet or from physical device, 881 * and if we havent already sent the pkt up the stack then we 882 * check now if we can/should (i.e. the interface is plumbed 883 * and in promisc mode). 884 */ 885 if ((check_if) && 886 ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) { 887 vsw_mac_rx(vswp, NULL, mp, 888 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 889 } 890 891 freemsgchain(mp); 892 893 D1(vswp, "%s: exit", __func__); 894 895 return (0); 896 } 897 898 /* 899 * This function creates the vlan id hash table for the given vsw device or 900 * port. It then adds each vlan that the device or port has been assigned, 901 * into this hash table. 902 * Arguments: 903 * arg: vsw device or port. 904 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 905 */ 906 void 907 vsw_create_vlans(void *arg, int type) 908 { 909 /* create vlan hash table */ 910 vsw_vlan_create_hash(arg, type); 911 912 /* add vlan ids of the vsw device into its hash table */ 913 vsw_vlan_add_ids(arg, type); 914 } 915 916 /* 917 * This function removes the vlan ids of the vsw device or port from its hash 918 * table. It then destroys the vlan hash table. 919 * Arguments: 920 * arg: vsw device or port. 921 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 922 */ 923 void 924 vsw_destroy_vlans(void *arg, int type) 925 { 926 /* remove vlan ids from the hash table */ 927 vsw_vlan_remove_ids(arg, type); 928 929 /* destroy vlan-hash-table */ 930 vsw_vlan_destroy_hash(arg, type); 931 } 932 933 /* 934 * Create a vlan-id hash table for the given vsw device or port. 935 */ 936 static void 937 vsw_vlan_create_hash(void *arg, int type) 938 { 939 char hashname[MAXNAMELEN]; 940 941 if (type == VSW_LOCALDEV) { 942 vsw_t *vswp = (vsw_t *)arg; 943 944 (void) snprintf(hashname, MAXNAMELEN, "vsw%d-vlan-hash", 945 vswp->instance); 946 947 vswp->vlan_nchains = vsw_vlan_nchains; 948 vswp->vlan_hashp = mod_hash_create_idhash(hashname, 949 vswp->vlan_nchains, mod_hash_null_valdtor); 950 951 } else if (type == VSW_VNETPORT) { 952 vsw_port_t *portp = (vsw_port_t *)arg; 953 954 (void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash", 955 portp->p_instance); 956 957 portp->vlan_nchains = vsw_vlan_nchains; 958 portp->vlan_hashp = mod_hash_create_idhash(hashname, 959 portp->vlan_nchains, mod_hash_null_valdtor); 960 961 } else { 962 return; 963 } 964 } 965 966 /* 967 * Destroy the vlan-id hash table for the given vsw device or port. 968 */ 969 static void 970 vsw_vlan_destroy_hash(void *arg, int type) 971 { 972 if (type == VSW_LOCALDEV) { 973 vsw_t *vswp = (vsw_t *)arg; 974 975 mod_hash_destroy_hash(vswp->vlan_hashp); 976 vswp->vlan_nchains = 0; 977 } else if (type == VSW_VNETPORT) { 978 vsw_port_t *portp = (vsw_port_t *)arg; 979 980 mod_hash_destroy_hash(portp->vlan_hashp); 981 portp->vlan_nchains = 0; 982 } else { 983 return; 984 } 985 } 986 987 /* 988 * Add vlan ids of the given vsw device or port into its hash table. 989 */ 990 void 991 vsw_vlan_add_ids(void *arg, int type) 992 { 993 int rv; 994 int i; 995 996 if (type == VSW_LOCALDEV) { 997 vsw_t *vswp = (vsw_t *)arg; 998 999 rv = mod_hash_insert(vswp->vlan_hashp, 1000 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 1001 (mod_hash_val_t)B_TRUE); 1002 if (rv != 0) { 1003 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d) for " 1004 "the interface", vswp->instance, vswp->pvid); 1005 } 1006 1007 for (i = 0; i < vswp->nvids; i++) { 1008 rv = mod_hash_insert(vswp->vlan_hashp, 1009 (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i].vl_vid), 1010 (mod_hash_val_t)B_TRUE); 1011 if (rv != 0) { 1012 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d)" 1013 " for the interface", vswp->instance, 1014 vswp->pvid); 1015 } 1016 } 1017 1018 } else if (type == VSW_VNETPORT) { 1019 vsw_port_t *portp = (vsw_port_t *)arg; 1020 vsw_t *vswp = portp->p_vswp; 1021 1022 rv = mod_hash_insert(portp->vlan_hashp, 1023 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 1024 (mod_hash_val_t)B_TRUE); 1025 if (rv != 0) { 1026 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d) for " 1027 "the port(%d)", vswp->instance, vswp->pvid, 1028 portp->p_instance); 1029 } 1030 1031 for (i = 0; i < portp->nvids; i++) { 1032 rv = mod_hash_insert(portp->vlan_hashp, 1033 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i].vl_vid), 1034 (mod_hash_val_t)B_TRUE); 1035 if (rv != 0) { 1036 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d)" 1037 " for the port(%d)", vswp->instance, 1038 vswp->pvid, portp->p_instance); 1039 } 1040 } 1041 1042 } 1043 } 1044 1045 /* 1046 * Remove vlan ids of the given vsw device or port from its hash table. 1047 */ 1048 void 1049 vsw_vlan_remove_ids(void *arg, int type) 1050 { 1051 mod_hash_val_t vp; 1052 int rv; 1053 int i; 1054 1055 if (type == VSW_LOCALDEV) { 1056 vsw_t *vswp = (vsw_t *)arg; 1057 1058 rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->pvid); 1059 if (rv == B_TRUE) { 1060 rv = mod_hash_remove(vswp->vlan_hashp, 1061 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 1062 (mod_hash_val_t *)&vp); 1063 ASSERT(rv == 0); 1064 } 1065 1066 for (i = 0; i < vswp->nvids; i++) { 1067 rv = vsw_vlan_lookup(vswp->vlan_hashp, 1068 vswp->vids[i].vl_vid); 1069 if (rv == B_TRUE) { 1070 rv = mod_hash_remove(vswp->vlan_hashp, 1071 (mod_hash_key_t)VLAN_ID_KEY( 1072 vswp->vids[i].vl_vid), 1073 (mod_hash_val_t *)&vp); 1074 ASSERT(rv == 0); 1075 } 1076 } 1077 1078 } else if (type == VSW_VNETPORT) { 1079 vsw_port_t *portp = (vsw_port_t *)arg; 1080 1081 portp = (vsw_port_t *)arg; 1082 rv = vsw_vlan_lookup(portp->vlan_hashp, portp->pvid); 1083 if (rv == B_TRUE) { 1084 rv = mod_hash_remove(portp->vlan_hashp, 1085 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 1086 (mod_hash_val_t *)&vp); 1087 ASSERT(rv == 0); 1088 } 1089 1090 for (i = 0; i < portp->nvids; i++) { 1091 rv = vsw_vlan_lookup(portp->vlan_hashp, 1092 portp->vids[i].vl_vid); 1093 if (rv == B_TRUE) { 1094 rv = mod_hash_remove(portp->vlan_hashp, 1095 (mod_hash_key_t)VLAN_ID_KEY( 1096 portp->vids[i].vl_vid), 1097 (mod_hash_val_t *)&vp); 1098 ASSERT(rv == 0); 1099 } 1100 } 1101 1102 } else { 1103 return; 1104 } 1105 } 1106 1107 /* 1108 * Find the given vlan id in the hash table. 1109 * Return: B_TRUE if the id is found; B_FALSE if not found. 1110 */ 1111 boolean_t 1112 vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid) 1113 { 1114 int rv; 1115 mod_hash_val_t vp; 1116 1117 rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp); 1118 1119 if (rv != 0) 1120 return (B_FALSE); 1121 1122 return (B_TRUE); 1123 } 1124 1125 /* 1126 * Add an entry into FDB for the given vsw. 1127 */ 1128 void 1129 vsw_fdbe_add(vsw_t *vswp, void *port) 1130 { 1131 uint64_t addr = 0; 1132 vsw_port_t *portp; 1133 vsw_fdbe_t *fp; 1134 int rv; 1135 1136 portp = (vsw_port_t *)port; 1137 KEY_HASH(addr, &portp->p_macaddr); 1138 1139 fp = kmem_zalloc(sizeof (vsw_fdbe_t), KM_SLEEP); 1140 fp->portp = port; 1141 1142 /* 1143 * Note: duplicate keys will be rejected by mod_hash. 1144 */ 1145 rv = mod_hash_insert(vswp->fdb_hashp, (mod_hash_key_t)addr, 1146 (mod_hash_val_t)fp); 1147 if (rv != 0) { 1148 cmn_err(CE_WARN, "vsw%d: Duplicate mac-address(%s) for " 1149 "the port(%d)", vswp->instance, 1150 ether_sprintf(&portp->p_macaddr), portp->p_instance); 1151 } 1152 } 1153 1154 /* 1155 * Remove an entry from FDB. 1156 */ 1157 void 1158 vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr) 1159 { 1160 uint64_t addr = 0; 1161 vsw_fdbe_t *fp; 1162 int rv; 1163 1164 KEY_HASH(addr, eaddr); 1165 1166 /* 1167 * Remove the entry from fdb hash table. 1168 * This prevents further references to this fdb entry. 1169 */ 1170 rv = mod_hash_remove(vswp->fdb_hashp, (mod_hash_key_t)addr, 1171 (mod_hash_val_t *)&fp); 1172 if (rv != 0) { 1173 /* invalid key? */ 1174 return; 1175 } 1176 1177 /* 1178 * If there are threads already ref holding before the entry was 1179 * removed from hash table, then wait for ref count to drop to zero. 1180 */ 1181 while (fp->refcnt != 0) { 1182 delay(drv_usectohz(vsw_fdbe_refcnt_delay)); 1183 } 1184 1185 kmem_free(fp, sizeof (*fp)); 1186 } 1187 1188 /* 1189 * Search fdb for a given mac address. If an entry is found, hold 1190 * a reference to it and return the entry, else returns NULL. 1191 */ 1192 static vsw_fdbe_t * 1193 vsw_fdbe_find(vsw_t *vswp, struct ether_addr *addrp) 1194 { 1195 uint64_t key = 0; 1196 vsw_fdbe_t *fp; 1197 int rv; 1198 1199 KEY_HASH(key, addrp); 1200 1201 rv = mod_hash_find_cb(vswp->fdb_hashp, (mod_hash_key_t)key, 1202 (mod_hash_val_t *)&fp, vsw_fdbe_find_cb); 1203 1204 if (rv != 0) 1205 return (NULL); 1206 1207 return (fp); 1208 } 1209 1210 /* 1211 * Callback function provided to mod_hash_find_cb(). After finding the fdb 1212 * entry corresponding to the key (macaddr), this callback will be invoked by 1213 * mod_hash_find_cb() to atomically increment the reference count on the fdb 1214 * entry before returning the found entry. 1215 */ 1216 static void 1217 vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val) 1218 { 1219 _NOTE(ARGUNUSED(key)) 1220 VSW_FDBE_REFHOLD((vsw_fdbe_t *)val); 1221 } 1222 1223 /* 1224 * A given frame must be always tagged with the appropriate vlan id (unless it 1225 * is in the default-vlan) before the mac address switching function is called. 1226 * Otherwise, after switching function determines the destination, we cannot 1227 * figure out if the destination belongs to the the same vlan that the frame 1228 * originated from and if it needs tag/untag. Frames which are inbound from 1229 * the external(physical) network over a vlan trunk link are always tagged. 1230 * However frames which are received from a vnet-port over ldc or frames which 1231 * are coming down the stack on the service domain over vsw interface may be 1232 * untagged. These frames must be tagged with the appropriate pvid of the 1233 * sender (vnet-port or vsw device), before invoking the switching function. 1234 * 1235 * Arguments: 1236 * arg: caller of the function. 1237 * type: type of arg(caller): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port) 1238 * mp: frame(s) to be tagged. 1239 */ 1240 mblk_t * 1241 vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp) 1242 { 1243 vsw_t *vswp; 1244 vsw_port_t *portp; 1245 struct ether_header *ehp; 1246 mblk_t *bp; 1247 mblk_t *bpt; 1248 mblk_t *bph; 1249 mblk_t *bpn; 1250 uint16_t pvid; 1251 1252 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1253 1254 if (type == VSW_LOCALDEV) { 1255 vswp = (vsw_t *)arg; 1256 pvid = vswp->pvid; 1257 portp = NULL; 1258 } else { 1259 /* VSW_VNETPORT */ 1260 portp = (vsw_port_t *)arg; 1261 pvid = portp->pvid; 1262 vswp = portp->p_vswp; 1263 } 1264 1265 bpn = bph = bpt = NULL; 1266 1267 for (bp = mp; bp != NULL; bp = bpn) { 1268 1269 bpn = bp->b_next; 1270 bp->b_next = bp->b_prev = NULL; 1271 1272 /* Determine if it is an untagged frame */ 1273 ehp = (struct ether_header *)bp->b_rptr; 1274 1275 if (ehp->ether_type != ETHERTYPE_VLAN) { /* untagged */ 1276 1277 /* no need to tag if the frame is in default vlan */ 1278 if (pvid != vswp->default_vlan_id) { 1279 bp = vnet_vlan_insert_tag(bp, pvid); 1280 if (bp == NULL) { 1281 continue; 1282 } 1283 } 1284 } 1285 1286 /* build a chain of processed packets */ 1287 if (bph == NULL) { 1288 bph = bpt = bp; 1289 } else { 1290 bpt->b_next = bp; 1291 bpt = bp; 1292 } 1293 1294 } 1295 1296 return (bph); 1297 } 1298 1299 /* 1300 * Frames destined to a vnet-port or to the local vsw interface, must be 1301 * untagged if necessary before sending. This function first checks that the 1302 * frame can be sent to the destination in the vlan identified by the frame 1303 * tag. Note that when this function is invoked the frame must have been 1304 * already tagged (unless it is in the default-vlan). Because, this function is 1305 * called when the switching function determines the destination and invokes 1306 * its send function (vnet-port or vsw interface) and all frames would have 1307 * been tagged by this time (see comments in vsw_vlan_frame_pretag()). 1308 * 1309 * Arguments: 1310 * arg: destination device. 1311 * type: type of arg(destination): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port) 1312 * np: head of pkt chain to be validated and untagged. 1313 * npt: tail of pkt chain to be validated and untagged. 1314 * 1315 * Returns: 1316 * np: head of updated chain of packets 1317 * npt: tail of updated chain of packets 1318 * rv: count of the packets in the returned list 1319 */ 1320 uint32_t 1321 vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, mblk_t **npt) 1322 { 1323 mblk_t *bp; 1324 mblk_t *bpt; 1325 mblk_t *bph; 1326 mblk_t *bpn; 1327 vsw_port_t *portp; 1328 vsw_t *vswp; 1329 uint32_t count; 1330 struct ether_header *ehp; 1331 boolean_t is_tagged; 1332 boolean_t rv; 1333 uint16_t vlan_id; 1334 uint16_t pvid; 1335 mod_hash_t *vlan_hashp; 1336 1337 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1338 1339 1340 if (type == VSW_LOCALDEV) { 1341 vswp = (vsw_t *)arg; 1342 pvid = vswp->pvid; 1343 vlan_hashp = vswp->vlan_hashp; 1344 portp = NULL; 1345 } else { 1346 /* type == VSW_VNETPORT */ 1347 portp = (vsw_port_t *)arg; 1348 vswp = portp->p_vswp; 1349 vlan_hashp = portp->vlan_hashp; 1350 pvid = portp->pvid; 1351 } 1352 1353 /* 1354 * If the MAC layer switching in place, then 1355 * untagging required only if the pvid is not 1356 * the same as default_vlan_id. This is because, 1357 * the MAC layer will send packets for the 1358 * registered vlans only. 1359 */ 1360 if ((vswp->mac_cl_switching == B_TRUE) && 1361 (pvid == vswp->default_vlan_id)) { 1362 /* simply count and set the tail */ 1363 count = 1; 1364 bp = *np; 1365 ASSERT(bp != NULL); 1366 while (bp->b_next != NULL) { 1367 bp = bp->b_next; 1368 count++; 1369 } 1370 *npt = bp; 1371 return (count); 1372 } 1373 1374 bpn = bph = bpt = NULL; 1375 count = 0; 1376 1377 for (bp = *np; bp != NULL; bp = bpn) { 1378 1379 bpn = bp->b_next; 1380 bp->b_next = bp->b_prev = NULL; 1381 1382 /* 1383 * Determine the vlan id that the frame belongs to. 1384 */ 1385 ehp = (struct ether_header *)bp->b_rptr; 1386 is_tagged = vsw_frame_lookup_vid(arg, type, ehp, &vlan_id); 1387 1388 /* 1389 * If MAC layer switching in place, then we 1390 * need to untag only if the tagged packet has 1391 * vlan-id same as the pvid. 1392 */ 1393 if (vswp->mac_cl_switching == B_TRUE) { 1394 1395 /* only tagged packets expected here */ 1396 ASSERT(is_tagged == B_TRUE); 1397 if (vlan_id == pvid) { 1398 bp = vnet_vlan_remove_tag(bp); 1399 if (bp == NULL) { 1400 /* packet dropped */ 1401 continue; 1402 } 1403 } 1404 } else { /* No MAC layer switching */ 1405 1406 /* 1407 * Check the frame header if tag/untag is needed. 1408 */ 1409 if (is_tagged == B_FALSE) { 1410 /* 1411 * Untagged frame. We shouldn't have an 1412 * untagged packet at this point, unless 1413 * the destination's vlan id is 1414 * default-vlan-id; if it is not the 1415 * default-vlan-id, we drop the packet. 1416 */ 1417 if (vlan_id != vswp->default_vlan_id) { 1418 /* drop the packet */ 1419 freemsg(bp); 1420 continue; 1421 } 1422 } else { /* Tagged */ 1423 /* 1424 * Tagged frame, untag if it's the 1425 * destination's pvid. 1426 */ 1427 if (vlan_id == pvid) { 1428 1429 bp = vnet_vlan_remove_tag(bp); 1430 if (bp == NULL) { 1431 /* packet dropped */ 1432 continue; 1433 } 1434 } else { 1435 1436 /* 1437 * Check if the destination is in the 1438 * same vlan. 1439 */ 1440 rv = vsw_vlan_lookup(vlan_hashp, 1441 vlan_id); 1442 if (rv == B_FALSE) { 1443 /* drop the packet */ 1444 freemsg(bp); 1445 continue; 1446 } 1447 } 1448 1449 } 1450 } 1451 1452 /* build a chain of processed packets */ 1453 if (bph == NULL) { 1454 bph = bpt = bp; 1455 } else { 1456 bpt->b_next = bp; 1457 bpt = bp; 1458 } 1459 count++; 1460 } 1461 1462 *np = bph; 1463 *npt = bpt; 1464 return (count); 1465 } 1466 1467 /* 1468 * Lookup the vlan id of the given frame. If it is a vlan-tagged frame, 1469 * then the vlan-id is available in the tag; otherwise, its vlan id is 1470 * implicitly obtained based on the caller (destination of the frame: 1471 * VSW_VNETPORT or VSW_LOCALDEV). 1472 * The vlan id determined is returned in vidp. 1473 * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged. 1474 */ 1475 boolean_t 1476 vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp, 1477 uint16_t *vidp) 1478 { 1479 struct ether_vlan_header *evhp; 1480 vsw_t *vswp; 1481 vsw_port_t *portp; 1482 1483 /* If it's a tagged frame, get the vid from vlan header */ 1484 if (ehp->ether_type == ETHERTYPE_VLAN) { 1485 1486 evhp = (struct ether_vlan_header *)ehp; 1487 *vidp = VLAN_ID(ntohs(evhp->ether_tci)); 1488 return (B_TRUE); 1489 } 1490 1491 /* Untagged frame; determine vlan id based on caller */ 1492 switch (caller) { 1493 1494 case VSW_VNETPORT: 1495 /* 1496 * packet destined to a vnet; vlan-id is pvid of vnet-port. 1497 */ 1498 portp = (vsw_port_t *)arg; 1499 *vidp = portp->pvid; 1500 break; 1501 1502 case VSW_LOCALDEV: 1503 1504 /* 1505 * packet destined to vsw interface; 1506 * vlan-id is port-vlan-id of vsw device. 1507 */ 1508 vswp = (vsw_t *)arg; 1509 *vidp = vswp->pvid; 1510 break; 1511 } 1512 1513 return (B_FALSE); 1514 } 1515 1516 /* 1517 * Add or remove multicast address(es). 1518 * 1519 * Returns 0 on success, 1 on failure. 1520 */ 1521 int 1522 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port) 1523 { 1524 mcst_addr_t *mcst_p = NULL; 1525 vsw_t *vswp = port->p_vswp; 1526 uint64_t addr = 0x0; 1527 int i; 1528 1529 D1(vswp, "%s: enter", __func__); 1530 1531 D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count); 1532 1533 for (i = 0; i < mcst_pkt->count; i++) { 1534 /* 1535 * Convert address into form that can be used 1536 * as hash table key. 1537 */ 1538 KEY_HASH(addr, &(mcst_pkt->mca[i])); 1539 1540 /* 1541 * Add or delete the specified address/port combination. 1542 */ 1543 if (mcst_pkt->set == 0x1) { 1544 D3(vswp, "%s: adding multicast address 0x%llx for " 1545 "port %ld", __func__, addr, port->p_instance); 1546 if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1547 /* 1548 * Update the list of multicast 1549 * addresses contained within the 1550 * port structure to include this new 1551 * one. 1552 */ 1553 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), 1554 KM_NOSLEEP); 1555 if (mcst_p == NULL) { 1556 DERR(vswp, "%s: unable to alloc mem", 1557 __func__); 1558 (void) vsw_del_mcst(vswp, 1559 VSW_VNETPORT, addr, port); 1560 return (1); 1561 } 1562 1563 mcst_p->nextp = NULL; 1564 mcst_p->addr = addr; 1565 ether_copy(&mcst_pkt->mca[i], &mcst_p->mca); 1566 1567 /* 1568 * Program the address into HW. If the addr 1569 * has already been programmed then the MAC 1570 * just increments a ref counter (which is 1571 * used when the address is being deleted) 1572 */ 1573 if (vsw_mac_multicast_add(vswp, port, mcst_p, 1574 VSW_VNETPORT)) { 1575 (void) vsw_del_mcst(vswp, 1576 VSW_VNETPORT, addr, port); 1577 kmem_free(mcst_p, sizeof (*mcst_p)); 1578 return (1); 1579 } 1580 1581 mutex_enter(&port->mca_lock); 1582 mcst_p->nextp = port->mcap; 1583 port->mcap = mcst_p; 1584 mutex_exit(&port->mca_lock); 1585 1586 } else { 1587 DERR(vswp, "%s: error adding multicast " 1588 "address 0x%llx for port %ld", 1589 __func__, addr, port->p_instance); 1590 return (1); 1591 } 1592 } else { 1593 /* 1594 * Delete an entry from the multicast hash 1595 * table and update the address list 1596 * appropriately. 1597 */ 1598 if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1599 D3(vswp, "%s: deleting multicast address " 1600 "0x%llx for port %ld", __func__, addr, 1601 port->p_instance); 1602 1603 mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr); 1604 ASSERT(mcst_p != NULL); 1605 1606 /* 1607 * Remove the address from HW. The address 1608 * will actually only be removed once the ref 1609 * count within the MAC layer has dropped to 1610 * zero. I.e. we can safely call this fn even 1611 * if other ports are interested in this 1612 * address. 1613 */ 1614 vsw_mac_multicast_remove(vswp, port, mcst_p, 1615 VSW_VNETPORT); 1616 kmem_free(mcst_p, sizeof (*mcst_p)); 1617 1618 } else { 1619 DERR(vswp, "%s: error deleting multicast " 1620 "addr 0x%llx for port %ld", 1621 __func__, addr, port->p_instance); 1622 return (1); 1623 } 1624 } 1625 } 1626 D1(vswp, "%s: exit", __func__); 1627 return (0); 1628 } 1629 1630 /* 1631 * Add a new multicast entry. 1632 * 1633 * Search hash table based on address. If match found then 1634 * update associated val (which is chain of ports), otherwise 1635 * create new key/val (addr/port) pair and insert into table. 1636 */ 1637 int 1638 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1639 { 1640 int dup = 0; 1641 int rv = 0; 1642 mfdb_ent_t *ment = NULL; 1643 mfdb_ent_t *tmp_ent = NULL; 1644 mfdb_ent_t *new_ent = NULL; 1645 void *tgt = NULL; 1646 1647 if (devtype == VSW_VNETPORT) { 1648 /* 1649 * Being invoked from a vnet. 1650 */ 1651 ASSERT(arg != NULL); 1652 tgt = arg; 1653 D2(NULL, "%s: port %d : address 0x%llx", __func__, 1654 ((vsw_port_t *)arg)->p_instance, addr); 1655 } else { 1656 /* 1657 * We are being invoked via the m_multicst mac entry 1658 * point. 1659 */ 1660 D2(NULL, "%s: address 0x%llx", __func__, addr); 1661 tgt = (void *)vswp; 1662 } 1663 1664 WRITE_ENTER(&vswp->mfdbrw); 1665 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1666 (mod_hash_val_t *)&ment) != 0) { 1667 1668 /* address not currently in table */ 1669 ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1670 ment->d_addr = (void *)tgt; 1671 ment->d_type = devtype; 1672 ment->nextp = NULL; 1673 1674 if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr, 1675 (mod_hash_val_t)ment) != 0) { 1676 DERR(vswp, "%s: hash table insertion failed", __func__); 1677 kmem_free(ment, sizeof (mfdb_ent_t)); 1678 rv = 1; 1679 } else { 1680 D2(vswp, "%s: added initial entry for 0x%llx to " 1681 "table", __func__, addr); 1682 } 1683 } else { 1684 /* 1685 * Address in table. Check to see if specified port 1686 * is already associated with the address. If not add 1687 * it now. 1688 */ 1689 tmp_ent = ment; 1690 while (tmp_ent != NULL) { 1691 if (tmp_ent->d_addr == (void *)tgt) { 1692 if (devtype == VSW_VNETPORT) { 1693 DERR(vswp, "%s: duplicate port entry " 1694 "found for portid %ld and key " 1695 "0x%llx", __func__, 1696 ((vsw_port_t *)arg)->p_instance, 1697 addr); 1698 } else { 1699 DERR(vswp, "%s: duplicate entry found" 1700 "for key 0x%llx", __func__, addr); 1701 } 1702 rv = 1; 1703 dup = 1; 1704 break; 1705 } 1706 tmp_ent = tmp_ent->nextp; 1707 } 1708 1709 /* 1710 * Port not on list so add it to end now. 1711 */ 1712 if (0 == dup) { 1713 D2(vswp, "%s: added entry for 0x%llx to table", 1714 __func__, addr); 1715 new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1716 new_ent->d_addr = (void *)tgt; 1717 new_ent->d_type = devtype; 1718 new_ent->nextp = NULL; 1719 1720 tmp_ent = ment; 1721 while (tmp_ent->nextp != NULL) 1722 tmp_ent = tmp_ent->nextp; 1723 1724 tmp_ent->nextp = new_ent; 1725 } 1726 } 1727 1728 RW_EXIT(&vswp->mfdbrw); 1729 return (rv); 1730 } 1731 1732 /* 1733 * Remove a multicast entry from the hashtable. 1734 * 1735 * Search hash table based on address. If match found, scan 1736 * list of ports associated with address. If specified port 1737 * found remove it from list. 1738 */ 1739 int 1740 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1741 { 1742 mfdb_ent_t *ment = NULL; 1743 mfdb_ent_t *curr_p, *prev_p; 1744 void *tgt = NULL; 1745 1746 D1(vswp, "%s: enter", __func__); 1747 1748 if (devtype == VSW_VNETPORT) { 1749 tgt = (vsw_port_t *)arg; 1750 D2(vswp, "%s: removing port %d from mFDB for address" 1751 " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr); 1752 } else { 1753 D2(vswp, "%s: removing entry", __func__); 1754 tgt = (void *)vswp; 1755 } 1756 1757 WRITE_ENTER(&vswp->mfdbrw); 1758 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1759 (mod_hash_val_t *)&ment) != 0) { 1760 D2(vswp, "%s: address 0x%llx not in table", __func__, addr); 1761 RW_EXIT(&vswp->mfdbrw); 1762 return (1); 1763 } 1764 1765 prev_p = curr_p = ment; 1766 1767 while (curr_p != NULL) { 1768 if (curr_p->d_addr == (void *)tgt) { 1769 if (devtype == VSW_VNETPORT) { 1770 D2(vswp, "%s: port %d found", __func__, 1771 ((vsw_port_t *)tgt)->p_instance); 1772 } else { 1773 D2(vswp, "%s: instance found", __func__); 1774 } 1775 1776 if (prev_p == curr_p) { 1777 /* 1778 * head of list, if no other element is in 1779 * list then destroy this entry, otherwise 1780 * just replace it with updated value. 1781 */ 1782 ment = curr_p->nextp; 1783 if (ment == NULL) { 1784 (void) mod_hash_destroy(vswp->mfdb, 1785 (mod_hash_val_t)addr); 1786 } else { 1787 (void) mod_hash_replace(vswp->mfdb, 1788 (mod_hash_key_t)addr, 1789 (mod_hash_val_t)ment); 1790 } 1791 } else { 1792 /* 1793 * Not head of list, no need to do 1794 * replacement, just adjust list pointers. 1795 */ 1796 prev_p->nextp = curr_p->nextp; 1797 } 1798 break; 1799 } 1800 1801 prev_p = curr_p; 1802 curr_p = curr_p->nextp; 1803 } 1804 1805 RW_EXIT(&vswp->mfdbrw); 1806 1807 D1(vswp, "%s: exit", __func__); 1808 1809 if (curr_p == NULL) 1810 return (1); 1811 kmem_free(curr_p, sizeof (mfdb_ent_t)); 1812 return (0); 1813 } 1814 1815 /* 1816 * Port is being deleted, but has registered an interest in one 1817 * or more multicast groups. Using the list of addresses maintained 1818 * within the port structure find the appropriate entry in the hash 1819 * table and remove this port from the list of interested ports. 1820 */ 1821 void 1822 vsw_del_mcst_port(vsw_port_t *port) 1823 { 1824 mcst_addr_t *mcap = NULL; 1825 vsw_t *vswp = port->p_vswp; 1826 1827 D1(vswp, "%s: enter", __func__); 1828 1829 mutex_enter(&port->mca_lock); 1830 1831 while ((mcap = port->mcap) != NULL) { 1832 1833 port->mcap = mcap->nextp; 1834 1835 mutex_exit(&port->mca_lock); 1836 1837 (void) vsw_del_mcst(vswp, VSW_VNETPORT, 1838 mcap->addr, port); 1839 1840 /* 1841 * Remove the address from HW. The address 1842 * will actually only be removed once the ref 1843 * count within the MAC layer has dropped to 1844 * zero. I.e. we can safely call this fn even 1845 * if other ports are interested in this 1846 * address. 1847 */ 1848 vsw_mac_multicast_remove(vswp, port, mcap, VSW_VNETPORT); 1849 kmem_free(mcap, sizeof (*mcap)); 1850 1851 mutex_enter(&port->mca_lock); 1852 1853 } 1854 1855 mutex_exit(&port->mca_lock); 1856 1857 D1(vswp, "%s: exit", __func__); 1858 } 1859 1860 /* 1861 * This vsw instance is detaching, but has registered an interest in one 1862 * or more multicast groups. Using the list of addresses maintained 1863 * within the vsw structure find the appropriate entry in the hash 1864 * table and remove this instance from the list of interested ports. 1865 */ 1866 void 1867 vsw_del_mcst_vsw(vsw_t *vswp) 1868 { 1869 mcst_addr_t *next_p = NULL; 1870 1871 D1(vswp, "%s: enter", __func__); 1872 1873 mutex_enter(&vswp->mca_lock); 1874 1875 while (vswp->mcap != NULL) { 1876 DERR(vswp, "%s: deleting addr 0x%llx", 1877 __func__, vswp->mcap->addr); 1878 (void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL); 1879 1880 next_p = vswp->mcap->nextp; 1881 kmem_free(vswp->mcap, sizeof (mcst_addr_t)); 1882 vswp->mcap = next_p; 1883 } 1884 1885 vswp->mcap = NULL; 1886 mutex_exit(&vswp->mca_lock); 1887 1888 D1(vswp, "%s: exit", __func__); 1889 } 1890 1891 mblk_t * 1892 vsw_get_same_dest_list(struct ether_header *ehp, mblk_t **mpp) 1893 { 1894 mblk_t *bp; 1895 mblk_t *nbp; 1896 mblk_t *head = NULL; 1897 mblk_t *tail = NULL; 1898 mblk_t *prev = NULL; 1899 struct ether_header *behp; 1900 1901 /* process the chain of packets */ 1902 bp = *mpp; 1903 while (bp) { 1904 nbp = bp->b_next; 1905 behp = (struct ether_header *)bp->b_rptr; 1906 bp->b_prev = NULL; 1907 if (ether_cmp(&ehp->ether_dhost, &behp->ether_dhost) == 0) { 1908 if (prev == NULL) { 1909 *mpp = nbp; 1910 } else { 1911 prev->b_next = nbp; 1912 } 1913 bp->b_next = NULL; 1914 if (head == NULL) { 1915 head = tail = bp; 1916 } else { 1917 tail->b_next = bp; 1918 tail = bp; 1919 } 1920 } else { 1921 prev = bp; 1922 } 1923 bp = nbp; 1924 } 1925 return (head); 1926 } 1927 1928 static mblk_t * 1929 vsw_dupmsgchain(mblk_t *mp) 1930 { 1931 mblk_t *nmp = NULL; 1932 mblk_t **nmpp = &nmp; 1933 1934 for (; mp != NULL; mp = mp->b_next) { 1935 if ((*nmpp = dupmsg(mp)) == NULL) { 1936 freemsgchain(nmp); 1937 return (NULL); 1938 } 1939 1940 nmpp = &((*nmpp)->b_next); 1941 } 1942 1943 return (nmp); 1944 } 1945