1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/debug.h> 30 #include <sys/time.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/user.h> 34 #include <sys/stropts.h> 35 #include <sys/stream.h> 36 #include <sys/strlog.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/kmem.h> 41 #include <sys/conf.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/ksynch.h> 45 #include <sys/stat.h> 46 #include <sys/kstat.h> 47 #include <sys/vtrace.h> 48 #include <sys/strsun.h> 49 #include <sys/dlpi.h> 50 #include <sys/ethernet.h> 51 #include <net/if.h> 52 #include <sys/varargs.h> 53 #include <sys/machsystm.h> 54 #include <sys/modctl.h> 55 #include <sys/modhash.h> 56 #include <sys/mac.h> 57 #include <sys/mac_ether.h> 58 #include <sys/taskq.h> 59 #include <sys/note.h> 60 #include <sys/mach_descrip.h> 61 #include <sys/mdeg.h> 62 #include <sys/ldc.h> 63 #include <sys/vsw_fdb.h> 64 #include <sys/vsw.h> 65 #include <sys/vio_mailbox.h> 66 #include <sys/vnet_mailbox.h> 67 #include <sys/vnet_common.h> 68 #include <sys/vio_util.h> 69 #include <sys/sdt.h> 70 #include <sys/atomic.h> 71 #include <sys/vlan.h> 72 73 /* Switching setup routines */ 74 void vsw_setup_switching_thread(void *arg); 75 int vsw_setup_switching_start(vsw_t *vswp); 76 void vsw_setup_switching_stop(vsw_t *vswp); 77 int vsw_setup_switching(vsw_t *); 78 void vsw_setup_layer2_post_process(vsw_t *vswp); 79 void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, 80 vsw_port_t *port, mac_resource_handle_t mrh); 81 static int vsw_setup_layer2(vsw_t *); 82 static int vsw_setup_layer3(vsw_t *); 83 84 /* Switching/data transmit routines */ 85 static void vsw_switch_l2_frame_mac_client(vsw_t *vswp, mblk_t *mp, int caller, 86 vsw_port_t *port, mac_resource_handle_t); 87 static void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 88 vsw_port_t *port, mac_resource_handle_t); 89 static void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 90 vsw_port_t *port, mac_resource_handle_t); 91 static int vsw_forward_all(vsw_t *vswp, mblk_t *mp, 92 int caller, vsw_port_t *port); 93 static int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, 94 int caller, vsw_port_t *port); 95 96 /* VLAN routines */ 97 void vsw_create_vlans(void *arg, int type); 98 void vsw_destroy_vlans(void *arg, int type); 99 void vsw_vlan_add_ids(void *arg, int type); 100 void vsw_vlan_remove_ids(void *arg, int type); 101 static void vsw_vlan_create_hash(void *arg, int type); 102 static void vsw_vlan_destroy_hash(void *arg, int type); 103 boolean_t vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp, 104 uint16_t *vidp); 105 mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 106 uint32_t vsw_vlan_frames_untag(void *arg, int type, mblk_t **np, mblk_t **npt); 107 boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid); 108 109 /* Forwarding database (FDB) routines */ 110 void vsw_fdbe_add(vsw_t *vswp, void *port); 111 void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr); 112 static vsw_fdbe_t *vsw_fdbe_find(vsw_t *vswp, struct ether_addr *); 113 static void vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val); 114 115 int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *); 116 int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 117 int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 118 void vsw_del_mcst_vsw(vsw_t *); 119 120 /* Support functions */ 121 static mblk_t *vsw_dupmsgchain(mblk_t *mp); 122 static mblk_t *vsw_get_same_dest_list(struct ether_header *ehp, mblk_t **mpp); 123 124 125 /* 126 * Functions imported from other files. 127 */ 128 extern mblk_t *vsw_tx_msg(vsw_t *, mblk_t *, int, vsw_port_t *); 129 extern mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t); 130 extern int vsw_mac_open(vsw_t *vswp); 131 extern void vsw_mac_close(vsw_t *vswp); 132 extern void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 133 mblk_t *mp, vsw_macrx_flags_t flags); 134 extern void vsw_set_addrs(vsw_t *vswp); 135 extern int vsw_portsend(vsw_port_t *port, mblk_t *mp); 136 extern void vsw_hio_init(vsw_t *vswp); 137 extern void vsw_hio_start_ports(vsw_t *vswp); 138 extern int vsw_mac_multicast_add(vsw_t *vswp, vsw_port_t *port, 139 mcst_addr_t *mcst_p, int type); 140 extern void vsw_mac_multicast_remove(vsw_t *vswp, vsw_port_t *port, 141 mcst_addr_t *mcst_p, int type); 142 extern void vsw_physlink_state_update(vsw_t *vswp); 143 144 /* 145 * Tunables used in this file. 146 */ 147 extern int vsw_setup_switching_delay; 148 extern uint32_t vsw_vlan_nchains; 149 extern uint32_t vsw_fdbe_refcnt_delay; 150 151 #define VSW_FDBE_REFHOLD(p) \ 152 { \ 153 atomic_inc_32(&(p)->refcnt); \ 154 ASSERT((p)->refcnt != 0); \ 155 } 156 157 #define VSW_FDBE_REFRELE(p) \ 158 { \ 159 ASSERT((p)->refcnt != 0); \ 160 atomic_dec_32(&(p)->refcnt); \ 161 } 162 163 /* 164 * Thread to setup switching mode. This thread is created during vsw_attach() 165 * initially. It invokes vsw_setup_switching() and keeps retrying while the 166 * returned value is EAGAIN. The thread exits when the switching mode setup is 167 * done successfully or when the error returned is not EAGAIN. This thread may 168 * also get created from vsw_update_md_prop() if the switching mode needs to be 169 * updated. 170 */ 171 void 172 vsw_setup_switching_thread(void *arg) 173 { 174 callb_cpr_t cprinfo; 175 vsw_t *vswp = (vsw_t *)arg; 176 clock_t wait_time; 177 clock_t xwait; 178 clock_t wait_rv; 179 int rv; 180 181 /* wait time used on successive retries */ 182 xwait = drv_usectohz(vsw_setup_switching_delay * MICROSEC); 183 184 CALLB_CPR_INIT(&cprinfo, &vswp->sw_thr_lock, callb_generic_cpr, 185 "vsw_setup_sw_thread"); 186 187 mutex_enter(&vswp->sw_thr_lock); 188 189 while ((vswp->sw_thr_flags & VSW_SWTHR_STOP) == 0) { 190 191 CALLB_CPR_SAFE_BEGIN(&cprinfo); 192 193 /* Wait for sometime before (re)trying setup_switching() */ 194 wait_time = ddi_get_lbolt() + xwait; 195 while ((vswp->sw_thr_flags & VSW_SWTHR_STOP) == 0) { 196 wait_rv = cv_timedwait(&vswp->sw_thr_cv, 197 &vswp->sw_thr_lock, wait_time); 198 if (wait_rv == -1) { /* timed out */ 199 break; 200 } 201 } 202 203 CALLB_CPR_SAFE_END(&cprinfo, &vswp->sw_thr_lock) 204 205 if ((vswp->sw_thr_flags & VSW_SWTHR_STOP) != 0) { 206 /* 207 * If there is a stop request, process that first and 208 * exit the loop. Continue to hold the mutex which gets 209 * released in CALLB_CPR_EXIT(). 210 */ 211 break; 212 } 213 214 mutex_exit(&vswp->sw_thr_lock); 215 rv = vsw_setup_switching(vswp); 216 if (rv == 0) { 217 vsw_setup_layer2_post_process(vswp); 218 } 219 mutex_enter(&vswp->sw_thr_lock); 220 if (rv != EAGAIN) { 221 break; 222 } 223 224 } 225 226 vswp->sw_thr_flags &= ~VSW_SWTHR_STOP; 227 vswp->sw_thread = NULL; 228 CALLB_CPR_EXIT(&cprinfo); 229 thread_exit(); 230 } 231 232 /* 233 * Create a thread to setup the switching mode. 234 * Returns 0 on success; 1 on failure. 235 */ 236 int 237 vsw_setup_switching_start(vsw_t *vswp) 238 { 239 mutex_enter(&vswp->sw_thr_lock); 240 241 vswp->sw_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 242 vsw_setup_switching_thread, vswp, 0, &p0, TS_RUN, minclsyspri); 243 244 if (vswp->sw_thread == NULL) { 245 mutex_exit(&vswp->sw_thr_lock); 246 return (1); 247 } 248 249 mutex_exit(&vswp->sw_thr_lock); 250 return (0); 251 } 252 253 /* 254 * Stop the thread to setup switching mode. 255 */ 256 void 257 vsw_setup_switching_stop(vsw_t *vswp) 258 { 259 kt_did_t tid = 0; 260 261 /* 262 * Signal the setup_switching thread to stop and wait until it stops. 263 */ 264 mutex_enter(&vswp->sw_thr_lock); 265 266 if (vswp->sw_thread != NULL) { 267 tid = vswp->sw_thread->t_did; 268 vswp->sw_thr_flags |= VSW_SWTHR_STOP; 269 cv_signal(&vswp->sw_thr_cv); 270 } 271 272 mutex_exit(&vswp->sw_thr_lock); 273 274 if (tid != 0) 275 thread_join(tid); 276 277 (void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE); 278 279 vswp->mac_open_retries = 0; 280 } 281 282 /* 283 * Setup the required switching mode. 284 * Returns: 285 * 0 on success. 286 * EAGAIN if retry is needed. 287 * 1 on all other failures. 288 */ 289 int 290 vsw_setup_switching(vsw_t *vswp) 291 { 292 int rv = 1; 293 294 D1(vswp, "%s: enter", __func__); 295 296 /* 297 * Select best switching mode. 298 * This is done as this routine can be called from the timeout 299 * handler to retry setting up a specific mode. Currently only 300 * the function which sets up layer2/promisc mode returns EAGAIN 301 * if the underlying network device is not available yet, causing 302 * retries. 303 */ 304 if (vswp->smode & VSW_LAYER2) { 305 rv = vsw_setup_layer2(vswp); 306 } else if (vswp->smode & VSW_LAYER3) { 307 rv = vsw_setup_layer3(vswp); 308 } else { 309 DERR(vswp, "unknown switch mode"); 310 rv = 1; 311 } 312 313 if (rv && (rv != EAGAIN)) { 314 cmn_err(CE_WARN, "!vsw%d: Unable to setup specified " 315 "switching mode", vswp->instance); 316 } else if (rv == 0) { 317 (void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE); 318 } 319 320 D2(vswp, "%s: Operating in mode %d", __func__, 321 vswp->smode); 322 323 D1(vswp, "%s: exit", __func__); 324 325 return (rv); 326 } 327 328 /* 329 * Setup for layer 2 switching. 330 * 331 * Returns: 332 * 0 on success. 333 * EAGAIN if retry is needed. 334 * EIO on all other failures. 335 */ 336 static int 337 vsw_setup_layer2(vsw_t *vswp) 338 { 339 int rv; 340 341 D1(vswp, "%s: enter", __func__); 342 343 /* 344 * Until the network device is successfully opened, 345 * set the switching to use vsw_switch_l2_frame. 346 */ 347 vswp->vsw_switch_frame = vsw_switch_l2_frame; 348 vswp->mac_cl_switching = B_FALSE; 349 350 rv = strlen(vswp->physname); 351 if (rv == 0) { 352 /* 353 * Physical device name is NULL, which is 354 * required for layer 2. 355 */ 356 cmn_err(CE_WARN, "!vsw%d: no network device name specified", 357 vswp->instance); 358 return (EIO); 359 } 360 361 mutex_enter(&vswp->mac_lock); 362 363 rv = vsw_mac_open(vswp); 364 if (rv != 0) { 365 if (rv != EAGAIN) { 366 cmn_err(CE_WARN, "!vsw%d: Unable to open network " 367 "device: %s\n", vswp->instance, vswp->physname); 368 } 369 mutex_exit(&vswp->mac_lock); 370 return (rv); 371 } 372 373 /* 374 * Now we can use the mac client switching, so set the switching 375 * function to use vsw_switch_l2_frame_mac_client(), which simply 376 * sends the packets to MAC layer for switching. 377 */ 378 vswp->vsw_switch_frame = vsw_switch_l2_frame_mac_client; 379 vswp->mac_cl_switching = B_TRUE; 380 381 D1(vswp, "%s: exit", __func__); 382 383 /* Initialize HybridIO related stuff */ 384 vsw_hio_init(vswp); 385 386 mutex_exit(&vswp->mac_lock); 387 return (0); 388 389 exit_error: 390 vsw_mac_close(vswp); 391 mutex_exit(&vswp->mac_lock); 392 return (EIO); 393 } 394 395 static int 396 vsw_setup_layer3(vsw_t *vswp) 397 { 398 D1(vswp, "%s: enter", __func__); 399 400 D2(vswp, "%s: operating in layer 3 mode", __func__); 401 vswp->vsw_switch_frame = vsw_switch_l3_frame; 402 403 D1(vswp, "%s: exit", __func__); 404 405 return (0); 406 } 407 408 /* ARGSUSED */ 409 void 410 vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *port, 411 mac_resource_handle_t mrh) 412 { 413 freemsgchain(mp); 414 } 415 416 /* 417 * Use mac client for layer 2 switching . 418 */ 419 static void 420 vsw_switch_l2_frame_mac_client(vsw_t *vswp, mblk_t *mp, int caller, 421 vsw_port_t *port, mac_resource_handle_t mrh) 422 { 423 _NOTE(ARGUNUSED(mrh)) 424 425 mblk_t *ret_m; 426 427 /* 428 * This switching function is expected to be called by 429 * the ports or the interface only. The packets from 430 * physical interface already switched. 431 */ 432 ASSERT((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV)); 433 434 if ((ret_m = vsw_tx_msg(vswp, mp, caller, port)) != NULL) { 435 DERR(vswp, "%s: drop mblks to " 436 "phys dev", __func__); 437 freemsgchain(ret_m); 438 } 439 } 440 441 /* 442 * Switch the given ethernet frame when operating in layer 2 mode. 443 * 444 * vswp: pointer to the vsw instance 445 * mp: pointer to chain of ethernet frame(s) to be switched 446 * caller: identifies the source of this frame as: 447 * 1. VSW_VNETPORT - a vsw port (connected to a vnet). 448 * 2. VSW_PHYSDEV - the physical ethernet device 449 * 3. VSW_LOCALDEV - vsw configured as a virtual interface 450 * arg: argument provided by the caller. 451 * 1. for VNETPORT - pointer to the corresponding vsw_port_t. 452 * 2. for PHYSDEV - NULL 453 * 3. for LOCALDEV - pointer to to this vsw_t(self) 454 */ 455 void 456 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 457 vsw_port_t *arg, mac_resource_handle_t mrh) 458 { 459 struct ether_header *ehp; 460 mblk_t *bp, *ret_m; 461 vsw_fdbe_t *fp; 462 463 D1(vswp, "%s: enter (caller %d)", __func__, caller); 464 465 /* 466 * PERF: rather than breaking up the chain here, scan it 467 * to find all mblks heading to same destination and then 468 * pass that sub-chain to the lower transmit functions. 469 */ 470 471 /* process the chain of packets */ 472 bp = mp; 473 while (bp) { 474 ehp = (struct ether_header *)bp->b_rptr; 475 mp = vsw_get_same_dest_list(ehp, &bp); 476 ASSERT(mp != NULL); 477 478 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 479 __func__, MBLKSIZE(mp), MBLKL(mp)); 480 481 if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) { 482 /* 483 * If destination is VSW_LOCALDEV (vsw as an eth 484 * interface) and if the device is up & running, 485 * send the packet up the stack on this host. 486 * If the virtual interface is down, drop the packet. 487 */ 488 if (caller != VSW_LOCALDEV) { 489 vsw_mac_rx(vswp, mrh, mp, VSW_MACRX_FREEMSG); 490 } else { 491 freemsgchain(mp); 492 } 493 continue; 494 } 495 496 /* 497 * Find fdb entry for the destination 498 * and hold a reference to it. 499 */ 500 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 501 if (fp != NULL) { 502 503 /* 504 * If plumbed and in promisc mode then copy msg 505 * and send up the stack. 506 */ 507 vsw_mac_rx(vswp, mrh, mp, 508 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 509 510 /* 511 * If the destination is in FDB, the packet 512 * should be forwarded to the correponding 513 * vsw_port (connected to a vnet device - 514 * VSW_VNETPORT) 515 */ 516 (void) vsw_portsend(fp->portp, mp); 517 518 /* Release the reference on the fdb entry */ 519 VSW_FDBE_REFRELE(fp); 520 } else { 521 /* 522 * Destination not in FDB. 523 * 524 * If the destination is broadcast or 525 * multicast forward the packet to all 526 * (VNETPORTs, PHYSDEV, LOCALDEV), 527 * except the caller. 528 */ 529 if (IS_BROADCAST(ehp)) { 530 D2(vswp, "%s: BROADCAST pkt", __func__); 531 (void) vsw_forward_all(vswp, mp, caller, arg); 532 } else if (IS_MULTICAST(ehp)) { 533 D2(vswp, "%s: MULTICAST pkt", __func__); 534 (void) vsw_forward_grp(vswp, mp, caller, arg); 535 } else { 536 /* 537 * If the destination is unicast, and came 538 * from either a logical network device or 539 * the switch itself when it is plumbed, then 540 * send it out on the physical device and also 541 * up the stack if the logical interface is 542 * in promiscious mode. 543 * 544 * NOTE: The assumption here is that if we 545 * cannot find the destination in our fdb, its 546 * a unicast address, and came from either a 547 * vnet or down the stack (when plumbed) it 548 * must be destinded for an ethernet device 549 * outside our ldoms. 550 */ 551 if (caller == VSW_VNETPORT) { 552 /* promisc check copy etc */ 553 vsw_mac_rx(vswp, mrh, mp, 554 VSW_MACRX_PROMISC | 555 VSW_MACRX_COPYMSG); 556 557 if ((ret_m = vsw_tx_msg(vswp, mp, 558 caller, arg)) != NULL) { 559 DERR(vswp, "%s: drop mblks to " 560 "phys dev", __func__); 561 freemsgchain(ret_m); 562 } 563 564 } else if (caller == VSW_PHYSDEV) { 565 /* 566 * Pkt seen because card in promisc 567 * mode. Send up stack if plumbed in 568 * promisc mode, else drop it. 569 */ 570 vsw_mac_rx(vswp, mrh, mp, 571 VSW_MACRX_PROMISC | 572 VSW_MACRX_FREEMSG); 573 574 } else if (caller == VSW_LOCALDEV) { 575 /* 576 * Pkt came down the stack, send out 577 * over physical device. 578 */ 579 if ((ret_m = vsw_tx_msg(vswp, mp, 580 caller, NULL)) != NULL) { 581 DERR(vswp, "%s: drop mblks to " 582 "phys dev", __func__); 583 freemsgchain(ret_m); 584 } 585 } 586 } 587 } 588 } 589 D1(vswp, "%s: exit\n", __func__); 590 } 591 592 /* 593 * Switch ethernet frame when in layer 3 mode (i.e. using IP 594 * layer to do the routing). 595 * 596 * There is a large amount of overlap between this function and 597 * vsw_switch_l2_frame. At some stage we need to revisit and refactor 598 * both these functions. 599 */ 600 void 601 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 602 vsw_port_t *arg, mac_resource_handle_t mrh) 603 { 604 struct ether_header *ehp; 605 mblk_t *bp = NULL; 606 vsw_fdbe_t *fp; 607 608 D1(vswp, "%s: enter (caller %d)", __func__, caller); 609 610 /* 611 * In layer 3 mode should only ever be switching packets 612 * between IP layer and vnet devices. So make sure thats 613 * who is invoking us. 614 */ 615 if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) { 616 DERR(vswp, "%s: unexpected caller (%d)", __func__, caller); 617 freemsgchain(mp); 618 return; 619 } 620 621 /* process the chain of packets */ 622 bp = mp; 623 while (bp) { 624 ehp = (struct ether_header *)bp->b_rptr; 625 mp = vsw_get_same_dest_list(ehp, &bp); 626 ASSERT(mp != NULL); 627 628 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 629 __func__, MBLKSIZE(mp), MBLKL(mp)); 630 631 /* 632 * Find fdb entry for the destination 633 * and hold a reference to it. 634 */ 635 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 636 if (fp != NULL) { 637 638 D2(vswp, "%s: sending to target port", __func__); 639 (void) vsw_portsend(fp->portp, mp); 640 641 /* Release the reference on the fdb entry */ 642 VSW_FDBE_REFRELE(fp); 643 } else { 644 /* 645 * Destination not in FDB 646 * 647 * If the destination is broadcast or 648 * multicast forward the packet to all 649 * (VNETPORTs, PHYSDEV, LOCALDEV), 650 * except the caller. 651 */ 652 if (IS_BROADCAST(ehp)) { 653 D2(vswp, "%s: BROADCAST pkt", __func__); 654 (void) vsw_forward_all(vswp, mp, caller, arg); 655 } else if (IS_MULTICAST(ehp)) { 656 D2(vswp, "%s: MULTICAST pkt", __func__); 657 (void) vsw_forward_grp(vswp, mp, caller, arg); 658 } else { 659 /* 660 * Unicast pkt from vnet that we don't have 661 * an FDB entry for, so must be destinded for 662 * the outside world. Attempt to send up to the 663 * IP layer to allow it to deal with it. 664 */ 665 if (caller == VSW_VNETPORT) { 666 vsw_mac_rx(vswp, mrh, 667 mp, VSW_MACRX_FREEMSG); 668 } 669 } 670 } 671 } 672 673 D1(vswp, "%s: exit", __func__); 674 } 675 676 /* 677 * Setup mac addrs and hio resources for layer 2 switching only. 678 */ 679 void 680 vsw_setup_layer2_post_process(vsw_t *vswp) 681 { 682 if (vswp->smode & VSW_LAYER2) { 683 /* 684 * Program unicst, mcst addrs of vsw 685 * interface and ports in the physdev. 686 */ 687 vsw_set_addrs(vswp); 688 689 /* Start HIO for ports that have already connected */ 690 vsw_hio_start_ports(vswp); 691 692 /* Update physical link info to any ports already connected */ 693 vsw_physlink_state_update(vswp); 694 } 695 } 696 697 /* 698 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV), 699 * except the caller (port on which frame arrived). 700 */ 701 static int 702 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 703 { 704 vsw_port_list_t *plist = &vswp->plist; 705 vsw_port_t *portp; 706 mblk_t *nmp = NULL; 707 mblk_t *ret_m = NULL; 708 int skip_port = 0; 709 710 D1(vswp, "vsw_forward_all: enter\n"); 711 712 /* 713 * Broadcast message from inside ldoms so send to outside 714 * world if in either of layer 2 modes. 715 */ 716 if ((vswp->smode & VSW_LAYER2) && 717 ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) { 718 719 nmp = vsw_dupmsgchain(mp); 720 if (nmp) { 721 if ((ret_m = vsw_tx_msg(vswp, nmp, caller, arg)) 722 != NULL) { 723 DERR(vswp, "%s: dropping pkt(s) " 724 "consisting of %ld bytes of data for" 725 " physical device", __func__, MBLKL(ret_m)); 726 freemsgchain(ret_m); 727 } 728 } 729 } 730 731 if (caller == VSW_VNETPORT) 732 skip_port = 1; 733 734 /* 735 * Broadcast message from other vnet (layer 2 or 3) or outside 736 * world (layer 2 only), send up stack if plumbed. 737 */ 738 if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) { 739 vsw_mac_rx(vswp, NULL, mp, VSW_MACRX_COPYMSG); 740 } 741 742 /* send it to all VNETPORTs */ 743 READ_ENTER(&plist->lockrw); 744 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 745 D2(vswp, "vsw_forward_all: port %d", portp->p_instance); 746 /* 747 * Caution ! - don't reorder these two checks as arg 748 * will be NULL if the caller is PHYSDEV. skip_port is 749 * only set if caller is VNETPORT. 750 */ 751 if ((skip_port) && (portp == arg)) { 752 continue; 753 } else { 754 nmp = vsw_dupmsgchain(mp); 755 if (nmp) { 756 /* 757 * The plist->lockrw is protecting the 758 * portp from getting destroyed here. 759 * So, no ref_cnt is incremented here. 760 */ 761 (void) vsw_portsend(portp, nmp); 762 } else { 763 DERR(vswp, "vsw_forward_all: nmp NULL"); 764 } 765 } 766 } 767 RW_EXIT(&plist->lockrw); 768 769 freemsgchain(mp); 770 771 D1(vswp, "vsw_forward_all: exit\n"); 772 return (0); 773 } 774 775 /* 776 * Forward pkts to any devices or interfaces which have registered 777 * an interest in them (i.e. multicast groups). 778 */ 779 static int 780 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 781 { 782 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 783 mfdb_ent_t *entp = NULL; 784 mfdb_ent_t *tpp = NULL; 785 vsw_port_t *port; 786 uint64_t key = 0; 787 mblk_t *nmp = NULL; 788 mblk_t *ret_m = NULL; 789 boolean_t check_if = B_TRUE; 790 791 /* 792 * Convert address to hash table key 793 */ 794 KEY_HASH(key, &ehp->ether_dhost); 795 796 D1(vswp, "%s: key 0x%llx", __func__, key); 797 798 /* 799 * If pkt came from either a vnet or down the stack (if we are 800 * plumbed) and we are in layer 2 mode, then we send the pkt out 801 * over the physical adapter, and then check to see if any other 802 * vnets are interested in it. 803 */ 804 if ((vswp->smode & VSW_LAYER2) && 805 ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) { 806 nmp = vsw_dupmsgchain(mp); 807 if (nmp) { 808 if ((ret_m = vsw_tx_msg(vswp, nmp, caller, arg)) 809 != NULL) { 810 DERR(vswp, "%s: dropping pkt(s) consisting of " 811 "%ld bytes of data for physical device", 812 __func__, MBLKL(ret_m)); 813 freemsgchain(ret_m); 814 } 815 } 816 } 817 818 READ_ENTER(&vswp->mfdbrw); 819 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key, 820 (mod_hash_val_t *)&entp) != 0) { 821 D3(vswp, "%s: no table entry found for addr 0x%llx", 822 __func__, key); 823 } else { 824 /* 825 * Send to list of devices associated with this address... 826 */ 827 for (tpp = entp; tpp != NULL; tpp = tpp->nextp) { 828 829 /* dont send to ourselves */ 830 if ((caller == VSW_VNETPORT) && 831 (tpp->d_addr == (void *)arg)) { 832 port = (vsw_port_t *)tpp->d_addr; 833 D3(vswp, "%s: not sending to ourselves" 834 " : port %d", __func__, port->p_instance); 835 continue; 836 837 } else if ((caller == VSW_LOCALDEV) && 838 (tpp->d_type == VSW_LOCALDEV)) { 839 D2(vswp, "%s: not sending back up stack", 840 __func__); 841 continue; 842 } 843 844 if (tpp->d_type == VSW_VNETPORT) { 845 port = (vsw_port_t *)tpp->d_addr; 846 D3(vswp, "%s: sending to port %ld for addr " 847 "0x%llx", __func__, port->p_instance, key); 848 849 nmp = vsw_dupmsgchain(mp); 850 if (nmp) { 851 /* 852 * The vswp->mfdbrw is protecting the 853 * portp from getting destroyed here. 854 * So, no ref_cnt is incremented here. 855 */ 856 (void) vsw_portsend(port, nmp); 857 } 858 } else { 859 vsw_mac_rx(vswp, NULL, 860 mp, VSW_MACRX_COPYMSG); 861 D2(vswp, "%s: sending up stack" 862 " for addr 0x%llx", __func__, key); 863 check_if = B_FALSE; 864 } 865 } 866 } 867 868 RW_EXIT(&vswp->mfdbrw); 869 870 /* 871 * If the pkt came from either a vnet or from physical device, 872 * and if we havent already sent the pkt up the stack then we 873 * check now if we can/should (i.e. the interface is plumbed 874 * and in promisc mode). 875 */ 876 if ((check_if) && 877 ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) { 878 vsw_mac_rx(vswp, NULL, mp, 879 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 880 } 881 882 freemsgchain(mp); 883 884 D1(vswp, "%s: exit", __func__); 885 886 return (0); 887 } 888 889 /* 890 * This function creates the vlan id hash table for the given vsw device or 891 * port. It then adds each vlan that the device or port has been assigned, 892 * into this hash table. 893 * Arguments: 894 * arg: vsw device or port. 895 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 896 */ 897 void 898 vsw_create_vlans(void *arg, int type) 899 { 900 /* create vlan hash table */ 901 vsw_vlan_create_hash(arg, type); 902 903 /* add vlan ids of the vsw device into its hash table */ 904 vsw_vlan_add_ids(arg, type); 905 } 906 907 /* 908 * This function removes the vlan ids of the vsw device or port from its hash 909 * table. It then destroys the vlan hash table. 910 * Arguments: 911 * arg: vsw device or port. 912 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 913 */ 914 void 915 vsw_destroy_vlans(void *arg, int type) 916 { 917 /* remove vlan ids from the hash table */ 918 vsw_vlan_remove_ids(arg, type); 919 920 /* destroy vlan-hash-table */ 921 vsw_vlan_destroy_hash(arg, type); 922 } 923 924 /* 925 * Create a vlan-id hash table for the given vsw device or port. 926 */ 927 static void 928 vsw_vlan_create_hash(void *arg, int type) 929 { 930 char hashname[MAXNAMELEN]; 931 932 if (type == VSW_LOCALDEV) { 933 vsw_t *vswp = (vsw_t *)arg; 934 935 (void) snprintf(hashname, MAXNAMELEN, "vsw%d-vlan-hash", 936 vswp->instance); 937 938 vswp->vlan_nchains = vsw_vlan_nchains; 939 vswp->vlan_hashp = mod_hash_create_idhash(hashname, 940 vswp->vlan_nchains, mod_hash_null_valdtor); 941 942 } else if (type == VSW_VNETPORT) { 943 vsw_port_t *portp = (vsw_port_t *)arg; 944 945 (void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash", 946 portp->p_instance); 947 948 portp->vlan_nchains = vsw_vlan_nchains; 949 portp->vlan_hashp = mod_hash_create_idhash(hashname, 950 portp->vlan_nchains, mod_hash_null_valdtor); 951 952 } else { 953 return; 954 } 955 } 956 957 /* 958 * Destroy the vlan-id hash table for the given vsw device or port. 959 */ 960 static void 961 vsw_vlan_destroy_hash(void *arg, int type) 962 { 963 if (type == VSW_LOCALDEV) { 964 vsw_t *vswp = (vsw_t *)arg; 965 966 mod_hash_destroy_hash(vswp->vlan_hashp); 967 vswp->vlan_nchains = 0; 968 } else if (type == VSW_VNETPORT) { 969 vsw_port_t *portp = (vsw_port_t *)arg; 970 971 mod_hash_destroy_hash(portp->vlan_hashp); 972 portp->vlan_nchains = 0; 973 } else { 974 return; 975 } 976 } 977 978 /* 979 * Add vlan ids of the given vsw device or port into its hash table. 980 */ 981 void 982 vsw_vlan_add_ids(void *arg, int type) 983 { 984 int rv; 985 int i; 986 987 if (type == VSW_LOCALDEV) { 988 vsw_t *vswp = (vsw_t *)arg; 989 990 rv = mod_hash_insert(vswp->vlan_hashp, 991 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 992 (mod_hash_val_t)B_TRUE); 993 if (rv != 0) { 994 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d) for " 995 "the interface", vswp->instance, vswp->pvid); 996 } 997 998 for (i = 0; i < vswp->nvids; i++) { 999 rv = mod_hash_insert(vswp->vlan_hashp, 1000 (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i].vl_vid), 1001 (mod_hash_val_t)B_TRUE); 1002 if (rv != 0) { 1003 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d)" 1004 " for the interface", vswp->instance, 1005 vswp->pvid); 1006 } 1007 } 1008 1009 } else if (type == VSW_VNETPORT) { 1010 vsw_port_t *portp = (vsw_port_t *)arg; 1011 vsw_t *vswp = portp->p_vswp; 1012 1013 rv = mod_hash_insert(portp->vlan_hashp, 1014 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 1015 (mod_hash_val_t)B_TRUE); 1016 if (rv != 0) { 1017 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d) for " 1018 "the port(%d)", vswp->instance, vswp->pvid, 1019 portp->p_instance); 1020 } 1021 1022 for (i = 0; i < portp->nvids; i++) { 1023 rv = mod_hash_insert(portp->vlan_hashp, 1024 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i].vl_vid), 1025 (mod_hash_val_t)B_TRUE); 1026 if (rv != 0) { 1027 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d)" 1028 " for the port(%d)", vswp->instance, 1029 vswp->pvid, portp->p_instance); 1030 } 1031 } 1032 1033 } 1034 } 1035 1036 /* 1037 * Remove vlan ids of the given vsw device or port from its hash table. 1038 */ 1039 void 1040 vsw_vlan_remove_ids(void *arg, int type) 1041 { 1042 mod_hash_val_t vp; 1043 int rv; 1044 int i; 1045 1046 if (type == VSW_LOCALDEV) { 1047 vsw_t *vswp = (vsw_t *)arg; 1048 1049 rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->pvid); 1050 if (rv == B_TRUE) { 1051 rv = mod_hash_remove(vswp->vlan_hashp, 1052 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 1053 (mod_hash_val_t *)&vp); 1054 ASSERT(rv == 0); 1055 } 1056 1057 for (i = 0; i < vswp->nvids; i++) { 1058 rv = vsw_vlan_lookup(vswp->vlan_hashp, 1059 vswp->vids[i].vl_vid); 1060 if (rv == B_TRUE) { 1061 rv = mod_hash_remove(vswp->vlan_hashp, 1062 (mod_hash_key_t)VLAN_ID_KEY( 1063 vswp->vids[i].vl_vid), 1064 (mod_hash_val_t *)&vp); 1065 ASSERT(rv == 0); 1066 } 1067 } 1068 1069 } else if (type == VSW_VNETPORT) { 1070 vsw_port_t *portp = (vsw_port_t *)arg; 1071 1072 portp = (vsw_port_t *)arg; 1073 rv = vsw_vlan_lookup(portp->vlan_hashp, portp->pvid); 1074 if (rv == B_TRUE) { 1075 rv = mod_hash_remove(portp->vlan_hashp, 1076 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 1077 (mod_hash_val_t *)&vp); 1078 ASSERT(rv == 0); 1079 } 1080 1081 for (i = 0; i < portp->nvids; i++) { 1082 rv = vsw_vlan_lookup(portp->vlan_hashp, 1083 portp->vids[i].vl_vid); 1084 if (rv == B_TRUE) { 1085 rv = mod_hash_remove(portp->vlan_hashp, 1086 (mod_hash_key_t)VLAN_ID_KEY( 1087 portp->vids[i].vl_vid), 1088 (mod_hash_val_t *)&vp); 1089 ASSERT(rv == 0); 1090 } 1091 } 1092 1093 } else { 1094 return; 1095 } 1096 } 1097 1098 /* 1099 * Find the given vlan id in the hash table. 1100 * Return: B_TRUE if the id is found; B_FALSE if not found. 1101 */ 1102 boolean_t 1103 vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid) 1104 { 1105 int rv; 1106 mod_hash_val_t vp; 1107 1108 rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp); 1109 1110 if (rv != 0) 1111 return (B_FALSE); 1112 1113 return (B_TRUE); 1114 } 1115 1116 /* 1117 * Add an entry into FDB for the given vsw. 1118 */ 1119 void 1120 vsw_fdbe_add(vsw_t *vswp, void *port) 1121 { 1122 uint64_t addr = 0; 1123 vsw_port_t *portp; 1124 vsw_fdbe_t *fp; 1125 int rv; 1126 1127 portp = (vsw_port_t *)port; 1128 KEY_HASH(addr, &portp->p_macaddr); 1129 1130 fp = kmem_zalloc(sizeof (vsw_fdbe_t), KM_SLEEP); 1131 fp->portp = port; 1132 1133 /* 1134 * Note: duplicate keys will be rejected by mod_hash. 1135 */ 1136 rv = mod_hash_insert(vswp->fdb_hashp, (mod_hash_key_t)addr, 1137 (mod_hash_val_t)fp); 1138 if (rv != 0) { 1139 cmn_err(CE_WARN, "vsw%d: Duplicate mac-address(%s) for " 1140 "the port(%d)", vswp->instance, 1141 ether_sprintf(&portp->p_macaddr), portp->p_instance); 1142 } 1143 } 1144 1145 /* 1146 * Remove an entry from FDB. 1147 */ 1148 void 1149 vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr) 1150 { 1151 uint64_t addr = 0; 1152 vsw_fdbe_t *fp; 1153 int rv; 1154 1155 KEY_HASH(addr, eaddr); 1156 1157 /* 1158 * Remove the entry from fdb hash table. 1159 * This prevents further references to this fdb entry. 1160 */ 1161 rv = mod_hash_remove(vswp->fdb_hashp, (mod_hash_key_t)addr, 1162 (mod_hash_val_t *)&fp); 1163 if (rv != 0) { 1164 /* invalid key? */ 1165 return; 1166 } 1167 1168 /* 1169 * If there are threads already ref holding before the entry was 1170 * removed from hash table, then wait for ref count to drop to zero. 1171 */ 1172 while (fp->refcnt != 0) { 1173 delay(drv_usectohz(vsw_fdbe_refcnt_delay)); 1174 } 1175 1176 kmem_free(fp, sizeof (*fp)); 1177 } 1178 1179 /* 1180 * Search fdb for a given mac address. If an entry is found, hold 1181 * a reference to it and return the entry, else returns NULL. 1182 */ 1183 static vsw_fdbe_t * 1184 vsw_fdbe_find(vsw_t *vswp, struct ether_addr *addrp) 1185 { 1186 uint64_t key = 0; 1187 vsw_fdbe_t *fp; 1188 int rv; 1189 1190 KEY_HASH(key, addrp); 1191 1192 rv = mod_hash_find_cb(vswp->fdb_hashp, (mod_hash_key_t)key, 1193 (mod_hash_val_t *)&fp, vsw_fdbe_find_cb); 1194 1195 if (rv != 0) 1196 return (NULL); 1197 1198 return (fp); 1199 } 1200 1201 /* 1202 * Callback function provided to mod_hash_find_cb(). After finding the fdb 1203 * entry corresponding to the key (macaddr), this callback will be invoked by 1204 * mod_hash_find_cb() to atomically increment the reference count on the fdb 1205 * entry before returning the found entry. 1206 */ 1207 static void 1208 vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val) 1209 { 1210 _NOTE(ARGUNUSED(key)) 1211 VSW_FDBE_REFHOLD((vsw_fdbe_t *)val); 1212 } 1213 1214 /* 1215 * A given frame must be always tagged with the appropriate vlan id (unless it 1216 * is in the default-vlan) before the mac address switching function is called. 1217 * Otherwise, after switching function determines the destination, we cannot 1218 * figure out if the destination belongs to the the same vlan that the frame 1219 * originated from and if it needs tag/untag. Frames which are inbound from 1220 * the external(physical) network over a vlan trunk link are always tagged. 1221 * However frames which are received from a vnet-port over ldc or frames which 1222 * are coming down the stack on the service domain over vsw interface may be 1223 * untagged. These frames must be tagged with the appropriate pvid of the 1224 * sender (vnet-port or vsw device), before invoking the switching function. 1225 * 1226 * Arguments: 1227 * arg: caller of the function. 1228 * type: type of arg(caller): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port) 1229 * mp: frame(s) to be tagged. 1230 */ 1231 mblk_t * 1232 vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp) 1233 { 1234 vsw_t *vswp; 1235 vsw_port_t *portp; 1236 struct ether_header *ehp; 1237 mblk_t *bp; 1238 mblk_t *bpt; 1239 mblk_t *bph; 1240 mblk_t *bpn; 1241 uint16_t pvid; 1242 1243 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1244 1245 if (type == VSW_LOCALDEV) { 1246 vswp = (vsw_t *)arg; 1247 pvid = vswp->pvid; 1248 portp = NULL; 1249 } else { 1250 /* VSW_VNETPORT */ 1251 portp = (vsw_port_t *)arg; 1252 pvid = portp->pvid; 1253 vswp = portp->p_vswp; 1254 } 1255 1256 bpn = bph = bpt = NULL; 1257 1258 for (bp = mp; bp != NULL; bp = bpn) { 1259 1260 bpn = bp->b_next; 1261 bp->b_next = bp->b_prev = NULL; 1262 1263 /* Determine if it is an untagged frame */ 1264 ehp = (struct ether_header *)bp->b_rptr; 1265 1266 if (ehp->ether_type != ETHERTYPE_VLAN) { /* untagged */ 1267 1268 /* no need to tag if the frame is in default vlan */ 1269 if (pvid != vswp->default_vlan_id) { 1270 bp = vnet_vlan_insert_tag(bp, pvid); 1271 if (bp == NULL) { 1272 continue; 1273 } 1274 } 1275 } 1276 1277 /* build a chain of processed packets */ 1278 if (bph == NULL) { 1279 bph = bpt = bp; 1280 } else { 1281 bpt->b_next = bp; 1282 bpt = bp; 1283 } 1284 1285 } 1286 1287 return (bph); 1288 } 1289 1290 /* 1291 * Frames destined to a vnet-port or to the local vsw interface, must be 1292 * untagged if necessary before sending. This function first checks that the 1293 * frame can be sent to the destination in the vlan identified by the frame 1294 * tag. Note that when this function is invoked the frame must have been 1295 * already tagged (unless it is in the default-vlan). Because, this function is 1296 * called when the switching function determines the destination and invokes 1297 * its send function (vnet-port or vsw interface) and all frames would have 1298 * been tagged by this time (see comments in vsw_vlan_frame_pretag()). 1299 * 1300 * Arguments: 1301 * arg: destination device. 1302 * type: type of arg(destination): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port) 1303 * np: head of pkt chain to be validated and untagged. 1304 * npt: tail of pkt chain to be validated and untagged. 1305 * 1306 * Returns: 1307 * np: head of updated chain of packets 1308 * npt: tail of updated chain of packets 1309 * rv: count of the packets in the returned list 1310 */ 1311 uint32_t 1312 vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, mblk_t **npt) 1313 { 1314 mblk_t *bp; 1315 mblk_t *bpt; 1316 mblk_t *bph; 1317 mblk_t *bpn; 1318 vsw_port_t *portp; 1319 vsw_t *vswp; 1320 uint32_t count; 1321 struct ether_header *ehp; 1322 boolean_t is_tagged; 1323 boolean_t rv; 1324 uint16_t vlan_id; 1325 uint16_t pvid; 1326 mod_hash_t *vlan_hashp; 1327 1328 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1329 1330 1331 if (type == VSW_LOCALDEV) { 1332 vswp = (vsw_t *)arg; 1333 pvid = vswp->pvid; 1334 vlan_hashp = vswp->vlan_hashp; 1335 portp = NULL; 1336 } else { 1337 /* type == VSW_VNETPORT */ 1338 portp = (vsw_port_t *)arg; 1339 vswp = portp->p_vswp; 1340 vlan_hashp = portp->vlan_hashp; 1341 pvid = portp->pvid; 1342 } 1343 1344 /* 1345 * If the MAC layer switching in place, then 1346 * untagging required only if the pvid is not 1347 * the same as default_vlan_id. This is because, 1348 * the MAC layer will send packets for the 1349 * registered vlans only. 1350 */ 1351 if ((vswp->mac_cl_switching == B_TRUE) && 1352 (pvid == vswp->default_vlan_id)) { 1353 /* simply count and set the tail */ 1354 count = 1; 1355 bp = *np; 1356 ASSERT(bp != NULL); 1357 while (bp->b_next != NULL) { 1358 bp = bp->b_next; 1359 count++; 1360 } 1361 *npt = bp; 1362 return (count); 1363 } 1364 1365 bpn = bph = bpt = NULL; 1366 count = 0; 1367 1368 for (bp = *np; bp != NULL; bp = bpn) { 1369 1370 bpn = bp->b_next; 1371 bp->b_next = bp->b_prev = NULL; 1372 1373 /* 1374 * Determine the vlan id that the frame belongs to. 1375 */ 1376 ehp = (struct ether_header *)bp->b_rptr; 1377 is_tagged = vsw_frame_lookup_vid(arg, type, ehp, &vlan_id); 1378 1379 /* 1380 * If MAC layer switching in place, then we 1381 * need to untag only if the tagged packet has 1382 * vlan-id same as the pvid. 1383 */ 1384 if (vswp->mac_cl_switching == B_TRUE) { 1385 1386 /* only tagged packets expected here */ 1387 ASSERT(is_tagged == B_TRUE); 1388 if (vlan_id == pvid) { 1389 bp = vnet_vlan_remove_tag(bp); 1390 if (bp == NULL) { 1391 /* packet dropped */ 1392 continue; 1393 } 1394 } 1395 } else { /* No MAC layer switching */ 1396 1397 /* 1398 * Check the frame header if tag/untag is needed. 1399 */ 1400 if (is_tagged == B_FALSE) { 1401 /* 1402 * Untagged frame. We shouldn't have an 1403 * untagged packet at this point, unless 1404 * the destination's vlan id is 1405 * default-vlan-id; if it is not the 1406 * default-vlan-id, we drop the packet. 1407 */ 1408 if (vlan_id != vswp->default_vlan_id) { 1409 /* drop the packet */ 1410 freemsg(bp); 1411 continue; 1412 } 1413 } else { /* Tagged */ 1414 /* 1415 * Tagged frame, untag if it's the 1416 * destination's pvid. 1417 */ 1418 if (vlan_id == pvid) { 1419 1420 bp = vnet_vlan_remove_tag(bp); 1421 if (bp == NULL) { 1422 /* packet dropped */ 1423 continue; 1424 } 1425 } else { 1426 1427 /* 1428 * Check if the destination is in the 1429 * same vlan. 1430 */ 1431 rv = vsw_vlan_lookup(vlan_hashp, 1432 vlan_id); 1433 if (rv == B_FALSE) { 1434 /* drop the packet */ 1435 freemsg(bp); 1436 continue; 1437 } 1438 } 1439 1440 } 1441 } 1442 1443 /* build a chain of processed packets */ 1444 if (bph == NULL) { 1445 bph = bpt = bp; 1446 } else { 1447 bpt->b_next = bp; 1448 bpt = bp; 1449 } 1450 count++; 1451 } 1452 1453 *np = bph; 1454 *npt = bpt; 1455 return (count); 1456 } 1457 1458 /* 1459 * Lookup the vlan id of the given frame. If it is a vlan-tagged frame, 1460 * then the vlan-id is available in the tag; otherwise, its vlan id is 1461 * implicitly obtained based on the caller (destination of the frame: 1462 * VSW_VNETPORT or VSW_LOCALDEV). 1463 * The vlan id determined is returned in vidp. 1464 * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged. 1465 */ 1466 boolean_t 1467 vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp, 1468 uint16_t *vidp) 1469 { 1470 struct ether_vlan_header *evhp; 1471 vsw_t *vswp; 1472 vsw_port_t *portp; 1473 1474 /* If it's a tagged frame, get the vid from vlan header */ 1475 if (ehp->ether_type == ETHERTYPE_VLAN) { 1476 1477 evhp = (struct ether_vlan_header *)ehp; 1478 *vidp = VLAN_ID(ntohs(evhp->ether_tci)); 1479 return (B_TRUE); 1480 } 1481 1482 /* Untagged frame; determine vlan id based on caller */ 1483 switch (caller) { 1484 1485 case VSW_VNETPORT: 1486 /* 1487 * packet destined to a vnet; vlan-id is pvid of vnet-port. 1488 */ 1489 portp = (vsw_port_t *)arg; 1490 *vidp = portp->pvid; 1491 break; 1492 1493 case VSW_LOCALDEV: 1494 1495 /* 1496 * packet destined to vsw interface; 1497 * vlan-id is port-vlan-id of vsw device. 1498 */ 1499 vswp = (vsw_t *)arg; 1500 *vidp = vswp->pvid; 1501 break; 1502 } 1503 1504 return (B_FALSE); 1505 } 1506 1507 /* 1508 * Add or remove multicast address(es). 1509 * 1510 * Returns 0 on success, 1 on failure. 1511 */ 1512 int 1513 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port) 1514 { 1515 mcst_addr_t *mcst_p = NULL; 1516 vsw_t *vswp = port->p_vswp; 1517 uint64_t addr = 0x0; 1518 int i; 1519 1520 D1(vswp, "%s: enter", __func__); 1521 1522 D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count); 1523 1524 for (i = 0; i < mcst_pkt->count; i++) { 1525 /* 1526 * Convert address into form that can be used 1527 * as hash table key. 1528 */ 1529 KEY_HASH(addr, &(mcst_pkt->mca[i])); 1530 1531 /* 1532 * Add or delete the specified address/port combination. 1533 */ 1534 if (mcst_pkt->set == 0x1) { 1535 D3(vswp, "%s: adding multicast address 0x%llx for " 1536 "port %ld", __func__, addr, port->p_instance); 1537 if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1538 /* 1539 * Update the list of multicast 1540 * addresses contained within the 1541 * port structure to include this new 1542 * one. 1543 */ 1544 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), 1545 KM_NOSLEEP); 1546 if (mcst_p == NULL) { 1547 DERR(vswp, "%s: unable to alloc mem", 1548 __func__); 1549 (void) vsw_del_mcst(vswp, 1550 VSW_VNETPORT, addr, port); 1551 return (1); 1552 } 1553 1554 mcst_p->nextp = NULL; 1555 mcst_p->addr = addr; 1556 ether_copy(&mcst_pkt->mca[i], &mcst_p->mca); 1557 1558 /* 1559 * Program the address into HW. If the addr 1560 * has already been programmed then the MAC 1561 * just increments a ref counter (which is 1562 * used when the address is being deleted) 1563 */ 1564 if (vsw_mac_multicast_add(vswp, port, mcst_p, 1565 VSW_VNETPORT)) { 1566 (void) vsw_del_mcst(vswp, 1567 VSW_VNETPORT, addr, port); 1568 kmem_free(mcst_p, sizeof (*mcst_p)); 1569 return (1); 1570 } 1571 1572 mutex_enter(&port->mca_lock); 1573 mcst_p->nextp = port->mcap; 1574 port->mcap = mcst_p; 1575 mutex_exit(&port->mca_lock); 1576 1577 } else { 1578 DERR(vswp, "%s: error adding multicast " 1579 "address 0x%llx for port %ld", 1580 __func__, addr, port->p_instance); 1581 return (1); 1582 } 1583 } else { 1584 /* 1585 * Delete an entry from the multicast hash 1586 * table and update the address list 1587 * appropriately. 1588 */ 1589 if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1590 D3(vswp, "%s: deleting multicast address " 1591 "0x%llx for port %ld", __func__, addr, 1592 port->p_instance); 1593 1594 mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr); 1595 ASSERT(mcst_p != NULL); 1596 1597 /* 1598 * Remove the address from HW. The address 1599 * will actually only be removed once the ref 1600 * count within the MAC layer has dropped to 1601 * zero. I.e. we can safely call this fn even 1602 * if other ports are interested in this 1603 * address. 1604 */ 1605 vsw_mac_multicast_remove(vswp, port, mcst_p, 1606 VSW_VNETPORT); 1607 kmem_free(mcst_p, sizeof (*mcst_p)); 1608 1609 } else { 1610 DERR(vswp, "%s: error deleting multicast " 1611 "addr 0x%llx for port %ld", 1612 __func__, addr, port->p_instance); 1613 return (1); 1614 } 1615 } 1616 } 1617 D1(vswp, "%s: exit", __func__); 1618 return (0); 1619 } 1620 1621 /* 1622 * Add a new multicast entry. 1623 * 1624 * Search hash table based on address. If match found then 1625 * update associated val (which is chain of ports), otherwise 1626 * create new key/val (addr/port) pair and insert into table. 1627 */ 1628 int 1629 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1630 { 1631 int dup = 0; 1632 int rv = 0; 1633 mfdb_ent_t *ment = NULL; 1634 mfdb_ent_t *tmp_ent = NULL; 1635 mfdb_ent_t *new_ent = NULL; 1636 void *tgt = NULL; 1637 1638 if (devtype == VSW_VNETPORT) { 1639 /* 1640 * Being invoked from a vnet. 1641 */ 1642 ASSERT(arg != NULL); 1643 tgt = arg; 1644 D2(NULL, "%s: port %d : address 0x%llx", __func__, 1645 ((vsw_port_t *)arg)->p_instance, addr); 1646 } else { 1647 /* 1648 * We are being invoked via the m_multicst mac entry 1649 * point. 1650 */ 1651 D2(NULL, "%s: address 0x%llx", __func__, addr); 1652 tgt = (void *)vswp; 1653 } 1654 1655 WRITE_ENTER(&vswp->mfdbrw); 1656 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1657 (mod_hash_val_t *)&ment) != 0) { 1658 1659 /* address not currently in table */ 1660 ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1661 ment->d_addr = (void *)tgt; 1662 ment->d_type = devtype; 1663 ment->nextp = NULL; 1664 1665 if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr, 1666 (mod_hash_val_t)ment) != 0) { 1667 DERR(vswp, "%s: hash table insertion failed", __func__); 1668 kmem_free(ment, sizeof (mfdb_ent_t)); 1669 rv = 1; 1670 } else { 1671 D2(vswp, "%s: added initial entry for 0x%llx to " 1672 "table", __func__, addr); 1673 } 1674 } else { 1675 /* 1676 * Address in table. Check to see if specified port 1677 * is already associated with the address. If not add 1678 * it now. 1679 */ 1680 tmp_ent = ment; 1681 while (tmp_ent != NULL) { 1682 if (tmp_ent->d_addr == (void *)tgt) { 1683 if (devtype == VSW_VNETPORT) { 1684 DERR(vswp, "%s: duplicate port entry " 1685 "found for portid %ld and key " 1686 "0x%llx", __func__, 1687 ((vsw_port_t *)arg)->p_instance, 1688 addr); 1689 } else { 1690 DERR(vswp, "%s: duplicate entry found" 1691 "for key 0x%llx", __func__, addr); 1692 } 1693 rv = 1; 1694 dup = 1; 1695 break; 1696 } 1697 tmp_ent = tmp_ent->nextp; 1698 } 1699 1700 /* 1701 * Port not on list so add it to end now. 1702 */ 1703 if (0 == dup) { 1704 D2(vswp, "%s: added entry for 0x%llx to table", 1705 __func__, addr); 1706 new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1707 new_ent->d_addr = (void *)tgt; 1708 new_ent->d_type = devtype; 1709 new_ent->nextp = NULL; 1710 1711 tmp_ent = ment; 1712 while (tmp_ent->nextp != NULL) 1713 tmp_ent = tmp_ent->nextp; 1714 1715 tmp_ent->nextp = new_ent; 1716 } 1717 } 1718 1719 RW_EXIT(&vswp->mfdbrw); 1720 return (rv); 1721 } 1722 1723 /* 1724 * Remove a multicast entry from the hashtable. 1725 * 1726 * Search hash table based on address. If match found, scan 1727 * list of ports associated with address. If specified port 1728 * found remove it from list. 1729 */ 1730 int 1731 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1732 { 1733 mfdb_ent_t *ment = NULL; 1734 mfdb_ent_t *curr_p, *prev_p; 1735 void *tgt = NULL; 1736 1737 D1(vswp, "%s: enter", __func__); 1738 1739 if (devtype == VSW_VNETPORT) { 1740 tgt = (vsw_port_t *)arg; 1741 D2(vswp, "%s: removing port %d from mFDB for address" 1742 " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr); 1743 } else { 1744 D2(vswp, "%s: removing entry", __func__); 1745 tgt = (void *)vswp; 1746 } 1747 1748 WRITE_ENTER(&vswp->mfdbrw); 1749 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1750 (mod_hash_val_t *)&ment) != 0) { 1751 D2(vswp, "%s: address 0x%llx not in table", __func__, addr); 1752 RW_EXIT(&vswp->mfdbrw); 1753 return (1); 1754 } 1755 1756 prev_p = curr_p = ment; 1757 1758 while (curr_p != NULL) { 1759 if (curr_p->d_addr == (void *)tgt) { 1760 if (devtype == VSW_VNETPORT) { 1761 D2(vswp, "%s: port %d found", __func__, 1762 ((vsw_port_t *)tgt)->p_instance); 1763 } else { 1764 D2(vswp, "%s: instance found", __func__); 1765 } 1766 1767 if (prev_p == curr_p) { 1768 /* 1769 * head of list, if no other element is in 1770 * list then destroy this entry, otherwise 1771 * just replace it with updated value. 1772 */ 1773 ment = curr_p->nextp; 1774 if (ment == NULL) { 1775 (void) mod_hash_destroy(vswp->mfdb, 1776 (mod_hash_val_t)addr); 1777 } else { 1778 (void) mod_hash_replace(vswp->mfdb, 1779 (mod_hash_key_t)addr, 1780 (mod_hash_val_t)ment); 1781 } 1782 } else { 1783 /* 1784 * Not head of list, no need to do 1785 * replacement, just adjust list pointers. 1786 */ 1787 prev_p->nextp = curr_p->nextp; 1788 } 1789 break; 1790 } 1791 1792 prev_p = curr_p; 1793 curr_p = curr_p->nextp; 1794 } 1795 1796 RW_EXIT(&vswp->mfdbrw); 1797 1798 D1(vswp, "%s: exit", __func__); 1799 1800 if (curr_p == NULL) 1801 return (1); 1802 kmem_free(curr_p, sizeof (mfdb_ent_t)); 1803 return (0); 1804 } 1805 1806 /* 1807 * Port is being deleted, but has registered an interest in one 1808 * or more multicast groups. Using the list of addresses maintained 1809 * within the port structure find the appropriate entry in the hash 1810 * table and remove this port from the list of interested ports. 1811 */ 1812 void 1813 vsw_del_mcst_port(vsw_port_t *port) 1814 { 1815 mcst_addr_t *mcap = NULL; 1816 vsw_t *vswp = port->p_vswp; 1817 1818 D1(vswp, "%s: enter", __func__); 1819 1820 mutex_enter(&port->mca_lock); 1821 1822 while ((mcap = port->mcap) != NULL) { 1823 1824 port->mcap = mcap->nextp; 1825 1826 mutex_exit(&port->mca_lock); 1827 1828 (void) vsw_del_mcst(vswp, VSW_VNETPORT, 1829 mcap->addr, port); 1830 1831 /* 1832 * Remove the address from HW. The address 1833 * will actually only be removed once the ref 1834 * count within the MAC layer has dropped to 1835 * zero. I.e. we can safely call this fn even 1836 * if other ports are interested in this 1837 * address. 1838 */ 1839 vsw_mac_multicast_remove(vswp, port, mcap, VSW_VNETPORT); 1840 kmem_free(mcap, sizeof (*mcap)); 1841 1842 mutex_enter(&port->mca_lock); 1843 1844 } 1845 1846 mutex_exit(&port->mca_lock); 1847 1848 D1(vswp, "%s: exit", __func__); 1849 } 1850 1851 /* 1852 * This vsw instance is detaching, but has registered an interest in one 1853 * or more multicast groups. Using the list of addresses maintained 1854 * within the vsw structure find the appropriate entry in the hash 1855 * table and remove this instance from the list of interested ports. 1856 */ 1857 void 1858 vsw_del_mcst_vsw(vsw_t *vswp) 1859 { 1860 mcst_addr_t *next_p = NULL; 1861 1862 D1(vswp, "%s: enter", __func__); 1863 1864 mutex_enter(&vswp->mca_lock); 1865 1866 while (vswp->mcap != NULL) { 1867 DERR(vswp, "%s: deleting addr 0x%llx", 1868 __func__, vswp->mcap->addr); 1869 (void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL); 1870 1871 next_p = vswp->mcap->nextp; 1872 kmem_free(vswp->mcap, sizeof (mcst_addr_t)); 1873 vswp->mcap = next_p; 1874 } 1875 1876 vswp->mcap = NULL; 1877 mutex_exit(&vswp->mca_lock); 1878 1879 D1(vswp, "%s: exit", __func__); 1880 } 1881 1882 mblk_t * 1883 vsw_get_same_dest_list(struct ether_header *ehp, mblk_t **mpp) 1884 { 1885 mblk_t *bp; 1886 mblk_t *nbp; 1887 mblk_t *head = NULL; 1888 mblk_t *tail = NULL; 1889 mblk_t *prev = NULL; 1890 struct ether_header *behp; 1891 1892 /* process the chain of packets */ 1893 bp = *mpp; 1894 while (bp) { 1895 nbp = bp->b_next; 1896 behp = (struct ether_header *)bp->b_rptr; 1897 bp->b_prev = NULL; 1898 if (ether_cmp(&ehp->ether_dhost, &behp->ether_dhost) == 0) { 1899 if (prev == NULL) { 1900 *mpp = nbp; 1901 } else { 1902 prev->b_next = nbp; 1903 } 1904 bp->b_next = NULL; 1905 if (head == NULL) { 1906 head = tail = bp; 1907 } else { 1908 tail->b_next = bp; 1909 tail = bp; 1910 } 1911 } else { 1912 prev = bp; 1913 } 1914 bp = nbp; 1915 } 1916 return (head); 1917 } 1918 1919 static mblk_t * 1920 vsw_dupmsgchain(mblk_t *mp) 1921 { 1922 mblk_t *nmp = NULL; 1923 mblk_t **nmpp = &nmp; 1924 1925 for (; mp != NULL; mp = mp->b_next) { 1926 if ((*nmpp = dupmsg(mp)) == NULL) { 1927 freemsgchain(nmp); 1928 return (NULL); 1929 } 1930 1931 nmpp = &((*nmpp)->b_next); 1932 } 1933 1934 return (nmp); 1935 } 1936