1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/debug.h> 30 #include <sys/time.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/user.h> 34 #include <sys/stropts.h> 35 #include <sys/stream.h> 36 #include <sys/strlog.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/kmem.h> 41 #include <sys/conf.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/ksynch.h> 45 #include <sys/stat.h> 46 #include <sys/kstat.h> 47 #include <sys/vtrace.h> 48 #include <sys/strsun.h> 49 #include <sys/dlpi.h> 50 #include <sys/ethernet.h> 51 #include <net/if.h> 52 #include <sys/varargs.h> 53 #include <sys/machsystm.h> 54 #include <sys/modctl.h> 55 #include <sys/modhash.h> 56 #include <sys/mac.h> 57 #include <sys/mac_ether.h> 58 #include <sys/taskq.h> 59 #include <sys/note.h> 60 #include <sys/mach_descrip.h> 61 #include <sys/mdeg.h> 62 #include <sys/ldc.h> 63 #include <sys/vsw_fdb.h> 64 #include <sys/vsw.h> 65 #include <sys/vio_mailbox.h> 66 #include <sys/vnet_mailbox.h> 67 #include <sys/vnet_common.h> 68 #include <sys/vio_util.h> 69 #include <sys/sdt.h> 70 #include <sys/atomic.h> 71 #include <sys/vlan.h> 72 73 /* Switching setup routines */ 74 void vsw_setup_switching_thread(void *arg); 75 int vsw_setup_switching_start(vsw_t *vswp); 76 void vsw_setup_switching_stop(vsw_t *vswp); 77 int vsw_setup_switching(vsw_t *); 78 void vsw_setup_layer2_post_process(vsw_t *vswp); 79 void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, 80 vsw_port_t *port, mac_resource_handle_t mrh); 81 static int vsw_setup_layer2(vsw_t *); 82 static int vsw_setup_layer3(vsw_t *); 83 84 /* Switching/data transmit routines */ 85 static void vsw_switch_l2_frame_mac_client(vsw_t *vswp, mblk_t *mp, int caller, 86 vsw_port_t *port, mac_resource_handle_t); 87 static void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 88 vsw_port_t *port, mac_resource_handle_t); 89 static void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 90 vsw_port_t *port, mac_resource_handle_t); 91 static int vsw_forward_all(vsw_t *vswp, mblk_t *mp, 92 int caller, vsw_port_t *port); 93 static int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, 94 int caller, vsw_port_t *port); 95 96 /* VLAN routines */ 97 void vsw_create_vlans(void *arg, int type); 98 void vsw_destroy_vlans(void *arg, int type); 99 void vsw_vlan_add_ids(void *arg, int type); 100 void vsw_vlan_remove_ids(void *arg, int type); 101 static void vsw_vlan_create_hash(void *arg, int type); 102 static void vsw_vlan_destroy_hash(void *arg, int type); 103 boolean_t vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp, 104 uint16_t *vidp); 105 mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 106 uint32_t vsw_vlan_frames_untag(void *arg, int type, mblk_t **np, mblk_t **npt); 107 boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid); 108 109 /* Forwarding database (FDB) routines */ 110 void vsw_fdbe_add(vsw_t *vswp, void *port); 111 void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr); 112 static vsw_fdbe_t *vsw_fdbe_find(vsw_t *vswp, struct ether_addr *); 113 static void vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val); 114 115 int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *); 116 int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 117 int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 118 void vsw_del_mcst_vsw(vsw_t *); 119 120 /* Support functions */ 121 static mblk_t *vsw_dupmsgchain(mblk_t *mp); 122 static mblk_t *vsw_get_same_dest_list(struct ether_header *ehp, mblk_t **mpp); 123 124 125 /* 126 * Functions imported from other files. 127 */ 128 extern mblk_t *vsw_tx_msg(vsw_t *, mblk_t *, int, vsw_port_t *); 129 extern mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t); 130 extern int vsw_mac_open(vsw_t *vswp); 131 extern void vsw_mac_close(vsw_t *vswp); 132 extern void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 133 mblk_t *mp, vsw_macrx_flags_t flags); 134 extern void vsw_set_addrs(vsw_t *vswp); 135 extern int vsw_portsend(vsw_port_t *port, mblk_t *mp); 136 extern void vsw_hio_init(vsw_t *vswp); 137 extern void vsw_hio_start_ports(vsw_t *vswp); 138 extern int vsw_mac_multicast_add(vsw_t *vswp, vsw_port_t *port, 139 mcst_addr_t *mcst_p, int type); 140 extern void vsw_mac_multicast_remove(vsw_t *vswp, vsw_port_t *port, 141 mcst_addr_t *mcst_p, int type); 142 143 /* 144 * Tunables used in this file. 145 */ 146 extern int vsw_setup_switching_delay; 147 extern uint32_t vsw_vlan_nchains; 148 extern uint32_t vsw_fdbe_refcnt_delay; 149 150 #define VSW_FDBE_REFHOLD(p) \ 151 { \ 152 atomic_inc_32(&(p)->refcnt); \ 153 ASSERT((p)->refcnt != 0); \ 154 } 155 156 #define VSW_FDBE_REFRELE(p) \ 157 { \ 158 ASSERT((p)->refcnt != 0); \ 159 atomic_dec_32(&(p)->refcnt); \ 160 } 161 162 /* 163 * Thread to setup switching mode. This thread is created during vsw_attach() 164 * initially. It invokes vsw_setup_switching() and keeps retrying while the 165 * returned value is EAGAIN. The thread exits when the switching mode setup is 166 * done successfully or when the error returned is not EAGAIN. This thread may 167 * also get created from vsw_update_md_prop() if the switching mode needs to be 168 * updated. 169 */ 170 void 171 vsw_setup_switching_thread(void *arg) 172 { 173 callb_cpr_t cprinfo; 174 vsw_t *vswp = (vsw_t *)arg; 175 clock_t wait_time; 176 clock_t xwait; 177 clock_t wait_rv; 178 int rv; 179 180 /* wait time used on successive retries */ 181 xwait = drv_usectohz(vsw_setup_switching_delay * MICROSEC); 182 183 CALLB_CPR_INIT(&cprinfo, &vswp->sw_thr_lock, callb_generic_cpr, 184 "vsw_setup_sw_thread"); 185 186 mutex_enter(&vswp->sw_thr_lock); 187 188 while ((vswp->sw_thr_flags & VSW_SWTHR_STOP) == 0) { 189 190 CALLB_CPR_SAFE_BEGIN(&cprinfo); 191 192 /* Wait for sometime before (re)trying setup_switching() */ 193 wait_time = ddi_get_lbolt() + xwait; 194 while ((vswp->sw_thr_flags & VSW_SWTHR_STOP) == 0) { 195 wait_rv = cv_timedwait(&vswp->sw_thr_cv, 196 &vswp->sw_thr_lock, wait_time); 197 if (wait_rv == -1) { /* timed out */ 198 break; 199 } 200 } 201 202 CALLB_CPR_SAFE_END(&cprinfo, &vswp->sw_thr_lock) 203 204 if ((vswp->sw_thr_flags & VSW_SWTHR_STOP) != 0) { 205 /* 206 * If there is a stop request, process that first and 207 * exit the loop. Continue to hold the mutex which gets 208 * released in CALLB_CPR_EXIT(). 209 */ 210 break; 211 } 212 213 mutex_exit(&vswp->sw_thr_lock); 214 rv = vsw_setup_switching(vswp); 215 if (rv == 0) { 216 vsw_setup_layer2_post_process(vswp); 217 } 218 mutex_enter(&vswp->sw_thr_lock); 219 if (rv != EAGAIN) { 220 break; 221 } 222 223 } 224 225 vswp->sw_thr_flags &= ~VSW_SWTHR_STOP; 226 vswp->sw_thread = NULL; 227 CALLB_CPR_EXIT(&cprinfo); 228 thread_exit(); 229 } 230 231 /* 232 * Create a thread to setup the switching mode. 233 * Returns 0 on success; 1 on failure. 234 */ 235 int 236 vsw_setup_switching_start(vsw_t *vswp) 237 { 238 mutex_enter(&vswp->sw_thr_lock); 239 240 vswp->sw_thread = thread_create(NULL, 2 * DEFAULTSTKSZ, 241 vsw_setup_switching_thread, vswp, 0, &p0, TS_RUN, minclsyspri); 242 243 if (vswp->sw_thread == NULL) { 244 mutex_exit(&vswp->sw_thr_lock); 245 return (1); 246 } 247 248 mutex_exit(&vswp->sw_thr_lock); 249 return (0); 250 } 251 252 /* 253 * Stop the thread to setup switching mode. 254 */ 255 void 256 vsw_setup_switching_stop(vsw_t *vswp) 257 { 258 kt_did_t tid = 0; 259 260 /* 261 * Signal the setup_switching thread to stop and wait until it stops. 262 */ 263 mutex_enter(&vswp->sw_thr_lock); 264 265 if (vswp->sw_thread != NULL) { 266 tid = vswp->sw_thread->t_did; 267 vswp->sw_thr_flags |= VSW_SWTHR_STOP; 268 cv_signal(&vswp->sw_thr_cv); 269 } 270 271 mutex_exit(&vswp->sw_thr_lock); 272 273 if (tid != 0) 274 thread_join(tid); 275 276 (void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE); 277 278 vswp->mac_open_retries = 0; 279 } 280 281 /* 282 * Setup the required switching mode. 283 * Returns: 284 * 0 on success. 285 * EAGAIN if retry is needed. 286 * 1 on all other failures. 287 */ 288 int 289 vsw_setup_switching(vsw_t *vswp) 290 { 291 int rv = 1; 292 293 D1(vswp, "%s: enter", __func__); 294 295 /* 296 * Select best switching mode. 297 * This is done as this routine can be called from the timeout 298 * handler to retry setting up a specific mode. Currently only 299 * the function which sets up layer2/promisc mode returns EAGAIN 300 * if the underlying network device is not available yet, causing 301 * retries. 302 */ 303 if (vswp->smode & VSW_LAYER2) { 304 rv = vsw_setup_layer2(vswp); 305 } else if (vswp->smode & VSW_LAYER3) { 306 rv = vsw_setup_layer3(vswp); 307 } else { 308 DERR(vswp, "unknown switch mode"); 309 rv = 1; 310 } 311 312 if (rv && (rv != EAGAIN)) { 313 cmn_err(CE_WARN, "!vsw%d: Unable to setup specified " 314 "switching mode", vswp->instance); 315 } else if (rv == 0) { 316 (void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE); 317 } 318 319 D2(vswp, "%s: Operating in mode %d", __func__, 320 vswp->smode); 321 322 D1(vswp, "%s: exit", __func__); 323 324 return (rv); 325 } 326 327 /* 328 * Setup for layer 2 switching. 329 * 330 * Returns: 331 * 0 on success. 332 * EAGAIN if retry is needed. 333 * EIO on all other failures. 334 */ 335 static int 336 vsw_setup_layer2(vsw_t *vswp) 337 { 338 int rv; 339 340 D1(vswp, "%s: enter", __func__); 341 342 /* 343 * Until the network device is successfully opened, 344 * set the switching to use vsw_switch_l2_frame. 345 */ 346 vswp->vsw_switch_frame = vsw_switch_l2_frame; 347 vswp->mac_cl_switching = B_FALSE; 348 349 rv = strlen(vswp->physname); 350 if (rv == 0) { 351 /* 352 * Physical device name is NULL, which is 353 * required for layer 2. 354 */ 355 cmn_err(CE_WARN, "!vsw%d: no network device name specified", 356 vswp->instance); 357 return (EIO); 358 } 359 360 mutex_enter(&vswp->mac_lock); 361 362 rv = vsw_mac_open(vswp); 363 if (rv != 0) { 364 if (rv != EAGAIN) { 365 cmn_err(CE_WARN, "!vsw%d: Unable to open network " 366 "device: %s\n", vswp->instance, vswp->physname); 367 } 368 mutex_exit(&vswp->mac_lock); 369 return (rv); 370 } 371 372 /* 373 * Now we can use the mac client switching, so set the switching 374 * function to use vsw_switch_l2_frame_mac_client(), which simply 375 * sends the packets to MAC layer for switching. 376 */ 377 vswp->vsw_switch_frame = vsw_switch_l2_frame_mac_client; 378 vswp->mac_cl_switching = B_TRUE; 379 380 D1(vswp, "%s: exit", __func__); 381 382 /* Initialize HybridIO related stuff */ 383 vsw_hio_init(vswp); 384 385 mutex_exit(&vswp->mac_lock); 386 return (0); 387 388 exit_error: 389 vsw_mac_close(vswp); 390 mutex_exit(&vswp->mac_lock); 391 return (EIO); 392 } 393 394 static int 395 vsw_setup_layer3(vsw_t *vswp) 396 { 397 D1(vswp, "%s: enter", __func__); 398 399 D2(vswp, "%s: operating in layer 3 mode", __func__); 400 vswp->vsw_switch_frame = vsw_switch_l3_frame; 401 402 D1(vswp, "%s: exit", __func__); 403 404 return (0); 405 } 406 407 /* ARGSUSED */ 408 void 409 vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *port, 410 mac_resource_handle_t mrh) 411 { 412 freemsgchain(mp); 413 } 414 415 /* 416 * Use mac client for layer 2 switching . 417 */ 418 static void 419 vsw_switch_l2_frame_mac_client(vsw_t *vswp, mblk_t *mp, int caller, 420 vsw_port_t *port, mac_resource_handle_t mrh) 421 { 422 _NOTE(ARGUNUSED(mrh)) 423 424 mblk_t *ret_m; 425 426 /* 427 * This switching function is expected to be called by 428 * the ports or the interface only. The packets from 429 * physical interface already switched. 430 */ 431 ASSERT((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV)); 432 433 if ((ret_m = vsw_tx_msg(vswp, mp, caller, port)) != NULL) { 434 DERR(vswp, "%s: drop mblks to " 435 "phys dev", __func__); 436 freemsgchain(ret_m); 437 } 438 } 439 440 /* 441 * Switch the given ethernet frame when operating in layer 2 mode. 442 * 443 * vswp: pointer to the vsw instance 444 * mp: pointer to chain of ethernet frame(s) to be switched 445 * caller: identifies the source of this frame as: 446 * 1. VSW_VNETPORT - a vsw port (connected to a vnet). 447 * 2. VSW_PHYSDEV - the physical ethernet device 448 * 3. VSW_LOCALDEV - vsw configured as a virtual interface 449 * arg: argument provided by the caller. 450 * 1. for VNETPORT - pointer to the corresponding vsw_port_t. 451 * 2. for PHYSDEV - NULL 452 * 3. for LOCALDEV - pointer to to this vsw_t(self) 453 */ 454 void 455 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 456 vsw_port_t *arg, mac_resource_handle_t mrh) 457 { 458 struct ether_header *ehp; 459 mblk_t *bp, *ret_m; 460 vsw_fdbe_t *fp; 461 462 D1(vswp, "%s: enter (caller %d)", __func__, caller); 463 464 /* 465 * PERF: rather than breaking up the chain here, scan it 466 * to find all mblks heading to same destination and then 467 * pass that sub-chain to the lower transmit functions. 468 */ 469 470 /* process the chain of packets */ 471 bp = mp; 472 while (bp) { 473 ehp = (struct ether_header *)bp->b_rptr; 474 mp = vsw_get_same_dest_list(ehp, &bp); 475 ASSERT(mp != NULL); 476 477 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 478 __func__, MBLKSIZE(mp), MBLKL(mp)); 479 480 if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) { 481 /* 482 * If destination is VSW_LOCALDEV (vsw as an eth 483 * interface) and if the device is up & running, 484 * send the packet up the stack on this host. 485 * If the virtual interface is down, drop the packet. 486 */ 487 if (caller != VSW_LOCALDEV) { 488 vsw_mac_rx(vswp, mrh, mp, VSW_MACRX_FREEMSG); 489 } else { 490 freemsgchain(mp); 491 } 492 continue; 493 } 494 495 /* 496 * Find fdb entry for the destination 497 * and hold a reference to it. 498 */ 499 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 500 if (fp != NULL) { 501 502 /* 503 * If plumbed and in promisc mode then copy msg 504 * and send up the stack. 505 */ 506 vsw_mac_rx(vswp, mrh, mp, 507 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 508 509 /* 510 * If the destination is in FDB, the packet 511 * should be forwarded to the correponding 512 * vsw_port (connected to a vnet device - 513 * VSW_VNETPORT) 514 */ 515 (void) vsw_portsend(fp->portp, mp); 516 517 /* Release the reference on the fdb entry */ 518 VSW_FDBE_REFRELE(fp); 519 } else { 520 /* 521 * Destination not in FDB. 522 * 523 * If the destination is broadcast or 524 * multicast forward the packet to all 525 * (VNETPORTs, PHYSDEV, LOCALDEV), 526 * except the caller. 527 */ 528 if (IS_BROADCAST(ehp)) { 529 D2(vswp, "%s: BROADCAST pkt", __func__); 530 (void) vsw_forward_all(vswp, mp, caller, arg); 531 } else if (IS_MULTICAST(ehp)) { 532 D2(vswp, "%s: MULTICAST pkt", __func__); 533 (void) vsw_forward_grp(vswp, mp, caller, arg); 534 } else { 535 /* 536 * If the destination is unicast, and came 537 * from either a logical network device or 538 * the switch itself when it is plumbed, then 539 * send it out on the physical device and also 540 * up the stack if the logical interface is 541 * in promiscious mode. 542 * 543 * NOTE: The assumption here is that if we 544 * cannot find the destination in our fdb, its 545 * a unicast address, and came from either a 546 * vnet or down the stack (when plumbed) it 547 * must be destinded for an ethernet device 548 * outside our ldoms. 549 */ 550 if (caller == VSW_VNETPORT) { 551 /* promisc check copy etc */ 552 vsw_mac_rx(vswp, mrh, mp, 553 VSW_MACRX_PROMISC | 554 VSW_MACRX_COPYMSG); 555 556 if ((ret_m = vsw_tx_msg(vswp, mp, 557 caller, arg)) != NULL) { 558 DERR(vswp, "%s: drop mblks to " 559 "phys dev", __func__); 560 freemsgchain(ret_m); 561 } 562 563 } else if (caller == VSW_PHYSDEV) { 564 /* 565 * Pkt seen because card in promisc 566 * mode. Send up stack if plumbed in 567 * promisc mode, else drop it. 568 */ 569 vsw_mac_rx(vswp, mrh, mp, 570 VSW_MACRX_PROMISC | 571 VSW_MACRX_FREEMSG); 572 573 } else if (caller == VSW_LOCALDEV) { 574 /* 575 * Pkt came down the stack, send out 576 * over physical device. 577 */ 578 if ((ret_m = vsw_tx_msg(vswp, mp, 579 caller, NULL)) != NULL) { 580 DERR(vswp, "%s: drop mblks to " 581 "phys dev", __func__); 582 freemsgchain(ret_m); 583 } 584 } 585 } 586 } 587 } 588 D1(vswp, "%s: exit\n", __func__); 589 } 590 591 /* 592 * Switch ethernet frame when in layer 3 mode (i.e. using IP 593 * layer to do the routing). 594 * 595 * There is a large amount of overlap between this function and 596 * vsw_switch_l2_frame. At some stage we need to revisit and refactor 597 * both these functions. 598 */ 599 void 600 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 601 vsw_port_t *arg, mac_resource_handle_t mrh) 602 { 603 struct ether_header *ehp; 604 mblk_t *bp = NULL; 605 vsw_fdbe_t *fp; 606 607 D1(vswp, "%s: enter (caller %d)", __func__, caller); 608 609 /* 610 * In layer 3 mode should only ever be switching packets 611 * between IP layer and vnet devices. So make sure thats 612 * who is invoking us. 613 */ 614 if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) { 615 DERR(vswp, "%s: unexpected caller (%d)", __func__, caller); 616 freemsgchain(mp); 617 return; 618 } 619 620 /* process the chain of packets */ 621 bp = mp; 622 while (bp) { 623 ehp = (struct ether_header *)bp->b_rptr; 624 mp = vsw_get_same_dest_list(ehp, &bp); 625 ASSERT(mp != NULL); 626 627 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 628 __func__, MBLKSIZE(mp), MBLKL(mp)); 629 630 /* 631 * Find fdb entry for the destination 632 * and hold a reference to it. 633 */ 634 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 635 if (fp != NULL) { 636 637 D2(vswp, "%s: sending to target port", __func__); 638 (void) vsw_portsend(fp->portp, mp); 639 640 /* Release the reference on the fdb entry */ 641 VSW_FDBE_REFRELE(fp); 642 } else { 643 /* 644 * Destination not in FDB 645 * 646 * If the destination is broadcast or 647 * multicast forward the packet to all 648 * (VNETPORTs, PHYSDEV, LOCALDEV), 649 * except the caller. 650 */ 651 if (IS_BROADCAST(ehp)) { 652 D2(vswp, "%s: BROADCAST pkt", __func__); 653 (void) vsw_forward_all(vswp, mp, caller, arg); 654 } else if (IS_MULTICAST(ehp)) { 655 D2(vswp, "%s: MULTICAST pkt", __func__); 656 (void) vsw_forward_grp(vswp, mp, caller, arg); 657 } else { 658 /* 659 * Unicast pkt from vnet that we don't have 660 * an FDB entry for, so must be destinded for 661 * the outside world. Attempt to send up to the 662 * IP layer to allow it to deal with it. 663 */ 664 if (caller == VSW_VNETPORT) { 665 vsw_mac_rx(vswp, mrh, 666 mp, VSW_MACRX_FREEMSG); 667 } 668 } 669 } 670 } 671 672 D1(vswp, "%s: exit", __func__); 673 } 674 675 /* 676 * Setup mac addrs and hio resources for layer 2 switching only. 677 */ 678 void 679 vsw_setup_layer2_post_process(vsw_t *vswp) 680 { 681 if (vswp->smode & VSW_LAYER2) { 682 /* 683 * Program unicst, mcst addrs of vsw 684 * interface and ports in the physdev. 685 */ 686 vsw_set_addrs(vswp); 687 688 /* Start HIO for ports that have already connected */ 689 vsw_hio_start_ports(vswp); 690 } 691 } 692 693 /* 694 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV), 695 * except the caller (port on which frame arrived). 696 */ 697 static int 698 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 699 { 700 vsw_port_list_t *plist = &vswp->plist; 701 vsw_port_t *portp; 702 mblk_t *nmp = NULL; 703 mblk_t *ret_m = NULL; 704 int skip_port = 0; 705 706 D1(vswp, "vsw_forward_all: enter\n"); 707 708 /* 709 * Broadcast message from inside ldoms so send to outside 710 * world if in either of layer 2 modes. 711 */ 712 if ((vswp->smode & VSW_LAYER2) && 713 ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) { 714 715 nmp = vsw_dupmsgchain(mp); 716 if (nmp) { 717 if ((ret_m = vsw_tx_msg(vswp, nmp, caller, arg)) 718 != NULL) { 719 DERR(vswp, "%s: dropping pkt(s) " 720 "consisting of %ld bytes of data for" 721 " physical device", __func__, MBLKL(ret_m)); 722 freemsgchain(ret_m); 723 } 724 } 725 } 726 727 if (caller == VSW_VNETPORT) 728 skip_port = 1; 729 730 /* 731 * Broadcast message from other vnet (layer 2 or 3) or outside 732 * world (layer 2 only), send up stack if plumbed. 733 */ 734 if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) { 735 vsw_mac_rx(vswp, NULL, mp, VSW_MACRX_COPYMSG); 736 } 737 738 /* send it to all VNETPORTs */ 739 READ_ENTER(&plist->lockrw); 740 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 741 D2(vswp, "vsw_forward_all: port %d", portp->p_instance); 742 /* 743 * Caution ! - don't reorder these two checks as arg 744 * will be NULL if the caller is PHYSDEV. skip_port is 745 * only set if caller is VNETPORT. 746 */ 747 if ((skip_port) && (portp == arg)) { 748 continue; 749 } else { 750 nmp = vsw_dupmsgchain(mp); 751 if (nmp) { 752 /* 753 * The plist->lockrw is protecting the 754 * portp from getting destroyed here. 755 * So, no ref_cnt is incremented here. 756 */ 757 (void) vsw_portsend(portp, nmp); 758 } else { 759 DERR(vswp, "vsw_forward_all: nmp NULL"); 760 } 761 } 762 } 763 RW_EXIT(&plist->lockrw); 764 765 freemsgchain(mp); 766 767 D1(vswp, "vsw_forward_all: exit\n"); 768 return (0); 769 } 770 771 /* 772 * Forward pkts to any devices or interfaces which have registered 773 * an interest in them (i.e. multicast groups). 774 */ 775 static int 776 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 777 { 778 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 779 mfdb_ent_t *entp = NULL; 780 mfdb_ent_t *tpp = NULL; 781 vsw_port_t *port; 782 uint64_t key = 0; 783 mblk_t *nmp = NULL; 784 mblk_t *ret_m = NULL; 785 boolean_t check_if = B_TRUE; 786 787 /* 788 * Convert address to hash table key 789 */ 790 KEY_HASH(key, &ehp->ether_dhost); 791 792 D1(vswp, "%s: key 0x%llx", __func__, key); 793 794 /* 795 * If pkt came from either a vnet or down the stack (if we are 796 * plumbed) and we are in layer 2 mode, then we send the pkt out 797 * over the physical adapter, and then check to see if any other 798 * vnets are interested in it. 799 */ 800 if ((vswp->smode & VSW_LAYER2) && 801 ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) { 802 nmp = vsw_dupmsgchain(mp); 803 if (nmp) { 804 if ((ret_m = vsw_tx_msg(vswp, nmp, caller, arg)) 805 != NULL) { 806 DERR(vswp, "%s: dropping pkt(s) consisting of " 807 "%ld bytes of data for physical device", 808 __func__, MBLKL(ret_m)); 809 freemsgchain(ret_m); 810 } 811 } 812 } 813 814 READ_ENTER(&vswp->mfdbrw); 815 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key, 816 (mod_hash_val_t *)&entp) != 0) { 817 D3(vswp, "%s: no table entry found for addr 0x%llx", 818 __func__, key); 819 } else { 820 /* 821 * Send to list of devices associated with this address... 822 */ 823 for (tpp = entp; tpp != NULL; tpp = tpp->nextp) { 824 825 /* dont send to ourselves */ 826 if ((caller == VSW_VNETPORT) && 827 (tpp->d_addr == (void *)arg)) { 828 port = (vsw_port_t *)tpp->d_addr; 829 D3(vswp, "%s: not sending to ourselves" 830 " : port %d", __func__, port->p_instance); 831 continue; 832 833 } else if ((caller == VSW_LOCALDEV) && 834 (tpp->d_type == VSW_LOCALDEV)) { 835 D2(vswp, "%s: not sending back up stack", 836 __func__); 837 continue; 838 } 839 840 if (tpp->d_type == VSW_VNETPORT) { 841 port = (vsw_port_t *)tpp->d_addr; 842 D3(vswp, "%s: sending to port %ld for addr " 843 "0x%llx", __func__, port->p_instance, key); 844 845 nmp = vsw_dupmsgchain(mp); 846 if (nmp) { 847 /* 848 * The vswp->mfdbrw is protecting the 849 * portp from getting destroyed here. 850 * So, no ref_cnt is incremented here. 851 */ 852 (void) vsw_portsend(port, nmp); 853 } 854 } else { 855 vsw_mac_rx(vswp, NULL, 856 mp, VSW_MACRX_COPYMSG); 857 D2(vswp, "%s: sending up stack" 858 " for addr 0x%llx", __func__, key); 859 check_if = B_FALSE; 860 } 861 } 862 } 863 864 RW_EXIT(&vswp->mfdbrw); 865 866 /* 867 * If the pkt came from either a vnet or from physical device, 868 * and if we havent already sent the pkt up the stack then we 869 * check now if we can/should (i.e. the interface is plumbed 870 * and in promisc mode). 871 */ 872 if ((check_if) && 873 ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) { 874 vsw_mac_rx(vswp, NULL, mp, 875 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 876 } 877 878 freemsgchain(mp); 879 880 D1(vswp, "%s: exit", __func__); 881 882 return (0); 883 } 884 885 /* 886 * This function creates the vlan id hash table for the given vsw device or 887 * port. It then adds each vlan that the device or port has been assigned, 888 * into this hash table. 889 * Arguments: 890 * arg: vsw device or port. 891 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 892 */ 893 void 894 vsw_create_vlans(void *arg, int type) 895 { 896 /* create vlan hash table */ 897 vsw_vlan_create_hash(arg, type); 898 899 /* add vlan ids of the vsw device into its hash table */ 900 vsw_vlan_add_ids(arg, type); 901 } 902 903 /* 904 * This function removes the vlan ids of the vsw device or port from its hash 905 * table. It then destroys the vlan hash table. 906 * Arguments: 907 * arg: vsw device or port. 908 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 909 */ 910 void 911 vsw_destroy_vlans(void *arg, int type) 912 { 913 /* remove vlan ids from the hash table */ 914 vsw_vlan_remove_ids(arg, type); 915 916 /* destroy vlan-hash-table */ 917 vsw_vlan_destroy_hash(arg, type); 918 } 919 920 /* 921 * Create a vlan-id hash table for the given vsw device or port. 922 */ 923 static void 924 vsw_vlan_create_hash(void *arg, int type) 925 { 926 char hashname[MAXNAMELEN]; 927 928 if (type == VSW_LOCALDEV) { 929 vsw_t *vswp = (vsw_t *)arg; 930 931 (void) snprintf(hashname, MAXNAMELEN, "vsw%d-vlan-hash", 932 vswp->instance); 933 934 vswp->vlan_nchains = vsw_vlan_nchains; 935 vswp->vlan_hashp = mod_hash_create_idhash(hashname, 936 vswp->vlan_nchains, mod_hash_null_valdtor); 937 938 } else if (type == VSW_VNETPORT) { 939 vsw_port_t *portp = (vsw_port_t *)arg; 940 941 (void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash", 942 portp->p_instance); 943 944 portp->vlan_nchains = vsw_vlan_nchains; 945 portp->vlan_hashp = mod_hash_create_idhash(hashname, 946 portp->vlan_nchains, mod_hash_null_valdtor); 947 948 } else { 949 return; 950 } 951 } 952 953 /* 954 * Destroy the vlan-id hash table for the given vsw device or port. 955 */ 956 static void 957 vsw_vlan_destroy_hash(void *arg, int type) 958 { 959 if (type == VSW_LOCALDEV) { 960 vsw_t *vswp = (vsw_t *)arg; 961 962 mod_hash_destroy_hash(vswp->vlan_hashp); 963 vswp->vlan_nchains = 0; 964 } else if (type == VSW_VNETPORT) { 965 vsw_port_t *portp = (vsw_port_t *)arg; 966 967 mod_hash_destroy_hash(portp->vlan_hashp); 968 portp->vlan_nchains = 0; 969 } else { 970 return; 971 } 972 } 973 974 /* 975 * Add vlan ids of the given vsw device or port into its hash table. 976 */ 977 void 978 vsw_vlan_add_ids(void *arg, int type) 979 { 980 int rv; 981 int i; 982 983 if (type == VSW_LOCALDEV) { 984 vsw_t *vswp = (vsw_t *)arg; 985 986 rv = mod_hash_insert(vswp->vlan_hashp, 987 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 988 (mod_hash_val_t)B_TRUE); 989 if (rv != 0) { 990 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d) for " 991 "the interface", vswp->instance, vswp->pvid); 992 } 993 994 for (i = 0; i < vswp->nvids; i++) { 995 rv = mod_hash_insert(vswp->vlan_hashp, 996 (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i].vl_vid), 997 (mod_hash_val_t)B_TRUE); 998 if (rv != 0) { 999 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d)" 1000 " for the interface", vswp->instance, 1001 vswp->pvid); 1002 } 1003 } 1004 1005 } else if (type == VSW_VNETPORT) { 1006 vsw_port_t *portp = (vsw_port_t *)arg; 1007 vsw_t *vswp = portp->p_vswp; 1008 1009 rv = mod_hash_insert(portp->vlan_hashp, 1010 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 1011 (mod_hash_val_t)B_TRUE); 1012 if (rv != 0) { 1013 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d) for " 1014 "the port(%d)", vswp->instance, vswp->pvid, 1015 portp->p_instance); 1016 } 1017 1018 for (i = 0; i < portp->nvids; i++) { 1019 rv = mod_hash_insert(portp->vlan_hashp, 1020 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i].vl_vid), 1021 (mod_hash_val_t)B_TRUE); 1022 if (rv != 0) { 1023 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d)" 1024 " for the port(%d)", vswp->instance, 1025 vswp->pvid, portp->p_instance); 1026 } 1027 } 1028 1029 } 1030 } 1031 1032 /* 1033 * Remove vlan ids of the given vsw device or port from its hash table. 1034 */ 1035 void 1036 vsw_vlan_remove_ids(void *arg, int type) 1037 { 1038 mod_hash_val_t vp; 1039 int rv; 1040 int i; 1041 1042 if (type == VSW_LOCALDEV) { 1043 vsw_t *vswp = (vsw_t *)arg; 1044 1045 rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->pvid); 1046 if (rv == B_TRUE) { 1047 rv = mod_hash_remove(vswp->vlan_hashp, 1048 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 1049 (mod_hash_val_t *)&vp); 1050 ASSERT(rv == 0); 1051 } 1052 1053 for (i = 0; i < vswp->nvids; i++) { 1054 rv = vsw_vlan_lookup(vswp->vlan_hashp, 1055 vswp->vids[i].vl_vid); 1056 if (rv == B_TRUE) { 1057 rv = mod_hash_remove(vswp->vlan_hashp, 1058 (mod_hash_key_t)VLAN_ID_KEY( 1059 vswp->vids[i].vl_vid), 1060 (mod_hash_val_t *)&vp); 1061 ASSERT(rv == 0); 1062 } 1063 } 1064 1065 } else if (type == VSW_VNETPORT) { 1066 vsw_port_t *portp = (vsw_port_t *)arg; 1067 1068 portp = (vsw_port_t *)arg; 1069 rv = vsw_vlan_lookup(portp->vlan_hashp, portp->pvid); 1070 if (rv == B_TRUE) { 1071 rv = mod_hash_remove(portp->vlan_hashp, 1072 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 1073 (mod_hash_val_t *)&vp); 1074 ASSERT(rv == 0); 1075 } 1076 1077 for (i = 0; i < portp->nvids; i++) { 1078 rv = vsw_vlan_lookup(portp->vlan_hashp, 1079 portp->vids[i].vl_vid); 1080 if (rv == B_TRUE) { 1081 rv = mod_hash_remove(portp->vlan_hashp, 1082 (mod_hash_key_t)VLAN_ID_KEY( 1083 portp->vids[i].vl_vid), 1084 (mod_hash_val_t *)&vp); 1085 ASSERT(rv == 0); 1086 } 1087 } 1088 1089 } else { 1090 return; 1091 } 1092 } 1093 1094 /* 1095 * Find the given vlan id in the hash table. 1096 * Return: B_TRUE if the id is found; B_FALSE if not found. 1097 */ 1098 boolean_t 1099 vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid) 1100 { 1101 int rv; 1102 mod_hash_val_t vp; 1103 1104 rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp); 1105 1106 if (rv != 0) 1107 return (B_FALSE); 1108 1109 return (B_TRUE); 1110 } 1111 1112 /* 1113 * Add an entry into FDB for the given vsw. 1114 */ 1115 void 1116 vsw_fdbe_add(vsw_t *vswp, void *port) 1117 { 1118 uint64_t addr = 0; 1119 vsw_port_t *portp; 1120 vsw_fdbe_t *fp; 1121 int rv; 1122 1123 portp = (vsw_port_t *)port; 1124 KEY_HASH(addr, &portp->p_macaddr); 1125 1126 fp = kmem_zalloc(sizeof (vsw_fdbe_t), KM_SLEEP); 1127 fp->portp = port; 1128 1129 /* 1130 * Note: duplicate keys will be rejected by mod_hash. 1131 */ 1132 rv = mod_hash_insert(vswp->fdb_hashp, (mod_hash_key_t)addr, 1133 (mod_hash_val_t)fp); 1134 if (rv != 0) { 1135 cmn_err(CE_WARN, "vsw%d: Duplicate mac-address(%s) for " 1136 "the port(%d)", vswp->instance, 1137 ether_sprintf(&portp->p_macaddr), portp->p_instance); 1138 } 1139 } 1140 1141 /* 1142 * Remove an entry from FDB. 1143 */ 1144 void 1145 vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr) 1146 { 1147 uint64_t addr = 0; 1148 vsw_fdbe_t *fp; 1149 int rv; 1150 1151 KEY_HASH(addr, eaddr); 1152 1153 /* 1154 * Remove the entry from fdb hash table. 1155 * This prevents further references to this fdb entry. 1156 */ 1157 rv = mod_hash_remove(vswp->fdb_hashp, (mod_hash_key_t)addr, 1158 (mod_hash_val_t *)&fp); 1159 if (rv != 0) { 1160 /* invalid key? */ 1161 return; 1162 } 1163 1164 /* 1165 * If there are threads already ref holding before the entry was 1166 * removed from hash table, then wait for ref count to drop to zero. 1167 */ 1168 while (fp->refcnt != 0) { 1169 delay(drv_usectohz(vsw_fdbe_refcnt_delay)); 1170 } 1171 1172 kmem_free(fp, sizeof (*fp)); 1173 } 1174 1175 /* 1176 * Search fdb for a given mac address. If an entry is found, hold 1177 * a reference to it and return the entry, else returns NULL. 1178 */ 1179 static vsw_fdbe_t * 1180 vsw_fdbe_find(vsw_t *vswp, struct ether_addr *addrp) 1181 { 1182 uint64_t key = 0; 1183 vsw_fdbe_t *fp; 1184 int rv; 1185 1186 KEY_HASH(key, addrp); 1187 1188 rv = mod_hash_find_cb(vswp->fdb_hashp, (mod_hash_key_t)key, 1189 (mod_hash_val_t *)&fp, vsw_fdbe_find_cb); 1190 1191 if (rv != 0) 1192 return (NULL); 1193 1194 return (fp); 1195 } 1196 1197 /* 1198 * Callback function provided to mod_hash_find_cb(). After finding the fdb 1199 * entry corresponding to the key (macaddr), this callback will be invoked by 1200 * mod_hash_find_cb() to atomically increment the reference count on the fdb 1201 * entry before returning the found entry. 1202 */ 1203 static void 1204 vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val) 1205 { 1206 _NOTE(ARGUNUSED(key)) 1207 VSW_FDBE_REFHOLD((vsw_fdbe_t *)val); 1208 } 1209 1210 /* 1211 * A given frame must be always tagged with the appropriate vlan id (unless it 1212 * is in the default-vlan) before the mac address switching function is called. 1213 * Otherwise, after switching function determines the destination, we cannot 1214 * figure out if the destination belongs to the the same vlan that the frame 1215 * originated from and if it needs tag/untag. Frames which are inbound from 1216 * the external(physical) network over a vlan trunk link are always tagged. 1217 * However frames which are received from a vnet-port over ldc or frames which 1218 * are coming down the stack on the service domain over vsw interface may be 1219 * untagged. These frames must be tagged with the appropriate pvid of the 1220 * sender (vnet-port or vsw device), before invoking the switching function. 1221 * 1222 * Arguments: 1223 * arg: caller of the function. 1224 * type: type of arg(caller): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port) 1225 * mp: frame(s) to be tagged. 1226 */ 1227 mblk_t * 1228 vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp) 1229 { 1230 vsw_t *vswp; 1231 vsw_port_t *portp; 1232 struct ether_header *ehp; 1233 mblk_t *bp; 1234 mblk_t *bpt; 1235 mblk_t *bph; 1236 mblk_t *bpn; 1237 uint16_t pvid; 1238 1239 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1240 1241 if (type == VSW_LOCALDEV) { 1242 vswp = (vsw_t *)arg; 1243 pvid = vswp->pvid; 1244 portp = NULL; 1245 } else { 1246 /* VSW_VNETPORT */ 1247 portp = (vsw_port_t *)arg; 1248 pvid = portp->pvid; 1249 vswp = portp->p_vswp; 1250 } 1251 1252 bpn = bph = bpt = NULL; 1253 1254 for (bp = mp; bp != NULL; bp = bpn) { 1255 1256 bpn = bp->b_next; 1257 bp->b_next = bp->b_prev = NULL; 1258 1259 /* Determine if it is an untagged frame */ 1260 ehp = (struct ether_header *)bp->b_rptr; 1261 1262 if (ehp->ether_type != ETHERTYPE_VLAN) { /* untagged */ 1263 1264 /* no need to tag if the frame is in default vlan */ 1265 if (pvid != vswp->default_vlan_id) { 1266 bp = vnet_vlan_insert_tag(bp, pvid); 1267 if (bp == NULL) { 1268 continue; 1269 } 1270 } 1271 } 1272 1273 /* build a chain of processed packets */ 1274 if (bph == NULL) { 1275 bph = bpt = bp; 1276 } else { 1277 bpt->b_next = bp; 1278 bpt = bp; 1279 } 1280 1281 } 1282 1283 return (bph); 1284 } 1285 1286 /* 1287 * Frames destined to a vnet-port or to the local vsw interface, must be 1288 * untagged if necessary before sending. This function first checks that the 1289 * frame can be sent to the destination in the vlan identified by the frame 1290 * tag. Note that when this function is invoked the frame must have been 1291 * already tagged (unless it is in the default-vlan). Because, this function is 1292 * called when the switching function determines the destination and invokes 1293 * its send function (vnet-port or vsw interface) and all frames would have 1294 * been tagged by this time (see comments in vsw_vlan_frame_pretag()). 1295 * 1296 * Arguments: 1297 * arg: destination device. 1298 * type: type of arg(destination): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port) 1299 * np: head of pkt chain to be validated and untagged. 1300 * npt: tail of pkt chain to be validated and untagged. 1301 * 1302 * Returns: 1303 * np: head of updated chain of packets 1304 * npt: tail of updated chain of packets 1305 * rv: count of the packets in the returned list 1306 */ 1307 uint32_t 1308 vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, mblk_t **npt) 1309 { 1310 mblk_t *bp; 1311 mblk_t *bpt; 1312 mblk_t *bph; 1313 mblk_t *bpn; 1314 vsw_port_t *portp; 1315 vsw_t *vswp; 1316 uint32_t count; 1317 struct ether_header *ehp; 1318 boolean_t is_tagged; 1319 boolean_t rv; 1320 uint16_t vlan_id; 1321 uint16_t pvid; 1322 mod_hash_t *vlan_hashp; 1323 1324 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1325 1326 1327 if (type == VSW_LOCALDEV) { 1328 vswp = (vsw_t *)arg; 1329 pvid = vswp->pvid; 1330 vlan_hashp = vswp->vlan_hashp; 1331 portp = NULL; 1332 } else { 1333 /* type == VSW_VNETPORT */ 1334 portp = (vsw_port_t *)arg; 1335 vswp = portp->p_vswp; 1336 vlan_hashp = portp->vlan_hashp; 1337 pvid = portp->pvid; 1338 } 1339 1340 /* 1341 * If the MAC layer switching in place, then 1342 * untagging required only if the pvid is not 1343 * the same as default_vlan_id. This is because, 1344 * the MAC layer will send packets for the 1345 * registered vlans only. 1346 */ 1347 if ((vswp->mac_cl_switching == B_TRUE) && 1348 (pvid == vswp->default_vlan_id)) { 1349 /* simply count and set the tail */ 1350 count = 1; 1351 bp = *np; 1352 ASSERT(bp != NULL); 1353 while (bp->b_next != NULL) { 1354 bp = bp->b_next; 1355 count++; 1356 } 1357 *npt = bp; 1358 return (count); 1359 } 1360 1361 bpn = bph = bpt = NULL; 1362 count = 0; 1363 1364 for (bp = *np; bp != NULL; bp = bpn) { 1365 1366 bpn = bp->b_next; 1367 bp->b_next = bp->b_prev = NULL; 1368 1369 /* 1370 * Determine the vlan id that the frame belongs to. 1371 */ 1372 ehp = (struct ether_header *)bp->b_rptr; 1373 is_tagged = vsw_frame_lookup_vid(arg, type, ehp, &vlan_id); 1374 1375 /* 1376 * If MAC layer switching in place, then we 1377 * need to untag only if the tagged packet has 1378 * vlan-id same as the pvid. 1379 */ 1380 if (vswp->mac_cl_switching == B_TRUE) { 1381 1382 /* only tagged packets expected here */ 1383 ASSERT(is_tagged == B_TRUE); 1384 if (vlan_id == pvid) { 1385 bp = vnet_vlan_remove_tag(bp); 1386 if (bp == NULL) { 1387 /* packet dropped */ 1388 continue; 1389 } 1390 } 1391 } else { /* No MAC layer switching */ 1392 1393 /* 1394 * Check the frame header if tag/untag is needed. 1395 */ 1396 if (is_tagged == B_FALSE) { 1397 /* 1398 * Untagged frame. We shouldn't have an 1399 * untagged packet at this point, unless 1400 * the destination's vlan id is 1401 * default-vlan-id; if it is not the 1402 * default-vlan-id, we drop the packet. 1403 */ 1404 if (vlan_id != vswp->default_vlan_id) { 1405 /* drop the packet */ 1406 freemsg(bp); 1407 continue; 1408 } 1409 } else { /* Tagged */ 1410 /* 1411 * Tagged frame, untag if it's the 1412 * destination's pvid. 1413 */ 1414 if (vlan_id == pvid) { 1415 1416 bp = vnet_vlan_remove_tag(bp); 1417 if (bp == NULL) { 1418 /* packet dropped */ 1419 continue; 1420 } 1421 } else { 1422 1423 /* 1424 * Check if the destination is in the 1425 * same vlan. 1426 */ 1427 rv = vsw_vlan_lookup(vlan_hashp, 1428 vlan_id); 1429 if (rv == B_FALSE) { 1430 /* drop the packet */ 1431 freemsg(bp); 1432 continue; 1433 } 1434 } 1435 1436 } 1437 } 1438 1439 /* build a chain of processed packets */ 1440 if (bph == NULL) { 1441 bph = bpt = bp; 1442 } else { 1443 bpt->b_next = bp; 1444 bpt = bp; 1445 } 1446 count++; 1447 } 1448 1449 *np = bph; 1450 *npt = bpt; 1451 return (count); 1452 } 1453 1454 /* 1455 * Lookup the vlan id of the given frame. If it is a vlan-tagged frame, 1456 * then the vlan-id is available in the tag; otherwise, its vlan id is 1457 * implicitly obtained based on the caller (destination of the frame: 1458 * VSW_VNETPORT or VSW_LOCALDEV). 1459 * The vlan id determined is returned in vidp. 1460 * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged. 1461 */ 1462 boolean_t 1463 vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp, 1464 uint16_t *vidp) 1465 { 1466 struct ether_vlan_header *evhp; 1467 vsw_t *vswp; 1468 vsw_port_t *portp; 1469 1470 /* If it's a tagged frame, get the vid from vlan header */ 1471 if (ehp->ether_type == ETHERTYPE_VLAN) { 1472 1473 evhp = (struct ether_vlan_header *)ehp; 1474 *vidp = VLAN_ID(ntohs(evhp->ether_tci)); 1475 return (B_TRUE); 1476 } 1477 1478 /* Untagged frame; determine vlan id based on caller */ 1479 switch (caller) { 1480 1481 case VSW_VNETPORT: 1482 /* 1483 * packet destined to a vnet; vlan-id is pvid of vnet-port. 1484 */ 1485 portp = (vsw_port_t *)arg; 1486 *vidp = portp->pvid; 1487 break; 1488 1489 case VSW_LOCALDEV: 1490 1491 /* 1492 * packet destined to vsw interface; 1493 * vlan-id is port-vlan-id of vsw device. 1494 */ 1495 vswp = (vsw_t *)arg; 1496 *vidp = vswp->pvid; 1497 break; 1498 } 1499 1500 return (B_FALSE); 1501 } 1502 1503 /* 1504 * Add or remove multicast address(es). 1505 * 1506 * Returns 0 on success, 1 on failure. 1507 */ 1508 int 1509 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port) 1510 { 1511 mcst_addr_t *mcst_p = NULL; 1512 vsw_t *vswp = port->p_vswp; 1513 uint64_t addr = 0x0; 1514 int i; 1515 1516 D1(vswp, "%s: enter", __func__); 1517 1518 D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count); 1519 1520 for (i = 0; i < mcst_pkt->count; i++) { 1521 /* 1522 * Convert address into form that can be used 1523 * as hash table key. 1524 */ 1525 KEY_HASH(addr, &(mcst_pkt->mca[i])); 1526 1527 /* 1528 * Add or delete the specified address/port combination. 1529 */ 1530 if (mcst_pkt->set == 0x1) { 1531 D3(vswp, "%s: adding multicast address 0x%llx for " 1532 "port %ld", __func__, addr, port->p_instance); 1533 if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1534 /* 1535 * Update the list of multicast 1536 * addresses contained within the 1537 * port structure to include this new 1538 * one. 1539 */ 1540 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), 1541 KM_NOSLEEP); 1542 if (mcst_p == NULL) { 1543 DERR(vswp, "%s: unable to alloc mem", 1544 __func__); 1545 (void) vsw_del_mcst(vswp, 1546 VSW_VNETPORT, addr, port); 1547 return (1); 1548 } 1549 1550 mcst_p->nextp = NULL; 1551 mcst_p->addr = addr; 1552 ether_copy(&mcst_pkt->mca[i], &mcst_p->mca); 1553 1554 /* 1555 * Program the address into HW. If the addr 1556 * has already been programmed then the MAC 1557 * just increments a ref counter (which is 1558 * used when the address is being deleted) 1559 */ 1560 if (vsw_mac_multicast_add(vswp, port, mcst_p, 1561 VSW_VNETPORT)) { 1562 (void) vsw_del_mcst(vswp, 1563 VSW_VNETPORT, addr, port); 1564 kmem_free(mcst_p, sizeof (*mcst_p)); 1565 return (1); 1566 } 1567 1568 mutex_enter(&port->mca_lock); 1569 mcst_p->nextp = port->mcap; 1570 port->mcap = mcst_p; 1571 mutex_exit(&port->mca_lock); 1572 1573 } else { 1574 DERR(vswp, "%s: error adding multicast " 1575 "address 0x%llx for port %ld", 1576 __func__, addr, port->p_instance); 1577 return (1); 1578 } 1579 } else { 1580 /* 1581 * Delete an entry from the multicast hash 1582 * table and update the address list 1583 * appropriately. 1584 */ 1585 if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1586 D3(vswp, "%s: deleting multicast address " 1587 "0x%llx for port %ld", __func__, addr, 1588 port->p_instance); 1589 1590 mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr); 1591 ASSERT(mcst_p != NULL); 1592 1593 /* 1594 * Remove the address from HW. The address 1595 * will actually only be removed once the ref 1596 * count within the MAC layer has dropped to 1597 * zero. I.e. we can safely call this fn even 1598 * if other ports are interested in this 1599 * address. 1600 */ 1601 vsw_mac_multicast_remove(vswp, port, mcst_p, 1602 VSW_VNETPORT); 1603 kmem_free(mcst_p, sizeof (*mcst_p)); 1604 1605 } else { 1606 DERR(vswp, "%s: error deleting multicast " 1607 "addr 0x%llx for port %ld", 1608 __func__, addr, port->p_instance); 1609 return (1); 1610 } 1611 } 1612 } 1613 D1(vswp, "%s: exit", __func__); 1614 return (0); 1615 } 1616 1617 /* 1618 * Add a new multicast entry. 1619 * 1620 * Search hash table based on address. If match found then 1621 * update associated val (which is chain of ports), otherwise 1622 * create new key/val (addr/port) pair and insert into table. 1623 */ 1624 int 1625 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1626 { 1627 int dup = 0; 1628 int rv = 0; 1629 mfdb_ent_t *ment = NULL; 1630 mfdb_ent_t *tmp_ent = NULL; 1631 mfdb_ent_t *new_ent = NULL; 1632 void *tgt = NULL; 1633 1634 if (devtype == VSW_VNETPORT) { 1635 /* 1636 * Being invoked from a vnet. 1637 */ 1638 ASSERT(arg != NULL); 1639 tgt = arg; 1640 D2(NULL, "%s: port %d : address 0x%llx", __func__, 1641 ((vsw_port_t *)arg)->p_instance, addr); 1642 } else { 1643 /* 1644 * We are being invoked via the m_multicst mac entry 1645 * point. 1646 */ 1647 D2(NULL, "%s: address 0x%llx", __func__, addr); 1648 tgt = (void *)vswp; 1649 } 1650 1651 WRITE_ENTER(&vswp->mfdbrw); 1652 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1653 (mod_hash_val_t *)&ment) != 0) { 1654 1655 /* address not currently in table */ 1656 ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1657 ment->d_addr = (void *)tgt; 1658 ment->d_type = devtype; 1659 ment->nextp = NULL; 1660 1661 if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr, 1662 (mod_hash_val_t)ment) != 0) { 1663 DERR(vswp, "%s: hash table insertion failed", __func__); 1664 kmem_free(ment, sizeof (mfdb_ent_t)); 1665 rv = 1; 1666 } else { 1667 D2(vswp, "%s: added initial entry for 0x%llx to " 1668 "table", __func__, addr); 1669 } 1670 } else { 1671 /* 1672 * Address in table. Check to see if specified port 1673 * is already associated with the address. If not add 1674 * it now. 1675 */ 1676 tmp_ent = ment; 1677 while (tmp_ent != NULL) { 1678 if (tmp_ent->d_addr == (void *)tgt) { 1679 if (devtype == VSW_VNETPORT) { 1680 DERR(vswp, "%s: duplicate port entry " 1681 "found for portid %ld and key " 1682 "0x%llx", __func__, 1683 ((vsw_port_t *)arg)->p_instance, 1684 addr); 1685 } else { 1686 DERR(vswp, "%s: duplicate entry found" 1687 "for key 0x%llx", __func__, addr); 1688 } 1689 rv = 1; 1690 dup = 1; 1691 break; 1692 } 1693 tmp_ent = tmp_ent->nextp; 1694 } 1695 1696 /* 1697 * Port not on list so add it to end now. 1698 */ 1699 if (0 == dup) { 1700 D2(vswp, "%s: added entry for 0x%llx to table", 1701 __func__, addr); 1702 new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1703 new_ent->d_addr = (void *)tgt; 1704 new_ent->d_type = devtype; 1705 new_ent->nextp = NULL; 1706 1707 tmp_ent = ment; 1708 while (tmp_ent->nextp != NULL) 1709 tmp_ent = tmp_ent->nextp; 1710 1711 tmp_ent->nextp = new_ent; 1712 } 1713 } 1714 1715 RW_EXIT(&vswp->mfdbrw); 1716 return (rv); 1717 } 1718 1719 /* 1720 * Remove a multicast entry from the hashtable. 1721 * 1722 * Search hash table based on address. If match found, scan 1723 * list of ports associated with address. If specified port 1724 * found remove it from list. 1725 */ 1726 int 1727 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1728 { 1729 mfdb_ent_t *ment = NULL; 1730 mfdb_ent_t *curr_p, *prev_p; 1731 void *tgt = NULL; 1732 1733 D1(vswp, "%s: enter", __func__); 1734 1735 if (devtype == VSW_VNETPORT) { 1736 tgt = (vsw_port_t *)arg; 1737 D2(vswp, "%s: removing port %d from mFDB for address" 1738 " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr); 1739 } else { 1740 D2(vswp, "%s: removing entry", __func__); 1741 tgt = (void *)vswp; 1742 } 1743 1744 WRITE_ENTER(&vswp->mfdbrw); 1745 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1746 (mod_hash_val_t *)&ment) != 0) { 1747 D2(vswp, "%s: address 0x%llx not in table", __func__, addr); 1748 RW_EXIT(&vswp->mfdbrw); 1749 return (1); 1750 } 1751 1752 prev_p = curr_p = ment; 1753 1754 while (curr_p != NULL) { 1755 if (curr_p->d_addr == (void *)tgt) { 1756 if (devtype == VSW_VNETPORT) { 1757 D2(vswp, "%s: port %d found", __func__, 1758 ((vsw_port_t *)tgt)->p_instance); 1759 } else { 1760 D2(vswp, "%s: instance found", __func__); 1761 } 1762 1763 if (prev_p == curr_p) { 1764 /* 1765 * head of list, if no other element is in 1766 * list then destroy this entry, otherwise 1767 * just replace it with updated value. 1768 */ 1769 ment = curr_p->nextp; 1770 if (ment == NULL) { 1771 (void) mod_hash_destroy(vswp->mfdb, 1772 (mod_hash_val_t)addr); 1773 } else { 1774 (void) mod_hash_replace(vswp->mfdb, 1775 (mod_hash_key_t)addr, 1776 (mod_hash_val_t)ment); 1777 } 1778 } else { 1779 /* 1780 * Not head of list, no need to do 1781 * replacement, just adjust list pointers. 1782 */ 1783 prev_p->nextp = curr_p->nextp; 1784 } 1785 break; 1786 } 1787 1788 prev_p = curr_p; 1789 curr_p = curr_p->nextp; 1790 } 1791 1792 RW_EXIT(&vswp->mfdbrw); 1793 1794 D1(vswp, "%s: exit", __func__); 1795 1796 if (curr_p == NULL) 1797 return (1); 1798 kmem_free(curr_p, sizeof (mfdb_ent_t)); 1799 return (0); 1800 } 1801 1802 /* 1803 * Port is being deleted, but has registered an interest in one 1804 * or more multicast groups. Using the list of addresses maintained 1805 * within the port structure find the appropriate entry in the hash 1806 * table and remove this port from the list of interested ports. 1807 */ 1808 void 1809 vsw_del_mcst_port(vsw_port_t *port) 1810 { 1811 mcst_addr_t *mcap = NULL; 1812 vsw_t *vswp = port->p_vswp; 1813 1814 D1(vswp, "%s: enter", __func__); 1815 1816 mutex_enter(&port->mca_lock); 1817 1818 while ((mcap = port->mcap) != NULL) { 1819 1820 port->mcap = mcap->nextp; 1821 1822 mutex_exit(&port->mca_lock); 1823 1824 (void) vsw_del_mcst(vswp, VSW_VNETPORT, 1825 mcap->addr, port); 1826 1827 /* 1828 * Remove the address from HW. The address 1829 * will actually only be removed once the ref 1830 * count within the MAC layer has dropped to 1831 * zero. I.e. we can safely call this fn even 1832 * if other ports are interested in this 1833 * address. 1834 */ 1835 vsw_mac_multicast_remove(vswp, port, mcap, VSW_VNETPORT); 1836 kmem_free(mcap, sizeof (*mcap)); 1837 1838 mutex_enter(&port->mca_lock); 1839 1840 } 1841 1842 mutex_exit(&port->mca_lock); 1843 1844 D1(vswp, "%s: exit", __func__); 1845 } 1846 1847 /* 1848 * This vsw instance is detaching, but has registered an interest in one 1849 * or more multicast groups. Using the list of addresses maintained 1850 * within the vsw structure find the appropriate entry in the hash 1851 * table and remove this instance from the list of interested ports. 1852 */ 1853 void 1854 vsw_del_mcst_vsw(vsw_t *vswp) 1855 { 1856 mcst_addr_t *next_p = NULL; 1857 1858 D1(vswp, "%s: enter", __func__); 1859 1860 mutex_enter(&vswp->mca_lock); 1861 1862 while (vswp->mcap != NULL) { 1863 DERR(vswp, "%s: deleting addr 0x%llx", 1864 __func__, vswp->mcap->addr); 1865 (void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL); 1866 1867 next_p = vswp->mcap->nextp; 1868 kmem_free(vswp->mcap, sizeof (mcst_addr_t)); 1869 vswp->mcap = next_p; 1870 } 1871 1872 vswp->mcap = NULL; 1873 mutex_exit(&vswp->mca_lock); 1874 1875 D1(vswp, "%s: exit", __func__); 1876 } 1877 1878 mblk_t * 1879 vsw_get_same_dest_list(struct ether_header *ehp, mblk_t **mpp) 1880 { 1881 mblk_t *bp; 1882 mblk_t *nbp; 1883 mblk_t *head = NULL; 1884 mblk_t *tail = NULL; 1885 mblk_t *prev = NULL; 1886 struct ether_header *behp; 1887 1888 /* process the chain of packets */ 1889 bp = *mpp; 1890 while (bp) { 1891 nbp = bp->b_next; 1892 behp = (struct ether_header *)bp->b_rptr; 1893 bp->b_prev = NULL; 1894 if (ether_cmp(&ehp->ether_dhost, &behp->ether_dhost) == 0) { 1895 if (prev == NULL) { 1896 *mpp = nbp; 1897 } else { 1898 prev->b_next = nbp; 1899 } 1900 bp->b_next = NULL; 1901 if (head == NULL) { 1902 head = tail = bp; 1903 } else { 1904 tail->b_next = bp; 1905 tail = bp; 1906 } 1907 } else { 1908 prev = bp; 1909 } 1910 bp = nbp; 1911 } 1912 return (head); 1913 } 1914 1915 static mblk_t * 1916 vsw_dupmsgchain(mblk_t *mp) 1917 { 1918 mblk_t *nmp = NULL; 1919 mblk_t **nmpp = &nmp; 1920 1921 for (; mp != NULL; mp = mp->b_next) { 1922 if ((*nmpp = dupmsg(mp)) == NULL) { 1923 freemsgchain(nmp); 1924 return (NULL); 1925 } 1926 1927 nmpp = &((*nmpp)->b_next); 1928 } 1929 1930 return (nmp); 1931 } 1932