1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/debug.h> 30 #include <sys/time.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/user.h> 34 #include <sys/stropts.h> 35 #include <sys/stream.h> 36 #include <sys/strlog.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/kmem.h> 41 #include <sys/conf.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/ksynch.h> 45 #include <sys/stat.h> 46 #include <sys/kstat.h> 47 #include <sys/vtrace.h> 48 #include <sys/strsun.h> 49 #include <sys/dlpi.h> 50 #include <sys/ethernet.h> 51 #include <net/if.h> 52 #include <sys/varargs.h> 53 #include <sys/machsystm.h> 54 #include <sys/modctl.h> 55 #include <sys/modhash.h> 56 #include <sys/mac.h> 57 #include <sys/mac_ether.h> 58 #include <sys/taskq.h> 59 #include <sys/note.h> 60 #include <sys/mach_descrip.h> 61 #include <sys/mdeg.h> 62 #include <sys/ldc.h> 63 #include <sys/vsw_fdb.h> 64 #include <sys/vsw.h> 65 #include <sys/vio_mailbox.h> 66 #include <sys/vnet_mailbox.h> 67 #include <sys/vnet_common.h> 68 #include <sys/vio_util.h> 69 #include <sys/sdt.h> 70 #include <sys/atomic.h> 71 #include <sys/vlan.h> 72 73 /* Switching setup routines */ 74 void vsw_setup_switching_timeout(void *arg); 75 void vsw_stop_switching_timeout(vsw_t *vswp); 76 int vsw_setup_switching(vsw_t *); 77 void vsw_setup_layer2_post_process(vsw_t *vswp); 78 void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, 79 vsw_port_t *port, mac_resource_handle_t mrh); 80 static int vsw_setup_layer2(vsw_t *); 81 static int vsw_setup_layer3(vsw_t *); 82 83 /* Switching/data transmit routines */ 84 static void vsw_switch_l2_frame_mac_client(vsw_t *vswp, mblk_t *mp, int caller, 85 vsw_port_t *port, mac_resource_handle_t); 86 static void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 87 vsw_port_t *port, mac_resource_handle_t); 88 static void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 89 vsw_port_t *port, mac_resource_handle_t); 90 static int vsw_forward_all(vsw_t *vswp, mblk_t *mp, 91 int caller, vsw_port_t *port); 92 static int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, 93 int caller, vsw_port_t *port); 94 95 /* VLAN routines */ 96 void vsw_create_vlans(void *arg, int type); 97 void vsw_destroy_vlans(void *arg, int type); 98 void vsw_vlan_add_ids(void *arg, int type); 99 void vsw_vlan_remove_ids(void *arg, int type); 100 static void vsw_vlan_create_hash(void *arg, int type); 101 static void vsw_vlan_destroy_hash(void *arg, int type); 102 boolean_t vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp, 103 uint16_t *vidp); 104 mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 105 uint32_t vsw_vlan_frames_untag(void *arg, int type, mblk_t **np, mblk_t **npt); 106 boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid); 107 108 /* Forwarding database (FDB) routines */ 109 void vsw_fdbe_add(vsw_t *vswp, void *port); 110 void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr); 111 static vsw_fdbe_t *vsw_fdbe_find(vsw_t *vswp, struct ether_addr *); 112 static void vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val); 113 114 int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *); 115 int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 116 int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 117 void vsw_del_mcst_vsw(vsw_t *); 118 119 /* Support functions */ 120 static mblk_t *vsw_dupmsgchain(mblk_t *mp); 121 static mblk_t *vsw_get_same_dest_list(struct ether_header *ehp, mblk_t **mpp); 122 123 124 /* 125 * Functions imported from other files. 126 */ 127 extern mblk_t *vsw_tx_msg(vsw_t *, mblk_t *, int, vsw_port_t *); 128 extern mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t); 129 extern int vsw_mac_open(vsw_t *vswp); 130 extern void vsw_mac_close(vsw_t *vswp); 131 extern void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 132 mblk_t *mp, vsw_macrx_flags_t flags); 133 extern void vsw_set_addrs(vsw_t *vswp); 134 extern int vsw_portsend(vsw_port_t *port, mblk_t *mp); 135 extern void vsw_hio_init(vsw_t *vswp); 136 extern void vsw_hio_start_ports(vsw_t *vswp); 137 extern int vsw_mac_multicast_add(vsw_t *vswp, vsw_port_t *port, 138 mcst_addr_t *mcst_p, int type); 139 extern void vsw_mac_multicast_remove(vsw_t *vswp, vsw_port_t *port, 140 mcst_addr_t *mcst_p, int type); 141 142 /* 143 * Tunables used in this file. 144 */ 145 extern int vsw_setup_switching_delay; 146 extern uint32_t vsw_vlan_nchains; 147 extern uint32_t vsw_fdbe_refcnt_delay; 148 149 #define VSW_FDBE_REFHOLD(p) \ 150 { \ 151 atomic_inc_32(&(p)->refcnt); \ 152 ASSERT((p)->refcnt != 0); \ 153 } 154 155 #define VSW_FDBE_REFRELE(p) \ 156 { \ 157 ASSERT((p)->refcnt != 0); \ 158 atomic_dec_32(&(p)->refcnt); \ 159 } 160 161 /* 162 * Timeout routine to setup switching mode: 163 * vsw_setup_switching() is invoked from vsw_attach() or vsw_update_md_prop() 164 * initially. If it fails and the error is EAGAIN, then this timeout handler 165 * is started to retry vsw_setup_switching(). vsw_setup_switching() is retried 166 * until we successfully finish it; or the returned error is not EAGAIN. 167 */ 168 void 169 vsw_setup_switching_timeout(void *arg) 170 { 171 vsw_t *vswp = (vsw_t *)arg; 172 int rv; 173 174 if (vswp->swtmout_enabled == B_FALSE) 175 return; 176 177 rv = vsw_setup_switching(vswp); 178 179 if (rv == 0) { 180 vsw_setup_layer2_post_process(vswp); 181 } 182 183 mutex_enter(&vswp->swtmout_lock); 184 185 if (rv == EAGAIN && vswp->swtmout_enabled == B_TRUE) { 186 /* 187 * Reschedule timeout() if the error is EAGAIN and the 188 * timeout is still enabled. For errors other than EAGAIN, 189 * we simply return without rescheduling timeout(). 190 */ 191 vswp->swtmout_id = 192 timeout(vsw_setup_switching_timeout, vswp, 193 (vsw_setup_switching_delay * drv_usectohz(MICROSEC))); 194 goto exit; 195 } 196 197 /* timeout handler completed */ 198 vswp->swtmout_enabled = B_FALSE; 199 vswp->swtmout_id = 0; 200 201 exit: 202 mutex_exit(&vswp->swtmout_lock); 203 } 204 205 /* 206 * Cancel the timeout handler to setup switching mode. 207 */ 208 void 209 vsw_stop_switching_timeout(vsw_t *vswp) 210 { 211 timeout_id_t tid; 212 213 mutex_enter(&vswp->swtmout_lock); 214 215 tid = vswp->swtmout_id; 216 217 if (tid != 0) { 218 /* signal timeout handler to stop */ 219 vswp->swtmout_enabled = B_FALSE; 220 vswp->swtmout_id = 0; 221 mutex_exit(&vswp->swtmout_lock); 222 223 (void) untimeout(tid); 224 } else { 225 mutex_exit(&vswp->swtmout_lock); 226 } 227 228 (void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE); 229 230 mutex_enter(&vswp->mac_lock); 231 vswp->mac_open_retries = 0; 232 mutex_exit(&vswp->mac_lock); 233 } 234 235 /* 236 * Setup the required switching mode. 237 * This routine is invoked from vsw_attach() or vsw_update_md_prop() 238 * initially. If it fails and the error is EAGAIN, then a timeout handler 239 * is started to retry vsw_setup_switching(), until it successfully finishes; 240 * or the returned error is not EAGAIN. 241 * 242 * Returns: 243 * 0 on success. 244 * EAGAIN if retry is needed. 245 * 1 on all other failures. 246 */ 247 int 248 vsw_setup_switching(vsw_t *vswp) 249 { 250 int rv = 1; 251 252 D1(vswp, "%s: enter", __func__); 253 254 /* 255 * Select best switching mode. 256 * This is done as this routine can be called from the timeout 257 * handler to retry setting up a specific mode. Currently only 258 * the function which sets up layer2/promisc mode returns EAGAIN 259 * if the underlying network device is not available yet, causing 260 * retries. 261 */ 262 if (vswp->smode & VSW_LAYER2) { 263 rv = vsw_setup_layer2(vswp); 264 } else if (vswp->smode & VSW_LAYER3) { 265 rv = vsw_setup_layer3(vswp); 266 } else { 267 DERR(vswp, "unknown switch mode"); 268 rv = 1; 269 } 270 271 if (rv && (rv != EAGAIN)) { 272 cmn_err(CE_WARN, "!vsw%d: Unable to setup specified " 273 "switching mode", vswp->instance); 274 } else if (rv == 0) { 275 (void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE); 276 } 277 278 D2(vswp, "%s: Operating in mode %d", __func__, 279 vswp->smode); 280 281 D1(vswp, "%s: exit", __func__); 282 283 return (rv); 284 } 285 286 /* 287 * Setup for layer 2 switching. 288 * 289 * Returns: 290 * 0 on success. 291 * EAGAIN if retry is needed. 292 * EIO on all other failures. 293 */ 294 static int 295 vsw_setup_layer2(vsw_t *vswp) 296 { 297 int rv; 298 299 D1(vswp, "%s: enter", __func__); 300 301 /* 302 * Until the network device is successfully opened, 303 * set the switching to use vsw_switch_l2_frame. 304 */ 305 vswp->vsw_switch_frame = vsw_switch_l2_frame; 306 vswp->mac_cl_switching = B_FALSE; 307 308 rv = strlen(vswp->physname); 309 if (rv == 0) { 310 /* 311 * Physical device name is NULL, which is 312 * required for layer 2. 313 */ 314 cmn_err(CE_WARN, "!vsw%d: no network device name specified", 315 vswp->instance); 316 return (EIO); 317 } 318 319 mutex_enter(&vswp->mac_lock); 320 321 rv = vsw_mac_open(vswp); 322 if (rv != 0) { 323 if (rv != EAGAIN) { 324 cmn_err(CE_WARN, "!vsw%d: Unable to open network " 325 "device: %s\n", vswp->instance, vswp->physname); 326 } 327 mutex_exit(&vswp->mac_lock); 328 return (rv); 329 } 330 331 /* 332 * Now we can use the mac client switching, so set the switching 333 * function to use vsw_switch_l2_frame_mac_client(), which simply 334 * sends the packets to MAC layer for switching. 335 */ 336 vswp->vsw_switch_frame = vsw_switch_l2_frame_mac_client; 337 vswp->mac_cl_switching = B_TRUE; 338 339 D1(vswp, "%s: exit", __func__); 340 341 /* Initialize HybridIO related stuff */ 342 vsw_hio_init(vswp); 343 344 mutex_exit(&vswp->mac_lock); 345 return (0); 346 347 exit_error: 348 vsw_mac_close(vswp); 349 mutex_exit(&vswp->mac_lock); 350 return (EIO); 351 } 352 353 static int 354 vsw_setup_layer3(vsw_t *vswp) 355 { 356 D1(vswp, "%s: enter", __func__); 357 358 D2(vswp, "%s: operating in layer 3 mode", __func__); 359 vswp->vsw_switch_frame = vsw_switch_l3_frame; 360 361 D1(vswp, "%s: exit", __func__); 362 363 return (0); 364 } 365 366 /* ARGSUSED */ 367 void 368 vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *port, 369 mac_resource_handle_t mrh) 370 { 371 freemsgchain(mp); 372 } 373 374 /* 375 * Use mac client for layer 2 switching . 376 */ 377 static void 378 vsw_switch_l2_frame_mac_client(vsw_t *vswp, mblk_t *mp, int caller, 379 vsw_port_t *port, mac_resource_handle_t mrh) 380 { 381 _NOTE(ARGUNUSED(mrh)) 382 383 mblk_t *ret_m; 384 385 /* 386 * This switching function is expected to be called by 387 * the ports or the interface only. The packets from 388 * physical interface already switched. 389 */ 390 ASSERT((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV)); 391 392 if ((ret_m = vsw_tx_msg(vswp, mp, caller, port)) != NULL) { 393 DERR(vswp, "%s: drop mblks to " 394 "phys dev", __func__); 395 freemsgchain(ret_m); 396 } 397 } 398 399 /* 400 * Switch the given ethernet frame when operating in layer 2 mode. 401 * 402 * vswp: pointer to the vsw instance 403 * mp: pointer to chain of ethernet frame(s) to be switched 404 * caller: identifies the source of this frame as: 405 * 1. VSW_VNETPORT - a vsw port (connected to a vnet). 406 * 2. VSW_PHYSDEV - the physical ethernet device 407 * 3. VSW_LOCALDEV - vsw configured as a virtual interface 408 * arg: argument provided by the caller. 409 * 1. for VNETPORT - pointer to the corresponding vsw_port_t. 410 * 2. for PHYSDEV - NULL 411 * 3. for LOCALDEV - pointer to to this vsw_t(self) 412 */ 413 void 414 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 415 vsw_port_t *arg, mac_resource_handle_t mrh) 416 { 417 struct ether_header *ehp; 418 mblk_t *bp, *ret_m; 419 vsw_fdbe_t *fp; 420 421 D1(vswp, "%s: enter (caller %d)", __func__, caller); 422 423 /* 424 * PERF: rather than breaking up the chain here, scan it 425 * to find all mblks heading to same destination and then 426 * pass that sub-chain to the lower transmit functions. 427 */ 428 429 /* process the chain of packets */ 430 bp = mp; 431 while (bp) { 432 ehp = (struct ether_header *)bp->b_rptr; 433 mp = vsw_get_same_dest_list(ehp, &bp); 434 ASSERT(mp != NULL); 435 436 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 437 __func__, MBLKSIZE(mp), MBLKL(mp)); 438 439 if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) { 440 /* 441 * If destination is VSW_LOCALDEV (vsw as an eth 442 * interface) and if the device is up & running, 443 * send the packet up the stack on this host. 444 * If the virtual interface is down, drop the packet. 445 */ 446 if (caller != VSW_LOCALDEV) { 447 vsw_mac_rx(vswp, mrh, mp, VSW_MACRX_FREEMSG); 448 } else { 449 freemsgchain(mp); 450 } 451 continue; 452 } 453 454 /* 455 * Find fdb entry for the destination 456 * and hold a reference to it. 457 */ 458 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 459 if (fp != NULL) { 460 461 /* 462 * If plumbed and in promisc mode then copy msg 463 * and send up the stack. 464 */ 465 vsw_mac_rx(vswp, mrh, mp, 466 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 467 468 /* 469 * If the destination is in FDB, the packet 470 * should be forwarded to the correponding 471 * vsw_port (connected to a vnet device - 472 * VSW_VNETPORT) 473 */ 474 (void) vsw_portsend(fp->portp, mp); 475 476 /* Release the reference on the fdb entry */ 477 VSW_FDBE_REFRELE(fp); 478 } else { 479 /* 480 * Destination not in FDB. 481 * 482 * If the destination is broadcast or 483 * multicast forward the packet to all 484 * (VNETPORTs, PHYSDEV, LOCALDEV), 485 * except the caller. 486 */ 487 if (IS_BROADCAST(ehp)) { 488 D2(vswp, "%s: BROADCAST pkt", __func__); 489 (void) vsw_forward_all(vswp, mp, caller, arg); 490 } else if (IS_MULTICAST(ehp)) { 491 D2(vswp, "%s: MULTICAST pkt", __func__); 492 (void) vsw_forward_grp(vswp, mp, caller, arg); 493 } else { 494 /* 495 * If the destination is unicast, and came 496 * from either a logical network device or 497 * the switch itself when it is plumbed, then 498 * send it out on the physical device and also 499 * up the stack if the logical interface is 500 * in promiscious mode. 501 * 502 * NOTE: The assumption here is that if we 503 * cannot find the destination in our fdb, its 504 * a unicast address, and came from either a 505 * vnet or down the stack (when plumbed) it 506 * must be destinded for an ethernet device 507 * outside our ldoms. 508 */ 509 if (caller == VSW_VNETPORT) { 510 /* promisc check copy etc */ 511 vsw_mac_rx(vswp, mrh, mp, 512 VSW_MACRX_PROMISC | 513 VSW_MACRX_COPYMSG); 514 515 if ((ret_m = vsw_tx_msg(vswp, mp, 516 caller, arg)) != NULL) { 517 DERR(vswp, "%s: drop mblks to " 518 "phys dev", __func__); 519 freemsgchain(ret_m); 520 } 521 522 } else if (caller == VSW_PHYSDEV) { 523 /* 524 * Pkt seen because card in promisc 525 * mode. Send up stack if plumbed in 526 * promisc mode, else drop it. 527 */ 528 vsw_mac_rx(vswp, mrh, mp, 529 VSW_MACRX_PROMISC | 530 VSW_MACRX_FREEMSG); 531 532 } else if (caller == VSW_LOCALDEV) { 533 /* 534 * Pkt came down the stack, send out 535 * over physical device. 536 */ 537 if ((ret_m = vsw_tx_msg(vswp, mp, 538 caller, NULL)) != NULL) { 539 DERR(vswp, "%s: drop mblks to " 540 "phys dev", __func__); 541 freemsgchain(ret_m); 542 } 543 } 544 } 545 } 546 } 547 D1(vswp, "%s: exit\n", __func__); 548 } 549 550 /* 551 * Switch ethernet frame when in layer 3 mode (i.e. using IP 552 * layer to do the routing). 553 * 554 * There is a large amount of overlap between this function and 555 * vsw_switch_l2_frame. At some stage we need to revisit and refactor 556 * both these functions. 557 */ 558 void 559 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 560 vsw_port_t *arg, mac_resource_handle_t mrh) 561 { 562 struct ether_header *ehp; 563 mblk_t *bp = NULL; 564 vsw_fdbe_t *fp; 565 566 D1(vswp, "%s: enter (caller %d)", __func__, caller); 567 568 /* 569 * In layer 3 mode should only ever be switching packets 570 * between IP layer and vnet devices. So make sure thats 571 * who is invoking us. 572 */ 573 if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) { 574 DERR(vswp, "%s: unexpected caller (%d)", __func__, caller); 575 freemsgchain(mp); 576 return; 577 } 578 579 /* process the chain of packets */ 580 bp = mp; 581 while (bp) { 582 ehp = (struct ether_header *)bp->b_rptr; 583 mp = vsw_get_same_dest_list(ehp, &bp); 584 ASSERT(mp != NULL); 585 586 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 587 __func__, MBLKSIZE(mp), MBLKL(mp)); 588 589 /* 590 * Find fdb entry for the destination 591 * and hold a reference to it. 592 */ 593 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 594 if (fp != NULL) { 595 596 D2(vswp, "%s: sending to target port", __func__); 597 (void) vsw_portsend(fp->portp, mp); 598 599 /* Release the reference on the fdb entry */ 600 VSW_FDBE_REFRELE(fp); 601 } else { 602 /* 603 * Destination not in FDB 604 * 605 * If the destination is broadcast or 606 * multicast forward the packet to all 607 * (VNETPORTs, PHYSDEV, LOCALDEV), 608 * except the caller. 609 */ 610 if (IS_BROADCAST(ehp)) { 611 D2(vswp, "%s: BROADCAST pkt", __func__); 612 (void) vsw_forward_all(vswp, mp, caller, arg); 613 } else if (IS_MULTICAST(ehp)) { 614 D2(vswp, "%s: MULTICAST pkt", __func__); 615 (void) vsw_forward_grp(vswp, mp, caller, arg); 616 } else { 617 /* 618 * Unicast pkt from vnet that we don't have 619 * an FDB entry for, so must be destinded for 620 * the outside world. Attempt to send up to the 621 * IP layer to allow it to deal with it. 622 */ 623 if (caller == VSW_VNETPORT) { 624 vsw_mac_rx(vswp, mrh, 625 mp, VSW_MACRX_FREEMSG); 626 } 627 } 628 } 629 } 630 631 D1(vswp, "%s: exit", __func__); 632 } 633 634 /* 635 * Setup mac addrs and hio resources for layer 2 switching only. 636 */ 637 void 638 vsw_setup_layer2_post_process(vsw_t *vswp) 639 { 640 if (vswp->smode & VSW_LAYER2) { 641 /* 642 * Program unicst, mcst addrs of vsw 643 * interface and ports in the physdev. 644 */ 645 vsw_set_addrs(vswp); 646 647 /* Start HIO for ports that have already connected */ 648 vsw_hio_start_ports(vswp); 649 } 650 } 651 652 /* 653 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV), 654 * except the caller (port on which frame arrived). 655 */ 656 static int 657 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 658 { 659 vsw_port_list_t *plist = &vswp->plist; 660 vsw_port_t *portp; 661 mblk_t *nmp = NULL; 662 mblk_t *ret_m = NULL; 663 int skip_port = 0; 664 665 D1(vswp, "vsw_forward_all: enter\n"); 666 667 /* 668 * Broadcast message from inside ldoms so send to outside 669 * world if in either of layer 2 modes. 670 */ 671 if ((vswp->smode & VSW_LAYER2) && 672 ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) { 673 674 nmp = vsw_dupmsgchain(mp); 675 if (nmp) { 676 if ((ret_m = vsw_tx_msg(vswp, nmp, caller, arg)) 677 != NULL) { 678 DERR(vswp, "%s: dropping pkt(s) " 679 "consisting of %ld bytes of data for" 680 " physical device", __func__, MBLKL(ret_m)); 681 freemsgchain(ret_m); 682 } 683 } 684 } 685 686 if (caller == VSW_VNETPORT) 687 skip_port = 1; 688 689 /* 690 * Broadcast message from other vnet (layer 2 or 3) or outside 691 * world (layer 2 only), send up stack if plumbed. 692 */ 693 if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) { 694 vsw_mac_rx(vswp, NULL, mp, VSW_MACRX_COPYMSG); 695 } 696 697 /* send it to all VNETPORTs */ 698 READ_ENTER(&plist->lockrw); 699 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 700 D2(vswp, "vsw_forward_all: port %d", portp->p_instance); 701 /* 702 * Caution ! - don't reorder these two checks as arg 703 * will be NULL if the caller is PHYSDEV. skip_port is 704 * only set if caller is VNETPORT. 705 */ 706 if ((skip_port) && (portp == arg)) { 707 continue; 708 } else { 709 nmp = vsw_dupmsgchain(mp); 710 if (nmp) { 711 /* 712 * The plist->lockrw is protecting the 713 * portp from getting destroyed here. 714 * So, no ref_cnt is incremented here. 715 */ 716 (void) vsw_portsend(portp, nmp); 717 } else { 718 DERR(vswp, "vsw_forward_all: nmp NULL"); 719 } 720 } 721 } 722 RW_EXIT(&plist->lockrw); 723 724 freemsgchain(mp); 725 726 D1(vswp, "vsw_forward_all: exit\n"); 727 return (0); 728 } 729 730 /* 731 * Forward pkts to any devices or interfaces which have registered 732 * an interest in them (i.e. multicast groups). 733 */ 734 static int 735 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 736 { 737 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 738 mfdb_ent_t *entp = NULL; 739 mfdb_ent_t *tpp = NULL; 740 vsw_port_t *port; 741 uint64_t key = 0; 742 mblk_t *nmp = NULL; 743 mblk_t *ret_m = NULL; 744 boolean_t check_if = B_TRUE; 745 746 /* 747 * Convert address to hash table key 748 */ 749 KEY_HASH(key, &ehp->ether_dhost); 750 751 D1(vswp, "%s: key 0x%llx", __func__, key); 752 753 /* 754 * If pkt came from either a vnet or down the stack (if we are 755 * plumbed) and we are in layer 2 mode, then we send the pkt out 756 * over the physical adapter, and then check to see if any other 757 * vnets are interested in it. 758 */ 759 if ((vswp->smode & VSW_LAYER2) && 760 ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) { 761 nmp = vsw_dupmsgchain(mp); 762 if (nmp) { 763 if ((ret_m = vsw_tx_msg(vswp, nmp, caller, arg)) 764 != NULL) { 765 DERR(vswp, "%s: dropping pkt(s) consisting of " 766 "%ld bytes of data for physical device", 767 __func__, MBLKL(ret_m)); 768 freemsgchain(ret_m); 769 } 770 } 771 } 772 773 READ_ENTER(&vswp->mfdbrw); 774 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key, 775 (mod_hash_val_t *)&entp) != 0) { 776 D3(vswp, "%s: no table entry found for addr 0x%llx", 777 __func__, key); 778 } else { 779 /* 780 * Send to list of devices associated with this address... 781 */ 782 for (tpp = entp; tpp != NULL; tpp = tpp->nextp) { 783 784 /* dont send to ourselves */ 785 if ((caller == VSW_VNETPORT) && 786 (tpp->d_addr == (void *)arg)) { 787 port = (vsw_port_t *)tpp->d_addr; 788 D3(vswp, "%s: not sending to ourselves" 789 " : port %d", __func__, port->p_instance); 790 continue; 791 792 } else if ((caller == VSW_LOCALDEV) && 793 (tpp->d_type == VSW_LOCALDEV)) { 794 D2(vswp, "%s: not sending back up stack", 795 __func__); 796 continue; 797 } 798 799 if (tpp->d_type == VSW_VNETPORT) { 800 port = (vsw_port_t *)tpp->d_addr; 801 D3(vswp, "%s: sending to port %ld for addr " 802 "0x%llx", __func__, port->p_instance, key); 803 804 nmp = vsw_dupmsgchain(mp); 805 if (nmp) { 806 /* 807 * The vswp->mfdbrw is protecting the 808 * portp from getting destroyed here. 809 * So, no ref_cnt is incremented here. 810 */ 811 (void) vsw_portsend(port, nmp); 812 } 813 } else { 814 vsw_mac_rx(vswp, NULL, 815 mp, VSW_MACRX_COPYMSG); 816 D2(vswp, "%s: sending up stack" 817 " for addr 0x%llx", __func__, key); 818 check_if = B_FALSE; 819 } 820 } 821 } 822 823 RW_EXIT(&vswp->mfdbrw); 824 825 /* 826 * If the pkt came from either a vnet or from physical device, 827 * and if we havent already sent the pkt up the stack then we 828 * check now if we can/should (i.e. the interface is plumbed 829 * and in promisc mode). 830 */ 831 if ((check_if) && 832 ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) { 833 vsw_mac_rx(vswp, NULL, mp, 834 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 835 } 836 837 freemsgchain(mp); 838 839 D1(vswp, "%s: exit", __func__); 840 841 return (0); 842 } 843 844 /* 845 * This function creates the vlan id hash table for the given vsw device or 846 * port. It then adds each vlan that the device or port has been assigned, 847 * into this hash table. 848 * Arguments: 849 * arg: vsw device or port. 850 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 851 */ 852 void 853 vsw_create_vlans(void *arg, int type) 854 { 855 /* create vlan hash table */ 856 vsw_vlan_create_hash(arg, type); 857 858 /* add vlan ids of the vsw device into its hash table */ 859 vsw_vlan_add_ids(arg, type); 860 } 861 862 /* 863 * This function removes the vlan ids of the vsw device or port from its hash 864 * table. It then destroys the vlan hash table. 865 * Arguments: 866 * arg: vsw device or port. 867 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 868 */ 869 void 870 vsw_destroy_vlans(void *arg, int type) 871 { 872 /* remove vlan ids from the hash table */ 873 vsw_vlan_remove_ids(arg, type); 874 875 /* destroy vlan-hash-table */ 876 vsw_vlan_destroy_hash(arg, type); 877 } 878 879 /* 880 * Create a vlan-id hash table for the given vsw device or port. 881 */ 882 static void 883 vsw_vlan_create_hash(void *arg, int type) 884 { 885 char hashname[MAXNAMELEN]; 886 887 if (type == VSW_LOCALDEV) { 888 vsw_t *vswp = (vsw_t *)arg; 889 890 (void) snprintf(hashname, MAXNAMELEN, "vsw%d-vlan-hash", 891 vswp->instance); 892 893 vswp->vlan_nchains = vsw_vlan_nchains; 894 vswp->vlan_hashp = mod_hash_create_idhash(hashname, 895 vswp->vlan_nchains, mod_hash_null_valdtor); 896 897 } else if (type == VSW_VNETPORT) { 898 vsw_port_t *portp = (vsw_port_t *)arg; 899 900 (void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash", 901 portp->p_instance); 902 903 portp->vlan_nchains = vsw_vlan_nchains; 904 portp->vlan_hashp = mod_hash_create_idhash(hashname, 905 portp->vlan_nchains, mod_hash_null_valdtor); 906 907 } else { 908 return; 909 } 910 } 911 912 /* 913 * Destroy the vlan-id hash table for the given vsw device or port. 914 */ 915 static void 916 vsw_vlan_destroy_hash(void *arg, int type) 917 { 918 if (type == VSW_LOCALDEV) { 919 vsw_t *vswp = (vsw_t *)arg; 920 921 mod_hash_destroy_hash(vswp->vlan_hashp); 922 vswp->vlan_nchains = 0; 923 } else if (type == VSW_VNETPORT) { 924 vsw_port_t *portp = (vsw_port_t *)arg; 925 926 mod_hash_destroy_hash(portp->vlan_hashp); 927 portp->vlan_nchains = 0; 928 } else { 929 return; 930 } 931 } 932 933 /* 934 * Add vlan ids of the given vsw device or port into its hash table. 935 */ 936 void 937 vsw_vlan_add_ids(void *arg, int type) 938 { 939 int rv; 940 int i; 941 942 if (type == VSW_LOCALDEV) { 943 vsw_t *vswp = (vsw_t *)arg; 944 945 rv = mod_hash_insert(vswp->vlan_hashp, 946 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 947 (mod_hash_val_t)B_TRUE); 948 if (rv != 0) { 949 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d) for " 950 "the interface", vswp->instance, vswp->pvid); 951 } 952 953 for (i = 0; i < vswp->nvids; i++) { 954 rv = mod_hash_insert(vswp->vlan_hashp, 955 (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i].vl_vid), 956 (mod_hash_val_t)B_TRUE); 957 if (rv != 0) { 958 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d)" 959 " for the interface", vswp->instance, 960 vswp->pvid); 961 } 962 } 963 964 } else if (type == VSW_VNETPORT) { 965 vsw_port_t *portp = (vsw_port_t *)arg; 966 vsw_t *vswp = portp->p_vswp; 967 968 rv = mod_hash_insert(portp->vlan_hashp, 969 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 970 (mod_hash_val_t)B_TRUE); 971 if (rv != 0) { 972 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d) for " 973 "the port(%d)", vswp->instance, vswp->pvid, 974 portp->p_instance); 975 } 976 977 for (i = 0; i < portp->nvids; i++) { 978 rv = mod_hash_insert(portp->vlan_hashp, 979 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i].vl_vid), 980 (mod_hash_val_t)B_TRUE); 981 if (rv != 0) { 982 cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d)" 983 " for the port(%d)", vswp->instance, 984 vswp->pvid, portp->p_instance); 985 } 986 } 987 988 } 989 } 990 991 /* 992 * Remove vlan ids of the given vsw device or port from its hash table. 993 */ 994 void 995 vsw_vlan_remove_ids(void *arg, int type) 996 { 997 mod_hash_val_t vp; 998 int rv; 999 int i; 1000 1001 if (type == VSW_LOCALDEV) { 1002 vsw_t *vswp = (vsw_t *)arg; 1003 1004 rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->pvid); 1005 if (rv == B_TRUE) { 1006 rv = mod_hash_remove(vswp->vlan_hashp, 1007 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 1008 (mod_hash_val_t *)&vp); 1009 ASSERT(rv == 0); 1010 } 1011 1012 for (i = 0; i < vswp->nvids; i++) { 1013 rv = vsw_vlan_lookup(vswp->vlan_hashp, 1014 vswp->vids[i].vl_vid); 1015 if (rv == B_TRUE) { 1016 rv = mod_hash_remove(vswp->vlan_hashp, 1017 (mod_hash_key_t)VLAN_ID_KEY( 1018 vswp->vids[i].vl_vid), 1019 (mod_hash_val_t *)&vp); 1020 ASSERT(rv == 0); 1021 } 1022 } 1023 1024 } else if (type == VSW_VNETPORT) { 1025 vsw_port_t *portp = (vsw_port_t *)arg; 1026 1027 portp = (vsw_port_t *)arg; 1028 rv = vsw_vlan_lookup(portp->vlan_hashp, portp->pvid); 1029 if (rv == B_TRUE) { 1030 rv = mod_hash_remove(portp->vlan_hashp, 1031 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 1032 (mod_hash_val_t *)&vp); 1033 ASSERT(rv == 0); 1034 } 1035 1036 for (i = 0; i < portp->nvids; i++) { 1037 rv = vsw_vlan_lookup(portp->vlan_hashp, 1038 portp->vids[i].vl_vid); 1039 if (rv == B_TRUE) { 1040 rv = mod_hash_remove(portp->vlan_hashp, 1041 (mod_hash_key_t)VLAN_ID_KEY( 1042 portp->vids[i].vl_vid), 1043 (mod_hash_val_t *)&vp); 1044 ASSERT(rv == 0); 1045 } 1046 } 1047 1048 } else { 1049 return; 1050 } 1051 } 1052 1053 /* 1054 * Find the given vlan id in the hash table. 1055 * Return: B_TRUE if the id is found; B_FALSE if not found. 1056 */ 1057 boolean_t 1058 vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid) 1059 { 1060 int rv; 1061 mod_hash_val_t vp; 1062 1063 rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp); 1064 1065 if (rv != 0) 1066 return (B_FALSE); 1067 1068 return (B_TRUE); 1069 } 1070 1071 /* 1072 * Add an entry into FDB for the given vsw. 1073 */ 1074 void 1075 vsw_fdbe_add(vsw_t *vswp, void *port) 1076 { 1077 uint64_t addr = 0; 1078 vsw_port_t *portp; 1079 vsw_fdbe_t *fp; 1080 int rv; 1081 1082 portp = (vsw_port_t *)port; 1083 KEY_HASH(addr, &portp->p_macaddr); 1084 1085 fp = kmem_zalloc(sizeof (vsw_fdbe_t), KM_SLEEP); 1086 fp->portp = port; 1087 1088 /* 1089 * Note: duplicate keys will be rejected by mod_hash. 1090 */ 1091 rv = mod_hash_insert(vswp->fdb_hashp, (mod_hash_key_t)addr, 1092 (mod_hash_val_t)fp); 1093 if (rv != 0) { 1094 cmn_err(CE_WARN, "vsw%d: Duplicate mac-address(%s) for " 1095 "the port(%d)", vswp->instance, 1096 ether_sprintf(&portp->p_macaddr), portp->p_instance); 1097 } 1098 } 1099 1100 /* 1101 * Remove an entry from FDB. 1102 */ 1103 void 1104 vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr) 1105 { 1106 uint64_t addr = 0; 1107 vsw_fdbe_t *fp; 1108 int rv; 1109 1110 KEY_HASH(addr, eaddr); 1111 1112 /* 1113 * Remove the entry from fdb hash table. 1114 * This prevents further references to this fdb entry. 1115 */ 1116 rv = mod_hash_remove(vswp->fdb_hashp, (mod_hash_key_t)addr, 1117 (mod_hash_val_t *)&fp); 1118 if (rv != 0) { 1119 /* invalid key? */ 1120 return; 1121 } 1122 1123 /* 1124 * If there are threads already ref holding before the entry was 1125 * removed from hash table, then wait for ref count to drop to zero. 1126 */ 1127 while (fp->refcnt != 0) { 1128 delay(drv_usectohz(vsw_fdbe_refcnt_delay)); 1129 } 1130 1131 kmem_free(fp, sizeof (*fp)); 1132 } 1133 1134 /* 1135 * Search fdb for a given mac address. If an entry is found, hold 1136 * a reference to it and return the entry, else returns NULL. 1137 */ 1138 static vsw_fdbe_t * 1139 vsw_fdbe_find(vsw_t *vswp, struct ether_addr *addrp) 1140 { 1141 uint64_t key = 0; 1142 vsw_fdbe_t *fp; 1143 int rv; 1144 1145 KEY_HASH(key, addrp); 1146 1147 rv = mod_hash_find_cb(vswp->fdb_hashp, (mod_hash_key_t)key, 1148 (mod_hash_val_t *)&fp, vsw_fdbe_find_cb); 1149 1150 if (rv != 0) 1151 return (NULL); 1152 1153 return (fp); 1154 } 1155 1156 /* 1157 * Callback function provided to mod_hash_find_cb(). After finding the fdb 1158 * entry corresponding to the key (macaddr), this callback will be invoked by 1159 * mod_hash_find_cb() to atomically increment the reference count on the fdb 1160 * entry before returning the found entry. 1161 */ 1162 static void 1163 vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val) 1164 { 1165 _NOTE(ARGUNUSED(key)) 1166 VSW_FDBE_REFHOLD((vsw_fdbe_t *)val); 1167 } 1168 1169 /* 1170 * A given frame must be always tagged with the appropriate vlan id (unless it 1171 * is in the default-vlan) before the mac address switching function is called. 1172 * Otherwise, after switching function determines the destination, we cannot 1173 * figure out if the destination belongs to the the same vlan that the frame 1174 * originated from and if it needs tag/untag. Frames which are inbound from 1175 * the external(physical) network over a vlan trunk link are always tagged. 1176 * However frames which are received from a vnet-port over ldc or frames which 1177 * are coming down the stack on the service domain over vsw interface may be 1178 * untagged. These frames must be tagged with the appropriate pvid of the 1179 * sender (vnet-port or vsw device), before invoking the switching function. 1180 * 1181 * Arguments: 1182 * arg: caller of the function. 1183 * type: type of arg(caller): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port) 1184 * mp: frame(s) to be tagged. 1185 */ 1186 mblk_t * 1187 vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp) 1188 { 1189 vsw_t *vswp; 1190 vsw_port_t *portp; 1191 struct ether_header *ehp; 1192 mblk_t *bp; 1193 mblk_t *bpt; 1194 mblk_t *bph; 1195 mblk_t *bpn; 1196 uint16_t pvid; 1197 1198 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1199 1200 if (type == VSW_LOCALDEV) { 1201 vswp = (vsw_t *)arg; 1202 pvid = vswp->pvid; 1203 portp = NULL; 1204 } else { 1205 /* VSW_VNETPORT */ 1206 portp = (vsw_port_t *)arg; 1207 pvid = portp->pvid; 1208 vswp = portp->p_vswp; 1209 } 1210 1211 bpn = bph = bpt = NULL; 1212 1213 for (bp = mp; bp != NULL; bp = bpn) { 1214 1215 bpn = bp->b_next; 1216 bp->b_next = bp->b_prev = NULL; 1217 1218 /* Determine if it is an untagged frame */ 1219 ehp = (struct ether_header *)bp->b_rptr; 1220 1221 if (ehp->ether_type != ETHERTYPE_VLAN) { /* untagged */ 1222 1223 /* no need to tag if the frame is in default vlan */ 1224 if (pvid != vswp->default_vlan_id) { 1225 bp = vnet_vlan_insert_tag(bp, pvid); 1226 if (bp == NULL) { 1227 continue; 1228 } 1229 } 1230 } 1231 1232 /* build a chain of processed packets */ 1233 if (bph == NULL) { 1234 bph = bpt = bp; 1235 } else { 1236 bpt->b_next = bp; 1237 bpt = bp; 1238 } 1239 1240 } 1241 1242 return (bph); 1243 } 1244 1245 /* 1246 * Frames destined to a vnet-port or to the local vsw interface, must be 1247 * untagged if necessary before sending. This function first checks that the 1248 * frame can be sent to the destination in the vlan identified by the frame 1249 * tag. Note that when this function is invoked the frame must have been 1250 * already tagged (unless it is in the default-vlan). Because, this function is 1251 * called when the switching function determines the destination and invokes 1252 * its send function (vnet-port or vsw interface) and all frames would have 1253 * been tagged by this time (see comments in vsw_vlan_frame_pretag()). 1254 * 1255 * Arguments: 1256 * arg: destination device. 1257 * type: type of arg(destination): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port) 1258 * np: head of pkt chain to be validated and untagged. 1259 * npt: tail of pkt chain to be validated and untagged. 1260 * 1261 * Returns: 1262 * np: head of updated chain of packets 1263 * npt: tail of updated chain of packets 1264 * rv: count of the packets in the returned list 1265 */ 1266 uint32_t 1267 vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, mblk_t **npt) 1268 { 1269 mblk_t *bp; 1270 mblk_t *bpt; 1271 mblk_t *bph; 1272 mblk_t *bpn; 1273 vsw_port_t *portp; 1274 vsw_t *vswp; 1275 uint32_t count; 1276 struct ether_header *ehp; 1277 boolean_t is_tagged; 1278 boolean_t rv; 1279 uint16_t vlan_id; 1280 uint16_t pvid; 1281 mod_hash_t *vlan_hashp; 1282 1283 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1284 1285 1286 if (type == VSW_LOCALDEV) { 1287 vswp = (vsw_t *)arg; 1288 pvid = vswp->pvid; 1289 vlan_hashp = vswp->vlan_hashp; 1290 portp = NULL; 1291 } else { 1292 /* type == VSW_VNETPORT */ 1293 portp = (vsw_port_t *)arg; 1294 vswp = portp->p_vswp; 1295 vlan_hashp = portp->vlan_hashp; 1296 pvid = portp->pvid; 1297 } 1298 1299 /* 1300 * If the MAC layer switching in place, then 1301 * untagging required only if the pvid is not 1302 * the same as default_vlan_id. This is because, 1303 * the MAC layer will send packets for the 1304 * registered vlans only. 1305 */ 1306 if ((vswp->mac_cl_switching == B_TRUE) && 1307 (pvid == vswp->default_vlan_id)) { 1308 /* simply count and set the tail */ 1309 count = 1; 1310 bp = *np; 1311 ASSERT(bp != NULL); 1312 while (bp->b_next != NULL) { 1313 bp = bp->b_next; 1314 count++; 1315 } 1316 *npt = bp; 1317 return (count); 1318 } 1319 1320 bpn = bph = bpt = NULL; 1321 count = 0; 1322 1323 for (bp = *np; bp != NULL; bp = bpn) { 1324 1325 bpn = bp->b_next; 1326 bp->b_next = bp->b_prev = NULL; 1327 1328 /* 1329 * Determine the vlan id that the frame belongs to. 1330 */ 1331 ehp = (struct ether_header *)bp->b_rptr; 1332 is_tagged = vsw_frame_lookup_vid(arg, type, ehp, &vlan_id); 1333 1334 /* 1335 * If MAC layer switching in place, then we 1336 * need to untag only if the tagged packet has 1337 * vlan-id same as the pvid. 1338 */ 1339 if (vswp->mac_cl_switching == B_TRUE) { 1340 1341 /* only tagged packets expected here */ 1342 ASSERT(is_tagged == B_TRUE); 1343 if (vlan_id == pvid) { 1344 bp = vnet_vlan_remove_tag(bp); 1345 if (bp == NULL) { 1346 /* packet dropped */ 1347 continue; 1348 } 1349 } 1350 } else { /* No MAC layer switching */ 1351 1352 /* 1353 * Check the frame header if tag/untag is needed. 1354 */ 1355 if (is_tagged == B_FALSE) { 1356 /* 1357 * Untagged frame. We shouldn't have an 1358 * untagged packet at this point, unless 1359 * the destination's vlan id is 1360 * default-vlan-id; if it is not the 1361 * default-vlan-id, we drop the packet. 1362 */ 1363 if (vlan_id != vswp->default_vlan_id) { 1364 /* drop the packet */ 1365 freemsg(bp); 1366 continue; 1367 } 1368 } else { /* Tagged */ 1369 /* 1370 * Tagged frame, untag if it's the 1371 * destination's pvid. 1372 */ 1373 if (vlan_id == pvid) { 1374 1375 bp = vnet_vlan_remove_tag(bp); 1376 if (bp == NULL) { 1377 /* packet dropped */ 1378 continue; 1379 } 1380 } else { 1381 1382 /* 1383 * Check if the destination is in the 1384 * same vlan. 1385 */ 1386 rv = vsw_vlan_lookup(vlan_hashp, 1387 vlan_id); 1388 if (rv == B_FALSE) { 1389 /* drop the packet */ 1390 freemsg(bp); 1391 continue; 1392 } 1393 } 1394 1395 } 1396 } 1397 1398 /* build a chain of processed packets */ 1399 if (bph == NULL) { 1400 bph = bpt = bp; 1401 } else { 1402 bpt->b_next = bp; 1403 bpt = bp; 1404 } 1405 count++; 1406 } 1407 1408 *np = bph; 1409 *npt = bpt; 1410 return (count); 1411 } 1412 1413 /* 1414 * Lookup the vlan id of the given frame. If it is a vlan-tagged frame, 1415 * then the vlan-id is available in the tag; otherwise, its vlan id is 1416 * implicitly obtained based on the caller (destination of the frame: 1417 * VSW_VNETPORT or VSW_LOCALDEV). 1418 * The vlan id determined is returned in vidp. 1419 * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged. 1420 */ 1421 boolean_t 1422 vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp, 1423 uint16_t *vidp) 1424 { 1425 struct ether_vlan_header *evhp; 1426 vsw_t *vswp; 1427 vsw_port_t *portp; 1428 1429 /* If it's a tagged frame, get the vid from vlan header */ 1430 if (ehp->ether_type == ETHERTYPE_VLAN) { 1431 1432 evhp = (struct ether_vlan_header *)ehp; 1433 *vidp = VLAN_ID(ntohs(evhp->ether_tci)); 1434 return (B_TRUE); 1435 } 1436 1437 /* Untagged frame; determine vlan id based on caller */ 1438 switch (caller) { 1439 1440 case VSW_VNETPORT: 1441 /* 1442 * packet destined to a vnet; vlan-id is pvid of vnet-port. 1443 */ 1444 portp = (vsw_port_t *)arg; 1445 *vidp = portp->pvid; 1446 break; 1447 1448 case VSW_LOCALDEV: 1449 1450 /* 1451 * packet destined to vsw interface; 1452 * vlan-id is port-vlan-id of vsw device. 1453 */ 1454 vswp = (vsw_t *)arg; 1455 *vidp = vswp->pvid; 1456 break; 1457 } 1458 1459 return (B_FALSE); 1460 } 1461 1462 /* 1463 * Add or remove multicast address(es). 1464 * 1465 * Returns 0 on success, 1 on failure. 1466 */ 1467 int 1468 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port) 1469 { 1470 mcst_addr_t *mcst_p = NULL; 1471 vsw_t *vswp = port->p_vswp; 1472 uint64_t addr = 0x0; 1473 int i; 1474 1475 D1(vswp, "%s: enter", __func__); 1476 1477 D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count); 1478 1479 for (i = 0; i < mcst_pkt->count; i++) { 1480 /* 1481 * Convert address into form that can be used 1482 * as hash table key. 1483 */ 1484 KEY_HASH(addr, &(mcst_pkt->mca[i])); 1485 1486 /* 1487 * Add or delete the specified address/port combination. 1488 */ 1489 if (mcst_pkt->set == 0x1) { 1490 D3(vswp, "%s: adding multicast address 0x%llx for " 1491 "port %ld", __func__, addr, port->p_instance); 1492 if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1493 /* 1494 * Update the list of multicast 1495 * addresses contained within the 1496 * port structure to include this new 1497 * one. 1498 */ 1499 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), 1500 KM_NOSLEEP); 1501 if (mcst_p == NULL) { 1502 DERR(vswp, "%s: unable to alloc mem", 1503 __func__); 1504 (void) vsw_del_mcst(vswp, 1505 VSW_VNETPORT, addr, port); 1506 return (1); 1507 } 1508 1509 mcst_p->nextp = NULL; 1510 mcst_p->addr = addr; 1511 ether_copy(&mcst_pkt->mca[i], &mcst_p->mca); 1512 1513 /* 1514 * Program the address into HW. If the addr 1515 * has already been programmed then the MAC 1516 * just increments a ref counter (which is 1517 * used when the address is being deleted) 1518 */ 1519 if (vsw_mac_multicast_add(vswp, port, mcst_p, 1520 VSW_VNETPORT)) { 1521 (void) vsw_del_mcst(vswp, 1522 VSW_VNETPORT, addr, port); 1523 kmem_free(mcst_p, sizeof (*mcst_p)); 1524 return (1); 1525 } 1526 1527 mutex_enter(&port->mca_lock); 1528 mcst_p->nextp = port->mcap; 1529 port->mcap = mcst_p; 1530 mutex_exit(&port->mca_lock); 1531 1532 } else { 1533 DERR(vswp, "%s: error adding multicast " 1534 "address 0x%llx for port %ld", 1535 __func__, addr, port->p_instance); 1536 return (1); 1537 } 1538 } else { 1539 /* 1540 * Delete an entry from the multicast hash 1541 * table and update the address list 1542 * appropriately. 1543 */ 1544 if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1545 D3(vswp, "%s: deleting multicast address " 1546 "0x%llx for port %ld", __func__, addr, 1547 port->p_instance); 1548 1549 mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr); 1550 ASSERT(mcst_p != NULL); 1551 1552 /* 1553 * Remove the address from HW. The address 1554 * will actually only be removed once the ref 1555 * count within the MAC layer has dropped to 1556 * zero. I.e. we can safely call this fn even 1557 * if other ports are interested in this 1558 * address. 1559 */ 1560 vsw_mac_multicast_remove(vswp, port, mcst_p, 1561 VSW_VNETPORT); 1562 kmem_free(mcst_p, sizeof (*mcst_p)); 1563 1564 } else { 1565 DERR(vswp, "%s: error deleting multicast " 1566 "addr 0x%llx for port %ld", 1567 __func__, addr, port->p_instance); 1568 return (1); 1569 } 1570 } 1571 } 1572 D1(vswp, "%s: exit", __func__); 1573 return (0); 1574 } 1575 1576 /* 1577 * Add a new multicast entry. 1578 * 1579 * Search hash table based on address. If match found then 1580 * update associated val (which is chain of ports), otherwise 1581 * create new key/val (addr/port) pair and insert into table. 1582 */ 1583 int 1584 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1585 { 1586 int dup = 0; 1587 int rv = 0; 1588 mfdb_ent_t *ment = NULL; 1589 mfdb_ent_t *tmp_ent = NULL; 1590 mfdb_ent_t *new_ent = NULL; 1591 void *tgt = NULL; 1592 1593 if (devtype == VSW_VNETPORT) { 1594 /* 1595 * Being invoked from a vnet. 1596 */ 1597 ASSERT(arg != NULL); 1598 tgt = arg; 1599 D2(NULL, "%s: port %d : address 0x%llx", __func__, 1600 ((vsw_port_t *)arg)->p_instance, addr); 1601 } else { 1602 /* 1603 * We are being invoked via the m_multicst mac entry 1604 * point. 1605 */ 1606 D2(NULL, "%s: address 0x%llx", __func__, addr); 1607 tgt = (void *)vswp; 1608 } 1609 1610 WRITE_ENTER(&vswp->mfdbrw); 1611 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1612 (mod_hash_val_t *)&ment) != 0) { 1613 1614 /* address not currently in table */ 1615 ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1616 ment->d_addr = (void *)tgt; 1617 ment->d_type = devtype; 1618 ment->nextp = NULL; 1619 1620 if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr, 1621 (mod_hash_val_t)ment) != 0) { 1622 DERR(vswp, "%s: hash table insertion failed", __func__); 1623 kmem_free(ment, sizeof (mfdb_ent_t)); 1624 rv = 1; 1625 } else { 1626 D2(vswp, "%s: added initial entry for 0x%llx to " 1627 "table", __func__, addr); 1628 } 1629 } else { 1630 /* 1631 * Address in table. Check to see if specified port 1632 * is already associated with the address. If not add 1633 * it now. 1634 */ 1635 tmp_ent = ment; 1636 while (tmp_ent != NULL) { 1637 if (tmp_ent->d_addr == (void *)tgt) { 1638 if (devtype == VSW_VNETPORT) { 1639 DERR(vswp, "%s: duplicate port entry " 1640 "found for portid %ld and key " 1641 "0x%llx", __func__, 1642 ((vsw_port_t *)arg)->p_instance, 1643 addr); 1644 } else { 1645 DERR(vswp, "%s: duplicate entry found" 1646 "for key 0x%llx", __func__, addr); 1647 } 1648 rv = 1; 1649 dup = 1; 1650 break; 1651 } 1652 tmp_ent = tmp_ent->nextp; 1653 } 1654 1655 /* 1656 * Port not on list so add it to end now. 1657 */ 1658 if (0 == dup) { 1659 D2(vswp, "%s: added entry for 0x%llx to table", 1660 __func__, addr); 1661 new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1662 new_ent->d_addr = (void *)tgt; 1663 new_ent->d_type = devtype; 1664 new_ent->nextp = NULL; 1665 1666 tmp_ent = ment; 1667 while (tmp_ent->nextp != NULL) 1668 tmp_ent = tmp_ent->nextp; 1669 1670 tmp_ent->nextp = new_ent; 1671 } 1672 } 1673 1674 RW_EXIT(&vswp->mfdbrw); 1675 return (rv); 1676 } 1677 1678 /* 1679 * Remove a multicast entry from the hashtable. 1680 * 1681 * Search hash table based on address. If match found, scan 1682 * list of ports associated with address. If specified port 1683 * found remove it from list. 1684 */ 1685 int 1686 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1687 { 1688 mfdb_ent_t *ment = NULL; 1689 mfdb_ent_t *curr_p, *prev_p; 1690 void *tgt = NULL; 1691 1692 D1(vswp, "%s: enter", __func__); 1693 1694 if (devtype == VSW_VNETPORT) { 1695 tgt = (vsw_port_t *)arg; 1696 D2(vswp, "%s: removing port %d from mFDB for address" 1697 " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr); 1698 } else { 1699 D2(vswp, "%s: removing entry", __func__); 1700 tgt = (void *)vswp; 1701 } 1702 1703 WRITE_ENTER(&vswp->mfdbrw); 1704 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1705 (mod_hash_val_t *)&ment) != 0) { 1706 D2(vswp, "%s: address 0x%llx not in table", __func__, addr); 1707 RW_EXIT(&vswp->mfdbrw); 1708 return (1); 1709 } 1710 1711 prev_p = curr_p = ment; 1712 1713 while (curr_p != NULL) { 1714 if (curr_p->d_addr == (void *)tgt) { 1715 if (devtype == VSW_VNETPORT) { 1716 D2(vswp, "%s: port %d found", __func__, 1717 ((vsw_port_t *)tgt)->p_instance); 1718 } else { 1719 D2(vswp, "%s: instance found", __func__); 1720 } 1721 1722 if (prev_p == curr_p) { 1723 /* 1724 * head of list, if no other element is in 1725 * list then destroy this entry, otherwise 1726 * just replace it with updated value. 1727 */ 1728 ment = curr_p->nextp; 1729 if (ment == NULL) { 1730 (void) mod_hash_destroy(vswp->mfdb, 1731 (mod_hash_val_t)addr); 1732 } else { 1733 (void) mod_hash_replace(vswp->mfdb, 1734 (mod_hash_key_t)addr, 1735 (mod_hash_val_t)ment); 1736 } 1737 } else { 1738 /* 1739 * Not head of list, no need to do 1740 * replacement, just adjust list pointers. 1741 */ 1742 prev_p->nextp = curr_p->nextp; 1743 } 1744 break; 1745 } 1746 1747 prev_p = curr_p; 1748 curr_p = curr_p->nextp; 1749 } 1750 1751 RW_EXIT(&vswp->mfdbrw); 1752 1753 D1(vswp, "%s: exit", __func__); 1754 1755 if (curr_p == NULL) 1756 return (1); 1757 kmem_free(curr_p, sizeof (mfdb_ent_t)); 1758 return (0); 1759 } 1760 1761 /* 1762 * Port is being deleted, but has registered an interest in one 1763 * or more multicast groups. Using the list of addresses maintained 1764 * within the port structure find the appropriate entry in the hash 1765 * table and remove this port from the list of interested ports. 1766 */ 1767 void 1768 vsw_del_mcst_port(vsw_port_t *port) 1769 { 1770 mcst_addr_t *mcap = NULL; 1771 vsw_t *vswp = port->p_vswp; 1772 1773 D1(vswp, "%s: enter", __func__); 1774 1775 mutex_enter(&port->mca_lock); 1776 1777 while ((mcap = port->mcap) != NULL) { 1778 1779 port->mcap = mcap->nextp; 1780 1781 mutex_exit(&port->mca_lock); 1782 1783 (void) vsw_del_mcst(vswp, VSW_VNETPORT, 1784 mcap->addr, port); 1785 1786 /* 1787 * Remove the address from HW. The address 1788 * will actually only be removed once the ref 1789 * count within the MAC layer has dropped to 1790 * zero. I.e. we can safely call this fn even 1791 * if other ports are interested in this 1792 * address. 1793 */ 1794 vsw_mac_multicast_remove(vswp, port, mcap, VSW_VNETPORT); 1795 kmem_free(mcap, sizeof (*mcap)); 1796 1797 mutex_enter(&port->mca_lock); 1798 1799 } 1800 1801 mutex_exit(&port->mca_lock); 1802 1803 D1(vswp, "%s: exit", __func__); 1804 } 1805 1806 /* 1807 * This vsw instance is detaching, but has registered an interest in one 1808 * or more multicast groups. Using the list of addresses maintained 1809 * within the vsw structure find the appropriate entry in the hash 1810 * table and remove this instance from the list of interested ports. 1811 */ 1812 void 1813 vsw_del_mcst_vsw(vsw_t *vswp) 1814 { 1815 mcst_addr_t *next_p = NULL; 1816 1817 D1(vswp, "%s: enter", __func__); 1818 1819 mutex_enter(&vswp->mca_lock); 1820 1821 while (vswp->mcap != NULL) { 1822 DERR(vswp, "%s: deleting addr 0x%llx", 1823 __func__, vswp->mcap->addr); 1824 (void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL); 1825 1826 next_p = vswp->mcap->nextp; 1827 kmem_free(vswp->mcap, sizeof (mcst_addr_t)); 1828 vswp->mcap = next_p; 1829 } 1830 1831 vswp->mcap = NULL; 1832 mutex_exit(&vswp->mca_lock); 1833 1834 D1(vswp, "%s: exit", __func__); 1835 } 1836 1837 mblk_t * 1838 vsw_get_same_dest_list(struct ether_header *ehp, mblk_t **mpp) 1839 { 1840 mblk_t *bp; 1841 mblk_t *nbp; 1842 mblk_t *head = NULL; 1843 mblk_t *tail = NULL; 1844 mblk_t *prev = NULL; 1845 struct ether_header *behp; 1846 1847 /* process the chain of packets */ 1848 bp = *mpp; 1849 while (bp) { 1850 nbp = bp->b_next; 1851 behp = (struct ether_header *)bp->b_rptr; 1852 bp->b_prev = NULL; 1853 if (ether_cmp(&ehp->ether_dhost, &behp->ether_dhost) == 0) { 1854 if (prev == NULL) { 1855 *mpp = nbp; 1856 } else { 1857 prev->b_next = nbp; 1858 } 1859 bp->b_next = NULL; 1860 if (head == NULL) { 1861 head = tail = bp; 1862 } else { 1863 tail->b_next = bp; 1864 tail = bp; 1865 } 1866 } else { 1867 prev = bp; 1868 } 1869 bp = nbp; 1870 } 1871 return (head); 1872 } 1873 1874 static mblk_t * 1875 vsw_dupmsgchain(mblk_t *mp) 1876 { 1877 mblk_t *nmp = NULL; 1878 mblk_t **nmpp = &nmp; 1879 1880 for (; mp != NULL; mp = mp->b_next) { 1881 if ((*nmpp = dupmsg(mp)) == NULL) { 1882 freemsgchain(nmp); 1883 return (NULL); 1884 } 1885 1886 nmpp = &((*nmpp)->b_next); 1887 } 1888 1889 return (nmp); 1890 } 1891