1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/errno.h> 29 #include <sys/debug.h> 30 #include <sys/time.h> 31 #include <sys/sysmacros.h> 32 #include <sys/systm.h> 33 #include <sys/user.h> 34 #include <sys/stropts.h> 35 #include <sys/stream.h> 36 #include <sys/strlog.h> 37 #include <sys/strsubr.h> 38 #include <sys/cmn_err.h> 39 #include <sys/cpu.h> 40 #include <sys/kmem.h> 41 #include <sys/conf.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/ksynch.h> 45 #include <sys/stat.h> 46 #include <sys/kstat.h> 47 #include <sys/vtrace.h> 48 #include <sys/strsun.h> 49 #include <sys/dlpi.h> 50 #include <sys/ethernet.h> 51 #include <net/if.h> 52 #include <sys/varargs.h> 53 #include <sys/machsystm.h> 54 #include <sys/modctl.h> 55 #include <sys/modhash.h> 56 #include <sys/mac.h> 57 #include <sys/mac_ether.h> 58 #include <sys/taskq.h> 59 #include <sys/note.h> 60 #include <sys/mach_descrip.h> 61 #include <sys/mac.h> 62 #include <sys/mdeg.h> 63 #include <sys/ldc.h> 64 #include <sys/vsw_fdb.h> 65 #include <sys/vsw.h> 66 #include <sys/vio_mailbox.h> 67 #include <sys/vnet_mailbox.h> 68 #include <sys/vnet_common.h> 69 #include <sys/vio_util.h> 70 #include <sys/sdt.h> 71 #include <sys/atomic.h> 72 #include <sys/vlan.h> 73 74 /* Switching setup routines */ 75 void vsw_setup_switching_timeout(void *arg); 76 void vsw_stop_switching_timeout(vsw_t *vswp); 77 int vsw_setup_switching(vsw_t *); 78 void vsw_setup_layer2_post_process(vsw_t *vswp); 79 void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, 80 vsw_port_t *port, mac_resource_handle_t mrh); 81 static int vsw_setup_layer2(vsw_t *); 82 static int vsw_setup_layer3(vsw_t *); 83 84 /* Switching/data transmit routines */ 85 static void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 86 vsw_port_t *port, mac_resource_handle_t); 87 static void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 88 vsw_port_t *port, mac_resource_handle_t); 89 static int vsw_forward_all(vsw_t *vswp, mblk_t *mp, 90 int caller, vsw_port_t *port); 91 static int vsw_forward_grp(vsw_t *vswp, mblk_t *mp, 92 int caller, vsw_port_t *port); 93 94 /* VLAN routines */ 95 void vsw_create_vlans(void *arg, int type); 96 void vsw_destroy_vlans(void *arg, int type); 97 void vsw_vlan_add_ids(void *arg, int type); 98 void vsw_vlan_remove_ids(void *arg, int type); 99 static void vsw_vlan_create_hash(void *arg, int type); 100 static void vsw_vlan_destroy_hash(void *arg, int type); 101 boolean_t vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp, 102 uint16_t *vidp); 103 mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp); 104 uint32_t vsw_vlan_frames_untag(void *arg, int type, mblk_t **np, mblk_t **npt); 105 boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid); 106 107 /* Forwarding database (FDB) routines */ 108 void vsw_fdbe_add(vsw_t *vswp, void *port); 109 void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr); 110 static vsw_fdbe_t *vsw_fdbe_find(vsw_t *vswp, struct ether_addr *); 111 static void vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val); 112 113 int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *); 114 int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *); 115 int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *); 116 void vsw_del_mcst_vsw(vsw_t *); 117 118 /* Support functions */ 119 static mblk_t *vsw_dupmsgchain(mblk_t *mp); 120 static uint32_t vsw_get_same_dest_list(struct ether_header *ehp, 121 mblk_t **rhead, mblk_t **rtail, mblk_t **mpp); 122 123 124 /* 125 * Functions imported from other files. 126 */ 127 extern mblk_t *vsw_tx_msg(vsw_t *, mblk_t *); 128 extern mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t); 129 extern int vsw_mac_open(vsw_t *vswp); 130 extern void vsw_mac_close(vsw_t *vswp); 131 extern void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh, 132 mblk_t *mp, vsw_macrx_flags_t flags); 133 extern void vsw_set_addrs(vsw_t *vswp); 134 extern int vsw_get_hw_maddr(vsw_t *); 135 extern int vsw_mac_attach(vsw_t *vswp); 136 extern int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt, 137 uint32_t count); 138 extern void vsw_hio_init(vsw_t *vswp); 139 extern void vsw_hio_start_ports(vsw_t *vswp); 140 141 /* 142 * Tunables used in this file. 143 */ 144 extern int vsw_setup_switching_delay; 145 extern uint32_t vsw_vlan_nchains; 146 extern uint32_t vsw_fdbe_refcnt_delay; 147 148 #define VSW_FDBE_REFHOLD(p) \ 149 { \ 150 atomic_inc_32(&(p)->refcnt); \ 151 ASSERT((p)->refcnt != 0); \ 152 } 153 154 #define VSW_FDBE_REFRELE(p) \ 155 { \ 156 ASSERT((p)->refcnt != 0); \ 157 atomic_dec_32(&(p)->refcnt); \ 158 } 159 160 /* 161 * Timeout routine to setup switching mode: 162 * vsw_setup_switching() is invoked from vsw_attach() or vsw_update_md_prop() 163 * initially. If it fails and the error is EAGAIN, then this timeout handler 164 * is started to retry vsw_setup_switching(). vsw_setup_switching() is retried 165 * until we successfully finish it; or the returned error is not EAGAIN. 166 */ 167 void 168 vsw_setup_switching_timeout(void *arg) 169 { 170 vsw_t *vswp = (vsw_t *)arg; 171 int rv; 172 173 if (vswp->swtmout_enabled == B_FALSE) 174 return; 175 176 rv = vsw_setup_switching(vswp); 177 178 if (rv == 0) { 179 vsw_setup_layer2_post_process(vswp); 180 } 181 182 mutex_enter(&vswp->swtmout_lock); 183 184 if (rv == EAGAIN && vswp->swtmout_enabled == B_TRUE) { 185 /* 186 * Reschedule timeout() if the error is EAGAIN and the 187 * timeout is still enabled. For errors other than EAGAIN, 188 * we simply return without rescheduling timeout(). 189 */ 190 vswp->swtmout_id = 191 timeout(vsw_setup_switching_timeout, vswp, 192 (vsw_setup_switching_delay * drv_usectohz(MICROSEC))); 193 goto exit; 194 } 195 196 /* timeout handler completed */ 197 vswp->swtmout_enabled = B_FALSE; 198 vswp->swtmout_id = 0; 199 200 exit: 201 mutex_exit(&vswp->swtmout_lock); 202 } 203 204 /* 205 * Cancel the timeout handler to setup switching mode. 206 */ 207 void 208 vsw_stop_switching_timeout(vsw_t *vswp) 209 { 210 timeout_id_t tid; 211 212 mutex_enter(&vswp->swtmout_lock); 213 214 tid = vswp->swtmout_id; 215 216 if (tid != 0) { 217 /* signal timeout handler to stop */ 218 vswp->swtmout_enabled = B_FALSE; 219 vswp->swtmout_id = 0; 220 mutex_exit(&vswp->swtmout_lock); 221 222 (void) untimeout(tid); 223 } else { 224 mutex_exit(&vswp->swtmout_lock); 225 } 226 227 (void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE); 228 229 WRITE_ENTER(&vswp->mac_rwlock); 230 vswp->mac_open_retries = 0; 231 RW_EXIT(&vswp->mac_rwlock); 232 } 233 234 /* 235 * Setup the required switching mode. 236 * This routine is invoked from vsw_attach() or vsw_update_md_prop() 237 * initially. If it fails and the error is EAGAIN, then a timeout handler 238 * is started to retry vsw_setup_switching(), until it successfully finishes; 239 * or the returned error is not EAGAIN. 240 * 241 * Returns: 242 * 0 on success. 243 * EAGAIN if retry is needed. 244 * 1 on all other failures. 245 */ 246 int 247 vsw_setup_switching(vsw_t *vswp) 248 { 249 int i, rv = 1; 250 251 D1(vswp, "%s: enter", __func__); 252 253 /* 254 * Select best switching mode. 255 * Note that we start from the saved smode_idx. This is done as 256 * this routine can be called from the timeout handler to retry 257 * setting up a specific mode. Currently only the function which 258 * sets up layer2/promisc mode returns EAGAIN if the underlying 259 * physical device is not available yet, causing retries. 260 */ 261 for (i = vswp->smode_idx; i < vswp->smode_num; i++) { 262 vswp->smode_idx = i; 263 switch (vswp->smode[i]) { 264 case VSW_LAYER2: 265 case VSW_LAYER2_PROMISC: 266 rv = vsw_setup_layer2(vswp); 267 break; 268 269 case VSW_LAYER3: 270 rv = vsw_setup_layer3(vswp); 271 break; 272 273 default: 274 DERR(vswp, "unknown switch mode"); 275 break; 276 } 277 278 if ((rv == 0) || (rv == EAGAIN)) 279 break; 280 281 /* all other errors(rv != 0): continue & select the next mode */ 282 rv = 1; 283 } 284 285 if (rv && (rv != EAGAIN)) { 286 cmn_err(CE_WARN, "!vsw%d: Unable to setup specified " 287 "switching mode", vswp->instance); 288 } else if (rv == 0) { 289 (void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE); 290 } 291 292 D2(vswp, "%s: Operating in mode %d", __func__, 293 vswp->smode[vswp->smode_idx]); 294 295 D1(vswp, "%s: exit", __func__); 296 297 return (rv); 298 } 299 300 /* 301 * Setup for layer 2 switching. 302 * 303 * Returns: 304 * 0 on success. 305 * EAGAIN if retry is needed. 306 * EIO on all other failures. 307 */ 308 static int 309 vsw_setup_layer2(vsw_t *vswp) 310 { 311 int rv; 312 313 D1(vswp, "%s: enter", __func__); 314 315 vswp->vsw_switch_frame = vsw_switch_l2_frame; 316 317 rv = strlen(vswp->physname); 318 if (rv == 0) { 319 /* 320 * Physical device name is NULL, which is 321 * required for layer 2. 322 */ 323 cmn_err(CE_WARN, "!vsw%d: no physical device name specified", 324 vswp->instance); 325 return (EIO); 326 } 327 328 WRITE_ENTER(&vswp->mac_rwlock); 329 330 rv = vsw_mac_open(vswp); 331 if (rv != 0) { 332 if (rv != EAGAIN) { 333 cmn_err(CE_WARN, "!vsw%d: Unable to open physical " 334 "device: %s\n", vswp->instance, vswp->physname); 335 } 336 RW_EXIT(&vswp->mac_rwlock); 337 return (rv); 338 } 339 340 if (vswp->smode[vswp->smode_idx] == VSW_LAYER2) { 341 /* 342 * Verify that underlying device can support multiple 343 * unicast mac addresses. 344 */ 345 rv = vsw_get_hw_maddr(vswp); 346 if (rv != 0) { 347 goto exit_error; 348 } 349 } 350 351 /* 352 * Attempt to link into the MAC layer so we can get 353 * and send packets out over the physical adapter. 354 */ 355 rv = vsw_mac_attach(vswp); 356 if (rv != 0) { 357 /* 358 * Registration with the MAC layer has failed, 359 * so return error so that can fall back to next 360 * prefered switching method. 361 */ 362 cmn_err(CE_WARN, "!vsw%d: Unable to setup physical device: " 363 "%s\n", vswp->instance, vswp->physname); 364 goto exit_error; 365 } 366 367 D1(vswp, "%s: exit", __func__); 368 369 RW_EXIT(&vswp->mac_rwlock); 370 371 /* Initialize HybridIO related stuff */ 372 vsw_hio_init(vswp); 373 return (0); 374 375 exit_error: 376 vsw_mac_close(vswp); 377 RW_EXIT(&vswp->mac_rwlock); 378 return (EIO); 379 } 380 381 static int 382 vsw_setup_layer3(vsw_t *vswp) 383 { 384 D1(vswp, "%s: enter", __func__); 385 386 D2(vswp, "%s: operating in layer 3 mode", __func__); 387 vswp->vsw_switch_frame = vsw_switch_l3_frame; 388 389 D1(vswp, "%s: exit", __func__); 390 391 return (0); 392 } 393 394 /* ARGSUSED */ 395 void 396 vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *port, 397 mac_resource_handle_t mrh) 398 { 399 freemsgchain(mp); 400 } 401 402 /* 403 * Switch the given ethernet frame when operating in layer 2 mode. 404 * 405 * vswp: pointer to the vsw instance 406 * mp: pointer to chain of ethernet frame(s) to be switched 407 * caller: identifies the source of this frame as: 408 * 1. VSW_VNETPORT - a vsw port (connected to a vnet). 409 * 2. VSW_PHYSDEV - the physical ethernet device 410 * 3. VSW_LOCALDEV - vsw configured as a virtual interface 411 * arg: argument provided by the caller. 412 * 1. for VNETPORT - pointer to the corresponding vsw_port_t. 413 * 2. for PHYSDEV - NULL 414 * 3. for LOCALDEV - pointer to to this vsw_t(self) 415 */ 416 void 417 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 418 vsw_port_t *arg, mac_resource_handle_t mrh) 419 { 420 struct ether_header *ehp; 421 mblk_t *bp, *ret_m; 422 mblk_t *mpt = NULL; 423 uint32_t count; 424 vsw_fdbe_t *fp; 425 426 D1(vswp, "%s: enter (caller %d)", __func__, caller); 427 428 /* 429 * PERF: rather than breaking up the chain here, scan it 430 * to find all mblks heading to same destination and then 431 * pass that sub-chain to the lower transmit functions. 432 */ 433 434 /* process the chain of packets */ 435 bp = mp; 436 while (bp) { 437 ehp = (struct ether_header *)bp->b_rptr; 438 count = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp); 439 ASSERT(count != 0); 440 441 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 442 __func__, MBLKSIZE(mp), MBLKL(mp)); 443 444 if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) { 445 /* 446 * If destination is VSW_LOCALDEV (vsw as an eth 447 * interface) and if the device is up & running, 448 * send the packet up the stack on this host. 449 * If the virtual interface is down, drop the packet. 450 */ 451 if (caller != VSW_LOCALDEV) { 452 vsw_mac_rx(vswp, mrh, mp, VSW_MACRX_FREEMSG); 453 } else { 454 freemsgchain(mp); 455 } 456 continue; 457 } 458 459 /* 460 * Find fdb entry for the destination 461 * and hold a reference to it. 462 */ 463 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 464 if (fp != NULL) { 465 466 /* 467 * If plumbed and in promisc mode then copy msg 468 * and send up the stack. 469 */ 470 vsw_mac_rx(vswp, mrh, mp, 471 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 472 473 /* 474 * If the destination is in FDB, the packet 475 * should be forwarded to the correponding 476 * vsw_port (connected to a vnet device - 477 * VSW_VNETPORT) 478 */ 479 (void) vsw_portsend(fp->portp, mp, mpt, count); 480 481 /* Release the reference on the fdb entry */ 482 VSW_FDBE_REFRELE(fp); 483 } else { 484 /* 485 * Destination not in FDB. 486 * 487 * If the destination is broadcast or 488 * multicast forward the packet to all 489 * (VNETPORTs, PHYSDEV, LOCALDEV), 490 * except the caller. 491 */ 492 if (IS_BROADCAST(ehp)) { 493 D2(vswp, "%s: BROADCAST pkt", __func__); 494 (void) vsw_forward_all(vswp, mp, caller, arg); 495 } else if (IS_MULTICAST(ehp)) { 496 D2(vswp, "%s: MULTICAST pkt", __func__); 497 (void) vsw_forward_grp(vswp, mp, caller, arg); 498 } else { 499 /* 500 * If the destination is unicast, and came 501 * from either a logical network device or 502 * the switch itself when it is plumbed, then 503 * send it out on the physical device and also 504 * up the stack if the logical interface is 505 * in promiscious mode. 506 * 507 * NOTE: The assumption here is that if we 508 * cannot find the destination in our fdb, its 509 * a unicast address, and came from either a 510 * vnet or down the stack (when plumbed) it 511 * must be destinded for an ethernet device 512 * outside our ldoms. 513 */ 514 if (caller == VSW_VNETPORT) { 515 /* promisc check copy etc */ 516 vsw_mac_rx(vswp, mrh, mp, 517 VSW_MACRX_PROMISC | 518 VSW_MACRX_COPYMSG); 519 520 if ((ret_m = vsw_tx_msg(vswp, mp)) 521 != NULL) { 522 DERR(vswp, "%s: drop mblks to " 523 "phys dev", __func__); 524 freemsgchain(ret_m); 525 } 526 527 } else if (caller == VSW_PHYSDEV) { 528 /* 529 * Pkt seen because card in promisc 530 * mode. Send up stack if plumbed in 531 * promisc mode, else drop it. 532 */ 533 vsw_mac_rx(vswp, mrh, mp, 534 VSW_MACRX_PROMISC | 535 VSW_MACRX_FREEMSG); 536 537 } else if (caller == VSW_LOCALDEV) { 538 /* 539 * Pkt came down the stack, send out 540 * over physical device. 541 */ 542 if ((ret_m = vsw_tx_msg(vswp, mp)) 543 != NULL) { 544 DERR(vswp, "%s: drop mblks to " 545 "phys dev", __func__); 546 freemsgchain(ret_m); 547 } 548 } 549 } 550 } 551 } 552 D1(vswp, "%s: exit\n", __func__); 553 } 554 555 /* 556 * Switch ethernet frame when in layer 3 mode (i.e. using IP 557 * layer to do the routing). 558 * 559 * There is a large amount of overlap between this function and 560 * vsw_switch_l2_frame. At some stage we need to revisit and refactor 561 * both these functions. 562 */ 563 void 564 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 565 vsw_port_t *arg, mac_resource_handle_t mrh) 566 { 567 struct ether_header *ehp; 568 mblk_t *bp = NULL; 569 mblk_t *mpt; 570 uint32_t count; 571 vsw_fdbe_t *fp; 572 573 D1(vswp, "%s: enter (caller %d)", __func__, caller); 574 575 /* 576 * In layer 3 mode should only ever be switching packets 577 * between IP layer and vnet devices. So make sure thats 578 * who is invoking us. 579 */ 580 if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) { 581 DERR(vswp, "%s: unexpected caller (%d)", __func__, caller); 582 freemsgchain(mp); 583 return; 584 } 585 586 /* process the chain of packets */ 587 bp = mp; 588 while (bp) { 589 ehp = (struct ether_header *)bp->b_rptr; 590 count = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp); 591 ASSERT(count != 0); 592 593 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 594 __func__, MBLKSIZE(mp), MBLKL(mp)); 595 596 /* 597 * Find fdb entry for the destination 598 * and hold a reference to it. 599 */ 600 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 601 if (fp != NULL) { 602 603 D2(vswp, "%s: sending to target port", __func__); 604 (void) vsw_portsend(fp->portp, mp, mpt, count); 605 606 /* Release the reference on the fdb entry */ 607 VSW_FDBE_REFRELE(fp); 608 } else { 609 /* 610 * Destination not in FDB 611 * 612 * If the destination is broadcast or 613 * multicast forward the packet to all 614 * (VNETPORTs, PHYSDEV, LOCALDEV), 615 * except the caller. 616 */ 617 if (IS_BROADCAST(ehp)) { 618 D2(vswp, "%s: BROADCAST pkt", __func__); 619 (void) vsw_forward_all(vswp, mp, caller, arg); 620 } else if (IS_MULTICAST(ehp)) { 621 D2(vswp, "%s: MULTICAST pkt", __func__); 622 (void) vsw_forward_grp(vswp, mp, caller, arg); 623 } else { 624 /* 625 * Unicast pkt from vnet that we don't have 626 * an FDB entry for, so must be destinded for 627 * the outside world. Attempt to send up to the 628 * IP layer to allow it to deal with it. 629 */ 630 if (caller == VSW_VNETPORT) { 631 vsw_mac_rx(vswp, mrh, 632 mp, VSW_MACRX_FREEMSG); 633 } 634 } 635 } 636 } 637 638 D1(vswp, "%s: exit", __func__); 639 } 640 641 /* 642 * Setup mac addrs and hio resources for layer 2 switching only. 643 */ 644 void 645 vsw_setup_layer2_post_process(vsw_t *vswp) 646 { 647 if ((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 648 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) { 649 /* 650 * Program unicst, mcst addrs of vsw 651 * interface and ports in the physdev. 652 */ 653 vsw_set_addrs(vswp); 654 655 /* Start HIO for ports that have already connected */ 656 vsw_hio_start_ports(vswp); 657 } 658 } 659 660 /* 661 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV), 662 * except the caller (port on which frame arrived). 663 */ 664 static int 665 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 666 { 667 vsw_port_list_t *plist = &vswp->plist; 668 vsw_port_t *portp; 669 mblk_t *nmp = NULL; 670 mblk_t *ret_m = NULL; 671 int skip_port = 0; 672 673 D1(vswp, "vsw_forward_all: enter\n"); 674 675 /* 676 * Broadcast message from inside ldoms so send to outside 677 * world if in either of layer 2 modes. 678 */ 679 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 680 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 681 ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) { 682 683 nmp = vsw_dupmsgchain(mp); 684 if (nmp) { 685 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 686 DERR(vswp, "%s: dropping pkt(s) " 687 "consisting of %ld bytes of data for" 688 " physical device", __func__, MBLKL(ret_m)); 689 freemsgchain(ret_m); 690 } 691 } 692 } 693 694 if (caller == VSW_VNETPORT) 695 skip_port = 1; 696 697 /* 698 * Broadcast message from other vnet (layer 2 or 3) or outside 699 * world (layer 2 only), send up stack if plumbed. 700 */ 701 if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) { 702 vsw_mac_rx(vswp, NULL, mp, VSW_MACRX_COPYMSG); 703 } 704 705 /* send it to all VNETPORTs */ 706 READ_ENTER(&plist->lockrw); 707 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 708 D2(vswp, "vsw_forward_all: port %d", portp->p_instance); 709 /* 710 * Caution ! - don't reorder these two checks as arg 711 * will be NULL if the caller is PHYSDEV. skip_port is 712 * only set if caller is VNETPORT. 713 */ 714 if ((skip_port) && (portp == arg)) { 715 continue; 716 } else { 717 nmp = vsw_dupmsgchain(mp); 718 if (nmp) { 719 mblk_t *mpt = nmp; 720 uint32_t count = 1; 721 722 /* Find tail */ 723 while (mpt->b_next != NULL) { 724 mpt = mpt->b_next; 725 count++; 726 } 727 /* 728 * The plist->lockrw is protecting the 729 * portp from getting destroyed here. 730 * So, no ref_cnt is incremented here. 731 */ 732 (void) vsw_portsend(portp, nmp, mpt, count); 733 } else { 734 DERR(vswp, "vsw_forward_all: nmp NULL"); 735 } 736 } 737 } 738 RW_EXIT(&plist->lockrw); 739 740 freemsgchain(mp); 741 742 D1(vswp, "vsw_forward_all: exit\n"); 743 return (0); 744 } 745 746 /* 747 * Forward pkts to any devices or interfaces which have registered 748 * an interest in them (i.e. multicast groups). 749 */ 750 static int 751 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 752 { 753 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 754 mfdb_ent_t *entp = NULL; 755 mfdb_ent_t *tpp = NULL; 756 vsw_port_t *port; 757 uint64_t key = 0; 758 mblk_t *nmp = NULL; 759 mblk_t *ret_m = NULL; 760 boolean_t check_if = B_TRUE; 761 762 /* 763 * Convert address to hash table key 764 */ 765 KEY_HASH(key, &ehp->ether_dhost); 766 767 D1(vswp, "%s: key 0x%llx", __func__, key); 768 769 /* 770 * If pkt came from either a vnet or down the stack (if we are 771 * plumbed) and we are in layer 2 mode, then we send the pkt out 772 * over the physical adapter, and then check to see if any other 773 * vnets are interested in it. 774 */ 775 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 776 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 777 ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) { 778 nmp = vsw_dupmsgchain(mp); 779 if (nmp) { 780 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 781 DERR(vswp, "%s: dropping pkt(s) consisting of " 782 "%ld bytes of data for physical device", 783 __func__, MBLKL(ret_m)); 784 freemsgchain(ret_m); 785 } 786 } 787 } 788 789 READ_ENTER(&vswp->mfdbrw); 790 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key, 791 (mod_hash_val_t *)&entp) != 0) { 792 D3(vswp, "%s: no table entry found for addr 0x%llx", 793 __func__, key); 794 } else { 795 /* 796 * Send to list of devices associated with this address... 797 */ 798 for (tpp = entp; tpp != NULL; tpp = tpp->nextp) { 799 800 /* dont send to ourselves */ 801 if ((caller == VSW_VNETPORT) && 802 (tpp->d_addr == (void *)arg)) { 803 port = (vsw_port_t *)tpp->d_addr; 804 D3(vswp, "%s: not sending to ourselves" 805 " : port %d", __func__, port->p_instance); 806 continue; 807 808 } else if ((caller == VSW_LOCALDEV) && 809 (tpp->d_type == VSW_LOCALDEV)) { 810 D2(vswp, "%s: not sending back up stack", 811 __func__); 812 continue; 813 } 814 815 if (tpp->d_type == VSW_VNETPORT) { 816 port = (vsw_port_t *)tpp->d_addr; 817 D3(vswp, "%s: sending to port %ld for addr " 818 "0x%llx", __func__, port->p_instance, key); 819 820 nmp = vsw_dupmsgchain(mp); 821 if (nmp) { 822 mblk_t *mpt = nmp; 823 uint32_t count = 1; 824 825 /* Find tail */ 826 while (mpt->b_next != NULL) { 827 mpt = mpt->b_next; 828 count++; 829 } 830 /* 831 * The vswp->mfdbrw is protecting the 832 * portp from getting destroyed here. 833 * So, no ref_cnt is incremented here. 834 */ 835 (void) vsw_portsend(port, nmp, mpt, 836 count); 837 } 838 } else { 839 vsw_mac_rx(vswp, NULL, 840 mp, VSW_MACRX_COPYMSG); 841 D2(vswp, "%s: sending up stack" 842 " for addr 0x%llx", __func__, key); 843 check_if = B_FALSE; 844 } 845 } 846 } 847 848 RW_EXIT(&vswp->mfdbrw); 849 850 /* 851 * If the pkt came from either a vnet or from physical device, 852 * and if we havent already sent the pkt up the stack then we 853 * check now if we can/should (i.e. the interface is plumbed 854 * and in promisc mode). 855 */ 856 if ((check_if) && 857 ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) { 858 vsw_mac_rx(vswp, NULL, mp, 859 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 860 } 861 862 freemsgchain(mp); 863 864 D1(vswp, "%s: exit", __func__); 865 866 return (0); 867 } 868 869 /* 870 * This function creates the vlan id hash table for the given vsw device or 871 * port. It then adds each vlan that the device or port has been assigned, 872 * into this hash table. 873 * Arguments: 874 * arg: vsw device or port. 875 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 876 */ 877 void 878 vsw_create_vlans(void *arg, int type) 879 { 880 /* create vlan hash table */ 881 vsw_vlan_create_hash(arg, type); 882 883 /* add vlan ids of the vsw device into its hash table */ 884 vsw_vlan_add_ids(arg, type); 885 } 886 887 /* 888 * This function removes the vlan ids of the vsw device or port from its hash 889 * table. It then destroys the vlan hash table. 890 * Arguments: 891 * arg: vsw device or port. 892 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 893 */ 894 void 895 vsw_destroy_vlans(void *arg, int type) 896 { 897 /* remove vlan ids from the hash table */ 898 vsw_vlan_remove_ids(arg, type); 899 900 /* destroy vlan-hash-table */ 901 vsw_vlan_destroy_hash(arg, type); 902 } 903 904 /* 905 * Create a vlan-id hash table for the given vsw device or port. 906 */ 907 static void 908 vsw_vlan_create_hash(void *arg, int type) 909 { 910 char hashname[MAXNAMELEN]; 911 912 if (type == VSW_LOCALDEV) { 913 vsw_t *vswp = (vsw_t *)arg; 914 915 (void) snprintf(hashname, MAXNAMELEN, "vsw%d-vlan-hash", 916 vswp->instance); 917 918 vswp->vlan_nchains = vsw_vlan_nchains; 919 vswp->vlan_hashp = mod_hash_create_idhash(hashname, 920 vswp->vlan_nchains, mod_hash_null_valdtor); 921 922 } else if (type == VSW_VNETPORT) { 923 vsw_port_t *portp = (vsw_port_t *)arg; 924 925 (void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash", 926 portp->p_instance); 927 928 portp->vlan_nchains = vsw_vlan_nchains; 929 portp->vlan_hashp = mod_hash_create_idhash(hashname, 930 portp->vlan_nchains, mod_hash_null_valdtor); 931 932 } else { 933 return; 934 } 935 } 936 937 /* 938 * Destroy the vlan-id hash table for the given vsw device or port. 939 */ 940 static void 941 vsw_vlan_destroy_hash(void *arg, int type) 942 { 943 if (type == VSW_LOCALDEV) { 944 vsw_t *vswp = (vsw_t *)arg; 945 946 mod_hash_destroy_hash(vswp->vlan_hashp); 947 vswp->vlan_nchains = 0; 948 } else if (type == VSW_VNETPORT) { 949 vsw_port_t *portp = (vsw_port_t *)arg; 950 951 mod_hash_destroy_hash(portp->vlan_hashp); 952 portp->vlan_nchains = 0; 953 } else { 954 return; 955 } 956 } 957 958 /* 959 * Add vlan ids of the given vsw device or port into its hash table. 960 */ 961 void 962 vsw_vlan_add_ids(void *arg, int type) 963 { 964 int rv; 965 int i; 966 967 if (type == VSW_LOCALDEV) { 968 vsw_t *vswp = (vsw_t *)arg; 969 970 rv = mod_hash_insert(vswp->vlan_hashp, 971 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 972 (mod_hash_val_t)B_TRUE); 973 ASSERT(rv == 0); 974 975 for (i = 0; i < vswp->nvids; i++) { 976 rv = mod_hash_insert(vswp->vlan_hashp, 977 (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i]), 978 (mod_hash_val_t)B_TRUE); 979 ASSERT(rv == 0); 980 } 981 982 } else if (type == VSW_VNETPORT) { 983 vsw_port_t *portp = (vsw_port_t *)arg; 984 985 rv = mod_hash_insert(portp->vlan_hashp, 986 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 987 (mod_hash_val_t)B_TRUE); 988 ASSERT(rv == 0); 989 990 for (i = 0; i < portp->nvids; i++) { 991 rv = mod_hash_insert(portp->vlan_hashp, 992 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]), 993 (mod_hash_val_t)B_TRUE); 994 ASSERT(rv == 0); 995 } 996 997 } else { 998 return; 999 } 1000 } 1001 1002 /* 1003 * Remove vlan ids of the given vsw device or port from its hash table. 1004 */ 1005 void 1006 vsw_vlan_remove_ids(void *arg, int type) 1007 { 1008 mod_hash_val_t vp; 1009 int rv; 1010 int i; 1011 1012 if (type == VSW_LOCALDEV) { 1013 vsw_t *vswp = (vsw_t *)arg; 1014 1015 rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->pvid); 1016 if (rv == B_TRUE) { 1017 rv = mod_hash_remove(vswp->vlan_hashp, 1018 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 1019 (mod_hash_val_t *)&vp); 1020 ASSERT(rv == 0); 1021 } 1022 1023 for (i = 0; i < vswp->nvids; i++) { 1024 rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->vids[i]); 1025 if (rv == B_TRUE) { 1026 rv = mod_hash_remove(vswp->vlan_hashp, 1027 (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i]), 1028 (mod_hash_val_t *)&vp); 1029 ASSERT(rv == 0); 1030 } 1031 } 1032 1033 } else if (type == VSW_VNETPORT) { 1034 vsw_port_t *portp = (vsw_port_t *)arg; 1035 1036 portp = (vsw_port_t *)arg; 1037 rv = vsw_vlan_lookup(portp->vlan_hashp, portp->pvid); 1038 if (rv == B_TRUE) { 1039 rv = mod_hash_remove(portp->vlan_hashp, 1040 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 1041 (mod_hash_val_t *)&vp); 1042 ASSERT(rv == 0); 1043 } 1044 1045 for (i = 0; i < portp->nvids; i++) { 1046 rv = vsw_vlan_lookup(portp->vlan_hashp, portp->vids[i]); 1047 if (rv == B_TRUE) { 1048 rv = mod_hash_remove(portp->vlan_hashp, 1049 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]), 1050 (mod_hash_val_t *)&vp); 1051 ASSERT(rv == 0); 1052 } 1053 } 1054 1055 } else { 1056 return; 1057 } 1058 } 1059 1060 /* 1061 * Find the given vlan id in the hash table. 1062 * Return: B_TRUE if the id is found; B_FALSE if not found. 1063 */ 1064 boolean_t 1065 vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid) 1066 { 1067 int rv; 1068 mod_hash_val_t vp; 1069 1070 rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp); 1071 1072 if (rv != 0) 1073 return (B_FALSE); 1074 1075 return (B_TRUE); 1076 } 1077 1078 /* 1079 * Add an entry into FDB for the given vsw. 1080 */ 1081 void 1082 vsw_fdbe_add(vsw_t *vswp, void *port) 1083 { 1084 uint64_t addr = 0; 1085 vsw_port_t *portp; 1086 vsw_fdbe_t *fp; 1087 int rv; 1088 1089 portp = (vsw_port_t *)port; 1090 KEY_HASH(addr, &portp->p_macaddr); 1091 1092 fp = kmem_zalloc(sizeof (vsw_fdbe_t), KM_SLEEP); 1093 fp->portp = port; 1094 1095 /* 1096 * Note: duplicate keys will be rejected by mod_hash. 1097 */ 1098 rv = mod_hash_insert(vswp->fdb_hashp, (mod_hash_key_t)addr, 1099 (mod_hash_val_t)fp); 1100 ASSERT(rv == 0); 1101 } 1102 1103 /* 1104 * Remove an entry from FDB. 1105 */ 1106 void 1107 vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr) 1108 { 1109 uint64_t addr = 0; 1110 vsw_fdbe_t *fp; 1111 int rv; 1112 1113 KEY_HASH(addr, eaddr); 1114 1115 /* 1116 * Remove the entry from fdb hash table. 1117 * This prevents further references to this fdb entry. 1118 */ 1119 rv = mod_hash_remove(vswp->fdb_hashp, (mod_hash_key_t)addr, 1120 (mod_hash_val_t *)&fp); 1121 if (rv != 0) { 1122 /* invalid key? */ 1123 return; 1124 } 1125 1126 /* 1127 * If there are threads already ref holding before the entry was 1128 * removed from hash table, then wait for ref count to drop to zero. 1129 */ 1130 while (fp->refcnt != 0) { 1131 delay(drv_usectohz(vsw_fdbe_refcnt_delay)); 1132 } 1133 1134 kmem_free(fp, sizeof (*fp)); 1135 } 1136 1137 /* 1138 * Search fdb for a given mac address. If an entry is found, hold 1139 * a reference to it and return the entry, else returns NULL. 1140 */ 1141 static vsw_fdbe_t * 1142 vsw_fdbe_find(vsw_t *vswp, struct ether_addr *addrp) 1143 { 1144 uint64_t key = 0; 1145 vsw_fdbe_t *fp; 1146 int rv; 1147 1148 KEY_HASH(key, addrp); 1149 1150 rv = mod_hash_find_cb(vswp->fdb_hashp, (mod_hash_key_t)key, 1151 (mod_hash_val_t *)&fp, vsw_fdbe_find_cb); 1152 1153 if (rv != 0) 1154 return (NULL); 1155 1156 return (fp); 1157 } 1158 1159 /* 1160 * Callback function provided to mod_hash_find_cb(). After finding the fdb 1161 * entry corresponding to the key (macaddr), this callback will be invoked by 1162 * mod_hash_find_cb() to atomically increment the reference count on the fdb 1163 * entry before returning the found entry. 1164 */ 1165 static void 1166 vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val) 1167 { 1168 _NOTE(ARGUNUSED(key)) 1169 VSW_FDBE_REFHOLD((vsw_fdbe_t *)val); 1170 } 1171 1172 /* 1173 * A given frame must be always tagged with the appropriate vlan id (unless it 1174 * is in the default-vlan) before the mac address switching function is called. 1175 * Otherwise, after switching function determines the destination, we cannot 1176 * figure out if the destination belongs to the the same vlan that the frame 1177 * originated from and if it needs tag/untag. Frames which are inbound from 1178 * the external(physical) network over a vlan trunk link are always tagged. 1179 * However frames which are received from a vnet-port over ldc or frames which 1180 * are coming down the stack on the service domain over vsw interface may be 1181 * untagged. These frames must be tagged with the appropriate pvid of the 1182 * sender (vnet-port or vsw device), before invoking the switching function. 1183 * 1184 * Arguments: 1185 * arg: caller of the function. 1186 * type: type of arg(caller): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port) 1187 * mp: frame(s) to be tagged. 1188 */ 1189 mblk_t * 1190 vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp) 1191 { 1192 vsw_t *vswp; 1193 vsw_port_t *portp; 1194 struct ether_header *ehp; 1195 mblk_t *bp; 1196 mblk_t *bpt; 1197 mblk_t *bph; 1198 mblk_t *bpn; 1199 uint16_t pvid; 1200 1201 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1202 1203 if (type == VSW_LOCALDEV) { 1204 vswp = (vsw_t *)arg; 1205 pvid = vswp->pvid; 1206 portp = NULL; 1207 } else { 1208 /* VSW_VNETPORT */ 1209 portp = (vsw_port_t *)arg; 1210 pvid = portp->pvid; 1211 vswp = portp->p_vswp; 1212 } 1213 1214 bpn = bph = bpt = NULL; 1215 1216 for (bp = mp; bp != NULL; bp = bpn) { 1217 1218 bpn = bp->b_next; 1219 bp->b_next = bp->b_prev = NULL; 1220 1221 /* Determine if it is an untagged frame */ 1222 ehp = (struct ether_header *)bp->b_rptr; 1223 1224 if (ehp->ether_type != ETHERTYPE_VLAN) { /* untagged */ 1225 1226 /* no need to tag if the frame is in default vlan */ 1227 if (pvid != vswp->default_vlan_id) { 1228 bp = vnet_vlan_insert_tag(bp, pvid); 1229 if (bp == NULL) { 1230 continue; 1231 } 1232 } 1233 } 1234 1235 /* build a chain of processed packets */ 1236 if (bph == NULL) { 1237 bph = bpt = bp; 1238 } else { 1239 bpt->b_next = bp; 1240 bpt = bp; 1241 } 1242 1243 } 1244 1245 return (bph); 1246 } 1247 1248 /* 1249 * Frames destined to a vnet-port or to the local vsw interface, must be 1250 * untagged if necessary before sending. This function first checks that the 1251 * frame can be sent to the destination in the vlan identified by the frame 1252 * tag. Note that when this function is invoked the frame must have been 1253 * already tagged (unless it is in the default-vlan). Because, this function is 1254 * called when the switching function determines the destination and invokes 1255 * its send function (vnet-port or vsw interface) and all frames would have 1256 * been tagged by this time (see comments in vsw_vlan_frame_pretag()). 1257 * 1258 * Arguments: 1259 * arg: destination device. 1260 * type: type of arg(destination): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port) 1261 * np: head of pkt chain to be validated and untagged. 1262 * npt: tail of pkt chain to be validated and untagged. 1263 * 1264 * Returns: 1265 * np: head of updated chain of packets 1266 * npt: tail of updated chain of packets 1267 * rv: count of any packets dropped 1268 */ 1269 uint32_t 1270 vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, mblk_t **npt) 1271 { 1272 mblk_t *bp; 1273 mblk_t *bpt; 1274 mblk_t *bph; 1275 mblk_t *bpn; 1276 vsw_port_t *portp; 1277 vsw_t *vswp; 1278 uint32_t count; 1279 struct ether_header *ehp; 1280 boolean_t is_tagged; 1281 boolean_t rv; 1282 uint16_t vlan_id; 1283 uint16_t pvid; 1284 mod_hash_t *vlan_hashp; 1285 1286 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1287 1288 if (type == VSW_LOCALDEV) { 1289 vswp = (vsw_t *)arg; 1290 pvid = vswp->pvid; 1291 vlan_hashp = vswp->vlan_hashp; 1292 portp = NULL; 1293 } else { 1294 /* type == VSW_VNETPORT */ 1295 portp = (vsw_port_t *)arg; 1296 vswp = portp->p_vswp; 1297 vlan_hashp = portp->vlan_hashp; 1298 pvid = portp->pvid; 1299 } 1300 1301 bpn = bph = bpt = NULL; 1302 count = 0; 1303 1304 for (bp = *np; bp != NULL; bp = bpn) { 1305 1306 bpn = bp->b_next; 1307 bp->b_next = bp->b_prev = NULL; 1308 1309 /* 1310 * Determine the vlan id that the frame belongs to. 1311 */ 1312 ehp = (struct ether_header *)bp->b_rptr; 1313 is_tagged = vsw_frame_lookup_vid(arg, type, ehp, &vlan_id); 1314 1315 /* 1316 * Check if the destination is in the same vlan. 1317 */ 1318 rv = vsw_vlan_lookup(vlan_hashp, vlan_id); 1319 if (rv == B_FALSE) { 1320 /* drop the packet */ 1321 freemsg(bp); 1322 count++; 1323 continue; 1324 } 1325 1326 /* 1327 * Check the frame header if tag/untag is needed. 1328 */ 1329 if (is_tagged == B_FALSE) { 1330 /* 1331 * Untagged frame. We shouldn't have an untagged 1332 * packet at this point, unless the destination's 1333 * vlan id is default-vlan-id; if it is not the 1334 * default-vlan-id, we drop the packet. 1335 */ 1336 if (vlan_id != vswp->default_vlan_id) { 1337 /* drop the packet */ 1338 freemsg(bp); 1339 count++; 1340 continue; 1341 } 1342 } else { 1343 /* 1344 * Tagged frame, untag if it's the destination's pvid. 1345 */ 1346 if (vlan_id == pvid) { 1347 1348 bp = vnet_vlan_remove_tag(bp); 1349 if (bp == NULL) { 1350 /* packet dropped */ 1351 count++; 1352 continue; 1353 } 1354 } 1355 } 1356 1357 /* build a chain of processed packets */ 1358 if (bph == NULL) { 1359 bph = bpt = bp; 1360 } else { 1361 bpt->b_next = bp; 1362 bpt = bp; 1363 } 1364 1365 } 1366 1367 *np = bph; 1368 *npt = bpt; 1369 1370 return (count); 1371 } 1372 1373 /* 1374 * Lookup the vlan id of the given frame. If it is a vlan-tagged frame, 1375 * then the vlan-id is available in the tag; otherwise, its vlan id is 1376 * implicitly obtained based on the caller (destination of the frame: 1377 * VSW_VNETPORT or VSW_LOCALDEV). 1378 * The vlan id determined is returned in vidp. 1379 * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged. 1380 */ 1381 boolean_t 1382 vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp, 1383 uint16_t *vidp) 1384 { 1385 struct ether_vlan_header *evhp; 1386 vsw_t *vswp; 1387 vsw_port_t *portp; 1388 1389 /* If it's a tagged frame, get the vid from vlan header */ 1390 if (ehp->ether_type == ETHERTYPE_VLAN) { 1391 1392 evhp = (struct ether_vlan_header *)ehp; 1393 *vidp = VLAN_ID(ntohs(evhp->ether_tci)); 1394 return (B_TRUE); 1395 } 1396 1397 /* Untagged frame; determine vlan id based on caller */ 1398 switch (caller) { 1399 1400 case VSW_VNETPORT: 1401 /* 1402 * packet destined to a vnet; vlan-id is pvid of vnet-port. 1403 */ 1404 portp = (vsw_port_t *)arg; 1405 *vidp = portp->pvid; 1406 break; 1407 1408 case VSW_LOCALDEV: 1409 1410 /* 1411 * packet destined to vsw interface; 1412 * vlan-id is port-vlan-id of vsw device. 1413 */ 1414 vswp = (vsw_t *)arg; 1415 *vidp = vswp->pvid; 1416 break; 1417 } 1418 1419 return (B_FALSE); 1420 } 1421 1422 /* 1423 * Add or remove multicast address(es). 1424 * 1425 * Returns 0 on success, 1 on failure. 1426 */ 1427 int 1428 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port) 1429 { 1430 mcst_addr_t *mcst_p = NULL; 1431 vsw_t *vswp = port->p_vswp; 1432 uint64_t addr = 0x0; 1433 int i; 1434 1435 D1(vswp, "%s: enter", __func__); 1436 1437 D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count); 1438 1439 for (i = 0; i < mcst_pkt->count; i++) { 1440 /* 1441 * Convert address into form that can be used 1442 * as hash table key. 1443 */ 1444 KEY_HASH(addr, &(mcst_pkt->mca[i])); 1445 1446 /* 1447 * Add or delete the specified address/port combination. 1448 */ 1449 if (mcst_pkt->set == 0x1) { 1450 D3(vswp, "%s: adding multicast address 0x%llx for " 1451 "port %ld", __func__, addr, port->p_instance); 1452 if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1453 /* 1454 * Update the list of multicast 1455 * addresses contained within the 1456 * port structure to include this new 1457 * one. 1458 */ 1459 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), 1460 KM_NOSLEEP); 1461 if (mcst_p == NULL) { 1462 DERR(vswp, "%s: unable to alloc mem", 1463 __func__); 1464 (void) vsw_del_mcst(vswp, 1465 VSW_VNETPORT, addr, port); 1466 return (1); 1467 } 1468 1469 mcst_p->nextp = NULL; 1470 mcst_p->addr = addr; 1471 ether_copy(&mcst_pkt->mca[i], &mcst_p->mca); 1472 1473 /* 1474 * Program the address into HW. If the addr 1475 * has already been programmed then the MAC 1476 * just increments a ref counter (which is 1477 * used when the address is being deleted) 1478 */ 1479 WRITE_ENTER(&vswp->mac_rwlock); 1480 if (vswp->mh != NULL) { 1481 if (mac_multicst_add(vswp->mh, 1482 (uchar_t *)&mcst_pkt->mca[i])) { 1483 RW_EXIT(&vswp->mac_rwlock); 1484 cmn_err(CE_WARN, "!vsw%d: " 1485 "unable to add multicast " 1486 "address: %s\n", 1487 vswp->instance, 1488 ether_sprintf((void *) 1489 &mcst_p->mca)); 1490 (void) vsw_del_mcst(vswp, 1491 VSW_VNETPORT, addr, port); 1492 kmem_free(mcst_p, 1493 sizeof (*mcst_p)); 1494 return (1); 1495 } 1496 mcst_p->mac_added = B_TRUE; 1497 } 1498 RW_EXIT(&vswp->mac_rwlock); 1499 1500 mutex_enter(&port->mca_lock); 1501 mcst_p->nextp = port->mcap; 1502 port->mcap = mcst_p; 1503 mutex_exit(&port->mca_lock); 1504 1505 } else { 1506 DERR(vswp, "%s: error adding multicast " 1507 "address 0x%llx for port %ld", 1508 __func__, addr, port->p_instance); 1509 return (1); 1510 } 1511 } else { 1512 /* 1513 * Delete an entry from the multicast hash 1514 * table and update the address list 1515 * appropriately. 1516 */ 1517 if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1518 D3(vswp, "%s: deleting multicast address " 1519 "0x%llx for port %ld", __func__, addr, 1520 port->p_instance); 1521 1522 mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr); 1523 ASSERT(mcst_p != NULL); 1524 1525 /* 1526 * Remove the address from HW. The address 1527 * will actually only be removed once the ref 1528 * count within the MAC layer has dropped to 1529 * zero. I.e. we can safely call this fn even 1530 * if other ports are interested in this 1531 * address. 1532 */ 1533 WRITE_ENTER(&vswp->mac_rwlock); 1534 if (vswp->mh != NULL && mcst_p->mac_added) { 1535 if (mac_multicst_remove(vswp->mh, 1536 (uchar_t *)&mcst_pkt->mca[i])) { 1537 RW_EXIT(&vswp->mac_rwlock); 1538 cmn_err(CE_WARN, "!vsw%d: " 1539 "unable to remove mcast " 1540 "address: %s\n", 1541 vswp->instance, 1542 ether_sprintf((void *) 1543 &mcst_p->mca)); 1544 kmem_free(mcst_p, 1545 sizeof (*mcst_p)); 1546 return (1); 1547 } 1548 mcst_p->mac_added = B_FALSE; 1549 } 1550 RW_EXIT(&vswp->mac_rwlock); 1551 kmem_free(mcst_p, sizeof (*mcst_p)); 1552 1553 } else { 1554 DERR(vswp, "%s: error deleting multicast " 1555 "addr 0x%llx for port %ld", 1556 __func__, addr, port->p_instance); 1557 return (1); 1558 } 1559 } 1560 } 1561 D1(vswp, "%s: exit", __func__); 1562 return (0); 1563 } 1564 1565 /* 1566 * Add a new multicast entry. 1567 * 1568 * Search hash table based on address. If match found then 1569 * update associated val (which is chain of ports), otherwise 1570 * create new key/val (addr/port) pair and insert into table. 1571 */ 1572 int 1573 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1574 { 1575 int dup = 0; 1576 int rv = 0; 1577 mfdb_ent_t *ment = NULL; 1578 mfdb_ent_t *tmp_ent = NULL; 1579 mfdb_ent_t *new_ent = NULL; 1580 void *tgt = NULL; 1581 1582 if (devtype == VSW_VNETPORT) { 1583 /* 1584 * Being invoked from a vnet. 1585 */ 1586 ASSERT(arg != NULL); 1587 tgt = arg; 1588 D2(NULL, "%s: port %d : address 0x%llx", __func__, 1589 ((vsw_port_t *)arg)->p_instance, addr); 1590 } else { 1591 /* 1592 * We are being invoked via the m_multicst mac entry 1593 * point. 1594 */ 1595 D2(NULL, "%s: address 0x%llx", __func__, addr); 1596 tgt = (void *)vswp; 1597 } 1598 1599 WRITE_ENTER(&vswp->mfdbrw); 1600 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1601 (mod_hash_val_t *)&ment) != 0) { 1602 1603 /* address not currently in table */ 1604 ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1605 ment->d_addr = (void *)tgt; 1606 ment->d_type = devtype; 1607 ment->nextp = NULL; 1608 1609 if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr, 1610 (mod_hash_val_t)ment) != 0) { 1611 DERR(vswp, "%s: hash table insertion failed", __func__); 1612 kmem_free(ment, sizeof (mfdb_ent_t)); 1613 rv = 1; 1614 } else { 1615 D2(vswp, "%s: added initial entry for 0x%llx to " 1616 "table", __func__, addr); 1617 } 1618 } else { 1619 /* 1620 * Address in table. Check to see if specified port 1621 * is already associated with the address. If not add 1622 * it now. 1623 */ 1624 tmp_ent = ment; 1625 while (tmp_ent != NULL) { 1626 if (tmp_ent->d_addr == (void *)tgt) { 1627 if (devtype == VSW_VNETPORT) { 1628 DERR(vswp, "%s: duplicate port entry " 1629 "found for portid %ld and key " 1630 "0x%llx", __func__, 1631 ((vsw_port_t *)arg)->p_instance, 1632 addr); 1633 } else { 1634 DERR(vswp, "%s: duplicate entry found" 1635 "for key 0x%llx", __func__, addr); 1636 } 1637 rv = 1; 1638 dup = 1; 1639 break; 1640 } 1641 tmp_ent = tmp_ent->nextp; 1642 } 1643 1644 /* 1645 * Port not on list so add it to end now. 1646 */ 1647 if (0 == dup) { 1648 D2(vswp, "%s: added entry for 0x%llx to table", 1649 __func__, addr); 1650 new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1651 new_ent->d_addr = (void *)tgt; 1652 new_ent->d_type = devtype; 1653 new_ent->nextp = NULL; 1654 1655 tmp_ent = ment; 1656 while (tmp_ent->nextp != NULL) 1657 tmp_ent = tmp_ent->nextp; 1658 1659 tmp_ent->nextp = new_ent; 1660 } 1661 } 1662 1663 RW_EXIT(&vswp->mfdbrw); 1664 return (rv); 1665 } 1666 1667 /* 1668 * Remove a multicast entry from the hashtable. 1669 * 1670 * Search hash table based on address. If match found, scan 1671 * list of ports associated with address. If specified port 1672 * found remove it from list. 1673 */ 1674 int 1675 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1676 { 1677 mfdb_ent_t *ment = NULL; 1678 mfdb_ent_t *curr_p, *prev_p; 1679 void *tgt = NULL; 1680 1681 D1(vswp, "%s: enter", __func__); 1682 1683 if (devtype == VSW_VNETPORT) { 1684 tgt = (vsw_port_t *)arg; 1685 D2(vswp, "%s: removing port %d from mFDB for address" 1686 " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr); 1687 } else { 1688 D2(vswp, "%s: removing entry", __func__); 1689 tgt = (void *)vswp; 1690 } 1691 1692 WRITE_ENTER(&vswp->mfdbrw); 1693 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1694 (mod_hash_val_t *)&ment) != 0) { 1695 D2(vswp, "%s: address 0x%llx not in table", __func__, addr); 1696 RW_EXIT(&vswp->mfdbrw); 1697 return (1); 1698 } 1699 1700 prev_p = curr_p = ment; 1701 1702 while (curr_p != NULL) { 1703 if (curr_p->d_addr == (void *)tgt) { 1704 if (devtype == VSW_VNETPORT) { 1705 D2(vswp, "%s: port %d found", __func__, 1706 ((vsw_port_t *)tgt)->p_instance); 1707 } else { 1708 D2(vswp, "%s: instance found", __func__); 1709 } 1710 1711 if (prev_p == curr_p) { 1712 /* 1713 * head of list, if no other element is in 1714 * list then destroy this entry, otherwise 1715 * just replace it with updated value. 1716 */ 1717 ment = curr_p->nextp; 1718 if (ment == NULL) { 1719 (void) mod_hash_destroy(vswp->mfdb, 1720 (mod_hash_val_t)addr); 1721 } else { 1722 (void) mod_hash_replace(vswp->mfdb, 1723 (mod_hash_key_t)addr, 1724 (mod_hash_val_t)ment); 1725 } 1726 } else { 1727 /* 1728 * Not head of list, no need to do 1729 * replacement, just adjust list pointers. 1730 */ 1731 prev_p->nextp = curr_p->nextp; 1732 } 1733 break; 1734 } 1735 1736 prev_p = curr_p; 1737 curr_p = curr_p->nextp; 1738 } 1739 1740 RW_EXIT(&vswp->mfdbrw); 1741 1742 D1(vswp, "%s: exit", __func__); 1743 1744 if (curr_p == NULL) 1745 return (1); 1746 kmem_free(curr_p, sizeof (mfdb_ent_t)); 1747 return (0); 1748 } 1749 1750 /* 1751 * Port is being deleted, but has registered an interest in one 1752 * or more multicast groups. Using the list of addresses maintained 1753 * within the port structure find the appropriate entry in the hash 1754 * table and remove this port from the list of interested ports. 1755 */ 1756 void 1757 vsw_del_mcst_port(vsw_port_t *port) 1758 { 1759 mcst_addr_t *mcap = NULL; 1760 vsw_t *vswp = port->p_vswp; 1761 1762 D1(vswp, "%s: enter", __func__); 1763 1764 mutex_enter(&port->mca_lock); 1765 1766 while ((mcap = port->mcap) != NULL) { 1767 1768 port->mcap = mcap->nextp; 1769 1770 mutex_exit(&port->mca_lock); 1771 1772 (void) vsw_del_mcst(vswp, VSW_VNETPORT, 1773 mcap->addr, port); 1774 1775 /* 1776 * Remove the address from HW. The address 1777 * will actually only be removed once the ref 1778 * count within the MAC layer has dropped to 1779 * zero. I.e. we can safely call this fn even 1780 * if other ports are interested in this 1781 * address. 1782 */ 1783 WRITE_ENTER(&vswp->mac_rwlock); 1784 if (vswp->mh != NULL && mcap->mac_added) { 1785 (void) mac_multicst_remove(vswp->mh, 1786 (uchar_t *)&mcap->mca); 1787 } 1788 RW_EXIT(&vswp->mac_rwlock); 1789 1790 kmem_free(mcap, sizeof (*mcap)); 1791 1792 mutex_enter(&port->mca_lock); 1793 1794 } 1795 1796 mutex_exit(&port->mca_lock); 1797 1798 D1(vswp, "%s: exit", __func__); 1799 } 1800 1801 /* 1802 * This vsw instance is detaching, but has registered an interest in one 1803 * or more multicast groups. Using the list of addresses maintained 1804 * within the vsw structure find the appropriate entry in the hash 1805 * table and remove this instance from the list of interested ports. 1806 */ 1807 void 1808 vsw_del_mcst_vsw(vsw_t *vswp) 1809 { 1810 mcst_addr_t *next_p = NULL; 1811 1812 D1(vswp, "%s: enter", __func__); 1813 1814 mutex_enter(&vswp->mca_lock); 1815 1816 while (vswp->mcap != NULL) { 1817 DERR(vswp, "%s: deleting addr 0x%llx", 1818 __func__, vswp->mcap->addr); 1819 (void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL); 1820 1821 next_p = vswp->mcap->nextp; 1822 kmem_free(vswp->mcap, sizeof (mcst_addr_t)); 1823 vswp->mcap = next_p; 1824 } 1825 1826 vswp->mcap = NULL; 1827 mutex_exit(&vswp->mca_lock); 1828 1829 D1(vswp, "%s: exit", __func__); 1830 } 1831 1832 static uint32_t 1833 vsw_get_same_dest_list(struct ether_header *ehp, 1834 mblk_t **rhead, mblk_t **rtail, mblk_t **mpp) 1835 { 1836 uint32_t count = 0; 1837 mblk_t *bp; 1838 mblk_t *nbp; 1839 mblk_t *head = NULL; 1840 mblk_t *tail = NULL; 1841 mblk_t *prev = NULL; 1842 struct ether_header *behp; 1843 1844 /* process the chain of packets */ 1845 bp = *mpp; 1846 while (bp) { 1847 nbp = bp->b_next; 1848 behp = (struct ether_header *)bp->b_rptr; 1849 bp->b_prev = NULL; 1850 if (ether_cmp(&ehp->ether_dhost, &behp->ether_dhost) == 0) { 1851 if (prev == NULL) { 1852 *mpp = nbp; 1853 } else { 1854 prev->b_next = nbp; 1855 } 1856 bp->b_next = NULL; 1857 if (head == NULL) { 1858 head = tail = bp; 1859 } else { 1860 tail->b_next = bp; 1861 tail = bp; 1862 } 1863 count++; 1864 } else { 1865 prev = bp; 1866 } 1867 bp = nbp; 1868 } 1869 *rhead = head; 1870 *rtail = tail; 1871 DTRACE_PROBE1(vsw_same_dest, int, count); 1872 return (count); 1873 } 1874 1875 static mblk_t * 1876 vsw_dupmsgchain(mblk_t *mp) 1877 { 1878 mblk_t *nmp = NULL; 1879 mblk_t **nmpp = &nmp; 1880 1881 for (; mp != NULL; mp = mp->b_next) { 1882 if ((*nmpp = dupmsg(mp)) == NULL) { 1883 freemsgchain(nmp); 1884 return (NULL); 1885 } 1886 1887 nmpp = &((*nmpp)->b_next); 1888 } 1889 1890 return (nmp); 1891 } 1892