1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/ldc.h> 66 
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/vlan.h>

/* Switching setup routines */
void vsw_setup_switching_timeout(void *arg);
void vsw_stop_switching_timeout(vsw_t *vswp);
int vsw_setup_switching(vsw_t *);
void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port, mac_resource_handle_t mrh);
static	int vsw_setup_layer2(vsw_t *);
static	int vsw_setup_layer3(vsw_t *);

/* Switching/data transmit routines */
static	void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port, mac_resource_handle_t);
static	void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
    vsw_port_t *port, mac_resource_handle_t);
static	int vsw_forward_all(vsw_t *vswp, mblk_t *mp,
    int caller, vsw_port_t *port);
static	int vsw_forward_grp(vsw_t *vswp, mblk_t *mp,
    int caller, vsw_port_t *port);

/* VLAN routines */
void vsw_create_vlans(void *arg, int type);
void vsw_destroy_vlans(void *arg, int type);
void vsw_vlan_add_ids(void *arg, int type);
void vsw_vlan_remove_ids(void *arg, int type);
static	void vsw_vlan_create_hash(void *arg, int type);
static	void vsw_vlan_destroy_hash(void *arg, int type);
boolean_t vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp,
    uint16_t *vidp);
mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
/* Name must match the definition of vsw_vlan_frame_untag() in this file. */
uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, mblk_t **npt);
boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);

/* Forwarding database (FDB) routines */
void vsw_fdbe_add(vsw_t *vswp, void *port);
void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr);
static	vsw_fdbe_t *vsw_fdbe_find(vsw_t *vswp, struct ether_addr *);
static	void vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);

int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
void vsw_del_mcst_vsw(vsw_t *);

/* Support functions */
static	mblk_t *vsw_dupmsgchain(mblk_t *mp);
static	uint32_t vsw_get_same_dest_list(struct ether_header *ehp,
    mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);


/*
 * Functions imported from other files.
 */
extern mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
extern mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t);
extern int vsw_mac_open(vsw_t *vswp);
extern void vsw_mac_close(vsw_t *vswp);
extern void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
    mblk_t *mp, vsw_macrx_flags_t flags);
extern void vsw_set_addrs(vsw_t *vswp);
extern int vsw_get_hw_maddr(vsw_t *);
extern int vsw_mac_attach(vsw_t *vswp);
extern int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt,
    uint32_t count);
extern void vsw_hio_init(vsw_t *vswp);
extern void vsw_hio_start_ports(vsw_t *vswp);

/*
 * Tunables used in this file.
 */
extern int vsw_setup_switching_delay;
extern uint32_t vsw_vlan_nchains;
extern uint32_t vsw_fdbe_refcnt_delay;

/*
 * Reference counting on forwarding database entries.
 *
 * VSW_FDBE_REFHOLD(p) atomically increments p->refcnt and asserts that the
 * counter did not wrap to zero. VSW_FDBE_REFRELE(p) asserts that a reference
 * is outstanding and atomically decrements p->refcnt.
 *
 * Both macros are wrapped in do { } while (0) so that each expands to a
 * single statement and can be used safely in unbraced if/else bodies.
 */
#define	VSW_FDBE_REFHOLD(p)						\
	do {								\
		atomic_inc_32(&(p)->refcnt);				\
		ASSERT((p)->refcnt != 0);				\
	_NOTE(CONSTCOND)						\
	} while (0)

#define	VSW_FDBE_REFRELE(p)						\
	do {								\
		ASSERT((p)->refcnt != 0);				\
		atomic_dec_32(&(p)->refcnt);				\
	_NOTE(CONSTCOND)						\
	} while (0)

/*
 * Timeout routine to setup switching mode:
 * vsw_setup_switching() is invoked from vsw_attach() or vsw_update_md_prop()
 * initially. If it fails and the error is EAGAIN, then this timeout handler
 * is started to retry vsw_setup_switching(). vsw_setup_switching() is retried
 * until we successfully finish it; or the returned error is not EAGAIN.
167 */ 168 void 169 vsw_setup_switching_timeout(void *arg) 170 { 171 vsw_t *vswp = (vsw_t *)arg; 172 int rv; 173 174 if (vswp->swtmout_enabled == B_FALSE) 175 return; 176 177 rv = vsw_setup_switching(vswp); 178 179 if (rv == 0) { 180 /* 181 * Successfully setup switching mode. 182 * Program unicst, mcst addrs of vsw 183 * interface and ports in the physdev. 184 */ 185 vsw_set_addrs(vswp); 186 187 /* Start HIO for ports that have already connected */ 188 vsw_hio_start_ports(vswp); 189 } 190 191 mutex_enter(&vswp->swtmout_lock); 192 193 if (rv == EAGAIN && vswp->swtmout_enabled == B_TRUE) { 194 /* 195 * Reschedule timeout() if the error is EAGAIN and the 196 * timeout is still enabled. For errors other than EAGAIN, 197 * we simply return without rescheduling timeout(). 198 */ 199 vswp->swtmout_id = 200 timeout(vsw_setup_switching_timeout, vswp, 201 (vsw_setup_switching_delay * drv_usectohz(MICROSEC))); 202 goto exit; 203 } 204 205 /* timeout handler completed */ 206 vswp->swtmout_enabled = B_FALSE; 207 vswp->swtmout_id = 0; 208 209 exit: 210 mutex_exit(&vswp->swtmout_lock); 211 } 212 213 /* 214 * Cancel the timeout handler to setup switching mode. 215 */ 216 void 217 vsw_stop_switching_timeout(vsw_t *vswp) 218 { 219 timeout_id_t tid; 220 221 mutex_enter(&vswp->swtmout_lock); 222 223 tid = vswp->swtmout_id; 224 225 if (tid != 0) { 226 /* signal timeout handler to stop */ 227 vswp->swtmout_enabled = B_FALSE; 228 vswp->swtmout_id = 0; 229 mutex_exit(&vswp->swtmout_lock); 230 231 (void) untimeout(tid); 232 } else { 233 mutex_exit(&vswp->swtmout_lock); 234 } 235 236 (void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE); 237 238 WRITE_ENTER(&vswp->mac_rwlock); 239 vswp->mac_open_retries = 0; 240 RW_EXIT(&vswp->mac_rwlock); 241 } 242 243 /* 244 * Setup the required switching mode. 245 * This routine is invoked from vsw_attach() or vsw_update_md_prop() 246 * initially. 
If it fails and the error is EAGAIN, then a timeout handler 247 * is started to retry vsw_setup_switching(), until it successfully finishes; 248 * or the returned error is not EAGAIN. 249 * 250 * Returns: 251 * 0 on success. 252 * EAGAIN if retry is needed. 253 * 1 on all other failures. 254 */ 255 int 256 vsw_setup_switching(vsw_t *vswp) 257 { 258 int i, rv = 1; 259 260 D1(vswp, "%s: enter", __func__); 261 262 /* 263 * Select best switching mode. 264 * Note that we start from the saved smode_idx. This is done as 265 * this routine can be called from the timeout handler to retry 266 * setting up a specific mode. Currently only the function which 267 * sets up layer2/promisc mode returns EAGAIN if the underlying 268 * physical device is not available yet, causing retries. 269 */ 270 for (i = vswp->smode_idx; i < vswp->smode_num; i++) { 271 vswp->smode_idx = i; 272 switch (vswp->smode[i]) { 273 case VSW_LAYER2: 274 case VSW_LAYER2_PROMISC: 275 rv = vsw_setup_layer2(vswp); 276 break; 277 278 case VSW_LAYER3: 279 rv = vsw_setup_layer3(vswp); 280 break; 281 282 default: 283 DERR(vswp, "unknown switch mode"); 284 break; 285 } 286 287 if ((rv == 0) || (rv == EAGAIN)) 288 break; 289 290 /* all other errors(rv != 0): continue & select the next mode */ 291 rv = 1; 292 } 293 294 if (rv && (rv != EAGAIN)) { 295 cmn_err(CE_WARN, "!vsw%d: Unable to setup specified " 296 "switching mode", vswp->instance); 297 } else if (rv == 0) { 298 (void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE); 299 } 300 301 D2(vswp, "%s: Operating in mode %d", __func__, 302 vswp->smode[vswp->smode_idx]); 303 304 D1(vswp, "%s: exit", __func__); 305 306 return (rv); 307 } 308 309 /* 310 * Setup for layer 2 switching. 311 * 312 * Returns: 313 * 0 on success. 314 * EAGAIN if retry is needed. 315 * EIO on all other failures. 
316 */ 317 static int 318 vsw_setup_layer2(vsw_t *vswp) 319 { 320 int rv; 321 322 D1(vswp, "%s: enter", __func__); 323 324 vswp->vsw_switch_frame = vsw_switch_l2_frame; 325 326 rv = strlen(vswp->physname); 327 if (rv == 0) { 328 /* 329 * Physical device name is NULL, which is 330 * required for layer 2. 331 */ 332 cmn_err(CE_WARN, "!vsw%d: no physical device name specified", 333 vswp->instance); 334 return (EIO); 335 } 336 337 WRITE_ENTER(&vswp->mac_rwlock); 338 339 rv = vsw_mac_open(vswp); 340 if (rv != 0) { 341 if (rv != EAGAIN) { 342 cmn_err(CE_WARN, "!vsw%d: Unable to open physical " 343 "device: %s\n", vswp->instance, vswp->physname); 344 } 345 RW_EXIT(&vswp->mac_rwlock); 346 return (rv); 347 } 348 349 if (vswp->smode[vswp->smode_idx] == VSW_LAYER2) { 350 /* 351 * Verify that underlying device can support multiple 352 * unicast mac addresses. 353 */ 354 rv = vsw_get_hw_maddr(vswp); 355 if (rv != 0) { 356 goto exit_error; 357 } 358 } 359 360 /* 361 * Attempt to link into the MAC layer so we can get 362 * and send packets out over the physical adapter. 363 */ 364 rv = vsw_mac_attach(vswp); 365 if (rv != 0) { 366 /* 367 * Registration with the MAC layer has failed, 368 * so return error so that can fall back to next 369 * prefered switching method. 
370 */ 371 cmn_err(CE_WARN, "!vsw%d: Unable to setup physical device: " 372 "%s\n", vswp->instance, vswp->physname); 373 goto exit_error; 374 } 375 376 D1(vswp, "%s: exit", __func__); 377 378 RW_EXIT(&vswp->mac_rwlock); 379 380 /* Initialize HybridIO related stuff */ 381 vsw_hio_init(vswp); 382 return (0); 383 384 exit_error: 385 vsw_mac_close(vswp); 386 RW_EXIT(&vswp->mac_rwlock); 387 return (EIO); 388 } 389 390 static int 391 vsw_setup_layer3(vsw_t *vswp) 392 { 393 D1(vswp, "%s: enter", __func__); 394 395 D2(vswp, "%s: operating in layer 3 mode", __func__); 396 vswp->vsw_switch_frame = vsw_switch_l3_frame; 397 398 D1(vswp, "%s: exit", __func__); 399 400 return (0); 401 } 402 403 /* ARGSUSED */ 404 void 405 vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *port, 406 mac_resource_handle_t mrh) 407 { 408 freemsgchain(mp); 409 } 410 411 /* 412 * Switch the given ethernet frame when operating in layer 2 mode. 413 * 414 * vswp: pointer to the vsw instance 415 * mp: pointer to chain of ethernet frame(s) to be switched 416 * caller: identifies the source of this frame as: 417 * 1. VSW_VNETPORT - a vsw port (connected to a vnet). 418 * 2. VSW_PHYSDEV - the physical ethernet device 419 * 3. VSW_LOCALDEV - vsw configured as a virtual interface 420 * arg: argument provided by the caller. 421 * 1. for VNETPORT - pointer to the corresponding vsw_port_t. 422 * 2. for PHYSDEV - NULL 423 * 3. for LOCALDEV - pointer to to this vsw_t(self) 424 */ 425 void 426 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 427 vsw_port_t *arg, mac_resource_handle_t mrh) 428 { 429 struct ether_header *ehp; 430 mblk_t *bp, *ret_m; 431 mblk_t *mpt = NULL; 432 uint32_t count; 433 vsw_fdbe_t *fp; 434 435 D1(vswp, "%s: enter (caller %d)", __func__, caller); 436 437 /* 438 * PERF: rather than breaking up the chain here, scan it 439 * to find all mblks heading to same destination and then 440 * pass that sub-chain to the lower transmit functions. 
441 */ 442 443 /* process the chain of packets */ 444 bp = mp; 445 while (bp) { 446 ehp = (struct ether_header *)bp->b_rptr; 447 count = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp); 448 ASSERT(count != 0); 449 450 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 451 __func__, MBLKSIZE(mp), MBLKL(mp)); 452 453 if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) { 454 /* 455 * If destination is VSW_LOCALDEV (vsw as an eth 456 * interface) and if the device is up & running, 457 * send the packet up the stack on this host. 458 * If the virtual interface is down, drop the packet. 459 */ 460 if (caller != VSW_LOCALDEV) { 461 vsw_mac_rx(vswp, mrh, mp, VSW_MACRX_FREEMSG); 462 } else { 463 freemsgchain(mp); 464 } 465 continue; 466 } 467 468 /* 469 * Find fdb entry for the destination 470 * and hold a reference to it. 471 */ 472 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 473 if (fp != NULL) { 474 475 /* 476 * If plumbed and in promisc mode then copy msg 477 * and send up the stack. 478 */ 479 vsw_mac_rx(vswp, mrh, mp, 480 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 481 482 /* 483 * If the destination is in FDB, the packet 484 * should be forwarded to the correponding 485 * vsw_port (connected to a vnet device - 486 * VSW_VNETPORT) 487 */ 488 (void) vsw_portsend(fp->portp, mp, mpt, count); 489 490 /* Release the reference on the fdb entry */ 491 VSW_FDBE_REFRELE(fp); 492 } else { 493 /* 494 * Destination not in FDB. 495 * 496 * If the destination is broadcast or 497 * multicast forward the packet to all 498 * (VNETPORTs, PHYSDEV, LOCALDEV), 499 * except the caller. 
500 */ 501 if (IS_BROADCAST(ehp)) { 502 D2(vswp, "%s: BROADCAST pkt", __func__); 503 (void) vsw_forward_all(vswp, mp, caller, arg); 504 } else if (IS_MULTICAST(ehp)) { 505 D2(vswp, "%s: MULTICAST pkt", __func__); 506 (void) vsw_forward_grp(vswp, mp, caller, arg); 507 } else { 508 /* 509 * If the destination is unicast, and came 510 * from either a logical network device or 511 * the switch itself when it is plumbed, then 512 * send it out on the physical device and also 513 * up the stack if the logical interface is 514 * in promiscious mode. 515 * 516 * NOTE: The assumption here is that if we 517 * cannot find the destination in our fdb, its 518 * a unicast address, and came from either a 519 * vnet or down the stack (when plumbed) it 520 * must be destinded for an ethernet device 521 * outside our ldoms. 522 */ 523 if (caller == VSW_VNETPORT) { 524 /* promisc check copy etc */ 525 vsw_mac_rx(vswp, mrh, mp, 526 VSW_MACRX_PROMISC | 527 VSW_MACRX_COPYMSG); 528 529 if ((ret_m = vsw_tx_msg(vswp, mp)) 530 != NULL) { 531 DERR(vswp, "%s: drop mblks to " 532 "phys dev", __func__); 533 freemsgchain(ret_m); 534 } 535 536 } else if (caller == VSW_PHYSDEV) { 537 /* 538 * Pkt seen because card in promisc 539 * mode. Send up stack if plumbed in 540 * promisc mode, else drop it. 541 */ 542 vsw_mac_rx(vswp, mrh, mp, 543 VSW_MACRX_PROMISC | 544 VSW_MACRX_FREEMSG); 545 546 } else if (caller == VSW_LOCALDEV) { 547 /* 548 * Pkt came down the stack, send out 549 * over physical device. 550 */ 551 if ((ret_m = vsw_tx_msg(vswp, mp)) 552 != NULL) { 553 DERR(vswp, "%s: drop mblks to " 554 "phys dev", __func__); 555 freemsgchain(ret_m); 556 } 557 } 558 } 559 } 560 } 561 D1(vswp, "%s: exit\n", __func__); 562 } 563 564 /* 565 * Switch ethernet frame when in layer 3 mode (i.e. using IP 566 * layer to do the routing). 567 * 568 * There is a large amount of overlap between this function and 569 * vsw_switch_l2_frame. At some stage we need to revisit and refactor 570 * both these functions. 
571 */ 572 void 573 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 574 vsw_port_t *arg, mac_resource_handle_t mrh) 575 { 576 struct ether_header *ehp; 577 mblk_t *bp = NULL; 578 mblk_t *mpt; 579 uint32_t count; 580 vsw_fdbe_t *fp; 581 582 D1(vswp, "%s: enter (caller %d)", __func__, caller); 583 584 /* 585 * In layer 3 mode should only ever be switching packets 586 * between IP layer and vnet devices. So make sure thats 587 * who is invoking us. 588 */ 589 if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) { 590 DERR(vswp, "%s: unexpected caller (%d)", __func__, caller); 591 freemsgchain(mp); 592 return; 593 } 594 595 /* process the chain of packets */ 596 bp = mp; 597 while (bp) { 598 ehp = (struct ether_header *)bp->b_rptr; 599 count = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp); 600 ASSERT(count != 0); 601 602 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 603 __func__, MBLKSIZE(mp), MBLKL(mp)); 604 605 /* 606 * Find fdb entry for the destination 607 * and hold a reference to it. 608 */ 609 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 610 if (fp != NULL) { 611 612 D2(vswp, "%s: sending to target port", __func__); 613 (void) vsw_portsend(fp->portp, mp, mpt, count); 614 615 /* Release the reference on the fdb entry */ 616 VSW_FDBE_REFRELE(fp); 617 } else { 618 /* 619 * Destination not in FDB 620 * 621 * If the destination is broadcast or 622 * multicast forward the packet to all 623 * (VNETPORTs, PHYSDEV, LOCALDEV), 624 * except the caller. 625 */ 626 if (IS_BROADCAST(ehp)) { 627 D2(vswp, "%s: BROADCAST pkt", __func__); 628 (void) vsw_forward_all(vswp, mp, caller, arg); 629 } else if (IS_MULTICAST(ehp)) { 630 D2(vswp, "%s: MULTICAST pkt", __func__); 631 (void) vsw_forward_grp(vswp, mp, caller, arg); 632 } else { 633 /* 634 * Unicast pkt from vnet that we don't have 635 * an FDB entry for, so must be destinded for 636 * the outside world. Attempt to send up to the 637 * IP layer to allow it to deal with it. 
638 */ 639 if (caller == VSW_VNETPORT) { 640 vsw_mac_rx(vswp, mrh, 641 mp, VSW_MACRX_FREEMSG); 642 } 643 } 644 } 645 } 646 647 D1(vswp, "%s: exit", __func__); 648 } 649 650 /* 651 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV), 652 * except the caller (port on which frame arrived). 653 */ 654 static int 655 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 656 { 657 vsw_port_list_t *plist = &vswp->plist; 658 vsw_port_t *portp; 659 mblk_t *nmp = NULL; 660 mblk_t *ret_m = NULL; 661 int skip_port = 0; 662 663 D1(vswp, "vsw_forward_all: enter\n"); 664 665 /* 666 * Broadcast message from inside ldoms so send to outside 667 * world if in either of layer 2 modes. 668 */ 669 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 670 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 671 ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) { 672 673 nmp = vsw_dupmsgchain(mp); 674 if (nmp) { 675 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 676 DERR(vswp, "%s: dropping pkt(s) " 677 "consisting of %ld bytes of data for" 678 " physical device", __func__, MBLKL(ret_m)); 679 freemsgchain(ret_m); 680 } 681 } 682 } 683 684 if (caller == VSW_VNETPORT) 685 skip_port = 1; 686 687 /* 688 * Broadcast message from other vnet (layer 2 or 3) or outside 689 * world (layer 2 only), send up stack if plumbed. 690 */ 691 if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) { 692 vsw_mac_rx(vswp, NULL, mp, VSW_MACRX_COPYMSG); 693 } 694 695 /* send it to all VNETPORTs */ 696 READ_ENTER(&plist->lockrw); 697 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 698 D2(vswp, "vsw_forward_all: port %d", portp->p_instance); 699 /* 700 * Caution ! - don't reorder these two checks as arg 701 * will be NULL if the caller is PHYSDEV. skip_port is 702 * only set if caller is VNETPORT. 
703 */ 704 if ((skip_port) && (portp == arg)) { 705 continue; 706 } else { 707 nmp = vsw_dupmsgchain(mp); 708 if (nmp) { 709 mblk_t *mpt = nmp; 710 uint32_t count = 1; 711 712 /* Find tail */ 713 while (mpt->b_next != NULL) { 714 mpt = mpt->b_next; 715 count++; 716 } 717 /* 718 * The plist->lockrw is protecting the 719 * portp from getting destroyed here. 720 * So, no ref_cnt is incremented here. 721 */ 722 (void) vsw_portsend(portp, nmp, mpt, count); 723 } else { 724 DERR(vswp, "vsw_forward_all: nmp NULL"); 725 } 726 } 727 } 728 RW_EXIT(&plist->lockrw); 729 730 freemsgchain(mp); 731 732 D1(vswp, "vsw_forward_all: exit\n"); 733 return (0); 734 } 735 736 /* 737 * Forward pkts to any devices or interfaces which have registered 738 * an interest in them (i.e. multicast groups). 739 */ 740 static int 741 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 742 { 743 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 744 mfdb_ent_t *entp = NULL; 745 mfdb_ent_t *tpp = NULL; 746 vsw_port_t *port; 747 uint64_t key = 0; 748 mblk_t *nmp = NULL; 749 mblk_t *ret_m = NULL; 750 boolean_t check_if = B_TRUE; 751 752 /* 753 * Convert address to hash table key 754 */ 755 KEY_HASH(key, &ehp->ether_dhost); 756 757 D1(vswp, "%s: key 0x%llx", __func__, key); 758 759 /* 760 * If pkt came from either a vnet or down the stack (if we are 761 * plumbed) and we are in layer 2 mode, then we send the pkt out 762 * over the physical adapter, and then check to see if any other 763 * vnets are interested in it. 
764 */ 765 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 766 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 767 ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) { 768 nmp = vsw_dupmsgchain(mp); 769 if (nmp) { 770 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 771 DERR(vswp, "%s: dropping pkt(s) consisting of " 772 "%ld bytes of data for physical device", 773 __func__, MBLKL(ret_m)); 774 freemsgchain(ret_m); 775 } 776 } 777 } 778 779 READ_ENTER(&vswp->mfdbrw); 780 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key, 781 (mod_hash_val_t *)&entp) != 0) { 782 D3(vswp, "%s: no table entry found for addr 0x%llx", 783 __func__, key); 784 } else { 785 /* 786 * Send to list of devices associated with this address... 787 */ 788 for (tpp = entp; tpp != NULL; tpp = tpp->nextp) { 789 790 /* dont send to ourselves */ 791 if ((caller == VSW_VNETPORT) && 792 (tpp->d_addr == (void *)arg)) { 793 port = (vsw_port_t *)tpp->d_addr; 794 D3(vswp, "%s: not sending to ourselves" 795 " : port %d", __func__, port->p_instance); 796 continue; 797 798 } else if ((caller == VSW_LOCALDEV) && 799 (tpp->d_type == VSW_LOCALDEV)) { 800 D2(vswp, "%s: not sending back up stack", 801 __func__); 802 continue; 803 } 804 805 if (tpp->d_type == VSW_VNETPORT) { 806 port = (vsw_port_t *)tpp->d_addr; 807 D3(vswp, "%s: sending to port %ld for addr " 808 "0x%llx", __func__, port->p_instance, key); 809 810 nmp = vsw_dupmsgchain(mp); 811 if (nmp) { 812 mblk_t *mpt = nmp; 813 uint32_t count = 1; 814 815 /* Find tail */ 816 while (mpt->b_next != NULL) { 817 mpt = mpt->b_next; 818 count++; 819 } 820 /* 821 * The vswp->mfdbrw is protecting the 822 * portp from getting destroyed here. 823 * So, no ref_cnt is incremented here. 
824 */ 825 (void) vsw_portsend(port, nmp, mpt, 826 count); 827 } 828 } else { 829 vsw_mac_rx(vswp, NULL, 830 mp, VSW_MACRX_COPYMSG); 831 D2(vswp, "%s: sending up stack" 832 " for addr 0x%llx", __func__, key); 833 check_if = B_FALSE; 834 } 835 } 836 } 837 838 RW_EXIT(&vswp->mfdbrw); 839 840 /* 841 * If the pkt came from either a vnet or from physical device, 842 * and if we havent already sent the pkt up the stack then we 843 * check now if we can/should (i.e. the interface is plumbed 844 * and in promisc mode). 845 */ 846 if ((check_if) && 847 ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) { 848 vsw_mac_rx(vswp, NULL, mp, 849 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 850 } 851 852 freemsgchain(mp); 853 854 D1(vswp, "%s: exit", __func__); 855 856 return (0); 857 } 858 859 /* 860 * This function creates the vlan id hash table for the given vsw device or 861 * port. It then adds each vlan that the device or port has been assigned, 862 * into this hash table. 863 * Arguments: 864 * arg: vsw device or port. 865 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 866 */ 867 void 868 vsw_create_vlans(void *arg, int type) 869 { 870 /* create vlan hash table */ 871 vsw_vlan_create_hash(arg, type); 872 873 /* add vlan ids of the vsw device into its hash table */ 874 vsw_vlan_add_ids(arg, type); 875 } 876 877 /* 878 * This function removes the vlan ids of the vsw device or port from its hash 879 * table. It then destroys the vlan hash table. 880 * Arguments: 881 * arg: vsw device or port. 882 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 883 */ 884 void 885 vsw_destroy_vlans(void *arg, int type) 886 { 887 /* remove vlan ids from the hash table */ 888 vsw_vlan_remove_ids(arg, type); 889 890 /* destroy vlan-hash-table */ 891 vsw_vlan_destroy_hash(arg, type); 892 } 893 894 /* 895 * Create a vlan-id hash table for the given vsw device or port. 
896 */ 897 static void 898 vsw_vlan_create_hash(void *arg, int type) 899 { 900 char hashname[MAXNAMELEN]; 901 902 if (type == VSW_LOCALDEV) { 903 vsw_t *vswp = (vsw_t *)arg; 904 905 (void) snprintf(hashname, MAXNAMELEN, "vsw%d-vlan-hash", 906 vswp->instance); 907 908 vswp->vlan_nchains = vsw_vlan_nchains; 909 vswp->vlan_hashp = mod_hash_create_idhash(hashname, 910 vswp->vlan_nchains, mod_hash_null_valdtor); 911 912 } else if (type == VSW_VNETPORT) { 913 vsw_port_t *portp = (vsw_port_t *)arg; 914 915 (void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash", 916 portp->p_instance); 917 918 portp->vlan_nchains = vsw_vlan_nchains; 919 portp->vlan_hashp = mod_hash_create_idhash(hashname, 920 portp->vlan_nchains, mod_hash_null_valdtor); 921 922 } else { 923 return; 924 } 925 } 926 927 /* 928 * Destroy the vlan-id hash table for the given vsw device or port. 929 */ 930 static void 931 vsw_vlan_destroy_hash(void *arg, int type) 932 { 933 if (type == VSW_LOCALDEV) { 934 vsw_t *vswp = (vsw_t *)arg; 935 936 mod_hash_destroy_hash(vswp->vlan_hashp); 937 vswp->vlan_nchains = 0; 938 } else if (type == VSW_VNETPORT) { 939 vsw_port_t *portp = (vsw_port_t *)arg; 940 941 mod_hash_destroy_hash(portp->vlan_hashp); 942 portp->vlan_nchains = 0; 943 } else { 944 return; 945 } 946 } 947 948 /* 949 * Add vlan ids of the given vsw device or port into its hash table. 
950 */ 951 void 952 vsw_vlan_add_ids(void *arg, int type) 953 { 954 int rv; 955 int i; 956 957 if (type == VSW_LOCALDEV) { 958 vsw_t *vswp = (vsw_t *)arg; 959 960 rv = mod_hash_insert(vswp->vlan_hashp, 961 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 962 (mod_hash_val_t)B_TRUE); 963 ASSERT(rv == 0); 964 965 for (i = 0; i < vswp->nvids; i++) { 966 rv = mod_hash_insert(vswp->vlan_hashp, 967 (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i]), 968 (mod_hash_val_t)B_TRUE); 969 ASSERT(rv == 0); 970 } 971 972 } else if (type == VSW_VNETPORT) { 973 vsw_port_t *portp = (vsw_port_t *)arg; 974 975 rv = mod_hash_insert(portp->vlan_hashp, 976 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 977 (mod_hash_val_t)B_TRUE); 978 ASSERT(rv == 0); 979 980 for (i = 0; i < portp->nvids; i++) { 981 rv = mod_hash_insert(portp->vlan_hashp, 982 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]), 983 (mod_hash_val_t)B_TRUE); 984 ASSERT(rv == 0); 985 } 986 987 } else { 988 return; 989 } 990 } 991 992 /* 993 * Remove vlan ids of the given vsw device or port from its hash table. 
994 */ 995 void 996 vsw_vlan_remove_ids(void *arg, int type) 997 { 998 mod_hash_val_t vp; 999 int rv; 1000 int i; 1001 1002 if (type == VSW_LOCALDEV) { 1003 vsw_t *vswp = (vsw_t *)arg; 1004 1005 rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->pvid); 1006 if (rv == B_TRUE) { 1007 rv = mod_hash_remove(vswp->vlan_hashp, 1008 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 1009 (mod_hash_val_t *)&vp); 1010 ASSERT(rv == 0); 1011 } 1012 1013 for (i = 0; i < vswp->nvids; i++) { 1014 rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->vids[i]); 1015 if (rv == B_TRUE) { 1016 rv = mod_hash_remove(vswp->vlan_hashp, 1017 (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i]), 1018 (mod_hash_val_t *)&vp); 1019 ASSERT(rv == 0); 1020 } 1021 } 1022 1023 } else if (type == VSW_VNETPORT) { 1024 vsw_port_t *portp = (vsw_port_t *)arg; 1025 1026 portp = (vsw_port_t *)arg; 1027 rv = vsw_vlan_lookup(portp->vlan_hashp, portp->pvid); 1028 if (rv == B_TRUE) { 1029 rv = mod_hash_remove(portp->vlan_hashp, 1030 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 1031 (mod_hash_val_t *)&vp); 1032 ASSERT(rv == 0); 1033 } 1034 1035 for (i = 0; i < portp->nvids; i++) { 1036 rv = vsw_vlan_lookup(portp->vlan_hashp, portp->vids[i]); 1037 if (rv == B_TRUE) { 1038 rv = mod_hash_remove(portp->vlan_hashp, 1039 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]), 1040 (mod_hash_val_t *)&vp); 1041 ASSERT(rv == 0); 1042 } 1043 } 1044 1045 } else { 1046 return; 1047 } 1048 } 1049 1050 /* 1051 * Find the given vlan id in the hash table. 1052 * Return: B_TRUE if the id is found; B_FALSE if not found. 1053 */ 1054 boolean_t 1055 vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid) 1056 { 1057 int rv; 1058 mod_hash_val_t vp; 1059 1060 rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp); 1061 1062 if (rv != 0) 1063 return (B_FALSE); 1064 1065 return (B_TRUE); 1066 } 1067 1068 /* 1069 * Add an entry into FDB for the given vsw. 
1070 */ 1071 void 1072 vsw_fdbe_add(vsw_t *vswp, void *port) 1073 { 1074 uint64_t addr = 0; 1075 vsw_port_t *portp; 1076 vsw_fdbe_t *fp; 1077 int rv; 1078 1079 portp = (vsw_port_t *)port; 1080 KEY_HASH(addr, &portp->p_macaddr); 1081 1082 fp = kmem_zalloc(sizeof (vsw_fdbe_t), KM_SLEEP); 1083 fp->portp = port; 1084 1085 /* 1086 * Note: duplicate keys will be rejected by mod_hash. 1087 */ 1088 rv = mod_hash_insert(vswp->fdb_hashp, (mod_hash_key_t)addr, 1089 (mod_hash_val_t)fp); 1090 ASSERT(rv == 0); 1091 } 1092 1093 /* 1094 * Remove an entry from FDB. 1095 */ 1096 void 1097 vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr) 1098 { 1099 uint64_t addr = 0; 1100 vsw_fdbe_t *fp; 1101 int rv; 1102 1103 KEY_HASH(addr, eaddr); 1104 1105 /* 1106 * Remove the entry from fdb hash table. 1107 * This prevents further references to this fdb entry. 1108 */ 1109 rv = mod_hash_remove(vswp->fdb_hashp, (mod_hash_key_t)addr, 1110 (mod_hash_val_t *)&fp); 1111 if (rv != 0) { 1112 /* invalid key? */ 1113 return; 1114 } 1115 1116 /* 1117 * If there are threads already ref holding before the entry was 1118 * removed from hash table, then wait for ref count to drop to zero. 1119 */ 1120 while (fp->refcnt != 0) { 1121 delay(drv_usectohz(vsw_fdbe_refcnt_delay)); 1122 } 1123 1124 kmem_free(fp, sizeof (*fp)); 1125 } 1126 1127 /* 1128 * Search fdb for a given mac address. If an entry is found, hold 1129 * a reference to it and return the entry, else returns NULL. 1130 */ 1131 static vsw_fdbe_t * 1132 vsw_fdbe_find(vsw_t *vswp, struct ether_addr *addrp) 1133 { 1134 uint64_t key = 0; 1135 vsw_fdbe_t *fp; 1136 int rv; 1137 1138 KEY_HASH(key, addrp); 1139 1140 rv = mod_hash_find_cb(vswp->fdb_hashp, (mod_hash_key_t)key, 1141 (mod_hash_val_t *)&fp, vsw_fdbe_find_cb); 1142 1143 if (rv != 0) 1144 return (NULL); 1145 1146 return (fp); 1147 } 1148 1149 /* 1150 * Callback function provided to mod_hash_find_cb(). 
After finding the fdb
 * entry corresponding to the key (macaddr), this callback will be invoked by
 * mod_hash_find_cb() to atomically increment the reference count on the fdb
 * entry before returning the found entry.
 */
static void
vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
{
	vsw_fdbe_t	*fdbe = (vsw_fdbe_t *)val;

	_NOTE(ARGUNUSED(key))

	/* The hash value is the fdb entry itself; take a hold on it. */
	VSW_FDBE_REFHOLD(fdbe);
}

/*
 * A given frame must be always tagged with the appropriate vlan id (unless it
 * is in the default-vlan) before the mac address switching function is called.
 * Otherwise, after switching function determines the destination, we cannot
 * figure out if the destination belongs to the same vlan that the frame
 * originated from and if it needs tag/untag. Frames which are inbound from
 * the external(physical) network over a vlan trunk link are always tagged.
 * However frames which are received from a vnet-port over ldc or frames which
 * are coming down the stack on the service domain over vsw interface may be
 * untagged. These frames must be tagged with the appropriate pvid of the
 * sender (vnet-port or vsw device), before invoking the switching function.
 *
 * Arguments:
 *	arg:  caller of the function.
 *	type: type of arg(caller): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port)
 *	mp:   frame(s) to be tagged.
1178 */ 1179 mblk_t * 1180 vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp) 1181 { 1182 vsw_t *vswp; 1183 vsw_port_t *portp; 1184 struct ether_header *ehp; 1185 mblk_t *bp; 1186 mblk_t *bpt; 1187 mblk_t *bph; 1188 mblk_t *bpn; 1189 uint16_t pvid; 1190 1191 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1192 1193 if (type == VSW_LOCALDEV) { 1194 vswp = (vsw_t *)arg; 1195 pvid = vswp->pvid; 1196 portp = NULL; 1197 } else { 1198 /* VSW_VNETPORT */ 1199 portp = (vsw_port_t *)arg; 1200 pvid = portp->pvid; 1201 vswp = portp->p_vswp; 1202 } 1203 1204 bpn = bph = bpt = NULL; 1205 1206 for (bp = mp; bp != NULL; bp = bpn) { 1207 1208 bpn = bp->b_next; 1209 bp->b_next = bp->b_prev = NULL; 1210 1211 /* Determine if it is an untagged frame */ 1212 ehp = (struct ether_header *)bp->b_rptr; 1213 1214 if (ehp->ether_type != ETHERTYPE_VLAN) { /* untagged */ 1215 1216 /* no need to tag if the frame is in default vlan */ 1217 if (pvid != vswp->default_vlan_id) { 1218 bp = vnet_vlan_insert_tag(bp, pvid); 1219 if (bp == NULL) { 1220 continue; 1221 } 1222 } 1223 } 1224 1225 /* build a chain of processed packets */ 1226 if (bph == NULL) { 1227 bph = bpt = bp; 1228 } else { 1229 bpt->b_next = bp; 1230 bpt = bp; 1231 } 1232 1233 } 1234 1235 return (bph); 1236 } 1237 1238 /* 1239 * Frames destined to a vnet-port or to the local vsw interface, must be 1240 * untagged if necessary before sending. This function first checks that the 1241 * frame can be sent to the destination in the vlan identified by the frame 1242 * tag. Note that when this function is invoked the frame must have been 1243 * already tagged (unless it is in the default-vlan). Because, this function is 1244 * called when the switching function determines the destination and invokes 1245 * its send function (vnet-port or vsw interface) and all frames would have 1246 * been tagged by this time (see comments in vsw_vlan_frame_pretag()). 1247 * 1248 * Arguments: 1249 * arg: destination device. 
1250 * type: type of arg(destination): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port) 1251 * np: head of pkt chain to be validated and untagged. 1252 * npt: tail of pkt chain to be validated and untagged. 1253 * 1254 * Returns: 1255 * np: head of updated chain of packets 1256 * npt: tail of updated chain of packets 1257 * rv: count of any packets dropped 1258 */ 1259 uint32_t 1260 vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, mblk_t **npt) 1261 { 1262 mblk_t *bp; 1263 mblk_t *bpt; 1264 mblk_t *bph; 1265 mblk_t *bpn; 1266 vsw_port_t *portp; 1267 vsw_t *vswp; 1268 uint32_t count; 1269 struct ether_header *ehp; 1270 boolean_t is_tagged; 1271 boolean_t rv; 1272 uint16_t vlan_id; 1273 uint16_t pvid; 1274 mod_hash_t *vlan_hashp; 1275 1276 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1277 1278 if (type == VSW_LOCALDEV) { 1279 vswp = (vsw_t *)arg; 1280 pvid = vswp->pvid; 1281 vlan_hashp = vswp->vlan_hashp; 1282 portp = NULL; 1283 } else { 1284 /* type == VSW_VNETPORT */ 1285 portp = (vsw_port_t *)arg; 1286 vswp = portp->p_vswp; 1287 vlan_hashp = portp->vlan_hashp; 1288 pvid = portp->pvid; 1289 } 1290 1291 bpn = bph = bpt = NULL; 1292 count = 0; 1293 1294 for (bp = *np; bp != NULL; bp = bpn) { 1295 1296 bpn = bp->b_next; 1297 bp->b_next = bp->b_prev = NULL; 1298 1299 /* 1300 * Determine the vlan id that the frame belongs to. 1301 */ 1302 ehp = (struct ether_header *)bp->b_rptr; 1303 is_tagged = vsw_frame_lookup_vid(arg, type, ehp, &vlan_id); 1304 1305 /* 1306 * Check if the destination is in the same vlan. 1307 */ 1308 rv = vsw_vlan_lookup(vlan_hashp, vlan_id); 1309 if (rv == B_FALSE) { 1310 /* drop the packet */ 1311 freemsg(bp); 1312 count++; 1313 continue; 1314 } 1315 1316 /* 1317 * Check the frame header if tag/untag is needed. 1318 */ 1319 if (is_tagged == B_FALSE) { 1320 /* 1321 * Untagged frame. 
We shouldn't have an untagged 1322 * packet at this point, unless the destination's 1323 * vlan id is default-vlan-id; if it is not the 1324 * default-vlan-id, we drop the packet. 1325 */ 1326 if (vlan_id != vswp->default_vlan_id) { 1327 /* drop the packet */ 1328 freemsg(bp); 1329 count++; 1330 continue; 1331 } 1332 } else { 1333 /* 1334 * Tagged frame, untag if it's the destination's pvid. 1335 */ 1336 if (vlan_id == pvid) { 1337 1338 bp = vnet_vlan_remove_tag(bp); 1339 if (bp == NULL) { 1340 /* packet dropped */ 1341 count++; 1342 continue; 1343 } 1344 } 1345 } 1346 1347 /* build a chain of processed packets */ 1348 if (bph == NULL) { 1349 bph = bpt = bp; 1350 } else { 1351 bpt->b_next = bp; 1352 bpt = bp; 1353 } 1354 1355 } 1356 1357 *np = bph; 1358 *npt = bpt; 1359 1360 return (count); 1361 } 1362 1363 /* 1364 * Lookup the vlan id of the given frame. If it is a vlan-tagged frame, 1365 * then the vlan-id is available in the tag; otherwise, its vlan id is 1366 * implicitly obtained based on the caller (destination of the frame: 1367 * VSW_VNETPORT or VSW_LOCALDEV). 1368 * The vlan id determined is returned in vidp. 1369 * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged. 1370 */ 1371 boolean_t 1372 vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp, 1373 uint16_t *vidp) 1374 { 1375 struct ether_vlan_header *evhp; 1376 vsw_t *vswp; 1377 vsw_port_t *portp; 1378 1379 /* If it's a tagged frame, get the vid from vlan header */ 1380 if (ehp->ether_type == ETHERTYPE_VLAN) { 1381 1382 evhp = (struct ether_vlan_header *)ehp; 1383 *vidp = VLAN_ID(ntohs(evhp->ether_tci)); 1384 return (B_TRUE); 1385 } 1386 1387 /* Untagged frame; determine vlan id based on caller */ 1388 switch (caller) { 1389 1390 case VSW_VNETPORT: 1391 /* 1392 * packet destined to a vnet; vlan-id is pvid of vnet-port. 
1393 */ 1394 portp = (vsw_port_t *)arg; 1395 *vidp = portp->pvid; 1396 break; 1397 1398 case VSW_LOCALDEV: 1399 1400 /* 1401 * packet destined to vsw interface; 1402 * vlan-id is port-vlan-id of vsw device. 1403 */ 1404 vswp = (vsw_t *)arg; 1405 *vidp = vswp->pvid; 1406 break; 1407 } 1408 1409 return (B_FALSE); 1410 } 1411 1412 /* 1413 * Add or remove multicast address(es). 1414 * 1415 * Returns 0 on success, 1 on failure. 1416 */ 1417 int 1418 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port) 1419 { 1420 mcst_addr_t *mcst_p = NULL; 1421 vsw_t *vswp = port->p_vswp; 1422 uint64_t addr = 0x0; 1423 int i; 1424 1425 D1(vswp, "%s: enter", __func__); 1426 1427 D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count); 1428 1429 for (i = 0; i < mcst_pkt->count; i++) { 1430 /* 1431 * Convert address into form that can be used 1432 * as hash table key. 1433 */ 1434 KEY_HASH(addr, &(mcst_pkt->mca[i])); 1435 1436 /* 1437 * Add or delete the specified address/port combination. 1438 */ 1439 if (mcst_pkt->set == 0x1) { 1440 D3(vswp, "%s: adding multicast address 0x%llx for " 1441 "port %ld", __func__, addr, port->p_instance); 1442 if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1443 /* 1444 * Update the list of multicast 1445 * addresses contained within the 1446 * port structure to include this new 1447 * one. 1448 */ 1449 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), 1450 KM_NOSLEEP); 1451 if (mcst_p == NULL) { 1452 DERR(vswp, "%s: unable to alloc mem", 1453 __func__); 1454 (void) vsw_del_mcst(vswp, 1455 VSW_VNETPORT, addr, port); 1456 return (1); 1457 } 1458 1459 mcst_p->nextp = NULL; 1460 mcst_p->addr = addr; 1461 ether_copy(&mcst_pkt->mca[i], &mcst_p->mca); 1462 1463 /* 1464 * Program the address into HW. 
If the addr 1465 * has already been programmed then the MAC 1466 * just increments a ref counter (which is 1467 * used when the address is being deleted) 1468 */ 1469 WRITE_ENTER(&vswp->mac_rwlock); 1470 if (vswp->mh != NULL) { 1471 if (mac_multicst_add(vswp->mh, 1472 (uchar_t *)&mcst_pkt->mca[i])) { 1473 RW_EXIT(&vswp->mac_rwlock); 1474 cmn_err(CE_WARN, "!vsw%d: " 1475 "unable to add multicast " 1476 "address: %s\n", 1477 vswp->instance, 1478 ether_sprintf((void *) 1479 &mcst_p->mca)); 1480 (void) vsw_del_mcst(vswp, 1481 VSW_VNETPORT, addr, port); 1482 kmem_free(mcst_p, 1483 sizeof (*mcst_p)); 1484 return (1); 1485 } 1486 mcst_p->mac_added = B_TRUE; 1487 } 1488 RW_EXIT(&vswp->mac_rwlock); 1489 1490 mutex_enter(&port->mca_lock); 1491 mcst_p->nextp = port->mcap; 1492 port->mcap = mcst_p; 1493 mutex_exit(&port->mca_lock); 1494 1495 } else { 1496 DERR(vswp, "%s: error adding multicast " 1497 "address 0x%llx for port %ld", 1498 __func__, addr, port->p_instance); 1499 return (1); 1500 } 1501 } else { 1502 /* 1503 * Delete an entry from the multicast hash 1504 * table and update the address list 1505 * appropriately. 1506 */ 1507 if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1508 D3(vswp, "%s: deleting multicast address " 1509 "0x%llx for port %ld", __func__, addr, 1510 port->p_instance); 1511 1512 mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr); 1513 ASSERT(mcst_p != NULL); 1514 1515 /* 1516 * Remove the address from HW. The address 1517 * will actually only be removed once the ref 1518 * count within the MAC layer has dropped to 1519 * zero. I.e. we can safely call this fn even 1520 * if other ports are interested in this 1521 * address. 
1522 */ 1523 WRITE_ENTER(&vswp->mac_rwlock); 1524 if (vswp->mh != NULL && mcst_p->mac_added) { 1525 if (mac_multicst_remove(vswp->mh, 1526 (uchar_t *)&mcst_pkt->mca[i])) { 1527 RW_EXIT(&vswp->mac_rwlock); 1528 cmn_err(CE_WARN, "!vsw%d: " 1529 "unable to remove mcast " 1530 "address: %s\n", 1531 vswp->instance, 1532 ether_sprintf((void *) 1533 &mcst_p->mca)); 1534 kmem_free(mcst_p, 1535 sizeof (*mcst_p)); 1536 return (1); 1537 } 1538 mcst_p->mac_added = B_FALSE; 1539 } 1540 RW_EXIT(&vswp->mac_rwlock); 1541 kmem_free(mcst_p, sizeof (*mcst_p)); 1542 1543 } else { 1544 DERR(vswp, "%s: error deleting multicast " 1545 "addr 0x%llx for port %ld", 1546 __func__, addr, port->p_instance); 1547 return (1); 1548 } 1549 } 1550 } 1551 D1(vswp, "%s: exit", __func__); 1552 return (0); 1553 } 1554 1555 /* 1556 * Add a new multicast entry. 1557 * 1558 * Search hash table based on address. If match found then 1559 * update associated val (which is chain of ports), otherwise 1560 * create new key/val (addr/port) pair and insert into table. 1561 */ 1562 int 1563 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1564 { 1565 int dup = 0; 1566 int rv = 0; 1567 mfdb_ent_t *ment = NULL; 1568 mfdb_ent_t *tmp_ent = NULL; 1569 mfdb_ent_t *new_ent = NULL; 1570 void *tgt = NULL; 1571 1572 if (devtype == VSW_VNETPORT) { 1573 /* 1574 * Being invoked from a vnet. 1575 */ 1576 ASSERT(arg != NULL); 1577 tgt = arg; 1578 D2(NULL, "%s: port %d : address 0x%llx", __func__, 1579 ((vsw_port_t *)arg)->p_instance, addr); 1580 } else { 1581 /* 1582 * We are being invoked via the m_multicst mac entry 1583 * point. 
1584 */ 1585 D2(NULL, "%s: address 0x%llx", __func__, addr); 1586 tgt = (void *)vswp; 1587 } 1588 1589 WRITE_ENTER(&vswp->mfdbrw); 1590 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1591 (mod_hash_val_t *)&ment) != 0) { 1592 1593 /* address not currently in table */ 1594 ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1595 ment->d_addr = (void *)tgt; 1596 ment->d_type = devtype; 1597 ment->nextp = NULL; 1598 1599 if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr, 1600 (mod_hash_val_t)ment) != 0) { 1601 DERR(vswp, "%s: hash table insertion failed", __func__); 1602 kmem_free(ment, sizeof (mfdb_ent_t)); 1603 rv = 1; 1604 } else { 1605 D2(vswp, "%s: added initial entry for 0x%llx to " 1606 "table", __func__, addr); 1607 } 1608 } else { 1609 /* 1610 * Address in table. Check to see if specified port 1611 * is already associated with the address. If not add 1612 * it now. 1613 */ 1614 tmp_ent = ment; 1615 while (tmp_ent != NULL) { 1616 if (tmp_ent->d_addr == (void *)tgt) { 1617 if (devtype == VSW_VNETPORT) { 1618 DERR(vswp, "%s: duplicate port entry " 1619 "found for portid %ld and key " 1620 "0x%llx", __func__, 1621 ((vsw_port_t *)arg)->p_instance, 1622 addr); 1623 } else { 1624 DERR(vswp, "%s: duplicate entry found" 1625 "for key 0x%llx", __func__, addr); 1626 } 1627 rv = 1; 1628 dup = 1; 1629 break; 1630 } 1631 tmp_ent = tmp_ent->nextp; 1632 } 1633 1634 /* 1635 * Port not on list so add it to end now. 1636 */ 1637 if (0 == dup) { 1638 D2(vswp, "%s: added entry for 0x%llx to table", 1639 __func__, addr); 1640 new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1641 new_ent->d_addr = (void *)tgt; 1642 new_ent->d_type = devtype; 1643 new_ent->nextp = NULL; 1644 1645 tmp_ent = ment; 1646 while (tmp_ent->nextp != NULL) 1647 tmp_ent = tmp_ent->nextp; 1648 1649 tmp_ent->nextp = new_ent; 1650 } 1651 } 1652 1653 RW_EXIT(&vswp->mfdbrw); 1654 return (rv); 1655 } 1656 1657 /* 1658 * Remove a multicast entry from the hashtable. 
1659 * 1660 * Search hash table based on address. If match found, scan 1661 * list of ports associated with address. If specified port 1662 * found remove it from list. 1663 */ 1664 int 1665 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1666 { 1667 mfdb_ent_t *ment = NULL; 1668 mfdb_ent_t *curr_p, *prev_p; 1669 void *tgt = NULL; 1670 1671 D1(vswp, "%s: enter", __func__); 1672 1673 if (devtype == VSW_VNETPORT) { 1674 tgt = (vsw_port_t *)arg; 1675 D2(vswp, "%s: removing port %d from mFDB for address" 1676 " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr); 1677 } else { 1678 D2(vswp, "%s: removing entry", __func__); 1679 tgt = (void *)vswp; 1680 } 1681 1682 WRITE_ENTER(&vswp->mfdbrw); 1683 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1684 (mod_hash_val_t *)&ment) != 0) { 1685 D2(vswp, "%s: address 0x%llx not in table", __func__, addr); 1686 RW_EXIT(&vswp->mfdbrw); 1687 return (1); 1688 } 1689 1690 prev_p = curr_p = ment; 1691 1692 while (curr_p != NULL) { 1693 if (curr_p->d_addr == (void *)tgt) { 1694 if (devtype == VSW_VNETPORT) { 1695 D2(vswp, "%s: port %d found", __func__, 1696 ((vsw_port_t *)tgt)->p_instance); 1697 } else { 1698 D2(vswp, "%s: instance found", __func__); 1699 } 1700 1701 if (prev_p == curr_p) { 1702 /* 1703 * head of list, if no other element is in 1704 * list then destroy this entry, otherwise 1705 * just replace it with updated value. 1706 */ 1707 ment = curr_p->nextp; 1708 if (ment == NULL) { 1709 (void) mod_hash_destroy(vswp->mfdb, 1710 (mod_hash_val_t)addr); 1711 } else { 1712 (void) mod_hash_replace(vswp->mfdb, 1713 (mod_hash_key_t)addr, 1714 (mod_hash_val_t)ment); 1715 } 1716 } else { 1717 /* 1718 * Not head of list, no need to do 1719 * replacement, just adjust list pointers. 
1720 */ 1721 prev_p->nextp = curr_p->nextp; 1722 } 1723 break; 1724 } 1725 1726 prev_p = curr_p; 1727 curr_p = curr_p->nextp; 1728 } 1729 1730 RW_EXIT(&vswp->mfdbrw); 1731 1732 D1(vswp, "%s: exit", __func__); 1733 1734 if (curr_p == NULL) 1735 return (1); 1736 kmem_free(curr_p, sizeof (mfdb_ent_t)); 1737 return (0); 1738 } 1739 1740 /* 1741 * Port is being deleted, but has registered an interest in one 1742 * or more multicast groups. Using the list of addresses maintained 1743 * within the port structure find the appropriate entry in the hash 1744 * table and remove this port from the list of interested ports. 1745 */ 1746 void 1747 vsw_del_mcst_port(vsw_port_t *port) 1748 { 1749 mcst_addr_t *mcap = NULL; 1750 vsw_t *vswp = port->p_vswp; 1751 1752 D1(vswp, "%s: enter", __func__); 1753 1754 mutex_enter(&port->mca_lock); 1755 1756 while ((mcap = port->mcap) != NULL) { 1757 1758 port->mcap = mcap->nextp; 1759 1760 mutex_exit(&port->mca_lock); 1761 1762 (void) vsw_del_mcst(vswp, VSW_VNETPORT, 1763 mcap->addr, port); 1764 1765 /* 1766 * Remove the address from HW. The address 1767 * will actually only be removed once the ref 1768 * count within the MAC layer has dropped to 1769 * zero. I.e. we can safely call this fn even 1770 * if other ports are interested in this 1771 * address. 1772 */ 1773 WRITE_ENTER(&vswp->mac_rwlock); 1774 if (vswp->mh != NULL && mcap->mac_added) { 1775 (void) mac_multicst_remove(vswp->mh, 1776 (uchar_t *)&mcap->mca); 1777 } 1778 RW_EXIT(&vswp->mac_rwlock); 1779 1780 kmem_free(mcap, sizeof (*mcap)); 1781 1782 mutex_enter(&port->mca_lock); 1783 1784 } 1785 1786 mutex_exit(&port->mca_lock); 1787 1788 D1(vswp, "%s: exit", __func__); 1789 } 1790 1791 /* 1792 * This vsw instance is detaching, but has registered an interest in one 1793 * or more multicast groups. Using the list of addresses maintained 1794 * within the vsw structure find the appropriate entry in the hash 1795 * table and remove this instance from the list of interested ports. 
1796 */ 1797 void 1798 vsw_del_mcst_vsw(vsw_t *vswp) 1799 { 1800 mcst_addr_t *next_p = NULL; 1801 1802 D1(vswp, "%s: enter", __func__); 1803 1804 mutex_enter(&vswp->mca_lock); 1805 1806 while (vswp->mcap != NULL) { 1807 DERR(vswp, "%s: deleting addr 0x%llx", 1808 __func__, vswp->mcap->addr); 1809 (void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL); 1810 1811 next_p = vswp->mcap->nextp; 1812 kmem_free(vswp->mcap, sizeof (mcst_addr_t)); 1813 vswp->mcap = next_p; 1814 } 1815 1816 vswp->mcap = NULL; 1817 mutex_exit(&vswp->mca_lock); 1818 1819 D1(vswp, "%s: exit", __func__); 1820 } 1821 1822 static uint32_t 1823 vsw_get_same_dest_list(struct ether_header *ehp, 1824 mblk_t **rhead, mblk_t **rtail, mblk_t **mpp) 1825 { 1826 uint32_t count = 0; 1827 mblk_t *bp; 1828 mblk_t *nbp; 1829 mblk_t *head = NULL; 1830 mblk_t *tail = NULL; 1831 mblk_t *prev = NULL; 1832 struct ether_header *behp; 1833 1834 /* process the chain of packets */ 1835 bp = *mpp; 1836 while (bp) { 1837 nbp = bp->b_next; 1838 behp = (struct ether_header *)bp->b_rptr; 1839 bp->b_prev = NULL; 1840 if (ether_cmp(&ehp->ether_dhost, &behp->ether_dhost) == 0) { 1841 if (prev == NULL) { 1842 *mpp = nbp; 1843 } else { 1844 prev->b_next = nbp; 1845 } 1846 bp->b_next = NULL; 1847 if (head == NULL) { 1848 head = tail = bp; 1849 } else { 1850 tail->b_next = bp; 1851 tail = bp; 1852 } 1853 count++; 1854 } else { 1855 prev = bp; 1856 } 1857 bp = nbp; 1858 } 1859 *rhead = head; 1860 *rtail = tail; 1861 DTRACE_PROBE1(vsw_same_dest, int, count); 1862 return (count); 1863 } 1864 1865 static mblk_t * 1866 vsw_dupmsgchain(mblk_t *mp) 1867 { 1868 mblk_t *nmp = NULL; 1869 mblk_t **nmpp = &nmp; 1870 1871 for (; mp != NULL; mp = mp->b_next) { 1872 if ((*nmpp = dupmsg(mp)) == NULL) { 1873 freemsgchain(nmp); 1874 return (NULL); 1875 } 1876 1877 nmpp = &((*nmpp)->b_next); 1878 } 1879 1880 return (nmp); 1881 } 1882