1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/ldc.h> 66 
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/vlan.h>

/* Switching setup routines */
void vsw_setup_switching_timeout(void *arg);
void vsw_stop_switching_timeout(vsw_t *vswp);
int vsw_setup_switching(vsw_t *);
static	int vsw_setup_layer2(vsw_t *);
static	int vsw_setup_layer3(vsw_t *);

/* Switching/data transmit routines */
static	void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
	vsw_port_t *port, mac_resource_handle_t);
static	void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
	vsw_port_t *port, mac_resource_handle_t);
static	int vsw_forward_all(vsw_t *vswp, mblk_t *mp,
	int caller, vsw_port_t *port);
static	int vsw_forward_grp(vsw_t *vswp, mblk_t *mp,
	int caller, vsw_port_t *port);

/* VLAN routines */
void vsw_create_vlans(void *arg, int type);
void vsw_destroy_vlans(void *arg, int type);
void vsw_vlan_add_ids(void *arg, int type);
void vsw_vlan_remove_ids(void *arg, int type);
static	void vsw_vlan_create_hash(void *arg, int type);
static	void vsw_vlan_destroy_hash(void *arg, int type);
boolean_t vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp,
	uint16_t *vidp);
mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
/* Name matches the definition of vsw_vlan_frame_untag() later in this file. */
uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, mblk_t **npt);
boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);

/* Forwarding database (FDB) routines */
void vsw_fdbe_add(vsw_t *vswp, void *port);
void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr);
static vsw_fdbe_t *vsw_fdbe_find(vsw_t *vswp, struct ether_addr *);
static void vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);

int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
void vsw_del_mcst_vsw(vsw_t *);

/* Support functions */
static mblk_t *vsw_dupmsgchain(mblk_t *mp);
static uint32_t vsw_get_same_dest_list(struct ether_header *ehp,
	mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);


/*
 * Functions imported from other files.
 */
extern mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
extern mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t);
extern int vsw_mac_open(vsw_t *vswp);
extern void vsw_mac_close(vsw_t *vswp);
extern void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
	mblk_t *mp, vsw_macrx_flags_t flags);
extern void vsw_set_addrs(vsw_t *vswp);
extern int vsw_get_hw_maddr(vsw_t *);
extern int vsw_mac_attach(vsw_t *vswp);
extern int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt,
	uint32_t count);

/*
 * Tunables used in this file.
 */
extern int vsw_setup_switching_delay;
extern uint32_t vsw_vlan_nchains;
extern uint32_t vsw_fdbe_refcnt_delay;

/*
 * Take a reference on an fdb entry. Wrapped in do { } while (0) so the
 * macro behaves as a single statement (e.g. inside an unbraced if/else).
 */
#define	VSW_FDBE_REFHOLD(p)						\
do {									\
	atomic_inc_32(&(p)->refcnt);					\
	ASSERT((p)->refcnt != 0);					\
} while (0)

/*
 * Drop a reference previously taken with VSW_FDBE_REFHOLD().
 */
#define	VSW_FDBE_REFRELE(p)						\
do {									\
	ASSERT((p)->refcnt != 0);					\
	atomic_dec_32(&(p)->refcnt);					\
} while (0)

/*
 * Timeout routine to setup switching mode:
 * vsw_setup_switching() is invoked from vsw_attach() or vsw_update_md_prop()
 * initially. If it fails and the error is EAGAIN, then this timeout handler
 * is started to retry vsw_setup_switching(). vsw_setup_switching() is retried
 * until we successfully finish it; or the returned error is not EAGAIN.
163 */ 164 void 165 vsw_setup_switching_timeout(void *arg) 166 { 167 vsw_t *vswp = (vsw_t *)arg; 168 int rv; 169 170 if (vswp->swtmout_enabled == B_FALSE) 171 return; 172 173 rv = vsw_setup_switching(vswp); 174 175 if (rv == 0) { 176 /* 177 * Successfully setup switching mode. 178 * Program unicst, mcst addrs of vsw 179 * interface and ports in the physdev. 180 */ 181 vsw_set_addrs(vswp); 182 } 183 184 mutex_enter(&vswp->swtmout_lock); 185 186 if (rv == EAGAIN && vswp->swtmout_enabled == B_TRUE) { 187 /* 188 * Reschedule timeout() if the error is EAGAIN and the 189 * timeout is still enabled. For errors other than EAGAIN, 190 * we simply return without rescheduling timeout(). 191 */ 192 vswp->swtmout_id = 193 timeout(vsw_setup_switching_timeout, vswp, 194 (vsw_setup_switching_delay * drv_usectohz(MICROSEC))); 195 goto exit; 196 } 197 198 /* timeout handler completed */ 199 vswp->swtmout_enabled = B_FALSE; 200 vswp->swtmout_id = 0; 201 202 exit: 203 mutex_exit(&vswp->swtmout_lock); 204 } 205 206 /* 207 * Cancel the timeout handler to setup switching mode. 208 */ 209 void 210 vsw_stop_switching_timeout(vsw_t *vswp) 211 { 212 timeout_id_t tid; 213 214 mutex_enter(&vswp->swtmout_lock); 215 216 tid = vswp->swtmout_id; 217 218 if (tid != 0) { 219 /* signal timeout handler to stop */ 220 vswp->swtmout_enabled = B_FALSE; 221 vswp->swtmout_id = 0; 222 mutex_exit(&vswp->swtmout_lock); 223 224 (void) untimeout(tid); 225 } else { 226 mutex_exit(&vswp->swtmout_lock); 227 } 228 229 (void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE); 230 231 mutex_enter(&vswp->mac_lock); 232 vswp->mac_open_retries = 0; 233 mutex_exit(&vswp->mac_lock); 234 } 235 236 /* 237 * Setup the required switching mode. 238 * This routine is invoked from vsw_attach() or vsw_update_md_prop() 239 * initially. If it fails and the error is EAGAIN, then a timeout handler 240 * is started to retry vsw_setup_switching(), until it successfully finishes; 241 * or the returned error is not EAGAIN. 
242 * 243 * Returns: 244 * 0 on success. 245 * EAGAIN if retry is needed. 246 * 1 on all other failures. 247 */ 248 int 249 vsw_setup_switching(vsw_t *vswp) 250 { 251 int i, rv = 1; 252 253 D1(vswp, "%s: enter", __func__); 254 255 /* 256 * Select best switching mode. 257 * Note that we start from the saved smode_idx. This is done as 258 * this routine can be called from the timeout handler to retry 259 * setting up a specific mode. Currently only the function which 260 * sets up layer2/promisc mode returns EAGAIN if the underlying 261 * physical device is not available yet, causing retries. 262 */ 263 for (i = vswp->smode_idx; i < vswp->smode_num; i++) { 264 vswp->smode_idx = i; 265 switch (vswp->smode[i]) { 266 case VSW_LAYER2: 267 case VSW_LAYER2_PROMISC: 268 rv = vsw_setup_layer2(vswp); 269 break; 270 271 case VSW_LAYER3: 272 rv = vsw_setup_layer3(vswp); 273 break; 274 275 default: 276 DERR(vswp, "unknown switch mode"); 277 break; 278 } 279 280 if ((rv == 0) || (rv == EAGAIN)) 281 break; 282 283 /* all other errors(rv != 0): continue & select the next mode */ 284 rv = 1; 285 } 286 287 if (rv && (rv != EAGAIN)) { 288 cmn_err(CE_WARN, "!vsw%d: Unable to setup specified " 289 "switching mode", vswp->instance); 290 } else if (rv == 0) { 291 (void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE); 292 } 293 294 D2(vswp, "%s: Operating in mode %d", __func__, 295 vswp->smode[vswp->smode_idx]); 296 297 D1(vswp, "%s: exit", __func__); 298 299 return (rv); 300 } 301 302 /* 303 * Setup for layer 2 switching. 304 * 305 * Returns: 306 * 0 on success. 307 * EAGAIN if retry is needed. 308 * EIO on all other failures. 309 */ 310 static int 311 vsw_setup_layer2(vsw_t *vswp) 312 { 313 int rv; 314 315 D1(vswp, "%s: enter", __func__); 316 317 vswp->vsw_switch_frame = vsw_switch_l2_frame; 318 319 rv = strlen(vswp->physname); 320 if (rv == 0) { 321 /* 322 * Physical device name is NULL, which is 323 * required for layer 2. 
324 */ 325 cmn_err(CE_WARN, "!vsw%d: no physical device name specified", 326 vswp->instance); 327 return (EIO); 328 } 329 330 mutex_enter(&vswp->mac_lock); 331 332 rv = vsw_mac_open(vswp); 333 if (rv != 0) { 334 if (rv != EAGAIN) { 335 cmn_err(CE_WARN, "!vsw%d: Unable to open physical " 336 "device: %s\n", vswp->instance, vswp->physname); 337 } 338 mutex_exit(&vswp->mac_lock); 339 return (rv); 340 } 341 342 if (vswp->smode[vswp->smode_idx] == VSW_LAYER2) { 343 /* 344 * Verify that underlying device can support multiple 345 * unicast mac addresses. 346 */ 347 rv = vsw_get_hw_maddr(vswp); 348 if (rv != 0) { 349 goto exit_error; 350 } 351 } 352 353 /* 354 * Attempt to link into the MAC layer so we can get 355 * and send packets out over the physical adapter. 356 */ 357 rv = vsw_mac_attach(vswp); 358 if (rv != 0) { 359 /* 360 * Registration with the MAC layer has failed, 361 * so return error so that can fall back to next 362 * prefered switching method. 363 */ 364 cmn_err(CE_WARN, "!vsw%d: Unable to setup physical device: " 365 "%s\n", vswp->instance, vswp->physname); 366 goto exit_error; 367 } 368 369 D1(vswp, "%s: exit", __func__); 370 371 mutex_exit(&vswp->mac_lock); 372 return (0); 373 374 exit_error: 375 vsw_mac_close(vswp); 376 mutex_exit(&vswp->mac_lock); 377 return (EIO); 378 } 379 380 static int 381 vsw_setup_layer3(vsw_t *vswp) 382 { 383 D1(vswp, "%s: enter", __func__); 384 385 D2(vswp, "%s: operating in layer 3 mode", __func__); 386 vswp->vsw_switch_frame = vsw_switch_l3_frame; 387 388 D1(vswp, "%s: exit", __func__); 389 390 return (0); 391 } 392 393 /* 394 * Switch the given ethernet frame when operating in layer 2 mode. 395 * 396 * vswp: pointer to the vsw instance 397 * mp: pointer to chain of ethernet frame(s) to be switched 398 * caller: identifies the source of this frame as: 399 * 1. VSW_VNETPORT - a vsw port (connected to a vnet). 400 * 2. VSW_PHYSDEV - the physical ethernet device 401 * 3. 
VSW_LOCALDEV - vsw configured as a virtual interface 402 * arg: argument provided by the caller. 403 * 1. for VNETPORT - pointer to the corresponding vsw_port_t. 404 * 2. for PHYSDEV - NULL 405 * 3. for LOCALDEV - pointer to to this vsw_t(self) 406 */ 407 void 408 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 409 vsw_port_t *arg, mac_resource_handle_t mrh) 410 { 411 struct ether_header *ehp; 412 mblk_t *bp, *ret_m; 413 mblk_t *mpt = NULL; 414 uint32_t count; 415 vsw_fdbe_t *fp; 416 417 D1(vswp, "%s: enter (caller %d)", __func__, caller); 418 419 /* 420 * PERF: rather than breaking up the chain here, scan it 421 * to find all mblks heading to same destination and then 422 * pass that sub-chain to the lower transmit functions. 423 */ 424 425 /* process the chain of packets */ 426 bp = mp; 427 while (bp) { 428 ehp = (struct ether_header *)bp->b_rptr; 429 count = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp); 430 ASSERT(count != 0); 431 432 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 433 __func__, MBLKSIZE(mp), MBLKL(mp)); 434 435 if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) { 436 /* 437 * If destination is VSW_LOCALDEV (vsw as an eth 438 * interface) and if the device is up & running, 439 * send the packet up the stack on this host. 440 * If the virtual interface is down, drop the packet. 441 */ 442 if (caller != VSW_LOCALDEV) { 443 vsw_mac_rx(vswp, mrh, mp, VSW_MACRX_FREEMSG); 444 } else { 445 freemsgchain(mp); 446 } 447 continue; 448 } 449 450 /* 451 * Find fdb entry for the destination 452 * and hold a reference to it. 453 */ 454 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 455 if (fp != NULL) { 456 457 /* 458 * If plumbed and in promisc mode then copy msg 459 * and send up the stack. 
460 */ 461 vsw_mac_rx(vswp, mrh, mp, 462 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 463 464 /* 465 * If the destination is in FDB, the packet 466 * should be forwarded to the correponding 467 * vsw_port (connected to a vnet device - 468 * VSW_VNETPORT) 469 */ 470 (void) vsw_portsend(fp->portp, mp, mpt, count); 471 472 /* Release the reference on the fdb entry */ 473 VSW_FDBE_REFRELE(fp); 474 } else { 475 /* 476 * Destination not in FDB. 477 * 478 * If the destination is broadcast or 479 * multicast forward the packet to all 480 * (VNETPORTs, PHYSDEV, LOCALDEV), 481 * except the caller. 482 */ 483 if (IS_BROADCAST(ehp)) { 484 D2(vswp, "%s: BROADCAST pkt", __func__); 485 (void) vsw_forward_all(vswp, mp, caller, arg); 486 } else if (IS_MULTICAST(ehp)) { 487 D2(vswp, "%s: MULTICAST pkt", __func__); 488 (void) vsw_forward_grp(vswp, mp, caller, arg); 489 } else { 490 /* 491 * If the destination is unicast, and came 492 * from either a logical network device or 493 * the switch itself when it is plumbed, then 494 * send it out on the physical device and also 495 * up the stack if the logical interface is 496 * in promiscious mode. 497 * 498 * NOTE: The assumption here is that if we 499 * cannot find the destination in our fdb, its 500 * a unicast address, and came from either a 501 * vnet or down the stack (when plumbed) it 502 * must be destinded for an ethernet device 503 * outside our ldoms. 504 */ 505 if (caller == VSW_VNETPORT) { 506 /* promisc check copy etc */ 507 vsw_mac_rx(vswp, mrh, mp, 508 VSW_MACRX_PROMISC | 509 VSW_MACRX_COPYMSG); 510 511 if ((ret_m = vsw_tx_msg(vswp, mp)) 512 != NULL) { 513 DERR(vswp, "%s: drop mblks to " 514 "phys dev", __func__); 515 freemsgchain(ret_m); 516 } 517 518 } else if (caller == VSW_PHYSDEV) { 519 /* 520 * Pkt seen because card in promisc 521 * mode. Send up stack if plumbed in 522 * promisc mode, else drop it. 
523 */ 524 vsw_mac_rx(vswp, mrh, mp, 525 VSW_MACRX_PROMISC | 526 VSW_MACRX_FREEMSG); 527 528 } else if (caller == VSW_LOCALDEV) { 529 /* 530 * Pkt came down the stack, send out 531 * over physical device. 532 */ 533 if ((ret_m = vsw_tx_msg(vswp, mp)) 534 != NULL) { 535 DERR(vswp, "%s: drop mblks to " 536 "phys dev", __func__); 537 freemsgchain(ret_m); 538 } 539 } 540 } 541 } 542 } 543 D1(vswp, "%s: exit\n", __func__); 544 } 545 546 /* 547 * Switch ethernet frame when in layer 3 mode (i.e. using IP 548 * layer to do the routing). 549 * 550 * There is a large amount of overlap between this function and 551 * vsw_switch_l2_frame. At some stage we need to revisit and refactor 552 * both these functions. 553 */ 554 void 555 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 556 vsw_port_t *arg, mac_resource_handle_t mrh) 557 { 558 struct ether_header *ehp; 559 mblk_t *bp = NULL; 560 mblk_t *mpt; 561 uint32_t count; 562 vsw_fdbe_t *fp; 563 564 D1(vswp, "%s: enter (caller %d)", __func__, caller); 565 566 /* 567 * In layer 3 mode should only ever be switching packets 568 * between IP layer and vnet devices. So make sure thats 569 * who is invoking us. 570 */ 571 if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) { 572 DERR(vswp, "%s: unexpected caller (%d)", __func__, caller); 573 freemsgchain(mp); 574 return; 575 } 576 577 /* process the chain of packets */ 578 bp = mp; 579 while (bp) { 580 ehp = (struct ether_header *)bp->b_rptr; 581 count = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp); 582 ASSERT(count != 0); 583 584 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 585 __func__, MBLKSIZE(mp), MBLKL(mp)); 586 587 /* 588 * Find fdb entry for the destination 589 * and hold a reference to it. 
590 */ 591 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 592 if (fp != NULL) { 593 594 D2(vswp, "%s: sending to target port", __func__); 595 (void) vsw_portsend(fp->portp, mp, mpt, count); 596 597 /* Release the reference on the fdb entry */ 598 VSW_FDBE_REFRELE(fp); 599 } else { 600 /* 601 * Destination not in FDB 602 * 603 * If the destination is broadcast or 604 * multicast forward the packet to all 605 * (VNETPORTs, PHYSDEV, LOCALDEV), 606 * except the caller. 607 */ 608 if (IS_BROADCAST(ehp)) { 609 D2(vswp, "%s: BROADCAST pkt", __func__); 610 (void) vsw_forward_all(vswp, mp, caller, arg); 611 } else if (IS_MULTICAST(ehp)) { 612 D2(vswp, "%s: MULTICAST pkt", __func__); 613 (void) vsw_forward_grp(vswp, mp, caller, arg); 614 } else { 615 /* 616 * Unicast pkt from vnet that we don't have 617 * an FDB entry for, so must be destinded for 618 * the outside world. Attempt to send up to the 619 * IP layer to allow it to deal with it. 620 */ 621 if (caller == VSW_VNETPORT) { 622 vsw_mac_rx(vswp, mrh, 623 mp, VSW_MACRX_FREEMSG); 624 } 625 } 626 } 627 } 628 629 D1(vswp, "%s: exit", __func__); 630 } 631 632 /* 633 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV), 634 * except the caller (port on which frame arrived). 635 */ 636 static int 637 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 638 { 639 vsw_port_list_t *plist = &vswp->plist; 640 vsw_port_t *portp; 641 mblk_t *nmp = NULL; 642 mblk_t *ret_m = NULL; 643 int skip_port = 0; 644 645 D1(vswp, "vsw_forward_all: enter\n"); 646 647 /* 648 * Broadcast message from inside ldoms so send to outside 649 * world if in either of layer 2 modes. 
650 */ 651 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 652 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 653 ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) { 654 655 nmp = vsw_dupmsgchain(mp); 656 if (nmp) { 657 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 658 DERR(vswp, "%s: dropping pkt(s) " 659 "consisting of %ld bytes of data for" 660 " physical device", __func__, MBLKL(ret_m)); 661 freemsgchain(ret_m); 662 } 663 } 664 } 665 666 if (caller == VSW_VNETPORT) 667 skip_port = 1; 668 669 /* 670 * Broadcast message from other vnet (layer 2 or 3) or outside 671 * world (layer 2 only), send up stack if plumbed. 672 */ 673 if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) { 674 vsw_mac_rx(vswp, NULL, mp, VSW_MACRX_COPYMSG); 675 } 676 677 /* send it to all VNETPORTs */ 678 READ_ENTER(&plist->lockrw); 679 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 680 D2(vswp, "vsw_forward_all: port %d", portp->p_instance); 681 /* 682 * Caution ! - don't reorder these two checks as arg 683 * will be NULL if the caller is PHYSDEV. skip_port is 684 * only set if caller is VNETPORT. 685 */ 686 if ((skip_port) && (portp == arg)) { 687 continue; 688 } else { 689 nmp = vsw_dupmsgchain(mp); 690 if (nmp) { 691 mblk_t *mpt = nmp; 692 uint32_t count = 1; 693 694 /* Find tail */ 695 while (mpt->b_next != NULL) { 696 mpt = mpt->b_next; 697 count++; 698 } 699 /* 700 * The plist->lockrw is protecting the 701 * portp from getting destroyed here. 702 * So, no ref_cnt is incremented here. 703 */ 704 (void) vsw_portsend(portp, nmp, mpt, count); 705 } else { 706 DERR(vswp, "vsw_forward_all: nmp NULL"); 707 } 708 } 709 } 710 RW_EXIT(&plist->lockrw); 711 712 freemsgchain(mp); 713 714 D1(vswp, "vsw_forward_all: exit\n"); 715 return (0); 716 } 717 718 /* 719 * Forward pkts to any devices or interfaces which have registered 720 * an interest in them (i.e. multicast groups). 
721 */ 722 static int 723 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 724 { 725 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 726 mfdb_ent_t *entp = NULL; 727 mfdb_ent_t *tpp = NULL; 728 vsw_port_t *port; 729 uint64_t key = 0; 730 mblk_t *nmp = NULL; 731 mblk_t *ret_m = NULL; 732 boolean_t check_if = B_TRUE; 733 734 /* 735 * Convert address to hash table key 736 */ 737 KEY_HASH(key, &ehp->ether_dhost); 738 739 D1(vswp, "%s: key 0x%llx", __func__, key); 740 741 /* 742 * If pkt came from either a vnet or down the stack (if we are 743 * plumbed) and we are in layer 2 mode, then we send the pkt out 744 * over the physical adapter, and then check to see if any other 745 * vnets are interested in it. 746 */ 747 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 748 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 749 ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) { 750 nmp = vsw_dupmsgchain(mp); 751 if (nmp) { 752 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 753 DERR(vswp, "%s: dropping pkt(s) consisting of " 754 "%ld bytes of data for physical device", 755 __func__, MBLKL(ret_m)); 756 freemsgchain(ret_m); 757 } 758 } 759 } 760 761 READ_ENTER(&vswp->mfdbrw); 762 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key, 763 (mod_hash_val_t *)&entp) != 0) { 764 D3(vswp, "%s: no table entry found for addr 0x%llx", 765 __func__, key); 766 } else { 767 /* 768 * Send to list of devices associated with this address... 
769 */ 770 for (tpp = entp; tpp != NULL; tpp = tpp->nextp) { 771 772 /* dont send to ourselves */ 773 if ((caller == VSW_VNETPORT) && 774 (tpp->d_addr == (void *)arg)) { 775 port = (vsw_port_t *)tpp->d_addr; 776 D3(vswp, "%s: not sending to ourselves" 777 " : port %d", __func__, port->p_instance); 778 continue; 779 780 } else if ((caller == VSW_LOCALDEV) && 781 (tpp->d_type == VSW_LOCALDEV)) { 782 D2(vswp, "%s: not sending back up stack", 783 __func__); 784 continue; 785 } 786 787 if (tpp->d_type == VSW_VNETPORT) { 788 port = (vsw_port_t *)tpp->d_addr; 789 D3(vswp, "%s: sending to port %ld for addr " 790 "0x%llx", __func__, port->p_instance, key); 791 792 nmp = vsw_dupmsgchain(mp); 793 if (nmp) { 794 mblk_t *mpt = nmp; 795 uint32_t count = 1; 796 797 /* Find tail */ 798 while (mpt->b_next != NULL) { 799 mpt = mpt->b_next; 800 count++; 801 } 802 /* 803 * The vswp->mfdbrw is protecting the 804 * portp from getting destroyed here. 805 * So, no ref_cnt is incremented here. 806 */ 807 (void) vsw_portsend(port, nmp, mpt, 808 count); 809 } 810 } else { 811 vsw_mac_rx(vswp, NULL, 812 mp, VSW_MACRX_COPYMSG); 813 D2(vswp, "%s: sending up stack" 814 " for addr 0x%llx", __func__, key); 815 check_if = B_FALSE; 816 } 817 } 818 } 819 820 RW_EXIT(&vswp->mfdbrw); 821 822 /* 823 * If the pkt came from either a vnet or from physical device, 824 * and if we havent already sent the pkt up the stack then we 825 * check now if we can/should (i.e. the interface is plumbed 826 * and in promisc mode). 827 */ 828 if ((check_if) && 829 ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) { 830 vsw_mac_rx(vswp, NULL, mp, 831 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 832 } 833 834 freemsgchain(mp); 835 836 D1(vswp, "%s: exit", __func__); 837 838 return (0); 839 } 840 841 /* 842 * This function creates the vlan id hash table for the given vsw device or 843 * port. It then adds each vlan that the device or port has been assigned, 844 * into this hash table. 
845 * Arguments: 846 * arg: vsw device or port. 847 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 848 */ 849 void 850 vsw_create_vlans(void *arg, int type) 851 { 852 /* create vlan hash table */ 853 vsw_vlan_create_hash(arg, type); 854 855 /* add vlan ids of the vsw device into its hash table */ 856 vsw_vlan_add_ids(arg, type); 857 } 858 859 /* 860 * This function removes the vlan ids of the vsw device or port from its hash 861 * table. It then destroys the vlan hash table. 862 * Arguments: 863 * arg: vsw device or port. 864 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 865 */ 866 void 867 vsw_destroy_vlans(void *arg, int type) 868 { 869 /* remove vlan ids from the hash table */ 870 vsw_vlan_remove_ids(arg, type); 871 872 /* destroy vlan-hash-table */ 873 vsw_vlan_destroy_hash(arg, type); 874 } 875 876 /* 877 * Create a vlan-id hash table for the given vsw device or port. 878 */ 879 static void 880 vsw_vlan_create_hash(void *arg, int type) 881 { 882 char hashname[MAXNAMELEN]; 883 884 if (type == VSW_LOCALDEV) { 885 vsw_t *vswp = (vsw_t *)arg; 886 887 (void) snprintf(hashname, MAXNAMELEN, "vsw%d-vlan-hash", 888 vswp->instance); 889 890 vswp->vlan_nchains = vsw_vlan_nchains; 891 vswp->vlan_hashp = mod_hash_create_idhash(hashname, 892 vswp->vlan_nchains, mod_hash_null_valdtor); 893 894 } else if (type == VSW_VNETPORT) { 895 vsw_port_t *portp = (vsw_port_t *)arg; 896 897 (void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash", 898 portp->p_instance); 899 900 portp->vlan_nchains = vsw_vlan_nchains; 901 portp->vlan_hashp = mod_hash_create_idhash(hashname, 902 portp->vlan_nchains, mod_hash_null_valdtor); 903 904 } else { 905 return; 906 } 907 } 908 909 /* 910 * Destroy the vlan-id hash table for the given vsw device or port. 
911 */ 912 static void 913 vsw_vlan_destroy_hash(void *arg, int type) 914 { 915 if (type == VSW_LOCALDEV) { 916 vsw_t *vswp = (vsw_t *)arg; 917 918 mod_hash_destroy_hash(vswp->vlan_hashp); 919 vswp->vlan_nchains = 0; 920 } else if (type == VSW_VNETPORT) { 921 vsw_port_t *portp = (vsw_port_t *)arg; 922 923 mod_hash_destroy_hash(portp->vlan_hashp); 924 portp->vlan_nchains = 0; 925 } else { 926 return; 927 } 928 } 929 930 /* 931 * Add vlan ids of the given vsw device or port into its hash table. 932 */ 933 void 934 vsw_vlan_add_ids(void *arg, int type) 935 { 936 int rv; 937 int i; 938 939 if (type == VSW_LOCALDEV) { 940 vsw_t *vswp = (vsw_t *)arg; 941 942 rv = mod_hash_insert(vswp->vlan_hashp, 943 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 944 (mod_hash_val_t)B_TRUE); 945 ASSERT(rv == 0); 946 947 for (i = 0; i < vswp->nvids; i++) { 948 rv = mod_hash_insert(vswp->vlan_hashp, 949 (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i]), 950 (mod_hash_val_t)B_TRUE); 951 ASSERT(rv == 0); 952 } 953 954 } else if (type == VSW_VNETPORT) { 955 vsw_port_t *portp = (vsw_port_t *)arg; 956 957 rv = mod_hash_insert(portp->vlan_hashp, 958 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 959 (mod_hash_val_t)B_TRUE); 960 ASSERT(rv == 0); 961 962 for (i = 0; i < portp->nvids; i++) { 963 rv = mod_hash_insert(portp->vlan_hashp, 964 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]), 965 (mod_hash_val_t)B_TRUE); 966 ASSERT(rv == 0); 967 } 968 969 } else { 970 return; 971 } 972 } 973 974 /* 975 * Remove vlan ids of the given vsw device or port from its hash table. 
976 */ 977 void 978 vsw_vlan_remove_ids(void *arg, int type) 979 { 980 mod_hash_val_t vp; 981 int rv; 982 int i; 983 984 if (type == VSW_LOCALDEV) { 985 vsw_t *vswp = (vsw_t *)arg; 986 987 rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->pvid); 988 if (rv == B_TRUE) { 989 rv = mod_hash_remove(vswp->vlan_hashp, 990 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 991 (mod_hash_val_t *)&vp); 992 ASSERT(rv == 0); 993 } 994 995 for (i = 0; i < vswp->nvids; i++) { 996 rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->vids[i]); 997 if (rv == B_TRUE) { 998 rv = mod_hash_remove(vswp->vlan_hashp, 999 (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i]), 1000 (mod_hash_val_t *)&vp); 1001 ASSERT(rv == 0); 1002 } 1003 } 1004 1005 } else if (type == VSW_VNETPORT) { 1006 vsw_port_t *portp = (vsw_port_t *)arg; 1007 1008 portp = (vsw_port_t *)arg; 1009 rv = vsw_vlan_lookup(portp->vlan_hashp, portp->pvid); 1010 if (rv == B_TRUE) { 1011 rv = mod_hash_remove(portp->vlan_hashp, 1012 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 1013 (mod_hash_val_t *)&vp); 1014 ASSERT(rv == 0); 1015 } 1016 1017 for (i = 0; i < portp->nvids; i++) { 1018 rv = vsw_vlan_lookup(portp->vlan_hashp, portp->vids[i]); 1019 if (rv == B_TRUE) { 1020 rv = mod_hash_remove(portp->vlan_hashp, 1021 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]), 1022 (mod_hash_val_t *)&vp); 1023 ASSERT(rv == 0); 1024 } 1025 } 1026 1027 } else { 1028 return; 1029 } 1030 } 1031 1032 /* 1033 * Find the given vlan id in the hash table. 1034 * Return: B_TRUE if the id is found; B_FALSE if not found. 1035 */ 1036 boolean_t 1037 vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid) 1038 { 1039 int rv; 1040 mod_hash_val_t vp; 1041 1042 rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp); 1043 1044 if (rv != 0) 1045 return (B_FALSE); 1046 1047 return (B_TRUE); 1048 } 1049 1050 /* 1051 * Add an entry into FDB for the given vsw. 
1052 */ 1053 void 1054 vsw_fdbe_add(vsw_t *vswp, void *port) 1055 { 1056 uint64_t addr = 0; 1057 vsw_port_t *portp; 1058 vsw_fdbe_t *fp; 1059 int rv; 1060 1061 portp = (vsw_port_t *)port; 1062 KEY_HASH(addr, &portp->p_macaddr); 1063 1064 fp = kmem_zalloc(sizeof (vsw_fdbe_t), KM_SLEEP); 1065 fp->portp = port; 1066 1067 /* 1068 * Note: duplicate keys will be rejected by mod_hash. 1069 */ 1070 rv = mod_hash_insert(vswp->fdb_hashp, (mod_hash_key_t)addr, 1071 (mod_hash_val_t)fp); 1072 ASSERT(rv == 0); 1073 } 1074 1075 /* 1076 * Remove an entry from FDB. 1077 */ 1078 void 1079 vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr) 1080 { 1081 uint64_t addr = 0; 1082 vsw_fdbe_t *fp; 1083 int rv; 1084 1085 KEY_HASH(addr, eaddr); 1086 1087 /* 1088 * Remove the entry from fdb hash table. 1089 * This prevents further references to this fdb entry. 1090 */ 1091 rv = mod_hash_remove(vswp->fdb_hashp, (mod_hash_key_t)addr, 1092 (mod_hash_val_t *)&fp); 1093 if (rv != 0) { 1094 /* invalid key? */ 1095 return; 1096 } 1097 1098 /* 1099 * If there are threads already ref holding before the entry was 1100 * removed from hash table, then wait for ref count to drop to zero. 1101 */ 1102 while (fp->refcnt != 0) { 1103 delay(drv_usectohz(vsw_fdbe_refcnt_delay)); 1104 } 1105 1106 kmem_free(fp, sizeof (*fp)); 1107 } 1108 1109 /* 1110 * Search fdb for a given mac address. If an entry is found, hold 1111 * a reference to it and return the entry, else returns NULL. 1112 */ 1113 static vsw_fdbe_t * 1114 vsw_fdbe_find(vsw_t *vswp, struct ether_addr *addrp) 1115 { 1116 uint64_t key = 0; 1117 vsw_fdbe_t *fp; 1118 int rv; 1119 1120 KEY_HASH(key, addrp); 1121 1122 rv = mod_hash_find_cb(vswp->fdb_hashp, (mod_hash_key_t)key, 1123 (mod_hash_val_t *)&fp, vsw_fdbe_find_cb); 1124 1125 if (rv != 0) 1126 return (NULL); 1127 1128 return (fp); 1129 } 1130 1131 /* 1132 * Callback function provided to mod_hash_find_cb(). 
After finding the fdb
 * entry corresponding to the key (macaddr), this callback will be invoked by
 * mod_hash_find_cb() to atomically increment the reference count on the fdb
 * entry before returning the found entry.
 */
static void
vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
{
	_NOTE(ARGUNUSED(key))
	vsw_fdbe_t	*fp = (vsw_fdbe_t *)val;

	/* 'val' is the fdb entry that was found; take a hold on it. */
	VSW_FDBE_REFHOLD(fp);
}

/*
 * A given frame must be always tagged with the appropriate vlan id (unless it
 * is in the default-vlan) before the mac address switching function is called.
 * Otherwise, after switching function determines the destination, we cannot
 * figure out if the destination belongs to the same vlan that the frame
 * originated from and if it needs tag/untag. Frames which are inbound from
 * the external(physical) network over a vlan trunk link are always tagged.
 * However frames which are received from a vnet-port over ldc or frames which
 * are coming down the stack on the service domain over vsw interface may be
 * untagged. These frames must be tagged with the appropriate pvid of the
 * sender (vnet-port or vsw device), before invoking the switching function.
 *
 * Arguments:
 *	arg:	caller of the function.
 *	type:	type of arg(caller): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port)
 *	mp:	frame(s) to be tagged.
1160 */ 1161 mblk_t * 1162 vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp) 1163 { 1164 vsw_t *vswp; 1165 vsw_port_t *portp; 1166 struct ether_header *ehp; 1167 mblk_t *bp; 1168 mblk_t *bpt; 1169 mblk_t *bph; 1170 mblk_t *bpn; 1171 uint16_t pvid; 1172 1173 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1174 1175 if (type == VSW_LOCALDEV) { 1176 vswp = (vsw_t *)arg; 1177 pvid = vswp->pvid; 1178 portp = NULL; 1179 } else { 1180 /* VSW_VNETPORT */ 1181 portp = (vsw_port_t *)arg; 1182 pvid = portp->pvid; 1183 vswp = portp->p_vswp; 1184 } 1185 1186 bpn = bph = bpt = NULL; 1187 1188 for (bp = mp; bp != NULL; bp = bpn) { 1189 1190 bpn = bp->b_next; 1191 bp->b_next = bp->b_prev = NULL; 1192 1193 /* Determine if it is an untagged frame */ 1194 ehp = (struct ether_header *)bp->b_rptr; 1195 1196 if (ehp->ether_type != ETHERTYPE_VLAN) { /* untagged */ 1197 1198 /* no need to tag if the frame is in default vlan */ 1199 if (pvid != vswp->default_vlan_id) { 1200 bp = vnet_vlan_insert_tag(bp, pvid); 1201 if (bp == NULL) { 1202 continue; 1203 } 1204 } 1205 } 1206 1207 /* build a chain of processed packets */ 1208 if (bph == NULL) { 1209 bph = bpt = bp; 1210 } else { 1211 bpt->b_next = bp; 1212 bpt = bp; 1213 } 1214 1215 } 1216 1217 return (bph); 1218 } 1219 1220 /* 1221 * Frames destined to a vnet-port or to the local vsw interface, must be 1222 * untagged if necessary before sending. This function first checks that the 1223 * frame can be sent to the destination in the vlan identified by the frame 1224 * tag. Note that when this function is invoked the frame must have been 1225 * already tagged (unless it is in the default-vlan). Because, this function is 1226 * called when the switching function determines the destination and invokes 1227 * its send function (vnet-port or vsw interface) and all frames would have 1228 * been tagged by this time (see comments in vsw_vlan_frame_pretag()). 1229 * 1230 * Arguments: 1231 * arg: destination device. 
1232 * type: type of arg(destination): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port) 1233 * np: head of pkt chain to be validated and untagged. 1234 * npt: tail of pkt chain to be validated and untagged. 1235 * 1236 * Returns: 1237 * np: head of updated chain of packets 1238 * npt: tail of updated chain of packets 1239 * rv: count of any packets dropped 1240 */ 1241 uint32_t 1242 vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, mblk_t **npt) 1243 { 1244 mblk_t *bp; 1245 mblk_t *bpt; 1246 mblk_t *bph; 1247 mblk_t *bpn; 1248 vsw_port_t *portp; 1249 vsw_t *vswp; 1250 uint32_t count; 1251 struct ether_header *ehp; 1252 boolean_t is_tagged; 1253 boolean_t rv; 1254 uint16_t vlan_id; 1255 uint16_t pvid; 1256 mod_hash_t *vlan_hashp; 1257 1258 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1259 1260 if (type == VSW_LOCALDEV) { 1261 vswp = (vsw_t *)arg; 1262 pvid = vswp->pvid; 1263 vlan_hashp = vswp->vlan_hashp; 1264 portp = NULL; 1265 } else { 1266 /* type == VSW_VNETPORT */ 1267 portp = (vsw_port_t *)arg; 1268 vswp = portp->p_vswp; 1269 vlan_hashp = portp->vlan_hashp; 1270 pvid = portp->pvid; 1271 } 1272 1273 bpn = bph = bpt = NULL; 1274 count = 0; 1275 1276 for (bp = *np; bp != NULL; bp = bpn) { 1277 1278 bpn = bp->b_next; 1279 bp->b_next = bp->b_prev = NULL; 1280 1281 /* 1282 * Determine the vlan id that the frame belongs to. 1283 */ 1284 ehp = (struct ether_header *)bp->b_rptr; 1285 is_tagged = vsw_frame_lookup_vid(arg, type, ehp, &vlan_id); 1286 1287 /* 1288 * Check if the destination is in the same vlan. 1289 */ 1290 rv = vsw_vlan_lookup(vlan_hashp, vlan_id); 1291 if (rv == B_FALSE) { 1292 /* drop the packet */ 1293 freemsg(bp); 1294 count++; 1295 continue; 1296 } 1297 1298 /* 1299 * Check the frame header if tag/untag is needed. 1300 */ 1301 if (is_tagged == B_FALSE) { 1302 /* 1303 * Untagged frame. 
We shouldn't have an untagged 1304 * packet at this point, unless the destination's 1305 * vlan id is default-vlan-id; if it is not the 1306 * default-vlan-id, we drop the packet. 1307 */ 1308 if (vlan_id != vswp->default_vlan_id) { 1309 /* drop the packet */ 1310 freemsg(bp); 1311 count++; 1312 continue; 1313 } 1314 } else { 1315 /* 1316 * Tagged frame, untag if it's the destination's pvid. 1317 */ 1318 if (vlan_id == pvid) { 1319 1320 bp = vnet_vlan_remove_tag(bp); 1321 if (bp == NULL) { 1322 /* packet dropped */ 1323 count++; 1324 continue; 1325 } 1326 } 1327 } 1328 1329 /* build a chain of processed packets */ 1330 if (bph == NULL) { 1331 bph = bpt = bp; 1332 } else { 1333 bpt->b_next = bp; 1334 bpt = bp; 1335 } 1336 1337 } 1338 1339 *np = bph; 1340 *npt = bpt; 1341 1342 return (count); 1343 } 1344 1345 /* 1346 * Lookup the vlan id of the given frame. If it is a vlan-tagged frame, 1347 * then the vlan-id is available in the tag; otherwise, its vlan id is 1348 * implicitly obtained based on the caller (destination of the frame: 1349 * VSW_VNETPORT or VSW_LOCALDEV). 1350 * The vlan id determined is returned in vidp. 1351 * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged. 1352 */ 1353 boolean_t 1354 vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp, 1355 uint16_t *vidp) 1356 { 1357 struct ether_vlan_header *evhp; 1358 vsw_t *vswp; 1359 vsw_port_t *portp; 1360 1361 /* If it's a tagged frame, get the vid from vlan header */ 1362 if (ehp->ether_type == ETHERTYPE_VLAN) { 1363 1364 evhp = (struct ether_vlan_header *)ehp; 1365 *vidp = VLAN_ID(ntohs(evhp->ether_tci)); 1366 return (B_TRUE); 1367 } 1368 1369 /* Untagged frame; determine vlan id based on caller */ 1370 switch (caller) { 1371 1372 case VSW_VNETPORT: 1373 /* 1374 * packet destined to a vnet; vlan-id is pvid of vnet-port. 
1375 */ 1376 portp = (vsw_port_t *)arg; 1377 *vidp = portp->pvid; 1378 break; 1379 1380 case VSW_LOCALDEV: 1381 1382 /* 1383 * packet destined to vsw interface; 1384 * vlan-id is port-vlan-id of vsw device. 1385 */ 1386 vswp = (vsw_t *)arg; 1387 *vidp = vswp->pvid; 1388 break; 1389 } 1390 1391 return (B_FALSE); 1392 } 1393 1394 /* 1395 * Add or remove multicast address(es). 1396 * 1397 * Returns 0 on success, 1 on failure. 1398 */ 1399 int 1400 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port) 1401 { 1402 mcst_addr_t *mcst_p = NULL; 1403 vsw_t *vswp = port->p_vswp; 1404 uint64_t addr = 0x0; 1405 int i; 1406 1407 D1(vswp, "%s: enter", __func__); 1408 1409 D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count); 1410 1411 for (i = 0; i < mcst_pkt->count; i++) { 1412 /* 1413 * Convert address into form that can be used 1414 * as hash table key. 1415 */ 1416 KEY_HASH(addr, &(mcst_pkt->mca[i])); 1417 1418 /* 1419 * Add or delete the specified address/port combination. 1420 */ 1421 if (mcst_pkt->set == 0x1) { 1422 D3(vswp, "%s: adding multicast address 0x%llx for " 1423 "port %ld", __func__, addr, port->p_instance); 1424 if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1425 /* 1426 * Update the list of multicast 1427 * addresses contained within the 1428 * port structure to include this new 1429 * one. 1430 */ 1431 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), 1432 KM_NOSLEEP); 1433 if (mcst_p == NULL) { 1434 DERR(vswp, "%s: unable to alloc mem", 1435 __func__); 1436 (void) vsw_del_mcst(vswp, 1437 VSW_VNETPORT, addr, port); 1438 return (1); 1439 } 1440 1441 mcst_p->nextp = NULL; 1442 mcst_p->addr = addr; 1443 ether_copy(&mcst_pkt->mca[i], &mcst_p->mca); 1444 1445 /* 1446 * Program the address into HW. 
If the addr 1447 * has already been programmed then the MAC 1448 * just increments a ref counter (which is 1449 * used when the address is being deleted) 1450 */ 1451 mutex_enter(&vswp->mac_lock); 1452 if (vswp->mh != NULL) { 1453 if (mac_multicst_add(vswp->mh, 1454 (uchar_t *)&mcst_pkt->mca[i])) { 1455 mutex_exit(&vswp->mac_lock); 1456 cmn_err(CE_WARN, "!vsw%d: " 1457 "unable to add multicast " 1458 "address: %s\n", 1459 vswp->instance, 1460 ether_sprintf((void *) 1461 &mcst_p->mca)); 1462 (void) vsw_del_mcst(vswp, 1463 VSW_VNETPORT, addr, port); 1464 kmem_free(mcst_p, 1465 sizeof (*mcst_p)); 1466 return (1); 1467 } 1468 mcst_p->mac_added = B_TRUE; 1469 } 1470 mutex_exit(&vswp->mac_lock); 1471 1472 mutex_enter(&port->mca_lock); 1473 mcst_p->nextp = port->mcap; 1474 port->mcap = mcst_p; 1475 mutex_exit(&port->mca_lock); 1476 1477 } else { 1478 DERR(vswp, "%s: error adding multicast " 1479 "address 0x%llx for port %ld", 1480 __func__, addr, port->p_instance); 1481 return (1); 1482 } 1483 } else { 1484 /* 1485 * Delete an entry from the multicast hash 1486 * table and update the address list 1487 * appropriately. 1488 */ 1489 if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1490 D3(vswp, "%s: deleting multicast address " 1491 "0x%llx for port %ld", __func__, addr, 1492 port->p_instance); 1493 1494 mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr); 1495 ASSERT(mcst_p != NULL); 1496 1497 /* 1498 * Remove the address from HW. The address 1499 * will actually only be removed once the ref 1500 * count within the MAC layer has dropped to 1501 * zero. I.e. we can safely call this fn even 1502 * if other ports are interested in this 1503 * address. 
1504 */ 1505 mutex_enter(&vswp->mac_lock); 1506 if (vswp->mh != NULL && mcst_p->mac_added) { 1507 if (mac_multicst_remove(vswp->mh, 1508 (uchar_t *)&mcst_pkt->mca[i])) { 1509 mutex_exit(&vswp->mac_lock); 1510 cmn_err(CE_WARN, "!vsw%d: " 1511 "unable to remove mcast " 1512 "address: %s\n", 1513 vswp->instance, 1514 ether_sprintf((void *) 1515 &mcst_p->mca)); 1516 kmem_free(mcst_p, 1517 sizeof (*mcst_p)); 1518 return (1); 1519 } 1520 mcst_p->mac_added = B_FALSE; 1521 } 1522 mutex_exit(&vswp->mac_lock); 1523 kmem_free(mcst_p, sizeof (*mcst_p)); 1524 1525 } else { 1526 DERR(vswp, "%s: error deleting multicast " 1527 "addr 0x%llx for port %ld", 1528 __func__, addr, port->p_instance); 1529 return (1); 1530 } 1531 } 1532 } 1533 D1(vswp, "%s: exit", __func__); 1534 return (0); 1535 } 1536 1537 /* 1538 * Add a new multicast entry. 1539 * 1540 * Search hash table based on address. If match found then 1541 * update associated val (which is chain of ports), otherwise 1542 * create new key/val (addr/port) pair and insert into table. 1543 */ 1544 int 1545 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1546 { 1547 int dup = 0; 1548 int rv = 0; 1549 mfdb_ent_t *ment = NULL; 1550 mfdb_ent_t *tmp_ent = NULL; 1551 mfdb_ent_t *new_ent = NULL; 1552 void *tgt = NULL; 1553 1554 if (devtype == VSW_VNETPORT) { 1555 /* 1556 * Being invoked from a vnet. 1557 */ 1558 ASSERT(arg != NULL); 1559 tgt = arg; 1560 D2(NULL, "%s: port %d : address 0x%llx", __func__, 1561 ((vsw_port_t *)arg)->p_instance, addr); 1562 } else { 1563 /* 1564 * We are being invoked via the m_multicst mac entry 1565 * point. 
1566 */ 1567 D2(NULL, "%s: address 0x%llx", __func__, addr); 1568 tgt = (void *)vswp; 1569 } 1570 1571 WRITE_ENTER(&vswp->mfdbrw); 1572 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1573 (mod_hash_val_t *)&ment) != 0) { 1574 1575 /* address not currently in table */ 1576 ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1577 ment->d_addr = (void *)tgt; 1578 ment->d_type = devtype; 1579 ment->nextp = NULL; 1580 1581 if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr, 1582 (mod_hash_val_t)ment) != 0) { 1583 DERR(vswp, "%s: hash table insertion failed", __func__); 1584 kmem_free(ment, sizeof (mfdb_ent_t)); 1585 rv = 1; 1586 } else { 1587 D2(vswp, "%s: added initial entry for 0x%llx to " 1588 "table", __func__, addr); 1589 } 1590 } else { 1591 /* 1592 * Address in table. Check to see if specified port 1593 * is already associated with the address. If not add 1594 * it now. 1595 */ 1596 tmp_ent = ment; 1597 while (tmp_ent != NULL) { 1598 if (tmp_ent->d_addr == (void *)tgt) { 1599 if (devtype == VSW_VNETPORT) { 1600 DERR(vswp, "%s: duplicate port entry " 1601 "found for portid %ld and key " 1602 "0x%llx", __func__, 1603 ((vsw_port_t *)arg)->p_instance, 1604 addr); 1605 } else { 1606 DERR(vswp, "%s: duplicate entry found" 1607 "for key 0x%llx", __func__, addr); 1608 } 1609 rv = 1; 1610 dup = 1; 1611 break; 1612 } 1613 tmp_ent = tmp_ent->nextp; 1614 } 1615 1616 /* 1617 * Port not on list so add it to end now. 1618 */ 1619 if (0 == dup) { 1620 D2(vswp, "%s: added entry for 0x%llx to table", 1621 __func__, addr); 1622 new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1623 new_ent->d_addr = (void *)tgt; 1624 new_ent->d_type = devtype; 1625 new_ent->nextp = NULL; 1626 1627 tmp_ent = ment; 1628 while (tmp_ent->nextp != NULL) 1629 tmp_ent = tmp_ent->nextp; 1630 1631 tmp_ent->nextp = new_ent; 1632 } 1633 } 1634 1635 RW_EXIT(&vswp->mfdbrw); 1636 return (rv); 1637 } 1638 1639 /* 1640 * Remove a multicast entry from the hashtable. 
1641 * 1642 * Search hash table based on address. If match found, scan 1643 * list of ports associated with address. If specified port 1644 * found remove it from list. 1645 */ 1646 int 1647 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1648 { 1649 mfdb_ent_t *ment = NULL; 1650 mfdb_ent_t *curr_p, *prev_p; 1651 void *tgt = NULL; 1652 1653 D1(vswp, "%s: enter", __func__); 1654 1655 if (devtype == VSW_VNETPORT) { 1656 tgt = (vsw_port_t *)arg; 1657 D2(vswp, "%s: removing port %d from mFDB for address" 1658 " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr); 1659 } else { 1660 D2(vswp, "%s: removing entry", __func__); 1661 tgt = (void *)vswp; 1662 } 1663 1664 WRITE_ENTER(&vswp->mfdbrw); 1665 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1666 (mod_hash_val_t *)&ment) != 0) { 1667 D2(vswp, "%s: address 0x%llx not in table", __func__, addr); 1668 RW_EXIT(&vswp->mfdbrw); 1669 return (1); 1670 } 1671 1672 prev_p = curr_p = ment; 1673 1674 while (curr_p != NULL) { 1675 if (curr_p->d_addr == (void *)tgt) { 1676 if (devtype == VSW_VNETPORT) { 1677 D2(vswp, "%s: port %d found", __func__, 1678 ((vsw_port_t *)tgt)->p_instance); 1679 } else { 1680 D2(vswp, "%s: instance found", __func__); 1681 } 1682 1683 if (prev_p == curr_p) { 1684 /* 1685 * head of list, if no other element is in 1686 * list then destroy this entry, otherwise 1687 * just replace it with updated value. 1688 */ 1689 ment = curr_p->nextp; 1690 if (ment == NULL) { 1691 (void) mod_hash_destroy(vswp->mfdb, 1692 (mod_hash_val_t)addr); 1693 } else { 1694 (void) mod_hash_replace(vswp->mfdb, 1695 (mod_hash_key_t)addr, 1696 (mod_hash_val_t)ment); 1697 } 1698 } else { 1699 /* 1700 * Not head of list, no need to do 1701 * replacement, just adjust list pointers. 
1702 */ 1703 prev_p->nextp = curr_p->nextp; 1704 } 1705 break; 1706 } 1707 1708 prev_p = curr_p; 1709 curr_p = curr_p->nextp; 1710 } 1711 1712 RW_EXIT(&vswp->mfdbrw); 1713 1714 D1(vswp, "%s: exit", __func__); 1715 1716 if (curr_p == NULL) 1717 return (1); 1718 kmem_free(curr_p, sizeof (mfdb_ent_t)); 1719 return (0); 1720 } 1721 1722 /* 1723 * Port is being deleted, but has registered an interest in one 1724 * or more multicast groups. Using the list of addresses maintained 1725 * within the port structure find the appropriate entry in the hash 1726 * table and remove this port from the list of interested ports. 1727 */ 1728 void 1729 vsw_del_mcst_port(vsw_port_t *port) 1730 { 1731 mcst_addr_t *mcap = NULL; 1732 vsw_t *vswp = port->p_vswp; 1733 1734 D1(vswp, "%s: enter", __func__); 1735 1736 mutex_enter(&port->mca_lock); 1737 1738 while ((mcap = port->mcap) != NULL) { 1739 1740 port->mcap = mcap->nextp; 1741 1742 mutex_exit(&port->mca_lock); 1743 1744 (void) vsw_del_mcst(vswp, VSW_VNETPORT, 1745 mcap->addr, port); 1746 1747 /* 1748 * Remove the address from HW. The address 1749 * will actually only be removed once the ref 1750 * count within the MAC layer has dropped to 1751 * zero. I.e. we can safely call this fn even 1752 * if other ports are interested in this 1753 * address. 1754 */ 1755 mutex_enter(&vswp->mac_lock); 1756 if (vswp->mh != NULL && mcap->mac_added) { 1757 (void) mac_multicst_remove(vswp->mh, 1758 (uchar_t *)&mcap->mca); 1759 } 1760 mutex_exit(&vswp->mac_lock); 1761 1762 kmem_free(mcap, sizeof (*mcap)); 1763 1764 mutex_enter(&port->mca_lock); 1765 1766 } 1767 1768 mutex_exit(&port->mca_lock); 1769 1770 D1(vswp, "%s: exit", __func__); 1771 } 1772 1773 /* 1774 * This vsw instance is detaching, but has registered an interest in one 1775 * or more multicast groups. Using the list of addresses maintained 1776 * within the vsw structure find the appropriate entry in the hash 1777 * table and remove this instance from the list of interested ports. 
1778 */ 1779 void 1780 vsw_del_mcst_vsw(vsw_t *vswp) 1781 { 1782 mcst_addr_t *next_p = NULL; 1783 1784 D1(vswp, "%s: enter", __func__); 1785 1786 mutex_enter(&vswp->mca_lock); 1787 1788 while (vswp->mcap != NULL) { 1789 DERR(vswp, "%s: deleting addr 0x%llx", 1790 __func__, vswp->mcap->addr); 1791 (void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL); 1792 1793 next_p = vswp->mcap->nextp; 1794 kmem_free(vswp->mcap, sizeof (mcst_addr_t)); 1795 vswp->mcap = next_p; 1796 } 1797 1798 vswp->mcap = NULL; 1799 mutex_exit(&vswp->mca_lock); 1800 1801 D1(vswp, "%s: exit", __func__); 1802 } 1803 1804 static uint32_t 1805 vsw_get_same_dest_list(struct ether_header *ehp, 1806 mblk_t **rhead, mblk_t **rtail, mblk_t **mpp) 1807 { 1808 uint32_t count = 0; 1809 mblk_t *bp; 1810 mblk_t *nbp; 1811 mblk_t *head = NULL; 1812 mblk_t *tail = NULL; 1813 mblk_t *prev = NULL; 1814 struct ether_header *behp; 1815 1816 /* process the chain of packets */ 1817 bp = *mpp; 1818 while (bp) { 1819 nbp = bp->b_next; 1820 behp = (struct ether_header *)bp->b_rptr; 1821 bp->b_prev = NULL; 1822 if (ether_cmp(&ehp->ether_dhost, &behp->ether_dhost) == 0) { 1823 if (prev == NULL) { 1824 *mpp = nbp; 1825 } else { 1826 prev->b_next = nbp; 1827 } 1828 bp->b_next = NULL; 1829 if (head == NULL) { 1830 head = tail = bp; 1831 } else { 1832 tail->b_next = bp; 1833 tail = bp; 1834 } 1835 count++; 1836 } else { 1837 prev = bp; 1838 } 1839 bp = nbp; 1840 } 1841 *rhead = head; 1842 *rtail = tail; 1843 DTRACE_PROBE1(vsw_same_dest, int, count); 1844 return (count); 1845 } 1846 1847 static mblk_t * 1848 vsw_dupmsgchain(mblk_t *mp) 1849 { 1850 mblk_t *nmp = NULL; 1851 mblk_t **nmpp = &nmp; 1852 1853 for (; mp != NULL; mp = mp->b_next) { 1854 if ((*nmpp = dupmsg(mp)) == NULL) { 1855 freemsgchain(nmp); 1856 return (NULL); 1857 } 1858 1859 nmpp = &((*nmpp)->b_next); 1860 } 1861 1862 return (nmp); 1863 } 1864