1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/errno.h> 31 #include <sys/debug.h> 32 #include <sys/time.h> 33 #include <sys/sysmacros.h> 34 #include <sys/systm.h> 35 #include <sys/user.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strlog.h> 39 #include <sys/strsubr.h> 40 #include <sys/cmn_err.h> 41 #include <sys/cpu.h> 42 #include <sys/kmem.h> 43 #include <sys/conf.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/ksynch.h> 47 #include <sys/stat.h> 48 #include <sys/kstat.h> 49 #include <sys/vtrace.h> 50 #include <sys/strsun.h> 51 #include <sys/dlpi.h> 52 #include <sys/ethernet.h> 53 #include <net/if.h> 54 #include <sys/varargs.h> 55 #include <sys/machsystm.h> 56 #include <sys/modctl.h> 57 #include <sys/modhash.h> 58 #include <sys/mac.h> 59 #include <sys/mac_ether.h> 60 #include <sys/taskq.h> 61 #include <sys/note.h> 62 #include <sys/mach_descrip.h> 63 #include <sys/mac.h> 64 #include <sys/mdeg.h> 65 #include <sys/ldc.h> 66 
#include <sys/vsw_fdb.h>
#include <sys/vsw.h>
#include <sys/vio_mailbox.h>
#include <sys/vnet_mailbox.h>
#include <sys/vnet_common.h>
#include <sys/vio_util.h>
#include <sys/sdt.h>
#include <sys/atomic.h>
#include <sys/vlan.h>

/* Switching setup routines */
void vsw_setup_switching_timeout(void *arg);
void vsw_stop_switching_timeout(vsw_t *vswp);
int vsw_setup_switching(vsw_t *);
static int vsw_setup_layer2(vsw_t *);
static int vsw_setup_layer3(vsw_t *);

/* Switching/data transmit routines */
static void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
	vsw_port_t *port, mac_resource_handle_t);
static void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
	vsw_port_t *port, mac_resource_handle_t);
static int vsw_forward_all(vsw_t *vswp, mblk_t *mp,
	int caller, vsw_port_t *port);
static int vsw_forward_grp(vsw_t *vswp, mblk_t *mp,
	int caller, vsw_port_t *port);

/* VLAN routines */
void vsw_create_vlans(void *arg, int type);
void vsw_destroy_vlans(void *arg, int type);
void vsw_vlan_add_ids(void *arg, int type);
void vsw_vlan_remove_ids(void *arg, int type);
static void vsw_vlan_create_hash(void *arg, int type);
static void vsw_vlan_destroy_hash(void *arg, int type);
boolean_t vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp,
	uint16_t *vidp);
mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
/* Name must match the definition later in this file (vsw_vlan_frame_untag). */
uint32_t vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, mblk_t **npt);
boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);

/* Forwarding database (FDB) routines */
void vsw_fdbe_add(vsw_t *vswp, void *port);
void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr);
static vsw_fdbe_t *vsw_fdbe_find(vsw_t *vswp, struct ether_addr *);
static void vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);

int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
void vsw_del_mcst_vsw(vsw_t *);

/* Support functions */
static mblk_t *vsw_dupmsgchain(mblk_t *mp);
static uint32_t vsw_get_same_dest_list(struct ether_header *ehp,
	mblk_t **rhead, mblk_t **rtail, mblk_t **mpp);


/*
 * Functions imported from other files.
 */
extern mblk_t *vsw_tx_msg(vsw_t *, mblk_t *);
extern mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t);
extern int vsw_mac_open(vsw_t *vswp);
extern void vsw_mac_close(vsw_t *vswp);
extern void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
	mblk_t *mp, vsw_macrx_flags_t flags);
extern void vsw_set_addrs(vsw_t *vswp);
extern int vsw_get_hw_maddr(vsw_t *);
extern int vsw_mac_attach(vsw_t *vswp);
extern int vsw_portsend(vsw_port_t *port, mblk_t *mp, mblk_t *mpt,
	uint32_t count);
extern void vsw_hio_init(vsw_t *vswp);
extern void vsw_hio_start_ports(vsw_t *vswp);

/*
 * Tunables used in this file.
 */
extern int vsw_setup_switching_delay;
extern uint32_t vsw_vlan_nchains;
extern uint32_t vsw_fdbe_refcnt_delay;

/*
 * Take a reference on an fdb entry: atomically bump its refcnt and, on
 * DEBUG builds, assert that the counter did not wrap to zero.
 *
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and can be used safely in an unbraced if/else.
 */
#define	VSW_FDBE_REFHOLD(p)						\
	do {								\
		atomic_inc_32(&(p)->refcnt);				\
		ASSERT((p)->refcnt != 0);				\
	_NOTE(CONSTCOND)						\
	} while (0)

/*
 * Drop a reference on an fdb entry: on DEBUG builds assert that a
 * reference is actually held, then atomically decrement refcnt.
 */
#define	VSW_FDBE_REFRELE(p)						\
	do {								\
		ASSERT((p)->refcnt != 0);				\
		atomic_dec_32(&(p)->refcnt);				\
	_NOTE(CONSTCOND)						\
	} while (0)

/*
 * Timeout routine to setup switching mode:
 * vsw_setup_switching() is invoked from vsw_attach() or vsw_update_md_prop()
 * initially. If it fails and the error is EAGAIN, then this timeout handler
 * is started to retry vsw_setup_switching(). vsw_setup_switching() is retried
 * until we successfully finish it; or the returned error is not EAGAIN.
165 */ 166 void 167 vsw_setup_switching_timeout(void *arg) 168 { 169 vsw_t *vswp = (vsw_t *)arg; 170 int rv; 171 172 if (vswp->swtmout_enabled == B_FALSE) 173 return; 174 175 rv = vsw_setup_switching(vswp); 176 177 if (rv == 0) { 178 /* 179 * Successfully setup switching mode. 180 * Program unicst, mcst addrs of vsw 181 * interface and ports in the physdev. 182 */ 183 vsw_set_addrs(vswp); 184 185 /* Start HIO for ports that have already connected */ 186 vsw_hio_start_ports(vswp); 187 } 188 189 mutex_enter(&vswp->swtmout_lock); 190 191 if (rv == EAGAIN && vswp->swtmout_enabled == B_TRUE) { 192 /* 193 * Reschedule timeout() if the error is EAGAIN and the 194 * timeout is still enabled. For errors other than EAGAIN, 195 * we simply return without rescheduling timeout(). 196 */ 197 vswp->swtmout_id = 198 timeout(vsw_setup_switching_timeout, vswp, 199 (vsw_setup_switching_delay * drv_usectohz(MICROSEC))); 200 goto exit; 201 } 202 203 /* timeout handler completed */ 204 vswp->swtmout_enabled = B_FALSE; 205 vswp->swtmout_id = 0; 206 207 exit: 208 mutex_exit(&vswp->swtmout_lock); 209 } 210 211 /* 212 * Cancel the timeout handler to setup switching mode. 213 */ 214 void 215 vsw_stop_switching_timeout(vsw_t *vswp) 216 { 217 timeout_id_t tid; 218 219 mutex_enter(&vswp->swtmout_lock); 220 221 tid = vswp->swtmout_id; 222 223 if (tid != 0) { 224 /* signal timeout handler to stop */ 225 vswp->swtmout_enabled = B_FALSE; 226 vswp->swtmout_id = 0; 227 mutex_exit(&vswp->swtmout_lock); 228 229 (void) untimeout(tid); 230 } else { 231 mutex_exit(&vswp->swtmout_lock); 232 } 233 234 (void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE); 235 236 mutex_enter(&vswp->mac_lock); 237 vswp->mac_open_retries = 0; 238 mutex_exit(&vswp->mac_lock); 239 } 240 241 /* 242 * Setup the required switching mode. 243 * This routine is invoked from vsw_attach() or vsw_update_md_prop() 244 * initially. 
If it fails and the error is EAGAIN, then a timeout handler 245 * is started to retry vsw_setup_switching(), until it successfully finishes; 246 * or the returned error is not EAGAIN. 247 * 248 * Returns: 249 * 0 on success. 250 * EAGAIN if retry is needed. 251 * 1 on all other failures. 252 */ 253 int 254 vsw_setup_switching(vsw_t *vswp) 255 { 256 int i, rv = 1; 257 258 D1(vswp, "%s: enter", __func__); 259 260 /* 261 * Select best switching mode. 262 * Note that we start from the saved smode_idx. This is done as 263 * this routine can be called from the timeout handler to retry 264 * setting up a specific mode. Currently only the function which 265 * sets up layer2/promisc mode returns EAGAIN if the underlying 266 * physical device is not available yet, causing retries. 267 */ 268 for (i = vswp->smode_idx; i < vswp->smode_num; i++) { 269 vswp->smode_idx = i; 270 switch (vswp->smode[i]) { 271 case VSW_LAYER2: 272 case VSW_LAYER2_PROMISC: 273 rv = vsw_setup_layer2(vswp); 274 break; 275 276 case VSW_LAYER3: 277 rv = vsw_setup_layer3(vswp); 278 break; 279 280 default: 281 DERR(vswp, "unknown switch mode"); 282 break; 283 } 284 285 if ((rv == 0) || (rv == EAGAIN)) 286 break; 287 288 /* all other errors(rv != 0): continue & select the next mode */ 289 rv = 1; 290 } 291 292 if (rv && (rv != EAGAIN)) { 293 cmn_err(CE_WARN, "!vsw%d: Unable to setup specified " 294 "switching mode", vswp->instance); 295 } else if (rv == 0) { 296 (void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE); 297 } 298 299 D2(vswp, "%s: Operating in mode %d", __func__, 300 vswp->smode[vswp->smode_idx]); 301 302 D1(vswp, "%s: exit", __func__); 303 304 return (rv); 305 } 306 307 /* 308 * Setup for layer 2 switching. 309 * 310 * Returns: 311 * 0 on success. 312 * EAGAIN if retry is needed. 313 * EIO on all other failures. 
314 */ 315 static int 316 vsw_setup_layer2(vsw_t *vswp) 317 { 318 int rv; 319 320 D1(vswp, "%s: enter", __func__); 321 322 vswp->vsw_switch_frame = vsw_switch_l2_frame; 323 324 rv = strlen(vswp->physname); 325 if (rv == 0) { 326 /* 327 * Physical device name is NULL, which is 328 * required for layer 2. 329 */ 330 cmn_err(CE_WARN, "!vsw%d: no physical device name specified", 331 vswp->instance); 332 return (EIO); 333 } 334 335 mutex_enter(&vswp->mac_lock); 336 337 rv = vsw_mac_open(vswp); 338 if (rv != 0) { 339 if (rv != EAGAIN) { 340 cmn_err(CE_WARN, "!vsw%d: Unable to open physical " 341 "device: %s\n", vswp->instance, vswp->physname); 342 } 343 mutex_exit(&vswp->mac_lock); 344 return (rv); 345 } 346 347 if (vswp->smode[vswp->smode_idx] == VSW_LAYER2) { 348 /* 349 * Verify that underlying device can support multiple 350 * unicast mac addresses. 351 */ 352 rv = vsw_get_hw_maddr(vswp); 353 if (rv != 0) { 354 goto exit_error; 355 } 356 } 357 358 /* 359 * Attempt to link into the MAC layer so we can get 360 * and send packets out over the physical adapter. 361 */ 362 rv = vsw_mac_attach(vswp); 363 if (rv != 0) { 364 /* 365 * Registration with the MAC layer has failed, 366 * so return error so that can fall back to next 367 * prefered switching method. 
368 */ 369 cmn_err(CE_WARN, "!vsw%d: Unable to setup physical device: " 370 "%s\n", vswp->instance, vswp->physname); 371 goto exit_error; 372 } 373 374 D1(vswp, "%s: exit", __func__); 375 376 mutex_exit(&vswp->mac_lock); 377 378 /* Initialize HybridIO related stuff */ 379 vsw_hio_init(vswp); 380 return (0); 381 382 exit_error: 383 vsw_mac_close(vswp); 384 mutex_exit(&vswp->mac_lock); 385 return (EIO); 386 } 387 388 static int 389 vsw_setup_layer3(vsw_t *vswp) 390 { 391 D1(vswp, "%s: enter", __func__); 392 393 D2(vswp, "%s: operating in layer 3 mode", __func__); 394 vswp->vsw_switch_frame = vsw_switch_l3_frame; 395 396 D1(vswp, "%s: exit", __func__); 397 398 return (0); 399 } 400 401 /* 402 * Switch the given ethernet frame when operating in layer 2 mode. 403 * 404 * vswp: pointer to the vsw instance 405 * mp: pointer to chain of ethernet frame(s) to be switched 406 * caller: identifies the source of this frame as: 407 * 1. VSW_VNETPORT - a vsw port (connected to a vnet). 408 * 2. VSW_PHYSDEV - the physical ethernet device 409 * 3. VSW_LOCALDEV - vsw configured as a virtual interface 410 * arg: argument provided by the caller. 411 * 1. for VNETPORT - pointer to the corresponding vsw_port_t. 412 * 2. for PHYSDEV - NULL 413 * 3. for LOCALDEV - pointer to to this vsw_t(self) 414 */ 415 void 416 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller, 417 vsw_port_t *arg, mac_resource_handle_t mrh) 418 { 419 struct ether_header *ehp; 420 mblk_t *bp, *ret_m; 421 mblk_t *mpt = NULL; 422 uint32_t count; 423 vsw_fdbe_t *fp; 424 425 D1(vswp, "%s: enter (caller %d)", __func__, caller); 426 427 /* 428 * PERF: rather than breaking up the chain here, scan it 429 * to find all mblks heading to same destination and then 430 * pass that sub-chain to the lower transmit functions. 
431 */ 432 433 /* process the chain of packets */ 434 bp = mp; 435 while (bp) { 436 ehp = (struct ether_header *)bp->b_rptr; 437 count = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp); 438 ASSERT(count != 0); 439 440 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 441 __func__, MBLKSIZE(mp), MBLKL(mp)); 442 443 if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) { 444 /* 445 * If destination is VSW_LOCALDEV (vsw as an eth 446 * interface) and if the device is up & running, 447 * send the packet up the stack on this host. 448 * If the virtual interface is down, drop the packet. 449 */ 450 if (caller != VSW_LOCALDEV) { 451 vsw_mac_rx(vswp, mrh, mp, VSW_MACRX_FREEMSG); 452 } else { 453 freemsgchain(mp); 454 } 455 continue; 456 } 457 458 /* 459 * Find fdb entry for the destination 460 * and hold a reference to it. 461 */ 462 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 463 if (fp != NULL) { 464 465 /* 466 * If plumbed and in promisc mode then copy msg 467 * and send up the stack. 468 */ 469 vsw_mac_rx(vswp, mrh, mp, 470 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 471 472 /* 473 * If the destination is in FDB, the packet 474 * should be forwarded to the correponding 475 * vsw_port (connected to a vnet device - 476 * VSW_VNETPORT) 477 */ 478 (void) vsw_portsend(fp->portp, mp, mpt, count); 479 480 /* Release the reference on the fdb entry */ 481 VSW_FDBE_REFRELE(fp); 482 } else { 483 /* 484 * Destination not in FDB. 485 * 486 * If the destination is broadcast or 487 * multicast forward the packet to all 488 * (VNETPORTs, PHYSDEV, LOCALDEV), 489 * except the caller. 
490 */ 491 if (IS_BROADCAST(ehp)) { 492 D2(vswp, "%s: BROADCAST pkt", __func__); 493 (void) vsw_forward_all(vswp, mp, caller, arg); 494 } else if (IS_MULTICAST(ehp)) { 495 D2(vswp, "%s: MULTICAST pkt", __func__); 496 (void) vsw_forward_grp(vswp, mp, caller, arg); 497 } else { 498 /* 499 * If the destination is unicast, and came 500 * from either a logical network device or 501 * the switch itself when it is plumbed, then 502 * send it out on the physical device and also 503 * up the stack if the logical interface is 504 * in promiscious mode. 505 * 506 * NOTE: The assumption here is that if we 507 * cannot find the destination in our fdb, its 508 * a unicast address, and came from either a 509 * vnet or down the stack (when plumbed) it 510 * must be destinded for an ethernet device 511 * outside our ldoms. 512 */ 513 if (caller == VSW_VNETPORT) { 514 /* promisc check copy etc */ 515 vsw_mac_rx(vswp, mrh, mp, 516 VSW_MACRX_PROMISC | 517 VSW_MACRX_COPYMSG); 518 519 if ((ret_m = vsw_tx_msg(vswp, mp)) 520 != NULL) { 521 DERR(vswp, "%s: drop mblks to " 522 "phys dev", __func__); 523 freemsgchain(ret_m); 524 } 525 526 } else if (caller == VSW_PHYSDEV) { 527 /* 528 * Pkt seen because card in promisc 529 * mode. Send up stack if plumbed in 530 * promisc mode, else drop it. 531 */ 532 vsw_mac_rx(vswp, mrh, mp, 533 VSW_MACRX_PROMISC | 534 VSW_MACRX_FREEMSG); 535 536 } else if (caller == VSW_LOCALDEV) { 537 /* 538 * Pkt came down the stack, send out 539 * over physical device. 540 */ 541 if ((ret_m = vsw_tx_msg(vswp, mp)) 542 != NULL) { 543 DERR(vswp, "%s: drop mblks to " 544 "phys dev", __func__); 545 freemsgchain(ret_m); 546 } 547 } 548 } 549 } 550 } 551 D1(vswp, "%s: exit\n", __func__); 552 } 553 554 /* 555 * Switch ethernet frame when in layer 3 mode (i.e. using IP 556 * layer to do the routing). 557 * 558 * There is a large amount of overlap between this function and 559 * vsw_switch_l2_frame. At some stage we need to revisit and refactor 560 * both these functions. 
561 */ 562 void 563 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller, 564 vsw_port_t *arg, mac_resource_handle_t mrh) 565 { 566 struct ether_header *ehp; 567 mblk_t *bp = NULL; 568 mblk_t *mpt; 569 uint32_t count; 570 vsw_fdbe_t *fp; 571 572 D1(vswp, "%s: enter (caller %d)", __func__, caller); 573 574 /* 575 * In layer 3 mode should only ever be switching packets 576 * between IP layer and vnet devices. So make sure thats 577 * who is invoking us. 578 */ 579 if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) { 580 DERR(vswp, "%s: unexpected caller (%d)", __func__, caller); 581 freemsgchain(mp); 582 return; 583 } 584 585 /* process the chain of packets */ 586 bp = mp; 587 while (bp) { 588 ehp = (struct ether_header *)bp->b_rptr; 589 count = vsw_get_same_dest_list(ehp, &mp, &mpt, &bp); 590 ASSERT(count != 0); 591 592 D2(vswp, "%s: mblk data buffer %lld : actual data size %lld", 593 __func__, MBLKSIZE(mp), MBLKL(mp)); 594 595 /* 596 * Find fdb entry for the destination 597 * and hold a reference to it. 598 */ 599 fp = vsw_fdbe_find(vswp, &ehp->ether_dhost); 600 if (fp != NULL) { 601 602 D2(vswp, "%s: sending to target port", __func__); 603 (void) vsw_portsend(fp->portp, mp, mpt, count); 604 605 /* Release the reference on the fdb entry */ 606 VSW_FDBE_REFRELE(fp); 607 } else { 608 /* 609 * Destination not in FDB 610 * 611 * If the destination is broadcast or 612 * multicast forward the packet to all 613 * (VNETPORTs, PHYSDEV, LOCALDEV), 614 * except the caller. 615 */ 616 if (IS_BROADCAST(ehp)) { 617 D2(vswp, "%s: BROADCAST pkt", __func__); 618 (void) vsw_forward_all(vswp, mp, caller, arg); 619 } else if (IS_MULTICAST(ehp)) { 620 D2(vswp, "%s: MULTICAST pkt", __func__); 621 (void) vsw_forward_grp(vswp, mp, caller, arg); 622 } else { 623 /* 624 * Unicast pkt from vnet that we don't have 625 * an FDB entry for, so must be destinded for 626 * the outside world. Attempt to send up to the 627 * IP layer to allow it to deal with it. 
628 */ 629 if (caller == VSW_VNETPORT) { 630 vsw_mac_rx(vswp, mrh, 631 mp, VSW_MACRX_FREEMSG); 632 } 633 } 634 } 635 } 636 637 D1(vswp, "%s: exit", __func__); 638 } 639 640 /* 641 * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV), 642 * except the caller (port on which frame arrived). 643 */ 644 static int 645 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 646 { 647 vsw_port_list_t *plist = &vswp->plist; 648 vsw_port_t *portp; 649 mblk_t *nmp = NULL; 650 mblk_t *ret_m = NULL; 651 int skip_port = 0; 652 653 D1(vswp, "vsw_forward_all: enter\n"); 654 655 /* 656 * Broadcast message from inside ldoms so send to outside 657 * world if in either of layer 2 modes. 658 */ 659 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 660 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 661 ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) { 662 663 nmp = vsw_dupmsgchain(mp); 664 if (nmp) { 665 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 666 DERR(vswp, "%s: dropping pkt(s) " 667 "consisting of %ld bytes of data for" 668 " physical device", __func__, MBLKL(ret_m)); 669 freemsgchain(ret_m); 670 } 671 } 672 } 673 674 if (caller == VSW_VNETPORT) 675 skip_port = 1; 676 677 /* 678 * Broadcast message from other vnet (layer 2 or 3) or outside 679 * world (layer 2 only), send up stack if plumbed. 680 */ 681 if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) { 682 vsw_mac_rx(vswp, NULL, mp, VSW_MACRX_COPYMSG); 683 } 684 685 /* send it to all VNETPORTs */ 686 READ_ENTER(&plist->lockrw); 687 for (portp = plist->head; portp != NULL; portp = portp->p_next) { 688 D2(vswp, "vsw_forward_all: port %d", portp->p_instance); 689 /* 690 * Caution ! - don't reorder these two checks as arg 691 * will be NULL if the caller is PHYSDEV. skip_port is 692 * only set if caller is VNETPORT. 
693 */ 694 if ((skip_port) && (portp == arg)) { 695 continue; 696 } else { 697 nmp = vsw_dupmsgchain(mp); 698 if (nmp) { 699 mblk_t *mpt = nmp; 700 uint32_t count = 1; 701 702 /* Find tail */ 703 while (mpt->b_next != NULL) { 704 mpt = mpt->b_next; 705 count++; 706 } 707 /* 708 * The plist->lockrw is protecting the 709 * portp from getting destroyed here. 710 * So, no ref_cnt is incremented here. 711 */ 712 (void) vsw_portsend(portp, nmp, mpt, count); 713 } else { 714 DERR(vswp, "vsw_forward_all: nmp NULL"); 715 } 716 } 717 } 718 RW_EXIT(&plist->lockrw); 719 720 freemsgchain(mp); 721 722 D1(vswp, "vsw_forward_all: exit\n"); 723 return (0); 724 } 725 726 /* 727 * Forward pkts to any devices or interfaces which have registered 728 * an interest in them (i.e. multicast groups). 729 */ 730 static int 731 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg) 732 { 733 struct ether_header *ehp = (struct ether_header *)mp->b_rptr; 734 mfdb_ent_t *entp = NULL; 735 mfdb_ent_t *tpp = NULL; 736 vsw_port_t *port; 737 uint64_t key = 0; 738 mblk_t *nmp = NULL; 739 mblk_t *ret_m = NULL; 740 boolean_t check_if = B_TRUE; 741 742 /* 743 * Convert address to hash table key 744 */ 745 KEY_HASH(key, &ehp->ether_dhost); 746 747 D1(vswp, "%s: key 0x%llx", __func__, key); 748 749 /* 750 * If pkt came from either a vnet or down the stack (if we are 751 * plumbed) and we are in layer 2 mode, then we send the pkt out 752 * over the physical adapter, and then check to see if any other 753 * vnets are interested in it. 
754 */ 755 if (((vswp->smode[vswp->smode_idx] == VSW_LAYER2) || 756 (vswp->smode[vswp->smode_idx] == VSW_LAYER2_PROMISC)) && 757 ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) { 758 nmp = vsw_dupmsgchain(mp); 759 if (nmp) { 760 if ((ret_m = vsw_tx_msg(vswp, nmp)) != NULL) { 761 DERR(vswp, "%s: dropping pkt(s) consisting of " 762 "%ld bytes of data for physical device", 763 __func__, MBLKL(ret_m)); 764 freemsgchain(ret_m); 765 } 766 } 767 } 768 769 READ_ENTER(&vswp->mfdbrw); 770 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key, 771 (mod_hash_val_t *)&entp) != 0) { 772 D3(vswp, "%s: no table entry found for addr 0x%llx", 773 __func__, key); 774 } else { 775 /* 776 * Send to list of devices associated with this address... 777 */ 778 for (tpp = entp; tpp != NULL; tpp = tpp->nextp) { 779 780 /* dont send to ourselves */ 781 if ((caller == VSW_VNETPORT) && 782 (tpp->d_addr == (void *)arg)) { 783 port = (vsw_port_t *)tpp->d_addr; 784 D3(vswp, "%s: not sending to ourselves" 785 " : port %d", __func__, port->p_instance); 786 continue; 787 788 } else if ((caller == VSW_LOCALDEV) && 789 (tpp->d_type == VSW_LOCALDEV)) { 790 D2(vswp, "%s: not sending back up stack", 791 __func__); 792 continue; 793 } 794 795 if (tpp->d_type == VSW_VNETPORT) { 796 port = (vsw_port_t *)tpp->d_addr; 797 D3(vswp, "%s: sending to port %ld for addr " 798 "0x%llx", __func__, port->p_instance, key); 799 800 nmp = vsw_dupmsgchain(mp); 801 if (nmp) { 802 mblk_t *mpt = nmp; 803 uint32_t count = 1; 804 805 /* Find tail */ 806 while (mpt->b_next != NULL) { 807 mpt = mpt->b_next; 808 count++; 809 } 810 /* 811 * The vswp->mfdbrw is protecting the 812 * portp from getting destroyed here. 813 * So, no ref_cnt is incremented here. 
814 */ 815 (void) vsw_portsend(port, nmp, mpt, 816 count); 817 } 818 } else { 819 vsw_mac_rx(vswp, NULL, 820 mp, VSW_MACRX_COPYMSG); 821 D2(vswp, "%s: sending up stack" 822 " for addr 0x%llx", __func__, key); 823 check_if = B_FALSE; 824 } 825 } 826 } 827 828 RW_EXIT(&vswp->mfdbrw); 829 830 /* 831 * If the pkt came from either a vnet or from physical device, 832 * and if we havent already sent the pkt up the stack then we 833 * check now if we can/should (i.e. the interface is plumbed 834 * and in promisc mode). 835 */ 836 if ((check_if) && 837 ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) { 838 vsw_mac_rx(vswp, NULL, mp, 839 VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG); 840 } 841 842 freemsgchain(mp); 843 844 D1(vswp, "%s: exit", __func__); 845 846 return (0); 847 } 848 849 /* 850 * This function creates the vlan id hash table for the given vsw device or 851 * port. It then adds each vlan that the device or port has been assigned, 852 * into this hash table. 853 * Arguments: 854 * arg: vsw device or port. 855 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 856 */ 857 void 858 vsw_create_vlans(void *arg, int type) 859 { 860 /* create vlan hash table */ 861 vsw_vlan_create_hash(arg, type); 862 863 /* add vlan ids of the vsw device into its hash table */ 864 vsw_vlan_add_ids(arg, type); 865 } 866 867 /* 868 * This function removes the vlan ids of the vsw device or port from its hash 869 * table. It then destroys the vlan hash table. 870 * Arguments: 871 * arg: vsw device or port. 872 * type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port). 873 */ 874 void 875 vsw_destroy_vlans(void *arg, int type) 876 { 877 /* remove vlan ids from the hash table */ 878 vsw_vlan_remove_ids(arg, type); 879 880 /* destroy vlan-hash-table */ 881 vsw_vlan_destroy_hash(arg, type); 882 } 883 884 /* 885 * Create a vlan-id hash table for the given vsw device or port. 
886 */ 887 static void 888 vsw_vlan_create_hash(void *arg, int type) 889 { 890 char hashname[MAXNAMELEN]; 891 892 if (type == VSW_LOCALDEV) { 893 vsw_t *vswp = (vsw_t *)arg; 894 895 (void) snprintf(hashname, MAXNAMELEN, "vsw%d-vlan-hash", 896 vswp->instance); 897 898 vswp->vlan_nchains = vsw_vlan_nchains; 899 vswp->vlan_hashp = mod_hash_create_idhash(hashname, 900 vswp->vlan_nchains, mod_hash_null_valdtor); 901 902 } else if (type == VSW_VNETPORT) { 903 vsw_port_t *portp = (vsw_port_t *)arg; 904 905 (void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash", 906 portp->p_instance); 907 908 portp->vlan_nchains = vsw_vlan_nchains; 909 portp->vlan_hashp = mod_hash_create_idhash(hashname, 910 portp->vlan_nchains, mod_hash_null_valdtor); 911 912 } else { 913 return; 914 } 915 } 916 917 /* 918 * Destroy the vlan-id hash table for the given vsw device or port. 919 */ 920 static void 921 vsw_vlan_destroy_hash(void *arg, int type) 922 { 923 if (type == VSW_LOCALDEV) { 924 vsw_t *vswp = (vsw_t *)arg; 925 926 mod_hash_destroy_hash(vswp->vlan_hashp); 927 vswp->vlan_nchains = 0; 928 } else if (type == VSW_VNETPORT) { 929 vsw_port_t *portp = (vsw_port_t *)arg; 930 931 mod_hash_destroy_hash(portp->vlan_hashp); 932 portp->vlan_nchains = 0; 933 } else { 934 return; 935 } 936 } 937 938 /* 939 * Add vlan ids of the given vsw device or port into its hash table. 
940 */ 941 void 942 vsw_vlan_add_ids(void *arg, int type) 943 { 944 int rv; 945 int i; 946 947 if (type == VSW_LOCALDEV) { 948 vsw_t *vswp = (vsw_t *)arg; 949 950 rv = mod_hash_insert(vswp->vlan_hashp, 951 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 952 (mod_hash_val_t)B_TRUE); 953 ASSERT(rv == 0); 954 955 for (i = 0; i < vswp->nvids; i++) { 956 rv = mod_hash_insert(vswp->vlan_hashp, 957 (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i]), 958 (mod_hash_val_t)B_TRUE); 959 ASSERT(rv == 0); 960 } 961 962 } else if (type == VSW_VNETPORT) { 963 vsw_port_t *portp = (vsw_port_t *)arg; 964 965 rv = mod_hash_insert(portp->vlan_hashp, 966 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 967 (mod_hash_val_t)B_TRUE); 968 ASSERT(rv == 0); 969 970 for (i = 0; i < portp->nvids; i++) { 971 rv = mod_hash_insert(portp->vlan_hashp, 972 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]), 973 (mod_hash_val_t)B_TRUE); 974 ASSERT(rv == 0); 975 } 976 977 } else { 978 return; 979 } 980 } 981 982 /* 983 * Remove vlan ids of the given vsw device or port from its hash table. 
984 */ 985 void 986 vsw_vlan_remove_ids(void *arg, int type) 987 { 988 mod_hash_val_t vp; 989 int rv; 990 int i; 991 992 if (type == VSW_LOCALDEV) { 993 vsw_t *vswp = (vsw_t *)arg; 994 995 rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->pvid); 996 if (rv == B_TRUE) { 997 rv = mod_hash_remove(vswp->vlan_hashp, 998 (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid), 999 (mod_hash_val_t *)&vp); 1000 ASSERT(rv == 0); 1001 } 1002 1003 for (i = 0; i < vswp->nvids; i++) { 1004 rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->vids[i]); 1005 if (rv == B_TRUE) { 1006 rv = mod_hash_remove(vswp->vlan_hashp, 1007 (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i]), 1008 (mod_hash_val_t *)&vp); 1009 ASSERT(rv == 0); 1010 } 1011 } 1012 1013 } else if (type == VSW_VNETPORT) { 1014 vsw_port_t *portp = (vsw_port_t *)arg; 1015 1016 portp = (vsw_port_t *)arg; 1017 rv = vsw_vlan_lookup(portp->vlan_hashp, portp->pvid); 1018 if (rv == B_TRUE) { 1019 rv = mod_hash_remove(portp->vlan_hashp, 1020 (mod_hash_key_t)VLAN_ID_KEY(portp->pvid), 1021 (mod_hash_val_t *)&vp); 1022 ASSERT(rv == 0); 1023 } 1024 1025 for (i = 0; i < portp->nvids; i++) { 1026 rv = vsw_vlan_lookup(portp->vlan_hashp, portp->vids[i]); 1027 if (rv == B_TRUE) { 1028 rv = mod_hash_remove(portp->vlan_hashp, 1029 (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i]), 1030 (mod_hash_val_t *)&vp); 1031 ASSERT(rv == 0); 1032 } 1033 } 1034 1035 } else { 1036 return; 1037 } 1038 } 1039 1040 /* 1041 * Find the given vlan id in the hash table. 1042 * Return: B_TRUE if the id is found; B_FALSE if not found. 1043 */ 1044 boolean_t 1045 vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid) 1046 { 1047 int rv; 1048 mod_hash_val_t vp; 1049 1050 rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp); 1051 1052 if (rv != 0) 1053 return (B_FALSE); 1054 1055 return (B_TRUE); 1056 } 1057 1058 /* 1059 * Add an entry into FDB for the given vsw. 
1060 */ 1061 void 1062 vsw_fdbe_add(vsw_t *vswp, void *port) 1063 { 1064 uint64_t addr = 0; 1065 vsw_port_t *portp; 1066 vsw_fdbe_t *fp; 1067 int rv; 1068 1069 portp = (vsw_port_t *)port; 1070 KEY_HASH(addr, &portp->p_macaddr); 1071 1072 fp = kmem_zalloc(sizeof (vsw_fdbe_t), KM_SLEEP); 1073 fp->portp = port; 1074 1075 /* 1076 * Note: duplicate keys will be rejected by mod_hash. 1077 */ 1078 rv = mod_hash_insert(vswp->fdb_hashp, (mod_hash_key_t)addr, 1079 (mod_hash_val_t)fp); 1080 ASSERT(rv == 0); 1081 } 1082 1083 /* 1084 * Remove an entry from FDB. 1085 */ 1086 void 1087 vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr) 1088 { 1089 uint64_t addr = 0; 1090 vsw_fdbe_t *fp; 1091 int rv; 1092 1093 KEY_HASH(addr, eaddr); 1094 1095 /* 1096 * Remove the entry from fdb hash table. 1097 * This prevents further references to this fdb entry. 1098 */ 1099 rv = mod_hash_remove(vswp->fdb_hashp, (mod_hash_key_t)addr, 1100 (mod_hash_val_t *)&fp); 1101 if (rv != 0) { 1102 /* invalid key? */ 1103 return; 1104 } 1105 1106 /* 1107 * If there are threads already ref holding before the entry was 1108 * removed from hash table, then wait for ref count to drop to zero. 1109 */ 1110 while (fp->refcnt != 0) { 1111 delay(drv_usectohz(vsw_fdbe_refcnt_delay)); 1112 } 1113 1114 kmem_free(fp, sizeof (*fp)); 1115 } 1116 1117 /* 1118 * Search fdb for a given mac address. If an entry is found, hold 1119 * a reference to it and return the entry, else returns NULL. 1120 */ 1121 static vsw_fdbe_t * 1122 vsw_fdbe_find(vsw_t *vswp, struct ether_addr *addrp) 1123 { 1124 uint64_t key = 0; 1125 vsw_fdbe_t *fp; 1126 int rv; 1127 1128 KEY_HASH(key, addrp); 1129 1130 rv = mod_hash_find_cb(vswp->fdb_hashp, (mod_hash_key_t)key, 1131 (mod_hash_val_t *)&fp, vsw_fdbe_find_cb); 1132 1133 if (rv != 0) 1134 return (NULL); 1135 1136 return (fp); 1137 } 1138 1139 /* 1140 * Callback function provided to mod_hash_find_cb(). 
After finding the fdb
 * entry corresponding to the key (macaddr), this callback will be invoked by
 * mod_hash_find_cb() to atomically increment the reference count on the fdb
 * entry before returning the found entry.
 */
static void
vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
{
	vsw_fdbe_t	*fdbe = (vsw_fdbe_t *)val;

	_NOTE(ARGUNUSED(key))

	/* the hash value is the fdb entry itself; take a hold on it */
	VSW_FDBE_REFHOLD(fdbe);
}

/*
 * A given frame must be always tagged with the appropriate vlan id (unless it
 * is in the default-vlan) before the mac address switching function is called.
 * Otherwise, after switching function determines the destination, we cannot
 * figure out if the destination belongs to the same vlan that the frame
 * originated from and if it needs tag/untag. Frames which are inbound from
 * the external(physical) network over a vlan trunk link are always tagged.
 * However frames which are received from a vnet-port over ldc or frames which
 * are coming down the stack on the service domain over vsw interface may be
 * untagged. These frames must be tagged with the appropriate pvid of the
 * sender (vnet-port or vsw device), before invoking the switching function.
 *
 * Arguments:
 *	arg:  caller of the function.
 *	type: type of arg(caller): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port)
 *	mp:   frame(s) to be tagged.
1168 */ 1169 mblk_t * 1170 vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp) 1171 { 1172 vsw_t *vswp; 1173 vsw_port_t *portp; 1174 struct ether_header *ehp; 1175 mblk_t *bp; 1176 mblk_t *bpt; 1177 mblk_t *bph; 1178 mblk_t *bpn; 1179 uint16_t pvid; 1180 1181 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1182 1183 if (type == VSW_LOCALDEV) { 1184 vswp = (vsw_t *)arg; 1185 pvid = vswp->pvid; 1186 portp = NULL; 1187 } else { 1188 /* VSW_VNETPORT */ 1189 portp = (vsw_port_t *)arg; 1190 pvid = portp->pvid; 1191 vswp = portp->p_vswp; 1192 } 1193 1194 bpn = bph = bpt = NULL; 1195 1196 for (bp = mp; bp != NULL; bp = bpn) { 1197 1198 bpn = bp->b_next; 1199 bp->b_next = bp->b_prev = NULL; 1200 1201 /* Determine if it is an untagged frame */ 1202 ehp = (struct ether_header *)bp->b_rptr; 1203 1204 if (ehp->ether_type != ETHERTYPE_VLAN) { /* untagged */ 1205 1206 /* no need to tag if the frame is in default vlan */ 1207 if (pvid != vswp->default_vlan_id) { 1208 bp = vnet_vlan_insert_tag(bp, pvid); 1209 if (bp == NULL) { 1210 continue; 1211 } 1212 } 1213 } 1214 1215 /* build a chain of processed packets */ 1216 if (bph == NULL) { 1217 bph = bpt = bp; 1218 } else { 1219 bpt->b_next = bp; 1220 bpt = bp; 1221 } 1222 1223 } 1224 1225 return (bph); 1226 } 1227 1228 /* 1229 * Frames destined to a vnet-port or to the local vsw interface, must be 1230 * untagged if necessary before sending. This function first checks that the 1231 * frame can be sent to the destination in the vlan identified by the frame 1232 * tag. Note that when this function is invoked the frame must have been 1233 * already tagged (unless it is in the default-vlan). Because, this function is 1234 * called when the switching function determines the destination and invokes 1235 * its send function (vnet-port or vsw interface) and all frames would have 1236 * been tagged by this time (see comments in vsw_vlan_frame_pretag()). 1237 * 1238 * Arguments: 1239 * arg: destination device. 
1240 * type: type of arg(destination): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port) 1241 * np: head of pkt chain to be validated and untagged. 1242 * npt: tail of pkt chain to be validated and untagged. 1243 * 1244 * Returns: 1245 * np: head of updated chain of packets 1246 * npt: tail of updated chain of packets 1247 * rv: count of any packets dropped 1248 */ 1249 uint32_t 1250 vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, mblk_t **npt) 1251 { 1252 mblk_t *bp; 1253 mblk_t *bpt; 1254 mblk_t *bph; 1255 mblk_t *bpn; 1256 vsw_port_t *portp; 1257 vsw_t *vswp; 1258 uint32_t count; 1259 struct ether_header *ehp; 1260 boolean_t is_tagged; 1261 boolean_t rv; 1262 uint16_t vlan_id; 1263 uint16_t pvid; 1264 mod_hash_t *vlan_hashp; 1265 1266 ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT)); 1267 1268 if (type == VSW_LOCALDEV) { 1269 vswp = (vsw_t *)arg; 1270 pvid = vswp->pvid; 1271 vlan_hashp = vswp->vlan_hashp; 1272 portp = NULL; 1273 } else { 1274 /* type == VSW_VNETPORT */ 1275 portp = (vsw_port_t *)arg; 1276 vswp = portp->p_vswp; 1277 vlan_hashp = portp->vlan_hashp; 1278 pvid = portp->pvid; 1279 } 1280 1281 bpn = bph = bpt = NULL; 1282 count = 0; 1283 1284 for (bp = *np; bp != NULL; bp = bpn) { 1285 1286 bpn = bp->b_next; 1287 bp->b_next = bp->b_prev = NULL; 1288 1289 /* 1290 * Determine the vlan id that the frame belongs to. 1291 */ 1292 ehp = (struct ether_header *)bp->b_rptr; 1293 is_tagged = vsw_frame_lookup_vid(arg, type, ehp, &vlan_id); 1294 1295 /* 1296 * Check if the destination is in the same vlan. 1297 */ 1298 rv = vsw_vlan_lookup(vlan_hashp, vlan_id); 1299 if (rv == B_FALSE) { 1300 /* drop the packet */ 1301 freemsg(bp); 1302 count++; 1303 continue; 1304 } 1305 1306 /* 1307 * Check the frame header if tag/untag is needed. 1308 */ 1309 if (is_tagged == B_FALSE) { 1310 /* 1311 * Untagged frame. 
We shouldn't have an untagged 1312 * packet at this point, unless the destination's 1313 * vlan id is default-vlan-id; if it is not the 1314 * default-vlan-id, we drop the packet. 1315 */ 1316 if (vlan_id != vswp->default_vlan_id) { 1317 /* drop the packet */ 1318 freemsg(bp); 1319 count++; 1320 continue; 1321 } 1322 } else { 1323 /* 1324 * Tagged frame, untag if it's the destination's pvid. 1325 */ 1326 if (vlan_id == pvid) { 1327 1328 bp = vnet_vlan_remove_tag(bp); 1329 if (bp == NULL) { 1330 /* packet dropped */ 1331 count++; 1332 continue; 1333 } 1334 } 1335 } 1336 1337 /* build a chain of processed packets */ 1338 if (bph == NULL) { 1339 bph = bpt = bp; 1340 } else { 1341 bpt->b_next = bp; 1342 bpt = bp; 1343 } 1344 1345 } 1346 1347 *np = bph; 1348 *npt = bpt; 1349 1350 return (count); 1351 } 1352 1353 /* 1354 * Lookup the vlan id of the given frame. If it is a vlan-tagged frame, 1355 * then the vlan-id is available in the tag; otherwise, its vlan id is 1356 * implicitly obtained based on the caller (destination of the frame: 1357 * VSW_VNETPORT or VSW_LOCALDEV). 1358 * The vlan id determined is returned in vidp. 1359 * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged. 1360 */ 1361 boolean_t 1362 vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp, 1363 uint16_t *vidp) 1364 { 1365 struct ether_vlan_header *evhp; 1366 vsw_t *vswp; 1367 vsw_port_t *portp; 1368 1369 /* If it's a tagged frame, get the vid from vlan header */ 1370 if (ehp->ether_type == ETHERTYPE_VLAN) { 1371 1372 evhp = (struct ether_vlan_header *)ehp; 1373 *vidp = VLAN_ID(ntohs(evhp->ether_tci)); 1374 return (B_TRUE); 1375 } 1376 1377 /* Untagged frame; determine vlan id based on caller */ 1378 switch (caller) { 1379 1380 case VSW_VNETPORT: 1381 /* 1382 * packet destined to a vnet; vlan-id is pvid of vnet-port. 
1383 */ 1384 portp = (vsw_port_t *)arg; 1385 *vidp = portp->pvid; 1386 break; 1387 1388 case VSW_LOCALDEV: 1389 1390 /* 1391 * packet destined to vsw interface; 1392 * vlan-id is port-vlan-id of vsw device. 1393 */ 1394 vswp = (vsw_t *)arg; 1395 *vidp = vswp->pvid; 1396 break; 1397 } 1398 1399 return (B_FALSE); 1400 } 1401 1402 /* 1403 * Add or remove multicast address(es). 1404 * 1405 * Returns 0 on success, 1 on failure. 1406 */ 1407 int 1408 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port) 1409 { 1410 mcst_addr_t *mcst_p = NULL; 1411 vsw_t *vswp = port->p_vswp; 1412 uint64_t addr = 0x0; 1413 int i; 1414 1415 D1(vswp, "%s: enter", __func__); 1416 1417 D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count); 1418 1419 for (i = 0; i < mcst_pkt->count; i++) { 1420 /* 1421 * Convert address into form that can be used 1422 * as hash table key. 1423 */ 1424 KEY_HASH(addr, &(mcst_pkt->mca[i])); 1425 1426 /* 1427 * Add or delete the specified address/port combination. 1428 */ 1429 if (mcst_pkt->set == 0x1) { 1430 D3(vswp, "%s: adding multicast address 0x%llx for " 1431 "port %ld", __func__, addr, port->p_instance); 1432 if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1433 /* 1434 * Update the list of multicast 1435 * addresses contained within the 1436 * port structure to include this new 1437 * one. 1438 */ 1439 mcst_p = kmem_zalloc(sizeof (mcst_addr_t), 1440 KM_NOSLEEP); 1441 if (mcst_p == NULL) { 1442 DERR(vswp, "%s: unable to alloc mem", 1443 __func__); 1444 (void) vsw_del_mcst(vswp, 1445 VSW_VNETPORT, addr, port); 1446 return (1); 1447 } 1448 1449 mcst_p->nextp = NULL; 1450 mcst_p->addr = addr; 1451 ether_copy(&mcst_pkt->mca[i], &mcst_p->mca); 1452 1453 /* 1454 * Program the address into HW. 
If the addr 1455 * has already been programmed then the MAC 1456 * just increments a ref counter (which is 1457 * used when the address is being deleted) 1458 */ 1459 mutex_enter(&vswp->mac_lock); 1460 if (vswp->mh != NULL) { 1461 if (mac_multicst_add(vswp->mh, 1462 (uchar_t *)&mcst_pkt->mca[i])) { 1463 mutex_exit(&vswp->mac_lock); 1464 cmn_err(CE_WARN, "!vsw%d: " 1465 "unable to add multicast " 1466 "address: %s\n", 1467 vswp->instance, 1468 ether_sprintf((void *) 1469 &mcst_p->mca)); 1470 (void) vsw_del_mcst(vswp, 1471 VSW_VNETPORT, addr, port); 1472 kmem_free(mcst_p, 1473 sizeof (*mcst_p)); 1474 return (1); 1475 } 1476 mcst_p->mac_added = B_TRUE; 1477 } 1478 mutex_exit(&vswp->mac_lock); 1479 1480 mutex_enter(&port->mca_lock); 1481 mcst_p->nextp = port->mcap; 1482 port->mcap = mcst_p; 1483 mutex_exit(&port->mca_lock); 1484 1485 } else { 1486 DERR(vswp, "%s: error adding multicast " 1487 "address 0x%llx for port %ld", 1488 __func__, addr, port->p_instance); 1489 return (1); 1490 } 1491 } else { 1492 /* 1493 * Delete an entry from the multicast hash 1494 * table and update the address list 1495 * appropriately. 1496 */ 1497 if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) { 1498 D3(vswp, "%s: deleting multicast address " 1499 "0x%llx for port %ld", __func__, addr, 1500 port->p_instance); 1501 1502 mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr); 1503 ASSERT(mcst_p != NULL); 1504 1505 /* 1506 * Remove the address from HW. The address 1507 * will actually only be removed once the ref 1508 * count within the MAC layer has dropped to 1509 * zero. I.e. we can safely call this fn even 1510 * if other ports are interested in this 1511 * address. 
1512 */ 1513 mutex_enter(&vswp->mac_lock); 1514 if (vswp->mh != NULL && mcst_p->mac_added) { 1515 if (mac_multicst_remove(vswp->mh, 1516 (uchar_t *)&mcst_pkt->mca[i])) { 1517 mutex_exit(&vswp->mac_lock); 1518 cmn_err(CE_WARN, "!vsw%d: " 1519 "unable to remove mcast " 1520 "address: %s\n", 1521 vswp->instance, 1522 ether_sprintf((void *) 1523 &mcst_p->mca)); 1524 kmem_free(mcst_p, 1525 sizeof (*mcst_p)); 1526 return (1); 1527 } 1528 mcst_p->mac_added = B_FALSE; 1529 } 1530 mutex_exit(&vswp->mac_lock); 1531 kmem_free(mcst_p, sizeof (*mcst_p)); 1532 1533 } else { 1534 DERR(vswp, "%s: error deleting multicast " 1535 "addr 0x%llx for port %ld", 1536 __func__, addr, port->p_instance); 1537 return (1); 1538 } 1539 } 1540 } 1541 D1(vswp, "%s: exit", __func__); 1542 return (0); 1543 } 1544 1545 /* 1546 * Add a new multicast entry. 1547 * 1548 * Search hash table based on address. If match found then 1549 * update associated val (which is chain of ports), otherwise 1550 * create new key/val (addr/port) pair and insert into table. 1551 */ 1552 int 1553 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1554 { 1555 int dup = 0; 1556 int rv = 0; 1557 mfdb_ent_t *ment = NULL; 1558 mfdb_ent_t *tmp_ent = NULL; 1559 mfdb_ent_t *new_ent = NULL; 1560 void *tgt = NULL; 1561 1562 if (devtype == VSW_VNETPORT) { 1563 /* 1564 * Being invoked from a vnet. 1565 */ 1566 ASSERT(arg != NULL); 1567 tgt = arg; 1568 D2(NULL, "%s: port %d : address 0x%llx", __func__, 1569 ((vsw_port_t *)arg)->p_instance, addr); 1570 } else { 1571 /* 1572 * We are being invoked via the m_multicst mac entry 1573 * point. 
1574 */ 1575 D2(NULL, "%s: address 0x%llx", __func__, addr); 1576 tgt = (void *)vswp; 1577 } 1578 1579 WRITE_ENTER(&vswp->mfdbrw); 1580 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1581 (mod_hash_val_t *)&ment) != 0) { 1582 1583 /* address not currently in table */ 1584 ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1585 ment->d_addr = (void *)tgt; 1586 ment->d_type = devtype; 1587 ment->nextp = NULL; 1588 1589 if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr, 1590 (mod_hash_val_t)ment) != 0) { 1591 DERR(vswp, "%s: hash table insertion failed", __func__); 1592 kmem_free(ment, sizeof (mfdb_ent_t)); 1593 rv = 1; 1594 } else { 1595 D2(vswp, "%s: added initial entry for 0x%llx to " 1596 "table", __func__, addr); 1597 } 1598 } else { 1599 /* 1600 * Address in table. Check to see if specified port 1601 * is already associated with the address. If not add 1602 * it now. 1603 */ 1604 tmp_ent = ment; 1605 while (tmp_ent != NULL) { 1606 if (tmp_ent->d_addr == (void *)tgt) { 1607 if (devtype == VSW_VNETPORT) { 1608 DERR(vswp, "%s: duplicate port entry " 1609 "found for portid %ld and key " 1610 "0x%llx", __func__, 1611 ((vsw_port_t *)arg)->p_instance, 1612 addr); 1613 } else { 1614 DERR(vswp, "%s: duplicate entry found" 1615 "for key 0x%llx", __func__, addr); 1616 } 1617 rv = 1; 1618 dup = 1; 1619 break; 1620 } 1621 tmp_ent = tmp_ent->nextp; 1622 } 1623 1624 /* 1625 * Port not on list so add it to end now. 1626 */ 1627 if (0 == dup) { 1628 D2(vswp, "%s: added entry for 0x%llx to table", 1629 __func__, addr); 1630 new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP); 1631 new_ent->d_addr = (void *)tgt; 1632 new_ent->d_type = devtype; 1633 new_ent->nextp = NULL; 1634 1635 tmp_ent = ment; 1636 while (tmp_ent->nextp != NULL) 1637 tmp_ent = tmp_ent->nextp; 1638 1639 tmp_ent->nextp = new_ent; 1640 } 1641 } 1642 1643 RW_EXIT(&vswp->mfdbrw); 1644 return (rv); 1645 } 1646 1647 /* 1648 * Remove a multicast entry from the hashtable. 
1649 * 1650 * Search hash table based on address. If match found, scan 1651 * list of ports associated with address. If specified port 1652 * found remove it from list. 1653 */ 1654 int 1655 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg) 1656 { 1657 mfdb_ent_t *ment = NULL; 1658 mfdb_ent_t *curr_p, *prev_p; 1659 void *tgt = NULL; 1660 1661 D1(vswp, "%s: enter", __func__); 1662 1663 if (devtype == VSW_VNETPORT) { 1664 tgt = (vsw_port_t *)arg; 1665 D2(vswp, "%s: removing port %d from mFDB for address" 1666 " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr); 1667 } else { 1668 D2(vswp, "%s: removing entry", __func__); 1669 tgt = (void *)vswp; 1670 } 1671 1672 WRITE_ENTER(&vswp->mfdbrw); 1673 if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr, 1674 (mod_hash_val_t *)&ment) != 0) { 1675 D2(vswp, "%s: address 0x%llx not in table", __func__, addr); 1676 RW_EXIT(&vswp->mfdbrw); 1677 return (1); 1678 } 1679 1680 prev_p = curr_p = ment; 1681 1682 while (curr_p != NULL) { 1683 if (curr_p->d_addr == (void *)tgt) { 1684 if (devtype == VSW_VNETPORT) { 1685 D2(vswp, "%s: port %d found", __func__, 1686 ((vsw_port_t *)tgt)->p_instance); 1687 } else { 1688 D2(vswp, "%s: instance found", __func__); 1689 } 1690 1691 if (prev_p == curr_p) { 1692 /* 1693 * head of list, if no other element is in 1694 * list then destroy this entry, otherwise 1695 * just replace it with updated value. 1696 */ 1697 ment = curr_p->nextp; 1698 if (ment == NULL) { 1699 (void) mod_hash_destroy(vswp->mfdb, 1700 (mod_hash_val_t)addr); 1701 } else { 1702 (void) mod_hash_replace(vswp->mfdb, 1703 (mod_hash_key_t)addr, 1704 (mod_hash_val_t)ment); 1705 } 1706 } else { 1707 /* 1708 * Not head of list, no need to do 1709 * replacement, just adjust list pointers. 
1710 */ 1711 prev_p->nextp = curr_p->nextp; 1712 } 1713 break; 1714 } 1715 1716 prev_p = curr_p; 1717 curr_p = curr_p->nextp; 1718 } 1719 1720 RW_EXIT(&vswp->mfdbrw); 1721 1722 D1(vswp, "%s: exit", __func__); 1723 1724 if (curr_p == NULL) 1725 return (1); 1726 kmem_free(curr_p, sizeof (mfdb_ent_t)); 1727 return (0); 1728 } 1729 1730 /* 1731 * Port is being deleted, but has registered an interest in one 1732 * or more multicast groups. Using the list of addresses maintained 1733 * within the port structure find the appropriate entry in the hash 1734 * table and remove this port from the list of interested ports. 1735 */ 1736 void 1737 vsw_del_mcst_port(vsw_port_t *port) 1738 { 1739 mcst_addr_t *mcap = NULL; 1740 vsw_t *vswp = port->p_vswp; 1741 1742 D1(vswp, "%s: enter", __func__); 1743 1744 mutex_enter(&port->mca_lock); 1745 1746 while ((mcap = port->mcap) != NULL) { 1747 1748 port->mcap = mcap->nextp; 1749 1750 mutex_exit(&port->mca_lock); 1751 1752 (void) vsw_del_mcst(vswp, VSW_VNETPORT, 1753 mcap->addr, port); 1754 1755 /* 1756 * Remove the address from HW. The address 1757 * will actually only be removed once the ref 1758 * count within the MAC layer has dropped to 1759 * zero. I.e. we can safely call this fn even 1760 * if other ports are interested in this 1761 * address. 1762 */ 1763 mutex_enter(&vswp->mac_lock); 1764 if (vswp->mh != NULL && mcap->mac_added) { 1765 (void) mac_multicst_remove(vswp->mh, 1766 (uchar_t *)&mcap->mca); 1767 } 1768 mutex_exit(&vswp->mac_lock); 1769 1770 kmem_free(mcap, sizeof (*mcap)); 1771 1772 mutex_enter(&port->mca_lock); 1773 1774 } 1775 1776 mutex_exit(&port->mca_lock); 1777 1778 D1(vswp, "%s: exit", __func__); 1779 } 1780 1781 /* 1782 * This vsw instance is detaching, but has registered an interest in one 1783 * or more multicast groups. Using the list of addresses maintained 1784 * within the vsw structure find the appropriate entry in the hash 1785 * table and remove this instance from the list of interested ports. 
1786 */ 1787 void 1788 vsw_del_mcst_vsw(vsw_t *vswp) 1789 { 1790 mcst_addr_t *next_p = NULL; 1791 1792 D1(vswp, "%s: enter", __func__); 1793 1794 mutex_enter(&vswp->mca_lock); 1795 1796 while (vswp->mcap != NULL) { 1797 DERR(vswp, "%s: deleting addr 0x%llx", 1798 __func__, vswp->mcap->addr); 1799 (void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL); 1800 1801 next_p = vswp->mcap->nextp; 1802 kmem_free(vswp->mcap, sizeof (mcst_addr_t)); 1803 vswp->mcap = next_p; 1804 } 1805 1806 vswp->mcap = NULL; 1807 mutex_exit(&vswp->mca_lock); 1808 1809 D1(vswp, "%s: exit", __func__); 1810 } 1811 1812 static uint32_t 1813 vsw_get_same_dest_list(struct ether_header *ehp, 1814 mblk_t **rhead, mblk_t **rtail, mblk_t **mpp) 1815 { 1816 uint32_t count = 0; 1817 mblk_t *bp; 1818 mblk_t *nbp; 1819 mblk_t *head = NULL; 1820 mblk_t *tail = NULL; 1821 mblk_t *prev = NULL; 1822 struct ether_header *behp; 1823 1824 /* process the chain of packets */ 1825 bp = *mpp; 1826 while (bp) { 1827 nbp = bp->b_next; 1828 behp = (struct ether_header *)bp->b_rptr; 1829 bp->b_prev = NULL; 1830 if (ether_cmp(&ehp->ether_dhost, &behp->ether_dhost) == 0) { 1831 if (prev == NULL) { 1832 *mpp = nbp; 1833 } else { 1834 prev->b_next = nbp; 1835 } 1836 bp->b_next = NULL; 1837 if (head == NULL) { 1838 head = tail = bp; 1839 } else { 1840 tail->b_next = bp; 1841 tail = bp; 1842 } 1843 count++; 1844 } else { 1845 prev = bp; 1846 } 1847 bp = nbp; 1848 } 1849 *rhead = head; 1850 *rtail = tail; 1851 DTRACE_PROBE1(vsw_same_dest, int, count); 1852 return (count); 1853 } 1854 1855 static mblk_t * 1856 vsw_dupmsgchain(mblk_t *mp) 1857 { 1858 mblk_t *nmp = NULL; 1859 mblk_t **nmpp = &nmp; 1860 1861 for (; mp != NULL; mp = mp->b_next) { 1862 if ((*nmpp = dupmsg(mp)) == NULL) { 1863 freemsgchain(nmp); 1864 return (NULL); 1865 } 1866 1867 nmpp = &((*nmpp)->b_next); 1868 } 1869 1870 return (nmp); 1871 } 1872