1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * Copyright (c) 2016 by Delphix. All rights reserved. 26 */ 27 28 /* 29 * This module implements a STREAMS driver that provides layer-two (Ethernet) 30 * bridging functionality. The STREAMS interface is used to provide 31 * observability (snoop/wireshark) and control, but not for interface plumbing. 32 */ 33 34 #include <sys/types.h> 35 #include <sys/bitmap.h> 36 #include <sys/cmn_err.h> 37 #include <sys/conf.h> 38 #include <sys/ddi.h> 39 #include <sys/errno.h> 40 #include <sys/kstat.h> 41 #include <sys/modctl.h> 42 #include <sys/note.h> 43 #include <sys/param.h> 44 #include <sys/policy.h> 45 #include <sys/sdt.h> 46 #include <sys/stat.h> 47 #include <sys/stream.h> 48 #include <sys/stropts.h> 49 #include <sys/strsun.h> 50 #include <sys/sunddi.h> 51 #include <sys/sysmacros.h> 52 #include <sys/systm.h> 53 #include <sys/time.h> 54 #include <sys/dlpi.h> 55 #include <sys/dls.h> 56 #include <sys/mac_ether.h> 57 #include <sys/mac_provider.h> 58 #include <sys/mac_client_priv.h> 59 #include <sys/mac_impl.h> 60 #include <sys/vlan.h> 61 #include <net/bridge.h> 62 #include <net/bridge_impl.h> 63 #include <net/trill.h> 64 #include <sys/dld_ioc.h> 65 66 /* 67 * Locks and reference counts: object lifetime and design. 68 * 69 * bridge_mac_t 70 * Bridge mac (snoop) instances are in bmac_list, which is protected by 71 * bmac_rwlock. They're allocated by bmac_alloc and freed by bridge_timer(). 72 * Every bridge_inst_t has a single bridge_mac_t, but when bridge_inst_t goes 73 * away, the bridge_mac_t remains until either all of the users go away 74 * (detected by a timer) or until the instance is picked up again by the same 75 * bridge starting back up. 76 * 77 * bridge_inst_t 78 * Bridge instances are in inst_list, which is protected by inst_lock. 79 * They're allocated by inst_alloc() and freed by inst_free(). After 80 * allocation, an instance is placed in inst_list, and the reference count is 81 * incremented to represent this. That reference is decremented when the 82 * BIF_SHUTDOWN flag is set, and no new increments may occur. When the last 83 * reference is freed, the instance is removed from the list. 84 * 85 * Bridge instances have lists of links and an AVL tree of forwarding 86 * entries. Each of these structures holds one reference on the bridge 87 * instance. These lists and tree are protected by bi_rwlock. 88 * 89 * bridge_stream_t 90 * Bridge streams are allocated by stream_alloc() and freed by stream_free(). 
91 * These streams are created when "bridged" opens /dev/bridgectl, and are 92 * used to create new bridge instances (via BRIOC_NEWBRIDGE) and control the 93 * links on the bridge. When a stream closes, the bridge instance created is 94 * destroyed. There's at most one bridge instance for a given control 95 * stream. 96 * 97 * bridge_link_t 98 * Links are allocated by bridge_add_link() and freed by link_free(). The 99 * bi_links list holds a reference to the link. When the BLF_DELETED flag is 100 * set, that reference is dropped. The link isn't removed from the list 101 * until the last reference drops. Each forwarding entry that uses a given 102 * link holds a reference, as does each thread transmitting a packet via the 103 * link. The MAC layer calls in via bridge_ref_cb() to hold a reference on 104 * a link when transmitting. 105 * 106 * It's important that once BLF_DELETED is set, there's no way for the 107 * reference count to increase again. If it can, then the link may be 108 * double-freed. The BLF_FREED flag is intended for use with assertions to 109 * guard against this in testing. 110 * 111 * bridge_fwd_t 112 * Bridge forwarding entries are allocated by bridge_recv_cb() and freed by 113 * fwd_free(). The bi_fwd AVL tree holds one reference to the entry. Unlike 114 * other data structures, the reference is dropped when the entry is removed 115 * from the tree by fwd_delete(), and the BFF_INTREE flag is removed. Each 116 * thread that's forwarding a packet to a known destination holds a reference 117 * to a forwarding entry. 118 * 119 * TRILL notes: 120 * 121 * The TRILL module does all of its I/O through bridging. It uses references 122 * on the bridge_inst_t and bridge_link_t structures, and has seven entry 123 * points and four callbacks. One entry point is for setting the callbacks 124 * (bridge_trill_register_cb). There are four entry points for taking bridge 125 * and link references (bridge_trill_{br,ln}{ref,unref}). The final two 126 * entry points are for decapsulated packets from TRILL (bridge_trill_decaps) 127 * that need to be bridged locally, and for TRILL-encapsulated output packets 128 * (bridge_trill_output). 129 * 130 * The four callbacks comprise two notification functions for bridges and 131 * links being deleted, one function for raw received TRILL packets, and one 132 * for bridge output to non-local TRILL destinations (tunnel entry). 133 */ 134 135 /* 136 * Ethernet reserved multicast addresses for TRILL; used also in TRILL module. 
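 * (bridge_group_address is the standard IEEE 802.1D bridge group address; it
 * falls within the link-local control range that bridge_forward() never
 * forwards.)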
137 */ 138 const uint8_t all_isis_rbridges[] = ALL_ISIS_RBRIDGES; 139 static const uint8_t all_esadi_rbridges[] = ALL_ESADI_RBRIDGES; 140 const uint8_t bridge_group_address[] = BRIDGE_GROUP_ADDRESS; 141 142 static const char *inst_kstats_list[] = { KSINST_NAMES }; 143 static const char *link_kstats_list[] = { KSLINK_NAMES }; 144 145 #define KREF(p, m, vn) p->m.vn.value.ui64 146 #define KINCR(p, m, vn) ++KREF(p, m, vn) 147 #define KDECR(p, m, vn) --KREF(p, m, vn) 148 149 #define KIPINCR(p, vn) KINCR(p, bi_kstats, vn) 150 #define KIPDECR(p, vn) KDECR(p, bi_kstats, vn) 151 #define KLPINCR(p, vn) KINCR(p, bl_kstats, vn) 152 153 #define KIINCR(vn) KIPINCR(bip, vn) 154 #define KIDECR(vn) KIPDECR(bip, vn) 155 #define KLINCR(vn) KLPINCR(blp, vn) 156 157 #define Dim(x) (sizeof (x) / sizeof (*(x))) 158 159 /* Amount of overhead added when encapsulating with VLAN headers */ 160 #define VLAN_INCR (sizeof (struct ether_vlan_header) - \ 161 sizeof (struct ether_header)) 162 163 static dev_info_t *bridge_dev_info; 164 static major_t bridge_major; 165 static ddi_taskq_t *bridge_taskq; 166 167 /* 168 * These are the bridge instance management data structures. The mutex lock 169 * protects the list of bridge instances. A reference count is then used on 170 * each instance to determine when to free it. We use mac_minor_hold() to 171 * allocate minor_t values, which are used both for self-cloning /dev/net/ 172 * device nodes as well as client streams. Minor node 0 is reserved for the 173 * allocation control node. 174 */ 175 static list_t inst_list; 176 static kcondvar_t inst_cv; /* Allows us to wait for shutdown */ 177 static kmutex_t inst_lock; 178 179 static krwlock_t bmac_rwlock; 180 static list_t bmac_list; 181 182 /* Wait for taskq entries that use STREAMS */ 183 static kcondvar_t stream_ref_cv; 184 static kmutex_t stream_ref_lock; 185 186 static timeout_id_t bridge_timerid; 187 static clock_t bridge_scan_interval; 188 static clock_t bridge_fwd_age; 189 190 static bridge_inst_t *bridge_find_name(const char *); 191 static void bridge_timer(void *); 192 static void bridge_unref(bridge_inst_t *); 193 194 static const uint8_t zero_addr[ETHERADDRL] = { 0 }; 195 196 /* Global TRILL linkage */ 197 static trill_recv_pkt_t trill_recv_fn; 198 static trill_encap_pkt_t trill_encap_fn; 199 static trill_br_dstr_t trill_brdstr_fn; 200 static trill_ln_dstr_t trill_lndstr_fn; 201 202 /* special settings to accommodate DLD flow control; see dld_str.c */ 203 static struct module_info bridge_dld_modinfo = { 204 0, /* mi_idnum */ 205 BRIDGE_DEV_NAME, /* mi_idname */ 206 0, /* mi_minpsz */ 207 INFPSZ, /* mi_maxpsz */ 208 1, /* mi_hiwat */ 209 0 /* mi_lowat */ 210 }; 211 212 static struct qinit bridge_dld_rinit = { 213 NULL, /* qi_putp */ 214 NULL, /* qi_srvp */ 215 dld_open, /* qi_qopen */ 216 dld_close, /* qi_qclose */ 217 NULL, /* qi_qadmin */ 218 &bridge_dld_modinfo, /* qi_minfo */ 219 NULL /* qi_mstat */ 220 }; 221 222 static struct qinit bridge_dld_winit = { 223 (int (*)())dld_wput, /* qi_putp */ 224 (int (*)())dld_wsrv, /* qi_srvp */ 225 NULL, /* qi_qopen */ 226 NULL, /* qi_qclose */ 227 NULL, /* qi_qadmin */ 228 &bridge_dld_modinfo, /* qi_minfo */ 229 NULL /* qi_mstat */ 230 }; 231 232 static int bridge_ioc_listfwd(void *, intptr_t, int, cred_t *, int *); 233 234 /* GLDv3 control ioctls used by Bridging */ 235 static dld_ioc_info_t bridge_ioc_list[] = { 236 {BRIDGE_IOC_LISTFWD, DLDCOPYINOUT, sizeof (bridge_listfwd_t), 237 bridge_ioc_listfwd, NULL}, 238 }; 239 240 /* 241 * Given a bridge mac pointer, get a ref-held 
pointer to the corresponding 242 * bridge instance, if any. We must hold the global bmac_rwlock so that 243 * bm_inst doesn't slide out from under us. 244 */ 245 static bridge_inst_t * 246 mac_to_inst(const bridge_mac_t *bmp) 247 { 248 bridge_inst_t *bip; 249 250 rw_enter(&bmac_rwlock, RW_READER); 251 if ((bip = bmp->bm_inst) != NULL) 252 atomic_inc_uint(&bip->bi_refs); 253 rw_exit(&bmac_rwlock); 254 return (bip); 255 } 256 257 static void 258 link_sdu_fail(bridge_link_t *blp, boolean_t failed, mblk_t **mlist) 259 { 260 mblk_t *mp; 261 bridge_ctl_t *bcp; 262 bridge_link_t *blcmp; 263 bridge_inst_t *bip; 264 bridge_mac_t *bmp; 265 266 if (failed) { 267 if (blp->bl_flags & BLF_SDUFAIL) 268 return; 269 blp->bl_flags |= BLF_SDUFAIL; 270 } else { 271 if (!(blp->bl_flags & BLF_SDUFAIL)) 272 return; 273 blp->bl_flags &= ~BLF_SDUFAIL; 274 } 275 276 /* 277 * If this link is otherwise up, then check if there are any other 278 * non-failed non-down links. If not, then we control the state of the 279 * whole bridge. 280 */ 281 bip = blp->bl_inst; 282 bmp = bip->bi_mac; 283 if (blp->bl_linkstate != LINK_STATE_DOWN) { 284 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 285 blcmp = list_next(&bip->bi_links, blcmp)) { 286 if (blp != blcmp && 287 !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) && 288 blcmp->bl_linkstate != LINK_STATE_DOWN) 289 break; 290 } 291 if (blcmp == NULL) { 292 bmp->bm_linkstate = failed ? LINK_STATE_DOWN : 293 LINK_STATE_UP; 294 mac_link_redo(bmp->bm_mh, bmp->bm_linkstate); 295 } 296 } 297 298 /* 299 * If we're becoming failed, then the link's current true state needs 300 * to be reflected upwards to this link's clients. If we're becoming 301 * unfailed, then we get the state of the bridge instead on all 302 * clients. 303 */ 304 if (failed) { 305 if (bmp->bm_linkstate != blp->bl_linkstate) 306 mac_link_redo(blp->bl_mh, blp->bl_linkstate); 307 } else { 308 mac_link_redo(blp->bl_mh, bmp->bm_linkstate); 309 } 310 311 /* get the current mblk we're going to send up */ 312 if ((mp = blp->bl_lfailmp) == NULL && 313 (mp = allocb(sizeof (bridge_ctl_t), BPRI_MED)) == NULL) 314 return; 315 316 /* get a new one for next time */ 317 blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED); 318 319 /* if none for next time, then report only failures */ 320 if (blp->bl_lfailmp == NULL && !failed) { 321 blp->bl_lfailmp = mp; 322 return; 323 } 324 325 /* LINTED: alignment */ 326 bcp = (bridge_ctl_t *)mp->b_rptr; 327 bcp->bc_linkid = blp->bl_linkid; 328 bcp->bc_failed = failed; 329 mp->b_wptr = (uchar_t *)(bcp + 1); 330 mp->b_next = *mlist; 331 *mlist = mp; 332 } 333 334 /* 335 * Send control messages (link SDU changes) using the stream to the 336 * bridge instance daemon. 
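 *
 * Each mblk in the chain carries a single bridge_ctl_t built by
 * link_sdu_fail().  A minimal sketch of the daemon side (illustrative only;
 * "ctl_fd" and "log_sdu_mismatch" are hypothetical):
 *
 *	bridge_ctl_t bc;
 *
 *	if (read(ctl_fd, &bc, sizeof (bc)) == sizeof (bc) && bc.bc_failed)
 *		log_sdu_mismatch(bc.bc_linkid);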
337 */ 338 static void 339 send_up_messages(bridge_inst_t *bip, mblk_t *mp) 340 { 341 mblk_t *mnext; 342 queue_t *rq; 343 344 rq = bip->bi_control->bs_wq; 345 rq = OTHERQ(rq); 346 while (mp != NULL) { 347 mnext = mp->b_next; 348 mp->b_next = NULL; 349 putnext(rq, mp); 350 mp = mnext; 351 } 352 } 353 354 /* ARGSUSED */ 355 static int 356 bridge_m_getstat(void *arg, uint_t stat, uint64_t *val) 357 { 358 return (ENOTSUP); 359 } 360 361 static int 362 bridge_m_start(void *arg) 363 { 364 bridge_mac_t *bmp = arg; 365 366 bmp->bm_flags |= BMF_STARTED; 367 return (0); 368 } 369 370 static void 371 bridge_m_stop(void *arg) 372 { 373 bridge_mac_t *bmp = arg; 374 375 bmp->bm_flags &= ~BMF_STARTED; 376 } 377 378 /* ARGSUSED */ 379 static int 380 bridge_m_setpromisc(void *arg, boolean_t on) 381 { 382 return (0); 383 } 384 385 /* ARGSUSED */ 386 static int 387 bridge_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 388 { 389 return (0); 390 } 391 392 /* ARGSUSED */ 393 static int 394 bridge_m_unicst(void *arg, const uint8_t *macaddr) 395 { 396 return (ENOTSUP); 397 } 398 399 static mblk_t * 400 bridge_m_tx(void *arg, mblk_t *mp) 401 { 402 _NOTE(ARGUNUSED(arg)); 403 freemsgchain(mp); 404 return (NULL); 405 } 406 407 /* ARGSUSED */ 408 static int 409 bridge_ioc_listfwd(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) 410 { 411 bridge_listfwd_t *blf = karg; 412 bridge_inst_t *bip; 413 bridge_fwd_t *bfp, match; 414 avl_index_t where; 415 416 bip = bridge_find_name(blf->blf_name); 417 if (bip == NULL) 418 return (ENOENT); 419 420 bcopy(blf->blf_dest, match.bf_dest, ETHERADDRL); 421 match.bf_flags |= BFF_VLANLOCAL; 422 rw_enter(&bip->bi_rwlock, RW_READER); 423 if ((bfp = avl_find(&bip->bi_fwd, &match, &where)) == NULL) 424 bfp = avl_nearest(&bip->bi_fwd, where, AVL_AFTER); 425 else 426 bfp = AVL_NEXT(&bip->bi_fwd, bfp); 427 if (bfp == NULL) { 428 bzero(blf, sizeof (*blf)); 429 } else { 430 bcopy(bfp->bf_dest, blf->blf_dest, ETHERADDRL); 431 blf->blf_trill_nick = bfp->bf_trill_nick; 432 blf->blf_ms_age = 433 drv_hztousec(ddi_get_lbolt() - bfp->bf_lastheard) / 1000; 434 blf->blf_is_local = 435 (bfp->bf_flags & BFF_LOCALADDR) != 0; 436 blf->blf_linkid = bfp->bf_links[0]->bl_linkid; 437 } 438 rw_exit(&bip->bi_rwlock); 439 bridge_unref(bip); 440 return (0); 441 } 442 443 static int 444 bridge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 445 uint_t pr_valsize, const void *pr_val) 446 { 447 bridge_mac_t *bmp = arg; 448 bridge_inst_t *bip; 449 bridge_link_t *blp; 450 int err; 451 uint_t maxsdu; 452 mblk_t *mlist; 453 454 _NOTE(ARGUNUSED(pr_name)); 455 switch (pr_num) { 456 case MAC_PROP_MTU: 457 if (pr_valsize < sizeof (bmp->bm_maxsdu)) { 458 err = EINVAL; 459 break; 460 } 461 (void) bcopy(pr_val, &maxsdu, sizeof (maxsdu)); 462 if (maxsdu == bmp->bm_maxsdu) { 463 err = 0; 464 } else if ((bip = mac_to_inst(bmp)) == NULL) { 465 err = ENXIO; 466 } else { 467 rw_enter(&bip->bi_rwlock, RW_WRITER); 468 mlist = NULL; 469 for (blp = list_head(&bip->bi_links); blp != NULL; 470 blp = list_next(&bip->bi_links, blp)) { 471 if (blp->bl_flags & BLF_DELETED) 472 continue; 473 if (blp->bl_maxsdu == maxsdu) 474 link_sdu_fail(blp, B_FALSE, &mlist); 475 else if (blp->bl_maxsdu == bmp->bm_maxsdu) 476 link_sdu_fail(blp, B_TRUE, &mlist); 477 } 478 rw_exit(&bip->bi_rwlock); 479 bmp->bm_maxsdu = maxsdu; 480 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 481 send_up_messages(bip, mlist); 482 bridge_unref(bip); 483 err = 0; 484 } 485 break; 486 487 default: 488 err = ENOTSUP; 489 break; 490 } 491 return 
(err);
}

static int
bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_valsize, void *pr_val)
{
	bridge_mac_t *bmp = arg;
	int err = 0;

	_NOTE(ARGUNUSED(pr_name));
	switch (pr_num) {
	case MAC_PROP_STATUS:
		ASSERT(pr_valsize >= sizeof (bmp->bm_linkstate));
		bcopy(&bmp->bm_linkstate, pr_val, sizeof (bmp->bm_linkstate));
		break;

	default:
		err = ENOTSUP;
		break;
	}
	return (err);
}

static void
bridge_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    mac_prop_info_handle_t prh)
{
	bridge_mac_t *bmp = arg;

	_NOTE(ARGUNUSED(pr_name));

	switch (pr_num) {
	case MAC_PROP_MTU:
		mac_prop_info_set_range_uint32(prh, bmp->bm_maxsdu,
		    bmp->bm_maxsdu);
		break;
	case MAC_PROP_STATUS:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		break;
	}
}

static mac_callbacks_t bridge_m_callbacks = {
	MC_SETPROP | MC_GETPROP | MC_PROPINFO,
	bridge_m_getstat,
	bridge_m_start,
	bridge_m_stop,
	bridge_m_setpromisc,
	bridge_m_multicst,
	bridge_m_unicst,
	bridge_m_tx,
	NULL,	/* reserved */
	NULL,	/* ioctl */
	NULL,	/* getcapab */
	NULL,	/* open */
	NULL,	/* close */
	bridge_m_setprop,
	bridge_m_getprop,
	bridge_m_propinfo
};

/*
 * Create kstats from a list.
 */
static kstat_t *
kstat_setup(kstat_named_t *knt, const char **names, int nstat,
    const char *unitname)
{
	kstat_t *ksp;
	int i;

	for (i = 0; i < nstat; i++)
		kstat_named_init(&knt[i], names[i], KSTAT_DATA_UINT64);

	ksp = kstat_create_zone(BRIDGE_DEV_NAME, 0, unitname, "net",
	    KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID);
	if (ksp != NULL) {
		ksp->ks_data = knt;
		kstat_install(ksp);
	}
	return (ksp);
}

/*
 * Find an existing bridge_mac_t structure or allocate a new one for the given
 * bridge instance.  This creates the mac driver instance that snoop can use.
 */
static int
bmac_alloc(bridge_inst_t *bip, bridge_mac_t **bmacp)
{
	bridge_mac_t *bmp, *bnew;
	mac_register_t *mac;
	int err;

	*bmacp = NULL;
	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
		return (EINVAL);

	bnew = kmem_zalloc(sizeof (*bnew), KM_SLEEP);

	rw_enter(&bmac_rwlock, RW_WRITER);
	for (bmp = list_head(&bmac_list); bmp != NULL;
	    bmp = list_next(&bmac_list, bmp)) {
		if (strcmp(bip->bi_name, bmp->bm_name) == 0) {
			ASSERT(bmp->bm_inst == NULL);
			bmp->bm_inst = bip;
			rw_exit(&bmac_rwlock);
			kmem_free(bnew, sizeof (*bnew));
			mac_free(mac);
			*bmacp = bmp;
			return (0);
		}
	}

	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	mac->m_driver = bnew;
	mac->m_dip = bridge_dev_info;
	mac->m_instance = (uint_t)-1;
	mac->m_src_addr = (uint8_t *)zero_addr;
	mac->m_callbacks = &bridge_m_callbacks;

	/*
	 * Note that the SDU limits are irrelevant, as nobody transmits on the
	 * bridge node itself.  It's mainly for monitoring, but we allow
	 * setting the bridge MTU for a quick transition of all links that are
	 * part of the bridge to a new MTU.
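	 *
	 * When the MTU property is changed via bridge_m_setprop() above,
	 * links already at the new value are un-failed, links still at the
	 * old bridge MTU are marked SDU-failed, and a control message is
	 * queued to the daemon for each link whose state changes.  A
	 * hypothetical administrative example, assuming the observability
	 * node is named "mybridge0":
	 *
	 *	# dladm set-linkprop -p mtu=9000 mybridge0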
618 */ 619 mac->m_min_sdu = 1; 620 mac->m_max_sdu = 1500; 621 err = mac_register(mac, &bnew->bm_mh); 622 mac_free(mac); 623 if (err != 0) { 624 rw_exit(&bmac_rwlock); 625 kmem_free(bnew, sizeof (*bnew)); 626 return (err); 627 } 628 629 bnew->bm_inst = bip; 630 (void) strcpy(bnew->bm_name, bip->bi_name); 631 if (list_is_empty(&bmac_list)) { 632 bridge_timerid = timeout(bridge_timer, NULL, 633 bridge_scan_interval); 634 } 635 list_insert_tail(&bmac_list, bnew); 636 rw_exit(&bmac_rwlock); 637 638 /* 639 * Mark the MAC as unable to go "active" so that only passive clients 640 * (such as snoop) can bind to it. 641 */ 642 mac_no_active(bnew->bm_mh); 643 *bmacp = bnew; 644 return (0); 645 } 646 647 /* 648 * Disconnect the given bridge_mac_t from its bridge instance. The bridge 649 * instance is going away. The mac instance can't go away until the clients 650 * are gone (see bridge_timer). 651 */ 652 static void 653 bmac_disconnect(bridge_mac_t *bmp) 654 { 655 bridge_inst_t *bip; 656 657 bmp->bm_linkstate = LINK_STATE_DOWN; 658 mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN); 659 660 rw_enter(&bmac_rwlock, RW_READER); 661 bip = bmp->bm_inst; 662 bip->bi_mac = NULL; 663 bmp->bm_inst = NULL; 664 rw_exit(&bmac_rwlock); 665 } 666 667 /* This is used by the avl trees to sort forwarding table entries */ 668 static int 669 fwd_compare(const void *addr1, const void *addr2) 670 { 671 const bridge_fwd_t *fwd1 = addr1; 672 const bridge_fwd_t *fwd2 = addr2; 673 int diff = memcmp(fwd1->bf_dest, fwd2->bf_dest, ETHERADDRL); 674 675 if (diff != 0) 676 return (diff > 0 ? 1 : -1); 677 678 if ((fwd1->bf_flags ^ fwd2->bf_flags) & BFF_VLANLOCAL) { 679 if (fwd1->bf_vlanid > fwd2->bf_vlanid) 680 return (1); 681 else if (fwd1->bf_vlanid < fwd2->bf_vlanid) 682 return (-1); 683 } 684 return (0); 685 } 686 687 static void 688 inst_free(bridge_inst_t *bip) 689 { 690 ASSERT(bip->bi_mac == NULL); 691 rw_destroy(&bip->bi_rwlock); 692 list_destroy(&bip->bi_links); 693 cv_destroy(&bip->bi_linkwait); 694 avl_destroy(&bip->bi_fwd); 695 if (bip->bi_ksp != NULL) 696 kstat_delete(bip->bi_ksp); 697 kmem_free(bip, sizeof (*bip)); 698 } 699 700 static bridge_inst_t * 701 inst_alloc(const char *bridge) 702 { 703 bridge_inst_t *bip; 704 705 bip = kmem_zalloc(sizeof (*bip), KM_SLEEP); 706 bip->bi_refs = 1; 707 (void) strcpy(bip->bi_name, bridge); 708 rw_init(&bip->bi_rwlock, NULL, RW_DRIVER, NULL); 709 list_create(&bip->bi_links, sizeof (bridge_link_t), 710 offsetof(bridge_link_t, bl_node)); 711 cv_init(&bip->bi_linkwait, NULL, CV_DRIVER, NULL); 712 avl_create(&bip->bi_fwd, fwd_compare, sizeof (bridge_fwd_t), 713 offsetof(bridge_fwd_t, bf_node)); 714 return (bip); 715 } 716 717 static bridge_inst_t * 718 bridge_find_name(const char *bridge) 719 { 720 bridge_inst_t *bip; 721 722 mutex_enter(&inst_lock); 723 for (bip = list_head(&inst_list); bip != NULL; 724 bip = list_next(&inst_list, bip)) { 725 if (!(bip->bi_flags & BIF_SHUTDOWN) && 726 strcmp(bridge, bip->bi_name) == 0) { 727 atomic_inc_uint(&bip->bi_refs); 728 break; 729 } 730 } 731 mutex_exit(&inst_lock); 732 733 return (bip); 734 } 735 736 static int 737 bridge_create(datalink_id_t linkid, const char *bridge, bridge_inst_t **bipc, 738 cred_t *cred) 739 { 740 bridge_inst_t *bip, *bipnew; 741 bridge_mac_t *bmp = NULL; 742 int err; 743 744 *bipc = NULL; 745 bipnew = inst_alloc(bridge); 746 747 mutex_enter(&inst_lock); 748 lookup_retry: 749 for (bip = list_head(&inst_list); bip != NULL; 750 bip = list_next(&inst_list, bip)) { 751 if (strcmp(bridge, bip->bi_name) == 0) 752 break; 753 } 754 755 
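	/*
	 * A same-named instance that is still shutting down may be found
	 * here; in that case, wait for it to finish and retry the lookup.
	 */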
/* This should not take long; if it does, we've got a design problem */ 756 if (bip != NULL && (bip->bi_flags & BIF_SHUTDOWN)) { 757 cv_wait(&inst_cv, &inst_lock); 758 goto lookup_retry; 759 } 760 761 if (bip == NULL) { 762 bip = bipnew; 763 bipnew = NULL; 764 list_insert_tail(&inst_list, bip); 765 } 766 767 mutex_exit(&inst_lock); 768 if (bipnew != NULL) { 769 inst_free(bipnew); 770 return (EEXIST); 771 } 772 773 bip->bi_ksp = kstat_setup((kstat_named_t *)&bip->bi_kstats, 774 inst_kstats_list, Dim(inst_kstats_list), bip->bi_name); 775 776 err = bmac_alloc(bip, &bmp); 777 if ((bip->bi_mac = bmp) == NULL) 778 goto fail_create; 779 780 /* 781 * bm_inst is set, so the timer cannot yank the DLS rug from under us. 782 * No extra locking is needed here. 783 */ 784 if (!(bmp->bm_flags & BMF_DLS)) { 785 err = dls_devnet_create(bmp->bm_mh, linkid, crgetzoneid(cred)); 786 if (err != 0) 787 goto fail_create; 788 bmp->bm_flags |= BMF_DLS; 789 } 790 791 bip->bi_dev = makedevice(bridge_major, mac_minor(bmp->bm_mh)); 792 *bipc = bip; 793 return (0); 794 795 fail_create: 796 ASSERT(bip->bi_trilldata == NULL); 797 bip->bi_flags |= BIF_SHUTDOWN; 798 bridge_unref(bip); 799 return (err); 800 } 801 802 static void 803 bridge_unref(bridge_inst_t *bip) 804 { 805 if (atomic_dec_uint_nv(&bip->bi_refs) == 0) { 806 ASSERT(bip->bi_flags & BIF_SHUTDOWN); 807 /* free up mac for reuse before leaving global list */ 808 if (bip->bi_mac != NULL) 809 bmac_disconnect(bip->bi_mac); 810 mutex_enter(&inst_lock); 811 list_remove(&inst_list, bip); 812 cv_broadcast(&inst_cv); 813 mutex_exit(&inst_lock); 814 inst_free(bip); 815 } 816 } 817 818 /* 819 * Stream instances are used only for allocating bridges and serving as a 820 * control node. They serve no data-handling function. 821 */ 822 static bridge_stream_t * 823 stream_alloc(void) 824 { 825 bridge_stream_t *bsp; 826 minor_t mn; 827 828 if ((mn = mac_minor_hold(B_FALSE)) == 0) 829 return (NULL); 830 bsp = kmem_zalloc(sizeof (*bsp), KM_SLEEP); 831 bsp->bs_minor = mn; 832 return (bsp); 833 } 834 835 static void 836 stream_free(bridge_stream_t *bsp) 837 { 838 mac_minor_rele(bsp->bs_minor); 839 kmem_free(bsp, sizeof (*bsp)); 840 } 841 842 /* Reference hold/release functions for STREAMS-related taskq */ 843 static void 844 stream_ref(bridge_stream_t *bsp) 845 { 846 mutex_enter(&stream_ref_lock); 847 bsp->bs_taskq_cnt++; 848 mutex_exit(&stream_ref_lock); 849 } 850 851 static void 852 stream_unref(bridge_stream_t *bsp) 853 { 854 mutex_enter(&stream_ref_lock); 855 if (--bsp->bs_taskq_cnt == 0) 856 cv_broadcast(&stream_ref_cv); 857 mutex_exit(&stream_ref_lock); 858 } 859 860 static void 861 link_free(bridge_link_t *blp) 862 { 863 bridge_inst_t *bip = blp->bl_inst; 864 865 ASSERT(!(blp->bl_flags & BLF_FREED)); 866 blp->bl_flags |= BLF_FREED; 867 if (blp->bl_ksp != NULL) 868 kstat_delete(blp->bl_ksp); 869 if (blp->bl_lfailmp != NULL) 870 freeb(blp->bl_lfailmp); 871 cv_destroy(&blp->bl_trillwait); 872 mutex_destroy(&blp->bl_trilllock); 873 kmem_free(blp, sizeof (*blp)); 874 /* Don't unreference the bridge until the MAC is closed */ 875 bridge_unref(bip); 876 } 877 878 static void 879 link_unref(bridge_link_t *blp) 880 { 881 if (atomic_dec_uint_nv(&blp->bl_refs) == 0) { 882 bridge_inst_t *bip = blp->bl_inst; 883 884 ASSERT(blp->bl_flags & BLF_DELETED); 885 rw_enter(&bip->bi_rwlock, RW_WRITER); 886 if (blp->bl_flags & BLF_LINK_ADDED) 887 list_remove(&bip->bi_links, blp); 888 rw_exit(&bip->bi_rwlock); 889 if (bip->bi_trilldata != NULL && list_is_empty(&bip->bi_links)) 890 
cv_broadcast(&bip->bi_linkwait); 891 link_free(blp); 892 } 893 } 894 895 static bridge_fwd_t * 896 fwd_alloc(const uint8_t *addr, uint_t nlinks, uint16_t nick) 897 { 898 bridge_fwd_t *bfp; 899 900 bfp = kmem_zalloc(sizeof (*bfp) + (nlinks * sizeof (bridge_link_t *)), 901 KM_NOSLEEP); 902 if (bfp != NULL) { 903 bcopy(addr, bfp->bf_dest, ETHERADDRL); 904 bfp->bf_lastheard = ddi_get_lbolt(); 905 bfp->bf_maxlinks = nlinks; 906 bfp->bf_links = (bridge_link_t **)(bfp + 1); 907 bfp->bf_trill_nick = nick; 908 } 909 return (bfp); 910 } 911 912 static bridge_fwd_t * 913 fwd_find(bridge_inst_t *bip, const uint8_t *addr, uint16_t vlanid) 914 { 915 bridge_fwd_t *bfp, *vbfp; 916 bridge_fwd_t match; 917 918 bcopy(addr, match.bf_dest, ETHERADDRL); 919 match.bf_flags = 0; 920 rw_enter(&bip->bi_rwlock, RW_READER); 921 if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) { 922 if (bfp->bf_vlanid != vlanid && bfp->bf_vcnt > 0) { 923 match.bf_vlanid = vlanid; 924 match.bf_flags = BFF_VLANLOCAL; 925 vbfp = avl_find(&bip->bi_fwd, &match, NULL); 926 if (vbfp != NULL) 927 bfp = vbfp; 928 } 929 atomic_inc_uint(&bfp->bf_refs); 930 } 931 rw_exit(&bip->bi_rwlock); 932 return (bfp); 933 } 934 935 static void 936 fwd_free(bridge_fwd_t *bfp) 937 { 938 uint_t i; 939 bridge_inst_t *bip = bfp->bf_links[0]->bl_inst; 940 941 KIDECR(bki_count); 942 for (i = 0; i < bfp->bf_nlinks; i++) 943 link_unref(bfp->bf_links[i]); 944 kmem_free(bfp, 945 sizeof (*bfp) + bfp->bf_maxlinks * sizeof (bridge_link_t *)); 946 } 947 948 static void 949 fwd_unref(bridge_fwd_t *bfp) 950 { 951 if (atomic_dec_uint_nv(&bfp->bf_refs) == 0) { 952 ASSERT(!(bfp->bf_flags & BFF_INTREE)); 953 fwd_free(bfp); 954 } 955 } 956 957 static void 958 fwd_delete(bridge_fwd_t *bfp) 959 { 960 bridge_inst_t *bip; 961 bridge_fwd_t *bfpzero; 962 963 if (bfp->bf_flags & BFF_INTREE) { 964 ASSERT(bfp->bf_nlinks > 0); 965 bip = bfp->bf_links[0]->bl_inst; 966 rw_enter(&bip->bi_rwlock, RW_WRITER); 967 /* Another thread could beat us to this */ 968 if (bfp->bf_flags & BFF_INTREE) { 969 avl_remove(&bip->bi_fwd, bfp); 970 bfp->bf_flags &= ~BFF_INTREE; 971 if (bfp->bf_flags & BFF_VLANLOCAL) { 972 bfp->bf_flags &= ~BFF_VLANLOCAL; 973 bfpzero = avl_find(&bip->bi_fwd, bfp, NULL); 974 if (bfpzero != NULL && bfpzero->bf_vcnt > 0) 975 bfpzero->bf_vcnt--; 976 } 977 rw_exit(&bip->bi_rwlock); 978 fwd_unref(bfp); /* no longer in avl tree */ 979 } else { 980 rw_exit(&bip->bi_rwlock); 981 } 982 } 983 } 984 985 static boolean_t 986 fwd_insert(bridge_inst_t *bip, bridge_fwd_t *bfp) 987 { 988 avl_index_t idx; 989 boolean_t retv; 990 991 rw_enter(&bip->bi_rwlock, RW_WRITER); 992 if (!(bip->bi_flags & BIF_SHUTDOWN) && 993 avl_numnodes(&bip->bi_fwd) < bip->bi_tablemax && 994 avl_find(&bip->bi_fwd, bfp, &idx) == NULL) { 995 avl_insert(&bip->bi_fwd, bfp, idx); 996 bfp->bf_flags |= BFF_INTREE; 997 atomic_inc_uint(&bfp->bf_refs); /* avl entry */ 998 retv = B_TRUE; 999 } else { 1000 retv = B_FALSE; 1001 } 1002 rw_exit(&bip->bi_rwlock); 1003 return (retv); 1004 } 1005 1006 static void 1007 fwd_update_local(bridge_link_t *blp, const uint8_t *oldaddr, 1008 const uint8_t *newaddr) 1009 { 1010 bridge_inst_t *bip = blp->bl_inst; 1011 bridge_fwd_t *bfp, *bfnew; 1012 bridge_fwd_t match; 1013 avl_index_t idx; 1014 boolean_t drop_ref = B_FALSE; 1015 1016 if (bcmp(oldaddr, newaddr, ETHERADDRL) == 0) 1017 return; 1018 1019 if (bcmp(oldaddr, zero_addr, ETHERADDRL) == 0) 1020 goto no_old_addr; 1021 1022 /* 1023 * Find the previous entry, and remove our link from it. 
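	 * If that removal leaves the entry with no links at all, the entry is
	 * also pulled out of the AVL tree and its tree reference is dropped.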
1024 */ 1025 bcopy(oldaddr, match.bf_dest, ETHERADDRL); 1026 rw_enter(&bip->bi_rwlock, RW_WRITER); 1027 if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) { 1028 int i; 1029 1030 /* 1031 * See if we're in the list, and remove if so. 1032 */ 1033 for (i = 0; i < bfp->bf_nlinks; i++) { 1034 if (bfp->bf_links[i] == blp) { 1035 /* 1036 * We assume writes are atomic, so no special 1037 * MT handling is needed. The list length is 1038 * decremented first, and then we remove 1039 * entries. 1040 */ 1041 bfp->bf_nlinks--; 1042 for (; i < bfp->bf_nlinks; i++) 1043 bfp->bf_links[i] = bfp->bf_links[i + 1]; 1044 drop_ref = B_TRUE; 1045 break; 1046 } 1047 } 1048 /* If no more links, then remove and free up */ 1049 if (bfp->bf_nlinks == 0) { 1050 avl_remove(&bip->bi_fwd, bfp); 1051 bfp->bf_flags &= ~BFF_INTREE; 1052 } else { 1053 bfp = NULL; 1054 } 1055 } 1056 rw_exit(&bip->bi_rwlock); 1057 if (bfp != NULL) 1058 fwd_unref(bfp); /* no longer in avl tree */ 1059 1060 /* 1061 * Now get the new link address and add this link to the list. The 1062 * list should be of length 1 unless the user has configured multiple 1063 * NICs with the same address. (That's an incorrect configuration, but 1064 * we support it anyway.) 1065 */ 1066 no_old_addr: 1067 bfp = NULL; 1068 if ((bip->bi_flags & BIF_SHUTDOWN) || 1069 bcmp(newaddr, zero_addr, ETHERADDRL) == 0) 1070 goto no_new_addr; 1071 1072 bcopy(newaddr, match.bf_dest, ETHERADDRL); 1073 rw_enter(&bip->bi_rwlock, RW_WRITER); 1074 if ((bfp = avl_find(&bip->bi_fwd, &match, &idx)) == NULL) { 1075 bfnew = fwd_alloc(newaddr, 1, RBRIDGE_NICKNAME_NONE); 1076 if (bfnew != NULL) 1077 KIINCR(bki_count); 1078 } else if (bfp->bf_nlinks < bfp->bf_maxlinks) { 1079 /* special case: link fits in existing entry */ 1080 bfnew = bfp; 1081 } else { 1082 bfnew = fwd_alloc(newaddr, bfp->bf_nlinks + 1, 1083 RBRIDGE_NICKNAME_NONE); 1084 if (bfnew != NULL) { 1085 KIINCR(bki_count); 1086 avl_remove(&bip->bi_fwd, bfp); 1087 bfp->bf_flags &= ~BFF_INTREE; 1088 bfnew->bf_nlinks = bfp->bf_nlinks; 1089 bcopy(bfp->bf_links, bfnew->bf_links, 1090 bfp->bf_nlinks * sizeof (bfp)); 1091 /* reset the idx value due to removal above */ 1092 (void) avl_find(&bip->bi_fwd, &match, &idx); 1093 } 1094 } 1095 1096 if (bfnew != NULL) { 1097 bfnew->bf_links[bfnew->bf_nlinks++] = blp; 1098 if (drop_ref) 1099 drop_ref = B_FALSE; 1100 else 1101 atomic_inc_uint(&blp->bl_refs); /* bf_links entry */ 1102 1103 if (bfnew != bfp) { 1104 /* local addresses are not subject to table limits */ 1105 avl_insert(&bip->bi_fwd, bfnew, idx); 1106 bfnew->bf_flags |= (BFF_INTREE | BFF_LOCALADDR); 1107 atomic_inc_uint(&bfnew->bf_refs); /* avl entry */ 1108 } 1109 } 1110 rw_exit(&bip->bi_rwlock); 1111 1112 no_new_addr: 1113 /* 1114 * If we found an existing entry and we replaced it with a new one, 1115 * then drop the table reference from the old one. We removed it from 1116 * the AVL tree above. 1117 */ 1118 if (bfnew != NULL && bfp != NULL && bfnew != bfp) 1119 fwd_unref(bfp); 1120 1121 /* Account for removed entry. */ 1122 if (drop_ref) 1123 link_unref(blp); 1124 } 1125 1126 static void 1127 bridge_new_unicst(bridge_link_t *blp) 1128 { 1129 uint8_t new_mac[ETHERADDRL]; 1130 1131 mac_unicast_primary_get(blp->bl_mh, new_mac); 1132 fwd_update_local(blp, blp->bl_local_mac, new_mac); 1133 bcopy(new_mac, blp->bl_local_mac, ETHERADDRL); 1134 } 1135 1136 /* 1137 * We must shut down a link prior to freeing it, and doing that requires 1138 * blocking to wait for running MAC threads while holding a reference. 
This is 1139 * run from a taskq to accomplish proper link shutdown followed by reference 1140 * drop. 1141 */ 1142 static void 1143 link_shutdown(void *arg) 1144 { 1145 bridge_link_t *blp = arg; 1146 mac_handle_t mh = blp->bl_mh; 1147 bridge_inst_t *bip; 1148 bridge_fwd_t *bfp, *bfnext; 1149 avl_tree_t fwd_scavenge; 1150 int i; 1151 1152 /* 1153 * This link is being destroyed. Notify TRILL now that it's no longer 1154 * possible to send packets. Data packets may still arrive until TRILL 1155 * calls bridge_trill_lnunref. 1156 */ 1157 if (blp->bl_trilldata != NULL) 1158 trill_lndstr_fn(blp->bl_trilldata, blp); 1159 1160 if (blp->bl_flags & BLF_PROM_ADDED) 1161 (void) mac_promisc_remove(blp->bl_mphp); 1162 1163 if (blp->bl_flags & BLF_SET_BRIDGE) 1164 mac_bridge_clear(mh, (mac_handle_t)blp); 1165 1166 if (blp->bl_flags & BLF_MARGIN_ADDED) { 1167 (void) mac_notify_remove(blp->bl_mnh, B_TRUE); 1168 (void) mac_margin_remove(mh, blp->bl_margin); 1169 } 1170 1171 /* Tell the clients the real link state when we leave */ 1172 mac_link_redo(blp->bl_mh, 1173 mac_stat_get(blp->bl_mh, MAC_STAT_LOWLINK_STATE)); 1174 1175 /* Destroy all of the forwarding entries related to this link */ 1176 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 1177 offsetof(bridge_fwd_t, bf_node)); 1178 bip = blp->bl_inst; 1179 rw_enter(&bip->bi_rwlock, RW_WRITER); 1180 bfnext = avl_first(&bip->bi_fwd); 1181 while ((bfp = bfnext) != NULL) { 1182 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 1183 for (i = 0; i < bfp->bf_nlinks; i++) { 1184 if (bfp->bf_links[i] == blp) 1185 break; 1186 } 1187 if (i >= bfp->bf_nlinks) 1188 continue; 1189 if (bfp->bf_nlinks > 1) { 1190 /* note that this can't be the last reference */ 1191 link_unref(blp); 1192 bfp->bf_nlinks--; 1193 for (; i < bfp->bf_nlinks; i++) 1194 bfp->bf_links[i] = bfp->bf_links[i + 1]; 1195 } else { 1196 ASSERT(bfp->bf_flags & BFF_INTREE); 1197 avl_remove(&bip->bi_fwd, bfp); 1198 bfp->bf_flags &= ~BFF_INTREE; 1199 avl_add(&fwd_scavenge, bfp); 1200 } 1201 } 1202 rw_exit(&bip->bi_rwlock); 1203 bfnext = avl_first(&fwd_scavenge); 1204 while ((bfp = bfnext) != NULL) { 1205 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 1206 avl_remove(&fwd_scavenge, bfp); 1207 fwd_unref(bfp); 1208 } 1209 avl_destroy(&fwd_scavenge); 1210 1211 if (blp->bl_flags & BLF_CLIENT_OPEN) 1212 mac_client_close(blp->bl_mch, 0); 1213 1214 mac_close(mh); 1215 1216 /* 1217 * We are now completely removed from the active list, so drop the 1218 * reference (see bridge_add_link). 1219 */ 1220 link_unref(blp); 1221 } 1222 1223 static void 1224 shutdown_inst(bridge_inst_t *bip) 1225 { 1226 bridge_link_t *blp, *blnext; 1227 bridge_fwd_t *bfp; 1228 1229 mutex_enter(&inst_lock); 1230 if (bip->bi_flags & BIF_SHUTDOWN) { 1231 mutex_exit(&inst_lock); 1232 return; 1233 } 1234 1235 /* 1236 * Once on the inst_list, the bridge instance must not leave that list 1237 * without having the shutdown flag set first. When the shutdown flag 1238 * is set, we own the list reference, so we must drop it before 1239 * returning. 
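	 *
	 * Link teardown itself runs asynchronously on bridge_taskq (see
	 * link_shutdown); here we mark each link deleted and scrub the
	 * forwarding table.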
1240 */ 1241 bip->bi_flags |= BIF_SHUTDOWN; 1242 mutex_exit(&inst_lock); 1243 1244 bip->bi_control = NULL; 1245 1246 rw_enter(&bip->bi_rwlock, RW_READER); 1247 blnext = list_head(&bip->bi_links); 1248 while ((blp = blnext) != NULL) { 1249 blnext = list_next(&bip->bi_links, blp); 1250 if (!(blp->bl_flags & BLF_DELETED)) { 1251 blp->bl_flags |= BLF_DELETED; 1252 (void) ddi_taskq_dispatch(bridge_taskq, link_shutdown, 1253 blp, DDI_SLEEP); 1254 } 1255 } 1256 while ((bfp = avl_first(&bip->bi_fwd)) != NULL) { 1257 atomic_inc_uint(&bfp->bf_refs); 1258 rw_exit(&bip->bi_rwlock); 1259 fwd_delete(bfp); 1260 fwd_unref(bfp); 1261 rw_enter(&bip->bi_rwlock, RW_READER); 1262 } 1263 rw_exit(&bip->bi_rwlock); 1264 1265 /* 1266 * This bridge is being destroyed. Notify TRILL once all of the 1267 * links are all gone. 1268 */ 1269 mutex_enter(&inst_lock); 1270 while (bip->bi_trilldata != NULL && !list_is_empty(&bip->bi_links)) 1271 cv_wait(&bip->bi_linkwait, &inst_lock); 1272 mutex_exit(&inst_lock); 1273 if (bip->bi_trilldata != NULL) 1274 trill_brdstr_fn(bip->bi_trilldata, bip); 1275 1276 bridge_unref(bip); 1277 } 1278 1279 /* 1280 * This is called once by the TRILL module when it starts up. It just sets the 1281 * global TRILL callback function pointers -- data transmit/receive and bridge 1282 * and link destroy notification. There's only one TRILL module, so only one 1283 * registration is needed. 1284 * 1285 * TRILL should call this function with NULL pointers before unloading. It 1286 * must not do so before dropping all references to bridges and links. We 1287 * assert that this is true on debug builds. 1288 */ 1289 void 1290 bridge_trill_register_cb(trill_recv_pkt_t recv_fn, trill_encap_pkt_t encap_fn, 1291 trill_br_dstr_t brdstr_fn, trill_ln_dstr_t lndstr_fn) 1292 { 1293 #ifdef DEBUG 1294 if (recv_fn == NULL && trill_recv_fn != NULL) { 1295 bridge_inst_t *bip; 1296 bridge_link_t *blp; 1297 1298 mutex_enter(&inst_lock); 1299 for (bip = list_head(&inst_list); bip != NULL; 1300 bip = list_next(&inst_list, bip)) { 1301 ASSERT(bip->bi_trilldata == NULL); 1302 rw_enter(&bip->bi_rwlock, RW_READER); 1303 for (blp = list_head(&bip->bi_links); blp != NULL; 1304 blp = list_next(&bip->bi_links, blp)) { 1305 ASSERT(blp->bl_trilldata == NULL); 1306 } 1307 rw_exit(&bip->bi_rwlock); 1308 } 1309 mutex_exit(&inst_lock); 1310 } 1311 #endif 1312 trill_recv_fn = recv_fn; 1313 trill_encap_fn = encap_fn; 1314 trill_brdstr_fn = brdstr_fn; 1315 trill_lndstr_fn = lndstr_fn; 1316 } 1317 1318 /* 1319 * This registers the TRILL instance pointer with a bridge. Before this 1320 * pointer is set, the forwarding, TRILL receive, and bridge destructor 1321 * functions won't be called. 1322 * 1323 * TRILL holds a reference on a bridge with this call. It must free the 1324 * reference by calling the unregister function below. 1325 */ 1326 bridge_inst_t * 1327 bridge_trill_brref(const char *bname, void *ptr) 1328 { 1329 char bridge[MAXLINKNAMELEN]; 1330 bridge_inst_t *bip; 1331 1332 (void) snprintf(bridge, MAXLINKNAMELEN, "%s0", bname); 1333 bip = bridge_find_name(bridge); 1334 if (bip != NULL) { 1335 ASSERT(bip->bi_trilldata == NULL && ptr != NULL); 1336 bip->bi_trilldata = ptr; 1337 } 1338 return (bip); 1339 } 1340 1341 void 1342 bridge_trill_brunref(bridge_inst_t *bip) 1343 { 1344 ASSERT(bip->bi_trilldata != NULL); 1345 bip->bi_trilldata = NULL; 1346 bridge_unref(bip); 1347 } 1348 1349 /* 1350 * TRILL calls this function when referencing a particular link on a bridge. 
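 * The link is looked up by datalink ID under bi_rwlock (as reader); links
 * that are already deleted or already bound to TRILL are skipped.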
1351 * 1352 * It holds a reference on the link, so TRILL must clear out the reference when 1353 * it's done with the link (on unbinding). 1354 */ 1355 bridge_link_t * 1356 bridge_trill_lnref(bridge_inst_t *bip, datalink_id_t linkid, void *ptr) 1357 { 1358 bridge_link_t *blp; 1359 1360 ASSERT(ptr != NULL); 1361 rw_enter(&bip->bi_rwlock, RW_READER); 1362 for (blp = list_head(&bip->bi_links); blp != NULL; 1363 blp = list_next(&bip->bi_links, blp)) { 1364 if (!(blp->bl_flags & BLF_DELETED) && 1365 blp->bl_linkid == linkid && blp->bl_trilldata == NULL) { 1366 blp->bl_trilldata = ptr; 1367 blp->bl_flags &= ~BLF_TRILLACTIVE; 1368 (void) memset(blp->bl_afs, 0, sizeof (blp->bl_afs)); 1369 atomic_inc_uint(&blp->bl_refs); 1370 break; 1371 } 1372 } 1373 rw_exit(&bip->bi_rwlock); 1374 return (blp); 1375 } 1376 1377 void 1378 bridge_trill_lnunref(bridge_link_t *blp) 1379 { 1380 mutex_enter(&blp->bl_trilllock); 1381 ASSERT(blp->bl_trilldata != NULL); 1382 blp->bl_trilldata = NULL; 1383 blp->bl_flags &= ~BLF_TRILLACTIVE; 1384 while (blp->bl_trillthreads > 0) 1385 cv_wait(&blp->bl_trillwait, &blp->bl_trilllock); 1386 mutex_exit(&blp->bl_trilllock); 1387 (void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs)); 1388 link_unref(blp); 1389 } 1390 1391 /* 1392 * This periodic timer performs three functions: 1393 * 1. It scans the list of learned forwarding entries, and removes ones that 1394 * haven't been heard from in a while. The time limit is backed down if 1395 * we're above the configured table limit. 1396 * 2. It walks the links and decays away the bl_learns counter. 1397 * 3. It scans the observability node entries looking for ones that can be 1398 * freed up. 1399 */ 1400 /* ARGSUSED */ 1401 static void 1402 bridge_timer(void *arg) 1403 { 1404 bridge_inst_t *bip; 1405 bridge_fwd_t *bfp, *bfnext; 1406 bridge_mac_t *bmp, *bmnext; 1407 bridge_link_t *blp; 1408 int err; 1409 datalink_id_t tmpid; 1410 avl_tree_t fwd_scavenge; 1411 clock_t age_limit; 1412 uint32_t ldecay; 1413 1414 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 1415 offsetof(bridge_fwd_t, bf_node)); 1416 mutex_enter(&inst_lock); 1417 for (bip = list_head(&inst_list); bip != NULL; 1418 bip = list_next(&inst_list, bip)) { 1419 if (bip->bi_flags & BIF_SHUTDOWN) 1420 continue; 1421 rw_enter(&bip->bi_rwlock, RW_WRITER); 1422 /* compute scaled maximum age based on table limit */ 1423 if (avl_numnodes(&bip->bi_fwd) > bip->bi_tablemax) 1424 bip->bi_tshift++; 1425 else 1426 bip->bi_tshift = 0; 1427 if ((age_limit = bridge_fwd_age >> bip->bi_tshift) == 0) { 1428 if (bip->bi_tshift != 0) 1429 bip->bi_tshift--; 1430 age_limit = 1; 1431 } 1432 bfnext = avl_first(&bip->bi_fwd); 1433 while ((bfp = bfnext) != NULL) { 1434 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 1435 if (!(bfp->bf_flags & BFF_LOCALADDR) && 1436 (ddi_get_lbolt() - bfp->bf_lastheard) > age_limit) { 1437 ASSERT(bfp->bf_flags & BFF_INTREE); 1438 avl_remove(&bip->bi_fwd, bfp); 1439 bfp->bf_flags &= ~BFF_INTREE; 1440 avl_add(&fwd_scavenge, bfp); 1441 } 1442 } 1443 for (blp = list_head(&bip->bi_links); blp != NULL; 1444 blp = list_next(&bip->bi_links, blp)) { 1445 ldecay = mac_get_ldecay(blp->bl_mh); 1446 if (ldecay >= blp->bl_learns) 1447 blp->bl_learns = 0; 1448 else 1449 atomic_add_int(&blp->bl_learns, -(int)ldecay); 1450 } 1451 rw_exit(&bip->bi_rwlock); 1452 bfnext = avl_first(&fwd_scavenge); 1453 while ((bfp = bfnext) != NULL) { 1454 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 1455 avl_remove(&fwd_scavenge, bfp); 1456 KIINCR(bki_expire); 1457 fwd_unref(bfp); /* drop tree reference */ 
1458 } 1459 } 1460 mutex_exit(&inst_lock); 1461 avl_destroy(&fwd_scavenge); 1462 1463 /* 1464 * Scan the bridge_mac_t entries and try to free up the ones that are 1465 * no longer active. This must be done by polling, as neither DLS nor 1466 * MAC provides a driver any sort of positive control over clients. 1467 */ 1468 rw_enter(&bmac_rwlock, RW_WRITER); 1469 bmnext = list_head(&bmac_list); 1470 while ((bmp = bmnext) != NULL) { 1471 bmnext = list_next(&bmac_list, bmp); 1472 1473 /* ignore active bridges */ 1474 if (bmp->bm_inst != NULL) 1475 continue; 1476 1477 if (bmp->bm_flags & BMF_DLS) { 1478 err = dls_devnet_destroy(bmp->bm_mh, &tmpid, B_FALSE); 1479 ASSERT(err == 0 || err == EBUSY); 1480 if (err == 0) 1481 bmp->bm_flags &= ~BMF_DLS; 1482 } 1483 1484 if (!(bmp->bm_flags & BMF_DLS)) { 1485 err = mac_unregister(bmp->bm_mh); 1486 ASSERT(err == 0 || err == EBUSY); 1487 if (err == 0) { 1488 list_remove(&bmac_list, bmp); 1489 kmem_free(bmp, sizeof (*bmp)); 1490 } 1491 } 1492 } 1493 if (list_is_empty(&bmac_list)) { 1494 bridge_timerid = 0; 1495 } else { 1496 bridge_timerid = timeout(bridge_timer, NULL, 1497 bridge_scan_interval); 1498 } 1499 rw_exit(&bmac_rwlock); 1500 } 1501 1502 static int 1503 bridge_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp) 1504 { 1505 bridge_stream_t *bsp; 1506 1507 if (rq->q_ptr != NULL) 1508 return (0); 1509 1510 if (sflag & MODOPEN) 1511 return (EINVAL); 1512 1513 /* 1514 * Check the minor node number being opened. This tells us which 1515 * bridge instance the user wants. 1516 */ 1517 if (getminor(*devp) != 0) { 1518 /* 1519 * This is a regular DLPI stream for snoop or the like. 1520 * Redirect it through DLD. 1521 */ 1522 rq->q_qinfo = &bridge_dld_rinit; 1523 OTHERQ(rq)->q_qinfo = &bridge_dld_winit; 1524 return (dld_open(rq, devp, oflag, sflag, credp)); 1525 } else { 1526 /* 1527 * Allocate the bridge control stream structure. 1528 */ 1529 if ((bsp = stream_alloc()) == NULL) 1530 return (ENOSR); 1531 rq->q_ptr = WR(rq)->q_ptr = (caddr_t)bsp; 1532 bsp->bs_wq = WR(rq); 1533 *devp = makedevice(getmajor(*devp), bsp->bs_minor); 1534 qprocson(rq); 1535 return (0); 1536 } 1537 } 1538 1539 /* 1540 * This is used only for bridge control streams. DLPI goes through dld 1541 * instead. 1542 */ 1543 static int 1544 bridge_close(queue_t *rq) 1545 { 1546 bridge_stream_t *bsp = rq->q_ptr; 1547 bridge_inst_t *bip; 1548 1549 /* 1550 * Wait for any stray taskq (add/delete link) entries related to this 1551 * stream to leave the system. 1552 */ 1553 mutex_enter(&stream_ref_lock); 1554 while (bsp->bs_taskq_cnt != 0) 1555 cv_wait(&stream_ref_cv, &stream_ref_lock); 1556 mutex_exit(&stream_ref_lock); 1557 1558 qprocsoff(rq); 1559 if ((bip = bsp->bs_inst) != NULL) 1560 shutdown_inst(bip); 1561 rq->q_ptr = WR(rq)->q_ptr = NULL; 1562 stream_free(bsp); 1563 if (bip != NULL) 1564 bridge_unref(bip); 1565 1566 return (0); 1567 } 1568 1569 static void 1570 bridge_learn(bridge_link_t *blp, const uint8_t *saddr, uint16_t ingress_nick, 1571 uint16_t vlanid) 1572 { 1573 bridge_inst_t *bip = blp->bl_inst; 1574 bridge_fwd_t *bfp, *bfpnew; 1575 int i; 1576 boolean_t replaced = B_FALSE; 1577 1578 /* Ignore multi-destination address used as source; it's nonsense. */ 1579 if (*saddr & 1) 1580 return; 1581 1582 /* 1583 * If the source is known, then check whether it belongs on this link. 1584 * If not, and this isn't a fixed local address, then we've detected a 1585 * move. If it's not known, learn it. 
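	 * Learning is rate-limited per link: bl_learns is checked against
	 * mac_get_llimit() below, and bridge_timer() decays the counter back
	 * down over time.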
	 */
	if ((bfp = fwd_find(bip, saddr, vlanid)) != NULL) {
		/*
		 * If the packet has a fixed local source address, then there's
		 * nothing we can learn.  We must quit.  If this was a received
		 * packet, then the sender has stolen our address, but there's
		 * nothing we can do.  If it's a transmitted packet, then
		 * that's the normal case.
		 */
		if (bfp->bf_flags & BFF_LOCALADDR) {
			fwd_unref(bfp);
			return;
		}

		/*
		 * Check if the link (and TRILL sender, if any) being used is
		 * among the ones registered for this address.  If so, then
		 * this is information that we already know.
		 */
		if (bfp->bf_trill_nick == ingress_nick) {
			for (i = 0; i < bfp->bf_nlinks; i++) {
				if (bfp->bf_links[i] == blp) {
					bfp->bf_lastheard = ddi_get_lbolt();
					fwd_unref(bfp);
					return;
				}
			}
		}
	}

	/*
	 * Note that we intentionally "unlearn" things that appear to be under
	 * attack on this link.  The forwarding cache is a negative thing for
	 * security -- it disables reachability as a performance optimization
	 * -- so leaving out entries optimizes for success and defends against
	 * the attack.  Thus, the bare increment without a check in the delete
	 * code above is right.  (And it's ok if we skid over the limit a
	 * little, so there's no synchronization needed on the test.)
	 */
	if (blp->bl_learns >= mac_get_llimit(blp->bl_mh)) {
		if (bfp != NULL) {
			if (bfp->bf_vcnt == 0)
				fwd_delete(bfp);
			fwd_unref(bfp);
		}
		return;
	}

	atomic_inc_uint(&blp->bl_learns);

	if ((bfpnew = fwd_alloc(saddr, 1, ingress_nick)) == NULL) {
		if (bfp != NULL)
			fwd_unref(bfp);
		return;
	}
	KIINCR(bki_count);

	if (bfp != NULL) {
		/*
		 * If this is a new destination for the same VLAN, then delete
		 * so that we can update.  If it's a different VLAN, then we're
		 * not going to delete the original.  Split off instead into an
		 * IVL entry.
		 */
		if (bfp->bf_vlanid == vlanid) {
			/* save the count of IVL duplicates */
			bfpnew->bf_vcnt = bfp->bf_vcnt;

			/* entry deletes count as learning events */
			atomic_inc_uint(&blp->bl_learns);

			/* destroy and create anew; node moved */
			fwd_delete(bfp);
			replaced = B_TRUE;
			KIINCR(bki_moved);
		} else {
			bfp->bf_vcnt++;
			bfpnew->bf_flags |= BFF_VLANLOCAL;
		}
		fwd_unref(bfp);
	}
	bfpnew->bf_links[0] = blp;
	bfpnew->bf_nlinks = 1;
	atomic_inc_uint(&blp->bl_refs);	/* bf_links entry */
	if (!fwd_insert(bip, bfpnew))
		fwd_free(bfpnew);
	else if (!replaced)
		KIINCR(bki_source);
}

/*
 * Process the VLAN headers for output on a given link.  There are several
 * cases (noting that we don't map VLANs):
 * 1. The input packet is good as it is; either
 *    a. It has no tag, and output has same PVID
 *    b. It has a non-zero priority-only tag for PVID, and b_band is same
 *    c. It has a tag with VLAN different from PVID, and b_band is same
 * 2. The tag must change: non-zero b_band is different from tag priority
 * 3. The packet has a tag and should not (VLAN same as PVID, b_band zero)
 * 4. The packet has no tag and needs one:
 *    a. VLAN ID same as PVID, but b_band is non-zero
 *    b. VLAN ID different from PVID
 * We exclude case 1 first, then modify the packet.
Note that output packets 1689 * get a priority set by the mblk, not by the header, because QoS in bridging 1690 * requires priority recalculation at each node. 1691 * 1692 * The passed-in tci is the "impossible" value 0xFFFF when no tag is present. 1693 */ 1694 static mblk_t * 1695 reform_vlan_header(mblk_t *mp, uint16_t vlanid, uint16_t tci, uint16_t pvid) 1696 { 1697 boolean_t source_has_tag = (tci != 0xFFFF); 1698 mblk_t *mpcopy; 1699 size_t mlen, minlen; 1700 struct ether_vlan_header *evh; 1701 int pri; 1702 1703 /* This helps centralize error handling in the caller. */ 1704 if (mp == NULL) 1705 return (mp); 1706 1707 /* No forwarded packet can have hardware checksum enabled */ 1708 DB_CKSUMFLAGS(mp) = 0; 1709 1710 /* Get the no-modification cases out of the way first */ 1711 if (!source_has_tag && vlanid == pvid) /* 1a */ 1712 return (mp); 1713 1714 pri = VLAN_PRI(tci); 1715 if (source_has_tag && mp->b_band == pri) { 1716 if (vlanid != pvid) /* 1c */ 1717 return (mp); 1718 if (pri != 0 && VLAN_ID(tci) == 0) /* 1b */ 1719 return (mp); 1720 } 1721 1722 /* 1723 * We now know that we must modify the packet. Prepare for that. Note 1724 * that if a tag is present, the caller has already done a pullup for 1725 * the VLAN header, so we're good to go. 1726 */ 1727 if (MBLKL(mp) < sizeof (struct ether_header)) { 1728 mpcopy = msgpullup(mp, sizeof (struct ether_header)); 1729 if (mpcopy == NULL) { 1730 freemsg(mp); 1731 return (NULL); 1732 } 1733 mp = mpcopy; 1734 } 1735 if (DB_REF(mp) > 1 || !IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) || 1736 (!source_has_tag && MBLKTAIL(mp) < VLAN_INCR)) { 1737 minlen = mlen = MBLKL(mp); 1738 if (!source_has_tag) 1739 minlen += VLAN_INCR; 1740 ASSERT(minlen >= sizeof (struct ether_vlan_header)); 1741 /* 1742 * We're willing to copy some data to avoid fragmentation, but 1743 * not a lot. 1744 */ 1745 if (minlen > 256) 1746 minlen = sizeof (struct ether_vlan_header); 1747 mpcopy = allocb(minlen, BPRI_MED); 1748 if (mpcopy == NULL) { 1749 freemsg(mp); 1750 return (NULL); 1751 } 1752 if (mlen <= minlen) { 1753 /* We toss the first mblk when we can. 
*/ 1754 bcopy(mp->b_rptr, mpcopy->b_rptr, mlen); 1755 mpcopy->b_wptr += mlen; 1756 mpcopy->b_cont = mp->b_cont; 1757 freeb(mp); 1758 } else { 1759 /* If not, then just copy what we need */ 1760 if (!source_has_tag) 1761 minlen = sizeof (struct ether_header); 1762 bcopy(mp->b_rptr, mpcopy->b_rptr, minlen); 1763 mpcopy->b_wptr += minlen; 1764 mpcopy->b_cont = mp; 1765 mp->b_rptr += minlen; 1766 } 1767 mp = mpcopy; 1768 } 1769 1770 /* LINTED: pointer alignment */ 1771 evh = (struct ether_vlan_header *)mp->b_rptr; 1772 if (source_has_tag) { 1773 if (mp->b_band == 0 && vlanid == pvid) { /* 3 */ 1774 evh->ether_tpid = evh->ether_type; 1775 mlen = MBLKL(mp); 1776 if (mlen > sizeof (struct ether_vlan_header)) 1777 ovbcopy(mp->b_rptr + 1778 sizeof (struct ether_vlan_header), 1779 mp->b_rptr + sizeof (struct ether_header), 1780 mlen - sizeof (struct ether_vlan_header)); 1781 mp->b_wptr -= VLAN_INCR; 1782 } else { /* 2 */ 1783 if (vlanid == pvid) 1784 vlanid = VLAN_ID_NONE; 1785 tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid); 1786 evh->ether_tci = htons(tci); 1787 } 1788 } else { 1789 /* case 4: no header present, but one is needed */ 1790 mlen = MBLKL(mp); 1791 if (mlen > sizeof (struct ether_header)) 1792 ovbcopy(mp->b_rptr + sizeof (struct ether_header), 1793 mp->b_rptr + sizeof (struct ether_vlan_header), 1794 mlen - sizeof (struct ether_header)); 1795 mp->b_wptr += VLAN_INCR; 1796 ASSERT(mp->b_wptr <= DB_LIM(mp)); 1797 if (vlanid == pvid) 1798 vlanid = VLAN_ID_NONE; 1799 tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid); 1800 evh->ether_type = evh->ether_tpid; 1801 evh->ether_tpid = htons(ETHERTYPE_VLAN); 1802 evh->ether_tci = htons(tci); 1803 } 1804 return (mp); 1805 } 1806 1807 /* Record VLAN information and strip header if requested . */ 1808 static void 1809 update_header(mblk_t *mp, mac_header_info_t *hdr_info, boolean_t striphdr) 1810 { 1811 if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) { 1812 struct ether_vlan_header *evhp; 1813 uint16_t ether_type; 1814 1815 /* LINTED: alignment */ 1816 evhp = (struct ether_vlan_header *)mp->b_rptr; 1817 hdr_info->mhi_istagged = B_TRUE; 1818 hdr_info->mhi_tci = ntohs(evhp->ether_tci); 1819 if (striphdr) { 1820 /* 1821 * For VLAN tagged frames update the ether_type 1822 * in hdr_info before stripping the header. 1823 */ 1824 ether_type = ntohs(evhp->ether_type); 1825 hdr_info->mhi_origsap = ether_type; 1826 hdr_info->mhi_bindsap = (ether_type > ETHERMTU) ? 1827 ether_type : DLS_SAP_LLC; 1828 mp->b_rptr = (uchar_t *)(evhp + 1); 1829 } 1830 } else { 1831 hdr_info->mhi_istagged = B_FALSE; 1832 hdr_info->mhi_tci = VLAN_ID_NONE; 1833 if (striphdr) 1834 mp->b_rptr += sizeof (struct ether_header); 1835 } 1836 } 1837 1838 /* 1839 * Return B_TRUE if we're allowed to send on this link with the given VLAN ID. 1840 */ 1841 static boolean_t 1842 bridge_can_send(bridge_link_t *blp, uint16_t vlanid) 1843 { 1844 ASSERT(vlanid != VLAN_ID_NONE); 1845 if (blp->bl_flags & BLF_DELETED) 1846 return (B_FALSE); 1847 if (blp->bl_trilldata == NULL && blp->bl_state != BLS_FORWARDING) 1848 return (B_FALSE); 1849 return (BRIDGE_VLAN_ISSET(blp, vlanid) && BRIDGE_AF_ISSET(blp, vlanid)); 1850 } 1851 1852 /* 1853 * This function scans the bridge forwarding tables in order to forward a given 1854 * packet. If the packet either doesn't need forwarding (the current link is 1855 * correct) or the current link needs a copy as well, then the packet is 1856 * returned to the caller. 1857 * 1858 * If a packet has been decapsulated from TRILL, then it must *NOT* reenter a 1859 * TRILL tunnel. 
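 * (Re-encapsulating a frame that was just decapsulated would mean the TRILL
 * protocols are misbehaving, so such frames are treated as errors.)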
If the destination points there, then drop instead. 1860 */ 1861 static mblk_t * 1862 bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp, 1863 uint16_t vlanid, uint16_t tci, boolean_t from_trill, boolean_t is_xmit) 1864 { 1865 mblk_t *mpsend, *mpcopy; 1866 bridge_inst_t *bip = blp->bl_inst; 1867 bridge_link_t *blpsend, *blpnext; 1868 bridge_fwd_t *bfp; 1869 uint_t i; 1870 boolean_t selfseen = B_FALSE; 1871 void *tdp; 1872 const uint8_t *daddr = hdr_info->mhi_daddr; 1873 1874 /* 1875 * Check for the IEEE "reserved" multicast addresses. Messages sent to 1876 * these addresses are used for link-local control (STP and pause), and 1877 * are never forwarded or redirected. 1878 */ 1879 if (daddr[0] == 1 && daddr[1] == 0x80 && daddr[2] == 0xc2 && 1880 daddr[3] == 0 && daddr[4] == 0 && (daddr[5] & 0xf0) == 0) { 1881 if (from_trill) { 1882 freemsg(mp); 1883 mp = NULL; 1884 } 1885 return (mp); 1886 } 1887 1888 if ((bfp = fwd_find(bip, daddr, vlanid)) != NULL) { 1889 1890 /* 1891 * If trill indicates a destination for this node, then it's 1892 * clearly not intended for local delivery. We must tell TRILL 1893 * to encapsulate, as long as we didn't just decapsulate it. 1894 */ 1895 if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) { 1896 /* 1897 * Error case: can't reencapsulate if the protocols are 1898 * working correctly. 1899 */ 1900 if (from_trill) { 1901 freemsg(mp); 1902 return (NULL); 1903 } 1904 mutex_enter(&blp->bl_trilllock); 1905 if ((tdp = blp->bl_trilldata) != NULL) { 1906 blp->bl_trillthreads++; 1907 mutex_exit(&blp->bl_trilllock); 1908 update_header(mp, hdr_info, B_FALSE); 1909 if (is_xmit) 1910 mp = mac_fix_cksum(mp); 1911 /* all trill data frames have Inner.VLAN */ 1912 mp = reform_vlan_header(mp, vlanid, tci, 0); 1913 if (mp == NULL) { 1914 KIINCR(bki_drops); 1915 fwd_unref(bfp); 1916 return (NULL); 1917 } 1918 trill_encap_fn(tdp, blp, hdr_info, mp, 1919 bfp->bf_trill_nick); 1920 mutex_enter(&blp->bl_trilllock); 1921 if (--blp->bl_trillthreads == 0 && 1922 blp->bl_trilldata == NULL) 1923 cv_broadcast(&blp->bl_trillwait); 1924 } 1925 mutex_exit(&blp->bl_trilllock); 1926 1927 /* if TRILL has been disabled, then kill this stray */ 1928 if (tdp == NULL) { 1929 freemsg(mp); 1930 fwd_delete(bfp); 1931 } 1932 fwd_unref(bfp); 1933 return (NULL); 1934 } 1935 1936 /* find first link we can send on */ 1937 for (i = 0; i < bfp->bf_nlinks; i++) { 1938 blpsend = bfp->bf_links[i]; 1939 if (blpsend == blp) 1940 selfseen = B_TRUE; 1941 else if (bridge_can_send(blpsend, vlanid)) 1942 break; 1943 } 1944 1945 while (i < bfp->bf_nlinks) { 1946 blpsend = bfp->bf_links[i]; 1947 for (i++; i < bfp->bf_nlinks; i++) { 1948 blpnext = bfp->bf_links[i]; 1949 if (blpnext == blp) 1950 selfseen = B_TRUE; 1951 else if (bridge_can_send(blpnext, vlanid)) 1952 break; 1953 } 1954 if (i == bfp->bf_nlinks && !selfseen) { 1955 mpsend = mp; 1956 mp = NULL; 1957 } else { 1958 mpsend = copymsg(mp); 1959 } 1960 1961 if (!from_trill && is_xmit) 1962 mpsend = mac_fix_cksum(mpsend); 1963 1964 mpsend = reform_vlan_header(mpsend, vlanid, tci, 1965 blpsend->bl_pvid); 1966 if (mpsend == NULL) { 1967 KIINCR(bki_drops); 1968 continue; 1969 } 1970 1971 KIINCR(bki_forwards); 1972 /* 1973 * No need to bump up the link reference count, as 1974 * the forwarding entry itself holds a reference to 1975 * the link. 
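			 * (Each bf_links[] entry was reference-held when it
			 * was added; see bridge_learn() and
			 * fwd_update_local().)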
1976 */ 1977 if (bfp->bf_flags & BFF_LOCALADDR) { 1978 mac_rx_common(blpsend->bl_mh, NULL, mpsend); 1979 } else { 1980 KLPINCR(blpsend, bkl_xmit); 1981 MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, 1982 mpsend); 1983 freemsg(mpsend); 1984 } 1985 } 1986 /* 1987 * Handle a special case: if we're transmitting to the original 1988 * link, then check whether the localaddr flag is set. If it 1989 * is, then receive instead. This doesn't happen with ordinary 1990 * bridging, but does happen often with TRILL decapsulation. 1991 */ 1992 if (mp != NULL && is_xmit && (bfp->bf_flags & BFF_LOCALADDR)) { 1993 mac_rx_common(blp->bl_mh, NULL, mp); 1994 mp = NULL; 1995 } 1996 fwd_unref(bfp); 1997 } else { 1998 /* 1999 * TRILL has two cases to handle. If the packet is off the 2000 * wire (not from TRILL), then we need to send up into the 2001 * TRILL module to have the distribution tree computed. If the 2002 * packet is from TRILL (decapsulated), then we're part of the 2003 * distribution tree, and we need to copy the packet on member 2004 * interfaces. 2005 * 2006 * Thus, the from TRILL case is identical to the STP case. 2007 */ 2008 if (!from_trill && blp->bl_trilldata != NULL) { 2009 mutex_enter(&blp->bl_trilllock); 2010 if ((tdp = blp->bl_trilldata) != NULL) { 2011 blp->bl_trillthreads++; 2012 mutex_exit(&blp->bl_trilllock); 2013 if ((mpsend = copymsg(mp)) != NULL) { 2014 update_header(mpsend, 2015 hdr_info, B_FALSE); 2016 /* 2017 * all trill data frames have 2018 * Inner.VLAN 2019 */ 2020 mpsend = reform_vlan_header(mpsend, 2021 vlanid, tci, 0); 2022 if (mpsend == NULL) { 2023 KIINCR(bki_drops); 2024 } else { 2025 trill_encap_fn(tdp, blp, 2026 hdr_info, mpsend, 2027 RBRIDGE_NICKNAME_NONE); 2028 } 2029 } 2030 mutex_enter(&blp->bl_trilllock); 2031 if (--blp->bl_trillthreads == 0 && 2032 blp->bl_trilldata == NULL) 2033 cv_broadcast(&blp->bl_trillwait); 2034 } 2035 mutex_exit(&blp->bl_trilllock); 2036 } 2037 2038 /* 2039 * This is an unknown destination, so flood. 
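 * The walk that follows never holds bi_rwlock across a transmit.  As a
 * sketch of the pattern used below (names as in this function):
 *
 *	rw_enter(&bip->bi_rwlock, RW_READER);
 *	    find the next link for which bridge_can_send() is true;
 *	    atomic_inc_uint(&blpnext->bl_refs);
 *	rw_exit(&bip->bi_rwlock);
 *	    copy the message, fix up the VLAN header, transmit, link_unref();
 *
 * Holding a reference on the next link keeps it from being freed while the
 * lock is dropped.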
2040 */ 2041 rw_enter(&bip->bi_rwlock, RW_READER); 2042 for (blpnext = list_head(&bip->bi_links); blpnext != NULL; 2043 blpnext = list_next(&bip->bi_links, blpnext)) { 2044 if (blpnext == blp) 2045 selfseen = B_TRUE; 2046 else if (bridge_can_send(blpnext, vlanid)) 2047 break; 2048 } 2049 if (blpnext != NULL) 2050 atomic_inc_uint(&blpnext->bl_refs); 2051 rw_exit(&bip->bi_rwlock); 2052 while ((blpsend = blpnext) != NULL) { 2053 rw_enter(&bip->bi_rwlock, RW_READER); 2054 for (blpnext = list_next(&bip->bi_links, blpsend); 2055 blpnext != NULL; 2056 blpnext = list_next(&bip->bi_links, blpnext)) { 2057 if (blpnext == blp) 2058 selfseen = B_TRUE; 2059 else if (bridge_can_send(blpnext, vlanid)) 2060 break; 2061 } 2062 if (blpnext != NULL) 2063 atomic_inc_uint(&blpnext->bl_refs); 2064 rw_exit(&bip->bi_rwlock); 2065 if (blpnext == NULL && !selfseen) { 2066 mpsend = mp; 2067 mp = NULL; 2068 } else { 2069 mpsend = copymsg(mp); 2070 } 2071 2072 if (!from_trill && is_xmit) 2073 mpsend = mac_fix_cksum(mpsend); 2074 2075 mpsend = reform_vlan_header(mpsend, vlanid, tci, 2076 blpsend->bl_pvid); 2077 if (mpsend == NULL) { 2078 KIINCR(bki_drops); 2079 continue; 2080 } 2081 2082 if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST) 2083 KIINCR(bki_unknown); 2084 else 2085 KIINCR(bki_mbcast); 2086 KLPINCR(blpsend, bkl_xmit); 2087 if ((mpcopy = copymsg(mpsend)) != NULL) 2088 mac_rx_common(blpsend->bl_mh, NULL, mpcopy); 2089 MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, mpsend); 2090 freemsg(mpsend); 2091 link_unref(blpsend); 2092 } 2093 } 2094 2095 /* 2096 * At this point, if np is non-NULL, it means that the caller needs to 2097 * continue on the selected link. 2098 */ 2099 return (mp); 2100 } 2101 2102 /* 2103 * Extract and validate the VLAN information for a given packet. This checks 2104 * conformance with the rules for use of the PVID on the link, and for the 2105 * allowed (configured) VLAN set. 2106 * 2107 * Returns B_TRUE if the packet passes, B_FALSE if it fails. 2108 */ 2109 static boolean_t 2110 bridge_get_vlan(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp, 2111 uint16_t *vlanidp, uint16_t *tcip) 2112 { 2113 uint16_t tci, vlanid; 2114 2115 if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) { 2116 ptrdiff_t tpos = offsetof(struct ether_vlan_header, ether_tci); 2117 ptrdiff_t mlen; 2118 2119 /* 2120 * Extract the VLAN ID information, regardless of alignment, 2121 * and without a pullup. This isn't attractive, but we do this 2122 * to avoid having to deal with the pointers stashed in 2123 * hdr_info moving around or having the caller deal with a new 2124 * mblk_t pointer. 2125 */ 2126 while (mp != NULL) { 2127 mlen = MBLKL(mp); 2128 if (mlen > tpos && mlen > 0) 2129 break; 2130 tpos -= mlen; 2131 mp = mp->b_cont; 2132 } 2133 if (mp == NULL) 2134 return (B_FALSE); 2135 tci = mp->b_rptr[tpos] << 8; 2136 if (++tpos >= mlen) { 2137 do { 2138 mp = mp->b_cont; 2139 } while (mp != NULL && MBLKL(mp) == 0); 2140 if (mp == NULL) 2141 return (B_FALSE); 2142 tpos = 0; 2143 } 2144 tci |= mp->b_rptr[tpos]; 2145 2146 vlanid = VLAN_ID(tci); 2147 if (VLAN_CFI(tci) != ETHER_CFI || vlanid > VLAN_ID_MAX) 2148 return (B_FALSE); 2149 if (vlanid == VLAN_ID_NONE || vlanid == blp->bl_pvid) 2150 goto input_no_vlan; 2151 if (!BRIDGE_VLAN_ISSET(blp, vlanid)) 2152 return (B_FALSE); 2153 } else { 2154 tci = 0xFFFF; 2155 input_no_vlan: 2156 /* 2157 * If PVID is set to zero, then untagged traffic is not 2158 * supported here. Do not learn or forward. 
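 * As a worked example (VLAN numbers are illustrative only): with bl_pvid = 5,
 * an untagged frame and a frame tagged with VID 5 are both classified as
 * VLAN 5 here.  With bl_pvid = 0, untagged and priority-tagged (VID 0)
 * frames are rejected outright, while frames tagged with a nonzero VID are
 * still checked against the allowed-VLAN set above.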
2159 */ 2160 if ((vlanid = blp->bl_pvid) == VLAN_ID_NONE) 2161 return (B_FALSE); 2162 } 2163 2164 *tcip = tci; 2165 *vlanidp = vlanid; 2166 return (B_TRUE); 2167 } 2168 2169 /* 2170 * Handle MAC notifications. 2171 */ 2172 static void 2173 bridge_notify_cb(void *arg, mac_notify_type_t note_type) 2174 { 2175 bridge_link_t *blp = arg; 2176 2177 switch (note_type) { 2178 case MAC_NOTE_UNICST: 2179 bridge_new_unicst(blp); 2180 break; 2181 2182 case MAC_NOTE_SDU_SIZE: { 2183 uint_t maxsdu; 2184 bridge_inst_t *bip = blp->bl_inst; 2185 bridge_mac_t *bmp = bip->bi_mac; 2186 boolean_t notify = B_FALSE; 2187 mblk_t *mlist = NULL; 2188 2189 mac_sdu_get(blp->bl_mh, NULL, &maxsdu); 2190 rw_enter(&bip->bi_rwlock, RW_READER); 2191 if (list_prev(&bip->bi_links, blp) == NULL && 2192 list_next(&bip->bi_links, blp) == NULL) { 2193 notify = (maxsdu != bmp->bm_maxsdu); 2194 bmp->bm_maxsdu = maxsdu; 2195 } 2196 blp->bl_maxsdu = maxsdu; 2197 if (maxsdu != bmp->bm_maxsdu) 2198 link_sdu_fail(blp, B_TRUE, &mlist); 2199 else if (notify) 2200 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 2201 rw_exit(&bip->bi_rwlock); 2202 send_up_messages(bip, mlist); 2203 break; 2204 } 2205 } 2206 } 2207 2208 /* 2209 * This is called by the MAC layer. As with the transmit side, we're right in 2210 * the data path for all I/O on this port, so if we don't need to forward this 2211 * packet anywhere, we have to send it upwards via mac_rx_common. 2212 */ 2213 static void 2214 bridge_recv_cb(mac_handle_t mh, mac_resource_handle_t rsrc, mblk_t *mpnext) 2215 { 2216 mblk_t *mp, *mpcopy; 2217 bridge_link_t *blp = (bridge_link_t *)mh; 2218 bridge_inst_t *bip = blp->bl_inst; 2219 bridge_mac_t *bmp = bip->bi_mac; 2220 mac_header_info_t hdr_info; 2221 uint16_t vlanid, tci; 2222 boolean_t trillmode = B_FALSE; 2223 2224 KIINCR(bki_recv); 2225 KLINCR(bkl_recv); 2226 2227 /* 2228 * Regardless of state, check for inbound TRILL packets when TRILL is 2229 * active. These are pulled out of band and sent for TRILL handling. 2230 */ 2231 if (blp->bl_trilldata != NULL) { 2232 void *tdp; 2233 mblk_t *newhead; 2234 mblk_t *tail = NULL; 2235 2236 mutex_enter(&blp->bl_trilllock); 2237 if ((tdp = blp->bl_trilldata) != NULL) { 2238 blp->bl_trillthreads++; 2239 mutex_exit(&blp->bl_trilllock); 2240 trillmode = B_TRUE; 2241 newhead = mpnext; 2242 while ((mp = mpnext) != NULL) { 2243 boolean_t raw_isis, bridge_group; 2244 2245 mpnext = mp->b_next; 2246 2247 /* 2248 * If the header isn't readable, then leave on 2249 * the list and continue. 2250 */ 2251 if (mac_header_info(blp->bl_mh, mp, 2252 &hdr_info) != 0) { 2253 tail = mp; 2254 continue; 2255 } 2256 2257 /* 2258 * The TRILL document specifies that, on 2259 * Ethernet alone, IS-IS packets arrive with 2260 * LLC rather than Ethertype, and using a 2261 * specific destination address. We must check 2262 * for that here. Also, we need to give BPDUs 2263 * to TRILL for processing. 
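 * Condensed, the classification applied below treats a frame as TRILL input
 * when any of the following hold (sketch only; see the code for the exact
 * tests):
 *
 *	daddr == all_isis_rbridges			(raw IS-IS)
 *	daddr == bridge_group_address			(BPDU)
 *	mhi_bindsap == ETHERTYPE_TRILL			(encapsulated data)
 *	mhi_bindsap == ETHERTYPE_VLAN and the inner
 *	    ether_type == ETHERTYPE_TRILL		(tagged encapsulation)
 *
 * Everything else stays on the list for normal bridging.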
2264 */ 2265 raw_isis = bridge_group = B_FALSE; 2266 if (hdr_info.mhi_dsttype == 2267 MAC_ADDRTYPE_MULTICAST) { 2268 if (memcmp(hdr_info.mhi_daddr, 2269 all_isis_rbridges, ETHERADDRL) == 0) 2270 raw_isis = B_TRUE; 2271 else if (memcmp(hdr_info.mhi_daddr, 2272 bridge_group_address, ETHERADDRL) == 2273 0) 2274 bridge_group = B_TRUE; 2275 } 2276 if (!raw_isis && !bridge_group && 2277 hdr_info.mhi_bindsap != ETHERTYPE_TRILL && 2278 (hdr_info.mhi_bindsap != ETHERTYPE_VLAN || 2279 /* LINTED: alignment */ 2280 ((struct ether_vlan_header *)mp->b_rptr)-> 2281 ether_type != htons(ETHERTYPE_TRILL))) { 2282 tail = mp; 2283 continue; 2284 } 2285 2286 /* 2287 * We've got TRILL input. Remove from the list 2288 * and send up through the TRILL module. (Send 2289 * a copy through promiscuous receive just to 2290 * support snooping on TRILL. Order isn't 2291 * preserved strictly, but that doesn't matter 2292 * here.) 2293 */ 2294 if (tail != NULL) 2295 tail->b_next = mpnext; 2296 mp->b_next = NULL; 2297 if (mp == newhead) 2298 newhead = mpnext; 2299 mac_trill_snoop(blp->bl_mh, mp); 2300 update_header(mp, &hdr_info, B_TRUE); 2301 /* 2302 * On raw IS-IS and BPDU frames, we have to 2303 * make sure that the length is trimmed 2304 * properly. We use origsap in order to cope 2305 * with jumbograms for IS-IS. (Regular mac 2306 * can't.) 2307 */ 2308 if (raw_isis || bridge_group) { 2309 size_t msglen = msgdsize(mp); 2310 2311 if (msglen > hdr_info.mhi_origsap) { 2312 (void) adjmsg(mp, 2313 hdr_info.mhi_origsap - 2314 msglen); 2315 } else if (msglen < 2316 hdr_info.mhi_origsap) { 2317 freemsg(mp); 2318 continue; 2319 } 2320 } 2321 trill_recv_fn(tdp, blp, rsrc, mp, &hdr_info); 2322 } 2323 mpnext = newhead; 2324 mutex_enter(&blp->bl_trilllock); 2325 if (--blp->bl_trillthreads == 0 && 2326 blp->bl_trilldata == NULL) 2327 cv_broadcast(&blp->bl_trillwait); 2328 } 2329 mutex_exit(&blp->bl_trilllock); 2330 if (mpnext == NULL) 2331 return; 2332 } 2333 2334 /* 2335 * If this is a TRILL RBridge, then just check whether this link is 2336 * used at all for forwarding. If not, then we're done. 2337 */ 2338 if (trillmode) { 2339 if (!(blp->bl_flags & BLF_TRILLACTIVE) || 2340 (blp->bl_flags & BLF_SDUFAIL)) { 2341 mac_rx_common(blp->bl_mh, rsrc, mpnext); 2342 return; 2343 } 2344 } else { 2345 /* 2346 * For regular (STP) bridges, if we're in blocking or listening 2347 * state, then do nothing. We don't learn or forward until 2348 * told to do so. 2349 */ 2350 if (blp->bl_state == BLS_BLOCKLISTEN) { 2351 mac_rx_common(blp->bl_mh, rsrc, mpnext); 2352 return; 2353 } 2354 } 2355 2356 /* 2357 * Send a copy of the message chain up to the observability node users. 2358 * For TRILL, we must obey the VLAN AF rules, so we go packet-by- 2359 * packet. 2360 */ 2361 if (!trillmode && blp->bl_state == BLS_FORWARDING && 2362 (bmp->bm_flags & BMF_STARTED) && 2363 (mp = copymsgchain(mpnext)) != NULL) { 2364 mac_rx(bmp->bm_mh, NULL, mp); 2365 } 2366 2367 /* 2368 * We must be in learning or forwarding state, or using TRILL on a link 2369 * with one or more VLANs active. For each packet in the list, process 2370 * the source address, and then attempt to forward. 2371 */ 2372 while ((mp = mpnext) != NULL) { 2373 mpnext = mp->b_next; 2374 mp->b_next = NULL; 2375 2376 /* 2377 * If we can't decode the header or if the header specifies a 2378 * multicast source address (impossible!), then don't bother 2379 * learning or forwarding, but go ahead and forward up the 2380 * stack for subsequent processing. 
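 * (The multicast-source test below relies on the Ethernet group bit being
 * the least significant bit of the first address octet, so the check is
 * simply (mhi_saddr[0] & 1) != 0; for example 01:80:c2:... would be caught,
 * while any valid unicast source passes.)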
2381 */ 2382 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0 || 2383 (hdr_info.mhi_saddr[0] & 1) != 0) { 2384 KIINCR(bki_drops); 2385 KLINCR(bkl_drops); 2386 mac_rx_common(blp->bl_mh, rsrc, mp); 2387 continue; 2388 } 2389 2390 /* 2391 * Extract and validate the VLAN ID for this packet. 2392 */ 2393 if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) || 2394 !BRIDGE_AF_ISSET(blp, vlanid)) { 2395 mac_rx_common(blp->bl_mh, rsrc, mp); 2396 continue; 2397 } 2398 2399 if (trillmode) { 2400 /* 2401 * Special test required by TRILL document: must 2402 * discard frames with outer address set to ESADI. 2403 */ 2404 if (memcmp(hdr_info.mhi_daddr, all_esadi_rbridges, 2405 ETHERADDRL) == 0) { 2406 mac_rx_common(blp->bl_mh, rsrc, mp); 2407 continue; 2408 } 2409 2410 /* 2411 * If we're in TRILL mode, then the call above to get 2412 * the VLAN ID has also checked that we're the 2413 * appointed forwarder, so report that we're handling 2414 * this packet to any observability node users. 2415 */ 2416 if ((bmp->bm_flags & BMF_STARTED) && 2417 (mpcopy = copymsg(mp)) != NULL) 2418 mac_rx(bmp->bm_mh, NULL, mpcopy); 2419 } 2420 2421 /* 2422 * First process the source address and learn from it. For 2423 * TRILL, we learn only if we're the appointed forwarder. 2424 */ 2425 bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE, 2426 vlanid); 2427 2428 /* 2429 * Now check whether we're forwarding and look up the 2430 * destination. If we can forward, do so. 2431 */ 2432 if (trillmode || blp->bl_state == BLS_FORWARDING) { 2433 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, 2434 B_FALSE, B_FALSE); 2435 } 2436 if (mp != NULL) 2437 mac_rx_common(blp->bl_mh, rsrc, mp); 2438 } 2439 } 2440 2441 2442 /* ARGSUSED */ 2443 static mblk_t * 2444 bridge_xmit_cb(mac_handle_t mh, mac_ring_handle_t rh, mblk_t *mpnext) 2445 { 2446 bridge_link_t *blp = (bridge_link_t *)mh; 2447 bridge_inst_t *bip = blp->bl_inst; 2448 bridge_mac_t *bmp = bip->bi_mac; 2449 mac_header_info_t hdr_info; 2450 uint16_t vlanid, tci; 2451 mblk_t *mp, *mpcopy; 2452 boolean_t trillmode; 2453 2454 trillmode = blp->bl_trilldata != NULL; 2455 2456 /* 2457 * If we're using STP and we're in blocking or listening state, or if 2458 * we're using TRILL and no VLANs are active, then behave as though the 2459 * bridge isn't here at all, and send on the local link alone. 2460 */ 2461 if ((!trillmode && blp->bl_state == BLS_BLOCKLISTEN) || 2462 (trillmode && 2463 (!(blp->bl_flags & BLF_TRILLACTIVE) || 2464 (blp->bl_flags & BLF_SDUFAIL)))) { 2465 KIINCR(bki_sent); 2466 KLINCR(bkl_xmit); 2467 MAC_RING_TX(blp->bl_mh, rh, mpnext, mp); 2468 return (mp); 2469 } 2470 2471 /* 2472 * Send a copy of the message up to the observability node users. 2473 * TRILL needs to check on a packet-by-packet basis. 2474 */ 2475 if (!trillmode && blp->bl_state == BLS_FORWARDING && 2476 (bmp->bm_flags & BMF_STARTED) && 2477 (mp = copymsgchain(mpnext)) != NULL) { 2478 mac_rx(bmp->bm_mh, NULL, mp); 2479 } 2480 2481 while ((mp = mpnext) != NULL) { 2482 mpnext = mp->b_next; 2483 mp->b_next = NULL; 2484 2485 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) { 2486 freemsg(mp); 2487 continue; 2488 } 2489 2490 /* 2491 * Extract and validate the VLAN ID for this packet. 
2492 */ 2493 if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) || 2494 !BRIDGE_AF_ISSET(blp, vlanid)) { 2495 freemsg(mp); 2496 continue; 2497 } 2498 2499 /* 2500 * If we're using TRILL, then we've now validated that we're 2501 * the forwarder for this VLAN, so go ahead and let 2502 * observability node users know about the packet. 2503 */ 2504 if (trillmode && (bmp->bm_flags & BMF_STARTED) && 2505 (mpcopy = copymsg(mp)) != NULL) { 2506 mac_rx(bmp->bm_mh, NULL, mpcopy); 2507 } 2508 2509 /* 2510 * We have to learn from our own transmitted packets, because 2511 * there may be a Solaris DLPI raw sender (which can specify its 2512 * own source address) using promiscuous mode for receive. The 2513 * mac layer information won't (and can't) tell us everything 2514 * we need to know. 2515 */ 2516 bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE, 2517 vlanid); 2518 2519 /* attempt forwarding */ 2520 if (trillmode || blp->bl_state == BLS_FORWARDING) { 2521 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, 2522 B_FALSE, B_TRUE); 2523 } 2524 if (mp != NULL) { 2525 MAC_RING_TX(blp->bl_mh, rh, mp, mp); 2526 if (mp == NULL) { 2527 KIINCR(bki_sent); 2528 KLINCR(bkl_xmit); 2529 } 2530 } 2531 /* 2532 * If we get stuck, then stop. Don't let the user's output 2533 * packets get out of order. (More importantly: don't try to 2534 * bridge the same packet multiple times if flow control is 2535 * asserted.) 2536 */ 2537 if (mp != NULL) { 2538 mp->b_next = mpnext; 2539 break; 2540 } 2541 } 2542 return (mp); 2543 } 2544 2545 /* 2546 * This is called by TRILL when it decapsulates an packet, and we must forward 2547 * locally. On failure, we just drop. 2548 * 2549 * Note that the ingress_nick reported by TRILL must not represent this local 2550 * node. 2551 */ 2552 void 2553 bridge_trill_decaps(bridge_link_t *blp, mblk_t *mp, uint16_t ingress_nick) 2554 { 2555 mac_header_info_t hdr_info; 2556 uint16_t vlanid, tci; 2557 bridge_inst_t *bip = blp->bl_inst; /* used by macros */ 2558 mblk_t *mpcopy; 2559 2560 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) { 2561 freemsg(mp); 2562 return; 2563 } 2564 2565 /* Extract VLAN ID for this packet. */ 2566 if (hdr_info.mhi_bindsap == ETHERTYPE_VLAN) { 2567 struct ether_vlan_header *evhp; 2568 2569 /* LINTED: alignment */ 2570 evhp = (struct ether_vlan_header *)mp->b_rptr; 2571 tci = ntohs(evhp->ether_tci); 2572 vlanid = VLAN_ID(tci); 2573 } else { 2574 /* Inner VLAN headers are required in TRILL data packets */ 2575 DTRACE_PROBE3(bridge__trill__decaps__novlan, bridge_link_t *, 2576 blp, mblk_t *, mp, uint16_t, ingress_nick); 2577 freemsg(mp); 2578 return; 2579 } 2580 2581 /* Learn the location of this sender in the RBridge network */ 2582 bridge_learn(blp, hdr_info.mhi_saddr, ingress_nick, vlanid); 2583 2584 /* attempt forwarding */ 2585 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, B_TRUE, B_TRUE); 2586 if (mp != NULL) { 2587 if (bridge_can_send(blp, vlanid)) { 2588 /* Deliver a copy locally as well */ 2589 if ((mpcopy = copymsg(mp)) != NULL) 2590 mac_rx_common(blp->bl_mh, NULL, mpcopy); 2591 MAC_RING_TX(blp->bl_mh, NULL, mp, mp); 2592 } 2593 if (mp == NULL) { 2594 KIINCR(bki_sent); 2595 KLINCR(bkl_xmit); 2596 } else { 2597 freemsg(mp); 2598 } 2599 } 2600 } 2601 2602 /* 2603 * This function is used by TRILL _only_ to transmit TRILL-encapsulated 2604 * packets. It sends on a single underlying link and does not bridge. 
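 * A hypothetical caller in the TRILL module would treat a non-NULL return
 * as flow control, roughly (sketch only, not actual TRILL code):
 *
 *	mp = bridge_trill_output(blp, mp);
 *	if (mp != NULL) {
 *		requeue or drop; the frame was not transmitted
 *	}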
2605 */ 2606 mblk_t * 2607 bridge_trill_output(bridge_link_t *blp, mblk_t *mp) 2608 { 2609 bridge_inst_t *bip = blp->bl_inst; /* used by macros */ 2610 2611 mac_trill_snoop(blp->bl_mh, mp); 2612 MAC_RING_TX(blp->bl_mh, NULL, mp, mp); 2613 if (mp == NULL) { 2614 KIINCR(bki_sent); 2615 KLINCR(bkl_xmit); 2616 } 2617 return (mp); 2618 } 2619 2620 /* 2621 * Set the "appointed forwarder" flag array for this link. TRILL controls 2622 * forwarding on a VLAN basis. The "trillactive" flag is an optimization for 2623 * the forwarder. 2624 */ 2625 void 2626 bridge_trill_setvlans(bridge_link_t *blp, const uint8_t *arr) 2627 { 2628 int i; 2629 uint_t newflags = 0; 2630 2631 for (i = 0; i < BRIDGE_VLAN_ARR_SIZE; i++) { 2632 if ((blp->bl_afs[i] = arr[i]) != 0) 2633 newflags = BLF_TRILLACTIVE; 2634 } 2635 blp->bl_flags = (blp->bl_flags & ~BLF_TRILLACTIVE) | newflags; 2636 } 2637 2638 void 2639 bridge_trill_flush(bridge_link_t *blp, uint16_t vlan, boolean_t dotrill) 2640 { 2641 bridge_inst_t *bip = blp->bl_inst; 2642 bridge_fwd_t *bfp, *bfnext; 2643 avl_tree_t fwd_scavenge; 2644 int i; 2645 2646 _NOTE(ARGUNUSED(vlan)); 2647 2648 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 2649 offsetof(bridge_fwd_t, bf_node)); 2650 rw_enter(&bip->bi_rwlock, RW_WRITER); 2651 bfnext = avl_first(&bip->bi_fwd); 2652 while ((bfp = bfnext) != NULL) { 2653 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 2654 if (bfp->bf_flags & BFF_LOCALADDR) 2655 continue; 2656 if (dotrill) { 2657 /* port doesn't matter if we're flushing TRILL */ 2658 if (bfp->bf_trill_nick == RBRIDGE_NICKNAME_NONE) 2659 continue; 2660 } else { 2661 if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) 2662 continue; 2663 for (i = 0; i < bfp->bf_nlinks; i++) { 2664 if (bfp->bf_links[i] == blp) 2665 break; 2666 } 2667 if (i >= bfp->bf_nlinks) 2668 continue; 2669 } 2670 ASSERT(bfp->bf_flags & BFF_INTREE); 2671 avl_remove(&bip->bi_fwd, bfp); 2672 bfp->bf_flags &= ~BFF_INTREE; 2673 avl_add(&fwd_scavenge, bfp); 2674 } 2675 rw_exit(&bip->bi_rwlock); 2676 bfnext = avl_first(&fwd_scavenge); 2677 while ((bfp = bfnext) != NULL) { 2678 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 2679 avl_remove(&fwd_scavenge, bfp); 2680 fwd_unref(bfp); 2681 } 2682 avl_destroy(&fwd_scavenge); 2683 } 2684 2685 /* 2686 * Let the mac module take or drop a reference to a bridge link. When this is 2687 * called, the mac module is holding the mi_bridge_lock, so the link cannot be 2688 * in the process of entering or leaving a bridge. 2689 */ 2690 static void 2691 bridge_ref_cb(mac_handle_t mh, boolean_t hold) 2692 { 2693 bridge_link_t *blp = (bridge_link_t *)mh; 2694 2695 if (hold) 2696 atomic_inc_uint(&blp->bl_refs); 2697 else 2698 link_unref(blp); 2699 } 2700 2701 /* 2702 * Handle link state changes reported by the mac layer. This acts as a filter 2703 * for link state changes: if a link is reporting down, but there are other 2704 * links still up on the bridge, then the state is changed to "up." When the 2705 * last link goes down, all are marked down, and when the first link goes up, 2706 * all are marked up. (Recursion is avoided by the use of the "redo" function.) 2707 * 2708 * We treat unknown as equivalent to "up." 
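 * For example (hypothetical two-link bridge): if links A and B are up and A
 * reports down, A's clients still see "up" because B provides a path.  When
 * B later reports down as well, every remaining link and the bridge
 * observability node are redone to "down" via mac_link_redo().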
2709 */ 2710 static link_state_t 2711 bridge_ls_cb(mac_handle_t mh, link_state_t newls) 2712 { 2713 bridge_link_t *blp = (bridge_link_t *)mh; 2714 bridge_link_t *blcmp; 2715 bridge_inst_t *bip; 2716 bridge_mac_t *bmp; 2717 2718 if (newls != LINK_STATE_DOWN && blp->bl_linkstate != LINK_STATE_DOWN || 2719 (blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) { 2720 blp->bl_linkstate = newls; 2721 return (newls); 2722 } 2723 2724 /* 2725 * Scan first to see if there are any other non-down links. If there 2726 * are, then we're done. Otherwise, if all others are down, then the 2727 * state of this link is the state of the bridge. 2728 */ 2729 bip = blp->bl_inst; 2730 rw_enter(&bip->bi_rwlock, RW_WRITER); 2731 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 2732 blcmp = list_next(&bip->bi_links, blcmp)) { 2733 if (blcmp != blp && 2734 !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) && 2735 blcmp->bl_linkstate != LINK_STATE_DOWN) 2736 break; 2737 } 2738 2739 if (blcmp != NULL) { 2740 /* 2741 * If there are other links that are considered up, then tell 2742 * the caller that the link is actually still up, regardless of 2743 * this link's underlying state. 2744 */ 2745 blp->bl_linkstate = newls; 2746 newls = LINK_STATE_UP; 2747 } else if (blp->bl_linkstate != newls) { 2748 /* 2749 * If we've found no other 'up' links, and this link has 2750 * changed state, then report the new state of the bridge to 2751 * all other clients. 2752 */ 2753 blp->bl_linkstate = newls; 2754 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 2755 blcmp = list_next(&bip->bi_links, blcmp)) { 2756 if (blcmp != blp && !(blcmp->bl_flags & BLF_DELETED)) 2757 mac_link_redo(blcmp->bl_mh, newls); 2758 } 2759 bmp = bip->bi_mac; 2760 if ((bmp->bm_linkstate = newls) != LINK_STATE_DOWN) 2761 bmp->bm_linkstate = LINK_STATE_UP; 2762 mac_link_redo(bmp->bm_mh, bmp->bm_linkstate); 2763 } 2764 rw_exit(&bip->bi_rwlock); 2765 return (newls); 2766 } 2767 2768 static void 2769 bridge_add_link(void *arg) 2770 { 2771 mblk_t *mp = arg; 2772 bridge_stream_t *bsp; 2773 bridge_inst_t *bip, *bipt; 2774 bridge_mac_t *bmp; 2775 datalink_id_t linkid; 2776 int err; 2777 mac_handle_t mh; 2778 uint_t maxsdu; 2779 bridge_link_t *blp = NULL, *blpt; 2780 const mac_info_t *mip; 2781 boolean_t macopen = B_FALSE; 2782 char linkname[MAXLINKNAMELEN]; 2783 char kstatname[KSTAT_STRLEN]; 2784 int i; 2785 link_state_t linkstate; 2786 mblk_t *mlist; 2787 2788 bsp = (bridge_stream_t *)mp->b_next; 2789 mp->b_next = NULL; 2790 bip = bsp->bs_inst; 2791 /* LINTED: alignment */ 2792 linkid = *(datalink_id_t *)mp->b_cont->b_rptr; 2793 2794 /* 2795 * First make sure that there is no other bridge that has this link. 2796 * We don't want to overlap operations from two bridges; the MAC layer 2797 * supports only one bridge on a given MAC at a time. 2798 * 2799 * We rely on the fact that there's just one taskq thread for the 2800 * bridging module: once we've checked for a duplicate, we can drop the 2801 * lock, because no other thread could possibly be adding another link 2802 * until we're done. 
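 * (The single-threading assumption comes from bridge_attach(), which creates
 * bridge_taskq with exactly one thread:
 *
 *	bridge_taskq = ddi_taskq_create(dip, BRIDGE_DEV_NAME, 1,
 *	    TASKQ_DEFAULTPRI, 0);
 *
 * so bridge_add_link() and bridge_rem_link() never run concurrently.)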
2803 */ 2804 mutex_enter(&inst_lock); 2805 for (bipt = list_head(&inst_list); bipt != NULL; 2806 bipt = list_next(&inst_list, bipt)) { 2807 rw_enter(&bipt->bi_rwlock, RW_READER); 2808 for (blpt = list_head(&bipt->bi_links); blpt != NULL; 2809 blpt = list_next(&bipt->bi_links, blpt)) { 2810 if (linkid == blpt->bl_linkid) 2811 break; 2812 } 2813 rw_exit(&bipt->bi_rwlock); 2814 if (blpt != NULL) 2815 break; 2816 } 2817 mutex_exit(&inst_lock); 2818 if (bipt != NULL) { 2819 err = EBUSY; 2820 goto fail; 2821 } 2822 2823 if ((err = mac_open_by_linkid(linkid, &mh)) != 0) 2824 goto fail; 2825 macopen = B_TRUE; 2826 2827 /* we bridge only Ethernet */ 2828 mip = mac_info(mh); 2829 if (mip->mi_media != DL_ETHER) { 2830 err = ENOTSUP; 2831 goto fail; 2832 } 2833 2834 /* 2835 * Get the current maximum SDU on this interface. If there are other 2836 * links on the bridge, then this one must match, or it errors out. 2837 * Otherwise, the first link becomes the standard for the new bridge. 2838 */ 2839 mac_sdu_get(mh, NULL, &maxsdu); 2840 bmp = bip->bi_mac; 2841 if (list_is_empty(&bip->bi_links)) { 2842 bmp->bm_maxsdu = maxsdu; 2843 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 2844 } 2845 2846 /* figure the kstat name; also used as the mac client name */ 2847 i = MBLKL(mp->b_cont) - sizeof (datalink_id_t); 2848 if (i < 0 || i >= MAXLINKNAMELEN) 2849 i = MAXLINKNAMELEN - 1; 2850 bcopy(mp->b_cont->b_rptr + sizeof (datalink_id_t), linkname, i); 2851 linkname[i] = '\0'; 2852 (void) snprintf(kstatname, sizeof (kstatname), "%s-%s", bip->bi_name, 2853 linkname); 2854 2855 if ((blp = kmem_zalloc(sizeof (*blp), KM_NOSLEEP)) == NULL) { 2856 err = ENOMEM; 2857 goto fail; 2858 } 2859 blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED); 2860 if (blp->bl_lfailmp == NULL) { 2861 kmem_free(blp, sizeof (*blp)); 2862 blp = NULL; 2863 err = ENOMEM; 2864 goto fail; 2865 } 2866 2867 blp->bl_refs = 1; 2868 atomic_inc_uint(&bip->bi_refs); 2869 blp->bl_inst = bip; 2870 blp->bl_mh = mh; 2871 blp->bl_linkid = linkid; 2872 blp->bl_maxsdu = maxsdu; 2873 cv_init(&blp->bl_trillwait, NULL, CV_DRIVER, NULL); 2874 mutex_init(&blp->bl_trilllock, NULL, MUTEX_DRIVER, NULL); 2875 (void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs)); 2876 2877 err = mac_client_open(mh, &blp->bl_mch, kstatname, 0); 2878 if (err != 0) 2879 goto fail; 2880 blp->bl_flags |= BLF_CLIENT_OPEN; 2881 2882 err = mac_margin_add(mh, &blp->bl_margin, B_TRUE); 2883 if (err != 0) 2884 goto fail; 2885 blp->bl_flags |= BLF_MARGIN_ADDED; 2886 2887 blp->bl_mnh = mac_notify_add(mh, bridge_notify_cb, blp); 2888 2889 /* Enable Bridging on the link */ 2890 err = mac_bridge_set(mh, (mac_handle_t)blp); 2891 if (err != 0) 2892 goto fail; 2893 blp->bl_flags |= BLF_SET_BRIDGE; 2894 2895 err = mac_promisc_add(blp->bl_mch, MAC_CLIENT_PROMISC_ALL, NULL, 2896 blp, &blp->bl_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP); 2897 if (err != 0) 2898 goto fail; 2899 blp->bl_flags |= BLF_PROM_ADDED; 2900 2901 bridge_new_unicst(blp); 2902 2903 blp->bl_ksp = kstat_setup((kstat_named_t *)&blp->bl_kstats, 2904 link_kstats_list, Dim(link_kstats_list), kstatname); 2905 2906 /* 2907 * The link holds a reference to the bridge instance, so that the 2908 * instance can't go away before the link is freed. The insertion into 2909 * bi_links holds a reference on the link (reference set to 1 above). 2910 * When marking as removed from bi_links (BLF_DELETED), drop the 2911 * reference on the link. When freeing the link, drop the reference on 2912 * the instance. BLF_LINK_ADDED tracks link insertion in bi_links list. 
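 * As a compact sketch of the ownership once the insertion below completes:
 *
 *	bip->bi_links	holds the initial reference on blp (bl_refs was set
 *			to 1 at allocation; dropped when BLF_DELETED is set)
 *	blp->bl_inst	holds one reference on bip (bi_refs was incremented
 *			above; dropped when the link is freed)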
2913 */ 2914 rw_enter(&bip->bi_rwlock, RW_WRITER); 2915 list_insert_tail(&bip->bi_links, blp); 2916 blp->bl_flags |= BLF_LINK_ADDED; 2917 2918 /* 2919 * If the new link is no good on this bridge, then let the daemon know 2920 * about the problem. 2921 */ 2922 mlist = NULL; 2923 if (maxsdu != bmp->bm_maxsdu) 2924 link_sdu_fail(blp, B_TRUE, &mlist); 2925 rw_exit(&bip->bi_rwlock); 2926 send_up_messages(bip, mlist); 2927 2928 /* 2929 * Trigger a link state update so that if this link is the first one 2930 * "up" in the bridge, then we notify everyone. This triggers a trip 2931 * through bridge_ls_cb. 2932 */ 2933 linkstate = mac_stat_get(mh, MAC_STAT_LOWLINK_STATE); 2934 blp->bl_linkstate = LINK_STATE_DOWN; 2935 mac_link_update(mh, linkstate); 2936 2937 /* 2938 * We now need to report back to the stream that invoked us, and then 2939 * drop the reference on the stream that we're holding. 2940 */ 2941 miocack(bsp->bs_wq, mp, 0, 0); 2942 stream_unref(bsp); 2943 return; 2944 2945 fail: 2946 if (blp == NULL) { 2947 if (macopen) 2948 mac_close(mh); 2949 } else { 2950 link_shutdown(blp); 2951 } 2952 miocnak(bsp->bs_wq, mp, 0, err); 2953 stream_unref(bsp); 2954 } 2955 2956 static void 2957 bridge_rem_link(void *arg) 2958 { 2959 mblk_t *mp = arg; 2960 bridge_stream_t *bsp; 2961 bridge_inst_t *bip; 2962 bridge_mac_t *bmp; 2963 datalink_id_t linkid; 2964 bridge_link_t *blp, *blsave; 2965 boolean_t found; 2966 mblk_t *mlist; 2967 2968 bsp = (bridge_stream_t *)mp->b_next; 2969 mp->b_next = NULL; 2970 bip = bsp->bs_inst; 2971 /* LINTED: alignment */ 2972 linkid = *(datalink_id_t *)mp->b_cont->b_rptr; 2973 2974 /* 2975 * We become reader here so that we can loop over the other links and 2976 * deliver link up/down notification. 2977 */ 2978 rw_enter(&bip->bi_rwlock, RW_READER); 2979 found = B_FALSE; 2980 for (blp = list_head(&bip->bi_links); blp != NULL; 2981 blp = list_next(&bip->bi_links, blp)) { 2982 if (blp->bl_linkid == linkid && 2983 !(blp->bl_flags & BLF_DELETED)) { 2984 blp->bl_flags |= BLF_DELETED; 2985 (void) ddi_taskq_dispatch(bridge_taskq, link_shutdown, 2986 blp, DDI_SLEEP); 2987 found = B_TRUE; 2988 break; 2989 } 2990 } 2991 2992 /* 2993 * Check if this link is up and the remainder of the links are all 2994 * down. 2995 */ 2996 if (blp != NULL && blp->bl_linkstate != LINK_STATE_DOWN) { 2997 for (blp = list_head(&bip->bi_links); blp != NULL; 2998 blp = list_next(&bip->bi_links, blp)) { 2999 if (blp->bl_linkstate != LINK_STATE_DOWN && 3000 !(blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) 3001 break; 3002 } 3003 if (blp == NULL) { 3004 for (blp = list_head(&bip->bi_links); blp != NULL; 3005 blp = list_next(&bip->bi_links, blp)) { 3006 if (!(blp->bl_flags & BLF_DELETED)) 3007 mac_link_redo(blp->bl_mh, 3008 LINK_STATE_DOWN); 3009 } 3010 bmp = bip->bi_mac; 3011 bmp->bm_linkstate = LINK_STATE_DOWN; 3012 mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN); 3013 } 3014 } 3015 3016 /* 3017 * Check if there's just one working link left on the bridge. If so, 3018 * then that link is now authoritative for bridge MTU. 
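 * For example (MTU values are illustrative): if the bridge was created over
 * link A (1500) and link B (9000) was added later, B would have been flagged
 * via link_sdu_fail().  Removing A leaves B as the only link, so bm_maxsdu
 * becomes 9000, mac_maxsdu_update() publishes it, and link_sdu_fail(B,
 * B_FALSE, ...) clears the failure state.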
3019 */ 3020 blsave = NULL; 3021 for (blp = list_head(&bip->bi_links); blp != NULL; 3022 blp = list_next(&bip->bi_links, blp)) { 3023 if (!(blp->bl_flags & BLF_DELETED)) { 3024 if (blsave == NULL) 3025 blsave = blp; 3026 else 3027 break; 3028 } 3029 } 3030 mlist = NULL; 3031 bmp = bip->bi_mac; 3032 if (blsave != NULL && blp == NULL && 3033 blsave->bl_maxsdu != bmp->bm_maxsdu) { 3034 bmp->bm_maxsdu = blsave->bl_maxsdu; 3035 (void) mac_maxsdu_update(bmp->bm_mh, blsave->bl_maxsdu); 3036 link_sdu_fail(blsave, B_FALSE, &mlist); 3037 } 3038 rw_exit(&bip->bi_rwlock); 3039 send_up_messages(bip, mlist); 3040 3041 if (found) 3042 miocack(bsp->bs_wq, mp, 0, 0); 3043 else 3044 miocnak(bsp->bs_wq, mp, 0, ENOENT); 3045 stream_unref(bsp); 3046 } 3047 3048 /* 3049 * This function intentionally returns with bi_rwlock held; it is intended for 3050 * quick checks and updates. 3051 */ 3052 static bridge_link_t * 3053 enter_link(bridge_inst_t *bip, datalink_id_t linkid) 3054 { 3055 bridge_link_t *blp; 3056 3057 rw_enter(&bip->bi_rwlock, RW_READER); 3058 for (blp = list_head(&bip->bi_links); blp != NULL; 3059 blp = list_next(&bip->bi_links, blp)) { 3060 if (blp->bl_linkid == linkid && !(blp->bl_flags & BLF_DELETED)) 3061 break; 3062 } 3063 return (blp); 3064 } 3065 3066 static void 3067 bridge_ioctl(queue_t *wq, mblk_t *mp) 3068 { 3069 bridge_stream_t *bsp = wq->q_ptr; 3070 bridge_inst_t *bip; 3071 struct iocblk *iop; 3072 int rc = EINVAL; 3073 int len = 0; 3074 bridge_link_t *blp; 3075 cred_t *cr; 3076 3077 /* LINTED: alignment */ 3078 iop = (struct iocblk *)mp->b_rptr; 3079 3080 /* 3081 * For now, all of the bridge ioctls are privileged. 3082 */ 3083 if ((cr = msg_getcred(mp, NULL)) == NULL) 3084 cr = iop->ioc_cr; 3085 if (cr != NULL && secpolicy_net_config(cr, B_FALSE) != 0) { 3086 miocnak(wq, mp, 0, EPERM); 3087 return; 3088 } 3089 3090 switch (iop->ioc_cmd) { 3091 case BRIOC_NEWBRIDGE: { 3092 bridge_newbridge_t *bnb; 3093 3094 if (bsp->bs_inst != NULL || 3095 (rc = miocpullup(mp, sizeof (bridge_newbridge_t))) != 0) 3096 break; 3097 /* LINTED: alignment */ 3098 bnb = (bridge_newbridge_t *)mp->b_cont->b_rptr; 3099 bnb->bnb_name[MAXNAMELEN-1] = '\0'; 3100 rc = bridge_create(bnb->bnb_linkid, bnb->bnb_name, &bip, cr); 3101 if (rc != 0) 3102 break; 3103 3104 rw_enter(&bip->bi_rwlock, RW_WRITER); 3105 if (bip->bi_control != NULL) { 3106 rw_exit(&bip->bi_rwlock); 3107 bridge_unref(bip); 3108 rc = EBUSY; 3109 } else { 3110 atomic_inc_uint(&bip->bi_refs); 3111 bsp->bs_inst = bip; /* stream holds reference */ 3112 bip->bi_control = bsp; 3113 rw_exit(&bip->bi_rwlock); 3114 rc = 0; 3115 } 3116 break; 3117 } 3118 3119 case BRIOC_ADDLINK: 3120 if ((bip = bsp->bs_inst) == NULL || 3121 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0) 3122 break; 3123 /* 3124 * We cannot perform the action in this thread, because we're 3125 * not in process context, and we may already be holding 3126 * MAC-related locks. Place the request on taskq. 3127 */ 3128 mp->b_next = (mblk_t *)bsp; 3129 stream_ref(bsp); 3130 (void) ddi_taskq_dispatch(bridge_taskq, bridge_add_link, mp, 3131 DDI_SLEEP); 3132 return; 3133 3134 case BRIOC_REMLINK: 3135 if ((bip = bsp->bs_inst) == NULL || 3136 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0) 3137 break; 3138 /* 3139 * We cannot perform the action in this thread, because we're 3140 * not in process context, and we may already be holding 3141 * MAC-related locks. Place the request on taskq. 
*/
3142 */ 3143 mp->b_next = (mblk_t *)bsp; 3144 stream_ref(bsp); 3145 (void) ddi_taskq_dispatch(bridge_taskq, bridge_rem_link, mp, 3146 DDI_SLEEP); 3147 return; 3148 3149 case BRIOC_SETSTATE: { 3150 bridge_setstate_t *bss; 3151 3152 if ((bip = bsp->bs_inst) == NULL || 3153 (rc = miocpullup(mp, sizeof (*bss))) != 0) 3154 break; 3155 /* LINTED: alignment */ 3156 bss = (bridge_setstate_t *)mp->b_cont->b_rptr; 3157 if ((blp = enter_link(bip, bss->bss_linkid)) == NULL) { 3158 rc = ENOENT; 3159 } else { 3160 rc = 0; 3161 blp->bl_state = bss->bss_state; 3162 } 3163 rw_exit(&bip->bi_rwlock); 3164 break; 3165 } 3166 3167 case BRIOC_SETPVID: { 3168 bridge_setpvid_t *bsv; 3169 3170 if ((bip = bsp->bs_inst) == NULL || 3171 (rc = miocpullup(mp, sizeof (*bsv))) != 0) 3172 break; 3173 /* LINTED: alignment */ 3174 bsv = (bridge_setpvid_t *)mp->b_cont->b_rptr; 3175 if (bsv->bsv_vlan > VLAN_ID_MAX) 3176 break; 3177 if ((blp = enter_link(bip, bsv->bsv_linkid)) == NULL) { 3178 rc = ENOENT; 3179 } else if (blp->bl_pvid == bsv->bsv_vlan) { 3180 rc = 0; 3181 } else { 3182 rc = 0; 3183 BRIDGE_VLAN_CLR(blp, blp->bl_pvid); 3184 blp->bl_pvid = bsv->bsv_vlan; 3185 if (blp->bl_pvid != 0) 3186 BRIDGE_VLAN_SET(blp, blp->bl_pvid); 3187 } 3188 rw_exit(&bip->bi_rwlock); 3189 break; 3190 } 3191 3192 case BRIOC_VLANENAB: { 3193 bridge_vlanenab_t *bve; 3194 3195 if ((bip = bsp->bs_inst) == NULL || 3196 (rc = miocpullup(mp, sizeof (*bve))) != 0) 3197 break; 3198 /* LINTED: alignment */ 3199 bve = (bridge_vlanenab_t *)mp->b_cont->b_rptr; 3200 if (bve->bve_vlan > VLAN_ID_MAX) 3201 break; 3202 if ((blp = enter_link(bip, bve->bve_linkid)) == NULL) { 3203 rc = ENOENT; 3204 } else { 3205 rc = 0; 3206 /* special case: vlan 0 means "all" */ 3207 if (bve->bve_vlan == 0) { 3208 (void) memset(blp->bl_vlans, 3209 bve->bve_onoff ? ~0 : 0, 3210 sizeof (blp->bl_vlans)); 3211 BRIDGE_VLAN_CLR(blp, 0); 3212 if (blp->bl_pvid != 0) 3213 BRIDGE_VLAN_SET(blp, blp->bl_pvid); 3214 } else if (bve->bve_vlan == blp->bl_pvid) { 3215 rc = EINVAL; 3216 } else if (bve->bve_onoff) { 3217 BRIDGE_VLAN_SET(blp, bve->bve_vlan); 3218 } else { 3219 BRIDGE_VLAN_CLR(blp, bve->bve_vlan); 3220 } 3221 } 3222 rw_exit(&bip->bi_rwlock); 3223 break; 3224 } 3225 3226 case BRIOC_FLUSHFWD: { 3227 bridge_flushfwd_t *bff; 3228 bridge_fwd_t *bfp, *bfnext; 3229 avl_tree_t fwd_scavenge; 3230 int i; 3231 3232 if ((bip = bsp->bs_inst) == NULL || 3233 (rc = miocpullup(mp, sizeof (*bff))) != 0) 3234 break; 3235 /* LINTED: alignment */ 3236 bff = (bridge_flushfwd_t *)mp->b_cont->b_rptr; 3237 rw_enter(&bip->bi_rwlock, RW_WRITER); 3238 /* This case means "all" */ 3239 if (bff->bff_linkid == DATALINK_INVALID_LINKID) { 3240 blp = NULL; 3241 } else { 3242 for (blp = list_head(&bip->bi_links); blp != NULL; 3243 blp = list_next(&bip->bi_links, blp)) { 3244 if (blp->bl_linkid == bff->bff_linkid && 3245 !(blp->bl_flags & BLF_DELETED)) 3246 break; 3247 } 3248 if (blp == NULL) { 3249 rc = ENOENT; 3250 rw_exit(&bip->bi_rwlock); 3251 break; 3252 } 3253 } 3254 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 3255 offsetof(bridge_fwd_t, bf_node)); 3256 bfnext = avl_first(&bip->bi_fwd); 3257 while ((bfp = bfnext) != NULL) { 3258 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 3259 if (bfp->bf_flags & BFF_LOCALADDR) 3260 continue; 3261 if (blp != NULL) { 3262 for (i = 0; i < bfp->bf_maxlinks; i++) { 3263 if (bfp->bf_links[i] == blp) 3264 break; 3265 } 3266 /* 3267 * If the link is there and we're excluding, 3268 * then skip. If the link is not there and 3269 * we're doing only that link, then skip. 
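 * Equivalently (sketch of the test below):
 *
 *	link found in entry?	bff_exclude	result
 *	yes			B_TRUE		skip (keep the entry)
 *	yes			B_FALSE		flush
 *	no			B_TRUE		flush
 *	no			B_FALSE		skip (keep the entry)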
3270 */ 3271 if ((i < bfp->bf_maxlinks) == bff->bff_exclude) 3272 continue; 3273 } 3274 ASSERT(bfp->bf_flags & BFF_INTREE); 3275 avl_remove(&bip->bi_fwd, bfp); 3276 bfp->bf_flags &= ~BFF_INTREE; 3277 avl_add(&fwd_scavenge, bfp); 3278 } 3279 rw_exit(&bip->bi_rwlock); 3280 bfnext = avl_first(&fwd_scavenge); 3281 while ((bfp = bfnext) != NULL) { 3282 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 3283 avl_remove(&fwd_scavenge, bfp); 3284 fwd_unref(bfp); /* drop tree reference */ 3285 } 3286 avl_destroy(&fwd_scavenge); 3287 break; 3288 } 3289 3290 case BRIOC_TABLEMAX: 3291 if ((bip = bsp->bs_inst) == NULL || 3292 (rc = miocpullup(mp, sizeof (uint32_t))) != 0) 3293 break; 3294 /* LINTED: alignment */ 3295 bip->bi_tablemax = *(uint32_t *)mp->b_cont->b_rptr; 3296 break; 3297 } 3298 3299 if (rc == 0) 3300 miocack(wq, mp, len, 0); 3301 else 3302 miocnak(wq, mp, 0, rc); 3303 } 3304 3305 static void 3306 bridge_wput(queue_t *wq, mblk_t *mp) 3307 { 3308 switch (DB_TYPE(mp)) { 3309 case M_IOCTL: 3310 bridge_ioctl(wq, mp); 3311 break; 3312 case M_FLUSH: 3313 if (*mp->b_rptr & FLUSHW) 3314 *mp->b_rptr &= ~FLUSHW; 3315 if (*mp->b_rptr & FLUSHR) 3316 qreply(wq, mp); 3317 else 3318 freemsg(mp); 3319 break; 3320 default: 3321 freemsg(mp); 3322 break; 3323 } 3324 } 3325 3326 /* 3327 * This function allocates the main data structures for the bridge driver and 3328 * connects us into devfs. 3329 */ 3330 static void 3331 bridge_inst_init(void) 3332 { 3333 bridge_scan_interval = 5 * drv_usectohz(1000000); 3334 bridge_fwd_age = 25 * drv_usectohz(1000000); 3335 3336 rw_init(&bmac_rwlock, NULL, RW_DRIVER, NULL); 3337 list_create(&bmac_list, sizeof (bridge_mac_t), 3338 offsetof(bridge_mac_t, bm_node)); 3339 list_create(&inst_list, sizeof (bridge_inst_t), 3340 offsetof(bridge_inst_t, bi_node)); 3341 cv_init(&inst_cv, NULL, CV_DRIVER, NULL); 3342 mutex_init(&inst_lock, NULL, MUTEX_DRIVER, NULL); 3343 cv_init(&stream_ref_cv, NULL, CV_DRIVER, NULL); 3344 mutex_init(&stream_ref_lock, NULL, MUTEX_DRIVER, NULL); 3345 3346 mac_bridge_vectors(bridge_xmit_cb, bridge_recv_cb, bridge_ref_cb, 3347 bridge_ls_cb); 3348 } 3349 3350 /* 3351 * This function disconnects from devfs and destroys all data structures in 3352 * preparation for unload. It's assumed that there are no active bridge 3353 * references left at this point. 3354 */ 3355 static void 3356 bridge_inst_fini(void) 3357 { 3358 mac_bridge_vectors(NULL, NULL, NULL, NULL); 3359 if (bridge_timerid != 0) 3360 (void) untimeout(bridge_timerid); 3361 rw_destroy(&bmac_rwlock); 3362 list_destroy(&bmac_list); 3363 list_destroy(&inst_list); 3364 cv_destroy(&inst_cv); 3365 mutex_destroy(&inst_lock); 3366 cv_destroy(&stream_ref_cv); 3367 mutex_destroy(&stream_ref_lock); 3368 } 3369 3370 /* 3371 * bridge_attach() 3372 * 3373 * Description: 3374 * Attach bridge driver to the system. 
3375 */ 3376 static int 3377 bridge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3378 { 3379 if (cmd != DDI_ATTACH) 3380 return (DDI_FAILURE); 3381 3382 if (ddi_create_minor_node(dip, BRIDGE_CTL, S_IFCHR, 0, DDI_PSEUDO, 3383 CLONE_DEV) == DDI_FAILURE) { 3384 return (DDI_FAILURE); 3385 } 3386 3387 if (dld_ioc_register(BRIDGE_IOC, bridge_ioc_list, 3388 DLDIOCCNT(bridge_ioc_list)) != 0) { 3389 ddi_remove_minor_node(dip, BRIDGE_CTL); 3390 return (DDI_FAILURE); 3391 } 3392 3393 bridge_dev_info = dip; 3394 bridge_major = ddi_driver_major(dip); 3395 bridge_taskq = ddi_taskq_create(dip, BRIDGE_DEV_NAME, 1, 3396 TASKQ_DEFAULTPRI, 0); 3397 return (DDI_SUCCESS); 3398 } 3399 3400 /* 3401 * bridge_detach() 3402 * 3403 * Description: 3404 * Detach an interface to the system. 3405 */ 3406 static int 3407 bridge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3408 { 3409 if (cmd != DDI_DETACH) 3410 return (DDI_FAILURE); 3411 3412 ddi_remove_minor_node(dip, NULL); 3413 ddi_taskq_destroy(bridge_taskq); 3414 bridge_dev_info = NULL; 3415 return (DDI_SUCCESS); 3416 } 3417 3418 /* 3419 * bridge_info() 3420 * 3421 * Description: 3422 * Translate "dev_t" to a pointer to the associated "dev_info_t". 3423 */ 3424 /* ARGSUSED */ 3425 static int 3426 bridge_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, 3427 void **result) 3428 { 3429 int rc; 3430 3431 switch (infocmd) { 3432 case DDI_INFO_DEVT2DEVINFO: 3433 if (bridge_dev_info == NULL) { 3434 rc = DDI_FAILURE; 3435 } else { 3436 *result = (void *)bridge_dev_info; 3437 rc = DDI_SUCCESS; 3438 } 3439 break; 3440 case DDI_INFO_DEVT2INSTANCE: 3441 *result = NULL; 3442 rc = DDI_SUCCESS; 3443 break; 3444 default: 3445 rc = DDI_FAILURE; 3446 break; 3447 } 3448 return (rc); 3449 } 3450 3451 static struct module_info bridge_modinfo = { 3452 2105, /* mi_idnum */ 3453 BRIDGE_DEV_NAME, /* mi_idname */ 3454 0, /* mi_minpsz */ 3455 16384, /* mi_maxpsz */ 3456 65536, /* mi_hiwat */ 3457 128 /* mi_lowat */ 3458 }; 3459 3460 static struct qinit bridge_rinit = { 3461 NULL, /* qi_putp */ 3462 NULL, /* qi_srvp */ 3463 bridge_open, /* qi_qopen */ 3464 bridge_close, /* qi_qclose */ 3465 NULL, /* qi_qadmin */ 3466 &bridge_modinfo, /* qi_minfo */ 3467 NULL /* qi_mstat */ 3468 }; 3469 3470 static struct qinit bridge_winit = { 3471 (int (*)())bridge_wput, /* qi_putp */ 3472 NULL, /* qi_srvp */ 3473 NULL, /* qi_qopen */ 3474 NULL, /* qi_qclose */ 3475 NULL, /* qi_qadmin */ 3476 &bridge_modinfo, /* qi_minfo */ 3477 NULL /* qi_mstat */ 3478 }; 3479 3480 static struct streamtab bridge_tab = { 3481 &bridge_rinit, /* st_rdinit */ 3482 &bridge_winit /* st_wrinit */ 3483 }; 3484 3485 /* No STREAMS perimeters; we do all our own locking */ 3486 DDI_DEFINE_STREAM_OPS(bridge_ops, nulldev, nulldev, bridge_attach, 3487 bridge_detach, nodev, bridge_info, D_NEW | D_MP, &bridge_tab, 3488 ddi_quiesce_not_supported); 3489 3490 static struct modldrv modldrv = { 3491 &mod_driverops, 3492 "bridging driver", 3493 &bridge_ops 3494 }; 3495 3496 static struct modlinkage modlinkage = { 3497 MODREV_1, 3498 (void *)&modldrv, 3499 NULL 3500 }; 3501 3502 int 3503 _init(void) 3504 { 3505 int retv; 3506 3507 mac_init_ops(NULL, BRIDGE_DEV_NAME); 3508 bridge_inst_init(); 3509 if ((retv = mod_install(&modlinkage)) != 0) 3510 bridge_inst_fini(); 3511 return (retv); 3512 } 3513 3514 int 3515 _fini(void) 3516 { 3517 int retv; 3518 3519 rw_enter(&bmac_rwlock, RW_READER); 3520 retv = list_is_empty(&bmac_list) ? 
0 : EBUSY; 3521 rw_exit(&bmac_rwlock); 3522 if (retv == 0 && 3523 (retv = mod_remove(&modlinkage)) == 0) 3524 bridge_inst_fini(); 3525 return (retv); 3526 } 3527 3528 int 3529 _info(struct modinfo *modinfop) 3530 { 3531 return (mod_info(&modlinkage, modinfop)); 3532 } 3533