/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 */

/*
 * This module implements a STREAMS driver that provides layer-two (Ethernet)
 * bridging functionality. The STREAMS interface is used to provide
 * observability (snoop/wireshark) and control, but not for interface plumbing.
 */

#include <sys/types.h>
#include <sys/bitmap.h>
#include <sys/cmn_err.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/errno.h>
#include <sys/kstat.h>
#include <sys/modctl.h>
#include <sys/note.h>
#include <sys/param.h>
#include <sys/pattr.h>
#include <sys/policy.h>
#include <sys/sdt.h>
#include <sys/stat.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/sunddi.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/dlpi.h>
#include <sys/dls.h>
#include <sys/mac_ether.h>
#include <sys/mac_provider.h>
#include <sys/mac_client_priv.h>
#include <sys/mac_impl.h>
#include <sys/vlan.h>
#include <net/bridge.h>
#include <net/bridge_impl.h>
#include <net/trill.h>
#include <sys/dld_ioc.h>

/*
 * Locks and reference counts: object lifetime and design.
 *
 * bridge_mac_t
 *   Bridge mac (snoop) instances are in bmac_list, which is protected by
 *   bmac_rwlock. They're allocated by bmac_alloc and freed by bridge_timer().
 *   Every bridge_inst_t has a single bridge_mac_t, but when bridge_inst_t goes
 *   away, the bridge_mac_t remains until either all of the users go away
 *   (detected by a timer) or until the instance is picked up again by the same
 *   bridge starting back up.
 *
 * bridge_inst_t
 *   Bridge instances are in inst_list, which is protected by inst_lock.
 *   They're allocated by inst_alloc() and freed by inst_free(). After
 *   allocation, an instance is placed in inst_list, and the reference count is
 *   incremented to represent this. That reference is decremented when the
 *   BIF_SHUTDOWN flag is set, and no new increments may occur. When the last
 *   reference is freed, the instance is removed from the list.
 *
 *   Bridge instances have lists of links and an AVL tree of forwarding
 *   entries. Each of these structures holds one reference on the bridge
 *   instance. These lists and tree are protected by bi_rwlock.
 *
 * bridge_stream_t
 *   Bridge streams are allocated by stream_alloc() and freed by stream_free().
 *   These streams are created when "bridged" opens /dev/bridgectl, and are
 *   used to create new bridge instances (via BRIOC_NEWBRIDGE) and control the
 *   links on the bridge. When a stream closes, the bridge instance created is
 *   destroyed. There's at most one bridge instance for a given control
 *   stream.
 *
 * bridge_link_t
 *   Links are allocated by bridge_add_link() and freed by link_free(). The
 *   bi_links list holds a reference to the link. When the BLF_DELETED flag is
 *   set, that reference is dropped. The link isn't removed from the list
 *   until the last reference drops. Each forwarding entry that uses a given
 *   link holds a reference, as does each thread transmitting a packet via the
 *   link. The MAC layer calls in via bridge_ref_cb() to hold a reference on
 *   a link when transmitting.
 *
 *   It's important that once BLF_DELETED is set, there's no way for the
 *   reference count to increase again. If it can, then the link may be
 *   double-freed. The BLF_FREED flag is intended for use with assertions to
 *   guard against this in testing.
 *
 * bridge_fwd_t
 *   Bridge forwarding entries are allocated by bridge_recv_cb() and freed by
 *   fwd_free(). The bi_fwd AVL tree holds one reference to the entry. Unlike
 *   other data structures, the reference is dropped when the entry is removed
 *   from the tree by fwd_delete(), and the BFF_INTREE flag is removed. Each
 *   thread that's forwarding a packet to a known destination holds a reference
 *   to a forwarding entry.
 *
 * TRILL notes:
 *
 *   The TRILL module does all of its I/O through bridging. It uses references
 *   on the bridge_inst_t and bridge_link_t structures, and has seven entry
 *   points and four callbacks. One entry point is for setting the callbacks
 *   (bridge_trill_register_cb). There are four entry points for taking bridge
 *   and link references (bridge_trill_{br,ln}{ref,unref}). The final two
 *   entry points are for decapsulated packets from TRILL (bridge_trill_decaps)
 *   that need to be bridged locally, and for TRILL-encapsulated output packets
 *   (bridge_trill_output).
 *
 *   The four callbacks comprise two notification functions for bridges and
 *   links being deleted, one function for raw received TRILL packets, and one
 *   for bridge output to non-local TRILL destinations (tunnel entry).
 */

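/*
 * Illustrative sketch of the hold/use/release pattern described above (the
 * bridge name used here is hypothetical, not part of this driver):
 *
 *	bridge_inst_t *bip;
 *
 *	if ((bip = bridge_find_name("bridge0")) != NULL) {
 *		... use bip; bi_refs is held across the use ...
 *		bridge_unref(bip);
 *	}
 *
 * The same discipline applies to bridge_link_t (released with link_unref())
 * and bridge_fwd_t (looked up held by fwd_find(), released with fwd_unref()).
 */
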
/*
 * Ethernet reserved multicast addresses for TRILL; used also in TRILL module.
 */
const uint8_t all_isis_rbridges[] = ALL_ISIS_RBRIDGES;
static const uint8_t all_esadi_rbridges[] = ALL_ESADI_RBRIDGES;
const uint8_t bridge_group_address[] = BRIDGE_GROUP_ADDRESS;

static const char *inst_kstats_list[] = { KSINST_NAMES };
static const char *link_kstats_list[] = { KSLINK_NAMES };

#define	KREF(p, m, vn)	p->m.vn.value.ui64
#define	KINCR(p, m, vn)	++KREF(p, m, vn)
#define	KDECR(p, m, vn)	--KREF(p, m, vn)

#define	KIPINCR(p, vn)	KINCR(p, bi_kstats, vn)
#define	KIPDECR(p, vn)	KDECR(p, bi_kstats, vn)
#define	KLPINCR(p, vn)	KINCR(p, bl_kstats, vn)

#define	KIINCR(vn)	KIPINCR(bip, vn)
#define	KIDECR(vn)	KIPDECR(bip, vn)
#define	KLINCR(vn)	KLPINCR(blp, vn)

#define	Dim(x)		(sizeof (x) / sizeof (*(x)))

/* Amount of overhead added when encapsulating with VLAN headers */
#define	VLAN_INCR	(sizeof (struct ether_vlan_header) -	\
			sizeof (struct ether_header))

static dev_info_t *bridge_dev_info;
static major_t bridge_major;
static ddi_taskq_t *bridge_taskq;

/*
 * These are the bridge instance management data structures. The mutex lock
 * protects the list of bridge instances. A reference count is then used on
 * each instance to determine when to free it. We use mac_minor_hold() to
 * allocate minor_t values, which are used both for self-cloning /dev/net/
 * device nodes as well as client streams. Minor node 0 is reserved for the
 * allocation control node.
 */
static list_t inst_list;
static kcondvar_t inst_cv;	/* Allows us to wait for shutdown */
static kmutex_t inst_lock;

static krwlock_t bmac_rwlock;
static list_t bmac_list;

/* Wait for taskq entries that use STREAMS */
static kcondvar_t stream_ref_cv;
static kmutex_t stream_ref_lock;

static timeout_id_t bridge_timerid;
static clock_t bridge_scan_interval;
static clock_t bridge_fwd_age;

static bridge_inst_t *bridge_find_name(const char *);
static void bridge_timer(void *);
static void bridge_unref(bridge_inst_t *);

static const uint8_t zero_addr[ETHERADDRL] = { 0 };

/* Global TRILL linkage */
static trill_recv_pkt_t trill_recv_fn;
static trill_encap_pkt_t trill_encap_fn;
static trill_br_dstr_t trill_brdstr_fn;
static trill_ln_dstr_t trill_lndstr_fn;

/* special settings to accommodate DLD flow control; see dld_str.c */
static struct module_info bridge_dld_modinfo = {
	0,			/* mi_idnum */
	BRIDGE_DEV_NAME,	/* mi_idname */
	0,			/* mi_minpsz */
	INFPSZ,			/* mi_maxpsz */
	1,			/* mi_hiwat */
	0			/* mi_lowat */
};

static struct qinit bridge_dld_rinit = {
	NULL,			/* qi_putp */
	NULL,			/* qi_srvp */
	dld_open,		/* qi_qopen */
	dld_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&bridge_dld_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

static struct qinit bridge_dld_winit = {
	dld_wput,		/* qi_putp */
	dld_wsrv,		/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&bridge_dld_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

static int bridge_ioc_listfwd(void *, intptr_t, int, cred_t *, int *);

/* GLDv3 control ioctls used by Bridging */
static dld_ioc_info_t bridge_ioc_list[] = {
	{BRIDGE_IOC_LISTFWD, DLDCOPYINOUT, sizeof (bridge_listfwd_t),
	    bridge_ioc_listfwd, NULL},
};

/*
 * Given a bridge mac pointer, get a ref-held pointer to the corresponding
 * bridge instance, if any. We must hold the global bmac_rwlock so that
 * bm_inst doesn't slide out from under us.
 */
static bridge_inst_t *
mac_to_inst(const bridge_mac_t *bmp)
{
	bridge_inst_t *bip;

	rw_enter(&bmac_rwlock, RW_READER);
	if ((bip = bmp->bm_inst) != NULL)
		atomic_inc_uint(&bip->bi_refs);
	rw_exit(&bmac_rwlock);
	return (bip);
}

static void
link_sdu_fail(bridge_link_t *blp, boolean_t failed, mblk_t **mlist)
{
	mblk_t *mp;
	bridge_ctl_t *bcp;
	bridge_link_t *blcmp;
	bridge_inst_t *bip;
	bridge_mac_t *bmp;

	if (failed) {
		if (blp->bl_flags & BLF_SDUFAIL)
			return;
		blp->bl_flags |= BLF_SDUFAIL;
	} else {
		if (!(blp->bl_flags & BLF_SDUFAIL))
			return;
		blp->bl_flags &= ~BLF_SDUFAIL;
	}

	/*
	 * If this link is otherwise up, then check if there are any other
	 * non-failed non-down links. If not, then we control the state of the
	 * whole bridge.
	 */
	bip = blp->bl_inst;
	bmp = bip->bi_mac;
	if (blp->bl_linkstate != LINK_STATE_DOWN) {
		for (blcmp = list_head(&bip->bi_links); blcmp != NULL;
		    blcmp = list_next(&bip->bi_links, blcmp)) {
			if (blp != blcmp &&
			    !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) &&
			    blcmp->bl_linkstate != LINK_STATE_DOWN)
				break;
		}
		if (blcmp == NULL) {
			bmp->bm_linkstate = failed ? LINK_STATE_DOWN :
			    LINK_STATE_UP;
			mac_link_redo(bmp->bm_mh, bmp->bm_linkstate);
		}
	}

	/*
	 * If we're becoming failed, then the link's current true state needs
	 * to be reflected upwards to this link's clients. If we're becoming
	 * unfailed, then we get the state of the bridge instead on all
	 * clients.
	 */
	if (failed) {
		if (bmp->bm_linkstate != blp->bl_linkstate)
			mac_link_redo(blp->bl_mh, blp->bl_linkstate);
	} else {
		mac_link_redo(blp->bl_mh, bmp->bm_linkstate);
	}

	/* get the current mblk we're going to send up */
	if ((mp = blp->bl_lfailmp) == NULL &&
	    (mp = allocb(sizeof (bridge_ctl_t), BPRI_MED)) == NULL)
		return;

	/* get a new one for next time */
	blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED);

	/* if none for next time, then report only failures */
	if (blp->bl_lfailmp == NULL && !failed) {
		blp->bl_lfailmp = mp;
		return;
	}

	/* LINTED: alignment */
	bcp = (bridge_ctl_t *)mp->b_rptr;
	bcp->bc_linkid = blp->bl_linkid;
	bcp->bc_failed = failed;
	mp->b_wptr = (uchar_t *)(bcp + 1);
	mp->b_next = *mlist;
	*mlist = mp;
}

/*
 * Send control messages (link SDU changes) using the stream to the
 * bridge instance daemon.
 */
static void
send_up_messages(bridge_inst_t *bip, mblk_t *mp)
{
	mblk_t *mnext;
	queue_t *rq;

	rq = bip->bi_control->bs_wq;
	rq = OTHERQ(rq);
	while (mp != NULL) {
		mnext = mp->b_next;
		mp->b_next = NULL;
		putnext(rq, mp);
		mp = mnext;
	}
}

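/*
 * The functions below are the GLDv3 (mac provider) callbacks for the bridge
 * observability node. Most are stubs: nothing transmits on or configures
 * addresses for the bridge node itself, and it is marked unable to go
 * "active" in bmac_alloc(), so only passive clients such as snoop bind to it.
 */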
/* ARGSUSED */
static int
bridge_m_getstat(void *arg, uint_t stat, uint64_t *val)
{
	return (ENOTSUP);
}

static int
bridge_m_start(void *arg)
{
	bridge_mac_t *bmp = arg;

	bmp->bm_flags |= BMF_STARTED;
	return (0);
}

static void
bridge_m_stop(void *arg)
{
	bridge_mac_t *bmp = arg;

	bmp->bm_flags &= ~BMF_STARTED;
}

/* ARGSUSED */
static int
bridge_m_setpromisc(void *arg, boolean_t on)
{
	return (0);
}

/* ARGSUSED */
static int
bridge_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	return (0);
}

/* ARGSUSED */
static int
bridge_m_unicst(void *arg, const uint8_t *macaddr)
{
	return (ENOTSUP);
}

static mblk_t *
bridge_m_tx(void *arg, mblk_t *mp)
{
	_NOTE(ARGUNUSED(arg));
	freemsgchain(mp);
	return (NULL);
}

/* ARGSUSED */
static int
bridge_ioc_listfwd(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
{
	bridge_listfwd_t *blf = karg;
	bridge_inst_t *bip;
	bridge_fwd_t *bfp, match;
	avl_index_t where;

	bip = bridge_find_name(blf->blf_name);
	if (bip == NULL)
		return (ENOENT);

	bcopy(blf->blf_dest, match.bf_dest, ETHERADDRL);
	match.bf_flags |= BFF_VLANLOCAL;
	rw_enter(&bip->bi_rwlock, RW_READER);
	if ((bfp = avl_find(&bip->bi_fwd, &match, &where)) == NULL)
		bfp = avl_nearest(&bip->bi_fwd, where, AVL_AFTER);
	else
		bfp = AVL_NEXT(&bip->bi_fwd, bfp);
	if (bfp == NULL) {
		bzero(blf, sizeof (*blf));
	} else {
		bcopy(bfp->bf_dest, blf->blf_dest, ETHERADDRL);
		blf->blf_trill_nick = bfp->bf_trill_nick;
		blf->blf_ms_age =
		    drv_hztousec(ddi_get_lbolt() - bfp->bf_lastheard) / 1000;
		blf->blf_is_local =
		    (bfp->bf_flags & BFF_LOCALADDR) != 0;
		blf->blf_linkid = bfp->bf_links[0]->bl_linkid;
	}
	rw_exit(&bip->bi_rwlock);
	bridge_unref(bip);
	return (0);
}

static int
bridge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_valsize, const void *pr_val)
{
	bridge_mac_t *bmp = arg;
	bridge_inst_t *bip;
	bridge_link_t *blp;
	int err;
	uint_t maxsdu;
	mblk_t *mlist;

	_NOTE(ARGUNUSED(pr_name));
	switch (pr_num) {
	case MAC_PROP_MTU:
		if (pr_valsize < sizeof (bmp->bm_maxsdu)) {
			err = EINVAL;
			break;
		}
		(void) bcopy(pr_val, &maxsdu, sizeof (maxsdu));
		if (maxsdu == bmp->bm_maxsdu) {
			err = 0;
		} else if ((bip = mac_to_inst(bmp)) == NULL) {
			err = ENXIO;
		} else {
			rw_enter(&bip->bi_rwlock, RW_WRITER);
			mlist = NULL;
			for (blp = list_head(&bip->bi_links); blp != NULL;
			    blp = list_next(&bip->bi_links, blp)) {
				if (blp->bl_flags & BLF_DELETED)
					continue;
				if (blp->bl_maxsdu == maxsdu)
					link_sdu_fail(blp, B_FALSE, &mlist);
				else if (blp->bl_maxsdu == bmp->bm_maxsdu)
					link_sdu_fail(blp, B_TRUE, &mlist);
			}
			rw_exit(&bip->bi_rwlock);
			bmp->bm_maxsdu = maxsdu;
			(void) mac_maxsdu_update(bmp->bm_mh, maxsdu);
			send_up_messages(bip, mlist);
			bridge_unref(bip);
			err = 0;
		}
		break;

	default:
		err = ENOTSUP;
		break;
	}
	return (err);
}

static int
bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_valsize, void *pr_val)
{
	bridge_mac_t *bmp = arg;
	int err = 0;

	_NOTE(ARGUNUSED(pr_name));
	switch (pr_num) {
	case MAC_PROP_STATUS:
		ASSERT(pr_valsize >= sizeof (bmp->bm_linkstate));
		bcopy(&bmp->bm_linkstate, pr_val, sizeof (bmp->bm_linkstate));
		break;

	default:
		err = ENOTSUP;
		break;
	}
	return (err);
}

static void
bridge_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    mac_prop_info_handle_t prh)
{
	bridge_mac_t *bmp = arg;

	_NOTE(ARGUNUSED(pr_name));

	switch (pr_num) {
	case MAC_PROP_MTU:
		mac_prop_info_set_range_uint32(prh, bmp->bm_maxsdu,
		    bmp->bm_maxsdu);
		break;
	case MAC_PROP_STATUS:
		mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ);
		break;
	}
}

static mac_callbacks_t bridge_m_callbacks = {
	MC_SETPROP | MC_GETPROP | MC_PROPINFO,
	bridge_m_getstat,
	bridge_m_start,
	bridge_m_stop,
	bridge_m_setpromisc,
	bridge_m_multicst,
	bridge_m_unicst,
	bridge_m_tx,
	NULL,	/* reserved */
	NULL,	/* ioctl */
	NULL,	/* getcapab */
	NULL,	/* open */
	NULL,	/* close */
	bridge_m_setprop,
	bridge_m_getprop,
	bridge_m_propinfo
};

/*
 * Create kstats from a list.
 */
static kstat_t *
kstat_setup(kstat_named_t *knt, const char **names, int nstat,
    const char *unitname)
{
	kstat_t *ksp;
	int i;

	for (i = 0; i < nstat; i++)
		kstat_named_init(&knt[i], names[i], KSTAT_DATA_UINT64);

	ksp = kstat_create_zone(BRIDGE_DEV_NAME, 0, unitname, "net",
	    KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID);
	if (ksp != NULL) {
		ksp->ks_data = knt;
		kstat_install(ksp);
	}
	return (ksp);
}

/*
 * Find an existing bridge_mac_t structure or allocate a new one for the given
 * bridge instance. This creates the mac driver instance that snoop can use.
 */
static int
bmac_alloc(bridge_inst_t *bip, bridge_mac_t **bmacp)
{
	bridge_mac_t *bmp, *bnew;
	mac_register_t *mac;
	int err;

	*bmacp = NULL;
	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
		return (EINVAL);

	bnew = kmem_zalloc(sizeof (*bnew), KM_SLEEP);

	rw_enter(&bmac_rwlock, RW_WRITER);
	for (bmp = list_head(&bmac_list); bmp != NULL;
	    bmp = list_next(&bmac_list, bmp)) {
		if (strcmp(bip->bi_name, bmp->bm_name) == 0) {
			ASSERT(bmp->bm_inst == NULL);
			bmp->bm_inst = bip;
			rw_exit(&bmac_rwlock);
			kmem_free(bnew, sizeof (*bnew));
			mac_free(mac);
			*bmacp = bmp;
			return (0);
		}
	}

	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	mac->m_driver = bnew;
	mac->m_dip = bridge_dev_info;
	mac->m_instance = (uint_t)-1;
	mac->m_src_addr = (uint8_t *)zero_addr;
	mac->m_callbacks = &bridge_m_callbacks;

	/*
	 * Note that the SDU limits are irrelevant, as nobody transmits on the
	 * bridge node itself. It's mainly for monitoring, but we allow
	 * setting the bridge MTU for quick transition of all links that are
	 * part of the bridge to a new MTU.
	 */
	mac->m_min_sdu = 1;
	mac->m_max_sdu = 1500;
	err = mac_register(mac, &bnew->bm_mh);
	mac_free(mac);
	if (err != 0) {
		rw_exit(&bmac_rwlock);
		kmem_free(bnew, sizeof (*bnew));
		return (err);
	}

	bnew->bm_inst = bip;
	(void) strcpy(bnew->bm_name, bip->bi_name);
	if (list_is_empty(&bmac_list)) {
		bridge_timerid = timeout(bridge_timer, NULL,
		    bridge_scan_interval);
	}
	list_insert_tail(&bmac_list, bnew);
	rw_exit(&bmac_rwlock);

	/*
	 * Mark the MAC as unable to go "active" so that only passive clients
	 * (such as snoop) can bind to it.
	 */
	mac_no_active(bnew->bm_mh);
	*bmacp = bnew;
	return (0);
}

/*
 * Disconnect the given bridge_mac_t from its bridge instance. The bridge
 * instance is going away. The mac instance can't go away until the clients
 * are gone (see bridge_timer).
 */
static void
bmac_disconnect(bridge_mac_t *bmp)
{
	bridge_inst_t *bip;

	bmp->bm_linkstate = LINK_STATE_DOWN;
	mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN);

	rw_enter(&bmac_rwlock, RW_READER);
	bip = bmp->bm_inst;
	bip->bi_mac = NULL;
	bmp->bm_inst = NULL;
	rw_exit(&bmac_rwlock);
}

/* This is used by the avl trees to sort forwarding table entries */
static int
fwd_compare(const void *addr1, const void *addr2)
{
	const bridge_fwd_t *fwd1 = addr1;
	const bridge_fwd_t *fwd2 = addr2;
	int diff = memcmp(fwd1->bf_dest, fwd2->bf_dest, ETHERADDRL);

	if (diff != 0)
		return (diff > 0 ? 1 : -1);

	if ((fwd1->bf_flags ^ fwd2->bf_flags) & BFF_VLANLOCAL) {
		if (fwd1->bf_vlanid > fwd2->bf_vlanid)
			return (1);
		else if (fwd1->bf_vlanid < fwd2->bf_vlanid)
			return (-1);
	}
	return (0);
}

static void
inst_free(bridge_inst_t *bip)
{
	ASSERT(bip->bi_mac == NULL);
	rw_destroy(&bip->bi_rwlock);
	list_destroy(&bip->bi_links);
	cv_destroy(&bip->bi_linkwait);
	avl_destroy(&bip->bi_fwd);
	if (bip->bi_ksp != NULL)
		kstat_delete(bip->bi_ksp);
	kmem_free(bip, sizeof (*bip));
}

static bridge_inst_t *
inst_alloc(const char *bridge)
{
	bridge_inst_t *bip;

	bip = kmem_zalloc(sizeof (*bip), KM_SLEEP);
	bip->bi_refs = 1;
	(void) strcpy(bip->bi_name, bridge);
	rw_init(&bip->bi_rwlock, NULL, RW_DRIVER, NULL);
	list_create(&bip->bi_links, sizeof (bridge_link_t),
	    offsetof(bridge_link_t, bl_node));
	cv_init(&bip->bi_linkwait, NULL, CV_DRIVER, NULL);
	avl_create(&bip->bi_fwd, fwd_compare, sizeof (bridge_fwd_t),
	    offsetof(bridge_fwd_t, bf_node));
	return (bip);
}

static bridge_inst_t *
bridge_find_name(const char *bridge)
{
	bridge_inst_t *bip;

	mutex_enter(&inst_lock);
	for (bip = list_head(&inst_list); bip != NULL;
	    bip = list_next(&inst_list, bip)) {
		if (!(bip->bi_flags & BIF_SHUTDOWN) &&
		    strcmp(bridge, bip->bi_name) == 0) {
			atomic_inc_uint(&bip->bi_refs);
			break;
		}
	}
	mutex_exit(&inst_lock);

	return (bip);
}

static int
bridge_create(datalink_id_t linkid, const char *bridge, bridge_inst_t **bipc,
    cred_t *cred)
{
	bridge_inst_t *bip, *bipnew;
	bridge_mac_t *bmp = NULL;
	int err;

	*bipc = NULL;
	bipnew = inst_alloc(bridge);

	mutex_enter(&inst_lock);
lookup_retry:
	for (bip = list_head(&inst_list); bip != NULL;
	    bip = list_next(&inst_list, bip)) {
		if (strcmp(bridge, bip->bi_name) == 0)
			break;
	}

	/* This should not take long; if it does, we've got a design problem */
	if (bip != NULL && (bip->bi_flags & BIF_SHUTDOWN)) {
		cv_wait(&inst_cv, &inst_lock);
		goto lookup_retry;
	}

	if (bip == NULL) {
		bip = bipnew;
		bipnew = NULL;
		list_insert_tail(&inst_list, bip);
	}

	mutex_exit(&inst_lock);
	if (bipnew != NULL) {
		inst_free(bipnew);
		return (EEXIST);
	}

	bip->bi_ksp = kstat_setup((kstat_named_t *)&bip->bi_kstats,
	    inst_kstats_list, Dim(inst_kstats_list), bip->bi_name);

	err = bmac_alloc(bip, &bmp);
	if ((bip->bi_mac = bmp) == NULL)
		goto fail_create;

	/*
	 * bm_inst is set, so the timer cannot yank the DLS rug from under us.
	 * No extra locking is needed here.
	 */
	if (!(bmp->bm_flags & BMF_DLS)) {
		err = dls_devnet_create(bmp->bm_mh, linkid, crgetzoneid(cred));
		if (err != 0)
			goto fail_create;
		bmp->bm_flags |= BMF_DLS;
	}

	bip->bi_dev = makedevice(bridge_major, mac_minor(bmp->bm_mh));
	*bipc = bip;
	return (0);

fail_create:
	ASSERT(bip->bi_trilldata == NULL);
	bip->bi_flags |= BIF_SHUTDOWN;
	bridge_unref(bip);
	return (err);
}

static void
bridge_unref(bridge_inst_t *bip)
{
	if (atomic_dec_uint_nv(&bip->bi_refs) == 0) {
		ASSERT(bip->bi_flags & BIF_SHUTDOWN);
		/* free up mac for reuse before leaving global list */
		if (bip->bi_mac != NULL)
			bmac_disconnect(bip->bi_mac);
		mutex_enter(&inst_lock);
		list_remove(&inst_list, bip);
		cv_broadcast(&inst_cv);
		mutex_exit(&inst_lock);
		inst_free(bip);
	}
}

/*
 * Stream instances are used only for allocating bridges and serving as a
 * control node. They serve no data-handling function.
 */
static bridge_stream_t *
stream_alloc(void)
{
	bridge_stream_t *bsp;
	minor_t mn;

	if ((mn = mac_minor_hold(B_FALSE)) == 0)
		return (NULL);
	bsp = kmem_zalloc(sizeof (*bsp), KM_SLEEP);
	bsp->bs_minor = mn;
	return (bsp);
}

static void
stream_free(bridge_stream_t *bsp)
{
	mac_minor_rele(bsp->bs_minor);
	kmem_free(bsp, sizeof (*bsp));
}

/* Reference hold/release functions for STREAMS-related taskq */
static void
stream_ref(bridge_stream_t *bsp)
{
	mutex_enter(&stream_ref_lock);
	bsp->bs_taskq_cnt++;
	mutex_exit(&stream_ref_lock);
}

static void
stream_unref(bridge_stream_t *bsp)
{
	mutex_enter(&stream_ref_lock);
	if (--bsp->bs_taskq_cnt == 0)
		cv_broadcast(&stream_ref_cv);
	mutex_exit(&stream_ref_lock);
}

static void
link_free(bridge_link_t *blp)
{
	bridge_inst_t *bip = blp->bl_inst;

	ASSERT(!(blp->bl_flags & BLF_FREED));
	blp->bl_flags |= BLF_FREED;
	if (blp->bl_ksp != NULL)
		kstat_delete(blp->bl_ksp);
	if (blp->bl_lfailmp != NULL)
		freeb(blp->bl_lfailmp);
	cv_destroy(&blp->bl_trillwait);
	mutex_destroy(&blp->bl_trilllock);
	kmem_free(blp, sizeof (*blp));
	/* Don't unreference the bridge until the MAC is closed */
	bridge_unref(bip);
}

static void
link_unref(bridge_link_t *blp)
{
	if (atomic_dec_uint_nv(&blp->bl_refs) == 0) {
		bridge_inst_t *bip = blp->bl_inst;

		ASSERT(blp->bl_flags & BLF_DELETED);
		rw_enter(&bip->bi_rwlock, RW_WRITER);
		if (blp->bl_flags & BLF_LINK_ADDED)
			list_remove(&bip->bi_links, blp);
		rw_exit(&bip->bi_rwlock);
		if (bip->bi_trilldata != NULL && list_is_empty(&bip->bi_links))
			cv_broadcast(&bip->bi_linkwait);
		link_free(blp);
	}
}

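/*
 * Forwarding table entries (bridge_fwd_t) are keyed by destination MAC
 * address and, for IVL duplicates marked BFF_VLANLOCAL, by VLAN ID (see
 * fwd_compare above). fwd_find() returns an entry with bf_refs held; callers
 * must release it with fwd_unref().
 */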
static bridge_fwd_t *
fwd_alloc(const uint8_t *addr, uint_t nlinks, uint16_t nick)
{
	bridge_fwd_t *bfp;

	bfp = kmem_zalloc(sizeof (*bfp) + (nlinks * sizeof (bridge_link_t *)),
	    KM_NOSLEEP);
	if (bfp != NULL) {
		bcopy(addr, bfp->bf_dest, ETHERADDRL);
		bfp->bf_lastheard = ddi_get_lbolt();
		bfp->bf_maxlinks = nlinks;
		bfp->bf_links = (bridge_link_t **)(bfp + 1);
		bfp->bf_trill_nick = nick;
	}
	return (bfp);
}

static bridge_fwd_t *
fwd_find(bridge_inst_t *bip, const uint8_t *addr, uint16_t vlanid)
{
	bridge_fwd_t *bfp, *vbfp;
	bridge_fwd_t match;

	bcopy(addr, match.bf_dest, ETHERADDRL);
	match.bf_flags = 0;
	rw_enter(&bip->bi_rwlock, RW_READER);
	if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) {
		if (bfp->bf_vlanid != vlanid && bfp->bf_vcnt > 0) {
			match.bf_vlanid = vlanid;
			match.bf_flags = BFF_VLANLOCAL;
			vbfp = avl_find(&bip->bi_fwd, &match, NULL);
			if (vbfp != NULL)
				bfp = vbfp;
		}
		atomic_inc_uint(&bfp->bf_refs);
	}
	rw_exit(&bip->bi_rwlock);
	return (bfp);
}

static void
fwd_free(bridge_fwd_t *bfp)
{
	uint_t i;
	bridge_inst_t *bip = bfp->bf_links[0]->bl_inst;

	KIDECR(bki_count);
	for (i = 0; i < bfp->bf_nlinks; i++)
		link_unref(bfp->bf_links[i]);
	kmem_free(bfp,
	    sizeof (*bfp) + bfp->bf_maxlinks * sizeof (bridge_link_t *));
}

static void
fwd_unref(bridge_fwd_t *bfp)
{
	if (atomic_dec_uint_nv(&bfp->bf_refs) == 0) {
		ASSERT(!(bfp->bf_flags & BFF_INTREE));
		fwd_free(bfp);
	}
}

static void
fwd_delete(bridge_fwd_t *bfp)
{
	bridge_inst_t *bip;
	bridge_fwd_t *bfpzero;

	if (bfp->bf_flags & BFF_INTREE) {
		ASSERT(bfp->bf_nlinks > 0);
		bip = bfp->bf_links[0]->bl_inst;
		rw_enter(&bip->bi_rwlock, RW_WRITER);
		/* Another thread could beat us to this */
		if (bfp->bf_flags & BFF_INTREE) {
			avl_remove(&bip->bi_fwd, bfp);
			bfp->bf_flags &= ~BFF_INTREE;
			if (bfp->bf_flags & BFF_VLANLOCAL) {
				bfp->bf_flags &= ~BFF_VLANLOCAL;
				bfpzero = avl_find(&bip->bi_fwd, bfp, NULL);
				if (bfpzero != NULL && bfpzero->bf_vcnt > 0)
					bfpzero->bf_vcnt--;
			}
			rw_exit(&bip->bi_rwlock);
			fwd_unref(bfp);	/* no longer in avl tree */
		} else {
			rw_exit(&bip->bi_rwlock);
		}
	}
}

static boolean_t
fwd_insert(bridge_inst_t *bip, bridge_fwd_t *bfp)
{
	avl_index_t idx;
	boolean_t retv;

	rw_enter(&bip->bi_rwlock, RW_WRITER);
	if (!(bip->bi_flags & BIF_SHUTDOWN) &&
	    avl_numnodes(&bip->bi_fwd) < bip->bi_tablemax &&
	    avl_find(&bip->bi_fwd, bfp, &idx) == NULL) {
		avl_insert(&bip->bi_fwd, bfp, idx);
		bfp->bf_flags |= BFF_INTREE;
		atomic_inc_uint(&bfp->bf_refs);	/* avl entry */
		retv = B_TRUE;
	} else {
		retv = B_FALSE;
	}
	rw_exit(&bip->bi_rwlock);
	return (retv);
}

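/*
 * Update the forwarding table when a local (MAC client) address changes:
 * remove this link from the entry for the old address and add it to the
 * entry for the new one, allocating or expanding entries as needed.
 */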
static void
fwd_update_local(bridge_link_t *blp, const uint8_t *oldaddr,
    const uint8_t *newaddr)
{
	bridge_inst_t *bip = blp->bl_inst;
	bridge_fwd_t *bfp, *bfnew;
	bridge_fwd_t match;
	avl_index_t idx;
	boolean_t drop_ref = B_FALSE;

	if (bcmp(oldaddr, newaddr, ETHERADDRL) == 0)
		return;

	if (bcmp(oldaddr, zero_addr, ETHERADDRL) == 0)
		goto no_old_addr;

	/*
	 * Find the previous entry, and remove our link from it.
	 */
	bcopy(oldaddr, match.bf_dest, ETHERADDRL);
	rw_enter(&bip->bi_rwlock, RW_WRITER);
	if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) {
		int i;

		/*
		 * See if we're in the list, and remove if so.
		 */
		for (i = 0; i < bfp->bf_nlinks; i++) {
			if (bfp->bf_links[i] == blp) {
				/*
				 * We assume writes are atomic, so no special
				 * MT handling is needed. The list length is
				 * decremented first, and then we remove
				 * entries.
				 */
				bfp->bf_nlinks--;
				for (; i < bfp->bf_nlinks; i++)
					bfp->bf_links[i] = bfp->bf_links[i + 1];
				drop_ref = B_TRUE;
				break;
			}
		}
		/* If no more links, then remove and free up */
		if (bfp->bf_nlinks == 0) {
			avl_remove(&bip->bi_fwd, bfp);
			bfp->bf_flags &= ~BFF_INTREE;
		} else {
			bfp = NULL;
		}
	}
	rw_exit(&bip->bi_rwlock);
	if (bfp != NULL)
		fwd_unref(bfp);	/* no longer in avl tree */

	/*
	 * Now get the new link address and add this link to the list. The
	 * list should be of length 1 unless the user has configured multiple
	 * NICs with the same address. (That's an incorrect configuration, but
	 * we support it anyway.)
	 */
no_old_addr:
	bfp = NULL;
	if ((bip->bi_flags & BIF_SHUTDOWN) ||
	    bcmp(newaddr, zero_addr, ETHERADDRL) == 0)
		goto no_new_addr;

	bcopy(newaddr, match.bf_dest, ETHERADDRL);
	rw_enter(&bip->bi_rwlock, RW_WRITER);
	if ((bfp = avl_find(&bip->bi_fwd, &match, &idx)) == NULL) {
		bfnew = fwd_alloc(newaddr, 1, RBRIDGE_NICKNAME_NONE);
		if (bfnew != NULL)
			KIINCR(bki_count);
	} else if (bfp->bf_nlinks < bfp->bf_maxlinks) {
		/* special case: link fits in existing entry */
		bfnew = bfp;
	} else {
		bfnew = fwd_alloc(newaddr, bfp->bf_nlinks + 1,
		    RBRIDGE_NICKNAME_NONE);
		if (bfnew != NULL) {
			KIINCR(bki_count);
			avl_remove(&bip->bi_fwd, bfp);
			bfp->bf_flags &= ~BFF_INTREE;
			bfnew->bf_nlinks = bfp->bf_nlinks;
			bcopy(bfp->bf_links, bfnew->bf_links,
			    bfp->bf_nlinks * sizeof (bfp));
			/* reset the idx value due to removal above */
			(void) avl_find(&bip->bi_fwd, &match, &idx);
		}
	}

	if (bfnew != NULL) {
		bfnew->bf_links[bfnew->bf_nlinks++] = blp;
		if (drop_ref)
			drop_ref = B_FALSE;
		else
			atomic_inc_uint(&blp->bl_refs);	/* bf_links entry */

		if (bfnew != bfp) {
			/* local addresses are not subject to table limits */
			avl_insert(&bip->bi_fwd, bfnew, idx);
			bfnew->bf_flags |= (BFF_INTREE | BFF_LOCALADDR);
			atomic_inc_uint(&bfnew->bf_refs);	/* avl entry */
		}
	}
	rw_exit(&bip->bi_rwlock);

no_new_addr:
	/*
	 * If we found an existing entry and we replaced it with a new one,
	 * then drop the table reference from the old one. We removed it from
	 * the AVL tree above.
	 */
	if (bfnew != NULL && bfp != NULL && bfnew != bfp)
		fwd_unref(bfp);

	/* Account for removed entry. */
	if (drop_ref)
		link_unref(blp);
}

static void
bridge_new_unicst(bridge_link_t *blp)
{
	uint8_t new_mac[ETHERADDRL];

	mac_unicast_primary_get(blp->bl_mh, new_mac);
	fwd_update_local(blp, blp->bl_local_mac, new_mac);
	bcopy(new_mac, blp->bl_local_mac, ETHERADDRL);
}

/*
 * We must shut down a link prior to freeing it, and doing that requires
 * blocking to wait for running MAC threads while holding a reference. This is
 * run from a taskq to accomplish proper link shutdown followed by reference
 * drop.
 */
static void
link_shutdown(void *arg)
{
	bridge_link_t *blp = arg;
	mac_handle_t mh = blp->bl_mh;
	bridge_inst_t *bip;
	bridge_fwd_t *bfp, *bfnext;
	avl_tree_t fwd_scavenge;
	int i;

	/*
	 * This link is being destroyed. Notify TRILL now that it's no longer
	 * possible to send packets. Data packets may still arrive until TRILL
	 * calls bridge_trill_lnunref.
	 */
	if (blp->bl_trilldata != NULL)
		trill_lndstr_fn(blp->bl_trilldata, blp);

	if (blp->bl_flags & BLF_PROM_ADDED)
		(void) mac_promisc_remove(blp->bl_mphp);

	if (blp->bl_flags & BLF_SET_BRIDGE)
		mac_bridge_clear(mh, (mac_handle_t)blp);

	if (blp->bl_flags & BLF_MARGIN_ADDED) {
		(void) mac_notify_remove(blp->bl_mnh, B_TRUE);
		(void) mac_margin_remove(mh, blp->bl_margin);
	}

	/* Tell the clients the real link state when we leave */
	mac_link_redo(blp->bl_mh,
	    mac_stat_get(blp->bl_mh, MAC_STAT_LOWLINK_STATE));

	/* Destroy all of the forwarding entries related to this link */
	avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
	    offsetof(bridge_fwd_t, bf_node));
	bip = blp->bl_inst;
	rw_enter(&bip->bi_rwlock, RW_WRITER);
	bfnext = avl_first(&bip->bi_fwd);
	while ((bfp = bfnext) != NULL) {
		bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
		for (i = 0; i < bfp->bf_nlinks; i++) {
			if (bfp->bf_links[i] == blp)
				break;
		}
		if (i >= bfp->bf_nlinks)
			continue;
		if (bfp->bf_nlinks > 1) {
			/* note that this can't be the last reference */
			link_unref(blp);
			bfp->bf_nlinks--;
			for (; i < bfp->bf_nlinks; i++)
				bfp->bf_links[i] = bfp->bf_links[i + 1];
		} else {
			ASSERT(bfp->bf_flags & BFF_INTREE);
			avl_remove(&bip->bi_fwd, bfp);
			bfp->bf_flags &= ~BFF_INTREE;
			avl_add(&fwd_scavenge, bfp);
		}
	}
	rw_exit(&bip->bi_rwlock);
	bfnext = avl_first(&fwd_scavenge);
	while ((bfp = bfnext) != NULL) {
		bfnext = AVL_NEXT(&fwd_scavenge, bfp);
		avl_remove(&fwd_scavenge, bfp);
		fwd_unref(bfp);
	}
	avl_destroy(&fwd_scavenge);

	if (blp->bl_flags & BLF_CLIENT_OPEN)
		mac_client_close(blp->bl_mch, 0);

	mac_close(mh);

	/*
	 * We are now completely removed from the active list, so drop the
	 * reference (see bridge_add_link).
	 */
	link_unref(blp);
}

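/*
 * Tear down a bridge instance: set BIF_SHUTDOWN, dispatch link_shutdown()
 * for each remaining link, flush the forwarding table, wait for TRILL to
 * release the links, and then drop the instance's list reference.
 */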
static void
shutdown_inst(bridge_inst_t *bip)
{
	bridge_link_t *blp, *blnext;
	bridge_fwd_t *bfp;

	mutex_enter(&inst_lock);
	if (bip->bi_flags & BIF_SHUTDOWN) {
		mutex_exit(&inst_lock);
		return;
	}

	/*
	 * Once on the inst_list, the bridge instance must not leave that list
	 * without having the shutdown flag set first. When the shutdown flag
	 * is set, we own the list reference, so we must drop it before
	 * returning.
	 */
	bip->bi_flags |= BIF_SHUTDOWN;
	mutex_exit(&inst_lock);

	bip->bi_control = NULL;

	rw_enter(&bip->bi_rwlock, RW_READER);
	blnext = list_head(&bip->bi_links);
	while ((blp = blnext) != NULL) {
		blnext = list_next(&bip->bi_links, blp);
		if (!(blp->bl_flags & BLF_DELETED)) {
			blp->bl_flags |= BLF_DELETED;
			(void) ddi_taskq_dispatch(bridge_taskq, link_shutdown,
			    blp, DDI_SLEEP);
		}
	}
	while ((bfp = avl_first(&bip->bi_fwd)) != NULL) {
		atomic_inc_uint(&bfp->bf_refs);
		rw_exit(&bip->bi_rwlock);
		fwd_delete(bfp);
		fwd_unref(bfp);
		rw_enter(&bip->bi_rwlock, RW_READER);
	}
	rw_exit(&bip->bi_rwlock);

	/*
	 * This bridge is being destroyed. Notify TRILL once all of the
	 * links are gone.
	 */
	mutex_enter(&inst_lock);
	while (bip->bi_trilldata != NULL && !list_is_empty(&bip->bi_links))
		cv_wait(&bip->bi_linkwait, &inst_lock);
	mutex_exit(&inst_lock);
	if (bip->bi_trilldata != NULL)
		trill_brdstr_fn(bip->bi_trilldata, bip);

	bridge_unref(bip);
}

/*
 * This is called once by the TRILL module when it starts up. It just sets the
 * global TRILL callback function pointers -- data transmit/receive and bridge
 * and link destroy notification. There's only one TRILL module, so only one
 * registration is needed.
 *
 * TRILL should call this function with NULL pointers before unloading. It
 * must not do so before dropping all references to bridges and links. We
 * assert that this is true on debug builds.
 */
void
bridge_trill_register_cb(trill_recv_pkt_t recv_fn, trill_encap_pkt_t encap_fn,
    trill_br_dstr_t brdstr_fn, trill_ln_dstr_t lndstr_fn)
{
#ifdef DEBUG
	if (recv_fn == NULL && trill_recv_fn != NULL) {
		bridge_inst_t *bip;
		bridge_link_t *blp;

		mutex_enter(&inst_lock);
		for (bip = list_head(&inst_list); bip != NULL;
		    bip = list_next(&inst_list, bip)) {
			ASSERT(bip->bi_trilldata == NULL);
			rw_enter(&bip->bi_rwlock, RW_READER);
			for (blp = list_head(&bip->bi_links); blp != NULL;
			    blp = list_next(&bip->bi_links, blp)) {
				ASSERT(blp->bl_trilldata == NULL);
			}
			rw_exit(&bip->bi_rwlock);
		}
		mutex_exit(&inst_lock);
	}
#endif
	trill_recv_fn = recv_fn;
	trill_encap_fn = encap_fn;
	trill_brdstr_fn = brdstr_fn;
	trill_lndstr_fn = lndstr_fn;
}

/*
 * This registers the TRILL instance pointer with a bridge. Before this
 * pointer is set, the forwarding, TRILL receive, and bridge destructor
 * functions won't be called.
 *
 * TRILL holds a reference on a bridge with this call. It must free the
 * reference by calling the unregister function below.
 */
bridge_inst_t *
bridge_trill_brref(const char *bname, void *ptr)
{
	char bridge[MAXLINKNAMELEN];
	bridge_inst_t *bip;

	(void) snprintf(bridge, MAXLINKNAMELEN, "%s0", bname);
	bip = bridge_find_name(bridge);
	if (bip != NULL) {
		ASSERT(bip->bi_trilldata == NULL && ptr != NULL);
		bip->bi_trilldata = ptr;
	}
	return (bip);
}

void
bridge_trill_brunref(bridge_inst_t *bip)
{
	ASSERT(bip->bi_trilldata != NULL);
	bip->bi_trilldata = NULL;
	bridge_unref(bip);
}

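/*
 * A minimal sketch of the reference sequence TRILL is expected to follow
 * across these entry points, assuming it obeys the rules documented above
 * (the tbp/tlp pointers here are TRILL's own private data, named only for
 * illustration):
 *
 *	bridge_trill_register_cb(recv, encap, brdstr, lndstr);
 *	bip = bridge_trill_brref("name", tbp);
 *	blp = bridge_trill_lnref(bip, linkid, tlp);
 *	... data flows via bridge_trill_decaps/bridge_trill_output ...
 *	bridge_trill_lnunref(blp);
 *	bridge_trill_brunref(bip);
 *	bridge_trill_register_cb(NULL, NULL, NULL, NULL);
 */
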
/*
 * TRILL calls this function when referencing a particular link on a bridge.
 *
 * It holds a reference on the link, so TRILL must clear out the reference when
 * it's done with the link (on unbinding).
 */
bridge_link_t *
bridge_trill_lnref(bridge_inst_t *bip, datalink_id_t linkid, void *ptr)
{
	bridge_link_t *blp;

	ASSERT(ptr != NULL);
	rw_enter(&bip->bi_rwlock, RW_READER);
	for (blp = list_head(&bip->bi_links); blp != NULL;
	    blp = list_next(&bip->bi_links, blp)) {
		if (!(blp->bl_flags & BLF_DELETED) &&
		    blp->bl_linkid == linkid && blp->bl_trilldata == NULL) {
			blp->bl_trilldata = ptr;
			blp->bl_flags &= ~BLF_TRILLACTIVE;
			(void) memset(blp->bl_afs, 0, sizeof (blp->bl_afs));
			atomic_inc_uint(&blp->bl_refs);
			break;
		}
	}
	rw_exit(&bip->bi_rwlock);
	return (blp);
}

void
bridge_trill_lnunref(bridge_link_t *blp)
{
	mutex_enter(&blp->bl_trilllock);
	ASSERT(blp->bl_trilldata != NULL);
	blp->bl_trilldata = NULL;
	blp->bl_flags &= ~BLF_TRILLACTIVE;
	while (blp->bl_trillthreads > 0)
		cv_wait(&blp->bl_trillwait, &blp->bl_trilllock);
	mutex_exit(&blp->bl_trilllock);
	(void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs));
	link_unref(blp);
}

/*
 * This periodic timer performs three functions:
 *  1. It scans the list of learned forwarding entries, and removes ones that
 *     haven't been heard from in a while. The time limit is backed down if
 *     we're above the configured table limit.
 *  2. It walks the links and decays away the bl_learns counter.
 *  3. It scans the observability node entries looking for ones that can be
 *     freed up.
 */
/* ARGSUSED */
static void
bridge_timer(void *arg)
{
	bridge_inst_t *bip;
	bridge_fwd_t *bfp, *bfnext;
	bridge_mac_t *bmp, *bmnext;
	bridge_link_t *blp;
	int err;
	datalink_id_t tmpid;
	avl_tree_t fwd_scavenge;
	clock_t age_limit;
	uint32_t ldecay;

	avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
	    offsetof(bridge_fwd_t, bf_node));
	mutex_enter(&inst_lock);
	for (bip = list_head(&inst_list); bip != NULL;
	    bip = list_next(&inst_list, bip)) {
		if (bip->bi_flags & BIF_SHUTDOWN)
			continue;
		rw_enter(&bip->bi_rwlock, RW_WRITER);
		/* compute scaled maximum age based on table limit */
		if (avl_numnodes(&bip->bi_fwd) > bip->bi_tablemax)
			bip->bi_tshift++;
		else
			bip->bi_tshift = 0;
		if ((age_limit = bridge_fwd_age >> bip->bi_tshift) == 0) {
			if (bip->bi_tshift != 0)
				bip->bi_tshift--;
			age_limit = 1;
		}
		bfnext = avl_first(&bip->bi_fwd);
		while ((bfp = bfnext) != NULL) {
			bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
			if (!(bfp->bf_flags & BFF_LOCALADDR) &&
			    (ddi_get_lbolt() - bfp->bf_lastheard) > age_limit) {
				ASSERT(bfp->bf_flags & BFF_INTREE);
				avl_remove(&bip->bi_fwd, bfp);
				bfp->bf_flags &= ~BFF_INTREE;
				avl_add(&fwd_scavenge, bfp);
			}
		}
		for (blp = list_head(&bip->bi_links); blp != NULL;
		    blp = list_next(&bip->bi_links, blp)) {
			ldecay = mac_get_ldecay(blp->bl_mh);
			if (ldecay >= blp->bl_learns)
				blp->bl_learns = 0;
			else
				atomic_add_int(&blp->bl_learns, -(int)ldecay);
		}
		rw_exit(&bip->bi_rwlock);
		bfnext = avl_first(&fwd_scavenge);
		while ((bfp = bfnext) != NULL) {
			bfnext = AVL_NEXT(&fwd_scavenge, bfp);
			avl_remove(&fwd_scavenge, bfp);
			KIINCR(bki_expire);
			fwd_unref(bfp);	/* drop tree reference */
		}
	}
	mutex_exit(&inst_lock);
	avl_destroy(&fwd_scavenge);

	/*
	 * Scan the bridge_mac_t entries and try to free up the ones that are
	 * no longer active. This must be done by polling, as neither DLS nor
	 * MAC provides a driver any sort of positive control over clients.
	 */
	rw_enter(&bmac_rwlock, RW_WRITER);
	bmnext = list_head(&bmac_list);
	while ((bmp = bmnext) != NULL) {
		bmnext = list_next(&bmac_list, bmp);

		/* ignore active bridges */
		if (bmp->bm_inst != NULL)
			continue;

		if (bmp->bm_flags & BMF_DLS) {
			err = dls_devnet_destroy(bmp->bm_mh, &tmpid, B_FALSE);
			ASSERT(err == 0 || err == EBUSY);
			if (err == 0)
				bmp->bm_flags &= ~BMF_DLS;
		}

		if (!(bmp->bm_flags & BMF_DLS)) {
			err = mac_unregister(bmp->bm_mh);
			ASSERT(err == 0 || err == EBUSY);
			if (err == 0) {
				list_remove(&bmac_list, bmp);
				kmem_free(bmp, sizeof (*bmp));
			}
		}
	}
	if (list_is_empty(&bmac_list)) {
		bridge_timerid = 0;
	} else {
		bridge_timerid = timeout(bridge_timer, NULL,
		    bridge_scan_interval);
	}
	rw_exit(&bmac_rwlock);
}

static int
bridge_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
{
	bridge_stream_t *bsp;

	if (rq->q_ptr != NULL)
		return (0);

	if (sflag & MODOPEN)
		return (EINVAL);

	/*
	 * Check the minor node number being opened. This tells us which
	 * bridge instance the user wants.
	 */
	if (getminor(*devp) != 0) {
		/*
		 * This is a regular DLPI stream for snoop or the like.
		 * Redirect it through DLD.
		 */
		rq->q_qinfo = &bridge_dld_rinit;
		OTHERQ(rq)->q_qinfo = &bridge_dld_winit;
		return (dld_open(rq, devp, oflag, sflag, credp));
	} else {
		/*
		 * Allocate the bridge control stream structure.
		 */
		if ((bsp = stream_alloc()) == NULL)
			return (ENOSR);
		rq->q_ptr = WR(rq)->q_ptr = (caddr_t)bsp;
		bsp->bs_wq = WR(rq);
		*devp = makedevice(getmajor(*devp), bsp->bs_minor);
		qprocson(rq);
		return (0);
	}
}

/*
 * This is used only for bridge control streams. DLPI goes through dld
 * instead.
 */
/* ARGSUSED */
static int
bridge_close(queue_t *rq, int flags __unused, cred_t *credp __unused)
{
	bridge_stream_t *bsp = rq->q_ptr;
	bridge_inst_t *bip;

	/*
	 * Wait for any stray taskq (add/delete link) entries related to this
	 * stream to leave the system.
	 */
	mutex_enter(&stream_ref_lock);
	while (bsp->bs_taskq_cnt != 0)
		cv_wait(&stream_ref_cv, &stream_ref_lock);
	mutex_exit(&stream_ref_lock);

	qprocsoff(rq);
	if ((bip = bsp->bs_inst) != NULL)
		shutdown_inst(bip);
	rq->q_ptr = WR(rq)->q_ptr = NULL;
	stream_free(bsp);
	if (bip != NULL)
		bridge_unref(bip);

	return (0);
}

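/*
 * Learn the source address of a packet seen on the given link: create or
 * update the forwarding entry mapping this address (and TRILL ingress
 * nickname, if any) to the link, subject to the per-link learning limit.
 */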
static void
bridge_learn(bridge_link_t *blp, const uint8_t *saddr, uint16_t ingress_nick,
    uint16_t vlanid)
{
	bridge_inst_t *bip = blp->bl_inst;
	bridge_fwd_t *bfp, *bfpnew;
	int i;
	boolean_t replaced = B_FALSE;

	/* Ignore multi-destination address used as source; it's nonsense. */
	if (*saddr & 1)
		return;

	/*
	 * If the source is known, then check whether it belongs on this link.
	 * If not, and this isn't a fixed local address, then we've detected a
	 * move. If it's not known, learn it.
	 */
	if ((bfp = fwd_find(bip, saddr, vlanid)) != NULL) {
		/*
		 * If the packet has a fixed local source address, then there's
		 * nothing we can learn. We must quit. If this was a received
		 * packet, then the sender has stolen our address, but there's
		 * nothing we can do. If it's a transmitted packet, then
		 * that's the normal case.
		 */
		if (bfp->bf_flags & BFF_LOCALADDR) {
			fwd_unref(bfp);
			return;
		}

		/*
		 * Check if the link (and TRILL sender, if any) being used is
		 * among the ones registered for this address. If so, then
		 * this is information that we already know.
		 */
		if (bfp->bf_trill_nick == ingress_nick) {
			for (i = 0; i < bfp->bf_nlinks; i++) {
				if (bfp->bf_links[i] == blp) {
					bfp->bf_lastheard = ddi_get_lbolt();
					fwd_unref(bfp);
					return;
				}
			}
		}
	}

	/*
	 * Note that we intentionally "unlearn" things that appear to be under
	 * attack on this link. The forwarding cache is a negative thing for
	 * security -- it disables reachability as a performance optimization
	 * -- so leaving out entries optimizes for success and defends against
	 * the attack. Thus, the bare increment without a check in the delete
	 * code above is right. (And it's ok if we skid over the limit a
	 * little, so there's no synchronization needed on the test.)
	 */
	if (blp->bl_learns >= mac_get_llimit(blp->bl_mh)) {
		if (bfp != NULL) {
			if (bfp->bf_vcnt == 0)
				fwd_delete(bfp);
			fwd_unref(bfp);
		}
		return;
	}

	atomic_inc_uint(&blp->bl_learns);

	if ((bfpnew = fwd_alloc(saddr, 1, ingress_nick)) == NULL) {
		if (bfp != NULL)
			fwd_unref(bfp);
		return;
	}
	KIINCR(bki_count);

	if (bfp != NULL) {
		/*
		 * If this is a new destination for the same VLAN, then delete
		 * so that we can update. If it's a different VLAN, then we're
		 * not going to delete the original. Split off instead into an
		 * IVL entry.
		 */
		if (bfp->bf_vlanid == vlanid) {
			/* save the count of IVL duplicates */
			bfpnew->bf_vcnt = bfp->bf_vcnt;

			/* entry deletes count as learning events */
			atomic_inc_uint(&blp->bl_learns);

			/* destroy and create anew; node moved */
			fwd_delete(bfp);
			replaced = B_TRUE;
			KIINCR(bki_moved);
		} else {
			bfp->bf_vcnt++;
			bfpnew->bf_flags |= BFF_VLANLOCAL;
		}
		fwd_unref(bfp);
	}
	bfpnew->bf_links[0] = blp;
	bfpnew->bf_nlinks = 1;
	atomic_inc_uint(&blp->bl_refs);	/* bf_links entry */
	if (!fwd_insert(bip, bfpnew))
		fwd_free(bfpnew);
	else if (!replaced)
		KIINCR(bki_source);
}

/*
 * Process the VLAN headers for output on a given link. There are several
 * cases (noting that we don't map VLANs):
 * 1. The input packet is good as it is; either
 *    a. It has no tag, and output has same PVID
 *    b. It has a non-zero priority-only tag for PVID, and b_band is same
 *    c. It has a tag with VLAN different from PVID, and b_band is same
 * 2. The tag must change: non-zero b_band is different from tag priority
 * 3. The packet has a tag and should not (VLAN same as PVID, b_band zero)
 * 4. The packet has no tag and needs one:
 *    a. VLAN ID same as PVID, but b_band is non-zero
 *    b. VLAN ID different from PVID
 * We exclude case 1 first, then modify the packet. Note that output packets
 * get a priority set by the mblk, not by the header, because QoS in bridging
 * requires priority recalculation at each node.
 *
 * The passed-in tci is the "impossible" value 0xFFFF when no tag is present.
 */
static mblk_t *
reform_vlan_header(mblk_t *mp, uint16_t vlanid, uint16_t tci, uint16_t pvid)
{
	boolean_t source_has_tag = (tci != 0xFFFF);
	mblk_t *mpcopy;
	size_t mlen, minlen;
	struct ether_vlan_header *evh;
	int pri;

	/* This helps centralize error handling in the caller. */
	if (mp == NULL)
		return (mp);

	/*
	 * A forwarded packet cannot have hardware offloads enabled
	 * because we don't know if the destination can handle them.
	 * By this point, any hardware offloads present should have
	 * been emulated.
	 */
	DB_CKSUMFLAGS(mp) = 0;

	/* Get the no-modification cases out of the way first */
	if (!source_has_tag && vlanid == pvid)		/* 1a */
		return (mp);

	pri = VLAN_PRI(tci);
	if (source_has_tag && mp->b_band == pri) {
		if (vlanid != pvid)			/* 1c */
			return (mp);
		if (pri != 0 && VLAN_ID(tci) == 0)	/* 1b */
			return (mp);
	}

	/*
	 * We now know that we must modify the packet. Prepare for that. Note
	 * that if a tag is present, the caller has already done a pullup for
	 * the VLAN header, so we're good to go.
	 */
	if (MBLKL(mp) < sizeof (struct ether_header)) {
		mpcopy = msgpullup(mp, sizeof (struct ether_header));
		if (mpcopy == NULL) {
			freemsg(mp);
			return (NULL);
		}
		mp = mpcopy;
	}
	if (DB_REF(mp) > 1 || !IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) ||
	    (!source_has_tag && MBLKTAIL(mp) < VLAN_INCR)) {
		minlen = mlen = MBLKL(mp);
		if (!source_has_tag)
			minlen += VLAN_INCR;
		ASSERT(minlen >= sizeof (struct ether_vlan_header));
		/*
		 * We're willing to copy some data to avoid fragmentation, but
		 * not a lot.
		 */
		if (minlen > 256)
			minlen = sizeof (struct ether_vlan_header);
		mpcopy = allocb(minlen, BPRI_MED);
		if (mpcopy == NULL) {
			freemsg(mp);
			return (NULL);
		}
		if (mlen <= minlen) {
			/* We toss the first mblk when we can. */
			bcopy(mp->b_rptr, mpcopy->b_rptr, mlen);
			mpcopy->b_wptr += mlen;
			mpcopy->b_cont = mp->b_cont;
			freeb(mp);
		} else {
			/* If not, then just copy what we need */
			if (!source_has_tag)
				minlen = sizeof (struct ether_header);
			bcopy(mp->b_rptr, mpcopy->b_rptr, minlen);
			mpcopy->b_wptr += minlen;
			mpcopy->b_cont = mp;
			mp->b_rptr += minlen;
		}
		mp = mpcopy;
	}

	/* LINTED: pointer alignment */
	evh = (struct ether_vlan_header *)mp->b_rptr;
	if (source_has_tag) {
		if (mp->b_band == 0 && vlanid == pvid) {	/* 3 */
			evh->ether_tpid = evh->ether_type;
			mlen = MBLKL(mp);
			if (mlen > sizeof (struct ether_vlan_header))
				ovbcopy(mp->b_rptr +
				    sizeof (struct ether_vlan_header),
				    mp->b_rptr + sizeof (struct ether_header),
				    mlen - sizeof (struct ether_vlan_header));
			mp->b_wptr -= VLAN_INCR;
		} else {					/* 2 */
			if (vlanid == pvid)
				vlanid = VLAN_ID_NONE;
			tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid);
			evh->ether_tci = htons(tci);
		}
	} else {
		/* case 4: no header present, but one is needed */
		mlen = MBLKL(mp);
		if (mlen > sizeof (struct ether_header))
			ovbcopy(mp->b_rptr + sizeof (struct ether_header),
			    mp->b_rptr + sizeof (struct ether_vlan_header),
			    mlen - sizeof (struct ether_header));
		mp->b_wptr += VLAN_INCR;
		ASSERT(mp->b_wptr <= DB_LIM(mp));
		if (vlanid == pvid)
			vlanid = VLAN_ID_NONE;
		tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid);
		evh->ether_type = evh->ether_tpid;
		evh->ether_tpid = htons(ETHERTYPE_VLAN);
		evh->ether_tci = htons(tci);
	}
	return (mp);
}

/* Record VLAN information and strip header if requested. */
static void
update_header(mblk_t *mp, mac_header_info_t *hdr_info, boolean_t striphdr)
{
	if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) {
		struct ether_vlan_header *evhp;
		uint16_t ether_type;

		/* LINTED: alignment */
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		hdr_info->mhi_istagged = B_TRUE;
		hdr_info->mhi_tci = ntohs(evhp->ether_tci);
		if (striphdr) {
			/*
			 * For VLAN tagged frames update the ether_type
			 * in hdr_info before stripping the header.
			 */
			ether_type = ntohs(evhp->ether_type);
			hdr_info->mhi_origsap = ether_type;
			hdr_info->mhi_bindsap = (ether_type > ETHERMTU) ?
			    ether_type : DLS_SAP_LLC;
			mp->b_rptr = (uchar_t *)(evhp + 1);
		}
	} else {
		hdr_info->mhi_istagged = B_FALSE;
		hdr_info->mhi_tci = VLAN_ID_NONE;
		if (striphdr)
			mp->b_rptr += sizeof (struct ether_header);
	}
}

/*
 * Return B_TRUE if we're allowed to send on this link with the given VLAN ID.
 */
static boolean_t
bridge_can_send(bridge_link_t *blp, uint16_t vlanid)
{
	ASSERT(vlanid != VLAN_ID_NONE);
	if (blp->bl_flags & BLF_DELETED)
		return (B_FALSE);
	if (blp->bl_trilldata == NULL && blp->bl_state != BLS_FORWARDING)
		return (B_FALSE);
	return (BRIDGE_VLAN_ISSET(blp, vlanid) && BRIDGE_AF_ISSET(blp, vlanid));
}

/*
 * This function scans the bridge forwarding tables in order to forward a given
 * packet. If the packet either doesn't need forwarding (the current link is
 * correct) or the current link needs a copy as well, then the packet is
 * returned to the caller.
 *
 * If a packet has been decapsulated from TRILL, then it must *NOT* reenter a
 * TRILL tunnel. If the destination points there, then drop instead.
 */
static mblk_t *
bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp,
    uint16_t vlanid, uint16_t tci, boolean_t from_trill, boolean_t is_xmit)
{
	mblk_t *mpsend, *mpcopy;
	bridge_inst_t *bip = blp->bl_inst;
	bridge_link_t *blpsend, *blpnext;
	bridge_fwd_t *bfp;
	uint_t i;
	boolean_t selfseen = B_FALSE;
	void *tdp;
	const uint8_t *daddr = hdr_info->mhi_daddr;

	/*
	 * Check for the IEEE "reserved" multicast addresses. Messages sent to
	 * these addresses are used for link-local control (STP and pause), and
	 * are never forwarded or redirected.
	 */
	if (daddr[0] == 1 && daddr[1] == 0x80 && daddr[2] == 0xc2 &&
	    daddr[3] == 0 && daddr[4] == 0 && (daddr[5] & 0xf0) == 0) {
		if (from_trill) {
			freemsg(mp);
			mp = NULL;
		}
		return (mp);
	}

	if ((bfp = fwd_find(bip, daddr, vlanid)) != NULL) {

		/*
		 * If TRILL indicates a destination for this node, then it's
		 * clearly not intended for local delivery. We must tell TRILL
		 * to encapsulate, as long as we didn't just decapsulate it.
		 */
		if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) {
			/*
			 * Error case: can't reencapsulate if the protocols are
			 * working correctly.
			 */
			if (from_trill) {
				freemsg(mp);
				return (NULL);
			}
			mutex_enter(&blp->bl_trilllock);
			if ((tdp = blp->bl_trilldata) != NULL) {
				blp->bl_trillthreads++;
				mutex_exit(&blp->bl_trilllock);
				update_header(mp, hdr_info, B_FALSE);

				/*
				 * All trill data frames have
				 * Inner.VLAN.
				 */
				mp = reform_vlan_header(mp, vlanid, tci, 0);

				if (mp == NULL) {
					KIINCR(bki_drops);
					goto done;
				}

				trill_encap_fn(tdp, blp, hdr_info, mp,
				    bfp->bf_trill_nick);

done:
				mutex_enter(&blp->bl_trilllock);
				if (--blp->bl_trillthreads == 0 &&
				    blp->bl_trilldata == NULL)
					cv_broadcast(&blp->bl_trillwait);
			}
			mutex_exit(&blp->bl_trilllock);

			/* if TRILL has been disabled, then kill this stray */
			if (tdp == NULL) {
				freemsg(mp);
				fwd_delete(bfp);
			}
			fwd_unref(bfp);
			return (NULL);
		}

		/* find first link we can send on */
		for (i = 0; i < bfp->bf_nlinks; i++) {
			blpsend = bfp->bf_links[i];
			if (blpsend == blp)
				selfseen = B_TRUE;
			else if (bridge_can_send(blpsend, vlanid))
				break;
		}

		while (i < bfp->bf_nlinks) {
			blpsend = bfp->bf_links[i];
			for (i++; i < bfp->bf_nlinks; i++) {
				blpnext = bfp->bf_links[i];
				if (blpnext == blp)
					selfseen = B_TRUE;
				else if (bridge_can_send(blpnext, vlanid))
					break;
			}
			if (i == bfp->bf_nlinks && !selfseen) {
				mpsend = mp;
				mp = NULL;
			} else {
				mpsend = copymsg(mp);
			}

			mpsend = reform_vlan_header(mpsend, vlanid, tci,
			    blpsend->bl_pvid);

			if (mpsend == NULL) {
				KIINCR(bki_drops);
				continue;
			}

			KIINCR(bki_forwards);

			/*
			 * No need to bump up the link reference count, as
			 * the forwarding entry itself holds a reference to
			 * the link.
1988 */ 1989 if (bfp->bf_flags & BFF_LOCALADDR) { 1990 mac_rx_common(blpsend->bl_mh, NULL, mpsend); 1991 } else { 1992 KLPINCR(blpsend, bkl_xmit); 1993 mpsend = mac_ring_tx(blpsend->bl_mh, NULL, 1994 mpsend); 1995 freemsg(mpsend); 1996 } 1997 } 1998 1999 /* 2000 * Handle a special case: if we're transmitting to the original 2001 * link, then check whether the localaddr flag is set. If it 2002 * is, then receive instead. This doesn't happen with ordinary 2003 * bridging, but does happen often with TRILL decapsulation. 2004 */ 2005 if (mp != NULL && is_xmit && (bfp->bf_flags & BFF_LOCALADDR)) { 2006 mac_rx_common(blp->bl_mh, NULL, mp); 2007 mp = NULL; 2008 } 2009 fwd_unref(bfp); 2010 } else { 2011 /* 2012 * TRILL has two cases to handle. If the packet is off the 2013 * wire (not from TRILL), then we need to send up into the 2014 * TRILL module to have the distribution tree computed. If the 2015 * packet is from TRILL (decapsulated), then we're part of the 2016 * distribution tree, and we need to copy the packet on member 2017 * interfaces. 2018 * 2019 * Thus, the from TRILL case is identical to the STP case. 2020 */ 2021 if (!from_trill && blp->bl_trilldata != NULL) { 2022 mutex_enter(&blp->bl_trilllock); 2023 if ((tdp = blp->bl_trilldata) != NULL) { 2024 blp->bl_trillthreads++; 2025 mutex_exit(&blp->bl_trilllock); 2026 if ((mpsend = copymsg(mp)) != NULL) { 2027 update_header(mpsend, 2028 hdr_info, B_FALSE); 2029 /* 2030 * all trill data frames have 2031 * Inner.VLAN 2032 */ 2033 mpsend = reform_vlan_header(mpsend, 2034 vlanid, tci, 0); 2035 if (mpsend == NULL) { 2036 KIINCR(bki_drops); 2037 } else { 2038 trill_encap_fn(tdp, blp, 2039 hdr_info, mpsend, 2040 RBRIDGE_NICKNAME_NONE); 2041 } 2042 } 2043 mutex_enter(&blp->bl_trilllock); 2044 if (--blp->bl_trillthreads == 0 && 2045 blp->bl_trilldata == NULL) 2046 cv_broadcast(&blp->bl_trillwait); 2047 } 2048 mutex_exit(&blp->bl_trilllock); 2049 } 2050 2051 /* 2052 * This is an unknown destination, so flood. 
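		 *
		 * The walk below is, roughly: take bi_rwlock as reader, find
		 * the next link we can send on, hold it by bumping bl_refs,
		 * drop the lock, deliver a copy up that link and transmit
		 * another, then re-take the lock to advance; link_unref()
		 * releases each hold once that link has been handled.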
2053 */ 2054 rw_enter(&bip->bi_rwlock, RW_READER); 2055 for (blpnext = list_head(&bip->bi_links); blpnext != NULL; 2056 blpnext = list_next(&bip->bi_links, blpnext)) { 2057 if (blpnext == blp) 2058 selfseen = B_TRUE; 2059 else if (bridge_can_send(blpnext, vlanid)) 2060 break; 2061 } 2062 if (blpnext != NULL) 2063 atomic_inc_uint(&blpnext->bl_refs); 2064 rw_exit(&bip->bi_rwlock); 2065 while ((blpsend = blpnext) != NULL) { 2066 rw_enter(&bip->bi_rwlock, RW_READER); 2067 for (blpnext = list_next(&bip->bi_links, blpsend); 2068 blpnext != NULL; 2069 blpnext = list_next(&bip->bi_links, blpnext)) { 2070 if (blpnext == blp) 2071 selfseen = B_TRUE; 2072 else if (bridge_can_send(blpnext, vlanid)) 2073 break; 2074 } 2075 if (blpnext != NULL) 2076 atomic_inc_uint(&blpnext->bl_refs); 2077 rw_exit(&bip->bi_rwlock); 2078 if (blpnext == NULL && !selfseen) { 2079 mpsend = mp; 2080 mp = NULL; 2081 } else { 2082 mpsend = copymsg(mp); 2083 } 2084 2085 mpsend = reform_vlan_header(mpsend, vlanid, tci, 2086 blpsend->bl_pvid); 2087 2088 if (mpsend == NULL) { 2089 KIINCR(bki_drops); 2090 continue; 2091 } 2092 2093 if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST) 2094 KIINCR(bki_unknown); 2095 else 2096 KIINCR(bki_mbcast); 2097 2098 KLPINCR(blpsend, bkl_xmit); 2099 if ((mpcopy = copymsg(mpsend)) != NULL) { 2100 mac_rx_common(blpsend->bl_mh, NULL, mpcopy); 2101 } 2102 2103 mpsend = mac_ring_tx(blpsend->bl_mh, NULL, mpsend); 2104 freemsg(mpsend); 2105 link_unref(blpsend); 2106 } 2107 } 2108 2109 /* 2110 * At this point, if np is non-NULL, it means that the caller needs to 2111 * continue on the selected link. 2112 */ 2113 return (mp); 2114 } 2115 2116 /* 2117 * Extract and validate the VLAN information for a given packet. This checks 2118 * conformance with the rules for use of the PVID on the link, and for the 2119 * allowed (configured) VLAN set. 2120 * 2121 * Returns B_TRUE if the packet passes, B_FALSE if it fails. 2122 */ 2123 static boolean_t 2124 bridge_get_vlan(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp, 2125 uint16_t *vlanidp, uint16_t *tcip) 2126 { 2127 uint16_t tci, vlanid; 2128 2129 if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) { 2130 ptrdiff_t tpos = offsetof(struct ether_vlan_header, ether_tci); 2131 ptrdiff_t mlen; 2132 2133 /* 2134 * Extract the VLAN ID information, regardless of alignment, 2135 * and without a pullup. This isn't attractive, but we do this 2136 * to avoid having to deal with the pointers stashed in 2137 * hdr_info moving around or having the caller deal with a new 2138 * mblk_t pointer. 2139 */ 2140 while (mp != NULL) { 2141 mlen = MBLKL(mp); 2142 if (mlen > tpos && mlen > 0) 2143 break; 2144 tpos -= mlen; 2145 mp = mp->b_cont; 2146 } 2147 if (mp == NULL) 2148 return (B_FALSE); 2149 tci = mp->b_rptr[tpos] << 8; 2150 if (++tpos >= mlen) { 2151 do { 2152 mp = mp->b_cont; 2153 } while (mp != NULL && MBLKL(mp) == 0); 2154 if (mp == NULL) 2155 return (B_FALSE); 2156 tpos = 0; 2157 } 2158 tci |= mp->b_rptr[tpos]; 2159 2160 vlanid = VLAN_ID(tci); 2161 if (VLAN_CFI(tci) != ETHER_CFI || vlanid > VLAN_ID_MAX) 2162 return (B_FALSE); 2163 if (vlanid == VLAN_ID_NONE || vlanid == blp->bl_pvid) 2164 goto input_no_vlan; 2165 if (!BRIDGE_VLAN_ISSET(blp, vlanid)) 2166 return (B_FALSE); 2167 } else { 2168 tci = 0xFFFF; 2169 input_no_vlan: 2170 /* 2171 * If PVID is set to zero, then untagged traffic is not 2172 * supported here. Do not learn or forward. 
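		 *
		 * To summarize the classification this function performs
		 * (illustrative, assuming bl_pvid == 5 and VLAN 7 in the
		 * allowed set):
		 *
		 *   untagged frame           -> vlanid = 5, tci = 0xFFFF
		 *   tagged, VID 0 or VID 5   -> vlanid = 5, tci from frame
		 *   tagged, VID 7            -> vlanid = 7, tci from frame
		 *   tagged, VID not allowed  -> rejected (B_FALSE)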
2173 */ 2174 if ((vlanid = blp->bl_pvid) == VLAN_ID_NONE) 2175 return (B_FALSE); 2176 } 2177 2178 *tcip = tci; 2179 *vlanidp = vlanid; 2180 return (B_TRUE); 2181 } 2182 2183 /* 2184 * Handle MAC notifications. 2185 */ 2186 static void 2187 bridge_notify_cb(void *arg, mac_notify_type_t note_type) 2188 { 2189 bridge_link_t *blp = arg; 2190 2191 switch (note_type) { 2192 case MAC_NOTE_UNICST: 2193 bridge_new_unicst(blp); 2194 break; 2195 2196 case MAC_NOTE_SDU_SIZE: { 2197 uint_t maxsdu; 2198 bridge_inst_t *bip = blp->bl_inst; 2199 bridge_mac_t *bmp = bip->bi_mac; 2200 boolean_t notify = B_FALSE; 2201 mblk_t *mlist = NULL; 2202 2203 mac_sdu_get(blp->bl_mh, NULL, &maxsdu); 2204 rw_enter(&bip->bi_rwlock, RW_READER); 2205 if (list_prev(&bip->bi_links, blp) == NULL && 2206 list_next(&bip->bi_links, blp) == NULL) { 2207 notify = (maxsdu != bmp->bm_maxsdu); 2208 bmp->bm_maxsdu = maxsdu; 2209 } 2210 blp->bl_maxsdu = maxsdu; 2211 if (maxsdu != bmp->bm_maxsdu) 2212 link_sdu_fail(blp, B_TRUE, &mlist); 2213 else if (notify) 2214 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 2215 rw_exit(&bip->bi_rwlock); 2216 send_up_messages(bip, mlist); 2217 break; 2218 } 2219 } 2220 } 2221 2222 /* 2223 * This is called by the MAC layer. As with the transmit side, we're right in 2224 * the data path for all I/O on this port, so if we don't need to forward this 2225 * packet anywhere, we have to send it upwards via mac_rx_common. 2226 */ 2227 static void 2228 bridge_recv_cb(mac_handle_t mh, mac_resource_handle_t rsrc, mblk_t *mpnext) 2229 { 2230 mblk_t *mp, *mpcopy; 2231 bridge_link_t *blp = (bridge_link_t *)mh; 2232 bridge_inst_t *bip = blp->bl_inst; 2233 bridge_mac_t *bmp = bip->bi_mac; 2234 mac_header_info_t hdr_info; 2235 uint16_t vlanid, tci; 2236 boolean_t trillmode = B_FALSE; 2237 2238 KIINCR(bki_recv); 2239 KLINCR(bkl_recv); 2240 2241 /* 2242 * Regardless of state, check for inbound TRILL packets when TRILL is 2243 * active. These are pulled out of band and sent for TRILL handling. 2244 */ 2245 if (blp->bl_trilldata != NULL) { 2246 void *tdp; 2247 mblk_t *newhead; 2248 mblk_t *tail = NULL; 2249 2250 mutex_enter(&blp->bl_trilllock); 2251 if ((tdp = blp->bl_trilldata) != NULL) { 2252 blp->bl_trillthreads++; 2253 mutex_exit(&blp->bl_trilllock); 2254 trillmode = B_TRUE; 2255 newhead = mpnext; 2256 while ((mp = mpnext) != NULL) { 2257 boolean_t raw_isis, bridge_group; 2258 2259 mpnext = mp->b_next; 2260 2261 /* 2262 * If the header isn't readable, then leave on 2263 * the list and continue. 2264 */ 2265 if (mac_header_info(blp->bl_mh, mp, 2266 &hdr_info) != 0) { 2267 tail = mp; 2268 continue; 2269 } 2270 2271 /* 2272 * The TRILL document specifies that, on 2273 * Ethernet alone, IS-IS packets arrive with 2274 * LLC rather than Ethertype, and using a 2275 * specific destination address. We must check 2276 * for that here. Also, we need to give BPDUs 2277 * to TRILL for processing. 
2278 */ 2279 raw_isis = bridge_group = B_FALSE; 2280 if (hdr_info.mhi_dsttype == 2281 MAC_ADDRTYPE_MULTICAST) { 2282 if (memcmp(hdr_info.mhi_daddr, 2283 all_isis_rbridges, ETHERADDRL) == 0) 2284 raw_isis = B_TRUE; 2285 else if (memcmp(hdr_info.mhi_daddr, 2286 bridge_group_address, ETHERADDRL) == 2287 0) 2288 bridge_group = B_TRUE; 2289 } 2290 if (!raw_isis && !bridge_group && 2291 hdr_info.mhi_bindsap != ETHERTYPE_TRILL && 2292 (hdr_info.mhi_bindsap != ETHERTYPE_VLAN || 2293 /* LINTED: alignment */ 2294 ((struct ether_vlan_header *)mp->b_rptr)-> 2295 ether_type != htons(ETHERTYPE_TRILL))) { 2296 tail = mp; 2297 continue; 2298 } 2299 2300 /* 2301 * We've got TRILL input. Remove from the list 2302 * and send up through the TRILL module. (Send 2303 * a copy through promiscuous receive just to 2304 * support snooping on TRILL. Order isn't 2305 * preserved strictly, but that doesn't matter 2306 * here.) 2307 */ 2308 if (tail != NULL) 2309 tail->b_next = mpnext; 2310 mp->b_next = NULL; 2311 if (mp == newhead) 2312 newhead = mpnext; 2313 mac_trill_snoop(blp->bl_mh, mp); 2314 update_header(mp, &hdr_info, B_TRUE); 2315 /* 2316 * On raw IS-IS and BPDU frames, we have to 2317 * make sure that the length is trimmed 2318 * properly. We use origsap in order to cope 2319 * with jumbograms for IS-IS. (Regular mac 2320 * can't.) 2321 */ 2322 if (raw_isis || bridge_group) { 2323 size_t msglen = msgdsize(mp); 2324 2325 if (msglen > hdr_info.mhi_origsap) { 2326 (void) adjmsg(mp, 2327 hdr_info.mhi_origsap - 2328 msglen); 2329 } else if (msglen < 2330 hdr_info.mhi_origsap) { 2331 freemsg(mp); 2332 continue; 2333 } 2334 } 2335 trill_recv_fn(tdp, blp, rsrc, mp, &hdr_info); 2336 } 2337 mpnext = newhead; 2338 mutex_enter(&blp->bl_trilllock); 2339 if (--blp->bl_trillthreads == 0 && 2340 blp->bl_trilldata == NULL) 2341 cv_broadcast(&blp->bl_trillwait); 2342 } 2343 mutex_exit(&blp->bl_trilllock); 2344 if (mpnext == NULL) 2345 return; 2346 } 2347 2348 /* 2349 * If this is a TRILL RBridge, then just check whether this link is 2350 * used at all for forwarding. If not, then we're done. 2351 */ 2352 if (trillmode) { 2353 if (!(blp->bl_flags & BLF_TRILLACTIVE) || 2354 (blp->bl_flags & BLF_SDUFAIL)) { 2355 mac_rx_common(blp->bl_mh, rsrc, mpnext); 2356 return; 2357 } 2358 } else { 2359 /* 2360 * For regular (STP) bridges, if we're in blocking or listening 2361 * state, then do nothing. We don't learn or forward until 2362 * told to do so. 2363 */ 2364 if (blp->bl_state == BLS_BLOCKLISTEN) { 2365 mac_rx_common(blp->bl_mh, rsrc, mpnext); 2366 return; 2367 } 2368 } 2369 2370 /* 2371 * Send a copy of the message chain up to the observability node users. 2372 * For TRILL, we must obey the VLAN AF rules, so we go packet-by- 2373 * packet. 2374 */ 2375 if (!trillmode && blp->bl_state == BLS_FORWARDING && 2376 (bmp->bm_flags & BMF_STARTED) && 2377 (mp = copymsgchain(mpnext)) != NULL) { 2378 mac_rx(bmp->bm_mh, NULL, mp); 2379 } 2380 2381 /* 2382 * We must be in learning or forwarding state, or using TRILL on a link 2383 * with one or more VLANs active. For each packet in the list, process 2384 * the source address, and then attempt to forward. 2385 */ 2386 while ((mp = mpnext) != NULL) { 2387 mpnext = mp->b_next; 2388 mp->b_next = NULL; 2389 2390 /* 2391 * If we can't decode the header or if the header specifies a 2392 * multicast source address (impossible!), then don't bother 2393 * learning or forwarding, but go ahead and forward up the 2394 * stack for subsequent processing. 
2395 */ 2396 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0 || 2397 (hdr_info.mhi_saddr[0] & 1) != 0) { 2398 KIINCR(bki_drops); 2399 KLINCR(bkl_drops); 2400 mac_rx_common(blp->bl_mh, rsrc, mp); 2401 continue; 2402 } 2403 2404 /* 2405 * Extract and validate the VLAN ID for this packet. 2406 */ 2407 if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) || 2408 !BRIDGE_AF_ISSET(blp, vlanid)) { 2409 mac_rx_common(blp->bl_mh, rsrc, mp); 2410 continue; 2411 } 2412 2413 if (trillmode) { 2414 /* 2415 * Special test required by TRILL document: must 2416 * discard frames with outer address set to ESADI. 2417 */ 2418 if (memcmp(hdr_info.mhi_daddr, all_esadi_rbridges, 2419 ETHERADDRL) == 0) { 2420 mac_rx_common(blp->bl_mh, rsrc, mp); 2421 continue; 2422 } 2423 2424 /* 2425 * If we're in TRILL mode, then the call above to get 2426 * the VLAN ID has also checked that we're the 2427 * appointed forwarder, so report that we're handling 2428 * this packet to any observability node users. 2429 */ 2430 if ((bmp->bm_flags & BMF_STARTED) && 2431 (mpcopy = copymsg(mp)) != NULL) 2432 mac_rx(bmp->bm_mh, NULL, mpcopy); 2433 } 2434 2435 /* 2436 * First process the source address and learn from it. For 2437 * TRILL, we learn only if we're the appointed forwarder. 2438 */ 2439 bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE, 2440 vlanid); 2441 2442 /* 2443 * Now check whether we're forwarding and look up the 2444 * destination. If we can forward, do so. 2445 */ 2446 if (trillmode || blp->bl_state == BLS_FORWARDING) { 2447 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, 2448 B_FALSE, B_FALSE); 2449 } 2450 if (mp != NULL) 2451 mac_rx_common(blp->bl_mh, rsrc, mp); 2452 } 2453 } 2454 2455 2456 /* ARGSUSED */ 2457 static mblk_t * 2458 bridge_xmit_cb(mac_handle_t mh, mac_ring_handle_t rh, mblk_t *mpnext) 2459 { 2460 bridge_link_t *blp = (bridge_link_t *)mh; 2461 bridge_inst_t *bip = blp->bl_inst; 2462 bridge_mac_t *bmp = bip->bi_mac; 2463 mac_header_info_t hdr_info; 2464 uint16_t vlanid, tci; 2465 mblk_t *mp, *mpcopy; 2466 boolean_t trillmode; 2467 2468 trillmode = blp->bl_trilldata != NULL; 2469 2470 /* 2471 * If we're using STP and we're in blocking or listening state, or if 2472 * we're using TRILL and no VLANs are active, then behave as though the 2473 * bridge isn't here at all, and send on the local link alone. 2474 */ 2475 if ((!trillmode && blp->bl_state == BLS_BLOCKLISTEN) || 2476 (trillmode && 2477 (!(blp->bl_flags & BLF_TRILLACTIVE) || 2478 (blp->bl_flags & BLF_SDUFAIL)))) { 2479 KIINCR(bki_sent); 2480 KLINCR(bkl_xmit); 2481 mp = mac_ring_tx(blp->bl_mh, rh, mpnext); 2482 return (mp); 2483 } 2484 2485 /* 2486 * Send a copy of the message up to the observability node users. 2487 * TRILL needs to check on a packet-by-packet basis. 2488 */ 2489 if (!trillmode && blp->bl_state == BLS_FORWARDING && 2490 (bmp->bm_flags & BMF_STARTED) && 2491 (mp = copymsgchain(mpnext)) != NULL) { 2492 mac_rx(bmp->bm_mh, NULL, mp); 2493 } 2494 2495 while ((mp = mpnext) != NULL) { 2496 mpnext = mp->b_next; 2497 mp->b_next = NULL; 2498 2499 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) { 2500 freemsg(mp); 2501 continue; 2502 } 2503 2504 /* 2505 * Extract and validate the VLAN ID for this packet. 
2506 */ 2507 if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) || 2508 !BRIDGE_AF_ISSET(blp, vlanid)) { 2509 freemsg(mp); 2510 continue; 2511 } 2512 2513 /* 2514 * If we're using TRILL, then we've now validated that we're 2515 * the forwarder for this VLAN, so go ahead and let 2516 * observability node users know about the packet. 2517 */ 2518 if (trillmode && (bmp->bm_flags & BMF_STARTED) && 2519 (mpcopy = copymsg(mp)) != NULL) { 2520 mac_rx(bmp->bm_mh, NULL, mpcopy); 2521 } 2522 2523 /* 2524 * We have to learn from our own transmitted packets, because 2525 * there may be a Solaris DLPI raw sender (which can specify its 2526 * own source address) using promiscuous mode for receive. The 2527 * mac layer information won't (and can't) tell us everything 2528 * we need to know. 2529 */ 2530 bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE, 2531 vlanid); 2532 2533 /* attempt forwarding */ 2534 if (trillmode || blp->bl_state == BLS_FORWARDING) { 2535 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, 2536 B_FALSE, B_TRUE); 2537 } 2538 if (mp != NULL) { 2539 mp = mac_ring_tx(blp->bl_mh, rh, mp); 2540 if (mp == NULL) { 2541 KIINCR(bki_sent); 2542 KLINCR(bkl_xmit); 2543 } 2544 } 2545 /* 2546 * If we get stuck, then stop. Don't let the user's output 2547 * packets get out of order. (More importantly: don't try to 2548 * bridge the same packet multiple times if flow control is 2549 * asserted.) 2550 */ 2551 if (mp != NULL) { 2552 mp->b_next = mpnext; 2553 break; 2554 } 2555 } 2556 return (mp); 2557 } 2558 2559 /* 2560 * This is called by TRILL when it decapsulates an packet, and we must forward 2561 * locally. On failure, we just drop. 2562 * 2563 * Note that the ingress_nick reported by TRILL must not represent this local 2564 * node. 2565 */ 2566 void 2567 bridge_trill_decaps(bridge_link_t *blp, mblk_t *mp, uint16_t ingress_nick) 2568 { 2569 mac_header_info_t hdr_info; 2570 uint16_t vlanid, tci; 2571 bridge_inst_t *bip = blp->bl_inst; /* used by macros */ 2572 mblk_t *mpcopy; 2573 2574 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) { 2575 freemsg(mp); 2576 return; 2577 } 2578 2579 /* Extract VLAN ID for this packet. */ 2580 if (hdr_info.mhi_bindsap == ETHERTYPE_VLAN) { 2581 struct ether_vlan_header *evhp; 2582 2583 /* LINTED: alignment */ 2584 evhp = (struct ether_vlan_header *)mp->b_rptr; 2585 tci = ntohs(evhp->ether_tci); 2586 vlanid = VLAN_ID(tci); 2587 } else { 2588 /* Inner VLAN headers are required in TRILL data packets */ 2589 DTRACE_PROBE3(bridge__trill__decaps__novlan, bridge_link_t *, 2590 blp, mblk_t *, mp, uint16_t, ingress_nick); 2591 freemsg(mp); 2592 return; 2593 } 2594 2595 /* Learn the location of this sender in the RBridge network */ 2596 bridge_learn(blp, hdr_info.mhi_saddr, ingress_nick, vlanid); 2597 2598 /* attempt forwarding */ 2599 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, B_TRUE, B_TRUE); 2600 if (mp != NULL) { 2601 if (bridge_can_send(blp, vlanid)) { 2602 /* Deliver a copy locally as well */ 2603 if ((mpcopy = copymsg(mp)) != NULL) 2604 mac_rx_common(blp->bl_mh, NULL, mpcopy); 2605 mp = mac_ring_tx(blp->bl_mh, NULL, mp); 2606 } 2607 if (mp == NULL) { 2608 KIINCR(bki_sent); 2609 KLINCR(bkl_xmit); 2610 } else { 2611 freemsg(mp); 2612 } 2613 } 2614 } 2615 2616 /* 2617 * This function is used by TRILL _only_ to transmit TRILL-encapsulated 2618 * packets. It sends on a single underlying link and does not bridge. 
2619 */ 2620 mblk_t * 2621 bridge_trill_output(bridge_link_t *blp, mblk_t *mp) 2622 { 2623 bridge_inst_t *bip = blp->bl_inst; /* used by macros */ 2624 2625 mac_trill_snoop(blp->bl_mh, mp); 2626 mp = mac_ring_tx(blp->bl_mh, NULL, mp); 2627 if (mp == NULL) { 2628 KIINCR(bki_sent); 2629 KLINCR(bkl_xmit); 2630 } 2631 return (mp); 2632 } 2633 2634 /* 2635 * Set the "appointed forwarder" flag array for this link. TRILL controls 2636 * forwarding on a VLAN basis. The "trillactive" flag is an optimization for 2637 * the forwarder. 2638 */ 2639 void 2640 bridge_trill_setvlans(bridge_link_t *blp, const uint8_t *arr) 2641 { 2642 int i; 2643 uint_t newflags = 0; 2644 2645 for (i = 0; i < BRIDGE_VLAN_ARR_SIZE; i++) { 2646 if ((blp->bl_afs[i] = arr[i]) != 0) 2647 newflags = BLF_TRILLACTIVE; 2648 } 2649 blp->bl_flags = (blp->bl_flags & ~BLF_TRILLACTIVE) | newflags; 2650 } 2651 2652 void 2653 bridge_trill_flush(bridge_link_t *blp, uint16_t vlan, boolean_t dotrill) 2654 { 2655 bridge_inst_t *bip = blp->bl_inst; 2656 bridge_fwd_t *bfp, *bfnext; 2657 avl_tree_t fwd_scavenge; 2658 int i; 2659 2660 _NOTE(ARGUNUSED(vlan)); 2661 2662 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 2663 offsetof(bridge_fwd_t, bf_node)); 2664 rw_enter(&bip->bi_rwlock, RW_WRITER); 2665 bfnext = avl_first(&bip->bi_fwd); 2666 while ((bfp = bfnext) != NULL) { 2667 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 2668 if (bfp->bf_flags & BFF_LOCALADDR) 2669 continue; 2670 if (dotrill) { 2671 /* port doesn't matter if we're flushing TRILL */ 2672 if (bfp->bf_trill_nick == RBRIDGE_NICKNAME_NONE) 2673 continue; 2674 } else { 2675 if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) 2676 continue; 2677 for (i = 0; i < bfp->bf_nlinks; i++) { 2678 if (bfp->bf_links[i] == blp) 2679 break; 2680 } 2681 if (i >= bfp->bf_nlinks) 2682 continue; 2683 } 2684 ASSERT(bfp->bf_flags & BFF_INTREE); 2685 avl_remove(&bip->bi_fwd, bfp); 2686 bfp->bf_flags &= ~BFF_INTREE; 2687 avl_add(&fwd_scavenge, bfp); 2688 } 2689 rw_exit(&bip->bi_rwlock); 2690 bfnext = avl_first(&fwd_scavenge); 2691 while ((bfp = bfnext) != NULL) { 2692 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 2693 avl_remove(&fwd_scavenge, bfp); 2694 fwd_unref(bfp); 2695 } 2696 avl_destroy(&fwd_scavenge); 2697 } 2698 2699 /* 2700 * Let the mac module take or drop a reference to a bridge link. When this is 2701 * called, the mac module is holding the mi_bridge_lock, so the link cannot be 2702 * in the process of entering or leaving a bridge. 2703 */ 2704 static void 2705 bridge_ref_cb(mac_handle_t mh, boolean_t hold) 2706 { 2707 bridge_link_t *blp = (bridge_link_t *)mh; 2708 2709 if (hold) 2710 atomic_inc_uint(&blp->bl_refs); 2711 else 2712 link_unref(blp); 2713 } 2714 2715 /* 2716 * Handle link state changes reported by the mac layer. This acts as a filter 2717 * for link state changes: if a link is reporting down, but there are other 2718 * links still up on the bridge, then the state is changed to "up." When the 2719 * last link goes down, all are marked down, and when the first link goes up, 2720 * all are marked up. (Recursion is avoided by the use of the "redo" function.) 2721 * 2722 * We treat unknown as equivalent to "up." 
2723 */ 2724 static link_state_t 2725 bridge_ls_cb(mac_handle_t mh, link_state_t newls) 2726 { 2727 bridge_link_t *blp = (bridge_link_t *)mh; 2728 bridge_link_t *blcmp; 2729 bridge_inst_t *bip; 2730 bridge_mac_t *bmp; 2731 2732 if (newls != LINK_STATE_DOWN && blp->bl_linkstate != LINK_STATE_DOWN || 2733 (blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) { 2734 blp->bl_linkstate = newls; 2735 return (newls); 2736 } 2737 2738 /* 2739 * Scan first to see if there are any other non-down links. If there 2740 * are, then we're done. Otherwise, if all others are down, then the 2741 * state of this link is the state of the bridge. 2742 */ 2743 bip = blp->bl_inst; 2744 rw_enter(&bip->bi_rwlock, RW_WRITER); 2745 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 2746 blcmp = list_next(&bip->bi_links, blcmp)) { 2747 if (blcmp != blp && 2748 !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) && 2749 blcmp->bl_linkstate != LINK_STATE_DOWN) 2750 break; 2751 } 2752 2753 if (blcmp != NULL) { 2754 /* 2755 * If there are other links that are considered up, then tell 2756 * the caller that the link is actually still up, regardless of 2757 * this link's underlying state. 2758 */ 2759 blp->bl_linkstate = newls; 2760 newls = LINK_STATE_UP; 2761 } else if (blp->bl_linkstate != newls) { 2762 /* 2763 * If we've found no other 'up' links, and this link has 2764 * changed state, then report the new state of the bridge to 2765 * all other clients. 2766 */ 2767 blp->bl_linkstate = newls; 2768 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 2769 blcmp = list_next(&bip->bi_links, blcmp)) { 2770 if (blcmp != blp && !(blcmp->bl_flags & BLF_DELETED)) 2771 mac_link_redo(blcmp->bl_mh, newls); 2772 } 2773 bmp = bip->bi_mac; 2774 if ((bmp->bm_linkstate = newls) != LINK_STATE_DOWN) 2775 bmp->bm_linkstate = LINK_STATE_UP; 2776 mac_link_redo(bmp->bm_mh, bmp->bm_linkstate); 2777 } 2778 rw_exit(&bip->bi_rwlock); 2779 return (newls); 2780 } 2781 2782 static void 2783 bridge_add_link(void *arg) 2784 { 2785 mblk_t *mp = arg; 2786 bridge_stream_t *bsp; 2787 bridge_inst_t *bip, *bipt; 2788 bridge_mac_t *bmp; 2789 datalink_id_t linkid; 2790 int err; 2791 mac_handle_t mh; 2792 uint_t maxsdu; 2793 bridge_link_t *blp = NULL, *blpt; 2794 const mac_info_t *mip; 2795 boolean_t macopen = B_FALSE; 2796 char linkname[MAXLINKNAMELEN]; 2797 char kstatname[KSTAT_STRLEN]; 2798 int i; 2799 link_state_t linkstate; 2800 mblk_t *mlist; 2801 2802 bsp = (bridge_stream_t *)mp->b_next; 2803 mp->b_next = NULL; 2804 bip = bsp->bs_inst; 2805 /* LINTED: alignment */ 2806 linkid = *(datalink_id_t *)mp->b_cont->b_rptr; 2807 2808 /* 2809 * First make sure that there is no other bridge that has this link. 2810 * We don't want to overlap operations from two bridges; the MAC layer 2811 * supports only one bridge on a given MAC at a time. 2812 * 2813 * We rely on the fact that there's just one taskq thread for the 2814 * bridging module: once we've checked for a duplicate, we can drop the 2815 * lock, because no other thread could possibly be adding another link 2816 * until we're done. 
2817 */ 2818 mutex_enter(&inst_lock); 2819 for (bipt = list_head(&inst_list); bipt != NULL; 2820 bipt = list_next(&inst_list, bipt)) { 2821 rw_enter(&bipt->bi_rwlock, RW_READER); 2822 for (blpt = list_head(&bipt->bi_links); blpt != NULL; 2823 blpt = list_next(&bipt->bi_links, blpt)) { 2824 if (linkid == blpt->bl_linkid) 2825 break; 2826 } 2827 rw_exit(&bipt->bi_rwlock); 2828 if (blpt != NULL) 2829 break; 2830 } 2831 mutex_exit(&inst_lock); 2832 if (bipt != NULL) { 2833 err = EBUSY; 2834 goto fail; 2835 } 2836 2837 if ((err = mac_open_by_linkid(linkid, &mh)) != 0) 2838 goto fail; 2839 macopen = B_TRUE; 2840 2841 /* we bridge only Ethernet */ 2842 mip = mac_info(mh); 2843 if (mip->mi_media != DL_ETHER) { 2844 err = ENOTSUP; 2845 goto fail; 2846 } 2847 2848 /* 2849 * Get the current maximum SDU on this interface. If there are other 2850 * links on the bridge, then this one must match, or it errors out. 2851 * Otherwise, the first link becomes the standard for the new bridge. 2852 */ 2853 mac_sdu_get(mh, NULL, &maxsdu); 2854 bmp = bip->bi_mac; 2855 if (list_is_empty(&bip->bi_links)) { 2856 bmp->bm_maxsdu = maxsdu; 2857 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 2858 } 2859 2860 /* figure the kstat name; also used as the mac client name */ 2861 i = MBLKL(mp->b_cont) - sizeof (datalink_id_t); 2862 if (i < 0 || i >= MAXLINKNAMELEN) 2863 i = MAXLINKNAMELEN - 1; 2864 bcopy(mp->b_cont->b_rptr + sizeof (datalink_id_t), linkname, i); 2865 linkname[i] = '\0'; 2866 (void) snprintf(kstatname, sizeof (kstatname), "%s-%s", bip->bi_name, 2867 linkname); 2868 2869 if ((blp = kmem_zalloc(sizeof (*blp), KM_NOSLEEP)) == NULL) { 2870 err = ENOMEM; 2871 goto fail; 2872 } 2873 blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED); 2874 if (blp->bl_lfailmp == NULL) { 2875 kmem_free(blp, sizeof (*blp)); 2876 blp = NULL; 2877 err = ENOMEM; 2878 goto fail; 2879 } 2880 2881 blp->bl_refs = 1; 2882 atomic_inc_uint(&bip->bi_refs); 2883 blp->bl_inst = bip; 2884 blp->bl_mh = mh; 2885 blp->bl_linkid = linkid; 2886 blp->bl_maxsdu = maxsdu; 2887 cv_init(&blp->bl_trillwait, NULL, CV_DRIVER, NULL); 2888 mutex_init(&blp->bl_trilllock, NULL, MUTEX_DRIVER, NULL); 2889 (void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs)); 2890 2891 err = mac_client_open(mh, &blp->bl_mch, kstatname, 0); 2892 if (err != 0) 2893 goto fail; 2894 blp->bl_flags |= BLF_CLIENT_OPEN; 2895 2896 err = mac_margin_add(mh, &blp->bl_margin, B_TRUE); 2897 if (err != 0) 2898 goto fail; 2899 blp->bl_flags |= BLF_MARGIN_ADDED; 2900 2901 blp->bl_mnh = mac_notify_add(mh, bridge_notify_cb, blp); 2902 2903 /* Enable Bridging on the link */ 2904 err = mac_bridge_set(mh, (mac_handle_t)blp); 2905 if (err != 0) 2906 goto fail; 2907 blp->bl_flags |= BLF_SET_BRIDGE; 2908 2909 err = mac_promisc_add(blp->bl_mch, MAC_CLIENT_PROMISC_ALL, NULL, 2910 blp, &blp->bl_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP); 2911 if (err != 0) 2912 goto fail; 2913 blp->bl_flags |= BLF_PROM_ADDED; 2914 2915 bridge_new_unicst(blp); 2916 2917 blp->bl_ksp = kstat_setup((kstat_named_t *)&blp->bl_kstats, 2918 link_kstats_list, Dim(link_kstats_list), kstatname); 2919 2920 /* 2921 * The link holds a reference to the bridge instance, so that the 2922 * instance can't go away before the link is freed. The insertion into 2923 * bi_links holds a reference on the link (reference set to 1 above). 2924 * When marking as removed from bi_links (BLF_DELETED), drop the 2925 * reference on the link. When freeing the link, drop the reference on 2926 * the instance. BLF_LINK_ADDED tracks link insertion in bi_links list. 
2927 */ 2928 rw_enter(&bip->bi_rwlock, RW_WRITER); 2929 list_insert_tail(&bip->bi_links, blp); 2930 blp->bl_flags |= BLF_LINK_ADDED; 2931 2932 /* 2933 * If the new link is no good on this bridge, then let the daemon know 2934 * about the problem. 2935 */ 2936 mlist = NULL; 2937 if (maxsdu != bmp->bm_maxsdu) 2938 link_sdu_fail(blp, B_TRUE, &mlist); 2939 rw_exit(&bip->bi_rwlock); 2940 send_up_messages(bip, mlist); 2941 2942 /* 2943 * Trigger a link state update so that if this link is the first one 2944 * "up" in the bridge, then we notify everyone. This triggers a trip 2945 * through bridge_ls_cb. 2946 */ 2947 linkstate = mac_stat_get(mh, MAC_STAT_LOWLINK_STATE); 2948 blp->bl_linkstate = LINK_STATE_DOWN; 2949 mac_link_update(mh, linkstate); 2950 2951 /* 2952 * We now need to report back to the stream that invoked us, and then 2953 * drop the reference on the stream that we're holding. 2954 */ 2955 miocack(bsp->bs_wq, mp, 0, 0); 2956 stream_unref(bsp); 2957 return; 2958 2959 fail: 2960 if (blp == NULL) { 2961 if (macopen) 2962 mac_close(mh); 2963 } else { 2964 link_shutdown(blp); 2965 } 2966 miocnak(bsp->bs_wq, mp, 0, err); 2967 stream_unref(bsp); 2968 } 2969 2970 static void 2971 bridge_rem_link(void *arg) 2972 { 2973 mblk_t *mp = arg; 2974 bridge_stream_t *bsp; 2975 bridge_inst_t *bip; 2976 bridge_mac_t *bmp; 2977 datalink_id_t linkid; 2978 bridge_link_t *blp, *blsave; 2979 boolean_t found; 2980 mblk_t *mlist; 2981 2982 bsp = (bridge_stream_t *)mp->b_next; 2983 mp->b_next = NULL; 2984 bip = bsp->bs_inst; 2985 /* LINTED: alignment */ 2986 linkid = *(datalink_id_t *)mp->b_cont->b_rptr; 2987 2988 /* 2989 * We become reader here so that we can loop over the other links and 2990 * deliver link up/down notification. 2991 */ 2992 rw_enter(&bip->bi_rwlock, RW_READER); 2993 found = B_FALSE; 2994 for (blp = list_head(&bip->bi_links); blp != NULL; 2995 blp = list_next(&bip->bi_links, blp)) { 2996 if (blp->bl_linkid == linkid && 2997 !(blp->bl_flags & BLF_DELETED)) { 2998 blp->bl_flags |= BLF_DELETED; 2999 (void) ddi_taskq_dispatch(bridge_taskq, link_shutdown, 3000 blp, DDI_SLEEP); 3001 found = B_TRUE; 3002 break; 3003 } 3004 } 3005 3006 /* 3007 * Check if this link is up and the remainder of the links are all 3008 * down. 3009 */ 3010 if (blp != NULL && blp->bl_linkstate != LINK_STATE_DOWN) { 3011 for (blp = list_head(&bip->bi_links); blp != NULL; 3012 blp = list_next(&bip->bi_links, blp)) { 3013 if (blp->bl_linkstate != LINK_STATE_DOWN && 3014 !(blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) 3015 break; 3016 } 3017 if (blp == NULL) { 3018 for (blp = list_head(&bip->bi_links); blp != NULL; 3019 blp = list_next(&bip->bi_links, blp)) { 3020 if (!(blp->bl_flags & BLF_DELETED)) 3021 mac_link_redo(blp->bl_mh, 3022 LINK_STATE_DOWN); 3023 } 3024 bmp = bip->bi_mac; 3025 bmp->bm_linkstate = LINK_STATE_DOWN; 3026 mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN); 3027 } 3028 } 3029 3030 /* 3031 * Check if there's just one working link left on the bridge. If so, 3032 * then that link is now authoritative for bridge MTU. 
3033 */ 3034 blsave = NULL; 3035 for (blp = list_head(&bip->bi_links); blp != NULL; 3036 blp = list_next(&bip->bi_links, blp)) { 3037 if (!(blp->bl_flags & BLF_DELETED)) { 3038 if (blsave == NULL) 3039 blsave = blp; 3040 else 3041 break; 3042 } 3043 } 3044 mlist = NULL; 3045 bmp = bip->bi_mac; 3046 if (blsave != NULL && blp == NULL && 3047 blsave->bl_maxsdu != bmp->bm_maxsdu) { 3048 bmp->bm_maxsdu = blsave->bl_maxsdu; 3049 (void) mac_maxsdu_update(bmp->bm_mh, blsave->bl_maxsdu); 3050 link_sdu_fail(blsave, B_FALSE, &mlist); 3051 } 3052 rw_exit(&bip->bi_rwlock); 3053 send_up_messages(bip, mlist); 3054 3055 if (found) 3056 miocack(bsp->bs_wq, mp, 0, 0); 3057 else 3058 miocnak(bsp->bs_wq, mp, 0, ENOENT); 3059 stream_unref(bsp); 3060 } 3061 3062 /* 3063 * This function intentionally returns with bi_rwlock held; it is intended for 3064 * quick checks and updates. 3065 */ 3066 static bridge_link_t * 3067 enter_link(bridge_inst_t *bip, datalink_id_t linkid) 3068 { 3069 bridge_link_t *blp; 3070 3071 rw_enter(&bip->bi_rwlock, RW_READER); 3072 for (blp = list_head(&bip->bi_links); blp != NULL; 3073 blp = list_next(&bip->bi_links, blp)) { 3074 if (blp->bl_linkid == linkid && !(blp->bl_flags & BLF_DELETED)) 3075 break; 3076 } 3077 return (blp); 3078 } 3079 3080 static void 3081 bridge_ioctl(queue_t *wq, mblk_t *mp) 3082 { 3083 bridge_stream_t *bsp = wq->q_ptr; 3084 bridge_inst_t *bip; 3085 struct iocblk *iop; 3086 int rc = EINVAL; 3087 int len = 0; 3088 bridge_link_t *blp; 3089 cred_t *cr; 3090 3091 /* LINTED: alignment */ 3092 iop = (struct iocblk *)mp->b_rptr; 3093 3094 /* 3095 * For now, all of the bridge ioctls are privileged. 3096 */ 3097 if ((cr = msg_getcred(mp, NULL)) == NULL) 3098 cr = iop->ioc_cr; 3099 if (cr != NULL && secpolicy_net_config(cr, B_FALSE) != 0) { 3100 miocnak(wq, mp, 0, EPERM); 3101 return; 3102 } 3103 3104 switch (iop->ioc_cmd) { 3105 case BRIOC_NEWBRIDGE: { 3106 bridge_newbridge_t *bnb; 3107 3108 if (bsp->bs_inst != NULL || 3109 (rc = miocpullup(mp, sizeof (bridge_newbridge_t))) != 0) 3110 break; 3111 /* LINTED: alignment */ 3112 bnb = (bridge_newbridge_t *)mp->b_cont->b_rptr; 3113 bnb->bnb_name[MAXNAMELEN-1] = '\0'; 3114 rc = bridge_create(bnb->bnb_linkid, bnb->bnb_name, &bip, cr); 3115 if (rc != 0) 3116 break; 3117 3118 rw_enter(&bip->bi_rwlock, RW_WRITER); 3119 if (bip->bi_control != NULL) { 3120 rw_exit(&bip->bi_rwlock); 3121 bridge_unref(bip); 3122 rc = EBUSY; 3123 } else { 3124 atomic_inc_uint(&bip->bi_refs); 3125 bsp->bs_inst = bip; /* stream holds reference */ 3126 bip->bi_control = bsp; 3127 rw_exit(&bip->bi_rwlock); 3128 rc = 0; 3129 } 3130 break; 3131 } 3132 3133 case BRIOC_ADDLINK: 3134 if ((bip = bsp->bs_inst) == NULL || 3135 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0) 3136 break; 3137 /* 3138 * We cannot perform the action in this thread, because we're 3139 * not in process context, and we may already be holding 3140 * MAC-related locks. Place the request on taskq. 3141 */ 3142 mp->b_next = (mblk_t *)bsp; 3143 stream_ref(bsp); 3144 (void) ddi_taskq_dispatch(bridge_taskq, bridge_add_link, mp, 3145 DDI_SLEEP); 3146 return; 3147 3148 case BRIOC_REMLINK: 3149 if ((bip = bsp->bs_inst) == NULL || 3150 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0) 3151 break; 3152 /* 3153 * We cannot perform the action in this thread, because we're 3154 * not in process context, and we may already be holding 3155 * MAC-related locks. Place the request on taskq. 
3156 */ 3157 mp->b_next = (mblk_t *)bsp; 3158 stream_ref(bsp); 3159 (void) ddi_taskq_dispatch(bridge_taskq, bridge_rem_link, mp, 3160 DDI_SLEEP); 3161 return; 3162 3163 case BRIOC_SETSTATE: { 3164 bridge_setstate_t *bss; 3165 3166 if ((bip = bsp->bs_inst) == NULL || 3167 (rc = miocpullup(mp, sizeof (*bss))) != 0) 3168 break; 3169 /* LINTED: alignment */ 3170 bss = (bridge_setstate_t *)mp->b_cont->b_rptr; 3171 if ((blp = enter_link(bip, bss->bss_linkid)) == NULL) { 3172 rc = ENOENT; 3173 } else { 3174 rc = 0; 3175 blp->bl_state = bss->bss_state; 3176 } 3177 rw_exit(&bip->bi_rwlock); 3178 break; 3179 } 3180 3181 case BRIOC_SETPVID: { 3182 bridge_setpvid_t *bsv; 3183 3184 if ((bip = bsp->bs_inst) == NULL || 3185 (rc = miocpullup(mp, sizeof (*bsv))) != 0) 3186 break; 3187 /* LINTED: alignment */ 3188 bsv = (bridge_setpvid_t *)mp->b_cont->b_rptr; 3189 if (bsv->bsv_vlan > VLAN_ID_MAX) 3190 break; 3191 if ((blp = enter_link(bip, bsv->bsv_linkid)) == NULL) { 3192 rc = ENOENT; 3193 } else if (blp->bl_pvid == bsv->bsv_vlan) { 3194 rc = 0; 3195 } else { 3196 rc = 0; 3197 BRIDGE_VLAN_CLR(blp, blp->bl_pvid); 3198 blp->bl_pvid = bsv->bsv_vlan; 3199 if (blp->bl_pvid != 0) 3200 BRIDGE_VLAN_SET(blp, blp->bl_pvid); 3201 } 3202 rw_exit(&bip->bi_rwlock); 3203 break; 3204 } 3205 3206 case BRIOC_VLANENAB: { 3207 bridge_vlanenab_t *bve; 3208 3209 if ((bip = bsp->bs_inst) == NULL || 3210 (rc = miocpullup(mp, sizeof (*bve))) != 0) 3211 break; 3212 /* LINTED: alignment */ 3213 bve = (bridge_vlanenab_t *)mp->b_cont->b_rptr; 3214 if (bve->bve_vlan > VLAN_ID_MAX) 3215 break; 3216 if ((blp = enter_link(bip, bve->bve_linkid)) == NULL) { 3217 rc = ENOENT; 3218 } else { 3219 rc = 0; 3220 /* special case: vlan 0 means "all" */ 3221 if (bve->bve_vlan == 0) { 3222 (void) memset(blp->bl_vlans, 3223 bve->bve_onoff ? ~0 : 0, 3224 sizeof (blp->bl_vlans)); 3225 BRIDGE_VLAN_CLR(blp, 0); 3226 if (blp->bl_pvid != 0) 3227 BRIDGE_VLAN_SET(blp, blp->bl_pvid); 3228 } else if (bve->bve_vlan == blp->bl_pvid) { 3229 rc = EINVAL; 3230 } else if (bve->bve_onoff) { 3231 BRIDGE_VLAN_SET(blp, bve->bve_vlan); 3232 } else { 3233 BRIDGE_VLAN_CLR(blp, bve->bve_vlan); 3234 } 3235 } 3236 rw_exit(&bip->bi_rwlock); 3237 break; 3238 } 3239 3240 case BRIOC_FLUSHFWD: { 3241 bridge_flushfwd_t *bff; 3242 bridge_fwd_t *bfp, *bfnext; 3243 avl_tree_t fwd_scavenge; 3244 int i; 3245 3246 if ((bip = bsp->bs_inst) == NULL || 3247 (rc = miocpullup(mp, sizeof (*bff))) != 0) 3248 break; 3249 /* LINTED: alignment */ 3250 bff = (bridge_flushfwd_t *)mp->b_cont->b_rptr; 3251 rw_enter(&bip->bi_rwlock, RW_WRITER); 3252 /* This case means "all" */ 3253 if (bff->bff_linkid == DATALINK_INVALID_LINKID) { 3254 blp = NULL; 3255 } else { 3256 for (blp = list_head(&bip->bi_links); blp != NULL; 3257 blp = list_next(&bip->bi_links, blp)) { 3258 if (blp->bl_linkid == bff->bff_linkid && 3259 !(blp->bl_flags & BLF_DELETED)) 3260 break; 3261 } 3262 if (blp == NULL) { 3263 rc = ENOENT; 3264 rw_exit(&bip->bi_rwlock); 3265 break; 3266 } 3267 } 3268 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 3269 offsetof(bridge_fwd_t, bf_node)); 3270 bfnext = avl_first(&bip->bi_fwd); 3271 while ((bfp = bfnext) != NULL) { 3272 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 3273 if (bfp->bf_flags & BFF_LOCALADDR) 3274 continue; 3275 if (blp != NULL) { 3276 for (i = 0; i < bfp->bf_maxlinks; i++) { 3277 if (bfp->bf_links[i] == blp) 3278 break; 3279 } 3280 /* 3281 * If the link is there and we're excluding, 3282 * then skip. If the link is not there and 3283 * we're doing only that link, then skip. 
3284 */ 3285 if ((i < bfp->bf_maxlinks) == bff->bff_exclude) 3286 continue; 3287 } 3288 ASSERT(bfp->bf_flags & BFF_INTREE); 3289 avl_remove(&bip->bi_fwd, bfp); 3290 bfp->bf_flags &= ~BFF_INTREE; 3291 avl_add(&fwd_scavenge, bfp); 3292 } 3293 rw_exit(&bip->bi_rwlock); 3294 bfnext = avl_first(&fwd_scavenge); 3295 while ((bfp = bfnext) != NULL) { 3296 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 3297 avl_remove(&fwd_scavenge, bfp); 3298 fwd_unref(bfp); /* drop tree reference */ 3299 } 3300 avl_destroy(&fwd_scavenge); 3301 break; 3302 } 3303 3304 case BRIOC_TABLEMAX: 3305 if ((bip = bsp->bs_inst) == NULL || 3306 (rc = miocpullup(mp, sizeof (uint32_t))) != 0) 3307 break; 3308 /* LINTED: alignment */ 3309 bip->bi_tablemax = *(uint32_t *)mp->b_cont->b_rptr; 3310 break; 3311 } 3312 3313 if (rc == 0) 3314 miocack(wq, mp, len, 0); 3315 else 3316 miocnak(wq, mp, 0, rc); 3317 } 3318 3319 static int 3320 bridge_wput(queue_t *wq, mblk_t *mp) 3321 { 3322 switch (DB_TYPE(mp)) { 3323 case M_IOCTL: 3324 bridge_ioctl(wq, mp); 3325 break; 3326 case M_FLUSH: 3327 if (*mp->b_rptr & FLUSHW) 3328 *mp->b_rptr &= ~FLUSHW; 3329 if (*mp->b_rptr & FLUSHR) 3330 qreply(wq, mp); 3331 else 3332 freemsg(mp); 3333 break; 3334 default: 3335 freemsg(mp); 3336 break; 3337 } 3338 return (0); 3339 } 3340 3341 /* 3342 * This function allocates the main data structures for the bridge driver and 3343 * connects us into devfs. 3344 */ 3345 static void 3346 bridge_inst_init(void) 3347 { 3348 bridge_scan_interval = 5 * drv_usectohz(1000000); 3349 bridge_fwd_age = 25 * drv_usectohz(1000000); 3350 3351 rw_init(&bmac_rwlock, NULL, RW_DRIVER, NULL); 3352 list_create(&bmac_list, sizeof (bridge_mac_t), 3353 offsetof(bridge_mac_t, bm_node)); 3354 list_create(&inst_list, sizeof (bridge_inst_t), 3355 offsetof(bridge_inst_t, bi_node)); 3356 cv_init(&inst_cv, NULL, CV_DRIVER, NULL); 3357 mutex_init(&inst_lock, NULL, MUTEX_DRIVER, NULL); 3358 cv_init(&stream_ref_cv, NULL, CV_DRIVER, NULL); 3359 mutex_init(&stream_ref_lock, NULL, MUTEX_DRIVER, NULL); 3360 3361 mac_bridge_vectors(bridge_xmit_cb, bridge_recv_cb, bridge_ref_cb, 3362 bridge_ls_cb); 3363 } 3364 3365 /* 3366 * This function disconnects from devfs and destroys all data structures in 3367 * preparation for unload. It's assumed that there are no active bridge 3368 * references left at this point. 3369 */ 3370 static void 3371 bridge_inst_fini(void) 3372 { 3373 mac_bridge_vectors(NULL, NULL, NULL, NULL); 3374 if (bridge_timerid != 0) 3375 (void) untimeout(bridge_timerid); 3376 rw_destroy(&bmac_rwlock); 3377 list_destroy(&bmac_list); 3378 list_destroy(&inst_list); 3379 cv_destroy(&inst_cv); 3380 mutex_destroy(&inst_lock); 3381 cv_destroy(&stream_ref_cv); 3382 mutex_destroy(&stream_ref_lock); 3383 } 3384 3385 /* 3386 * bridge_attach() 3387 * 3388 * Description: 3389 * Attach bridge driver to the system. 
3390 */ 3391 static int 3392 bridge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3393 { 3394 if (cmd != DDI_ATTACH) 3395 return (DDI_FAILURE); 3396 3397 if (ddi_create_minor_node(dip, BRIDGE_CTL, S_IFCHR, 0, DDI_PSEUDO, 3398 CLONE_DEV) == DDI_FAILURE) { 3399 return (DDI_FAILURE); 3400 } 3401 3402 if (dld_ioc_register(BRIDGE_IOC, bridge_ioc_list, 3403 DLDIOCCNT(bridge_ioc_list)) != 0) { 3404 ddi_remove_minor_node(dip, BRIDGE_CTL); 3405 return (DDI_FAILURE); 3406 } 3407 3408 bridge_dev_info = dip; 3409 bridge_major = ddi_driver_major(dip); 3410 bridge_taskq = ddi_taskq_create(dip, BRIDGE_DEV_NAME, 1, 3411 TASKQ_DEFAULTPRI, 0); 3412 return (DDI_SUCCESS); 3413 } 3414 3415 /* 3416 * bridge_detach() 3417 * 3418 * Description: 3419 * Detach an interface to the system. 3420 */ 3421 static int 3422 bridge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3423 { 3424 if (cmd != DDI_DETACH) 3425 return (DDI_FAILURE); 3426 3427 ddi_remove_minor_node(dip, NULL); 3428 ddi_taskq_destroy(bridge_taskq); 3429 bridge_dev_info = NULL; 3430 return (DDI_SUCCESS); 3431 } 3432 3433 /* 3434 * bridge_info() 3435 * 3436 * Description: 3437 * Translate "dev_t" to a pointer to the associated "dev_info_t". 3438 */ 3439 /* ARGSUSED */ 3440 static int 3441 bridge_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, 3442 void **result) 3443 { 3444 int rc; 3445 3446 switch (infocmd) { 3447 case DDI_INFO_DEVT2DEVINFO: 3448 if (bridge_dev_info == NULL) { 3449 rc = DDI_FAILURE; 3450 } else { 3451 *result = (void *)bridge_dev_info; 3452 rc = DDI_SUCCESS; 3453 } 3454 break; 3455 case DDI_INFO_DEVT2INSTANCE: 3456 *result = NULL; 3457 rc = DDI_SUCCESS; 3458 break; 3459 default: 3460 rc = DDI_FAILURE; 3461 break; 3462 } 3463 return (rc); 3464 } 3465 3466 static struct module_info bridge_modinfo = { 3467 2105, /* mi_idnum */ 3468 BRIDGE_DEV_NAME, /* mi_idname */ 3469 0, /* mi_minpsz */ 3470 16384, /* mi_maxpsz */ 3471 65536, /* mi_hiwat */ 3472 128 /* mi_lowat */ 3473 }; 3474 3475 static struct qinit bridge_rinit = { 3476 NULL, /* qi_putp */ 3477 NULL, /* qi_srvp */ 3478 bridge_open, /* qi_qopen */ 3479 bridge_close, /* qi_qclose */ 3480 NULL, /* qi_qadmin */ 3481 &bridge_modinfo, /* qi_minfo */ 3482 NULL /* qi_mstat */ 3483 }; 3484 3485 static struct qinit bridge_winit = { 3486 (int (*)())bridge_wput, /* qi_putp */ 3487 NULL, /* qi_srvp */ 3488 NULL, /* qi_qopen */ 3489 NULL, /* qi_qclose */ 3490 NULL, /* qi_qadmin */ 3491 &bridge_modinfo, /* qi_minfo */ 3492 NULL /* qi_mstat */ 3493 }; 3494 3495 static struct streamtab bridge_tab = { 3496 &bridge_rinit, /* st_rdinit */ 3497 &bridge_winit /* st_wrinit */ 3498 }; 3499 3500 /* No STREAMS perimeters; we do all our own locking */ 3501 DDI_DEFINE_STREAM_OPS(bridge_ops, nulldev, nulldev, bridge_attach, 3502 bridge_detach, nodev, bridge_info, D_NEW | D_MP, &bridge_tab, 3503 ddi_quiesce_not_supported); 3504 3505 static struct modldrv modldrv = { 3506 &mod_driverops, 3507 "bridging driver", 3508 &bridge_ops 3509 }; 3510 3511 static struct modlinkage modlinkage = { 3512 MODREV_1, 3513 (void *)&modldrv, 3514 NULL 3515 }; 3516 3517 int 3518 _init(void) 3519 { 3520 int retv; 3521 3522 mac_init_ops(NULL, BRIDGE_DEV_NAME); 3523 bridge_inst_init(); 3524 if ((retv = mod_install(&modlinkage)) != 0) 3525 bridge_inst_fini(); 3526 return (retv); 3527 } 3528 3529 int 3530 _fini(void) 3531 { 3532 int retv; 3533 3534 rw_enter(&bmac_rwlock, RW_READER); 3535 retv = list_is_empty(&bmac_list) ? 
	    0 : EBUSY;
	rw_exit(&bmac_rwlock);
	if (retv == 0 &&
	    (retv = mod_remove(&modlinkage)) == 0)
		bridge_inst_fini();
	return (retv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
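/*
 * Illustrative sketch only (not part of the driver): one way a privileged
 * control stream could drive the ioctls handled by bridge_ioctl() above.  The
 * real consumer is the bridging daemon; the device path, the I_STR framing,
 * and the ADDLINK payload wrapper shown here (a datalink_id_t followed by the
 * link name, as parsed in bridge_add_link()) are assumptions, and the "..."
 * values would come from libdladm.
 *
 *	int fd;
 *	struct strioctl sioc;
 *	bridge_newbridge_t bnb;
 *	struct {
 *		datalink_id_t bal_linkid;	(field names are illustrative)
 *		char bal_name[MAXLINKNAMELEN];
 *	} addarg;
 *
 *	fd = open("/dev/" BRIDGE_CTL, O_RDWR);
 *
 *	bnb.bnb_linkid = ...;
 *	(void) strlcpy(bnb.bnb_name, "mybridge0", sizeof (bnb.bnb_name));
 *	sioc.ic_cmd = BRIOC_NEWBRIDGE;
 *	sioc.ic_timout = 0;
 *	sioc.ic_len = sizeof (bnb);
 *	sioc.ic_dp = (char *)&bnb;
 *	(void) ioctl(fd, I_STR, &sioc);		(must precede ADDLINK)
 *
 *	addarg.bal_linkid = ...;
 *	(void) strlcpy(addarg.bal_name, "net0", sizeof (addarg.bal_name));
 *	sioc.ic_cmd = BRIOC_ADDLINK;
 *	sioc.ic_len = sizeof (addarg);
 *	sioc.ic_dp = (char *)&addarg;
 *	(void) ioctl(fd, I_STR, &sioc);		(dispatched to bridge_taskq)
 */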