1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * This module implements a STREAMS driver that provides layer-two (Ethernet) 29 * bridging functionality. The STREAMS interface is used to provide 30 * observability (snoop/wireshark) and control, but not for interface plumbing. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/bitmap.h> 35 #include <sys/cmn_err.h> 36 #include <sys/conf.h> 37 #include <sys/ddi.h> 38 #include <sys/errno.h> 39 #include <sys/kstat.h> 40 #include <sys/modctl.h> 41 #include <sys/note.h> 42 #include <sys/param.h> 43 #include <sys/policy.h> 44 #include <sys/sdt.h> 45 #include <sys/stat.h> 46 #include <sys/stream.h> 47 #include <sys/stropts.h> 48 #include <sys/strsun.h> 49 #include <sys/sunddi.h> 50 #include <sys/sysmacros.h> 51 #include <sys/systm.h> 52 #include <sys/time.h> 53 #include <sys/dlpi.h> 54 #include <sys/dls.h> 55 #include <sys/mac_ether.h> 56 #include <sys/mac_provider.h> 57 #include <sys/mac_client_priv.h> 58 #include <sys/mac_impl.h> 59 #include <sys/vlan.h> 60 #include <net/bridge.h> 61 #include <net/bridge_impl.h> 62 #include <net/trill.h> 63 #include <sys/dld_ioc.h> 64 65 /* 66 * Locks and reference counts: object lifetime and design. 67 * 68 * bridge_mac_t 69 * Bridge mac (snoop) instances are in bmac_list, which is protected by 70 * bmac_rwlock. They're allocated by bmac_alloc and freed by bridge_timer(). 71 * Every bridge_inst_t has a single bridge_mac_t, but when bridge_inst_t goes 72 * away, the bridge_mac_t remains until either all of the users go away 73 * (detected by a timer) or until the instance is picked up again by the same 74 * bridge starting back up. 75 * 76 * bridge_inst_t 77 * Bridge instances are in inst_list, which is protected by inst_lock. 78 * They're allocated by inst_alloc() and freed by inst_free(). After 79 * allocation, an instance is placed in inst_list, and the reference count is 80 * incremented to represent this. That reference is decremented when the 81 * BIF_SHUTDOWN flag is set, and no new increments may occur. When the last 82 * reference is freed, the instance is removed from the list. 83 * 84 * Bridge instances have lists of links and an AVL tree of forwarding 85 * entries. Each of these structures holds one reference on the bridge 86 * instance. These lists and tree are protected by bi_rwlock. 87 * 88 * bridge_stream_t 89 * Bridge streams are allocated by stream_alloc() and freed by stream_free(). 90 * These streams are created when "bridged" opens /dev/bridgectl, and are 91 * used to create new bridge instances (via BRIOC_NEWBRIDGE) and control the 92 * links on the bridge. 
When a stream closes, the bridge instance created is 93 * destroyed. There's at most one bridge instance for a given control 94 * stream. 95 * 96 * bridge_link_t 97 * Links are allocated by bridge_add_link() and freed by link_free(). The 98 * bi_links list holds a reference to the link. When the BLF_DELETED flag is 99 * set, that reference is dropped. The link isn't removed from the list 100 * until the last reference drops. Each forwarding entry that uses a given 101 * link holds a reference, as does each thread transmitting a packet via the 102 * link. The MAC layer calls in via bridge_ref_cb() to hold a reference on 103 * a link when transmitting. 104 * 105 * It's important that once BLF_DELETED is set, there's no way for the 106 * reference count to increase again. If it can, then the link may be 107 * double-freed. The BLF_FREED flag is intended for use with assertions to 108 * guard against this in testing. 109 * 110 * bridge_fwd_t 111 * Bridge forwarding entries are allocated by bridge_recv_cb() and freed by 112 * fwd_free(). The bi_fwd AVL tree holds one reference to the entry. Unlike 113 * other data structures, the reference is dropped when the entry is removed 114 * from the tree by fwd_delete(), and the BFF_INTREE flag is removed. Each 115 * thread that's forwarding a packet to a known destination holds a reference 116 * to a forwarding entry. 117 * 118 * TRILL notes: 119 * 120 * The TRILL module does all of its I/O through bridging. It uses references 121 * on the bridge_inst_t and bridge_link_t structures, and has seven entry 122 * points and four callbacks. One entry point is for setting the callbacks 123 * (bridge_trill_register_cb). There are four entry points for taking bridge 124 * and link references (bridge_trill_{br,ln}{ref,unref}). The final two 125 * entry points are for decapsulated packets from TRILL (bridge_trill_decaps) 126 * that need to be bridged locally, and for TRILL-encapsulated output packets 127 * (bridge_trill_output). 128 * 129 * The four callbacks comprise two notification functions for bridges and 130 * links being deleted, one function for raw received TRILL packets, and one 131 * for bridge output to non-local TRILL destinations (tunnel entry). 132 */ 133 134 /* 135 * Ethernet reserved multicast addresses for TRILL; used also in TRILL module. 136 */ 137 const uint8_t all_isis_rbridges[] = ALL_ISIS_RBRIDGES; 138 static const uint8_t all_esadi_rbridges[] = ALL_ESADI_RBRIDGES; 139 const uint8_t bridge_group_address[] = BRIDGE_GROUP_ADDRESS; 140 141 static const char *inst_kstats_list[] = { KSINST_NAMES }; 142 static const char *link_kstats_list[] = { KSLINK_NAMES }; 143 144 #define KREF(p, m, vn) p->m.vn.value.ui64 145 #define KINCR(p, m, vn) ++KREF(p, m, vn) 146 #define KDECR(p, m, vn) --KREF(p, m, vn) 147 148 #define KIPINCR(p, vn) KINCR(p, bi_kstats, vn) 149 #define KIPDECR(p, vn) KDECR(p, bi_kstats, vn) 150 #define KLPINCR(p, vn) KINCR(p, bl_kstats, vn) 151 152 #define KIINCR(vn) KIPINCR(bip, vn) 153 #define KIDECR(vn) KIPDECR(bip, vn) 154 #define KLINCR(vn) KLPINCR(blp, vn) 155 156 #define Dim(x) (sizeof (x) / sizeof (*(x))) 157 158 /* Amount of overhead added when encapsulating with VLAN headers */ 159 #define VLAN_INCR (sizeof (struct ether_vlan_header) - \ 160 sizeof (struct ether_header)) 161 162 static dev_info_t *bridge_dev_info; 163 static major_t bridge_major; 164 static ddi_taskq_t *bridge_taskq; 165 166 /* 167 * These are the bridge instance management data structures. The mutex lock 168 * protects the list of bridge instances. 
A reference count is then used on 169 * each instance to determine when to free it. We use mac_minor_hold() to 170 * allocate minor_t values, which are used both for self-cloning /dev/net/ 171 * device nodes as well as client streams. Minor node 0 is reserved for the 172 * allocation control node. 173 */ 174 static list_t inst_list; 175 static kcondvar_t inst_cv; /* Allows us to wait for shutdown */ 176 static kmutex_t inst_lock; 177 178 static krwlock_t bmac_rwlock; 179 static list_t bmac_list; 180 181 /* Wait for taskq entries that use STREAMS */ 182 static kcondvar_t stream_ref_cv; 183 static kmutex_t stream_ref_lock; 184 185 static timeout_id_t bridge_timerid; 186 static clock_t bridge_scan_interval; 187 static clock_t bridge_fwd_age; 188 189 static bridge_inst_t *bridge_find_name(const char *); 190 static void bridge_timer(void *); 191 static void bridge_unref(bridge_inst_t *); 192 193 static const uint8_t zero_addr[ETHERADDRL] = { 0 }; 194 195 /* Global TRILL linkage */ 196 static trill_recv_pkt_t trill_recv_fn; 197 static trill_encap_pkt_t trill_encap_fn; 198 static trill_br_dstr_t trill_brdstr_fn; 199 static trill_ln_dstr_t trill_lndstr_fn; 200 201 /* special settings to accommodate DLD flow control; see dld_str.c */ 202 static struct module_info bridge_dld_modinfo = { 203 0, /* mi_idnum */ 204 "bridge", /* mi_idname */ 205 0, /* mi_minpsz */ 206 INFPSZ, /* mi_maxpsz */ 207 1, /* mi_hiwat */ 208 0 /* mi_lowat */ 209 }; 210 211 static struct qinit bridge_dld_rinit = { 212 NULL, /* qi_putp */ 213 NULL, /* qi_srvp */ 214 dld_open, /* qi_qopen */ 215 dld_close, /* qi_qclose */ 216 NULL, /* qi_qadmin */ 217 &bridge_dld_modinfo, /* qi_minfo */ 218 NULL /* qi_mstat */ 219 }; 220 221 static struct qinit bridge_dld_winit = { 222 (int (*)())dld_wput, /* qi_putp */ 223 (int (*)())dld_wsrv, /* qi_srvp */ 224 NULL, /* qi_qopen */ 225 NULL, /* qi_qclose */ 226 NULL, /* qi_qadmin */ 227 &bridge_dld_modinfo, /* qi_minfo */ 228 NULL /* qi_mstat */ 229 }; 230 231 static int bridge_ioc_listfwd(void *, intptr_t, int, cred_t *, int *); 232 233 /* GLDv3 control ioctls used by Bridging */ 234 static dld_ioc_info_t bridge_ioc_list[] = { 235 {BRIDGE_IOC_LISTFWD, DLDCOPYINOUT, sizeof (bridge_listfwd_t), 236 bridge_ioc_listfwd, NULL}, 237 }; 238 239 /* 240 * Given a bridge mac pointer, get a ref-held pointer to the corresponding 241 * bridge instance, if any. We must hold the global bmac_rwlock so that 242 * bm_inst doesn't slide out from under us. 243 */ 244 static bridge_inst_t * 245 mac_to_inst(const bridge_mac_t *bmp) 246 { 247 bridge_inst_t *bip; 248 249 rw_enter(&bmac_rwlock, RW_READER); 250 if ((bip = bmp->bm_inst) != NULL) 251 atomic_inc_uint(&bip->bi_refs); 252 rw_exit(&bmac_rwlock); 253 return (bip); 254 } 255 256 static void 257 link_sdu_fail(bridge_link_t *blp, boolean_t failed, mblk_t **mlist) 258 { 259 mblk_t *mp; 260 bridge_ctl_t *bcp; 261 bridge_link_t *blcmp; 262 bridge_inst_t *bip; 263 bridge_mac_t *bmp; 264 265 if (failed) { 266 if (blp->bl_flags & BLF_SDUFAIL) 267 return; 268 blp->bl_flags |= BLF_SDUFAIL; 269 } else { 270 if (!(blp->bl_flags & BLF_SDUFAIL)) 271 return; 272 blp->bl_flags &= ~BLF_SDUFAIL; 273 } 274 275 /* 276 * If this link is otherwise up, then check if there are any other 277 * non-failed non-down links. If not, then we control the state of the 278 * whole bridge. 
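 *
 * For example, on a hypothetical two-link bridge: if this link is otherwise
 * up but just failed its SDU check while the other link is up and unfailed,
 * the loop below finds that other link and the bridge state is left alone.
 * If the other link is down, deleted, or also SDU-failed, the loop finds
 * nothing and we push the new state (down on failure, up on recovery) to the
 * bridge observability node via mac_link_redo().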
279 */ 280 bip = blp->bl_inst; 281 bmp = bip->bi_mac; 282 if (blp->bl_linkstate != LINK_STATE_DOWN) { 283 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 284 blcmp = list_next(&bip->bi_links, blcmp)) { 285 if (blp != blcmp && 286 !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) && 287 blcmp->bl_linkstate != LINK_STATE_DOWN) 288 break; 289 } 290 if (blcmp == NULL) { 291 bmp->bm_linkstate = failed ? LINK_STATE_DOWN : 292 LINK_STATE_UP; 293 mac_link_redo(bmp->bm_mh, bmp->bm_linkstate); 294 } 295 } 296 297 /* 298 * If we're becoming failed, then the link's current true state needs 299 * to be reflected upwards to this link's clients. If we're becoming 300 * unfailed, then we get the state of the bridge instead on all 301 * clients. 302 */ 303 if (failed) { 304 if (bmp->bm_linkstate != blp->bl_linkstate) 305 mac_link_redo(blp->bl_mh, blp->bl_linkstate); 306 } else { 307 mac_link_redo(blp->bl_mh, bmp->bm_linkstate); 308 } 309 310 /* get the current mblk we're going to send up */ 311 if ((mp = blp->bl_lfailmp) == NULL && 312 (mp = allocb(sizeof (bridge_ctl_t), BPRI_MED)) == NULL) 313 return; 314 315 /* get a new one for next time */ 316 blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED); 317 318 /* if none for next time, then report only failures */ 319 if (blp->bl_lfailmp == NULL && !failed) { 320 blp->bl_lfailmp = mp; 321 return; 322 } 323 324 /* LINTED: alignment */ 325 bcp = (bridge_ctl_t *)mp->b_rptr; 326 bcp->bc_linkid = blp->bl_linkid; 327 bcp->bc_failed = failed; 328 mp->b_wptr = (uchar_t *)(bcp + 1); 329 mp->b_next = *mlist; 330 *mlist = mp; 331 } 332 333 /* 334 * Send control messages (link SDU changes) using the stream to the 335 * bridge instance daemon. 336 */ 337 static void 338 send_up_messages(bridge_inst_t *bip, mblk_t *mp) 339 { 340 mblk_t *mnext; 341 queue_t *rq; 342 343 rq = bip->bi_control->bs_wq; 344 rq = OTHERQ(rq); 345 while (mp != NULL) { 346 mnext = mp->b_next; 347 mp->b_next = NULL; 348 putnext(rq, mp); 349 mp = mnext; 350 } 351 } 352 353 /* ARGSUSED */ 354 static int 355 bridge_m_getstat(void *arg, uint_t stat, uint64_t *val) 356 { 357 return (ENOTSUP); 358 } 359 360 static int 361 bridge_m_start(void *arg) 362 { 363 bridge_mac_t *bmp = arg; 364 365 bmp->bm_flags |= BMF_STARTED; 366 return (0); 367 } 368 369 static void 370 bridge_m_stop(void *arg) 371 { 372 bridge_mac_t *bmp = arg; 373 374 bmp->bm_flags &= ~BMF_STARTED; 375 } 376 377 /* ARGSUSED */ 378 static int 379 bridge_m_setpromisc(void *arg, boolean_t on) 380 { 381 return (0); 382 } 383 384 /* ARGSUSED */ 385 static int 386 bridge_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 387 { 388 return (0); 389 } 390 391 /* ARGSUSED */ 392 static int 393 bridge_m_unicst(void *arg, const uint8_t *macaddr) 394 { 395 return (ENOTSUP); 396 } 397 398 static mblk_t * 399 bridge_m_tx(void *arg, mblk_t *mp) 400 { 401 _NOTE(ARGUNUSED(arg)); 402 freemsgchain(mp); 403 return (NULL); 404 } 405 406 /* ARGSUSED */ 407 static int 408 bridge_ioc_listfwd(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) 409 { 410 bridge_listfwd_t *blf = karg; 411 bridge_inst_t *bip; 412 bridge_fwd_t *bfp, match; 413 avl_index_t where; 414 415 bip = bridge_find_name(blf->blf_name); 416 if (bip == NULL) 417 return (ENOENT); 418 419 bcopy(blf->blf_dest, match.bf_dest, ETHERADDRL); 420 match.bf_flags |= BFF_VLANLOCAL; 421 rw_enter(&bip->bi_rwlock, RW_READER); 422 if ((bfp = avl_find(&bip->bi_fwd, &match, &where)) == NULL) 423 bfp = avl_nearest(&bip->bi_fwd, where, AVL_AFTER); 424 else 425 bfp = AVL_NEXT(&bip->bi_fwd, bfp); 
426 if (bfp == NULL) { 427 bzero(blf, sizeof (*blf)); 428 } else { 429 bcopy(bfp->bf_dest, blf->blf_dest, ETHERADDRL); 430 blf->blf_trill_nick = bfp->bf_trill_nick; 431 blf->blf_ms_age = 432 drv_hztousec(ddi_get_lbolt() - bfp->bf_lastheard) / 1000; 433 blf->blf_is_local = 434 (bfp->bf_flags & BFF_LOCALADDR) != 0; 435 blf->blf_linkid = bfp->bf_links[0]->bl_linkid; 436 } 437 rw_exit(&bip->bi_rwlock); 438 bridge_unref(bip); 439 return (0); 440 } 441 442 static int 443 bridge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 444 uint_t pr_valsize, const void *pr_val) 445 { 446 bridge_mac_t *bmp = arg; 447 bridge_inst_t *bip; 448 bridge_link_t *blp; 449 int err; 450 uint_t maxsdu; 451 mblk_t *mlist; 452 453 _NOTE(ARGUNUSED(pr_name)); 454 switch (pr_num) { 455 case MAC_PROP_MTU: 456 if (pr_valsize < sizeof (bmp->bm_maxsdu)) { 457 err = EINVAL; 458 break; 459 } 460 (void) bcopy(pr_val, &maxsdu, sizeof (maxsdu)); 461 if (maxsdu == bmp->bm_maxsdu) { 462 err = 0; 463 } else if ((bip = mac_to_inst(bmp)) == NULL) { 464 err = ENXIO; 465 } else { 466 rw_enter(&bip->bi_rwlock, RW_WRITER); 467 mlist = NULL; 468 for (blp = list_head(&bip->bi_links); blp != NULL; 469 blp = list_next(&bip->bi_links, blp)) { 470 if (blp->bl_flags & BLF_DELETED) 471 continue; 472 if (blp->bl_maxsdu == maxsdu) 473 link_sdu_fail(blp, B_FALSE, &mlist); 474 else if (blp->bl_maxsdu == bmp->bm_maxsdu) 475 link_sdu_fail(blp, B_TRUE, &mlist); 476 } 477 rw_exit(&bip->bi_rwlock); 478 bmp->bm_maxsdu = maxsdu; 479 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 480 send_up_messages(bip, mlist); 481 bridge_unref(bip); 482 err = 0; 483 } 484 break; 485 486 default: 487 err = ENOTSUP; 488 break; 489 } 490 return (err); 491 } 492
493 static int 494 bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 495 uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) 496 { 497 bridge_mac_t *bmp = arg; 498 int err = 0; 499 500 _NOTE(ARGUNUSED(pr_name)); 501 switch (pr_num) { 502 case MAC_PROP_MTU: { 503 mac_propval_range_t range; 504 505 if (!(pr_flags & MAC_PROP_POSSIBLE)) 506 return (ENOTSUP); 507 if (pr_valsize < sizeof (mac_propval_range_t)) 508 return (EINVAL); 509 range.mpr_count = 1; 510 range.mpr_type = MAC_PROPVAL_UINT32; 511 range.range_uint32[0].mpur_min = 512 range.range_uint32[0].mpur_max = bmp->bm_maxsdu; 513 bcopy(&range, pr_val, sizeof (range)); 514 *perm = MAC_PROP_PERM_RW; 515 break; 516 } 517 case MAC_PROP_STATUS: 518 if (pr_valsize < sizeof (bmp->bm_linkstate)) { 519 err = EINVAL; 520 } else { 521 bcopy(&bmp->bm_linkstate, pr_val, 522 sizeof (bmp->bm_linkstate)); 523 *perm = MAC_PROP_PERM_READ; 524 } 525 break; 526 527 default: 528 err = ENOTSUP; 529 break; 530 } 531 return (err); 532 } 533
534 static mac_callbacks_t bridge_m_callbacks = { 535 MC_SETPROP | MC_GETPROP, 536 bridge_m_getstat, 537 bridge_m_start, 538 bridge_m_stop, 539 bridge_m_setpromisc, 540 bridge_m_multicst, 541 bridge_m_unicst, 542 bridge_m_tx, 543 NULL, /* ioctl */ 544 NULL, /* getcapab */ 545 NULL, /* open */ 546 NULL, /* close */ 547 bridge_m_setprop, 548 bridge_m_getprop 549 }; 550 551 /* 552 * Create kstats from a list. 
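 *
 * As a usage sketch, the per-instance kstats later in this file are created
 * with:
 *
 *	bip->bi_ksp = kstat_setup((kstat_named_t *)&bip->bi_kstats,
 *	    inst_kstats_list, Dim(inst_kstats_list), bip->bi_name);
 *
 * and per-link kstats are built the same way from link_kstats_list.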
553 */ 554 static kstat_t * 555 kstat_setup(kstat_named_t *knt, const char **names, int nstat, 556 const char *unitname) 557 { 558 kstat_t *ksp; 559 int i; 560 561 for (i = 0; i < nstat; i++) 562 kstat_named_init(&knt[i], names[i], KSTAT_DATA_UINT64); 563 564 ksp = kstat_create_zone("bridge", 0, unitname, "net", 565 KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID); 566 if (ksp != NULL) { 567 ksp->ks_data = knt; 568 kstat_install(ksp); 569 } 570 return (ksp); 571 } 572 573 /* 574 * Find an existing bridge_mac_t structure or allocate a new one for the given 575 * bridge instance. This creates the mac driver instance that snoop can use. 576 */ 577 static int 578 bmac_alloc(bridge_inst_t *bip, bridge_mac_t **bmacp) 579 { 580 bridge_mac_t *bmp, *bnew; 581 mac_register_t *mac; 582 int err; 583 584 *bmacp = NULL; 585 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 586 return (EINVAL); 587 588 bnew = kmem_zalloc(sizeof (*bnew), KM_SLEEP); 589 590 rw_enter(&bmac_rwlock, RW_WRITER); 591 for (bmp = list_head(&bmac_list); bmp != NULL; 592 bmp = list_next(&bmac_list, bmp)) { 593 if (strcmp(bip->bi_name, bmp->bm_name) == 0) { 594 ASSERT(bmp->bm_inst == NULL); 595 bmp->bm_inst = bip; 596 rw_exit(&bmac_rwlock); 597 kmem_free(bnew, sizeof (*bnew)); 598 mac_free(mac); 599 *bmacp = bmp; 600 return (0); 601 } 602 } 603 604 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 605 mac->m_driver = bnew; 606 mac->m_dip = bridge_dev_info; 607 mac->m_instance = (uint_t)-1; 608 mac->m_src_addr = (uint8_t *)zero_addr; 609 mac->m_callbacks = &bridge_m_callbacks; 610 611 /* 612 * Note that the SDU limits are irrelevant, as nobody transmits on the 613 * bridge node itself. It's mainly for monitoring but we allow 614 * setting the bridge MTU for quick transition of all links part of the 615 * bridge to a new MTU. 616 */ 617 mac->m_min_sdu = 1; 618 mac->m_max_sdu = 1500; 619 err = mac_register(mac, &bnew->bm_mh); 620 mac_free(mac); 621 if (err != 0) { 622 rw_exit(&bmac_rwlock); 623 kmem_free(bnew, sizeof (*bnew)); 624 return (err); 625 } 626 627 bnew->bm_inst = bip; 628 (void) strcpy(bnew->bm_name, bip->bi_name); 629 if (list_is_empty(&bmac_list)) { 630 bridge_timerid = timeout(bridge_timer, NULL, 631 bridge_scan_interval); 632 } 633 list_insert_tail(&bmac_list, bnew); 634 rw_exit(&bmac_rwlock); 635 636 /* 637 * Mark the MAC as unable to go "active" so that only passive clients 638 * (such as snoop) can bind to it. 639 */ 640 mac_no_active(bnew->bm_mh); 641 *bmacp = bnew; 642 return (0); 643 } 644 645 /* 646 * Disconnect the given bridge_mac_t from its bridge instance. The bridge 647 * instance is going away. The mac instance can't go away until the clients 648 * are gone (see bridge_timer). 649 */ 650 static void 651 bmac_disconnect(bridge_mac_t *bmp) 652 { 653 bridge_inst_t *bip; 654 655 bmp->bm_linkstate = LINK_STATE_DOWN; 656 mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN); 657 658 rw_enter(&bmac_rwlock, RW_READER); 659 bip = bmp->bm_inst; 660 bip->bi_mac = NULL; 661 bmp->bm_inst = NULL; 662 rw_exit(&bmac_rwlock); 663 } 664 665 /* This is used by the avl trees to sort forwarding table entries */ 666 static int 667 fwd_compare(const void *addr1, const void *addr2) 668 { 669 const bridge_fwd_t *fwd1 = addr1; 670 const bridge_fwd_t *fwd2 = addr2; 671 int diff = memcmp(fwd1->bf_dest, fwd2->bf_dest, ETHERADDRL); 672 673 if (diff != 0) 674 return (diff > 0 ? 
1 : -1); 675 676 if ((fwd1->bf_flags ^ fwd2->bf_flags) & BFF_VLANLOCAL) { 677 if (fwd1->bf_vlanid > fwd2->bf_vlanid) 678 return (1); 679 else if (fwd1->bf_vlanid < fwd2->bf_vlanid) 680 return (-1); 681 } 682 return (0); 683 } 684 685 static void 686 inst_free(bridge_inst_t *bip) 687 { 688 ASSERT(bip->bi_mac == NULL); 689 rw_destroy(&bip->bi_rwlock); 690 list_destroy(&bip->bi_links); 691 cv_destroy(&bip->bi_linkwait); 692 avl_destroy(&bip->bi_fwd); 693 if (bip->bi_ksp != NULL) 694 kstat_delete(bip->bi_ksp); 695 kmem_free(bip, sizeof (*bip)); 696 } 697 698 static bridge_inst_t * 699 inst_alloc(const char *bridge) 700 { 701 bridge_inst_t *bip; 702 703 bip = kmem_zalloc(sizeof (*bip), KM_SLEEP); 704 bip->bi_refs = 1; 705 (void) strcpy(bip->bi_name, bridge); 706 rw_init(&bip->bi_rwlock, NULL, RW_DRIVER, NULL); 707 list_create(&bip->bi_links, sizeof (bridge_link_t), 708 offsetof(bridge_link_t, bl_node)); 709 cv_init(&bip->bi_linkwait, NULL, CV_DRIVER, NULL); 710 avl_create(&bip->bi_fwd, fwd_compare, sizeof (bridge_fwd_t), 711 offsetof(bridge_fwd_t, bf_node)); 712 return (bip); 713 } 714 715 static bridge_inst_t * 716 bridge_find_name(const char *bridge) 717 { 718 bridge_inst_t *bip; 719 720 mutex_enter(&inst_lock); 721 for (bip = list_head(&inst_list); bip != NULL; 722 bip = list_next(&inst_list, bip)) { 723 if (!(bip->bi_flags & BIF_SHUTDOWN) && 724 strcmp(bridge, bip->bi_name) == 0) { 725 atomic_inc_uint(&bip->bi_refs); 726 break; 727 } 728 } 729 mutex_exit(&inst_lock); 730 731 return (bip); 732 } 733 734 static int 735 bridge_create(datalink_id_t linkid, const char *bridge, bridge_inst_t **bipc, 736 cred_t *cred) 737 { 738 bridge_inst_t *bip, *bipnew; 739 bridge_mac_t *bmp = NULL; 740 int err; 741 742 *bipc = NULL; 743 bipnew = inst_alloc(bridge); 744 745 mutex_enter(&inst_lock); 746 lookup_retry: 747 for (bip = list_head(&inst_list); bip != NULL; 748 bip = list_next(&inst_list, bip)) { 749 if (strcmp(bridge, bip->bi_name) == 0) 750 break; 751 } 752 753 /* This should not take long; if it does, we've got a design problem */ 754 if (bip != NULL && (bip->bi_flags & BIF_SHUTDOWN)) { 755 cv_wait(&inst_cv, &inst_lock); 756 goto lookup_retry; 757 } 758 759 if (bip != NULL) { 760 /* We weren't expecting to find anything */ 761 bip = NULL; 762 err = EEXIST; 763 } else { 764 bip = bipnew; 765 bipnew = NULL; 766 list_insert_tail(&inst_list, bip); 767 } 768 769 mutex_exit(&inst_lock); 770 if (bip == NULL) 771 goto fail; 772 773 bip->bi_ksp = kstat_setup((kstat_named_t *)&bip->bi_kstats, 774 inst_kstats_list, Dim(inst_kstats_list), bip->bi_name); 775 776 err = bmac_alloc(bip, &bmp); 777 if ((bip->bi_mac = bmp) == NULL) 778 goto fail_create; 779 780 /* 781 * bm_inst is set, so the timer cannot yank the DLS rug from under us. 782 * No extra locking is needed here. 
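 *
 * (bridge_timer() ignores any bridge_mac_t whose bm_inst is still set, so it
 * cannot tear down the DLS devnet or unregister the MAC while we are still
 * attaching to it here.)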
783 */ 784 if (!(bmp->bm_flags & BMF_DLS)) { 785 err = dls_devnet_create(bmp->bm_mh, linkid, crgetzoneid(cred)); 786 if (err != 0) 787 goto fail_create; 788 bmp->bm_flags |= BMF_DLS; 789 } 790 791 bip->bi_dev = makedevice(bridge_major, mac_minor(bmp->bm_mh)); 792 *bipc = bip; 793 return (0); 794 795 fail_create: 796 if (bmp != NULL) 797 bmac_disconnect(bip->bi_mac); 798 bipnew = bip; 799 fail: 800 ASSERT(bipnew->bi_trilldata == NULL); 801 bipnew->bi_flags |= BIF_SHUTDOWN; 802 inst_free(bipnew); 803 return (err); 804 } 805 806 static void 807 bridge_unref(bridge_inst_t *bip) 808 { 809 if (atomic_dec_uint_nv(&bip->bi_refs) == 0) { 810 ASSERT(bip->bi_flags & BIF_SHUTDOWN); 811 /* free up mac for reuse before leaving global list */ 812 if (bip->bi_mac != NULL) 813 bmac_disconnect(bip->bi_mac); 814 mutex_enter(&inst_lock); 815 list_remove(&inst_list, bip); 816 cv_broadcast(&inst_cv); 817 mutex_exit(&inst_lock); 818 inst_free(bip); 819 } 820 } 821 822 /* 823 * Stream instances are used only for allocating bridges and serving as a 824 * control node. They serve no data-handling function. 825 */ 826 static bridge_stream_t * 827 stream_alloc(void) 828 { 829 bridge_stream_t *bsp; 830 minor_t mn; 831 832 if ((mn = mac_minor_hold(B_FALSE)) == 0) 833 return (NULL); 834 bsp = kmem_zalloc(sizeof (*bsp), KM_SLEEP); 835 bsp->bs_minor = mn; 836 return (bsp); 837 } 838 839 static void 840 stream_free(bridge_stream_t *bsp) 841 { 842 mac_minor_rele(bsp->bs_minor); 843 kmem_free(bsp, sizeof (*bsp)); 844 } 845 846 /* Reference hold/release functions for STREAMS-related taskq */ 847 static void 848 stream_ref(bridge_stream_t *bsp) 849 { 850 mutex_enter(&stream_ref_lock); 851 bsp->bs_taskq_cnt++; 852 mutex_exit(&stream_ref_lock); 853 } 854 855 static void 856 stream_unref(bridge_stream_t *bsp) 857 { 858 mutex_enter(&stream_ref_lock); 859 if (--bsp->bs_taskq_cnt == 0) 860 cv_broadcast(&stream_ref_cv); 861 mutex_exit(&stream_ref_lock); 862 } 863 864 static void 865 link_free(bridge_link_t *blp) 866 { 867 bridge_inst_t *bip = blp->bl_inst; 868 869 ASSERT(!(blp->bl_flags & BLF_FREED)); 870 blp->bl_flags |= BLF_FREED; 871 if (blp->bl_ksp != NULL) 872 kstat_delete(blp->bl_ksp); 873 if (blp->bl_lfailmp != NULL) 874 freeb(blp->bl_lfailmp); 875 cv_destroy(&blp->bl_trillwait); 876 mutex_destroy(&blp->bl_trilllock); 877 kmem_free(blp, sizeof (*blp)); 878 /* Don't unreference the bridge until the MAC is closed */ 879 bridge_unref(bip); 880 } 881 882 static void 883 link_unref(bridge_link_t *blp) 884 { 885 if (atomic_dec_uint_nv(&blp->bl_refs) == 0) { 886 bridge_inst_t *bip = blp->bl_inst; 887 888 ASSERT(blp->bl_flags & BLF_DELETED); 889 rw_enter(&bip->bi_rwlock, RW_WRITER); 890 list_remove(&bip->bi_links, blp); 891 rw_exit(&bip->bi_rwlock); 892 if (bip->bi_trilldata != NULL && list_is_empty(&bip->bi_links)) 893 cv_broadcast(&bip->bi_linkwait); 894 link_free(blp); 895 } 896 } 897 898 static bridge_fwd_t * 899 fwd_alloc(const uint8_t *addr, uint_t nlinks, uint16_t nick) 900 { 901 bridge_fwd_t *bfp; 902 903 bfp = kmem_zalloc(sizeof (*bfp) + (nlinks * sizeof (bridge_link_t *)), 904 KM_NOSLEEP); 905 if (bfp != NULL) { 906 bcopy(addr, bfp->bf_dest, ETHERADDRL); 907 bfp->bf_lastheard = ddi_get_lbolt(); 908 bfp->bf_maxlinks = nlinks; 909 bfp->bf_links = (bridge_link_t **)(bfp + 1); 910 bfp->bf_trill_nick = nick; 911 } 912 return (bfp); 913 } 914 915 static bridge_fwd_t * 916 fwd_find(bridge_inst_t *bip, const uint8_t *addr, uint16_t vlanid) 917 { 918 bridge_fwd_t *bfp, *vbfp; 919 bridge_fwd_t match; 920 921 bcopy(addr, match.bf_dest, 
ETHERADDRL); 922 match.bf_flags = 0; 923 rw_enter(&bip->bi_rwlock, RW_READER); 924 if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) { 925 if (bfp->bf_vlanid != vlanid && bfp->bf_vcnt > 0) { 926 match.bf_vlanid = vlanid; 927 match.bf_flags = BFF_VLANLOCAL; 928 vbfp = avl_find(&bip->bi_fwd, &match, NULL); 929 if (vbfp != NULL) 930 bfp = vbfp; 931 } 932 atomic_inc_uint(&bfp->bf_refs); 933 } 934 rw_exit(&bip->bi_rwlock); 935 return (bfp); 936 } 937 938 static void 939 fwd_free(bridge_fwd_t *bfp) 940 { 941 uint_t i; 942 bridge_inst_t *bip = bfp->bf_links[0]->bl_inst; 943 944 KIDECR(bki_count); 945 for (i = 0; i < bfp->bf_nlinks; i++) 946 link_unref(bfp->bf_links[i]); 947 kmem_free(bfp, 948 sizeof (*bfp) + bfp->bf_maxlinks * sizeof (bridge_link_t *)); 949 } 950 951 static void 952 fwd_unref(bridge_fwd_t *bfp) 953 { 954 if (atomic_dec_uint_nv(&bfp->bf_refs) == 0) { 955 ASSERT(!(bfp->bf_flags & BFF_INTREE)); 956 fwd_free(bfp); 957 } 958 } 959 960 static void 961 fwd_delete(bridge_fwd_t *bfp) 962 { 963 bridge_inst_t *bip; 964 bridge_fwd_t *bfpzero; 965 966 if (bfp->bf_flags & BFF_INTREE) { 967 ASSERT(bfp->bf_nlinks > 0); 968 bip = bfp->bf_links[0]->bl_inst; 969 rw_enter(&bip->bi_rwlock, RW_WRITER); 970 /* Another thread could beat us to this */ 971 if (bfp->bf_flags & BFF_INTREE) { 972 avl_remove(&bip->bi_fwd, bfp); 973 bfp->bf_flags &= ~BFF_INTREE; 974 if (bfp->bf_flags & BFF_VLANLOCAL) { 975 bfp->bf_flags &= ~BFF_VLANLOCAL; 976 bfpzero = avl_find(&bip->bi_fwd, bfp, NULL); 977 if (bfpzero != NULL && bfpzero->bf_vcnt > 0) 978 bfpzero->bf_vcnt--; 979 } 980 rw_exit(&bip->bi_rwlock); 981 fwd_unref(bfp); /* no longer in avl tree */ 982 } else { 983 rw_exit(&bip->bi_rwlock); 984 } 985 } 986 } 987 988 static boolean_t 989 fwd_insert(bridge_inst_t *bip, bridge_fwd_t *bfp) 990 { 991 avl_index_t idx; 992 boolean_t retv; 993 994 rw_enter(&bip->bi_rwlock, RW_WRITER); 995 if (!(bip->bi_flags & BIF_SHUTDOWN) && 996 avl_numnodes(&bip->bi_fwd) < bip->bi_tablemax && 997 avl_find(&bip->bi_fwd, bfp, &idx) == NULL) { 998 avl_insert(&bip->bi_fwd, bfp, idx); 999 bfp->bf_flags |= BFF_INTREE; 1000 atomic_inc_uint(&bfp->bf_refs); /* avl entry */ 1001 retv = B_TRUE; 1002 } else { 1003 retv = B_FALSE; 1004 } 1005 rw_exit(&bip->bi_rwlock); 1006 return (retv); 1007 } 1008 1009 static void 1010 fwd_update_local(bridge_link_t *blp, const uint8_t *oldaddr, 1011 const uint8_t *newaddr) 1012 { 1013 bridge_inst_t *bip = blp->bl_inst; 1014 bridge_fwd_t *bfp, *bfnew; 1015 bridge_fwd_t match; 1016 avl_index_t idx; 1017 boolean_t drop_ref = B_FALSE; 1018 1019 if (bcmp(oldaddr, newaddr, ETHERADDRL) == 0) 1020 return; 1021 1022 if (bcmp(oldaddr, zero_addr, ETHERADDRL) == 0) 1023 goto no_old_addr; 1024 1025 /* 1026 * Find the previous entry, and remove our link from it. 1027 */ 1028 bcopy(oldaddr, match.bf_dest, ETHERADDRL); 1029 rw_enter(&bip->bi_rwlock, RW_WRITER); 1030 if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) { 1031 int i; 1032 1033 /* 1034 * See if we're in the list, and remove if so. 1035 */ 1036 for (i = 0; i < bfp->bf_nlinks; i++) { 1037 if (bfp->bf_links[i] == blp) { 1038 /* 1039 * We assume writes are atomic, so no special 1040 * MT handling is needed. The list length is 1041 * decremented first, and then we remove 1042 * entries. 
1043 */ 1044 bfp->bf_nlinks--; 1045 for (; i < bfp->bf_nlinks; i++) 1046 bfp->bf_links[i] = bfp->bf_links[i + 1]; 1047 drop_ref = B_TRUE; 1048 break; 1049 } 1050 } 1051 /* If no more links, then remove and free up */ 1052 if (bfp->bf_nlinks == 0) { 1053 avl_remove(&bip->bi_fwd, bfp); 1054 bfp->bf_flags &= ~BFF_INTREE; 1055 } else { 1056 bfp = NULL; 1057 } 1058 } 1059 rw_exit(&bip->bi_rwlock); 1060 if (bfp != NULL) 1061 fwd_unref(bfp); /* no longer in avl tree */ 1062 1063 /* 1064 * Now get the new link address and add this link to the list. The 1065 * list should be of length 1 unless the user has configured multiple 1066 * NICs with the same address. (That's an incorrect configuration, but 1067 * we support it anyway.) 1068 */ 1069 no_old_addr: 1070 bfp = NULL; 1071 if ((bip->bi_flags & BIF_SHUTDOWN) || 1072 bcmp(newaddr, zero_addr, ETHERADDRL) == 0) 1073 goto no_new_addr; 1074 1075 bcopy(newaddr, match.bf_dest, ETHERADDRL); 1076 rw_enter(&bip->bi_rwlock, RW_WRITER); 1077 if ((bfp = avl_find(&bip->bi_fwd, &match, &idx)) == NULL) { 1078 bfnew = fwd_alloc(newaddr, 1, RBRIDGE_NICKNAME_NONE); 1079 if (bfnew != NULL) 1080 KIINCR(bki_count); 1081 } else if (bfp->bf_nlinks < bfp->bf_maxlinks) { 1082 /* special case: link fits in existing entry */ 1083 bfnew = bfp; 1084 } else { 1085 bfnew = fwd_alloc(newaddr, bfp->bf_nlinks + 1, 1086 RBRIDGE_NICKNAME_NONE); 1087 if (bfnew != NULL) { 1088 KIINCR(bki_count); 1089 avl_remove(&bip->bi_fwd, bfp); 1090 bfp->bf_flags &= ~BFF_INTREE; 1091 bfnew->bf_nlinks = bfp->bf_nlinks; 1092 bcopy(bfp->bf_links, bfnew->bf_links, 1093 bfp->bf_nlinks * sizeof (bfp)); 1094 /* reset the idx value due to removal above */ 1095 (void) avl_find(&bip->bi_fwd, &match, &idx); 1096 } 1097 } 1098 1099 if (bfnew != NULL) { 1100 bfnew->bf_links[bfnew->bf_nlinks++] = blp; 1101 if (drop_ref) 1102 drop_ref = B_FALSE; 1103 else 1104 atomic_inc_uint(&blp->bl_refs); /* bf_links entry */ 1105 1106 if (bfnew != bfp) { 1107 /* local addresses are not subject to table limits */ 1108 avl_insert(&bip->bi_fwd, bfnew, idx); 1109 bfnew->bf_flags |= (BFF_INTREE | BFF_LOCALADDR); 1110 atomic_inc_uint(&bfnew->bf_refs); /* avl entry */ 1111 } 1112 } 1113 rw_exit(&bip->bi_rwlock); 1114 1115 no_new_addr: 1116 /* 1117 * If we found an existing entry and we replaced it with a new one, 1118 * then drop the table reference from the old one. We removed it from 1119 * the AVL tree above. 1120 */ 1121 if (bfnew != NULL && bfp != NULL && bfnew != bfp) 1122 fwd_unref(bfp); 1123 1124 /* Account for removed entry. */ 1125 if (drop_ref) 1126 link_unref(blp); 1127 } 1128 1129 static void 1130 bridge_new_unicst(bridge_link_t *blp) 1131 { 1132 uint8_t new_mac[ETHERADDRL]; 1133 1134 mac_unicast_primary_get(blp->bl_mh, new_mac); 1135 fwd_update_local(blp, blp->bl_local_mac, new_mac); 1136 bcopy(new_mac, blp->bl_local_mac, ETHERADDRL); 1137 } 1138 1139 /* 1140 * We must shut down a link prior to freeing it, and doing that requires 1141 * blocking to wait for running MAC threads while holding a reference. This is 1142 * run from a taskq to accomplish proper link shutdown followed by reference 1143 * drop. 1144 */ 1145 static void 1146 link_shutdown(void *arg) 1147 { 1148 bridge_link_t *blp = arg; 1149 mac_handle_t mh = blp->bl_mh; 1150 bridge_inst_t *bip; 1151 bridge_fwd_t *bfp, *bfnext; 1152 avl_tree_t fwd_scavenge; 1153 int i; 1154 1155 /* 1156 * This link is being destroyed. Notify TRILL now that it's no longer 1157 * possible to send packets. 
Data packets may still arrive until TRILL 1158 * calls bridge_trill_lnunref. 1159 */ 1160 if (blp->bl_trilldata != NULL) 1161 trill_lndstr_fn(blp->bl_trilldata, blp); 1162 1163 if (blp->bl_flags & BLF_PROM_ADDED) 1164 (void) mac_promisc_remove(blp->bl_mphp); 1165 1166 if (blp->bl_flags & BLF_SET_BRIDGE) 1167 mac_bridge_clear(mh, (mac_handle_t)blp); 1168 1169 if (blp->bl_flags & BLF_MARGIN_ADDED) { 1170 (void) mac_notify_remove(blp->bl_mnh, B_TRUE); 1171 (void) mac_margin_remove(mh, blp->bl_margin); 1172 } 1173 1174 /* Tell the clients the real link state when we leave */ 1175 mac_link_redo(blp->bl_mh, 1176 mac_stat_get(blp->bl_mh, MAC_STAT_LOWLINK_STATE)); 1177 1178 /* Destroy all of the forwarding entries related to this link */ 1179 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 1180 offsetof(bridge_fwd_t, bf_node)); 1181 bip = blp->bl_inst; 1182 rw_enter(&bip->bi_rwlock, RW_WRITER); 1183 bfnext = avl_first(&bip->bi_fwd); 1184 while ((bfp = bfnext) != NULL) { 1185 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 1186 for (i = 0; i < bfp->bf_nlinks; i++) { 1187 if (bfp->bf_links[i] == blp) 1188 break; 1189 } 1190 if (i >= bfp->bf_nlinks) 1191 continue; 1192 if (bfp->bf_nlinks > 1) { 1193 /* note that this can't be the last reference */ 1194 link_unref(blp); 1195 bfp->bf_nlinks--; 1196 for (; i < bfp->bf_nlinks; i++) 1197 bfp->bf_links[i] = bfp->bf_links[i + 1]; 1198 } else { 1199 ASSERT(bfp->bf_flags & BFF_INTREE); 1200 avl_remove(&bip->bi_fwd, bfp); 1201 bfp->bf_flags &= ~BFF_INTREE; 1202 avl_add(&fwd_scavenge, bfp); 1203 } 1204 } 1205 rw_exit(&bip->bi_rwlock); 1206 bfnext = avl_first(&fwd_scavenge); 1207 while ((bfp = bfnext) != NULL) { 1208 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 1209 avl_remove(&fwd_scavenge, bfp); 1210 fwd_unref(bfp); 1211 } 1212 avl_destroy(&fwd_scavenge); 1213 1214 if (blp->bl_flags & BLF_CLIENT_OPEN) 1215 mac_client_close(blp->bl_mch, 0); 1216 1217 mac_close(mh); 1218 1219 /* 1220 * We are now completely removed from the active list, so drop the 1221 * reference (see bridge_add_link). 1222 */ 1223 link_unref(blp); 1224 } 1225 1226 static void 1227 shutdown_inst(bridge_inst_t *bip) 1228 { 1229 bridge_link_t *blp, *blnext; 1230 bridge_fwd_t *bfp; 1231 1232 mutex_enter(&inst_lock); 1233 if (bip->bi_flags & BIF_SHUTDOWN) { 1234 mutex_exit(&inst_lock); 1235 return; 1236 } 1237 1238 /* 1239 * Once on the inst_list, the bridge instance must not leave that list 1240 * without having the shutdown flag set first. When the shutdown flag 1241 * is set, we own the list reference, so we must drop it before 1242 * returning. 1243 */ 1244 bip->bi_flags |= BIF_SHUTDOWN; 1245 mutex_exit(&inst_lock); 1246 1247 bip->bi_control = NULL; 1248 1249 rw_enter(&bip->bi_rwlock, RW_READER); 1250 blnext = list_head(&bip->bi_links); 1251 while ((blp = blnext) != NULL) { 1252 blnext = list_next(&bip->bi_links, blp); 1253 if (!(blp->bl_flags & BLF_DELETED)) { 1254 blp->bl_flags |= BLF_DELETED; 1255 (void) ddi_taskq_dispatch(bridge_taskq, link_shutdown, 1256 blp, DDI_SLEEP); 1257 } 1258 } 1259 while ((bfp = avl_first(&bip->bi_fwd)) != NULL) { 1260 atomic_inc_uint(&bfp->bf_refs); 1261 rw_exit(&bip->bi_rwlock); 1262 fwd_delete(bfp); 1263 fwd_unref(bfp); 1264 rw_enter(&bip->bi_rwlock, RW_READER); 1265 } 1266 rw_exit(&bip->bi_rwlock); 1267 1268 /* 1269 * This bridge is being destroyed. Notify TRILL once all of the 1270 * links are all gone. 
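 *
 * (The wait below is satisfied by link_unref(), which broadcasts on
 * bi_linkwait when the last link leaves bi_links on a TRILL-enabled bridge.)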
1271 */ 1272 mutex_enter(&inst_lock); 1273 while (bip->bi_trilldata != NULL && !list_is_empty(&bip->bi_links)) 1274 cv_wait(&bip->bi_linkwait, &inst_lock); 1275 mutex_exit(&inst_lock); 1276 if (bip->bi_trilldata != NULL) 1277 trill_brdstr_fn(bip->bi_trilldata, bip); 1278 1279 bridge_unref(bip); 1280 } 1281 1282 /* 1283 * This is called once by the TRILL module when it starts up. It just sets the 1284 * global TRILL callback function pointers -- data transmit/receive and bridge 1285 * and link destroy notification. There's only one TRILL module, so only one 1286 * registration is needed. 1287 * 1288 * TRILL should call this function with NULL pointers before unloading. It 1289 * must not do so before dropping all references to bridges and links. We 1290 * assert that this is true on debug builds. 1291 */ 1292 void 1293 bridge_trill_register_cb(trill_recv_pkt_t recv_fn, trill_encap_pkt_t encap_fn, 1294 trill_br_dstr_t brdstr_fn, trill_ln_dstr_t lndstr_fn) 1295 { 1296 #ifdef DEBUG 1297 if (recv_fn == NULL && trill_recv_fn != NULL) { 1298 bridge_inst_t *bip; 1299 bridge_link_t *blp; 1300 1301 mutex_enter(&inst_lock); 1302 for (bip = list_head(&inst_list); bip != NULL; 1303 bip = list_next(&inst_list, bip)) { 1304 ASSERT(bip->bi_trilldata == NULL); 1305 rw_enter(&bip->bi_rwlock, RW_READER); 1306 for (blp = list_head(&bip->bi_links); blp != NULL; 1307 blp = list_next(&bip->bi_links, blp)) { 1308 ASSERT(blp->bl_trilldata == NULL); 1309 } 1310 rw_exit(&bip->bi_rwlock); 1311 } 1312 mutex_exit(&inst_lock); 1313 } 1314 #endif 1315 trill_recv_fn = recv_fn; 1316 trill_encap_fn = encap_fn; 1317 trill_brdstr_fn = brdstr_fn; 1318 trill_lndstr_fn = lndstr_fn; 1319 } 1320 1321 /* 1322 * This registers the TRILL instance pointer with a bridge. Before this 1323 * pointer is set, the forwarding, TRILL receive, and bridge destructor 1324 * functions won't be called. 1325 * 1326 * TRILL holds a reference on a bridge with this call. It must free the 1327 * reference by calling the unregister function below. 1328 */ 1329 bridge_inst_t * 1330 bridge_trill_brref(const char *bname, void *ptr) 1331 { 1332 char bridge[MAXLINKNAMELEN]; 1333 bridge_inst_t *bip; 1334 1335 (void) snprintf(bridge, MAXLINKNAMELEN, "%s0", bname); 1336 bip = bridge_find_name(bridge); 1337 if (bip != NULL) { 1338 ASSERT(bip->bi_trilldata == NULL && ptr != NULL); 1339 bip->bi_trilldata = ptr; 1340 } 1341 return (bip); 1342 } 1343 1344 void 1345 bridge_trill_brunref(bridge_inst_t *bip) 1346 { 1347 ASSERT(bip->bi_trilldata != NULL); 1348 bip->bi_trilldata = NULL; 1349 bridge_unref(bip); 1350 } 1351 1352 /* 1353 * TRILL calls this function when referencing a particular link on a bridge. 1354 * 1355 * It holds a reference on the link, so TRILL must clear out the reference when 1356 * it's done with the link (on unbinding). 
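 *
 * A caller-side sketch (from the TRILL module; the names used here are
 * illustrative, not taken from this file):
 *
 *	blp = bridge_trill_lnref(bip, linkid, my_trill_link_state);
 *	if (blp != NULL) {
 *		... I/O via bridge_trill_output()/bridge_trill_decaps ...
 *		bridge_trill_lnunref(blp);	(when unbinding)
 *	}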
1357 */ 1358 bridge_link_t * 1359 bridge_trill_lnref(bridge_inst_t *bip, datalink_id_t linkid, void *ptr) 1360 { 1361 bridge_link_t *blp; 1362 1363 ASSERT(ptr != NULL); 1364 rw_enter(&bip->bi_rwlock, RW_READER); 1365 for (blp = list_head(&bip->bi_links); blp != NULL; 1366 blp = list_next(&bip->bi_links, blp)) { 1367 if (!(blp->bl_flags & BLF_DELETED) && 1368 blp->bl_linkid == linkid && blp->bl_trilldata == NULL) { 1369 blp->bl_trilldata = ptr; 1370 blp->bl_flags &= ~BLF_TRILLACTIVE; 1371 (void) memset(blp->bl_afs, 0, sizeof (blp->bl_afs)); 1372 atomic_inc_uint(&blp->bl_refs); 1373 break; 1374 } 1375 } 1376 rw_exit(&bip->bi_rwlock); 1377 return (blp); 1378 } 1379 1380 void 1381 bridge_trill_lnunref(bridge_link_t *blp) 1382 { 1383 mutex_enter(&blp->bl_trilllock); 1384 ASSERT(blp->bl_trilldata != NULL); 1385 blp->bl_trilldata = NULL; 1386 blp->bl_flags &= ~BLF_TRILLACTIVE; 1387 while (blp->bl_trillthreads > 0) 1388 cv_wait(&blp->bl_trillwait, &blp->bl_trilllock); 1389 mutex_exit(&blp->bl_trilllock); 1390 (void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs)); 1391 link_unref(blp); 1392 } 1393 1394 /* 1395 * This periodic timer performs three functions: 1396 * 1. It scans the list of learned forwarding entries, and removes ones that 1397 * haven't been heard from in a while. The time limit is backed down if 1398 * we're above the configured table limit. 1399 * 2. It walks the links and decays away the bl_learns counter. 1400 * 3. It scans the observability node entries looking for ones that can be 1401 * freed up. 1402 */ 1403 /* ARGSUSED */ 1404 static void 1405 bridge_timer(void *arg) 1406 { 1407 bridge_inst_t *bip; 1408 bridge_fwd_t *bfp, *bfnext; 1409 bridge_mac_t *bmp, *bmnext; 1410 bridge_link_t *blp; 1411 int err; 1412 datalink_id_t tmpid; 1413 avl_tree_t fwd_scavenge; 1414 clock_t age_limit; 1415 uint32_t ldecay; 1416 1417 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 1418 offsetof(bridge_fwd_t, bf_node)); 1419 mutex_enter(&inst_lock); 1420 for (bip = list_head(&inst_list); bip != NULL; 1421 bip = list_next(&inst_list, bip)) { 1422 if (bip->bi_flags & BIF_SHUTDOWN) 1423 continue; 1424 rw_enter(&bip->bi_rwlock, RW_WRITER); 1425 /* compute scaled maximum age based on table limit */ 1426 if (avl_numnodes(&bip->bi_fwd) > bip->bi_tablemax) 1427 bip->bi_tshift++; 1428 else 1429 bip->bi_tshift = 0; 1430 if ((age_limit = bridge_fwd_age >> bip->bi_tshift) == 0) { 1431 if (bip->bi_tshift != 0) 1432 bip->bi_tshift--; 1433 age_limit = 1; 1434 } 1435 bfnext = avl_first(&bip->bi_fwd); 1436 while ((bfp = bfnext) != NULL) { 1437 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 1438 if (!(bfp->bf_flags & BFF_LOCALADDR) && 1439 (ddi_get_lbolt() - bfp->bf_lastheard) > age_limit) { 1440 ASSERT(bfp->bf_flags & BFF_INTREE); 1441 avl_remove(&bip->bi_fwd, bfp); 1442 bfp->bf_flags &= ~BFF_INTREE; 1443 avl_add(&fwd_scavenge, bfp); 1444 } 1445 } 1446 for (blp = list_head(&bip->bi_links); blp != NULL; 1447 blp = list_next(&bip->bi_links, blp)) { 1448 ldecay = mac_get_ldecay(blp->bl_mh); 1449 if (ldecay >= blp->bl_learns) 1450 blp->bl_learns = 0; 1451 else 1452 atomic_add_int(&blp->bl_learns, -(int)ldecay); 1453 } 1454 rw_exit(&bip->bi_rwlock); 1455 bfnext = avl_first(&fwd_scavenge); 1456 while ((bfp = bfnext) != NULL) { 1457 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 1458 avl_remove(&fwd_scavenge, bfp); 1459 KIINCR(bki_expire); 1460 fwd_unref(bfp); /* drop tree reference */ 1461 } 1462 } 1463 mutex_exit(&inst_lock); 1464 avl_destroy(&fwd_scavenge); 1465 1466 /* 1467 * Scan the bridge_mac_t entries and try to 
free up the ones that are 1468 * no longer active. This must be done by polling, as neither DLS nor 1469 * MAC provides a driver any sort of positive control over clients. 1470 */ 1471 rw_enter(&bmac_rwlock, RW_WRITER); 1472 bmnext = list_head(&bmac_list); 1473 while ((bmp = bmnext) != NULL) { 1474 bmnext = list_next(&bmac_list, bmp); 1475 1476 /* ignore active bridges */ 1477 if (bmp->bm_inst != NULL) 1478 continue; 1479 1480 if (bmp->bm_flags & BMF_DLS) { 1481 err = dls_devnet_destroy(bmp->bm_mh, &tmpid, B_FALSE); 1482 ASSERT(err == 0 || err == EBUSY); 1483 if (err == 0) 1484 bmp->bm_flags &= ~BMF_DLS; 1485 } 1486 1487 if (!(bmp->bm_flags & BMF_DLS)) { 1488 err = mac_unregister(bmp->bm_mh); 1489 ASSERT(err == 0 || err == EBUSY); 1490 if (err == 0) { 1491 list_remove(&bmac_list, bmp); 1492 kmem_free(bmp, sizeof (*bmp)); 1493 } 1494 } 1495 } 1496 if (list_is_empty(&bmac_list)) { 1497 bridge_timerid = 0; 1498 } else { 1499 bridge_timerid = timeout(bridge_timer, NULL, 1500 bridge_scan_interval); 1501 } 1502 rw_exit(&bmac_rwlock); 1503 } 1504 1505 static int 1506 bridge_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp) 1507 { 1508 bridge_stream_t *bsp; 1509 1510 if (rq->q_ptr != NULL) 1511 return (0); 1512 1513 if (sflag & MODOPEN) 1514 return (EINVAL); 1515 1516 /* 1517 * Check the minor node number being opened. This tells us which 1518 * bridge instance the user wants. 1519 */ 1520 if (getminor(*devp) != 0) { 1521 /* 1522 * This is a regular DLPI stream for snoop or the like. 1523 * Redirect it through DLD. 1524 */ 1525 rq->q_qinfo = &bridge_dld_rinit; 1526 OTHERQ(rq)->q_qinfo = &bridge_dld_winit; 1527 return (dld_open(rq, devp, oflag, sflag, credp)); 1528 } else { 1529 /* 1530 * Allocate the bridge control stream structure. 1531 */ 1532 if ((bsp = stream_alloc()) == NULL) 1533 return (ENOSR); 1534 rq->q_ptr = WR(rq)->q_ptr = (caddr_t)bsp; 1535 bsp->bs_wq = WR(rq); 1536 *devp = makedevice(getmajor(*devp), bsp->bs_minor); 1537 qprocson(rq); 1538 return (0); 1539 } 1540 } 1541 1542 /* 1543 * This is used only for bridge control streams. DLPI goes through dld 1544 * instead. 1545 */ 1546 static int 1547 bridge_close(queue_t *rq) 1548 { 1549 bridge_stream_t *bsp = rq->q_ptr; 1550 bridge_inst_t *bip; 1551 1552 /* 1553 * Wait for any stray taskq (add/delete link) entries related to this 1554 * stream to leave the system. 1555 */ 1556 mutex_enter(&stream_ref_lock); 1557 while (bsp->bs_taskq_cnt != 0) 1558 cv_wait(&stream_ref_cv, &stream_ref_lock); 1559 mutex_exit(&stream_ref_lock); 1560 1561 qprocsoff(rq); 1562 if ((bip = bsp->bs_inst) != NULL) 1563 shutdown_inst(bip); 1564 rq->q_ptr = WR(rq)->q_ptr = NULL; 1565 stream_free(bsp); 1566 if (bip != NULL) 1567 bridge_unref(bip); 1568 1569 return (0); 1570 } 1571 1572 static void 1573 bridge_learn(bridge_link_t *blp, const uint8_t *saddr, uint16_t ingress_nick, 1574 uint16_t vlanid) 1575 { 1576 bridge_inst_t *bip = blp->bl_inst; 1577 bridge_fwd_t *bfp, *bfpnew; 1578 int i; 1579 boolean_t replaced = B_FALSE; 1580 1581 /* Ignore multi-destination address used as source; it's nonsense. */ 1582 if (*saddr & 1) 1583 return; 1584 1585 /* 1586 * If the source is known, then check whether it belongs on this link. 1587 * If not, and this isn't a fixed local address, then we've detected a 1588 * move. If it's not known, learn it. 1589 */ 1590 if ((bfp = fwd_find(bip, saddr, vlanid)) != NULL) { 1591 /* 1592 * If the packet has a fixed local source address, then there's 1593 * nothing we can learn. We must quit. 
If this was a received 1594 * packet, then the sender has stolen our address, but there's 1595 * nothing we can do. If it's a transmitted packet, then 1596 * that's the normal case. 1597 */ 1598 if (bfp->bf_flags & BFF_LOCALADDR) { 1599 fwd_unref(bfp); 1600 return; 1601 } 1602 1603 /* 1604 * Check if the link (and TRILL sender, if any) being used is 1605 * among the ones registered for this address. If so, then 1606 * this is information that we already know. 1607 */ 1608 if (bfp->bf_trill_nick == ingress_nick) { 1609 for (i = 0; i < bfp->bf_nlinks; i++) { 1610 if (bfp->bf_links[i] == blp) { 1611 bfp->bf_lastheard = ddi_get_lbolt(); 1612 fwd_unref(bfp); 1613 return; 1614 } 1615 } 1616 } 1617 } 1618 1619 /* 1620 * Note that we intentionally "unlearn" things that appear to be under 1621 * attack on this link. The forwarding cache is a negative thing for 1622 * security -- it disables reachability as a performance optimization 1623 * -- so leaving out entries optimizes for success and defends against 1624 * the attack. Thus, the bare increment without a check in the delete 1625 * code above is right. (And it's ok if we skid over the limit a 1626 * little, so there's no synchronization needed on the test.) 1627 */ 1628 if (blp->bl_learns >= mac_get_llimit(blp->bl_mh)) { 1629 if (bfp != NULL) { 1630 if (bfp->bf_vcnt == 0) 1631 fwd_delete(bfp); 1632 fwd_unref(bfp); 1633 } 1634 return; 1635 } 1636 1637 atomic_inc_uint(&blp->bl_learns); 1638 1639 if ((bfpnew = fwd_alloc(saddr, 1, ingress_nick)) == NULL) { 1640 if (bfp != NULL) 1641 fwd_unref(bfp); 1642 return; 1643 } 1644 KIINCR(bki_count); 1645 1646 if (bfp != NULL) { 1647 /* 1648 * If this is a new destination for the same VLAN, then delete 1649 * so that we can update. If it's a different VLAN, then we're 1650 * not going to delete the original. Split off instead into an 1651 * IVL entry. 1652 */ 1653 if (bfp->bf_vlanid == vlanid) { 1654 /* save the count of IVL duplicates */ 1655 bfpnew->bf_vcnt = bfp->bf_vcnt; 1656 1657 /* entry deletes count as learning events */ 1658 atomic_inc_uint(&blp->bl_learns); 1659 1660 /* destroy and create anew; node moved */ 1661 fwd_delete(bfp); 1662 replaced = B_TRUE; 1663 KIINCR(bki_moved); 1664 } else { 1665 bfp->bf_vcnt++; 1666 bfpnew->bf_flags |= BFF_VLANLOCAL; 1667 } 1668 fwd_unref(bfp); 1669 } 1670 bfpnew->bf_links[0] = blp; 1671 bfpnew->bf_nlinks = 1; 1672 atomic_inc_uint(&blp->bl_refs); /* bf_links entry */ 1673 if (!fwd_insert(bip, bfpnew)) 1674 fwd_free(bfpnew); 1675 else if (!replaced) 1676 KIINCR(bki_source); 1677 } 1678
1679 /* 1680 * Process the VLAN headers for output on a given link. There are several 1681 * cases (noting that we don't map VLANs): 1682 * 1. The input packet is good as it is; either 1683 * a. It has no tag, and output has same PVID 1684 * b. It has a non-zero priority-only tag for PVID, and b_band is same 1685 * c. It has a tag with VLAN different from PVID, and b_band is same 1686 * 2. The tag must change: non-zero b_band is different from tag priority 1687 * 3. The packet has a tag and should not (VLAN same as PVID, b_band zero) 1688 * 4. The packet has no tag and needs one: 1689 * a. VLAN ID same as PVID, but b_band is non-zero 1690 * b. VLAN ID different from PVID 1691 * We exclude case 1 first, then modify the packet. Note that output packets 1692 * get a priority set by the mblk, not by the header, because QoS in bridging 1693 * requires priority recalculation at each node. 1694 * 1695 * The passed-in tci is the "impossible" value 0xFFFF when no tag is present. 
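 *
 * A worked example (values chosen for illustration): with a link PVID of 5,
 * an untagged packet staying on VLAN 5 is case 1a and passes unchanged; the
 * same packet with b_band 6 is case 4a and gains a priority-only tag
 * (VLAN_ID_NONE, priority 6); a packet tagged with VLAN 7 whose tag priority
 * already equals b_band is case 1c and is also left alone.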
1696 */ 1697 static mblk_t * 1698 reform_vlan_header(mblk_t *mp, uint16_t vlanid, uint16_t tci, uint16_t pvid) 1699 { 1700 boolean_t source_has_tag = (tci != 0xFFFF); 1701 mblk_t *mpcopy; 1702 size_t mlen, minlen; 1703 struct ether_vlan_header *evh; 1704 int pri; 1705 1706 /* This helps centralize error handling in the caller. */ 1707 if (mp == NULL) 1708 return (mp); 1709 1710 /* No forwarded packet can have hardware checksum enabled */ 1711 DB_CKSUMFLAGS(mp) = 0; 1712 1713 /* Get the no-modification cases out of the way first */ 1714 if (!source_has_tag && vlanid == pvid) /* 1a */ 1715 return (mp); 1716 1717 pri = VLAN_PRI(tci); 1718 if (source_has_tag && mp->b_band == pri) { 1719 if (vlanid != pvid) /* 1c */ 1720 return (mp); 1721 if (pri != 0 && VLAN_ID(tci) == 0) /* 1b */ 1722 return (mp); 1723 } 1724 1725 /* 1726 * We now know that we must modify the packet. Prepare for that. Note 1727 * that if a tag is present, the caller has already done a pullup for 1728 * the VLAN header, so we're good to go. 1729 */ 1730 if (MBLKL(mp) < sizeof (struct ether_header)) { 1731 mpcopy = msgpullup(mp, sizeof (struct ether_header)); 1732 if (mpcopy == NULL) { 1733 freemsg(mp); 1734 return (NULL); 1735 } 1736 mp = mpcopy; 1737 } 1738 if (DB_REF(mp) > 1 || !IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) || 1739 (!source_has_tag && MBLKTAIL(mp) < VLAN_INCR)) { 1740 minlen = mlen = MBLKL(mp); 1741 if (!source_has_tag) 1742 minlen += VLAN_INCR; 1743 ASSERT(minlen >= sizeof (struct ether_vlan_header)); 1744 /* 1745 * We're willing to copy some data to avoid fragmentation, but 1746 * not a lot. 1747 */ 1748 if (minlen > 256) 1749 minlen = sizeof (struct ether_vlan_header); 1750 mpcopy = allocb(minlen, BPRI_MED); 1751 if (mpcopy == NULL) { 1752 freemsg(mp); 1753 return (NULL); 1754 } 1755 if (mlen <= minlen) { 1756 /* We toss the first mblk when we can. 
*/ 1757 bcopy(mp->b_rptr, mpcopy->b_rptr, mlen); 1758 mpcopy->b_wptr += mlen; 1759 mpcopy->b_cont = mp->b_cont; 1760 freeb(mp); 1761 } else { 1762 /* If not, then just copy what we need */ 1763 if (!source_has_tag) 1764 minlen = sizeof (struct ether_header); 1765 bcopy(mp->b_rptr, mpcopy->b_rptr, minlen); 1766 mpcopy->b_wptr += minlen; 1767 mpcopy->b_cont = mp; 1768 mp->b_rptr += minlen; 1769 } 1770 mp = mpcopy; 1771 } 1772 1773 /* LINTED: pointer alignment */ 1774 evh = (struct ether_vlan_header *)mp->b_rptr; 1775 if (source_has_tag) { 1776 if (mp->b_band == 0 && vlanid == pvid) { /* 3 */ 1777 evh->ether_tpid = evh->ether_type; 1778 mlen = MBLKL(mp); 1779 if (mlen > sizeof (struct ether_vlan_header)) 1780 ovbcopy(mp->b_rptr + 1781 sizeof (struct ether_vlan_header), 1782 mp->b_rptr + sizeof (struct ether_header), 1783 mlen - sizeof (struct ether_vlan_header)); 1784 mp->b_wptr -= VLAN_INCR; 1785 } else { /* 2 */ 1786 if (vlanid == pvid) 1787 vlanid = VLAN_ID_NONE; 1788 tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid); 1789 evh->ether_tci = htons(tci); 1790 } 1791 } else { 1792 /* case 4: no header present, but one is needed */ 1793 mlen = MBLKL(mp); 1794 if (mlen > sizeof (struct ether_header)) 1795 ovbcopy(mp->b_rptr + sizeof (struct ether_header), 1796 mp->b_rptr + sizeof (struct ether_vlan_header), 1797 mlen - sizeof (struct ether_header)); 1798 mp->b_wptr += VLAN_INCR; 1799 ASSERT(mp->b_wptr <= DB_LIM(mp)); 1800 if (vlanid == pvid) 1801 vlanid = VLAN_ID_NONE; 1802 tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid); 1803 evh->ether_type = evh->ether_tpid; 1804 evh->ether_tpid = htons(ETHERTYPE_VLAN); 1805 evh->ether_tci = htons(tci); 1806 } 1807 return (mp); 1808 } 1809 1810 /* Record VLAN information and strip header if requested . */ 1811 static void 1812 update_header(mblk_t *mp, mac_header_info_t *hdr_info, boolean_t striphdr) 1813 { 1814 if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) { 1815 struct ether_vlan_header *evhp; 1816 uint16_t ether_type; 1817 1818 /* LINTED: alignment */ 1819 evhp = (struct ether_vlan_header *)mp->b_rptr; 1820 hdr_info->mhi_istagged = B_TRUE; 1821 hdr_info->mhi_tci = ntohs(evhp->ether_tci); 1822 if (striphdr) { 1823 /* 1824 * For VLAN tagged frames update the ether_type 1825 * in hdr_info before stripping the header. 1826 */ 1827 ether_type = ntohs(evhp->ether_type); 1828 hdr_info->mhi_origsap = ether_type; 1829 hdr_info->mhi_bindsap = (ether_type > ETHERMTU) ? 1830 ether_type : DLS_SAP_LLC; 1831 mp->b_rptr = (uchar_t *)(evhp + 1); 1832 } 1833 } else { 1834 hdr_info->mhi_istagged = B_FALSE; 1835 hdr_info->mhi_tci = VLAN_ID_NONE; 1836 if (striphdr) 1837 mp->b_rptr += sizeof (struct ether_header); 1838 } 1839 } 1840 1841 /* 1842 * Return B_TRUE if we're allowed to send on this link with the given VLAN ID. 1843 */ 1844 static boolean_t 1845 bridge_can_send(bridge_link_t *blp, uint16_t vlanid) 1846 { 1847 ASSERT(vlanid != VLAN_ID_NONE); 1848 if (blp->bl_flags & BLF_DELETED) 1849 return (B_FALSE); 1850 if (blp->bl_trilldata == NULL && blp->bl_state != BLS_FORWARDING) 1851 return (B_FALSE); 1852 return (BRIDGE_VLAN_ISSET(blp, vlanid) && BRIDGE_AF_ISSET(blp, vlanid)); 1853 } 1854 1855 /* 1856 * This function scans the bridge forwarding tables in order to forward a given 1857 * packet. If the packet either doesn't need forwarding (the current link is 1858 * correct) or the current link needs a copy as well, then the packet is 1859 * returned to the caller. 1860 * 1861 * If a packet has been decapsulated from TRILL, then it must *NOT* reenter a 1862 * TRILL tunnel. 
If the destination points there, then drop instead. 1863 */ 1864 static mblk_t * 1865 bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp, 1866 uint16_t vlanid, uint16_t tci, boolean_t from_trill, boolean_t is_xmit) 1867 { 1868 mblk_t *mpsend, *mpcopy; 1869 bridge_inst_t *bip = blp->bl_inst; 1870 bridge_link_t *blpsend, *blpnext; 1871 bridge_fwd_t *bfp; 1872 uint_t i; 1873 boolean_t selfseen = B_FALSE; 1874 void *tdp; 1875 const uint8_t *daddr = hdr_info->mhi_daddr; 1876 1877 /* 1878 * Check for the IEEE "reserved" multicast addresses. Messages sent to 1879 * these addresses are used for link-local control (STP and pause), and 1880 * are never forwarded or redirected. 1881 */ 1882 if (daddr[0] == 1 && daddr[1] == 0x80 && daddr[2] == 0xc2 && 1883 daddr[3] == 0 && daddr[4] == 0 && (daddr[5] & 0xf0) == 0) { 1884 if (from_trill) { 1885 freemsg(mp); 1886 mp = NULL; 1887 } 1888 return (mp); 1889 } 1890 1891 if ((bfp = fwd_find(bip, daddr, vlanid)) != NULL) { 1892 1893 /* 1894 * If trill indicates a destination for this node, then it's 1895 * clearly not intended for local delivery. We must tell TRILL 1896 * to encapsulate, as long as we didn't just decapsulate it. 1897 */ 1898 if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) { 1899 /* 1900 * Error case: can't reencapsulate if the protocols are 1901 * working correctly. 1902 */ 1903 if (from_trill) { 1904 freemsg(mp); 1905 return (NULL); 1906 } 1907 mutex_enter(&blp->bl_trilllock); 1908 if ((tdp = blp->bl_trilldata) != NULL) { 1909 blp->bl_trillthreads++; 1910 mutex_exit(&blp->bl_trilllock); 1911 update_header(mp, hdr_info, B_FALSE); 1912 if (is_xmit) 1913 mp = mac_fix_cksum(mp); 1914 /* all trill data frames have Inner.VLAN */ 1915 mp = reform_vlan_header(mp, vlanid, tci, 0); 1916 if (mp == NULL) { 1917 KIINCR(bki_drops); 1918 fwd_unref(bfp); 1919 return (NULL); 1920 } 1921 trill_encap_fn(tdp, blp, hdr_info, mp, 1922 bfp->bf_trill_nick); 1923 mutex_enter(&blp->bl_trilllock); 1924 if (--blp->bl_trillthreads == 0 && 1925 blp->bl_trilldata == NULL) 1926 cv_broadcast(&blp->bl_trillwait); 1927 } 1928 mutex_exit(&blp->bl_trilllock); 1929 1930 /* if TRILL has been disabled, then kill this stray */ 1931 if (tdp == NULL) { 1932 freemsg(mp); 1933 fwd_delete(bfp); 1934 } 1935 fwd_unref(bfp); 1936 return (NULL); 1937 } 1938 1939 /* find first link we can send on */ 1940 for (i = 0; i < bfp->bf_nlinks; i++) { 1941 blpsend = bfp->bf_links[i]; 1942 if (blpsend == blp) 1943 selfseen = B_TRUE; 1944 else if (bridge_can_send(blpsend, vlanid)) 1945 break; 1946 } 1947 1948 while (i < bfp->bf_nlinks) { 1949 blpsend = bfp->bf_links[i]; 1950 for (i++; i < bfp->bf_nlinks; i++) { 1951 blpnext = bfp->bf_links[i]; 1952 if (blpnext == blp) 1953 selfseen = B_TRUE; 1954 else if (bridge_can_send(blpnext, vlanid)) 1955 break; 1956 } 1957 if (i == bfp->bf_nlinks && !selfseen) { 1958 mpsend = mp; 1959 mp = NULL; 1960 } else { 1961 mpsend = copymsg(mp); 1962 } 1963 1964 if (!from_trill && is_xmit) 1965 mpsend = mac_fix_cksum(mpsend); 1966 1967 mpsend = reform_vlan_header(mpsend, vlanid, tci, 1968 blpsend->bl_pvid); 1969 if (mpsend == NULL) { 1970 KIINCR(bki_drops); 1971 continue; 1972 } 1973 1974 KIINCR(bki_forwards); 1975 /* 1976 * No need to bump up the link reference count, as 1977 * the forwarding entry itself holds a reference to 1978 * the link. 
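 *
 * (That reference was taken when the entry's bf_links[] slot was
 * filled -- see bridge_learn() and fwd_update_local() -- and is only
 * released in fwd_free().)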
1979 */ 1980 if (bfp->bf_flags & BFF_LOCALADDR) { 1981 mac_rx_common(blpsend->bl_mh, NULL, mpsend); 1982 } else { 1983 KLPINCR(blpsend, bkl_xmit); 1984 MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, 1985 mpsend); 1986 freemsg(mpsend); 1987 } 1988 } 1989 /* 1990 * Handle a special case: if we're transmitting to the original 1991 * link, then check whether the localaddr flag is set. If it 1992 * is, then receive instead. This doesn't happen with ordinary 1993 * bridging, but does happen often with TRILL decapsulation. 1994 */ 1995 if (mp != NULL && is_xmit && (bfp->bf_flags & BFF_LOCALADDR)) { 1996 mac_rx_common(blp->bl_mh, NULL, mp); 1997 mp = NULL; 1998 } 1999 fwd_unref(bfp); 2000 } else { 2001 /* 2002 * TRILL has two cases to handle. If the packet is off the 2003 * wire (not from TRILL), then we need to send up into the 2004 * TRILL module to have the distribution tree computed. If the 2005 * packet is from TRILL (decapsulated), then we're part of the 2006 * distribution tree, and we need to copy the packet on member 2007 * interfaces. 2008 * 2009 * Thus, the from TRILL case is identical to the STP case. 2010 */ 2011 if (!from_trill && blp->bl_trilldata != NULL) { 2012 mutex_enter(&blp->bl_trilllock); 2013 if ((tdp = blp->bl_trilldata) != NULL) { 2014 blp->bl_trillthreads++; 2015 mutex_exit(&blp->bl_trilllock); 2016 if ((mpsend = copymsg(mp)) != NULL) { 2017 update_header(mpsend, 2018 hdr_info, B_FALSE); 2019 /* 2020 * all trill data frames have 2021 * Inner.VLAN 2022 */ 2023 mpsend = reform_vlan_header(mpsend, 2024 vlanid, tci, 0); 2025 if (mpsend == NULL) { 2026 KIINCR(bki_drops); 2027 } else { 2028 trill_encap_fn(tdp, blp, 2029 hdr_info, mpsend, 2030 RBRIDGE_NICKNAME_NONE); 2031 } 2032 } 2033 mutex_enter(&blp->bl_trilllock); 2034 if (--blp->bl_trillthreads == 0 && 2035 blp->bl_trilldata == NULL) 2036 cv_broadcast(&blp->bl_trillwait); 2037 } 2038 mutex_exit(&blp->bl_trilllock); 2039 } 2040 2041 /* 2042 * This is an unknown destination, so flood. 
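 * The loop below holds bi_rwlock as reader only long enough to find the
 * next link it can send on and to take a reference on it; the lock is
 * dropped before the copy and transmit. The reference keeps the current
 * link on bi_links, so list_next() on it remains valid even if the list
 * changes while we're busy.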
2043 */ 2044 rw_enter(&bip->bi_rwlock, RW_READER); 2045 for (blpnext = list_head(&bip->bi_links); blpnext != NULL; 2046 blpnext = list_next(&bip->bi_links, blpnext)) { 2047 if (blpnext == blp) 2048 selfseen = B_TRUE; 2049 else if (bridge_can_send(blpnext, vlanid)) 2050 break; 2051 } 2052 if (blpnext != NULL) 2053 atomic_inc_uint(&blpnext->bl_refs); 2054 rw_exit(&bip->bi_rwlock); 2055 while ((blpsend = blpnext) != NULL) { 2056 rw_enter(&bip->bi_rwlock, RW_READER); 2057 for (blpnext = list_next(&bip->bi_links, blpsend); 2058 blpnext != NULL; 2059 blpnext = list_next(&bip->bi_links, blpnext)) { 2060 if (blpnext == blp) 2061 selfseen = B_TRUE; 2062 else if (bridge_can_send(blpnext, vlanid)) 2063 break; 2064 } 2065 if (blpnext != NULL) 2066 atomic_inc_uint(&blpnext->bl_refs); 2067 rw_exit(&bip->bi_rwlock); 2068 if (blpnext == NULL && !selfseen) { 2069 mpsend = mp; 2070 mp = NULL; 2071 } else { 2072 mpsend = copymsg(mp); 2073 } 2074 2075 if (!from_trill && is_xmit) 2076 mpsend = mac_fix_cksum(mpsend); 2077 2078 mpsend = reform_vlan_header(mpsend, vlanid, tci, 2079 blpsend->bl_pvid); 2080 if (mpsend == NULL) { 2081 KIINCR(bki_drops); 2082 continue; 2083 } 2084 2085 if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST) 2086 KIINCR(bki_unknown); 2087 else 2088 KIINCR(bki_mbcast); 2089 KLPINCR(blpsend, bkl_xmit); 2090 if ((mpcopy = copymsg(mpsend)) != NULL) 2091 mac_rx_common(blpsend->bl_mh, NULL, mpcopy); 2092 MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, mpsend); 2093 freemsg(mpsend); 2094 link_unref(blpsend); 2095 } 2096 } 2097 2098 /* 2099 * At this point, if np is non-NULL, it means that the caller needs to 2100 * continue on the selected link. 2101 */ 2102 return (mp); 2103 } 2104 2105 /* 2106 * Extract and validate the VLAN information for a given packet. This checks 2107 * conformance with the rules for use of the PVID on the link, and for the 2108 * allowed (configured) VLAN set. 2109 * 2110 * Returns B_TRUE if the packet passes, B_FALSE if it fails. 2111 */ 2112 static boolean_t 2113 bridge_get_vlan(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp, 2114 uint16_t *vlanidp, uint16_t *tcip) 2115 { 2116 uint16_t tci, vlanid; 2117 2118 if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) { 2119 ptrdiff_t tpos = offsetof(struct ether_vlan_header, ether_tci); 2120 ptrdiff_t mlen; 2121 2122 /* 2123 * Extract the VLAN ID information, regardless of alignment, 2124 * and without a pullup. This isn't attractive, but we do this 2125 * to avoid having to deal with the pointers stashed in 2126 * hdr_info moving around or having the caller deal with a new 2127 * mblk_t pointer. 2128 */ 2129 while (mp != NULL) { 2130 mlen = MBLKL(mp); 2131 if (mlen > tpos && mlen > 0) 2132 break; 2133 tpos -= mlen; 2134 mp = mp->b_cont; 2135 } 2136 if (mp == NULL) 2137 return (B_FALSE); 2138 tci = mp->b_rptr[tpos] << 8; 2139 if (++tpos >= mlen) { 2140 do { 2141 mp = mp->b_cont; 2142 } while (mp != NULL && MBLKL(mp) == 0); 2143 if (mp == NULL) 2144 return (B_FALSE); 2145 tpos = 0; 2146 } 2147 tci |= mp->b_rptr[tpos]; 2148 2149 vlanid = VLAN_ID(tci); 2150 if (VLAN_CFI(tci) != ETHER_CFI || vlanid > VLAN_ID_MAX) 2151 return (B_FALSE); 2152 if (vlanid == VLAN_ID_NONE || vlanid == blp->bl_pvid) 2153 goto input_no_vlan; 2154 if (!BRIDGE_VLAN_ISSET(blp, vlanid)) 2155 return (B_FALSE); 2156 } else { 2157 tci = 0xFFFF; 2158 input_no_vlan: 2159 /* 2160 * If PVID is set to zero, then untagged traffic is not 2161 * supported here. Do not learn or forward. 
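 * Untagged frames, and tagged frames whose VLAN ID equals the PVID, are
 * otherwise classified into the PVID here; tci keeps the 0xFFFF sentinel
 * only when no tag was present at all.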
2162 */ 2163 if ((vlanid = blp->bl_pvid) == VLAN_ID_NONE) 2164 return (B_FALSE); 2165 } 2166 2167 *tcip = tci; 2168 *vlanidp = vlanid; 2169 return (B_TRUE); 2170 } 2171 2172 /* 2173 * Handle MAC notifications. 2174 */ 2175 static void 2176 bridge_notify_cb(void *arg, mac_notify_type_t note_type) 2177 { 2178 bridge_link_t *blp = arg; 2179 2180 switch (note_type) { 2181 case MAC_NOTE_UNICST: 2182 bridge_new_unicst(blp); 2183 break; 2184 2185 case MAC_NOTE_SDU_SIZE: { 2186 uint_t maxsdu; 2187 bridge_inst_t *bip = blp->bl_inst; 2188 bridge_mac_t *bmp = bip->bi_mac; 2189 boolean_t notify = B_FALSE; 2190 mblk_t *mlist = NULL; 2191 2192 mac_sdu_get(blp->bl_mh, NULL, &maxsdu); 2193 rw_enter(&bip->bi_rwlock, RW_READER); 2194 if (list_prev(&bip->bi_links, blp) == NULL && 2195 list_next(&bip->bi_links, blp) == NULL) { 2196 notify = (maxsdu != bmp->bm_maxsdu); 2197 bmp->bm_maxsdu = maxsdu; 2198 } 2199 blp->bl_maxsdu = maxsdu; 2200 if (maxsdu != bmp->bm_maxsdu) 2201 link_sdu_fail(blp, B_TRUE, &mlist); 2202 else if (notify) 2203 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 2204 rw_exit(&bip->bi_rwlock); 2205 send_up_messages(bip, mlist); 2206 break; 2207 } 2208 } 2209 } 2210 2211 /* 2212 * This is called by the MAC layer. As with the transmit side, we're right in 2213 * the data path for all I/O on this port, so if we don't need to forward this 2214 * packet anywhere, we have to send it upwards via mac_rx_common. 2215 */ 2216 static void 2217 bridge_recv_cb(mac_handle_t mh, mac_resource_handle_t rsrc, mblk_t *mpnext) 2218 { 2219 mblk_t *mp, *mpcopy; 2220 bridge_link_t *blp = (bridge_link_t *)mh; 2221 bridge_inst_t *bip = blp->bl_inst; 2222 bridge_mac_t *bmp = bip->bi_mac; 2223 mac_header_info_t hdr_info; 2224 uint16_t vlanid, tci; 2225 boolean_t trillmode = B_FALSE; 2226 2227 KIINCR(bki_recv); 2228 KLINCR(bkl_recv); 2229 2230 /* 2231 * Regardless of state, check for inbound TRILL packets when TRILL is 2232 * active. These are pulled out of band and sent for TRILL handling. 2233 */ 2234 if (blp->bl_trilldata != NULL) { 2235 void *tdp; 2236 mblk_t *newhead; 2237 mblk_t *tail = NULL; 2238 2239 mutex_enter(&blp->bl_trilllock); 2240 if ((tdp = blp->bl_trilldata) != NULL) { 2241 blp->bl_trillthreads++; 2242 mutex_exit(&blp->bl_trilllock); 2243 trillmode = B_TRUE; 2244 newhead = mpnext; 2245 while ((mp = mpnext) != NULL) { 2246 boolean_t raw_isis, bridge_group; 2247 2248 mpnext = mp->b_next; 2249 2250 /* 2251 * If the header isn't readable, then leave on 2252 * the list and continue. 2253 */ 2254 if (mac_header_info(blp->bl_mh, mp, 2255 &hdr_info) != 0) { 2256 tail = mp; 2257 continue; 2258 } 2259 2260 /* 2261 * The TRILL document specifies that, on 2262 * Ethernet alone, IS-IS packets arrive with 2263 * LLC rather than Ethertype, and using a 2264 * specific destination address. We must check 2265 * for that here. Also, we need to give BPDUs 2266 * to TRILL for processing. 
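 * The tests below pick out three cases: the All-IS-IS-RBridges group
 * address (raw IS-IS over LLC), the bridge group address (BPDUs), and
 * frames whose Ethertype is TRILL, either directly or behind an outer
 * VLAN tag.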
2267 */ 2268 raw_isis = bridge_group = B_FALSE; 2269 if (hdr_info.mhi_dsttype == 2270 MAC_ADDRTYPE_MULTICAST) { 2271 if (memcmp(hdr_info.mhi_daddr, 2272 all_isis_rbridges, ETHERADDRL) == 0) 2273 raw_isis = B_TRUE; 2274 else if (memcmp(hdr_info.mhi_daddr, 2275 bridge_group_address, ETHERADDRL) == 2276 0) 2277 bridge_group = B_TRUE; 2278 } 2279 if (!raw_isis && !bridge_group && 2280 hdr_info.mhi_bindsap != ETHERTYPE_TRILL && 2281 (hdr_info.mhi_bindsap != ETHERTYPE_VLAN || 2282 /* LINTED: alignment */ 2283 ((struct ether_vlan_header *)mp->b_rptr)-> 2284 ether_type != htons(ETHERTYPE_TRILL))) { 2285 tail = mp; 2286 continue; 2287 } 2288 2289 /* 2290 * We've got TRILL input. Remove from the list 2291 * and send up through the TRILL module. (Send 2292 * a copy through promiscuous receive just to 2293 * support snooping on TRILL. Order isn't 2294 * preserved strictly, but that doesn't matter 2295 * here.) 2296 */ 2297 if (tail != NULL) 2298 tail->b_next = mpnext; 2299 mp->b_next = NULL; 2300 if (mp == newhead) 2301 newhead = mpnext; 2302 mac_trill_snoop(blp->bl_mh, mp); 2303 update_header(mp, &hdr_info, B_TRUE); 2304 /* 2305 * On raw IS-IS and BPDU frames, we have to 2306 * make sure that the length is trimmed 2307 * properly. We use origsap in order to cope 2308 * with jumbograms for IS-IS. (Regular mac 2309 * can't.) 2310 */ 2311 if (raw_isis || bridge_group) { 2312 size_t msglen = msgdsize(mp); 2313 2314 if (msglen > hdr_info.mhi_origsap) { 2315 (void) adjmsg(mp, 2316 hdr_info.mhi_origsap - 2317 msglen); 2318 } else if (msglen < 2319 hdr_info.mhi_origsap) { 2320 freemsg(mp); 2321 continue; 2322 } 2323 } 2324 trill_recv_fn(tdp, blp, rsrc, mp, &hdr_info); 2325 } 2326 mpnext = newhead; 2327 mutex_enter(&blp->bl_trilllock); 2328 if (--blp->bl_trillthreads == 0 && 2329 blp->bl_trilldata == NULL) 2330 cv_broadcast(&blp->bl_trillwait); 2331 } 2332 mutex_exit(&blp->bl_trilllock); 2333 if (mpnext == NULL) 2334 return; 2335 } 2336 2337 /* 2338 * If this is a TRILL RBridge, then just check whether this link is 2339 * used at all for forwarding. If not, then we're done. 2340 */ 2341 if (trillmode) { 2342 if (!(blp->bl_flags & BLF_TRILLACTIVE) || 2343 (blp->bl_flags & BLF_SDUFAIL)) { 2344 mac_rx_common(blp->bl_mh, rsrc, mpnext); 2345 return; 2346 } 2347 } else { 2348 /* 2349 * For regular (STP) bridges, if we're in blocking or listening 2350 * state, then do nothing. We don't learn or forward until 2351 * told to do so. 2352 */ 2353 if (blp->bl_state == BLS_BLOCKLISTEN) { 2354 mac_rx_common(blp->bl_mh, rsrc, mpnext); 2355 return; 2356 } 2357 } 2358 2359 /* 2360 * Send a copy of the message chain up to the observability node users. 2361 * For TRILL, we must obey the VLAN AF rules, so we go packet-by- 2362 * packet. 2363 */ 2364 if (!trillmode && blp->bl_state == BLS_FORWARDING && 2365 (bmp->bm_flags & BMF_STARTED) && 2366 (mp = copymsgchain(mpnext)) != NULL) { 2367 mac_rx(bmp->bm_mh, NULL, mp); 2368 } 2369 2370 /* 2371 * We must be in learning or forwarding state, or using TRILL on a link 2372 * with one or more VLANs active. For each packet in the list, process 2373 * the source address, and then attempt to forward. 2374 */ 2375 while ((mp = mpnext) != NULL) { 2376 mpnext = mp->b_next; 2377 mp->b_next = NULL; 2378 2379 /* 2380 * If we can't decode the header or if the header specifies a 2381 * multicast source address (impossible!), then don't bother 2382 * learning or forwarding, but go ahead and forward up the 2383 * stack for subsequent processing. 
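 * (A multicast source address can only come from a malformed frame; it is
 * counted against the drop kstats but still passed up the local stack.)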
2384 */ 2385 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0 || 2386 (hdr_info.mhi_saddr[0] & 1) != 0) { 2387 KIINCR(bki_drops); 2388 KLINCR(bkl_drops); 2389 mac_rx_common(blp->bl_mh, rsrc, mp); 2390 continue; 2391 } 2392 2393 /* 2394 * Extract and validate the VLAN ID for this packet. 2395 */ 2396 if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) || 2397 !BRIDGE_AF_ISSET(blp, vlanid)) { 2398 mac_rx_common(blp->bl_mh, rsrc, mp); 2399 continue; 2400 } 2401 2402 if (trillmode) { 2403 /* 2404 * Special test required by TRILL document: must 2405 * discard frames with outer address set to ESADI. 2406 */ 2407 if (memcmp(hdr_info.mhi_daddr, all_esadi_rbridges, 2408 ETHERADDRL) == 0) { 2409 mac_rx_common(blp->bl_mh, rsrc, mp); 2410 continue; 2411 } 2412 2413 /* 2414 * If we're in TRILL mode, then the call above to get 2415 * the VLAN ID has also checked that we're the 2416 * appointed forwarder, so report that we're handling 2417 * this packet to any observability node users. 2418 */ 2419 if ((bmp->bm_flags & BMF_STARTED) && 2420 (mpcopy = copymsg(mp)) != NULL) 2421 mac_rx(bmp->bm_mh, NULL, mpcopy); 2422 } 2423 2424 /* 2425 * First process the source address and learn from it. For 2426 * TRILL, we learn only if we're the appointed forwarder. 2427 */ 2428 bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE, 2429 vlanid); 2430 2431 /* 2432 * Now check whether we're forwarding and look up the 2433 * destination. If we can forward, do so. 2434 */ 2435 if (trillmode || blp->bl_state == BLS_FORWARDING) { 2436 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, 2437 B_FALSE, B_FALSE); 2438 } 2439 if (mp != NULL) 2440 mac_rx_common(blp->bl_mh, rsrc, mp); 2441 } 2442 } 2443 2444 2445 /* ARGSUSED */ 2446 static mblk_t * 2447 bridge_xmit_cb(mac_handle_t mh, mac_ring_handle_t rh, mblk_t *mpnext) 2448 { 2449 bridge_link_t *blp = (bridge_link_t *)mh; 2450 bridge_inst_t *bip = blp->bl_inst; 2451 bridge_mac_t *bmp = bip->bi_mac; 2452 mac_header_info_t hdr_info; 2453 uint16_t vlanid, tci; 2454 mblk_t *mp, *mpcopy; 2455 boolean_t trillmode; 2456 2457 trillmode = blp->bl_trilldata != NULL; 2458 2459 /* 2460 * If we're using STP and we're in blocking or listening state, or if 2461 * we're using TRILL and no VLANs are active, then behave as though the 2462 * bridge isn't here at all, and send on the local link alone. 2463 */ 2464 if ((!trillmode && blp->bl_state == BLS_BLOCKLISTEN) || 2465 (trillmode && 2466 (!(blp->bl_flags & BLF_TRILLACTIVE) || 2467 (blp->bl_flags & BLF_SDUFAIL)))) { 2468 KIINCR(bki_sent); 2469 KLINCR(bkl_xmit); 2470 MAC_RING_TX(blp->bl_mh, rh, mpnext, mp); 2471 return (mp); 2472 } 2473 2474 /* 2475 * Send a copy of the message up to the observability node users. 2476 * TRILL needs to check on a packet-by-packet basis. 2477 */ 2478 if (!trillmode && blp->bl_state == BLS_FORWARDING && 2479 (bmp->bm_flags & BMF_STARTED) && 2480 (mp = copymsgchain(mpnext)) != NULL) { 2481 mac_rx(bmp->bm_mh, NULL, mp); 2482 } 2483 2484 while ((mp = mpnext) != NULL) { 2485 mpnext = mp->b_next; 2486 mp->b_next = NULL; 2487 2488 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) { 2489 freemsg(mp); 2490 continue; 2491 } 2492 2493 /* 2494 * Extract and validate the VLAN ID for this packet. 
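 * On the transmit side, a frame that fails the VLAN or appointed-forwarder
 * check is dropped outright, unlike the receive side, where such frames
 * are still passed up the local stack.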
2495 */ 2496 if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) || 2497 !BRIDGE_AF_ISSET(blp, vlanid)) { 2498 freemsg(mp); 2499 continue; 2500 } 2501 2502 /* 2503 * If we're using TRILL, then we've now validated that we're 2504 * the forwarder for this VLAN, so go ahead and let 2505 * observability node users know about the packet. 2506 */ 2507 if (trillmode && (bmp->bm_flags & BMF_STARTED) && 2508 (mpcopy = copymsg(mp)) != NULL) { 2509 mac_rx(bmp->bm_mh, NULL, mpcopy); 2510 } 2511 2512 /* 2513 * We have to learn from our own transmitted packets, because 2514 * there may be a Solaris DLPI raw sender (who can specify his 2515 * own source address) using promiscuous mode for receive. The 2516 * mac layer information won't (and can't) tell us everything 2517 * we need to know. 2518 */ 2519 bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE, 2520 vlanid); 2521 2522 /* attempt forwarding */ 2523 if (trillmode || blp->bl_state == BLS_FORWARDING) { 2524 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, 2525 B_FALSE, B_TRUE); 2526 } 2527 if (mp != NULL) { 2528 MAC_RING_TX(blp->bl_mh, rh, mp, mp); 2529 if (mp == NULL) { 2530 KIINCR(bki_sent); 2531 KLINCR(bkl_xmit); 2532 } 2533 } 2534 /* 2535 * If we get stuck, then stop. Don't let the user's output 2536 * packets get out of order. (More importantly: don't try to 2537 * bridge the same packet multiple times if flow control is 2538 * asserted.) 2539 */ 2540 if (mp != NULL) { 2541 mp->b_next = mpnext; 2542 break; 2543 } 2544 } 2545 return (mp); 2546 } 2547 2548 /* 2549 * This is called by TRILL when it decapsulates an packet, and we must forward 2550 * locally. On failure, we just drop. 2551 * 2552 * Note that the ingress_nick reported by TRILL must not represent this local 2553 * node. 2554 */ 2555 void 2556 bridge_trill_decaps(bridge_link_t *blp, mblk_t *mp, uint16_t ingress_nick) 2557 { 2558 mac_header_info_t hdr_info; 2559 uint16_t vlanid, tci; 2560 bridge_inst_t *bip = blp->bl_inst; /* used by macros */ 2561 mblk_t *mpcopy; 2562 2563 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) { 2564 freemsg(mp); 2565 return; 2566 } 2567 2568 /* Extract VLAN ID for this packet. */ 2569 if (hdr_info.mhi_bindsap == ETHERTYPE_VLAN) { 2570 struct ether_vlan_header *evhp; 2571 2572 /* LINTED: alignment */ 2573 evhp = (struct ether_vlan_header *)mp->b_rptr; 2574 tci = ntohs(evhp->ether_tci); 2575 vlanid = VLAN_ID(tci); 2576 } else { 2577 /* Inner VLAN headers are required in TRILL data packets */ 2578 DTRACE_PROBE3(bridge__trill__decaps__novlan, bridge_link_t *, 2579 blp, mblk_t *, mp, uint16_t, ingress_nick); 2580 freemsg(mp); 2581 return; 2582 } 2583 2584 /* Learn the location of this sender in the RBridge network */ 2585 bridge_learn(blp, hdr_info.mhi_saddr, ingress_nick, vlanid); 2586 2587 /* attempt forwarding */ 2588 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, B_TRUE, B_TRUE); 2589 if (mp != NULL) { 2590 if (bridge_can_send(blp, vlanid)) { 2591 /* Deliver a copy locally as well */ 2592 if ((mpcopy = copymsg(mp)) != NULL) 2593 mac_rx_common(blp->bl_mh, NULL, mpcopy); 2594 MAC_RING_TX(blp->bl_mh, NULL, mp, mp); 2595 } 2596 if (mp == NULL) { 2597 KIINCR(bki_sent); 2598 KLINCR(bkl_xmit); 2599 } else { 2600 freemsg(mp); 2601 } 2602 } 2603 } 2604 2605 /* 2606 * This function is used by TRILL _only_ to transmit TRILL-encapsulated 2607 * packets. It sends on a single underlying link and does not bridge. 
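 * A NULL return means the MAC layer accepted the frame; otherwise the
 * unsent message is handed back to TRILL, typically because the ring is
 * flow-controlled.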
2608 */ 2609 mblk_t * 2610 bridge_trill_output(bridge_link_t *blp, mblk_t *mp) 2611 { 2612 bridge_inst_t *bip = blp->bl_inst; /* used by macros */ 2613 2614 mac_trill_snoop(blp->bl_mh, mp); 2615 MAC_RING_TX(blp->bl_mh, NULL, mp, mp); 2616 if (mp == NULL) { 2617 KIINCR(bki_sent); 2618 KLINCR(bkl_xmit); 2619 } 2620 return (mp); 2621 } 2622 2623 /* 2624 * Set the "appointed forwarder" flag array for this link. TRILL controls 2625 * forwarding on a VLAN basis. The "trillactive" flag is an optimization for 2626 * the forwarder. 2627 */ 2628 void 2629 bridge_trill_setvlans(bridge_link_t *blp, const uint8_t *arr) 2630 { 2631 int i; 2632 uint_t newflags = 0; 2633 2634 for (i = 0; i < BRIDGE_VLAN_ARR_SIZE; i++) { 2635 if ((blp->bl_afs[i] = arr[i]) != 0) 2636 newflags = BLF_TRILLACTIVE; 2637 } 2638 blp->bl_flags = (blp->bl_flags & ~BLF_TRILLACTIVE) | newflags; 2639 } 2640 2641 void 2642 bridge_trill_flush(bridge_link_t *blp, uint16_t vlan, boolean_t dotrill) 2643 { 2644 bridge_inst_t *bip = blp->bl_inst; 2645 bridge_fwd_t *bfp, *bfnext; 2646 avl_tree_t fwd_scavenge; 2647 int i; 2648 2649 _NOTE(ARGUNUSED(vlan)); 2650 2651 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 2652 offsetof(bridge_fwd_t, bf_node)); 2653 rw_enter(&bip->bi_rwlock, RW_WRITER); 2654 bfnext = avl_first(&bip->bi_fwd); 2655 while ((bfp = bfnext) != NULL) { 2656 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 2657 if (bfp->bf_flags & BFF_LOCALADDR) 2658 continue; 2659 if (dotrill) { 2660 /* port doesn't matter if we're flushing TRILL */ 2661 if (bfp->bf_trill_nick == RBRIDGE_NICKNAME_NONE) 2662 continue; 2663 } else { 2664 if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) 2665 continue; 2666 for (i = 0; i < bfp->bf_nlinks; i++) { 2667 if (bfp->bf_links[i] == blp) 2668 break; 2669 } 2670 if (i >= bfp->bf_nlinks) 2671 continue; 2672 } 2673 ASSERT(bfp->bf_flags & BFF_INTREE); 2674 avl_remove(&bip->bi_fwd, bfp); 2675 bfp->bf_flags &= ~BFF_INTREE; 2676 avl_add(&fwd_scavenge, bfp); 2677 } 2678 rw_exit(&bip->bi_rwlock); 2679 bfnext = avl_first(&fwd_scavenge); 2680 while ((bfp = bfnext) != NULL) { 2681 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 2682 avl_remove(&fwd_scavenge, bfp); 2683 fwd_unref(bfp); 2684 } 2685 avl_destroy(&fwd_scavenge); 2686 } 2687 2688 /* 2689 * Let the mac module take or drop a reference to a bridge link. When this is 2690 * called, the mac module is holding the mi_bridge_lock, so the link cannot be 2691 * in the process of entering or leaving a bridge. 2692 */ 2693 static void 2694 bridge_ref_cb(mac_handle_t mh, boolean_t hold) 2695 { 2696 bridge_link_t *blp = (bridge_link_t *)mh; 2697 2698 if (hold) 2699 atomic_inc_uint(&blp->bl_refs); 2700 else 2701 link_unref(blp); 2702 } 2703 2704 /* 2705 * Handle link state changes reported by the mac layer. This acts as a filter 2706 * for link state changes: if a link is reporting down, but there are other 2707 * links still up on the bridge, then the state is changed to "up." When the 2708 * last link goes down, all are marked down, and when the first link goes up, 2709 * all are marked up. (Recursion is avoided by the use of the "redo" function.) 2710 * 2711 * We treat unknown as equivalent to "up." 
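 * For example, on a bridge with links A and B, if A reports down while B
 * is still up, A's clients continue to see "up"; only when the last
 * remaining link goes down are all of them marked down.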
2712 */ 2713 static link_state_t 2714 bridge_ls_cb(mac_handle_t mh, link_state_t newls) 2715 { 2716 bridge_link_t *blp = (bridge_link_t *)mh; 2717 bridge_link_t *blcmp; 2718 bridge_inst_t *bip; 2719 bridge_mac_t *bmp; 2720 2721 if (newls != LINK_STATE_DOWN && blp->bl_linkstate != LINK_STATE_DOWN || 2722 (blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) { 2723 blp->bl_linkstate = newls; 2724 return (newls); 2725 } 2726 2727 /* 2728 * Scan first to see if there are any other non-down links. If there 2729 * are, then we're done. Otherwise, if all others are down, then the 2730 * state of this link is the state of the bridge. 2731 */ 2732 bip = blp->bl_inst; 2733 rw_enter(&bip->bi_rwlock, RW_WRITER); 2734 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 2735 blcmp = list_next(&bip->bi_links, blcmp)) { 2736 if (blcmp != blp && 2737 !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) && 2738 blcmp->bl_linkstate != LINK_STATE_DOWN) 2739 break; 2740 } 2741 2742 if (blcmp != NULL) { 2743 /* 2744 * If there are other links that are considered up, then tell 2745 * the caller that the link is actually still up, regardless of 2746 * this link's underlying state. 2747 */ 2748 blp->bl_linkstate = newls; 2749 newls = LINK_STATE_UP; 2750 } else if (blp->bl_linkstate != newls) { 2751 /* 2752 * If we've found no other 'up' links, and this link has 2753 * changed state, then report the new state of the bridge to 2754 * all other clients. 2755 */ 2756 blp->bl_linkstate = newls; 2757 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 2758 blcmp = list_next(&bip->bi_links, blcmp)) { 2759 if (blcmp != blp && !(blcmp->bl_flags & BLF_DELETED)) 2760 mac_link_redo(blcmp->bl_mh, newls); 2761 } 2762 bmp = bip->bi_mac; 2763 if ((bmp->bm_linkstate = newls) != LINK_STATE_DOWN) 2764 bmp->bm_linkstate = LINK_STATE_UP; 2765 mac_link_redo(bmp->bm_mh, bmp->bm_linkstate); 2766 } 2767 rw_exit(&bip->bi_rwlock); 2768 return (newls); 2769 } 2770 2771 static void 2772 bridge_add_link(void *arg) 2773 { 2774 mblk_t *mp = arg; 2775 bridge_stream_t *bsp; 2776 bridge_inst_t *bip, *bipt; 2777 bridge_mac_t *bmp; 2778 datalink_id_t linkid; 2779 int err; 2780 mac_handle_t mh; 2781 uint_t maxsdu; 2782 bridge_link_t *blp = NULL, *blpt; 2783 const mac_info_t *mip; 2784 boolean_t macopen = B_FALSE; 2785 char linkname[MAXLINKNAMELEN]; 2786 char kstatname[KSTAT_STRLEN]; 2787 int i; 2788 link_state_t linkstate; 2789 mblk_t *mlist; 2790 2791 bsp = (bridge_stream_t *)mp->b_next; 2792 mp->b_next = NULL; 2793 bip = bsp->bs_inst; 2794 /* LINTED: alignment */ 2795 linkid = *(datalink_id_t *)mp->b_cont->b_rptr; 2796 2797 /* 2798 * First make sure that there is no other bridge that has this link. 2799 * We don't want to overlap operations from two bridges; the MAC layer 2800 * supports only one bridge on a given MAC at a time. 2801 * 2802 * We rely on the fact that there's just one taskq thread for the 2803 * bridging module: once we've checked for a duplicate, we can drop the 2804 * lock, because no other thread could possibly be adding another link 2805 * until we're done. 
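 * (bridge_taskq is created with a single thread in bridge_attach(), so
 * link add and remove requests are serialized.)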
2806 */ 2807 mutex_enter(&inst_lock); 2808 for (bipt = list_head(&inst_list); bipt != NULL; 2809 bipt = list_next(&inst_list, bipt)) { 2810 rw_enter(&bipt->bi_rwlock, RW_READER); 2811 for (blpt = list_head(&bipt->bi_links); blpt != NULL; 2812 blpt = list_next(&bipt->bi_links, blpt)) { 2813 if (linkid == blpt->bl_linkid) 2814 break; 2815 } 2816 rw_exit(&bipt->bi_rwlock); 2817 if (blpt != NULL) 2818 break; 2819 } 2820 mutex_exit(&inst_lock); 2821 if (bipt != NULL) { 2822 err = EBUSY; 2823 goto fail; 2824 } 2825 2826 if ((err = mac_open_by_linkid(linkid, &mh)) != 0) 2827 goto fail; 2828 macopen = B_TRUE; 2829 2830 /* we bridge only Ethernet */ 2831 mip = mac_info(mh); 2832 if (mip->mi_media != DL_ETHER) { 2833 err = ENOTSUP; 2834 goto fail; 2835 } 2836 2837 /* 2838 * Get the current maximum SDU on this interface. If there are other 2839 * links on the bridge, then this one must match, or it errors out. 2840 * Otherwise, the first link becomes the standard for the new bridge. 2841 */ 2842 mac_sdu_get(mh, NULL, &maxsdu); 2843 bmp = bip->bi_mac; 2844 if (list_is_empty(&bip->bi_links)) { 2845 bmp->bm_maxsdu = maxsdu; 2846 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 2847 } 2848 2849 /* figure the kstat name; also used as the mac client name */ 2850 i = MBLKL(mp->b_cont) - sizeof (datalink_id_t); 2851 if (i < 0 || i >= MAXLINKNAMELEN) 2852 i = MAXLINKNAMELEN - 1; 2853 bcopy(mp->b_cont->b_rptr + sizeof (datalink_id_t), linkname, i); 2854 linkname[i] = '\0'; 2855 (void) snprintf(kstatname, sizeof (kstatname), "%s-%s", bip->bi_name, 2856 linkname); 2857 2858 if ((blp = kmem_zalloc(sizeof (*blp), KM_NOSLEEP)) == NULL) { 2859 err = ENOMEM; 2860 goto fail; 2861 } 2862 blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED); 2863 if (blp->bl_lfailmp == NULL) { 2864 kmem_free(blp, sizeof (*blp)); 2865 err = ENOMEM; 2866 goto fail; 2867 } 2868 2869 atomic_inc_uint(&bip->bi_refs); 2870 blp->bl_inst = bip; 2871 blp->bl_mh = mh; 2872 blp->bl_linkid = linkid; 2873 blp->bl_maxsdu = maxsdu; 2874 cv_init(&blp->bl_trillwait, NULL, CV_DRIVER, NULL); 2875 mutex_init(&blp->bl_trilllock, NULL, MUTEX_DRIVER, NULL); 2876 (void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs)); 2877 2878 err = mac_client_open(mh, &blp->bl_mch, kstatname, 0); 2879 if (err != 0) 2880 goto fail; 2881 blp->bl_flags |= BLF_CLIENT_OPEN; 2882 2883 err = mac_margin_add(mh, &blp->bl_margin, B_TRUE); 2884 if (err != 0) 2885 goto fail; 2886 blp->bl_flags |= BLF_MARGIN_ADDED; 2887 2888 blp->bl_mnh = mac_notify_add(mh, bridge_notify_cb, blp); 2889 2890 err = mac_bridge_set(mh, (mac_handle_t)blp); 2891 if (err != 0) 2892 goto fail; 2893 blp->bl_flags |= BLF_SET_BRIDGE; 2894 2895 err = mac_promisc_add(blp->bl_mch, MAC_CLIENT_PROMISC_ALL, NULL, 2896 blp, &blp->bl_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP); 2897 if (err != 0) 2898 goto fail; 2899 blp->bl_flags |= BLF_PROM_ADDED; 2900 2901 bridge_new_unicst(blp); 2902 2903 blp->bl_ksp = kstat_setup((kstat_named_t *)&blp->bl_kstats, 2904 link_kstats_list, Dim(link_kstats_list), kstatname); 2905 2906 /* 2907 * The link holds a reference to the bridge instance, so that the 2908 * instance can't go away before the link is freed. The insertion into 2909 * bi_links holds a reference on the link. When marking as removed 2910 * from bi_links (BLF_DELETED), drop the reference on the link. When 2911 * freeing the link, drop the reference on the instance. 
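 * None of the steps from here on can fail, so once the link is on
 * bi_links the ioctl is acknowledged with success.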
2912 */ 2913 rw_enter(&bip->bi_rwlock, RW_WRITER); 2914 list_insert_tail(&bip->bi_links, blp); 2915 atomic_inc_uint(&blp->bl_refs); 2916 2917 /* 2918 * If the new link is no good on this bridge, then let the daemon know 2919 * about the problem. 2920 */ 2921 mlist = NULL; 2922 if (maxsdu != bmp->bm_maxsdu) 2923 link_sdu_fail(blp, B_TRUE, &mlist); 2924 rw_exit(&bip->bi_rwlock); 2925 send_up_messages(bip, mlist); 2926 2927 /* 2928 * Trigger a link state update so that if this link is the first one 2929 * "up" in the bridge, then we notify everyone. This triggers a trip 2930 * through bridge_ls_cb. 2931 */ 2932 linkstate = mac_stat_get(mh, MAC_STAT_LOWLINK_STATE); 2933 blp->bl_linkstate = LINK_STATE_DOWN; 2934 mac_link_update(mh, linkstate); 2935 2936 /* 2937 * We now need to report back to the stream that invoked us, and then 2938 * drop the reference on the stream that we're holding. 2939 */ 2940 miocack(bsp->bs_wq, mp, 0, 0); 2941 stream_unref(bsp); 2942 return; 2943 2944 fail: 2945 if (blp == NULL) { 2946 if (macopen) 2947 mac_close(mh); 2948 } else { 2949 link_shutdown(blp); 2950 link_free(blp); 2951 } 2952 miocnak(bsp->bs_wq, mp, 0, err); 2953 stream_unref(bsp); 2954 } 2955 2956 static void 2957 bridge_rem_link(void *arg) 2958 { 2959 mblk_t *mp = arg; 2960 bridge_stream_t *bsp; 2961 bridge_inst_t *bip; 2962 bridge_mac_t *bmp; 2963 datalink_id_t linkid; 2964 bridge_link_t *blp, *blsave; 2965 boolean_t found; 2966 mblk_t *mlist; 2967 2968 bsp = (bridge_stream_t *)mp->b_next; 2969 mp->b_next = NULL; 2970 bip = bsp->bs_inst; 2971 /* LINTED: alignment */ 2972 linkid = *(datalink_id_t *)mp->b_cont->b_rptr; 2973 2974 /* 2975 * We become reader here so that we can loop over the other links and 2976 * deliver link up/down notification. 2977 */ 2978 rw_enter(&bip->bi_rwlock, RW_READER); 2979 found = B_FALSE; 2980 for (blp = list_head(&bip->bi_links); blp != NULL; 2981 blp = list_next(&bip->bi_links, blp)) { 2982 if (blp->bl_linkid == linkid && 2983 !(blp->bl_flags & BLF_DELETED)) { 2984 blp->bl_flags |= BLF_DELETED; 2985 (void) ddi_taskq_dispatch(bridge_taskq, link_shutdown, 2986 blp, DDI_SLEEP); 2987 found = B_TRUE; 2988 break; 2989 } 2990 } 2991 2992 /* 2993 * Check if this link is up and the remainder of the links are all 2994 * down. 2995 */ 2996 if (blp != NULL && blp->bl_linkstate != LINK_STATE_DOWN) { 2997 for (blp = list_head(&bip->bi_links); blp != NULL; 2998 blp = list_next(&bip->bi_links, blp)) { 2999 if (blp->bl_linkstate != LINK_STATE_DOWN && 3000 !(blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) 3001 break; 3002 } 3003 if (blp == NULL) { 3004 for (blp = list_head(&bip->bi_links); blp != NULL; 3005 blp = list_next(&bip->bi_links, blp)) { 3006 if (!(blp->bl_flags & BLF_DELETED)) 3007 mac_link_redo(blp->bl_mh, 3008 LINK_STATE_DOWN); 3009 } 3010 bmp = bip->bi_mac; 3011 bmp->bm_linkstate = LINK_STATE_DOWN; 3012 mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN); 3013 } 3014 } 3015 3016 /* 3017 * Check if there's just one working link left on the bridge. If so, 3018 * then that link is now authoritative for bridge MTU. 
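 * The scan below stops as soon as a second surviving link is found;
 * blsave's SDU is adopted for the bridge only when it turns out to be the
 * sole survivor and differs from the current value.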
3019 */ 3020 blsave = NULL; 3021 for (blp = list_head(&bip->bi_links); blp != NULL; 3022 blp = list_next(&bip->bi_links, blp)) { 3023 if (!(blp->bl_flags & BLF_DELETED)) { 3024 if (blsave == NULL) 3025 blsave = blp; 3026 else 3027 break; 3028 } 3029 } 3030 mlist = NULL; 3031 bmp = bip->bi_mac; 3032 if (blsave != NULL && blp == NULL && 3033 blsave->bl_maxsdu != bmp->bm_maxsdu) { 3034 bmp->bm_maxsdu = blsave->bl_maxsdu; 3035 (void) mac_maxsdu_update(bmp->bm_mh, blsave->bl_maxsdu); 3036 link_sdu_fail(blsave, B_FALSE, &mlist); 3037 } 3038 rw_exit(&bip->bi_rwlock); 3039 send_up_messages(bip, mlist); 3040 3041 if (found) 3042 miocack(bsp->bs_wq, mp, 0, 0); 3043 else 3044 miocnak(bsp->bs_wq, mp, 0, ENOENT); 3045 stream_unref(bsp); 3046 } 3047 3048 /* 3049 * This function intentionally returns with bi_rwlock held; it is intended for 3050 * quick checks and updates. 3051 */ 3052 static bridge_link_t * 3053 enter_link(bridge_inst_t *bip, datalink_id_t linkid) 3054 { 3055 bridge_link_t *blp; 3056 3057 rw_enter(&bip->bi_rwlock, RW_READER); 3058 for (blp = list_head(&bip->bi_links); blp != NULL; 3059 blp = list_next(&bip->bi_links, blp)) { 3060 if (blp->bl_linkid == linkid && !(blp->bl_flags & BLF_DELETED)) 3061 break; 3062 } 3063 return (blp); 3064 } 3065 3066 static void 3067 bridge_ioctl(queue_t *wq, mblk_t *mp) 3068 { 3069 bridge_stream_t *bsp = wq->q_ptr; 3070 bridge_inst_t *bip; 3071 struct iocblk *iop; 3072 int rc = EINVAL; 3073 int len = 0; 3074 bridge_link_t *blp; 3075 cred_t *cr; 3076 3077 /* LINTED: alignment */ 3078 iop = (struct iocblk *)mp->b_rptr; 3079 3080 /* 3081 * For now, all of the bridge ioctls are privileged. 3082 */ 3083 if ((cr = msg_getcred(mp, NULL)) == NULL) 3084 cr = iop->ioc_cr; 3085 if (cr != NULL && secpolicy_net_config(cr, B_FALSE) != 0) { 3086 miocnak(wq, mp, 0, EPERM); 3087 return; 3088 } 3089 3090 switch (iop->ioc_cmd) { 3091 case BRIOC_NEWBRIDGE: { 3092 bridge_newbridge_t *bnb; 3093 3094 if (bsp->bs_inst != NULL || 3095 (rc = miocpullup(mp, sizeof (bridge_newbridge_t))) != 0) 3096 break; 3097 /* LINTED: alignment */ 3098 bnb = (bridge_newbridge_t *)mp->b_cont->b_rptr; 3099 bnb->bnb_name[MAXNAMELEN-1] = '\0'; 3100 rc = bridge_create(bnb->bnb_linkid, bnb->bnb_name, &bip, cr); 3101 if (rc != 0) 3102 break; 3103 3104 rw_enter(&bip->bi_rwlock, RW_WRITER); 3105 if (bip->bi_control != NULL) { 3106 rw_exit(&bip->bi_rwlock); 3107 bridge_unref(bip); 3108 rc = EBUSY; 3109 } else { 3110 atomic_inc_uint(&bip->bi_refs); 3111 bsp->bs_inst = bip; /* stream holds reference */ 3112 bip->bi_control = bsp; 3113 rw_exit(&bip->bi_rwlock); 3114 rc = 0; 3115 } 3116 break; 3117 } 3118 3119 case BRIOC_ADDLINK: 3120 if ((bip = bsp->bs_inst) == NULL || 3121 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0) 3122 break; 3123 /* 3124 * We cannot perform the action in this thread, because we're 3125 * not in process context, and we may already be holding 3126 * MAC-related locks. Place the request on taskq. 3127 */ 3128 mp->b_next = (mblk_t *)bsp; 3129 stream_ref(bsp); 3130 (void) ddi_taskq_dispatch(bridge_taskq, bridge_add_link, mp, 3131 DDI_SLEEP); 3132 return; 3133 3134 case BRIOC_REMLINK: 3135 if ((bip = bsp->bs_inst) == NULL || 3136 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0) 3137 break; 3138 /* 3139 * We cannot perform the action in this thread, because we're 3140 * not in process context, and we may already be holding 3141 * MAC-related locks. Place the request on taskq. 
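 * (The stream pointer rides in b_next, and stream_ref() keeps the stream
 * from closing until the taskq callback calls stream_unref().)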
3142 */ 3143 mp->b_next = (mblk_t *)bsp; 3144 stream_ref(bsp); 3145 (void) ddi_taskq_dispatch(bridge_taskq, bridge_rem_link, mp, 3146 DDI_SLEEP); 3147 return; 3148 3149 case BRIOC_SETSTATE: { 3150 bridge_setstate_t *bss; 3151 3152 if ((bip = bsp->bs_inst) == NULL || 3153 (rc = miocpullup(mp, sizeof (*bss))) != 0) 3154 break; 3155 /* LINTED: alignment */ 3156 bss = (bridge_setstate_t *)mp->b_cont->b_rptr; 3157 if ((blp = enter_link(bip, bss->bss_linkid)) == NULL) { 3158 rc = ENOENT; 3159 } else { 3160 rc = 0; 3161 blp->bl_state = bss->bss_state; 3162 } 3163 rw_exit(&bip->bi_rwlock); 3164 break; 3165 } 3166 3167 case BRIOC_SETPVID: { 3168 bridge_setpvid_t *bsv; 3169 3170 if ((bip = bsp->bs_inst) == NULL || 3171 (rc = miocpullup(mp, sizeof (*bsv))) != 0) 3172 break; 3173 /* LINTED: alignment */ 3174 bsv = (bridge_setpvid_t *)mp->b_cont->b_rptr; 3175 if (bsv->bsv_vlan > VLAN_ID_MAX) 3176 break; 3177 if ((blp = enter_link(bip, bsv->bsv_linkid)) == NULL) { 3178 rc = ENOENT; 3179 } else if (blp->bl_pvid == bsv->bsv_vlan) { 3180 rc = 0; 3181 } else { 3182 rc = 0; 3183 BRIDGE_VLAN_CLR(blp, blp->bl_pvid); 3184 blp->bl_pvid = bsv->bsv_vlan; 3185 if (blp->bl_pvid != 0) 3186 BRIDGE_VLAN_SET(blp, blp->bl_pvid); 3187 } 3188 rw_exit(&bip->bi_rwlock); 3189 break; 3190 } 3191 3192 case BRIOC_VLANENAB: { 3193 bridge_vlanenab_t *bve; 3194 3195 if ((bip = bsp->bs_inst) == NULL || 3196 (rc = miocpullup(mp, sizeof (*bve))) != 0) 3197 break; 3198 /* LINTED: alignment */ 3199 bve = (bridge_vlanenab_t *)mp->b_cont->b_rptr; 3200 if (bve->bve_vlan > VLAN_ID_MAX) 3201 break; 3202 if ((blp = enter_link(bip, bve->bve_linkid)) == NULL) { 3203 rc = ENOENT; 3204 } else { 3205 rc = 0; 3206 /* special case: vlan 0 means "all" */ 3207 if (bve->bve_vlan == 0) { 3208 (void) memset(blp->bl_vlans, 3209 bve->bve_onoff ? ~0 : 0, 3210 sizeof (blp->bl_vlans)); 3211 BRIDGE_VLAN_CLR(blp, 0); 3212 if (blp->bl_pvid != 0) 3213 BRIDGE_VLAN_SET(blp, blp->bl_pvid); 3214 } else if (bve->bve_vlan == blp->bl_pvid) { 3215 rc = EINVAL; 3216 } else if (bve->bve_onoff) { 3217 BRIDGE_VLAN_SET(blp, bve->bve_vlan); 3218 } else { 3219 BRIDGE_VLAN_CLR(blp, bve->bve_vlan); 3220 } 3221 } 3222 rw_exit(&bip->bi_rwlock); 3223 break; 3224 } 3225 3226 case BRIOC_FLUSHFWD: { 3227 bridge_flushfwd_t *bff; 3228 bridge_fwd_t *bfp, *bfnext; 3229 avl_tree_t fwd_scavenge; 3230 int i; 3231 3232 if ((bip = bsp->bs_inst) == NULL || 3233 (rc = miocpullup(mp, sizeof (*bff))) != 0) 3234 break; 3235 /* LINTED: alignment */ 3236 bff = (bridge_flushfwd_t *)mp->b_cont->b_rptr; 3237 rw_enter(&bip->bi_rwlock, RW_WRITER); 3238 /* This case means "all" */ 3239 if (bff->bff_linkid == DATALINK_INVALID_LINKID) { 3240 blp = NULL; 3241 } else { 3242 for (blp = list_head(&bip->bi_links); blp != NULL; 3243 blp = list_next(&bip->bi_links, blp)) { 3244 if (blp->bl_linkid == bff->bff_linkid && 3245 !(blp->bl_flags & BLF_DELETED)) 3246 break; 3247 } 3248 if (blp == NULL) { 3249 rc = ENOENT; 3250 rw_exit(&bip->bi_rwlock); 3251 break; 3252 } 3253 } 3254 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 3255 offsetof(bridge_fwd_t, bf_node)); 3256 bfnext = avl_first(&bip->bi_fwd); 3257 while ((bfp = bfnext) != NULL) { 3258 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 3259 if (bfp->bf_flags & BFF_LOCALADDR) 3260 continue; 3261 if (blp != NULL) { 3262 for (i = 0; i < bfp->bf_maxlinks; i++) { 3263 if (bfp->bf_links[i] == blp) 3264 break; 3265 } 3266 /* 3267 * If the link is there and we're excluding, 3268 * then skip. If the link is not there and 3269 * we're doing only that link, then skip. 
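 * (Equivalently, an entry is flushed exactly when the link's membership
 * in bf_links[] differs from bff_exclude.)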
3270 */ 3271 if ((i < bfp->bf_maxlinks) == bff->bff_exclude) 3272 continue; 3273 } 3274 ASSERT(bfp->bf_flags & BFF_INTREE); 3275 avl_remove(&bip->bi_fwd, bfp); 3276 bfp->bf_flags &= ~BFF_INTREE; 3277 avl_add(&fwd_scavenge, bfp); 3278 } 3279 rw_exit(&bip->bi_rwlock); 3280 bfnext = avl_first(&fwd_scavenge); 3281 while ((bfp = bfnext) != NULL) { 3282 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 3283 avl_remove(&fwd_scavenge, bfp); 3284 fwd_unref(bfp); /* drop tree reference */ 3285 } 3286 avl_destroy(&fwd_scavenge); 3287 break; 3288 } 3289 3290 case BRIOC_TABLEMAX: 3291 if ((bip = bsp->bs_inst) == NULL || 3292 (rc = miocpullup(mp, sizeof (uint32_t))) != 0) 3293 break; 3294 /* LINTED: alignment */ 3295 bip->bi_tablemax = *(uint32_t *)mp->b_cont->b_rptr; 3296 break; 3297 } 3298 3299 if (rc == 0) 3300 miocack(wq, mp, len, 0); 3301 else 3302 miocnak(wq, mp, 0, rc); 3303 } 3304 3305 static void 3306 bridge_wput(queue_t *wq, mblk_t *mp) 3307 { 3308 switch (DB_TYPE(mp)) { 3309 case M_IOCTL: 3310 bridge_ioctl(wq, mp); 3311 break; 3312 case M_FLUSH: 3313 if (*mp->b_rptr & FLUSHW) 3314 *mp->b_rptr &= ~FLUSHW; 3315 if (*mp->b_rptr & FLUSHR) 3316 qreply(wq, mp); 3317 else 3318 freemsg(mp); 3319 break; 3320 default: 3321 freemsg(mp); 3322 break; 3323 } 3324 } 3325 3326 /* 3327 * This function allocates the main data structures for the bridge driver and 3328 * connects us into devfs. 3329 */ 3330 static void 3331 bridge_inst_init(void) 3332 { 3333 bridge_scan_interval = 5 * drv_usectohz(1000000); 3334 bridge_fwd_age = 25 * drv_usectohz(1000000); 3335 3336 rw_init(&bmac_rwlock, NULL, RW_DRIVER, NULL); 3337 list_create(&bmac_list, sizeof (bridge_mac_t), 3338 offsetof(bridge_mac_t, bm_node)); 3339 list_create(&inst_list, sizeof (bridge_inst_t), 3340 offsetof(bridge_inst_t, bi_node)); 3341 cv_init(&inst_cv, NULL, CV_DRIVER, NULL); 3342 mutex_init(&inst_lock, NULL, MUTEX_DRIVER, NULL); 3343 cv_init(&stream_ref_cv, NULL, CV_DRIVER, NULL); 3344 mutex_init(&stream_ref_lock, NULL, MUTEX_DRIVER, NULL); 3345 3346 mac_bridge_vectors(bridge_xmit_cb, bridge_recv_cb, bridge_ref_cb, 3347 bridge_ls_cb); 3348 } 3349 3350 /* 3351 * This function disconnects from devfs and destroys all data structures in 3352 * preparation for unload. It's assumed that there are no active bridge 3353 * references left at this point. 3354 */ 3355 static void 3356 bridge_inst_fini(void) 3357 { 3358 mac_bridge_vectors(NULL, NULL, NULL, NULL); 3359 if (bridge_timerid != 0) 3360 (void) untimeout(bridge_timerid); 3361 rw_destroy(&bmac_rwlock); 3362 list_destroy(&bmac_list); 3363 list_destroy(&inst_list); 3364 cv_destroy(&inst_cv); 3365 mutex_destroy(&inst_lock); 3366 cv_destroy(&stream_ref_cv); 3367 mutex_destroy(&stream_ref_lock); 3368 } 3369 3370 /* 3371 * bridge_attach() 3372 * 3373 * Description: 3374 * Attach bridge driver to the system. 
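 * The function below creates the BRIDGE_CTL minor node, registers the
 * bridge_ioc_list dld ioctls, and creates the single-threaded taskq used
 * for link add and remove requests.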
3375 */ 3376 static int 3377 bridge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3378 { 3379 if (cmd != DDI_ATTACH) 3380 return (DDI_FAILURE); 3381 3382 if (ddi_create_minor_node(dip, BRIDGE_CTL, S_IFCHR, 0, DDI_PSEUDO, 3383 CLONE_DEV) == DDI_FAILURE) { 3384 return (DDI_FAILURE); 3385 } 3386 3387 if (dld_ioc_register(BRIDGE_IOC, bridge_ioc_list, 3388 DLDIOCCNT(bridge_ioc_list)) != 0) { 3389 ddi_remove_minor_node(dip, BRIDGE_CTL); 3390 return (DDI_FAILURE); 3391 } 3392 3393 bridge_dev_info = dip; 3394 bridge_major = ddi_driver_major(dip); 3395 bridge_taskq = ddi_taskq_create(dip, "bridge", 1, TASKQ_DEFAULTPRI, 0); 3396 return (DDI_SUCCESS); 3397 } 3398 3399 /* 3400 * bridge_detach() 3401 * 3402 * Description: 3403 * Detach an interface to the system. 3404 */ 3405 static int 3406 bridge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3407 { 3408 if (cmd != DDI_DETACH) 3409 return (DDI_FAILURE); 3410 3411 ddi_remove_minor_node(dip, NULL); 3412 ddi_taskq_destroy(bridge_taskq); 3413 bridge_dev_info = NULL; 3414 return (DDI_SUCCESS); 3415 } 3416 3417 /* 3418 * bridge_info() 3419 * 3420 * Description: 3421 * Translate "dev_t" to a pointer to the associated "dev_info_t". 3422 */ 3423 /* ARGSUSED */ 3424 static int 3425 bridge_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, 3426 void **result) 3427 { 3428 int rc; 3429 3430 switch (infocmd) { 3431 case DDI_INFO_DEVT2DEVINFO: 3432 if (bridge_dev_info == NULL) { 3433 rc = DDI_FAILURE; 3434 } else { 3435 *result = (void *)bridge_dev_info; 3436 rc = DDI_SUCCESS; 3437 } 3438 break; 3439 case DDI_INFO_DEVT2INSTANCE: 3440 *result = NULL; 3441 rc = DDI_SUCCESS; 3442 break; 3443 default: 3444 rc = DDI_FAILURE; 3445 break; 3446 } 3447 return (rc); 3448 } 3449 3450 static struct module_info bridge_modinfo = { 3451 2105, /* mi_idnum */ 3452 "bridge", /* mi_idname */ 3453 0, /* mi_minpsz */ 3454 16384, /* mi_maxpsz */ 3455 65536, /* mi_hiwat */ 3456 128 /* mi_lowat */ 3457 }; 3458 3459 static struct qinit bridge_rinit = { 3460 NULL, /* qi_putp */ 3461 NULL, /* qi_srvp */ 3462 bridge_open, /* qi_qopen */ 3463 bridge_close, /* qi_qclose */ 3464 NULL, /* qi_qadmin */ 3465 &bridge_modinfo, /* qi_minfo */ 3466 NULL /* qi_mstat */ 3467 }; 3468 3469 static struct qinit bridge_winit = { 3470 (int (*)())bridge_wput, /* qi_putp */ 3471 NULL, /* qi_srvp */ 3472 NULL, /* qi_qopen */ 3473 NULL, /* qi_qclose */ 3474 NULL, /* qi_qadmin */ 3475 &bridge_modinfo, /* qi_minfo */ 3476 NULL /* qi_mstat */ 3477 }; 3478 3479 static struct streamtab bridge_tab = { 3480 &bridge_rinit, /* st_rdinit */ 3481 &bridge_winit /* st_wrinit */ 3482 }; 3483 3484 /* No STREAMS perimeters; we do all our own locking */ 3485 DDI_DEFINE_STREAM_OPS(bridge_ops, nulldev, nulldev, bridge_attach, 3486 bridge_detach, nodev, bridge_info, D_NEW | D_MP, &bridge_tab, 3487 ddi_quiesce_not_supported); 3488 3489 static struct modldrv modldrv = { 3490 &mod_driverops, 3491 "bridging driver", 3492 &bridge_ops 3493 }; 3494 3495 static struct modlinkage modlinkage = { 3496 MODREV_1, 3497 (void *)&modldrv, 3498 NULL 3499 }; 3500 3501 int 3502 _init(void) 3503 { 3504 int retv; 3505 3506 bridge_inst_init(); 3507 if ((retv = mod_install(&modlinkage)) != 0) 3508 bridge_inst_fini(); 3509 return (retv); 3510 } 3511 3512 int 3513 _fini(void) 3514 { 3515 int retv; 3516 3517 rw_enter(&bmac_rwlock, RW_READER); 3518 retv = list_is_empty(&bmac_list) ? 
0 : EBUSY; 3519 rw_exit(&bmac_rwlock); 3520 if (retv == 0 && 3521 (retv = mod_remove(&modlinkage)) == 0) 3522 bridge_inst_fini(); 3523 return (retv); 3524 } 3525 3526 int 3527 _info(struct modinfo *modinfop) 3528 { 3529 return (mod_info(&modlinkage, modinfop)); 3530 } 3531