/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * This module implements a STREAMS driver that provides layer-two (Ethernet)
 * bridging functionality. The STREAMS interface is used to provide
 * observability (snoop/wireshark) and control, but not for interface plumbing.
 */

#include <sys/types.h>
#include <sys/bitmap.h>
#include <sys/cmn_err.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/errno.h>
#include <sys/kstat.h>
#include <sys/modctl.h>
#include <sys/note.h>
#include <sys/param.h>
#include <sys/policy.h>
#include <sys/sdt.h>
#include <sys/stat.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/sunddi.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/dlpi.h>
#include <sys/dls.h>
#include <sys/mac_ether.h>
#include <sys/mac_provider.h>
#include <sys/mac_client_priv.h>
#include <sys/mac_impl.h>
#include <sys/vlan.h>
#include <net/bridge.h>
#include <net/bridge_impl.h>
#include <net/trill.h>
#include <sys/dld_ioc.h>

/*
 * Locks and reference counts: object lifetime and design.
 *
 * bridge_mac_t
 *  Bridge mac (snoop) instances are in bmac_list, which is protected by
 *  bmac_rwlock. They're allocated by bmac_alloc and freed by bridge_timer().
 *  Every bridge_inst_t has a single bridge_mac_t, but when bridge_inst_t goes
 *  away, the bridge_mac_t remains until either all of the users go away
 *  (detected by a timer) or until the instance is picked up again by the same
 *  bridge starting back up.
 *
 * bridge_inst_t
 *  Bridge instances are in inst_list, which is protected by inst_lock.
 *  They're allocated by inst_alloc() and freed by inst_free(). After
 *  allocation, an instance is placed in inst_list, and the reference count is
 *  incremented to represent this. That reference is decremented when the
 *  BIF_SHUTDOWN flag is set, and no new increments may occur. When the last
 *  reference is freed, the instance is removed from the list.
 *
 *  Bridge instances have lists of links and an AVL tree of forwarding
 *  entries. Each of these structures holds one reference on the bridge
 *  instance. These lists and tree are protected by bi_rwlock.
 *
 * bridge_stream_t
 *  Bridge streams are allocated by stream_alloc() and freed by stream_free().
 *  These streams are created when "bridged" opens /dev/bridgectl, and are
 *  used to create new bridge instances (via BRIOC_NEWBRIDGE) and control the
 *  links on the bridge. When a stream closes, the bridge instance created is
 *  destroyed. There's at most one bridge instance for a given control
 *  stream.
 *
 * bridge_link_t
 *  Links are allocated by bridge_add_link() and freed by link_free(). The
 *  bi_links list holds a reference to the link. When the BLF_DELETED flag is
 *  set, that reference is dropped. The link isn't removed from the list
 *  until the last reference drops. Each forwarding entry that uses a given
 *  link holds a reference, as does each thread transmitting a packet via the
 *  link. The MAC layer calls in via bridge_ref_cb() to hold a reference on
 *  a link when transmitting.
 *
 *  It's important that once BLF_DELETED is set, there's no way for the
 *  reference count to increase again. If it can, then the link may be
 *  double-freed. The BLF_FREED flag is intended for use with assertions to
 *  guard against this in testing.
 *
 * bridge_fwd_t
 *  Bridge forwarding entries are allocated by bridge_recv_cb() and freed by
 *  fwd_free(). The bi_fwd AVL tree holds one reference to the entry. Unlike
 *  other data structures, the reference is dropped when the entry is removed
 *  from the tree by fwd_delete(), and the BFF_INTREE flag is removed. Each
 *  thread that's forwarding a packet to a known destination holds a reference
 *  to a forwarding entry.
 *
 * TRILL notes:
 *
 *  The TRILL module does all of its I/O through bridging. It uses references
 *  on the bridge_inst_t and bridge_link_t structures, and has seven entry
 *  points and four callbacks. One entry point is for setting the callbacks
 *  (bridge_trill_register_cb). There are four entry points for taking bridge
 *  and link references (bridge_trill_{br,ln}{ref,unref}). The final two
 *  entry points are for decapsulated packets from TRILL (bridge_trill_decaps)
 *  that need to be bridged locally, and for TRILL-encapsulated output packets
 *  (bridge_trill_output).
 *
 *  The four callbacks comprise two notification functions for bridges and
 *  links being deleted, one function for raw received TRILL packets, and one
 *  for bridge output to non-local TRILL destinations (tunnel entry).
 */
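
/*
 * Illustrative example (editor's sketch, not part of the original source):
 * the lookup/hold/release discipline described above. A caller that needs a
 * bridge instance takes a reference through the lookup routine and must drop
 * it when done:
 *
 *	bridge_inst_t *bip;
 *
 *	if ((bip = bridge_find_name("mybridge0")) != NULL) {
 *		... use bip; bi_refs is held, so it cannot be freed ...
 *		bridge_unref(bip);
 *	}
 *
 * Links follow the same pattern, with the added rule that no new references
 * may be taken once BLF_DELETED has been set ("mybridge0" is a placeholder
 * instance name).
 */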

/*
 * Ethernet reserved multicast addresses for TRILL; used also in TRILL module.
 */
const uint8_t all_isis_rbridges[] = ALL_ISIS_RBRIDGES;
static const uint8_t all_esadi_rbridges[] = ALL_ESADI_RBRIDGES;
const uint8_t bridge_group_address[] = BRIDGE_GROUP_ADDRESS;

static const char *inst_kstats_list[] = { KSINST_NAMES };
static const char *link_kstats_list[] = { KSLINK_NAMES };

#define	KREF(p, m, vn)	p->m.vn.value.ui64
#define	KINCR(p, m, vn)	++KREF(p, m, vn)
#define	KDECR(p, m, vn)	--KREF(p, m, vn)

#define	KIPINCR(p, vn)	KINCR(p, bi_kstats, vn)
#define	KIPDECR(p, vn)	KDECR(p, bi_kstats, vn)
#define	KLPINCR(p, vn)	KINCR(p, bl_kstats, vn)

#define	KIINCR(vn)	KIPINCR(bip, vn)
#define	KIDECR(vn)	KIPDECR(bip, vn)
#define	KLINCR(vn)	KLPINCR(blp, vn)

#define	Dim(x)		(sizeof (x) / sizeof (*(x)))

/* Amount of overhead added when encapsulating with VLAN headers */
#define	VLAN_INCR	(sizeof (struct ether_vlan_header) - \
			sizeof (struct ether_header))

static dev_info_t *bridge_dev_info;
static major_t bridge_major;
static ddi_taskq_t *bridge_taskq;

/*
 * These are the bridge instance management data structures. The mutex lock
 * protects the list of bridge instances. A reference count is then used on
 * each instance to determine when to free it. We use mac_minor_hold() to
 * allocate minor_t values, which are used both for self-cloning /dev/net/
 * device nodes as well as client streams. Minor node 0 is reserved for the
 * allocation control node.
 */
static list_t inst_list;
static kcondvar_t inst_cv;		/* Allows us to wait for shutdown */
static kmutex_t inst_lock;

static krwlock_t bmac_rwlock;
static list_t bmac_list;

/* Wait for taskq entries that use STREAMS */
static kcondvar_t stream_ref_cv;
static kmutex_t stream_ref_lock;

static timeout_id_t bridge_timerid;
static clock_t bridge_scan_interval;
static clock_t bridge_fwd_age;

static bridge_inst_t *bridge_find_name(const char *);
static void bridge_timer(void *);
static void bridge_unref(bridge_inst_t *);

static const uint8_t zero_addr[ETHERADDRL] = { 0 };

/* Global TRILL linkage */
static trill_recv_pkt_t trill_recv_fn;
static trill_encap_pkt_t trill_encap_fn;
static trill_br_dstr_t trill_brdstr_fn;
static trill_ln_dstr_t trill_lndstr_fn;

/* special settings to accommodate DLD flow control; see dld_str.c */
static struct module_info bridge_dld_modinfo = {
	0,			/* mi_idnum */
	BRIDGE_DEV_NAME,	/* mi_idname */
	0,			/* mi_minpsz */
	INFPSZ,			/* mi_maxpsz */
	1,			/* mi_hiwat */
	0			/* mi_lowat */
};

static struct qinit bridge_dld_rinit = {
	NULL,			/* qi_putp */
	NULL,			/* qi_srvp */
	dld_open,		/* qi_qopen */
	dld_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&bridge_dld_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

static struct qinit bridge_dld_winit = {
	(int (*)())dld_wput,	/* qi_putp */
	(int (*)())dld_wsrv,	/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&bridge_dld_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

static int bridge_ioc_listfwd(void *, intptr_t, int, cred_t *, int *);

/* GLDv3 control ioctls used by Bridging */
static dld_ioc_info_t bridge_ioc_list[] = {
	{BRIDGE_IOC_LISTFWD, DLDCOPYINOUT, sizeof (bridge_listfwd_t),
	    bridge_ioc_listfwd, NULL},
};

/*
 * Given a bridge mac pointer, get a ref-held pointer to the corresponding
 * bridge instance, if any. We must hold the global bmac_rwlock so that
 * bm_inst doesn't slide out from under us.
 */
static bridge_inst_t *
mac_to_inst(const bridge_mac_t *bmp)
{
	bridge_inst_t *bip;

	rw_enter(&bmac_rwlock, RW_READER);
	if ((bip = bmp->bm_inst) != NULL)
		atomic_inc_uint(&bip->bi_refs);
	rw_exit(&bmac_rwlock);
	return (bip);
}

static void
link_sdu_fail(bridge_link_t *blp, boolean_t failed, mblk_t **mlist)
{
	mblk_t *mp;
	bridge_ctl_t *bcp;
	bridge_link_t *blcmp;
	bridge_inst_t *bip;
	bridge_mac_t *bmp;

	if (failed) {
		if (blp->bl_flags & BLF_SDUFAIL)
			return;
		blp->bl_flags |= BLF_SDUFAIL;
	} else {
		if (!(blp->bl_flags & BLF_SDUFAIL))
			return;
		blp->bl_flags &= ~BLF_SDUFAIL;
	}

	/*
	 * If this link is otherwise up, then check if there are any other
	 * non-failed non-down links. If not, then we control the state of the
	 * whole bridge.
	 */
	bip = blp->bl_inst;
	bmp = bip->bi_mac;
	if (blp->bl_linkstate != LINK_STATE_DOWN) {
		for (blcmp = list_head(&bip->bi_links); blcmp != NULL;
		    blcmp = list_next(&bip->bi_links, blcmp)) {
			if (blp != blcmp &&
			    !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) &&
			    blcmp->bl_linkstate != LINK_STATE_DOWN)
				break;
		}
		if (blcmp == NULL) {
			bmp->bm_linkstate = failed ? LINK_STATE_DOWN :
			    LINK_STATE_UP;
			mac_link_redo(bmp->bm_mh, bmp->bm_linkstate);
		}
	}

	/*
	 * If we're becoming failed, then the link's current true state needs
	 * to be reflected upwards to this link's clients. If we're becoming
	 * unfailed, then we report the state of the bridge to all of this
	 * link's clients instead.
	 */
	if (failed) {
		if (bmp->bm_linkstate != blp->bl_linkstate)
			mac_link_redo(blp->bl_mh, blp->bl_linkstate);
	} else {
		mac_link_redo(blp->bl_mh, bmp->bm_linkstate);
	}

	/* get the current mblk we're going to send up */
	if ((mp = blp->bl_lfailmp) == NULL &&
	    (mp = allocb(sizeof (bridge_ctl_t), BPRI_MED)) == NULL)
		return;

	/* get a new one for next time */
	blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED);

	/* if none for next time, then report only failures */
	if (blp->bl_lfailmp == NULL && !failed) {
		blp->bl_lfailmp = mp;
		return;
	}

	/* LINTED: alignment */
	bcp = (bridge_ctl_t *)mp->b_rptr;
	bcp->bc_linkid = blp->bl_linkid;
	bcp->bc_failed = failed;
	mp->b_wptr = (uchar_t *)(bcp + 1);
	mp->b_next = *mlist;
	*mlist = mp;
}

/*
 * Send control messages (link SDU changes) using the stream to the
 * bridge instance daemon.
 */
static void
send_up_messages(bridge_inst_t *bip, mblk_t *mp)
{
	mblk_t *mnext;
	queue_t *rq;

	rq = bip->bi_control->bs_wq;
	rq = OTHERQ(rq);
	while (mp != NULL) {
		mnext = mp->b_next;
		mp->b_next = NULL;
		putnext(rq, mp);
		mp = mnext;
	}
}

/* ARGSUSED */
static int
bridge_m_getstat(void *arg, uint_t stat, uint64_t *val)
{
	return (ENOTSUP);
}

static int
bridge_m_start(void *arg)
{
	bridge_mac_t *bmp = arg;

	bmp->bm_flags |= BMF_STARTED;
	return (0);
}

static void
bridge_m_stop(void *arg)
{
	bridge_mac_t *bmp = arg;

	bmp->bm_flags &= ~BMF_STARTED;
}

/* ARGSUSED */
static int
bridge_m_setpromisc(void *arg, boolean_t on)
{
	return (0);
}

/* ARGSUSED */
static int
bridge_m_multicst(void *arg, boolean_t add, const uint8_t *mca)
{
	return (0);
}

/* ARGSUSED */
static int
bridge_m_unicst(void *arg, const uint8_t *macaddr)
{
	return (ENOTSUP);
}

static mblk_t *
bridge_m_tx(void *arg, mblk_t *mp)
{
	_NOTE(ARGUNUSED(arg));
	freemsgchain(mp);
	return (NULL);
}

/* ARGSUSED */
static int
bridge_ioc_listfwd(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp)
{
	bridge_listfwd_t *blf = karg;
	bridge_inst_t *bip;
	bridge_fwd_t *bfp, match;
	avl_index_t where;

	bip = bridge_find_name(blf->blf_name);
	if (bip == NULL)
		return (ENOENT);

	bcopy(blf->blf_dest, match.bf_dest, ETHERADDRL);
	match.bf_flags |= BFF_VLANLOCAL;
	rw_enter(&bip->bi_rwlock, RW_READER);
	if ((bfp = avl_find(&bip->bi_fwd, &match, &where)) == NULL)
		bfp = avl_nearest(&bip->bi_fwd, where, AVL_AFTER);
	else
		bfp = AVL_NEXT(&bip->bi_fwd, bfp);
	if (bfp == NULL) {
		bzero(blf, sizeof (*blf));
	} else {
		bcopy(bfp->bf_dest, blf->blf_dest, ETHERADDRL);
		blf->blf_trill_nick = bfp->bf_trill_nick;
		blf->blf_ms_age =
		    drv_hztousec(ddi_get_lbolt() - bfp->bf_lastheard) / 1000;
		blf->blf_is_local =
		    (bfp->bf_flags & BFF_LOCALADDR) != 0;
		blf->blf_linkid = bfp->bf_links[0]->bl_linkid;
	}
	rw_exit(&bip->bi_rwlock);
	bridge_unref(bip);
	return (0);
}
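
/*
 * Example (editor's illustration, not part of the original source): the
 * BRIDGE_IOC_LISTFWD handler above returns the entry that follows the
 * caller-supplied address, so a consumer can walk the whole table by feeding
 * each reply back in, starting from the all-zeroes address and stopping when
 * the reply comes back zeroed. A rough userland sketch, assuming a file
 * descriptor on the DLD control node and direct use of ioctl(2):
 *
 *	uint8_t zeroes[ETHERADDRL] = { 0 };
 *	bridge_listfwd_t blf;
 *
 *	bzero(&blf, sizeof (blf));
 *	(void) strlcpy(blf.blf_name, bridge_name, sizeof (blf.blf_name));
 *	for (;;) {
 *		if (ioctl(dld_fd, BRIDGE_IOC_LISTFWD, &blf) == -1)
 *			break;
 *		if (bcmp(blf.blf_dest, zeroes, ETHERADDRL) == 0)
 *			break;		// end of table
 *		... report blf.blf_dest, blf.blf_linkid, blf.blf_ms_age ...
 *	}
 *
 * bridge_name and dld_fd are placeholders for the caller's own state.
 */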

static int
bridge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_valsize, const void *pr_val)
{
	bridge_mac_t *bmp = arg;
	bridge_inst_t *bip;
	bridge_link_t *blp;
	int err;
	uint_t maxsdu;
	mblk_t *mlist;

	_NOTE(ARGUNUSED(pr_name));
	switch (pr_num) {
	case MAC_PROP_MTU:
		if (pr_valsize < sizeof (bmp->bm_maxsdu)) {
			err = EINVAL;
			break;
		}
		(void) bcopy(pr_val, &maxsdu, sizeof (maxsdu));
		if (maxsdu == bmp->bm_maxsdu) {
			err = 0;
		} else if ((bip = mac_to_inst(bmp)) == NULL) {
			err = ENXIO;
		} else {
			rw_enter(&bip->bi_rwlock, RW_WRITER);
			mlist = NULL;
			for (blp = list_head(&bip->bi_links); blp != NULL;
			    blp = list_next(&bip->bi_links, blp)) {
				if (blp->bl_flags & BLF_DELETED)
					continue;
				if (blp->bl_maxsdu == maxsdu)
					link_sdu_fail(blp, B_FALSE, &mlist);
				else if (blp->bl_maxsdu == bmp->bm_maxsdu)
					link_sdu_fail(blp, B_TRUE, &mlist);
			}
			rw_exit(&bip->bi_rwlock);
			bmp->bm_maxsdu = maxsdu;
			(void) mac_maxsdu_update(bmp->bm_mh, maxsdu);
			send_up_messages(bip, mlist);
			bridge_unref(bip);
			err = 0;
		}
		break;

	default:
		err = ENOTSUP;
		break;
	}
	return (err);
}

static int
bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm)
{
	bridge_mac_t *bmp = arg;
	int err = 0;

	_NOTE(ARGUNUSED(pr_name));
	switch (pr_num) {
	case MAC_PROP_MTU: {
		mac_propval_range_t range;

		if (!(pr_flags & MAC_PROP_POSSIBLE))
			return (ENOTSUP);
		if (pr_valsize < sizeof (mac_propval_range_t))
			return (EINVAL);
		range.mpr_count = 1;
		range.mpr_type = MAC_PROPVAL_UINT32;
		range.range_uint32[0].mpur_min =
		    range.range_uint32[0].mpur_max = bmp->bm_maxsdu;
		bcopy(&range, pr_val, sizeof (range));
		*perm = MAC_PROP_PERM_RW;
		break;
	}
	case MAC_PROP_STATUS:
		if (pr_valsize < sizeof (bmp->bm_linkstate)) {
			err = EINVAL;
		} else {
			bcopy(&bmp->bm_linkstate, pr_val,
			    sizeof (bmp->bm_linkstate));
			*perm = MAC_PROP_PERM_READ;
		}
		break;

	default:
		err = ENOTSUP;
		break;
	}
	return (err);
}

static mac_callbacks_t bridge_m_callbacks = {
	MC_SETPROP | MC_GETPROP,
	bridge_m_getstat,
	bridge_m_start,
	bridge_m_stop,
	bridge_m_setpromisc,
	bridge_m_multicst,
	bridge_m_unicst,
	bridge_m_tx,
	NULL,	/* ioctl */
	NULL,	/* getcapab */
	NULL,	/* open */
	NULL,	/* close */
	bridge_m_setprop,
	bridge_m_getprop
};
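
/*
 * Usage note (editor's sketch, not from the original source): because the
 * bridge MAC advertises MAC_PROP_MTU as read/write, the MTU of every member
 * link can be retargeted in one step by setting the property on the bridge's
 * observability link, with something along the lines of
 *
 *	dladm set-linkprop -p mtu=9000 mybridge0
 *
 * (exact command form assumed, "mybridge0" is a placeholder). The
 * bridge_m_setprop() handler above then marks links whose SDU does not match
 * the new value as failed via link_sdu_fail() until they are reconfigured.
 */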

/*
 * Create kstats from a list.
 */
static kstat_t *
kstat_setup(kstat_named_t *knt, const char **names, int nstat,
    const char *unitname)
{
	kstat_t *ksp;
	int i;

	for (i = 0; i < nstat; i++)
		kstat_named_init(&knt[i], names[i], KSTAT_DATA_UINT64);

	ksp = kstat_create_zone(BRIDGE_DEV_NAME, 0, unitname, "net",
	    KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID);
	if (ksp != NULL) {
		ksp->ks_data = knt;
		kstat_install(ksp);
	}
	return (ksp);
}

/*
 * Find an existing bridge_mac_t structure or allocate a new one for the given
 * bridge instance. This creates the mac driver instance that snoop can use.
 */
static int
bmac_alloc(bridge_inst_t *bip, bridge_mac_t **bmacp)
{
	bridge_mac_t *bmp, *bnew;
	mac_register_t *mac;
	int err;

	*bmacp = NULL;
	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
		return (EINVAL);

	bnew = kmem_zalloc(sizeof (*bnew), KM_SLEEP);

	rw_enter(&bmac_rwlock, RW_WRITER);
	for (bmp = list_head(&bmac_list); bmp != NULL;
	    bmp = list_next(&bmac_list, bmp)) {
		if (strcmp(bip->bi_name, bmp->bm_name) == 0) {
			ASSERT(bmp->bm_inst == NULL);
			bmp->bm_inst = bip;
			rw_exit(&bmac_rwlock);
			kmem_free(bnew, sizeof (*bnew));
			mac_free(mac);
			*bmacp = bmp;
			return (0);
		}
	}

	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	mac->m_driver = bnew;
	mac->m_dip = bridge_dev_info;
	mac->m_instance = (uint_t)-1;
	mac->m_src_addr = (uint8_t *)zero_addr;
	mac->m_callbacks = &bridge_m_callbacks;

	/*
	 * Note that the SDU limits are irrelevant, as nobody transmits on the
	 * bridge node itself. It's mainly for monitoring, but we allow
	 * setting the bridge MTU so that all links that are part of the
	 * bridge can be transitioned to a new MTU in one step.
	 */
	mac->m_min_sdu = 1;
	mac->m_max_sdu = 1500;
	err = mac_register(mac, &bnew->bm_mh);
	mac_free(mac);
	if (err != 0) {
		rw_exit(&bmac_rwlock);
		kmem_free(bnew, sizeof (*bnew));
		return (err);
	}

	bnew->bm_inst = bip;
	(void) strcpy(bnew->bm_name, bip->bi_name);
	if (list_is_empty(&bmac_list)) {
		bridge_timerid = timeout(bridge_timer, NULL,
		    bridge_scan_interval);
	}
	list_insert_tail(&bmac_list, bnew);
	rw_exit(&bmac_rwlock);

	/*
	 * Mark the MAC as unable to go "active" so that only passive clients
	 * (such as snoop) can bind to it.
	 */
	mac_no_active(bnew->bm_mh);
	*bmacp = bnew;
	return (0);
}

/*
 * Disconnect the given bridge_mac_t from its bridge instance. The bridge
 * instance is going away. The mac instance can't go away until the clients
 * are gone (see bridge_timer).
 */
static void
bmac_disconnect(bridge_mac_t *bmp)
{
	bridge_inst_t *bip;

	bmp->bm_linkstate = LINK_STATE_DOWN;
	mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN);

	rw_enter(&bmac_rwlock, RW_READER);
	bip = bmp->bm_inst;
	bip->bi_mac = NULL;
	bmp->bm_inst = NULL;
	rw_exit(&bmac_rwlock);
}

/* This is used by the avl trees to sort forwarding table entries */
static int
fwd_compare(const void *addr1, const void *addr2)
{
	const bridge_fwd_t *fwd1 = addr1;
	const bridge_fwd_t *fwd2 = addr2;
	int diff = memcmp(fwd1->bf_dest, fwd2->bf_dest, ETHERADDRL);

	if (diff != 0)
		return (diff > 0 ? 1 : -1);

	if ((fwd1->bf_flags ^ fwd2->bf_flags) & BFF_VLANLOCAL) {
		if (fwd1->bf_vlanid > fwd2->bf_vlanid)
			return (1);
		else if (fwd1->bf_vlanid < fwd2->bf_vlanid)
			return (-1);
	}
	return (0);
}
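
/*
 * Example (editor's note, not part of the original source): the comparator
 * above keys primarily on the MAC address and only falls back to the VLAN ID
 * when exactly one of the two entries carries BFF_VLANLOCAL. A shared entry
 * and a per-VLAN duplicate for the same station can therefore coexist in
 * bi_fwd:
 *
 *	{ bf_dest = 0:1:2:3:4:5 }				(shared)
 *	{ bf_dest = 0:1:2:3:4:5, BFF_VLANLOCAL, bf_vlanid = 5 }	(per-VLAN)
 *
 * fwd_find() below first searches with a key that does not set BFF_VLANLOCAL
 * and, when the entry it finds is for a different VLAN and bf_vcnt says
 * per-VLAN duplicates exist, retries with BFF_VLANLOCAL and the wanted VLAN
 * ID in the key.
 */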

static void
inst_free(bridge_inst_t *bip)
{
	ASSERT(bip->bi_mac == NULL);
	rw_destroy(&bip->bi_rwlock);
	list_destroy(&bip->bi_links);
	cv_destroy(&bip->bi_linkwait);
	avl_destroy(&bip->bi_fwd);
	if (bip->bi_ksp != NULL)
		kstat_delete(bip->bi_ksp);
	kmem_free(bip, sizeof (*bip));
}

static bridge_inst_t *
inst_alloc(const char *bridge)
{
	bridge_inst_t *bip;

	bip = kmem_zalloc(sizeof (*bip), KM_SLEEP);
	bip->bi_refs = 1;
	(void) strcpy(bip->bi_name, bridge);
	rw_init(&bip->bi_rwlock, NULL, RW_DRIVER, NULL);
	list_create(&bip->bi_links, sizeof (bridge_link_t),
	    offsetof(bridge_link_t, bl_node));
	cv_init(&bip->bi_linkwait, NULL, CV_DRIVER, NULL);
	avl_create(&bip->bi_fwd, fwd_compare, sizeof (bridge_fwd_t),
	    offsetof(bridge_fwd_t, bf_node));
	return (bip);
}

static bridge_inst_t *
bridge_find_name(const char *bridge)
{
	bridge_inst_t *bip;

	mutex_enter(&inst_lock);
	for (bip = list_head(&inst_list); bip != NULL;
	    bip = list_next(&inst_list, bip)) {
		if (!(bip->bi_flags & BIF_SHUTDOWN) &&
		    strcmp(bridge, bip->bi_name) == 0) {
			atomic_inc_uint(&bip->bi_refs);
			break;
		}
	}
	mutex_exit(&inst_lock);

	return (bip);
}

static int
bridge_create(datalink_id_t linkid, const char *bridge, bridge_inst_t **bipc,
    cred_t *cred)
{
	bridge_inst_t *bip, *bipnew;
	bridge_mac_t *bmp = NULL;
	int err;

	*bipc = NULL;
	bipnew = inst_alloc(bridge);

	mutex_enter(&inst_lock);
lookup_retry:
	for (bip = list_head(&inst_list); bip != NULL;
	    bip = list_next(&inst_list, bip)) {
		if (strcmp(bridge, bip->bi_name) == 0)
			break;
	}

	/* This should not take long; if it does, we've got a design problem */
	if (bip != NULL && (bip->bi_flags & BIF_SHUTDOWN)) {
		cv_wait(&inst_cv, &inst_lock);
		goto lookup_retry;
	}

	if (bip == NULL) {
		bip = bipnew;
		bipnew = NULL;
		list_insert_tail(&inst_list, bip);
	}

	mutex_exit(&inst_lock);
	if (bipnew != NULL) {
		inst_free(bipnew);
		return (EEXIST);
	}

	bip->bi_ksp = kstat_setup((kstat_named_t *)&bip->bi_kstats,
	    inst_kstats_list, Dim(inst_kstats_list), bip->bi_name);

	err = bmac_alloc(bip, &bmp);
	if ((bip->bi_mac = bmp) == NULL)
		goto fail_create;

	/*
	 * bm_inst is set, so the timer cannot yank the DLS rug from under us.
	 * No extra locking is needed here.
	 */
	if (!(bmp->bm_flags & BMF_DLS)) {
		err = dls_devnet_create(bmp->bm_mh, linkid, crgetzoneid(cred));
		if (err != 0)
			goto fail_create;
		bmp->bm_flags |= BMF_DLS;
	}

	bip->bi_dev = makedevice(bridge_major, mac_minor(bmp->bm_mh));
	*bipc = bip;
	return (0);

fail_create:
	ASSERT(bip->bi_trilldata == NULL);
	bip->bi_flags |= BIF_SHUTDOWN;
	bridge_unref(bip);
	return (err);
}

static void
bridge_unref(bridge_inst_t *bip)
{
	if (atomic_dec_uint_nv(&bip->bi_refs) == 0) {
		ASSERT(bip->bi_flags & BIF_SHUTDOWN);
		/* free up mac for reuse before leaving global list */
		if (bip->bi_mac != NULL)
			bmac_disconnect(bip->bi_mac);
		mutex_enter(&inst_lock);
		list_remove(&inst_list, bip);
		cv_broadcast(&inst_cv);
		mutex_exit(&inst_lock);
		inst_free(bip);
	}
}

/*
 * Stream instances are used only for allocating bridges and serving as a
 * control node. They serve no data-handling function.
 */
static bridge_stream_t *
stream_alloc(void)
{
	bridge_stream_t *bsp;
	minor_t mn;

	if ((mn = mac_minor_hold(B_FALSE)) == 0)
		return (NULL);
	bsp = kmem_zalloc(sizeof (*bsp), KM_SLEEP);
	bsp->bs_minor = mn;
	return (bsp);
}

static void
stream_free(bridge_stream_t *bsp)
{
	mac_minor_rele(bsp->bs_minor);
	kmem_free(bsp, sizeof (*bsp));
}

/* Reference hold/release functions for STREAMS-related taskq */
static void
stream_ref(bridge_stream_t *bsp)
{
	mutex_enter(&stream_ref_lock);
	bsp->bs_taskq_cnt++;
	mutex_exit(&stream_ref_lock);
}

static void
stream_unref(bridge_stream_t *bsp)
{
	mutex_enter(&stream_ref_lock);
	if (--bsp->bs_taskq_cnt == 0)
		cv_broadcast(&stream_ref_cv);
	mutex_exit(&stream_ref_lock);
}

static void
link_free(bridge_link_t *blp)
{
	bridge_inst_t *bip = blp->bl_inst;

	ASSERT(!(blp->bl_flags & BLF_FREED));
	blp->bl_flags |= BLF_FREED;
	if (blp->bl_ksp != NULL)
		kstat_delete(blp->bl_ksp);
	if (blp->bl_lfailmp != NULL)
		freeb(blp->bl_lfailmp);
	cv_destroy(&blp->bl_trillwait);
	mutex_destroy(&blp->bl_trilllock);
	kmem_free(blp, sizeof (*blp));
	/* Don't unreference the bridge until the MAC is closed */
	bridge_unref(bip);
}

static void
link_unref(bridge_link_t *blp)
{
	if (atomic_dec_uint_nv(&blp->bl_refs) == 0) {
		bridge_inst_t *bip = blp->bl_inst;

		ASSERT(blp->bl_flags & BLF_DELETED);
		rw_enter(&bip->bi_rwlock, RW_WRITER);
		list_remove(&bip->bi_links, blp);
		rw_exit(&bip->bi_rwlock);
		if (bip->bi_trilldata != NULL && list_is_empty(&bip->bi_links))
			cv_broadcast(&bip->bi_linkwait);
		link_free(blp);
	}
}

static bridge_fwd_t *
fwd_alloc(const uint8_t *addr, uint_t nlinks, uint16_t nick)
{
	bridge_fwd_t *bfp;

	bfp = kmem_zalloc(sizeof (*bfp) + (nlinks * sizeof (bridge_link_t *)),
	    KM_NOSLEEP);
	if (bfp != NULL) {
		bcopy(addr, bfp->bf_dest, ETHERADDRL);
		bfp->bf_lastheard = ddi_get_lbolt();
		bfp->bf_maxlinks = nlinks;
		bfp->bf_links = (bridge_link_t **)(bfp + 1);
		bfp->bf_trill_nick = nick;
	}
	return (bfp);
}
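
/*
 * Editor's note (not part of the original source): fwd_alloc() co-allocates
 * the per-entry link pointer array immediately after the bridge_fwd_t in a
 * single kmem allocation; bf_links simply points at (bfp + 1). fwd_free()
 * must therefore release sizeof (*bfp) plus bf_maxlinks pointers, which is
 * exactly what it does below.
 */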

static bridge_fwd_t *
fwd_find(bridge_inst_t *bip, const uint8_t *addr, uint16_t vlanid)
{
	bridge_fwd_t *bfp, *vbfp;
	bridge_fwd_t match;

	bcopy(addr, match.bf_dest, ETHERADDRL);
	match.bf_flags = 0;
	rw_enter(&bip->bi_rwlock, RW_READER);
	if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) {
		if (bfp->bf_vlanid != vlanid && bfp->bf_vcnt > 0) {
			match.bf_vlanid = vlanid;
			match.bf_flags = BFF_VLANLOCAL;
			vbfp = avl_find(&bip->bi_fwd, &match, NULL);
			if (vbfp != NULL)
				bfp = vbfp;
		}
		atomic_inc_uint(&bfp->bf_refs);
	}
	rw_exit(&bip->bi_rwlock);
	return (bfp);
}

static void
fwd_free(bridge_fwd_t *bfp)
{
	uint_t i;
	bridge_inst_t *bip = bfp->bf_links[0]->bl_inst;

	KIDECR(bki_count);
	for (i = 0; i < bfp->bf_nlinks; i++)
		link_unref(bfp->bf_links[i]);
	kmem_free(bfp,
	    sizeof (*bfp) + bfp->bf_maxlinks * sizeof (bridge_link_t *));
}

static void
fwd_unref(bridge_fwd_t *bfp)
{
	if (atomic_dec_uint_nv(&bfp->bf_refs) == 0) {
		ASSERT(!(bfp->bf_flags & BFF_INTREE));
		fwd_free(bfp);
	}
}

static void
fwd_delete(bridge_fwd_t *bfp)
{
	bridge_inst_t *bip;
	bridge_fwd_t *bfpzero;

	if (bfp->bf_flags & BFF_INTREE) {
		ASSERT(bfp->bf_nlinks > 0);
		bip = bfp->bf_links[0]->bl_inst;
		rw_enter(&bip->bi_rwlock, RW_WRITER);
		/* Another thread could beat us to this */
		if (bfp->bf_flags & BFF_INTREE) {
			avl_remove(&bip->bi_fwd, bfp);
			bfp->bf_flags &= ~BFF_INTREE;
			if (bfp->bf_flags & BFF_VLANLOCAL) {
				bfp->bf_flags &= ~BFF_VLANLOCAL;
				bfpzero = avl_find(&bip->bi_fwd, bfp, NULL);
				if (bfpzero != NULL && bfpzero->bf_vcnt > 0)
					bfpzero->bf_vcnt--;
			}
			rw_exit(&bip->bi_rwlock);
			fwd_unref(bfp);		/* no longer in avl tree */
		} else {
			rw_exit(&bip->bi_rwlock);
		}
	}
}

static boolean_t
fwd_insert(bridge_inst_t *bip, bridge_fwd_t *bfp)
{
	avl_index_t idx;
	boolean_t retv;

	rw_enter(&bip->bi_rwlock, RW_WRITER);
	if (!(bip->bi_flags & BIF_SHUTDOWN) &&
	    avl_numnodes(&bip->bi_fwd) < bip->bi_tablemax &&
	    avl_find(&bip->bi_fwd, bfp, &idx) == NULL) {
		avl_insert(&bip->bi_fwd, bfp, idx);
		bfp->bf_flags |= BFF_INTREE;
		atomic_inc_uint(&bfp->bf_refs);	/* avl entry */
		retv = B_TRUE;
	} else {
		retv = B_FALSE;
	}
	rw_exit(&bip->bi_rwlock);
	return (retv);
}
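
/*
 * Editor's summary (not part of the original source): a forwarding entry's
 * life cycle in terms of the routines above:
 *
 *	fwd_insert()	adds it to bi_fwd and takes the tree reference
 *	fwd_find()	takes a reference for each lookup
 *	fwd_delete()	removes it from bi_fwd and drops the tree reference
 *	fwd_unref()	drops a lookup reference; the last drop calls
 *			fwd_free(), which also releases the bf_links
 *			references on the member links
 */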

static void
fwd_update_local(bridge_link_t *blp, const uint8_t *oldaddr,
    const uint8_t *newaddr)
{
	bridge_inst_t *bip = blp->bl_inst;
	bridge_fwd_t *bfp, *bfnew;
	bridge_fwd_t match;
	avl_index_t idx;
	boolean_t drop_ref = B_FALSE;

	if (bcmp(oldaddr, newaddr, ETHERADDRL) == 0)
		return;

	if (bcmp(oldaddr, zero_addr, ETHERADDRL) == 0)
		goto no_old_addr;

	/*
	 * Find the previous entry, and remove our link from it.
	 */
	bcopy(oldaddr, match.bf_dest, ETHERADDRL);
	rw_enter(&bip->bi_rwlock, RW_WRITER);
	if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) {
		int i;

		/*
		 * See if we're in the list, and remove if so.
		 */
		for (i = 0; i < bfp->bf_nlinks; i++) {
			if (bfp->bf_links[i] == blp) {
				/*
				 * We assume writes are atomic, so no special
				 * MT handling is needed. The list length is
				 * decremented first, and then we remove
				 * entries.
				 */
				bfp->bf_nlinks--;
				for (; i < bfp->bf_nlinks; i++)
					bfp->bf_links[i] = bfp->bf_links[i + 1];
				drop_ref = B_TRUE;
				break;
			}
		}
		/* If no more links, then remove and free up */
		if (bfp->bf_nlinks == 0) {
			avl_remove(&bip->bi_fwd, bfp);
			bfp->bf_flags &= ~BFF_INTREE;
		} else {
			bfp = NULL;
		}
	}
	rw_exit(&bip->bi_rwlock);
	if (bfp != NULL)
		fwd_unref(bfp);		/* no longer in avl tree */

	/*
	 * Now get the new link address and add this link to the list. The
	 * list should be of length 1 unless the user has configured multiple
	 * NICs with the same address. (That's an incorrect configuration, but
	 * we support it anyway.)
	 */
no_old_addr:
	bfp = NULL;
	if ((bip->bi_flags & BIF_SHUTDOWN) ||
	    bcmp(newaddr, zero_addr, ETHERADDRL) == 0)
		goto no_new_addr;

	bcopy(newaddr, match.bf_dest, ETHERADDRL);
	rw_enter(&bip->bi_rwlock, RW_WRITER);
	if ((bfp = avl_find(&bip->bi_fwd, &match, &idx)) == NULL) {
		bfnew = fwd_alloc(newaddr, 1, RBRIDGE_NICKNAME_NONE);
		if (bfnew != NULL)
			KIINCR(bki_count);
	} else if (bfp->bf_nlinks < bfp->bf_maxlinks) {
		/* special case: link fits in existing entry */
		bfnew = bfp;
	} else {
		bfnew = fwd_alloc(newaddr, bfp->bf_nlinks + 1,
		    RBRIDGE_NICKNAME_NONE);
		if (bfnew != NULL) {
			KIINCR(bki_count);
			avl_remove(&bip->bi_fwd, bfp);
			bfp->bf_flags &= ~BFF_INTREE;
			bfnew->bf_nlinks = bfp->bf_nlinks;
			bcopy(bfp->bf_links, bfnew->bf_links,
			    bfp->bf_nlinks * sizeof (bfp));
			/* reset the idx value due to removal above */
			(void) avl_find(&bip->bi_fwd, &match, &idx);
		}
	}

	if (bfnew != NULL) {
		bfnew->bf_links[bfnew->bf_nlinks++] = blp;
		if (drop_ref)
			drop_ref = B_FALSE;
		else
			atomic_inc_uint(&blp->bl_refs);	/* bf_links entry */

		if (bfnew != bfp) {
			/* local addresses are not subject to table limits */
			avl_insert(&bip->bi_fwd, bfnew, idx);
			bfnew->bf_flags |= (BFF_INTREE | BFF_LOCALADDR);
			atomic_inc_uint(&bfnew->bf_refs);	/* avl entry */
		}
	}
	rw_exit(&bip->bi_rwlock);

no_new_addr:
	/*
	 * If we found an existing entry and we replaced it with a new one,
	 * then drop the table reference from the old one. We removed it from
	 * the AVL tree above.
	 */
	if (bfnew != NULL && bfp != NULL && bfnew != bfp)
		fwd_unref(bfp);

	/* Account for removed entry. */
	if (drop_ref)
		link_unref(blp);
}

static void
bridge_new_unicst(bridge_link_t *blp)
{
	uint8_t new_mac[ETHERADDRL];

	mac_unicast_primary_get(blp->bl_mh, new_mac);
	fwd_update_local(blp, blp->bl_local_mac, new_mac);
	bcopy(new_mac, blp->bl_local_mac, ETHERADDRL);
}
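
/*
 * Editor's note (not part of the original source): bridge_new_unicst() is
 * the hook that keeps local-address forwarding entries in sync when a member
 * link's primary MAC address changes. It reads the current primary address
 * from the MAC layer, lets fwd_update_local() move this link from the entry
 * for the old address to the entry for the new one (creating or growing an
 * entry as needed), and then records the new address in bl_local_mac.
 */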

/*
 * We must shut down a link prior to freeing it, and doing that requires
 * blocking to wait for running MAC threads while holding a reference. This is
 * run from a taskq to accomplish proper link shutdown followed by reference
 * drop.
 */
static void
link_shutdown(void *arg)
{
	bridge_link_t *blp = arg;
	mac_handle_t mh = blp->bl_mh;
	bridge_inst_t *bip;
	bridge_fwd_t *bfp, *bfnext;
	avl_tree_t fwd_scavenge;
	int i;

	/*
	 * This link is being destroyed. Notify TRILL now that it's no longer
	 * possible to send packets. Data packets may still arrive until TRILL
	 * calls bridge_trill_lnunref.
	 */
	if (blp->bl_trilldata != NULL)
		trill_lndstr_fn(blp->bl_trilldata, blp);

	if (blp->bl_flags & BLF_PROM_ADDED)
		(void) mac_promisc_remove(blp->bl_mphp);

	if (blp->bl_flags & BLF_SET_BRIDGE)
		mac_bridge_clear(mh, (mac_handle_t)blp);

	if (blp->bl_flags & BLF_MARGIN_ADDED) {
		(void) mac_notify_remove(blp->bl_mnh, B_TRUE);
		(void) mac_margin_remove(mh, blp->bl_margin);
	}

	/* Tell the clients the real link state when we leave */
	mac_link_redo(blp->bl_mh,
	    mac_stat_get(blp->bl_mh, MAC_STAT_LOWLINK_STATE));

	/* Destroy all of the forwarding entries related to this link */
	avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
	    offsetof(bridge_fwd_t, bf_node));
	bip = blp->bl_inst;
	rw_enter(&bip->bi_rwlock, RW_WRITER);
	bfnext = avl_first(&bip->bi_fwd);
	while ((bfp = bfnext) != NULL) {
		bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
		for (i = 0; i < bfp->bf_nlinks; i++) {
			if (bfp->bf_links[i] == blp)
				break;
		}
		if (i >= bfp->bf_nlinks)
			continue;
		if (bfp->bf_nlinks > 1) {
			/* note that this can't be the last reference */
			link_unref(blp);
			bfp->bf_nlinks--;
			for (; i < bfp->bf_nlinks; i++)
				bfp->bf_links[i] = bfp->bf_links[i + 1];
		} else {
			ASSERT(bfp->bf_flags & BFF_INTREE);
			avl_remove(&bip->bi_fwd, bfp);
			bfp->bf_flags &= ~BFF_INTREE;
			avl_add(&fwd_scavenge, bfp);
		}
	}
	rw_exit(&bip->bi_rwlock);
	bfnext = avl_first(&fwd_scavenge);
	while ((bfp = bfnext) != NULL) {
		bfnext = AVL_NEXT(&fwd_scavenge, bfp);
		avl_remove(&fwd_scavenge, bfp);
		fwd_unref(bfp);
	}
	avl_destroy(&fwd_scavenge);

	if (blp->bl_flags & BLF_CLIENT_OPEN)
		mac_client_close(blp->bl_mch, 0);

	mac_close(mh);

	/*
	 * We are now completely removed from the active list, so drop the
	 * reference (see bridge_add_link).
	 */
	link_unref(blp);
}

static void
shutdown_inst(bridge_inst_t *bip)
{
	bridge_link_t *blp, *blnext;
	bridge_fwd_t *bfp;

	mutex_enter(&inst_lock);
	if (bip->bi_flags & BIF_SHUTDOWN) {
		mutex_exit(&inst_lock);
		return;
	}

	/*
	 * Once on the inst_list, the bridge instance must not leave that list
	 * without having the shutdown flag set first. When the shutdown flag
	 * is set, we own the list reference, so we must drop it before
	 * returning.
	 */
	bip->bi_flags |= BIF_SHUTDOWN;
	mutex_exit(&inst_lock);

	bip->bi_control = NULL;

	rw_enter(&bip->bi_rwlock, RW_READER);
	blnext = list_head(&bip->bi_links);
	while ((blp = blnext) != NULL) {
		blnext = list_next(&bip->bi_links, blp);
		if (!(blp->bl_flags & BLF_DELETED)) {
			blp->bl_flags |= BLF_DELETED;
			(void) ddi_taskq_dispatch(bridge_taskq, link_shutdown,
			    blp, DDI_SLEEP);
		}
	}
	while ((bfp = avl_first(&bip->bi_fwd)) != NULL) {
		atomic_inc_uint(&bfp->bf_refs);
		rw_exit(&bip->bi_rwlock);
		fwd_delete(bfp);
		fwd_unref(bfp);
		rw_enter(&bip->bi_rwlock, RW_READER);
	}
	rw_exit(&bip->bi_rwlock);

	/*
	 * This bridge is being destroyed. Notify TRILL once all of the links
	 * are gone.
	 */
	mutex_enter(&inst_lock);
	while (bip->bi_trilldata != NULL && !list_is_empty(&bip->bi_links))
		cv_wait(&bip->bi_linkwait, &inst_lock);
	mutex_exit(&inst_lock);
	if (bip->bi_trilldata != NULL)
		trill_brdstr_fn(bip->bi_trilldata, bip);

	bridge_unref(bip);
}

/*
 * This is called once by the TRILL module when it starts up. It just sets the
 * global TRILL callback function pointers -- data transmit/receive and bridge
 * and link destroy notification. There's only one TRILL module, so only one
 * registration is needed.
 *
 * TRILL should call this function with NULL pointers before unloading. It
 * must not do so before dropping all references to bridges and links. We
 * assert that this is true on debug builds.
 */
void
bridge_trill_register_cb(trill_recv_pkt_t recv_fn, trill_encap_pkt_t encap_fn,
    trill_br_dstr_t brdstr_fn, trill_ln_dstr_t lndstr_fn)
{
#ifdef DEBUG
	if (recv_fn == NULL && trill_recv_fn != NULL) {
		bridge_inst_t *bip;
		bridge_link_t *blp;

		mutex_enter(&inst_lock);
		for (bip = list_head(&inst_list); bip != NULL;
		    bip = list_next(&inst_list, bip)) {
			ASSERT(bip->bi_trilldata == NULL);
			rw_enter(&bip->bi_rwlock, RW_READER);
			for (blp = list_head(&bip->bi_links); blp != NULL;
			    blp = list_next(&bip->bi_links, blp)) {
				ASSERT(blp->bl_trilldata == NULL);
			}
			rw_exit(&bip->bi_rwlock);
		}
		mutex_exit(&inst_lock);
	}
#endif
	trill_recv_fn = recv_fn;
	trill_encap_fn = encap_fn;
	trill_brdstr_fn = brdstr_fn;
	trill_lndstr_fn = lndstr_fn;
}

/*
 * This registers the TRILL instance pointer with a bridge. Before this
 * pointer is set, the forwarding, TRILL receive, and bridge destructor
 * functions won't be called.
 *
 * TRILL holds a reference on a bridge with this call. It must free the
 * reference by calling the unregister function below.
 */
bridge_inst_t *
bridge_trill_brref(const char *bname, void *ptr)
{
	char bridge[MAXLINKNAMELEN];
	bridge_inst_t *bip;

	(void) snprintf(bridge, MAXLINKNAMELEN, "%s0", bname);
	bip = bridge_find_name(bridge);
	if (bip != NULL) {
		ASSERT(bip->bi_trilldata == NULL && ptr != NULL);
		bip->bi_trilldata = ptr;
	}
	return (bip);
}

void
bridge_trill_brunref(bridge_inst_t *bip)
{
	ASSERT(bip->bi_trilldata != NULL);
	bip->bi_trilldata = NULL;
	bridge_unref(bip);
}
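
/*
 * Example (editor's sketch, not part of the original source): the order of
 * operations a TRILL consumer is expected to follow with the entry points
 * above and below, per the comments in this file:
 *
 *	bridge_trill_register_cb(recv, encap, brdstr, lndstr);
 *	bip = bridge_trill_brref("mybridge", trill_instance);
 *	blp = bridge_trill_lnref(bip, linkid, trill_link_state);
 *	... exchange packets via bridge_trill_decaps()/bridge_trill_output() ...
 *	bridge_trill_lnunref(blp);
 *	bridge_trill_brunref(bip);
 *	bridge_trill_register_cb(NULL, NULL, NULL, NULL);
 *
 * trill_instance and trill_link_state are placeholders for the TRILL
 * module's own per-bridge and per-link state pointers.
 */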

/*
 * TRILL calls this function when referencing a particular link on a bridge.
 *
 * It holds a reference on the link, so TRILL must clear out the reference
 * when it's done with the link (on unbinding).
 */
bridge_link_t *
bridge_trill_lnref(bridge_inst_t *bip, datalink_id_t linkid, void *ptr)
{
	bridge_link_t *blp;

	ASSERT(ptr != NULL);
	rw_enter(&bip->bi_rwlock, RW_READER);
	for (blp = list_head(&bip->bi_links); blp != NULL;
	    blp = list_next(&bip->bi_links, blp)) {
		if (!(blp->bl_flags & BLF_DELETED) &&
		    blp->bl_linkid == linkid && blp->bl_trilldata == NULL) {
			blp->bl_trilldata = ptr;
			blp->bl_flags &= ~BLF_TRILLACTIVE;
			(void) memset(blp->bl_afs, 0, sizeof (blp->bl_afs));
			atomic_inc_uint(&blp->bl_refs);
			break;
		}
	}
	rw_exit(&bip->bi_rwlock);
	return (blp);
}

void
bridge_trill_lnunref(bridge_link_t *blp)
{
	mutex_enter(&blp->bl_trilllock);
	ASSERT(blp->bl_trilldata != NULL);
	blp->bl_trilldata = NULL;
	blp->bl_flags &= ~BLF_TRILLACTIVE;
	while (blp->bl_trillthreads > 0)
		cv_wait(&blp->bl_trillwait, &blp->bl_trilllock);
	mutex_exit(&blp->bl_trilllock);
	(void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs));
	link_unref(blp);
}
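
/*
 * Editor's note (not part of the original source): bl_trillthreads counts
 * forwarding threads that have looked up bl_trilldata under bl_trilllock and
 * are still calling into TRILL (see the trill_encap_fn callers in
 * bridge_forward below). bridge_trill_lnunref() clears bl_trilldata first,
 * so no new callers can start, and then waits on bl_trillwait until the
 * in-flight ones have drained before dropping the link reference.
 */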

/*
 * This periodic timer performs three functions:
 *  1. It scans the list of learned forwarding entries, and removes ones that
 *     haven't been heard from in a while. The time limit is backed down if
 *     we're above the configured table limit.
 *  2. It walks the links and decays away the bl_learns counter.
 *  3. It scans the observability node entries looking for ones that can be
 *     freed up.
 */
/* ARGSUSED */
static void
bridge_timer(void *arg)
{
	bridge_inst_t *bip;
	bridge_fwd_t *bfp, *bfnext;
	bridge_mac_t *bmp, *bmnext;
	bridge_link_t *blp;
	int err;
	datalink_id_t tmpid;
	avl_tree_t fwd_scavenge;
	clock_t age_limit;
	uint32_t ldecay;

	avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
	    offsetof(bridge_fwd_t, bf_node));
	mutex_enter(&inst_lock);
	for (bip = list_head(&inst_list); bip != NULL;
	    bip = list_next(&inst_list, bip)) {
		if (bip->bi_flags & BIF_SHUTDOWN)
			continue;
		rw_enter(&bip->bi_rwlock, RW_WRITER);
		/* compute scaled maximum age based on table limit */
		if (avl_numnodes(&bip->bi_fwd) > bip->bi_tablemax)
			bip->bi_tshift++;
		else
			bip->bi_tshift = 0;
		if ((age_limit = bridge_fwd_age >> bip->bi_tshift) == 0) {
			if (bip->bi_tshift != 0)
				bip->bi_tshift--;
			age_limit = 1;
		}
		bfnext = avl_first(&bip->bi_fwd);
		while ((bfp = bfnext) != NULL) {
			bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
			if (!(bfp->bf_flags & BFF_LOCALADDR) &&
			    (ddi_get_lbolt() - bfp->bf_lastheard) > age_limit) {
				ASSERT(bfp->bf_flags & BFF_INTREE);
				avl_remove(&bip->bi_fwd, bfp);
				bfp->bf_flags &= ~BFF_INTREE;
				avl_add(&fwd_scavenge, bfp);
			}
		}
		for (blp = list_head(&bip->bi_links); blp != NULL;
		    blp = list_next(&bip->bi_links, blp)) {
			ldecay = mac_get_ldecay(blp->bl_mh);
			if (ldecay >= blp->bl_learns)
				blp->bl_learns = 0;
			else
				atomic_add_int(&blp->bl_learns, -(int)ldecay);
		}
		rw_exit(&bip->bi_rwlock);
		bfnext = avl_first(&fwd_scavenge);
		while ((bfp = bfnext) != NULL) {
			bfnext = AVL_NEXT(&fwd_scavenge, bfp);
			avl_remove(&fwd_scavenge, bfp);
			KIINCR(bki_expire);
			fwd_unref(bfp);		/* drop tree reference */
		}
	}
	mutex_exit(&inst_lock);
	avl_destroy(&fwd_scavenge);

	/*
	 * Scan the bridge_mac_t entries and try to free up the ones that are
	 * no longer active. This must be done by polling, as neither DLS nor
	 * MAC provides a driver any sort of positive control over clients.
	 */
	rw_enter(&bmac_rwlock, RW_WRITER);
	bmnext = list_head(&bmac_list);
	while ((bmp = bmnext) != NULL) {
		bmnext = list_next(&bmac_list, bmp);

		/* ignore active bridges */
		if (bmp->bm_inst != NULL)
			continue;

		if (bmp->bm_flags & BMF_DLS) {
			err = dls_devnet_destroy(bmp->bm_mh, &tmpid, B_FALSE);
			ASSERT(err == 0 || err == EBUSY);
			if (err == 0)
				bmp->bm_flags &= ~BMF_DLS;
		}

		if (!(bmp->bm_flags & BMF_DLS)) {
			err = mac_unregister(bmp->bm_mh);
			ASSERT(err == 0 || err == EBUSY);
			if (err == 0) {
				list_remove(&bmac_list, bmp);
				kmem_free(bmp, sizeof (*bmp));
			}
		}
	}
	if (list_is_empty(&bmac_list)) {
		bridge_timerid = 0;
	} else {
		bridge_timerid = timeout(bridge_timer, NULL,
		    bridge_scan_interval);
	}
	rw_exit(&bmac_rwlock);
}

static int
bridge_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
{
	bridge_stream_t *bsp;

	if (rq->q_ptr != NULL)
		return (0);

	if (sflag & MODOPEN)
		return (EINVAL);

	/*
	 * Check the minor node number being opened. This tells us which
	 * bridge instance the user wants.
	 */
	if (getminor(*devp) != 0) {
		/*
		 * This is a regular DLPI stream for snoop or the like.
		 * Redirect it through DLD.
		 */
		rq->q_qinfo = &bridge_dld_rinit;
		OTHERQ(rq)->q_qinfo = &bridge_dld_winit;
		return (dld_open(rq, devp, oflag, sflag, credp));
	} else {
		/*
		 * Allocate the bridge control stream structure.
		 */
		if ((bsp = stream_alloc()) == NULL)
			return (ENOSR);
		rq->q_ptr = WR(rq)->q_ptr = (caddr_t)bsp;
		bsp->bs_wq = WR(rq);
		*devp = makedevice(getmajor(*devp), bsp->bs_minor);
		qprocson(rq);
		return (0);
	}
}

/*
 * This is used only for bridge control streams. DLPI goes through dld
 * instead.
 */
static int
bridge_close(queue_t *rq)
{
	bridge_stream_t *bsp = rq->q_ptr;
	bridge_inst_t *bip;

	/*
	 * Wait for any stray taskq (add/delete link) entries related to this
	 * stream to leave the system.
	 */
	mutex_enter(&stream_ref_lock);
	while (bsp->bs_taskq_cnt != 0)
		cv_wait(&stream_ref_cv, &stream_ref_lock);
	mutex_exit(&stream_ref_lock);

	qprocsoff(rq);
	if ((bip = bsp->bs_inst) != NULL)
		shutdown_inst(bip);
	rq->q_ptr = WR(rq)->q_ptr = NULL;
	stream_free(bsp);
	if (bip != NULL)
		bridge_unref(bip);

	return (0);
}
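
/*
 * Editor's summary (not part of the original source): the control-stream
 * life cycle as seen from bridge_open()/bridge_close() above. "bridged"
 * opens the driver on minor node 0, which allocates a bridge_stream_t and a
 * dedicated minor for the stream; a BRIOC_NEWBRIDGE request on that stream
 * then creates the bridge instance (bs_inst). Any other minor is a plain
 * DLPI stream and is simply handed off to DLD. Closing the control stream
 * waits for outstanding taskq work, shuts the instance down, and drops the
 * stream's reference on it.
 */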

static void
bridge_learn(bridge_link_t *blp, const uint8_t *saddr, uint16_t ingress_nick,
    uint16_t vlanid)
{
	bridge_inst_t *bip = blp->bl_inst;
	bridge_fwd_t *bfp, *bfpnew;
	int i;
	boolean_t replaced = B_FALSE;

	/* Ignore multi-destination address used as source; it's nonsense. */
	if (*saddr & 1)
		return;

	/*
	 * If the source is known, then check whether it belongs on this link.
	 * If not, and this isn't a fixed local address, then we've detected a
	 * move. If it's not known, learn it.
	 */
	if ((bfp = fwd_find(bip, saddr, vlanid)) != NULL) {
		/*
		 * If the packet has a fixed local source address, then there's
		 * nothing we can learn. We must quit. If this was a received
		 * packet, then the sender has stolen our address, but there's
		 * nothing we can do. If it's a transmitted packet, then
		 * that's the normal case.
		 */
		if (bfp->bf_flags & BFF_LOCALADDR) {
			fwd_unref(bfp);
			return;
		}

		/*
		 * Check if the link (and TRILL sender, if any) being used is
		 * among the ones registered for this address. If so, then
		 * this is information that we already know.
		 */
		if (bfp->bf_trill_nick == ingress_nick) {
			for (i = 0; i < bfp->bf_nlinks; i++) {
				if (bfp->bf_links[i] == blp) {
					bfp->bf_lastheard = ddi_get_lbolt();
					fwd_unref(bfp);
					return;
				}
			}
		}
	}

	/*
	 * Note that we intentionally "unlearn" things that appear to be under
	 * attack on this link. The forwarding cache is a negative thing for
	 * security -- it disables reachability as a performance optimization
	 * -- so leaving out entries optimizes for success and defends against
	 * the attack. Thus, the bare increment without a check in the delete
	 * code above is right. (And it's ok if we skid over the limit a
	 * little, so there's no synchronization needed on the test.)
	 */
	if (blp->bl_learns >= mac_get_llimit(blp->bl_mh)) {
		if (bfp != NULL) {
			if (bfp->bf_vcnt == 0)
				fwd_delete(bfp);
			fwd_unref(bfp);
		}
		return;
	}

	atomic_inc_uint(&blp->bl_learns);

	if ((bfpnew = fwd_alloc(saddr, 1, ingress_nick)) == NULL) {
		if (bfp != NULL)
			fwd_unref(bfp);
		return;
	}
	KIINCR(bki_count);

	if (bfp != NULL) {
		/*
		 * If this is a new destination for the same VLAN, then delete
		 * so that we can update. If it's a different VLAN, then we're
		 * not going to delete the original. Split off instead into an
		 * IVL entry.
		 */
		if (bfp->bf_vlanid == vlanid) {
			/* save the count of IVL duplicates */
			bfpnew->bf_vcnt = bfp->bf_vcnt;

			/* entry deletes count as learning events */
			atomic_inc_uint(&blp->bl_learns);

			/* destroy and create anew; node moved */
			fwd_delete(bfp);
			replaced = B_TRUE;
			KIINCR(bki_moved);
		} else {
			bfp->bf_vcnt++;
			bfpnew->bf_flags |= BFF_VLANLOCAL;
		}
		fwd_unref(bfp);
	}
	bfpnew->bf_links[0] = blp;
	bfpnew->bf_nlinks = 1;
	atomic_inc_uint(&blp->bl_refs);	/* bf_links entry */
	if (!fwd_insert(bip, bfpnew))
		fwd_free(bfpnew);
	else if (!replaced)
		KIINCR(bki_source);
}

/*
 * Process the VLAN headers for output on a given link. There are several
 * cases (noting that we don't map VLANs):
 *  1. The input packet is good as it is; either
 *     a. It has no tag, and output has same PVID
 *     b. It has a non-zero priority-only tag for PVID, and b_band is same
 *     c. It has a tag with VLAN different from PVID, and b_band is same
 *  2. The tag must change: non-zero b_band is different from tag priority
 *  3. The packet has a tag and should not (VLAN same as PVID, b_band zero)
 *  4. The packet has no tag and needs one:
 *     a. VLAN ID same as PVID, but b_band is non-zero
 *     b. VLAN ID different from PVID
 * We exclude case 1 first, then modify the packet. Note that output packets
 * get a priority set by the mblk, not by the header, because QoS in bridging
 * requires priority recalculation at each node.
 *
 * The passed-in tci is the "impossible" value 0xFFFF when no tag is present.
 */
static mblk_t *
reform_vlan_header(mblk_t *mp, uint16_t vlanid, uint16_t tci, uint16_t pvid)
{
	boolean_t source_has_tag = (tci != 0xFFFF);
	mblk_t *mpcopy;
	size_t mlen, minlen;
	struct ether_vlan_header *evh;
	int pri;

	/* This helps centralize error handling in the caller. */
	if (mp == NULL)
		return (mp);

	/* No forwarded packet can have hardware checksum enabled */
	DB_CKSUMFLAGS(mp) = 0;

	/* Get the no-modification cases out of the way first */
	if (!source_has_tag && vlanid == pvid)		/* 1a */
		return (mp);

	pri = VLAN_PRI(tci);
	if (source_has_tag && mp->b_band == pri) {
		if (vlanid != pvid)			/* 1c */
			return (mp);
		if (pri != 0 && VLAN_ID(tci) == 0)	/* 1b */
			return (mp);
	}

	/*
	 * We now know that we must modify the packet. Prepare for that. Note
	 * that if a tag is present, the caller has already done a pullup for
	 * the VLAN header, so we're good to go.
	 */
	if (MBLKL(mp) < sizeof (struct ether_header)) {
		mpcopy = msgpullup(mp, sizeof (struct ether_header));
		if (mpcopy == NULL) {
			freemsg(mp);
			return (NULL);
		}
		mp = mpcopy;
	}
	if (DB_REF(mp) > 1 || !IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) ||
	    (!source_has_tag && MBLKTAIL(mp) < VLAN_INCR)) {
		minlen = mlen = MBLKL(mp);
		if (!source_has_tag)
			minlen += VLAN_INCR;
		ASSERT(minlen >= sizeof (struct ether_vlan_header));
		/*
		 * We're willing to copy some data to avoid fragmentation, but
		 * not a lot.
		 */
		if (minlen > 256)
			minlen = sizeof (struct ether_vlan_header);
		mpcopy = allocb(minlen, BPRI_MED);
		if (mpcopy == NULL) {
			freemsg(mp);
			return (NULL);
		}
		if (mlen <= minlen) {
			/* We toss the first mblk when we can. */
			bcopy(mp->b_rptr, mpcopy->b_rptr, mlen);
			mpcopy->b_wptr += mlen;
			mpcopy->b_cont = mp->b_cont;
			freeb(mp);
		} else {
			/* If not, then just copy what we need */
			if (!source_has_tag)
				minlen = sizeof (struct ether_header);
			bcopy(mp->b_rptr, mpcopy->b_rptr, minlen);
			mpcopy->b_wptr += minlen;
			mpcopy->b_cont = mp;
			mp->b_rptr += minlen;
		}
		mp = mpcopy;
	}

	/* LINTED: pointer alignment */
	evh = (struct ether_vlan_header *)mp->b_rptr;
	if (source_has_tag) {
		if (mp->b_band == 0 && vlanid == pvid) {	/* 3 */
			evh->ether_tpid = evh->ether_type;
			mlen = MBLKL(mp);
			if (mlen > sizeof (struct ether_vlan_header))
				ovbcopy(mp->b_rptr +
				    sizeof (struct ether_vlan_header),
				    mp->b_rptr + sizeof (struct ether_header),
				    mlen - sizeof (struct ether_vlan_header));
			mp->b_wptr -= VLAN_INCR;
		} else {					/* 2 */
			if (vlanid == pvid)
				vlanid = VLAN_ID_NONE;
			tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid);
			evh->ether_tci = htons(tci);
		}
	} else {
		/* case 4: no header present, but one is needed */
		mlen = MBLKL(mp);
		if (mlen > sizeof (struct ether_header))
			ovbcopy(mp->b_rptr + sizeof (struct ether_header),
			    mp->b_rptr + sizeof (struct ether_vlan_header),
			    mlen - sizeof (struct ether_header));
		mp->b_wptr += VLAN_INCR;
		ASSERT(mp->b_wptr <= DB_LIM(mp));
		if (vlanid == pvid)
			vlanid = VLAN_ID_NONE;
		tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid);
		evh->ether_type = evh->ether_tpid;
		evh->ether_tpid = htons(ETHERTYPE_VLAN);
		evh->ether_tci = htons(tci);
	}
	return (mp);
}
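
/*
 * Worked example (editor's illustration, not part of the original source),
 * using the case numbering from the comment above reform_vlan_header():
 *
 *  - A frame tagged VLAN 10, priority 0 (b_band 0), sent out a link whose
 *    PVID is 10: case 3, so the tag is removed and the frame leaves
 *    untagged.
 *  - An untagged frame classified as VLAN 20, sent out a link whose PVID is
 *    10: case 4b, so a tag carrying VLAN 20 (and the mblk's b_band priority)
 *    is inserted.
 *  - A frame tagged VLAN 20 whose b_band equals the tag priority, sent out a
 *    link whose PVID is 10: case 1c, passed through unchanged.
 */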
If the destination points there, then drop instead. 1857 */ 1858 static mblk_t * 1859 bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp, 1860 uint16_t vlanid, uint16_t tci, boolean_t from_trill, boolean_t is_xmit) 1861 { 1862 mblk_t *mpsend, *mpcopy; 1863 bridge_inst_t *bip = blp->bl_inst; 1864 bridge_link_t *blpsend, *blpnext; 1865 bridge_fwd_t *bfp; 1866 uint_t i; 1867 boolean_t selfseen = B_FALSE; 1868 void *tdp; 1869 const uint8_t *daddr = hdr_info->mhi_daddr; 1870 1871 /* 1872 * Check for the IEEE "reserved" multicast addresses. Messages sent to 1873 * these addresses are used for link-local control (STP and pause), and 1874 * are never forwarded or redirected. 1875 */ 1876 if (daddr[0] == 1 && daddr[1] == 0x80 && daddr[2] == 0xc2 && 1877 daddr[3] == 0 && daddr[4] == 0 && (daddr[5] & 0xf0) == 0) { 1878 if (from_trill) { 1879 freemsg(mp); 1880 mp = NULL; 1881 } 1882 return (mp); 1883 } 1884 1885 if ((bfp = fwd_find(bip, daddr, vlanid)) != NULL) { 1886 1887 /* 1888 * If trill indicates a destination for this node, then it's 1889 * clearly not intended for local delivery. We must tell TRILL 1890 * to encapsulate, as long as we didn't just decapsulate it. 1891 */ 1892 if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) { 1893 /* 1894 * Error case: can't reencapsulate if the protocols are 1895 * working correctly. 1896 */ 1897 if (from_trill) { 1898 freemsg(mp); 1899 return (NULL); 1900 } 1901 mutex_enter(&blp->bl_trilllock); 1902 if ((tdp = blp->bl_trilldata) != NULL) { 1903 blp->bl_trillthreads++; 1904 mutex_exit(&blp->bl_trilllock); 1905 update_header(mp, hdr_info, B_FALSE); 1906 if (is_xmit) 1907 mp = mac_fix_cksum(mp); 1908 /* all trill data frames have Inner.VLAN */ 1909 mp = reform_vlan_header(mp, vlanid, tci, 0); 1910 if (mp == NULL) { 1911 KIINCR(bki_drops); 1912 fwd_unref(bfp); 1913 return (NULL); 1914 } 1915 trill_encap_fn(tdp, blp, hdr_info, mp, 1916 bfp->bf_trill_nick); 1917 mutex_enter(&blp->bl_trilllock); 1918 if (--blp->bl_trillthreads == 0 && 1919 blp->bl_trilldata == NULL) 1920 cv_broadcast(&blp->bl_trillwait); 1921 } 1922 mutex_exit(&blp->bl_trilllock); 1923 1924 /* if TRILL has been disabled, then kill this stray */ 1925 if (tdp == NULL) { 1926 freemsg(mp); 1927 fwd_delete(bfp); 1928 } 1929 fwd_unref(bfp); 1930 return (NULL); 1931 } 1932 1933 /* find first link we can send on */ 1934 for (i = 0; i < bfp->bf_nlinks; i++) { 1935 blpsend = bfp->bf_links[i]; 1936 if (blpsend == blp) 1937 selfseen = B_TRUE; 1938 else if (bridge_can_send(blpsend, vlanid)) 1939 break; 1940 } 1941 1942 while (i < bfp->bf_nlinks) { 1943 blpsend = bfp->bf_links[i]; 1944 for (i++; i < bfp->bf_nlinks; i++) { 1945 blpnext = bfp->bf_links[i]; 1946 if (blpnext == blp) 1947 selfseen = B_TRUE; 1948 else if (bridge_can_send(blpnext, vlanid)) 1949 break; 1950 } 1951 if (i == bfp->bf_nlinks && !selfseen) { 1952 mpsend = mp; 1953 mp = NULL; 1954 } else { 1955 mpsend = copymsg(mp); 1956 } 1957 1958 if (!from_trill && is_xmit) 1959 mpsend = mac_fix_cksum(mpsend); 1960 1961 mpsend = reform_vlan_header(mpsend, vlanid, tci, 1962 blpsend->bl_pvid); 1963 if (mpsend == NULL) { 1964 KIINCR(bki_drops); 1965 continue; 1966 } 1967 1968 KIINCR(bki_forwards); 1969 /* 1970 * No need to bump up the link reference count, as 1971 * the forwarding entry itself holds a reference to 1972 * the link. 
1973 */ 1974 if (bfp->bf_flags & BFF_LOCALADDR) { 1975 mac_rx_common(blpsend->bl_mh, NULL, mpsend); 1976 } else { 1977 KLPINCR(blpsend, bkl_xmit); 1978 MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, 1979 mpsend); 1980 freemsg(mpsend); 1981 } 1982 } 1983 /* 1984 * Handle a special case: if we're transmitting to the original 1985 * link, then check whether the localaddr flag is set. If it 1986 * is, then receive instead. This doesn't happen with ordinary 1987 * bridging, but does happen often with TRILL decapsulation. 1988 */ 1989 if (mp != NULL && is_xmit && (bfp->bf_flags & BFF_LOCALADDR)) { 1990 mac_rx_common(blp->bl_mh, NULL, mp); 1991 mp = NULL; 1992 } 1993 fwd_unref(bfp); 1994 } else { 1995 /* 1996 * TRILL has two cases to handle. If the packet is off the 1997 * wire (not from TRILL), then we need to send up into the 1998 * TRILL module to have the distribution tree computed. If the 1999 * packet is from TRILL (decapsulated), then we're part of the 2000 * distribution tree, and we need to copy the packet on member 2001 * interfaces. 2002 * 2003 * Thus, the from TRILL case is identical to the STP case. 2004 */ 2005 if (!from_trill && blp->bl_trilldata != NULL) { 2006 mutex_enter(&blp->bl_trilllock); 2007 if ((tdp = blp->bl_trilldata) != NULL) { 2008 blp->bl_trillthreads++; 2009 mutex_exit(&blp->bl_trilllock); 2010 if ((mpsend = copymsg(mp)) != NULL) { 2011 update_header(mpsend, 2012 hdr_info, B_FALSE); 2013 /* 2014 * all trill data frames have 2015 * Inner.VLAN 2016 */ 2017 mpsend = reform_vlan_header(mpsend, 2018 vlanid, tci, 0); 2019 if (mpsend == NULL) { 2020 KIINCR(bki_drops); 2021 } else { 2022 trill_encap_fn(tdp, blp, 2023 hdr_info, mpsend, 2024 RBRIDGE_NICKNAME_NONE); 2025 } 2026 } 2027 mutex_enter(&blp->bl_trilllock); 2028 if (--blp->bl_trillthreads == 0 && 2029 blp->bl_trilldata == NULL) 2030 cv_broadcast(&blp->bl_trillwait); 2031 } 2032 mutex_exit(&blp->bl_trilllock); 2033 } 2034 2035 /* 2036 * This is an unknown destination, so flood. 
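 *
 * The flood loop below never holds bi_rwlock across a transmit: the
 * lock is taken as reader just long enough to find the next eligible
 * link and take a reference on it (dropped later via link_unref()),
 * so links may come and go while we're sending.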
2037 */ 2038 rw_enter(&bip->bi_rwlock, RW_READER); 2039 for (blpnext = list_head(&bip->bi_links); blpnext != NULL; 2040 blpnext = list_next(&bip->bi_links, blpnext)) { 2041 if (blpnext == blp) 2042 selfseen = B_TRUE; 2043 else if (bridge_can_send(blpnext, vlanid)) 2044 break; 2045 } 2046 if (blpnext != NULL) 2047 atomic_inc_uint(&blpnext->bl_refs); 2048 rw_exit(&bip->bi_rwlock); 2049 while ((blpsend = blpnext) != NULL) { 2050 rw_enter(&bip->bi_rwlock, RW_READER); 2051 for (blpnext = list_next(&bip->bi_links, blpsend); 2052 blpnext != NULL; 2053 blpnext = list_next(&bip->bi_links, blpnext)) { 2054 if (blpnext == blp) 2055 selfseen = B_TRUE; 2056 else if (bridge_can_send(blpnext, vlanid)) 2057 break; 2058 } 2059 if (blpnext != NULL) 2060 atomic_inc_uint(&blpnext->bl_refs); 2061 rw_exit(&bip->bi_rwlock); 2062 if (blpnext == NULL && !selfseen) { 2063 mpsend = mp; 2064 mp = NULL; 2065 } else { 2066 mpsend = copymsg(mp); 2067 } 2068 2069 if (!from_trill && is_xmit) 2070 mpsend = mac_fix_cksum(mpsend); 2071 2072 mpsend = reform_vlan_header(mpsend, vlanid, tci, 2073 blpsend->bl_pvid); 2074 if (mpsend == NULL) { 2075 KIINCR(bki_drops); 2076 continue; 2077 } 2078 2079 if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST) 2080 KIINCR(bki_unknown); 2081 else 2082 KIINCR(bki_mbcast); 2083 KLPINCR(blpsend, bkl_xmit); 2084 if ((mpcopy = copymsg(mpsend)) != NULL) 2085 mac_rx_common(blpsend->bl_mh, NULL, mpcopy); 2086 MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, mpsend); 2087 freemsg(mpsend); 2088 link_unref(blpsend); 2089 } 2090 } 2091 2092 /* 2093 * At this point, if np is non-NULL, it means that the caller needs to 2094 * continue on the selected link. 2095 */ 2096 return (mp); 2097 } 2098 2099 /* 2100 * Extract and validate the VLAN information for a given packet. This checks 2101 * conformance with the rules for use of the PVID on the link, and for the 2102 * allowed (configured) VLAN set. 2103 * 2104 * Returns B_TRUE if the packet passes, B_FALSE if it fails. 2105 */ 2106 static boolean_t 2107 bridge_get_vlan(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp, 2108 uint16_t *vlanidp, uint16_t *tcip) 2109 { 2110 uint16_t tci, vlanid; 2111 2112 if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) { 2113 ptrdiff_t tpos = offsetof(struct ether_vlan_header, ether_tci); 2114 ptrdiff_t mlen; 2115 2116 /* 2117 * Extract the VLAN ID information, regardless of alignment, 2118 * and without a pullup. This isn't attractive, but we do this 2119 * to avoid having to deal with the pointers stashed in 2120 * hdr_info moving around or having the caller deal with a new 2121 * mblk_t pointer. 2122 */ 2123 while (mp != NULL) { 2124 mlen = MBLKL(mp); 2125 if (mlen > tpos && mlen > 0) 2126 break; 2127 tpos -= mlen; 2128 mp = mp->b_cont; 2129 } 2130 if (mp == NULL) 2131 return (B_FALSE); 2132 tci = mp->b_rptr[tpos] << 8; 2133 if (++tpos >= mlen) { 2134 do { 2135 mp = mp->b_cont; 2136 } while (mp != NULL && MBLKL(mp) == 0); 2137 if (mp == NULL) 2138 return (B_FALSE); 2139 tpos = 0; 2140 } 2141 tci |= mp->b_rptr[tpos]; 2142 2143 vlanid = VLAN_ID(tci); 2144 if (VLAN_CFI(tci) != ETHER_CFI || vlanid > VLAN_ID_MAX) 2145 return (B_FALSE); 2146 if (vlanid == VLAN_ID_NONE || vlanid == blp->bl_pvid) 2147 goto input_no_vlan; 2148 if (!BRIDGE_VLAN_ISSET(blp, vlanid)) 2149 return (B_FALSE); 2150 } else { 2151 tci = 0xFFFF; 2152 input_no_vlan: 2153 /* 2154 * If PVID is set to zero, then untagged traffic is not 2155 * supported here. Do not learn or forward. 
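 *
 * To summarize the checks above and below (no new behavior): a frame
 * tagged with the link's PVID, or tagged with VID 0 (priority-tagged),
 * is treated like an untagged frame and classified to the PVID; an
 * untagged frame on a link whose PVID is 0 is rejected; and a tagged
 * frame is rejected if its VID isn't in the link's allowed VLAN set.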
2156 */ 2157 if ((vlanid = blp->bl_pvid) == VLAN_ID_NONE) 2158 return (B_FALSE); 2159 } 2160 2161 *tcip = tci; 2162 *vlanidp = vlanid; 2163 return (B_TRUE); 2164 } 2165 2166 /* 2167 * Handle MAC notifications. 2168 */ 2169 static void 2170 bridge_notify_cb(void *arg, mac_notify_type_t note_type) 2171 { 2172 bridge_link_t *blp = arg; 2173 2174 switch (note_type) { 2175 case MAC_NOTE_UNICST: 2176 bridge_new_unicst(blp); 2177 break; 2178 2179 case MAC_NOTE_SDU_SIZE: { 2180 uint_t maxsdu; 2181 bridge_inst_t *bip = blp->bl_inst; 2182 bridge_mac_t *bmp = bip->bi_mac; 2183 boolean_t notify = B_FALSE; 2184 mblk_t *mlist = NULL; 2185 2186 mac_sdu_get(blp->bl_mh, NULL, &maxsdu); 2187 rw_enter(&bip->bi_rwlock, RW_READER); 2188 if (list_prev(&bip->bi_links, blp) == NULL && 2189 list_next(&bip->bi_links, blp) == NULL) { 2190 notify = (maxsdu != bmp->bm_maxsdu); 2191 bmp->bm_maxsdu = maxsdu; 2192 } 2193 blp->bl_maxsdu = maxsdu; 2194 if (maxsdu != bmp->bm_maxsdu) 2195 link_sdu_fail(blp, B_TRUE, &mlist); 2196 else if (notify) 2197 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 2198 rw_exit(&bip->bi_rwlock); 2199 send_up_messages(bip, mlist); 2200 break; 2201 } 2202 } 2203 } 2204 2205 /* 2206 * This is called by the MAC layer. As with the transmit side, we're right in 2207 * the data path for all I/O on this port, so if we don't need to forward this 2208 * packet anywhere, we have to send it upwards via mac_rx_common. 2209 */ 2210 static void 2211 bridge_recv_cb(mac_handle_t mh, mac_resource_handle_t rsrc, mblk_t *mpnext) 2212 { 2213 mblk_t *mp, *mpcopy; 2214 bridge_link_t *blp = (bridge_link_t *)mh; 2215 bridge_inst_t *bip = blp->bl_inst; 2216 bridge_mac_t *bmp = bip->bi_mac; 2217 mac_header_info_t hdr_info; 2218 uint16_t vlanid, tci; 2219 boolean_t trillmode = B_FALSE; 2220 2221 KIINCR(bki_recv); 2222 KLINCR(bkl_recv); 2223 2224 /* 2225 * Regardless of state, check for inbound TRILL packets when TRILL is 2226 * active. These are pulled out of band and sent for TRILL handling. 2227 */ 2228 if (blp->bl_trilldata != NULL) { 2229 void *tdp; 2230 mblk_t *newhead; 2231 mblk_t *tail = NULL; 2232 2233 mutex_enter(&blp->bl_trilllock); 2234 if ((tdp = blp->bl_trilldata) != NULL) { 2235 blp->bl_trillthreads++; 2236 mutex_exit(&blp->bl_trilllock); 2237 trillmode = B_TRUE; 2238 newhead = mpnext; 2239 while ((mp = mpnext) != NULL) { 2240 boolean_t raw_isis, bridge_group; 2241 2242 mpnext = mp->b_next; 2243 2244 /* 2245 * If the header isn't readable, then leave on 2246 * the list and continue. 2247 */ 2248 if (mac_header_info(blp->bl_mh, mp, 2249 &hdr_info) != 0) { 2250 tail = mp; 2251 continue; 2252 } 2253 2254 /* 2255 * The TRILL document specifies that, on 2256 * Ethernet alone, IS-IS packets arrive with 2257 * LLC rather than Ethertype, and using a 2258 * specific destination address. We must check 2259 * for that here. Also, we need to give BPDUs 2260 * to TRILL for processing. 
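 *
 * In short, a frame is pulled out for TRILL handling here when any of
 * the following holds (this just summarizes the tests below):
 *
 *	- the destination is all_isis_rbridges (raw, LLC-encoded IS-IS)
 *	- the destination is bridge_group_address (BPDU)
 *	- the bindsap is ETHERTYPE_TRILL, or ETHERTYPE_VLAN with an
 *	  inner type of ETHERTYPE_TRILL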
2261 */ 2262 raw_isis = bridge_group = B_FALSE; 2263 if (hdr_info.mhi_dsttype == 2264 MAC_ADDRTYPE_MULTICAST) { 2265 if (memcmp(hdr_info.mhi_daddr, 2266 all_isis_rbridges, ETHERADDRL) == 0) 2267 raw_isis = B_TRUE; 2268 else if (memcmp(hdr_info.mhi_daddr, 2269 bridge_group_address, ETHERADDRL) == 2270 0) 2271 bridge_group = B_TRUE; 2272 } 2273 if (!raw_isis && !bridge_group && 2274 hdr_info.mhi_bindsap != ETHERTYPE_TRILL && 2275 (hdr_info.mhi_bindsap != ETHERTYPE_VLAN || 2276 /* LINTED: alignment */ 2277 ((struct ether_vlan_header *)mp->b_rptr)-> 2278 ether_type != htons(ETHERTYPE_TRILL))) { 2279 tail = mp; 2280 continue; 2281 } 2282 2283 /* 2284 * We've got TRILL input. Remove from the list 2285 * and send up through the TRILL module. (Send 2286 * a copy through promiscuous receive just to 2287 * support snooping on TRILL. Order isn't 2288 * preserved strictly, but that doesn't matter 2289 * here.) 2290 */ 2291 if (tail != NULL) 2292 tail->b_next = mpnext; 2293 mp->b_next = NULL; 2294 if (mp == newhead) 2295 newhead = mpnext; 2296 mac_trill_snoop(blp->bl_mh, mp); 2297 update_header(mp, &hdr_info, B_TRUE); 2298 /* 2299 * On raw IS-IS and BPDU frames, we have to 2300 * make sure that the length is trimmed 2301 * properly. We use origsap in order to cope 2302 * with jumbograms for IS-IS. (Regular mac 2303 * can't.) 2304 */ 2305 if (raw_isis || bridge_group) { 2306 size_t msglen = msgdsize(mp); 2307 2308 if (msglen > hdr_info.mhi_origsap) { 2309 (void) adjmsg(mp, 2310 hdr_info.mhi_origsap - 2311 msglen); 2312 } else if (msglen < 2313 hdr_info.mhi_origsap) { 2314 freemsg(mp); 2315 continue; 2316 } 2317 } 2318 trill_recv_fn(tdp, blp, rsrc, mp, &hdr_info); 2319 } 2320 mpnext = newhead; 2321 mutex_enter(&blp->bl_trilllock); 2322 if (--blp->bl_trillthreads == 0 && 2323 blp->bl_trilldata == NULL) 2324 cv_broadcast(&blp->bl_trillwait); 2325 } 2326 mutex_exit(&blp->bl_trilllock); 2327 if (mpnext == NULL) 2328 return; 2329 } 2330 2331 /* 2332 * If this is a TRILL RBridge, then just check whether this link is 2333 * used at all for forwarding. If not, then we're done. 2334 */ 2335 if (trillmode) { 2336 if (!(blp->bl_flags & BLF_TRILLACTIVE) || 2337 (blp->bl_flags & BLF_SDUFAIL)) { 2338 mac_rx_common(blp->bl_mh, rsrc, mpnext); 2339 return; 2340 } 2341 } else { 2342 /* 2343 * For regular (STP) bridges, if we're in blocking or listening 2344 * state, then do nothing. We don't learn or forward until 2345 * told to do so. 2346 */ 2347 if (blp->bl_state == BLS_BLOCKLISTEN) { 2348 mac_rx_common(blp->bl_mh, rsrc, mpnext); 2349 return; 2350 } 2351 } 2352 2353 /* 2354 * Send a copy of the message chain up to the observability node users. 2355 * For TRILL, we must obey the VLAN AF rules, so we go packet-by- 2356 * packet. 2357 */ 2358 if (!trillmode && blp->bl_state == BLS_FORWARDING && 2359 (bmp->bm_flags & BMF_STARTED) && 2360 (mp = copymsgchain(mpnext)) != NULL) { 2361 mac_rx(bmp->bm_mh, NULL, mp); 2362 } 2363 2364 /* 2365 * We must be in learning or forwarding state, or using TRILL on a link 2366 * with one or more VLANs active. For each packet in the list, process 2367 * the source address, and then attempt to forward. 2368 */ 2369 while ((mp = mpnext) != NULL) { 2370 mpnext = mp->b_next; 2371 mp->b_next = NULL; 2372 2373 /* 2374 * If we can't decode the header or if the header specifies a 2375 * multicast source address (impossible!), then don't bother 2376 * learning or forwarding, but go ahead and forward up the 2377 * stack for subsequent processing. 
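 *
 * (The multicast-source test below is just the I/G bit: the low-order
 * bit of the first octet of the source address.)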
2378 */ 2379 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0 || 2380 (hdr_info.mhi_saddr[0] & 1) != 0) { 2381 KIINCR(bki_drops); 2382 KLINCR(bkl_drops); 2383 mac_rx_common(blp->bl_mh, rsrc, mp); 2384 continue; 2385 } 2386 2387 /* 2388 * Extract and validate the VLAN ID for this packet. 2389 */ 2390 if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) || 2391 !BRIDGE_AF_ISSET(blp, vlanid)) { 2392 mac_rx_common(blp->bl_mh, rsrc, mp); 2393 continue; 2394 } 2395 2396 if (trillmode) { 2397 /* 2398 * Special test required by TRILL document: must 2399 * discard frames with outer address set to ESADI. 2400 */ 2401 if (memcmp(hdr_info.mhi_daddr, all_esadi_rbridges, 2402 ETHERADDRL) == 0) { 2403 mac_rx_common(blp->bl_mh, rsrc, mp); 2404 continue; 2405 } 2406 2407 /* 2408 * If we're in TRILL mode, then the call above to get 2409 * the VLAN ID has also checked that we're the 2410 * appointed forwarder, so report that we're handling 2411 * this packet to any observability node users. 2412 */ 2413 if ((bmp->bm_flags & BMF_STARTED) && 2414 (mpcopy = copymsg(mp)) != NULL) 2415 mac_rx(bmp->bm_mh, NULL, mpcopy); 2416 } 2417 2418 /* 2419 * First process the source address and learn from it. For 2420 * TRILL, we learn only if we're the appointed forwarder. 2421 */ 2422 bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE, 2423 vlanid); 2424 2425 /* 2426 * Now check whether we're forwarding and look up the 2427 * destination. If we can forward, do so. 2428 */ 2429 if (trillmode || blp->bl_state == BLS_FORWARDING) { 2430 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, 2431 B_FALSE, B_FALSE); 2432 } 2433 if (mp != NULL) 2434 mac_rx_common(blp->bl_mh, rsrc, mp); 2435 } 2436 } 2437 2438 2439 /* ARGSUSED */ 2440 static mblk_t * 2441 bridge_xmit_cb(mac_handle_t mh, mac_ring_handle_t rh, mblk_t *mpnext) 2442 { 2443 bridge_link_t *blp = (bridge_link_t *)mh; 2444 bridge_inst_t *bip = blp->bl_inst; 2445 bridge_mac_t *bmp = bip->bi_mac; 2446 mac_header_info_t hdr_info; 2447 uint16_t vlanid, tci; 2448 mblk_t *mp, *mpcopy; 2449 boolean_t trillmode; 2450 2451 trillmode = blp->bl_trilldata != NULL; 2452 2453 /* 2454 * If we're using STP and we're in blocking or listening state, or if 2455 * we're using TRILL and no VLANs are active, then behave as though the 2456 * bridge isn't here at all, and send on the local link alone. 2457 */ 2458 if ((!trillmode && blp->bl_state == BLS_BLOCKLISTEN) || 2459 (trillmode && 2460 (!(blp->bl_flags & BLF_TRILLACTIVE) || 2461 (blp->bl_flags & BLF_SDUFAIL)))) { 2462 KIINCR(bki_sent); 2463 KLINCR(bkl_xmit); 2464 MAC_RING_TX(blp->bl_mh, rh, mpnext, mp); 2465 return (mp); 2466 } 2467 2468 /* 2469 * Send a copy of the message up to the observability node users. 2470 * TRILL needs to check on a packet-by-packet basis. 2471 */ 2472 if (!trillmode && blp->bl_state == BLS_FORWARDING && 2473 (bmp->bm_flags & BMF_STARTED) && 2474 (mp = copymsgchain(mpnext)) != NULL) { 2475 mac_rx(bmp->bm_mh, NULL, mp); 2476 } 2477 2478 while ((mp = mpnext) != NULL) { 2479 mpnext = mp->b_next; 2480 mp->b_next = NULL; 2481 2482 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) { 2483 freemsg(mp); 2484 continue; 2485 } 2486 2487 /* 2488 * Extract and validate the VLAN ID for this packet. 
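 *
 * Unlike the receive path, which still passes such frames up the
 * stack, a frame that fails the VLAN checks on the transmit side is
 * simply dropped.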
2489 */ 2490 if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) || 2491 !BRIDGE_AF_ISSET(blp, vlanid)) { 2492 freemsg(mp); 2493 continue; 2494 } 2495 2496 /* 2497 * If we're using TRILL, then we've now validated that we're 2498 * the forwarder for this VLAN, so go ahead and let 2499 * observability node users know about the packet. 2500 */ 2501 if (trillmode && (bmp->bm_flags & BMF_STARTED) && 2502 (mpcopy = copymsg(mp)) != NULL) { 2503 mac_rx(bmp->bm_mh, NULL, mpcopy); 2504 } 2505 2506 /* 2507 * We have to learn from our own transmitted packets, because 2508 * there may be a Solaris DLPI raw sender (who can specify his 2509 * own source address) using promiscuous mode for receive. The 2510 * mac layer information won't (and can't) tell us everything 2511 * we need to know. 2512 */ 2513 bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE, 2514 vlanid); 2515 2516 /* attempt forwarding */ 2517 if (trillmode || blp->bl_state == BLS_FORWARDING) { 2518 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, 2519 B_FALSE, B_TRUE); 2520 } 2521 if (mp != NULL) { 2522 MAC_RING_TX(blp->bl_mh, rh, mp, mp); 2523 if (mp == NULL) { 2524 KIINCR(bki_sent); 2525 KLINCR(bkl_xmit); 2526 } 2527 } 2528 /* 2529 * If we get stuck, then stop. Don't let the user's output 2530 * packets get out of order. (More importantly: don't try to 2531 * bridge the same packet multiple times if flow control is 2532 * asserted.) 2533 */ 2534 if (mp != NULL) { 2535 mp->b_next = mpnext; 2536 break; 2537 } 2538 } 2539 return (mp); 2540 } 2541 2542 /* 2543 * This is called by TRILL when it decapsulates an packet, and we must forward 2544 * locally. On failure, we just drop. 2545 * 2546 * Note that the ingress_nick reported by TRILL must not represent this local 2547 * node. 2548 */ 2549 void 2550 bridge_trill_decaps(bridge_link_t *blp, mblk_t *mp, uint16_t ingress_nick) 2551 { 2552 mac_header_info_t hdr_info; 2553 uint16_t vlanid, tci; 2554 bridge_inst_t *bip = blp->bl_inst; /* used by macros */ 2555 mblk_t *mpcopy; 2556 2557 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) { 2558 freemsg(mp); 2559 return; 2560 } 2561 2562 /* Extract VLAN ID for this packet. */ 2563 if (hdr_info.mhi_bindsap == ETHERTYPE_VLAN) { 2564 struct ether_vlan_header *evhp; 2565 2566 /* LINTED: alignment */ 2567 evhp = (struct ether_vlan_header *)mp->b_rptr; 2568 tci = ntohs(evhp->ether_tci); 2569 vlanid = VLAN_ID(tci); 2570 } else { 2571 /* Inner VLAN headers are required in TRILL data packets */ 2572 DTRACE_PROBE3(bridge__trill__decaps__novlan, bridge_link_t *, 2573 blp, mblk_t *, mp, uint16_t, ingress_nick); 2574 freemsg(mp); 2575 return; 2576 } 2577 2578 /* Learn the location of this sender in the RBridge network */ 2579 bridge_learn(blp, hdr_info.mhi_saddr, ingress_nick, vlanid); 2580 2581 /* attempt forwarding */ 2582 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, B_TRUE, B_TRUE); 2583 if (mp != NULL) { 2584 if (bridge_can_send(blp, vlanid)) { 2585 /* Deliver a copy locally as well */ 2586 if ((mpcopy = copymsg(mp)) != NULL) 2587 mac_rx_common(blp->bl_mh, NULL, mpcopy); 2588 MAC_RING_TX(blp->bl_mh, NULL, mp, mp); 2589 } 2590 if (mp == NULL) { 2591 KIINCR(bki_sent); 2592 KLINCR(bkl_xmit); 2593 } else { 2594 freemsg(mp); 2595 } 2596 } 2597 } 2598 2599 /* 2600 * This function is used by TRILL _only_ to transmit TRILL-encapsulated 2601 * packets. It sends on a single underlying link and does not bridge. 
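 *
 * A non-NULL return means the underlying MAC did not accept the
 * message (flow control is asserted) and the caller still owns it.
 * For illustration only, a rough sketch of the expected calling
 * pattern (not code taken from the TRILL module):
 *
 *	if ((mp = bridge_trill_output(blp, mp)) != NULL)
 *		... hold the message and retry later, or free it ...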
2602 */ 2603 mblk_t * 2604 bridge_trill_output(bridge_link_t *blp, mblk_t *mp) 2605 { 2606 bridge_inst_t *bip = blp->bl_inst; /* used by macros */ 2607 2608 mac_trill_snoop(blp->bl_mh, mp); 2609 MAC_RING_TX(blp->bl_mh, NULL, mp, mp); 2610 if (mp == NULL) { 2611 KIINCR(bki_sent); 2612 KLINCR(bkl_xmit); 2613 } 2614 return (mp); 2615 } 2616 2617 /* 2618 * Set the "appointed forwarder" flag array for this link. TRILL controls 2619 * forwarding on a VLAN basis. The "trillactive" flag is an optimization for 2620 * the forwarder. 2621 */ 2622 void 2623 bridge_trill_setvlans(bridge_link_t *blp, const uint8_t *arr) 2624 { 2625 int i; 2626 uint_t newflags = 0; 2627 2628 for (i = 0; i < BRIDGE_VLAN_ARR_SIZE; i++) { 2629 if ((blp->bl_afs[i] = arr[i]) != 0) 2630 newflags = BLF_TRILLACTIVE; 2631 } 2632 blp->bl_flags = (blp->bl_flags & ~BLF_TRILLACTIVE) | newflags; 2633 } 2634 2635 void 2636 bridge_trill_flush(bridge_link_t *blp, uint16_t vlan, boolean_t dotrill) 2637 { 2638 bridge_inst_t *bip = blp->bl_inst; 2639 bridge_fwd_t *bfp, *bfnext; 2640 avl_tree_t fwd_scavenge; 2641 int i; 2642 2643 _NOTE(ARGUNUSED(vlan)); 2644 2645 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 2646 offsetof(bridge_fwd_t, bf_node)); 2647 rw_enter(&bip->bi_rwlock, RW_WRITER); 2648 bfnext = avl_first(&bip->bi_fwd); 2649 while ((bfp = bfnext) != NULL) { 2650 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 2651 if (bfp->bf_flags & BFF_LOCALADDR) 2652 continue; 2653 if (dotrill) { 2654 /* port doesn't matter if we're flushing TRILL */ 2655 if (bfp->bf_trill_nick == RBRIDGE_NICKNAME_NONE) 2656 continue; 2657 } else { 2658 if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) 2659 continue; 2660 for (i = 0; i < bfp->bf_nlinks; i++) { 2661 if (bfp->bf_links[i] == blp) 2662 break; 2663 } 2664 if (i >= bfp->bf_nlinks) 2665 continue; 2666 } 2667 ASSERT(bfp->bf_flags & BFF_INTREE); 2668 avl_remove(&bip->bi_fwd, bfp); 2669 bfp->bf_flags &= ~BFF_INTREE; 2670 avl_add(&fwd_scavenge, bfp); 2671 } 2672 rw_exit(&bip->bi_rwlock); 2673 bfnext = avl_first(&fwd_scavenge); 2674 while ((bfp = bfnext) != NULL) { 2675 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 2676 avl_remove(&fwd_scavenge, bfp); 2677 fwd_unref(bfp); 2678 } 2679 avl_destroy(&fwd_scavenge); 2680 } 2681 2682 /* 2683 * Let the mac module take or drop a reference to a bridge link. When this is 2684 * called, the mac module is holding the mi_bridge_lock, so the link cannot be 2685 * in the process of entering or leaving a bridge. 2686 */ 2687 static void 2688 bridge_ref_cb(mac_handle_t mh, boolean_t hold) 2689 { 2690 bridge_link_t *blp = (bridge_link_t *)mh; 2691 2692 if (hold) 2693 atomic_inc_uint(&blp->bl_refs); 2694 else 2695 link_unref(blp); 2696 } 2697 2698 /* 2699 * Handle link state changes reported by the mac layer. This acts as a filter 2700 * for link state changes: if a link is reporting down, but there are other 2701 * links still up on the bridge, then the state is changed to "up." When the 2702 * last link goes down, all are marked down, and when the first link goes up, 2703 * all are marked up. (Recursion is avoided by the use of the "redo" function.) 2704 * 2705 * We treat unknown as equivalent to "up." 
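 *
 * For example (illustrative only): with links A and B on one bridge,
 * if A reports LINK_STATE_DOWN while B is still up, A's new state is
 * recorded but the value returned (and thus seen by A's clients)
 * remains "up."  Only when the last remaining link goes down are all
 * of the links, and the bridge MAC itself, marked down.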
2706 */ 2707 static link_state_t 2708 bridge_ls_cb(mac_handle_t mh, link_state_t newls) 2709 { 2710 bridge_link_t *blp = (bridge_link_t *)mh; 2711 bridge_link_t *blcmp; 2712 bridge_inst_t *bip; 2713 bridge_mac_t *bmp; 2714 2715 if (newls != LINK_STATE_DOWN && blp->bl_linkstate != LINK_STATE_DOWN || 2716 (blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) { 2717 blp->bl_linkstate = newls; 2718 return (newls); 2719 } 2720 2721 /* 2722 * Scan first to see if there are any other non-down links. If there 2723 * are, then we're done. Otherwise, if all others are down, then the 2724 * state of this link is the state of the bridge. 2725 */ 2726 bip = blp->bl_inst; 2727 rw_enter(&bip->bi_rwlock, RW_WRITER); 2728 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 2729 blcmp = list_next(&bip->bi_links, blcmp)) { 2730 if (blcmp != blp && 2731 !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) && 2732 blcmp->bl_linkstate != LINK_STATE_DOWN) 2733 break; 2734 } 2735 2736 if (blcmp != NULL) { 2737 /* 2738 * If there are other links that are considered up, then tell 2739 * the caller that the link is actually still up, regardless of 2740 * this link's underlying state. 2741 */ 2742 blp->bl_linkstate = newls; 2743 newls = LINK_STATE_UP; 2744 } else if (blp->bl_linkstate != newls) { 2745 /* 2746 * If we've found no other 'up' links, and this link has 2747 * changed state, then report the new state of the bridge to 2748 * all other clients. 2749 */ 2750 blp->bl_linkstate = newls; 2751 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 2752 blcmp = list_next(&bip->bi_links, blcmp)) { 2753 if (blcmp != blp && !(blcmp->bl_flags & BLF_DELETED)) 2754 mac_link_redo(blcmp->bl_mh, newls); 2755 } 2756 bmp = bip->bi_mac; 2757 if ((bmp->bm_linkstate = newls) != LINK_STATE_DOWN) 2758 bmp->bm_linkstate = LINK_STATE_UP; 2759 mac_link_redo(bmp->bm_mh, bmp->bm_linkstate); 2760 } 2761 rw_exit(&bip->bi_rwlock); 2762 return (newls); 2763 } 2764 2765 static void 2766 bridge_add_link(void *arg) 2767 { 2768 mblk_t *mp = arg; 2769 bridge_stream_t *bsp; 2770 bridge_inst_t *bip, *bipt; 2771 bridge_mac_t *bmp; 2772 datalink_id_t linkid; 2773 int err; 2774 mac_handle_t mh; 2775 uint_t maxsdu; 2776 bridge_link_t *blp = NULL, *blpt; 2777 const mac_info_t *mip; 2778 boolean_t macopen = B_FALSE; 2779 char linkname[MAXLINKNAMELEN]; 2780 char kstatname[KSTAT_STRLEN]; 2781 int i; 2782 link_state_t linkstate; 2783 mblk_t *mlist; 2784 2785 bsp = (bridge_stream_t *)mp->b_next; 2786 mp->b_next = NULL; 2787 bip = bsp->bs_inst; 2788 /* LINTED: alignment */ 2789 linkid = *(datalink_id_t *)mp->b_cont->b_rptr; 2790 2791 /* 2792 * First make sure that there is no other bridge that has this link. 2793 * We don't want to overlap operations from two bridges; the MAC layer 2794 * supports only one bridge on a given MAC at a time. 2795 * 2796 * We rely on the fact that there's just one taskq thread for the 2797 * bridging module: once we've checked for a duplicate, we can drop the 2798 * lock, because no other thread could possibly be adding another link 2799 * until we're done. 
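 *
 * (That single-threading comes from bridge_attach(), which creates
 * bridge_taskq with exactly one thread via ddi_taskq_create().)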
2800 */ 2801 mutex_enter(&inst_lock); 2802 for (bipt = list_head(&inst_list); bipt != NULL; 2803 bipt = list_next(&inst_list, bipt)) { 2804 rw_enter(&bipt->bi_rwlock, RW_READER); 2805 for (blpt = list_head(&bipt->bi_links); blpt != NULL; 2806 blpt = list_next(&bipt->bi_links, blpt)) { 2807 if (linkid == blpt->bl_linkid) 2808 break; 2809 } 2810 rw_exit(&bipt->bi_rwlock); 2811 if (blpt != NULL) 2812 break; 2813 } 2814 mutex_exit(&inst_lock); 2815 if (bipt != NULL) { 2816 err = EBUSY; 2817 goto fail; 2818 } 2819 2820 if ((err = mac_open_by_linkid(linkid, &mh)) != 0) 2821 goto fail; 2822 macopen = B_TRUE; 2823 2824 /* we bridge only Ethernet */ 2825 mip = mac_info(mh); 2826 if (mip->mi_media != DL_ETHER) { 2827 err = ENOTSUP; 2828 goto fail; 2829 } 2830 2831 /* 2832 * Get the current maximum SDU on this interface. If there are other 2833 * links on the bridge, then this one must match, or it errors out. 2834 * Otherwise, the first link becomes the standard for the new bridge. 2835 */ 2836 mac_sdu_get(mh, NULL, &maxsdu); 2837 bmp = bip->bi_mac; 2838 if (list_is_empty(&bip->bi_links)) { 2839 bmp->bm_maxsdu = maxsdu; 2840 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 2841 } 2842 2843 /* figure the kstat name; also used as the mac client name */ 2844 i = MBLKL(mp->b_cont) - sizeof (datalink_id_t); 2845 if (i < 0 || i >= MAXLINKNAMELEN) 2846 i = MAXLINKNAMELEN - 1; 2847 bcopy(mp->b_cont->b_rptr + sizeof (datalink_id_t), linkname, i); 2848 linkname[i] = '\0'; 2849 (void) snprintf(kstatname, sizeof (kstatname), "%s-%s", bip->bi_name, 2850 linkname); 2851 2852 if ((blp = kmem_zalloc(sizeof (*blp), KM_NOSLEEP)) == NULL) { 2853 err = ENOMEM; 2854 goto fail; 2855 } 2856 blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED); 2857 if (blp->bl_lfailmp == NULL) { 2858 kmem_free(blp, sizeof (*blp)); 2859 err = ENOMEM; 2860 goto fail; 2861 } 2862 2863 atomic_inc_uint(&bip->bi_refs); 2864 blp->bl_inst = bip; 2865 blp->bl_mh = mh; 2866 blp->bl_linkid = linkid; 2867 blp->bl_maxsdu = maxsdu; 2868 cv_init(&blp->bl_trillwait, NULL, CV_DRIVER, NULL); 2869 mutex_init(&blp->bl_trilllock, NULL, MUTEX_DRIVER, NULL); 2870 (void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs)); 2871 2872 err = mac_client_open(mh, &blp->bl_mch, kstatname, 0); 2873 if (err != 0) 2874 goto fail; 2875 blp->bl_flags |= BLF_CLIENT_OPEN; 2876 2877 err = mac_margin_add(mh, &blp->bl_margin, B_TRUE); 2878 if (err != 0) 2879 goto fail; 2880 blp->bl_flags |= BLF_MARGIN_ADDED; 2881 2882 blp->bl_mnh = mac_notify_add(mh, bridge_notify_cb, blp); 2883 2884 err = mac_bridge_set(mh, (mac_handle_t)blp); 2885 if (err != 0) 2886 goto fail; 2887 blp->bl_flags |= BLF_SET_BRIDGE; 2888 2889 err = mac_promisc_add(blp->bl_mch, MAC_CLIENT_PROMISC_ALL, NULL, 2890 blp, &blp->bl_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP); 2891 if (err != 0) 2892 goto fail; 2893 blp->bl_flags |= BLF_PROM_ADDED; 2894 2895 bridge_new_unicst(blp); 2896 2897 blp->bl_ksp = kstat_setup((kstat_named_t *)&blp->bl_kstats, 2898 link_kstats_list, Dim(link_kstats_list), kstatname); 2899 2900 /* 2901 * The link holds a reference to the bridge instance, so that the 2902 * instance can't go away before the link is freed. The insertion into 2903 * bi_links holds a reference on the link. When marking as removed 2904 * from bi_links (BLF_DELETED), drop the reference on the link. When 2905 * freeing the link, drop the reference on the instance. 
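 *
 * (Concretely: bi_refs was incremented above when bl_inst was set, and
 * bl_refs is incremented just below, immediately after the insertion,
 * while bi_rwlock is still held as writer.)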
2906 */ 2907 rw_enter(&bip->bi_rwlock, RW_WRITER); 2908 list_insert_tail(&bip->bi_links, blp); 2909 atomic_inc_uint(&blp->bl_refs); 2910 2911 /* 2912 * If the new link is no good on this bridge, then let the daemon know 2913 * about the problem. 2914 */ 2915 mlist = NULL; 2916 if (maxsdu != bmp->bm_maxsdu) 2917 link_sdu_fail(blp, B_TRUE, &mlist); 2918 rw_exit(&bip->bi_rwlock); 2919 send_up_messages(bip, mlist); 2920 2921 /* 2922 * Trigger a link state update so that if this link is the first one 2923 * "up" in the bridge, then we notify everyone. This triggers a trip 2924 * through bridge_ls_cb. 2925 */ 2926 linkstate = mac_stat_get(mh, MAC_STAT_LOWLINK_STATE); 2927 blp->bl_linkstate = LINK_STATE_DOWN; 2928 mac_link_update(mh, linkstate); 2929 2930 /* 2931 * We now need to report back to the stream that invoked us, and then 2932 * drop the reference on the stream that we're holding. 2933 */ 2934 miocack(bsp->bs_wq, mp, 0, 0); 2935 stream_unref(bsp); 2936 return; 2937 2938 fail: 2939 if (blp == NULL) { 2940 if (macopen) 2941 mac_close(mh); 2942 } else { 2943 link_shutdown(blp); 2944 link_free(blp); 2945 } 2946 miocnak(bsp->bs_wq, mp, 0, err); 2947 stream_unref(bsp); 2948 } 2949 2950 static void 2951 bridge_rem_link(void *arg) 2952 { 2953 mblk_t *mp = arg; 2954 bridge_stream_t *bsp; 2955 bridge_inst_t *bip; 2956 bridge_mac_t *bmp; 2957 datalink_id_t linkid; 2958 bridge_link_t *blp, *blsave; 2959 boolean_t found; 2960 mblk_t *mlist; 2961 2962 bsp = (bridge_stream_t *)mp->b_next; 2963 mp->b_next = NULL; 2964 bip = bsp->bs_inst; 2965 /* LINTED: alignment */ 2966 linkid = *(datalink_id_t *)mp->b_cont->b_rptr; 2967 2968 /* 2969 * We become reader here so that we can loop over the other links and 2970 * deliver link up/down notification. 2971 */ 2972 rw_enter(&bip->bi_rwlock, RW_READER); 2973 found = B_FALSE; 2974 for (blp = list_head(&bip->bi_links); blp != NULL; 2975 blp = list_next(&bip->bi_links, blp)) { 2976 if (blp->bl_linkid == linkid && 2977 !(blp->bl_flags & BLF_DELETED)) { 2978 blp->bl_flags |= BLF_DELETED; 2979 (void) ddi_taskq_dispatch(bridge_taskq, link_shutdown, 2980 blp, DDI_SLEEP); 2981 found = B_TRUE; 2982 break; 2983 } 2984 } 2985 2986 /* 2987 * Check if this link is up and the remainder of the links are all 2988 * down. 2989 */ 2990 if (blp != NULL && blp->bl_linkstate != LINK_STATE_DOWN) { 2991 for (blp = list_head(&bip->bi_links); blp != NULL; 2992 blp = list_next(&bip->bi_links, blp)) { 2993 if (blp->bl_linkstate != LINK_STATE_DOWN && 2994 !(blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) 2995 break; 2996 } 2997 if (blp == NULL) { 2998 for (blp = list_head(&bip->bi_links); blp != NULL; 2999 blp = list_next(&bip->bi_links, blp)) { 3000 if (!(blp->bl_flags & BLF_DELETED)) 3001 mac_link_redo(blp->bl_mh, 3002 LINK_STATE_DOWN); 3003 } 3004 bmp = bip->bi_mac; 3005 bmp->bm_linkstate = LINK_STATE_DOWN; 3006 mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN); 3007 } 3008 } 3009 3010 /* 3011 * Check if there's just one working link left on the bridge. If so, 3012 * then that link is now authoritative for bridge MTU. 
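 *
 * For example (illustrative only): if a bridge holds a 1500-byte link
 * and a 9000-byte link, bm_maxsdu follows the first link added, say
 * the 1500-byte one, and the 9000-byte link is flagged as an SDU
 * mismatch.  Removing the 1500-byte link leaves a single link, so
 * bm_maxsdu is raised to 9000 and link_sdu_fail() is called with
 * B_FALSE to clear the mismatch indication on the survivor.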
3013 */ 3014 blsave = NULL; 3015 for (blp = list_head(&bip->bi_links); blp != NULL; 3016 blp = list_next(&bip->bi_links, blp)) { 3017 if (!(blp->bl_flags & BLF_DELETED)) { 3018 if (blsave == NULL) 3019 blsave = blp; 3020 else 3021 break; 3022 } 3023 } 3024 mlist = NULL; 3025 bmp = bip->bi_mac; 3026 if (blsave != NULL && blp == NULL && 3027 blsave->bl_maxsdu != bmp->bm_maxsdu) { 3028 bmp->bm_maxsdu = blsave->bl_maxsdu; 3029 (void) mac_maxsdu_update(bmp->bm_mh, blsave->bl_maxsdu); 3030 link_sdu_fail(blsave, B_FALSE, &mlist); 3031 } 3032 rw_exit(&bip->bi_rwlock); 3033 send_up_messages(bip, mlist); 3034 3035 if (found) 3036 miocack(bsp->bs_wq, mp, 0, 0); 3037 else 3038 miocnak(bsp->bs_wq, mp, 0, ENOENT); 3039 stream_unref(bsp); 3040 } 3041 3042 /* 3043 * This function intentionally returns with bi_rwlock held; it is intended for 3044 * quick checks and updates. 3045 */ 3046 static bridge_link_t * 3047 enter_link(bridge_inst_t *bip, datalink_id_t linkid) 3048 { 3049 bridge_link_t *blp; 3050 3051 rw_enter(&bip->bi_rwlock, RW_READER); 3052 for (blp = list_head(&bip->bi_links); blp != NULL; 3053 blp = list_next(&bip->bi_links, blp)) { 3054 if (blp->bl_linkid == linkid && !(blp->bl_flags & BLF_DELETED)) 3055 break; 3056 } 3057 return (blp); 3058 } 3059 3060 static void 3061 bridge_ioctl(queue_t *wq, mblk_t *mp) 3062 { 3063 bridge_stream_t *bsp = wq->q_ptr; 3064 bridge_inst_t *bip; 3065 struct iocblk *iop; 3066 int rc = EINVAL; 3067 int len = 0; 3068 bridge_link_t *blp; 3069 cred_t *cr; 3070 3071 /* LINTED: alignment */ 3072 iop = (struct iocblk *)mp->b_rptr; 3073 3074 /* 3075 * For now, all of the bridge ioctls are privileged. 3076 */ 3077 if ((cr = msg_getcred(mp, NULL)) == NULL) 3078 cr = iop->ioc_cr; 3079 if (cr != NULL && secpolicy_net_config(cr, B_FALSE) != 0) { 3080 miocnak(wq, mp, 0, EPERM); 3081 return; 3082 } 3083 3084 switch (iop->ioc_cmd) { 3085 case BRIOC_NEWBRIDGE: { 3086 bridge_newbridge_t *bnb; 3087 3088 if (bsp->bs_inst != NULL || 3089 (rc = miocpullup(mp, sizeof (bridge_newbridge_t))) != 0) 3090 break; 3091 /* LINTED: alignment */ 3092 bnb = (bridge_newbridge_t *)mp->b_cont->b_rptr; 3093 bnb->bnb_name[MAXNAMELEN-1] = '\0'; 3094 rc = bridge_create(bnb->bnb_linkid, bnb->bnb_name, &bip, cr); 3095 if (rc != 0) 3096 break; 3097 3098 rw_enter(&bip->bi_rwlock, RW_WRITER); 3099 if (bip->bi_control != NULL) { 3100 rw_exit(&bip->bi_rwlock); 3101 bridge_unref(bip); 3102 rc = EBUSY; 3103 } else { 3104 atomic_inc_uint(&bip->bi_refs); 3105 bsp->bs_inst = bip; /* stream holds reference */ 3106 bip->bi_control = bsp; 3107 rw_exit(&bip->bi_rwlock); 3108 rc = 0; 3109 } 3110 break; 3111 } 3112 3113 case BRIOC_ADDLINK: 3114 if ((bip = bsp->bs_inst) == NULL || 3115 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0) 3116 break; 3117 /* 3118 * We cannot perform the action in this thread, because we're 3119 * not in process context, and we may already be holding 3120 * MAC-related locks. Place the request on taskq. 3121 */ 3122 mp->b_next = (mblk_t *)bsp; 3123 stream_ref(bsp); 3124 (void) ddi_taskq_dispatch(bridge_taskq, bridge_add_link, mp, 3125 DDI_SLEEP); 3126 return; 3127 3128 case BRIOC_REMLINK: 3129 if ((bip = bsp->bs_inst) == NULL || 3130 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0) 3131 break; 3132 /* 3133 * We cannot perform the action in this thread, because we're 3134 * not in process context, and we may already be holding 3135 * MAC-related locks. Place the request on taskq. 
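 *
 * (As with BRIOC_ADDLINK above, the stream pointer rides along in
 * mp->b_next and stream_ref() keeps the stream alive until the taskq
 * handler finishes and calls stream_unref().)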
3136 */ 3137 mp->b_next = (mblk_t *)bsp; 3138 stream_ref(bsp); 3139 (void) ddi_taskq_dispatch(bridge_taskq, bridge_rem_link, mp, 3140 DDI_SLEEP); 3141 return; 3142 3143 case BRIOC_SETSTATE: { 3144 bridge_setstate_t *bss; 3145 3146 if ((bip = bsp->bs_inst) == NULL || 3147 (rc = miocpullup(mp, sizeof (*bss))) != 0) 3148 break; 3149 /* LINTED: alignment */ 3150 bss = (bridge_setstate_t *)mp->b_cont->b_rptr; 3151 if ((blp = enter_link(bip, bss->bss_linkid)) == NULL) { 3152 rc = ENOENT; 3153 } else { 3154 rc = 0; 3155 blp->bl_state = bss->bss_state; 3156 } 3157 rw_exit(&bip->bi_rwlock); 3158 break; 3159 } 3160 3161 case BRIOC_SETPVID: { 3162 bridge_setpvid_t *bsv; 3163 3164 if ((bip = bsp->bs_inst) == NULL || 3165 (rc = miocpullup(mp, sizeof (*bsv))) != 0) 3166 break; 3167 /* LINTED: alignment */ 3168 bsv = (bridge_setpvid_t *)mp->b_cont->b_rptr; 3169 if (bsv->bsv_vlan > VLAN_ID_MAX) 3170 break; 3171 if ((blp = enter_link(bip, bsv->bsv_linkid)) == NULL) { 3172 rc = ENOENT; 3173 } else if (blp->bl_pvid == bsv->bsv_vlan) { 3174 rc = 0; 3175 } else { 3176 rc = 0; 3177 BRIDGE_VLAN_CLR(blp, blp->bl_pvid); 3178 blp->bl_pvid = bsv->bsv_vlan; 3179 if (blp->bl_pvid != 0) 3180 BRIDGE_VLAN_SET(blp, blp->bl_pvid); 3181 } 3182 rw_exit(&bip->bi_rwlock); 3183 break; 3184 } 3185 3186 case BRIOC_VLANENAB: { 3187 bridge_vlanenab_t *bve; 3188 3189 if ((bip = bsp->bs_inst) == NULL || 3190 (rc = miocpullup(mp, sizeof (*bve))) != 0) 3191 break; 3192 /* LINTED: alignment */ 3193 bve = (bridge_vlanenab_t *)mp->b_cont->b_rptr; 3194 if (bve->bve_vlan > VLAN_ID_MAX) 3195 break; 3196 if ((blp = enter_link(bip, bve->bve_linkid)) == NULL) { 3197 rc = ENOENT; 3198 } else { 3199 rc = 0; 3200 /* special case: vlan 0 means "all" */ 3201 if (bve->bve_vlan == 0) { 3202 (void) memset(blp->bl_vlans, 3203 bve->bve_onoff ? ~0 : 0, 3204 sizeof (blp->bl_vlans)); 3205 BRIDGE_VLAN_CLR(blp, 0); 3206 if (blp->bl_pvid != 0) 3207 BRIDGE_VLAN_SET(blp, blp->bl_pvid); 3208 } else if (bve->bve_vlan == blp->bl_pvid) { 3209 rc = EINVAL; 3210 } else if (bve->bve_onoff) { 3211 BRIDGE_VLAN_SET(blp, bve->bve_vlan); 3212 } else { 3213 BRIDGE_VLAN_CLR(blp, bve->bve_vlan); 3214 } 3215 } 3216 rw_exit(&bip->bi_rwlock); 3217 break; 3218 } 3219 3220 case BRIOC_FLUSHFWD: { 3221 bridge_flushfwd_t *bff; 3222 bridge_fwd_t *bfp, *bfnext; 3223 avl_tree_t fwd_scavenge; 3224 int i; 3225 3226 if ((bip = bsp->bs_inst) == NULL || 3227 (rc = miocpullup(mp, sizeof (*bff))) != 0) 3228 break; 3229 /* LINTED: alignment */ 3230 bff = (bridge_flushfwd_t *)mp->b_cont->b_rptr; 3231 rw_enter(&bip->bi_rwlock, RW_WRITER); 3232 /* This case means "all" */ 3233 if (bff->bff_linkid == DATALINK_INVALID_LINKID) { 3234 blp = NULL; 3235 } else { 3236 for (blp = list_head(&bip->bi_links); blp != NULL; 3237 blp = list_next(&bip->bi_links, blp)) { 3238 if (blp->bl_linkid == bff->bff_linkid && 3239 !(blp->bl_flags & BLF_DELETED)) 3240 break; 3241 } 3242 if (blp == NULL) { 3243 rc = ENOENT; 3244 rw_exit(&bip->bi_rwlock); 3245 break; 3246 } 3247 } 3248 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 3249 offsetof(bridge_fwd_t, bf_node)); 3250 bfnext = avl_first(&bip->bi_fwd); 3251 while ((bfp = bfnext) != NULL) { 3252 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 3253 if (bfp->bf_flags & BFF_LOCALADDR) 3254 continue; 3255 if (blp != NULL) { 3256 for (i = 0; i < bfp->bf_maxlinks; i++) { 3257 if (bfp->bf_links[i] == blp) 3258 break; 3259 } 3260 /* 3261 * If the link is there and we're excluding, 3262 * then skip. If the link is not there and 3263 * we're doing only that link, then skip. 
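 *
 * The test below collapses that into one comparison; spelled out, the
 * four cases are:
 *
 *	link in entry,     bff_exclude set	-> keep (skip)
 *	link in entry,     bff_exclude clear	-> flush
 *	link not in entry, bff_exclude set	-> flush
 *	link not in entry, bff_exclude clear	-> keep (skip)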
3264 */ 3265 if ((i < bfp->bf_maxlinks) == bff->bff_exclude) 3266 continue; 3267 } 3268 ASSERT(bfp->bf_flags & BFF_INTREE); 3269 avl_remove(&bip->bi_fwd, bfp); 3270 bfp->bf_flags &= ~BFF_INTREE; 3271 avl_add(&fwd_scavenge, bfp); 3272 } 3273 rw_exit(&bip->bi_rwlock); 3274 bfnext = avl_first(&fwd_scavenge); 3275 while ((bfp = bfnext) != NULL) { 3276 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 3277 avl_remove(&fwd_scavenge, bfp); 3278 fwd_unref(bfp); /* drop tree reference */ 3279 } 3280 avl_destroy(&fwd_scavenge); 3281 break; 3282 } 3283 3284 case BRIOC_TABLEMAX: 3285 if ((bip = bsp->bs_inst) == NULL || 3286 (rc = miocpullup(mp, sizeof (uint32_t))) != 0) 3287 break; 3288 /* LINTED: alignment */ 3289 bip->bi_tablemax = *(uint32_t *)mp->b_cont->b_rptr; 3290 break; 3291 } 3292 3293 if (rc == 0) 3294 miocack(wq, mp, len, 0); 3295 else 3296 miocnak(wq, mp, 0, rc); 3297 } 3298 3299 static void 3300 bridge_wput(queue_t *wq, mblk_t *mp) 3301 { 3302 switch (DB_TYPE(mp)) { 3303 case M_IOCTL: 3304 bridge_ioctl(wq, mp); 3305 break; 3306 case M_FLUSH: 3307 if (*mp->b_rptr & FLUSHW) 3308 *mp->b_rptr &= ~FLUSHW; 3309 if (*mp->b_rptr & FLUSHR) 3310 qreply(wq, mp); 3311 else 3312 freemsg(mp); 3313 break; 3314 default: 3315 freemsg(mp); 3316 break; 3317 } 3318 } 3319 3320 /* 3321 * This function allocates the main data structures for the bridge driver and 3322 * connects us into devfs. 3323 */ 3324 static void 3325 bridge_inst_init(void) 3326 { 3327 bridge_scan_interval = 5 * drv_usectohz(1000000); 3328 bridge_fwd_age = 25 * drv_usectohz(1000000); 3329 3330 rw_init(&bmac_rwlock, NULL, RW_DRIVER, NULL); 3331 list_create(&bmac_list, sizeof (bridge_mac_t), 3332 offsetof(bridge_mac_t, bm_node)); 3333 list_create(&inst_list, sizeof (bridge_inst_t), 3334 offsetof(bridge_inst_t, bi_node)); 3335 cv_init(&inst_cv, NULL, CV_DRIVER, NULL); 3336 mutex_init(&inst_lock, NULL, MUTEX_DRIVER, NULL); 3337 cv_init(&stream_ref_cv, NULL, CV_DRIVER, NULL); 3338 mutex_init(&stream_ref_lock, NULL, MUTEX_DRIVER, NULL); 3339 3340 mac_bridge_vectors(bridge_xmit_cb, bridge_recv_cb, bridge_ref_cb, 3341 bridge_ls_cb); 3342 } 3343 3344 /* 3345 * This function disconnects from devfs and destroys all data structures in 3346 * preparation for unload. It's assumed that there are no active bridge 3347 * references left at this point. 3348 */ 3349 static void 3350 bridge_inst_fini(void) 3351 { 3352 mac_bridge_vectors(NULL, NULL, NULL, NULL); 3353 if (bridge_timerid != 0) 3354 (void) untimeout(bridge_timerid); 3355 rw_destroy(&bmac_rwlock); 3356 list_destroy(&bmac_list); 3357 list_destroy(&inst_list); 3358 cv_destroy(&inst_cv); 3359 mutex_destroy(&inst_lock); 3360 cv_destroy(&stream_ref_cv); 3361 mutex_destroy(&stream_ref_lock); 3362 } 3363 3364 /* 3365 * bridge_attach() 3366 * 3367 * Description: 3368 * Attach bridge driver to the system. 
3369 */ 3370 static int 3371 bridge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3372 { 3373 if (cmd != DDI_ATTACH) 3374 return (DDI_FAILURE); 3375 3376 if (ddi_create_minor_node(dip, BRIDGE_CTL, S_IFCHR, 0, DDI_PSEUDO, 3377 CLONE_DEV) == DDI_FAILURE) { 3378 return (DDI_FAILURE); 3379 } 3380 3381 if (dld_ioc_register(BRIDGE_IOC, bridge_ioc_list, 3382 DLDIOCCNT(bridge_ioc_list)) != 0) { 3383 ddi_remove_minor_node(dip, BRIDGE_CTL); 3384 return (DDI_FAILURE); 3385 } 3386 3387 bridge_dev_info = dip; 3388 bridge_major = ddi_driver_major(dip); 3389 bridge_taskq = ddi_taskq_create(dip, BRIDGE_DEV_NAME, 1, 3390 TASKQ_DEFAULTPRI, 0); 3391 return (DDI_SUCCESS); 3392 } 3393 3394 /* 3395 * bridge_detach() 3396 * 3397 * Description: 3398 * Detach an interface to the system. 3399 */ 3400 static int 3401 bridge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3402 { 3403 if (cmd != DDI_DETACH) 3404 return (DDI_FAILURE); 3405 3406 ddi_remove_minor_node(dip, NULL); 3407 ddi_taskq_destroy(bridge_taskq); 3408 bridge_dev_info = NULL; 3409 return (DDI_SUCCESS); 3410 } 3411 3412 /* 3413 * bridge_info() 3414 * 3415 * Description: 3416 * Translate "dev_t" to a pointer to the associated "dev_info_t". 3417 */ 3418 /* ARGSUSED */ 3419 static int 3420 bridge_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, 3421 void **result) 3422 { 3423 int rc; 3424 3425 switch (infocmd) { 3426 case DDI_INFO_DEVT2DEVINFO: 3427 if (bridge_dev_info == NULL) { 3428 rc = DDI_FAILURE; 3429 } else { 3430 *result = (void *)bridge_dev_info; 3431 rc = DDI_SUCCESS; 3432 } 3433 break; 3434 case DDI_INFO_DEVT2INSTANCE: 3435 *result = NULL; 3436 rc = DDI_SUCCESS; 3437 break; 3438 default: 3439 rc = DDI_FAILURE; 3440 break; 3441 } 3442 return (rc); 3443 } 3444 3445 static struct module_info bridge_modinfo = { 3446 2105, /* mi_idnum */ 3447 BRIDGE_DEV_NAME, /* mi_idname */ 3448 0, /* mi_minpsz */ 3449 16384, /* mi_maxpsz */ 3450 65536, /* mi_hiwat */ 3451 128 /* mi_lowat */ 3452 }; 3453 3454 static struct qinit bridge_rinit = { 3455 NULL, /* qi_putp */ 3456 NULL, /* qi_srvp */ 3457 bridge_open, /* qi_qopen */ 3458 bridge_close, /* qi_qclose */ 3459 NULL, /* qi_qadmin */ 3460 &bridge_modinfo, /* qi_minfo */ 3461 NULL /* qi_mstat */ 3462 }; 3463 3464 static struct qinit bridge_winit = { 3465 (int (*)())bridge_wput, /* qi_putp */ 3466 NULL, /* qi_srvp */ 3467 NULL, /* qi_qopen */ 3468 NULL, /* qi_qclose */ 3469 NULL, /* qi_qadmin */ 3470 &bridge_modinfo, /* qi_minfo */ 3471 NULL /* qi_mstat */ 3472 }; 3473 3474 static struct streamtab bridge_tab = { 3475 &bridge_rinit, /* st_rdinit */ 3476 &bridge_winit /* st_wrinit */ 3477 }; 3478 3479 /* No STREAMS perimeters; we do all our own locking */ 3480 DDI_DEFINE_STREAM_OPS(bridge_ops, nulldev, nulldev, bridge_attach, 3481 bridge_detach, nodev, bridge_info, D_NEW | D_MP, &bridge_tab, 3482 ddi_quiesce_not_supported); 3483 3484 static struct modldrv modldrv = { 3485 &mod_driverops, 3486 "bridging driver", 3487 &bridge_ops 3488 }; 3489 3490 static struct modlinkage modlinkage = { 3491 MODREV_1, 3492 (void *)&modldrv, 3493 NULL 3494 }; 3495 3496 int 3497 _init(void) 3498 { 3499 int retv; 3500 3501 mac_init_ops(NULL, BRIDGE_DEV_NAME); 3502 bridge_inst_init(); 3503 if ((retv = mod_install(&modlinkage)) != 0) 3504 bridge_inst_fini(); 3505 return (retv); 3506 } 3507 3508 int 3509 _fini(void) 3510 { 3511 int retv; 3512 3513 rw_enter(&bmac_rwlock, RW_READER); 3514 retv = list_is_empty(&bmac_list) ? 
0 : EBUSY; 3515 rw_exit(&bmac_rwlock); 3516 if (retv == 0 && 3517 (retv = mod_remove(&modlinkage)) == 0) 3518 bridge_inst_fini(); 3519 return (retv); 3520 } 3521 3522 int 3523 _info(struct modinfo *modinfop) 3524 { 3525 return (mod_info(&modlinkage, modinfop)); 3526 } 3527