1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * This module implements a STREAMS driver that provides layer-two (Ethernet) 29 * bridging functionality. The STREAMS interface is used to provide 30 * observability (snoop/wireshark) and control, but not for interface plumbing. 
31 */ 32 33 #include <sys/types.h> 34 #include <sys/bitmap.h> 35 #include <sys/cmn_err.h> 36 #include <sys/conf.h> 37 #include <sys/ddi.h> 38 #include <sys/errno.h> 39 #include <sys/kstat.h> 40 #include <sys/modctl.h> 41 #include <sys/note.h> 42 #include <sys/param.h> 43 #include <sys/policy.h> 44 #include <sys/sdt.h> 45 #include <sys/stat.h> 46 #include <sys/stream.h> 47 #include <sys/stropts.h> 48 #include <sys/strsun.h> 49 #include <sys/sunddi.h> 50 #include <sys/sysmacros.h> 51 #include <sys/systm.h> 52 #include <sys/time.h> 53 #include <sys/dlpi.h> 54 #include <sys/dls.h> 55 #include <sys/mac_ether.h> 56 #include <sys/mac_provider.h> 57 #include <sys/mac_client_priv.h> 58 #include <sys/mac_impl.h> 59 #include <sys/vlan.h> 60 #include <net/bridge.h> 61 #include <net/bridge_impl.h> 62 #include <net/trill.h> 63 #include <sys/dld_ioc.h> 64 65 /* 66 * Locks and reference counts: object lifetime and design. 67 * 68 * bridge_mac_t 69 * Bridge mac (snoop) instances are in bmac_list, which is protected by 70 * bmac_rwlock. They're allocated by bmac_alloc and freed by bridge_timer(). 71 * Every bridge_inst_t has a single bridge_mac_t, but when bridge_inst_t goes 72 * away, the bridge_mac_t remains until either all of the users go away 73 * (detected by a timer) or until the instance is picked up again by the same 74 * bridge starting back up. 75 * 76 * bridge_inst_t 77 * Bridge instances are in inst_list, which is protected by inst_lock. 78 * They're allocated by inst_alloc() and freed by inst_free(). After 79 * allocation, an instance is placed in inst_list, and the reference count is 80 * incremented to represent this. That reference is decremented when the 81 * BIF_SHUTDOWN flag is set, and no new increments may occur. When the last 82 * reference is freed, the instance is removed from the list. 83 * 84 * Bridge instances have lists of links and an AVL tree of forwarding 85 * entries. Each of these structures holds one reference on the bridge 86 * instance. 
These lists and tree are protected by bi_rwlock. 87 * 88 * bridge_stream_t 89 * Bridge streams are allocated by stream_alloc() and freed by stream_free(). 90 * These streams are created when "bridged" opens /dev/bridgectl, and are 91 * used to create new bridge instances (via BRIOC_NEWBRIDGE) and control the 92 * links on the bridge. When a stream closes, the bridge instance created is 93 * destroyed. There's at most one bridge instance for a given control 94 * stream. 95 * 96 * bridge_link_t 97 * Links are allocated by bridge_add_link() and freed by link_free(). The 98 * bi_links list holds a reference to the link. When the BLF_DELETED flag is 99 * set, that reference is dropped. The link isn't removed from the list 100 * until the last reference drops. Each forwarding entry that uses a given 101 * link holds a reference, as does each thread transmitting a packet via the 102 * link. The MAC layer calls in via bridge_ref_cb() to hold a reference on 103 * a link when transmitting. 104 * 105 * It's important that once BLF_DELETED is set, there's no way for the 106 * reference count to increase again. If it can, then the link may be 107 * double-freed. The BLF_FREED flag is intended for use with assertions to 108 * guard against this in testing. 109 * 110 * bridge_fwd_t 111 * Bridge forwarding entries are allocated by bridge_recv_cb() and freed by 112 * fwd_free(). The bi_fwd AVL tree holds one reference to the entry. Unlike 113 * other data structures, the reference is dropped when the entry is removed 114 * from the tree by fwd_delete(), and the BFF_INTREE flag is removed. Each 115 * thread that's forwarding a packet to a known destination holds a reference 116 * to a forwarding entry. 117 * 118 * TRILL notes: 119 * 120 * The TRILL module does all of its I/O through bridging. It uses references 121 * on the bridge_inst_t and bridge_link_t structures, and has seven entry 122 * points and four callbacks. 
One entry point is for setting the callbacks 123 * (bridge_trill_register_cb). There are four entry points for taking bridge 124 * and link references (bridge_trill_{br,ln}{ref,unref}). The final two 125 * entry points are for decapsulated packets from TRILL (bridge_trill_decaps) 126 * that need to be bridged locally, and for TRILL-encapsulated output packets 127 * (bridge_trill_output). 128 * 129 * The four callbacks comprise two notification functions for bridges and 130 * links being deleted, one function for raw received TRILL packets, and one 131 * for bridge output to non-local TRILL destinations (tunnel entry). 132 */ 133 134 /* 135 * Ethernet reserved multicast addresses for TRILL; used also in TRILL module. 136 */ 137 const uint8_t all_isis_rbridges[] = ALL_ISIS_RBRIDGES; 138 static const uint8_t all_esadi_rbridges[] = ALL_ESADI_RBRIDGES; 139 const uint8_t bridge_group_address[] = BRIDGE_GROUP_ADDRESS; 140 141 static const char *inst_kstats_list[] = { KSINST_NAMES }; 142 static const char *link_kstats_list[] = { KSLINK_NAMES }; 143 144 #define KREF(p, m, vn) p->m.vn.value.ui64 145 #define KINCR(p, m, vn) ++KREF(p, m, vn) 146 #define KDECR(p, m, vn) --KREF(p, m, vn) 147 148 #define KIPINCR(p, vn) KINCR(p, bi_kstats, vn) 149 #define KIPDECR(p, vn) KDECR(p, bi_kstats, vn) 150 #define KLPINCR(p, vn) KINCR(p, bl_kstats, vn) 151 152 #define KIINCR(vn) KIPINCR(bip, vn) 153 #define KIDECR(vn) KIPDECR(bip, vn) 154 #define KLINCR(vn) KLPINCR(blp, vn) 155 156 #define Dim(x) (sizeof (x) / sizeof (*(x))) 157 158 /* Amount of overhead added when encapsulating with VLAN headers */ 159 #define VLAN_INCR (sizeof (struct ether_vlan_header) - \ 160 sizeof (struct ether_header)) 161 162 static dev_info_t *bridge_dev_info; 163 static major_t bridge_major; 164 static ddi_taskq_t *bridge_taskq; 165 166 /* 167 * These are the bridge instance management data structures. The mutex lock 168 * protects the list of bridge instances. 
A reference count is then used on 169 * each instance to determine when to free it. We use mac_minor_hold() to 170 * allocate minor_t values, which are used both for self-cloning /dev/net/ 171 * device nodes as well as client streams. Minor node 0 is reserved for the 172 * allocation control node. 173 */ 174 static list_t inst_list; 175 static kcondvar_t inst_cv; /* Allows us to wait for shutdown */ 176 static kmutex_t inst_lock; 177 178 static krwlock_t bmac_rwlock; 179 static list_t bmac_list; 180 181 /* Wait for taskq entries that use STREAMS */ 182 static kcondvar_t stream_ref_cv; 183 static kmutex_t stream_ref_lock; 184 185 static timeout_id_t bridge_timerid; 186 static clock_t bridge_scan_interval; 187 static clock_t bridge_fwd_age; 188 189 static bridge_inst_t *bridge_find_name(const char *); 190 static void bridge_timer(void *); 191 static void bridge_unref(bridge_inst_t *); 192 193 static const uint8_t zero_addr[ETHERADDRL] = { 0 }; 194 195 /* Global TRILL linkage */ 196 static trill_recv_pkt_t trill_recv_fn; 197 static trill_encap_pkt_t trill_encap_fn; 198 static trill_br_dstr_t trill_brdstr_fn; 199 static trill_ln_dstr_t trill_lndstr_fn; 200 201 /* special settings to accommodate DLD flow control; see dld_str.c */ 202 static struct module_info bridge_dld_modinfo = { 203 0, /* mi_idnum */ 204 "bridge", /* mi_idname */ 205 0, /* mi_minpsz */ 206 INFPSZ, /* mi_maxpsz */ 207 1, /* mi_hiwat */ 208 0 /* mi_lowat */ 209 }; 210 211 static struct qinit bridge_dld_rinit = { 212 NULL, /* qi_putp */ 213 NULL, /* qi_srvp */ 214 dld_open, /* qi_qopen */ 215 dld_close, /* qi_qclose */ 216 NULL, /* qi_qadmin */ 217 &bridge_dld_modinfo, /* qi_minfo */ 218 NULL /* qi_mstat */ 219 }; 220 221 static struct qinit bridge_dld_winit = { 222 (int (*)())dld_wput, /* qi_putp */ 223 (int (*)())dld_wsrv, /* qi_srvp */ 224 NULL, /* qi_qopen */ 225 NULL, /* qi_qclose */ 226 NULL, /* qi_qadmin */ 227 &bridge_dld_modinfo, /* qi_minfo */ 228 NULL /* qi_mstat */ 229 }; 230 231 static int 
bridge_ioc_listfwd(void *, intptr_t, int, cred_t *, int *); 232 233 /* GLDv3 control ioctls used by Bridging */ 234 static dld_ioc_info_t bridge_ioc_list[] = { 235 {BRIDGE_IOC_LISTFWD, DLDCOPYINOUT, sizeof (bridge_listfwd_t), 236 bridge_ioc_listfwd, NULL}, 237 }; 238 239 /* 240 * Given a bridge mac pointer, get a ref-held pointer to the corresponding 241 * bridge instance, if any. We must hold the global bmac_rwlock so that 242 * bm_inst doesn't slide out from under us. 243 */ 244 static bridge_inst_t * 245 mac_to_inst(const bridge_mac_t *bmp) 246 { 247 bridge_inst_t *bip; 248 249 rw_enter(&bmac_rwlock, RW_READER); 250 if ((bip = bmp->bm_inst) != NULL) 251 atomic_inc_uint(&bip->bi_refs); 252 rw_exit(&bmac_rwlock); 253 return (bip); 254 } 255 256 static void 257 link_sdu_fail(bridge_link_t *blp, boolean_t failed, mblk_t **mlist) 258 { 259 mblk_t *mp; 260 bridge_ctl_t *bcp; 261 bridge_link_t *blcmp; 262 bridge_inst_t *bip; 263 bridge_mac_t *bmp; 264 265 if (failed) { 266 if (blp->bl_flags & BLF_SDUFAIL) 267 return; 268 blp->bl_flags |= BLF_SDUFAIL; 269 } else { 270 if (!(blp->bl_flags & BLF_SDUFAIL)) 271 return; 272 blp->bl_flags &= ~BLF_SDUFAIL; 273 } 274 275 /* 276 * If this link is otherwise up, then check if there are any other 277 * non-failed non-down links. If not, then we control the state of the 278 * whole bridge. 279 */ 280 bip = blp->bl_inst; 281 bmp = bip->bi_mac; 282 if (blp->bl_linkstate != LINK_STATE_DOWN) { 283 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 284 blcmp = list_next(&bip->bi_links, blcmp)) { 285 if (blp != blcmp && 286 !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) && 287 blcmp->bl_linkstate != LINK_STATE_DOWN) 288 break; 289 } 290 if (blcmp == NULL) { 291 bmp->bm_linkstate = failed ? LINK_STATE_DOWN : 292 LINK_STATE_UP; 293 mac_link_redo(bmp->bm_mh, bmp->bm_linkstate); 294 } 295 } 296 297 /* 298 * If we're becoming failed, then the link's current true state needs 299 * to be reflected upwards to this link's clients. 
If we're becoming 300 * unfailed, then we get the state of the bridge instead on all 301 * clients. 302 */ 303 if (failed) { 304 if (bmp->bm_linkstate != blp->bl_linkstate) 305 mac_link_redo(blp->bl_mh, blp->bl_linkstate); 306 } else { 307 mac_link_redo(blp->bl_mh, bmp->bm_linkstate); 308 } 309 310 /* get the current mblk we're going to send up */ 311 if ((mp = blp->bl_lfailmp) == NULL && 312 (mp = allocb(sizeof (bridge_ctl_t), BPRI_MED)) == NULL) 313 return; 314 315 /* get a new one for next time */ 316 blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED); 317 318 /* if none for next time, then report only failures */ 319 if (blp->bl_lfailmp == NULL && !failed) { 320 blp->bl_lfailmp = mp; 321 return; 322 } 323 324 /* LINTED: alignment */ 325 bcp = (bridge_ctl_t *)mp->b_rptr; 326 bcp->bc_linkid = blp->bl_linkid; 327 bcp->bc_failed = failed; 328 mp->b_wptr = (uchar_t *)(bcp + 1); 329 mp->b_next = *mlist; 330 *mlist = mp; 331 } 332 333 /* 334 * Send control messages (link SDU changes) using the stream to the 335 * bridge instance daemon. 
336 */ 337 static void 338 send_up_messages(bridge_inst_t *bip, mblk_t *mp) 339 { 340 mblk_t *mnext; 341 queue_t *rq; 342 343 rq = bip->bi_control->bs_wq; 344 rq = OTHERQ(rq); 345 while (mp != NULL) { 346 mnext = mp->b_next; 347 mp->b_next = NULL; 348 putnext(rq, mp); 349 mp = mnext; 350 } 351 } 352 353 /* ARGSUSED */ 354 static int 355 bridge_m_getstat(void *arg, uint_t stat, uint64_t *val) 356 { 357 return (ENOTSUP); 358 } 359 360 static int 361 bridge_m_start(void *arg) 362 { 363 bridge_mac_t *bmp = arg; 364 365 bmp->bm_flags |= BMF_STARTED; 366 return (0); 367 } 368 369 static void 370 bridge_m_stop(void *arg) 371 { 372 bridge_mac_t *bmp = arg; 373 374 bmp->bm_flags &= ~BMF_STARTED; 375 } 376 377 /* ARGSUSED */ 378 static int 379 bridge_m_setpromisc(void *arg, boolean_t on) 380 { 381 return (0); 382 } 383 384 /* ARGSUSED */ 385 static int 386 bridge_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 387 { 388 return (0); 389 } 390 391 /* ARGSUSED */ 392 static int 393 bridge_m_unicst(void *arg, const uint8_t *macaddr) 394 { 395 return (ENOTSUP); 396 } 397 398 static mblk_t * 399 bridge_m_tx(void *arg, mblk_t *mp) 400 { 401 _NOTE(ARGUNUSED(arg)); 402 freemsgchain(mp); 403 return (NULL); 404 } 405 406 /* ARGSUSED */ 407 static int 408 bridge_ioc_listfwd(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) 409 { 410 bridge_listfwd_t *blf = karg; 411 bridge_inst_t *bip; 412 bridge_fwd_t *bfp, match; 413 avl_index_t where; 414 415 bip = bridge_find_name(blf->blf_name); 416 if (bip == NULL) 417 return (ENOENT); 418 419 bcopy(blf->blf_dest, match.bf_dest, ETHERADDRL); 420 match.bf_flags |= BFF_VLANLOCAL; 421 rw_enter(&bip->bi_rwlock, RW_READER); 422 if ((bfp = avl_find(&bip->bi_fwd, &match, &where)) == NULL) 423 bfp = avl_nearest(&bip->bi_fwd, where, AVL_AFTER); 424 else 425 bfp = AVL_NEXT(&bip->bi_fwd, bfp); 426 if (bfp == NULL) { 427 bzero(blf, sizeof (*blf)); 428 } else { 429 bcopy(bfp->bf_dest, blf->blf_dest, ETHERADDRL); 430 blf->blf_trill_nick = 
bfp->bf_trill_nick; 431 blf->blf_ms_age = 432 drv_hztousec(lbolt - bfp->bf_lastheard) / 1000; 433 blf->blf_is_local = 434 (bfp->bf_flags & BFF_LOCALADDR) != 0; 435 blf->blf_linkid = bfp->bf_links[0]->bl_linkid; 436 } 437 rw_exit(&bip->bi_rwlock); 438 bridge_unref(bip); 439 return (0); 440 } 441 442 static int 443 bridge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 444 uint_t pr_valsize, const void *pr_val) 445 { 446 bridge_mac_t *bmp = arg; 447 bridge_inst_t *bip; 448 bridge_link_t *blp; 449 int err; 450 uint_t maxsdu; 451 mblk_t *mlist; 452 453 _NOTE(ARGUNUSED(pr_name)); 454 switch (pr_num) { 455 case MAC_PROP_MTU: 456 if (pr_valsize < sizeof (bmp->bm_maxsdu)) { 457 err = EINVAL; 458 break; 459 } 460 (void) bcopy(pr_val, &maxsdu, sizeof (maxsdu)); 461 if (maxsdu == bmp->bm_maxsdu) { 462 err = 0; 463 } else if ((bip = mac_to_inst(bmp)) == NULL) { 464 err = ENXIO; 465 } else { 466 rw_enter(&bip->bi_rwlock, RW_WRITER); 467 mlist = NULL; 468 for (blp = list_head(&bip->bi_links); blp != NULL; 469 blp = list_next(&bip->bi_links, blp)) { 470 if (blp->bl_flags & BLF_DELETED) 471 continue; 472 if (blp->bl_maxsdu == maxsdu) 473 link_sdu_fail(blp, B_FALSE, &mlist); 474 else if (blp->bl_maxsdu == bmp->bm_maxsdu) 475 link_sdu_fail(blp, B_TRUE, &mlist); 476 } 477 rw_exit(&bip->bi_rwlock); 478 bmp->bm_maxsdu = maxsdu; 479 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 480 send_up_messages(bip, mlist); 481 bridge_unref(bip); 482 err = 0; 483 } 484 break; 485 486 default: 487 err = ENOTSUP; 488 break; 489 } 490 return (err); 491 } 492 493 static int 494 bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 495 uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) 496 { 497 bridge_mac_t *bmp = arg; 498 int err = 0; 499 500 _NOTE(ARGUNUSED(pr_name)); 501 switch (pr_num) { 502 case MAC_PROP_MTU: { 503 mac_propval_range_t range; 504 505 if (!(pr_flags & MAC_PROP_POSSIBLE)) 506 return (ENOTSUP); 507 if (pr_valsize < sizeof 
(mac_propval_range_t)) 508 return (EINVAL); 509 range.mpr_count = 1; 510 range.mpr_type = MAC_PROPVAL_UINT32; 511 range.range_uint32[0].mpur_min = 512 range.range_uint32[0].mpur_max = bmp->bm_maxsdu; 513 bcopy(&range, pr_val, sizeof (range)); 514 *perm = MAC_PROP_PERM_RW; 515 break; 516 } 517 case MAC_PROP_STATUS: 518 if (pr_valsize < sizeof (bmp->bm_linkstate)) { 519 err = EINVAL; 520 } else { 521 bcopy(&bmp->bm_linkstate, pr_val, 522 sizeof (&bmp->bm_linkstate)); 523 *perm = MAC_PROP_PERM_READ; 524 } 525 break; 526 527 default: 528 err = ENOTSUP; 529 break; 530 } 531 return (err); 532 } 533 534 static mac_callbacks_t bridge_m_callbacks = { 535 MC_SETPROP | MC_GETPROP, 536 bridge_m_getstat, 537 bridge_m_start, 538 bridge_m_stop, 539 bridge_m_setpromisc, 540 bridge_m_multicst, 541 bridge_m_unicst, 542 bridge_m_tx, 543 NULL, /* ioctl */ 544 NULL, /* getcapab */ 545 NULL, /* open */ 546 NULL, /* close */ 547 bridge_m_setprop, 548 bridge_m_getprop 549 }; 550 551 /* 552 * Create kstats from a list. 553 */ 554 static kstat_t * 555 kstat_setup(kstat_named_t *knt, const char **names, int nstat, 556 const char *unitname) 557 { 558 kstat_t *ksp; 559 int i; 560 561 for (i = 0; i < nstat; i++) 562 kstat_named_init(&knt[i], names[i], KSTAT_DATA_UINT64); 563 564 ksp = kstat_create_zone("bridge", 0, unitname, "net", 565 KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID); 566 if (ksp != NULL) { 567 ksp->ks_data = knt; 568 kstat_install(ksp); 569 } 570 return (ksp); 571 } 572 573 /* 574 * Find an existing bridge_mac_t structure or allocate a new one for the given 575 * bridge instance. This creates the mac driver instance that snoop can use. 
576 */ 577 static int 578 bmac_alloc(bridge_inst_t *bip, bridge_mac_t **bmacp) 579 { 580 bridge_mac_t *bmp, *bnew; 581 mac_register_t *mac; 582 int err; 583 584 *bmacp = NULL; 585 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 586 return (EINVAL); 587 588 bnew = kmem_zalloc(sizeof (*bnew), KM_SLEEP); 589 590 rw_enter(&bmac_rwlock, RW_WRITER); 591 for (bmp = list_head(&bmac_list); bmp != NULL; 592 bmp = list_next(&bmac_list, bmp)) { 593 if (strcmp(bip->bi_name, bmp->bm_name) == 0) { 594 ASSERT(bmp->bm_inst == NULL); 595 bmp->bm_inst = bip; 596 rw_exit(&bmac_rwlock); 597 kmem_free(bnew, sizeof (*bnew)); 598 mac_free(mac); 599 *bmacp = bmp; 600 return (0); 601 } 602 } 603 604 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 605 mac->m_driver = bnew; 606 mac->m_dip = bridge_dev_info; 607 mac->m_instance = (uint_t)-1; 608 mac->m_src_addr = (uint8_t *)zero_addr; 609 mac->m_callbacks = &bridge_m_callbacks; 610 611 /* 612 * Note that the SDU limits are irrelevant, as nobody transmits on the 613 * bridge node itself. It's mainly for monitoring but we allow 614 * setting the bridge MTU for quick transition of all links part of the 615 * bridge to a new MTU. 616 */ 617 mac->m_min_sdu = 1; 618 mac->m_max_sdu = 1500; 619 err = mac_register(mac, &bnew->bm_mh); 620 mac_free(mac); 621 if (err != 0) { 622 rw_exit(&bmac_rwlock); 623 kmem_free(bnew, sizeof (*bnew)); 624 return (err); 625 } 626 627 bnew->bm_inst = bip; 628 (void) strcpy(bnew->bm_name, bip->bi_name); 629 if (list_is_empty(&bmac_list)) { 630 bridge_timerid = timeout(bridge_timer, NULL, 631 bridge_scan_interval); 632 } 633 list_insert_tail(&bmac_list, bnew); 634 rw_exit(&bmac_rwlock); 635 636 /* 637 * Mark the MAC as unable to go "active" so that only passive clients 638 * (such as snoop) can bind to it. 639 */ 640 mac_no_active(bnew->bm_mh); 641 *bmacp = bnew; 642 return (0); 643 } 644 645 /* 646 * Disconnect the given bridge_mac_t from its bridge instance. The bridge 647 * instance is going away. 
The mac instance can't go away until the clients 648 * are gone (see bridge_timer). 649 */ 650 static void 651 bmac_disconnect(bridge_mac_t *bmp) 652 { 653 bridge_inst_t *bip; 654 655 bmp->bm_linkstate = LINK_STATE_DOWN; 656 mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN); 657 658 rw_enter(&bmac_rwlock, RW_READER); 659 bip = bmp->bm_inst; 660 bip->bi_mac = NULL; 661 bmp->bm_inst = NULL; 662 rw_exit(&bmac_rwlock); 663 } 664 665 /* This is used by the avl trees to sort forwarding table entries */ 666 static int 667 fwd_compare(const void *addr1, const void *addr2) 668 { 669 const bridge_fwd_t *fwd1 = addr1; 670 const bridge_fwd_t *fwd2 = addr2; 671 int diff = memcmp(fwd1->bf_dest, fwd2->bf_dest, ETHERADDRL); 672 673 if (diff != 0) 674 return (diff > 0 ? 1 : -1); 675 676 if ((fwd1->bf_flags ^ fwd2->bf_flags) & BFF_VLANLOCAL) { 677 if (fwd1->bf_vlanid > fwd2->bf_vlanid) 678 return (1); 679 else if (fwd1->bf_vlanid < fwd2->bf_vlanid) 680 return (-1); 681 } 682 return (0); 683 } 684 685 static void 686 inst_free(bridge_inst_t *bip) 687 { 688 ASSERT(bip->bi_mac == NULL); 689 rw_destroy(&bip->bi_rwlock); 690 list_destroy(&bip->bi_links); 691 cv_destroy(&bip->bi_linkwait); 692 avl_destroy(&bip->bi_fwd); 693 if (bip->bi_ksp != NULL) 694 kstat_delete(bip->bi_ksp); 695 kmem_free(bip, sizeof (*bip)); 696 } 697 698 static bridge_inst_t * 699 inst_alloc(const char *bridge) 700 { 701 bridge_inst_t *bip; 702 703 bip = kmem_zalloc(sizeof (*bip), KM_SLEEP); 704 bip->bi_refs = 1; 705 (void) strcpy(bip->bi_name, bridge); 706 rw_init(&bip->bi_rwlock, NULL, RW_DRIVER, NULL); 707 list_create(&bip->bi_links, sizeof (bridge_link_t), 708 offsetof(bridge_link_t, bl_node)); 709 cv_init(&bip->bi_linkwait, NULL, CV_DRIVER, NULL); 710 avl_create(&bip->bi_fwd, fwd_compare, sizeof (bridge_fwd_t), 711 offsetof(bridge_fwd_t, bf_node)); 712 return (bip); 713 } 714 715 static bridge_inst_t * 716 bridge_find_name(const char *bridge) 717 { 718 bridge_inst_t *bip; 719 720 mutex_enter(&inst_lock); 721 for (bip 
= list_head(&inst_list); bip != NULL; 722 bip = list_next(&inst_list, bip)) { 723 if (!(bip->bi_flags & BIF_SHUTDOWN) && 724 strcmp(bridge, bip->bi_name) == 0) { 725 atomic_inc_uint(&bip->bi_refs); 726 break; 727 } 728 } 729 mutex_exit(&inst_lock); 730 731 return (bip); 732 } 733 734 static int 735 bridge_create(datalink_id_t linkid, const char *bridge, bridge_inst_t **bipc) 736 { 737 bridge_inst_t *bip, *bipnew; 738 bridge_mac_t *bmp = NULL; 739 int err; 740 741 *bipc = NULL; 742 bipnew = inst_alloc(bridge); 743 744 mutex_enter(&inst_lock); 745 lookup_retry: 746 for (bip = list_head(&inst_list); bip != NULL; 747 bip = list_next(&inst_list, bip)) { 748 if (strcmp(bridge, bip->bi_name) == 0) 749 break; 750 } 751 752 /* This should not take long; if it does, we've got a design problem */ 753 if (bip != NULL && (bip->bi_flags & BIF_SHUTDOWN)) { 754 cv_wait(&inst_cv, &inst_lock); 755 goto lookup_retry; 756 } 757 758 if (bip != NULL) { 759 /* We weren't expecting to find anything */ 760 bip = NULL; 761 err = EEXIST; 762 } else { 763 bip = bipnew; 764 bipnew = NULL; 765 list_insert_tail(&inst_list, bip); 766 } 767 768 mutex_exit(&inst_lock); 769 if (bip == NULL) 770 goto fail; 771 772 bip->bi_ksp = kstat_setup((kstat_named_t *)&bip->bi_kstats, 773 inst_kstats_list, Dim(inst_kstats_list), bip->bi_name); 774 775 err = bmac_alloc(bip, &bmp); 776 if ((bip->bi_mac = bmp) == NULL) 777 goto fail_create; 778 779 /* 780 * bm_inst is set, so the timer cannot yank the DLS rug from under us. 781 * No extra locking is needed here. 
782 */ 783 if (!(bmp->bm_flags & BMF_DLS)) { 784 if ((err = dls_devnet_create(bmp->bm_mh, linkid)) != 0) 785 goto fail_create; 786 bmp->bm_flags |= BMF_DLS; 787 } 788 789 bip->bi_dev = makedevice(bridge_major, mac_minor(bmp->bm_mh)); 790 *bipc = bip; 791 return (0); 792 793 fail_create: 794 if (bmp != NULL) 795 bmac_disconnect(bip->bi_mac); 796 bipnew = bip; 797 fail: 798 ASSERT(bipnew->bi_trilldata == NULL); 799 bipnew->bi_flags |= BIF_SHUTDOWN; 800 inst_free(bipnew); 801 return (err); 802 } 803 804 static void 805 bridge_unref(bridge_inst_t *bip) 806 { 807 if (atomic_dec_uint_nv(&bip->bi_refs) == 0) { 808 ASSERT(bip->bi_flags & BIF_SHUTDOWN); 809 /* free up mac for reuse before leaving global list */ 810 if (bip->bi_mac != NULL) 811 bmac_disconnect(bip->bi_mac); 812 mutex_enter(&inst_lock); 813 list_remove(&inst_list, bip); 814 cv_broadcast(&inst_cv); 815 mutex_exit(&inst_lock); 816 inst_free(bip); 817 } 818 } 819 820 /* 821 * Stream instances are used only for allocating bridges and serving as a 822 * control node. They serve no data-handling function. 
823 */ 824 static bridge_stream_t * 825 stream_alloc(void) 826 { 827 bridge_stream_t *bsp; 828 minor_t mn; 829 830 if ((mn = mac_minor_hold(B_FALSE)) == 0) 831 return (NULL); 832 bsp = kmem_zalloc(sizeof (*bsp), KM_SLEEP); 833 bsp->bs_minor = mn; 834 return (bsp); 835 } 836 837 static void 838 stream_free(bridge_stream_t *bsp) 839 { 840 mac_minor_rele(bsp->bs_minor); 841 kmem_free(bsp, sizeof (*bsp)); 842 } 843 844 /* Reference hold/release functions for STREAMS-related taskq */ 845 static void 846 stream_ref(bridge_stream_t *bsp) 847 { 848 mutex_enter(&stream_ref_lock); 849 bsp->bs_taskq_cnt++; 850 mutex_exit(&stream_ref_lock); 851 } 852 853 static void 854 stream_unref(bridge_stream_t *bsp) 855 { 856 mutex_enter(&stream_ref_lock); 857 if (--bsp->bs_taskq_cnt == 0) 858 cv_broadcast(&stream_ref_cv); 859 mutex_exit(&stream_ref_lock); 860 } 861 862 static void 863 link_free(bridge_link_t *blp) 864 { 865 bridge_inst_t *bip = blp->bl_inst; 866 867 ASSERT(!(blp->bl_flags & BLF_FREED)); 868 blp->bl_flags |= BLF_FREED; 869 if (blp->bl_ksp != NULL) 870 kstat_delete(blp->bl_ksp); 871 if (blp->bl_lfailmp != NULL) 872 freeb(blp->bl_lfailmp); 873 cv_destroy(&blp->bl_trillwait); 874 mutex_destroy(&blp->bl_trilllock); 875 kmem_free(blp, sizeof (*blp)); 876 /* Don't unreference the bridge until the MAC is closed */ 877 bridge_unref(bip); 878 } 879 880 static void 881 link_unref(bridge_link_t *blp) 882 { 883 if (atomic_dec_uint_nv(&blp->bl_refs) == 0) { 884 bridge_inst_t *bip = blp->bl_inst; 885 886 ASSERT(blp->bl_flags & BLF_DELETED); 887 rw_enter(&bip->bi_rwlock, RW_WRITER); 888 list_remove(&bip->bi_links, blp); 889 rw_exit(&bip->bi_rwlock); 890 if (bip->bi_trilldata != NULL && list_is_empty(&bip->bi_links)) 891 cv_broadcast(&bip->bi_linkwait); 892 link_free(blp); 893 } 894 } 895 896 static bridge_fwd_t * 897 fwd_alloc(const uint8_t *addr, uint_t nlinks, uint16_t nick) 898 { 899 bridge_fwd_t *bfp; 900 901 bfp = kmem_zalloc(sizeof (*bfp) + (nlinks * sizeof (bridge_link_t *)), 902 
KM_NOSLEEP); 903 if (bfp != NULL) { 904 bcopy(addr, bfp->bf_dest, ETHERADDRL); 905 bfp->bf_lastheard = lbolt; 906 bfp->bf_maxlinks = nlinks; 907 bfp->bf_links = (bridge_link_t **)(bfp + 1); 908 bfp->bf_trill_nick = nick; 909 } 910 return (bfp); 911 } 912 913 static bridge_fwd_t * 914 fwd_find(bridge_inst_t *bip, const uint8_t *addr, uint16_t vlanid) 915 { 916 bridge_fwd_t *bfp, *vbfp; 917 bridge_fwd_t match; 918 919 bcopy(addr, match.bf_dest, ETHERADDRL); 920 match.bf_flags = 0; 921 rw_enter(&bip->bi_rwlock, RW_READER); 922 if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) { 923 if (bfp->bf_vlanid != vlanid && bfp->bf_vcnt > 0) { 924 match.bf_vlanid = vlanid; 925 match.bf_flags = BFF_VLANLOCAL; 926 vbfp = avl_find(&bip->bi_fwd, &match, NULL); 927 if (vbfp != NULL) 928 bfp = vbfp; 929 } 930 atomic_inc_uint(&bfp->bf_refs); 931 } 932 rw_exit(&bip->bi_rwlock); 933 return (bfp); 934 } 935 936 static void 937 fwd_free(bridge_fwd_t *bfp) 938 { 939 uint_t i; 940 bridge_inst_t *bip = bfp->bf_links[0]->bl_inst; 941 942 KIDECR(bki_count); 943 for (i = 0; i < bfp->bf_nlinks; i++) 944 link_unref(bfp->bf_links[i]); 945 kmem_free(bfp, 946 sizeof (*bfp) + bfp->bf_maxlinks * sizeof (bridge_link_t *)); 947 } 948 949 static void 950 fwd_unref(bridge_fwd_t *bfp) 951 { 952 if (atomic_dec_uint_nv(&bfp->bf_refs) == 0) { 953 ASSERT(!(bfp->bf_flags & BFF_INTREE)); 954 fwd_free(bfp); 955 } 956 } 957 958 static void 959 fwd_delete(bridge_fwd_t *bfp) 960 { 961 bridge_inst_t *bip; 962 bridge_fwd_t *bfpzero; 963 964 if (bfp->bf_flags & BFF_INTREE) { 965 ASSERT(bfp->bf_nlinks > 0); 966 bip = bfp->bf_links[0]->bl_inst; 967 rw_enter(&bip->bi_rwlock, RW_WRITER); 968 /* Another thread could beat us to this */ 969 if (bfp->bf_flags & BFF_INTREE) { 970 avl_remove(&bip->bi_fwd, bfp); 971 bfp->bf_flags &= ~BFF_INTREE; 972 if (bfp->bf_flags & BFF_VLANLOCAL) { 973 bfp->bf_flags &= ~BFF_VLANLOCAL; 974 bfpzero = avl_find(&bip->bi_fwd, bfp, NULL); 975 if (bfpzero != NULL && bfpzero->bf_vcnt > 0) 976 
bfpzero->bf_vcnt--; 977 } 978 rw_exit(&bip->bi_rwlock); 979 fwd_unref(bfp); /* no longer in avl tree */ 980 } else { 981 rw_exit(&bip->bi_rwlock); 982 } 983 } 984 } 985 986 static boolean_t 987 fwd_insert(bridge_inst_t *bip, bridge_fwd_t *bfp) 988 { 989 avl_index_t idx; 990 boolean_t retv; 991 992 rw_enter(&bip->bi_rwlock, RW_WRITER); 993 if (!(bip->bi_flags & BIF_SHUTDOWN) && 994 avl_numnodes(&bip->bi_fwd) < bip->bi_tablemax && 995 avl_find(&bip->bi_fwd, bfp, &idx) == NULL) { 996 avl_insert(&bip->bi_fwd, bfp, idx); 997 bfp->bf_flags |= BFF_INTREE; 998 atomic_inc_uint(&bfp->bf_refs); /* avl entry */ 999 retv = B_TRUE; 1000 } else { 1001 retv = B_FALSE; 1002 } 1003 rw_exit(&bip->bi_rwlock); 1004 return (retv); 1005 } 1006 1007 static void 1008 fwd_update_local(bridge_link_t *blp, const uint8_t *oldaddr, 1009 const uint8_t *newaddr) 1010 { 1011 bridge_inst_t *bip = blp->bl_inst; 1012 bridge_fwd_t *bfp, *bfnew; 1013 bridge_fwd_t match; 1014 avl_index_t idx; 1015 boolean_t drop_ref = B_FALSE; 1016 1017 if (bcmp(oldaddr, newaddr, ETHERADDRL) == 0) 1018 return; 1019 1020 if (bcmp(oldaddr, zero_addr, ETHERADDRL) == 0) 1021 goto no_old_addr; 1022 1023 /* 1024 * Find the previous entry, and remove our link from it. 1025 */ 1026 bcopy(oldaddr, match.bf_dest, ETHERADDRL); 1027 rw_enter(&bip->bi_rwlock, RW_WRITER); 1028 if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) { 1029 int i; 1030 1031 /* 1032 * See if we're in the list, and remove if so. 1033 */ 1034 for (i = 0; i < bfp->bf_nlinks; i++) { 1035 if (bfp->bf_links[i] == blp) { 1036 /* 1037 * We assume writes are atomic, so no special 1038 * MT handling is needed. The list length is 1039 * decremented first, and then we remove 1040 * entries. 
1041 */ 1042 bfp->bf_nlinks--; 1043 for (; i < bfp->bf_nlinks; i++) 1044 bfp->bf_links[i] = bfp->bf_links[i + 1]; 1045 drop_ref = B_TRUE; 1046 break; 1047 } 1048 } 1049 /* If no more links, then remove and free up */ 1050 if (bfp->bf_nlinks == 0) { 1051 avl_remove(&bip->bi_fwd, bfp); 1052 bfp->bf_flags &= ~BFF_INTREE; 1053 } else { 1054 bfp = NULL; 1055 } 1056 } 1057 rw_exit(&bip->bi_rwlock); 1058 if (bfp != NULL) 1059 fwd_unref(bfp); /* no longer in avl tree */ 1060 1061 /* 1062 * Now get the new link address and add this link to the list. The 1063 * list should be of length 1 unless the user has configured multiple 1064 * NICs with the same address. (That's an incorrect configuration, but 1065 * we support it anyway.) 1066 */ 1067 no_old_addr: 1068 bfp = NULL; 1069 if ((bip->bi_flags & BIF_SHUTDOWN) || 1070 bcmp(newaddr, zero_addr, ETHERADDRL) == 0) 1071 goto no_new_addr; 1072 1073 bcopy(newaddr, match.bf_dest, ETHERADDRL); 1074 rw_enter(&bip->bi_rwlock, RW_WRITER); 1075 if ((bfp = avl_find(&bip->bi_fwd, &match, &idx)) == NULL) { 1076 bfnew = fwd_alloc(newaddr, 1, RBRIDGE_NICKNAME_NONE); 1077 if (bfnew != NULL) 1078 KIINCR(bki_count); 1079 } else if (bfp->bf_nlinks < bfp->bf_maxlinks) { 1080 /* special case: link fits in existing entry */ 1081 bfnew = bfp; 1082 } else { 1083 bfnew = fwd_alloc(newaddr, bfp->bf_nlinks + 1, 1084 RBRIDGE_NICKNAME_NONE); 1085 if (bfnew != NULL) { 1086 KIINCR(bki_count); 1087 avl_remove(&bip->bi_fwd, bfp); 1088 bfp->bf_flags &= ~BFF_INTREE; 1089 bfnew->bf_nlinks = bfp->bf_nlinks; 1090 bcopy(bfp->bf_links, bfnew->bf_links, 1091 bfp->bf_nlinks * sizeof (bfp)); 1092 /* reset the idx value due to removal above */ 1093 (void) avl_find(&bip->bi_fwd, &match, &idx); 1094 } 1095 } 1096 1097 if (bfnew != NULL) { 1098 bfnew->bf_links[bfnew->bf_nlinks++] = blp; 1099 if (drop_ref) 1100 drop_ref = B_FALSE; 1101 else 1102 atomic_inc_uint(&blp->bl_refs); /* bf_links entry */ 1103 1104 if (bfnew != bfp) { 1105 /* local addresses are not subject to 
table limits */ 1106 avl_insert(&bip->bi_fwd, bfnew, idx); 1107 bfnew->bf_flags |= (BFF_INTREE | BFF_LOCALADDR); 1108 atomic_inc_uint(&bfnew->bf_refs); /* avl entry */ 1109 } 1110 } 1111 rw_exit(&bip->bi_rwlock); 1112 1113 no_new_addr: 1114 /* 1115 * If we found an existing entry and we replaced it with a new one, 1116 * then drop the table reference from the old one. We removed it from 1117 * the AVL tree above. 1118 */ 1119 if (bfnew != NULL && bfp != NULL && bfnew != bfp) 1120 fwd_unref(bfp); 1121 1122 /* Account for removed entry. */ 1123 if (drop_ref) 1124 link_unref(blp); 1125 } 1126 1127 static void 1128 bridge_new_unicst(bridge_link_t *blp) 1129 { 1130 uint8_t new_mac[ETHERADDRL]; 1131 1132 mac_unicast_primary_get(blp->bl_mh, new_mac); 1133 fwd_update_local(blp, blp->bl_local_mac, new_mac); 1134 bcopy(new_mac, blp->bl_local_mac, ETHERADDRL); 1135 } 1136 1137 /* 1138 * We must shut down a link prior to freeing it, and doing that requires 1139 * blocking to wait for running MAC threads while holding a reference. This is 1140 * run from a taskq to accomplish proper link shutdown followed by reference 1141 * drop. 1142 */ 1143 static void 1144 link_shutdown(void *arg) 1145 { 1146 bridge_link_t *blp = arg; 1147 mac_handle_t mh = blp->bl_mh; 1148 bridge_inst_t *bip; 1149 bridge_fwd_t *bfp, *bfnext; 1150 avl_tree_t fwd_scavenge; 1151 int i; 1152 1153 /* 1154 * This link is being destroyed. Notify TRILL now that it's no longer 1155 * possible to send packets. Data packets may still arrive until TRILL 1156 * calls bridge_trill_lnunref. 
/*
 * We must shut down a link prior to freeing it, and doing that requires
 * blocking to wait for running MAC threads while holding a reference.  This is
 * run from a taskq to accomplish proper link shutdown followed by reference
 * drop.
 *
 * arg is the bridge_link_t being shut down.  The steps, in order, are:
 * notify TRILL, undo whatever MAC-layer registrations the bl_flags say were
 * made, restore the reported link state, purge this link from every
 * forwarding entry, close the MAC client/handle, and finally drop the link
 * reference.
 */
static void
link_shutdown(void *arg)
{
	bridge_link_t *blp = arg;
	mac_handle_t mh = blp->bl_mh;
	bridge_inst_t *bip;
	bridge_fwd_t *bfp, *bfnext;
	avl_tree_t fwd_scavenge;
	int i;

	/*
	 * This link is being destroyed.  Notify TRILL now that it's no longer
	 * possible to send packets.  Data packets may still arrive until TRILL
	 * calls bridge_trill_lnunref.
	 */
	if (blp->bl_trilldata != NULL)
		trill_lndstr_fn(blp->bl_trilldata, blp);

	/* Undo only those registrations that the flags say were made. */
	if (blp->bl_flags & BLF_PROM_ADDED)
		(void) mac_promisc_remove(blp->bl_mphp);

	if (blp->bl_flags & BLF_SET_BRIDGE)
		mac_bridge_clear(mh, (mac_handle_t)blp);

	if (blp->bl_flags & BLF_MARGIN_ADDED) {
		(void) mac_notify_remove(blp->bl_mnh, B_TRUE);
		(void) mac_margin_remove(mh, blp->bl_margin);
	}

	/* Tell the clients the real link state when we leave */
	mac_link_redo(blp->bl_mh,
	    mac_stat_get(blp->bl_mh, MAC_STAT_LOWLINK_STATE));

	/*
	 * Destroy all of the forwarding entries related to this link.
	 * Entries that would become empty are moved to a private scavenge
	 * tree so that fwd_unref() can be called after bi_rwlock is released.
	 */
	avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
	    offsetof(bridge_fwd_t, bf_node));
	bip = blp->bl_inst;
	rw_enter(&bip->bi_rwlock, RW_WRITER);
	bfnext = avl_first(&bip->bi_fwd);
	while ((bfp = bfnext) != NULL) {
		/* fetch the successor first; bfp may leave the tree below */
		bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
		for (i = 0; i < bfp->bf_nlinks; i++) {
			if (bfp->bf_links[i] == blp)
				break;
		}
		/* this entry does not reference our link */
		if (i >= bfp->bf_nlinks)
			continue;
		if (bfp->bf_nlinks > 1) {
			/* note that this can't be the last reference */
			link_unref(blp);
			/* close the gap left by the removed slot */
			bfp->bf_nlinks--;
			for (; i < bfp->bf_nlinks; i++)
				bfp->bf_links[i] = bfp->bf_links[i + 1];
		} else {
			/* we were the only link: retire the whole entry */
			ASSERT(bfp->bf_flags & BFF_INTREE);
			avl_remove(&bip->bi_fwd, bfp);
			bfp->bf_flags &= ~BFF_INTREE;
			avl_add(&fwd_scavenge, bfp);
		}
	}
	rw_exit(&bip->bi_rwlock);
	/* Now drop the tree reference of each retired entry, lock-free. */
	bfnext = avl_first(&fwd_scavenge);
	while ((bfp = bfnext) != NULL) {
		bfnext = AVL_NEXT(&fwd_scavenge, bfp);
		avl_remove(&fwd_scavenge, bfp);
		fwd_unref(bfp);
	}
	avl_destroy(&fwd_scavenge);

	if (blp->bl_flags & BLF_CLIENT_OPEN)
		mac_client_close(blp->bl_mch, 0);

	mac_close(mh);

	/*
	 * We are now completely removed from the active list, so drop the
	 * reference (see bridge_add_link).
	 */
	link_unref(blp);
}
/*
 * Begin shutdown of a bridge instance.
 *
 * The first caller sets BIF_SHUTDOWN under inst_lock; later callers see the
 * flag and return immediately.  The winner then detaches the control stream
 * pointer, schedules link_shutdown() for every link not already marked
 * deleted, deletes every forwarding entry, waits for the links to go away if
 * TRILL is attached, notifies TRILL, and finally drops the instance
 * reference that setting the flag made this caller responsible for.
 */
static void
shutdown_inst(bridge_inst_t *bip)
{
	bridge_link_t *blp, *blnext;
	bridge_fwd_t *bfp;

	mutex_enter(&inst_lock);
	if (bip->bi_flags & BIF_SHUTDOWN) {
		mutex_exit(&inst_lock);
		return;
	}

	/*
	 * Once on the inst_list, the bridge instance must not leave that list
	 * without having the shutdown flag set first.  When the shutdown flag
	 * is set, we own the list reference, so we must drop it before
	 * returning.
	 */
	bip->bi_flags |= BIF_SHUTDOWN;
	mutex_exit(&inst_lock);

	bip->bi_control = NULL;

	rw_enter(&bip->bi_rwlock, RW_READER);
	blnext = list_head(&bip->bi_links);
	while ((blp = blnext) != NULL) {
		blnext = list_next(&bip->bi_links, blp);
		/* BLF_DELETED guards against dispatching shutdown twice */
		if (!(blp->bl_flags & BLF_DELETED)) {
			blp->bl_flags |= BLF_DELETED;
			(void) ddi_taskq_dispatch(bridge_taskq, link_shutdown,
			    blp, DDI_SLEEP);
		}
	}
	/*
	 * Drain the forwarding table.  A temporary hold is taken on each
	 * entry so it stays valid while bi_rwlock is dropped around
	 * fwd_delete(); the lock is re-taken before looking at the tree again.
	 */
	while ((bfp = avl_first(&bip->bi_fwd)) != NULL) {
		atomic_inc_uint(&bfp->bf_refs);
		rw_exit(&bip->bi_rwlock);
		fwd_delete(bfp);
		fwd_unref(bfp);
		rw_enter(&bip->bi_rwlock, RW_READER);
	}
	rw_exit(&bip->bi_rwlock);

	/*
	 * This bridge is being destroyed.  Notify TRILL once all of the
	 * links are all gone.
	 */
	mutex_enter(&inst_lock);
	while (bip->bi_trilldata != NULL && !list_is_empty(&bip->bi_links))
		cv_wait(&bip->bi_linkwait, &inst_lock);
	mutex_exit(&inst_lock);
	if (bip->bi_trilldata != NULL)
		trill_brdstr_fn(bip->bi_trilldata, bip);

	/* drop the reference described in the comment above */
	bridge_unref(bip);
}
1285 * 1286 * TRILL should call this function with NULL pointers before unloading. It 1287 * must not do so before dropping all references to bridges and links. We 1288 * assert that this is true on debug builds. 1289 */ 1290 void 1291 bridge_trill_register_cb(trill_recv_pkt_t recv_fn, trill_encap_pkt_t encap_fn, 1292 trill_br_dstr_t brdstr_fn, trill_ln_dstr_t lndstr_fn) 1293 { 1294 #ifdef DEBUG 1295 if (recv_fn == NULL && trill_recv_fn != NULL) { 1296 bridge_inst_t *bip; 1297 bridge_link_t *blp; 1298 1299 mutex_enter(&inst_lock); 1300 for (bip = list_head(&inst_list); bip != NULL; 1301 bip = list_next(&inst_list, bip)) { 1302 ASSERT(bip->bi_trilldata == NULL); 1303 rw_enter(&bip->bi_rwlock, RW_READER); 1304 for (blp = list_head(&bip->bi_links); blp != NULL; 1305 blp = list_next(&bip->bi_links, blp)) { 1306 ASSERT(blp->bl_trilldata == NULL); 1307 } 1308 rw_exit(&bip->bi_rwlock); 1309 } 1310 mutex_exit(&inst_lock); 1311 } 1312 #endif 1313 trill_recv_fn = recv_fn; 1314 trill_encap_fn = encap_fn; 1315 trill_brdstr_fn = brdstr_fn; 1316 trill_lndstr_fn = lndstr_fn; 1317 } 1318 1319 /* 1320 * This registers the TRILL instance pointer with a bridge. Before this 1321 * pointer is set, the forwarding, TRILL receive, and bridge destructor 1322 * functions won't be called. 1323 * 1324 * TRILL holds a reference on a bridge with this call. It must free the 1325 * reference by calling the unregister function below. 
1326 */ 1327 bridge_inst_t * 1328 bridge_trill_brref(const char *bname, void *ptr) 1329 { 1330 char bridge[MAXLINKNAMELEN]; 1331 bridge_inst_t *bip; 1332 1333 (void) snprintf(bridge, MAXLINKNAMELEN, "%s0", bname); 1334 bip = bridge_find_name(bridge); 1335 if (bip != NULL) { 1336 ASSERT(bip->bi_trilldata == NULL && ptr != NULL); 1337 bip->bi_trilldata = ptr; 1338 } 1339 return (bip); 1340 } 1341 1342 void 1343 bridge_trill_brunref(bridge_inst_t *bip) 1344 { 1345 ASSERT(bip->bi_trilldata != NULL); 1346 bip->bi_trilldata = NULL; 1347 bridge_unref(bip); 1348 } 1349 1350 /* 1351 * TRILL calls this function when referencing a particular link on a bridge. 1352 * 1353 * It holds a reference on the link, so TRILL must clear out the reference when 1354 * it's done with the link (on unbinding). 1355 */ 1356 bridge_link_t * 1357 bridge_trill_lnref(bridge_inst_t *bip, datalink_id_t linkid, void *ptr) 1358 { 1359 bridge_link_t *blp; 1360 1361 ASSERT(ptr != NULL); 1362 rw_enter(&bip->bi_rwlock, RW_READER); 1363 for (blp = list_head(&bip->bi_links); blp != NULL; 1364 blp = list_next(&bip->bi_links, blp)) { 1365 if (!(blp->bl_flags & BLF_DELETED) && 1366 blp->bl_linkid == linkid && blp->bl_trilldata == NULL) { 1367 blp->bl_trilldata = ptr; 1368 blp->bl_flags &= ~BLF_TRILLACTIVE; 1369 (void) memset(blp->bl_afs, 0, sizeof (blp->bl_afs)); 1370 atomic_inc_uint(&blp->bl_refs); 1371 break; 1372 } 1373 } 1374 rw_exit(&bip->bi_rwlock); 1375 return (blp); 1376 } 1377 1378 void 1379 bridge_trill_lnunref(bridge_link_t *blp) 1380 { 1381 mutex_enter(&blp->bl_trilllock); 1382 ASSERT(blp->bl_trilldata != NULL); 1383 blp->bl_trilldata = NULL; 1384 blp->bl_flags &= ~BLF_TRILLACTIVE; 1385 while (blp->bl_trillthreads > 0) 1386 cv_wait(&blp->bl_trillwait, &blp->bl_trilllock); 1387 mutex_exit(&blp->bl_trilllock); 1388 (void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs)); 1389 link_unref(blp); 1390 } 1391 1392 /* 1393 * This periodic timer performs three functions: 1394 * 1. 
/*
 * This periodic timer performs three functions:
 *  1. It scans the list of learned forwarding entries, and removes ones that
 *     haven't been heard from in a while.  The time limit is backed down if
 *     we're above the configured table limit.
 *  2. It walks the links and decays away the bl_learns counter.
 *  3. It scans the observability node entries looking for ones that can be
 *     freed up.
 *
 * The timer re-arms itself with timeout() unless bmac_list has become empty,
 * in which case bridge_timerid is reset to 0.  The argument is unused.
 */
/* ARGSUSED */
static void
bridge_timer(void *arg)
{
	bridge_inst_t *bip;
	bridge_fwd_t *bfp, *bfnext;
	bridge_mac_t *bmp, *bmnext;
	bridge_link_t *blp;
	int err;
	datalink_id_t tmpid;
	avl_tree_t fwd_scavenge;
	clock_t age_limit;
	uint32_t ldecay;

	/* Expired entries are parked here so they can be freed unlocked. */
	avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
	    offsetof(bridge_fwd_t, bf_node));
	mutex_enter(&inst_lock);
	for (bip = list_head(&inst_list); bip != NULL;
	    bip = list_next(&inst_list, bip)) {
		if (bip->bi_flags & BIF_SHUTDOWN)
			continue;
		rw_enter(&bip->bi_rwlock, RW_WRITER);
		/* compute scaled maximum age based on table limit */
		if (avl_numnodes(&bip->bi_fwd) > bip->bi_tablemax)
			bip->bi_tshift++;
		else
			bip->bi_tshift = 0;
		/*
		 * Each extra shift halves the age limit.  Never let it reach
		 * zero: undo the last shift and clamp to one tick instead.
		 */
		if ((age_limit = bridge_fwd_age >> bip->bi_tshift) == 0) {
			if (bip->bi_tshift != 0)
				bip->bi_tshift--;
			age_limit = 1;
		}
		bfnext = avl_first(&bip->bi_fwd);
		while ((bfp = bfnext) != NULL) {
			bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
			/* local-address entries never age out */
			if (!(bfp->bf_flags & BFF_LOCALADDR) &&
			    (lbolt - bfp->bf_lastheard) > age_limit) {
				ASSERT(bfp->bf_flags & BFF_INTREE);
				avl_remove(&bip->bi_fwd, bfp);
				bfp->bf_flags &= ~BFF_INTREE;
				avl_add(&fwd_scavenge, bfp);
			}
		}
		/* decay each link's learning counter, saturating at zero */
		for (blp = list_head(&bip->bi_links); blp != NULL;
		    blp = list_next(&bip->bi_links, blp)) {
			ldecay = mac_get_ldecay(blp->bl_mh);
			if (ldecay >= blp->bl_learns)
				blp->bl_learns = 0;
			else
				atomic_add_int(&blp->bl_learns, -(int)ldecay);
		}
		rw_exit(&bip->bi_rwlock);
		/* release this instance's expired entries before moving on */
		bfnext = avl_first(&fwd_scavenge);
		while ((bfp = bfnext) != NULL) {
			bfnext = AVL_NEXT(&fwd_scavenge, bfp);
			avl_remove(&fwd_scavenge, bfp);
			KIINCR(bki_expire);
			fwd_unref(bfp);	/* drop tree reference */
		}
	}
	mutex_exit(&inst_lock);
	avl_destroy(&fwd_scavenge);

	/*
	 * Scan the bridge_mac_t entries and try to free up the ones that are
	 * no longer active.  This must be done by polling, as neither DLS nor
	 * MAC provides a driver any sort of positive control over clients.
	 */
	rw_enter(&bmac_rwlock, RW_WRITER);
	bmnext = list_head(&bmac_list);
	while ((bmp = bmnext) != NULL) {
		bmnext = list_next(&bmac_list, bmp);

		/* ignore active bridges */
		if (bmp->bm_inst != NULL)
			continue;

		/* first tear down the DLS devnet, if it still exists */
		if (bmp->bm_flags & BMF_DLS) {
			err = dls_devnet_destroy(bmp->bm_mh, &tmpid, B_FALSE);
			ASSERT(err == 0 || err == EBUSY);
			if (err == 0)
				bmp->bm_flags &= ~BMF_DLS;
		}

		/* then unregister from MAC; EBUSY means try again later */
		if (!(bmp->bm_flags & BMF_DLS)) {
			err = mac_unregister(bmp->bm_mh);
			ASSERT(err == 0 || err == EBUSY);
			if (err == 0) {
				list_remove(&bmac_list, bmp);
				kmem_free(bmp, sizeof (*bmp));
			}
		}
	}
	if (list_is_empty(&bmac_list)) {
		bridge_timerid = 0;
	} else {
		bridge_timerid = timeout(bridge_timer, NULL,
		    bridge_scan_interval);
	}
	rw_exit(&bmac_rwlock);
}
bfnext = avl_first(&fwd_scavenge); 1454 while ((bfp = bfnext) != NULL) { 1455 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 1456 avl_remove(&fwd_scavenge, bfp); 1457 KIINCR(bki_expire); 1458 fwd_unref(bfp); /* drop tree reference */ 1459 } 1460 } 1461 mutex_exit(&inst_lock); 1462 avl_destroy(&fwd_scavenge); 1463 1464 /* 1465 * Scan the bridge_mac_t entries and try to free up the ones that are 1466 * no longer active. This must be done by polling, as neither DLS nor 1467 * MAC provides a driver any sort of positive control over clients. 1468 */ 1469 rw_enter(&bmac_rwlock, RW_WRITER); 1470 bmnext = list_head(&bmac_list); 1471 while ((bmp = bmnext) != NULL) { 1472 bmnext = list_next(&bmac_list, bmp); 1473 1474 /* ignore active bridges */ 1475 if (bmp->bm_inst != NULL) 1476 continue; 1477 1478 if (bmp->bm_flags & BMF_DLS) { 1479 err = dls_devnet_destroy(bmp->bm_mh, &tmpid, B_FALSE); 1480 ASSERT(err == 0 || err == EBUSY); 1481 if (err == 0) 1482 bmp->bm_flags &= ~BMF_DLS; 1483 } 1484 1485 if (!(bmp->bm_flags & BMF_DLS)) { 1486 err = mac_unregister(bmp->bm_mh); 1487 ASSERT(err == 0 || err == EBUSY); 1488 if (err == 0) { 1489 list_remove(&bmac_list, bmp); 1490 kmem_free(bmp, sizeof (*bmp)); 1491 } 1492 } 1493 } 1494 if (list_is_empty(&bmac_list)) { 1495 bridge_timerid = 0; 1496 } else { 1497 bridge_timerid = timeout(bridge_timer, NULL, 1498 bridge_scan_interval); 1499 } 1500 rw_exit(&bmac_rwlock); 1501 } 1502 1503 static int 1504 bridge_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp) 1505 { 1506 bridge_stream_t *bsp; 1507 1508 if (rq->q_ptr != NULL) 1509 return (0); 1510 1511 if (sflag & MODOPEN) 1512 return (EINVAL); 1513 1514 /* 1515 * Check the minor node number being opened. This tells us which 1516 * bridge instance the user wants. 1517 */ 1518 if (getminor(*devp) != 0) { 1519 /* 1520 * This is a regular DLPI stream for snoop or the like. 1521 * Redirect it through DLD. 
1522 */ 1523 rq->q_qinfo = &bridge_dld_rinit; 1524 OTHERQ(rq)->q_qinfo = &bridge_dld_winit; 1525 return (dld_open(rq, devp, oflag, sflag, credp)); 1526 } else { 1527 /* 1528 * Allocate the bridge control stream structure. 1529 */ 1530 if ((bsp = stream_alloc()) == NULL) 1531 return (ENOSR); 1532 rq->q_ptr = WR(rq)->q_ptr = (caddr_t)bsp; 1533 bsp->bs_wq = WR(rq); 1534 *devp = makedevice(getmajor(*devp), bsp->bs_minor); 1535 qprocson(rq); 1536 return (0); 1537 } 1538 } 1539 1540 /* 1541 * This is used only for bridge control streams. DLPI goes through dld 1542 * instead. 1543 */ 1544 static int 1545 bridge_close(queue_t *rq) 1546 { 1547 bridge_stream_t *bsp = rq->q_ptr; 1548 bridge_inst_t *bip; 1549 1550 /* 1551 * Wait for any stray taskq (add/delete link) entries related to this 1552 * stream to leave the system. 1553 */ 1554 mutex_enter(&stream_ref_lock); 1555 while (bsp->bs_taskq_cnt != 0) 1556 cv_wait(&stream_ref_cv, &stream_ref_lock); 1557 mutex_exit(&stream_ref_lock); 1558 1559 qprocsoff(rq); 1560 if ((bip = bsp->bs_inst) != NULL) 1561 shutdown_inst(bip); 1562 rq->q_ptr = WR(rq)->q_ptr = NULL; 1563 stream_free(bsp); 1564 if (bip != NULL) 1565 bridge_unref(bip); 1566 1567 return (0); 1568 } 1569 1570 static void 1571 bridge_learn(bridge_link_t *blp, const uint8_t *saddr, uint16_t ingress_nick, 1572 uint16_t vlanid) 1573 { 1574 bridge_inst_t *bip = blp->bl_inst; 1575 bridge_fwd_t *bfp, *bfpnew; 1576 int i; 1577 boolean_t replaced = B_FALSE; 1578 1579 /* Ignore multi-destination address used as source; it's nonsense. */ 1580 if (*saddr & 1) 1581 return; 1582 1583 /* 1584 * If the source is known, then check whether it belongs on this link. 1585 * If not, and this isn't a fixed local address, then we've detected a 1586 * move. If it's not known, learn it. 1587 */ 1588 if ((bfp = fwd_find(bip, saddr, vlanid)) != NULL) { 1589 /* 1590 * If the packet has a fixed local source address, then there's 1591 * nothing we can learn. We must quit. 
/*
 * Learn (or refresh) the forwarding entry for a packet's source address.
 *
 *   blp          - link the packet was seen on
 *   saddr        - source MAC address of the packet
 *   ingress_nick - TRILL nickname recorded in the entry
 *   vlanid       - VLAN the packet belongs to
 *
 * Known entries that already list this link (with the same nickname) just get
 * their bf_lastheard time refreshed.  Fixed local addresses are never
 * relearned.  Otherwise a new single-link entry is allocated and inserted,
 * replacing the old one when the VLAN matches (a "move") or coexisting with
 * it as a VLAN-local entry when it does not.  Learning is rate limited per
 * link through bl_learns.
 */
static void
bridge_learn(bridge_link_t *blp, const uint8_t *saddr, uint16_t ingress_nick,
    uint16_t vlanid)
{
	bridge_inst_t *bip = blp->bl_inst;
	bridge_fwd_t *bfp, *bfpnew;
	int i;
	boolean_t replaced = B_FALSE;

	/* Ignore multi-destination address used as source; it's nonsense. */
	if (*saddr & 1)
		return;

	/*
	 * If the source is known, then check whether it belongs on this link.
	 * If not, and this isn't a fixed local address, then we've detected a
	 * move.  If it's not known, learn it.
	 */
	if ((bfp = fwd_find(bip, saddr, vlanid)) != NULL) {
		/*
		 * If the packet has a fixed local source address, then there's
		 * nothing we can learn.  We must quit.  If this was a received
		 * packet, then the sender has stolen our address, but there's
		 * nothing we can do.  If it's a transmitted packet, then
		 * that's the normal case.
		 */
		if (bfp->bf_flags & BFF_LOCALADDR) {
			fwd_unref(bfp);
			return;
		}

		/*
		 * Check if the link (and TRILL sender, if any) being used is
		 * among the ones registered for this address.  If so, then
		 * this is information that we already know.
		 */
		if (bfp->bf_trill_nick == ingress_nick) {
			for (i = 0; i < bfp->bf_nlinks; i++) {
				if (bfp->bf_links[i] == blp) {
					bfp->bf_lastheard = lbolt;
					fwd_unref(bfp);
					return;
				}
			}
		}
	}

	/*
	 * Note that we intentionally "unlearn" things that appear to be under
	 * attack on this link.  The forwarding cache is a negative thing for
	 * security -- it disables reachability as a performance optimization
	 * -- so leaving out entries optimizes for success and defends against
	 * the attack.  Thus, the bare increment without a check in the delete
	 * code above is right.  (And it's ok if we skid over the limit a
	 * little, so there's no synchronization needed on the test.)
	 */
	if (blp->bl_learns >= mac_get_llimit(blp->bl_mh)) {
		if (bfp != NULL) {
			/* only delete when no IVL duplicates hang off it */
			if (bfp->bf_vcnt == 0)
				fwd_delete(bfp);
			fwd_unref(bfp);
		}
		return;
	}

	atomic_inc_uint(&blp->bl_learns);

	if ((bfpnew = fwd_alloc(saddr, 1, ingress_nick)) == NULL) {
		if (bfp != NULL)
			fwd_unref(bfp);
		return;
	}
	KIINCR(bki_count);

	if (bfp != NULL) {
		/*
		 * If this is a new destination for the same VLAN, then delete
		 * so that we can update.  If it's a different VLAN, then we're
		 * not going to delete the original.  Split off instead into an
		 * IVL entry.
		 */
		if (bfp->bf_vlanid == vlanid) {
			/* save the count of IVL duplicates */
			bfpnew->bf_vcnt = bfp->bf_vcnt;

			/* entry deletes count as learning events */
			atomic_inc_uint(&blp->bl_learns);

			/* destroy and create anew; node moved */
			fwd_delete(bfp);
			replaced = B_TRUE;
			KIINCR(bki_moved);
		} else {
			bfp->bf_vcnt++;
			bfpnew->bf_flags |= BFF_VLANLOCAL;
		}
		/* done with the reference returned by fwd_find() */
		fwd_unref(bfp);
	}
	bfpnew->bf_links[0] = blp;
	bfpnew->bf_nlinks = 1;
	atomic_inc_uint(&blp->bl_refs);	/* bf_links entry */
	/* the insert can be refused (see fwd_insert); then discard the entry */
	if (!fwd_insert(bip, bfpnew))
		fwd_free(bfpnew);
	else if (!replaced)
		KIINCR(bki_source);
}
1694 */ 1695 static mblk_t * 1696 reform_vlan_header(mblk_t *mp, uint16_t vlanid, uint16_t tci, uint16_t pvid) 1697 { 1698 boolean_t source_has_tag = (tci != 0xFFFF); 1699 mblk_t *mpcopy; 1700 size_t mlen, minlen; 1701 struct ether_vlan_header *evh; 1702 int pri; 1703 1704 /* This helps centralize error handling in the caller. */ 1705 if (mp == NULL) 1706 return (mp); 1707 1708 /* No forwarded packet can have hardware checksum enabled */ 1709 DB_CKSUMFLAGS(mp) = 0; 1710 1711 /* Get the no-modification cases out of the way first */ 1712 if (!source_has_tag && vlanid == pvid) /* 1a */ 1713 return (mp); 1714 1715 pri = VLAN_PRI(tci); 1716 if (source_has_tag && mp->b_band == pri) { 1717 if (vlanid != pvid) /* 1c */ 1718 return (mp); 1719 if (pri != 0 && VLAN_ID(tci) == 0) /* 1b */ 1720 return (mp); 1721 } 1722 1723 /* 1724 * We now know that we must modify the packet. Prepare for that. Note 1725 * that if a tag is present, the caller has already done a pullup for 1726 * the VLAN header, so we're good to go. 1727 */ 1728 if (MBLKL(mp) < sizeof (struct ether_header)) { 1729 mpcopy = msgpullup(mp, sizeof (struct ether_header)); 1730 if (mpcopy == NULL) { 1731 freemsg(mp); 1732 return (NULL); 1733 } 1734 mp = mpcopy; 1735 } 1736 if (DB_REF(mp) > 1 || !IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) || 1737 (!source_has_tag && MBLKTAIL(mp) < VLAN_INCR)) { 1738 minlen = mlen = MBLKL(mp); 1739 if (!source_has_tag) 1740 minlen += VLAN_INCR; 1741 ASSERT(minlen >= sizeof (struct ether_vlan_header)); 1742 /* 1743 * We're willing to copy some data to avoid fragmentation, but 1744 * not a lot. 1745 */ 1746 if (minlen > 256) 1747 minlen = sizeof (struct ether_vlan_header); 1748 mpcopy = allocb(minlen, BPRI_MED); 1749 if (mpcopy == NULL) { 1750 freemsg(mp); 1751 return (NULL); 1752 } 1753 if (mlen <= minlen) { 1754 /* We toss the first mblk when we can. 
*/ 1755 bcopy(mp->b_rptr, mpcopy->b_rptr, mlen); 1756 mpcopy->b_wptr += mlen; 1757 mpcopy->b_cont = mp->b_cont; 1758 freeb(mp); 1759 } else { 1760 /* If not, then just copy what we need */ 1761 if (!source_has_tag) 1762 minlen = sizeof (struct ether_header); 1763 bcopy(mp->b_rptr, mpcopy->b_rptr, minlen); 1764 mpcopy->b_wptr += minlen; 1765 mpcopy->b_cont = mp; 1766 mp->b_rptr += minlen; 1767 } 1768 mp = mpcopy; 1769 } 1770 1771 /* LINTED: pointer alignment */ 1772 evh = (struct ether_vlan_header *)mp->b_rptr; 1773 if (source_has_tag) { 1774 if (mp->b_band == 0 && vlanid == pvid) { /* 3 */ 1775 evh->ether_tpid = evh->ether_type; 1776 mlen = MBLKL(mp); 1777 if (mlen > sizeof (struct ether_vlan_header)) 1778 ovbcopy(mp->b_rptr + 1779 sizeof (struct ether_vlan_header), 1780 mp->b_rptr + sizeof (struct ether_header), 1781 mlen - sizeof (struct ether_vlan_header)); 1782 mp->b_wptr -= VLAN_INCR; 1783 } else { /* 2 */ 1784 if (vlanid == pvid) 1785 vlanid = VLAN_ID_NONE; 1786 tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid); 1787 evh->ether_tci = htons(tci); 1788 } 1789 } else { 1790 /* case 4: no header present, but one is needed */ 1791 mlen = MBLKL(mp); 1792 if (mlen > sizeof (struct ether_header)) 1793 ovbcopy(mp->b_rptr + sizeof (struct ether_header), 1794 mp->b_rptr + sizeof (struct ether_vlan_header), 1795 mlen - sizeof (struct ether_header)); 1796 mp->b_wptr += VLAN_INCR; 1797 ASSERT(mp->b_wptr <= DB_LIM(mp)); 1798 if (vlanid == pvid) 1799 vlanid = VLAN_ID_NONE; 1800 tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid); 1801 evh->ether_type = evh->ether_tpid; 1802 evh->ether_tpid = htons(ETHERTYPE_VLAN); 1803 evh->ether_tci = htons(tci); 1804 } 1805 return (mp); 1806 } 1807 1808 /* Record VLAN information and strip header if requested . 
*/ 1809 static void 1810 update_header(mblk_t *mp, mac_header_info_t *hdr_info, boolean_t striphdr) 1811 { 1812 if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) { 1813 struct ether_vlan_header *evhp; 1814 uint16_t ether_type; 1815 1816 /* LINTED: alignment */ 1817 evhp = (struct ether_vlan_header *)mp->b_rptr; 1818 hdr_info->mhi_istagged = B_TRUE; 1819 hdr_info->mhi_tci = ntohs(evhp->ether_tci); 1820 if (striphdr) { 1821 /* 1822 * For VLAN tagged frames update the ether_type 1823 * in hdr_info before stripping the header. 1824 */ 1825 ether_type = ntohs(evhp->ether_type); 1826 hdr_info->mhi_origsap = ether_type; 1827 hdr_info->mhi_bindsap = (ether_type > ETHERMTU) ? 1828 ether_type : DLS_SAP_LLC; 1829 mp->b_rptr = (uchar_t *)(evhp + 1); 1830 } 1831 } else { 1832 hdr_info->mhi_istagged = B_FALSE; 1833 hdr_info->mhi_tci = VLAN_ID_NONE; 1834 if (striphdr) 1835 mp->b_rptr += sizeof (struct ether_header); 1836 } 1837 } 1838 1839 /* 1840 * Return B_TRUE if we're allowed to send on this link with the given VLAN ID. 1841 */ 1842 static boolean_t 1843 bridge_can_send(bridge_link_t *blp, uint16_t vlanid) 1844 { 1845 ASSERT(vlanid != VLAN_ID_NONE); 1846 if (blp->bl_flags & BLF_DELETED) 1847 return (B_FALSE); 1848 if (blp->bl_trilldata == NULL && blp->bl_state != BLS_FORWARDING) 1849 return (B_FALSE); 1850 return (BRIDGE_VLAN_ISSET(blp, vlanid) && BRIDGE_AF_ISSET(blp, vlanid)); 1851 } 1852 1853 /* 1854 * This function scans the bridge forwarding tables in order to forward a given 1855 * packet. If the packet either doesn't need forwarding (the current link is 1856 * correct) or the current link needs a copy as well, then the packet is 1857 * returned to the caller. 1858 * 1859 * If a packet has been decapsulated from TRILL, then it must *NOT* reenter a 1860 * TRILL tunnel. If the destination points there, then drop instead. 
1861 */ 1862 static mblk_t * 1863 bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp, 1864 uint16_t vlanid, uint16_t tci, boolean_t from_trill, boolean_t is_xmit) 1865 { 1866 mblk_t *mpsend, *mpcopy; 1867 bridge_inst_t *bip = blp->bl_inst; 1868 bridge_link_t *blpsend, *blpnext; 1869 bridge_fwd_t *bfp; 1870 uint_t i; 1871 boolean_t selfseen = B_FALSE; 1872 void *tdp; 1873 const uint8_t *daddr = hdr_info->mhi_daddr; 1874 1875 /* 1876 * Check for the IEEE "reserved" multicast addresses. Messages sent to 1877 * these addresses are used for link-local control (STP and pause), and 1878 * are never forwarded or redirected. 1879 */ 1880 if (daddr[0] == 1 && daddr[1] == 0x80 && daddr[2] == 0xc2 && 1881 daddr[3] == 0 && daddr[4] == 0 && (daddr[5] & 0xf0) == 0) { 1882 if (from_trill) { 1883 freemsg(mp); 1884 mp = NULL; 1885 } 1886 return (mp); 1887 } 1888 1889 if ((bfp = fwd_find(bip, daddr, vlanid)) != NULL) { 1890 1891 /* 1892 * If trill indicates a destination for this node, then it's 1893 * clearly not intended for local delivery. We must tell TRILL 1894 * to encapsulate, as long as we didn't just decapsulate it. 1895 */ 1896 if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) { 1897 /* 1898 * Error case: can't reencapsulate if the protocols are 1899 * working correctly. 
1900 */ 1901 if (from_trill) { 1902 freemsg(mp); 1903 return (NULL); 1904 } 1905 mutex_enter(&blp->bl_trilllock); 1906 if ((tdp = blp->bl_trilldata) != NULL) { 1907 blp->bl_trillthreads++; 1908 mutex_exit(&blp->bl_trilllock); 1909 update_header(mp, hdr_info, B_FALSE); 1910 if (is_xmit) 1911 mp = mac_fix_cksum(mp); 1912 /* all trill data frames have Inner.VLAN */ 1913 mp = reform_vlan_header(mp, vlanid, tci, 0); 1914 if (mp == NULL) { 1915 KIINCR(bki_drops); 1916 fwd_unref(bfp); 1917 return (NULL); 1918 } 1919 trill_encap_fn(tdp, blp, hdr_info, mp, 1920 bfp->bf_trill_nick); 1921 mutex_enter(&blp->bl_trilllock); 1922 if (--blp->bl_trillthreads == 0 && 1923 blp->bl_trilldata == NULL) 1924 cv_broadcast(&blp->bl_trillwait); 1925 } 1926 mutex_exit(&blp->bl_trilllock); 1927 1928 /* if TRILL has been disabled, then kill this stray */ 1929 if (tdp == NULL) { 1930 freemsg(mp); 1931 fwd_delete(bfp); 1932 } 1933 fwd_unref(bfp); 1934 return (NULL); 1935 } 1936 1937 /* find first link we can send on */ 1938 for (i = 0; i < bfp->bf_nlinks; i++) { 1939 blpsend = bfp->bf_links[i]; 1940 if (blpsend == blp) 1941 selfseen = B_TRUE; 1942 else if (bridge_can_send(blpsend, vlanid)) 1943 break; 1944 } 1945 1946 while (i < bfp->bf_nlinks) { 1947 blpsend = bfp->bf_links[i]; 1948 for (i++; i < bfp->bf_nlinks; i++) { 1949 blpnext = bfp->bf_links[i]; 1950 if (blpnext == blp) 1951 selfseen = B_TRUE; 1952 else if (bridge_can_send(blpnext, vlanid)) 1953 break; 1954 } 1955 if (i == bfp->bf_nlinks && !selfseen) { 1956 mpsend = mp; 1957 mp = NULL; 1958 } else { 1959 mpsend = copymsg(mp); 1960 } 1961 1962 if (!from_trill && is_xmit) 1963 mpsend = mac_fix_cksum(mpsend); 1964 1965 mpsend = reform_vlan_header(mpsend, vlanid, tci, 1966 blpsend->bl_pvid); 1967 if (mpsend == NULL) { 1968 KIINCR(bki_drops); 1969 continue; 1970 } 1971 1972 KIINCR(bki_forwards); 1973 /* 1974 * No need to bump up the link reference count, as 1975 * the forwarding entry itself holds a reference to 1976 * the link. 
1977 */ 1978 if (bfp->bf_flags & BFF_LOCALADDR) { 1979 mac_rx_common(blpsend->bl_mh, NULL, mpsend); 1980 } else { 1981 KLPINCR(blpsend, bkl_xmit); 1982 MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, 1983 mpsend); 1984 freemsg(mpsend); 1985 } 1986 } 1987 /* 1988 * Handle a special case: if we're transmitting to the original 1989 * link, then check whether the localaddr flag is set. If it 1990 * is, then receive instead. This doesn't happen with ordinary 1991 * bridging, but does happen often with TRILL decapsulation. 1992 */ 1993 if (mp != NULL && is_xmit && (bfp->bf_flags & BFF_LOCALADDR)) { 1994 mac_rx_common(blp->bl_mh, NULL, mp); 1995 mp = NULL; 1996 } 1997 fwd_unref(bfp); 1998 } else { 1999 /* 2000 * TRILL has two cases to handle. If the packet is off the 2001 * wire (not from TRILL), then we need to send up into the 2002 * TRILL module to have the distribution tree computed. If the 2003 * packet is from TRILL (decapsulated), then we're part of the 2004 * distribution tree, and we need to copy the packet on member 2005 * interfaces. 2006 * 2007 * Thus, the from TRILL case is identical to the STP case. 2008 */ 2009 if (!from_trill && blp->bl_trilldata != NULL) { 2010 mutex_enter(&blp->bl_trilllock); 2011 if ((tdp = blp->bl_trilldata) != NULL) { 2012 blp->bl_trillthreads++; 2013 mutex_exit(&blp->bl_trilllock); 2014 if ((mpsend = copymsg(mp)) != NULL) { 2015 update_header(mpsend, 2016 hdr_info, B_FALSE); 2017 /* 2018 * all trill data frames have 2019 * Inner.VLAN 2020 */ 2021 mpsend = reform_vlan_header(mpsend, 2022 vlanid, tci, 0); 2023 if (mpsend == NULL) { 2024 KIINCR(bki_drops); 2025 } else { 2026 trill_encap_fn(tdp, blp, 2027 hdr_info, mpsend, 2028 RBRIDGE_NICKNAME_NONE); 2029 } 2030 } 2031 mutex_enter(&blp->bl_trilllock); 2032 if (--blp->bl_trillthreads == 0 && 2033 blp->bl_trilldata == NULL) 2034 cv_broadcast(&blp->bl_trillwait); 2035 } 2036 mutex_exit(&blp->bl_trilllock); 2037 } 2038 2039 /* 2040 * This is an unknown destination, so flood. 
2041 */ 2042 rw_enter(&bip->bi_rwlock, RW_READER); 2043 for (blpnext = list_head(&bip->bi_links); blpnext != NULL; 2044 blpnext = list_next(&bip->bi_links, blpnext)) { 2045 if (blpnext == blp) 2046 selfseen = B_TRUE; 2047 else if (bridge_can_send(blpnext, vlanid)) 2048 break; 2049 } 2050 if (blpnext != NULL) 2051 atomic_inc_uint(&blpnext->bl_refs); 2052 rw_exit(&bip->bi_rwlock); 2053 while ((blpsend = blpnext) != NULL) { 2054 rw_enter(&bip->bi_rwlock, RW_READER); 2055 for (blpnext = list_next(&bip->bi_links, blpsend); 2056 blpnext != NULL; 2057 blpnext = list_next(&bip->bi_links, blpnext)) { 2058 if (blpnext == blp) 2059 selfseen = B_TRUE; 2060 else if (bridge_can_send(blpnext, vlanid)) 2061 break; 2062 } 2063 if (blpnext != NULL) 2064 atomic_inc_uint(&blpnext->bl_refs); 2065 rw_exit(&bip->bi_rwlock); 2066 if (blpnext == NULL && !selfseen) { 2067 mpsend = mp; 2068 mp = NULL; 2069 } else { 2070 mpsend = copymsg(mp); 2071 } 2072 2073 if (!from_trill && is_xmit) 2074 mpsend = mac_fix_cksum(mpsend); 2075 2076 mpsend = reform_vlan_header(mpsend, vlanid, tci, 2077 blpsend->bl_pvid); 2078 if (mpsend == NULL) { 2079 KIINCR(bki_drops); 2080 continue; 2081 } 2082 2083 if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST) 2084 KIINCR(bki_unknown); 2085 else 2086 KIINCR(bki_mbcast); 2087 KLPINCR(blpsend, bkl_xmit); 2088 if ((mpcopy = copymsg(mpsend)) != NULL) 2089 mac_rx_common(blpsend->bl_mh, NULL, mpcopy); 2090 MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, mpsend); 2091 freemsg(mpsend); 2092 link_unref(blpsend); 2093 } 2094 } 2095 2096 /* 2097 * At this point, if np is non-NULL, it means that the caller needs to 2098 * continue on the selected link. 2099 */ 2100 return (mp); 2101 } 2102 2103 /* 2104 * Extract and validate the VLAN information for a given packet. This checks 2105 * conformance with the rules for use of the PVID on the link, and for the 2106 * allowed (configured) VLAN set. 2107 * 2108 * Returns B_TRUE if the packet passes, B_FALSE if it fails. 
2109 */ 2110 static boolean_t 2111 bridge_get_vlan(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp, 2112 uint16_t *vlanidp, uint16_t *tcip) 2113 { 2114 uint16_t tci, vlanid; 2115 2116 if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) { 2117 ptrdiff_t tpos = offsetof(struct ether_vlan_header, ether_tci); 2118 ptrdiff_t mlen; 2119 2120 /* 2121 * Extract the VLAN ID information, regardless of alignment, 2122 * and without a pullup. This isn't attractive, but we do this 2123 * to avoid having to deal with the pointers stashed in 2124 * hdr_info moving around or having the caller deal with a new 2125 * mblk_t pointer. 2126 */ 2127 while (mp != NULL) { 2128 mlen = MBLKL(mp); 2129 if (mlen > tpos && mlen > 0) 2130 break; 2131 tpos -= mlen; 2132 mp = mp->b_cont; 2133 } 2134 if (mp == NULL) 2135 return (B_FALSE); 2136 tci = mp->b_rptr[tpos] << 8; 2137 if (++tpos >= mlen) { 2138 do { 2139 mp = mp->b_cont; 2140 } while (mp != NULL && MBLKL(mp) == 0); 2141 if (mp == NULL) 2142 return (B_FALSE); 2143 tpos = 0; 2144 } 2145 tci |= mp->b_rptr[tpos]; 2146 2147 vlanid = VLAN_ID(tci); 2148 if (VLAN_CFI(tci) != ETHER_CFI || vlanid > VLAN_ID_MAX) 2149 return (B_FALSE); 2150 if (vlanid == VLAN_ID_NONE || vlanid == blp->bl_pvid) 2151 goto input_no_vlan; 2152 if (!BRIDGE_VLAN_ISSET(blp, vlanid)) 2153 return (B_FALSE); 2154 } else { 2155 tci = 0xFFFF; 2156 input_no_vlan: 2157 /* 2158 * If PVID is set to zero, then untagged traffic is not 2159 * supported here. Do not learn or forward. 2160 */ 2161 if ((vlanid = blp->bl_pvid) == VLAN_ID_NONE) 2162 return (B_FALSE); 2163 } 2164 2165 *tcip = tci; 2166 *vlanidp = vlanid; 2167 return (B_TRUE); 2168 } 2169 2170 /* 2171 * Handle MAC notifications. 
/*
 * Handle MAC notifications.
 *
 * 'arg' is the bridge_link_t that was passed to mac_notify_add() when the
 * link was added to the bridge.  Only unicast-address and SDU-size changes
 * are of interest; every other notification type is ignored.
 */
static void
bridge_notify_cb(void *arg, mac_notify_type_t note_type)
{
	bridge_link_t *blp = arg;

	switch (note_type) {
	case MAC_NOTE_UNICST:
		bridge_new_unicst(blp);
		break;

	case MAC_NOTE_SDU_SIZE: {
		uint_t maxsdu;
		bridge_inst_t *bip = blp->bl_inst;
		bridge_mac_t *bmp = bip->bi_mac;
		boolean_t notify = B_FALSE;
		mblk_t *mlist = NULL;

		mac_sdu_get(blp->bl_mh, NULL, &maxsdu);
		rw_enter(&bip->bi_rwlock, RW_READER);
		/*
		 * If this link has no neighbors on bi_links, it is the only
		 * link on the bridge and so it dictates the bridge's SDU.
		 */
		if (list_prev(&bip->bi_links, blp) == NULL &&
		    list_next(&bip->bi_links, blp) == NULL) {
			notify = (maxsdu != bmp->bm_maxsdu);
			bmp->bm_maxsdu = maxsdu;
		}
		blp->bl_maxsdu = maxsdu;
		/*
		 * A link whose SDU disagrees with the bridge is reported via
		 * link_sdu_fail(); otherwise, if the bridge SDU just changed,
		 * propagate the new value to the bridge's own mac instance.
		 */
		if (maxsdu != bmp->bm_maxsdu)
			link_sdu_fail(blp, B_TRUE, &mlist);
		else if (notify)
			(void) mac_maxsdu_update(bmp->bm_mh, maxsdu);
		rw_exit(&bip->bi_rwlock);
		/* Queued messages are delivered only after the lock is dropped */
		send_up_messages(bip, mlist);
		break;
	}
	}
}

/*
 * This is called by the MAC layer.  As with the transmit side, we're right in
 * the data path for all I/O on this port, so if we don't need to forward this
 * packet anywhere, we have to send it upwards via mac_rx_common.
 *
 * 'mh' is really the bridge_link_t that was handed to mac_bridge_set(), and
 * 'mpnext' is a b_next-linked chain of received packets.  The chain is
 * processed in up to three passes: TRILL extraction (when TRILL is attached
 * to the link), a copy to the bridge observability node, and then per-packet
 * learning and forwarding.
 */
static void
bridge_recv_cb(mac_handle_t mh, mac_resource_handle_t rsrc, mblk_t *mpnext)
{
	mblk_t *mp, *mpcopy;
	bridge_link_t *blp = (bridge_link_t *)mh;
	bridge_inst_t *bip = blp->bl_inst;
	bridge_mac_t *bmp = bip->bi_mac;
	mac_header_info_t hdr_info;
	uint16_t vlanid, tci;
	boolean_t trillmode = B_FALSE;

	KIINCR(bki_recv);
	KLINCR(bkl_recv);

	/*
	 * Regardless of state, check for inbound TRILL packets when TRILL is
	 * active.  These are pulled out of band and sent for TRILL handling.
	 */
	if (blp->bl_trilldata != NULL) {
		void *tdp;
		mblk_t *newhead;
		mblk_t *tail = NULL;

		/*
		 * Re-check bl_trilldata under the lock, and count ourselves
		 * in bl_trillthreads for as long as we are using it.
		 */
		mutex_enter(&blp->bl_trilllock);
		if ((tdp = blp->bl_trilldata) != NULL) {
			blp->bl_trillthreads++;
			mutex_exit(&blp->bl_trilllock);
			trillmode = B_TRUE;
			/*
			 * 'newhead' tracks the head of the chain left behind
			 * for normal bridging; 'tail' is the last packet kept
			 * on that chain so far.
			 */
			newhead = mpnext;
			while ((mp = mpnext) != NULL) {
				boolean_t raw_isis, bridge_group;

				mpnext = mp->b_next;

				/*
				 * If the header isn't readable, then leave on
				 * the list and continue.
				 */
				if (mac_header_info(blp->bl_mh, mp,
				    &hdr_info) != 0) {
					tail = mp;
					continue;
				}

				/*
				 * The TRILL document specifies that, on
				 * Ethernet alone, IS-IS packets arrive with
				 * LLC rather than Ethertype, and using a
				 * specific destination address.  We must check
				 * for that here.  Also, we need to give BPDUs
				 * to TRILL for processing.
				 */
				raw_isis = bridge_group = B_FALSE;
				if (hdr_info.mhi_dsttype ==
				    MAC_ADDRTYPE_MULTICAST) {
					if (memcmp(hdr_info.mhi_daddr,
					    all_isis_rbridges, ETHERADDRL) == 0)
						raw_isis = B_TRUE;
					else if (memcmp(hdr_info.mhi_daddr,
					    bridge_group_address, ETHERADDRL) ==
					    0)
						bridge_group = B_TRUE;
				}
				/*
				 * Anything that is not IS-IS, a BPDU, or
				 * TRILL-encapsulated (with or without an outer
				 * VLAN tag) stays on the list for bridging.
				 */
				if (!raw_isis && !bridge_group &&
				    hdr_info.mhi_bindsap != ETHERTYPE_TRILL &&
				    (hdr_info.mhi_bindsap != ETHERTYPE_VLAN ||
				    /* LINTED: alignment */
				    ((struct ether_vlan_header *)mp->b_rptr)->
				    ether_type != htons(ETHERTYPE_TRILL))) {
					tail = mp;
					continue;
				}

				/*
				 * We've got TRILL input.  Remove from the list
				 * and send up through the TRILL module.  (Send
				 * a copy through promiscuous receive just to
				 * support snooping on TRILL.  Order isn't
				 * preserved strictly, but that doesn't matter
				 * here.)
				 */
				if (tail != NULL)
					tail->b_next = mpnext;
				mp->b_next = NULL;
				if (mp == newhead)
					newhead = mpnext;
				mac_trill_snoop(blp->bl_mh, mp);
				update_header(mp, &hdr_info, B_TRUE);
				/*
				 * On raw IS-IS and BPDU frames, we have to
				 * make sure that the length is trimmed
				 * properly.  We use origsap in order to cope
				 * with jumbograms for IS-IS.  (Regular mac
				 * can't.)
				 */
				if (raw_isis || bridge_group) {
					size_t msglen = msgdsize(mp);

					if (msglen > hdr_info.mhi_origsap) {
						/* negative length trims the tail */
						(void) adjmsg(mp,
						    hdr_info.mhi_origsap -
						    msglen);
					} else if (msglen <
					    hdr_info.mhi_origsap) {
						/* truncated frame; discard */
						freemsg(mp);
						continue;
					}
				}
				trill_recv_fn(tdp, blp, rsrc, mp, &hdr_info);
			}
			mpnext = newhead;
			/*
			 * Drop our thread count, and wake any waiter if we
			 * were the last user and TRILL has since detached.
			 */
			mutex_enter(&blp->bl_trilllock);
			if (--blp->bl_trillthreads == 0 &&
			    blp->bl_trilldata == NULL)
				cv_broadcast(&blp->bl_trillwait);
		}
		/* Reached with bl_trilllock held on both paths above */
		mutex_exit(&blp->bl_trilllock);
		if (mpnext == NULL)
			return;
	}

	/*
	 * If this is a TRILL RBridge, then just check whether this link is
	 * used at all for forwarding.  If not, then we're done.
	 */
	if (trillmode) {
		if (!(blp->bl_flags & BLF_TRILLACTIVE) ||
		    (blp->bl_flags & BLF_SDUFAIL)) {
			mac_rx_common(blp->bl_mh, rsrc, mpnext);
			return;
		}
	} else {
		/*
		 * For regular (STP) bridges, if we're in blocking or listening
		 * state, then do nothing.  We don't learn or forward until
		 * told to do so.
		 */
		if (blp->bl_state == BLS_BLOCKLISTEN) {
			mac_rx_common(blp->bl_mh, rsrc, mpnext);
			return;
		}
	}

	/*
	 * Send a copy of the message chain up to the observability node users.
	 * For TRILL, we must obey the VLAN AF rules, so we go packet-by-
	 * packet.
	 */
	if (!trillmode && blp->bl_state == BLS_FORWARDING &&
	    (bmp->bm_flags & BMF_STARTED) &&
	    (mp = copymsgchain(mpnext)) != NULL) {
		mac_rx(bmp->bm_mh, NULL, mp);
	}

	/*
	 * We must be in learning or forwarding state, or using TRILL on a link
	 * with one or more VLANs active.  For each packet in the list, process
	 * the source address, and then attempt to forward.
	 */
	while ((mp = mpnext) != NULL) {
		mpnext = mp->b_next;
		mp->b_next = NULL;

		/*
		 * If we can't decode the header or if the header specifies a
		 * multicast source address (impossible!), then don't bother
		 * learning or forwarding, but go ahead and forward up the
		 * stack for subsequent processing.
		 */
		if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0 ||
		    (hdr_info.mhi_saddr[0] & 1) != 0) {
			KIINCR(bki_drops);
			KLINCR(bkl_drops);
			mac_rx_common(blp->bl_mh, rsrc, mp);
			continue;
		}

		/*
		 * Extract and validate the VLAN ID for this packet.
		 */
		if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) ||
		    !BRIDGE_AF_ISSET(blp, vlanid)) {
			mac_rx_common(blp->bl_mh, rsrc, mp);
			continue;
		}

		if (trillmode) {
			/*
			 * Special test required by TRILL document: must
			 * discard frames with outer address set to ESADI.
			 */
			if (memcmp(hdr_info.mhi_daddr, all_esadi_rbridges,
			    ETHERADDRL) == 0) {
				mac_rx_common(blp->bl_mh, rsrc, mp);
				continue;
			}

			/*
			 * If we're in TRILL mode, then the call above to get
			 * the VLAN ID has also checked that we're the
			 * appointed forwarder, so report that we're handling
			 * this packet to any observability node users.
			 */
			if ((bmp->bm_flags & BMF_STARTED) &&
			    (mpcopy = copymsg(mp)) != NULL)
				mac_rx(bmp->bm_mh, NULL, mpcopy);
		}

		/*
		 * First process the source address and learn from it.  For
		 * TRILL, we learn only if we're the appointed forwarder.
		 */
		bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE,
		    vlanid);

		/*
		 * Now check whether we're forwarding and look up the
		 * destination.  If we can forward, do so.
		 */
		if (trillmode || blp->bl_state == BLS_FORWARDING) {
			mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci,
			    B_FALSE, B_FALSE);
		}
		/* Whatever bridge_forward() hands back still goes up locally */
		if (mp != NULL)
			mac_rx_common(blp->bl_mh, rsrc, mp);
	}
}


/*
 * Transmit-side hook called by the MAC layer for packets sent on a bridged
 * link.  'mh' is really the bridge_link_t, and 'mpnext' is a b_next-linked
 * chain of packets to send.
 *
 * Returns NULL if the whole chain was consumed, or the unsent remainder of
 * the chain if transmission on the local link stalled.
 */
/* ARGSUSED */
static mblk_t *
bridge_xmit_cb(mac_handle_t mh, mac_ring_handle_t rh, mblk_t *mpnext)
{
	bridge_link_t *blp = (bridge_link_t *)mh;
	bridge_inst_t *bip = blp->bl_inst;
	bridge_mac_t *bmp = bip->bi_mac;
	mac_header_info_t hdr_info;
	uint16_t vlanid, tci;
	mblk_t *mp, *mpcopy;
	boolean_t trillmode;

	trillmode = blp->bl_trilldata != NULL;

	/*
	 * If we're using STP and we're in blocking or listening state, or if
	 * we're using TRILL and no VLANs are active, then behave as though the
	 * bridge isn't here at all, and send on the local link alone.
	 */
	if ((!trillmode && blp->bl_state == BLS_BLOCKLISTEN) ||
	    (trillmode &&
	    (!(blp->bl_flags & BLF_TRILLACTIVE) ||
	    (blp->bl_flags & BLF_SDUFAIL)))) {
		KIINCR(bki_sent);
		KLINCR(bkl_xmit);
		MAC_RING_TX(blp->bl_mh, rh, mpnext, mp);
		return (mp);
	}

	/*
	 * Send a copy of the message up to the observability node users.
	 * TRILL needs to check on a packet-by-packet basis.
	 */
	if (!trillmode && blp->bl_state == BLS_FORWARDING &&
	    (bmp->bm_flags & BMF_STARTED) &&
	    (mp = copymsgchain(mpnext)) != NULL) {
		mac_rx(bmp->bm_mh, NULL, mp);
	}

	while ((mp = mpnext) != NULL) {
		mpnext = mp->b_next;
		mp->b_next = NULL;

		/* Unlike the receive side, undecodable packets are dropped */
		if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) {
			freemsg(mp);
			continue;
		}

		/*
		 * Extract and validate the VLAN ID for this packet.
		 */
		if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) ||
		    !BRIDGE_AF_ISSET(blp, vlanid)) {
			freemsg(mp);
			continue;
		}

		/*
		 * If we're using TRILL, then we've now validated that we're
		 * the forwarder for this VLAN, so go ahead and let
		 * observability node users know about the packet.
		 */
		if (trillmode && (bmp->bm_flags & BMF_STARTED) &&
		    (mpcopy = copymsg(mp)) != NULL) {
			mac_rx(bmp->bm_mh, NULL, mpcopy);
		}

		/*
		 * We have to learn from our own transmitted packets, because
		 * there may be a Solaris DLPI raw sender (who can specify his
		 * own source address) using promiscuous mode for receive.  The
		 * mac layer information won't (and can't) tell us everything
		 * we need to know.
		 */
		bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE,
		    vlanid);

		/* attempt forwarding */
		if (trillmode || blp->bl_state == BLS_FORWARDING) {
			mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci,
			    B_FALSE, B_TRUE);
		}
		/*
		 * If bridge_forward() returned the packet, it still has to go
		 * out on this link.  After MAC_RING_TX, 'mp' holds whatever
		 * could not be sent; NULL means the packet went out.
		 */
		if (mp != NULL) {
			MAC_RING_TX(blp->bl_mh, rh, mp, mp);
			if (mp == NULL) {
				KIINCR(bki_sent);
				KLINCR(bkl_xmit);
			}
		}
		/*
		 * If we get stuck, then stop.  Don't let the user's output
		 * packets get out of order.  (More importantly: don't try to
		 * bridge the same packet multiple times if flow control is
		 * asserted.)
		 */
		if (mp != NULL) {
			mp->b_next = mpnext;
			break;
		}
	}
	return (mp);
}

/*
 * This is called by TRILL when it decapsulates a packet, and we must forward
 * locally.  On failure, we just drop.
 *
 * Note that the ingress_nick reported by TRILL must not represent this local
 * node.
 *
 * The message is always consumed: it is either transmitted or freed here.
 */
void
bridge_trill_decaps(bridge_link_t *blp, mblk_t *mp, uint16_t ingress_nick)
{
	mac_header_info_t hdr_info;
	uint16_t vlanid, tci;
	bridge_inst_t *bip = blp->bl_inst;	/* used by macros */
	mblk_t *mpcopy;

	if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) {
		freemsg(mp);
		return;
	}

	/* Extract VLAN ID for this packet. */
	if (hdr_info.mhi_bindsap == ETHERTYPE_VLAN) {
		struct ether_vlan_header *evhp;

		/* LINTED: alignment */
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		tci = ntohs(evhp->ether_tci);
		vlanid = VLAN_ID(tci);
	} else {
		/* Inner VLAN headers are required in TRILL data packets */
		DTRACE_PROBE3(bridge__trill__decaps__novlan, bridge_link_t *,
		    blp, mblk_t *, mp, uint16_t, ingress_nick);
		freemsg(mp);
		return;
	}

	/* Learn the location of this sender in the RBridge network */
	bridge_learn(blp, hdr_info.mhi_saddr, ingress_nick, vlanid);

	/* attempt forwarding */
	mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, B_TRUE, B_TRUE);
	if (mp != NULL) {
		if (bridge_can_send(blp, vlanid)) {
			/* Deliver a copy locally as well */
			if ((mpcopy = copymsg(mp)) != NULL)
				mac_rx_common(blp->bl_mh, NULL, mpcopy);
			MAC_RING_TX(blp->bl_mh, NULL, mp, mp);
		}
		/*
		 * 'mp' is NULL only if the transmit above took the packet;
		 * otherwise (not sendable, or transmit stalled) drop it.
		 */
		if (mp == NULL) {
			KIINCR(bki_sent);
			KLINCR(bkl_xmit);
		} else {
			freemsg(mp);
		}
	}
}
2606 */ 2607 mblk_t * 2608 bridge_trill_output(bridge_link_t *blp, mblk_t *mp) 2609 { 2610 bridge_inst_t *bip = blp->bl_inst; /* used by macros */ 2611 2612 mac_trill_snoop(blp->bl_mh, mp); 2613 MAC_RING_TX(blp->bl_mh, NULL, mp, mp); 2614 if (mp == NULL) { 2615 KIINCR(bki_sent); 2616 KLINCR(bkl_xmit); 2617 } 2618 return (mp); 2619 } 2620 2621 /* 2622 * Set the "appointed forwarder" flag array for this link. TRILL controls 2623 * forwarding on a VLAN basis. The "trillactive" flag is an optimization for 2624 * the forwarder. 2625 */ 2626 void 2627 bridge_trill_setvlans(bridge_link_t *blp, const uint8_t *arr) 2628 { 2629 int i; 2630 uint_t newflags = 0; 2631 2632 for (i = 0; i < BRIDGE_VLAN_ARR_SIZE; i++) { 2633 if ((blp->bl_afs[i] = arr[i]) != 0) 2634 newflags = BLF_TRILLACTIVE; 2635 } 2636 blp->bl_flags = (blp->bl_flags & ~BLF_TRILLACTIVE) | newflags; 2637 } 2638 2639 void 2640 bridge_trill_flush(bridge_link_t *blp, uint16_t vlan, boolean_t dotrill) 2641 { 2642 bridge_inst_t *bip = blp->bl_inst; 2643 bridge_fwd_t *bfp, *bfnext; 2644 avl_tree_t fwd_scavenge; 2645 int i; 2646 2647 _NOTE(ARGUNUSED(vlan)); 2648 2649 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 2650 offsetof(bridge_fwd_t, bf_node)); 2651 rw_enter(&bip->bi_rwlock, RW_WRITER); 2652 bfnext = avl_first(&bip->bi_fwd); 2653 while ((bfp = bfnext) != NULL) { 2654 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 2655 if (bfp->bf_flags & BFF_LOCALADDR) 2656 continue; 2657 if (dotrill) { 2658 /* port doesn't matter if we're flushing TRILL */ 2659 if (bfp->bf_trill_nick == RBRIDGE_NICKNAME_NONE) 2660 continue; 2661 } else { 2662 if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) 2663 continue; 2664 for (i = 0; i < bfp->bf_nlinks; i++) { 2665 if (bfp->bf_links[i] == blp) 2666 break; 2667 } 2668 if (i >= bfp->bf_nlinks) 2669 continue; 2670 } 2671 ASSERT(bfp->bf_flags & BFF_INTREE); 2672 avl_remove(&bip->bi_fwd, bfp); 2673 bfp->bf_flags &= ~BFF_INTREE; 2674 avl_add(&fwd_scavenge, bfp); 2675 } 2676 
rw_exit(&bip->bi_rwlock); 2677 bfnext = avl_first(&fwd_scavenge); 2678 while ((bfp = bfnext) != NULL) { 2679 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 2680 avl_remove(&fwd_scavenge, bfp); 2681 fwd_unref(bfp); 2682 } 2683 avl_destroy(&fwd_scavenge); 2684 } 2685 2686 /* 2687 * Let the mac module take or drop a reference to a bridge link. When this is 2688 * called, the mac module is holding the mi_bridge_lock, so the link cannot be 2689 * in the process of entering or leaving a bridge. 2690 */ 2691 static void 2692 bridge_ref_cb(mac_handle_t mh, boolean_t hold) 2693 { 2694 bridge_link_t *blp = (bridge_link_t *)mh; 2695 2696 if (hold) 2697 atomic_inc_uint(&blp->bl_refs); 2698 else 2699 link_unref(blp); 2700 } 2701 2702 /* 2703 * Handle link state changes reported by the mac layer. This acts as a filter 2704 * for link state changes: if a link is reporting down, but there are other 2705 * links still up on the bridge, then the state is changed to "up." When the 2706 * last link goes down, all are marked down, and when the first link goes up, 2707 * all are marked up. (Recursion is avoided by the use of the "redo" function.) 2708 * 2709 * We treat unknown as equivalent to "up." 2710 */ 2711 static link_state_t 2712 bridge_ls_cb(mac_handle_t mh, link_state_t newls) 2713 { 2714 bridge_link_t *blp = (bridge_link_t *)mh; 2715 bridge_link_t *blcmp; 2716 bridge_inst_t *bip; 2717 bridge_mac_t *bmp; 2718 2719 if (newls != LINK_STATE_DOWN && blp->bl_linkstate != LINK_STATE_DOWN || 2720 (blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) { 2721 blp->bl_linkstate = newls; 2722 return (newls); 2723 } 2724 2725 /* 2726 * Scan first to see if there are any other non-down links. If there 2727 * are, then we're done. Otherwise, if all others are down, then the 2728 * state of this link is the state of the bridge. 
2729 */ 2730 bip = blp->bl_inst; 2731 rw_enter(&bip->bi_rwlock, RW_WRITER); 2732 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 2733 blcmp = list_next(&bip->bi_links, blcmp)) { 2734 if (blcmp != blp && 2735 !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) && 2736 blcmp->bl_linkstate != LINK_STATE_DOWN) 2737 break; 2738 } 2739 2740 if (blcmp != NULL) { 2741 /* 2742 * If there are other links that are considered up, then tell 2743 * the caller that the link is actually still up, regardless of 2744 * this link's underlying state. 2745 */ 2746 blp->bl_linkstate = newls; 2747 newls = LINK_STATE_UP; 2748 } else if (blp->bl_linkstate != newls) { 2749 /* 2750 * If we've found no other 'up' links, and this link has 2751 * changed state, then report the new state of the bridge to 2752 * all other clients. 2753 */ 2754 blp->bl_linkstate = newls; 2755 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 2756 blcmp = list_next(&bip->bi_links, blcmp)) { 2757 if (blcmp != blp && !(blcmp->bl_flags & BLF_DELETED)) 2758 mac_link_redo(blcmp->bl_mh, newls); 2759 } 2760 bmp = bip->bi_mac; 2761 if ((bmp->bm_linkstate = newls) != LINK_STATE_DOWN) 2762 bmp->bm_linkstate = LINK_STATE_UP; 2763 mac_link_redo(bmp->bm_mh, bmp->bm_linkstate); 2764 } 2765 rw_exit(&bip->bi_rwlock); 2766 return (newls); 2767 } 2768 2769 static void 2770 bridge_add_link(void *arg) 2771 { 2772 mblk_t *mp = arg; 2773 bridge_stream_t *bsp; 2774 bridge_inst_t *bip, *bipt; 2775 bridge_mac_t *bmp; 2776 datalink_id_t linkid; 2777 int err; 2778 mac_handle_t mh; 2779 uint_t maxsdu; 2780 bridge_link_t *blp = NULL, *blpt; 2781 const mac_info_t *mip; 2782 boolean_t macopen = B_FALSE; 2783 char linkname[MAXLINKNAMELEN]; 2784 char kstatname[KSTAT_STRLEN]; 2785 int i; 2786 link_state_t linkstate; 2787 mblk_t *mlist; 2788 2789 bsp = (bridge_stream_t *)mp->b_next; 2790 mp->b_next = NULL; 2791 bip = bsp->bs_inst; 2792 /* LINTED: alignment */ 2793 linkid = *(datalink_id_t *)mp->b_cont->b_rptr; 2794 2795 /* 2796 * First make 
sure that there is no other bridge that has this link. 2797 * We don't want to overlap operations from two bridges; the MAC layer 2798 * supports only one bridge on a given MAC at a time. 2799 * 2800 * We rely on the fact that there's just one taskq thread for the 2801 * bridging module: once we've checked for a duplicate, we can drop the 2802 * lock, because no other thread could possibly be adding another link 2803 * until we're done. 2804 */ 2805 mutex_enter(&inst_lock); 2806 for (bipt = list_head(&inst_list); bipt != NULL; 2807 bipt = list_next(&inst_list, bipt)) { 2808 rw_enter(&bipt->bi_rwlock, RW_READER); 2809 for (blpt = list_head(&bipt->bi_links); blpt != NULL; 2810 blpt = list_next(&bipt->bi_links, blpt)) { 2811 if (linkid == blpt->bl_linkid) 2812 break; 2813 } 2814 rw_exit(&bipt->bi_rwlock); 2815 if (blpt != NULL) 2816 break; 2817 } 2818 mutex_exit(&inst_lock); 2819 if (bipt != NULL) { 2820 err = EBUSY; 2821 goto fail; 2822 } 2823 2824 if ((err = mac_open_by_linkid(linkid, &mh)) != 0) 2825 goto fail; 2826 macopen = B_TRUE; 2827 2828 /* we bridge only Ethernet */ 2829 mip = mac_info(mh); 2830 if (mip->mi_media != DL_ETHER) { 2831 err = ENOTSUP; 2832 goto fail; 2833 } 2834 2835 /* 2836 * Get the current maximum SDU on this interface. If there are other 2837 * links on the bridge, then this one must match, or it errors out. 2838 * Otherwise, the first link becomes the standard for the new bridge. 
2839 */ 2840 mac_sdu_get(mh, NULL, &maxsdu); 2841 bmp = bip->bi_mac; 2842 if (list_is_empty(&bip->bi_links)) { 2843 bmp->bm_maxsdu = maxsdu; 2844 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 2845 } 2846 2847 /* figure the kstat name; also used as the mac client name */ 2848 i = MBLKL(mp->b_cont) - sizeof (datalink_id_t); 2849 if (i < 0 || i >= MAXLINKNAMELEN) 2850 i = MAXLINKNAMELEN - 1; 2851 bcopy(mp->b_cont->b_rptr + sizeof (datalink_id_t), linkname, i); 2852 linkname[i] = '\0'; 2853 (void) snprintf(kstatname, sizeof (kstatname), "%s-%s", bip->bi_name, 2854 linkname); 2855 2856 if ((blp = kmem_zalloc(sizeof (*blp), KM_NOSLEEP)) == NULL) { 2857 err = ENOMEM; 2858 goto fail; 2859 } 2860 blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED); 2861 if (blp->bl_lfailmp == NULL) { 2862 kmem_free(blp, sizeof (*blp)); 2863 err = ENOMEM; 2864 goto fail; 2865 } 2866 2867 atomic_inc_uint(&bip->bi_refs); 2868 blp->bl_inst = bip; 2869 blp->bl_mh = mh; 2870 blp->bl_linkid = linkid; 2871 blp->bl_maxsdu = maxsdu; 2872 cv_init(&blp->bl_trillwait, NULL, CV_DRIVER, NULL); 2873 mutex_init(&blp->bl_trilllock, NULL, MUTEX_DRIVER, NULL); 2874 (void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs)); 2875 2876 err = mac_client_open(mh, &blp->bl_mch, kstatname, 0); 2877 if (err != 0) 2878 goto fail; 2879 blp->bl_flags |= BLF_CLIENT_OPEN; 2880 2881 err = mac_margin_add(mh, &blp->bl_margin, B_TRUE); 2882 if (err != 0) 2883 goto fail; 2884 blp->bl_flags |= BLF_MARGIN_ADDED; 2885 2886 blp->bl_mnh = mac_notify_add(mh, bridge_notify_cb, blp); 2887 2888 err = mac_bridge_set(mh, (mac_handle_t)blp); 2889 if (err != 0) 2890 goto fail; 2891 blp->bl_flags |= BLF_SET_BRIDGE; 2892 2893 err = mac_promisc_add(blp->bl_mch, MAC_CLIENT_PROMISC_ALL, NULL, 2894 blp, &blp->bl_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP); 2895 if (err != 0) 2896 goto fail; 2897 blp->bl_flags |= BLF_PROM_ADDED; 2898 2899 bridge_new_unicst(blp); 2900 2901 blp->bl_ksp = kstat_setup((kstat_named_t *)&blp->bl_kstats, 2902 
link_kstats_list, Dim(link_kstats_list), kstatname); 2903 2904 /* 2905 * The link holds a reference to the bridge instance, so that the 2906 * instance can't go away before the link is freed. The insertion into 2907 * bi_links holds a reference on the link. When marking as removed 2908 * from bi_links (BLF_DELETED), drop the reference on the link. When 2909 * freeing the link, drop the reference on the instance. 2910 */ 2911 rw_enter(&bip->bi_rwlock, RW_WRITER); 2912 list_insert_tail(&bip->bi_links, blp); 2913 atomic_inc_uint(&blp->bl_refs); 2914 2915 /* 2916 * If the new link is no good on this bridge, then let the daemon know 2917 * about the problem. 2918 */ 2919 mlist = NULL; 2920 if (maxsdu != bmp->bm_maxsdu) 2921 link_sdu_fail(blp, B_TRUE, &mlist); 2922 rw_exit(&bip->bi_rwlock); 2923 send_up_messages(bip, mlist); 2924 2925 /* 2926 * Trigger a link state update so that if this link is the first one 2927 * "up" in the bridge, then we notify everyone. This triggers a trip 2928 * through bridge_ls_cb. 2929 */ 2930 linkstate = mac_stat_get(mh, MAC_STAT_LOWLINK_STATE); 2931 blp->bl_linkstate = LINK_STATE_DOWN; 2932 mac_link_update(mh, linkstate); 2933 2934 /* 2935 * We now need to report back to the stream that invoked us, and then 2936 * drop the reference on the stream that we're holding. 
2937 */ 2938 miocack(bsp->bs_wq, mp, 0, 0); 2939 stream_unref(bsp); 2940 return; 2941 2942 fail: 2943 if (blp == NULL) { 2944 if (macopen) 2945 mac_close(mh); 2946 } else { 2947 link_shutdown(blp); 2948 link_free(blp); 2949 } 2950 miocnak(bsp->bs_wq, mp, 0, err); 2951 stream_unref(bsp); 2952 } 2953 2954 static void 2955 bridge_rem_link(void *arg) 2956 { 2957 mblk_t *mp = arg; 2958 bridge_stream_t *bsp; 2959 bridge_inst_t *bip; 2960 bridge_mac_t *bmp; 2961 datalink_id_t linkid; 2962 bridge_link_t *blp, *blsave; 2963 boolean_t found; 2964 mblk_t *mlist; 2965 2966 bsp = (bridge_stream_t *)mp->b_next; 2967 mp->b_next = NULL; 2968 bip = bsp->bs_inst; 2969 /* LINTED: alignment */ 2970 linkid = *(datalink_id_t *)mp->b_cont->b_rptr; 2971 2972 /* 2973 * We become reader here so that we can loop over the other links and 2974 * deliver link up/down notification. 2975 */ 2976 rw_enter(&bip->bi_rwlock, RW_READER); 2977 found = B_FALSE; 2978 for (blp = list_head(&bip->bi_links); blp != NULL; 2979 blp = list_next(&bip->bi_links, blp)) { 2980 if (blp->bl_linkid == linkid && 2981 !(blp->bl_flags & BLF_DELETED)) { 2982 blp->bl_flags |= BLF_DELETED; 2983 (void) ddi_taskq_dispatch(bridge_taskq, link_shutdown, 2984 blp, DDI_SLEEP); 2985 found = B_TRUE; 2986 break; 2987 } 2988 } 2989 2990 /* 2991 * Check if this link is up and the remainder of the links are all 2992 * down. 
2993 */ 2994 if (blp != NULL && blp->bl_linkstate != LINK_STATE_DOWN) { 2995 for (blp = list_head(&bip->bi_links); blp != NULL; 2996 blp = list_next(&bip->bi_links, blp)) { 2997 if (blp->bl_linkstate != LINK_STATE_DOWN && 2998 !(blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) 2999 break; 3000 } 3001 if (blp == NULL) { 3002 for (blp = list_head(&bip->bi_links); blp != NULL; 3003 blp = list_next(&bip->bi_links, blp)) { 3004 if (!(blp->bl_flags & BLF_DELETED)) 3005 mac_link_redo(blp->bl_mh, 3006 LINK_STATE_DOWN); 3007 } 3008 bmp = bip->bi_mac; 3009 bmp->bm_linkstate = LINK_STATE_DOWN; 3010 mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN); 3011 } 3012 } 3013 3014 /* 3015 * Check if there's just one working link left on the bridge. If so, 3016 * then that link is now authoritative for bridge MTU. 3017 */ 3018 blsave = NULL; 3019 for (blp = list_head(&bip->bi_links); blp != NULL; 3020 blp = list_next(&bip->bi_links, blp)) { 3021 if (!(blp->bl_flags & BLF_DELETED)) { 3022 if (blsave == NULL) 3023 blsave = blp; 3024 else 3025 break; 3026 } 3027 } 3028 mlist = NULL; 3029 bmp = bip->bi_mac; 3030 if (blsave != NULL && blp == NULL && 3031 blsave->bl_maxsdu != bmp->bm_maxsdu) { 3032 bmp->bm_maxsdu = blsave->bl_maxsdu; 3033 (void) mac_maxsdu_update(bmp->bm_mh, blsave->bl_maxsdu); 3034 link_sdu_fail(blsave, B_FALSE, &mlist); 3035 } 3036 rw_exit(&bip->bi_rwlock); 3037 send_up_messages(bip, mlist); 3038 3039 if (found) 3040 miocack(bsp->bs_wq, mp, 0, 0); 3041 else 3042 miocnak(bsp->bs_wq, mp, 0, ENOENT); 3043 stream_unref(bsp); 3044 } 3045 3046 /* 3047 * This function intentionally returns with bi_rwlock held; it is intended for 3048 * quick checks and updates. 
3049 */ 3050 static bridge_link_t * 3051 enter_link(bridge_inst_t *bip, datalink_id_t linkid) 3052 { 3053 bridge_link_t *blp; 3054 3055 rw_enter(&bip->bi_rwlock, RW_READER); 3056 for (blp = list_head(&bip->bi_links); blp != NULL; 3057 blp = list_next(&bip->bi_links, blp)) { 3058 if (blp->bl_linkid == linkid && !(blp->bl_flags & BLF_DELETED)) 3059 break; 3060 } 3061 return (blp); 3062 } 3063 3064 static void 3065 bridge_ioctl(queue_t *wq, mblk_t *mp) 3066 { 3067 bridge_stream_t *bsp = wq->q_ptr; 3068 bridge_inst_t *bip; 3069 struct iocblk *iop; 3070 int rc = EINVAL; 3071 int len = 0; 3072 bridge_link_t *blp; 3073 cred_t *cr; 3074 3075 /* LINTED: alignment */ 3076 iop = (struct iocblk *)mp->b_rptr; 3077 3078 /* 3079 * For now, all of the bridge ioctls are privileged. 3080 */ 3081 if ((cr = msg_getcred(mp, NULL)) == NULL) 3082 cr = iop->ioc_cr; 3083 if (cr != NULL && secpolicy_net_config(cr, B_FALSE) != 0) { 3084 miocnak(wq, mp, 0, EPERM); 3085 return; 3086 } 3087 3088 switch (iop->ioc_cmd) { 3089 case BRIOC_NEWBRIDGE: { 3090 bridge_newbridge_t *bnb; 3091 3092 if (bsp->bs_inst != NULL || 3093 (rc = miocpullup(mp, sizeof (bridge_newbridge_t))) != 0) 3094 break; 3095 /* LINTED: alignment */ 3096 bnb = (bridge_newbridge_t *)mp->b_cont->b_rptr; 3097 bnb->bnb_name[MAXNAMELEN-1] = '\0'; 3098 if ((rc = bridge_create(bnb->bnb_linkid, 3099 bnb->bnb_name, &bip)) != 0) 3100 break; 3101 3102 rw_enter(&bip->bi_rwlock, RW_WRITER); 3103 if (bip->bi_control != NULL) { 3104 rw_exit(&bip->bi_rwlock); 3105 bridge_unref(bip); 3106 rc = EBUSY; 3107 } else { 3108 atomic_inc_uint(&bip->bi_refs); 3109 bsp->bs_inst = bip; /* stream holds reference */ 3110 bip->bi_control = bsp; 3111 rw_exit(&bip->bi_rwlock); 3112 rc = 0; 3113 } 3114 break; 3115 } 3116 3117 case BRIOC_ADDLINK: 3118 if ((bip = bsp->bs_inst) == NULL || 3119 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0) 3120 break; 3121 /* 3122 * We cannot perform the action in this thread, because we're 3123 * not in process context, and 
we may already be holding 3124 * MAC-related locks. Place the request on taskq. 3125 */ 3126 mp->b_next = (mblk_t *)bsp; 3127 stream_ref(bsp); 3128 (void) ddi_taskq_dispatch(bridge_taskq, bridge_add_link, mp, 3129 DDI_SLEEP); 3130 return; 3131 3132 case BRIOC_REMLINK: 3133 if ((bip = bsp->bs_inst) == NULL || 3134 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0) 3135 break; 3136 /* 3137 * We cannot perform the action in this thread, because we're 3138 * not in process context, and we may already be holding 3139 * MAC-related locks. Place the request on taskq. 3140 */ 3141 mp->b_next = (mblk_t *)bsp; 3142 stream_ref(bsp); 3143 (void) ddi_taskq_dispatch(bridge_taskq, bridge_rem_link, mp, 3144 DDI_SLEEP); 3145 return; 3146 3147 case BRIOC_SETSTATE: { 3148 bridge_setstate_t *bss; 3149 3150 if ((bip = bsp->bs_inst) == NULL || 3151 (rc = miocpullup(mp, sizeof (*bss))) != 0) 3152 break; 3153 /* LINTED: alignment */ 3154 bss = (bridge_setstate_t *)mp->b_cont->b_rptr; 3155 if ((blp = enter_link(bip, bss->bss_linkid)) == NULL) { 3156 rc = ENOENT; 3157 } else { 3158 rc = 0; 3159 blp->bl_state = bss->bss_state; 3160 } 3161 rw_exit(&bip->bi_rwlock); 3162 break; 3163 } 3164 3165 case BRIOC_SETPVID: { 3166 bridge_setpvid_t *bsv; 3167 3168 if ((bip = bsp->bs_inst) == NULL || 3169 (rc = miocpullup(mp, sizeof (*bsv))) != 0) 3170 break; 3171 /* LINTED: alignment */ 3172 bsv = (bridge_setpvid_t *)mp->b_cont->b_rptr; 3173 if (bsv->bsv_vlan > VLAN_ID_MAX) 3174 break; 3175 if ((blp = enter_link(bip, bsv->bsv_linkid)) == NULL) { 3176 rc = ENOENT; 3177 } else if (blp->bl_pvid == bsv->bsv_vlan) { 3178 rc = 0; 3179 } else { 3180 rc = 0; 3181 BRIDGE_VLAN_CLR(blp, blp->bl_pvid); 3182 blp->bl_pvid = bsv->bsv_vlan; 3183 if (blp->bl_pvid != 0) 3184 BRIDGE_VLAN_SET(blp, blp->bl_pvid); 3185 } 3186 rw_exit(&bip->bi_rwlock); 3187 break; 3188 } 3189 3190 case BRIOC_VLANENAB: { 3191 bridge_vlanenab_t *bve; 3192 3193 if ((bip = bsp->bs_inst) == NULL || 3194 (rc = miocpullup(mp, sizeof (*bve))) != 0) 
3195 break; 3196 /* LINTED: alignment */ 3197 bve = (bridge_vlanenab_t *)mp->b_cont->b_rptr; 3198 if (bve->bve_vlan > VLAN_ID_MAX) 3199 break; 3200 if ((blp = enter_link(bip, bve->bve_linkid)) == NULL) { 3201 rc = ENOENT; 3202 } else { 3203 rc = 0; 3204 /* special case: vlan 0 means "all" */ 3205 if (bve->bve_vlan == 0) { 3206 (void) memset(blp->bl_vlans, 3207 bve->bve_onoff ? ~0 : 0, 3208 sizeof (blp->bl_vlans)); 3209 BRIDGE_VLAN_CLR(blp, 0); 3210 if (blp->bl_pvid != 0) 3211 BRIDGE_VLAN_SET(blp, blp->bl_pvid); 3212 } else if (bve->bve_vlan == blp->bl_pvid) { 3213 rc = EINVAL; 3214 } else if (bve->bve_onoff) { 3215 BRIDGE_VLAN_SET(blp, bve->bve_vlan); 3216 } else { 3217 BRIDGE_VLAN_CLR(blp, bve->bve_vlan); 3218 } 3219 } 3220 rw_exit(&bip->bi_rwlock); 3221 break; 3222 } 3223 3224 case BRIOC_FLUSHFWD: { 3225 bridge_flushfwd_t *bff; 3226 bridge_fwd_t *bfp, *bfnext; 3227 avl_tree_t fwd_scavenge; 3228 int i; 3229 3230 if ((bip = bsp->bs_inst) == NULL || 3231 (rc = miocpullup(mp, sizeof (*bff))) != 0) 3232 break; 3233 /* LINTED: alignment */ 3234 bff = (bridge_flushfwd_t *)mp->b_cont->b_rptr; 3235 rw_enter(&bip->bi_rwlock, RW_WRITER); 3236 /* This case means "all" */ 3237 if (bff->bff_linkid == DATALINK_INVALID_LINKID) { 3238 blp = NULL; 3239 } else { 3240 for (blp = list_head(&bip->bi_links); blp != NULL; 3241 blp = list_next(&bip->bi_links, blp)) { 3242 if (blp->bl_linkid == bff->bff_linkid && 3243 !(blp->bl_flags & BLF_DELETED)) 3244 break; 3245 } 3246 if (blp == NULL) { 3247 rc = ENOENT; 3248 rw_exit(&bip->bi_rwlock); 3249 break; 3250 } 3251 } 3252 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 3253 offsetof(bridge_fwd_t, bf_node)); 3254 bfnext = avl_first(&bip->bi_fwd); 3255 while ((bfp = bfnext) != NULL) { 3256 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 3257 if (bfp->bf_flags & BFF_LOCALADDR) 3258 continue; 3259 if (blp != NULL) { 3260 for (i = 0; i < bfp->bf_maxlinks; i++) { 3261 if (bfp->bf_links[i] == blp) 3262 break; 3263 } 3264 /* 3265 * If the link 
is there and we're excluding, 3266 * then skip. If the link is not there and 3267 * we're doing only that link, then skip. 3268 */ 3269 if ((i < bfp->bf_maxlinks) == bff->bff_exclude) 3270 continue; 3271 } 3272 ASSERT(bfp->bf_flags & BFF_INTREE); 3273 avl_remove(&bip->bi_fwd, bfp); 3274 bfp->bf_flags &= ~BFF_INTREE; 3275 avl_add(&fwd_scavenge, bfp); 3276 } 3277 rw_exit(&bip->bi_rwlock); 3278 bfnext = avl_first(&fwd_scavenge); 3279 while ((bfp = bfnext) != NULL) { 3280 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 3281 avl_remove(&fwd_scavenge, bfp); 3282 fwd_unref(bfp); /* drop tree reference */ 3283 } 3284 avl_destroy(&fwd_scavenge); 3285 break; 3286 } 3287 3288 case BRIOC_TABLEMAX: 3289 if ((bip = bsp->bs_inst) == NULL || 3290 (rc = miocpullup(mp, sizeof (uint32_t))) != 0) 3291 break; 3292 /* LINTED: alignment */ 3293 bip->bi_tablemax = *(uint32_t *)mp->b_cont->b_rptr; 3294 break; 3295 } 3296 3297 if (rc == 0) 3298 miocack(wq, mp, len, 0); 3299 else 3300 miocnak(wq, mp, 0, rc); 3301 } 3302 3303 static void 3304 bridge_wput(queue_t *wq, mblk_t *mp) 3305 { 3306 switch (DB_TYPE(mp)) { 3307 case M_IOCTL: 3308 bridge_ioctl(wq, mp); 3309 break; 3310 case M_FLUSH: 3311 if (*mp->b_rptr & FLUSHW) 3312 *mp->b_rptr &= ~FLUSHW; 3313 if (*mp->b_rptr & FLUSHR) 3314 qreply(wq, mp); 3315 else 3316 freemsg(mp); 3317 break; 3318 default: 3319 freemsg(mp); 3320 break; 3321 } 3322 } 3323 3324 /* 3325 * This function allocates the main data structures for the bridge driver and 3326 * connects us into devfs. 
3327 */ 3328 static void 3329 bridge_inst_init(void) 3330 { 3331 bridge_scan_interval = 5 * drv_usectohz(1000000); 3332 bridge_fwd_age = 25 * drv_usectohz(1000000); 3333 3334 rw_init(&bmac_rwlock, NULL, RW_DRIVER, NULL); 3335 list_create(&bmac_list, sizeof (bridge_mac_t), 3336 offsetof(bridge_mac_t, bm_node)); 3337 list_create(&inst_list, sizeof (bridge_inst_t), 3338 offsetof(bridge_inst_t, bi_node)); 3339 cv_init(&inst_cv, NULL, CV_DRIVER, NULL); 3340 mutex_init(&inst_lock, NULL, MUTEX_DRIVER, NULL); 3341 cv_init(&stream_ref_cv, NULL, CV_DRIVER, NULL); 3342 mutex_init(&stream_ref_lock, NULL, MUTEX_DRIVER, NULL); 3343 3344 mac_bridge_vectors(bridge_xmit_cb, bridge_recv_cb, bridge_ref_cb, 3345 bridge_ls_cb); 3346 } 3347 3348 /* 3349 * This function disconnects from devfs and destroys all data structures in 3350 * preparation for unload. It's assumed that there are no active bridge 3351 * references left at this point. 3352 */ 3353 static void 3354 bridge_inst_fini(void) 3355 { 3356 mac_bridge_vectors(NULL, NULL, NULL, NULL); 3357 if (bridge_timerid != 0) 3358 (void) untimeout(bridge_timerid); 3359 rw_destroy(&bmac_rwlock); 3360 list_destroy(&bmac_list); 3361 list_destroy(&inst_list); 3362 cv_destroy(&inst_cv); 3363 mutex_destroy(&inst_lock); 3364 cv_destroy(&stream_ref_cv); 3365 mutex_destroy(&stream_ref_lock); 3366 } 3367 3368 /* 3369 * bridge_attach() 3370 * 3371 * Description: 3372 * Attach bridge driver to the system. 
3373 */ 3374 static int 3375 bridge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3376 { 3377 if (cmd != DDI_ATTACH) 3378 return (DDI_FAILURE); 3379 3380 if (ddi_create_minor_node(dip, BRIDGE_CTL, S_IFCHR, 0, DDI_PSEUDO, 3381 CLONE_DEV) == DDI_FAILURE) { 3382 return (DDI_FAILURE); 3383 } 3384 3385 if (dld_ioc_register(BRIDGE_IOC, bridge_ioc_list, 3386 DLDIOCCNT(bridge_ioc_list)) != 0) { 3387 ddi_remove_minor_node(dip, BRIDGE_CTL); 3388 return (DDI_FAILURE); 3389 } 3390 3391 bridge_dev_info = dip; 3392 bridge_major = ddi_driver_major(dip); 3393 bridge_taskq = ddi_taskq_create(dip, "bridge", 1, TASKQ_DEFAULTPRI, 0); 3394 return (DDI_SUCCESS); 3395 } 3396 3397 /* 3398 * bridge_detach() 3399 * 3400 * Description: 3401 * Detach an interface to the system. 3402 */ 3403 static int 3404 bridge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3405 { 3406 if (cmd != DDI_DETACH) 3407 return (DDI_FAILURE); 3408 3409 ddi_remove_minor_node(dip, NULL); 3410 ddi_taskq_destroy(bridge_taskq); 3411 bridge_dev_info = NULL; 3412 return (DDI_SUCCESS); 3413 } 3414 3415 /* 3416 * bridge_info() 3417 * 3418 * Description: 3419 * Translate "dev_t" to a pointer to the associated "dev_info_t". 
3420 */ 3421 /* ARGSUSED */ 3422 static int 3423 bridge_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, 3424 void **result) 3425 { 3426 int rc; 3427 3428 switch (infocmd) { 3429 case DDI_INFO_DEVT2DEVINFO: 3430 if (bridge_dev_info == NULL) { 3431 rc = DDI_FAILURE; 3432 } else { 3433 *result = (void *)bridge_dev_info; 3434 rc = DDI_SUCCESS; 3435 } 3436 break; 3437 case DDI_INFO_DEVT2INSTANCE: 3438 *result = NULL; 3439 rc = DDI_SUCCESS; 3440 break; 3441 default: 3442 rc = DDI_FAILURE; 3443 break; 3444 } 3445 return (rc); 3446 } 3447 3448 static struct module_info bridge_modinfo = { 3449 2105, /* mi_idnum */ 3450 "bridge", /* mi_idname */ 3451 0, /* mi_minpsz */ 3452 16384, /* mi_maxpsz */ 3453 65536, /* mi_hiwat */ 3454 128 /* mi_lowat */ 3455 }; 3456 3457 static struct qinit bridge_rinit = { 3458 NULL, /* qi_putp */ 3459 NULL, /* qi_srvp */ 3460 bridge_open, /* qi_qopen */ 3461 bridge_close, /* qi_qclose */ 3462 NULL, /* qi_qadmin */ 3463 &bridge_modinfo, /* qi_minfo */ 3464 NULL /* qi_mstat */ 3465 }; 3466 3467 static struct qinit bridge_winit = { 3468 (int (*)())bridge_wput, /* qi_putp */ 3469 NULL, /* qi_srvp */ 3470 NULL, /* qi_qopen */ 3471 NULL, /* qi_qclose */ 3472 NULL, /* qi_qadmin */ 3473 &bridge_modinfo, /* qi_minfo */ 3474 NULL /* qi_mstat */ 3475 }; 3476 3477 static struct streamtab bridge_tab = { 3478 &bridge_rinit, /* st_rdinit */ 3479 &bridge_winit /* st_wrinit */ 3480 }; 3481 3482 /* No STREAMS perimeters; we do all our own locking */ 3483 DDI_DEFINE_STREAM_OPS(bridge_ops, nulldev, nulldev, bridge_attach, 3484 bridge_detach, nodev, bridge_info, D_NEW | D_MP, &bridge_tab, 3485 ddi_quiesce_not_supported); 3486 3487 static struct modldrv modldrv = { 3488 &mod_driverops, 3489 "bridging driver", 3490 &bridge_ops 3491 }; 3492 3493 static struct modlinkage modlinkage = { 3494 MODREV_1, 3495 (void *)&modldrv, 3496 NULL 3497 }; 3498 3499 int 3500 _init(void) 3501 { 3502 int retv; 3503 3504 bridge_inst_init(); 3505 if ((retv = 
mod_install(&modlinkage)) != 0) 3506 bridge_inst_fini(); 3507 return (retv); 3508 } 3509 3510 int 3511 _fini(void) 3512 { 3513 int retv; 3514 3515 rw_enter(&bmac_rwlock, RW_READER); 3516 retv = list_is_empty(&bmac_list) ? 0 : EBUSY; 3517 rw_exit(&bmac_rwlock); 3518 if (retv == 0 && 3519 (retv = mod_remove(&modlinkage)) == 0) 3520 bridge_inst_fini(); 3521 return (retv); 3522 } 3523 3524 int 3525 _info(struct modinfo *modinfop) 3526 { 3527 return (mod_info(&modlinkage, modinfop)); 3528 } 3529