1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * This module implements a STREAMS driver that provides layer-two (Ethernet) 29 * bridging functionality. The STREAMS interface is used to provide 30 * observability (snoop/wireshark) and control, but not for interface plumbing. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/bitmap.h> 35 #include <sys/cmn_err.h> 36 #include <sys/conf.h> 37 #include <sys/ddi.h> 38 #include <sys/errno.h> 39 #include <sys/kstat.h> 40 #include <sys/modctl.h> 41 #include <sys/note.h> 42 #include <sys/param.h> 43 #include <sys/policy.h> 44 #include <sys/sdt.h> 45 #include <sys/stat.h> 46 #include <sys/stream.h> 47 #include <sys/stropts.h> 48 #include <sys/strsun.h> 49 #include <sys/sunddi.h> 50 #include <sys/sysmacros.h> 51 #include <sys/systm.h> 52 #include <sys/time.h> 53 #include <sys/dlpi.h> 54 #include <sys/dls.h> 55 #include <sys/mac_ether.h> 56 #include <sys/mac_provider.h> 57 #include <sys/mac_client_priv.h> 58 #include <sys/mac_impl.h> 59 #include <sys/vlan.h> 60 #include <net/bridge.h> 61 #include <net/bridge_impl.h> 62 #include <net/trill.h> 63 #include <sys/dld_ioc.h> 64 65 /* 66 * Locks and reference counts: object lifetime and design. 67 * 68 * bridge_mac_t 69 * Bridge mac (snoop) instances are in bmac_list, which is protected by 70 * bmac_rwlock. They're allocated by bmac_alloc and freed by bridge_timer(). 71 * Every bridge_inst_t has a single bridge_mac_t, but when bridge_inst_t goes 72 * away, the bridge_mac_t remains until either all of the users go away 73 * (detected by a timer) or until the instance is picked up again by the same 74 * bridge starting back up. 75 * 76 * bridge_inst_t 77 * Bridge instances are in inst_list, which is protected by inst_lock. 78 * They're allocated by inst_alloc() and freed by inst_free(). After 79 * allocation, an instance is placed in inst_list, and the reference count is 80 * incremented to represent this. That reference is decremented when the 81 * BIF_SHUTDOWN flag is set, and no new increments may occur. When the last 82 * reference is freed, the instance is removed from the list. 83 * 84 * Bridge instances have lists of links and an AVL tree of forwarding 85 * entries. Each of these structures holds one reference on the bridge 86 * instance. These lists and tree are protected by bi_rwlock. 87 * 88 * bridge_stream_t 89 * Bridge streams are allocated by stream_alloc() and freed by stream_free(). 90 * These streams are created when "bridged" opens /dev/bridgectl, and are 91 * used to create new bridge instances (via BRIOC_NEWBRIDGE) and control the 92 * links on the bridge. 
When a stream closes, the bridge instance created is 93 * destroyed. There's at most one bridge instance for a given control 94 * stream. 95 * 96 * bridge_link_t 97 * Links are allocated by bridge_add_link() and freed by link_free(). The 98 * bi_links list holds a reference to the link. When the BLF_DELETED flag is 99 * set, that reference is dropped. The link isn't removed from the list 100 * until the last reference drops. Each forwarding entry that uses a given 101 * link holds a reference, as does each thread transmitting a packet via the 102 * link. The MAC layer calls in via bridge_ref_cb() to hold a reference on 103 * a link when transmitting. 104 * 105 * It's important that once BLF_DELETED is set, there's no way for the 106 * reference count to increase again. If it can, then the link may be 107 * double-freed. The BLF_FREED flag is intended for use with assertions to 108 * guard against this in testing. 109 * 110 * bridge_fwd_t 111 * Bridge forwarding entries are allocated by bridge_recv_cb() and freed by 112 * fwd_free(). The bi_fwd AVL tree holds one reference to the entry. Unlike 113 * other data structures, the reference is dropped when the entry is removed 114 * from the tree by fwd_delete(), and the BFF_INTREE flag is removed. Each 115 * thread that's forwarding a packet to a known destination holds a reference 116 * to a forwarding entry. 117 * 118 * TRILL notes: 119 * 120 * The TRILL module does all of its I/O through bridging. It uses references 121 * on the bridge_inst_t and bridge_link_t structures, and has seven entry 122 * points and four callbacks. One entry point is for setting the callbacks 123 * (bridge_trill_register_cb). There are four entry points for taking bridge 124 * and link references (bridge_trill_{br,ln}{ref,unref}). The final two 125 * entry points are for decapsulated packets from TRILL (bridge_trill_decaps) 126 * that need to be bridged locally, and for TRILL-encapsulated output packets 127 * (bridge_trill_output). 128 * 129 * The four callbacks comprise two notification functions for bridges and 130 * links being deleted, one function for raw received TRILL packets, and one 131 * for bridge output to non-local TRILL destinations (tunnel entry). 132 */ 133 134 /* 135 * Ethernet reserved multicast addresses for TRILL; used also in TRILL module. 136 */ 137 const uint8_t all_isis_rbridges[] = ALL_ISIS_RBRIDGES; 138 static const uint8_t all_esadi_rbridges[] = ALL_ESADI_RBRIDGES; 139 const uint8_t bridge_group_address[] = BRIDGE_GROUP_ADDRESS; 140 141 static const char *inst_kstats_list[] = { KSINST_NAMES }; 142 static const char *link_kstats_list[] = { KSLINK_NAMES }; 143 144 #define KREF(p, m, vn) p->m.vn.value.ui64 145 #define KINCR(p, m, vn) ++KREF(p, m, vn) 146 #define KDECR(p, m, vn) --KREF(p, m, vn) 147 148 #define KIPINCR(p, vn) KINCR(p, bi_kstats, vn) 149 #define KIPDECR(p, vn) KDECR(p, bi_kstats, vn) 150 #define KLPINCR(p, vn) KINCR(p, bl_kstats, vn) 151 152 #define KIINCR(vn) KIPINCR(bip, vn) 153 #define KIDECR(vn) KIPDECR(bip, vn) 154 #define KLINCR(vn) KLPINCR(blp, vn) 155 156 #define Dim(x) (sizeof (x) / sizeof (*(x))) 157 158 /* Amount of overhead added when encapsulating with VLAN headers */ 159 #define VLAN_INCR (sizeof (struct ether_vlan_header) - \ 160 sizeof (struct ether_header)) 161 162 static dev_info_t *bridge_dev_info; 163 static major_t bridge_major; 164 static ddi_taskq_t *bridge_taskq; 165 166 /* 167 * These are the bridge instance management data structures. The mutex lock 168 * protects the list of bridge instances. 
A reference count is then used on 169 * each instance to determine when to free it. We use mac_minor_hold() to 170 * allocate minor_t values, which are used both for self-cloning /dev/net/ 171 * device nodes as well as client streams. Minor node 0 is reserved for the 172 * allocation control node. 173 */ 174 static list_t inst_list; 175 static kcondvar_t inst_cv; /* Allows us to wait for shutdown */ 176 static kmutex_t inst_lock; 177 178 static krwlock_t bmac_rwlock; 179 static list_t bmac_list; 180 181 /* Wait for taskq entries that use STREAMS */ 182 static kcondvar_t stream_ref_cv; 183 static kmutex_t stream_ref_lock; 184 185 static timeout_id_t bridge_timerid; 186 static clock_t bridge_scan_interval; 187 static clock_t bridge_fwd_age; 188 189 static bridge_inst_t *bridge_find_name(const char *); 190 static void bridge_timer(void *); 191 static void bridge_unref(bridge_inst_t *); 192 193 static const uint8_t zero_addr[ETHERADDRL] = { 0 }; 194 195 /* Global TRILL linkage */ 196 static trill_recv_pkt_t trill_recv_fn; 197 static trill_encap_pkt_t trill_encap_fn; 198 static trill_br_dstr_t trill_brdstr_fn; 199 static trill_ln_dstr_t trill_lndstr_fn; 200 201 /* special settings to accommodate DLD flow control; see dld_str.c */ 202 static struct module_info bridge_dld_modinfo = { 203 0, /* mi_idnum */ 204 BRIDGE_DEV_NAME, /* mi_idname */ 205 0, /* mi_minpsz */ 206 INFPSZ, /* mi_maxpsz */ 207 1, /* mi_hiwat */ 208 0 /* mi_lowat */ 209 }; 210 211 static struct qinit bridge_dld_rinit = { 212 NULL, /* qi_putp */ 213 NULL, /* qi_srvp */ 214 dld_open, /* qi_qopen */ 215 dld_close, /* qi_qclose */ 216 NULL, /* qi_qadmin */ 217 &bridge_dld_modinfo, /* qi_minfo */ 218 NULL /* qi_mstat */ 219 }; 220 221 static struct qinit bridge_dld_winit = { 222 (int (*)())dld_wput, /* qi_putp */ 223 (int (*)())dld_wsrv, /* qi_srvp */ 224 NULL, /* qi_qopen */ 225 NULL, /* qi_qclose */ 226 NULL, /* qi_qadmin */ 227 &bridge_dld_modinfo, /* qi_minfo */ 228 NULL /* qi_mstat */ 229 }; 230 231 static int bridge_ioc_listfwd(void *, intptr_t, int, cred_t *, int *); 232 233 /* GLDv3 control ioctls used by Bridging */ 234 static dld_ioc_info_t bridge_ioc_list[] = { 235 {BRIDGE_IOC_LISTFWD, DLDCOPYINOUT, sizeof (bridge_listfwd_t), 236 bridge_ioc_listfwd, NULL}, 237 }; 238 239 /* 240 * Given a bridge mac pointer, get a ref-held pointer to the corresponding 241 * bridge instance, if any. We must hold the global bmac_rwlock so that 242 * bm_inst doesn't slide out from under us. 243 */ 244 static bridge_inst_t * 245 mac_to_inst(const bridge_mac_t *bmp) 246 { 247 bridge_inst_t *bip; 248 249 rw_enter(&bmac_rwlock, RW_READER); 250 if ((bip = bmp->bm_inst) != NULL) 251 atomic_inc_uint(&bip->bi_refs); 252 rw_exit(&bmac_rwlock); 253 return (bip); 254 } 255 256 static void 257 link_sdu_fail(bridge_link_t *blp, boolean_t failed, mblk_t **mlist) 258 { 259 mblk_t *mp; 260 bridge_ctl_t *bcp; 261 bridge_link_t *blcmp; 262 bridge_inst_t *bip; 263 bridge_mac_t *bmp; 264 265 if (failed) { 266 if (blp->bl_flags & BLF_SDUFAIL) 267 return; 268 blp->bl_flags |= BLF_SDUFAIL; 269 } else { 270 if (!(blp->bl_flags & BLF_SDUFAIL)) 271 return; 272 blp->bl_flags &= ~BLF_SDUFAIL; 273 } 274 275 /* 276 * If this link is otherwise up, then check if there are any other 277 * non-failed non-down links. If not, then we control the state of the 278 * whole bridge. 
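* In that case the bridge MAC's advertised state is recomputed here: it goes down when this link's SDU fails and comes back up when the failure clears (see mac_link_redo below).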
279 */ 280 bip = blp->bl_inst; 281 bmp = bip->bi_mac; 282 if (blp->bl_linkstate != LINK_STATE_DOWN) { 283 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 284 blcmp = list_next(&bip->bi_links, blcmp)) { 285 if (blp != blcmp && 286 !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) && 287 blcmp->bl_linkstate != LINK_STATE_DOWN) 288 break; 289 } 290 if (blcmp == NULL) { 291 bmp->bm_linkstate = failed ? LINK_STATE_DOWN : 292 LINK_STATE_UP; 293 mac_link_redo(bmp->bm_mh, bmp->bm_linkstate); 294 } 295 } 296 297 /* 298 * If we're becoming failed, then the link's current true state needs 299 * to be reflected upwards to this link's clients. If we're becoming 300 * unfailed, then we get the state of the bridge instead on all 301 * clients. 302 */ 303 if (failed) { 304 if (bmp->bm_linkstate != blp->bl_linkstate) 305 mac_link_redo(blp->bl_mh, blp->bl_linkstate); 306 } else { 307 mac_link_redo(blp->bl_mh, bmp->bm_linkstate); 308 } 309 310 /* get the current mblk we're going to send up */ 311 if ((mp = blp->bl_lfailmp) == NULL && 312 (mp = allocb(sizeof (bridge_ctl_t), BPRI_MED)) == NULL) 313 return; 314 315 /* get a new one for next time */ 316 blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED); 317 318 /* if none for next time, then report only failures */ 319 if (blp->bl_lfailmp == NULL && !failed) { 320 blp->bl_lfailmp = mp; 321 return; 322 } 323 324 /* LINTED: alignment */ 325 bcp = (bridge_ctl_t *)mp->b_rptr; 326 bcp->bc_linkid = blp->bl_linkid; 327 bcp->bc_failed = failed; 328 mp->b_wptr = (uchar_t *)(bcp + 1); 329 mp->b_next = *mlist; 330 *mlist = mp; 331 } 332 333 /* 334 * Send control messages (link SDU changes) using the stream to the 335 * bridge instance daemon. 336 */ 337 static void 338 send_up_messages(bridge_inst_t *bip, mblk_t *mp) 339 { 340 mblk_t *mnext; 341 queue_t *rq; 342 343 rq = bip->bi_control->bs_wq; 344 rq = OTHERQ(rq); 345 while (mp != NULL) { 346 mnext = mp->b_next; 347 mp->b_next = NULL; 348 putnext(rq, mp); 349 mp = mnext; 350 } 351 } 352 353 /* ARGSUSED */ 354 static int 355 bridge_m_getstat(void *arg, uint_t stat, uint64_t *val) 356 { 357 return (ENOTSUP); 358 } 359 360 static int 361 bridge_m_start(void *arg) 362 { 363 bridge_mac_t *bmp = arg; 364 365 bmp->bm_flags |= BMF_STARTED; 366 return (0); 367 } 368 369 static void 370 bridge_m_stop(void *arg) 371 { 372 bridge_mac_t *bmp = arg; 373 374 bmp->bm_flags &= ~BMF_STARTED; 375 } 376 377 /* ARGSUSED */ 378 static int 379 bridge_m_setpromisc(void *arg, boolean_t on) 380 { 381 return (0); 382 } 383 384 /* ARGSUSED */ 385 static int 386 bridge_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 387 { 388 return (0); 389 } 390 391 /* ARGSUSED */ 392 static int 393 bridge_m_unicst(void *arg, const uint8_t *macaddr) 394 { 395 return (ENOTSUP); 396 } 397 398 static mblk_t * 399 bridge_m_tx(void *arg, mblk_t *mp) 400 { 401 _NOTE(ARGUNUSED(arg)); 402 freemsgchain(mp); 403 return (NULL); 404 } 405 406 /* ARGSUSED */ 407 static int 408 bridge_ioc_listfwd(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) 409 { 410 bridge_listfwd_t *blf = karg; 411 bridge_inst_t *bip; 412 bridge_fwd_t *bfp, match; 413 avl_index_t where; 414 415 bip = bridge_find_name(blf->blf_name); 416 if (bip == NULL) 417 return (ENOENT); 418 419 bcopy(blf->blf_dest, match.bf_dest, ETHERADDRL); 420 match.bf_flags |= BFF_VLANLOCAL; 421 rw_enter(&bip->bi_rwlock, RW_READER); 422 if ((bfp = avl_find(&bip->bi_fwd, &match, &where)) == NULL) 423 bfp = avl_nearest(&bip->bi_fwd, where, AVL_AFTER); 424 else 425 bfp = AVL_NEXT(&bip->bi_fwd, bfp); 
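/* NULL here means there is no entry at or after the requested address; report end-of-table with a zeroed structure. */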
426 if (bfp == NULL) { 427 bzero(blf, sizeof (*blf)); 428 } else { 429 bcopy(bfp->bf_dest, blf->blf_dest, ETHERADDRL); 430 blf->blf_trill_nick = bfp->bf_trill_nick; 431 blf->blf_ms_age = 432 drv_hztousec(ddi_get_lbolt() - bfp->bf_lastheard) / 1000; 433 blf->blf_is_local = 434 (bfp->bf_flags & BFF_LOCALADDR) != 0; 435 blf->blf_linkid = bfp->bf_links[0]->bl_linkid; 436 } 437 rw_exit(&bip->bi_rwlock); 438 bridge_unref(bip); 439 return (0); 440 } 441 442 static int 443 bridge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 444 uint_t pr_valsize, const void *pr_val) 445 { 446 bridge_mac_t *bmp = arg; 447 bridge_inst_t *bip; 448 bridge_link_t *blp; 449 int err; 450 uint_t maxsdu; 451 mblk_t *mlist; 452 453 _NOTE(ARGUNUSED(pr_name)); 454 switch (pr_num) { 455 case MAC_PROP_MTU: 456 if (pr_valsize < sizeof (bmp->bm_maxsdu)) { 457 err = EINVAL; 458 break; 459 } 460 (void) bcopy(pr_val, &maxsdu, sizeof (maxsdu)); 461 if (maxsdu == bmp->bm_maxsdu) { 462 err = 0; 463 } else if ((bip = mac_to_inst(bmp)) == NULL) { 464 err = ENXIO; 465 } else { 466 rw_enter(&bip->bi_rwlock, RW_WRITER); 467 mlist = NULL; 468 for (blp = list_head(&bip->bi_links); blp != NULL; 469 blp = list_next(&bip->bi_links, blp)) { 470 if (blp->bl_flags & BLF_DELETED) 471 continue; 472 if (blp->bl_maxsdu == maxsdu) 473 link_sdu_fail(blp, B_FALSE, &mlist); 474 else if (blp->bl_maxsdu == bmp->bm_maxsdu) 475 link_sdu_fail(blp, B_TRUE, &mlist); 476 } 477 rw_exit(&bip->bi_rwlock); 478 bmp->bm_maxsdu = maxsdu; 479 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 480 send_up_messages(bip, mlist); 481 bridge_unref(bip); 482 err = 0; 483 } 484 break; 485 486 default: 487 err = ENOTSUP; 488 break; 489 } 490 return (err); 491 } 492 493 static int 494 bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 495 uint_t pr_valsize, void *pr_val) 496 { 497 bridge_mac_t *bmp = arg; 498 int err = 0; 499 500 _NOTE(ARGUNUSED(pr_name)); 501 switch (pr_num) { 502 case MAC_PROP_STATUS: 503 ASSERT(pr_valsize >= sizeof (bmp->bm_linkstate)); 504 bcopy(&bmp->bm_linkstate, pr_val, sizeof (bmp->bm_linkstate)); 505 break; 506 507 default: 508 err = ENOTSUP; 509 break; 510 } 511 return (err); 512 } 513 514 static void 515 bridge_m_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num, 516 mac_prop_info_handle_t prh) 517 { 518 bridge_mac_t *bmp = arg; 519 520 _NOTE(ARGUNUSED(pr_name)); 521 522 switch (pr_num) { 523 case MAC_PROP_MTU: 524 mac_prop_info_set_range_uint32(prh, bmp->bm_maxsdu, 525 bmp->bm_maxsdu); 526 break; 527 case MAC_PROP_STATUS: 528 mac_prop_info_set_perm(prh, MAC_PROP_PERM_READ); 529 break; 530 } 531 } 532 533 static mac_callbacks_t bridge_m_callbacks = { 534 MC_SETPROP | MC_GETPROP | MC_PROPINFO, 535 bridge_m_getstat, 536 bridge_m_start, 537 bridge_m_stop, 538 bridge_m_setpromisc, 539 bridge_m_multicst, 540 bridge_m_unicst, 541 bridge_m_tx, 542 NULL, /* reserved */ 543 NULL, /* ioctl */ 544 NULL, /* getcapab */ 545 NULL, /* open */ 546 NULL, /* close */ 547 bridge_m_setprop, 548 bridge_m_getprop, 549 bridge_m_propinfo 550 }; 551 552 /* 553 * Create kstats from a list.
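* Returns the new kstat, or NULL on failure; callers treat a missing kstat as non-fatal.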
554 */ 555 static kstat_t * 556 kstat_setup(kstat_named_t *knt, const char **names, int nstat, 557 const char *unitname) 558 { 559 kstat_t *ksp; 560 int i; 561 562 for (i = 0; i < nstat; i++) 563 kstat_named_init(&knt[i], names[i], KSTAT_DATA_UINT64); 564 565 ksp = kstat_create_zone(BRIDGE_DEV_NAME, 0, unitname, "net", 566 KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID); 567 if (ksp != NULL) { 568 ksp->ks_data = knt; 569 kstat_install(ksp); 570 } 571 return (ksp); 572 } 573 574 /* 575 * Find an existing bridge_mac_t structure or allocate a new one for the given 576 * bridge instance. This creates the mac driver instance that snoop can use. 577 */ 578 static int 579 bmac_alloc(bridge_inst_t *bip, bridge_mac_t **bmacp) 580 { 581 bridge_mac_t *bmp, *bnew; 582 mac_register_t *mac; 583 int err; 584 585 *bmacp = NULL; 586 if ((mac = mac_alloc(MAC_VERSION)) == NULL) 587 return (EINVAL); 588 589 bnew = kmem_zalloc(sizeof (*bnew), KM_SLEEP); 590 591 rw_enter(&bmac_rwlock, RW_WRITER); 592 for (bmp = list_head(&bmac_list); bmp != NULL; 593 bmp = list_next(&bmac_list, bmp)) { 594 if (strcmp(bip->bi_name, bmp->bm_name) == 0) { 595 ASSERT(bmp->bm_inst == NULL); 596 bmp->bm_inst = bip; 597 rw_exit(&bmac_rwlock); 598 kmem_free(bnew, sizeof (*bnew)); 599 mac_free(mac); 600 *bmacp = bmp; 601 return (0); 602 } 603 } 604 605 mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 606 mac->m_driver = bnew; 607 mac->m_dip = bridge_dev_info; 608 mac->m_instance = (uint_t)-1; 609 mac->m_src_addr = (uint8_t *)zero_addr; 610 mac->m_callbacks = &bridge_m_callbacks; 611 612 /* 613 * Note that the SDU limits are irrelevant, as nobody transmits on the 614 * bridge node itself. It's mainly for monitoring but we allow 615 * setting the bridge MTU for quick transition of all links part of the 616 * bridge to a new MTU. 617 */ 618 mac->m_min_sdu = 1; 619 mac->m_max_sdu = 1500; 620 err = mac_register(mac, &bnew->bm_mh); 621 mac_free(mac); 622 if (err != 0) { 623 rw_exit(&bmac_rwlock); 624 kmem_free(bnew, sizeof (*bnew)); 625 return (err); 626 } 627 628 bnew->bm_inst = bip; 629 (void) strcpy(bnew->bm_name, bip->bi_name); 630 if (list_is_empty(&bmac_list)) { 631 bridge_timerid = timeout(bridge_timer, NULL, 632 bridge_scan_interval); 633 } 634 list_insert_tail(&bmac_list, bnew); 635 rw_exit(&bmac_rwlock); 636 637 /* 638 * Mark the MAC as unable to go "active" so that only passive clients 639 * (such as snoop) can bind to it. 640 */ 641 mac_no_active(bnew->bm_mh); 642 *bmacp = bnew; 643 return (0); 644 } 645 646 /* 647 * Disconnect the given bridge_mac_t from its bridge instance. The bridge 648 * instance is going away. The mac instance can't go away until the clients 649 * are gone (see bridge_timer). 650 */ 651 static void 652 bmac_disconnect(bridge_mac_t *bmp) 653 { 654 bridge_inst_t *bip; 655 656 bmp->bm_linkstate = LINK_STATE_DOWN; 657 mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN); 658 659 rw_enter(&bmac_rwlock, RW_READER); 660 bip = bmp->bm_inst; 661 bip->bi_mac = NULL; 662 bmp->bm_inst = NULL; 663 rw_exit(&bmac_rwlock); 664 } 665 666 /* This is used by the avl trees to sort forwarding table entries */ 667 static int 668 fwd_compare(const void *addr1, const void *addr2) 669 { 670 const bridge_fwd_t *fwd1 = addr1; 671 const bridge_fwd_t *fwd2 = addr2; 672 int diff = memcmp(fwd1->bf_dest, fwd2->bf_dest, ETHERADDRL); 673 674 if (diff != 0) 675 return (diff > 0 ? 
1 : -1); 676 677 if ((fwd1->bf_flags ^ fwd2->bf_flags) & BFF_VLANLOCAL) { 678 if (fwd1->bf_vlanid > fwd2->bf_vlanid) 679 return (1); 680 else if (fwd1->bf_vlanid < fwd2->bf_vlanid) 681 return (-1); 682 } 683 return (0); 684 } 685 686 static void 687 inst_free(bridge_inst_t *bip) 688 { 689 ASSERT(bip->bi_mac == NULL); 690 rw_destroy(&bip->bi_rwlock); 691 list_destroy(&bip->bi_links); 692 cv_destroy(&bip->bi_linkwait); 693 avl_destroy(&bip->bi_fwd); 694 if (bip->bi_ksp != NULL) 695 kstat_delete(bip->bi_ksp); 696 kmem_free(bip, sizeof (*bip)); 697 } 698 699 static bridge_inst_t * 700 inst_alloc(const char *bridge) 701 { 702 bridge_inst_t *bip; 703 704 bip = kmem_zalloc(sizeof (*bip), KM_SLEEP); 705 bip->bi_refs = 1; 706 (void) strcpy(bip->bi_name, bridge); 707 rw_init(&bip->bi_rwlock, NULL, RW_DRIVER, NULL); 708 list_create(&bip->bi_links, sizeof (bridge_link_t), 709 offsetof(bridge_link_t, bl_node)); 710 cv_init(&bip->bi_linkwait, NULL, CV_DRIVER, NULL); 711 avl_create(&bip->bi_fwd, fwd_compare, sizeof (bridge_fwd_t), 712 offsetof(bridge_fwd_t, bf_node)); 713 return (bip); 714 } 715 716 static bridge_inst_t * 717 bridge_find_name(const char *bridge) 718 { 719 bridge_inst_t *bip; 720 721 mutex_enter(&inst_lock); 722 for (bip = list_head(&inst_list); bip != NULL; 723 bip = list_next(&inst_list, bip)) { 724 if (!(bip->bi_flags & BIF_SHUTDOWN) && 725 strcmp(bridge, bip->bi_name) == 0) { 726 atomic_inc_uint(&bip->bi_refs); 727 break; 728 } 729 } 730 mutex_exit(&inst_lock); 731 732 return (bip); 733 } 734 735 static int 736 bridge_create(datalink_id_t linkid, const char *bridge, bridge_inst_t **bipc, 737 cred_t *cred) 738 { 739 bridge_inst_t *bip, *bipnew; 740 bridge_mac_t *bmp = NULL; 741 int err; 742 743 *bipc = NULL; 744 bipnew = inst_alloc(bridge); 745 746 mutex_enter(&inst_lock); 747 lookup_retry: 748 for (bip = list_head(&inst_list); bip != NULL; 749 bip = list_next(&inst_list, bip)) { 750 if (strcmp(bridge, bip->bi_name) == 0) 751 break; 752 } 753 754 /* This should not take long; if it does, we've got a design problem */ 755 if (bip != NULL && (bip->bi_flags & BIF_SHUTDOWN)) { 756 cv_wait(&inst_cv, &inst_lock); 757 goto lookup_retry; 758 } 759 760 if (bip == NULL) { 761 bip = bipnew; 762 bipnew = NULL; 763 list_insert_tail(&inst_list, bip); 764 } 765 766 mutex_exit(&inst_lock); 767 if (bipnew != NULL) { 768 inst_free(bipnew); 769 return (EEXIST); 770 } 771 772 bip->bi_ksp = kstat_setup((kstat_named_t *)&bip->bi_kstats, 773 inst_kstats_list, Dim(inst_kstats_list), bip->bi_name); 774 775 err = bmac_alloc(bip, &bmp); 776 if ((bip->bi_mac = bmp) == NULL) 777 goto fail_create; 778 779 /* 780 * bm_inst is set, so the timer cannot yank the DLS rug from under us. 781 * No extra locking is needed here. 
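* (bridge_timer() only tears down DLS and MAC state for entries whose bm_inst is NULL.)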
782 */ 783 if (!(bmp->bm_flags & BMF_DLS)) { 784 err = dls_devnet_create(bmp->bm_mh, linkid, crgetzoneid(cred)); 785 if (err != 0) 786 goto fail_create; 787 bmp->bm_flags |= BMF_DLS; 788 } 789 790 bip->bi_dev = makedevice(bridge_major, mac_minor(bmp->bm_mh)); 791 *bipc = bip; 792 return (0); 793 794 fail_create: 795 ASSERT(bip->bi_trilldata == NULL); 796 bip->bi_flags |= BIF_SHUTDOWN; 797 bridge_unref(bip); 798 return (err); 799 } 800 801 static void 802 bridge_unref(bridge_inst_t *bip) 803 { 804 if (atomic_dec_uint_nv(&bip->bi_refs) == 0) { 805 ASSERT(bip->bi_flags & BIF_SHUTDOWN); 806 /* free up mac for reuse before leaving global list */ 807 if (bip->bi_mac != NULL) 808 bmac_disconnect(bip->bi_mac); 809 mutex_enter(&inst_lock); 810 list_remove(&inst_list, bip); 811 cv_broadcast(&inst_cv); 812 mutex_exit(&inst_lock); 813 inst_free(bip); 814 } 815 } 816 817 /* 818 * Stream instances are used only for allocating bridges and serving as a 819 * control node. They serve no data-handling function. 820 */ 821 static bridge_stream_t * 822 stream_alloc(void) 823 { 824 bridge_stream_t *bsp; 825 minor_t mn; 826 827 if ((mn = mac_minor_hold(B_FALSE)) == 0) 828 return (NULL); 829 bsp = kmem_zalloc(sizeof (*bsp), KM_SLEEP); 830 bsp->bs_minor = mn; 831 return (bsp); 832 } 833 834 static void 835 stream_free(bridge_stream_t *bsp) 836 { 837 mac_minor_rele(bsp->bs_minor); 838 kmem_free(bsp, sizeof (*bsp)); 839 } 840 841 /* Reference hold/release functions for STREAMS-related taskq */ 842 static void 843 stream_ref(bridge_stream_t *bsp) 844 { 845 mutex_enter(&stream_ref_lock); 846 bsp->bs_taskq_cnt++; 847 mutex_exit(&stream_ref_lock); 848 } 849 850 static void 851 stream_unref(bridge_stream_t *bsp) 852 { 853 mutex_enter(&stream_ref_lock); 854 if (--bsp->bs_taskq_cnt == 0) 855 cv_broadcast(&stream_ref_cv); 856 mutex_exit(&stream_ref_lock); 857 } 858 859 static void 860 link_free(bridge_link_t *blp) 861 { 862 bridge_inst_t *bip = blp->bl_inst; 863 864 ASSERT(!(blp->bl_flags & BLF_FREED)); 865 blp->bl_flags |= BLF_FREED; 866 if (blp->bl_ksp != NULL) 867 kstat_delete(blp->bl_ksp); 868 if (blp->bl_lfailmp != NULL) 869 freeb(blp->bl_lfailmp); 870 cv_destroy(&blp->bl_trillwait); 871 mutex_destroy(&blp->bl_trilllock); 872 kmem_free(blp, sizeof (*blp)); 873 /* Don't unreference the bridge until the MAC is closed */ 874 bridge_unref(bip); 875 } 876 877 static void 878 link_unref(bridge_link_t *blp) 879 { 880 if (atomic_dec_uint_nv(&blp->bl_refs) == 0) { 881 bridge_inst_t *bip = blp->bl_inst; 882 883 ASSERT(blp->bl_flags & BLF_DELETED); 884 rw_enter(&bip->bi_rwlock, RW_WRITER); 885 if (blp->bl_flags & BLF_LINK_ADDED) 886 list_remove(&bip->bi_links, blp); 887 rw_exit(&bip->bi_rwlock); 888 if (bip->bi_trilldata != NULL && list_is_empty(&bip->bi_links)) 889 cv_broadcast(&bip->bi_linkwait); 890 link_free(blp); 891 } 892 } 893 894 static bridge_fwd_t * 895 fwd_alloc(const uint8_t *addr, uint_t nlinks, uint16_t nick) 896 { 897 bridge_fwd_t *bfp; 898 899 bfp = kmem_zalloc(sizeof (*bfp) + (nlinks * sizeof (bridge_link_t *)), 900 KM_NOSLEEP); 901 if (bfp != NULL) { 902 bcopy(addr, bfp->bf_dest, ETHERADDRL); 903 bfp->bf_lastheard = ddi_get_lbolt(); 904 bfp->bf_maxlinks = nlinks; 905 bfp->bf_links = (bridge_link_t **)(bfp + 1); 906 bfp->bf_trill_nick = nick; 907 } 908 return (bfp); 909 } 910 911 static bridge_fwd_t * 912 fwd_find(bridge_inst_t *bip, const uint8_t *addr, uint16_t vlanid) 913 { 914 bridge_fwd_t *bfp, *vbfp; 915 bridge_fwd_t match; 916 917 bcopy(addr, match.bf_dest, ETHERADDRL); 918 match.bf_flags = 0; 919 
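/* Look up the shared (non-VLAN-specific) entry first; if it has IVL duplicates, retry below with the exact VLAN ID. */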
rw_enter(&bip->bi_rwlock, RW_READER); 920 if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) { 921 if (bfp->bf_vlanid != vlanid && bfp->bf_vcnt > 0) { 922 match.bf_vlanid = vlanid; 923 match.bf_flags = BFF_VLANLOCAL; 924 vbfp = avl_find(&bip->bi_fwd, &match, NULL); 925 if (vbfp != NULL) 926 bfp = vbfp; 927 } 928 atomic_inc_uint(&bfp->bf_refs); 929 } 930 rw_exit(&bip->bi_rwlock); 931 return (bfp); 932 } 933 934 static void 935 fwd_free(bridge_fwd_t *bfp) 936 { 937 uint_t i; 938 bridge_inst_t *bip = bfp->bf_links[0]->bl_inst; 939 940 KIDECR(bki_count); 941 for (i = 0; i < bfp->bf_nlinks; i++) 942 link_unref(bfp->bf_links[i]); 943 kmem_free(bfp, 944 sizeof (*bfp) + bfp->bf_maxlinks * sizeof (bridge_link_t *)); 945 } 946 947 static void 948 fwd_unref(bridge_fwd_t *bfp) 949 { 950 if (atomic_dec_uint_nv(&bfp->bf_refs) == 0) { 951 ASSERT(!(bfp->bf_flags & BFF_INTREE)); 952 fwd_free(bfp); 953 } 954 } 955 956 static void 957 fwd_delete(bridge_fwd_t *bfp) 958 { 959 bridge_inst_t *bip; 960 bridge_fwd_t *bfpzero; 961 962 if (bfp->bf_flags & BFF_INTREE) { 963 ASSERT(bfp->bf_nlinks > 0); 964 bip = bfp->bf_links[0]->bl_inst; 965 rw_enter(&bip->bi_rwlock, RW_WRITER); 966 /* Another thread could beat us to this */ 967 if (bfp->bf_flags & BFF_INTREE) { 968 avl_remove(&bip->bi_fwd, bfp); 969 bfp->bf_flags &= ~BFF_INTREE; 970 if (bfp->bf_flags & BFF_VLANLOCAL) { 971 bfp->bf_flags &= ~BFF_VLANLOCAL; 972 bfpzero = avl_find(&bip->bi_fwd, bfp, NULL); 973 if (bfpzero != NULL && bfpzero->bf_vcnt > 0) 974 bfpzero->bf_vcnt--; 975 } 976 rw_exit(&bip->bi_rwlock); 977 fwd_unref(bfp); /* no longer in avl tree */ 978 } else { 979 rw_exit(&bip->bi_rwlock); 980 } 981 } 982 } 983 984 static boolean_t 985 fwd_insert(bridge_inst_t *bip, bridge_fwd_t *bfp) 986 { 987 avl_index_t idx; 988 boolean_t retv; 989 990 rw_enter(&bip->bi_rwlock, RW_WRITER); 991 if (!(bip->bi_flags & BIF_SHUTDOWN) && 992 avl_numnodes(&bip->bi_fwd) < bip->bi_tablemax && 993 avl_find(&bip->bi_fwd, bfp, &idx) == NULL) { 994 avl_insert(&bip->bi_fwd, bfp, idx); 995 bfp->bf_flags |= BFF_INTREE; 996 atomic_inc_uint(&bfp->bf_refs); /* avl entry */ 997 retv = B_TRUE; 998 } else { 999 retv = B_FALSE; 1000 } 1001 rw_exit(&bip->bi_rwlock); 1002 return (retv); 1003 } 1004 1005 static void 1006 fwd_update_local(bridge_link_t *blp, const uint8_t *oldaddr, 1007 const uint8_t *newaddr) 1008 { 1009 bridge_inst_t *bip = blp->bl_inst; 1010 bridge_fwd_t *bfp, *bfnew; 1011 bridge_fwd_t match; 1012 avl_index_t idx; 1013 boolean_t drop_ref = B_FALSE; 1014 1015 if (bcmp(oldaddr, newaddr, ETHERADDRL) == 0) 1016 return; 1017 1018 if (bcmp(oldaddr, zero_addr, ETHERADDRL) == 0) 1019 goto no_old_addr; 1020 1021 /* 1022 * Find the previous entry, and remove our link from it. 1023 */ 1024 bcopy(oldaddr, match.bf_dest, ETHERADDRL); 1025 rw_enter(&bip->bi_rwlock, RW_WRITER); 1026 if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) { 1027 int i; 1028 1029 /* 1030 * See if we're in the list, and remove if so. 1031 */ 1032 for (i = 0; i < bfp->bf_nlinks; i++) { 1033 if (bfp->bf_links[i] == blp) { 1034 /* 1035 * We assume writes are atomic, so no special 1036 * MT handling is needed. The list length is 1037 * decremented first, and then we remove 1038 * entries. 
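* A reader racing with this sees either the old entry or a shortened list, never a dangling pointer.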
1039 */ 1040 bfp->bf_nlinks--; 1041 for (; i < bfp->bf_nlinks; i++) 1042 bfp->bf_links[i] = bfp->bf_links[i + 1]; 1043 drop_ref = B_TRUE; 1044 break; 1045 } 1046 } 1047 /* If no more links, then remove and free up */ 1048 if (bfp->bf_nlinks == 0) { 1049 avl_remove(&bip->bi_fwd, bfp); 1050 bfp->bf_flags &= ~BFF_INTREE; 1051 } else { 1052 bfp = NULL; 1053 } 1054 } 1055 rw_exit(&bip->bi_rwlock); 1056 if (bfp != NULL) 1057 fwd_unref(bfp); /* no longer in avl tree */ 1058 1059 /* 1060 * Now get the new link address and add this link to the list. The 1061 * list should be of length 1 unless the user has configured multiple 1062 * NICs with the same address. (That's an incorrect configuration, but 1063 * we support it anyway.) 1064 */ 1065 no_old_addr: 1066 bfp = NULL; 1067 if ((bip->bi_flags & BIF_SHUTDOWN) || 1068 bcmp(newaddr, zero_addr, ETHERADDRL) == 0) 1069 goto no_new_addr; 1070 1071 bcopy(newaddr, match.bf_dest, ETHERADDRL); 1072 rw_enter(&bip->bi_rwlock, RW_WRITER); 1073 if ((bfp = avl_find(&bip->bi_fwd, &match, &idx)) == NULL) { 1074 bfnew = fwd_alloc(newaddr, 1, RBRIDGE_NICKNAME_NONE); 1075 if (bfnew != NULL) 1076 KIINCR(bki_count); 1077 } else if (bfp->bf_nlinks < bfp->bf_maxlinks) { 1078 /* special case: link fits in existing entry */ 1079 bfnew = bfp; 1080 } else { 1081 bfnew = fwd_alloc(newaddr, bfp->bf_nlinks + 1, 1082 RBRIDGE_NICKNAME_NONE); 1083 if (bfnew != NULL) { 1084 KIINCR(bki_count); 1085 avl_remove(&bip->bi_fwd, bfp); 1086 bfp->bf_flags &= ~BFF_INTREE; 1087 bfnew->bf_nlinks = bfp->bf_nlinks; 1088 bcopy(bfp->bf_links, bfnew->bf_links, 1089 bfp->bf_nlinks * sizeof (bfp)); 1090 /* reset the idx value due to removal above */ 1091 (void) avl_find(&bip->bi_fwd, &match, &idx); 1092 } 1093 } 1094 1095 if (bfnew != NULL) { 1096 bfnew->bf_links[bfnew->bf_nlinks++] = blp; 1097 if (drop_ref) 1098 drop_ref = B_FALSE; 1099 else 1100 atomic_inc_uint(&blp->bl_refs); /* bf_links entry */ 1101 1102 if (bfnew != bfp) { 1103 /* local addresses are not subject to table limits */ 1104 avl_insert(&bip->bi_fwd, bfnew, idx); 1105 bfnew->bf_flags |= (BFF_INTREE | BFF_LOCALADDR); 1106 atomic_inc_uint(&bfnew->bf_refs); /* avl entry */ 1107 } 1108 } 1109 rw_exit(&bip->bi_rwlock); 1110 1111 no_new_addr: 1112 /* 1113 * If we found an existing entry and we replaced it with a new one, 1114 * then drop the table reference from the old one. We removed it from 1115 * the AVL tree above. 1116 */ 1117 if (bfnew != NULL && bfp != NULL && bfnew != bfp) 1118 fwd_unref(bfp); 1119 1120 /* Account for removed entry. */ 1121 if (drop_ref) 1122 link_unref(blp); 1123 } 1124 1125 static void 1126 bridge_new_unicst(bridge_link_t *blp) 1127 { 1128 uint8_t new_mac[ETHERADDRL]; 1129 1130 mac_unicast_primary_get(blp->bl_mh, new_mac); 1131 fwd_update_local(blp, blp->bl_local_mac, new_mac); 1132 bcopy(new_mac, blp->bl_local_mac, ETHERADDRL); 1133 } 1134 1135 /* 1136 * We must shut down a link prior to freeing it, and doing that requires 1137 * blocking to wait for running MAC threads while holding a reference. This is 1138 * run from a taskq to accomplish proper link shutdown followed by reference 1139 * drop. 1140 */ 1141 static void 1142 link_shutdown(void *arg) 1143 { 1144 bridge_link_t *blp = arg; 1145 mac_handle_t mh = blp->bl_mh; 1146 bridge_inst_t *bip; 1147 bridge_fwd_t *bfp, *bfnext; 1148 avl_tree_t fwd_scavenge; 1149 int i; 1150 1151 /* 1152 * This link is being destroyed. Notify TRILL now that it's no longer 1153 * possible to send packets. 
Data packets may still arrive until TRILL 1154 * calls bridge_trill_lnunref. 1155 */ 1156 if (blp->bl_trilldata != NULL) 1157 trill_lndstr_fn(blp->bl_trilldata, blp); 1158 1159 if (blp->bl_flags & BLF_PROM_ADDED) 1160 (void) mac_promisc_remove(blp->bl_mphp); 1161 1162 if (blp->bl_flags & BLF_SET_BRIDGE) 1163 mac_bridge_clear(mh, (mac_handle_t)blp); 1164 1165 if (blp->bl_flags & BLF_MARGIN_ADDED) { 1166 (void) mac_notify_remove(blp->bl_mnh, B_TRUE); 1167 (void) mac_margin_remove(mh, blp->bl_margin); 1168 } 1169 1170 /* Tell the clients the real link state when we leave */ 1171 mac_link_redo(blp->bl_mh, 1172 mac_stat_get(blp->bl_mh, MAC_STAT_LOWLINK_STATE)); 1173 1174 /* Destroy all of the forwarding entries related to this link */ 1175 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 1176 offsetof(bridge_fwd_t, bf_node)); 1177 bip = blp->bl_inst; 1178 rw_enter(&bip->bi_rwlock, RW_WRITER); 1179 bfnext = avl_first(&bip->bi_fwd); 1180 while ((bfp = bfnext) != NULL) { 1181 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 1182 for (i = 0; i < bfp->bf_nlinks; i++) { 1183 if (bfp->bf_links[i] == blp) 1184 break; 1185 } 1186 if (i >= bfp->bf_nlinks) 1187 continue; 1188 if (bfp->bf_nlinks > 1) { 1189 /* note that this can't be the last reference */ 1190 link_unref(blp); 1191 bfp->bf_nlinks--; 1192 for (; i < bfp->bf_nlinks; i++) 1193 bfp->bf_links[i] = bfp->bf_links[i + 1]; 1194 } else { 1195 ASSERT(bfp->bf_flags & BFF_INTREE); 1196 avl_remove(&bip->bi_fwd, bfp); 1197 bfp->bf_flags &= ~BFF_INTREE; 1198 avl_add(&fwd_scavenge, bfp); 1199 } 1200 } 1201 rw_exit(&bip->bi_rwlock); 1202 bfnext = avl_first(&fwd_scavenge); 1203 while ((bfp = bfnext) != NULL) { 1204 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 1205 avl_remove(&fwd_scavenge, bfp); 1206 fwd_unref(bfp); 1207 } 1208 avl_destroy(&fwd_scavenge); 1209 1210 if (blp->bl_flags & BLF_CLIENT_OPEN) 1211 mac_client_close(blp->bl_mch, 0); 1212 1213 mac_close(mh); 1214 1215 /* 1216 * We are now completely removed from the active list, so drop the 1217 * reference (see bridge_add_link). 1218 */ 1219 link_unref(blp); 1220 } 1221 1222 static void 1223 shutdown_inst(bridge_inst_t *bip) 1224 { 1225 bridge_link_t *blp, *blnext; 1226 bridge_fwd_t *bfp; 1227 1228 mutex_enter(&inst_lock); 1229 if (bip->bi_flags & BIF_SHUTDOWN) { 1230 mutex_exit(&inst_lock); 1231 return; 1232 } 1233 1234 /* 1235 * Once on the inst_list, the bridge instance must not leave that list 1236 * without having the shutdown flag set first. When the shutdown flag 1237 * is set, we own the list reference, so we must drop it before 1238 * returning. 1239 */ 1240 bip->bi_flags |= BIF_SHUTDOWN; 1241 mutex_exit(&inst_lock); 1242 1243 bip->bi_control = NULL; 1244 1245 rw_enter(&bip->bi_rwlock, RW_READER); 1246 blnext = list_head(&bip->bi_links); 1247 while ((blp = blnext) != NULL) { 1248 blnext = list_next(&bip->bi_links, blp); 1249 if (!(blp->bl_flags & BLF_DELETED)) { 1250 blp->bl_flags |= BLF_DELETED; 1251 (void) ddi_taskq_dispatch(bridge_taskq, link_shutdown, 1252 blp, DDI_SLEEP); 1253 } 1254 } 1255 while ((bfp = avl_first(&bip->bi_fwd)) != NULL) { 1256 atomic_inc_uint(&bfp->bf_refs); 1257 rw_exit(&bip->bi_rwlock); 1258 fwd_delete(bfp); 1259 fwd_unref(bfp); 1260 rw_enter(&bip->bi_rwlock, RW_READER); 1261 } 1262 rw_exit(&bip->bi_rwlock); 1263 1264 /* 1265 * This bridge is being destroyed. Notify TRILL once all of the 1266 * links are all gone. 
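* We wait on bi_linkwait, which link_unref() broadcasts once the last link has left bi_links.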
1267 */ 1268 mutex_enter(&inst_lock); 1269 while (bip->bi_trilldata != NULL && !list_is_empty(&bip->bi_links)) 1270 cv_wait(&bip->bi_linkwait, &inst_lock); 1271 mutex_exit(&inst_lock); 1272 if (bip->bi_trilldata != NULL) 1273 trill_brdstr_fn(bip->bi_trilldata, bip); 1274 1275 bridge_unref(bip); 1276 } 1277 1278 /* 1279 * This is called once by the TRILL module when it starts up. It just sets the 1280 * global TRILL callback function pointers -- data transmit/receive and bridge 1281 * and link destroy notification. There's only one TRILL module, so only one 1282 * registration is needed. 1283 * 1284 * TRILL should call this function with NULL pointers before unloading. It 1285 * must not do so before dropping all references to bridges and links. We 1286 * assert that this is true on debug builds. 1287 */ 1288 void 1289 bridge_trill_register_cb(trill_recv_pkt_t recv_fn, trill_encap_pkt_t encap_fn, 1290 trill_br_dstr_t brdstr_fn, trill_ln_dstr_t lndstr_fn) 1291 { 1292 #ifdef DEBUG 1293 if (recv_fn == NULL && trill_recv_fn != NULL) { 1294 bridge_inst_t *bip; 1295 bridge_link_t *blp; 1296 1297 mutex_enter(&inst_lock); 1298 for (bip = list_head(&inst_list); bip != NULL; 1299 bip = list_next(&inst_list, bip)) { 1300 ASSERT(bip->bi_trilldata == NULL); 1301 rw_enter(&bip->bi_rwlock, RW_READER); 1302 for (blp = list_head(&bip->bi_links); blp != NULL; 1303 blp = list_next(&bip->bi_links, blp)) { 1304 ASSERT(blp->bl_trilldata == NULL); 1305 } 1306 rw_exit(&bip->bi_rwlock); 1307 } 1308 mutex_exit(&inst_lock); 1309 } 1310 #endif 1311 trill_recv_fn = recv_fn; 1312 trill_encap_fn = encap_fn; 1313 trill_brdstr_fn = brdstr_fn; 1314 trill_lndstr_fn = lndstr_fn; 1315 } 1316 1317 /* 1318 * This registers the TRILL instance pointer with a bridge. Before this 1319 * pointer is set, the forwarding, TRILL receive, and bridge destructor 1320 * functions won't be called. 1321 * 1322 * TRILL holds a reference on a bridge with this call. It must free the 1323 * reference by calling the unregister function below. 1324 */ 1325 bridge_inst_t * 1326 bridge_trill_brref(const char *bname, void *ptr) 1327 { 1328 char bridge[MAXLINKNAMELEN]; 1329 bridge_inst_t *bip; 1330 1331 (void) snprintf(bridge, MAXLINKNAMELEN, "%s0", bname); 1332 bip = bridge_find_name(bridge); 1333 if (bip != NULL) { 1334 ASSERT(bip->bi_trilldata == NULL && ptr != NULL); 1335 bip->bi_trilldata = ptr; 1336 } 1337 return (bip); 1338 } 1339 1340 void 1341 bridge_trill_brunref(bridge_inst_t *bip) 1342 { 1343 ASSERT(bip->bi_trilldata != NULL); 1344 bip->bi_trilldata = NULL; 1345 bridge_unref(bip); 1346 } 1347 1348 /* 1349 * TRILL calls this function when referencing a particular link on a bridge. 1350 * 1351 * It holds a reference on the link, so TRILL must clear out the reference when 1352 * it's done with the link (on unbinding). 
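* That is done by calling bridge_trill_lnunref() below.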
1353 */ 1354 bridge_link_t * 1355 bridge_trill_lnref(bridge_inst_t *bip, datalink_id_t linkid, void *ptr) 1356 { 1357 bridge_link_t *blp; 1358 1359 ASSERT(ptr != NULL); 1360 rw_enter(&bip->bi_rwlock, RW_READER); 1361 for (blp = list_head(&bip->bi_links); blp != NULL; 1362 blp = list_next(&bip->bi_links, blp)) { 1363 if (!(blp->bl_flags & BLF_DELETED) && 1364 blp->bl_linkid == linkid && blp->bl_trilldata == NULL) { 1365 blp->bl_trilldata = ptr; 1366 blp->bl_flags &= ~BLF_TRILLACTIVE; 1367 (void) memset(blp->bl_afs, 0, sizeof (blp->bl_afs)); 1368 atomic_inc_uint(&blp->bl_refs); 1369 break; 1370 } 1371 } 1372 rw_exit(&bip->bi_rwlock); 1373 return (blp); 1374 } 1375 1376 void 1377 bridge_trill_lnunref(bridge_link_t *blp) 1378 { 1379 mutex_enter(&blp->bl_trilllock); 1380 ASSERT(blp->bl_trilldata != NULL); 1381 blp->bl_trilldata = NULL; 1382 blp->bl_flags &= ~BLF_TRILLACTIVE; 1383 while (blp->bl_trillthreads > 0) 1384 cv_wait(&blp->bl_trillwait, &blp->bl_trilllock); 1385 mutex_exit(&blp->bl_trilllock); 1386 (void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs)); 1387 link_unref(blp); 1388 } 1389 1390 /* 1391 * This periodic timer performs three functions: 1392 * 1. It scans the list of learned forwarding entries, and removes ones that 1393 * haven't been heard from in a while. The time limit is backed down if 1394 * we're above the configured table limit. 1395 * 2. It walks the links and decays away the bl_learns counter. 1396 * 3. It scans the observability node entries looking for ones that can be 1397 * freed up. 1398 */ 1399 /* ARGSUSED */ 1400 static void 1401 bridge_timer(void *arg) 1402 { 1403 bridge_inst_t *bip; 1404 bridge_fwd_t *bfp, *bfnext; 1405 bridge_mac_t *bmp, *bmnext; 1406 bridge_link_t *blp; 1407 int err; 1408 datalink_id_t tmpid; 1409 avl_tree_t fwd_scavenge; 1410 clock_t age_limit; 1411 uint32_t ldecay; 1412 1413 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 1414 offsetof(bridge_fwd_t, bf_node)); 1415 mutex_enter(&inst_lock); 1416 for (bip = list_head(&inst_list); bip != NULL; 1417 bip = list_next(&inst_list, bip)) { 1418 if (bip->bi_flags & BIF_SHUTDOWN) 1419 continue; 1420 rw_enter(&bip->bi_rwlock, RW_WRITER); 1421 /* compute scaled maximum age based on table limit */ 1422 if (avl_numnodes(&bip->bi_fwd) > bip->bi_tablemax) 1423 bip->bi_tshift++; 1424 else 1425 bip->bi_tshift = 0; 1426 if ((age_limit = bridge_fwd_age >> bip->bi_tshift) == 0) { 1427 if (bip->bi_tshift != 0) 1428 bip->bi_tshift--; 1429 age_limit = 1; 1430 } 1431 bfnext = avl_first(&bip->bi_fwd); 1432 while ((bfp = bfnext) != NULL) { 1433 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 1434 if (!(bfp->bf_flags & BFF_LOCALADDR) && 1435 (ddi_get_lbolt() - bfp->bf_lastheard) > age_limit) { 1436 ASSERT(bfp->bf_flags & BFF_INTREE); 1437 avl_remove(&bip->bi_fwd, bfp); 1438 bfp->bf_flags &= ~BFF_INTREE; 1439 avl_add(&fwd_scavenge, bfp); 1440 } 1441 } 1442 for (blp = list_head(&bip->bi_links); blp != NULL; 1443 blp = list_next(&bip->bi_links, blp)) { 1444 ldecay = mac_get_ldecay(blp->bl_mh); 1445 if (ldecay >= blp->bl_learns) 1446 blp->bl_learns = 0; 1447 else 1448 atomic_add_int(&blp->bl_learns, -(int)ldecay); 1449 } 1450 rw_exit(&bip->bi_rwlock); 1451 bfnext = avl_first(&fwd_scavenge); 1452 while ((bfp = bfnext) != NULL) { 1453 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 1454 avl_remove(&fwd_scavenge, bfp); 1455 KIINCR(bki_expire); 1456 fwd_unref(bfp); /* drop tree reference */ 1457 } 1458 } 1459 mutex_exit(&inst_lock); 1460 avl_destroy(&fwd_scavenge); 1461 1462 /* 1463 * Scan the bridge_mac_t entries and try to 
free up the ones that are 1464 * no longer active. This must be done by polling, as neither DLS nor 1465 * MAC provides a driver any sort of positive control over clients. 1466 */ 1467 rw_enter(&bmac_rwlock, RW_WRITER); 1468 bmnext = list_head(&bmac_list); 1469 while ((bmp = bmnext) != NULL) { 1470 bmnext = list_next(&bmac_list, bmp); 1471 1472 /* ignore active bridges */ 1473 if (bmp->bm_inst != NULL) 1474 continue; 1475 1476 if (bmp->bm_flags & BMF_DLS) { 1477 err = dls_devnet_destroy(bmp->bm_mh, &tmpid, B_FALSE); 1478 ASSERT(err == 0 || err == EBUSY); 1479 if (err == 0) 1480 bmp->bm_flags &= ~BMF_DLS; 1481 } 1482 1483 if (!(bmp->bm_flags & BMF_DLS)) { 1484 err = mac_unregister(bmp->bm_mh); 1485 ASSERT(err == 0 || err == EBUSY); 1486 if (err == 0) { 1487 list_remove(&bmac_list, bmp); 1488 kmem_free(bmp, sizeof (*bmp)); 1489 } 1490 } 1491 } 1492 if (list_is_empty(&bmac_list)) { 1493 bridge_timerid = 0; 1494 } else { 1495 bridge_timerid = timeout(bridge_timer, NULL, 1496 bridge_scan_interval); 1497 } 1498 rw_exit(&bmac_rwlock); 1499 } 1500 1501 static int 1502 bridge_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp) 1503 { 1504 bridge_stream_t *bsp; 1505 1506 if (rq->q_ptr != NULL) 1507 return (0); 1508 1509 if (sflag & MODOPEN) 1510 return (EINVAL); 1511 1512 /* 1513 * Check the minor node number being opened. This tells us which 1514 * bridge instance the user wants. 1515 */ 1516 if (getminor(*devp) != 0) { 1517 /* 1518 * This is a regular DLPI stream for snoop or the like. 1519 * Redirect it through DLD. 1520 */ 1521 rq->q_qinfo = &bridge_dld_rinit; 1522 OTHERQ(rq)->q_qinfo = &bridge_dld_winit; 1523 return (dld_open(rq, devp, oflag, sflag, credp)); 1524 } else { 1525 /* 1526 * Allocate the bridge control stream structure. 1527 */ 1528 if ((bsp = stream_alloc()) == NULL) 1529 return (ENOSR); 1530 rq->q_ptr = WR(rq)->q_ptr = (caddr_t)bsp; 1531 bsp->bs_wq = WR(rq); 1532 *devp = makedevice(getmajor(*devp), bsp->bs_minor); 1533 qprocson(rq); 1534 return (0); 1535 } 1536 } 1537 1538 /* 1539 * This is used only for bridge control streams. DLPI goes through dld 1540 * instead. 1541 */ 1542 static int 1543 bridge_close(queue_t *rq) 1544 { 1545 bridge_stream_t *bsp = rq->q_ptr; 1546 bridge_inst_t *bip; 1547 1548 /* 1549 * Wait for any stray taskq (add/delete link) entries related to this 1550 * stream to leave the system. 1551 */ 1552 mutex_enter(&stream_ref_lock); 1553 while (bsp->bs_taskq_cnt != 0) 1554 cv_wait(&stream_ref_cv, &stream_ref_lock); 1555 mutex_exit(&stream_ref_lock); 1556 1557 qprocsoff(rq); 1558 if ((bip = bsp->bs_inst) != NULL) 1559 shutdown_inst(bip); 1560 rq->q_ptr = WR(rq)->q_ptr = NULL; 1561 stream_free(bsp); 1562 if (bip != NULL) 1563 bridge_unref(bip); 1564 1565 return (0); 1566 } 1567 1568 static void 1569 bridge_learn(bridge_link_t *blp, const uint8_t *saddr, uint16_t ingress_nick, 1570 uint16_t vlanid) 1571 { 1572 bridge_inst_t *bip = blp->bl_inst; 1573 bridge_fwd_t *bfp, *bfpnew; 1574 int i; 1575 boolean_t replaced = B_FALSE; 1576 1577 /* Ignore multi-destination address used as source; it's nonsense. */ 1578 if (*saddr & 1) 1579 return; 1580 1581 /* 1582 * If the source is known, then check whether it belongs on this link. 1583 * If not, and this isn't a fixed local address, then we've detected a 1584 * move. If it's not known, learn it. 1585 */ 1586 if ((bfp = fwd_find(bip, saddr, vlanid)) != NULL) { 1587 /* 1588 * If the packet has a fixed local source address, then there's 1589 * nothing we can learn. We must quit. 
If this was a received 1590 * packet, then the sender has stolen our address, but there's 1591 * nothing we can do. If it's a transmitted packet, then 1592 * that's the normal case. 1593 */ 1594 if (bfp->bf_flags & BFF_LOCALADDR) { 1595 fwd_unref(bfp); 1596 return; 1597 } 1598 1599 /* 1600 * Check if the link (and TRILL sender, if any) being used is 1601 * among the ones registered for this address. If so, then 1602 * this is information that we already know. 1603 */ 1604 if (bfp->bf_trill_nick == ingress_nick) { 1605 for (i = 0; i < bfp->bf_nlinks; i++) { 1606 if (bfp->bf_links[i] == blp) { 1607 bfp->bf_lastheard = ddi_get_lbolt(); 1608 fwd_unref(bfp); 1609 return; 1610 } 1611 } 1612 } 1613 } 1614 1615 /* 1616 * Note that we intentionally "unlearn" things that appear to be under 1617 * attack on this link. The forwarding cache is only a performance 1618 * optimization -- a missing entry merely causes flooding, not a loss 1619 * of reachability -- so leaving out entries optimizes for success and 1620 * defends against the attack. Thus, the bare increment without a check 1621 * in the delete code above is right. (And it's ok if we skid over the 1622 * limit a little, so there's no synchronization needed on the test.) 1623 */ 1624 if (blp->bl_learns >= mac_get_llimit(blp->bl_mh)) { 1625 if (bfp != NULL) { 1626 if (bfp->bf_vcnt == 0) 1627 fwd_delete(bfp); 1628 fwd_unref(bfp); 1629 } 1630 return; 1631 } 1632 1633 atomic_inc_uint(&blp->bl_learns); 1634 1635 if ((bfpnew = fwd_alloc(saddr, 1, ingress_nick)) == NULL) { 1636 if (bfp != NULL) 1637 fwd_unref(bfp); 1638 return; 1639 } 1640 KIINCR(bki_count); 1641 1642 if (bfp != NULL) { 1643 /* 1644 * If this is a new destination for the same VLAN, then delete 1645 * so that we can update. If it's a different VLAN, then we're 1646 * not going to delete the original. Split off instead into an 1647 * IVL entry. 1648 */ 1649 if (bfp->bf_vlanid == vlanid) { 1650 /* save the count of IVL duplicates */ 1651 bfpnew->bf_vcnt = bfp->bf_vcnt; 1652 1653 /* entry deletes count as learning events */ 1654 atomic_inc_uint(&blp->bl_learns); 1655 1656 /* destroy and create anew; node moved */ 1657 fwd_delete(bfp); 1658 replaced = B_TRUE; 1659 KIINCR(bki_moved); 1660 } else { 1661 bfp->bf_vcnt++; 1662 bfpnew->bf_flags |= BFF_VLANLOCAL; 1663 } 1664 fwd_unref(bfp); 1665 } 1666 bfpnew->bf_links[0] = blp; 1667 bfpnew->bf_nlinks = 1; 1668 atomic_inc_uint(&blp->bl_refs); /* bf_links entry */ 1669 if (!fwd_insert(bip, bfpnew)) 1670 fwd_free(bfpnew); 1671 else if (!replaced) 1672 KIINCR(bki_source); 1673 } 1674 1675 /* 1676 * Process the VLAN headers for output on a given link. There are several 1677 * cases (noting that we don't map VLANs): 1678 * 1. The input packet is good as it is; either 1679 * a. It has no tag, and output has same PVID 1680 * b. It has a non-zero priority-only tag for PVID, and b_band is same 1681 * c. It has a tag with VLAN different from PVID, and b_band is same 1682 * 2. The tag must change: non-zero b_band is different from tag priority 1683 * 3. The packet has a tag and should not (VLAN same as PVID, b_band zero) 1684 * 4. The packet has no tag and needs one: 1685 * a. VLAN ID same as PVID, but b_band is non-zero 1686 * b. VLAN ID different from PVID 1687 * We exclude case 1 first, then modify the packet. Note that output packets 1688 * get a priority set by the mblk, not by the header, because QoS in bridging 1689 * requires priority recalculation at each node. 1690 * 1691 * The passed-in tci is the "impossible" value 0xFFFF when no tag is present.
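* For example (case 4b): an untagged packet leaving a link whose PVID differs from the packet's VLAN grows by VLAN_INCR bytes and gets a tag built as VLAN_TCI(b_band, ETHER_CFI, vlanid); see the case 4 code below.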
1692 */ 1693 static mblk_t * 1694 reform_vlan_header(mblk_t *mp, uint16_t vlanid, uint16_t tci, uint16_t pvid) 1695 { 1696 boolean_t source_has_tag = (tci != 0xFFFF); 1697 mblk_t *mpcopy; 1698 size_t mlen, minlen; 1699 struct ether_vlan_header *evh; 1700 int pri; 1701 1702 /* This helps centralize error handling in the caller. */ 1703 if (mp == NULL) 1704 return (mp); 1705 1706 /* No forwarded packet can have hardware checksum enabled */ 1707 DB_CKSUMFLAGS(mp) = 0; 1708 1709 /* Get the no-modification cases out of the way first */ 1710 if (!source_has_tag && vlanid == pvid) /* 1a */ 1711 return (mp); 1712 1713 pri = VLAN_PRI(tci); 1714 if (source_has_tag && mp->b_band == pri) { 1715 if (vlanid != pvid) /* 1c */ 1716 return (mp); 1717 if (pri != 0 && VLAN_ID(tci) == 0) /* 1b */ 1718 return (mp); 1719 } 1720 1721 /* 1722 * We now know that we must modify the packet. Prepare for that. Note 1723 * that if a tag is present, the caller has already done a pullup for 1724 * the VLAN header, so we're good to go. 1725 */ 1726 if (MBLKL(mp) < sizeof (struct ether_header)) { 1727 mpcopy = msgpullup(mp, sizeof (struct ether_header)); 1728 if (mpcopy == NULL) { 1729 freemsg(mp); 1730 return (NULL); 1731 } 1732 mp = mpcopy; 1733 } 1734 if (DB_REF(mp) > 1 || !IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) || 1735 (!source_has_tag && MBLKTAIL(mp) < VLAN_INCR)) { 1736 minlen = mlen = MBLKL(mp); 1737 if (!source_has_tag) 1738 minlen += VLAN_INCR; 1739 ASSERT(minlen >= sizeof (struct ether_vlan_header)); 1740 /* 1741 * We're willing to copy some data to avoid fragmentation, but 1742 * not a lot. 1743 */ 1744 if (minlen > 256) 1745 minlen = sizeof (struct ether_vlan_header); 1746 mpcopy = allocb(minlen, BPRI_MED); 1747 if (mpcopy == NULL) { 1748 freemsg(mp); 1749 return (NULL); 1750 } 1751 if (mlen <= minlen) { 1752 /* We toss the first mblk when we can. 
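The copy takes over the original's b_cont chain, so only the first mblk is freed.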
*/ 1753 bcopy(mp->b_rptr, mpcopy->b_rptr, mlen); 1754 mpcopy->b_wptr += mlen; 1755 mpcopy->b_cont = mp->b_cont; 1756 freeb(mp); 1757 } else { 1758 /* If not, then just copy what we need */ 1759 if (!source_has_tag) 1760 minlen = sizeof (struct ether_header); 1761 bcopy(mp->b_rptr, mpcopy->b_rptr, minlen); 1762 mpcopy->b_wptr += minlen; 1763 mpcopy->b_cont = mp; 1764 mp->b_rptr += minlen; 1765 } 1766 mp = mpcopy; 1767 } 1768 1769 /* LINTED: pointer alignment */ 1770 evh = (struct ether_vlan_header *)mp->b_rptr; 1771 if (source_has_tag) { 1772 if (mp->b_band == 0 && vlanid == pvid) { /* 3 */ 1773 evh->ether_tpid = evh->ether_type; 1774 mlen = MBLKL(mp); 1775 if (mlen > sizeof (struct ether_vlan_header)) 1776 ovbcopy(mp->b_rptr + 1777 sizeof (struct ether_vlan_header), 1778 mp->b_rptr + sizeof (struct ether_header), 1779 mlen - sizeof (struct ether_vlan_header)); 1780 mp->b_wptr -= VLAN_INCR; 1781 } else { /* 2 */ 1782 if (vlanid == pvid) 1783 vlanid = VLAN_ID_NONE; 1784 tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid); 1785 evh->ether_tci = htons(tci); 1786 } 1787 } else { 1788 /* case 4: no header present, but one is needed */ 1789 mlen = MBLKL(mp); 1790 if (mlen > sizeof (struct ether_header)) 1791 ovbcopy(mp->b_rptr + sizeof (struct ether_header), 1792 mp->b_rptr + sizeof (struct ether_vlan_header), 1793 mlen - sizeof (struct ether_header)); 1794 mp->b_wptr += VLAN_INCR; 1795 ASSERT(mp->b_wptr <= DB_LIM(mp)); 1796 if (vlanid == pvid) 1797 vlanid = VLAN_ID_NONE; 1798 tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid); 1799 evh->ether_type = evh->ether_tpid; 1800 evh->ether_tpid = htons(ETHERTYPE_VLAN); 1801 evh->ether_tci = htons(tci); 1802 } 1803 return (mp); 1804 } 1805 1806 /* Record VLAN information and strip header if requested . */ 1807 static void 1808 update_header(mblk_t *mp, mac_header_info_t *hdr_info, boolean_t striphdr) 1809 { 1810 if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) { 1811 struct ether_vlan_header *evhp; 1812 uint16_t ether_type; 1813 1814 /* LINTED: alignment */ 1815 evhp = (struct ether_vlan_header *)mp->b_rptr; 1816 hdr_info->mhi_istagged = B_TRUE; 1817 hdr_info->mhi_tci = ntohs(evhp->ether_tci); 1818 if (striphdr) { 1819 /* 1820 * For VLAN tagged frames update the ether_type 1821 * in hdr_info before stripping the header. 1822 */ 1823 ether_type = ntohs(evhp->ether_type); 1824 hdr_info->mhi_origsap = ether_type; 1825 hdr_info->mhi_bindsap = (ether_type > ETHERMTU) ? 1826 ether_type : DLS_SAP_LLC; 1827 mp->b_rptr = (uchar_t *)(evhp + 1); 1828 } 1829 } else { 1830 hdr_info->mhi_istagged = B_FALSE; 1831 hdr_info->mhi_tci = VLAN_ID_NONE; 1832 if (striphdr) 1833 mp->b_rptr += sizeof (struct ether_header); 1834 } 1835 } 1836 1837 /* 1838 * Return B_TRUE if we're allowed to send on this link with the given VLAN ID. 1839 */ 1840 static boolean_t 1841 bridge_can_send(bridge_link_t *blp, uint16_t vlanid) 1842 { 1843 ASSERT(vlanid != VLAN_ID_NONE); 1844 if (blp->bl_flags & BLF_DELETED) 1845 return (B_FALSE); 1846 if (blp->bl_trilldata == NULL && blp->bl_state != BLS_FORWARDING) 1847 return (B_FALSE); 1848 return (BRIDGE_VLAN_ISSET(blp, vlanid) && BRIDGE_AF_ISSET(blp, vlanid)); 1849 } 1850 1851 /* 1852 * This function scans the bridge forwarding tables in order to forward a given 1853 * packet. If the packet either doesn't need forwarding (the current link is 1854 * correct) or the current link needs a copy as well, then the packet is 1855 * returned to the caller. 1856 * 1857 * If a packet has been decapsulated from TRILL, then it must *NOT* reenter a 1858 * TRILL tunnel. 
If the destination points there, then drop instead. 1859 */ 1860 static mblk_t * 1861 bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp, 1862 uint16_t vlanid, uint16_t tci, boolean_t from_trill, boolean_t is_xmit) 1863 { 1864 mblk_t *mpsend, *mpcopy; 1865 bridge_inst_t *bip = blp->bl_inst; 1866 bridge_link_t *blpsend, *blpnext; 1867 bridge_fwd_t *bfp; 1868 uint_t i; 1869 boolean_t selfseen = B_FALSE; 1870 void *tdp; 1871 const uint8_t *daddr = hdr_info->mhi_daddr; 1872 1873 /* 1874 * Check for the IEEE "reserved" multicast addresses. Messages sent to 1875 * these addresses are used for link-local control (STP and pause), and 1876 * are never forwarded or redirected. 1877 */ 1878 if (daddr[0] == 1 && daddr[1] == 0x80 && daddr[2] == 0xc2 && 1879 daddr[3] == 0 && daddr[4] == 0 && (daddr[5] & 0xf0) == 0) { 1880 if (from_trill) { 1881 freemsg(mp); 1882 mp = NULL; 1883 } 1884 return (mp); 1885 } 1886 1887 if ((bfp = fwd_find(bip, daddr, vlanid)) != NULL) { 1888 1889 /* 1890 * If trill indicates a destination for this node, then it's 1891 * clearly not intended for local delivery. We must tell TRILL 1892 * to encapsulate, as long as we didn't just decapsulate it. 1893 */ 1894 if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) { 1895 /* 1896 * Error case: can't reencapsulate if the protocols are 1897 * working correctly. 1898 */ 1899 if (from_trill) { 1900 freemsg(mp); 1901 return (NULL); 1902 } 1903 mutex_enter(&blp->bl_trilllock); 1904 if ((tdp = blp->bl_trilldata) != NULL) { 1905 blp->bl_trillthreads++; 1906 mutex_exit(&blp->bl_trilllock); 1907 update_header(mp, hdr_info, B_FALSE); 1908 if (is_xmit) 1909 mp = mac_fix_cksum(mp); 1910 /* all trill data frames have Inner.VLAN */ 1911 mp = reform_vlan_header(mp, vlanid, tci, 0); 1912 if (mp == NULL) { 1913 KIINCR(bki_drops); 1914 fwd_unref(bfp); 1915 return (NULL); 1916 } 1917 trill_encap_fn(tdp, blp, hdr_info, mp, 1918 bfp->bf_trill_nick); 1919 mutex_enter(&blp->bl_trilllock); 1920 if (--blp->bl_trillthreads == 0 && 1921 blp->bl_trilldata == NULL) 1922 cv_broadcast(&blp->bl_trillwait); 1923 } 1924 mutex_exit(&blp->bl_trilllock); 1925 1926 /* if TRILL has been disabled, then kill this stray */ 1927 if (tdp == NULL) { 1928 freemsg(mp); 1929 fwd_delete(bfp); 1930 } 1931 fwd_unref(bfp); 1932 return (NULL); 1933 } 1934 1935 /* find first link we can send on */ 1936 for (i = 0; i < bfp->bf_nlinks; i++) { 1937 blpsend = bfp->bf_links[i]; 1938 if (blpsend == blp) 1939 selfseen = B_TRUE; 1940 else if (bridge_can_send(blpsend, vlanid)) 1941 break; 1942 } 1943 1944 while (i < bfp->bf_nlinks) { 1945 blpsend = bfp->bf_links[i]; 1946 for (i++; i < bfp->bf_nlinks; i++) { 1947 blpnext = bfp->bf_links[i]; 1948 if (blpnext == blp) 1949 selfseen = B_TRUE; 1950 else if (bridge_can_send(blpnext, vlanid)) 1951 break; 1952 } 1953 if (i == bfp->bf_nlinks && !selfseen) { 1954 mpsend = mp; 1955 mp = NULL; 1956 } else { 1957 mpsend = copymsg(mp); 1958 } 1959 1960 if (!from_trill && is_xmit) 1961 mpsend = mac_fix_cksum(mpsend); 1962 1963 mpsend = reform_vlan_header(mpsend, vlanid, tci, 1964 blpsend->bl_pvid); 1965 if (mpsend == NULL) { 1966 KIINCR(bki_drops); 1967 continue; 1968 } 1969 1970 KIINCR(bki_forwards); 1971 /* 1972 * No need to bump up the link reference count, as 1973 * the forwarding entry itself holds a reference to 1974 * the link. 
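* (The bfp reference taken in fwd_find() is held until the fwd_unref() below, so the bf_links entries cannot go away under us.)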
1975 */ 1976 if (bfp->bf_flags & BFF_LOCALADDR) { 1977 mac_rx_common(blpsend->bl_mh, NULL, mpsend); 1978 } else { 1979 KLPINCR(blpsend, bkl_xmit); 1980 MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, 1981 mpsend); 1982 freemsg(mpsend); 1983 } 1984 } 1985 /* 1986 * Handle a special case: if we're transmitting to the original 1987 * link, then check whether the localaddr flag is set. If it 1988 * is, then receive instead. This doesn't happen with ordinary 1989 * bridging, but does happen often with TRILL decapsulation. 1990 */ 1991 if (mp != NULL && is_xmit && (bfp->bf_flags & BFF_LOCALADDR)) { 1992 mac_rx_common(blp->bl_mh, NULL, mp); 1993 mp = NULL; 1994 } 1995 fwd_unref(bfp); 1996 } else { 1997 /* 1998 * TRILL has two cases to handle. If the packet is off the 1999 * wire (not from TRILL), then we need to send up into the 2000 * TRILL module to have the distribution tree computed. If the 2001 * packet is from TRILL (decapsulated), then we're part of the 2002 * distribution tree, and we need to copy the packet on member 2003 * interfaces. 2004 * 2005 * Thus, the from TRILL case is identical to the STP case. 2006 */ 2007 if (!from_trill && blp->bl_trilldata != NULL) { 2008 mutex_enter(&blp->bl_trilllock); 2009 if ((tdp = blp->bl_trilldata) != NULL) { 2010 blp->bl_trillthreads++; 2011 mutex_exit(&blp->bl_trilllock); 2012 if ((mpsend = copymsg(mp)) != NULL) { 2013 update_header(mpsend, 2014 hdr_info, B_FALSE); 2015 /* 2016 * all trill data frames have 2017 * Inner.VLAN 2018 */ 2019 mpsend = reform_vlan_header(mpsend, 2020 vlanid, tci, 0); 2021 if (mpsend == NULL) { 2022 KIINCR(bki_drops); 2023 } else { 2024 trill_encap_fn(tdp, blp, 2025 hdr_info, mpsend, 2026 RBRIDGE_NICKNAME_NONE); 2027 } 2028 } 2029 mutex_enter(&blp->bl_trilllock); 2030 if (--blp->bl_trillthreads == 0 && 2031 blp->bl_trilldata == NULL) 2032 cv_broadcast(&blp->bl_trillwait); 2033 } 2034 mutex_exit(&blp->bl_trilllock); 2035 } 2036 2037 /* 2038 * This is an unknown destination, so flood. 
2039 */ 2040 rw_enter(&bip->bi_rwlock, RW_READER); 2041 for (blpnext = list_head(&bip->bi_links); blpnext != NULL; 2042 blpnext = list_next(&bip->bi_links, blpnext)) { 2043 if (blpnext == blp) 2044 selfseen = B_TRUE; 2045 else if (bridge_can_send(blpnext, vlanid)) 2046 break; 2047 } 2048 if (blpnext != NULL) 2049 atomic_inc_uint(&blpnext->bl_refs); 2050 rw_exit(&bip->bi_rwlock); 2051 while ((blpsend = blpnext) != NULL) { 2052 rw_enter(&bip->bi_rwlock, RW_READER); 2053 for (blpnext = list_next(&bip->bi_links, blpsend); 2054 blpnext != NULL; 2055 blpnext = list_next(&bip->bi_links, blpnext)) { 2056 if (blpnext == blp) 2057 selfseen = B_TRUE; 2058 else if (bridge_can_send(blpnext, vlanid)) 2059 break; 2060 } 2061 if (blpnext != NULL) 2062 atomic_inc_uint(&blpnext->bl_refs); 2063 rw_exit(&bip->bi_rwlock); 2064 if (blpnext == NULL && !selfseen) { 2065 mpsend = mp; 2066 mp = NULL; 2067 } else { 2068 mpsend = copymsg(mp); 2069 } 2070 2071 if (!from_trill && is_xmit) 2072 mpsend = mac_fix_cksum(mpsend); 2073 2074 mpsend = reform_vlan_header(mpsend, vlanid, tci, 2075 blpsend->bl_pvid); 2076 if (mpsend == NULL) { 2077 KIINCR(bki_drops); 2078 continue; 2079 } 2080 2081 if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST) 2082 KIINCR(bki_unknown); 2083 else 2084 KIINCR(bki_mbcast); 2085 KLPINCR(blpsend, bkl_xmit); 2086 if ((mpcopy = copymsg(mpsend)) != NULL) 2087 mac_rx_common(blpsend->bl_mh, NULL, mpcopy); 2088 MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, mpsend); 2089 freemsg(mpsend); 2090 link_unref(blpsend); 2091 } 2092 } 2093 2094 /* 2095 * At this point, if np is non-NULL, it means that the caller needs to 2096 * continue on the selected link. 2097 */ 2098 return (mp); 2099 } 2100 2101 /* 2102 * Extract and validate the VLAN information for a given packet. This checks 2103 * conformance with the rules for use of the PVID on the link, and for the 2104 * allowed (configured) VLAN set. 2105 * 2106 * Returns B_TRUE if the packet passes, B_FALSE if it fails. 2107 */ 2108 static boolean_t 2109 bridge_get_vlan(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp, 2110 uint16_t *vlanidp, uint16_t *tcip) 2111 { 2112 uint16_t tci, vlanid; 2113 2114 if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) { 2115 ptrdiff_t tpos = offsetof(struct ether_vlan_header, ether_tci); 2116 ptrdiff_t mlen; 2117 2118 /* 2119 * Extract the VLAN ID information, regardless of alignment, 2120 * and without a pullup. This isn't attractive, but we do this 2121 * to avoid having to deal with the pointers stashed in 2122 * hdr_info moving around or having the caller deal with a new 2123 * mblk_t pointer. 2124 */ 2125 while (mp != NULL) { 2126 mlen = MBLKL(mp); 2127 if (mlen > tpos && mlen > 0) 2128 break; 2129 tpos -= mlen; 2130 mp = mp->b_cont; 2131 } 2132 if (mp == NULL) 2133 return (B_FALSE); 2134 tci = mp->b_rptr[tpos] << 8; 2135 if (++tpos >= mlen) { 2136 do { 2137 mp = mp->b_cont; 2138 } while (mp != NULL && MBLKL(mp) == 0); 2139 if (mp == NULL) 2140 return (B_FALSE); 2141 tpos = 0; 2142 } 2143 tci |= mp->b_rptr[tpos]; 2144 2145 vlanid = VLAN_ID(tci); 2146 if (VLAN_CFI(tci) != ETHER_CFI || vlanid > VLAN_ID_MAX) 2147 return (B_FALSE); 2148 if (vlanid == VLAN_ID_NONE || vlanid == blp->bl_pvid) 2149 goto input_no_vlan; 2150 if (!BRIDGE_VLAN_ISSET(blp, vlanid)) 2151 return (B_FALSE); 2152 } else { 2153 tci = 0xFFFF; 2154 input_no_vlan: 2155 /* 2156 * If PVID is set to zero, then untagged traffic is not 2157 * supported here. Do not learn or forward. 
2158 */ 2159 if ((vlanid = blp->bl_pvid) == VLAN_ID_NONE) 2160 return (B_FALSE); 2161 } 2162 2163 *tcip = tci; 2164 *vlanidp = vlanid; 2165 return (B_TRUE); 2166 } 2167 2168 /* 2169 * Handle MAC notifications. 2170 */ 2171 static void 2172 bridge_notify_cb(void *arg, mac_notify_type_t note_type) 2173 { 2174 bridge_link_t *blp = arg; 2175 2176 switch (note_type) { 2177 case MAC_NOTE_UNICST: 2178 bridge_new_unicst(blp); 2179 break; 2180 2181 case MAC_NOTE_SDU_SIZE: { 2182 uint_t maxsdu; 2183 bridge_inst_t *bip = blp->bl_inst; 2184 bridge_mac_t *bmp = bip->bi_mac; 2185 boolean_t notify = B_FALSE; 2186 mblk_t *mlist = NULL; 2187 2188 mac_sdu_get(blp->bl_mh, NULL, &maxsdu); 2189 rw_enter(&bip->bi_rwlock, RW_READER); 2190 if (list_prev(&bip->bi_links, blp) == NULL && 2191 list_next(&bip->bi_links, blp) == NULL) { 2192 notify = (maxsdu != bmp->bm_maxsdu); 2193 bmp->bm_maxsdu = maxsdu; 2194 } 2195 blp->bl_maxsdu = maxsdu; 2196 if (maxsdu != bmp->bm_maxsdu) 2197 link_sdu_fail(blp, B_TRUE, &mlist); 2198 else if (notify) 2199 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 2200 rw_exit(&bip->bi_rwlock); 2201 send_up_messages(bip, mlist); 2202 break; 2203 } 2204 } 2205 } 2206 2207 /* 2208 * This is called by the MAC layer. As with the transmit side, we're right in 2209 * the data path for all I/O on this port, so if we don't need to forward this 2210 * packet anywhere, we have to send it upwards via mac_rx_common. 2211 */ 2212 static void 2213 bridge_recv_cb(mac_handle_t mh, mac_resource_handle_t rsrc, mblk_t *mpnext) 2214 { 2215 mblk_t *mp, *mpcopy; 2216 bridge_link_t *blp = (bridge_link_t *)mh; 2217 bridge_inst_t *bip = blp->bl_inst; 2218 bridge_mac_t *bmp = bip->bi_mac; 2219 mac_header_info_t hdr_info; 2220 uint16_t vlanid, tci; 2221 boolean_t trillmode = B_FALSE; 2222 2223 KIINCR(bki_recv); 2224 KLINCR(bkl_recv); 2225 2226 /* 2227 * Regardless of state, check for inbound TRILL packets when TRILL is 2228 * active. These are pulled out of band and sent for TRILL handling. 2229 */ 2230 if (blp->bl_trilldata != NULL) { 2231 void *tdp; 2232 mblk_t *newhead; 2233 mblk_t *tail = NULL; 2234 2235 mutex_enter(&blp->bl_trilllock); 2236 if ((tdp = blp->bl_trilldata) != NULL) { 2237 blp->bl_trillthreads++; 2238 mutex_exit(&blp->bl_trilllock); 2239 trillmode = B_TRUE; 2240 newhead = mpnext; 2241 while ((mp = mpnext) != NULL) { 2242 boolean_t raw_isis, bridge_group; 2243 2244 mpnext = mp->b_next; 2245 2246 /* 2247 * If the header isn't readable, then leave on 2248 * the list and continue. 2249 */ 2250 if (mac_header_info(blp->bl_mh, mp, 2251 &hdr_info) != 0) { 2252 tail = mp; 2253 continue; 2254 } 2255 2256 /* 2257 * The TRILL document specifies that, on 2258 * Ethernet alone, IS-IS packets arrive with 2259 * LLC rather than Ethertype, and using a 2260 * specific destination address. We must check 2261 * for that here. Also, we need to give BPDUs 2262 * to TRILL for processing. 
2263 */ 2264 raw_isis = bridge_group = B_FALSE; 2265 if (hdr_info.mhi_dsttype == 2266 MAC_ADDRTYPE_MULTICAST) { 2267 if (memcmp(hdr_info.mhi_daddr, 2268 all_isis_rbridges, ETHERADDRL) == 0) 2269 raw_isis = B_TRUE; 2270 else if (memcmp(hdr_info.mhi_daddr, 2271 bridge_group_address, ETHERADDRL) == 2272 0) 2273 bridge_group = B_TRUE; 2274 } 2275 if (!raw_isis && !bridge_group && 2276 hdr_info.mhi_bindsap != ETHERTYPE_TRILL && 2277 (hdr_info.mhi_bindsap != ETHERTYPE_VLAN || 2278 /* LINTED: alignment */ 2279 ((struct ether_vlan_header *)mp->b_rptr)-> 2280 ether_type != htons(ETHERTYPE_TRILL))) { 2281 tail = mp; 2282 continue; 2283 } 2284 2285 /* 2286 * We've got TRILL input. Remove from the list 2287 * and send up through the TRILL module. (Send 2288 * a copy through promiscuous receive just to 2289 * support snooping on TRILL. Order isn't 2290 * preserved strictly, but that doesn't matter 2291 * here.) 2292 */ 2293 if (tail != NULL) 2294 tail->b_next = mpnext; 2295 mp->b_next = NULL; 2296 if (mp == newhead) 2297 newhead = mpnext; 2298 mac_trill_snoop(blp->bl_mh, mp); 2299 update_header(mp, &hdr_info, B_TRUE); 2300 /* 2301 * On raw IS-IS and BPDU frames, we have to 2302 * make sure that the length is trimmed 2303 * properly. We use origsap in order to cope 2304 * with jumbograms for IS-IS. (Regular mac 2305 * can't.) 2306 */ 2307 if (raw_isis || bridge_group) { 2308 size_t msglen = msgdsize(mp); 2309 2310 if (msglen > hdr_info.mhi_origsap) { 2311 (void) adjmsg(mp, 2312 hdr_info.mhi_origsap - 2313 msglen); 2314 } else if (msglen < 2315 hdr_info.mhi_origsap) { 2316 freemsg(mp); 2317 continue; 2318 } 2319 } 2320 trill_recv_fn(tdp, blp, rsrc, mp, &hdr_info); 2321 } 2322 mpnext = newhead; 2323 mutex_enter(&blp->bl_trilllock); 2324 if (--blp->bl_trillthreads == 0 && 2325 blp->bl_trilldata == NULL) 2326 cv_broadcast(&blp->bl_trillwait); 2327 } 2328 mutex_exit(&blp->bl_trilllock); 2329 if (mpnext == NULL) 2330 return; 2331 } 2332 2333 /* 2334 * If this is a TRILL RBridge, then just check whether this link is 2335 * used at all for forwarding. If not, then we're done. 2336 */ 2337 if (trillmode) { 2338 if (!(blp->bl_flags & BLF_TRILLACTIVE) || 2339 (blp->bl_flags & BLF_SDUFAIL)) { 2340 mac_rx_common(blp->bl_mh, rsrc, mpnext); 2341 return; 2342 } 2343 } else { 2344 /* 2345 * For regular (STP) bridges, if we're in blocking or listening 2346 * state, then do nothing. We don't learn or forward until 2347 * told to do so. 2348 */ 2349 if (blp->bl_state == BLS_BLOCKLISTEN) { 2350 mac_rx_common(blp->bl_mh, rsrc, mpnext); 2351 return; 2352 } 2353 } 2354 2355 /* 2356 * Send a copy of the message chain up to the observability node users. 2357 * For TRILL, we must obey the VLAN AF rules, so we go packet-by- 2358 * packet. 2359 */ 2360 if (!trillmode && blp->bl_state == BLS_FORWARDING && 2361 (bmp->bm_flags & BMF_STARTED) && 2362 (mp = copymsgchain(mpnext)) != NULL) { 2363 mac_rx(bmp->bm_mh, NULL, mp); 2364 } 2365 2366 /* 2367 * We must be in learning or forwarding state, or using TRILL on a link 2368 * with one or more VLANs active. For each packet in the list, process 2369 * the source address, and then attempt to forward. 2370 */ 2371 while ((mp = mpnext) != NULL) { 2372 mpnext = mp->b_next; 2373 mp->b_next = NULL; 2374 2375 /* 2376 * If we can't decode the header or if the header specifies a 2377 * multicast source address (impossible!), then don't bother 2378 * learning or forwarding, but go ahead and forward up the 2379 * stack for subsequent processing. 
2380 */ 2381 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0 || 2382 (hdr_info.mhi_saddr[0] & 1) != 0) { 2383 KIINCR(bki_drops); 2384 KLINCR(bkl_drops); 2385 mac_rx_common(blp->bl_mh, rsrc, mp); 2386 continue; 2387 } 2388 2389 /* 2390 * Extract and validate the VLAN ID for this packet. 2391 */ 2392 if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) || 2393 !BRIDGE_AF_ISSET(blp, vlanid)) { 2394 mac_rx_common(blp->bl_mh, rsrc, mp); 2395 continue; 2396 } 2397 2398 if (trillmode) { 2399 /* 2400 * Special test required by TRILL document: must 2401 * discard frames with outer address set to ESADI. 2402 */ 2403 if (memcmp(hdr_info.mhi_daddr, all_esadi_rbridges, 2404 ETHERADDRL) == 0) { 2405 mac_rx_common(blp->bl_mh, rsrc, mp); 2406 continue; 2407 } 2408 2409 /* 2410 * If we're in TRILL mode, then the call above to get 2411 * the VLAN ID has also checked that we're the 2412 * appointed forwarder, so report that we're handling 2413 * this packet to any observability node users. 2414 */ 2415 if ((bmp->bm_flags & BMF_STARTED) && 2416 (mpcopy = copymsg(mp)) != NULL) 2417 mac_rx(bmp->bm_mh, NULL, mpcopy); 2418 } 2419 2420 /* 2421 * First process the source address and learn from it. For 2422 * TRILL, we learn only if we're the appointed forwarder. 2423 */ 2424 bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE, 2425 vlanid); 2426 2427 /* 2428 * Now check whether we're forwarding and look up the 2429 * destination. If we can forward, do so. 2430 */ 2431 if (trillmode || blp->bl_state == BLS_FORWARDING) { 2432 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, 2433 B_FALSE, B_FALSE); 2434 } 2435 if (mp != NULL) 2436 mac_rx_common(blp->bl_mh, rsrc, mp); 2437 } 2438 } 2439 2440 2441 /* ARGSUSED */ 2442 static mblk_t * 2443 bridge_xmit_cb(mac_handle_t mh, mac_ring_handle_t rh, mblk_t *mpnext) 2444 { 2445 bridge_link_t *blp = (bridge_link_t *)mh; 2446 bridge_inst_t *bip = blp->bl_inst; 2447 bridge_mac_t *bmp = bip->bi_mac; 2448 mac_header_info_t hdr_info; 2449 uint16_t vlanid, tci; 2450 mblk_t *mp, *mpcopy; 2451 boolean_t trillmode; 2452 2453 trillmode = blp->bl_trilldata != NULL; 2454 2455 /* 2456 * If we're using STP and we're in blocking or listening state, or if 2457 * we're using TRILL and no VLANs are active, then behave as though the 2458 * bridge isn't here at all, and send on the local link alone. 2459 */ 2460 if ((!trillmode && blp->bl_state == BLS_BLOCKLISTEN) || 2461 (trillmode && 2462 (!(blp->bl_flags & BLF_TRILLACTIVE) || 2463 (blp->bl_flags & BLF_SDUFAIL)))) { 2464 KIINCR(bki_sent); 2465 KLINCR(bkl_xmit); 2466 MAC_RING_TX(blp->bl_mh, rh, mpnext, mp); 2467 return (mp); 2468 } 2469 2470 /* 2471 * Send a copy of the message up to the observability node users. 2472 * TRILL needs to check on a packet-by-packet basis. 2473 */ 2474 if (!trillmode && blp->bl_state == BLS_FORWARDING && 2475 (bmp->bm_flags & BMF_STARTED) && 2476 (mp = copymsgchain(mpnext)) != NULL) { 2477 mac_rx(bmp->bm_mh, NULL, mp); 2478 } 2479 2480 while ((mp = mpnext) != NULL) { 2481 mpnext = mp->b_next; 2482 mp->b_next = NULL; 2483 2484 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) { 2485 freemsg(mp); 2486 continue; 2487 } 2488 2489 /* 2490 * Extract and validate the VLAN ID for this packet. 
2491 */ 2492 if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) || 2493 !BRIDGE_AF_ISSET(blp, vlanid)) { 2494 freemsg(mp); 2495 continue; 2496 } 2497 2498 /* 2499 * If we're using TRILL, then we've now validated that we're 2500 * the forwarder for this VLAN, so go ahead and let 2501 * observability node users know about the packet. 2502 */ 2503 if (trillmode && (bmp->bm_flags & BMF_STARTED) && 2504 (mpcopy = copymsg(mp)) != NULL) { 2505 mac_rx(bmp->bm_mh, NULL, mpcopy); 2506 } 2507 2508 /* 2509 * We have to learn from our own transmitted packets, because 2510 * there may be a Solaris DLPI raw sender (who can specify his 2511 * own source address) using promiscuous mode for receive. The 2512 * mac layer information won't (and can't) tell us everything 2513 * we need to know. 2514 */ 2515 bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE, 2516 vlanid); 2517 2518 /* attempt forwarding */ 2519 if (trillmode || blp->bl_state == BLS_FORWARDING) { 2520 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, 2521 B_FALSE, B_TRUE); 2522 } 2523 if (mp != NULL) { 2524 MAC_RING_TX(blp->bl_mh, rh, mp, mp); 2525 if (mp == NULL) { 2526 KIINCR(bki_sent); 2527 KLINCR(bkl_xmit); 2528 } 2529 } 2530 /* 2531 * If we get stuck, then stop. Don't let the user's output 2532 * packets get out of order. (More importantly: don't try to 2533 * bridge the same packet multiple times if flow control is 2534 * asserted.) 2535 */ 2536 if (mp != NULL) { 2537 mp->b_next = mpnext; 2538 break; 2539 } 2540 } 2541 return (mp); 2542 } 2543 2544 /* 2545 * This is called by TRILL when it decapsulates an packet, and we must forward 2546 * locally. On failure, we just drop. 2547 * 2548 * Note that the ingress_nick reported by TRILL must not represent this local 2549 * node. 2550 */ 2551 void 2552 bridge_trill_decaps(bridge_link_t *blp, mblk_t *mp, uint16_t ingress_nick) 2553 { 2554 mac_header_info_t hdr_info; 2555 uint16_t vlanid, tci; 2556 bridge_inst_t *bip = blp->bl_inst; /* used by macros */ 2557 mblk_t *mpcopy; 2558 2559 if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) { 2560 freemsg(mp); 2561 return; 2562 } 2563 2564 /* Extract VLAN ID for this packet. */ 2565 if (hdr_info.mhi_bindsap == ETHERTYPE_VLAN) { 2566 struct ether_vlan_header *evhp; 2567 2568 /* LINTED: alignment */ 2569 evhp = (struct ether_vlan_header *)mp->b_rptr; 2570 tci = ntohs(evhp->ether_tci); 2571 vlanid = VLAN_ID(tci); 2572 } else { 2573 /* Inner VLAN headers are required in TRILL data packets */ 2574 DTRACE_PROBE3(bridge__trill__decaps__novlan, bridge_link_t *, 2575 blp, mblk_t *, mp, uint16_t, ingress_nick); 2576 freemsg(mp); 2577 return; 2578 } 2579 2580 /* Learn the location of this sender in the RBridge network */ 2581 bridge_learn(blp, hdr_info.mhi_saddr, ingress_nick, vlanid); 2582 2583 /* attempt forwarding */ 2584 mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, B_TRUE, B_TRUE); 2585 if (mp != NULL) { 2586 if (bridge_can_send(blp, vlanid)) { 2587 /* Deliver a copy locally as well */ 2588 if ((mpcopy = copymsg(mp)) != NULL) 2589 mac_rx_common(blp->bl_mh, NULL, mpcopy); 2590 MAC_RING_TX(blp->bl_mh, NULL, mp, mp); 2591 } 2592 if (mp == NULL) { 2593 KIINCR(bki_sent); 2594 KLINCR(bkl_xmit); 2595 } else { 2596 freemsg(mp); 2597 } 2598 } 2599 } 2600 2601 /* 2602 * This function is used by TRILL _only_ to transmit TRILL-encapsulated 2603 * packets. It sends on a single underlying link and does not bridge. 
2604 */ 2605 mblk_t * 2606 bridge_trill_output(bridge_link_t *blp, mblk_t *mp) 2607 { 2608 bridge_inst_t *bip = blp->bl_inst; /* used by macros */ 2609 2610 mac_trill_snoop(blp->bl_mh, mp); 2611 MAC_RING_TX(blp->bl_mh, NULL, mp, mp); 2612 if (mp == NULL) { 2613 KIINCR(bki_sent); 2614 KLINCR(bkl_xmit); 2615 } 2616 return (mp); 2617 } 2618 2619 /* 2620 * Set the "appointed forwarder" flag array for this link. TRILL controls 2621 * forwarding on a VLAN basis. The "trillactive" flag is an optimization for 2622 * the forwarder. 2623 */ 2624 void 2625 bridge_trill_setvlans(bridge_link_t *blp, const uint8_t *arr) 2626 { 2627 int i; 2628 uint_t newflags = 0; 2629 2630 for (i = 0; i < BRIDGE_VLAN_ARR_SIZE; i++) { 2631 if ((blp->bl_afs[i] = arr[i]) != 0) 2632 newflags = BLF_TRILLACTIVE; 2633 } 2634 blp->bl_flags = (blp->bl_flags & ~BLF_TRILLACTIVE) | newflags; 2635 } 2636 2637 void 2638 bridge_trill_flush(bridge_link_t *blp, uint16_t vlan, boolean_t dotrill) 2639 { 2640 bridge_inst_t *bip = blp->bl_inst; 2641 bridge_fwd_t *bfp, *bfnext; 2642 avl_tree_t fwd_scavenge; 2643 int i; 2644 2645 _NOTE(ARGUNUSED(vlan)); 2646 2647 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 2648 offsetof(bridge_fwd_t, bf_node)); 2649 rw_enter(&bip->bi_rwlock, RW_WRITER); 2650 bfnext = avl_first(&bip->bi_fwd); 2651 while ((bfp = bfnext) != NULL) { 2652 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 2653 if (bfp->bf_flags & BFF_LOCALADDR) 2654 continue; 2655 if (dotrill) { 2656 /* port doesn't matter if we're flushing TRILL */ 2657 if (bfp->bf_trill_nick == RBRIDGE_NICKNAME_NONE) 2658 continue; 2659 } else { 2660 if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) 2661 continue; 2662 for (i = 0; i < bfp->bf_nlinks; i++) { 2663 if (bfp->bf_links[i] == blp) 2664 break; 2665 } 2666 if (i >= bfp->bf_nlinks) 2667 continue; 2668 } 2669 ASSERT(bfp->bf_flags & BFF_INTREE); 2670 avl_remove(&bip->bi_fwd, bfp); 2671 bfp->bf_flags &= ~BFF_INTREE; 2672 avl_add(&fwd_scavenge, bfp); 2673 } 2674 rw_exit(&bip->bi_rwlock); 2675 bfnext = avl_first(&fwd_scavenge); 2676 while ((bfp = bfnext) != NULL) { 2677 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 2678 avl_remove(&fwd_scavenge, bfp); 2679 fwd_unref(bfp); 2680 } 2681 avl_destroy(&fwd_scavenge); 2682 } 2683 2684 /* 2685 * Let the mac module take or drop a reference to a bridge link. When this is 2686 * called, the mac module is holding the mi_bridge_lock, so the link cannot be 2687 * in the process of entering or leaving a bridge. 2688 */ 2689 static void 2690 bridge_ref_cb(mac_handle_t mh, boolean_t hold) 2691 { 2692 bridge_link_t *blp = (bridge_link_t *)mh; 2693 2694 if (hold) 2695 atomic_inc_uint(&blp->bl_refs); 2696 else 2697 link_unref(blp); 2698 } 2699 2700 /* 2701 * Handle link state changes reported by the mac layer. This acts as a filter 2702 * for link state changes: if a link is reporting down, but there are other 2703 * links still up on the bridge, then the state is changed to "up." When the 2704 * last link goes down, all are marked down, and when the first link goes up, 2705 * all are marked up. (Recursion is avoided by the use of the "redo" function.) 2706 * 2707 * We treat unknown as equivalent to "up." 
2708 */ 2709 static link_state_t 2710 bridge_ls_cb(mac_handle_t mh, link_state_t newls) 2711 { 2712 bridge_link_t *blp = (bridge_link_t *)mh; 2713 bridge_link_t *blcmp; 2714 bridge_inst_t *bip; 2715 bridge_mac_t *bmp; 2716 2717 if (newls != LINK_STATE_DOWN && blp->bl_linkstate != LINK_STATE_DOWN || 2718 (blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) { 2719 blp->bl_linkstate = newls; 2720 return (newls); 2721 } 2722 2723 /* 2724 * Scan first to see if there are any other non-down links. If there 2725 * are, then we're done. Otherwise, if all others are down, then the 2726 * state of this link is the state of the bridge. 2727 */ 2728 bip = blp->bl_inst; 2729 rw_enter(&bip->bi_rwlock, RW_WRITER); 2730 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 2731 blcmp = list_next(&bip->bi_links, blcmp)) { 2732 if (blcmp != blp && 2733 !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) && 2734 blcmp->bl_linkstate != LINK_STATE_DOWN) 2735 break; 2736 } 2737 2738 if (blcmp != NULL) { 2739 /* 2740 * If there are other links that are considered up, then tell 2741 * the caller that the link is actually still up, regardless of 2742 * this link's underlying state. 2743 */ 2744 blp->bl_linkstate = newls; 2745 newls = LINK_STATE_UP; 2746 } else if (blp->bl_linkstate != newls) { 2747 /* 2748 * If we've found no other 'up' links, and this link has 2749 * changed state, then report the new state of the bridge to 2750 * all other clients. 2751 */ 2752 blp->bl_linkstate = newls; 2753 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 2754 blcmp = list_next(&bip->bi_links, blcmp)) { 2755 if (blcmp != blp && !(blcmp->bl_flags & BLF_DELETED)) 2756 mac_link_redo(blcmp->bl_mh, newls); 2757 } 2758 bmp = bip->bi_mac; 2759 if ((bmp->bm_linkstate = newls) != LINK_STATE_DOWN) 2760 bmp->bm_linkstate = LINK_STATE_UP; 2761 mac_link_redo(bmp->bm_mh, bmp->bm_linkstate); 2762 } 2763 rw_exit(&bip->bi_rwlock); 2764 return (newls); 2765 } 2766 2767 static void 2768 bridge_add_link(void *arg) 2769 { 2770 mblk_t *mp = arg; 2771 bridge_stream_t *bsp; 2772 bridge_inst_t *bip, *bipt; 2773 bridge_mac_t *bmp; 2774 datalink_id_t linkid; 2775 int err; 2776 mac_handle_t mh; 2777 uint_t maxsdu; 2778 bridge_link_t *blp = NULL, *blpt; 2779 const mac_info_t *mip; 2780 boolean_t macopen = B_FALSE; 2781 char linkname[MAXLINKNAMELEN]; 2782 char kstatname[KSTAT_STRLEN]; 2783 int i; 2784 link_state_t linkstate; 2785 mblk_t *mlist; 2786 2787 bsp = (bridge_stream_t *)mp->b_next; 2788 mp->b_next = NULL; 2789 bip = bsp->bs_inst; 2790 /* LINTED: alignment */ 2791 linkid = *(datalink_id_t *)mp->b_cont->b_rptr; 2792 2793 /* 2794 * First make sure that there is no other bridge that has this link. 2795 * We don't want to overlap operations from two bridges; the MAC layer 2796 * supports only one bridge on a given MAC at a time. 2797 * 2798 * We rely on the fact that there's just one taskq thread for the 2799 * bridging module: once we've checked for a duplicate, we can drop the 2800 * lock, because no other thread could possibly be adding another link 2801 * until we're done. 
2802 */ 2803 mutex_enter(&inst_lock); 2804 for (bipt = list_head(&inst_list); bipt != NULL; 2805 bipt = list_next(&inst_list, bipt)) { 2806 rw_enter(&bipt->bi_rwlock, RW_READER); 2807 for (blpt = list_head(&bipt->bi_links); blpt != NULL; 2808 blpt = list_next(&bipt->bi_links, blpt)) { 2809 if (linkid == blpt->bl_linkid) 2810 break; 2811 } 2812 rw_exit(&bipt->bi_rwlock); 2813 if (blpt != NULL) 2814 break; 2815 } 2816 mutex_exit(&inst_lock); 2817 if (bipt != NULL) { 2818 err = EBUSY; 2819 goto fail; 2820 } 2821 2822 if ((err = mac_open_by_linkid(linkid, &mh)) != 0) 2823 goto fail; 2824 macopen = B_TRUE; 2825 2826 /* we bridge only Ethernet */ 2827 mip = mac_info(mh); 2828 if (mip->mi_media != DL_ETHER) { 2829 err = ENOTSUP; 2830 goto fail; 2831 } 2832 2833 /* 2834 * Get the current maximum SDU on this interface. If there are other 2835 * links on the bridge, then this one must match, or it errors out. 2836 * Otherwise, the first link becomes the standard for the new bridge. 2837 */ 2838 mac_sdu_get(mh, NULL, &maxsdu); 2839 bmp = bip->bi_mac; 2840 if (list_is_empty(&bip->bi_links)) { 2841 bmp->bm_maxsdu = maxsdu; 2842 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 2843 } 2844 2845 /* figure the kstat name; also used as the mac client name */ 2846 i = MBLKL(mp->b_cont) - sizeof (datalink_id_t); 2847 if (i < 0 || i >= MAXLINKNAMELEN) 2848 i = MAXLINKNAMELEN - 1; 2849 bcopy(mp->b_cont->b_rptr + sizeof (datalink_id_t), linkname, i); 2850 linkname[i] = '\0'; 2851 (void) snprintf(kstatname, sizeof (kstatname), "%s-%s", bip->bi_name, 2852 linkname); 2853 2854 if ((blp = kmem_zalloc(sizeof (*blp), KM_NOSLEEP)) == NULL) { 2855 err = ENOMEM; 2856 goto fail; 2857 } 2858 blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED); 2859 if (blp->bl_lfailmp == NULL) { 2860 kmem_free(blp, sizeof (*blp)); 2861 blp = NULL; 2862 err = ENOMEM; 2863 goto fail; 2864 } 2865 2866 blp->bl_refs = 1; 2867 atomic_inc_uint(&bip->bi_refs); 2868 blp->bl_inst = bip; 2869 blp->bl_mh = mh; 2870 blp->bl_linkid = linkid; 2871 blp->bl_maxsdu = maxsdu; 2872 cv_init(&blp->bl_trillwait, NULL, CV_DRIVER, NULL); 2873 mutex_init(&blp->bl_trilllock, NULL, MUTEX_DRIVER, NULL); 2874 (void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs)); 2875 2876 err = mac_client_open(mh, &blp->bl_mch, kstatname, 0); 2877 if (err != 0) 2878 goto fail; 2879 blp->bl_flags |= BLF_CLIENT_OPEN; 2880 2881 err = mac_margin_add(mh, &blp->bl_margin, B_TRUE); 2882 if (err != 0) 2883 goto fail; 2884 blp->bl_flags |= BLF_MARGIN_ADDED; 2885 2886 blp->bl_mnh = mac_notify_add(mh, bridge_notify_cb, blp); 2887 2888 /* Enable Bridging on the link */ 2889 err = mac_bridge_set(mh, (mac_handle_t)blp); 2890 if (err != 0) 2891 goto fail; 2892 blp->bl_flags |= BLF_SET_BRIDGE; 2893 2894 err = mac_promisc_add(blp->bl_mch, MAC_CLIENT_PROMISC_ALL, NULL, 2895 blp, &blp->bl_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP); 2896 if (err != 0) 2897 goto fail; 2898 blp->bl_flags |= BLF_PROM_ADDED; 2899 2900 bridge_new_unicst(blp); 2901 2902 blp->bl_ksp = kstat_setup((kstat_named_t *)&blp->bl_kstats, 2903 link_kstats_list, Dim(link_kstats_list), kstatname); 2904 2905 /* 2906 * The link holds a reference to the bridge instance, so that the 2907 * instance can't go away before the link is freed. The insertion into 2908 * bi_links holds a reference on the link (reference set to 1 above). 2909 * When marking as removed from bi_links (BLF_DELETED), drop the 2910 * reference on the link. When freeing the link, drop the reference on 2911 * the instance. BLF_LINK_ADDED tracks link insertion in bi_links list. 
2912 */ 2913 rw_enter(&bip->bi_rwlock, RW_WRITER); 2914 list_insert_tail(&bip->bi_links, blp); 2915 blp->bl_flags |= BLF_LINK_ADDED; 2916 2917 /* 2918 * If the new link is no good on this bridge, then let the daemon know 2919 * about the problem. 2920 */ 2921 mlist = NULL; 2922 if (maxsdu != bmp->bm_maxsdu) 2923 link_sdu_fail(blp, B_TRUE, &mlist); 2924 rw_exit(&bip->bi_rwlock); 2925 send_up_messages(bip, mlist); 2926 2927 /* 2928 * Trigger a link state update so that if this link is the first one 2929 * "up" in the bridge, then we notify everyone. This triggers a trip 2930 * through bridge_ls_cb. 2931 */ 2932 linkstate = mac_stat_get(mh, MAC_STAT_LOWLINK_STATE); 2933 blp->bl_linkstate = LINK_STATE_DOWN; 2934 mac_link_update(mh, linkstate); 2935 2936 /* 2937 * We now need to report back to the stream that invoked us, and then 2938 * drop the reference on the stream that we're holding. 2939 */ 2940 miocack(bsp->bs_wq, mp, 0, 0); 2941 stream_unref(bsp); 2942 return; 2943 2944 fail: 2945 if (blp == NULL) { 2946 if (macopen) 2947 mac_close(mh); 2948 } else { 2949 link_shutdown(blp); 2950 } 2951 miocnak(bsp->bs_wq, mp, 0, err); 2952 stream_unref(bsp); 2953 } 2954 2955 static void 2956 bridge_rem_link(void *arg) 2957 { 2958 mblk_t *mp = arg; 2959 bridge_stream_t *bsp; 2960 bridge_inst_t *bip; 2961 bridge_mac_t *bmp; 2962 datalink_id_t linkid; 2963 bridge_link_t *blp, *blsave; 2964 boolean_t found; 2965 mblk_t *mlist; 2966 2967 bsp = (bridge_stream_t *)mp->b_next; 2968 mp->b_next = NULL; 2969 bip = bsp->bs_inst; 2970 /* LINTED: alignment */ 2971 linkid = *(datalink_id_t *)mp->b_cont->b_rptr; 2972 2973 /* 2974 * We become reader here so that we can loop over the other links and 2975 * deliver link up/down notification. 2976 */ 2977 rw_enter(&bip->bi_rwlock, RW_READER); 2978 found = B_FALSE; 2979 for (blp = list_head(&bip->bi_links); blp != NULL; 2980 blp = list_next(&bip->bi_links, blp)) { 2981 if (blp->bl_linkid == linkid && 2982 !(blp->bl_flags & BLF_DELETED)) { 2983 blp->bl_flags |= BLF_DELETED; 2984 (void) ddi_taskq_dispatch(bridge_taskq, link_shutdown, 2985 blp, DDI_SLEEP); 2986 found = B_TRUE; 2987 break; 2988 } 2989 } 2990 2991 /* 2992 * Check if this link is up and the remainder of the links are all 2993 * down. 2994 */ 2995 if (blp != NULL && blp->bl_linkstate != LINK_STATE_DOWN) { 2996 for (blp = list_head(&bip->bi_links); blp != NULL; 2997 blp = list_next(&bip->bi_links, blp)) { 2998 if (blp->bl_linkstate != LINK_STATE_DOWN && 2999 !(blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) 3000 break; 3001 } 3002 if (blp == NULL) { 3003 for (blp = list_head(&bip->bi_links); blp != NULL; 3004 blp = list_next(&bip->bi_links, blp)) { 3005 if (!(blp->bl_flags & BLF_DELETED)) 3006 mac_link_redo(blp->bl_mh, 3007 LINK_STATE_DOWN); 3008 } 3009 bmp = bip->bi_mac; 3010 bmp->bm_linkstate = LINK_STATE_DOWN; 3011 mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN); 3012 } 3013 } 3014 3015 /* 3016 * Check if there's just one working link left on the bridge. If so, 3017 * then that link is now authoritative for bridge MTU. 
3018 */ 3019 blsave = NULL; 3020 for (blp = list_head(&bip->bi_links); blp != NULL; 3021 blp = list_next(&bip->bi_links, blp)) { 3022 if (!(blp->bl_flags & BLF_DELETED)) { 3023 if (blsave == NULL) 3024 blsave = blp; 3025 else 3026 break; 3027 } 3028 } 3029 mlist = NULL; 3030 bmp = bip->bi_mac; 3031 if (blsave != NULL && blp == NULL && 3032 blsave->bl_maxsdu != bmp->bm_maxsdu) { 3033 bmp->bm_maxsdu = blsave->bl_maxsdu; 3034 (void) mac_maxsdu_update(bmp->bm_mh, blsave->bl_maxsdu); 3035 link_sdu_fail(blsave, B_FALSE, &mlist); 3036 } 3037 rw_exit(&bip->bi_rwlock); 3038 send_up_messages(bip, mlist); 3039 3040 if (found) 3041 miocack(bsp->bs_wq, mp, 0, 0); 3042 else 3043 miocnak(bsp->bs_wq, mp, 0, ENOENT); 3044 stream_unref(bsp); 3045 } 3046 3047 /* 3048 * This function intentionally returns with bi_rwlock held; it is intended for 3049 * quick checks and updates. 3050 */ 3051 static bridge_link_t * 3052 enter_link(bridge_inst_t *bip, datalink_id_t linkid) 3053 { 3054 bridge_link_t *blp; 3055 3056 rw_enter(&bip->bi_rwlock, RW_READER); 3057 for (blp = list_head(&bip->bi_links); blp != NULL; 3058 blp = list_next(&bip->bi_links, blp)) { 3059 if (blp->bl_linkid == linkid && !(blp->bl_flags & BLF_DELETED)) 3060 break; 3061 } 3062 return (blp); 3063 } 3064 3065 static void 3066 bridge_ioctl(queue_t *wq, mblk_t *mp) 3067 { 3068 bridge_stream_t *bsp = wq->q_ptr; 3069 bridge_inst_t *bip; 3070 struct iocblk *iop; 3071 int rc = EINVAL; 3072 int len = 0; 3073 bridge_link_t *blp; 3074 cred_t *cr; 3075 3076 /* LINTED: alignment */ 3077 iop = (struct iocblk *)mp->b_rptr; 3078 3079 /* 3080 * For now, all of the bridge ioctls are privileged. 3081 */ 3082 if ((cr = msg_getcred(mp, NULL)) == NULL) 3083 cr = iop->ioc_cr; 3084 if (cr != NULL && secpolicy_net_config(cr, B_FALSE) != 0) { 3085 miocnak(wq, mp, 0, EPERM); 3086 return; 3087 } 3088 3089 switch (iop->ioc_cmd) { 3090 case BRIOC_NEWBRIDGE: { 3091 bridge_newbridge_t *bnb; 3092 3093 if (bsp->bs_inst != NULL || 3094 (rc = miocpullup(mp, sizeof (bridge_newbridge_t))) != 0) 3095 break; 3096 /* LINTED: alignment */ 3097 bnb = (bridge_newbridge_t *)mp->b_cont->b_rptr; 3098 bnb->bnb_name[MAXNAMELEN-1] = '\0'; 3099 rc = bridge_create(bnb->bnb_linkid, bnb->bnb_name, &bip, cr); 3100 if (rc != 0) 3101 break; 3102 3103 rw_enter(&bip->bi_rwlock, RW_WRITER); 3104 if (bip->bi_control != NULL) { 3105 rw_exit(&bip->bi_rwlock); 3106 bridge_unref(bip); 3107 rc = EBUSY; 3108 } else { 3109 atomic_inc_uint(&bip->bi_refs); 3110 bsp->bs_inst = bip; /* stream holds reference */ 3111 bip->bi_control = bsp; 3112 rw_exit(&bip->bi_rwlock); 3113 rc = 0; 3114 } 3115 break; 3116 } 3117 3118 case BRIOC_ADDLINK: 3119 if ((bip = bsp->bs_inst) == NULL || 3120 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0) 3121 break; 3122 /* 3123 * We cannot perform the action in this thread, because we're 3124 * not in process context, and we may already be holding 3125 * MAC-related locks. Place the request on taskq. 3126 */ 3127 mp->b_next = (mblk_t *)bsp; 3128 stream_ref(bsp); 3129 (void) ddi_taskq_dispatch(bridge_taskq, bridge_add_link, mp, 3130 DDI_SLEEP); 3131 return; 3132 3133 case BRIOC_REMLINK: 3134 if ((bip = bsp->bs_inst) == NULL || 3135 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0) 3136 break; 3137 /* 3138 * We cannot perform the action in this thread, because we're 3139 * not in process context, and we may already be holding 3140 * MAC-related locks. Place the request on taskq. 
3141 */ 3142 mp->b_next = (mblk_t *)bsp; 3143 stream_ref(bsp); 3144 (void) ddi_taskq_dispatch(bridge_taskq, bridge_rem_link, mp, 3145 DDI_SLEEP); 3146 return; 3147 3148 case BRIOC_SETSTATE: { 3149 bridge_setstate_t *bss; 3150 3151 if ((bip = bsp->bs_inst) == NULL || 3152 (rc = miocpullup(mp, sizeof (*bss))) != 0) 3153 break; 3154 /* LINTED: alignment */ 3155 bss = (bridge_setstate_t *)mp->b_cont->b_rptr; 3156 if ((blp = enter_link(bip, bss->bss_linkid)) == NULL) { 3157 rc = ENOENT; 3158 } else { 3159 rc = 0; 3160 blp->bl_state = bss->bss_state; 3161 } 3162 rw_exit(&bip->bi_rwlock); 3163 break; 3164 } 3165 3166 case BRIOC_SETPVID: { 3167 bridge_setpvid_t *bsv; 3168 3169 if ((bip = bsp->bs_inst) == NULL || 3170 (rc = miocpullup(mp, sizeof (*bsv))) != 0) 3171 break; 3172 /* LINTED: alignment */ 3173 bsv = (bridge_setpvid_t *)mp->b_cont->b_rptr; 3174 if (bsv->bsv_vlan > VLAN_ID_MAX) 3175 break; 3176 if ((blp = enter_link(bip, bsv->bsv_linkid)) == NULL) { 3177 rc = ENOENT; 3178 } else if (blp->bl_pvid == bsv->bsv_vlan) { 3179 rc = 0; 3180 } else { 3181 rc = 0; 3182 BRIDGE_VLAN_CLR(blp, blp->bl_pvid); 3183 blp->bl_pvid = bsv->bsv_vlan; 3184 if (blp->bl_pvid != 0) 3185 BRIDGE_VLAN_SET(blp, blp->bl_pvid); 3186 } 3187 rw_exit(&bip->bi_rwlock); 3188 break; 3189 } 3190 3191 case BRIOC_VLANENAB: { 3192 bridge_vlanenab_t *bve; 3193 3194 if ((bip = bsp->bs_inst) == NULL || 3195 (rc = miocpullup(mp, sizeof (*bve))) != 0) 3196 break; 3197 /* LINTED: alignment */ 3198 bve = (bridge_vlanenab_t *)mp->b_cont->b_rptr; 3199 if (bve->bve_vlan > VLAN_ID_MAX) 3200 break; 3201 if ((blp = enter_link(bip, bve->bve_linkid)) == NULL) { 3202 rc = ENOENT; 3203 } else { 3204 rc = 0; 3205 /* special case: vlan 0 means "all" */ 3206 if (bve->bve_vlan == 0) { 3207 (void) memset(blp->bl_vlans, 3208 bve->bve_onoff ? ~0 : 0, 3209 sizeof (blp->bl_vlans)); 3210 BRIDGE_VLAN_CLR(blp, 0); 3211 if (blp->bl_pvid != 0) 3212 BRIDGE_VLAN_SET(blp, blp->bl_pvid); 3213 } else if (bve->bve_vlan == blp->bl_pvid) { 3214 rc = EINVAL; 3215 } else if (bve->bve_onoff) { 3216 BRIDGE_VLAN_SET(blp, bve->bve_vlan); 3217 } else { 3218 BRIDGE_VLAN_CLR(blp, bve->bve_vlan); 3219 } 3220 } 3221 rw_exit(&bip->bi_rwlock); 3222 break; 3223 } 3224 3225 case BRIOC_FLUSHFWD: { 3226 bridge_flushfwd_t *bff; 3227 bridge_fwd_t *bfp, *bfnext; 3228 avl_tree_t fwd_scavenge; 3229 int i; 3230 3231 if ((bip = bsp->bs_inst) == NULL || 3232 (rc = miocpullup(mp, sizeof (*bff))) != 0) 3233 break; 3234 /* LINTED: alignment */ 3235 bff = (bridge_flushfwd_t *)mp->b_cont->b_rptr; 3236 rw_enter(&bip->bi_rwlock, RW_WRITER); 3237 /* This case means "all" */ 3238 if (bff->bff_linkid == DATALINK_INVALID_LINKID) { 3239 blp = NULL; 3240 } else { 3241 for (blp = list_head(&bip->bi_links); blp != NULL; 3242 blp = list_next(&bip->bi_links, blp)) { 3243 if (blp->bl_linkid == bff->bff_linkid && 3244 !(blp->bl_flags & BLF_DELETED)) 3245 break; 3246 } 3247 if (blp == NULL) { 3248 rc = ENOENT; 3249 rw_exit(&bip->bi_rwlock); 3250 break; 3251 } 3252 } 3253 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 3254 offsetof(bridge_fwd_t, bf_node)); 3255 bfnext = avl_first(&bip->bi_fwd); 3256 while ((bfp = bfnext) != NULL) { 3257 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 3258 if (bfp->bf_flags & BFF_LOCALADDR) 3259 continue; 3260 if (blp != NULL) { 3261 for (i = 0; i < bfp->bf_maxlinks; i++) { 3262 if (bfp->bf_links[i] == blp) 3263 break; 3264 } 3265 /* 3266 * If the link is there and we're excluding, 3267 * then skip. If the link is not there and 3268 * we're doing only that link, then skip. 
3269 */ 3270 if ((i < bfp->bf_maxlinks) == bff->bff_exclude) 3271 continue; 3272 } 3273 ASSERT(bfp->bf_flags & BFF_INTREE); 3274 avl_remove(&bip->bi_fwd, bfp); 3275 bfp->bf_flags &= ~BFF_INTREE; 3276 avl_add(&fwd_scavenge, bfp); 3277 } 3278 rw_exit(&bip->bi_rwlock); 3279 bfnext = avl_first(&fwd_scavenge); 3280 while ((bfp = bfnext) != NULL) { 3281 bfnext = AVL_NEXT(&fwd_scavenge, bfp); 3282 avl_remove(&fwd_scavenge, bfp); 3283 fwd_unref(bfp); /* drop tree reference */ 3284 } 3285 avl_destroy(&fwd_scavenge); 3286 break; 3287 } 3288 3289 case BRIOC_TABLEMAX: 3290 if ((bip = bsp->bs_inst) == NULL || 3291 (rc = miocpullup(mp, sizeof (uint32_t))) != 0) 3292 break; 3293 /* LINTED: alignment */ 3294 bip->bi_tablemax = *(uint32_t *)mp->b_cont->b_rptr; 3295 break; 3296 } 3297 3298 if (rc == 0) 3299 miocack(wq, mp, len, 0); 3300 else 3301 miocnak(wq, mp, 0, rc); 3302 } 3303 3304 static void 3305 bridge_wput(queue_t *wq, mblk_t *mp) 3306 { 3307 switch (DB_TYPE(mp)) { 3308 case M_IOCTL: 3309 bridge_ioctl(wq, mp); 3310 break; 3311 case M_FLUSH: 3312 if (*mp->b_rptr & FLUSHW) 3313 *mp->b_rptr &= ~FLUSHW; 3314 if (*mp->b_rptr & FLUSHR) 3315 qreply(wq, mp); 3316 else 3317 freemsg(mp); 3318 break; 3319 default: 3320 freemsg(mp); 3321 break; 3322 } 3323 } 3324 3325 /* 3326 * This function allocates the main data structures for the bridge driver and 3327 * connects us into devfs. 3328 */ 3329 static void 3330 bridge_inst_init(void) 3331 { 3332 bridge_scan_interval = 5 * drv_usectohz(1000000); 3333 bridge_fwd_age = 25 * drv_usectohz(1000000); 3334 3335 rw_init(&bmac_rwlock, NULL, RW_DRIVER, NULL); 3336 list_create(&bmac_list, sizeof (bridge_mac_t), 3337 offsetof(bridge_mac_t, bm_node)); 3338 list_create(&inst_list, sizeof (bridge_inst_t), 3339 offsetof(bridge_inst_t, bi_node)); 3340 cv_init(&inst_cv, NULL, CV_DRIVER, NULL); 3341 mutex_init(&inst_lock, NULL, MUTEX_DRIVER, NULL); 3342 cv_init(&stream_ref_cv, NULL, CV_DRIVER, NULL); 3343 mutex_init(&stream_ref_lock, NULL, MUTEX_DRIVER, NULL); 3344 3345 mac_bridge_vectors(bridge_xmit_cb, bridge_recv_cb, bridge_ref_cb, 3346 bridge_ls_cb); 3347 } 3348 3349 /* 3350 * This function disconnects from devfs and destroys all data structures in 3351 * preparation for unload. It's assumed that there are no active bridge 3352 * references left at this point. 3353 */ 3354 static void 3355 bridge_inst_fini(void) 3356 { 3357 mac_bridge_vectors(NULL, NULL, NULL, NULL); 3358 if (bridge_timerid != 0) 3359 (void) untimeout(bridge_timerid); 3360 rw_destroy(&bmac_rwlock); 3361 list_destroy(&bmac_list); 3362 list_destroy(&inst_list); 3363 cv_destroy(&inst_cv); 3364 mutex_destroy(&inst_lock); 3365 cv_destroy(&stream_ref_cv); 3366 mutex_destroy(&stream_ref_lock); 3367 } 3368 3369 /* 3370 * bridge_attach() 3371 * 3372 * Description: 3373 * Attach bridge driver to the system. 
3374 */ 3375 static int 3376 bridge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3377 { 3378 if (cmd != DDI_ATTACH) 3379 return (DDI_FAILURE); 3380 3381 if (ddi_create_minor_node(dip, BRIDGE_CTL, S_IFCHR, 0, DDI_PSEUDO, 3382 CLONE_DEV) == DDI_FAILURE) { 3383 return (DDI_FAILURE); 3384 } 3385 3386 if (dld_ioc_register(BRIDGE_IOC, bridge_ioc_list, 3387 DLDIOCCNT(bridge_ioc_list)) != 0) { 3388 ddi_remove_minor_node(dip, BRIDGE_CTL); 3389 return (DDI_FAILURE); 3390 } 3391 3392 bridge_dev_info = dip; 3393 bridge_major = ddi_driver_major(dip); 3394 bridge_taskq = ddi_taskq_create(dip, BRIDGE_DEV_NAME, 1, 3395 TASKQ_DEFAULTPRI, 0); 3396 return (DDI_SUCCESS); 3397 } 3398 3399 /* 3400 * bridge_detach() 3401 * 3402 * Description: 3403 * Detach an interface to the system. 3404 */ 3405 static int 3406 bridge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3407 { 3408 if (cmd != DDI_DETACH) 3409 return (DDI_FAILURE); 3410 3411 ddi_remove_minor_node(dip, NULL); 3412 ddi_taskq_destroy(bridge_taskq); 3413 bridge_dev_info = NULL; 3414 return (DDI_SUCCESS); 3415 } 3416 3417 /* 3418 * bridge_info() 3419 * 3420 * Description: 3421 * Translate "dev_t" to a pointer to the associated "dev_info_t". 3422 */ 3423 /* ARGSUSED */ 3424 static int 3425 bridge_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, 3426 void **result) 3427 { 3428 int rc; 3429 3430 switch (infocmd) { 3431 case DDI_INFO_DEVT2DEVINFO: 3432 if (bridge_dev_info == NULL) { 3433 rc = DDI_FAILURE; 3434 } else { 3435 *result = (void *)bridge_dev_info; 3436 rc = DDI_SUCCESS; 3437 } 3438 break; 3439 case DDI_INFO_DEVT2INSTANCE: 3440 *result = NULL; 3441 rc = DDI_SUCCESS; 3442 break; 3443 default: 3444 rc = DDI_FAILURE; 3445 break; 3446 } 3447 return (rc); 3448 } 3449 3450 static struct module_info bridge_modinfo = { 3451 2105, /* mi_idnum */ 3452 BRIDGE_DEV_NAME, /* mi_idname */ 3453 0, /* mi_minpsz */ 3454 16384, /* mi_maxpsz */ 3455 65536, /* mi_hiwat */ 3456 128 /* mi_lowat */ 3457 }; 3458 3459 static struct qinit bridge_rinit = { 3460 NULL, /* qi_putp */ 3461 NULL, /* qi_srvp */ 3462 bridge_open, /* qi_qopen */ 3463 bridge_close, /* qi_qclose */ 3464 NULL, /* qi_qadmin */ 3465 &bridge_modinfo, /* qi_minfo */ 3466 NULL /* qi_mstat */ 3467 }; 3468 3469 static struct qinit bridge_winit = { 3470 (int (*)())bridge_wput, /* qi_putp */ 3471 NULL, /* qi_srvp */ 3472 NULL, /* qi_qopen */ 3473 NULL, /* qi_qclose */ 3474 NULL, /* qi_qadmin */ 3475 &bridge_modinfo, /* qi_minfo */ 3476 NULL /* qi_mstat */ 3477 }; 3478 3479 static struct streamtab bridge_tab = { 3480 &bridge_rinit, /* st_rdinit */ 3481 &bridge_winit /* st_wrinit */ 3482 }; 3483 3484 /* No STREAMS perimeters; we do all our own locking */ 3485 DDI_DEFINE_STREAM_OPS(bridge_ops, nulldev, nulldev, bridge_attach, 3486 bridge_detach, nodev, bridge_info, D_NEW | D_MP, &bridge_tab, 3487 ddi_quiesce_not_supported); 3488 3489 static struct modldrv modldrv = { 3490 &mod_driverops, 3491 "bridging driver", 3492 &bridge_ops 3493 }; 3494 3495 static struct modlinkage modlinkage = { 3496 MODREV_1, 3497 (void *)&modldrv, 3498 NULL 3499 }; 3500 3501 int 3502 _init(void) 3503 { 3504 int retv; 3505 3506 mac_init_ops(NULL, BRIDGE_DEV_NAME); 3507 bridge_inst_init(); 3508 if ((retv = mod_install(&modlinkage)) != 0) 3509 bridge_inst_fini(); 3510 return (retv); 3511 } 3512 3513 int 3514 _fini(void) 3515 { 3516 int retv; 3517 3518 rw_enter(&bmac_rwlock, RW_READER); 3519 retv = list_is_empty(&bmac_list) ? 
0 : EBUSY; 3520 rw_exit(&bmac_rwlock); 3521 if (retv == 0 && 3522 (retv = mod_remove(&modlinkage)) == 0) 3523 bridge_inst_fini(); 3524 return (retv); 3525 } 3526 3527 int 3528 _info(struct modinfo *modinfop) 3529 { 3530 return (mod_info(&modlinkage, modinfop)); 3531 } 3532