1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * This module implements a STREAMS driver that provides layer-two (Ethernet) 29 * bridging functionality. The STREAMS interface is used to provide 30 * observability (snoop/wireshark) and control, but not for interface plumbing. 
31 */ 32 33 #include <sys/types.h> 34 #include <sys/bitmap.h> 35 #include <sys/cmn_err.h> 36 #include <sys/conf.h> 37 #include <sys/ddi.h> 38 #include <sys/errno.h> 39 #include <sys/kstat.h> 40 #include <sys/modctl.h> 41 #include <sys/note.h> 42 #include <sys/param.h> 43 #include <sys/policy.h> 44 #include <sys/sdt.h> 45 #include <sys/stat.h> 46 #include <sys/stream.h> 47 #include <sys/stropts.h> 48 #include <sys/strsun.h> 49 #include <sys/sunddi.h> 50 #include <sys/sysmacros.h> 51 #include <sys/systm.h> 52 #include <sys/time.h> 53 #include <sys/dlpi.h> 54 #include <sys/dls.h> 55 #include <sys/mac_ether.h> 56 #include <sys/mac_provider.h> 57 #include <sys/mac_client_priv.h> 58 #include <sys/mac_impl.h> 59 #include <sys/vlan.h> 60 #include <net/bridge.h> 61 #include <net/bridge_impl.h> 62 #include <net/trill.h> 63 #include <sys/dld_ioc.h> 64 65 /* 66 * Locks and reference counts: object lifetime and design. 67 * 68 * bridge_mac_t 69 * Bridge mac (snoop) instances are in bmac_list, which is protected by 70 * bmac_rwlock. They're allocated by bmac_alloc and freed by bridge_timer(). 71 * Every bridge_inst_t has a single bridge_mac_t, but when bridge_inst_t goes 72 * away, the bridge_mac_t remains until either all of the users go away 73 * (detected by a timer) or until the instance is picked up again by the same 74 * bridge starting back up. 75 * 76 * bridge_inst_t 77 * Bridge instances are in inst_list, which is protected by inst_lock. 78 * They're allocated by inst_alloc() and freed by inst_free(). After 79 * allocation, an instance is placed in inst_list, and the reference count is 80 * incremented to represent this. That reference is decremented when the 81 * BIF_SHUTDOWN flag is set, and no new increments may occur. When the last 82 * reference is freed, the instance is removed from the list. 83 * 84 * Bridge instances have lists of links and an AVL tree of forwarding 85 * entries. Each of these structures holds one reference on the bridge 86 * instance. 
These lists and tree are protected by bi_rwlock. 87 * 88 * bridge_stream_t 89 * Bridge streams are allocated by stream_alloc() and freed by stream_free(). 90 * These streams are created when "bridged" opens /dev/bridgectl, and are 91 * used to create new bridge instances (via BRIOC_NEWBRIDGE) and control the 92 * links on the bridge. When a stream closes, the bridge instance created is 93 * destroyed. There's at most one bridge instance for a given control 94 * stream. 95 * 96 * bridge_link_t 97 * Links are allocated by bridge_add_link() and freed by link_free(). The 98 * bi_links list holds a reference to the link. When the BLF_DELETED flag is 99 * set, that reference is dropped. The link isn't removed from the list 100 * until the last reference drops. Each forwarding entry that uses a given 101 * link holds a reference, as does each thread transmitting a packet via the 102 * link. The MAC layer calls in via bridge_ref_cb() to hold a reference on 103 * a link when transmitting. 104 * 105 * It's important that once BLF_DELETED is set, there's no way for the 106 * reference count to increase again. If it can, then the link may be 107 * double-freed. The BLF_FREED flag is intended for use with assertions to 108 * guard against this in testing. 109 * 110 * bridge_fwd_t 111 * Bridge forwarding entries are allocated by bridge_recv_cb() and freed by 112 * fwd_free(). The bi_fwd AVL tree holds one reference to the entry. Unlike 113 * other data structures, the reference is dropped when the entry is removed 114 * from the tree by fwd_delete(), and the BFF_INTREE flag is removed. Each 115 * thread that's forwarding a packet to a known destination holds a reference 116 * to a forwarding entry. 117 * 118 * TRILL notes: 119 * 120 * The TRILL module does all of its I/O through bridging. It uses references 121 * on the bridge_inst_t and bridge_link_t structures, and has seven entry 122 * points and four callbacks. 
One entry point is for setting the callbacks 123 * (bridge_trill_register_cb). There are four entry points for taking bridge 124 * and link references (bridge_trill_{br,ln}{ref,unref}). The final two 125 * entry points are for decapsulated packets from TRILL (bridge_trill_decaps) 126 * that need to be bridged locally, and for TRILL-encapsulated output packets 127 * (bridge_trill_output). 128 * 129 * The four callbacks comprise two notification functions for bridges and 130 * links being deleted, one function for raw received TRILL packets, and one 131 * for bridge output to non-local TRILL destinations (tunnel entry). 132 */ 133 134 /* 135 * Ethernet reserved multicast addresses for TRILL; used also in TRILL module. 136 */ 137 const uint8_t all_isis_rbridges[] = ALL_ISIS_RBRIDGES; 138 static const uint8_t all_esadi_rbridges[] = ALL_ESADI_RBRIDGES; 139 const uint8_t bridge_group_address[] = BRIDGE_GROUP_ADDRESS; 140 141 static const char *inst_kstats_list[] = { KSINST_NAMES }; 142 static const char *link_kstats_list[] = { KSLINK_NAMES }; 143 144 #define KREF(p, m, vn) p->m.vn.value.ui64 145 #define KINCR(p, m, vn) ++KREF(p, m, vn) 146 #define KDECR(p, m, vn) --KREF(p, m, vn) 147 148 #define KIPINCR(p, vn) KINCR(p, bi_kstats, vn) 149 #define KIPDECR(p, vn) KDECR(p, bi_kstats, vn) 150 #define KLPINCR(p, vn) KINCR(p, bl_kstats, vn) 151 152 #define KIINCR(vn) KIPINCR(bip, vn) 153 #define KIDECR(vn) KIPDECR(bip, vn) 154 #define KLINCR(vn) KLPINCR(blp, vn) 155 156 #define Dim(x) (sizeof (x) / sizeof (*(x))) 157 158 /* Amount of overhead added when encapsulating with VLAN headers */ 159 #define VLAN_INCR (sizeof (struct ether_vlan_header) - \ 160 sizeof (struct ether_header)) 161 162 static dev_info_t *bridge_dev_info; 163 static major_t bridge_major; 164 static ddi_taskq_t *bridge_taskq; 165 166 /* 167 * These are the bridge instance management data structures. The mutex lock 168 * protects the list of bridge instances. 
A reference count is then used on 169 * each instance to determine when to free it. We use mac_minor_hold() to 170 * allocate minor_t values, which are used both for self-cloning /dev/net/ 171 * device nodes as well as client streams. Minor node 0 is reserved for the 172 * allocation control node. 173 */ 174 static list_t inst_list; 175 static kcondvar_t inst_cv; /* Allows us to wait for shutdown */ 176 static kmutex_t inst_lock; 177 178 static krwlock_t bmac_rwlock; 179 static list_t bmac_list; 180 181 /* Wait for taskq entries that use STREAMS */ 182 static kcondvar_t stream_ref_cv; 183 static kmutex_t stream_ref_lock; 184 185 static timeout_id_t bridge_timerid; 186 static clock_t bridge_scan_interval; 187 static clock_t bridge_fwd_age; 188 189 static bridge_inst_t *bridge_find_name(const char *); 190 static void bridge_timer(void *); 191 static void bridge_unref(bridge_inst_t *); 192 193 static const uint8_t zero_addr[ETHERADDRL] = { 0 }; 194 195 /* Global TRILL linkage */ 196 static trill_recv_pkt_t trill_recv_fn; 197 static trill_encap_pkt_t trill_encap_fn; 198 static trill_br_dstr_t trill_brdstr_fn; 199 static trill_ln_dstr_t trill_lndstr_fn; 200 201 /* special settings to accommodate DLD flow control; see dld_str.c */ 202 static struct module_info bridge_dld_modinfo = { 203 0, /* mi_idnum */ 204 BRIDGE_DEV_NAME, /* mi_idname */ 205 0, /* mi_minpsz */ 206 INFPSZ, /* mi_maxpsz */ 207 1, /* mi_hiwat */ 208 0 /* mi_lowat */ 209 }; 210 211 static struct qinit bridge_dld_rinit = { 212 NULL, /* qi_putp */ 213 NULL, /* qi_srvp */ 214 dld_open, /* qi_qopen */ 215 dld_close, /* qi_qclose */ 216 NULL, /* qi_qadmin */ 217 &bridge_dld_modinfo, /* qi_minfo */ 218 NULL /* qi_mstat */ 219 }; 220 221 static struct qinit bridge_dld_winit = { 222 (int (*)())dld_wput, /* qi_putp */ 223 (int (*)())dld_wsrv, /* qi_srvp */ 224 NULL, /* qi_qopen */ 225 NULL, /* qi_qclose */ 226 NULL, /* qi_qadmin */ 227 &bridge_dld_modinfo, /* qi_minfo */ 228 NULL /* qi_mstat */ 229 }; 230 231 
static int bridge_ioc_listfwd(void *, intptr_t, int, cred_t *, int *); 232 233 /* GLDv3 control ioctls used by Bridging */ 234 static dld_ioc_info_t bridge_ioc_list[] = { 235 {BRIDGE_IOC_LISTFWD, DLDCOPYINOUT, sizeof (bridge_listfwd_t), 236 bridge_ioc_listfwd, NULL}, 237 }; 238 239 /* 240 * Given a bridge mac pointer, get a ref-held pointer to the corresponding 241 * bridge instance, if any. We must hold the global bmac_rwlock so that 242 * bm_inst doesn't slide out from under us. 243 */ 244 static bridge_inst_t * 245 mac_to_inst(const bridge_mac_t *bmp) 246 { 247 bridge_inst_t *bip; 248 249 rw_enter(&bmac_rwlock, RW_READER); 250 if ((bip = bmp->bm_inst) != NULL) 251 atomic_inc_uint(&bip->bi_refs); 252 rw_exit(&bmac_rwlock); 253 return (bip); 254 } 255 256 static void 257 link_sdu_fail(bridge_link_t *blp, boolean_t failed, mblk_t **mlist) 258 { 259 mblk_t *mp; 260 bridge_ctl_t *bcp; 261 bridge_link_t *blcmp; 262 bridge_inst_t *bip; 263 bridge_mac_t *bmp; 264 265 if (failed) { 266 if (blp->bl_flags & BLF_SDUFAIL) 267 return; 268 blp->bl_flags |= BLF_SDUFAIL; 269 } else { 270 if (!(blp->bl_flags & BLF_SDUFAIL)) 271 return; 272 blp->bl_flags &= ~BLF_SDUFAIL; 273 } 274 275 /* 276 * If this link is otherwise up, then check if there are any other 277 * non-failed non-down links. If not, then we control the state of the 278 * whole bridge. 279 */ 280 bip = blp->bl_inst; 281 bmp = bip->bi_mac; 282 if (blp->bl_linkstate != LINK_STATE_DOWN) { 283 for (blcmp = list_head(&bip->bi_links); blcmp != NULL; 284 blcmp = list_next(&bip->bi_links, blcmp)) { 285 if (blp != blcmp && 286 !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) && 287 blcmp->bl_linkstate != LINK_STATE_DOWN) 288 break; 289 } 290 if (blcmp == NULL) { 291 bmp->bm_linkstate = failed ? LINK_STATE_DOWN : 292 LINK_STATE_UP; 293 mac_link_redo(bmp->bm_mh, bmp->bm_linkstate); 294 } 295 } 296 297 /* 298 * If we're becoming failed, then the link's current true state needs 299 * to be reflected upwards to this link's clients. 
If we're becoming 300 * unfailed, then we get the state of the bridge instead on all 301 * clients. 302 */ 303 if (failed) { 304 if (bmp->bm_linkstate != blp->bl_linkstate) 305 mac_link_redo(blp->bl_mh, blp->bl_linkstate); 306 } else { 307 mac_link_redo(blp->bl_mh, bmp->bm_linkstate); 308 } 309 310 /* get the current mblk we're going to send up */ 311 if ((mp = blp->bl_lfailmp) == NULL && 312 (mp = allocb(sizeof (bridge_ctl_t), BPRI_MED)) == NULL) 313 return; 314 315 /* get a new one for next time */ 316 blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED); 317 318 /* if none for next time, then report only failures */ 319 if (blp->bl_lfailmp == NULL && !failed) { 320 blp->bl_lfailmp = mp; 321 return; 322 } 323 324 /* LINTED: alignment */ 325 bcp = (bridge_ctl_t *)mp->b_rptr; 326 bcp->bc_linkid = blp->bl_linkid; 327 bcp->bc_failed = failed; 328 mp->b_wptr = (uchar_t *)(bcp + 1); 329 mp->b_next = *mlist; 330 *mlist = mp; 331 } 332 333 /* 334 * Send control messages (link SDU changes) using the stream to the 335 * bridge instance daemon. 
336 */ 337 static void 338 send_up_messages(bridge_inst_t *bip, mblk_t *mp) 339 { 340 mblk_t *mnext; 341 queue_t *rq; 342 343 rq = bip->bi_control->bs_wq; 344 rq = OTHERQ(rq); 345 while (mp != NULL) { 346 mnext = mp->b_next; 347 mp->b_next = NULL; 348 putnext(rq, mp); 349 mp = mnext; 350 } 351 } 352 353 /* ARGSUSED */ 354 static int 355 bridge_m_getstat(void *arg, uint_t stat, uint64_t *val) 356 { 357 return (ENOTSUP); 358 } 359 360 static int 361 bridge_m_start(void *arg) 362 { 363 bridge_mac_t *bmp = arg; 364 365 bmp->bm_flags |= BMF_STARTED; 366 return (0); 367 } 368 369 static void 370 bridge_m_stop(void *arg) 371 { 372 bridge_mac_t *bmp = arg; 373 374 bmp->bm_flags &= ~BMF_STARTED; 375 } 376 377 /* ARGSUSED */ 378 static int 379 bridge_m_setpromisc(void *arg, boolean_t on) 380 { 381 return (0); 382 } 383 384 /* ARGSUSED */ 385 static int 386 bridge_m_multicst(void *arg, boolean_t add, const uint8_t *mca) 387 { 388 return (0); 389 } 390 391 /* ARGSUSED */ 392 static int 393 bridge_m_unicst(void *arg, const uint8_t *macaddr) 394 { 395 return (ENOTSUP); 396 } 397 398 static mblk_t * 399 bridge_m_tx(void *arg, mblk_t *mp) 400 { 401 _NOTE(ARGUNUSED(arg)); 402 freemsgchain(mp); 403 return (NULL); 404 } 405 406 /* ARGSUSED */ 407 static int 408 bridge_ioc_listfwd(void *karg, intptr_t arg, int mode, cred_t *cred, int *rvalp) 409 { 410 bridge_listfwd_t *blf = karg; 411 bridge_inst_t *bip; 412 bridge_fwd_t *bfp, match; 413 avl_index_t where; 414 415 bip = bridge_find_name(blf->blf_name); 416 if (bip == NULL) 417 return (ENOENT); 418 419 bcopy(blf->blf_dest, match.bf_dest, ETHERADDRL); 420 match.bf_flags |= BFF_VLANLOCAL; 421 rw_enter(&bip->bi_rwlock, RW_READER); 422 if ((bfp = avl_find(&bip->bi_fwd, &match, &where)) == NULL) 423 bfp = avl_nearest(&bip->bi_fwd, where, AVL_AFTER); 424 else 425 bfp = AVL_NEXT(&bip->bi_fwd, bfp); 426 if (bfp == NULL) { 427 bzero(blf, sizeof (*blf)); 428 } else { 429 bcopy(bfp->bf_dest, blf->blf_dest, ETHERADDRL); 430 blf->blf_trill_nick = 
bfp->bf_trill_nick; 431 blf->blf_ms_age = 432 drv_hztousec(ddi_get_lbolt() - bfp->bf_lastheard) / 1000; 433 blf->blf_is_local = 434 (bfp->bf_flags & BFF_LOCALADDR) != 0; 435 blf->blf_linkid = bfp->bf_links[0]->bl_linkid; 436 } 437 rw_exit(&bip->bi_rwlock); 438 bridge_unref(bip); 439 return (0); 440 } 441 442 static int 443 bridge_m_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 444 uint_t pr_valsize, const void *pr_val) 445 { 446 bridge_mac_t *bmp = arg; 447 bridge_inst_t *bip; 448 bridge_link_t *blp; 449 int err; 450 uint_t maxsdu; 451 mblk_t *mlist; 452 453 _NOTE(ARGUNUSED(pr_name)); 454 switch (pr_num) { 455 case MAC_PROP_MTU: 456 if (pr_valsize < sizeof (bmp->bm_maxsdu)) { 457 err = EINVAL; 458 break; 459 } 460 (void) bcopy(pr_val, &maxsdu, sizeof (maxsdu)); 461 if (maxsdu == bmp->bm_maxsdu) { 462 err = 0; 463 } else if ((bip = mac_to_inst(bmp)) == NULL) { 464 err = ENXIO; 465 } else { 466 rw_enter(&bip->bi_rwlock, RW_WRITER); 467 mlist = NULL; 468 for (blp = list_head(&bip->bi_links); blp != NULL; 469 blp = list_next(&bip->bi_links, blp)) { 470 if (blp->bl_flags & BLF_DELETED) 471 continue; 472 if (blp->bl_maxsdu == maxsdu) 473 link_sdu_fail(blp, B_FALSE, &mlist); 474 else if (blp->bl_maxsdu == bmp->bm_maxsdu) 475 link_sdu_fail(blp, B_TRUE, &mlist); 476 } 477 rw_exit(&bip->bi_rwlock); 478 bmp->bm_maxsdu = maxsdu; 479 (void) mac_maxsdu_update(bmp->bm_mh, maxsdu); 480 send_up_messages(bip, mlist); 481 bridge_unref(bip); 482 err = 0; 483 } 484 break; 485 486 default: 487 err = ENOTSUP; 488 break; 489 } 490 return (err); 491 } 492 493 static int 494 bridge_m_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num, 495 uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm) 496 { 497 bridge_mac_t *bmp = arg; 498 int err = 0; 499 500 _NOTE(ARGUNUSED(pr_name)); 501 switch (pr_num) { 502 case MAC_PROP_MTU: { 503 mac_propval_range_t range; 504 505 if (!(pr_flags & MAC_PROP_POSSIBLE)) 506 return (ENOTSUP); 507 if (pr_valsize < sizeof 
(mac_propval_range_t)) 508 return (EINVAL); 509 range.mpr_count = 1; 510 range.mpr_type = MAC_PROPVAL_UINT32; 511 range.range_uint32[0].mpur_min = 512 range.range_uint32[0].mpur_max = bmp->bm_maxsdu; 513 bcopy(&range, pr_val, sizeof (range)); 514 *perm = MAC_PROP_PERM_RW; 515 break; 516 } 517 case MAC_PROP_STATUS: 518 if (pr_valsize < sizeof (bmp->bm_linkstate)) { 519 err = EINVAL; 520 } else { 521 bcopy(&bmp->bm_linkstate, pr_val, 522 sizeof (&bmp->bm_linkstate)); 523 *perm = MAC_PROP_PERM_READ; 524 } 525 break; 526 527 default: 528 err = ENOTSUP; 529 break; 530 } 531 return (err); 532 } 533 534 static mac_callbacks_t bridge_m_callbacks = { 535 MC_SETPROP | MC_GETPROP, 536 bridge_m_getstat, 537 bridge_m_start, 538 bridge_m_stop, 539 bridge_m_setpromisc, 540 bridge_m_multicst, 541 bridge_m_unicst, 542 bridge_m_tx, 543 NULL, /* ioctl */ 544 NULL, /* getcapab */ 545 NULL, /* open */ 546 NULL, /* close */ 547 bridge_m_setprop, 548 bridge_m_getprop 549 }; 550 551 /* 552 * Create kstats from a list. 553 */ 554 static kstat_t * 555 kstat_setup(kstat_named_t *knt, const char **names, int nstat, 556 const char *unitname) 557 { 558 kstat_t *ksp; 559 int i; 560 561 for (i = 0; i < nstat; i++) 562 kstat_named_init(&knt[i], names[i], KSTAT_DATA_UINT64); 563 564 ksp = kstat_create_zone(BRIDGE_DEV_NAME, 0, unitname, "net", 565 KSTAT_TYPE_NAMED, nstat, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID); 566 if (ksp != NULL) { 567 ksp->ks_data = knt; 568 kstat_install(ksp); 569 } 570 return (ksp); 571 } 572 573 /* 574 * Find an existing bridge_mac_t structure or allocate a new one for the given 575 * bridge instance. This creates the mac driver instance that snoop can use. 
 */
static int
bmac_alloc(bridge_inst_t *bip, bridge_mac_t **bmacp)
{
	bridge_mac_t *bmp, *bnew;
	mac_register_t *mac;
	int err;

	*bmacp = NULL;
	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
		return (EINVAL);

	/* allocate before taking the lock; KM_SLEEP may block */
	bnew = kmem_zalloc(sizeof (*bnew), KM_SLEEP);

	/*
	 * First see whether a disconnected bridge_mac_t with this name is
	 * still lingering on bmac_list (the instance restarted before the
	 * timer could reap it); if so, reattach and reuse it.
	 */
	rw_enter(&bmac_rwlock, RW_WRITER);
	for (bmp = list_head(&bmac_list); bmp != NULL;
	    bmp = list_next(&bmac_list, bmp)) {
		if (strcmp(bip->bi_name, bmp->bm_name) == 0) {
			ASSERT(bmp->bm_inst == NULL);
			bmp->bm_inst = bip;
			rw_exit(&bmac_rwlock);
			kmem_free(bnew, sizeof (*bnew));
			mac_free(mac);
			*bmacp = bmp;
			return (0);
		}
	}

	/* no existing entry: register a brand-new mac instance */
	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	mac->m_driver = bnew;
	mac->m_dip = bridge_dev_info;
	mac->m_instance = (uint_t)-1;
	mac->m_src_addr = (uint8_t *)zero_addr;
	mac->m_callbacks = &bridge_m_callbacks;

	/*
	 * Note that the SDU limits are irrelevant, as nobody transmits on the
	 * bridge node itself.  It's mainly for monitoring but we allow
	 * setting the bridge MTU for quick transition of all links part of the
	 * bridge to a new MTU.
	 */
	mac->m_min_sdu = 1;
	mac->m_max_sdu = 1500;
	err = mac_register(mac, &bnew->bm_mh);
	mac_free(mac);
	if (err != 0) {
		rw_exit(&bmac_rwlock);
		kmem_free(bnew, sizeof (*bnew));
		return (err);
	}

	bnew->bm_inst = bip;
	(void) strcpy(bnew->bm_name, bip->bi_name);
	/* the first entry on the list arms the scavenging timer */
	if (list_is_empty(&bmac_list)) {
		bridge_timerid = timeout(bridge_timer, NULL,
		    bridge_scan_interval);
	}
	list_insert_tail(&bmac_list, bnew);
	rw_exit(&bmac_rwlock);

	/*
	 * Mark the MAC as unable to go "active" so that only passive clients
	 * (such as snoop) can bind to it.
	 */
	mac_no_active(bnew->bm_mh);
	*bmacp = bnew;
	return (0);
}

/*
 * Disconnect the given bridge_mac_t from its bridge instance.  The bridge
 * instance is going away.
The mac instance can't go away until the clients
 * are gone (see bridge_timer).
 */
static void
bmac_disconnect(bridge_mac_t *bmp)
{
	bridge_inst_t *bip;

	/* force the observability node down before detaching */
	bmp->bm_linkstate = LINK_STATE_DOWN;
	mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN);

	rw_enter(&bmac_rwlock, RW_READER);
	bip = bmp->bm_inst;
	bip->bi_mac = NULL;
	bmp->bm_inst = NULL;
	rw_exit(&bmac_rwlock);
}

/* This is used by the avl trees to sort forwarding table entries */
static int
fwd_compare(const void *addr1, const void *addr2)
{
	const bridge_fwd_t *fwd1 = addr1;
	const bridge_fwd_t *fwd2 = addr2;
	int diff = memcmp(fwd1->bf_dest, fwd2->bf_dest, ETHERADDRL);

	/* primary key: destination MAC address */
	if (diff != 0)
		return (diff > 0 ? 1 : -1);

	/* secondary key: VLAN ID, but only when the VLANLOCAL bits differ */
	if ((fwd1->bf_flags ^ fwd2->bf_flags) & BFF_VLANLOCAL) {
		if (fwd1->bf_vlanid > fwd2->bf_vlanid)
			return (1);
		else if (fwd1->bf_vlanid < fwd2->bf_vlanid)
			return (-1);
	}
	return (0);
}

/*
 * Tear down and free a bridge instance.  Called when the last reference is
 * dropped (see bridge_unref); the mac must already be disconnected.
 */
static void
inst_free(bridge_inst_t *bip)
{
	ASSERT(bip->bi_mac == NULL);
	rw_destroy(&bip->bi_rwlock);
	list_destroy(&bip->bi_links);
	cv_destroy(&bip->bi_linkwait);
	avl_destroy(&bip->bi_fwd);
	if (bip->bi_ksp != NULL)
		kstat_delete(bip->bi_ksp);
	kmem_free(bip, sizeof (*bip));
}

/*
 * Allocate a new bridge instance with the given name.  The caller gets the
 * initial reference (bi_refs == 1).
 */
static bridge_inst_t *
inst_alloc(const char *bridge)
{
	bridge_inst_t *bip;

	bip = kmem_zalloc(sizeof (*bip), KM_SLEEP);
	bip->bi_refs = 1;
	(void) strcpy(bip->bi_name, bridge);
	rw_init(&bip->bi_rwlock, NULL, RW_DRIVER, NULL);
	list_create(&bip->bi_links, sizeof (bridge_link_t),
	    offsetof(bridge_link_t, bl_node));
	cv_init(&bip->bi_linkwait, NULL, CV_DRIVER, NULL);
	avl_create(&bip->bi_fwd, fwd_compare, sizeof (bridge_fwd_t),
	    offsetof(bridge_fwd_t, bf_node));
	return (bip);
}

/*
 * Look up a bridge instance by name; instances already shutting down are
 * ignored.  On success, returns the instance with a reference held that the
 * caller must drop via bridge_unref().
 */
static bridge_inst_t *
bridge_find_name(const char *bridge)
{
	bridge_inst_t *bip;

	mutex_enter(&inst_lock);
	for (bip = list_head(&inst_list); bip != NULL;
	    bip = list_next(&inst_list, bip)) {
		if (!(bip->bi_flags & BIF_SHUTDOWN) &&
		    strcmp(bridge, bip->bi_name) == 0) {
			atomic_inc_uint(&bip->bi_refs);
			break;
		}
	}
	mutex_exit(&inst_lock);

	return (bip);
}

/*
 * Create a new bridge instance (BRIOC_NEWBRIDGE).  Allocates the instance,
 * its kstats, the observability mac node, and the dls devnet entry.  On
 * success *bipc holds the ref-held instance; returns 0 or an errno (EEXIST
 * if a live instance of that name already exists).
 */
static int
bridge_create(datalink_id_t linkid, const char *bridge, bridge_inst_t **bipc,
    cred_t *cred)
{
	bridge_inst_t *bip, *bipnew;
	bridge_mac_t *bmp = NULL;
	int err;

	*bipc = NULL;
	/* allocate optimistically, outside the lock */
	bipnew = inst_alloc(bridge);

	mutex_enter(&inst_lock);
lookup_retry:
	for (bip = list_head(&inst_list); bip != NULL;
	    bip = list_next(&inst_list, bip)) {
		if (strcmp(bridge, bip->bi_name) == 0)
			break;
	}

	/* This should not take long; if it does, we've got a design problem */
	if (bip != NULL && (bip->bi_flags & BIF_SHUTDOWN)) {
		cv_wait(&inst_cv, &inst_lock);
		goto lookup_retry;
	}

	if (bip == NULL) {
		bip = bipnew;
		bipnew = NULL;
		list_insert_tail(&inst_list, bip);
	}

	mutex_exit(&inst_lock);
	/* bipnew still set means a live instance of this name already exists */
	if (bipnew != NULL) {
		inst_free(bipnew);
		return (EEXIST);
	}

	bip->bi_ksp = kstat_setup((kstat_named_t *)&bip->bi_kstats,
	    inst_kstats_list, Dim(inst_kstats_list), bip->bi_name);

	err = bmac_alloc(bip, &bmp);
	if ((bip->bi_mac = bmp) == NULL)
		goto fail_create;

	/*
	 * bm_inst is set, so the timer cannot yank the DLS rug from under us.
	 * No extra locking is needed here.
	 */
	if (!(bmp->bm_flags & BMF_DLS)) {
		err = dls_devnet_create(bmp->bm_mh, linkid, crgetzoneid(cred));
		if (err != 0)
			goto fail_create;
		bmp->bm_flags |= BMF_DLS;
	}

	bip->bi_dev = makedevice(bridge_major, mac_minor(bmp->bm_mh));
	*bipc = bip;
	return (0);

fail_create:
	ASSERT(bip->bi_trilldata == NULL);
	bip->bi_flags |= BIF_SHUTDOWN;
	bridge_unref(bip);
	return (err);
}

/*
 * Drop one reference on a bridge instance; the last reference disconnects
 * the mac, removes the instance from the global list (waking anyone in
 * bridge_create's lookup_retry loop), and frees it.
 */
static void
bridge_unref(bridge_inst_t *bip)
{
	if (atomic_dec_uint_nv(&bip->bi_refs) == 0) {
		ASSERT(bip->bi_flags & BIF_SHUTDOWN);
		/* free up mac for reuse before leaving global list */
		if (bip->bi_mac != NULL)
			bmac_disconnect(bip->bi_mac);
		mutex_enter(&inst_lock);
		list_remove(&inst_list, bip);
		cv_broadcast(&inst_cv);
		mutex_exit(&inst_lock);
		inst_free(bip);
	}
}

/*
 * Stream instances are used only for allocating bridges and serving as a
 * control node.  They serve no data-handling function.
 */
static bridge_stream_t *
stream_alloc(void)
{
	bridge_stream_t *bsp;
	minor_t mn;

	/* minor 0 is the reserved control node; failure means no stream */
	if ((mn = mac_minor_hold(B_FALSE)) == 0)
		return (NULL);
	bsp = kmem_zalloc(sizeof (*bsp), KM_SLEEP);
	bsp->bs_minor = mn;
	return (bsp);
}

static void
stream_free(bridge_stream_t *bsp)
{
	mac_minor_rele(bsp->bs_minor);
	kmem_free(bsp, sizeof (*bsp));
}

/* Reference hold/release functions for STREAMS-related taskq */
static void
stream_ref(bridge_stream_t *bsp)
{
	mutex_enter(&stream_ref_lock);
	bsp->bs_taskq_cnt++;
	mutex_exit(&stream_ref_lock);
}

static void
stream_unref(bridge_stream_t *bsp)
{
	mutex_enter(&stream_ref_lock);
	/* wake anyone waiting for all taskq entries to drain */
	if (--bsp->bs_taskq_cnt == 0)
		cv_broadcast(&stream_ref_cv);
	mutex_exit(&stream_ref_lock);
}

/*
 * Final teardown of a link.  Called from link_unref() when the last
 * reference drops; also drops the link's hold on its bridge instance.
 */
static void
link_free(bridge_link_t *blp)
{
	bridge_inst_t *bip = blp->bl_inst;

	ASSERT(!(blp->bl_flags & BLF_FREED));
	blp->bl_flags |= BLF_FREED;
	if (blp->bl_ksp != NULL)
		kstat_delete(blp->bl_ksp);
	if (blp->bl_lfailmp != NULL)
		freeb(blp->bl_lfailmp);
	cv_destroy(&blp->bl_trillwait);
	mutex_destroy(&blp->bl_trilllock);
	kmem_free(blp, sizeof (*blp));
	/* Don't unreference the bridge until the MAC is closed */
	bridge_unref(bip);
}

/*
 * Drop one reference on a link; the last reference (always with BLF_DELETED
 * set) removes it from the bridge's link list and frees it, waking TRILL if
 * it is waiting for the link list to empty.
 */
static void
link_unref(bridge_link_t *blp)
{
	if (atomic_dec_uint_nv(&blp->bl_refs) == 0) {
		bridge_inst_t *bip = blp->bl_inst;

		ASSERT(blp->bl_flags & BLF_DELETED);
		rw_enter(&bip->bi_rwlock, RW_WRITER);
		if (blp->bl_flags & BLF_LINK_ADDED)
			list_remove(&bip->bi_links, blp);
		rw_exit(&bip->bi_rwlock);
		if (bip->bi_trilldata != NULL && list_is_empty(&bip->bi_links))
			cv_broadcast(&bip->bi_linkwait);
		link_free(blp);
	}
}

/*
 * Allocate a forwarding entry for the given destination with room for
 * nlinks output links; the link array is carved from the same allocation.
 * Returns NULL on allocation failure (KM_NOSLEEP).
 */
static bridge_fwd_t *
fwd_alloc(const uint8_t *addr, uint_t nlinks, uint16_t nick)
{
	bridge_fwd_t *bfp;

	bfp = kmem_zalloc(sizeof (*bfp) + (nlinks * sizeof (bridge_link_t *)),
	    KM_NOSLEEP);
	if (bfp != NULL) {
		bcopy(addr, bfp->bf_dest, ETHERADDRL);
		bfp->bf_lastheard = ddi_get_lbolt();
		bfp->bf_maxlinks = nlinks;
		bfp->bf_links = (bridge_link_t **)(bfp + 1);
		bfp->bf_trill_nick = nick;
	}
	return (bfp);
}

/*
 * Look up a forwarding entry by destination address, preferring a
 * VLAN-specific entry when one exists for the given vlanid.  On success, a
 * reference is held for the caller (drop with fwd_unref()).
 */
static bridge_fwd_t *
fwd_find(bridge_inst_t *bip, const uint8_t *addr, uint16_t vlanid)
{
	bridge_fwd_t *bfp, *vbfp;
	bridge_fwd_t match;

	bcopy(addr, match.bf_dest, ETHERADDRL);
	match.bf_flags = 0;
	rw_enter(&bip->bi_rwlock, RW_READER);
	if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) {
		/* retry with a VLAN-local match if one may exist */
		if (bfp->bf_vlanid != vlanid && bfp->bf_vcnt > 0) {
			match.bf_vlanid = vlanid;
			match.bf_flags = BFF_VLANLOCAL;
			vbfp = avl_find(&bip->bi_fwd, &match, NULL);
			if (vbfp != NULL)
				bfp = vbfp;
		}
		atomic_inc_uint(&bfp->bf_refs);
	}
	rw_exit(&bip->bi_rwlock);
	return (bfp);
}

/*
 * Free a forwarding entry, dropping its references on the links it points
 * at and the instance's entry count kstat.
 */
static void
fwd_free(bridge_fwd_t *bfp)
{
	uint_t i;
	bridge_inst_t *bip = bfp->bf_links[0]->bl_inst;

	KIDECR(bki_count);
	for (i = 0; i < bfp->bf_nlinks; i++)
		link_unref(bfp->bf_links[i]);
	kmem_free(bfp,
	    sizeof (*bfp) + bfp->bf_maxlinks * sizeof (bridge_link_t *));
}

static void
fwd_unref(bridge_fwd_t *bfp)
{
	if (atomic_dec_uint_nv(&bfp->bf_refs) == 0) {
		ASSERT(!(bfp->bf_flags & BFF_INTREE));
		fwd_free(bfp);
	}
}

/*
 * Remove a forwarding entry from the bridge's AVL tree, dropping the tree's
 * reference.  Safe against races: the INTREE flag is rechecked under the
 * write lock.
 */
static void
fwd_delete(bridge_fwd_t *bfp)
{
	bridge_inst_t *bip;
	bridge_fwd_t *bfpzero;

	if (bfp->bf_flags & BFF_INTREE) {
		ASSERT(bfp->bf_nlinks > 0);
		bip = bfp->bf_links[0]->bl_inst;
		rw_enter(&bip->bi_rwlock, RW_WRITER);
		/* Another thread could beat us to this */
		if (bfp->bf_flags & BFF_INTREE) {
			avl_remove(&bip->bi_fwd, bfp);
			bfp->bf_flags &= ~BFF_INTREE;
			if (bfp->bf_flags & BFF_VLANLOCAL) {
				bfp->bf_flags &= ~BFF_VLANLOCAL;
				/* decrement the base entry's VLAN count */
				bfpzero = avl_find(&bip->bi_fwd, bfp, NULL);
				if (bfpzero != NULL && bfpzero->bf_vcnt > 0)
					bfpzero->bf_vcnt--;
			}
			rw_exit(&bip->bi_rwlock);
			fwd_unref(bfp);	/* no longer in avl tree */
		} else {
			rw_exit(&bip->bi_rwlock);
		}
	}
}

/*
 * Insert a forwarding entry into the tree, subject to the shutdown flag,
 * the configured table-size limit, and uniqueness.  Returns B_TRUE on
 * success with a tree reference taken.
 */
static boolean_t
fwd_insert(bridge_inst_t *bip, bridge_fwd_t *bfp)
{
	avl_index_t idx;
	boolean_t retv;

	rw_enter(&bip->bi_rwlock, RW_WRITER);
	if (!(bip->bi_flags & BIF_SHUTDOWN) &&
	    avl_numnodes(&bip->bi_fwd) < bip->bi_tablemax &&
	    avl_find(&bip->bi_fwd, bfp, &idx) == NULL) {
		avl_insert(&bip->bi_fwd, bfp, idx);
		bfp->bf_flags |= BFF_INTREE;
		atomic_inc_uint(&bfp->bf_refs);	/* avl entry */
		retv = B_TRUE;
	} else {
		retv = B_FALSE;
	}
	rw_exit(&bip->bi_rwlock);
	return (retv);
}

/*
 * A link's local (unicast) MAC address changed from oldaddr to newaddr:
 * remove the link from the forwarding entry for the old address (freeing
 * the entry if it empties) and add it to an entry for the new address,
 * growing or creating one as needed.
 */
static void
fwd_update_local(bridge_link_t *blp, const uint8_t *oldaddr,
    const uint8_t *newaddr)
{
	bridge_inst_t *bip = blp->bl_inst;
	bridge_fwd_t *bfp, *bfnew;
	bridge_fwd_t match;
	avl_index_t idx;
	boolean_t drop_ref = B_FALSE;

	if (bcmp(oldaddr, newaddr, ETHERADDRL) == 0)
		return;

	if (bcmp(oldaddr, zero_addr, ETHERADDRL) == 0)
		goto no_old_addr;

	/*
	 * Find the previous entry, and remove our link from it.
	 */
	bcopy(oldaddr, match.bf_dest, ETHERADDRL);
	rw_enter(&bip->bi_rwlock, RW_WRITER);
	if ((bfp = avl_find(&bip->bi_fwd, &match, NULL)) != NULL) {
		int i;

		/*
		 * See if we're in the list, and remove if so.
		 */
		for (i = 0; i < bfp->bf_nlinks; i++) {
			if (bfp->bf_links[i] == blp) {
				/*
				 * We assume writes are atomic, so no special
				 * MT handling is needed.  The list length is
				 * decremented first, and then we remove
				 * entries.
				 */
				bfp->bf_nlinks--;
				for (; i < bfp->bf_nlinks; i++)
					bfp->bf_links[i] =
					    bfp->bf_links[i + 1];
				drop_ref = B_TRUE;
				break;
			}
		}
		/* If no more links, then remove and free up */
		if (bfp->bf_nlinks == 0) {
			avl_remove(&bip->bi_fwd, bfp);
			bfp->bf_flags &= ~BFF_INTREE;
		} else {
			bfp = NULL;
		}
	}
	rw_exit(&bip->bi_rwlock);
	if (bfp != NULL)
		fwd_unref(bfp);	/* no longer in avl tree */

	/*
	 * Now get the new link address and add this link to the list.  The
	 * list should be of length 1 unless the user has configured multiple
	 * NICs with the same address.  (That's an incorrect configuration,
	 * but we support it anyway.)
	 */
no_old_addr:
	bfp = NULL;
	if ((bip->bi_flags & BIF_SHUTDOWN) ||
	    bcmp(newaddr, zero_addr, ETHERADDRL) == 0)
		goto no_new_addr;

	bcopy(newaddr, match.bf_dest, ETHERADDRL);
	rw_enter(&bip->bi_rwlock, RW_WRITER);
	if ((bfp = avl_find(&bip->bi_fwd, &match, &idx)) == NULL) {
		bfnew = fwd_alloc(newaddr, 1, RBRIDGE_NICKNAME_NONE);
		if (bfnew != NULL)
			KIINCR(bki_count);
	} else if (bfp->bf_nlinks < bfp->bf_maxlinks) {
		/* special case: link fits in existing entry */
		bfnew = bfp;
	} else {
		/* no room: replace the entry with a larger copy */
		bfnew = fwd_alloc(newaddr, bfp->bf_nlinks + 1,
		    RBRIDGE_NICKNAME_NONE);
		if (bfnew != NULL) {
			KIINCR(bki_count);
			avl_remove(&bip->bi_fwd, bfp);
			bfp->bf_flags &= ~BFF_INTREE;
			bfnew->bf_nlinks = bfp->bf_nlinks;
			/*
			 * sizeof (bfp) is pointer-sized, which matches the
			 * bridge_link_t * elements being copied here.
			 */
			bcopy(bfp->bf_links, bfnew->bf_links,
			    bfp->bf_nlinks * sizeof (bfp));
			/* reset the idx value due to removal above */
			(void) avl_find(&bip->bi_fwd, &match, &idx);
		}
	}

	if (bfnew != NULL) {
		bfnew->bf_links[bfnew->bf_nlinks++] = blp;
		/* reuse the reference dropped above when possible */
		if (drop_ref)
			drop_ref = B_FALSE;
		else
			atomic_inc_uint(&blp->bl_refs); /* bf_links entry */

		if (bfnew != bfp) {
			/* local addresses are not subject to table limits */
			avl_insert(&bip->bi_fwd, bfnew, idx);
			bfnew->bf_flags |= (BFF_INTREE | BFF_LOCALADDR);
			atomic_inc_uint(&bfnew->bf_refs);	/* avl entry */
		}
	}
	rw_exit(&bip->bi_rwlock);

no_new_addr:
	/*
	 * If we found an existing entry and we replaced it with a new one,
	 * then drop the table reference from the old one.  We removed it from
	 * the AVL tree above.
	 *
	 * NOTE(review): bfnew is never assigned on the path that jumps
	 * directly to no_new_addr (shutdown or zero newaddr), so this test
	 * reads an indeterminate value; it's masked only because bfp is NULL
	 * then.  Initializing bfnew to NULL at declaration would be safer --
	 * confirm and fix separately.
	 */
	if (bfnew != NULL && bfp != NULL && bfnew != bfp)
		fwd_unref(bfp);

	/* Account for removed entry. */
	if (drop_ref)
		link_unref(blp);
}

/*
 * The MAC layer's primary unicast address changed; propagate the change
 * into the forwarding table and remember the new address.
 */
static void
bridge_new_unicst(bridge_link_t *blp)
{
	uint8_t new_mac[ETHERADDRL];

	mac_unicast_primary_get(blp->bl_mh, new_mac);
	fwd_update_local(blp, blp->bl_local_mac, new_mac);
	bcopy(new_mac, blp->bl_local_mac, ETHERADDRL);
}

/*
 * We must shut down a link prior to freeing it, and doing that requires
 * blocking to wait for running MAC threads while holding a reference.  This
 * is run from a taskq to accomplish proper link shutdown followed by
 * reference drop.
 */
static void
link_shutdown(void *arg)
{
	bridge_link_t *blp = arg;
	mac_handle_t mh = blp->bl_mh;
	bridge_inst_t *bip;
	bridge_fwd_t *bfp, *bfnext;
	avl_tree_t fwd_scavenge;
	int i;

	/*
	 * This link is being destroyed.  Notify TRILL now that it's no longer
	 * possible to send packets.  Data packets may still arrive until
	 * TRILL calls bridge_trill_lnunref.
	 */
	if (blp->bl_trilldata != NULL)
		trill_lndstr_fn(blp->bl_trilldata, blp);

	if (blp->bl_flags & BLF_PROM_ADDED)
		(void) mac_promisc_remove(blp->bl_mphp);

	if (blp->bl_flags & BLF_SET_BRIDGE)
		mac_bridge_clear(mh, (mac_handle_t)blp);

	if (blp->bl_flags & BLF_MARGIN_ADDED) {
		(void) mac_notify_remove(blp->bl_mnh, B_TRUE);
		(void) mac_margin_remove(mh, blp->bl_margin);
	}

	/* Tell the clients the real link state when we leave */
	mac_link_redo(blp->bl_mh,
	    mac_stat_get(blp->bl_mh, MAC_STAT_LOWLINK_STATE));

	/* Destroy all of the forwarding entries related to this link */
	avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
	    offsetof(bridge_fwd_t, bf_node));
	bip = blp->bl_inst;
	rw_enter(&bip->bi_rwlock, RW_WRITER);
	bfnext = avl_first(&bip->bi_fwd);
	while ((bfp = bfnext) != NULL) {
		bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
		for (i = 0; i < bfp->bf_nlinks; i++) {
			if (bfp->bf_links[i] == blp)
				break;
		}
		if (i >= bfp->bf_nlinks)
			continue;
		if (bfp->bf_nlinks > 1) {
			/* note that this can't be the last reference */
			link_unref(blp);
			bfp->bf_nlinks--;
			for (; i < bfp->bf_nlinks; i++)
				bfp->bf_links[i] = bfp->bf_links[i + 1];
		} else {
			ASSERT(bfp->bf_flags & BFF_INTREE);
			avl_remove(&bip->bi_fwd, bfp);
			bfp->bf_flags &= ~BFF_INTREE;
			avl_add(&fwd_scavenge, bfp);
		}
	}
	rw_exit(&bip->bi_rwlock);
	/* drop the scavenged entries outside the instance lock */
	bfnext = avl_first(&fwd_scavenge);
	while ((bfp = bfnext) != NULL) {
		bfnext = AVL_NEXT(&fwd_scavenge, bfp);
		avl_remove(&fwd_scavenge, bfp);
		fwd_unref(bfp);
	}
	avl_destroy(&fwd_scavenge);

	if (blp->bl_flags & BLF_CLIENT_OPEN)
		mac_client_close(blp->bl_mch, 0);

	mac_close(mh);

	/*
	 * We are now completely removed from the active list, so drop the
	 * reference (see bridge_add_link).
1217 */ 1218 link_unref(blp); 1219 } 1220 1221 static void 1222 shutdown_inst(bridge_inst_t *bip) 1223 { 1224 bridge_link_t *blp, *blnext; 1225 bridge_fwd_t *bfp; 1226 1227 mutex_enter(&inst_lock); 1228 if (bip->bi_flags & BIF_SHUTDOWN) { 1229 mutex_exit(&inst_lock); 1230 return; 1231 } 1232 1233 /* 1234 * Once on the inst_list, the bridge instance must not leave that list 1235 * without having the shutdown flag set first. When the shutdown flag 1236 * is set, we own the list reference, so we must drop it before 1237 * returning. 1238 */ 1239 bip->bi_flags |= BIF_SHUTDOWN; 1240 mutex_exit(&inst_lock); 1241 1242 bip->bi_control = NULL; 1243 1244 rw_enter(&bip->bi_rwlock, RW_READER); 1245 blnext = list_head(&bip->bi_links); 1246 while ((blp = blnext) != NULL) { 1247 blnext = list_next(&bip->bi_links, blp); 1248 if (!(blp->bl_flags & BLF_DELETED)) { 1249 blp->bl_flags |= BLF_DELETED; 1250 (void) ddi_taskq_dispatch(bridge_taskq, link_shutdown, 1251 blp, DDI_SLEEP); 1252 } 1253 } 1254 while ((bfp = avl_first(&bip->bi_fwd)) != NULL) { 1255 atomic_inc_uint(&bfp->bf_refs); 1256 rw_exit(&bip->bi_rwlock); 1257 fwd_delete(bfp); 1258 fwd_unref(bfp); 1259 rw_enter(&bip->bi_rwlock, RW_READER); 1260 } 1261 rw_exit(&bip->bi_rwlock); 1262 1263 /* 1264 * This bridge is being destroyed. Notify TRILL once all of the 1265 * links are all gone. 1266 */ 1267 mutex_enter(&inst_lock); 1268 while (bip->bi_trilldata != NULL && !list_is_empty(&bip->bi_links)) 1269 cv_wait(&bip->bi_linkwait, &inst_lock); 1270 mutex_exit(&inst_lock); 1271 if (bip->bi_trilldata != NULL) 1272 trill_brdstr_fn(bip->bi_trilldata, bip); 1273 1274 bridge_unref(bip); 1275 } 1276 1277 /* 1278 * This is called once by the TRILL module when it starts up. It just sets the 1279 * global TRILL callback function pointers -- data transmit/receive and bridge 1280 * and link destroy notification. There's only one TRILL module, so only one 1281 * registration is needed. 
1282 * 1283 * TRILL should call this function with NULL pointers before unloading. It 1284 * must not do so before dropping all references to bridges and links. We 1285 * assert that this is true on debug builds. 1286 */ 1287 void 1288 bridge_trill_register_cb(trill_recv_pkt_t recv_fn, trill_encap_pkt_t encap_fn, 1289 trill_br_dstr_t brdstr_fn, trill_ln_dstr_t lndstr_fn) 1290 { 1291 #ifdef DEBUG 1292 if (recv_fn == NULL && trill_recv_fn != NULL) { 1293 bridge_inst_t *bip; 1294 bridge_link_t *blp; 1295 1296 mutex_enter(&inst_lock); 1297 for (bip = list_head(&inst_list); bip != NULL; 1298 bip = list_next(&inst_list, bip)) { 1299 ASSERT(bip->bi_trilldata == NULL); 1300 rw_enter(&bip->bi_rwlock, RW_READER); 1301 for (blp = list_head(&bip->bi_links); blp != NULL; 1302 blp = list_next(&bip->bi_links, blp)) { 1303 ASSERT(blp->bl_trilldata == NULL); 1304 } 1305 rw_exit(&bip->bi_rwlock); 1306 } 1307 mutex_exit(&inst_lock); 1308 } 1309 #endif 1310 trill_recv_fn = recv_fn; 1311 trill_encap_fn = encap_fn; 1312 trill_brdstr_fn = brdstr_fn; 1313 trill_lndstr_fn = lndstr_fn; 1314 } 1315 1316 /* 1317 * This registers the TRILL instance pointer with a bridge. Before this 1318 * pointer is set, the forwarding, TRILL receive, and bridge destructor 1319 * functions won't be called. 1320 * 1321 * TRILL holds a reference on a bridge with this call. It must free the 1322 * reference by calling the unregister function below. 
 */
bridge_inst_t *
bridge_trill_brref(const char *bname, void *ptr)
{
	char bridge[MAXLINKNAMELEN];
	bridge_inst_t *bip;

	/* instance names carry a trailing "0"; see bridge_find_name users */
	(void) snprintf(bridge, MAXLINKNAMELEN, "%s0", bname);
	bip = bridge_find_name(bridge);
	if (bip != NULL) {
		ASSERT(bip->bi_trilldata == NULL && ptr != NULL);
		bip->bi_trilldata = ptr;
	}
	return (bip);
}

/*
 * Undo bridge_trill_brref: clear the TRILL instance pointer and drop the
 * reference that bridge_find_name took on the bridge.
 */
void
bridge_trill_brunref(bridge_inst_t *bip)
{
	ASSERT(bip->bi_trilldata != NULL);
	bip->bi_trilldata = NULL;
	bridge_unref(bip);
}

/*
 * TRILL calls this function when referencing a particular link on a bridge.
 *
 * It holds a reference on the link, so TRILL must clear out the reference
 * when it's done with the link (on unbinding).
 */
bridge_link_t *
bridge_trill_lnref(bridge_inst_t *bip, datalink_id_t linkid, void *ptr)
{
	bridge_link_t *blp;

	ASSERT(ptr != NULL);
	rw_enter(&bip->bi_rwlock, RW_READER);
	for (blp = list_head(&bip->bi_links); blp != NULL;
	    blp = list_next(&bip->bi_links, blp)) {
		/* only an undeleted link not already bound to TRILL */
		if (!(blp->bl_flags & BLF_DELETED) &&
		    blp->bl_linkid == linkid && blp->bl_trilldata == NULL) {
			blp->bl_trilldata = ptr;
			blp->bl_flags &= ~BLF_TRILLACTIVE;
			/* clear the allowed-forwarding bitmap */
			(void) memset(blp->bl_afs, 0, sizeof (blp->bl_afs));
			atomic_inc_uint(&blp->bl_refs);
			break;
		}
	}
	rw_exit(&bip->bi_rwlock);
	return (blp);
}

/*
 * Undo bridge_trill_lnref: detach TRILL from the link, wait out any TRILL
 * threads still running on it, re-open the forwarding bitmap (all ones), and
 * drop TRILL's link reference.
 */
void
bridge_trill_lnunref(bridge_link_t *blp)
{
	mutex_enter(&blp->bl_trilllock);
	ASSERT(blp->bl_trilldata != NULL);
	blp->bl_trilldata = NULL;
	blp->bl_flags &= ~BLF_TRILLACTIVE;
	while (blp->bl_trillthreads > 0)
		cv_wait(&blp->bl_trillwait, &blp->bl_trilllock);
	mutex_exit(&blp->bl_trilllock);
	(void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs));
	link_unref(blp);
}

/*
 * This periodic timer performs three functions:
 * 1.
It scans the list of learned forwarding entries, and removes ones that
 *    haven't been heard from in a while.  The time limit is backed down if
 *    we're above the configured table limit.
 * 2. It walks the links and decays away the bl_learns counter.
 * 3. It scans the observability node entries looking for ones that can be
 *    freed up.
 */
/* ARGSUSED */
static void
bridge_timer(void *arg)
{
	bridge_inst_t *bip;
	bridge_fwd_t *bfp, *bfnext;
	bridge_mac_t *bmp, *bmnext;
	bridge_link_t *blp;
	int err;
	datalink_id_t tmpid;
	avl_tree_t fwd_scavenge;
	clock_t age_limit;
	uint32_t ldecay;

	avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
	    offsetof(bridge_fwd_t, bf_node));
	mutex_enter(&inst_lock);
	for (bip = list_head(&inst_list); bip != NULL;
	    bip = list_next(&inst_list, bip)) {
		if (bip->bi_flags & BIF_SHUTDOWN)
			continue;
		rw_enter(&bip->bi_rwlock, RW_WRITER);
		/* compute scaled maximum age based on table limit */
		if (avl_numnodes(&bip->bi_fwd) > bip->bi_tablemax)
			bip->bi_tshift++;
		else
			bip->bi_tshift = 0;
		if ((age_limit = bridge_fwd_age >> bip->bi_tshift) == 0) {
			/* never let the limit shift all the way to zero */
			if (bip->bi_tshift != 0)
				bip->bi_tshift--;
			age_limit = 1;
		}
		/* move expired non-local entries to the scavenge tree */
		bfnext = avl_first(&bip->bi_fwd);
		while ((bfp = bfnext) != NULL) {
			bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
			if (!(bfp->bf_flags & BFF_LOCALADDR) &&
			    (ddi_get_lbolt() - bfp->bf_lastheard) > age_limit) {
				ASSERT(bfp->bf_flags & BFF_INTREE);
				avl_remove(&bip->bi_fwd, bfp);
				bfp->bf_flags &= ~BFF_INTREE;
				avl_add(&fwd_scavenge, bfp);
			}
		}
		/* decay the per-link learning counters */
		for (blp = list_head(&bip->bi_links); blp != NULL;
		    blp = list_next(&bip->bi_links, blp)) {
			ldecay = mac_get_ldecay(blp->bl_mh);
			if (ldecay >= blp->bl_learns)
				blp->bl_learns = 0;
			else
				atomic_add_int(&blp->bl_learns, -(int)ldecay);
		}
		rw_exit(&bip->bi_rwlock);
		/* drop tree references outside of the rwlock */
		bfnext = avl_first(&fwd_scavenge);
		while ((bfp = bfnext) != NULL) {
			bfnext = AVL_NEXT(&fwd_scavenge, bfp);
			avl_remove(&fwd_scavenge, bfp);
			KIINCR(bki_expire);
			fwd_unref(bfp);	/* drop tree reference */
		}
	}
	mutex_exit(&inst_lock);
	avl_destroy(&fwd_scavenge);

	/*
	 * Scan the bridge_mac_t entries and try to free up the ones that are
	 * no longer active.  This must be done by polling, as neither DLS nor
	 * MAC provides a driver any sort of positive control over clients.
	 */
	rw_enter(&bmac_rwlock, RW_WRITER);
	bmnext = list_head(&bmac_list);
	while ((bmp = bmnext) != NULL) {
		bmnext = list_next(&bmac_list, bmp);

		/* ignore active bridges */
		if (bmp->bm_inst != NULL)
			continue;

		if (bmp->bm_flags & BMF_DLS) {
			err = dls_devnet_destroy(bmp->bm_mh, &tmpid, B_FALSE);
			ASSERT(err == 0 || err == EBUSY);
			if (err == 0)
				bmp->bm_flags &= ~BMF_DLS;
		}

		if (!(bmp->bm_flags & BMF_DLS)) {
			err = mac_unregister(bmp->bm_mh);
			ASSERT(err == 0 || err == EBUSY);
			if (err == 0) {
				list_remove(&bmac_list, bmp);
				kmem_free(bmp, sizeof (*bmp));
			}
		}
	}
	/* reschedule only while there's still something to poll */
	if (list_is_empty(&bmac_list)) {
		bridge_timerid = 0;
	} else {
		bridge_timerid = timeout(bridge_timer, NULL,
		    bridge_scan_interval);
	}
	rw_exit(&bmac_rwlock);
}

/*
 * STREAMS open entry point.  Minor 0 is the bridge control stream; any other
 * minor is a regular DLPI stream (snoop and the like) and is handed to DLD.
 */
static int
bridge_open(queue_t *rq, dev_t *devp, int oflag, int sflag, cred_t *credp)
{
	bridge_stream_t *bsp;

	if (rq->q_ptr != NULL)
		return (0);

	if (sflag & MODOPEN)
		return (EINVAL);

	/*
	 * Check the minor node number being opened.  This tells us which
	 * bridge instance the user wants.
	 */
	if (getminor(*devp) != 0) {
		/*
		 * This is a regular DLPI stream for snoop or the like.
		 * Redirect it through DLD.
		 */
		rq->q_qinfo = &bridge_dld_rinit;
		OTHERQ(rq)->q_qinfo = &bridge_dld_winit;
		return (dld_open(rq, devp, oflag, sflag, credp));
	} else {
		/*
		 * Allocate the bridge control stream structure.
		 */
		if ((bsp = stream_alloc()) == NULL)
			return (ENOSR);
		rq->q_ptr = WR(rq)->q_ptr = (caddr_t)bsp;
		bsp->bs_wq = WR(rq);
		*devp = makedevice(getmajor(*devp), bsp->bs_minor);
		qprocson(rq);
		return (0);
	}
}

/*
 * This is used only for bridge control streams.  DLPI goes through dld
 * instead.
 */
static int
bridge_close(queue_t *rq)
{
	bridge_stream_t *bsp = rq->q_ptr;
	bridge_inst_t *bip;

	/*
	 * Wait for any stray taskq (add/delete link) entries related to this
	 * stream to leave the system.
	 */
	mutex_enter(&stream_ref_lock);
	while (bsp->bs_taskq_cnt != 0)
		cv_wait(&stream_ref_cv, &stream_ref_lock);
	mutex_exit(&stream_ref_lock);

	qprocsoff(rq);
	if ((bip = bsp->bs_inst) != NULL)
		shutdown_inst(bip);
	rq->q_ptr = WR(rq)->q_ptr = NULL;
	stream_free(bsp);
	if (bip != NULL)
		bridge_unref(bip);

	return (0);
}

/*
 * Learn (or refresh) the forwarding entry for a source MAC address seen on
 * the given link, with its TRILL ingress nickname and VLAN.  Detects station
 * moves (same address, different link) and enforces the per-link learning
 * limit by unlearning instead.
 */
static void
bridge_learn(bridge_link_t *blp, const uint8_t *saddr, uint16_t ingress_nick,
    uint16_t vlanid)
{
	bridge_inst_t *bip = blp->bl_inst;
	bridge_fwd_t *bfp, *bfpnew;
	int i;
	boolean_t replaced = B_FALSE;

	/* Ignore multi-destination address used as source; it's nonsense. */
	if (*saddr & 1)
		return;

	/*
	 * If the source is known, then check whether it belongs on this link.
	 * If not, and this isn't a fixed local address, then we've detected a
	 * move.  If it's not known, learn it.
	 */
	if ((bfp = fwd_find(bip, saddr, vlanid)) != NULL) {
		/*
		 * If the packet has a fixed local source address, then
		 * there's nothing we can learn.  We must quit.  If this was a
		 * received packet, then the sender has stolen our address,
		 * but there's nothing we can do.  If it's a transmitted
		 * packet, then that's the normal case.
		 */
		if (bfp->bf_flags & BFF_LOCALADDR) {
			fwd_unref(bfp);
			return;
		}

		/*
		 * Check if the link (and TRILL sender, if any) being used is
		 * among the ones registered for this address.  If so, then
		 * this is information that we already know.
		 */
		if (bfp->bf_trill_nick == ingress_nick) {
			for (i = 0; i < bfp->bf_nlinks; i++) {
				if (bfp->bf_links[i] == blp) {
					bfp->bf_lastheard = ddi_get_lbolt();
					fwd_unref(bfp);
					return;
				}
			}
		}
	}

	/*
	 * Note that we intentionally "unlearn" things that appear to be under
	 * attack on this link.  The forwarding cache is a negative thing for
	 * security -- it disables reachability as a performance optimization
	 * -- so leaving out entries optimizes for success and defends against
	 * the attack.  Thus, the bare increment without a check in the delete
	 * code above is right.  (And it's ok if we skid over the limit a
	 * little, so there's no synchronization needed on the test.)
	 */
	if (blp->bl_learns >= mac_get_llimit(blp->bl_mh)) {
		if (bfp != NULL) {
			if (bfp->bf_vcnt == 0)
				fwd_delete(bfp);
			fwd_unref(bfp);
		}
		return;
	}

	atomic_inc_uint(&blp->bl_learns);

	if ((bfpnew = fwd_alloc(saddr, 1, ingress_nick)) == NULL) {
		if (bfp != NULL)
			fwd_unref(bfp);
		return;
	}
	KIINCR(bki_count);

	if (bfp != NULL) {
		/*
		 * If this is a new destination for the same VLAN, then delete
		 * so that we can update.  If it's a different VLAN, then
		 * we're not going to delete the original.  Split off instead
		 * into an IVL entry.
		 */
		if (bfp->bf_vlanid == vlanid) {
			/* save the count of IVL duplicates */
			bfpnew->bf_vcnt = bfp->bf_vcnt;

			/* entry deletes count as learning events */
			atomic_inc_uint(&blp->bl_learns);

			/* destroy and create anew; node moved */
			fwd_delete(bfp);
			replaced = B_TRUE;
			KIINCR(bki_moved);
		} else {
			bfp->bf_vcnt++;
			bfpnew->bf_flags |= BFF_VLANLOCAL;
		}
		fwd_unref(bfp);
	}
	bfpnew->bf_links[0] = blp;
	bfpnew->bf_nlinks = 1;
	atomic_inc_uint(&blp->bl_refs);	/* bf_links entry */
	if (!fwd_insert(bip, bfpnew))
		fwd_free(bfpnew);
	else if (!replaced)
		KIINCR(bki_source);
}

/*
 * Process the VLAN headers for output on a given link.  There are several
 * cases (noting that we don't map VLANs):
 * 1. The input packet is good as it is; either
 *    a. It has no tag, and output has same PVID
 *    b. It has a non-zero priority-only tag for PVID, and b_band is same
 *    c. It has a tag with VLAN different from PVID, and b_band is same
 * 2. The tag must change: non-zero b_band is different from tag priority
 * 3. The packet has a tag and should not (VLAN same as PVID, b_band zero)
 * 4. The packet has no tag and needs one:
 *    a. VLAN ID same as PVID, but b_band is non-zero
 *    b. VLAN ID different from PVID
 * We exclude case 1 first, then modify the packet.  Note that output packets
 * get a priority set by the mblk, not by the header, because QoS in bridging
 * requires priority recalculation at each node.
 *
 * The passed-in tci is the "impossible" value 0xFFFF when no tag is present.
 */
static mblk_t *
reform_vlan_header(mblk_t *mp, uint16_t vlanid, uint16_t tci, uint16_t pvid)
{
	boolean_t source_has_tag = (tci != 0xFFFF);
	mblk_t *mpcopy;
	size_t mlen, minlen;
	struct ether_vlan_header *evh;
	int pri;

	/* This helps centralize error handling in the caller. */
	if (mp == NULL)
		return (mp);

	/* No forwarded packet can have hardware checksum enabled */
	DB_CKSUMFLAGS(mp) = 0;

	/* Get the no-modification cases out of the way first */
	if (!source_has_tag && vlanid == pvid)		/* 1a */
		return (mp);

	pri = VLAN_PRI(tci);
	if (source_has_tag && mp->b_band == pri) {
		if (vlanid != pvid)			/* 1c */
			return (mp);
		if (pri != 0 && VLAN_ID(tci) == 0)	/* 1b */
			return (mp);
	}

	/*
	 * We now know that we must modify the packet.  Prepare for that.
	 * Note that if a tag is present, the caller has already done a
	 * pullup for the VLAN header, so we're good to go.
	 */
	if (MBLKL(mp) < sizeof (struct ether_header)) {
		mpcopy = msgpullup(mp, sizeof (struct ether_header));
		if (mpcopy == NULL) {
			freemsg(mp);
			return (NULL);
		}
		mp = mpcopy;
	}
	/* can't modify in place if shared, misaligned, or too small to grow */
	if (DB_REF(mp) > 1 || !IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)) ||
	    (!source_has_tag && MBLKTAIL(mp) < VLAN_INCR)) {
		minlen = mlen = MBLKL(mp);
		if (!source_has_tag)
			minlen += VLAN_INCR;
		ASSERT(minlen >= sizeof (struct ether_vlan_header));
		/*
		 * We're willing to copy some data to avoid fragmentation,
		 * but not a lot.
		 */
		if (minlen > 256)
			minlen = sizeof (struct ether_vlan_header);
		mpcopy = allocb(minlen, BPRI_MED);
		if (mpcopy == NULL) {
			freemsg(mp);
			return (NULL);
		}
		if (mlen <= minlen) {
			/* We toss the first mblk when we can. */
			bcopy(mp->b_rptr, mpcopy->b_rptr, mlen);
			mpcopy->b_wptr += mlen;
			mpcopy->b_cont = mp->b_cont;
			freeb(mp);
		} else {
			/* If not, then just copy what we need */
			if (!source_has_tag)
				minlen = sizeof (struct ether_header);
			bcopy(mp->b_rptr, mpcopy->b_rptr, minlen);
			mpcopy->b_wptr += minlen;
			mpcopy->b_cont = mp;
			mp->b_rptr += minlen;
		}
		mp = mpcopy;
	}

	/* LINTED: pointer alignment */
	evh = (struct ether_vlan_header *)mp->b_rptr;
	if (source_has_tag) {
		if (mp->b_band == 0 && vlanid == pvid) {	/* 3 */
			/* strip the tag: shift payload up by VLAN_INCR */
			evh->ether_tpid = evh->ether_type;
			mlen = MBLKL(mp);
			if (mlen > sizeof (struct ether_vlan_header))
				ovbcopy(mp->b_rptr +
				    sizeof (struct ether_vlan_header),
				    mp->b_rptr + sizeof (struct ether_header),
				    mlen - sizeof (struct ether_vlan_header));
			mp->b_wptr -= VLAN_INCR;
		} else {					/* 2 */
			/* rewrite the TCI in place */
			if (vlanid == pvid)
				vlanid = VLAN_ID_NONE;
			tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid);
			evh->ether_tci = htons(tci);
		}
	} else {
		/* case 4: no header present, but one is needed */
		mlen = MBLKL(mp);
		if (mlen > sizeof (struct ether_header))
			ovbcopy(mp->b_rptr + sizeof (struct ether_header),
			    mp->b_rptr + sizeof (struct ether_vlan_header),
			    mlen - sizeof (struct ether_header));
		mp->b_wptr += VLAN_INCR;
		ASSERT(mp->b_wptr <= DB_LIM(mp));
		if (vlanid == pvid)
			vlanid = VLAN_ID_NONE;
		tci = VLAN_TCI(mp->b_band, ETHER_CFI, vlanid);
		evh->ether_type = evh->ether_tpid;
		evh->ether_tpid = htons(ETHERTYPE_VLAN);
		evh->ether_tci = htons(tci);
	}
	return (mp);
}

/* Record VLAN information and strip header if requested
 */
static void
update_header(mblk_t *mp, mac_header_info_t *hdr_info, boolean_t striphdr)
{
	if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) {
		struct ether_vlan_header *evhp;
		uint16_t ether_type;

		/* LINTED: alignment */
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		hdr_info->mhi_istagged = B_TRUE;
		hdr_info->mhi_tci = ntohs(evhp->ether_tci);
		if (striphdr) {
			/*
			 * For VLAN tagged frames update the ether_type
			 * in hdr_info before stripping the header.
			 */
			ether_type = ntohs(evhp->ether_type);
			hdr_info->mhi_origsap = ether_type;
			hdr_info->mhi_bindsap = (ether_type > ETHERMTU) ?
			    ether_type : DLS_SAP_LLC;
			mp->b_rptr = (uchar_t *)(evhp + 1);
		}
	} else {
		hdr_info->mhi_istagged = B_FALSE;
		hdr_info->mhi_tci = VLAN_ID_NONE;
		if (striphdr)
			mp->b_rptr += sizeof (struct ether_header);
	}
}

/*
 * Return B_TRUE if we're allowed to send on this link with the given VLAN ID.
 * A deleted link, or a non-TRILL link that isn't in the STP forwarding state,
 * cannot send; otherwise both the configured VLAN set and the allowed
 * forwarding set must include the VLAN.
 */
static boolean_t
bridge_can_send(bridge_link_t *blp, uint16_t vlanid)
{
	ASSERT(vlanid != VLAN_ID_NONE);
	if (blp->bl_flags & BLF_DELETED)
		return (B_FALSE);
	if (blp->bl_trilldata == NULL && blp->bl_state != BLS_FORWARDING)
		return (B_FALSE);
	return (BRIDGE_VLAN_ISSET(blp, vlanid) && BRIDGE_AF_ISSET(blp, vlanid));
}

/*
 * This function scans the bridge forwarding tables in order to forward a
 * given packet.  If the packet either doesn't need forwarding (the current
 * link is correct) or the current link needs a copy as well, then the packet
 * is returned to the caller.
 *
 * If a packet has been decapsulated from TRILL, then it must *NOT* reenter a
 * TRILL tunnel.  If the destination points there, then drop instead.
 */
static mblk_t *
bridge_forward(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp,
    uint16_t vlanid, uint16_t tci, boolean_t from_trill, boolean_t is_xmit)
{
	mblk_t *mpsend, *mpcopy;
	bridge_inst_t *bip = blp->bl_inst;
	bridge_link_t *blpsend, *blpnext;
	bridge_fwd_t *bfp;
	uint_t i;
	boolean_t selfseen = B_FALSE;
	void *tdp;
	const uint8_t *daddr = hdr_info->mhi_daddr;

	/*
	 * Check for the IEEE "reserved" multicast addresses (01:80:c2:00:00:0x).
	 * Messages sent to these addresses are used for link-local control
	 * (STP and pause), and are never forwarded or redirected.
	 */
	if (daddr[0] == 1 && daddr[1] == 0x80 && daddr[2] == 0xc2 &&
	    daddr[3] == 0 && daddr[4] == 0 && (daddr[5] & 0xf0) == 0) {
		if (from_trill) {
			freemsg(mp);
			mp = NULL;
		}
		return (mp);
	}

	if ((bfp = fwd_find(bip, daddr, vlanid)) != NULL) {

		/*
		 * If trill indicates a destination for this node, then it's
		 * clearly not intended for local delivery.  We must tell
		 * TRILL to encapsulate, as long as we didn't just decapsulate
		 * it.
		 */
		if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE) {
			/*
			 * Error case: can't reencapsulate if the protocols
			 * are working correctly.
			 */
			if (from_trill) {
				freemsg(mp);
				return (NULL);
			}
			mutex_enter(&blp->bl_trilllock);
			if ((tdp = blp->bl_trilldata) != NULL) {
				/* hold off lnunref while we call out */
				blp->bl_trillthreads++;
				mutex_exit(&blp->bl_trilllock);
				update_header(mp, hdr_info, B_FALSE);
				if (is_xmit)
					mp = mac_fix_cksum(mp);
				/* all trill data frames have Inner.VLAN */
				mp = reform_vlan_header(mp, vlanid, tci, 0);
				if (mp == NULL) {
					KIINCR(bki_drops);
					fwd_unref(bfp);
					return (NULL);
				}
				trill_encap_fn(tdp, blp, hdr_info, mp,
				    bfp->bf_trill_nick);
				mutex_enter(&blp->bl_trilllock);
				if (--blp->bl_trillthreads == 0 &&
				    blp->bl_trilldata == NULL)
					cv_broadcast(&blp->bl_trillwait);
			}
			mutex_exit(&blp->bl_trilllock);

			/* if TRILL has been disabled, then kill this stray */
			if (tdp == NULL) {
				freemsg(mp);
				fwd_delete(bfp);
			}
			fwd_unref(bfp);
			return (NULL);
		}

		/* find first link we can send on */
		for (i = 0; i < bfp->bf_nlinks; i++) {
			blpsend = bfp->bf_links[i];
			if (blpsend == blp)
				selfseen = B_TRUE;
			else if (bridge_can_send(blpsend, vlanid))
				break;
		}

		/*
		 * Send on each usable link; look ahead for the next usable
		 * one so that the original message (not a copy) can be used
		 * for the last transmission.
		 */
		while (i < bfp->bf_nlinks) {
			blpsend = bfp->bf_links[i];
			for (i++; i < bfp->bf_nlinks; i++) {
				blpnext = bfp->bf_links[i];
				if (blpnext == blp)
					selfseen = B_TRUE;
				else if (bridge_can_send(blpnext, vlanid))
					break;
			}
			if (i == bfp->bf_nlinks && !selfseen) {
				mpsend = mp;
				mp = NULL;
			} else {
				mpsend = copymsg(mp);
			}

			if (!from_trill && is_xmit)
				mpsend = mac_fix_cksum(mpsend);

			mpsend = reform_vlan_header(mpsend, vlanid, tci,
			    blpsend->bl_pvid);
			if (mpsend == NULL) {
				KIINCR(bki_drops);
				continue;
			}

			KIINCR(bki_forwards);
			/*
			 * No need to bump up the link reference count, as
			 * the forwarding entry itself holds a reference to
			 * the link.
			 */
			if (bfp->bf_flags & BFF_LOCALADDR) {
				mac_rx_common(blpsend->bl_mh, NULL, mpsend);
			} else {
				KLPINCR(blpsend, bkl_xmit);
				MAC_RING_TX(blpsend->bl_mh, NULL, mpsend,
				    mpsend);
				freemsg(mpsend);
			}
		}
		/*
		 * Handle a special case: if we're transmitting to the
		 * original link, then check whether the localaddr flag is
		 * set.  If it is, then receive instead.  This doesn't happen
		 * with ordinary bridging, but does happen often with TRILL
		 * decapsulation.
		 */
		if (mp != NULL && is_xmit && (bfp->bf_flags & BFF_LOCALADDR)) {
			mac_rx_common(blp->bl_mh, NULL, mp);
			mp = NULL;
		}
		fwd_unref(bfp);
	} else {
		/*
		 * TRILL has two cases to handle.  If the packet is off the
		 * wire (not from TRILL), then we need to send up into the
		 * TRILL module to have the distribution tree computed.  If
		 * the packet is from TRILL (decapsulated), then we're part of
		 * the distribution tree, and we need to copy the packet on
		 * member interfaces.
		 *
		 * Thus, the from TRILL case is identical to the STP case.
		 */
		if (!from_trill && blp->bl_trilldata != NULL) {
			mutex_enter(&blp->bl_trilllock);
			if ((tdp = blp->bl_trilldata) != NULL) {
				blp->bl_trillthreads++;
				mutex_exit(&blp->bl_trilllock);
				if ((mpsend = copymsg(mp)) != NULL) {
					update_header(mpsend,
					    hdr_info, B_FALSE);
					/*
					 * all trill data frames have
					 * Inner.VLAN
					 */
					mpsend = reform_vlan_header(mpsend,
					    vlanid, tci, 0);
					if (mpsend == NULL) {
						KIINCR(bki_drops);
					} else {
						trill_encap_fn(tdp, blp,
						    hdr_info, mpsend,
						    RBRIDGE_NICKNAME_NONE);
					}
				}
				mutex_enter(&blp->bl_trilllock);
				if (--blp->bl_trillthreads == 0 &&
				    blp->bl_trilldata == NULL)
					cv_broadcast(&blp->bl_trillwait);
			}
			mutex_exit(&blp->bl_trilllock);
		}

		/*
		 * This is an unknown destination, so flood.  The list lock is
		 * dropped for each transmit; a reference on the next link
		 * keeps our cursor valid across the unlocked section.
		 */
		rw_enter(&bip->bi_rwlock, RW_READER);
		for (blpnext = list_head(&bip->bi_links); blpnext != NULL;
		    blpnext = list_next(&bip->bi_links, blpnext)) {
			if (blpnext == blp)
				selfseen = B_TRUE;
			else if (bridge_can_send(blpnext, vlanid))
				break;
		}
		if (blpnext != NULL)
			atomic_inc_uint(&blpnext->bl_refs);
		rw_exit(&bip->bi_rwlock);
		while ((blpsend = blpnext) != NULL) {
			rw_enter(&bip->bi_rwlock, RW_READER);
			for (blpnext = list_next(&bip->bi_links, blpsend);
			    blpnext != NULL;
			    blpnext = list_next(&bip->bi_links, blpnext)) {
				if (blpnext == blp)
					selfseen = B_TRUE;
				else if (bridge_can_send(blpnext, vlanid))
					break;
			}
			if (blpnext != NULL)
				atomic_inc_uint(&blpnext->bl_refs);
			rw_exit(&bip->bi_rwlock);
			if (blpnext == NULL && !selfseen) {
				mpsend = mp;
				mp = NULL;
			} else {
				mpsend = copymsg(mp);
			}

			if (!from_trill && is_xmit)
				mpsend = mac_fix_cksum(mpsend);

			mpsend = reform_vlan_header(mpsend, vlanid, tci,
			    blpsend->bl_pvid);
			if (mpsend == NULL) {
				KIINCR(bki_drops);
				continue;
			}

			if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST)
				KIINCR(bki_unknown);
			else
				KIINCR(bki_mbcast);
			KLPINCR(blpsend, bkl_xmit);
			if ((mpcopy = copymsg(mpsend)) != NULL)
				mac_rx_common(blpsend->bl_mh, NULL, mpcopy);
			MAC_RING_TX(blpsend->bl_mh, NULL, mpsend, mpsend);
			freemsg(mpsend);
			link_unref(blpsend);
		}
	}

	/*
	 * At this point, if mp is non-NULL, it means that the caller needs to
	 * continue on the selected link.
	 */
	return (mp);
}

/*
 * Extract and validate the VLAN information for a given packet.  This checks
 * conformance with the rules for use of the PVID on the link, and for the
 * allowed (configured) VLAN set.
 *
 * Returns B_TRUE if the packet passes, B_FALSE if it fails.
 */
static boolean_t
bridge_get_vlan(bridge_link_t *blp, mac_header_info_t *hdr_info, mblk_t *mp,
    uint16_t *vlanidp, uint16_t *tcip)
{
	uint16_t tci, vlanid;

	if (hdr_info->mhi_bindsap == ETHERTYPE_VLAN) {
		ptrdiff_t tpos = offsetof(struct ether_vlan_header, ether_tci);
		ptrdiff_t mlen;

		/*
		 * Extract the VLAN ID information, regardless of alignment,
		 * and without a pullup.  This isn't attractive, but we do
		 * this to avoid having to deal with the pointers stashed in
		 * hdr_info moving around or having the caller deal with a new
		 * mblk_t pointer.
		 */
		while (mp != NULL) {
			/* walk to the mblk holding byte offset tpos */
			mlen = MBLKL(mp);
			if (mlen > tpos && mlen > 0)
				break;
			tpos -= mlen;
			mp = mp->b_cont;
		}
		if (mp == NULL)
			return (B_FALSE);
		tci = mp->b_rptr[tpos] << 8;
		if (++tpos >= mlen) {
			/* second TCI byte lives in a later, non-empty mblk */
			do {
				mp = mp->b_cont;
			} while (mp != NULL && MBLKL(mp) == 0);
			if (mp == NULL)
				return (B_FALSE);
			tpos = 0;
		}
		tci |= mp->b_rptr[tpos];

		vlanid = VLAN_ID(tci);
		if (VLAN_CFI(tci) != ETHER_CFI || vlanid > VLAN_ID_MAX)
			return (B_FALSE);
		/* priority-only tags and PVID-tagged frames act as untagged */
		if (vlanid == VLAN_ID_NONE || vlanid == blp->bl_pvid)
			goto input_no_vlan;
		if (!BRIDGE_VLAN_ISSET(blp, vlanid))
			return (B_FALSE);
	} else {
		/* 0xFFFF is the "impossible" TCI meaning "no tag present" */
		tci = 0xFFFF;
input_no_vlan:
		/*
		 * If PVID is set to zero, then untagged traffic is not
		 * supported here.  Do not learn or forward.
		 */
		if ((vlanid = blp->bl_pvid) == VLAN_ID_NONE)
			return (B_FALSE);
	}

	*tcip = tci;
	*vlanidp = vlanid;
	return (B_TRUE);
}

/*
 * Handle MAC notifications.
 */
static void
bridge_notify_cb(void *arg, mac_notify_type_t note_type)
{
	bridge_link_t *blp = arg;

	switch (note_type) {
	case MAC_NOTE_UNICST:
		/* Primary MAC address changed; refresh bridge unicast set. */
		bridge_new_unicst(blp);
		break;

	case MAC_NOTE_SDU_SIZE: {
		uint_t maxsdu;
		bridge_inst_t *bip = blp->bl_inst;
		bridge_mac_t *bmp = bip->bi_mac;
		boolean_t notify = B_FALSE;
		mblk_t *mlist = NULL;

		mac_sdu_get(blp->bl_mh, NULL, &maxsdu);
		/*
		 * NOTE(review): bm_maxsdu is updated under a READER lock
		 * here; presumably safe because SDU notifications for a
		 * given link are serialized — confirm against the mac
		 * notification framework.
		 */
		rw_enter(&bip->bi_rwlock, RW_READER);
		/* If this is the only link, it dictates the bridge SDU. */
		if (list_prev(&bip->bi_links, blp) == NULL &&
		    list_next(&bip->bi_links, blp) == NULL) {
			notify = (maxsdu != bmp->bm_maxsdu);
			bmp->bm_maxsdu = maxsdu;
		}
		blp->bl_maxsdu = maxsdu;
		if (maxsdu != bmp->bm_maxsdu)
			link_sdu_fail(blp, B_TRUE, &mlist);
		else if (notify)
			(void) mac_maxsdu_update(bmp->bm_mh, maxsdu);
		rw_exit(&bip->bi_rwlock);
		send_up_messages(bip, mlist);
		break;
	}
	}
}

/*
 * This is called by the MAC layer.  As with the transmit side, we're right in
 * the data path for all I/O on this port, so if we don't need to forward this
 * packet anywhere, we have to send it upwards via mac_rx_common.
 */
static void
bridge_recv_cb(mac_handle_t mh, mac_resource_handle_t rsrc, mblk_t *mpnext)
{
	mblk_t *mp, *mpcopy;
	bridge_link_t *blp = (bridge_link_t *)mh;
	bridge_inst_t *bip = blp->bl_inst;
	bridge_mac_t *bmp = bip->bi_mac;
	mac_header_info_t hdr_info;
	uint16_t vlanid, tci;
	boolean_t trillmode = B_FALSE;

	KIINCR(bki_recv);
	KLINCR(bkl_recv);

	/*
	 * Regardless of state, check for inbound TRILL packets when TRILL is
	 * active.  These are pulled out of band and sent for TRILL handling.
	 */
	if (blp->bl_trilldata != NULL) {
		void *tdp;
		mblk_t *newhead;
		mblk_t *tail = NULL;

		mutex_enter(&blp->bl_trilllock);
		/* Re-check under the lock; bl_trilldata may have gone away. */
		if ((tdp = blp->bl_trilldata) != NULL) {
			/* Thread count keeps TRILL detach from racing us. */
			blp->bl_trillthreads++;
			mutex_exit(&blp->bl_trilllock);
			trillmode = B_TRUE;
			newhead = mpnext;
			while ((mp = mpnext) != NULL) {
				boolean_t raw_isis, bridge_group;

				mpnext = mp->b_next;

				/*
				 * If the header isn't readable, then leave on
				 * the list and continue.
				 */
				if (mac_header_info(blp->bl_mh, mp,
				    &hdr_info) != 0) {
					tail = mp;
					continue;
				}

				/*
				 * The TRILL document specifies that, on
				 * Ethernet alone, IS-IS packets arrive with
				 * LLC rather than Ethertype, and using a
				 * specific destination address.  We must check
				 * for that here.  Also, we need to give BPDUs
				 * to TRILL for processing.
				 */
				raw_isis = bridge_group = B_FALSE;
				if (hdr_info.mhi_dsttype ==
				    MAC_ADDRTYPE_MULTICAST) {
					if (memcmp(hdr_info.mhi_daddr,
					    all_isis_rbridges, ETHERADDRL) == 0)
						raw_isis = B_TRUE;
					else if (memcmp(hdr_info.mhi_daddr,
					    bridge_group_address, ETHERADDRL) ==
					    0)
						bridge_group = B_TRUE;
				}
				if (!raw_isis && !bridge_group &&
				    hdr_info.mhi_bindsap != ETHERTYPE_TRILL &&
				    (hdr_info.mhi_bindsap != ETHERTYPE_VLAN ||
				    /* LINTED: alignment */
				    ((struct ether_vlan_header *)mp->b_rptr)->
				    ether_type != htons(ETHERTYPE_TRILL))) {
					/* Not TRILL traffic; leave in chain. */
					tail = mp;
					continue;
				}

				/*
				 * We've got TRILL input.  Remove from the list
				 * and send up through the TRILL module.  (Send
				 * a copy through promiscuous receive just to
				 * support snooping on TRILL.  Order isn't
				 * preserved strictly, but that doesn't matter
				 * here.)
				 */
				if (tail != NULL)
					tail->b_next = mpnext;
				mp->b_next = NULL;
				if (mp == newhead)
					newhead = mpnext;
				mac_trill_snoop(blp->bl_mh, mp);
				update_header(mp, &hdr_info, B_TRUE);
				/*
				 * On raw IS-IS and BPDU frames, we have to
				 * make sure that the length is trimmed
				 * properly.  We use origsap in order to cope
				 * with jumbograms for IS-IS.  (Regular mac
				 * can't.)
				 */
				if (raw_isis || bridge_group) {
					size_t msglen = msgdsize(mp);

					if (msglen > hdr_info.mhi_origsap) {
						(void) adjmsg(mp,
						    hdr_info.mhi_origsap -
						    msglen);
					} else if (msglen <
					    hdr_info.mhi_origsap) {
						/* Truncated frame; drop it. */
						freemsg(mp);
						continue;
					}
				}
				trill_recv_fn(tdp, blp, rsrc, mp, &hdr_info);
			}
			mpnext = newhead;
			mutex_enter(&blp->bl_trilllock);
			/* Wake a waiter tearing TRILL down, if any. */
			if (--blp->bl_trillthreads == 0 &&
			    blp->bl_trilldata == NULL)
				cv_broadcast(&blp->bl_trillwait);
		}
		mutex_exit(&blp->bl_trilllock);
		if (mpnext == NULL)
			return;
	}

	/*
	 * If this is a TRILL RBridge, then just check whether this link is
	 * used at all for forwarding.  If not, then we're done.
	 */
	if (trillmode) {
		if (!(blp->bl_flags & BLF_TRILLACTIVE) ||
		    (blp->bl_flags & BLF_SDUFAIL)) {
			mac_rx_common(blp->bl_mh, rsrc, mpnext);
			return;
		}
	} else {
		/*
		 * For regular (STP) bridges, if we're in blocking or listening
		 * state, then do nothing.  We don't learn or forward until
		 * told to do so.
		 */
		if (blp->bl_state == BLS_BLOCKLISTEN) {
			mac_rx_common(blp->bl_mh, rsrc, mpnext);
			return;
		}
	}

	/*
	 * Send a copy of the message chain up to the observability node users.
	 * For TRILL, we must obey the VLAN AF rules, so we go packet-by-
	 * packet.
	 */
	if (!trillmode && blp->bl_state == BLS_FORWARDING &&
	    (bmp->bm_flags & BMF_STARTED) &&
	    (mp = copymsgchain(mpnext)) != NULL) {
		mac_rx(bmp->bm_mh, NULL, mp);
	}

	/*
	 * We must be in learning or forwarding state, or using TRILL on a link
	 * with one or more VLANs active.  For each packet in the list, process
	 * the source address, and then attempt to forward.
	 */
	while ((mp = mpnext) != NULL) {
		mpnext = mp->b_next;
		mp->b_next = NULL;

		/*
		 * If we can't decode the header or if the header specifies a
		 * multicast source address (impossible!), then don't bother
		 * learning or forwarding, but go ahead and forward up the
		 * stack for subsequent processing.
		 */
		if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0 ||
		    (hdr_info.mhi_saddr[0] & 1) != 0) {
			KIINCR(bki_drops);
			KLINCR(bkl_drops);
			mac_rx_common(blp->bl_mh, rsrc, mp);
			continue;
		}

		/*
		 * Extract and validate the VLAN ID for this packet.
		 */
		if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) ||
		    !BRIDGE_AF_ISSET(blp, vlanid)) {
			mac_rx_common(blp->bl_mh, rsrc, mp);
			continue;
		}

		if (trillmode) {
			/*
			 * Special test required by TRILL document: must
			 * discard frames with outer address set to ESADI.
			 */
			if (memcmp(hdr_info.mhi_daddr, all_esadi_rbridges,
			    ETHERADDRL) == 0) {
				mac_rx_common(blp->bl_mh, rsrc, mp);
				continue;
			}

			/*
			 * If we're in TRILL mode, then the call above to get
			 * the VLAN ID has also checked that we're the
			 * appointed forwarder, so report that we're handling
			 * this packet to any observability node users.
			 */
			if ((bmp->bm_flags & BMF_STARTED) &&
			    (mpcopy = copymsg(mp)) != NULL)
				mac_rx(bmp->bm_mh, NULL, mpcopy);
		}

		/*
		 * First process the source address and learn from it.  For
		 * TRILL, we learn only if we're the appointed forwarder.
		 */
		bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE,
		    vlanid);

		/*
		 * Now check whether we're forwarding and look up the
		 * destination.  If we can forward, do so.
		 */
		if (trillmode || blp->bl_state == BLS_FORWARDING) {
			mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci,
			    B_FALSE, B_FALSE);
		}
		/* Non-NULL return means "also deliver locally." */
		if (mp != NULL)
			mac_rx_common(blp->bl_mh, rsrc, mp);
	}
}


/*
 * Transmit-side bridging hook, called by the MAC layer for every packet sent
 * on a bridged link.  Returns any packets that could not be sent (flow
 * control), with the rest of the chain reattached, so the caller can retry.
 */
/* ARGSUSED */
static mblk_t *
bridge_xmit_cb(mac_handle_t mh, mac_ring_handle_t rh, mblk_t *mpnext)
{
	bridge_link_t *blp = (bridge_link_t *)mh;
	bridge_inst_t *bip = blp->bl_inst;
	bridge_mac_t *bmp = bip->bi_mac;
	mac_header_info_t hdr_info;
	uint16_t vlanid, tci;
	mblk_t *mp, *mpcopy;
	boolean_t trillmode;

	trillmode = blp->bl_trilldata != NULL;

	/*
	 * If we're using STP and we're in blocking or listening state, or if
	 * we're using TRILL and no VLANs are active, then behave as though the
	 * bridge isn't here at all, and send on the local link alone.
	 */
	if ((!trillmode && blp->bl_state == BLS_BLOCKLISTEN) ||
	    (trillmode &&
	    (!(blp->bl_flags & BLF_TRILLACTIVE) ||
	    (blp->bl_flags & BLF_SDUFAIL)))) {
		KIINCR(bki_sent);
		KLINCR(bkl_xmit);
		MAC_RING_TX(blp->bl_mh, rh, mpnext, mp);
		return (mp);
	}

	/*
	 * Send a copy of the message up to the observability node users.
	 * TRILL needs to check on a packet-by-packet basis.
	 */
	if (!trillmode && blp->bl_state == BLS_FORWARDING &&
	    (bmp->bm_flags & BMF_STARTED) &&
	    (mp = copymsgchain(mpnext)) != NULL) {
		mac_rx(bmp->bm_mh, NULL, mp);
	}

	while ((mp = mpnext) != NULL) {
		mpnext = mp->b_next;
		mp->b_next = NULL;

		if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) {
			freemsg(mp);
			continue;
		}

		/*
		 * Extract and validate the VLAN ID for this packet.
		 */
		if (!bridge_get_vlan(blp, &hdr_info, mp, &vlanid, &tci) ||
		    !BRIDGE_AF_ISSET(blp, vlanid)) {
			freemsg(mp);
			continue;
		}

		/*
		 * If we're using TRILL, then we've now validated that we're
		 * the forwarder for this VLAN, so go ahead and let
		 * observability node users know about the packet.
		 */
		if (trillmode && (bmp->bm_flags & BMF_STARTED) &&
		    (mpcopy = copymsg(mp)) != NULL) {
			mac_rx(bmp->bm_mh, NULL, mpcopy);
		}

		/*
		 * We have to learn from our own transmitted packets, because
		 * there may be a Solaris DLPI raw sender (who can specify his
		 * own source address) using promiscuous mode for receive.  The
		 * mac layer information won't (and can't) tell us everything
		 * we need to know.
		 */
		bridge_learn(blp, hdr_info.mhi_saddr, RBRIDGE_NICKNAME_NONE,
		    vlanid);

		/* attempt forwarding */
		if (trillmode || blp->bl_state == BLS_FORWARDING) {
			mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci,
			    B_FALSE, B_TRUE);
		}
		if (mp != NULL) {
			/* MAC_RING_TX nulls mp on successful transmit. */
			MAC_RING_TX(blp->bl_mh, rh, mp, mp);
			if (mp == NULL) {
				KIINCR(bki_sent);
				KLINCR(bkl_xmit);
			}
		}
		/*
		 * If we get stuck, then stop.  Don't let the user's output
		 * packets get out of order.  (More importantly: don't try to
		 * bridge the same packet multiple times if flow control is
		 * asserted.)
		 */
		if (mp != NULL) {
			mp->b_next = mpnext;
			break;
		}
	}
	return (mp);
}

/*
 * This is called by TRILL when it decapsulates an packet, and we must forward
 * locally.  On failure, we just drop.
 *
 * Note that the ingress_nick reported by TRILL must not represent this local
 * node.
 */
void
bridge_trill_decaps(bridge_link_t *blp, mblk_t *mp, uint16_t ingress_nick)
{
	mac_header_info_t hdr_info;
	uint16_t vlanid, tci;
	bridge_inst_t *bip = blp->bl_inst;	/* used by macros */
	mblk_t *mpcopy;

	if (mac_header_info(blp->bl_mh, mp, &hdr_info) != 0) {
		freemsg(mp);
		return;
	}

	/* Extract VLAN ID for this packet. */
	if (hdr_info.mhi_bindsap == ETHERTYPE_VLAN) {
		struct ether_vlan_header *evhp;

		/* LINTED: alignment */
		evhp = (struct ether_vlan_header *)mp->b_rptr;
		tci = ntohs(evhp->ether_tci);
		vlanid = VLAN_ID(tci);
	} else {
		/* Inner VLAN headers are required in TRILL data packets */
		DTRACE_PROBE3(bridge__trill__decaps__novlan, bridge_link_t *,
		    blp, mblk_t *, mp, uint16_t, ingress_nick);
		freemsg(mp);
		return;
	}

	/* Learn the location of this sender in the RBridge network */
	bridge_learn(blp, hdr_info.mhi_saddr, ingress_nick, vlanid);

	/* attempt forwarding */
	mp = bridge_forward(blp, &hdr_info, mp, vlanid, tci, B_TRUE, B_TRUE);
	if (mp != NULL) {
		if (bridge_can_send(blp, vlanid)) {
			/* Deliver a copy locally as well */
			if ((mpcopy = copymsg(mp)) != NULL)
				mac_rx_common(blp->bl_mh, NULL, mpcopy);
			MAC_RING_TX(blp->bl_mh, NULL, mp, mp);
		}
		if (mp == NULL) {
			KIINCR(bki_sent);
			KLINCR(bkl_xmit);
		} else {
			freemsg(mp);
		}
	}
}

/*
 * This function is used by TRILL _only_ to transmit TRILL-encapsulated
 * packets.  It sends on a single underlying link and does not bridge.
 */
mblk_t *
bridge_trill_output(bridge_link_t *blp, mblk_t *mp)
{
	bridge_inst_t *bip = blp->bl_inst;	/* used by macros */

	mac_trill_snoop(blp->bl_mh, mp);
	/* MAC_RING_TX nulls mp on success; non-NULL means flow control. */
	MAC_RING_TX(blp->bl_mh, NULL, mp, mp);
	if (mp == NULL) {
		KIINCR(bki_sent);
		KLINCR(bkl_xmit);
	}
	return (mp);
}

/*
 * Set the "appointed forwarder" flag array for this link.  TRILL controls
 * forwarding on a VLAN basis.  The "trillactive" flag is an optimization for
 * the forwarder.
 */
void
bridge_trill_setvlans(bridge_link_t *blp, const uint8_t *arr)
{
	int i;
	uint_t newflags = 0;

	/* Copy the AF bitmap; any nonzero word means TRILL is active. */
	for (i = 0; i < BRIDGE_VLAN_ARR_SIZE; i++) {
		if ((blp->bl_afs[i] = arr[i]) != 0)
			newflags = BLF_TRILLACTIVE;
	}
	blp->bl_flags = (blp->bl_flags & ~BLF_TRILLACTIVE) | newflags;
}

/*
 * Flush forwarding entries on behalf of TRILL: either all TRILL-learned
 * entries (dotrill), or all locally-learned entries that point at this link.
 * The vlan argument is currently unused.
 */
void
bridge_trill_flush(bridge_link_t *blp, uint16_t vlan, boolean_t dotrill)
{
	bridge_inst_t *bip = blp->bl_inst;
	bridge_fwd_t *bfp, *bfnext;
	avl_tree_t fwd_scavenge;
	int i;

	_NOTE(ARGUNUSED(vlan));

	/*
	 * Victims are moved to a private tree under the lock, then unref'd
	 * after the lock is dropped (fwd_unref may block or re-lock).
	 */
	avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t),
	    offsetof(bridge_fwd_t, bf_node));
	rw_enter(&bip->bi_rwlock, RW_WRITER);
	bfnext = avl_first(&bip->bi_fwd);
	while ((bfp = bfnext) != NULL) {
		bfnext = AVL_NEXT(&bip->bi_fwd, bfp);
		if (bfp->bf_flags & BFF_LOCALADDR)
			continue;
		if (dotrill) {
			/* port doesn't matter if we're flushing TRILL */
			if (bfp->bf_trill_nick == RBRIDGE_NICKNAME_NONE)
				continue;
		} else {
			if (bfp->bf_trill_nick != RBRIDGE_NICKNAME_NONE)
				continue;
			for (i = 0; i < bfp->bf_nlinks; i++) {
				if (bfp->bf_links[i] == blp)
					break;
			}
			if (i >= bfp->bf_nlinks)
				continue;
		}
		ASSERT(bfp->bf_flags & BFF_INTREE);
		avl_remove(&bip->bi_fwd, bfp);
		bfp->bf_flags &= ~BFF_INTREE;
		avl_add(&fwd_scavenge, bfp);
	}
	rw_exit(&bip->bi_rwlock);
	bfnext = avl_first(&fwd_scavenge);
	while ((bfp = bfnext) != NULL) {
		bfnext = AVL_NEXT(&fwd_scavenge, bfp);
		avl_remove(&fwd_scavenge, bfp);
		fwd_unref(bfp);
	}
	avl_destroy(&fwd_scavenge);
}

/*
 * Let the mac module take or drop a reference to a bridge link.  When this is
 * called, the mac module is holding the mi_bridge_lock, so the link cannot be
 * in the process of entering or leaving a bridge.
 */
static void
bridge_ref_cb(mac_handle_t mh, boolean_t hold)
{
	bridge_link_t *blp = (bridge_link_t *)mh;

	if (hold)
		atomic_inc_uint(&blp->bl_refs);
	else
		link_unref(blp);
}

/*
 * Handle link state changes reported by the mac layer.  This acts as a filter
 * for link state changes: if a link is reporting down, but there are other
 * links still up on the bridge, then the state is changed to "up."  When the
 * last link goes down, all are marked down, and when the first link goes up,
 * all are marked up.  (Recursion is avoided by the use of the "redo"
 * function.)
 *
 * We treat unknown as equivalent to "up."
 */
static link_state_t
bridge_ls_cb(mac_handle_t mh, link_state_t newls)
{
	bridge_link_t *blp = (bridge_link_t *)mh;
	bridge_link_t *blcmp;
	bridge_inst_t *bip;
	bridge_mac_t *bmp;

	/*
	 * Fast path: neither old nor new state is down, or the link is
	 * deleted/SDU-failed; just record the new state.
	 */
	if (newls != LINK_STATE_DOWN && blp->bl_linkstate != LINK_STATE_DOWN ||
	    (blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL))) {
		blp->bl_linkstate = newls;
		return (newls);
	}

	/*
	 * Scan first to see if there are any other non-down links.  If there
	 * are, then we're done.  Otherwise, if all others are down, then the
	 * state of this link is the state of the bridge.
	 */
	bip = blp->bl_inst;
	rw_enter(&bip->bi_rwlock, RW_WRITER);
	for (blcmp = list_head(&bip->bi_links); blcmp != NULL;
	    blcmp = list_next(&bip->bi_links, blcmp)) {
		if (blcmp != blp &&
		    !(blcmp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)) &&
		    blcmp->bl_linkstate != LINK_STATE_DOWN)
			break;
	}

	if (blcmp != NULL) {
		/*
		 * If there are other links that are considered up, then tell
		 * the caller that the link is actually still up, regardless of
		 * this link's underlying state.
		 */
		blp->bl_linkstate = newls;
		newls = LINK_STATE_UP;
	} else if (blp->bl_linkstate != newls) {
		/*
		 * If we've found no other 'up' links, and this link has
		 * changed state, then report the new state of the bridge to
		 * all other clients.
		 */
		blp->bl_linkstate = newls;
		for (blcmp = list_head(&bip->bi_links); blcmp != NULL;
		    blcmp = list_next(&bip->bi_links, blcmp)) {
			if (blcmp != blp && !(blcmp->bl_flags & BLF_DELETED))
				mac_link_redo(blcmp->bl_mh, newls);
		}
		bmp = bip->bi_mac;
		/* The observability node reports only up or down. */
		if ((bmp->bm_linkstate = newls) != LINK_STATE_DOWN)
			bmp->bm_linkstate = LINK_STATE_UP;
		mac_link_redo(bmp->bm_mh, bmp->bm_linkstate);
	}
	rw_exit(&bip->bi_rwlock);
	return (newls);
}

/*
 * Taskq handler for BRIOC_ADDLINK: attach a datalink to the bridge, wire up
 * mac client/margin/notify/promisc state, and ack or nak the ioctl.  Runs on
 * the single bridge taskq thread, which serializes add/remove operations.
 */
static void
bridge_add_link(void *arg)
{
	mblk_t *mp = arg;
	bridge_stream_t *bsp;
	bridge_inst_t *bip, *bipt;
	bridge_mac_t *bmp;
	datalink_id_t linkid;
	int err;
	mac_handle_t mh;
	uint_t maxsdu;
	bridge_link_t *blp = NULL, *blpt;
	const mac_info_t *mip;
	boolean_t macopen = B_FALSE;
	char linkname[MAXLINKNAMELEN];
	char kstatname[KSTAT_STRLEN];
	int i;
	link_state_t linkstate;
	mblk_t *mlist;

	/* b_next carries the requesting stream (see bridge_ioctl). */
	bsp = (bridge_stream_t *)mp->b_next;
	mp->b_next = NULL;
	bip = bsp->bs_inst;
	/* LINTED: alignment */
	linkid = *(datalink_id_t *)mp->b_cont->b_rptr;

	/*
	 * First make sure that there is no other bridge that has this link.
	 * We don't want to overlap operations from two bridges; the MAC layer
	 * supports only one bridge on a given MAC at a time.
	 *
	 * We rely on the fact that there's just one taskq thread for the
	 * bridging module: once we've checked for a duplicate, we can drop the
	 * lock, because no other thread could possibly be adding another link
	 * until we're done.
	 */
	mutex_enter(&inst_lock);
	for (bipt = list_head(&inst_list); bipt != NULL;
	    bipt = list_next(&inst_list, bipt)) {
		rw_enter(&bipt->bi_rwlock, RW_READER);
		for (blpt = list_head(&bipt->bi_links); blpt != NULL;
		    blpt = list_next(&bipt->bi_links, blpt)) {
			if (linkid == blpt->bl_linkid)
				break;
		}
		rw_exit(&bipt->bi_rwlock);
		if (blpt != NULL)
			break;
	}
	mutex_exit(&inst_lock);
	if (bipt != NULL) {
		err = EBUSY;
		goto fail;
	}

	if ((err = mac_open_by_linkid(linkid, &mh)) != 0)
		goto fail;
	macopen = B_TRUE;

	/* we bridge only Ethernet */
	mip = mac_info(mh);
	if (mip->mi_media != DL_ETHER) {
		err = ENOTSUP;
		goto fail;
	}

	/*
	 * Get the current maximum SDU on this interface.  If there are other
	 * links on the bridge, then this one must match, or it errors out.
	 * Otherwise, the first link becomes the standard for the new bridge.
	 */
	mac_sdu_get(mh, NULL, &maxsdu);
	bmp = bip->bi_mac;
	if (list_is_empty(&bip->bi_links)) {
		bmp->bm_maxsdu = maxsdu;
		(void) mac_maxsdu_update(bmp->bm_mh, maxsdu);
	}

	/* figure the kstat name; also used as the mac client name */
	i = MBLKL(mp->b_cont) - sizeof (datalink_id_t);
	if (i < 0 || i >= MAXLINKNAMELEN)
		i = MAXLINKNAMELEN - 1;
	bcopy(mp->b_cont->b_rptr + sizeof (datalink_id_t), linkname, i);
	linkname[i] = '\0';
	(void) snprintf(kstatname, sizeof (kstatname), "%s-%s", bip->bi_name,
	    linkname);

	if ((blp = kmem_zalloc(sizeof (*blp), KM_NOSLEEP)) == NULL) {
		err = ENOMEM;
		goto fail;
	}
	/* Preallocate the "SDU mismatch" control message; sent on failure. */
	blp->bl_lfailmp = allocb(sizeof (bridge_ctl_t), BPRI_MED);
	if (blp->bl_lfailmp == NULL) {
		kmem_free(blp, sizeof (*blp));
		blp = NULL;
		err = ENOMEM;
		goto fail;
	}

	blp->bl_refs = 1;
	atomic_inc_uint(&bip->bi_refs);
	blp->bl_inst = bip;
	blp->bl_mh = mh;
	blp->bl_linkid = linkid;
	blp->bl_maxsdu = maxsdu;
	cv_init(&blp->bl_trillwait, NULL, CV_DRIVER, NULL);
	mutex_init(&blp->bl_trilllock, NULL, MUTEX_DRIVER, NULL);
	/* Default: appointed forwarder for all VLANs (non-TRILL case). */
	(void) memset(blp->bl_afs, 0xff, sizeof (blp->bl_afs));

	/*
	 * Each setup step records a BLF_* flag so that link_shutdown can
	 * unwind exactly what was done if a later step fails.
	 */
	err = mac_client_open(mh, &blp->bl_mch, kstatname, 0);
	if (err != 0)
		goto fail;
	blp->bl_flags |= BLF_CLIENT_OPEN;

	err = mac_margin_add(mh, &blp->bl_margin, B_TRUE);
	if (err != 0)
		goto fail;
	blp->bl_flags |= BLF_MARGIN_ADDED;

	blp->bl_mnh = mac_notify_add(mh, bridge_notify_cb, blp);

	/* Enable Bridging on the link */
	err = mac_bridge_set(mh, (mac_handle_t)blp);
	if (err != 0)
		goto fail;
	blp->bl_flags |= BLF_SET_BRIDGE;

	err = mac_promisc_add(blp->bl_mch, MAC_CLIENT_PROMISC_ALL, NULL,
	    blp, &blp->bl_mphp, MAC_PROMISC_FLAGS_NO_TX_LOOP);
	if (err != 0)
		goto fail;
	blp->bl_flags |= BLF_PROM_ADDED;

	bridge_new_unicst(blp);

	blp->bl_ksp = kstat_setup((kstat_named_t *)&blp->bl_kstats,
	    link_kstats_list, Dim(link_kstats_list), kstatname);

	/*
	 * The link holds a reference to the bridge instance, so that the
	 * instance can't go away before the link is freed.  The insertion into
	 * bi_links holds a reference on the link (reference set to 1 above).
	 * When marking as removed from bi_links (BLF_DELETED), drop the
	 * reference on the link.  When freeing the link, drop the reference on
	 * the instance.  BLF_LINK_ADDED tracks link insertion in bi_links
	 * list.
	 */
	rw_enter(&bip->bi_rwlock, RW_WRITER);
	list_insert_tail(&bip->bi_links, blp);
	blp->bl_flags |= BLF_LINK_ADDED;

	/*
	 * If the new link is no good on this bridge, then let the daemon know
	 * about the problem.
	 */
	mlist = NULL;
	if (maxsdu != bmp->bm_maxsdu)
		link_sdu_fail(blp, B_TRUE, &mlist);
	rw_exit(&bip->bi_rwlock);
	send_up_messages(bip, mlist);

	/*
	 * Trigger a link state update so that if this link is the first one
	 * "up" in the bridge, then we notify everyone.  This triggers a trip
	 * through bridge_ls_cb.
	 */
	linkstate = mac_stat_get(mh, MAC_STAT_LOWLINK_STATE);
	blp->bl_linkstate = LINK_STATE_DOWN;
	mac_link_update(mh, linkstate);

	/*
	 * We now need to report back to the stream that invoked us, and then
	 * drop the reference on the stream that we're holding.
	 */
	miocack(bsp->bs_wq, mp, 0, 0);
	stream_unref(bsp);
	return;

fail:
	if (blp == NULL) {
		if (macopen)
			mac_close(mh);
	} else {
		/* link_shutdown unwinds everything the BLF_* flags record. */
		link_shutdown(blp);
	}
	miocnak(bsp->bs_wq, mp, 0, err);
	stream_unref(bsp);
}

/*
 * Taskq handler for BRIOC_REMLINK: mark the named link deleted, dispatch the
 * actual teardown, and recompute bridge-wide link state and MTU.
 */
static void
bridge_rem_link(void *arg)
{
	mblk_t *mp = arg;
	bridge_stream_t *bsp;
	bridge_inst_t *bip;
	bridge_mac_t *bmp;
	datalink_id_t linkid;
	bridge_link_t *blp, *blsave;
	boolean_t found;
	mblk_t *mlist;

	bsp = (bridge_stream_t *)mp->b_next;
	mp->b_next = NULL;
	bip = bsp->bs_inst;
	/* LINTED: alignment */
	linkid = *(datalink_id_t *)mp->b_cont->b_rptr;

	/*
	 * We become reader here so that we can loop over the other links and
	 * deliver link up/down notification.
	 */
	rw_enter(&bip->bi_rwlock, RW_READER);
	found = B_FALSE;
	for (blp = list_head(&bip->bi_links); blp != NULL;
	    blp = list_next(&bip->bi_links, blp)) {
		if (blp->bl_linkid == linkid &&
		    !(blp->bl_flags & BLF_DELETED)) {
			blp->bl_flags |= BLF_DELETED;
			(void) ddi_taskq_dispatch(bridge_taskq, link_shutdown,
			    blp, DDI_SLEEP);
			found = B_TRUE;
			break;
		}
	}

	/*
	 * Check if this link is up and the remainder of the links are all
	 * down.
	 */
	if (blp != NULL && blp->bl_linkstate != LINK_STATE_DOWN) {
		for (blp = list_head(&bip->bi_links); blp != NULL;
		    blp = list_next(&bip->bi_links, blp)) {
			if (blp->bl_linkstate != LINK_STATE_DOWN &&
			    !(blp->bl_flags & (BLF_DELETED|BLF_SDUFAIL)))
				break;
		}
		if (blp == NULL) {
			/* Last working link: mark everything down. */
			for (blp = list_head(&bip->bi_links); blp != NULL;
			    blp = list_next(&bip->bi_links, blp)) {
				if (!(blp->bl_flags & BLF_DELETED))
					mac_link_redo(blp->bl_mh,
					    LINK_STATE_DOWN);
			}
			bmp = bip->bi_mac;
			bmp->bm_linkstate = LINK_STATE_DOWN;
			mac_link_redo(bmp->bm_mh, LINK_STATE_DOWN);
		}
	}

	/*
	 * Check if there's just one working link left on the bridge.  If so,
	 * then that link is now authoritative for bridge MTU.
	 */
	blsave = NULL;
	for (blp = list_head(&bip->bi_links); blp != NULL;
	    blp = list_next(&bip->bi_links, blp)) {
		if (!(blp->bl_flags & BLF_DELETED)) {
			if (blsave == NULL)
				blsave = blp;
			else
				break;
		}
	}
	mlist = NULL;
	bmp = bip->bi_mac;
	/* blp == NULL here means exactly one undeleted link remains. */
	if (blsave != NULL && blp == NULL &&
	    blsave->bl_maxsdu != bmp->bm_maxsdu) {
		bmp->bm_maxsdu = blsave->bl_maxsdu;
		(void) mac_maxsdu_update(bmp->bm_mh, blsave->bl_maxsdu);
		link_sdu_fail(blsave, B_FALSE, &mlist);
	}
	rw_exit(&bip->bi_rwlock);
	send_up_messages(bip, mlist);

	if (found)
		miocack(bsp->bs_wq, mp, 0, 0);
	else
		miocnak(bsp->bs_wq, mp, 0, ENOENT);
	stream_unref(bsp);
}

/*
 * This function intentionally returns with bi_rwlock held; it is intended for
 * quick checks and updates.
3049 */ 3050 static bridge_link_t * 3051 enter_link(bridge_inst_t *bip, datalink_id_t linkid) 3052 { 3053 bridge_link_t *blp; 3054 3055 rw_enter(&bip->bi_rwlock, RW_READER); 3056 for (blp = list_head(&bip->bi_links); blp != NULL; 3057 blp = list_next(&bip->bi_links, blp)) { 3058 if (blp->bl_linkid == linkid && !(blp->bl_flags & BLF_DELETED)) 3059 break; 3060 } 3061 return (blp); 3062 } 3063 3064 static void 3065 bridge_ioctl(queue_t *wq, mblk_t *mp) 3066 { 3067 bridge_stream_t *bsp = wq->q_ptr; 3068 bridge_inst_t *bip; 3069 struct iocblk *iop; 3070 int rc = EINVAL; 3071 int len = 0; 3072 bridge_link_t *blp; 3073 cred_t *cr; 3074 3075 /* LINTED: alignment */ 3076 iop = (struct iocblk *)mp->b_rptr; 3077 3078 /* 3079 * For now, all of the bridge ioctls are privileged. 3080 */ 3081 if ((cr = msg_getcred(mp, NULL)) == NULL) 3082 cr = iop->ioc_cr; 3083 if (cr != NULL && secpolicy_net_config(cr, B_FALSE) != 0) { 3084 miocnak(wq, mp, 0, EPERM); 3085 return; 3086 } 3087 3088 switch (iop->ioc_cmd) { 3089 case BRIOC_NEWBRIDGE: { 3090 bridge_newbridge_t *bnb; 3091 3092 if (bsp->bs_inst != NULL || 3093 (rc = miocpullup(mp, sizeof (bridge_newbridge_t))) != 0) 3094 break; 3095 /* LINTED: alignment */ 3096 bnb = (bridge_newbridge_t *)mp->b_cont->b_rptr; 3097 bnb->bnb_name[MAXNAMELEN-1] = '\0'; 3098 rc = bridge_create(bnb->bnb_linkid, bnb->bnb_name, &bip, cr); 3099 if (rc != 0) 3100 break; 3101 3102 rw_enter(&bip->bi_rwlock, RW_WRITER); 3103 if (bip->bi_control != NULL) { 3104 rw_exit(&bip->bi_rwlock); 3105 bridge_unref(bip); 3106 rc = EBUSY; 3107 } else { 3108 atomic_inc_uint(&bip->bi_refs); 3109 bsp->bs_inst = bip; /* stream holds reference */ 3110 bip->bi_control = bsp; 3111 rw_exit(&bip->bi_rwlock); 3112 rc = 0; 3113 } 3114 break; 3115 } 3116 3117 case BRIOC_ADDLINK: 3118 if ((bip = bsp->bs_inst) == NULL || 3119 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0) 3120 break; 3121 /* 3122 * We cannot perform the action in this thread, because we're 3123 * not in process context, 
and we may already be holding 3124 * MAC-related locks. Place the request on taskq. 3125 */ 3126 mp->b_next = (mblk_t *)bsp; 3127 stream_ref(bsp); 3128 (void) ddi_taskq_dispatch(bridge_taskq, bridge_add_link, mp, 3129 DDI_SLEEP); 3130 return; 3131 3132 case BRIOC_REMLINK: 3133 if ((bip = bsp->bs_inst) == NULL || 3134 (rc = miocpullup(mp, sizeof (datalink_id_t))) != 0) 3135 break; 3136 /* 3137 * We cannot perform the action in this thread, because we're 3138 * not in process context, and we may already be holding 3139 * MAC-related locks. Place the request on taskq. 3140 */ 3141 mp->b_next = (mblk_t *)bsp; 3142 stream_ref(bsp); 3143 (void) ddi_taskq_dispatch(bridge_taskq, bridge_rem_link, mp, 3144 DDI_SLEEP); 3145 return; 3146 3147 case BRIOC_SETSTATE: { 3148 bridge_setstate_t *bss; 3149 3150 if ((bip = bsp->bs_inst) == NULL || 3151 (rc = miocpullup(mp, sizeof (*bss))) != 0) 3152 break; 3153 /* LINTED: alignment */ 3154 bss = (bridge_setstate_t *)mp->b_cont->b_rptr; 3155 if ((blp = enter_link(bip, bss->bss_linkid)) == NULL) { 3156 rc = ENOENT; 3157 } else { 3158 rc = 0; 3159 blp->bl_state = bss->bss_state; 3160 } 3161 rw_exit(&bip->bi_rwlock); 3162 break; 3163 } 3164 3165 case BRIOC_SETPVID: { 3166 bridge_setpvid_t *bsv; 3167 3168 if ((bip = bsp->bs_inst) == NULL || 3169 (rc = miocpullup(mp, sizeof (*bsv))) != 0) 3170 break; 3171 /* LINTED: alignment */ 3172 bsv = (bridge_setpvid_t *)mp->b_cont->b_rptr; 3173 if (bsv->bsv_vlan > VLAN_ID_MAX) 3174 break; 3175 if ((blp = enter_link(bip, bsv->bsv_linkid)) == NULL) { 3176 rc = ENOENT; 3177 } else if (blp->bl_pvid == bsv->bsv_vlan) { 3178 rc = 0; 3179 } else { 3180 rc = 0; 3181 BRIDGE_VLAN_CLR(blp, blp->bl_pvid); 3182 blp->bl_pvid = bsv->bsv_vlan; 3183 if (blp->bl_pvid != 0) 3184 BRIDGE_VLAN_SET(blp, blp->bl_pvid); 3185 } 3186 rw_exit(&bip->bi_rwlock); 3187 break; 3188 } 3189 3190 case BRIOC_VLANENAB: { 3191 bridge_vlanenab_t *bve; 3192 3193 if ((bip = bsp->bs_inst) == NULL || 3194 (rc = miocpullup(mp, sizeof (*bve))) != 
0) 3195 break; 3196 /* LINTED: alignment */ 3197 bve = (bridge_vlanenab_t *)mp->b_cont->b_rptr; 3198 if (bve->bve_vlan > VLAN_ID_MAX) 3199 break; 3200 if ((blp = enter_link(bip, bve->bve_linkid)) == NULL) { 3201 rc = ENOENT; 3202 } else { 3203 rc = 0; 3204 /* special case: vlan 0 means "all" */ 3205 if (bve->bve_vlan == 0) { 3206 (void) memset(blp->bl_vlans, 3207 bve->bve_onoff ? ~0 : 0, 3208 sizeof (blp->bl_vlans)); 3209 BRIDGE_VLAN_CLR(blp, 0); 3210 if (blp->bl_pvid != 0) 3211 BRIDGE_VLAN_SET(blp, blp->bl_pvid); 3212 } else if (bve->bve_vlan == blp->bl_pvid) { 3213 rc = EINVAL; 3214 } else if (bve->bve_onoff) { 3215 BRIDGE_VLAN_SET(blp, bve->bve_vlan); 3216 } else { 3217 BRIDGE_VLAN_CLR(blp, bve->bve_vlan); 3218 } 3219 } 3220 rw_exit(&bip->bi_rwlock); 3221 break; 3222 } 3223 3224 case BRIOC_FLUSHFWD: { 3225 bridge_flushfwd_t *bff; 3226 bridge_fwd_t *bfp, *bfnext; 3227 avl_tree_t fwd_scavenge; 3228 int i; 3229 3230 if ((bip = bsp->bs_inst) == NULL || 3231 (rc = miocpullup(mp, sizeof (*bff))) != 0) 3232 break; 3233 /* LINTED: alignment */ 3234 bff = (bridge_flushfwd_t *)mp->b_cont->b_rptr; 3235 rw_enter(&bip->bi_rwlock, RW_WRITER); 3236 /* This case means "all" */ 3237 if (bff->bff_linkid == DATALINK_INVALID_LINKID) { 3238 blp = NULL; 3239 } else { 3240 for (blp = list_head(&bip->bi_links); blp != NULL; 3241 blp = list_next(&bip->bi_links, blp)) { 3242 if (blp->bl_linkid == bff->bff_linkid && 3243 !(blp->bl_flags & BLF_DELETED)) 3244 break; 3245 } 3246 if (blp == NULL) { 3247 rc = ENOENT; 3248 rw_exit(&bip->bi_rwlock); 3249 break; 3250 } 3251 } 3252 avl_create(&fwd_scavenge, fwd_compare, sizeof (bridge_fwd_t), 3253 offsetof(bridge_fwd_t, bf_node)); 3254 bfnext = avl_first(&bip->bi_fwd); 3255 while ((bfp = bfnext) != NULL) { 3256 bfnext = AVL_NEXT(&bip->bi_fwd, bfp); 3257 if (bfp->bf_flags & BFF_LOCALADDR) 3258 continue; 3259 if (blp != NULL) { 3260 for (i = 0; i < bfp->bf_maxlinks; i++) { 3261 if (bfp->bf_links[i] == blp) 3262 break; 3263 } 3264 /* 3265 * If the 
link is there and we're excluding,
				 * then skip.  If the link is not there and
				 * we're doing only that link, then skip.
				 */
				if ((i < bfp->bf_maxlinks) == bff->bff_exclude)
					continue;
			}
			ASSERT(bfp->bf_flags & BFF_INTREE);
			avl_remove(&bip->bi_fwd, bfp);
			bfp->bf_flags &= ~BFF_INTREE;
			/* stage for unref after bi_rwlock is dropped */
			avl_add(&fwd_scavenge, bfp);
		}
		rw_exit(&bip->bi_rwlock);
		/*
		 * Second phase: drop the tree reference on each scavenged
		 * entry now that bi_rwlock is released; fwd_unref()
		 * presumably frees the entry on last release -- doing that
		 * under the writer lock would be undesirable.
		 */
		bfnext = avl_first(&fwd_scavenge);
		while ((bfp = bfnext) != NULL) {
			bfnext = AVL_NEXT(&fwd_scavenge, bfp);
			avl_remove(&fwd_scavenge, bfp);
			fwd_unref(bfp);	/* drop tree reference */
		}
		avl_destroy(&fwd_scavenge);
		break;
	}

	case BRIOC_TABLEMAX:
		if ((bip = bsp->bs_inst) == NULL ||
		    (rc = miocpullup(mp, sizeof (uint32_t))) != 0)
			break;
		/*
		 * NOTE(review): bi_tablemax is stored without taking
		 * bi_rwlock -- confirm an unsynchronized 32-bit store is
		 * acceptable to the readers of this field.
		 */
		/* LINTED: alignment */
		bip->bi_tablemax = *(uint32_t *)mp->b_cont->b_rptr;
		break;
	}

	/* Acknowledge or reject the ioctl based on the accumulated result */
	if (rc == 0)
		miocack(wq, mp, len, 0);
	else
		miocnak(wq, mp, 0, rc);
}

/*
 * STREAMS write-side put procedure.  This stream is control/observability
 * only: M_IOCTL messages carry bridge control operations, M_FLUSH is
 * handled per the usual STREAMS convention (nothing is ever queued on the
 * write side, so FLUSHW is simply cleared), and all other message types
 * are discarded.
 */
static void
bridge_wput(queue_t *wq, mblk_t *mp)
{
	switch (DB_TYPE(mp)) {
	case M_IOCTL:
		bridge_ioctl(wq, mp);
		break;
	case M_FLUSH:
		if (*mp->b_rptr & FLUSHW)
			*mp->b_rptr &= ~FLUSHW;
		if (*mp->b_rptr & FLUSHR)
			qreply(wq, mp);
		else
			freemsg(mp);
		break;
	default:
		freemsg(mp);
		break;
	}
}

/*
 * This function allocates the main data structures for the bridge driver and
 * connects us into devfs.
 */
static void
bridge_inst_init(void)
{
	/*
	 * Timer defaults, converted from seconds to clock ticks:
	 * 5s between scavenger scans, 25s forwarding-entry aging.
	 */
	bridge_scan_interval = 5 * drv_usectohz(1000000);
	bridge_fwd_age = 25 * drv_usectohz(1000000);

	/* Global locks and lists protecting bmac/inst objects (see header) */
	rw_init(&bmac_rwlock, NULL, RW_DRIVER, NULL);
	list_create(&bmac_list, sizeof (bridge_mac_t),
	    offsetof(bridge_mac_t, bm_node));
	list_create(&inst_list, sizeof (bridge_inst_t),
	    offsetof(bridge_inst_t, bi_node));
	cv_init(&inst_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&inst_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&stream_ref_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&stream_ref_lock, NULL, MUTEX_DRIVER, NULL);

	/* Hook our bridging entry points into the MAC layer */
	mac_bridge_vectors(bridge_xmit_cb, bridge_recv_cb, bridge_ref_cb,
	    bridge_ls_cb);
}

/*
 * This function disconnects from devfs and destroys all data structures in
 * preparation for unload.  It's assumed that there are no active bridge
 * references left at this point.
 */
static void
bridge_inst_fini(void)
{
	/* Unhook from the MAC layer first so no new callbacks can arrive */
	mac_bridge_vectors(NULL, NULL, NULL, NULL);
	/* Cancel the scavenger timer if one is outstanding */
	if (bridge_timerid != 0)
		(void) untimeout(bridge_timerid);
	rw_destroy(&bmac_rwlock);
	list_destroy(&bmac_list);
	list_destroy(&inst_list);
	cv_destroy(&inst_cv);
	mutex_destroy(&inst_lock);
	cv_destroy(&stream_ref_cv);
	mutex_destroy(&stream_ref_lock);
}

/*
 * bridge_attach()
 *
 * Description:
 *    Attach bridge driver to the system.
3373 */ 3374 static int 3375 bridge_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 3376 { 3377 if (cmd != DDI_ATTACH) 3378 return (DDI_FAILURE); 3379 3380 if (ddi_create_minor_node(dip, BRIDGE_CTL, S_IFCHR, 0, DDI_PSEUDO, 3381 CLONE_DEV) == DDI_FAILURE) { 3382 return (DDI_FAILURE); 3383 } 3384 3385 if (dld_ioc_register(BRIDGE_IOC, bridge_ioc_list, 3386 DLDIOCCNT(bridge_ioc_list)) != 0) { 3387 ddi_remove_minor_node(dip, BRIDGE_CTL); 3388 return (DDI_FAILURE); 3389 } 3390 3391 bridge_dev_info = dip; 3392 bridge_major = ddi_driver_major(dip); 3393 bridge_taskq = ddi_taskq_create(dip, BRIDGE_DEV_NAME, 1, 3394 TASKQ_DEFAULTPRI, 0); 3395 return (DDI_SUCCESS); 3396 } 3397 3398 /* 3399 * bridge_detach() 3400 * 3401 * Description: 3402 * Detach an interface to the system. 3403 */ 3404 static int 3405 bridge_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 3406 { 3407 if (cmd != DDI_DETACH) 3408 return (DDI_FAILURE); 3409 3410 ddi_remove_minor_node(dip, NULL); 3411 ddi_taskq_destroy(bridge_taskq); 3412 bridge_dev_info = NULL; 3413 return (DDI_SUCCESS); 3414 } 3415 3416 /* 3417 * bridge_info() 3418 * 3419 * Description: 3420 * Translate "dev_t" to a pointer to the associated "dev_info_t". 
 */
/* ARGSUSED */
static int
bridge_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result)
{
	int rc;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		/* Only answerable while the driver is attached */
		if (bridge_dev_info == NULL) {
			rc = DDI_FAILURE;
		} else {
			*result = (void *)bridge_dev_info;
			rc = DDI_SUCCESS;
		}
		break;
	case DDI_INFO_DEVT2INSTANCE:
		/* Single-instance pseudo-device: instance is always 0 */
		*result = NULL;
		rc = DDI_SUCCESS;
		break;
	default:
		rc = DDI_FAILURE;
		break;
	}
	return (rc);
}

static struct module_info bridge_modinfo = {
	2105,			/* mi_idnum */
	BRIDGE_DEV_NAME,	/* mi_idname */
	0,			/* mi_minpsz */
	16384,			/* mi_maxpsz */
	65536,			/* mi_hiwat */
	128			/* mi_lowat */
};

/* Read side: no put/service procedures; only open and close */
static struct qinit bridge_rinit = {
	NULL,			/* qi_putp */
	NULL,			/* qi_srvp */
	bridge_open,		/* qi_qopen */
	bridge_close,		/* qi_qclose */
	NULL,			/* qi_qadmin */
	&bridge_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

/* Write side: all control traffic enters via bridge_wput() */
static struct qinit bridge_winit = {
	(int (*)())bridge_wput,	/* qi_putp */
	NULL,			/* qi_srvp */
	NULL,			/* qi_qopen */
	NULL,			/* qi_qclose */
	NULL,			/* qi_qadmin */
	&bridge_modinfo,	/* qi_minfo */
	NULL			/* qi_mstat */
};

static struct streamtab bridge_tab = {
	&bridge_rinit,	/* st_rdinit */
	&bridge_winit	/* st_wrinit */
};

/* No STREAMS perimeters; we do all our own locking */
DDI_DEFINE_STREAM_OPS(bridge_ops, nulldev, nulldev, bridge_attach,
    bridge_detach, nodev, bridge_info, D_NEW | D_MP, &bridge_tab,
    ddi_quiesce_not_supported);

static struct modldrv modldrv = {
	&mod_driverops,
	"bridging driver",
	&bridge_ops
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	int retv;

	mac_init_ops(NULL, BRIDGE_DEV_NAME);
	bridge_inst_init();
	if ((retv = mod_install(&modlinkage)) != 0)
		bridge_inst_fini();
	/*
	 * NOTE(review): on mod_install() failure, mac_init_ops() is not
	 * undone here, and _fini() likewise has no matching teardown --
	 * confirm whether a mac_fini_ops() call is required on these paths.
	 */
	return (retv);
}

int
_fini(void)
{
	int retv;

	/*
	 * Refuse to unload while any bridge mac (snoop) instances remain;
	 * they persist after instance shutdown until their users go away
	 * (see the bridge_mac_t lifetime notes at the top of the file).
	 */
	rw_enter(&bmac_rwlock, RW_READER);
	retv = list_is_empty(&bmac_list) ? 0 : EBUSY;
	rw_exit(&bmac_rwlock);
	if (retv == 0 &&
	    (retv = mod_remove(&modlinkage)) == 0)
		bridge_inst_fini();
	return (retv);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}