1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * Copyright 2024 Oxide Computer Company 26 */ 27 28 /* 29 * This module supports AF_TRILL sockets and TRILL layer-2 forwarding. 30 */ 31 32 #include <sys/strsubr.h> 33 #include <sys/socket.h> 34 #include <sys/socketvar.h> 35 #include <sys/modctl.h> 36 #include <sys/cmn_err.h> 37 #include <sys/tihdr.h> 38 #include <sys/strsun.h> 39 #include <sys/policy.h> 40 #include <sys/ethernet.h> 41 #include <sys/vlan.h> 42 #include <net/trill.h> 43 #include <net/if_dl.h> 44 #include <sys/mac.h> 45 #include <sys/mac_client.h> 46 #include <sys/mac_provider.h> 47 #include <sys/mac_client_priv.h> 48 #include <sys/sdt.h> 49 #include <sys/dls.h> 50 #include <sys/sunddi.h> 51 52 #include "trill_impl.h" 53 54 static void trill_del_all(trill_inst_t *, boolean_t); 55 static int trill_del_nick(trill_inst_t *, uint16_t, boolean_t); 56 static void trill_stop_recv(trill_sock_t *); 57 static void trill_ctrl_input(trill_sock_t *, mblk_t *, const uint8_t *, 58 uint16_t); 59 static trill_node_t *trill_node_lookup(trill_inst_t *, uint16_t); 60 static void trill_node_unref(trill_inst_t *, trill_node_t *); 61 static void trill_sock_unref(trill_sock_t *); 62 static void trill_kstats_init(trill_sock_t *, const char *); 63 64 static list_t trill_inst_list; 65 static krwlock_t trill_inst_rwlock; 66 67 static sock_lower_handle_t trill_create(int, int, int, sock_downcalls_t **, 68 uint_t *, int *, int, cred_t *); 69 70 static smod_reg_t sinfo = { 71 SOCKMOD_VERSION, 72 "trill", 73 SOCK_UC_VERSION, 74 SOCK_DC_VERSION, 75 trill_create, 76 NULL, 77 }; 78 79 /* modldrv structure */ 80 static struct modlsockmod sockmod = { 81 &mod_sockmodops, "AF_TRILL socket module", &sinfo 82 }; 83 84 /* modlinkage structure */ 85 static struct modlinkage ml = { 86 MODREV_1, 87 &sockmod, 88 NULL 89 }; 90 91 #define VALID_NICK(n) ((n) != RBRIDGE_NICKNAME_NONE && \ 92 (n) != RBRIDGE_NICKNAME_UNUSED) 93 94 static mblk_t * 95 create_trill_header(trill_sock_t *tsock, mblk_t *mp, const uint8_t *daddr, 96 boolean_t trill_hdr_ok, boolean_t multidest, uint16_t tci, 97 size_t msglen) 98 { 99 int extra_hdr_len; 100 struct ether_vlan_header *ethvlanhdr; 101 mblk_t *hdr_mp; 102 uint16_t etype; 103 104 etype = msglen > 0 ? (uint16_t)msglen : ETHERTYPE_TRILL; 105 106 /* When sending on the PVID, we must not give a VLAN ID */ 107 if (tci == tsock->ts_link->bl_pvid) 108 tci = TRILL_NO_TCI; 109 110 /* 111 * Create new Ethernet header and include additional space 112 * for writing TRILL header and/or VLAN tag. 113 */ 114 extra_hdr_len = (trill_hdr_ok ? 0 : sizeof (trill_header_t)) + 115 (tci != TRILL_NO_TCI ? sizeof (struct ether_vlan_extinfo) : 0); 116 hdr_mp = mac_header(tsock->ts_link->bl_mh, daddr, 117 tci != TRILL_NO_TCI ? ETHERTYPE_VLAN : etype, mp, extra_hdr_len); 118 if (hdr_mp == NULL) { 119 freemsg(mp); 120 return (NULL); 121 } 122 123 if (tci != TRILL_NO_TCI) { 124 /* LINTED: alignment */ 125 ethvlanhdr = (struct ether_vlan_header *)hdr_mp->b_rptr; 126 ethvlanhdr->ether_tci = htons(tci); 127 ethvlanhdr->ether_type = htons(etype); 128 hdr_mp->b_wptr += sizeof (struct ether_vlan_extinfo); 129 } 130 131 if (!trill_hdr_ok) { 132 trill_header_t *thp; 133 /* LINTED: alignment */ 134 thp = (trill_header_t *)hdr_mp->b_wptr; 135 (void) memset(thp, 0, sizeof (trill_header_t)); 136 thp->th_hopcount = TRILL_DEFAULT_HOPS; 137 thp->th_multidest = (multidest ? 1:0); 138 hdr_mp->b_wptr += sizeof (trill_header_t); 139 } 140 141 hdr_mp->b_cont = mp; 142 return (hdr_mp); 143 } 144 145 /* 146 * TRILL local recv function. TRILL data frames that should be received 147 * by the local system are decapsulated here and passed to bridging for 148 * learning and local system receive. Only called when we are the forwarder 149 * on the link (multi-dest frames) or the frame was destined for us. 150 */ 151 static void 152 trill_recv_local(trill_sock_t *tsock, mblk_t *mp, uint16_t ingressnick) 153 { 154 struct ether_header *inner_ethhdr; 155 156 /* LINTED: alignment */ 157 inner_ethhdr = (struct ether_header *)mp->b_rptr; 158 DTRACE_PROBE1(trill__recv__local, struct ether_header *, inner_ethhdr); 159 160 DB_CKSUMFLAGS(mp) = 0; 161 /* 162 * Transmit the decapsulated frame on the link via Bridging. 163 * Bridging does source address learning and appropriate forwarding. 164 */ 165 bridge_trill_decaps(tsock->ts_link, mp, ingressnick); 166 KSPINCR(tks_decap); 167 } 168 169 /* 170 * Determines the outgoing link to reach a RBridge having the given nick 171 * Assumes caller has acquired the trill instance rwlock. 172 */ 173 static trill_sock_t * 174 find_trill_link(trill_inst_t *tip, datalink_id_t linkid) 175 { 176 trill_sock_t *tsp = NULL; 177 178 ASSERT(RW_LOCK_HELD(&tip->ti_rwlock)); 179 for (tsp = list_head(&tip->ti_socklist); tsp != NULL; 180 tsp = list_next(&tip->ti_socklist, tsp)) { 181 if (tsp->ts_link != NULL && tsp->ts_link->bl_linkid == linkid) { 182 ASSERT(tsp->ts_link->bl_mh != NULL); 183 ASSERT(!(tsp->ts_flags & TSF_SHUTDOWN)); 184 atomic_inc_uint(&tsp->ts_refs); 185 break; 186 } 187 } 188 return (tsp); 189 } 190 191 /* 192 * TRILL destination forwarding function. Transmits the TRILL data packet 193 * to the next-hop, adjacent RBridge. Consumes passed mblk_t. 194 */ 195 static void 196 trill_dest_fwd(trill_inst_t *tip, mblk_t *fwd_mp, uint16_t adj_nick, 197 boolean_t has_trill_hdr, boolean_t multidest, uint16_t dtnick) 198 { 199 trill_node_t *adj; 200 trill_sock_t *tsock = NULL; 201 trill_header_t *trillhdr; 202 struct ether_header *ethhdr; 203 int ethtype; 204 int ethhdrlen; 205 206 adj = trill_node_lookup(tip, adj_nick); 207 if (adj == NULL || ((tsock = adj->tn_tsp) == NULL)) 208 goto dest_fwd_fail; 209 210 ASSERT(tsock->ts_link != NULL); 211 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); 212 ASSERT(adj->tn_ni != NULL); 213 214 DTRACE_PROBE3(trill__dest__fwd, uint16_t, adj_nick, trill_node_t, 215 adj, trill_sock_t, tsock); 216 217 /* 218 * For broadcast links by using the dest address of 219 * the RBridge to forward the frame should result in 220 * savings. When the link is a bridged LAN or there are 221 * many end stations the frame will not always be flooded. 222 */ 223 fwd_mp = create_trill_header(tsock, fwd_mp, adj->tn_ni->tni_adjsnpa, 224 has_trill_hdr, multidest, tsock->ts_desigvlan, 0); 225 if (fwd_mp == NULL) 226 goto dest_fwd_fail; 227 228 /* LINTED: alignment */ 229 ethhdr = (struct ether_header *)fwd_mp->b_rptr; 230 ethtype = ntohs(ethhdr->ether_type); 231 ASSERT(ethtype == ETHERTYPE_VLAN || ethtype == ETHERTYPE_TRILL); 232 233 /* Pullup Ethernet and TRILL header (w/o TRILL options) */ 234 ethhdrlen = sizeof (struct ether_header) + 235 (ethtype == ETHERTYPE_VLAN ? sizeof (struct ether_vlan_extinfo):0); 236 if (!pullupmsg(fwd_mp, ethhdrlen + sizeof (trill_header_t))) 237 goto dest_fwd_fail; 238 /* LINTED: alignment */ 239 trillhdr = (struct trill_header *)(fwd_mp->b_rptr + ethhdrlen); 240 241 /* Update TRILL header with ingress and egress nicks for new frames */ 242 if (!has_trill_hdr) { 243 /* We are creating a new TRILL frame */ 244 trillhdr->th_egressnick = (multidest ? dtnick:adj_nick); 245 rw_enter(&tip->ti_rwlock, RW_READER); 246 trillhdr->th_ingressnick = tip->ti_nick; 247 rw_exit(&tip->ti_rwlock); 248 if (!VALID_NICK(trillhdr->th_ingressnick)) 249 goto dest_fwd_fail; 250 } 251 252 /* Set hop count and update header in packet */ 253 ASSERT(trillhdr->th_hopcount != 0); 254 trillhdr->th_hopcount--; 255 256 /* Clear checksum flag and transmit frame on the link */ 257 DB_CKSUMFLAGS(fwd_mp) = 0; 258 DTRACE_PROBE1(trill__dest__fwd__tx, trill_header_t *, &trillhdr); 259 fwd_mp = bridge_trill_output(tsock->ts_link, fwd_mp); 260 if (fwd_mp == NULL) { 261 KSPINCR(tks_sent); 262 KSPINCR(tks_forward); 263 } else { 264 freemsg(fwd_mp); 265 KSPINCR(tks_drops); 266 } 267 trill_node_unref(tip, adj); 268 return; 269 270 dest_fwd_fail: 271 if (adj != NULL) 272 trill_node_unref(tip, adj); 273 if (tsock != NULL) 274 KSPINCR(tks_drops); 275 freemsg(fwd_mp); 276 } 277 278 /* 279 * TRILL multi-destination forwarding. Transmits the packet to the adjacencies 280 * on the distribution tree determined by the egress nick. Source addr (saddr) 281 * is NULL for new TRILL packets originating from us. 282 */ 283 static void 284 trill_multidest_fwd(trill_inst_t *tip, mblk_t *mp, uint16_t egressnick, 285 uint16_t ingressnick, boolean_t is_trill_pkt, const uint8_t *saddr, 286 int inner_vlan, boolean_t free_mblk) 287 { 288 int idx; 289 uint16_t adjnick; 290 trill_node_t *dest; 291 trill_node_t *adj; 292 mblk_t *fwd_mp; 293 boolean_t nicksaved = B_FALSE; 294 uint16_t adjnicksaved; 295 296 /* Lookup the egress nick info, this is the DT root */ 297 if ((dest = trill_node_lookup(tip, egressnick)) == NULL) 298 goto fail_multidest_fwd; 299 300 /* Send a copy to all our adjacencies on the DT root */ 301 ASSERT(dest->tn_ni); 302 for (idx = 0; idx < dest->tn_ni->tni_adjcount; idx++) { 303 304 /* Check for a valid adjacency node */ 305 adjnick = TNI_ADJNICK(dest->tn_ni, idx); 306 if (!VALID_NICK(adjnick) || ingressnick == adjnick || 307 ((adj = trill_node_lookup(tip, adjnick)) == NULL)) 308 continue; 309 310 /* Do not forward back to adjacency that sent the pkt to us */ 311 ASSERT(adj->tn_ni != NULL); 312 if ((saddr != NULL) && 313 (memcmp(adj->tn_ni->tni_adjsnpa, saddr, 314 ETHERADDRL) == 0)) { 315 trill_node_unref(tip, adj); 316 continue; 317 } 318 319 /* Check if adj is marked as reaching inner VLAN downstream */ 320 if ((inner_vlan != VLAN_ID_NONE) && 321 !TRILL_VLANISSET(TNI_VLANFILTERMAP(dest->tn_ni, idx), 322 inner_vlan)) { 323 trill_node_unref(tip, adj); 324 DTRACE_PROBE4(trill__multi__dest__fwd__vlanfiltered, 325 uint16_t, adjnick, uint16_t, ingressnick, 326 uint16_t, egressnick, int, inner_vlan); 327 continue; 328 } 329 330 trill_node_unref(tip, adj); 331 332 /* 333 * Save the nick and look ahead to see if we should forward the 334 * frame to more adjacencies. We avoid doing a copy for this 335 * nick and use the passed mblk when we can consume the passed 336 * mblk. 337 */ 338 if (free_mblk && !nicksaved) { 339 adjnicksaved = adjnick; 340 nicksaved = B_TRUE; 341 continue; 342 } 343 344 fwd_mp = copymsg(mp); 345 if (fwd_mp == NULL) 346 break; 347 DTRACE_PROBE2(trill__multi__dest__fwd, uint16_t, 348 adjnick, uint16_t, ingressnick); 349 trill_dest_fwd(tip, fwd_mp, adjnick, is_trill_pkt, 350 B_TRUE, egressnick); 351 } 352 trill_node_unref(tip, dest); 353 354 if (nicksaved) { 355 ASSERT(free_mblk); 356 DTRACE_PROBE2(trill__multi__dest__fwd, uint16_t, 357 adjnicksaved, uint16_t, ingressnick); 358 trill_dest_fwd(tip, mp, adjnicksaved, is_trill_pkt, 359 B_TRUE, egressnick); 360 return; 361 } 362 363 fail_multidest_fwd: 364 DTRACE_PROBE2(trill__multi__dest__fwd__fail, uint16_t, 365 egressnick, uint16_t, ingressnick); 366 if (free_mblk) { 367 freemsg(mp); 368 } 369 } 370 371 /* 372 * TRILL data receive function. Forwards the received frame if necessary 373 * and also determines if the received frame should be consumed locally. 374 * Consumes passed mblk. 375 */ 376 static void 377 trill_recv(trill_sock_t *tsock, mblk_t *mp, const uint8_t *mpsaddr) 378 { 379 trill_header_t *trillhdr; 380 trill_node_t *dest = NULL; 381 trill_node_t *source = NULL; 382 trill_node_t *adj; 383 uint16_t ournick, adjnick, treeroot; 384 struct ether_header *ethhdr; 385 trill_inst_t *tip = tsock->ts_tip; 386 uint8_t srcaddr[ETHERADDRL]; 387 size_t trillhdrlen; 388 int inner_vlan = VLAN_ID_NONE; 389 int tci; 390 int idx; 391 size_t min_size; 392 393 /* Copy Ethernet source address before modifying packet */ 394 (void) memcpy(srcaddr, mpsaddr, ETHERADDRL); 395 396 /* Pull up TRILL header if necessary. */ 397 min_size = sizeof (trill_header_t); 398 if ((MBLKL(mp) < min_size || 399 !IS_P2ALIGNED(mp->b_rptr, TRILL_HDR_ALIGN)) && 400 !pullupmsg(mp, min_size)) 401 goto fail; 402 403 /* LINTED: alignment */ 404 trillhdr = (trill_header_t *)mp->b_rptr; 405 if (trillhdr->th_version != TRILL_PROTOCOL_VERS) { 406 DTRACE_PROBE1(trill__recv__wrongversion, 407 trill_header_t *, trillhdr); 408 goto fail; 409 } 410 411 /* Drop if unknown or invalid nickname */ 412 if (!VALID_NICK(trillhdr->th_egressnick) || 413 !VALID_NICK(trillhdr->th_ingressnick)) { 414 DTRACE_PROBE1(trill__recv__invalidnick, 415 trill_header_t *, trillhdr); 416 goto fail; 417 } 418 419 rw_enter(&tip->ti_rwlock, RW_READER); 420 ournick = tip->ti_nick; 421 treeroot = tip->ti_treeroot; 422 rw_exit(&tip->ti_rwlock); 423 /* Drop if we received a packet with our nick as ingress */ 424 if (trillhdr->th_ingressnick == ournick) 425 goto fail; 426 427 /* Re-pull any TRILL options and inner Ethernet header */ 428 min_size += GET_TRILL_OPTS_LEN(trillhdr) * sizeof (uint32_t) + 429 sizeof (struct ether_header); 430 if (MBLKL(mp) < min_size) { 431 if (!pullupmsg(mp, min_size)) 432 goto fail; 433 /* LINTED: alignment */ 434 trillhdr = (trill_header_t *)mp->b_rptr; 435 } 436 trillhdrlen = sizeof (trill_header_t) + 437 (GET_TRILL_OPTS_LEN(trillhdr) * sizeof (uint32_t)); 438 439 /* 440 * Get the inner Ethernet header, plus the inner VLAN header if there 441 * is one. 442 */ 443 /* LINTED: alignment */ 444 ethhdr = (struct ether_header *)(mp->b_rptr + trillhdrlen); 445 if (ethhdr->ether_type == htons(ETHERTYPE_VLAN)) { 446 min_size += sizeof (struct ether_vlan_extinfo); 447 if (MBLKL(mp) < min_size) { 448 if (!pullupmsg(mp, min_size)) 449 goto fail; 450 /* LINTED: alignment */ 451 trillhdr = (trill_header_t *)mp->b_rptr; 452 /* LINTED: alignment */ 453 ethhdr = (struct ether_header *)(mp->b_rptr + 454 trillhdrlen); 455 } 456 457 tci = ntohs(((struct ether_vlan_header *)ethhdr)->ether_tci); 458 inner_vlan = VLAN_ID(tci); 459 } 460 461 /* Known/single destination forwarding. */ 462 if (!trillhdr->th_multidest) { 463 464 /* Inner MacDA must be unicast */ 465 if (ethhdr->ether_dhost.ether_addr_octet[0] & 1) 466 goto fail; 467 468 /* Ingress and Egress nicks must be different */ 469 if (trillhdr->th_egressnick == trillhdr->th_ingressnick) 470 goto fail; 471 472 DTRACE_PROBE1(trill__recv__singledest, 473 trill_header_t *, trillhdr); 474 if (trillhdr->th_egressnick == ournick) { 475 mp->b_rptr += trillhdrlen; 476 trill_recv_local(tsock, mp, trillhdr->th_ingressnick); 477 } else if (trillhdr->th_hopcount > 0) { 478 trill_dest_fwd(tip, mp, trillhdr->th_egressnick, 479 B_TRUE, B_FALSE, RBRIDGE_NICKNAME_NONE); 480 } else { 481 goto fail; 482 } 483 return; 484 } 485 486 /* 487 * Multi-destination frame: perform checks verifying we have 488 * received a valid multi-destination frame before receiving the 489 * frame locally and forwarding the frame to other RBridges. 490 * 491 * Check if we received this multi-destination frame on a 492 * adjacency in the distribution tree indicated by the frame's 493 * egress nickname. 494 */ 495 if ((dest = trill_node_lookup(tip, trillhdr->th_egressnick)) == NULL) 496 goto fail; 497 for (idx = 0; idx < dest->tn_ni->tni_adjcount; idx++) { 498 adjnick = TNI_ADJNICK(dest->tn_ni, idx); 499 if ((adj = trill_node_lookup(tip, adjnick)) == NULL) 500 continue; 501 if (memcmp(adj->tn_ni->tni_adjsnpa, srcaddr, ETHERADDRL) == 0) { 502 trill_node_unref(tip, adj); 503 break; 504 } 505 trill_node_unref(tip, adj); 506 } 507 508 if (idx >= dest->tn_ni->tni_adjcount) { 509 DTRACE_PROBE2(trill__recv__multidest__adjcheckfail, 510 trill_header_t *, trillhdr, trill_node_t *, dest); 511 goto fail; 512 } 513 514 /* 515 * Reverse path forwarding check. Check if the ingress RBridge 516 * that has forwarded the frame advertised the use of the 517 * distribution tree specified in the egress nick. 518 */ 519 if ((source = trill_node_lookup(tip, trillhdr->th_ingressnick)) == NULL) 520 goto fail; 521 for (idx = 0; idx < source->tn_ni->tni_dtrootcount; idx++) { 522 if (TNI_DTROOTNICK(source->tn_ni, idx) == 523 trillhdr->th_egressnick) 524 break; 525 } 526 527 if (idx >= source->tn_ni->tni_dtrootcount) { 528 /* 529 * Allow receipt of forwarded frame with the highest 530 * tree root RBridge as the egress RBridge when the 531 * ingress RBridge has not advertised the use of any 532 * distribution trees. 533 */ 534 if (source->tn_ni->tni_dtrootcount != 0 || 535 trillhdr->th_egressnick != treeroot) { 536 DTRACE_PROBE3( 537 trill__recv__multidest__rpfcheckfail, 538 trill_header_t *, trillhdr, trill_node_t *, 539 source, trill_inst_t *, tip); 540 goto fail; 541 } 542 } 543 544 /* Check hop count before doing any forwarding */ 545 if (trillhdr->th_hopcount == 0) 546 goto fail; 547 548 /* Forward frame using the distribution tree specified by egress nick */ 549 DTRACE_PROBE2(trill__recv__multidest, trill_header_t *, 550 trillhdr, trill_node_t *, source); 551 trill_node_unref(tip, source); 552 trill_node_unref(tip, dest); 553 554 /* Tell forwarding not to free if we're the link forwarder. */ 555 trill_multidest_fwd(tip, mp, trillhdr->th_egressnick, 556 trillhdr->th_ingressnick, B_TRUE, srcaddr, inner_vlan, 557 B_FALSE); 558 559 /* 560 * Send de-capsulated frame locally if we are the link forwarder (also 561 * does bridge learning). 562 */ 563 mp->b_rptr += trillhdrlen; 564 trill_recv_local(tsock, mp, trillhdr->th_ingressnick); 565 KSPINCR(tks_recv); 566 return; 567 568 fail: 569 DTRACE_PROBE2(trill__recv__multidest__fail, mblk_t *, mp, 570 trill_sock_t *, tsock); 571 if (dest != NULL) 572 trill_node_unref(tip, dest); 573 if (source != NULL) 574 trill_node_unref(tip, source); 575 freemsg(mp); 576 KSPINCR(tks_drops); 577 } 578 579 static void 580 trill_stop_recv(trill_sock_t *tsock) 581 { 582 mutex_enter(&tsock->ts_socklock); 583 stop_retry: 584 if (tsock->ts_state == TS_UNBND || tsock->ts_link == NULL) { 585 mutex_exit(&tsock->ts_socklock); 586 return; 587 } 588 589 /* 590 * If another thread is closing the socket then wait. Our callers 591 * expect us to return only after the socket is closed. 592 */ 593 if (tsock->ts_flags & TSF_CLOSEWAIT) { 594 cv_wait(&tsock->ts_sockclosewait, &tsock->ts_socklock); 595 goto stop_retry; 596 } 597 598 /* 599 * Set state and flags to block new bind or close calls 600 * while we close the socket. 601 */ 602 tsock->ts_flags |= TSF_CLOSEWAIT; 603 604 /* Wait until all AF_TRILL socket transmit operations are done */ 605 while (tsock->ts_sockthreadcount > 0) 606 cv_wait(&tsock->ts_sockthreadwait, &tsock->ts_socklock); 607 608 /* 609 * We are guaranteed to be the only thread closing on the 610 * socket while the TSF_CLOSEWAIT flag is set, all others cv_wait 611 * for us to finish. 612 */ 613 ASSERT(tsock->ts_link != NULL); 614 if (tsock->ts_ksp != NULL) 615 kstat_delete(tsock->ts_ksp); 616 617 /* 618 * Release lock before bridge_trill_lnunref to prevent deadlock 619 * between trill_ctrl_input thread waiting to acquire ts_socklock 620 * and bridge_trill_lnunref waiting for the trill thread to finish. 621 */ 622 mutex_exit(&tsock->ts_socklock); 623 624 /* 625 * Release TRILL link reference from Bridging. On return from 626 * bridge_trill_lnunref we can be sure there are no active TRILL data 627 * threads for this link. 628 */ 629 bridge_trill_lnunref(tsock->ts_link); 630 631 /* Set socket as unbound & wakeup threads waiting for socket to close */ 632 mutex_enter(&tsock->ts_socklock); 633 ASSERT(tsock->ts_link != NULL); 634 tsock->ts_link = NULL; 635 tsock->ts_state = TS_UNBND; 636 tsock->ts_flags &= ~TSF_CLOSEWAIT; 637 cv_broadcast(&tsock->ts_sockclosewait); 638 mutex_exit(&tsock->ts_socklock); 639 } 640 641 static int 642 trill_start_recv(trill_sock_t *tsock, const struct sockaddr *sa, socklen_t len) 643 { 644 struct sockaddr_dl *lladdr = (struct sockaddr_dl *)sa; 645 datalink_id_t linkid; 646 int err = 0; 647 648 if (len != sizeof (*lladdr)) 649 return (EINVAL); 650 651 mutex_enter(&tsock->ts_socklock); 652 if (tsock->ts_tip == NULL || tsock->ts_state != TS_UNBND) { 653 err = EINVAL; 654 goto bind_error; 655 } 656 657 if (tsock->ts_flags & TSF_CLOSEWAIT || tsock->ts_link != NULL) { 658 err = EBUSY; 659 goto bind_error; 660 } 661 662 (void) memcpy(&(tsock->ts_lladdr), lladdr, 663 sizeof (struct sockaddr_dl)); 664 (void) memcpy(&linkid, tsock->ts_lladdr.sdl_data, 665 sizeof (datalink_id_t)); 666 667 tsock->ts_link = bridge_trill_lnref(tsock->ts_tip->ti_binst, 668 linkid, tsock); 669 if (tsock->ts_link == NULL) { 670 err = EINVAL; 671 goto bind_error; 672 } 673 674 trill_kstats_init(tsock, tsock->ts_tip->ti_bridgename); 675 tsock->ts_state = TS_IDLE; 676 677 bind_error: 678 mutex_exit(&tsock->ts_socklock); 679 return (err); 680 } 681 682 static int 683 trill_do_unbind(trill_sock_t *tsock) 684 { 685 /* If a bind has not been done, we can't unbind. */ 686 if (tsock->ts_state != TS_IDLE) 687 return (EINVAL); 688 689 trill_stop_recv(tsock); 690 return (0); 691 } 692 693 static void 694 trill_instance_unref(trill_inst_t *tip) 695 { 696 rw_enter(&trill_inst_rwlock, RW_WRITER); 697 rw_enter(&tip->ti_rwlock, RW_WRITER); 698 if (atomic_dec_uint_nv(&tip->ti_refs) == 0) { 699 list_remove(&trill_inst_list, tip); 700 rw_exit(&tip->ti_rwlock); 701 rw_exit(&trill_inst_rwlock); 702 if (tip->ti_binst != NULL) 703 bridge_trill_brunref(tip->ti_binst); 704 list_destroy(&tip->ti_socklist); 705 rw_destroy(&tip->ti_rwlock); 706 kmem_free(tip, sizeof (*tip)); 707 } else { 708 rw_exit(&tip->ti_rwlock); 709 rw_exit(&trill_inst_rwlock); 710 } 711 } 712 713 /* 714 * This is called when the bridge module receives a TRILL-encapsulated packet 715 * on a given link or a packet identified as "TRILL control." We must verify 716 * that it's for us (it almost certainly will be), and then either decapsulate 717 * (if it's to our nickname), forward (if it's to someone else), or send up one 718 * of the sockets (if it's control traffic). 719 * 720 * Sadly, on Ethernet, the control traffic is identified by Outer.MacDA, and 721 * not by TRILL header information. 722 */ 723 static void 724 trill_recv_pkt_cb(void *lptr, bridge_link_t *blp, mac_resource_handle_t rsrc, 725 mblk_t *mp, mac_header_info_t *hdr_info) 726 { 727 trill_sock_t *tsock = lptr; 728 729 _NOTE(ARGUNUSED(rsrc)); 730 731 ASSERT(tsock->ts_tip != NULL); 732 ASSERT(tsock->ts_link != NULL); 733 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); 734 735 /* 736 * Only receive packet if the source address is not multicast (which is 737 * bogus). 738 */ 739 if (hdr_info->mhi_saddr[0] & 1) 740 goto discard; 741 742 /* 743 * Check if this is our own packet reflected back. It should not be. 744 */ 745 if (bcmp(hdr_info->mhi_saddr, blp->bl_local_mac, ETHERADDRL) == 0) 746 goto discard; 747 748 /* Only receive unicast packet if addressed to us */ 749 if (hdr_info->mhi_dsttype == MAC_ADDRTYPE_UNICAST && 750 bcmp(hdr_info->mhi_daddr, blp->bl_local_mac, ETHERADDRL) != 0) 751 goto discard; 752 753 if (hdr_info->mhi_bindsap == ETHERTYPE_TRILL) { 754 /* TRILL data packets */ 755 trill_recv(tsock, mp, hdr_info->mhi_saddr); 756 } else { 757 /* Design constraint for cheap IS-IS/BPDU comparison */ 758 ASSERT(all_isis_rbridges[4] != bridge_group_address[4]); 759 /* Send received control packet upstream */ 760 trill_ctrl_input(tsock, mp, hdr_info->mhi_saddr, 761 hdr_info->mhi_daddr[4] == all_isis_rbridges[4] ? 762 hdr_info->mhi_tci : TRILL_TCI_BPDU); 763 } 764 765 return; 766 767 discard: 768 freemsg(mp); 769 KSPINCR(tks_drops); 770 } 771 772 /* 773 * This is called when the bridge module discovers that the destination address 774 * for a packet is not local -- it's through some remote node. We must verify 775 * that the remote node isn't our nickname (it shouldn't be), add a TRILL 776 * header, and then use the IS-IS data to determine which link and which 777 * next-hop RBridge should be used for output. We then transmit on that link. 778 * 779 * The egress_nick is RBRIDGE_NICKNAME_NONE for the "unknown destination" case. 780 */ 781 static void 782 trill_encap_pkt_cb(void *lptr, bridge_link_t *blp, mac_header_info_t *hdr_info, 783 mblk_t *mp, uint16_t egress_nick) 784 { 785 uint16_t ournick; 786 uint16_t dtnick; 787 trill_node_t *self = NULL; 788 trill_sock_t *tsock = lptr; 789 trill_inst_t *tip = tsock->ts_tip; 790 int vlan = VLAN_ID_NONE; 791 792 _NOTE(ARGUNUSED(blp)); 793 ASSERT(hdr_info->mhi_bindsap != ETHERTYPE_TRILL); 794 795 /* egress_nick = RBRIDGE_NICKNAME_NONE is valid */ 796 if (egress_nick != RBRIDGE_NICKNAME_NONE && !VALID_NICK(egress_nick)) 797 goto discard; 798 799 /* Check if our own nick is valid before we do any forwarding */ 800 rw_enter(&tip->ti_rwlock, RW_READER); 801 ournick = tip->ti_nick; 802 dtnick = tip->ti_treeroot; 803 rw_exit(&tip->ti_rwlock); 804 if (!VALID_NICK(ournick)) 805 goto discard; 806 807 /* 808 * For Multi-Destination forwarding determine our choice of 809 * root distribution tree. If we didn't choose a distribution 810 * tree (dtroots_count=0) then we use the highest priority tree 811 * root (t_treeroot) else we drop the packet without forwarding. 812 */ 813 if (egress_nick == RBRIDGE_NICKNAME_NONE) { 814 if ((self = trill_node_lookup(tip, ournick)) == NULL) 815 goto discard; 816 817 /* 818 * Use the first DT configured for now. In future we 819 * should have DT selection code here. 820 */ 821 if (self->tn_ni->tni_dtrootcount > 0) { 822 dtnick = TNI_DTROOTNICK(self->tn_ni, 0); 823 } 824 825 trill_node_unref(tip, self); 826 if (!VALID_NICK(dtnick)) { 827 DTRACE_PROBE(trill__fwd__packet__nodtroot); 828 goto discard; 829 } 830 } 831 832 /* 833 * Retrieve VLAN ID of the native frame used for VLAN 834 * pruning of multi-destination frames. 835 */ 836 if (hdr_info->mhi_istagged) { 837 vlan = VLAN_ID(hdr_info->mhi_tci); 838 } 839 840 DTRACE_PROBE2(trill__fwd__packet, mac_header_info_t *, hdr_info, 841 uint16_t, egress_nick); 842 if (egress_nick == RBRIDGE_NICKNAME_NONE) { 843 trill_multidest_fwd(tip, mp, dtnick, 844 ournick, B_FALSE, NULL, vlan, B_TRUE); 845 } else { 846 trill_dest_fwd(tip, mp, egress_nick, B_FALSE, B_FALSE, 847 RBRIDGE_NICKNAME_NONE); 848 } 849 KSPINCR(tks_encap); 850 return; 851 852 discard: 853 freemsg(mp); 854 } 855 856 /* 857 * This is called when the bridge module has completely torn down a bridge 858 * instance and all of the attached links. We need to make the TRILL instance 859 * go away at this point. 860 */ 861 static void 862 trill_br_dstr_cb(void *bptr, bridge_inst_t *bip) 863 { 864 trill_inst_t *tip = bptr; 865 866 _NOTE(ARGUNUSED(bip)); 867 rw_enter(&tip->ti_rwlock, RW_WRITER); 868 if (tip->ti_binst != NULL) 869 bridge_trill_brunref(tip->ti_binst); 870 tip->ti_binst = NULL; 871 rw_exit(&tip->ti_rwlock); 872 } 873 874 /* 875 * This is called when the bridge module is tearing down a link, but before the 876 * actual tear-down starts. When this function returns, we must make sure that 877 * we will not initiate any new transmits on this link. 878 */ 879 static void 880 trill_ln_dstr_cb(void *lptr, bridge_link_t *blp) 881 { 882 trill_sock_t *tsock = lptr; 883 884 _NOTE(ARGUNUSED(blp)); 885 trill_stop_recv(tsock); 886 } 887 888 static void 889 trill_init(void) 890 { 891 list_create(&trill_inst_list, sizeof (trill_inst_t), 892 offsetof(trill_inst_t, ti_instnode)); 893 rw_init(&trill_inst_rwlock, NULL, RW_DRIVER, NULL); 894 bridge_trill_register_cb(trill_recv_pkt_cb, trill_encap_pkt_cb, 895 trill_br_dstr_cb, trill_ln_dstr_cb); 896 } 897 898 static void 899 trill_fini(void) 900 { 901 bridge_trill_register_cb(NULL, NULL, NULL, NULL); 902 rw_destroy(&trill_inst_rwlock); 903 list_destroy(&trill_inst_list); 904 } 905 906 /* Loadable module configuration entry points */ 907 int 908 _init(void) 909 { 910 int rc; 911 912 trill_init(); 913 if ((rc = mod_install(&ml)) != 0) 914 trill_fini(); 915 return (rc); 916 } 917 918 int 919 _info(struct modinfo *modinfop) 920 { 921 return (mod_info(&ml, modinfop)); 922 } 923 924 int 925 _fini(void) 926 { 927 int rc; 928 929 rw_enter(&trill_inst_rwlock, RW_READER); 930 rc = list_is_empty(&trill_inst_list) ? 0 : EBUSY; 931 rw_exit(&trill_inst_rwlock); 932 if (rc == 0 && ((rc = mod_remove(&ml)) == 0)) 933 trill_fini(); 934 return (rc); 935 } 936 937 static void 938 trill_kstats_init(trill_sock_t *tsock, const char *bname) 939 { 940 int i; 941 char kstatname[KSTAT_STRLEN]; 942 kstat_named_t *knt; 943 static const char *sock_kstats_list[] = { TRILL_KSSOCK_NAMES }; 944 char link_name[MAXNAMELEN]; 945 int num; 946 int err; 947 948 bzero(link_name, sizeof (link_name)); 949 if ((err = dls_mgmt_get_linkinfo(tsock->ts_link->bl_linkid, link_name, 950 NULL, NULL, NULL)) != 0) { 951 cmn_err(CE_WARN, "%s: trill_kstats_init: error %d retrieving" 952 " linkinfo for linkid:%d", "trill", err, 953 tsock->ts_link->bl_linkid); 954 return; 955 } 956 957 bzero(kstatname, sizeof (kstatname)); 958 (void) snprintf(kstatname, sizeof (kstatname), "%s-%s", 959 bname, link_name); 960 961 num = sizeof (sock_kstats_list) / sizeof (*sock_kstats_list); 962 for (i = 0; i < num; i++) { 963 knt = (kstat_named_t *)&(tsock->ts_kstats); 964 kstat_named_init(&knt[i], sock_kstats_list[i], 965 KSTAT_DATA_UINT64); 966 } 967 968 tsock->ts_ksp = kstat_create_zone("trill", 0, kstatname, "sock", 969 KSTAT_TYPE_NAMED, num, KSTAT_FLAG_VIRTUAL, GLOBAL_ZONEID); 970 if (tsock->ts_ksp != NULL) { 971 tsock->ts_ksp->ks_data = &tsock->ts_kstats; 972 kstat_install(tsock->ts_ksp); 973 } 974 } 975 976 static trill_sock_t * 977 trill_do_open(int flags) 978 { 979 trill_sock_t *tsock; 980 int kmflag = ((flags & SOCKET_NOSLEEP)) ? KM_NOSLEEP:KM_SLEEP; 981 982 tsock = kmem_zalloc(sizeof (trill_sock_t), kmflag); 983 if (tsock != NULL) { 984 tsock->ts_state = TS_UNBND; 985 tsock->ts_refs++; 986 mutex_init(&tsock->ts_socklock, NULL, MUTEX_DRIVER, NULL); 987 cv_init(&tsock->ts_sockthreadwait, NULL, CV_DRIVER, NULL); 988 cv_init(&tsock->ts_sockclosewait, NULL, CV_DRIVER, NULL); 989 } 990 return (tsock); 991 } 992 993 static int 994 trill_find_bridge(trill_sock_t *tsock, const char *bname, boolean_t can_create) 995 { 996 trill_inst_t *tip, *newtip = NULL; 997 998 /* Allocate some memory (speculatively) before taking locks */ 999 if (can_create) 1000 newtip = kmem_zalloc(sizeof (*tip), KM_NOSLEEP); 1001 1002 rw_enter(&trill_inst_rwlock, RW_WRITER); 1003 for (tip = list_head(&trill_inst_list); tip != NULL; 1004 tip = list_next(&trill_inst_list, tip)) { 1005 if (strcmp(tip->ti_bridgename, bname) == 0) 1006 break; 1007 } 1008 if (tip == NULL) { 1009 if (!can_create || newtip == NULL) { 1010 rw_exit(&trill_inst_rwlock); 1011 return (can_create ? ENOMEM : ENOENT); 1012 } 1013 1014 tip = newtip; 1015 newtip = NULL; 1016 (void) strcpy(tip->ti_bridgename, bname); 1017 1018 /* Register TRILL instance with bridging */ 1019 tip->ti_binst = bridge_trill_brref(bname, tip); 1020 if (tip->ti_binst == NULL) { 1021 rw_exit(&trill_inst_rwlock); 1022 kmem_free(tip, sizeof (*tip)); 1023 return (ENOENT); 1024 } 1025 1026 rw_init(&tip->ti_rwlock, NULL, RW_DRIVER, NULL); 1027 list_create(&tip->ti_socklist, sizeof (trill_sock_t), 1028 offsetof(trill_sock_t, ts_socklistnode)); 1029 list_insert_tail(&trill_inst_list, tip); 1030 } 1031 atomic_inc_uint(&tip->ti_refs); 1032 rw_exit(&trill_inst_rwlock); 1033 1034 /* If we didn't need the preallocated memory, then discard now. */ 1035 if (newtip != NULL) 1036 kmem_free(newtip, sizeof (*newtip)); 1037 1038 rw_enter(&tip->ti_rwlock, RW_WRITER); 1039 list_insert_tail(&(tip->ti_socklist), tsock); 1040 tsock->ts_tip = tip; 1041 rw_exit(&tip->ti_rwlock); 1042 return (0); 1043 } 1044 1045 static void 1046 trill_clear_bridge(trill_sock_t *tsock) 1047 { 1048 trill_inst_t *tip; 1049 1050 if ((tip = tsock->ts_tip) == NULL) 1051 return; 1052 rw_enter(&tip->ti_rwlock, RW_WRITER); 1053 list_remove(&tip->ti_socklist, tsock); 1054 if (list_is_empty(&tip->ti_socklist)) 1055 trill_del_all(tip, B_TRUE); 1056 rw_exit(&tip->ti_rwlock); 1057 } 1058 1059 static void 1060 trill_sock_unref(trill_sock_t *tsock) 1061 { 1062 if (atomic_dec_uint_nv(&tsock->ts_refs) == 0) { 1063 mutex_destroy(&tsock->ts_socklock); 1064 cv_destroy(&tsock->ts_sockthreadwait); 1065 cv_destroy(&tsock->ts_sockclosewait); 1066 kmem_free(tsock, sizeof (trill_sock_t)); 1067 } 1068 } 1069 1070 static void 1071 trill_do_close(trill_sock_t *tsock) 1072 { 1073 trill_inst_t *tip; 1074 1075 tip = tsock->ts_tip; 1076 trill_stop_recv(tsock); 1077 /* Remove socket from TRILL instance socket list */ 1078 trill_clear_bridge(tsock); 1079 tsock->ts_flags |= TSF_SHUTDOWN; 1080 trill_sock_unref(tsock); 1081 if (tip != NULL) 1082 trill_instance_unref(tip); 1083 } 1084 1085 static void 1086 trill_del_all(trill_inst_t *tip, boolean_t lockheld) 1087 { 1088 int i; 1089 1090 if (!lockheld) 1091 rw_enter(&tip->ti_rwlock, RW_WRITER); 1092 for (i = RBRIDGE_NICKNAME_MIN; i < RBRIDGE_NICKNAME_MAX; i++) { 1093 if (tip->ti_nodes[i] != NULL) 1094 (void) trill_del_nick(tip, i, B_TRUE); 1095 } 1096 if (!lockheld) 1097 rw_exit(&tip->ti_rwlock); 1098 } 1099 1100 static void 1101 trill_node_free(trill_node_t *nick_entry) 1102 { 1103 trill_nickinfo_t *tni; 1104 1105 tni = nick_entry->tn_ni; 1106 kmem_free(tni, TNI_TOTALSIZE(tni)); 1107 kmem_free(nick_entry, sizeof (trill_node_t)); 1108 } 1109 1110 static void 1111 trill_node_unref(trill_inst_t *tip, trill_node_t *tnp) 1112 { 1113 if (atomic_dec_uint_nv(&tnp->tn_refs) == 0) { 1114 if (tnp->tn_tsp != NULL) 1115 trill_sock_unref(tnp->tn_tsp); 1116 trill_node_free(tnp); 1117 atomic_dec_uint(&tip->ti_nodecount); 1118 } 1119 } 1120 1121 static trill_node_t * 1122 trill_node_lookup(trill_inst_t *tip, uint16_t nick) 1123 { 1124 trill_node_t *nick_entry; 1125 1126 if (!VALID_NICK(nick)) 1127 return (NULL); 1128 rw_enter(&tip->ti_rwlock, RW_READER); 1129 nick_entry = tip->ti_nodes[nick]; 1130 if (nick_entry != NULL) { 1131 atomic_inc_uint(&nick_entry->tn_refs); 1132 } 1133 rw_exit(&tip->ti_rwlock); 1134 return (nick_entry); 1135 } 1136 1137 static int 1138 trill_del_nick(trill_inst_t *tip, uint16_t nick, boolean_t lockheld) 1139 { 1140 trill_node_t *nick_entry; 1141 int rc = ENOENT; 1142 1143 if (!lockheld) 1144 rw_enter(&tip->ti_rwlock, RW_WRITER); 1145 if (VALID_NICK(nick)) { 1146 nick_entry = tip->ti_nodes[nick]; 1147 if (nick_entry != NULL) { 1148 trill_node_unref(tip, nick_entry); 1149 tip->ti_nodes[nick] = NULL; 1150 rc = 0; 1151 } 1152 } 1153 if (!lockheld) 1154 rw_exit(&tip->ti_rwlock); 1155 return (rc); 1156 } 1157 1158 static int 1159 trill_add_nick(trill_inst_t *tip, void *arg, boolean_t self, int mode) 1160 { 1161 uint16_t nick; 1162 int size; 1163 trill_node_t *tnode; 1164 trill_nickinfo_t tnihdr; 1165 1166 /* First make sure we have at least the header available */ 1167 if (ddi_copyin(arg, &tnihdr, sizeof (trill_nickinfo_t), mode) != 0) 1168 return (EFAULT); 1169 1170 nick = tnihdr.tni_nick; 1171 if (!VALID_NICK(nick)) { 1172 DTRACE_PROBE1(trill__add__nick__bad, trill_nickinfo_t *, 1173 &tnihdr); 1174 return (EINVAL); 1175 } 1176 1177 size = TNI_TOTALSIZE(&tnihdr); 1178 if (size > TNI_MAXSIZE) 1179 return (EINVAL); 1180 tnode = kmem_zalloc(sizeof (trill_node_t), KM_SLEEP); 1181 tnode->tn_ni = kmem_zalloc(size, KM_SLEEP); 1182 if (ddi_copyin(arg, tnode->tn_ni, size, mode) != 0) { 1183 kmem_free(tnode->tn_ni, size); 1184 kmem_free(tnode, sizeof (trill_node_t)); 1185 return (EFAULT); 1186 } 1187 1188 tnode->tn_refs++; 1189 rw_enter(&tip->ti_rwlock, RW_WRITER); 1190 if (tip->ti_nodes[nick] != NULL) 1191 (void) trill_del_nick(tip, nick, B_TRUE); 1192 1193 if (self) { 1194 tip->ti_nick = nick; 1195 } else { 1196 tnode->tn_tsp = find_trill_link(tip, 1197 tnode->tn_ni->tni_linkid); 1198 } 1199 DTRACE_PROBE2(trill__add__nick, trill_node_t *, tnode, 1200 uint16_t, nick); 1201 tip->ti_nodes[nick] = tnode; 1202 tip->ti_nodecount++; 1203 rw_exit(&tip->ti_rwlock); 1204 return (0); 1205 } 1206 1207 static int 1208 trill_do_ioctl(trill_sock_t *tsock, int cmd, void *arg, int mode) 1209 { 1210 int error = 0; 1211 trill_inst_t *tip = tsock->ts_tip; 1212 1213 switch (cmd) { 1214 case TRILL_DESIGVLAN: { 1215 uint16_t desigvlan; 1216 1217 if (ddi_copyin(arg, &desigvlan, sizeof (desigvlan), mode) != 0) 1218 return (EFAULT); 1219 tsock->ts_desigvlan = desigvlan; 1220 break; 1221 } 1222 case TRILL_VLANFWDER: { 1223 uint8_t vlans[TRILL_VLANS_ARRSIZE]; 1224 1225 if (tsock->ts_link == NULL) 1226 return (EINVAL); 1227 if ((ddi_copyin(arg, vlans, sizeof (vlans), mode)) != 0) 1228 return (EFAULT); 1229 bridge_trill_setvlans(tsock->ts_link, vlans); 1230 break; 1231 } 1232 case TRILL_SETNICK: 1233 if (tip == NULL) 1234 return (EINVAL); 1235 error = trill_add_nick(tip, arg, B_TRUE, mode); 1236 break; 1237 1238 case TRILL_GETNICK: 1239 if (tip == NULL) 1240 return (EINVAL); 1241 rw_enter(&tip->ti_rwlock, RW_READER); 1242 if (ddi_copyout(&tip->ti_nick, arg, sizeof (tip->ti_nick), 1243 mode) != 0) 1244 error = EFAULT; 1245 rw_exit(&tip->ti_rwlock); 1246 break; 1247 1248 case TRILL_ADDNICK: 1249 if (tip == NULL) 1250 break; 1251 error = trill_add_nick(tip, arg, B_FALSE, mode); 1252 break; 1253 1254 case TRILL_DELNICK: { 1255 uint16_t delnick; 1256 1257 if (tip == NULL) 1258 break; 1259 if (ddi_copyin(arg, &delnick, sizeof (delnick), mode) != 0) 1260 return (EFAULT); 1261 error = trill_del_nick(tip, delnick, B_FALSE); 1262 break; 1263 } 1264 case TRILL_DELALL: 1265 if (tip == NULL) 1266 break; 1267 trill_del_all(tip, B_FALSE); 1268 break; 1269 1270 case TRILL_TREEROOT: { 1271 uint16_t treeroot; 1272 1273 if (tip == NULL) 1274 break; 1275 if (ddi_copyin(arg, &treeroot, sizeof (treeroot), mode) != 0) 1276 return (EFAULT); 1277 if (!VALID_NICK(treeroot)) 1278 return (EINVAL); 1279 rw_enter(&tip->ti_rwlock, RW_WRITER); 1280 tip->ti_treeroot = treeroot; 1281 rw_exit(&tip->ti_rwlock); 1282 break; 1283 } 1284 case TRILL_HWADDR: 1285 if (tsock->ts_link == NULL) 1286 break; 1287 if (ddi_copyout(tsock->ts_link->bl_local_mac, arg, ETHERADDRL, 1288 mode) != 0) 1289 return (EFAULT); 1290 break; 1291 1292 case TRILL_NEWBRIDGE: { 1293 char bname[MAXLINKNAMELEN]; 1294 1295 if (tsock->ts_state != TS_UNBND) 1296 return (ENOTSUP); 1297 /* ts_tip can only be set once */ 1298 if (tip != NULL) 1299 return (EEXIST); 1300 if (ddi_copyin(arg, bname, sizeof (bname), mode) != 0) 1301 return (EFAULT); 1302 bname[MAXLINKNAMELEN-1] = '\0'; 1303 error = trill_find_bridge(tsock, bname, B_TRUE); 1304 break; 1305 } 1306 1307 case TRILL_GETBRIDGE: { 1308 char bname[MAXLINKNAMELEN]; 1309 1310 /* ts_tip can only be set once */ 1311 if (tip != NULL) 1312 return (EEXIST); 1313 if (ddi_copyin(arg, bname, sizeof (bname), mode) != 0) 1314 return (EFAULT); 1315 bname[MAXLINKNAMELEN - 1] = '\0'; 1316 error = trill_find_bridge(tsock, bname, B_FALSE); 1317 break; 1318 } 1319 1320 case TRILL_LISTNICK: { 1321 trill_listnick_t tln; 1322 trill_node_t *tnp; 1323 trill_nickinfo_t *tnip; 1324 uint16_t nick; 1325 1326 if (tip == NULL) 1327 return (EINVAL); 1328 if (ddi_copyin(arg, &tln, sizeof (tln), mode) != 0) 1329 return (EFAULT); 1330 nick = tln.tln_nick; 1331 if (nick >= RBRIDGE_NICKNAME_MAX) { 1332 error = EINVAL; 1333 break; 1334 } 1335 rw_enter(&tip->ti_rwlock, RW_READER); 1336 while (++nick < RBRIDGE_NICKNAME_MAX) { 1337 if ((tnp = tip->ti_nodes[nick]) != NULL) { 1338 tnip = tnp->tn_ni; 1339 ASSERT(nick == tnip->tni_nick); 1340 tln.tln_nick = nick; 1341 bcopy(tnip->tni_adjsnpa, tln.tln_nexthop, 1342 ETHERADDRL); 1343 tln.tln_ours = nick == tip->ti_nick; 1344 if (tln.tln_ours || tnp->tn_tsp == NULL) { 1345 tln.tln_linkid = 1346 DATALINK_INVALID_LINKID; 1347 } else { 1348 tln.tln_linkid = 1349 tnp->tn_tsp->ts_link->bl_linkid; 1350 } 1351 break; 1352 } 1353 } 1354 rw_exit(&tip->ti_rwlock); 1355 if (nick >= RBRIDGE_NICKNAME_MAX) 1356 bzero(&tln, sizeof (tln)); 1357 if (ddi_copyout(&tln, arg, sizeof (tln), mode) != 0) 1358 return (EFAULT); 1359 break; 1360 } 1361 1362 /* 1363 * Port flush: this is used when we lose AF on a port. We must discard 1364 * all regular bridge forwarding entries on this port with the 1365 * indicated VLAN. 1366 */ 1367 case TRILL_PORTFLUSH: { 1368 uint16_t vlan = (uint16_t)(uintptr_t)arg; 1369 1370 if (tsock->ts_link == NULL) 1371 return (EINVAL); 1372 bridge_trill_flush(tsock->ts_link, vlan, B_FALSE); 1373 break; 1374 } 1375 1376 /* 1377 * Nick flush: this is used when we lose AF on a port. We must discard 1378 * all bridge TRILL forwarding entries on this port with the indicated 1379 * VLAN. 1380 */ 1381 case TRILL_NICKFLUSH: { 1382 uint16_t vlan = (uint16_t)(uintptr_t)arg; 1383 1384 if (tsock->ts_link == NULL) 1385 return (EINVAL); 1386 bridge_trill_flush(tsock->ts_link, vlan, B_TRUE); 1387 break; 1388 } 1389 1390 case TRILL_GETMTU: 1391 if (tsock->ts_link == NULL) 1392 break; 1393 if (ddi_copyout(&tsock->ts_link->bl_maxsdu, arg, 1394 sizeof (uint_t), mode) != 0) 1395 return (EFAULT); 1396 break; 1397 1398 default: 1399 error = ENOTSUP; 1400 break; 1401 } 1402 1403 return (error); 1404 } 1405 1406 /* 1407 * Sends received packet back upstream on the TRILL socket. 1408 * Consumes passed mblk_t. 1409 */ 1410 static void 1411 trill_ctrl_input(trill_sock_t *tsock, mblk_t *mp, const uint8_t *saddr, 1412 uint16_t tci) 1413 { 1414 int udi_size; 1415 mblk_t *mp1; 1416 struct T_unitdata_ind *tudi; 1417 struct sockaddr_dl *sdl; 1418 char *lladdr; 1419 int error; 1420 1421 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); 1422 if (tsock->ts_flow_ctrld) { 1423 freemsg(mp); 1424 KSPINCR(tks_drops); 1425 return; 1426 } 1427 1428 udi_size = sizeof (struct T_unitdata_ind) + 1429 sizeof (struct sockaddr_dl); 1430 mp1 = allocb(udi_size, BPRI_MED); 1431 if (mp1 == NULL) { 1432 freemsg(mp); 1433 KSPINCR(tks_drops); 1434 return; 1435 } 1436 1437 mp1->b_cont = mp; 1438 mp = mp1; 1439 mp->b_datap->db_type = M_PROTO; 1440 /* LINTED: alignment */ 1441 tudi = (struct T_unitdata_ind *)mp->b_rptr; 1442 mp->b_wptr = (uchar_t *)tudi + udi_size; 1443 1444 tudi->PRIM_type = T_UNITDATA_IND; 1445 tudi->SRC_length = sizeof (struct sockaddr_dl); 1446 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1447 tudi->OPT_length = 0; 1448 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 1449 sizeof (struct sockaddr_dl); 1450 1451 /* Information of the link on which packet was received. */ 1452 sdl = (struct sockaddr_dl *)&tudi[1]; 1453 (void) memset(sdl, 0, sizeof (struct sockaddr_dl)); 1454 sdl->sdl_family = AF_TRILL; 1455 1456 /* LINTED: alignment */ 1457 *(datalink_id_t *)sdl->sdl_data = tsock->ts_link->bl_linkid; 1458 sdl->sdl_nlen = sizeof (tsock->ts_link->bl_linkid); 1459 1460 lladdr = LLADDR(sdl); 1461 (void) memcpy(lladdr, saddr, ETHERADDRL); 1462 lladdr += ETHERADDRL; 1463 sdl->sdl_alen = ETHERADDRL; 1464 1465 /* LINTED: alignment */ 1466 *(uint16_t *)lladdr = tci; 1467 sdl->sdl_slen = sizeof (uint16_t); 1468 1469 DTRACE_PROBE2(trill__ctrl__input, trill_sock_t *, tsock, mblk_t *, mp); 1470 (*tsock->ts_conn_upcalls->su_recv)(tsock->ts_conn_upper_handle, 1471 mp, msgdsize(mp), 0, &error, NULL); 1472 1473 if (error == ENOSPC) { 1474 mutex_enter(&tsock->ts_socklock); 1475 (*tsock->ts_conn_upcalls->su_recv)(tsock->ts_conn_upper_handle, 1476 NULL, 0, 0, &error, NULL); 1477 if (error == ENOSPC) 1478 tsock->ts_flow_ctrld = B_TRUE; 1479 mutex_exit(&tsock->ts_socklock); 1480 KSPINCR(tks_drops); 1481 } else if (error != 0) { 1482 KSPINCR(tks_drops); 1483 } else { 1484 KSPINCR(tks_recv); 1485 } 1486 1487 DTRACE_PROBE2(trill__ctrl__input__done, trill_sock_t *, 1488 tsock, int, error); 1489 } 1490 1491 /* ARGSUSED */ 1492 static void 1493 trill_activate(sock_lower_handle_t proto_handle, 1494 sock_upper_handle_t sock_handle, sock_upcalls_t *sock_upcalls, 1495 int flags, cred_t *cr) 1496 { 1497 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1498 struct sock_proto_props sopp; 1499 1500 tsock->ts_conn_upcalls = sock_upcalls; 1501 tsock->ts_conn_upper_handle = sock_handle; 1502 1503 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | 1504 SOCKOPT_RCVLOWAT | SOCKOPT_MAXADDRLEN | SOCKOPT_MAXPSZ | 1505 SOCKOPT_MAXBLK | SOCKOPT_MINPSZ; 1506 sopp.sopp_wroff = 0; 1507 sopp.sopp_rxhiwat = SOCKET_RECVHIWATER; 1508 sopp.sopp_rxlowat = SOCKET_RECVLOWATER; 1509 sopp.sopp_maxaddrlen = sizeof (struct sockaddr_dl); 1510 sopp.sopp_maxpsz = INFPSZ; 1511 sopp.sopp_maxblk = INFPSZ; 1512 sopp.sopp_minpsz = 0; 1513 (*tsock->ts_conn_upcalls->su_set_proto_props)( 1514 tsock->ts_conn_upper_handle, &sopp); 1515 } 1516 1517 /* ARGSUSED */ 1518 static int 1519 trill_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 1520 { 1521 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1522 1523 trill_do_close(tsock); 1524 return (0); 1525 } 1526 1527 /* ARGSUSED */ 1528 static int 1529 trill_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 1530 socklen_t len, cred_t *cr) 1531 { 1532 int error; 1533 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1534 1535 if (sa == NULL) 1536 error = trill_do_unbind(tsock); 1537 else 1538 error = trill_start_recv(tsock, sa, len); 1539 1540 return (error); 1541 } 1542 1543 /* 1544 * This is a token getsockopt() implementation so we can reply to SO_PROTOCOL. 1545 */ 1546 static int 1547 trill_getsockopt(sock_lower_handle_t handle, int level, 1548 int option_name, void *optval, socklen_t *optlenp, struct cred *cr) 1549 { 1550 int32_t value; 1551 1552 if (level != SOL_SOCKET && option_name != SO_PROTOCOL) { 1553 return (ENOPROTOOPT); 1554 } 1555 1556 if (*optlenp < sizeof (int32_t)) { 1557 return (EINVAL); 1558 } 1559 1560 value = 0; 1561 bcopy(&value, optval, sizeof (value)); 1562 *optlenp = sizeof (value); 1563 return (0); 1564 } 1565 1566 /* ARGSUSED */ 1567 static int 1568 trill_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 1569 cred_t *cr) 1570 { 1571 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1572 struct sockaddr_dl *laddr; 1573 uint16_t tci; 1574 1575 ASSERT(DB_TYPE(mp) == M_DATA); 1576 ASSERT(!(tsock->ts_flags & TSF_SHUTDOWN)); 1577 1578 if (msg->msg_name == NULL || msg->msg_namelen != sizeof (*laddr)) 1579 goto eproto; 1580 1581 /* 1582 * The name is a datalink_id_t, the address is an Ethernet address, and 1583 * the selector value is the VLAN ID. 1584 */ 1585 laddr = (struct sockaddr_dl *)msg->msg_name; 1586 if (laddr->sdl_nlen != sizeof (datalink_id_t) || 1587 laddr->sdl_alen != ETHERADDRL || 1588 (laddr->sdl_slen != sizeof (tci) && laddr->sdl_slen != 0)) 1589 goto eproto; 1590 1591 mutex_enter(&tsock->ts_socklock); 1592 if (tsock->ts_state != TS_IDLE || tsock->ts_link == NULL) { 1593 mutex_exit(&tsock->ts_socklock); 1594 goto eproto; 1595 } 1596 atomic_inc_uint(&tsock->ts_sockthreadcount); 1597 mutex_exit(&tsock->ts_socklock); 1598 1599 /* 1600 * Safe to dereference VLAN now, as we've checked the user's specified 1601 * values, and alignment is now guaranteed. 1602 */ 1603 if (laddr->sdl_slen == 0) { 1604 tci = TRILL_NO_TCI; 1605 } else { 1606 /* LINTED: alignment */ 1607 tci = *(uint16_t *)(LLADDR(laddr) + ETHERADDRL); 1608 } 1609 1610 mp = create_trill_header(tsock, mp, (const uchar_t *)LLADDR(laddr), 1611 B_TRUE, B_FALSE, tci, msgdsize(mp)); 1612 if (mp != NULL) { 1613 mp = bridge_trill_output(tsock->ts_link, mp); 1614 if (mp == NULL) { 1615 KSPINCR(tks_sent); 1616 } else { 1617 freemsg(mp); 1618 KSPINCR(tks_drops); 1619 } 1620 } 1621 1622 /* Wake up any threads blocking on us */ 1623 if (atomic_dec_uint_nv(&tsock->ts_sockthreadcount) == 0) 1624 cv_broadcast(&tsock->ts_sockthreadwait); 1625 return (0); 1626 1627 eproto: 1628 freemsg(mp); 1629 KSPINCR(tks_drops); 1630 return (EPROTO); 1631 } 1632 1633 /* ARGSUSED */ 1634 static int 1635 trill_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 1636 int mode, int32_t *rvalp, cred_t *cr) 1637 { 1638 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1639 int rc; 1640 1641 switch (cmd) { 1642 /* List of unprivileged TRILL ioctls */ 1643 case TRILL_GETNICK: 1644 case TRILL_GETBRIDGE: 1645 case TRILL_LISTNICK: 1646 break; 1647 default: 1648 if (secpolicy_dl_config(cr) != 0) 1649 return (EPERM); 1650 break; 1651 } 1652 1653 /* Lock ensures socket state is unchanged during ioctl handling */ 1654 mutex_enter(&tsock->ts_socklock); 1655 rc = trill_do_ioctl(tsock, cmd, (void *)arg, mode); 1656 mutex_exit(&tsock->ts_socklock); 1657 return (rc); 1658 } 1659 1660 static void 1661 trill_clr_flowctrl(sock_lower_handle_t proto_handle) 1662 { 1663 trill_sock_t *tsock = (trill_sock_t *)proto_handle; 1664 1665 mutex_enter(&tsock->ts_socklock); 1666 tsock->ts_flow_ctrld = B_FALSE; 1667 mutex_exit(&tsock->ts_socklock); 1668 } 1669 1670 static sock_downcalls_t sock_trill_downcalls = { 1671 trill_activate, /* sd_activate */ 1672 sock_accept_notsupp, /* sd_accept */ 1673 trill_bind, /* sd_bind */ 1674 sock_listen_notsupp, /* sd_listen */ 1675 sock_connect_notsupp, /* sd_connect */ 1676 sock_getpeername_notsupp, /* sd_getpeername */ 1677 sock_getsockname_notsupp, /* sd_getsockname */ 1678 trill_getsockopt, /* sd_getsockopt */ 1679 sock_setsockopt_notsupp, /* sd_setsockopt */ 1680 trill_send, /* sd_send */ 1681 NULL, /* sd_send_uio */ 1682 NULL, /* sd_recv_uio */ 1683 NULL, /* sd_poll */ 1684 sock_shutdown_notsupp, /* sd_shutdown */ 1685 trill_clr_flowctrl, /* sd_setflowctrl */ 1686 trill_ioctl, /* sd_ioctl */ 1687 trill_close /* sd_close */ 1688 }; 1689 1690 /* ARGSUSED */ 1691 static sock_lower_handle_t 1692 trill_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 1693 uint_t *smodep, int *errorp, int flags, cred_t *credp) 1694 { 1695 trill_sock_t *tsock; 1696 1697 if (family != AF_TRILL || type != SOCK_DGRAM || proto != 0) { 1698 *errorp = EPROTONOSUPPORT; 1699 return (NULL); 1700 } 1701 1702 *sock_downcalls = &sock_trill_downcalls; 1703 *smodep = SM_ATOMIC; 1704 tsock = trill_do_open(flags); 1705 *errorp = (tsock != NULL) ? 0:ENOMEM; 1706 return ((sock_lower_handle_t)tsock); 1707 } 1708