1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2019 Joyent, Inc. 25 */ 26 27 /* 28 * Data-Link Services Module 29 */ 30 31 #include <sys/sysmacros.h> 32 #include <sys/strsubr.h> 33 #include <sys/strsun.h> 34 #include <sys/vlan.h> 35 #include <sys/dld_impl.h> 36 #include <sys/sdt.h> 37 #include <sys/atomic.h> 38 #include <sys/sysevent.h> 39 #include <sys/sysevent/eventdefs.h> 40 #include <sys/sysevent/datalink.h> 41 42 static kmem_cache_t *i_dls_link_cachep; 43 mod_hash_t *i_dls_link_hash; 44 static uint_t i_dls_link_count; 45 46 #define LINK_HASHSZ 67 /* prime */ 47 #define IMPL_HASHSZ 67 /* prime */ 48 49 /* 50 * Construct a hash key from the DLSAP value. 51 */ 52 #define MAKE_KEY(_sap) \ 53 ((mod_hash_key_t)(uintptr_t)((_sap) << VLAN_ID_SIZE)) 54 55 #define DLS_STRIP_PADDING(pktsize, p) { \ 56 if (pktsize != 0) { \ 57 ssize_t delta = pktsize - msgdsize(p); \ 58 \ 59 if (delta < 0) \ 60 (void) adjmsg(p, delta); \ 61 } \ 62 } 63 64 /* 65 * Private functions. 66 */ 67 68 /*ARGSUSED*/ 69 static int 70 i_dls_link_constructor(void *buf, void *arg, int kmflag) 71 { 72 dls_link_t *dlp = buf; 73 char name[MAXNAMELEN]; 74 75 bzero(buf, sizeof (dls_link_t)); 76 77 (void) snprintf(name, MAXNAMELEN, "dls_link_t_%p_hash", buf); 78 dlp->dl_str_hash = mod_hash_create_idhash(name, IMPL_HASHSZ, 79 mod_hash_null_valdtor); 80 81 return (0); 82 } 83 84 /*ARGSUSED*/ 85 static void 86 i_dls_link_destructor(void *buf, void *arg) 87 { 88 dls_link_t *dlp = buf; 89 90 ASSERT(dlp->dl_ref == 0); 91 ASSERT(dlp->dl_mh == NULL); 92 ASSERT(dlp->dl_mah == NULL); 93 ASSERT(dlp->dl_unknowns == 0); 94 95 mod_hash_destroy_idhash(dlp->dl_str_hash); 96 dlp->dl_str_hash = NULL; 97 98 } 99 100 /* 101 * - Parse the mac header information of the given packet. 102 * - Strip the padding and skip over the header. Note that because some 103 * DLS consumers only check the db_ref count of the first mblk, we 104 * pullup the message into a single mblk. Because the original message 105 * is freed as the result of message pulling up, mac_vlan_header_info() 106 * is called again to update the mhi_saddr and mhi_daddr pointers in the 107 * mhip. Further, the mac_vlan_header_info() function ensures that the 108 * size of the pulled message is greater than the MAC header size, 109 * therefore we can directly advance b_rptr to point at the payload. 110 * 111 * We choose to use a macro for performance reasons. 112 */ 113 #define DLS_PREPARE_PKT(mh, mp, mhip, err) { \ 114 mblk_t *nextp = (mp)->b_next; \ 115 if (((err) = mac_vlan_header_info((mh), (mp), (mhip))) == 0) { \ 116 DLS_STRIP_PADDING((mhip)->mhi_pktsize, (mp)); \ 117 if (MBLKL((mp)) < (mhip)->mhi_hdrsize) { \ 118 mblk_t *newmp; \ 119 if ((newmp = msgpullup((mp), -1)) == NULL) { \ 120 (err) = EINVAL; \ 121 } else { \ 122 (mp)->b_next = NULL; \ 123 freemsg((mp)); \ 124 (mp) = newmp; \ 125 VERIFY(mac_vlan_header_info((mh), \ 126 (mp), (mhip)) == 0); \ 127 (mp)->b_next = nextp; \ 128 (mp)->b_rptr += (mhip)->mhi_hdrsize; \ 129 } \ 130 } else { \ 131 (mp)->b_rptr += (mhip)->mhi_hdrsize; \ 132 } \ 133 } \ 134 } 135 136 /* 137 * Truncate the chain starting at mp such that all packets in the chain 138 * have identical source and destination addresses, saps, and tag types 139 * (see below). It returns a pointer to the mblk following the chain, 140 * NULL if there is no further packet following the processed chain. 141 * The countp argument is set to the number of valid packets in the chain. 142 * Note that the whole MAC header (including the VLAN tag if any) in each 143 * packet will be stripped. 144 */ 145 static mblk_t * 146 i_dls_link_subchain(dls_link_t *dlp, mblk_t *mp, const mac_header_info_t *mhip, 147 uint_t *countp) 148 { 149 mblk_t *prevp; 150 uint_t npacket = 1; 151 size_t addr_size = dlp->dl_mip->mi_addr_length; 152 uint16_t vid = VLAN_ID(mhip->mhi_tci); 153 uint16_t pri = VLAN_PRI(mhip->mhi_tci); 154 155 /* 156 * Compare with subsequent headers until we find one that has 157 * differing header information. After checking each packet 158 * strip padding and skip over the header. 159 */ 160 for (prevp = mp; (mp = mp->b_next) != NULL; prevp = mp) { 161 mac_header_info_t cmhi; 162 uint16_t cvid, cpri; 163 int err; 164 165 DLS_PREPARE_PKT(dlp->dl_mh, mp, &cmhi, err); 166 if (err != 0) 167 break; 168 169 prevp->b_next = mp; 170 171 /* 172 * The source, destination, sap, vlan tag must all match in 173 * a given subchain. 174 */ 175 if (mhip->mhi_saddr == NULL || cmhi.mhi_saddr == NULL || 176 memcmp(mhip->mhi_daddr, cmhi.mhi_daddr, addr_size) != 0 || 177 memcmp(mhip->mhi_saddr, cmhi.mhi_saddr, addr_size) != 0 || 178 mhip->mhi_bindsap != cmhi.mhi_bindsap) { 179 /* 180 * Note that we don't need to restore the padding. 181 */ 182 mp->b_rptr -= cmhi.mhi_hdrsize; 183 break; 184 } 185 186 cvid = VLAN_ID(cmhi.mhi_tci); 187 cpri = VLAN_PRI(cmhi.mhi_tci); 188 189 /* 190 * There are several types of packets. Packets don't match 191 * if they are classified to different type or if they are 192 * VLAN packets but belong to different VLANs: 193 * 194 * packet type tagged vid pri 195 * --------------------------------------------------------- 196 * untagged No zero zero 197 * VLAN packets Yes non-zero - 198 * priority tagged Yes zero non-zero 199 * 0 tagged Yes zero zero 200 */ 201 if ((mhip->mhi_istagged != cmhi.mhi_istagged) || 202 (vid != cvid) || ((vid == VLAN_ID_NONE) && 203 (((pri == 0) && (cpri != 0)) || 204 ((pri != 0) && (cpri == 0))))) { 205 mp->b_rptr -= cmhi.mhi_hdrsize; 206 break; 207 } 208 209 npacket++; 210 } 211 212 /* 213 * Break the chain at this point and return a pointer to the next 214 * sub-chain. 215 */ 216 prevp->b_next = NULL; 217 *countp = npacket; 218 return (mp); 219 } 220 221 /* ARGSUSED */ 222 static int 223 i_dls_head_hold(mod_hash_key_t key, mod_hash_val_t val) 224 { 225 dls_head_t *dhp = (dls_head_t *)val; 226 227 /* 228 * The lock order is mod_hash's internal lock -> dh_lock as in the 229 * call to i_dls_link_rx -> mod_hash_find_cb_rval -> i_dls_head_hold 230 */ 231 mutex_enter(&dhp->dh_lock); 232 if (dhp->dh_removing) { 233 mutex_exit(&dhp->dh_lock); 234 return (-1); 235 } 236 dhp->dh_ref++; 237 mutex_exit(&dhp->dh_lock); 238 return (0); 239 } 240 241 void 242 i_dls_head_rele(dls_head_t *dhp) 243 { 244 mutex_enter(&dhp->dh_lock); 245 dhp->dh_ref--; 246 if (dhp->dh_ref == 0 && dhp->dh_removing != 0) 247 cv_broadcast(&dhp->dh_cv); 248 mutex_exit(&dhp->dh_lock); 249 } 250 251 static dls_head_t * 252 i_dls_head_alloc(mod_hash_key_t key) 253 { 254 dls_head_t *dhp; 255 256 dhp = kmem_zalloc(sizeof (dls_head_t), KM_SLEEP); 257 dhp->dh_key = key; 258 return (dhp); 259 } 260 261 static void 262 i_dls_head_free(dls_head_t *dhp) 263 { 264 ASSERT(dhp->dh_ref == 0); 265 kmem_free(dhp, sizeof (dls_head_t)); 266 } 267 268 /* 269 * Try to send mp up to the streams of the given sap. Return the 270 * number of streams which accepted this message, or 0 if no streams 271 * accepted the message. 272 * 273 * Note that this function copies the message chain and the original 274 * mp remains valid after this function returns. 275 */ 276 static uint_t 277 i_dls_link_rx_func(dls_link_t *dlp, mac_resource_handle_t mrh, 278 mac_header_info_t *mhip, mblk_t *mp, uint32_t sap, 279 boolean_t (*acceptfunc)()) 280 { 281 mod_hash_t *hash = dlp->dl_str_hash; 282 mod_hash_key_t key; 283 dls_head_t *dhp; 284 dld_str_t *dsp; 285 mblk_t *nmp; 286 dls_rx_t ds_rx; 287 void *ds_rx_arg; 288 uint_t naccepted = 0; 289 int rval; 290 291 /* 292 * Construct a hash key from the DLSAP. 293 */ 294 key = MAKE_KEY(sap); 295 296 /* 297 * Search the hash table for a dld_str_t eligible to receive a 298 * packet chain for this DLSAP. The mod hash's internal lock 299 * serializes find/insert/remove from the mod hash list. 300 * Incrementing the dh_ref (while holding the mod hash lock) 301 * ensures dls_link_remove will wait for the upcall to finish. 302 */ 303 if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp, 304 i_dls_head_hold, &rval) != 0 || (rval != 0)) { 305 return (0); 306 } 307 308 /* 309 * Find all dld_str_t that will accept the sub-chain. 310 */ 311 for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next) { 312 if (!acceptfunc(dsp, mhip, &ds_rx, &ds_rx_arg)) 313 continue; 314 315 /* 316 * We have at least one acceptor. 317 */ 318 naccepted++; 319 320 /* 321 * There will normally be at least one more dld_str_t 322 * (since we've yet to check for non-promiscuous 323 * dld_str_t) so dup the sub-chain. 324 */ 325 if ((nmp = copymsgchain(mp)) != NULL) 326 ds_rx(ds_rx_arg, mrh, nmp, mhip); 327 } 328 329 /* 330 * Release the hold on the dld_str_t chain now that we have 331 * finished walking it. 332 */ 333 i_dls_head_rele(dhp); 334 return (naccepted); 335 } 336 337 /* ARGSUSED */ 338 void 339 i_dls_link_rx(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 340 boolean_t loopback) 341 { 342 dls_link_t *dlp = arg; 343 mod_hash_t *hash = dlp->dl_str_hash; 344 mblk_t *nextp; 345 mac_header_info_t mhi; 346 dls_head_t *dhp; 347 dld_str_t *dsp; 348 dld_str_t *ndsp; 349 mblk_t *nmp; 350 mod_hash_key_t key; 351 uint_t npacket; 352 boolean_t accepted; 353 dls_rx_t ds_rx, nds_rx; 354 void *ds_rx_arg, *nds_rx_arg; 355 uint16_t vid; 356 int err, rval; 357 358 /* 359 * Walk the packet chain. 360 */ 361 for (; mp != NULL; mp = nextp) { 362 /* 363 * Wipe the accepted state. 364 */ 365 accepted = B_FALSE; 366 367 DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err); 368 if (err != 0) { 369 atomic_inc_32(&(dlp->dl_unknowns)); 370 nextp = mp->b_next; 371 mp->b_next = NULL; 372 freemsg(mp); 373 continue; 374 } 375 376 /* 377 * Grab the longest sub-chain we can process as a single 378 * unit. 379 */ 380 nextp = i_dls_link_subchain(dlp, mp, &mhi, &npacket); 381 ASSERT(npacket != 0); 382 383 vid = VLAN_ID(mhi.mhi_tci); 384 385 /* 386 * This condition is true only when a sun4v vsw client 387 * is on the scene; as it is the only type of client 388 * that multiplexes VLANs on a single client instance. 389 * All other types of clients have one VLAN per client 390 * instance. In that case, MAC strips the VLAN tag 391 * before delivering it to DLS (see mac_rx_deliver()). 392 */ 393 if (mhi.mhi_istagged) { 394 395 /* 396 * If it is tagged traffic, send it upstream to 397 * all dld_str_t which are attached to the physical 398 * link and bound to SAP 0x8100. 399 */ 400 if (i_dls_link_rx_func(dlp, mrh, &mhi, mp, 401 ETHERTYPE_VLAN, dls_accept) > 0) { 402 accepted = B_TRUE; 403 } 404 405 /* 406 * Don't pass the packets up if they are tagged 407 * packets and: 408 * - their VID and priority are both zero and the 409 * original packet isn't using the PVID (invalid 410 * packets). 411 * - their sap is ETHERTYPE_VLAN and their VID is 412 * zero as they have already been sent upstreams. 413 */ 414 if ((vid == VLAN_ID_NONE && !mhi.mhi_ispvid && 415 VLAN_PRI(mhi.mhi_tci) == 0) || 416 (mhi.mhi_bindsap == ETHERTYPE_VLAN && 417 vid == VLAN_ID_NONE)) { 418 freemsgchain(mp); 419 goto loop; 420 } 421 } 422 423 /* 424 * Construct a hash key from the DLSAP. 425 */ 426 key = MAKE_KEY(mhi.mhi_bindsap); 427 428 /* 429 * Search the hash table for dld_str_t eligible to receive 430 * a packet chain for this DLSAP. 431 */ 432 if (mod_hash_find_cb_rval(hash, key, (mod_hash_val_t *)&dhp, 433 i_dls_head_hold, &rval) != 0 || (rval != 0)) { 434 freemsgchain(mp); 435 goto loop; 436 } 437 438 /* 439 * Find the first dld_str_t that will accept the sub-chain. 440 */ 441 for (dsp = dhp->dh_list; dsp != NULL; dsp = dsp->ds_next) 442 if (dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg)) 443 break; 444 445 /* 446 * If we did not find any dld_str_t willing to accept the 447 * sub-chain then throw it away. 448 */ 449 if (dsp == NULL) { 450 i_dls_head_rele(dhp); 451 freemsgchain(mp); 452 goto loop; 453 } 454 455 /* 456 * We have at least one acceptor. 457 */ 458 accepted = B_TRUE; 459 for (;;) { 460 /* 461 * Find the next dld_str_t that will accept the 462 * sub-chain. 463 */ 464 for (ndsp = dsp->ds_next; ndsp != NULL; 465 ndsp = ndsp->ds_next) 466 if (dls_accept(ndsp, &mhi, &nds_rx, 467 &nds_rx_arg)) 468 break; 469 470 /* 471 * If there are no more dld_str_t that are willing 472 * to accept the sub-chain then we don't need to dup 473 * it before handing it to the current one. 474 */ 475 if (ndsp == NULL) { 476 ds_rx(ds_rx_arg, mrh, mp, &mhi); 477 478 /* 479 * Since there are no more dld_str_t, we're 480 * done. 481 */ 482 break; 483 } 484 485 /* 486 * There are more dld_str_t so dup the sub-chain. 487 */ 488 if ((nmp = copymsgchain(mp)) != NULL) 489 ds_rx(ds_rx_arg, mrh, nmp, &mhi); 490 491 dsp = ndsp; 492 ds_rx = nds_rx; 493 ds_rx_arg = nds_rx_arg; 494 } 495 496 /* 497 * Release the hold on the dld_str_t chain now that we have 498 * finished walking it. 499 */ 500 i_dls_head_rele(dhp); 501 502 loop: 503 /* 504 * If there were no acceptors then add the packet count to the 505 * 'unknown' count. 506 */ 507 if (!accepted) 508 atomic_add_32(&(dlp->dl_unknowns), npacket); 509 } 510 } 511 512 /* ARGSUSED */ 513 void 514 dls_rx_vlan_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 515 boolean_t loopback) 516 { 517 dld_str_t *dsp = arg; 518 dls_link_t *dlp = dsp->ds_dlp; 519 mac_header_info_t mhi; 520 dls_rx_t ds_rx; 521 void *ds_rx_arg; 522 int err; 523 524 DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err); 525 if (err != 0) 526 goto drop; 527 528 /* 529 * If there is promiscuous handle for vlan, we filter out the untagged 530 * pkts and pkts that are not for the primary unicast address. 531 */ 532 if (dsp->ds_vlan_mph != NULL) { 533 uint8_t prim_addr[MAXMACADDRLEN]; 534 size_t addr_length = dsp->ds_mip->mi_addr_length; 535 536 if (!(mhi.mhi_istagged)) 537 goto drop; 538 ASSERT(dsp->ds_mh != NULL); 539 mac_unicast_primary_get(dsp->ds_mh, (uint8_t *)prim_addr); 540 if (memcmp(mhi.mhi_daddr, prim_addr, addr_length) != 0) 541 goto drop; 542 543 if (!dls_accept(dsp, &mhi, &ds_rx, &ds_rx_arg)) 544 goto drop; 545 546 ds_rx(ds_rx_arg, NULL, mp, &mhi); 547 return; 548 } 549 550 drop: 551 atomic_inc_32(&dlp->dl_unknowns); 552 freemsg(mp); 553 } 554 555 /* ARGSUSED */ 556 void 557 dls_rx_promisc(void *arg, mac_resource_handle_t mrh, mblk_t *mp, 558 boolean_t loopback) 559 { 560 dld_str_t *dsp = arg; 561 dls_link_t *dlp = dsp->ds_dlp; 562 mac_header_info_t mhi; 563 dls_rx_t ds_rx; 564 void *ds_rx_arg; 565 int err; 566 dls_head_t *dhp; 567 mod_hash_key_t key; 568 569 /* 570 * We expect to deal with only a single packet. 571 */ 572 ASSERT3P(mp->b_next, ==, NULL); 573 574 DLS_PREPARE_PKT(dlp->dl_mh, mp, &mhi, err); 575 576 if (err != 0) 577 goto drop; 578 579 /* 580 * In order to filter out sap pkt that no dls channel listens, search 581 * the hash table trying to find a dld_str_t eligible to receive the pkt 582 */ 583 if ((dsp->ds_promisc & DLS_PROMISC_SAP) == 0) { 584 key = MAKE_KEY(mhi.mhi_bindsap); 585 if (mod_hash_find(dsp->ds_dlp->dl_str_hash, key, 586 (mod_hash_val_t *)&dhp) != 0) 587 goto drop; 588 } 589 590 if (!dls_accept_promisc(dsp, &mhi, &ds_rx, &ds_rx_arg, loopback)) 591 goto drop; 592 593 ds_rx(ds_rx_arg, NULL, mp, &mhi); 594 return; 595 596 drop: 597 atomic_inc_32(&dlp->dl_unknowns); 598 freemsg(mp); 599 } 600 601 /* 602 * We'd like to notify via sysevents that a link state change has occurred. 603 * There are a couple of challenges associated with this. The first is that if 604 * the link is flapping a lot, we may not see an accurate state when we launch 605 * the notification, we're told it changed, not what it changed to. 606 * 607 * The next problem is that all of the information that a user has associated 608 * with this device is the exact opposite of what we have on the dls_link_t. We 609 * have the name of the mac device, which has no bearing on what users see. 610 * Likewise, we don't have the datalink id either. So we're going to have to get 611 * this from dls. 612 * 613 * This is all further complicated by the fact that this could be going on in 614 * another thread at the same time as someone is tearing down the dls_link_t 615 * that we're associated with. We need to be careful not to grab the mac 616 * perimeter, otherwise we stand a good chance of deadlock. 617 */ 618 static void 619 dls_link_notify(void *arg, mac_notify_type_t type) 620 { 621 dls_link_t *dlp = arg; 622 dls_dl_handle_t dhp; 623 nvlist_t *nvp; 624 sysevent_t *event; 625 sysevent_id_t eid; 626 627 if (type != MAC_NOTE_LINK && type != MAC_NOTE_LOWLINK) 628 return; 629 630 /* 631 * If we can't find a devnet handle for this link, then there is no user 632 * knowable device for this at the moment and there's nothing we can 633 * really share with them that will make sense. 634 */ 635 if (dls_devnet_hold_tmp_by_link(dlp, &dhp) != 0) 636 return; 637 638 /* 639 * Because we're attaching this nvlist_t to the sysevent, it'll get 640 * cleaned up when we call sysevent_free. 641 */ 642 VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0); 643 VERIFY(nvlist_add_int32(nvp, DATALINK_EV_LINK_ID, 644 dls_devnet_linkid(dhp)) == 0); 645 VERIFY(nvlist_add_string(nvp, DATALINK_EV_LINK_NAME, 646 dls_devnet_link(dhp)) == 0); 647 VERIFY(nvlist_add_int32(nvp, DATALINK_EV_ZONE_ID, 648 dls_devnet_getzid(dhp)) == 0); 649 650 dls_devnet_rele_tmp(dhp); 651 652 event = sysevent_alloc(EC_DATALINK, ESC_DATALINK_LINK_STATE, 653 ILLUMOS_KERN_PUB"dls", SE_SLEEP); 654 VERIFY(event != NULL); 655 (void) sysevent_attach_attributes(event, (sysevent_attr_list_t *)nvp); 656 657 (void) log_sysevent(event, SE_SLEEP, &eid); 658 sysevent_free(event); 659 660 } 661 662 static void 663 i_dls_link_destroy(dls_link_t *dlp) 664 { 665 ASSERT(dlp->dl_nactive == 0); 666 ASSERT(dlp->dl_impl_count == 0); 667 ASSERT(dlp->dl_zone_ref == 0); 668 669 /* 670 * Free the structure back to the cache. 671 */ 672 if (dlp->dl_mnh != NULL) 673 mac_notify_remove(dlp->dl_mnh, B_TRUE); 674 675 if (dlp->dl_mch != NULL) 676 mac_client_close(dlp->dl_mch, 0); 677 678 if (dlp->dl_mh != NULL) { 679 ASSERT(MAC_PERIM_HELD(dlp->dl_mh)); 680 mac_close(dlp->dl_mh); 681 } 682 683 dlp->dl_mh = NULL; 684 dlp->dl_mch = NULL; 685 dlp->dl_mip = NULL; 686 dlp->dl_mnh = NULL; 687 dlp->dl_unknowns = 0; 688 dlp->dl_nonip_cnt = 0; 689 kmem_cache_free(i_dls_link_cachep, dlp); 690 } 691 692 static int 693 i_dls_link_create(const char *name, dls_link_t **dlpp) 694 { 695 dls_link_t *dlp; 696 int err; 697 698 /* 699 * Allocate a new dls_link_t structure. 700 */ 701 dlp = kmem_cache_alloc(i_dls_link_cachep, KM_SLEEP); 702 703 /* 704 * Name the dls_link_t after the MAC interface it represents. 705 */ 706 (void) strlcpy(dlp->dl_name, name, sizeof (dlp->dl_name)); 707 708 /* 709 * First reference; hold open the MAC interface. 710 */ 711 ASSERT(dlp->dl_mh == NULL); 712 err = mac_open(dlp->dl_name, &dlp->dl_mh); 713 if (err != 0) 714 goto bail; 715 716 ASSERT(MAC_PERIM_HELD(dlp->dl_mh)); 717 dlp->dl_mip = mac_info(dlp->dl_mh); 718 719 /* DLS is the "primary" MAC client */ 720 ASSERT(dlp->dl_mch == NULL); 721 722 err = mac_client_open(dlp->dl_mh, &dlp->dl_mch, NULL, 723 MAC_OPEN_FLAGS_USE_DATALINK_NAME); 724 if (err != 0) 725 goto bail; 726 727 dlp->dl_mnh = mac_notify_add(dlp->dl_mh, dls_link_notify, dlp); 728 729 DTRACE_PROBE2(dls__primary__client, char *, dlp->dl_name, void *, 730 dlp->dl_mch); 731 732 *dlpp = dlp; 733 return (0); 734 735 bail: 736 i_dls_link_destroy(dlp); 737 return (err); 738 } 739 740 /* 741 * Module initialization functions. 742 */ 743 744 void 745 dls_link_init(void) 746 { 747 /* 748 * Create a kmem_cache of dls_link_t structures. 749 */ 750 i_dls_link_cachep = kmem_cache_create("dls_link_cache", 751 sizeof (dls_link_t), 0, i_dls_link_constructor, 752 i_dls_link_destructor, NULL, NULL, NULL, 0); 753 ASSERT(i_dls_link_cachep != NULL); 754 755 /* 756 * Create a dls_link_t hash table and associated lock. 757 */ 758 i_dls_link_hash = mod_hash_create_extended("dls_link_hash", 759 IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor, 760 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 761 i_dls_link_count = 0; 762 } 763 764 int 765 dls_link_fini(void) 766 { 767 if (i_dls_link_count > 0) 768 return (EBUSY); 769 770 /* 771 * Destroy the kmem_cache. 772 */ 773 kmem_cache_destroy(i_dls_link_cachep); 774 775 /* 776 * Destroy the hash table and associated lock. 777 */ 778 mod_hash_destroy_hash(i_dls_link_hash); 779 return (0); 780 } 781 782 /* 783 * Exported functions. 784 */ 785 786 static int 787 dls_link_hold_common(const char *name, dls_link_t **dlpp, boolean_t create) 788 { 789 dls_link_t *dlp; 790 int err; 791 792 /* 793 * Look up a dls_link_t corresponding to the given macname in the 794 * global hash table. The i_dls_link_hash itself is protected by the 795 * mod_hash package's internal lock which synchronizes 796 * find/insert/remove into the global mod_hash list. Assumes that 797 * inserts and removes are single threaded on a per mac end point 798 * by the mac perimeter. 799 */ 800 if ((err = mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name, 801 (mod_hash_val_t *)&dlp)) == 0) 802 goto done; 803 804 ASSERT(err == MH_ERR_NOTFOUND); 805 if (!create) 806 return (ENOENT); 807 808 /* 809 * We didn't find anything so we need to create one. 810 */ 811 if ((err = i_dls_link_create(name, &dlp)) != 0) 812 return (err); 813 814 /* 815 * Insert the dls_link_t. 816 */ 817 err = mod_hash_insert(i_dls_link_hash, (mod_hash_key_t)dlp->dl_name, 818 (mod_hash_val_t)dlp); 819 ASSERT(err == 0); 820 821 atomic_inc_32(&i_dls_link_count); 822 ASSERT(i_dls_link_count != 0); 823 824 done: 825 ASSERT(MAC_PERIM_HELD(dlp->dl_mh)); 826 /* 827 * Bump the reference count and hand back the reference. 828 */ 829 dlp->dl_ref++; 830 *dlpp = dlp; 831 return (0); 832 } 833 834 int 835 dls_link_hold_create(const char *name, dls_link_t **dlpp) 836 { 837 return (dls_link_hold_common(name, dlpp, B_TRUE)); 838 } 839 840 int 841 dls_link_hold(const char *name, dls_link_t **dlpp) 842 { 843 return (dls_link_hold_common(name, dlpp, B_FALSE)); 844 } 845 846 dev_info_t * 847 dls_link_devinfo(dev_t dev) 848 { 849 dls_link_t *dlp; 850 dev_info_t *dip; 851 char macname[MAXNAMELEN]; 852 char *drv; 853 mac_perim_handle_t mph; 854 855 if ((drv = ddi_major_to_name(getmajor(dev))) == NULL) 856 return (NULL); 857 (void) snprintf(macname, MAXNAMELEN, "%s%d", drv, 858 DLS_MINOR2INST(getminor(dev))); 859 860 /* 861 * The code below assumes that the name constructed above is the 862 * macname. This is not the case for legacy devices. Currently this 863 * is ok because this function is only called in the getinfo(9e) path, 864 * which for a legacy device would directly end up in the driver's 865 * getinfo, rather than here 866 */ 867 if (mac_perim_enter_by_macname(macname, &mph) != 0) 868 return (NULL); 869 870 if (dls_link_hold(macname, &dlp) != 0) { 871 mac_perim_exit(mph); 872 return (NULL); 873 } 874 875 dip = mac_devinfo_get(dlp->dl_mh); 876 dls_link_rele(dlp); 877 mac_perim_exit(mph); 878 879 return (dip); 880 } 881 882 dev_t 883 dls_link_dev(dls_link_t *dlp) 884 { 885 return (makedevice(ddi_driver_major(mac_devinfo_get(dlp->dl_mh)), 886 mac_minor(dlp->dl_mh))); 887 } 888 889 void 890 dls_link_rele(dls_link_t *dlp) 891 { 892 mod_hash_val_t val; 893 894 ASSERT(MAC_PERIM_HELD(dlp->dl_mh)); 895 /* 896 * Check if there are any more references. 897 */ 898 if (--dlp->dl_ref == 0) { 899 (void) mod_hash_remove(i_dls_link_hash, 900 (mod_hash_key_t)dlp->dl_name, &val); 901 ASSERT(dlp == (dls_link_t *)val); 902 903 /* 904 * Destroy the dls_link_t. 905 */ 906 i_dls_link_destroy(dlp); 907 ASSERT(i_dls_link_count > 0); 908 atomic_dec_32(&i_dls_link_count); 909 } 910 } 911 912 int 913 dls_link_rele_by_name(const char *name) 914 { 915 dls_link_t *dlp; 916 917 if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)name, 918 (mod_hash_val_t *)&dlp) != 0) 919 return (ENOENT); 920 921 ASSERT(MAC_PERIM_HELD(dlp->dl_mh)); 922 923 /* 924 * Must fail detach if mac client is busy. 925 */ 926 ASSERT(dlp->dl_ref > 0 && dlp->dl_mch != NULL); 927 if (mac_link_has_flows(dlp->dl_mch)) 928 return (ENOTEMPTY); 929 930 dls_link_rele(dlp); 931 return (0); 932 } 933 934 int 935 dls_link_setzid(const char *name, zoneid_t zid) 936 { 937 dls_link_t *dlp; 938 int err = 0; 939 zoneid_t old_zid; 940 941 if ((err = dls_link_hold_create(name, &dlp)) != 0) 942 return (err); 943 944 ASSERT(MAC_PERIM_HELD(dlp->dl_mh)); 945 946 if ((old_zid = dlp->dl_zid) == zid) 947 goto done; 948 949 /* 950 * Check whether this dlp is used by its own zone. If yes, we cannot 951 * change its zoneid. 952 */ 953 if (dlp->dl_zone_ref != 0) { 954 err = EBUSY; 955 goto done; 956 } 957 958 dlp->dl_zid = zid; 959 960 if (zid == GLOBAL_ZONEID) { 961 /* 962 * The link is moving from a non-global zone to the global 963 * zone, so we need to release the reference that was held 964 * when the link was originally assigned to the non-global 965 * zone. 966 */ 967 dls_link_rele(dlp); 968 } 969 970 done: 971 /* 972 * We only keep the reference to this link open if the link has 973 * successfully moved from the global zone to a non-global zone. 974 */ 975 if (err != 0 || old_zid != GLOBAL_ZONEID) 976 dls_link_rele(dlp); 977 return (err); 978 } 979 980 int 981 dls_link_getzid(const char *name, zoneid_t *zidp) 982 { 983 dls_link_t *dlp; 984 int err = 0; 985 986 if ((err = dls_link_hold(name, &dlp)) != 0) 987 return (err); 988 989 ASSERT(MAC_PERIM_HELD(dlp->dl_mh)); 990 991 *zidp = dlp->dl_zid; 992 993 dls_link_rele(dlp); 994 return (0); 995 } 996 997 void 998 dls_link_add(dls_link_t *dlp, uint32_t sap, dld_str_t *dsp) 999 { 1000 mod_hash_t *hash = dlp->dl_str_hash; 1001 mod_hash_key_t key; 1002 dls_head_t *dhp; 1003 dld_str_t *p; 1004 int err; 1005 1006 ASSERT(MAC_PERIM_HELD(dlp->dl_mh)); 1007 1008 /* 1009 * Generate a hash key based on the sap. 1010 */ 1011 key = MAKE_KEY(sap); 1012 1013 /* 1014 * Search the table for a list head with this key. 1015 */ 1016 if ((err = mod_hash_find(hash, key, (mod_hash_val_t *)&dhp)) != 0) { 1017 ASSERT(err == MH_ERR_NOTFOUND); 1018 1019 dhp = i_dls_head_alloc(key); 1020 err = mod_hash_insert(hash, key, (mod_hash_val_t)dhp); 1021 ASSERT(err == 0); 1022 } 1023 1024 /* 1025 * Add the dld_str_t to the head of the list. List walkers in 1026 * i_dls_link_rx_* bump up dh_ref to ensure the list does not change 1027 * while they walk the list. The membar below ensures that list walkers 1028 * see exactly the old list or the new list. 1029 */ 1030 ASSERT(dsp->ds_next == NULL); 1031 p = dhp->dh_list; 1032 dsp->ds_next = p; 1033 1034 membar_producer(); 1035 1036 dhp->dh_list = dsp; 1037 1038 /* 1039 * Save a pointer to the list head. 1040 */ 1041 dsp->ds_head = dhp; 1042 dlp->dl_impl_count++; 1043 } 1044 1045 void 1046 dls_link_remove(dls_link_t *dlp, dld_str_t *dsp) 1047 { 1048 mod_hash_t *hash = dlp->dl_str_hash; 1049 dld_str_t **pp; 1050 dld_str_t *p; 1051 dls_head_t *dhp; 1052 1053 ASSERT(MAC_PERIM_HELD(dlp->dl_mh)); 1054 1055 /* 1056 * We set dh_removing here to tell the receive callbacks not to pass 1057 * up packets anymore. Then wait till the current callbacks are done. 1058 * This happens either in the close path or in processing the 1059 * DL_UNBIND_REQ via a taskq thread, and it is ok to cv_wait in either. 1060 * The dh_ref ensures there aren't and there won't be any upcalls 1061 * walking or using the dh_list. The mod hash internal lock ensures 1062 * that the insert/remove of the dls_head_t itself synchronizes with 1063 * any i_dls_link_rx trying to locate it. The perimeter ensures that 1064 * there isn't another simultaneous dls_link_add/remove. 1065 */ 1066 dhp = dsp->ds_head; 1067 1068 mutex_enter(&dhp->dh_lock); 1069 dhp->dh_removing = B_TRUE; 1070 while (dhp->dh_ref != 0) 1071 cv_wait(&dhp->dh_cv, &dhp->dh_lock); 1072 mutex_exit(&dhp->dh_lock); 1073 1074 /* 1075 * Walk the list and remove the dld_str_t. 1076 */ 1077 for (pp = &dhp->dh_list; (p = *pp) != NULL; pp = &(p->ds_next)) { 1078 if (p == dsp) 1079 break; 1080 } 1081 ASSERT(p != NULL); 1082 *pp = p->ds_next; 1083 p->ds_next = NULL; 1084 p->ds_head = NULL; 1085 1086 ASSERT(dlp->dl_impl_count != 0); 1087 dlp->dl_impl_count--; 1088 1089 if (dhp->dh_list == NULL) { 1090 mod_hash_val_t val = NULL; 1091 1092 /* 1093 * The list is empty so remove the hash table entry. 1094 */ 1095 (void) mod_hash_remove(hash, dhp->dh_key, &val); 1096 ASSERT(dhp == (dls_head_t *)val); 1097 i_dls_head_free(dhp); 1098 } else { 1099 mutex_enter(&dhp->dh_lock); 1100 dhp->dh_removing = B_FALSE; 1101 mutex_exit(&dhp->dh_lock); 1102 } 1103 } 1104