1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2018 Joyent, Inc. 25 * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved. 26 */ 27 28 #include <sys/types.h> 29 #include <sys/conf.h> 30 #include <sys/id_space.h> 31 #include <sys/esunddi.h> 32 #include <sys/stat.h> 33 #include <sys/mkdev.h> 34 #include <sys/stream.h> 35 #include <sys/strsubr.h> 36 #include <sys/dlpi.h> 37 #include <sys/modhash.h> 38 #include <sys/mac.h> 39 #include <sys/mac_provider.h> 40 #include <sys/mac_impl.h> 41 #include <sys/mac_client_impl.h> 42 #include <sys/mac_client_priv.h> 43 #include <sys/mac_soft_ring.h> 44 #include <sys/mac_stat.h> 45 #include <sys/dld.h> 46 #include <sys/modctl.h> 47 #include <sys/fs/dv_node.h> 48 #include <sys/thread.h> 49 #include <sys/proc.h> 50 #include <sys/callb.h> 51 #include <sys/cpuvar.h> 52 #include <sys/atomic.h> 53 #include <sys/sdt.h> 54 #include <sys/mac_flow.h> 55 #include <sys/ddi_intr_impl.h> 56 #include <sys/disp.h> 57 #include <sys/sdt.h> 58 #include <sys/pattr.h> 59 #include <sys/strsun.h> 60 #include <sys/vlan.h> 61 #include <inet/ip.h> 62 #include <inet/tcp.h> 63 #include <netinet/udp.h> 64 #include <netinet/sctp.h> 65 66 /* 67 * MAC Provider Interface. 68 * 69 * Interface for GLDv3 compatible NIC drivers. 70 */ 71 72 static void i_mac_notify_thread(void *); 73 74 typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *); 75 76 static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = { 77 mac_fanout_recompute, /* MAC_NOTE_LINK */ 78 NULL, /* MAC_NOTE_UNICST */ 79 NULL, /* MAC_NOTE_TX */ 80 NULL, /* MAC_NOTE_DEVPROMISC */ 81 NULL, /* MAC_NOTE_FASTPATH_FLUSH */ 82 NULL, /* MAC_NOTE_SDU_SIZE */ 83 NULL, /* MAC_NOTE_MARGIN */ 84 NULL, /* MAC_NOTE_CAPAB_CHG */ 85 NULL /* MAC_NOTE_LOWLINK */ 86 }; 87 88 /* 89 * Driver support functions. 90 */ 91 92 /* REGISTRATION */ 93 94 mac_register_t * 95 mac_alloc(uint_t mac_version) 96 { 97 mac_register_t *mregp; 98 99 /* 100 * Make sure there isn't a version mismatch between the driver and 101 * the framework. In the future, if multiple versions are 102 * supported, this check could become more sophisticated. 103 */ 104 if (mac_version != MAC_VERSION) 105 return (NULL); 106 107 mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP); 108 mregp->m_version = mac_version; 109 return (mregp); 110 } 111 112 void 113 mac_free(mac_register_t *mregp) 114 { 115 kmem_free(mregp, sizeof (mac_register_t)); 116 } 117 118 /* 119 * mac_register() is how drivers register new MACs with the GLDv3 120 * framework. 
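 * A typical GLDv3 driver does this from its attach(9E) entry point; the
 * sketch below is purely illustrative (the "xx" driver, its soft state
 * xxp and its xx_m_callbacks structure are hypothetical, not part of this
 * file):
 *
 *	mac_register_t *mregp;
 *	int err;
 *
 *	if ((mregp = mac_alloc(MAC_VERSION)) == NULL)
 *		return (DDI_FAILURE);
 *	mregp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
 *	mregp->m_driver = xxp;
 *	mregp->m_dip = dip;
 *	mregp->m_src_addr = xxp->xx_macaddr;
 *	mregp->m_callbacks = &xx_m_callbacks;
 *	mregp->m_min_sdu = 0;
 *	mregp->m_max_sdu = ETHERMTU;
 *	mregp->m_margin = VLAN_TAGSZ;
 *	err = mac_register(mregp, &xxp->xx_mac_handle);
 *	mac_free(mregp);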
The mregp argument is allocated by drivers using the 121 * mac_alloc() function, and can be freed using mac_free() immediately upon 122 * return from mac_register(). Upon success (0 return value), the mhp 123 * opaque pointer becomes the driver's handle to its MAC interface, and is 124 * the argument to all other mac module entry points. 125 */ 126 /* ARGSUSED */ 127 int 128 mac_register(mac_register_t *mregp, mac_handle_t *mhp) 129 { 130 mac_impl_t *mip; 131 mactype_t *mtype; 132 int err = EINVAL; 133 struct devnames *dnp = NULL; 134 uint_t instance; 135 boolean_t style1_created = B_FALSE; 136 boolean_t style2_created = B_FALSE; 137 char *driver; 138 minor_t minor = 0; 139 140 /* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. */ 141 if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip))) 142 return (EINVAL); 143 144 /* Find the required MAC-Type plugin. */ 145 if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL) 146 return (EINVAL); 147 148 /* Create a mac_impl_t to represent this MAC. */ 149 mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP); 150 151 /* 152 * The mac is not ready for open yet. 153 */ 154 mip->mi_state_flags |= MIS_DISABLED; 155 156 /* 157 * When a mac is registered, the m_instance field can be set to: 158 * 159 * 0: Get the mac's instance number from m_dip. 160 * This is usually used for physical device dips. 161 * 162 * [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number. 163 * For example, when an aggregation is created with the key option, 164 * "key" will be used as the instance number. 165 * 166 * -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1]. 167 * This is often used when a MAC of a virtual link is registered 168 * (e.g., aggregation when "key" is not specified, or vnic). 169 * 170 * Note that the instance number is used to derive the mi_minor field 171 * of mac_impl_t, which will then be used to derive the name of kstats 172 * and the devfs nodes. The first 2 cases are needed to preserve 173 * backward compatibility. 
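 * For example (illustrative): a physical NIC such as igb instance 2
 * registers with m_instance 0 and ends up named "igb2" with mi_minor 3,
 * while a vnic registers with m_instance -1 and is assigned a minor
 * above MAC_MAX_MINOR.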
174 */ 175 switch (mregp->m_instance) { 176 case 0: 177 instance = ddi_get_instance(mregp->m_dip); 178 break; 179 case ((uint_t)-1): 180 minor = mac_minor_hold(B_TRUE); 181 if (minor == 0) { 182 err = ENOSPC; 183 goto fail; 184 } 185 instance = minor - 1; 186 break; 187 default: 188 instance = mregp->m_instance; 189 if (instance >= MAC_MAX_MINOR) { 190 err = EINVAL; 191 goto fail; 192 } 193 break; 194 } 195 196 mip->mi_minor = (minor_t)(instance + 1); 197 mip->mi_dip = mregp->m_dip; 198 mip->mi_clients_list = NULL; 199 mip->mi_nclients = 0; 200 201 /* Set the default IEEE Port VLAN Identifier */ 202 mip->mi_pvid = 1; 203 204 /* Default bridge link learning protection values */ 205 mip->mi_llimit = 1000; 206 mip->mi_ldecay = 200; 207 208 driver = (char *)ddi_driver_name(mip->mi_dip); 209 210 /* Construct the MAC name as <drvname><instance> */ 211 (void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d", 212 driver, instance); 213 214 mip->mi_driver = mregp->m_driver; 215 216 mip->mi_type = mtype; 217 mip->mi_margin = mregp->m_margin; 218 mip->mi_info.mi_media = mtype->mt_type; 219 mip->mi_info.mi_nativemedia = mtype->mt_nativetype; 220 if (mregp->m_max_sdu <= mregp->m_min_sdu) 221 goto fail; 222 if (mregp->m_multicast_sdu == 0) 223 mregp->m_multicast_sdu = mregp->m_max_sdu; 224 if (mregp->m_multicast_sdu < mregp->m_min_sdu || 225 mregp->m_multicast_sdu > mregp->m_max_sdu) 226 goto fail; 227 mip->mi_sdu_min = mregp->m_min_sdu; 228 mip->mi_sdu_max = mregp->m_max_sdu; 229 mip->mi_sdu_multicast = mregp->m_multicast_sdu; 230 mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length; 231 /* 232 * If the media supports a broadcast address, cache a pointer to it 233 * in the mac_info_t so that upper layers can use it. 234 */ 235 mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr; 236 237 mip->mi_v12n_level = mregp->m_v12n; 238 239 /* 240 * Copy the unicast source address into the mac_info_t, but only if 241 * the MAC-Type defines a non-zero address length. We need to 242 * handle MAC-Types that have an address length of 0 243 * (point-to-point protocol MACs for example). 244 */ 245 if (mip->mi_type->mt_addr_length > 0) { 246 if (mregp->m_src_addr == NULL) 247 goto fail; 248 mip->mi_info.mi_unicst_addr = 249 kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP); 250 bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr, 251 mip->mi_type->mt_addr_length); 252 253 /* 254 * Copy the fixed 'factory' MAC address from the immutable 255 * info. This is taken to be the MAC address currently in 256 * use. 257 */ 258 bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr, 259 mip->mi_type->mt_addr_length); 260 261 /* 262 * At this point, we should set up the classification 263 * rules etc but we delay it till mac_open() so that 264 * the resource discovery has taken place and we 265 * know someone wants to use the device. Otherwise 266 * memory gets allocated for Rx ring structures even 267 * during probe. 268 */ 269 270 /* Copy the destination address if one is provided. */ 271 if (mregp->m_dst_addr != NULL) { 272 bcopy(mregp->m_dst_addr, mip->mi_dstaddr, 273 mip->mi_type->mt_addr_length); 274 mip->mi_dstaddr_set = B_TRUE; 275 } 276 } else if (mregp->m_src_addr != NULL) { 277 goto fail; 278 } 279 280 /* 281 * The format of the m_pdata is specific to the plugin. It is 282 * passed in as an argument to all of the plugin callbacks. The 283 * driver can update this information by calling 284 * mac_pdata_update(). 
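 * A driver whose plugin data can change after registration would refresh
 * it with a call along these lines (sketch only; xxp and its fields are
 * hypothetical):
 *
 *	(void) mac_pdata_update(xxp->xx_mac_handle, &xxp->xx_pdata,
 *	    sizeof (xxp->xx_pdata));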
285 */ 286 if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) { 287 /* 288 * Verify if the supplied plugin data is valid. Note that 289 * even if the caller passed in a NULL pointer as plugin data, 290 * we still need to verify if that's valid as the plugin may 291 * require plugin data to function. 292 */ 293 if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata, 294 mregp->m_pdata_size)) { 295 goto fail; 296 } 297 if (mregp->m_pdata != NULL) { 298 mip->mi_pdata = 299 kmem_alloc(mregp->m_pdata_size, KM_SLEEP); 300 bcopy(mregp->m_pdata, mip->mi_pdata, 301 mregp->m_pdata_size); 302 mip->mi_pdata_size = mregp->m_pdata_size; 303 } 304 } else if (mregp->m_pdata != NULL) { 305 /* 306 * The caller supplied non-NULL plugin data, but the plugin 307 * does not recognize plugin data. 308 */ 309 err = EINVAL; 310 goto fail; 311 } 312 313 /* 314 * Register the private properties. 315 */ 316 mac_register_priv_prop(mip, mregp->m_priv_props); 317 318 /* 319 * Stash the driver callbacks into the mac_impl_t, but first sanity 320 * check to make sure all mandatory callbacks are set. 321 */ 322 if (mregp->m_callbacks->mc_getstat == NULL || 323 mregp->m_callbacks->mc_start == NULL || 324 mregp->m_callbacks->mc_stop == NULL || 325 mregp->m_callbacks->mc_setpromisc == NULL || 326 mregp->m_callbacks->mc_multicst == NULL) { 327 goto fail; 328 } 329 mip->mi_callbacks = mregp->m_callbacks; 330 331 if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY, 332 &mip->mi_capab_legacy)) { 333 mip->mi_state_flags |= MIS_LEGACY; 334 mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev; 335 } else { 336 mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip), 337 mip->mi_minor); 338 } 339 340 /* 341 * Allocate a notification thread. thread_create blocks for memory 342 * if needed, it never fails. 343 */ 344 mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread, 345 mip, 0, &p0, TS_RUN, minclsyspri); 346 347 /* 348 * Initialize the capabilities 349 */ 350 351 bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t)); 352 bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t)); 353 354 if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL)) 355 mip->mi_state_flags |= MIS_IS_VNIC; 356 357 if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL)) 358 mip->mi_state_flags |= MIS_IS_AGGR; 359 360 mac_addr_factory_init(mip); 361 362 mac_transceiver_init(mip); 363 364 mac_led_init(mip); 365 366 /* 367 * Enforce the virtrualization level registered. 368 */ 369 if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) { 370 if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 || 371 mac_init_rings(mip, MAC_RING_TYPE_TX) != 0) 372 goto fail; 373 374 /* 375 * The driver needs to register at least rx rings for this 376 * virtualization level. 377 */ 378 if (mip->mi_rx_groups == NULL) 379 goto fail; 380 } 381 382 /* 383 * The driver must set mc_unicst entry point to NULL when it advertises 384 * CAP_RINGS for rx groups. 385 */ 386 if (mip->mi_rx_groups != NULL) { 387 if (mregp->m_callbacks->mc_unicst != NULL) 388 goto fail; 389 } else { 390 if (mregp->m_callbacks->mc_unicst == NULL) 391 goto fail; 392 } 393 394 /* 395 * Initialize MAC addresses. Must be called after mac_init_rings(). 396 */ 397 mac_init_macaddr(mip); 398 399 mip->mi_share_capab.ms_snum = 0; 400 if (mip->mi_v12n_level & MAC_VIRT_HIO) { 401 (void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES, 402 &mip->mi_share_capab); 403 } 404 405 /* 406 * Initialize the kstats for this device. 407 */ 408 mac_driver_stat_create(mip); 409 410 /* Zero out any properties. 
*/ 411 bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t)); 412 413 if (mip->mi_minor <= MAC_MAX_MINOR) { 414 /* Create a style-2 DLPI device */ 415 if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0, 416 DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS) 417 goto fail; 418 style2_created = B_TRUE; 419 420 /* Create a style-1 DLPI device */ 421 if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR, 422 mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS) 423 goto fail; 424 style1_created = B_TRUE; 425 } 426 427 mac_flow_l2tab_create(mip, &mip->mi_flow_tab); 428 429 rw_enter(&i_mac_impl_lock, RW_WRITER); 430 if (mod_hash_insert(i_mac_impl_hash, 431 (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) { 432 rw_exit(&i_mac_impl_lock); 433 err = EEXIST; 434 goto fail; 435 } 436 437 DTRACE_PROBE2(mac__register, struct devnames *, dnp, 438 (mac_impl_t *), mip); 439 440 /* 441 * Mark the MAC to be ready for open. 442 */ 443 mip->mi_state_flags &= ~MIS_DISABLED; 444 rw_exit(&i_mac_impl_lock); 445 446 atomic_inc_32(&i_mac_impl_count); 447 448 cmn_err(CE_NOTE, "!%s registered", mip->mi_name); 449 *mhp = (mac_handle_t)mip; 450 return (0); 451 452 fail: 453 if (style1_created) 454 ddi_remove_minor_node(mip->mi_dip, mip->mi_name); 455 456 if (style2_created) 457 ddi_remove_minor_node(mip->mi_dip, driver); 458 459 mac_addr_factory_fini(mip); 460 461 /* Clean up registered MAC addresses */ 462 mac_fini_macaddr(mip); 463 464 /* Clean up registered rings */ 465 mac_free_rings(mip, MAC_RING_TYPE_RX); 466 mac_free_rings(mip, MAC_RING_TYPE_TX); 467 468 /* Clean up notification thread */ 469 if (mip->mi_notify_thread != NULL) 470 i_mac_notify_exit(mip); 471 472 if (mip->mi_info.mi_unicst_addr != NULL) { 473 kmem_free(mip->mi_info.mi_unicst_addr, 474 mip->mi_type->mt_addr_length); 475 mip->mi_info.mi_unicst_addr = NULL; 476 } 477 478 mac_driver_stat_delete(mip); 479 480 if (mip->mi_type != NULL) { 481 atomic_dec_32(&mip->mi_type->mt_ref); 482 mip->mi_type = NULL; 483 } 484 485 if (mip->mi_pdata != NULL) { 486 kmem_free(mip->mi_pdata, mip->mi_pdata_size); 487 mip->mi_pdata = NULL; 488 mip->mi_pdata_size = 0; 489 } 490 491 if (minor != 0) { 492 ASSERT(minor > MAC_MAX_MINOR); 493 mac_minor_rele(minor); 494 } 495 496 mip->mi_state_flags = 0; 497 mac_unregister_priv_prop(mip); 498 499 /* 500 * Clear the state before destroying the mac_impl_t 501 */ 502 mip->mi_state_flags = 0; 503 504 kmem_cache_free(i_mac_impl_cachep, mip); 505 return (err); 506 } 507 508 /* 509 * Unregister from the GLDv3 framework 510 */ 511 int 512 mac_unregister(mac_handle_t mh) 513 { 514 int err; 515 mac_impl_t *mip = (mac_impl_t *)mh; 516 mod_hash_val_t val; 517 mac_margin_req_t *mmr, *nextmmr; 518 519 /* Fail the unregister if there are any open references to this mac. */ 520 if ((err = mac_disable_nowait(mh)) != 0) 521 return (err); 522 523 /* 524 * Clean up notification thread and wait for it to exit. 525 */ 526 i_mac_notify_exit(mip); 527 528 /* 529 * Prior to acquiring the MAC perimeter, remove the MAC instance from 530 * the internal hash table. Such removal means table-walkers that 531 * acquire the perimeter will not do so on behalf of what we are 532 * unregistering, which prevents a deadlock. 533 */ 534 rw_enter(&i_mac_impl_lock, RW_WRITER); 535 (void) mod_hash_remove(i_mac_impl_hash, 536 (mod_hash_key_t)mip->mi_name, &val); 537 rw_exit(&i_mac_impl_lock); 538 ASSERT(mip == (mac_impl_t *)val); 539 540 i_mac_perim_enter(mip); 541 542 /* 543 * If there are still resource properties configured on this mac, re-enable the fastpath. 
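 * (mac_fastpath_enable() is the counterpart of the mac_fastpath_disable()
 * call made when those properties were first configured.)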
544 */ 545 if (mip->mi_resource_props.mrp_mask != 0) 546 mac_fastpath_enable((mac_handle_t)mip); 547 548 if (mip->mi_minor < MAC_MAX_MINOR + 1) { 549 ddi_remove_minor_node(mip->mi_dip, mip->mi_name); 550 ddi_remove_minor_node(mip->mi_dip, 551 (char *)ddi_driver_name(mip->mi_dip)); 552 } 553 554 ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags & 555 MIS_EXCLUSIVE)); 556 557 mac_driver_stat_delete(mip); 558 559 ASSERT(i_mac_impl_count > 0); 560 atomic_dec_32(&i_mac_impl_count); 561 562 if (mip->mi_pdata != NULL) 563 kmem_free(mip->mi_pdata, mip->mi_pdata_size); 564 mip->mi_pdata = NULL; 565 mip->mi_pdata_size = 0; 566 567 /* 568 * Free the list of margin request. 569 */ 570 for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) { 571 nextmmr = mmr->mmr_nextp; 572 kmem_free(mmr, sizeof (mac_margin_req_t)); 573 } 574 mip->mi_mmrp = NULL; 575 576 mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN; 577 kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length); 578 mip->mi_info.mi_unicst_addr = NULL; 579 580 atomic_dec_32(&mip->mi_type->mt_ref); 581 mip->mi_type = NULL; 582 583 /* 584 * Free the primary MAC address. 585 */ 586 mac_fini_macaddr(mip); 587 588 /* 589 * free all rings 590 */ 591 mac_free_rings(mip, MAC_RING_TYPE_RX); 592 mac_free_rings(mip, MAC_RING_TYPE_TX); 593 594 mac_addr_factory_fini(mip); 595 596 bzero(mip->mi_addr, MAXMACADDRLEN); 597 bzero(mip->mi_dstaddr, MAXMACADDRLEN); 598 mip->mi_dstaddr_set = B_FALSE; 599 600 /* and the flows */ 601 mac_flow_tab_destroy(mip->mi_flow_tab); 602 mip->mi_flow_tab = NULL; 603 604 if (mip->mi_minor > MAC_MAX_MINOR) 605 mac_minor_rele(mip->mi_minor); 606 607 cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name); 608 609 /* 610 * Reset the perim related fields to default values before 611 * kmem_cache_free 612 */ 613 i_mac_perim_exit(mip); 614 mip->mi_state_flags = 0; 615 616 mac_unregister_priv_prop(mip); 617 618 ASSERT(mip->mi_bridge_link == NULL); 619 kmem_cache_free(i_mac_impl_cachep, mip); 620 621 return (0); 622 } 623 624 /* DATA RECEPTION */ 625 626 /* 627 * This function is invoked for packets received by the MAC driver in 628 * interrupt context. The ring generation number provided by the driver 629 * is matched with the ring generation number held in MAC. If they do not 630 * match, received packets are considered stale packets coming from an older 631 * assignment of the ring. Drop them. 632 */ 633 void 634 mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain, 635 uint64_t mr_gen_num) 636 { 637 mac_ring_t *mr = (mac_ring_t *)mrh; 638 639 if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) { 640 DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t, 641 mr->mr_gen_num, uint64_t, mr_gen_num); 642 freemsgchain(mp_chain); 643 return; 644 } 645 mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain); 646 } 647 648 /* 649 * This function is invoked for each packet received by the underlying driver. 650 */ 651 void 652 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) 653 { 654 mac_impl_t *mip = (mac_impl_t *)mh; 655 656 /* 657 * Check if the link is part of a bridge. If not, then we don't need 658 * to take the lock to remain consistent. Make this common case 659 * lock-free and tail-call optimized. 660 */ 661 if (mip->mi_bridge_link == NULL) { 662 mac_rx_common(mh, mrh, mp_chain); 663 } else { 664 /* 665 * Once we take a reference on the bridge link, the bridge 666 * module itself can't unload, so the callback pointers are 667 * stable. 
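 * Either way the chain is consumed here and must not be referenced again
 * by the caller. A driver typically hands packets up from its rx interrupt
 * handler with a call such as the following sketch (the ring and soft
 * state names are hypothetical):
 *
 *	mac_rx_ring(xxp->xx_mac_handle, rxr->xr_ring_handle, mp_chain,
 *	    rxr->xr_gen_num);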
668 */ 669 mutex_enter(&mip->mi_bridge_lock); 670 if ((mh = mip->mi_bridge_link) != NULL) 671 mac_bridge_ref_cb(mh, B_TRUE); 672 mutex_exit(&mip->mi_bridge_lock); 673 if (mh == NULL) { 674 mac_rx_common((mac_handle_t)mip, mrh, mp_chain); 675 } else { 676 mac_bridge_rx_cb(mh, mrh, mp_chain); 677 mac_bridge_ref_cb(mh, B_FALSE); 678 } 679 } 680 } 681 682 /* 683 * Special case function: this allows snooping of packets transmitted and 684 * received by TRILL. By design, they go directly into the TRILL module. 685 */ 686 void 687 mac_trill_snoop(mac_handle_t mh, mblk_t *mp) 688 { 689 mac_impl_t *mip = (mac_impl_t *)mh; 690 691 if (mip->mi_promisc_list != NULL) 692 mac_promisc_dispatch(mip, mp, NULL); 693 } 694 695 /* 696 * This is the upward reentry point for packets arriving from the bridging 697 * module and from mac_rx for links not part of a bridge. 698 */ 699 void 700 mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) 701 { 702 mac_impl_t *mip = (mac_impl_t *)mh; 703 mac_ring_t *mr = (mac_ring_t *)mrh; 704 mac_soft_ring_set_t *mac_srs; 705 mblk_t *bp = mp_chain; 706 707 /* 708 * If there are any promiscuous mode callbacks defined for 709 * this MAC, pass them a copy if appropriate. 710 */ 711 if (mip->mi_promisc_list != NULL) 712 mac_promisc_dispatch(mip, mp_chain, NULL); 713 714 if (mr != NULL) { 715 /* 716 * If the SRS teardown has started, just return. The 'mr' 717 * continues to be valid until the driver unregisters the MAC. 718 * Hardware classified packets will not make their way up 719 * beyond this point once the teardown has started. The driver 720 * is never passed a pointer to a flow entry or SRS or any 721 * structure that can be freed much before mac_unregister. 722 */ 723 mutex_enter(&mr->mr_lock); 724 if ((mr->mr_state != MR_INUSE) || (mr->mr_flag & 725 (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) { 726 mutex_exit(&mr->mr_lock); 727 freemsgchain(mp_chain); 728 return; 729 } 730 731 /* 732 * The ring is in passthru mode; pass the chain up to 733 * the pseudo ring. 734 */ 735 if (mr->mr_classify_type == MAC_PASSTHRU_CLASSIFIER) { 736 MR_REFHOLD_LOCKED(mr); 737 mutex_exit(&mr->mr_lock); 738 mr->mr_pt_fn(mr->mr_pt_arg1, mr->mr_pt_arg2, mp_chain, 739 B_FALSE); 740 MR_REFRELE(mr); 741 return; 742 } 743 744 /* 745 * The passthru callback should only be set when in 746 * MAC_PASSTHRU_CLASSIFIER mode. 747 */ 748 ASSERT3P(mr->mr_pt_fn, ==, NULL); 749 750 /* 751 * We check if an SRS is controlling this ring. 752 * If so, we can directly call the srs_lower_proc 753 * routine otherwise we need to go through mac_rx_classify 754 * to reach the right place. 755 */ 756 if (mr->mr_classify_type == MAC_HW_CLASSIFIER) { 757 MR_REFHOLD_LOCKED(mr); 758 mutex_exit(&mr->mr_lock); 759 ASSERT3P(mr->mr_srs, !=, NULL); 760 mac_srs = mr->mr_srs; 761 762 /* 763 * This is the fast path. All packets received 764 * on this ring are hardware classified and 765 * share the same MAC header info. 
766 */ 767 mac_srs->srs_rx.sr_lower_proc(mh, 768 (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE); 769 MR_REFRELE(mr); 770 return; 771 } 772 773 mutex_exit(&mr->mr_lock); 774 /* We'll fall through to software classification */ 775 } else { 776 flow_entry_t *flent; 777 int err; 778 779 rw_enter(&mip->mi_rw_lock, RW_READER); 780 if (mip->mi_single_active_client != NULL) { 781 flent = mip->mi_single_active_client->mci_flent_list; 782 FLOW_TRY_REFHOLD(flent, err); 783 rw_exit(&mip->mi_rw_lock); 784 if (err == 0) { 785 (flent->fe_cb_fn)(flent->fe_cb_arg1, 786 flent->fe_cb_arg2, mp_chain, B_FALSE); 787 FLOW_REFRELE(flent); 788 return; 789 } 790 } else { 791 rw_exit(&mip->mi_rw_lock); 792 } 793 } 794 795 if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) { 796 if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL) 797 return; 798 } 799 800 freemsgchain(bp); 801 } 802 803 /* DATA TRANSMISSION */ 804 805 /* 806 * A driver's notification to resume transmission, in case of a provider 807 * without TX rings. 808 */ 809 void 810 mac_tx_update(mac_handle_t mh) 811 { 812 mac_tx_ring_update(mh, NULL); 813 } 814 815 /* 816 * A driver's notification to resume transmission on the specified TX ring. 817 */ 818 void 819 mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh) 820 { 821 i_mac_tx_srs_notify((mac_impl_t *)mh, rh); 822 } 823 824 /* LINK STATE */ 825 /* 826 * Notify the MAC layer about a link state change 827 */ 828 void 829 mac_link_update(mac_handle_t mh, link_state_t link) 830 { 831 mac_impl_t *mip = (mac_impl_t *)mh; 832 833 /* 834 * Save the link state. 835 */ 836 mip->mi_lowlinkstate = link; 837 838 /* 839 * Send a MAC_NOTE_LOWLINK notification. This tells the notification 840 * thread to deliver both lower and upper notifications. 841 */ 842 i_mac_notify(mip, MAC_NOTE_LOWLINK); 843 } 844 845 /* 846 * Notify the MAC layer about a link state change due to bridging. 847 */ 848 void 849 mac_link_redo(mac_handle_t mh, link_state_t link) 850 { 851 mac_impl_t *mip = (mac_impl_t *)mh; 852 853 /* 854 * Save the link state. 855 */ 856 mip->mi_linkstate = link; 857 858 /* 859 * Send a MAC_NOTE_LINK notification. Only upper notifications are 860 * made. 861 */ 862 i_mac_notify(mip, MAC_NOTE_LINK); 863 } 864 865 /* MINOR NODE HANDLING */ 866 867 /* 868 * Given a dev_t, return the instance number (PPA) associated with it. 869 * Drivers can use this in their getinfo(9e) implementation to lookup 870 * the instance number (i.e. PPA) of the device, to use as an index to 871 * their own array of soft state structures. 872 * 873 * Returns -1 on error. 874 */ 875 int 876 mac_devt_to_instance(dev_t devt) 877 { 878 return (dld_devt_to_instance(devt)); 879 } 880 881 /* 882 * This function returns the first minor number that is available for 883 * driver private use. All minor numbers smaller than this are 884 * reserved for GLDv3 use. 885 */ 886 minor_t 887 mac_private_minor(void) 888 { 889 return (MAC_PRIVATE_MINOR); 890 } 891 892 /* OTHER CONTROL INFORMATION */ 893 894 /* 895 * A driver notified us that its primary MAC address has changed. 896 */ 897 void 898 mac_unicst_update(mac_handle_t mh, const uint8_t *addr) 899 { 900 mac_impl_t *mip = (mac_impl_t *)mh; 901 902 if (mip->mi_type->mt_addr_length == 0) 903 return; 904 905 i_mac_perim_enter(mip); 906 907 /* 908 * If address changes, freshen the MAC address value and update 909 * all MAC clients that share this MAC address. 
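 * A driver calls mac_unicst_update() when it learns that its primary
 * hardware address has changed out from under it, e.g. (sketch,
 * hypothetical names):
 *
 *	mac_unicst_update(xxp->xx_mac_handle, xxp->xx_curr_macaddr);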
910 */ 911 if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) != 0) { 912 mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr), 913 (uint8_t *)addr); 914 } 915 916 i_mac_perim_exit(mip); 917 918 /* 919 * Send a MAC_NOTE_UNICST notification. 920 */ 921 i_mac_notify(mip, MAC_NOTE_UNICST); 922 } 923 924 void 925 mac_dst_update(mac_handle_t mh, const uint8_t *addr) 926 { 927 mac_impl_t *mip = (mac_impl_t *)mh; 928 929 if (mip->mi_type->mt_addr_length == 0) 930 return; 931 932 i_mac_perim_enter(mip); 933 bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length); 934 i_mac_perim_exit(mip); 935 i_mac_notify(mip, MAC_NOTE_DEST); 936 } 937 938 /* 939 * MAC plugin information changed. 940 */ 941 int 942 mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize) 943 { 944 mac_impl_t *mip = (mac_impl_t *)mh; 945 946 /* 947 * Verify that the plugin supports MAC plugin data and that the 948 * supplied data is valid. 949 */ 950 if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY)) 951 return (EINVAL); 952 if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize)) 953 return (EINVAL); 954 955 if (mip->mi_pdata != NULL) 956 kmem_free(mip->mi_pdata, mip->mi_pdata_size); 957 958 mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP); 959 bcopy(mac_pdata, mip->mi_pdata, dsize); 960 mip->mi_pdata_size = dsize; 961 962 /* 963 * Since the MAC plugin data is used to construct MAC headers that 964 * were cached in fast-path headers, we need to flush fast-path 965 * information for links associated with this mac. 966 */ 967 i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH); 968 return (0); 969 } 970 971 /* 972 * Invoked by driver as well as the framework to notify its capability change. 973 */ 974 void 975 mac_capab_update(mac_handle_t mh) 976 { 977 /* Send MAC_NOTE_CAPAB_CHG notification */ 978 i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG); 979 } 980 981 /* 982 * Used by normal drivers to update the max sdu size. 983 * We need to handle the case of a smaller mi_sdu_multicast 984 * since this is called by mac_set_mtu() even for drivers that 985 * have differing unicast and multicast mtu and we don't want to 986 * increase the multicast mtu by accident in that case. 987 */ 988 int 989 mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max) 990 { 991 mac_impl_t *mip = (mac_impl_t *)mh; 992 993 if (sdu_max == 0 || sdu_max < mip->mi_sdu_min) 994 return (EINVAL); 995 mip->mi_sdu_max = sdu_max; 996 if (mip->mi_sdu_multicast > mip->mi_sdu_max) 997 mip->mi_sdu_multicast = mip->mi_sdu_max; 998 999 /* Send a MAC_NOTE_SDU_SIZE notification. */ 1000 i_mac_notify(mip, MAC_NOTE_SDU_SIZE); 1001 return (0); 1002 } 1003 1004 /* 1005 * Version of the above function that is used by drivers that have a different 1006 * max sdu size for multicast/broadcast vs. unicast. 1007 */ 1008 int 1009 mac_maxsdu_update2(mac_handle_t mh, uint_t sdu_max, uint_t sdu_multicast) 1010 { 1011 mac_impl_t *mip = (mac_impl_t *)mh; 1012 1013 if (sdu_max == 0 || sdu_max < mip->mi_sdu_min) 1014 return (EINVAL); 1015 if (sdu_multicast == 0) 1016 sdu_multicast = sdu_max; 1017 if (sdu_multicast > sdu_max || sdu_multicast < mip->mi_sdu_min) 1018 return (EINVAL); 1019 mip->mi_sdu_max = sdu_max; 1020 mip->mi_sdu_multicast = sdu_multicast; 1021 1022 /* Send a MAC_NOTE_SDU_SIZE notification. 
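 * so that clients re-validate their cached SDU sizes. A driver typically
 * calls mac_maxsdu_update() (or this two-SDU variant) from its
 * mc_setprop(MAC_PROP_MTU) handler, e.g. (sketch, hypothetical names):
 *
 *	if (mac_maxsdu_update(xxp->xx_mac_handle, new_mtu) != 0)
 *		return (EINVAL);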
*/ 1023 i_mac_notify(mip, MAC_NOTE_SDU_SIZE); 1024 return (0); 1025 } 1026 1027 static void 1028 mac_ring_intr_retarget(mac_group_t *group, mac_ring_t *ring) 1029 { 1030 mac_client_impl_t *mcip; 1031 flow_entry_t *flent; 1032 mac_soft_ring_set_t *mac_rx_srs; 1033 mac_cpus_t *srs_cpu; 1034 int i; 1035 1036 if (((mcip = MAC_GROUP_ONLY_CLIENT(group)) != NULL) && 1037 (!ring->mr_info.mri_intr.mi_ddi_shared)) { 1038 /* interrupt can be re-targeted */ 1039 ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); 1040 flent = mcip->mci_flent; 1041 if (ring->mr_type == MAC_RING_TYPE_RX) { 1042 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 1043 mac_rx_srs = flent->fe_rx_srs[i]; 1044 if (mac_rx_srs->srs_ring != ring) 1045 continue; 1046 srs_cpu = &mac_rx_srs->srs_cpu; 1047 mutex_enter(&cpu_lock); 1048 mac_rx_srs_retarget_intr(mac_rx_srs, 1049 srs_cpu->mc_rx_intr_cpu); 1050 mutex_exit(&cpu_lock); 1051 break; 1052 } 1053 } else { 1054 if (flent->fe_tx_srs != NULL) { 1055 mutex_enter(&cpu_lock); 1056 mac_tx_srs_retarget_intr( 1057 flent->fe_tx_srs); 1058 mutex_exit(&cpu_lock); 1059 } 1060 } 1061 } 1062 } 1063 1064 /* 1065 * Clients like aggr create pseudo rings (mac_ring_t) and expose them to 1066 * their clients. There is a 1-1 mapping between the pseudo ring and the 1067 * hardware ring. ddi interrupt handles are exported from the hardware ring to 1068 * the pseudo ring. Thus when the interrupt handle changes, clients of 1069 * aggr that are using the handle need to use the new handle and 1070 * re-target their interrupts. 1071 */ 1072 static void 1073 mac_pseudo_ring_intr_retarget(mac_impl_t *mip, mac_ring_t *ring, 1074 ddi_intr_handle_t ddh) 1075 { 1076 mac_ring_t *pring; 1077 mac_group_t *pgroup; 1078 mac_impl_t *pmip; 1079 char macname[MAXNAMELEN]; 1080 mac_perim_handle_t p_mph; 1081 uint64_t saved_gen_num; 1082 1083 again: 1084 pring = (mac_ring_t *)ring->mr_prh; 1085 pgroup = (mac_group_t *)pring->mr_gh; 1086 pmip = (mac_impl_t *)pgroup->mrg_mh; 1087 saved_gen_num = ring->mr_gen_num; 1088 (void) strlcpy(macname, pmip->mi_name, MAXNAMELEN); 1089 /* 1090 * We need to enter aggr's perimeter. The locking hierarchy 1091 * dictates that aggr's perimeter should be entered first 1092 * and then the port's perimeter. So drop the port's 1093 * perimeter, enter aggr's and then re-enter port's 1094 * perimeter. 1095 */ 1096 i_mac_perim_exit(mip); 1097 /* 1098 * While we know pmip is the aggr's mip, there is a 1099 * possibility that aggr could have unregistered by 1100 * the time we exit port's perimeter (mip) and 1101 * enter aggr's perimeter (pmip). To avoid that 1102 * scenario, enter aggr's perimeter using its name. 1103 */ 1104 if (mac_perim_enter_by_macname(macname, &p_mph) != 0) 1105 return; 1106 i_mac_perim_enter(mip); 1107 /* 1108 * Check if the ring got assigned to another aggregation before 1109 * we could enter aggr's and the port's perimeter. When a ring 1110 * gets deleted from an aggregation, it calls mac_stop_ring() 1111 * which increments the generation number. So checking the 1112 * generation number will be enough. 
1113 */ 1114 if (ring->mr_gen_num != saved_gen_num && ring->mr_prh != NULL) { 1115 i_mac_perim_exit(mip); 1116 mac_perim_exit(p_mph); 1117 i_mac_perim_enter(mip); 1118 goto again; 1119 } 1120 1121 /* Check if pseudo ring is still present */ 1122 if (ring->mr_prh != NULL) { 1123 pring->mr_info.mri_intr.mi_ddi_handle = ddh; 1124 pring->mr_info.mri_intr.mi_ddi_shared = 1125 ring->mr_info.mri_intr.mi_ddi_shared; 1126 if (ddh != NULL) 1127 mac_ring_intr_retarget(pgroup, pring); 1128 } 1129 i_mac_perim_exit(mip); 1130 mac_perim_exit(p_mph); 1131 } 1132 /* 1133 * API called by the driver to provide a new interrupt handle for TX/RX rings. 1134 * This usually happens when the IRM (Interrupt Resource Management) 1135 * framework either gives the driver more MSI-X interrupts or takes 1136 * away MSI-X interrupts from the driver. 1137 */ 1138 void 1139 mac_ring_intr_set(mac_ring_handle_t mrh, ddi_intr_handle_t ddh) 1140 { 1141 mac_ring_t *ring = (mac_ring_t *)mrh; 1142 mac_group_t *group = (mac_group_t *)ring->mr_gh; 1143 mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 1144 1145 i_mac_perim_enter(mip); 1146 ring->mr_info.mri_intr.mi_ddi_handle = ddh; 1147 if (ddh == NULL) { 1148 /* Interrupts being reset */ 1149 ring->mr_info.mri_intr.mi_ddi_shared = B_FALSE; 1150 if (ring->mr_prh != NULL) { 1151 mac_pseudo_ring_intr_retarget(mip, ring, ddh); 1152 return; 1153 } 1154 } else { 1155 /* New interrupt handle */ 1156 mac_compare_ddi_handle(mip->mi_rx_groups, 1157 mip->mi_rx_group_count, ring); 1158 if (!ring->mr_info.mri_intr.mi_ddi_shared) { 1159 mac_compare_ddi_handle(mip->mi_tx_groups, 1160 mip->mi_tx_group_count, ring); 1161 } 1162 if (ring->mr_prh != NULL) { 1163 mac_pseudo_ring_intr_retarget(mip, ring, ddh); 1164 return; 1165 } else { 1166 mac_ring_intr_retarget(group, ring); 1167 } 1168 } 1169 i_mac_perim_exit(mip); 1170 } 1171 1172 /* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */ 1173 1174 /* 1175 * Log a change in the link state and record it in the mac_impl structure. 1176 */ 1177 static void 1178 i_mac_log_link_state(mac_impl_t *mip) 1179 { 1180 /* 1181 * If no change, then it is not interesting. 1182 */ 1183 if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate) 1184 return; 1185 1186 switch (mip->mi_lowlinkstate) { 1187 case LINK_STATE_UP: 1188 if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) { 1189 char det[200]; 1190 1191 mip->mi_type->mt_ops.mtops_link_details(det, 1192 sizeof (det), (mac_handle_t)mip, mip->mi_pdata); 1193 1194 cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det); 1195 } else { 1196 cmn_err(CE_NOTE, "!%s link up", mip->mi_name); 1197 } 1198 break; 1199 1200 case LINK_STATE_DOWN: 1201 /* 1202 * Only transitions from UP to DOWN are interesting 1203 */ 1204 if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN) 1205 cmn_err(CE_NOTE, "!%s link down", mip->mi_name); 1206 break; 1207 1208 case LINK_STATE_UNKNOWN: 1209 /* 1210 * This case is normally not interesting. 
1211 */ 1212 break; 1213 } 1214 mip->mi_lastlowlinkstate = mip->mi_lowlinkstate; 1215 } 1216 1217 /* 1218 * Main routine for the callbacks notifications thread 1219 */ 1220 static void 1221 i_mac_notify_thread(void *arg) 1222 { 1223 mac_impl_t *mip = arg; 1224 callb_cpr_t cprinfo; 1225 mac_cb_t *mcb; 1226 mac_cb_info_t *mcbi; 1227 mac_notify_cb_t *mncb; 1228 1229 mcbi = &mip->mi_notify_cb_info; 1230 CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr, 1231 "i_mac_notify_thread"); 1232 1233 mutex_enter(mcbi->mcbi_lockp); 1234 1235 for (;;) { 1236 uint32_t bits; 1237 uint32_t type; 1238 1239 bits = mip->mi_notify_bits; 1240 if (bits == 0) { 1241 CALLB_CPR_SAFE_BEGIN(&cprinfo); 1242 cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp); 1243 CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp); 1244 continue; 1245 } 1246 mip->mi_notify_bits = 0; 1247 if ((bits & (1 << MAC_NNOTE)) != 0) { 1248 /* request to quit */ 1249 ASSERT(mip->mi_state_flags & MIS_DISABLED); 1250 break; 1251 } 1252 1253 mutex_exit(mcbi->mcbi_lockp); 1254 1255 /* 1256 * Log link changes on the actual link, but then do reports on 1257 * synthetic state (if part of a bridge). 1258 */ 1259 if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) { 1260 link_state_t newstate; 1261 mac_handle_t mh; 1262 1263 i_mac_log_link_state(mip); 1264 newstate = mip->mi_lowlinkstate; 1265 if (mip->mi_bridge_link != NULL) { 1266 mutex_enter(&mip->mi_bridge_lock); 1267 if ((mh = mip->mi_bridge_link) != NULL) { 1268 newstate = mac_bridge_ls_cb(mh, 1269 newstate); 1270 } 1271 mutex_exit(&mip->mi_bridge_lock); 1272 } 1273 if (newstate != mip->mi_linkstate) { 1274 mip->mi_linkstate = newstate; 1275 bits |= 1 << MAC_NOTE_LINK; 1276 } 1277 } 1278 1279 /* 1280 * Do notification callbacks for each notification type. 1281 */ 1282 for (type = 0; type < MAC_NNOTE; type++) { 1283 if ((bits & (1 << type)) == 0) { 1284 continue; 1285 } 1286 1287 if (mac_notify_cb_list[type] != NULL) 1288 (*mac_notify_cb_list[type])(mip); 1289 1290 /* 1291 * Walk the list of notifications. 1292 */ 1293 MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info); 1294 for (mcb = mip->mi_notify_cb_list; mcb != NULL; 1295 mcb = mcb->mcb_nextp) { 1296 mncb = (mac_notify_cb_t *)mcb->mcb_objp; 1297 mncb->mncb_fn(mncb->mncb_arg, type); 1298 } 1299 MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info, 1300 &mip->mi_notify_cb_list); 1301 } 1302 1303 mutex_enter(mcbi->mcbi_lockp); 1304 } 1305 1306 mip->mi_state_flags |= MIS_NOTIFY_DONE; 1307 cv_broadcast(&mcbi->mcbi_cv); 1308 1309 /* CALLB_CPR_EXIT drops the lock */ 1310 CALLB_CPR_EXIT(&cprinfo); 1311 thread_exit(); 1312 } 1313 1314 /* 1315 * Signal the i_mac_notify_thread asking it to quit. 1316 * Then wait till it is done. 1317 */ 1318 void 1319 i_mac_notify_exit(mac_impl_t *mip) 1320 { 1321 mac_cb_info_t *mcbi; 1322 1323 mcbi = &mip->mi_notify_cb_info; 1324 1325 mutex_enter(mcbi->mcbi_lockp); 1326 mip->mi_notify_bits = (1 << MAC_NNOTE); 1327 cv_broadcast(&mcbi->mcbi_cv); 1328 1329 1330 while ((mip->mi_notify_thread != NULL) && 1331 !(mip->mi_state_flags & MIS_NOTIFY_DONE)) { 1332 cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp); 1333 } 1334 1335 /* Necessary clean up before doing kmem_cache_free */ 1336 mip->mi_state_flags &= ~MIS_NOTIFY_DONE; 1337 mip->mi_notify_bits = 0; 1338 mip->mi_notify_thread = NULL; 1339 mutex_exit(mcbi->mcbi_lockp); 1340 } 1341 1342 /* 1343 * Entry point invoked by drivers to dynamically add a ring to an 1344 * existing group. 
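 * For example (sketch, hypothetical names), a driver that brings another
 * rx ring online after registration could expose it with:
 *
 *	(void) mac_group_add_ring(xxp->xx_rx_group_handle, new_ring_index);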
1345 */ 1346 int 1347 mac_group_add_ring(mac_group_handle_t gh, int index) 1348 { 1349 mac_group_t *group = (mac_group_t *)gh; 1350 mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 1351 int ret; 1352 1353 i_mac_perim_enter(mip); 1354 ret = i_mac_group_add_ring(group, NULL, index); 1355 i_mac_perim_exit(mip); 1356 return (ret); 1357 } 1358 1359 /* 1360 * Entry point invoked by drivers to dynamically remove a ring 1361 * from an existing group. The specified ring handle must no longer 1362 * be used by the driver after a call to this function. 1363 */ 1364 void 1365 mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh) 1366 { 1367 mac_group_t *group = (mac_group_t *)gh; 1368 mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 1369 1370 i_mac_perim_enter(mip); 1371 i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE); 1372 i_mac_perim_exit(mip); 1373 } 1374 1375 /* 1376 * mac_prop_info_*() callbacks called from the driver's prefix_propinfo() 1377 * entry points. 1378 */ 1379 1380 void 1381 mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph, uint8_t val) 1382 { 1383 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; 1384 1385 /* nothing to do if the caller doesn't want the default value */ 1386 if (pr->pr_default == NULL) 1387 return; 1388 1389 ASSERT(pr->pr_default_size >= sizeof (uint8_t)); 1390 1391 *(uint8_t *)(pr->pr_default) = val; 1392 pr->pr_flags |= MAC_PROP_INFO_DEFAULT; 1393 } 1394 1395 void 1396 mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph, uint64_t val) 1397 { 1398 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; 1399 1400 /* nothing to do if the caller doesn't want the default value */ 1401 if (pr->pr_default == NULL) 1402 return; 1403 1404 ASSERT(pr->pr_default_size >= sizeof (uint64_t)); 1405 1406 bcopy(&val, pr->pr_default, sizeof (val)); 1407 1408 pr->pr_flags |= MAC_PROP_INFO_DEFAULT; 1409 } 1410 1411 void 1412 mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph, uint32_t val) 1413 { 1414 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; 1415 1416 /* nothing to do if the caller doesn't want the default value */ 1417 if (pr->pr_default == NULL) 1418 return; 1419 1420 ASSERT(pr->pr_default_size >= sizeof (uint32_t)); 1421 1422 bcopy(&val, pr->pr_default, sizeof (val)); 1423 1424 pr->pr_flags |= MAC_PROP_INFO_DEFAULT; 1425 } 1426 1427 void 1428 mac_prop_info_set_default_str(mac_prop_info_handle_t ph, const char *str) 1429 { 1430 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; 1431 1432 /* nothing to do if the caller doesn't want the default value */ 1433 if (pr->pr_default == NULL) 1434 return; 1435 1436 if (strlen(str) >= pr->pr_default_size) 1437 pr->pr_errno = ENOBUFS; 1438 else 1439 (void) strlcpy(pr->pr_default, str, pr->pr_default_size); 1440 pr->pr_flags |= MAC_PROP_INFO_DEFAULT; 1441 } 1442 1443 void 1444 mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph, 1445 link_flowctrl_t val) 1446 { 1447 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; 1448 1449 /* nothing to do if the caller doesn't want the default value */ 1450 if (pr->pr_default == NULL) 1451 return; 1452 1453 ASSERT(pr->pr_default_size >= sizeof (link_flowctrl_t)); 1454 1455 bcopy(&val, pr->pr_default, sizeof (val)); 1456 1457 pr->pr_flags |= MAC_PROP_INFO_DEFAULT; 1458 } 1459 1460 void 1461 mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min, 1462 uint32_t max) 1463 { 1464 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; 1465 mac_propval_range_t *range = pr->pr_range; 1466 
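	/* Points at the caller-supplied array of uint32 ranges; the current entry is filled in below. */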
mac_propval_uint32_range_t *range32; 1467 1468 /* nothing to do if the caller doesn't want the range info */ 1469 if (range == NULL) 1470 return; 1471 1472 if (pr->pr_range_cur_count++ == 0) { 1473 /* first range */ 1474 pr->pr_flags |= MAC_PROP_INFO_RANGE; 1475 range->mpr_type = MAC_PROPVAL_UINT32; 1476 } else { 1477 /* all ranges of a property should be of the same type */ 1478 ASSERT(range->mpr_type == MAC_PROPVAL_UINT32); 1479 if (pr->pr_range_cur_count > range->mpr_count) { 1480 pr->pr_errno = ENOSPC; 1481 return; 1482 } 1483 } 1484 1485 range32 = range->mpr_range_uint32; 1486 range32[pr->pr_range_cur_count - 1].mpur_min = min; 1487 range32[pr->pr_range_cur_count - 1].mpur_max = max; 1488 } 1489 1490 void 1491 mac_prop_info_set_perm(mac_prop_info_handle_t ph, uint8_t perm) 1492 { 1493 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; 1494 1495 pr->pr_perm = perm; 1496 pr->pr_flags |= MAC_PROP_INFO_PERM; 1497 } 1498 1499 void mac_hcksum_get(mblk_t *mp, uint32_t *start, uint32_t *stuff, 1500 uint32_t *end, uint32_t *value, uint32_t *flags_ptr) 1501 { 1502 uint32_t flags; 1503 1504 ASSERT(DB_TYPE(mp) == M_DATA); 1505 1506 flags = DB_CKSUMFLAGS(mp) & HCK_FLAGS; 1507 if ((flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) != 0) { 1508 if (value != NULL) 1509 *value = (uint32_t)DB_CKSUM16(mp); 1510 if ((flags & HCK_PARTIALCKSUM) != 0) { 1511 if (start != NULL) 1512 *start = (uint32_t)DB_CKSUMSTART(mp); 1513 if (stuff != NULL) 1514 *stuff = (uint32_t)DB_CKSUMSTUFF(mp); 1515 if (end != NULL) 1516 *end = (uint32_t)DB_CKSUMEND(mp); 1517 } 1518 } 1519 1520 if (flags_ptr != NULL) 1521 *flags_ptr = flags; 1522 } 1523 1524 void mac_hcksum_set(mblk_t *mp, uint32_t start, uint32_t stuff, 1525 uint32_t end, uint32_t value, uint32_t flags) 1526 { 1527 ASSERT(DB_TYPE(mp) == M_DATA); 1528 1529 DB_CKSUMSTART(mp) = (intptr_t)start; 1530 DB_CKSUMSTUFF(mp) = (intptr_t)stuff; 1531 DB_CKSUMEND(mp) = (intptr_t)end; 1532 DB_CKSUMFLAGS(mp) = (uint16_t)flags; 1533 DB_CKSUM16(mp) = (uint16_t)value; 1534 } 1535 1536 void 1537 mac_lso_get(mblk_t *mp, uint32_t *mss, uint32_t *flags) 1538 { 1539 ASSERT(DB_TYPE(mp) == M_DATA); 1540 1541 if (flags != NULL) { 1542 *flags = DB_CKSUMFLAGS(mp) & HW_LSO; 1543 if ((*flags != 0) && (mss != NULL)) 1544 *mss = (uint32_t)DB_LSOMSS(mp); 1545 } 1546 } 1547 1548 void 1549 mac_transceiver_info_set_present(mac_transceiver_info_t *infop, 1550 boolean_t present) 1551 { 1552 infop->mti_present = present; 1553 } 1554 1555 void 1556 mac_transceiver_info_set_usable(mac_transceiver_info_t *infop, 1557 boolean_t usable) 1558 { 1559 infop->mti_usable = usable; 1560 } 1561 1562 /* 1563 * We should really keep track of our offset and not walk everything every 1564 * time. I can't imagine that this will be kind to us at high packet rates; 1565 * however, for the moment, let's leave that. 1566 * 1567 * This walks a message block chain without pulling up to fill in the context 1568 * information. Note that the data we care about could be hidden across more 1569 * than one mblk_t. 
1570 */ 1571 static int 1572 mac_meoi_get_uint8(mblk_t *mp, off_t off, uint8_t *out) 1573 { 1574 size_t mpsize; 1575 uint8_t *bp; 1576 1577 mpsize = msgsize(mp); 1578 /* Check for overflow */ 1579 if (off + sizeof (uint8_t) > mpsize) 1580 return (-1); 1581 1582 mpsize = MBLKL(mp); 1583 while (off >= mpsize) { 1584 mp = mp->b_cont; 1585 off -= mpsize; 1586 mpsize = MBLKL(mp); 1587 } 1588 1589 bp = mp->b_rptr + off; 1590 *out = *bp; 1591 return (0); 1592 1593 } 1594 1595 static int 1596 mac_meoi_get_uint16(mblk_t *mp, off_t off, uint16_t *out) 1597 { 1598 size_t mpsize; 1599 uint8_t *bp; 1600 1601 mpsize = msgsize(mp); 1602 /* Check for overflow */ 1603 if (off + sizeof (uint16_t) > mpsize) 1604 return (-1); 1605 1606 mpsize = MBLKL(mp); 1607 while (off >= mpsize) { 1608 mp = mp->b_cont; 1609 off -= mpsize; 1610 mpsize = MBLKL(mp); 1611 } 1612 1613 /* 1614 * Data is in network order. Note the second byte of data might be in 1615 * the next mp. 1616 */ 1617 bp = mp->b_rptr + off; 1618 *out = *bp << 8; 1619 if (off + 1 == mpsize) { 1620 mp = mp->b_cont; 1621 bp = mp->b_rptr; 1622 } else { 1623 bp++; 1624 } 1625 1626 *out |= *bp; 1627 return (0); 1628 1629 } 1630 1631 1632 int 1633 mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi) 1634 { 1635 size_t off; 1636 uint16_t ether; 1637 uint8_t ipproto, iplen, l4len, maclen; 1638 1639 bzero(meoi, sizeof (mac_ether_offload_info_t)); 1640 1641 meoi->meoi_len = msgsize(mp); 1642 off = offsetof(struct ether_header, ether_type); 1643 if (mac_meoi_get_uint16(mp, off, &ether) != 0) 1644 return (-1); 1645 1646 if (ether == ETHERTYPE_VLAN) { 1647 off = offsetof(struct ether_vlan_header, ether_type); 1648 if (mac_meoi_get_uint16(mp, off, &ether) != 0) 1649 return (-1); 1650 meoi->meoi_flags |= MEOI_VLAN_TAGGED; 1651 maclen = sizeof (struct ether_vlan_header); 1652 } else { 1653 maclen = sizeof (struct ether_header); 1654 } 1655 meoi->meoi_flags |= MEOI_L2INFO_SET; 1656 meoi->meoi_l2hlen = maclen; 1657 meoi->meoi_l3proto = ether; 1658 1659 switch (ether) { 1660 case ETHERTYPE_IP: 1661 /* 1662 * For IPv4 we need to get the length of the header, as it can 1663 * be variable. 1664 */ 1665 off = offsetof(ipha_t, ipha_version_and_hdr_length) + maclen; 1666 if (mac_meoi_get_uint8(mp, off, &iplen) != 0) 1667 return (-1); 1668 iplen &= 0x0f; 1669 if (iplen < 5 || iplen > 0x0f) 1670 return (-1); 1671 iplen *= 4; 1672 off = offsetof(ipha_t, ipha_protocol) + maclen; 1673 if (mac_meoi_get_uint8(mp, off, &ipproto) == -1) 1674 return (-1); 1675 break; 1676 case ETHERTYPE_IPV6: 1677 iplen = 40; 1678 off = offsetof(ip6_t, ip6_nxt) + maclen; 1679 if (mac_meoi_get_uint8(mp, off, &ipproto) == -1) 1680 return (-1); 1681 break; 1682 default: 1683 return (0); 1684 } 1685 meoi->meoi_l3hlen = iplen; 1686 meoi->meoi_l4proto = ipproto; 1687 meoi->meoi_flags |= MEOI_L3INFO_SET; 1688 1689 switch (ipproto) { 1690 case IPPROTO_TCP: 1691 off = offsetof(tcph_t, th_offset_and_rsrvd) + maclen + iplen; 1692 if (mac_meoi_get_uint8(mp, off, &l4len) == -1) 1693 return (-1); 1694 l4len = (l4len & 0xf0) >> 4; 1695 if (l4len < 5 || l4len > 0xf) 1696 return (-1); 1697 l4len *= 4; 1698 break; 1699 case IPPROTO_UDP: 1700 l4len = sizeof (struct udphdr); 1701 break; 1702 case IPPROTO_SCTP: 1703 l4len = sizeof (sctp_hdr_t); 1704 break; 1705 default: 1706 return (0); 1707 } 1708 1709 meoi->meoi_l4hlen = l4len; 1710 meoi->meoi_flags |= MEOI_L4INFO_SET; 1711 return (0); 1712 } 1713
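/*
 * Example (sketch, not part of the framework itself): a transmit path that
 * needs the combined header length, e.g. for LSO bookkeeping, could use the
 * parser above as follows.
 *
 *	mac_ether_offload_info_t meoi;
 *	size_t hdrlen;
 *
 *	if (mac_ether_offload_info(mp, &meoi) == 0 &&
 *	    (meoi.meoi_flags & MEOI_L4INFO_SET) != 0) {
 *		hdrlen = meoi.meoi_l2hlen + meoi.meoi_l3hlen +
 *		    meoi.meoi_l4hlen;
 *	}
 */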