1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2019 Joyent, Inc. 25 * Copyright 2017 OmniTI Computer Consulting, Inc. All rights reserved. 26 * Copyright 2020 RackTop Systems, Inc. 
27 */ 28 29 #include <sys/types.h> 30 #include <sys/conf.h> 31 #include <sys/id_space.h> 32 #include <sys/esunddi.h> 33 #include <sys/stat.h> 34 #include <sys/mkdev.h> 35 #include <sys/stream.h> 36 #include <sys/strsubr.h> 37 #include <sys/dlpi.h> 38 #include <sys/modhash.h> 39 #include <sys/mac.h> 40 #include <sys/mac_provider.h> 41 #include <sys/mac_impl.h> 42 #include <sys/mac_client_impl.h> 43 #include <sys/mac_client_priv.h> 44 #include <sys/mac_soft_ring.h> 45 #include <sys/mac_stat.h> 46 #include <sys/dld.h> 47 #include <sys/modctl.h> 48 #include <sys/fs/dv_node.h> 49 #include <sys/thread.h> 50 #include <sys/proc.h> 51 #include <sys/callb.h> 52 #include <sys/cpuvar.h> 53 #include <sys/atomic.h> 54 #include <sys/sdt.h> 55 #include <sys/mac_flow.h> 56 #include <sys/ddi_intr_impl.h> 57 #include <sys/disp.h> 58 #include <sys/sdt.h> 59 #include <sys/pattr.h> 60 #include <sys/strsun.h> 61 #include <sys/vlan.h> 62 #include <inet/ip.h> 63 #include <inet/tcp.h> 64 #include <netinet/udp.h> 65 #include <netinet/sctp.h> 66 67 /* 68 * MAC Provider Interface. 69 * 70 * Interface for GLDv3 compatible NIC drivers. 71 */ 72 73 static void i_mac_notify_thread(void *); 74 75 typedef void (*mac_notify_default_cb_fn_t)(mac_impl_t *); 76 77 static const mac_notify_default_cb_fn_t mac_notify_cb_list[MAC_NNOTE] = { 78 mac_fanout_recompute, /* MAC_NOTE_LINK */ 79 NULL, /* MAC_NOTE_UNICST */ 80 NULL, /* MAC_NOTE_TX */ 81 NULL, /* MAC_NOTE_DEVPROMISC */ 82 NULL, /* MAC_NOTE_FASTPATH_FLUSH */ 83 NULL, /* MAC_NOTE_SDU_SIZE */ 84 NULL, /* MAC_NOTE_MARGIN */ 85 NULL, /* MAC_NOTE_CAPAB_CHG */ 86 NULL /* MAC_NOTE_LOWLINK */ 87 }; 88 89 /* 90 * Driver support functions. 91 */ 92 93 /* REGISTRATION */ 94 95 mac_register_t * 96 mac_alloc(uint_t mac_version) 97 { 98 mac_register_t *mregp; 99 100 /* 101 * Make sure there isn't a version mismatch between the driver and 102 * the framework. In the future, if multiple versions are 103 * supported, this check could become more sophisticated. 
104 */ 105 if (mac_version != MAC_VERSION) 106 return (NULL); 107 108 mregp = kmem_zalloc(sizeof (mac_register_t), KM_SLEEP); 109 mregp->m_version = mac_version; 110 return (mregp); 111 } 112 113 void 114 mac_free(mac_register_t *mregp) 115 { 116 kmem_free(mregp, sizeof (mac_register_t)); 117 } 118 119 /* 120 * Convert a MAC's offload features into the equivalent DB_CKSUMFLAGS 121 * value. 122 */ 123 static uint16_t 124 mac_features_to_flags(mac_handle_t mh) 125 { 126 uint16_t flags = 0; 127 uint32_t cap_sum = 0; 128 mac_capab_lso_t cap_lso; 129 130 if (mac_capab_get(mh, MAC_CAPAB_HCKSUM, &cap_sum)) { 131 if (cap_sum & HCKSUM_IPHDRCKSUM) 132 flags |= HCK_IPV4_HDRCKSUM; 133 134 if (cap_sum & HCKSUM_INET_PARTIAL) 135 flags |= HCK_PARTIALCKSUM; 136 else if (cap_sum & (HCKSUM_INET_FULL_V4 | HCKSUM_INET_FULL_V6)) 137 flags |= HCK_FULLCKSUM; 138 } 139 140 /* 141 * We don't need the information stored in 'cap_lso', but we 142 * need to pass a non-NULL pointer to appease the driver. 143 */ 144 if (mac_capab_get(mh, MAC_CAPAB_LSO, &cap_lso)) 145 flags |= HW_LSO; 146 147 return (flags); 148 } 149 150 /* 151 * mac_register() is how drivers register new MACs with the GLDv3 152 * framework. The mregp argument is allocated by drivers using the 153 * mac_alloc() function, and can be freed using mac_free() immediately upon 154 * return from mac_register(). Upon success (0 return value), the mhp 155 * opaque pointer becomes the driver's handle to its MAC interface, and is 156 * the argument to all other mac module entry points. 157 */ 158 /* ARGSUSED */ 159 int 160 mac_register(mac_register_t *mregp, mac_handle_t *mhp) 161 { 162 mac_impl_t *mip; 163 mactype_t *mtype; 164 int err = EINVAL; 165 struct devnames *dnp = NULL; 166 uint_t instance; 167 boolean_t style1_created = B_FALSE; 168 boolean_t style2_created = B_FALSE; 169 char *driver; 170 minor_t minor = 0; 171 172 /* A successful call to mac_init_ops() sets the DN_GLDV3_DRIVER flag. 
*/ 173 if (!GLDV3_DRV(ddi_driver_major(mregp->m_dip))) 174 return (EINVAL); 175 176 /* Find the required MAC-Type plugin. */ 177 if ((mtype = mactype_getplugin(mregp->m_type_ident)) == NULL) 178 return (EINVAL); 179 180 /* Create a mac_impl_t to represent this MAC. */ 181 mip = kmem_cache_alloc(i_mac_impl_cachep, KM_SLEEP); 182 183 /* 184 * The mac is not ready for open yet. 185 */ 186 mip->mi_state_flags |= MIS_DISABLED; 187 188 /* 189 * When a mac is registered, the m_instance field can be set to: 190 * 191 * 0: Get the mac's instance number from m_dip. 192 * This is usually used for physical device dips. 193 * 194 * [1 .. MAC_MAX_MINOR-1]: Use the value as the mac's instance number. 195 * For example, when an aggregation is created with the key option, 196 * "key" will be used as the instance number. 197 * 198 * -1: Assign an instance number from [MAC_MAX_MINOR .. MAXMIN-1]. 199 * This is often used when a MAC of a virtual link is registered 200 * (e.g., aggregation when "key" is not specified, or vnic). 201 * 202 * Note that the instance number is used to derive the mi_minor field 203 * of mac_impl_t, which will then be used to derive the name of kstats 204 * and the devfs nodes. The first 2 cases are needed to preserve 205 * backward compatibility. 
206 */ 207 switch (mregp->m_instance) { 208 case 0: 209 instance = ddi_get_instance(mregp->m_dip); 210 break; 211 case ((uint_t)-1): 212 minor = mac_minor_hold(B_TRUE); 213 if (minor == 0) { 214 err = ENOSPC; 215 goto fail; 216 } 217 instance = minor - 1; 218 break; 219 default: 220 instance = mregp->m_instance; 221 if (instance >= MAC_MAX_MINOR) { 222 err = EINVAL; 223 goto fail; 224 } 225 break; 226 } 227 228 mip->mi_minor = (minor_t)(instance + 1); 229 mip->mi_dip = mregp->m_dip; 230 mip->mi_clients_list = NULL; 231 mip->mi_nclients = 0; 232 233 /* Set the default IEEE Port VLAN Identifier */ 234 mip->mi_pvid = 1; 235 236 /* Default bridge link learning protection values */ 237 mip->mi_llimit = 1000; 238 mip->mi_ldecay = 200; 239 240 driver = (char *)ddi_driver_name(mip->mi_dip); 241 242 /* Construct the MAC name as <drvname><instance> */ 243 (void) snprintf(mip->mi_name, sizeof (mip->mi_name), "%s%d", 244 driver, instance); 245 246 mip->mi_driver = mregp->m_driver; 247 248 mip->mi_type = mtype; 249 mip->mi_margin = mregp->m_margin; 250 mip->mi_info.mi_media = mtype->mt_type; 251 mip->mi_info.mi_nativemedia = mtype->mt_nativetype; 252 if (mregp->m_max_sdu <= mregp->m_min_sdu) 253 goto fail; 254 if (mregp->m_multicast_sdu == 0) 255 mregp->m_multicast_sdu = mregp->m_max_sdu; 256 if (mregp->m_multicast_sdu < mregp->m_min_sdu || 257 mregp->m_multicast_sdu > mregp->m_max_sdu) 258 goto fail; 259 mip->mi_sdu_min = mregp->m_min_sdu; 260 mip->mi_sdu_max = mregp->m_max_sdu; 261 mip->mi_sdu_multicast = mregp->m_multicast_sdu; 262 mip->mi_info.mi_addr_length = mip->mi_type->mt_addr_length; 263 /* 264 * If the media supports a broadcast address, cache a pointer to it 265 * in the mac_info_t so that upper layers can use it. 266 */ 267 mip->mi_info.mi_brdcst_addr = mip->mi_type->mt_brdcst_addr; 268 269 mip->mi_v12n_level = mregp->m_v12n; 270 271 /* 272 * Copy the unicast source address into the mac_info_t, but only if 273 * the MAC-Type defines a non-zero address length. 
We need to 274 * handle MAC-Types that have an address length of 0 275 * (point-to-point protocol MACs for example). 276 */ 277 if (mip->mi_type->mt_addr_length > 0) { 278 if (mregp->m_src_addr == NULL) 279 goto fail; 280 mip->mi_info.mi_unicst_addr = 281 kmem_alloc(mip->mi_type->mt_addr_length, KM_SLEEP); 282 bcopy(mregp->m_src_addr, mip->mi_info.mi_unicst_addr, 283 mip->mi_type->mt_addr_length); 284 285 /* 286 * Copy the fixed 'factory' MAC address from the immutable 287 * info. This is taken to be the MAC address currently in 288 * use. 289 */ 290 bcopy(mip->mi_info.mi_unicst_addr, mip->mi_addr, 291 mip->mi_type->mt_addr_length); 292 293 /* 294 * At this point, we should set up the classification 295 * rules etc but we delay it till mac_open() so that 296 * the resource discovery has taken place and we 297 * know someone wants to use the device. Otherwise 298 * memory gets allocated for Rx ring structures even 299 * during probe. 300 */ 301 302 /* Copy the destination address if one is provided. */ 303 if (mregp->m_dst_addr != NULL) { 304 bcopy(mregp->m_dst_addr, mip->mi_dstaddr, 305 mip->mi_type->mt_addr_length); 306 mip->mi_dstaddr_set = B_TRUE; 307 } 308 } else if (mregp->m_src_addr != NULL) { 309 goto fail; 310 } 311 312 /* 313 * The format of the m_pdata is specific to the plugin. It is 314 * passed in as an argument to all of the plugin callbacks. The 315 * driver can update this information by calling 316 * mac_pdata_update(). 317 */ 318 if (mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY) { 319 /* 320 * Verify if the supplied plugin data is valid. Note that 321 * even if the caller passed in a NULL pointer as plugin data, 322 * we still need to verify if that's valid as the plugin may 323 * require plugin data to function. 
324 */ 325 if (!mip->mi_type->mt_ops.mtops_pdata_verify(mregp->m_pdata, 326 mregp->m_pdata_size)) { 327 goto fail; 328 } 329 if (mregp->m_pdata != NULL) { 330 mip->mi_pdata = 331 kmem_alloc(mregp->m_pdata_size, KM_SLEEP); 332 bcopy(mregp->m_pdata, mip->mi_pdata, 333 mregp->m_pdata_size); 334 mip->mi_pdata_size = mregp->m_pdata_size; 335 } 336 } else if (mregp->m_pdata != NULL) { 337 /* 338 * The caller supplied non-NULL plugin data, but the plugin 339 * does not recognize plugin data. 340 */ 341 err = EINVAL; 342 goto fail; 343 } 344 345 /* 346 * Register the private properties. 347 */ 348 mac_register_priv_prop(mip, mregp->m_priv_props); 349 350 /* 351 * Stash the driver callbacks into the mac_impl_t, but first sanity 352 * check to make sure all mandatory callbacks are set. 353 */ 354 if (mregp->m_callbacks->mc_getstat == NULL || 355 mregp->m_callbacks->mc_start == NULL || 356 mregp->m_callbacks->mc_stop == NULL || 357 mregp->m_callbacks->mc_setpromisc == NULL || 358 mregp->m_callbacks->mc_multicst == NULL) { 359 goto fail; 360 } 361 mip->mi_callbacks = mregp->m_callbacks; 362 363 if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LEGACY, 364 &mip->mi_capab_legacy)) { 365 mip->mi_state_flags |= MIS_LEGACY; 366 mip->mi_phy_dev = mip->mi_capab_legacy.ml_dev; 367 } else { 368 mip->mi_phy_dev = makedevice(ddi_driver_major(mip->mi_dip), 369 mip->mi_minor); 370 } 371 372 /* 373 * Allocate a notification thread. thread_create blocks for memory 374 * if needed, it never fails. 375 */ 376 mip->mi_notify_thread = thread_create(NULL, 0, i_mac_notify_thread, 377 mip, 0, &p0, TS_RUN, minclsyspri); 378 379 /* 380 * Cache the DB_CKSUMFLAGS that this MAC supports. 
381 */ 382 mip->mi_tx_cksum_flags = mac_features_to_flags((mac_handle_t)mip); 383 384 /* 385 * Initialize the capabilities 386 */ 387 bzero(&mip->mi_rx_rings_cap, sizeof (mac_capab_rings_t)); 388 bzero(&mip->mi_tx_rings_cap, sizeof (mac_capab_rings_t)); 389 390 if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_VNIC, NULL)) 391 mip->mi_state_flags |= MIS_IS_VNIC; 392 393 if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_AGGR, NULL)) 394 mip->mi_state_flags |= MIS_IS_AGGR; 395 396 if (i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_OVERLAY, NULL)) 397 mip->mi_state_flags |= MIS_IS_OVERLAY; 398 399 mac_addr_factory_init(mip); 400 401 mac_transceiver_init(mip); 402 403 mac_led_init(mip); 404 405 /* 406 * Enforce the virtrualization level registered. 407 */ 408 if (mip->mi_v12n_level & MAC_VIRT_LEVEL1) { 409 if (mac_init_rings(mip, MAC_RING_TYPE_RX) != 0 || 410 mac_init_rings(mip, MAC_RING_TYPE_TX) != 0) 411 goto fail; 412 413 /* 414 * The driver needs to register at least rx rings for this 415 * virtualization level. 416 */ 417 if (mip->mi_rx_groups == NULL) 418 goto fail; 419 } 420 421 /* 422 * The driver must set mc_unicst entry point to NULL when it advertises 423 * CAP_RINGS for rx groups. 424 */ 425 if (mip->mi_rx_groups != NULL) { 426 if (mregp->m_callbacks->mc_unicst != NULL) 427 goto fail; 428 } else { 429 if (mregp->m_callbacks->mc_unicst == NULL) 430 goto fail; 431 } 432 433 /* 434 * Initialize MAC addresses. Must be called after mac_init_rings(). 435 */ 436 mac_init_macaddr(mip); 437 438 mip->mi_share_capab.ms_snum = 0; 439 if (mip->mi_v12n_level & MAC_VIRT_HIO) { 440 (void) mac_capab_get((mac_handle_t)mip, MAC_CAPAB_SHARES, 441 &mip->mi_share_capab); 442 } 443 444 /* 445 * Initialize the kstats for this device. 446 */ 447 mac_driver_stat_create(mip); 448 449 /* Zero out any properties. 
*/ 450 bzero(&mip->mi_resource_props, sizeof (mac_resource_props_t)); 451 452 if (mip->mi_minor <= MAC_MAX_MINOR) { 453 /* Create a style-2 DLPI device */ 454 if (ddi_create_minor_node(mip->mi_dip, driver, S_IFCHR, 0, 455 DDI_NT_NET, CLONE_DEV) != DDI_SUCCESS) 456 goto fail; 457 style2_created = B_TRUE; 458 459 /* Create a style-1 DLPI device */ 460 if (ddi_create_minor_node(mip->mi_dip, mip->mi_name, S_IFCHR, 461 mip->mi_minor, DDI_NT_NET, 0) != DDI_SUCCESS) 462 goto fail; 463 style1_created = B_TRUE; 464 } 465 466 mac_flow_l2tab_create(mip, &mip->mi_flow_tab); 467 468 rw_enter(&i_mac_impl_lock, RW_WRITER); 469 if (mod_hash_insert(i_mac_impl_hash, 470 (mod_hash_key_t)mip->mi_name, (mod_hash_val_t)mip) != 0) { 471 rw_exit(&i_mac_impl_lock); 472 err = EEXIST; 473 goto fail; 474 } 475 476 DTRACE_PROBE2(mac__register, struct devnames *, dnp, 477 (mac_impl_t *), mip); 478 479 /* 480 * Mark the MAC to be ready for open. 481 */ 482 mip->mi_state_flags &= ~MIS_DISABLED; 483 rw_exit(&i_mac_impl_lock); 484 485 atomic_inc_32(&i_mac_impl_count); 486 487 cmn_err(CE_NOTE, "!%s registered", mip->mi_name); 488 *mhp = (mac_handle_t)mip; 489 return (0); 490 491 fail: 492 if (style1_created) 493 ddi_remove_minor_node(mip->mi_dip, mip->mi_name); 494 495 if (style2_created) 496 ddi_remove_minor_node(mip->mi_dip, driver); 497 498 mac_addr_factory_fini(mip); 499 500 /* Clean up registered MAC addresses */ 501 mac_fini_macaddr(mip); 502 503 /* Clean up registered rings */ 504 mac_free_rings(mip, MAC_RING_TYPE_RX); 505 mac_free_rings(mip, MAC_RING_TYPE_TX); 506 507 /* Clean up notification thread */ 508 if (mip->mi_notify_thread != NULL) 509 i_mac_notify_exit(mip); 510 511 if (mip->mi_info.mi_unicst_addr != NULL) { 512 kmem_free(mip->mi_info.mi_unicst_addr, 513 mip->mi_type->mt_addr_length); 514 mip->mi_info.mi_unicst_addr = NULL; 515 } 516 517 mac_driver_stat_delete(mip); 518 519 if (mip->mi_type != NULL) { 520 atomic_dec_32(&mip->mi_type->mt_ref); 521 mip->mi_type = NULL; 522 } 523 524 
if (mip->mi_pdata != NULL) { 525 kmem_free(mip->mi_pdata, mip->mi_pdata_size); 526 mip->mi_pdata = NULL; 527 mip->mi_pdata_size = 0; 528 } 529 530 if (minor != 0) { 531 ASSERT(minor > MAC_MAX_MINOR); 532 mac_minor_rele(minor); 533 } 534 535 mip->mi_state_flags = 0; 536 mac_unregister_priv_prop(mip); 537 538 /* 539 * Clear the state before destroying the mac_impl_t 540 */ 541 mip->mi_state_flags = 0; 542 543 kmem_cache_free(i_mac_impl_cachep, mip); 544 return (err); 545 } 546 547 /* 548 * Unregister from the GLDv3 framework 549 */ 550 int 551 mac_unregister(mac_handle_t mh) 552 { 553 int err; 554 mac_impl_t *mip = (mac_impl_t *)mh; 555 mod_hash_val_t val; 556 mac_margin_req_t *mmr, *nextmmr; 557 558 /* Fail the unregister if there are any open references to this mac. */ 559 if ((err = mac_disable_nowait(mh)) != 0) 560 return (err); 561 562 /* 563 * Clean up notification thread and wait for it to exit. 564 */ 565 i_mac_notify_exit(mip); 566 567 /* 568 * Prior to acquiring the MAC perimeter, remove the MAC instance from 569 * the internal hash table. Such removal means table-walkers that 570 * acquire the perimeter will not do so on behalf of what we are 571 * unregistering, which prevents a deadlock. 572 */ 573 rw_enter(&i_mac_impl_lock, RW_WRITER); 574 (void) mod_hash_remove(i_mac_impl_hash, 575 (mod_hash_key_t)mip->mi_name, &val); 576 rw_exit(&i_mac_impl_lock); 577 ASSERT(mip == (mac_impl_t *)val); 578 579 i_mac_perim_enter(mip); 580 581 /* 582 * There is still resource properties configured over this mac. 
583 */ 584 if (mip->mi_resource_props.mrp_mask != 0) 585 mac_fastpath_enable((mac_handle_t)mip); 586 587 if (mip->mi_minor < MAC_MAX_MINOR + 1) { 588 ddi_remove_minor_node(mip->mi_dip, mip->mi_name); 589 ddi_remove_minor_node(mip->mi_dip, 590 (char *)ddi_driver_name(mip->mi_dip)); 591 } 592 593 ASSERT(mip->mi_nactiveclients == 0 && !(mip->mi_state_flags & 594 MIS_EXCLUSIVE)); 595 596 mac_driver_stat_delete(mip); 597 598 ASSERT(i_mac_impl_count > 0); 599 atomic_dec_32(&i_mac_impl_count); 600 601 if (mip->mi_pdata != NULL) 602 kmem_free(mip->mi_pdata, mip->mi_pdata_size); 603 mip->mi_pdata = NULL; 604 mip->mi_pdata_size = 0; 605 606 /* 607 * Free the list of margin request. 608 */ 609 for (mmr = mip->mi_mmrp; mmr != NULL; mmr = nextmmr) { 610 nextmmr = mmr->mmr_nextp; 611 kmem_free(mmr, sizeof (mac_margin_req_t)); 612 } 613 mip->mi_mmrp = NULL; 614 615 mip->mi_linkstate = mip->mi_lowlinkstate = LINK_STATE_UNKNOWN; 616 kmem_free(mip->mi_info.mi_unicst_addr, mip->mi_type->mt_addr_length); 617 mip->mi_info.mi_unicst_addr = NULL; 618 619 atomic_dec_32(&mip->mi_type->mt_ref); 620 mip->mi_type = NULL; 621 622 /* 623 * Free the primary MAC address. 
624 */ 625 mac_fini_macaddr(mip); 626 627 /* 628 * free all rings 629 */ 630 mac_free_rings(mip, MAC_RING_TYPE_RX); 631 mac_free_rings(mip, MAC_RING_TYPE_TX); 632 633 mac_addr_factory_fini(mip); 634 635 bzero(mip->mi_addr, MAXMACADDRLEN); 636 bzero(mip->mi_dstaddr, MAXMACADDRLEN); 637 mip->mi_dstaddr_set = B_FALSE; 638 639 /* and the flows */ 640 mac_flow_tab_destroy(mip->mi_flow_tab); 641 mip->mi_flow_tab = NULL; 642 643 if (mip->mi_minor > MAC_MAX_MINOR) 644 mac_minor_rele(mip->mi_minor); 645 646 cmn_err(CE_NOTE, "!%s unregistered", mip->mi_name); 647 648 /* 649 * Reset the perim related fields to default values before 650 * kmem_cache_free 651 */ 652 i_mac_perim_exit(mip); 653 mip->mi_state_flags = 0; 654 655 mac_unregister_priv_prop(mip); 656 657 ASSERT(mip->mi_bridge_link == NULL); 658 kmem_cache_free(i_mac_impl_cachep, mip); 659 660 return (0); 661 } 662 663 /* DATA RECEPTION */ 664 665 /* 666 * This function is invoked for packets received by the MAC driver in 667 * interrupt context. The ring generation number provided by the driver 668 * is matched with the ring generation number held in MAC. If they do not 669 * match, received packets are considered stale packets coming from an older 670 * assignment of the ring. Drop them. 671 */ 672 void 673 mac_rx_ring(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp_chain, 674 uint64_t mr_gen_num) 675 { 676 mac_ring_t *mr = (mac_ring_t *)mrh; 677 678 if ((mr != NULL) && (mr->mr_gen_num != mr_gen_num)) { 679 DTRACE_PROBE2(mac__rx__rings__stale__packet, uint64_t, 680 mr->mr_gen_num, uint64_t, mr_gen_num); 681 freemsgchain(mp_chain); 682 return; 683 } 684 mac_rx(mh, (mac_resource_handle_t)mrh, mp_chain); 685 } 686 687 /* 688 * This function is invoked for each packet received by the underlying driver. 689 */ 690 void 691 mac_rx(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) 692 { 693 mac_impl_t *mip = (mac_impl_t *)mh; 694 695 /* 696 * Check if the link is part of a bridge. 
If not, then we don't need 697 * to take the lock to remain consistent. Make this common case 698 * lock-free and tail-call optimized. 699 */ 700 if (mip->mi_bridge_link == NULL) { 701 mac_rx_common(mh, mrh, mp_chain); 702 } else { 703 /* 704 * Once we take a reference on the bridge link, the bridge 705 * module itself can't unload, so the callback pointers are 706 * stable. 707 */ 708 mutex_enter(&mip->mi_bridge_lock); 709 if ((mh = mip->mi_bridge_link) != NULL) 710 mac_bridge_ref_cb(mh, B_TRUE); 711 mutex_exit(&mip->mi_bridge_lock); 712 if (mh == NULL) { 713 mac_rx_common((mac_handle_t)mip, mrh, mp_chain); 714 } else { 715 mac_bridge_rx_cb(mh, mrh, mp_chain); 716 mac_bridge_ref_cb(mh, B_FALSE); 717 } 718 } 719 } 720 721 /* 722 * Special case function: this allows snooping of packets transmitted and 723 * received by TRILL. By design, they go directly into the TRILL module. 724 */ 725 void 726 mac_trill_snoop(mac_handle_t mh, mblk_t *mp) 727 { 728 mac_impl_t *mip = (mac_impl_t *)mh; 729 730 if (mip->mi_promisc_list != NULL) 731 mac_promisc_dispatch(mip, mp, NULL, B_FALSE); 732 } 733 734 /* 735 * This is the upward reentry point for packets arriving from the bridging 736 * module and from mac_rx for links not part of a bridge. 737 */ 738 void 739 mac_rx_common(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) 740 { 741 mac_impl_t *mip = (mac_impl_t *)mh; 742 mac_ring_t *mr = (mac_ring_t *)mrh; 743 mac_soft_ring_set_t *mac_srs; 744 mblk_t *bp = mp_chain; 745 746 /* 747 * If there are any promiscuous mode callbacks defined for 748 * this MAC, pass them a copy if appropriate. 749 */ 750 if (mip->mi_promisc_list != NULL) 751 mac_promisc_dispatch(mip, mp_chain, NULL, B_FALSE); 752 753 if (mr != NULL) { 754 /* 755 * If the SRS teardown has started, just return. The 'mr' 756 * continues to be valid until the driver unregisters the MAC. 757 * Hardware classified packets will not make their way up 758 * beyond this point once the teardown has started. 
The driver 759 * is never passed a pointer to a flow entry or SRS or any 760 * structure that can be freed much before mac_unregister. 761 */ 762 mutex_enter(&mr->mr_lock); 763 if ((mr->mr_state != MR_INUSE) || (mr->mr_flag & 764 (MR_INCIPIENT | MR_CONDEMNED | MR_QUIESCE))) { 765 mutex_exit(&mr->mr_lock); 766 freemsgchain(mp_chain); 767 return; 768 } 769 770 /* 771 * The ring is in passthru mode; pass the chain up to 772 * the pseudo ring. 773 */ 774 if (mr->mr_classify_type == MAC_PASSTHRU_CLASSIFIER) { 775 MR_REFHOLD_LOCKED(mr); 776 mutex_exit(&mr->mr_lock); 777 mr->mr_pt_fn(mr->mr_pt_arg1, mr->mr_pt_arg2, mp_chain, 778 B_FALSE); 779 MR_REFRELE(mr); 780 return; 781 } 782 783 /* 784 * The passthru callback should only be set when in 785 * MAC_PASSTHRU_CLASSIFIER mode. 786 */ 787 ASSERT3P(mr->mr_pt_fn, ==, NULL); 788 789 /* 790 * We check if an SRS is controlling this ring. 791 * If so, we can directly call the srs_lower_proc 792 * routine otherwise we need to go through mac_rx_classify 793 * to reach the right place. 794 */ 795 if (mr->mr_classify_type == MAC_HW_CLASSIFIER) { 796 MR_REFHOLD_LOCKED(mr); 797 mutex_exit(&mr->mr_lock); 798 ASSERT3P(mr->mr_srs, !=, NULL); 799 mac_srs = mr->mr_srs; 800 801 /* 802 * This is the fast path. All packets received 803 * on this ring are hardware classified and 804 * share the same MAC header info. 
805 */ 806 mac_srs->srs_rx.sr_lower_proc(mh, 807 (mac_resource_handle_t)mac_srs, mp_chain, B_FALSE); 808 MR_REFRELE(mr); 809 return; 810 } 811 812 mutex_exit(&mr->mr_lock); 813 /* We'll fall through to software classification */ 814 } else { 815 flow_entry_t *flent; 816 int err; 817 818 rw_enter(&mip->mi_rw_lock, RW_READER); 819 if (mip->mi_single_active_client != NULL) { 820 flent = mip->mi_single_active_client->mci_flent_list; 821 FLOW_TRY_REFHOLD(flent, err); 822 rw_exit(&mip->mi_rw_lock); 823 if (err == 0) { 824 (flent->fe_cb_fn)(flent->fe_cb_arg1, 825 flent->fe_cb_arg2, mp_chain, B_FALSE); 826 FLOW_REFRELE(flent); 827 return; 828 } 829 } else { 830 rw_exit(&mip->mi_rw_lock); 831 } 832 } 833 834 if (!FLOW_TAB_EMPTY(mip->mi_flow_tab)) { 835 if ((bp = mac_rx_flow(mh, mrh, bp)) == NULL) 836 return; 837 } 838 839 freemsgchain(bp); 840 } 841 842 /* DATA TRANSMISSION */ 843 844 /* 845 * A driver's notification to resume transmission, in case of a provider 846 * without TX rings. 847 */ 848 void 849 mac_tx_update(mac_handle_t mh) 850 { 851 mac_tx_ring_update(mh, NULL); 852 } 853 854 /* 855 * A driver's notification to resume transmission on the specified TX ring. 856 */ 857 void 858 mac_tx_ring_update(mac_handle_t mh, mac_ring_handle_t rh) 859 { 860 i_mac_tx_srs_notify((mac_impl_t *)mh, rh); 861 } 862 863 /* LINK STATE */ 864 /* 865 * Notify the MAC layer about a link state change 866 */ 867 void 868 mac_link_update(mac_handle_t mh, link_state_t link) 869 { 870 mac_impl_t *mip = (mac_impl_t *)mh; 871 872 /* 873 * Save the link state. 874 */ 875 mip->mi_lowlinkstate = link; 876 877 /* 878 * Send a MAC_NOTE_LOWLINK notification. This tells the notification 879 * thread to deliver both lower and upper notifications. 880 */ 881 i_mac_notify(mip, MAC_NOTE_LOWLINK); 882 } 883 884 /* 885 * Notify the MAC layer about a link state change due to bridging. 
 */
void
mac_link_redo(mac_handle_t mh, link_state_t link)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	/*
	 * Save the link state.
	 */
	mip->mi_linkstate = link;

	/*
	 * Send a MAC_NOTE_LINK notification. Only upper notifications are
	 * made.
	 */
	i_mac_notify(mip, MAC_NOTE_LINK);
}

/* MINOR NODE HANDLING */

/*
 * Given a dev_t, return the instance number (PPA) associated with it.
 * Drivers can use this in their getinfo(9e) implementation to lookup
 * the instance number (i.e. PPA) of the device, to use as an index to
 * their own array of soft state structures.
 *
 * Returns -1 on error.
 */
int
mac_devt_to_instance(dev_t devt)
{
	return (dld_devt_to_instance(devt));
}

/*
 * This function returns the first minor number that is available for
 * driver private use. All minor numbers smaller than this are
 * reserved for GLDv3 use.
 */
minor_t
mac_private_minor(void)
{
	return (MAC_PRIVATE_MINOR);
}

/* OTHER CONTROL INFORMATION */

/*
 * A driver notified us that its primary MAC address has changed.
 * No-op for MAC-Types with a zero address length.
 */
void
mac_unicst_update(mac_handle_t mh, const uint8_t *addr)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	if (mip->mi_type->mt_addr_length == 0)
		return;

	i_mac_perim_enter(mip);

	/*
	 * If address changes, freshen the MAC address value and update
	 * all MAC clients that share this MAC address.
	 */
	if (bcmp(addr, mip->mi_addr, mip->mi_type->mt_addr_length) != 0) {
		mac_freshen_macaddr(mac_find_macaddr(mip, mip->mi_addr),
		    (uint8_t *)addr);
	}

	i_mac_perim_exit(mip);

	/*
	 * Send a MAC_NOTE_UNICST notification.
	 */
	i_mac_notify(mip, MAC_NOTE_UNICST);
}

/*
 * A driver notified us that its destination address has changed. Update
 * the cached destination address (under the MAC perimeter) and send a
 * MAC_NOTE_DEST notification. No-op for MAC-Types with a zero address
 * length.
 */
void
mac_dst_update(mac_handle_t mh, const uint8_t *addr)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	if (mip->mi_type->mt_addr_length == 0)
		return;

	i_mac_perim_enter(mip);
	bcopy(addr, mip->mi_dstaddr, mip->mi_type->mt_addr_length);
	i_mac_perim_exit(mip);
	i_mac_notify(mip, MAC_NOTE_DEST);
}

/*
 * MAC plugin information changed. The new plugin data (mac_pdata,
 * dsize) is verified by the plugin, copied into a fresh allocation
 * replacing any previous copy, and fast-path information is flushed.
 * Returns EINVAL if the plugin does not support plugin data or rejects
 * the supplied data; 0 on success.
 */
int
mac_pdata_update(mac_handle_t mh, void *mac_pdata, size_t dsize)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	/*
	 * Verify that the plugin supports MAC plugin data and that the
	 * supplied data is valid.
	 */
	if (!(mip->mi_type->mt_ops.mtops_ops & MTOPS_PDATA_VERIFY))
		return (EINVAL);
	if (!mip->mi_type->mt_ops.mtops_pdata_verify(mac_pdata, dsize))
		return (EINVAL);

	if (mip->mi_pdata != NULL)
		kmem_free(mip->mi_pdata, mip->mi_pdata_size);

	mip->mi_pdata = kmem_alloc(dsize, KM_SLEEP);
	bcopy(mac_pdata, mip->mi_pdata, dsize);
	mip->mi_pdata_size = dsize;

	/*
	 * Since the MAC plugin data is used to construct MAC headers that
	 * were cached in fast-path headers, we need to flush fast-path
	 * information for links associated with this mac.
	 */
	i_mac_notify(mip, MAC_NOTE_FASTPATH_FLUSH);
	return (0);
}

/*
 * The mac provider or mac framework calls this function when it wants
 * to notify upstream consumers that the capabilities have changed and
 * that they should modify their own internal state accordingly.
 *
 * We currently have no regard for the fact that a provider could
 * decide to drop capabilities which would invalidate pending traffic.
 * For example, if one was to disable the Tx checksum offload while
 * TCP/IP traffic was being sent by mac clients relying on that
 * feature, then those packets would hit the wire with missing or
 * partial checksums.
 * A proper solution involves not only providing
 * notification, but also performing client quiescing. That is, a capab
 * change should be treated as an atomic transaction that forms a
 * barrier between traffic relying on the current capabs and traffic
 * relying on the new capabs. In practice, simnet is currently the
 * only provider that could hit this, and it's an easily avoidable
 * situation (and at worst it should only lead to some dropped
 * packets). But if we ever want better on-the-fly capab change to
 * actual hardware providers, then we should give this update
 * mechanism a proper implementation.
 */
void
mac_capab_update(mac_handle_t mh)
{
	/*
	 * Send a MAC_NOTE_CAPAB_CHG notification to alert upstream
	 * clients to renegotiate capabilities.
	 */
	i_mac_notify((mac_impl_t *)mh, MAC_NOTE_CAPAB_CHG);
}

/*
 * Used by normal drivers to update the max sdu size.
 * We need to handle the case of a smaller mi_sdu_multicast
 * since this is called by mac_set_mtu() even for drivers that
 * have differing unicast and multicast mtu and we don't want to
 * increase the multicast mtu by accident in that case.
 *
 * Returns EINVAL if sdu_max is zero or below the MAC's minimum SDU;
 * otherwise updates the sizes, sends MAC_NOTE_SDU_SIZE and returns 0.
 */
int
mac_maxsdu_update(mac_handle_t mh, uint_t sdu_max)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
		return (EINVAL);
	mip->mi_sdu_max = sdu_max;
	/* Clamp the multicast SDU so it never exceeds the new unicast max. */
	if (mip->mi_sdu_multicast > mip->mi_sdu_max)
		mip->mi_sdu_multicast = mip->mi_sdu_max;

	/* Send a MAC_NOTE_SDU_SIZE notification. */
	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
	return (0);
}

/*
 * Version of the above function that is used by drivers that have a different
 * max sdu size for multicast/broadcast vs. unicast.
 * A multicast SDU of 0 means "same as unicast". Returns EINVAL if
 * either SDU is out of the [mi_sdu_min, sdu_max] range; otherwise
 * updates both sizes, sends MAC_NOTE_SDU_SIZE and returns 0.
 */
int
mac_maxsdu_update2(mac_handle_t mh, uint_t sdu_max, uint_t sdu_multicast)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	if (sdu_max == 0 || sdu_max < mip->mi_sdu_min)
		return (EINVAL);
	if (sdu_multicast == 0)
		sdu_multicast = sdu_max;
	if (sdu_multicast > sdu_max || sdu_multicast < mip->mi_sdu_min)
		return (EINVAL);
	mip->mi_sdu_max = sdu_max;
	mip->mi_sdu_multicast = sdu_multicast;

	/* Send a MAC_NOTE_SDU_SIZE notification. */
	i_mac_notify(mip, MAC_NOTE_SDU_SIZE);
	return (0);
}

/*
 * Re-target a ring's interrupt to the CPU chosen for its SRS. This is
 * only done when the group is reserved for a single client and the ddi
 * interrupt handle is not shared with another ring.
 */
static void
mac_ring_intr_retarget(mac_group_t *group, mac_ring_t *ring)
{
	mac_client_impl_t	*mcip;
	flow_entry_t		*flent;
	mac_soft_ring_set_t	*mac_rx_srs;
	mac_cpus_t		*srs_cpu;
	int			i;

	if (((mcip = MAC_GROUP_ONLY_CLIENT(group)) != NULL) &&
	    (!ring->mr_info.mri_intr.mi_ddi_shared)) {
		/* interrupt can be re-targeted */
		ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
		flent = mcip->mci_flent;
		if (ring->mr_type == MAC_RING_TYPE_RX) {
			/* Find the Rx SRS bound to this ring, if any. */
			for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
				mac_rx_srs = flent->fe_rx_srs[i];
				if (mac_rx_srs->srs_ring != ring)
					continue;
				srs_cpu = &mac_rx_srs->srs_cpu;
				mutex_enter(&cpu_lock);
				mac_rx_srs_retarget_intr(mac_rx_srs,
				    srs_cpu->mc_rx_intr_cpu);
				mutex_exit(&cpu_lock);
				break;
			}
		} else {
			if (flent->fe_tx_srs != NULL) {
				mutex_enter(&cpu_lock);
				mac_tx_srs_retarget_intr(
				    flent->fe_tx_srs);
				mutex_exit(&cpu_lock);
			}
		}
	}
}

/*
 * Clients like aggr create pseudo rings (mac_ring_t) and expose them to
 * their clients. There is a 1-1 mapping pseudo ring and the hardware
 * ring. ddi interrupt handles are exported from the hardware ring to
 * the pseudo ring.
 * Thus when the interrupt handle changes, clients of
 * aggr that are using the handle need to use the new handle and
 * re-target their interrupts.
 *
 * NOTE(review): on return, the port's perimeter (mip) has been dropped
 * on every path; callers must not assume it is still held.
 */
static void
mac_pseudo_ring_intr_retarget(mac_impl_t *mip, mac_ring_t *ring,
    ddi_intr_handle_t ddh)
{
	mac_ring_t *pring;
	mac_group_t *pgroup;
	mac_impl_t *pmip;
	char macname[MAXNAMELEN];
	mac_perim_handle_t p_mph;
	uint64_t saved_gen_num;

again:
	pring = (mac_ring_t *)ring->mr_prh;
	pgroup = (mac_group_t *)pring->mr_gh;
	pmip = (mac_impl_t *)pgroup->mrg_mh;
	/* Snapshot the generation number to detect ring reassignment below. */
	saved_gen_num = ring->mr_gen_num;
	(void) strlcpy(macname, pmip->mi_name, MAXNAMELEN);
	/*
	 * We need to enter aggr's perimeter. The locking hierarchy
	 * dictates that aggr's perimeter should be entered first
	 * and then the port's perimeter. So drop the port's
	 * perimeter, enter aggr's and then re-enter port's
	 * perimeter.
	 */
	i_mac_perim_exit(mip);
	/*
	 * While we know pmip is the aggr's mip, there is a
	 * possibility that aggr could have unregistered by
	 * the time we exit port's perimeter (mip) and
	 * enter aggr's perimeter (pmip). To avoid that
	 * scenario, enter aggr's perimeter using its name.
	 */
	if (mac_perim_enter_by_macname(macname, &p_mph) != 0)
		return;
	i_mac_perim_enter(mip);
	/*
	 * Check if the ring got assigned to another aggregation before
	 * we could enter aggr's and the port's perimeter. When a ring
	 * gets deleted from an aggregation, it calls mac_stop_ring()
	 * which increments the generation number. So checking
	 * generation number will be enough.
	 */
	if (ring->mr_gen_num != saved_gen_num && ring->mr_prh != NULL) {
		/* Moved to a different aggr: drop both perimeters, retry. */
		i_mac_perim_exit(mip);
		mac_perim_exit(p_mph);
		i_mac_perim_enter(mip);
		goto again;
	}

	/* Check if pseudo ring is still present */
	if (ring->mr_prh != NULL) {
		/* Propagate the new handle/sharing state to the pseudo ring. */
		pring->mr_info.mri_intr.mi_ddi_handle = ddh;
		pring->mr_info.mri_intr.mi_ddi_shared =
		    ring->mr_info.mri_intr.mi_ddi_shared;
		if (ddh != NULL)
			mac_ring_intr_retarget(pgroup, pring);
	}
	i_mac_perim_exit(mip);
	mac_perim_exit(p_mph);
}
/*
 * API called by driver to provide new interrupt handle for TX/RX rings.
 * This usually happens when IRM (Interrupt Resource Management)
 * framework either gives the driver more MSI-x interrupts or takes
 * away MSI-x interrupts from the driver.
 */
void
mac_ring_intr_set(mac_ring_handle_t mrh, ddi_intr_handle_t ddh)
{
	mac_ring_t *ring = (mac_ring_t *)mrh;
	mac_group_t *group = (mac_group_t *)ring->mr_gh;
	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;

	i_mac_perim_enter(mip);
	ring->mr_info.mri_intr.mi_ddi_handle = ddh;
	if (ddh == NULL) {
		/* Interrupts being reset */
		ring->mr_info.mri_intr.mi_ddi_shared = B_FALSE;
		if (ring->mr_prh != NULL) {
			/*
			 * mac_pseudo_ring_intr_retarget() drops mip's
			 * perimeter itself, so return without exiting.
			 */
			mac_pseudo_ring_intr_retarget(mip, ring, ddh);
			return;
		}
	} else {
		/* New interrupt handle */
		mac_compare_ddi_handle(mip->mi_rx_groups,
		    mip->mi_rx_group_count, ring);
		if (!ring->mr_info.mri_intr.mi_ddi_shared) {
			mac_compare_ddi_handle(mip->mi_tx_groups,
			    mip->mi_tx_group_count, ring);
		}
		if (ring->mr_prh != NULL) {
			/* Perimeter dropped by the callee; see above. */
			mac_pseudo_ring_intr_retarget(mip, ring, ddh);
			return;
		} else {
			mac_ring_intr_retarget(group, ring);
		}
	}
	i_mac_perim_exit(mip);
}

/* PRIVATE FUNCTIONS, FOR INTERNAL USE ONLY */

/*
 * Updates the mac_impl structure with the current state of the link
 */
static
void
i_mac_log_link_state(mac_impl_t *mip)
{
	/*
	 * If no change, then it is not interesting.
	 */
	if (mip->mi_lastlowlinkstate == mip->mi_lowlinkstate)
		return;

	switch (mip->mi_lowlinkstate) {
	case LINK_STATE_UP:
		/* Include media details (speed/duplex etc.) if available. */
		if (mip->mi_type->mt_ops.mtops_ops & MTOPS_LINK_DETAILS) {
			char det[200];

			mip->mi_type->mt_ops.mtops_link_details(det,
			    sizeof (det), (mac_handle_t)mip, mip->mi_pdata);

			cmn_err(CE_NOTE, "!%s link up, %s", mip->mi_name, det);
		} else {
			cmn_err(CE_NOTE, "!%s link up", mip->mi_name);
		}
		break;

	case LINK_STATE_DOWN:
		/*
		 * Only transitions from UP to DOWN are interesting
		 */
		if (mip->mi_lastlowlinkstate != LINK_STATE_UNKNOWN)
			cmn_err(CE_NOTE, "!%s link down", mip->mi_name);
		break;

	case LINK_STATE_UNKNOWN:
		/*
		 * This case is normally not interesting.
		 */
		break;
	}
	/* Remember the state we just reported so we only log changes. */
	mip->mi_lastlowlinkstate = mip->mi_lowlinkstate;
}

/*
 * Main routine for the callbacks notifications thread.
 *
 * Sleeps on mcbi_cv until mi_notify_bits is non-zero, then dispatches
 * each pending notification to the default handlers and to registered
 * clients.  The out-of-range bit (1 << MAC_NNOTE) is the quit request
 * posted by i_mac_notify_exit().
 */
static void
i_mac_notify_thread(void *arg)
{
	mac_impl_t	*mip = arg;
	callb_cpr_t	cprinfo;
	mac_cb_t	*mcb;
	mac_cb_info_t	*mcbi;
	mac_notify_cb_t	*mncb;

	mcbi = &mip->mi_notify_cb_info;
	CALLB_CPR_INIT(&cprinfo, mcbi->mcbi_lockp, callb_generic_cpr,
	    "i_mac_notify_thread");

	mutex_enter(mcbi->mcbi_lockp);

	for (;;) {
		uint32_t	bits;
		uint32_t	type;

		/* Atomically consume the pending notification bits. */
		bits = mip->mi_notify_bits;
		if (bits == 0) {
			/* Nothing pending: CPR-safe wait for more work. */
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
			CALLB_CPR_SAFE_END(&cprinfo, mcbi->mcbi_lockp);
			continue;
		}
		mip->mi_notify_bits = 0;
		if ((bits & (1 << MAC_NNOTE)) != 0) {
			/* request to quit */
			ASSERT(mip->mi_state_flags & MIS_DISABLED);
			break;
		}

		/* Drop the lock while running callbacks. */
		mutex_exit(mcbi->mcbi_lockp);

		/*
		 * Log link changes on the actual link, but then do reports on
		 * synthetic state (if part of a bridge).
		 */
		if ((bits & (1 << MAC_NOTE_LOWLINK)) != 0) {
			link_state_t newstate;
			mac_handle_t mh;

			i_mac_log_link_state(mip);
			newstate = mip->mi_lowlinkstate;
			if (mip->mi_bridge_link != NULL) {
				/* Re-check the link under mi_bridge_lock. */
				mutex_enter(&mip->mi_bridge_lock);
				if ((mh = mip->mi_bridge_link) != NULL) {
					newstate = mac_bridge_ls_cb(mh,
					    newstate);
				}
				mutex_exit(&mip->mi_bridge_lock);
			}
			if (newstate != mip->mi_linkstate) {
				mip->mi_linkstate = newstate;
				/* Promote to a MAC_NOTE_LINK notification. */
				bits |= 1 << MAC_NOTE_LINK;
			}
		}

		/*
		 * Depending on which capabs have changed, the Tx
		 * checksum flags may also need to be updated.
		 */
		if ((bits & (1 << MAC_NOTE_CAPAB_CHG)) != 0) {
			mac_perim_handle_t mph;
			mac_handle_t mh = (mac_handle_t)mip;

			mac_perim_enter_by_mh(mh, &mph);
			mip->mi_tx_cksum_flags = mac_features_to_flags(mh);
			mac_perim_exit(mph);
		}

		/*
		 * Do notification callbacks for each notification type.
		 */
		for (type = 0; type < MAC_NNOTE; type++) {
			if ((bits & (1 << type)) == 0) {
				continue;
			}

			/* Run the framework's default handler, if any. */
			if (mac_notify_cb_list[type] != NULL)
				(*mac_notify_cb_list[type])(mip);

			/*
			 * Walk the list of notifications.
			 */
			MAC_CALLBACK_WALKER_INC(&mip->mi_notify_cb_info);
			for (mcb = mip->mi_notify_cb_list; mcb != NULL;
			    mcb = mcb->mcb_nextp) {
				mncb = (mac_notify_cb_t *)mcb->mcb_objp;
				mncb->mncb_fn(mncb->mncb_arg, type);
			}
			MAC_CALLBACK_WALKER_DCR(&mip->mi_notify_cb_info,
			    &mip->mi_notify_cb_list);
		}

		mutex_enter(mcbi->mcbi_lockp);
	}

	/* Tell i_mac_notify_exit() that we are done, then exit via CPR. */
	mip->mi_state_flags |= MIS_NOTIFY_DONE;
	cv_broadcast(&mcbi->mcbi_cv);

	/* CALLB_CPR_EXIT drops the lock */
	CALLB_CPR_EXIT(&cprinfo);
	thread_exit();
}

/*
 * Signal the i_mac_notify_thread asking it to quit.
 * Then wait till it is done.
1390 */ 1391 void 1392 i_mac_notify_exit(mac_impl_t *mip) 1393 { 1394 mac_cb_info_t *mcbi; 1395 1396 mcbi = &mip->mi_notify_cb_info; 1397 1398 mutex_enter(mcbi->mcbi_lockp); 1399 mip->mi_notify_bits = (1 << MAC_NNOTE); 1400 cv_broadcast(&mcbi->mcbi_cv); 1401 1402 1403 while ((mip->mi_notify_thread != NULL) && 1404 !(mip->mi_state_flags & MIS_NOTIFY_DONE)) { 1405 cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp); 1406 } 1407 1408 /* Necessary clean up before doing kmem_cache_free */ 1409 mip->mi_state_flags &= ~MIS_NOTIFY_DONE; 1410 mip->mi_notify_bits = 0; 1411 mip->mi_notify_thread = NULL; 1412 mutex_exit(mcbi->mcbi_lockp); 1413 } 1414 1415 /* 1416 * Entry point invoked by drivers to dynamically add a ring to an 1417 * existing group. 1418 */ 1419 int 1420 mac_group_add_ring(mac_group_handle_t gh, int index) 1421 { 1422 mac_group_t *group = (mac_group_t *)gh; 1423 mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 1424 int ret; 1425 1426 i_mac_perim_enter(mip); 1427 ret = i_mac_group_add_ring(group, NULL, index); 1428 i_mac_perim_exit(mip); 1429 return (ret); 1430 } 1431 1432 /* 1433 * Entry point invoked by drivers to dynamically remove a ring 1434 * from an existing group. The specified ring handle must no longer 1435 * be used by the driver after a call to this function. 1436 */ 1437 void 1438 mac_group_rem_ring(mac_group_handle_t gh, mac_ring_handle_t rh) 1439 { 1440 mac_group_t *group = (mac_group_t *)gh; 1441 mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 1442 1443 i_mac_perim_enter(mip); 1444 i_mac_group_rem_ring(group, (mac_ring_t *)rh, B_TRUE); 1445 i_mac_perim_exit(mip); 1446 } 1447 1448 /* 1449 * mac_prop_info_*() callbacks called from the driver's prefix_propinfo() 1450 * entry points. 
1451 */ 1452 1453 void 1454 mac_prop_info_set_default_uint8(mac_prop_info_handle_t ph, uint8_t val) 1455 { 1456 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; 1457 1458 /* nothing to do if the caller doesn't want the default value */ 1459 if (pr->pr_default == NULL) 1460 return; 1461 1462 ASSERT(pr->pr_default_size >= sizeof (uint8_t)); 1463 1464 *(uint8_t *)(pr->pr_default) = val; 1465 pr->pr_flags |= MAC_PROP_INFO_DEFAULT; 1466 } 1467 1468 void 1469 mac_prop_info_set_default_uint64(mac_prop_info_handle_t ph, uint64_t val) 1470 { 1471 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; 1472 1473 /* nothing to do if the caller doesn't want the default value */ 1474 if (pr->pr_default == NULL) 1475 return; 1476 1477 ASSERT(pr->pr_default_size >= sizeof (uint64_t)); 1478 1479 bcopy(&val, pr->pr_default, sizeof (val)); 1480 1481 pr->pr_flags |= MAC_PROP_INFO_DEFAULT; 1482 } 1483 1484 void 1485 mac_prop_info_set_default_uint32(mac_prop_info_handle_t ph, uint32_t val) 1486 { 1487 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; 1488 1489 /* nothing to do if the caller doesn't want the default value */ 1490 if (pr->pr_default == NULL) 1491 return; 1492 1493 ASSERT(pr->pr_default_size >= sizeof (uint32_t)); 1494 1495 bcopy(&val, pr->pr_default, sizeof (val)); 1496 1497 pr->pr_flags |= MAC_PROP_INFO_DEFAULT; 1498 } 1499 1500 void 1501 mac_prop_info_set_default_str(mac_prop_info_handle_t ph, const char *str) 1502 { 1503 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; 1504 1505 /* nothing to do if the caller doesn't want the default value */ 1506 if (pr->pr_default == NULL) 1507 return; 1508 1509 if (strlen(str) >= pr->pr_default_size) 1510 pr->pr_errno = ENOBUFS; 1511 else 1512 (void) strlcpy(pr->pr_default, str, pr->pr_default_size); 1513 pr->pr_flags |= MAC_PROP_INFO_DEFAULT; 1514 } 1515 1516 void 1517 mac_prop_info_set_default_link_flowctrl(mac_prop_info_handle_t ph, 1518 link_flowctrl_t val) 1519 { 1520 mac_prop_info_state_t *pr = 
(mac_prop_info_state_t *)ph; 1521 1522 /* nothing to do if the caller doesn't want the default value */ 1523 if (pr->pr_default == NULL) 1524 return; 1525 1526 ASSERT(pr->pr_default_size >= sizeof (link_flowctrl_t)); 1527 1528 bcopy(&val, pr->pr_default, sizeof (val)); 1529 1530 pr->pr_flags |= MAC_PROP_INFO_DEFAULT; 1531 } 1532 1533 void 1534 mac_prop_info_set_default_fec(mac_prop_info_handle_t ph, link_fec_t val) 1535 { 1536 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; 1537 1538 /* nothing to do if the caller doesn't want the default value */ 1539 if (pr->pr_default == NULL) 1540 return; 1541 1542 ASSERT(pr->pr_default_size >= sizeof (link_fec_t)); 1543 1544 bcopy(&val, pr->pr_default, sizeof (val)); 1545 1546 pr->pr_flags |= MAC_PROP_INFO_DEFAULT; 1547 } 1548 1549 void 1550 mac_prop_info_set_range_uint32(mac_prop_info_handle_t ph, uint32_t min, 1551 uint32_t max) 1552 { 1553 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; 1554 mac_propval_range_t *range = pr->pr_range; 1555 mac_propval_uint32_range_t *range32; 1556 1557 /* nothing to do if the caller doesn't want the range info */ 1558 if (range == NULL) 1559 return; 1560 1561 if (pr->pr_range_cur_count++ == 0) { 1562 /* first range */ 1563 pr->pr_flags |= MAC_PROP_INFO_RANGE; 1564 range->mpr_type = MAC_PROPVAL_UINT32; 1565 } else { 1566 /* all ranges of a property should be of the same type */ 1567 ASSERT(range->mpr_type == MAC_PROPVAL_UINT32); 1568 if (pr->pr_range_cur_count > range->mpr_count) { 1569 pr->pr_errno = ENOSPC; 1570 return; 1571 } 1572 } 1573 1574 range32 = range->mpr_range_uint32; 1575 range32[pr->pr_range_cur_count - 1].mpur_min = min; 1576 range32[pr->pr_range_cur_count - 1].mpur_max = max; 1577 } 1578 1579 void 1580 mac_prop_info_set_perm(mac_prop_info_handle_t ph, uint8_t perm) 1581 { 1582 mac_prop_info_state_t *pr = (mac_prop_info_state_t *)ph; 1583 1584 pr->pr_perm = perm; 1585 pr->pr_flags |= MAC_PROP_INFO_PERM; 1586 } 1587 1588 void 1589 mac_hcksum_get(const mblk_t 
*mp, uint32_t *start, uint32_t *stuff, 1590 uint32_t *end, uint32_t *value, uint32_t *flags_ptr) 1591 { 1592 uint32_t flags; 1593 1594 ASSERT(DB_TYPE(mp) == M_DATA); 1595 1596 flags = DB_CKSUMFLAGS(mp) & HCK_FLAGS; 1597 if ((flags & (HCK_PARTIALCKSUM | HCK_FULLCKSUM)) != 0) { 1598 if (value != NULL) 1599 *value = (uint32_t)DB_CKSUM16(mp); 1600 if ((flags & HCK_PARTIALCKSUM) != 0) { 1601 if (start != NULL) 1602 *start = (uint32_t)DB_CKSUMSTART(mp); 1603 if (stuff != NULL) 1604 *stuff = (uint32_t)DB_CKSUMSTUFF(mp); 1605 if (end != NULL) 1606 *end = (uint32_t)DB_CKSUMEND(mp); 1607 } 1608 } 1609 1610 if (flags_ptr != NULL) 1611 *flags_ptr = flags; 1612 } 1613 1614 void 1615 mac_hcksum_set(mblk_t *mp, uint32_t start, uint32_t stuff, uint32_t end, 1616 uint32_t value, uint32_t flags) 1617 { 1618 ASSERT(DB_TYPE(mp) == M_DATA); 1619 1620 DB_CKSUMSTART(mp) = (intptr_t)start; 1621 DB_CKSUMSTUFF(mp) = (intptr_t)stuff; 1622 DB_CKSUMEND(mp) = (intptr_t)end; 1623 DB_CKSUMFLAGS(mp) = (uint16_t)flags; 1624 DB_CKSUM16(mp) = (uint16_t)value; 1625 } 1626 1627 void 1628 mac_hcksum_clone(const mblk_t *src, mblk_t *dst) 1629 { 1630 ASSERT3U(DB_TYPE(src), ==, M_DATA); 1631 ASSERT3U(DB_TYPE(dst), ==, M_DATA); 1632 1633 /* 1634 * Do these assignments unconditionally, rather than only when 1635 * flags is non-zero. This protects a situation where zeroed 1636 * hcksum data does not make the jump onto an mblk_t with 1637 * stale data in those fields. It's important to copy all 1638 * possible flags (HCK_* as well as HW_*) and not just the 1639 * checksum specific flags. Dropping flags during a clone 1640 * could result in dropped packets. If the caller has good 1641 * reason to drop those flags then it should do it manually, 1642 * after the clone. 
1643 */ 1644 DB_CKSUMFLAGS(dst) = DB_CKSUMFLAGS(src); 1645 DB_CKSUMSTART(dst) = DB_CKSUMSTART(src); 1646 DB_CKSUMSTUFF(dst) = DB_CKSUMSTUFF(src); 1647 DB_CKSUMEND(dst) = DB_CKSUMEND(src); 1648 DB_CKSUM16(dst) = DB_CKSUM16(src); 1649 DB_LSOMSS(dst) = DB_LSOMSS(src); 1650 } 1651 1652 void 1653 mac_lso_get(mblk_t *mp, uint32_t *mss, uint32_t *flags) 1654 { 1655 ASSERT(DB_TYPE(mp) == M_DATA); 1656 1657 if (flags != NULL) { 1658 *flags = DB_CKSUMFLAGS(mp) & HW_LSO; 1659 if ((*flags != 0) && (mss != NULL)) 1660 *mss = (uint32_t)DB_LSOMSS(mp); 1661 } 1662 } 1663 1664 void 1665 mac_transceiver_info_set_present(mac_transceiver_info_t *infop, 1666 boolean_t present) 1667 { 1668 infop->mti_present = present; 1669 } 1670 1671 void 1672 mac_transceiver_info_set_usable(mac_transceiver_info_t *infop, 1673 boolean_t usable) 1674 { 1675 infop->mti_usable = usable; 1676 } 1677 1678 /* 1679 * We should really keep track of our offset and not walk everything every 1680 * time. I can't imagine that this will be kind to us at high packet rates; 1681 * however, for the moment, let's leave that. 1682 * 1683 * This walks a message block chain without pulling up to fill in the context 1684 * information. Note that the data we care about could be hidden across more 1685 * than one mblk_t. 
1686 */ 1687 static int 1688 mac_meoi_get_uint8(mblk_t *mp, off_t off, uint8_t *out) 1689 { 1690 size_t mpsize; 1691 uint8_t *bp; 1692 1693 mpsize = msgsize(mp); 1694 /* Check for overflow */ 1695 if (off + sizeof (uint16_t) > mpsize) 1696 return (-1); 1697 1698 mpsize = MBLKL(mp); 1699 while (off >= mpsize) { 1700 mp = mp->b_cont; 1701 off -= mpsize; 1702 mpsize = MBLKL(mp); 1703 } 1704 1705 bp = mp->b_rptr + off; 1706 *out = *bp; 1707 return (0); 1708 1709 } 1710 1711 static int 1712 mac_meoi_get_uint16(mblk_t *mp, off_t off, uint16_t *out) 1713 { 1714 size_t mpsize; 1715 uint8_t *bp; 1716 1717 mpsize = msgsize(mp); 1718 /* Check for overflow */ 1719 if (off + sizeof (uint16_t) > mpsize) 1720 return (-1); 1721 1722 mpsize = MBLKL(mp); 1723 while (off >= mpsize) { 1724 mp = mp->b_cont; 1725 off -= mpsize; 1726 mpsize = MBLKL(mp); 1727 } 1728 1729 /* 1730 * Data is in network order. Note the second byte of data might be in 1731 * the next mp. 1732 */ 1733 bp = mp->b_rptr + off; 1734 *out = *bp << 8; 1735 if (off + 1 == mpsize) { 1736 mp = mp->b_cont; 1737 bp = mp->b_rptr; 1738 } else { 1739 bp++; 1740 } 1741 1742 *out |= *bp; 1743 return (0); 1744 1745 } 1746 1747 1748 int 1749 mac_ether_offload_info(mblk_t *mp, mac_ether_offload_info_t *meoi) 1750 { 1751 size_t off; 1752 uint16_t ether; 1753 uint8_t ipproto, iplen, l4len, maclen; 1754 1755 bzero(meoi, sizeof (mac_ether_offload_info_t)); 1756 1757 meoi->meoi_len = msgsize(mp); 1758 off = offsetof(struct ether_header, ether_type); 1759 if (mac_meoi_get_uint16(mp, off, ðer) != 0) 1760 return (-1); 1761 1762 if (ether == ETHERTYPE_VLAN) { 1763 off = offsetof(struct ether_vlan_header, ether_type); 1764 if (mac_meoi_get_uint16(mp, off, ðer) != 0) 1765 return (-1); 1766 meoi->meoi_flags |= MEOI_VLAN_TAGGED; 1767 maclen = sizeof (struct ether_vlan_header); 1768 } else { 1769 maclen = sizeof (struct ether_header); 1770 } 1771 meoi->meoi_flags |= MEOI_L2INFO_SET; 1772 meoi->meoi_l2hlen = maclen; 1773 meoi->meoi_l3proto = 
ether; 1774 1775 switch (ether) { 1776 case ETHERTYPE_IP: 1777 /* 1778 * For IPv4 we need to get the length of the header, as it can 1779 * be variable. 1780 */ 1781 off = offsetof(ipha_t, ipha_version_and_hdr_length) + maclen; 1782 if (mac_meoi_get_uint8(mp, off, &iplen) != 0) 1783 return (-1); 1784 iplen &= 0x0f; 1785 if (iplen < 5 || iplen > 0x0f) 1786 return (-1); 1787 iplen *= 4; 1788 off = offsetof(ipha_t, ipha_protocol) + maclen; 1789 if (mac_meoi_get_uint8(mp, off, &ipproto) == -1) 1790 return (-1); 1791 break; 1792 case ETHERTYPE_IPV6: 1793 iplen = 40; 1794 off = offsetof(ip6_t, ip6_nxt) + maclen; 1795 if (mac_meoi_get_uint8(mp, off, &ipproto) == -1) 1796 return (-1); 1797 break; 1798 default: 1799 return (0); 1800 } 1801 meoi->meoi_l3hlen = iplen; 1802 meoi->meoi_l4proto = ipproto; 1803 meoi->meoi_flags |= MEOI_L3INFO_SET; 1804 1805 switch (ipproto) { 1806 case IPPROTO_TCP: 1807 off = offsetof(tcph_t, th_offset_and_rsrvd) + maclen + iplen; 1808 if (mac_meoi_get_uint8(mp, off, &l4len) == -1) 1809 return (-1); 1810 l4len = (l4len & 0xf0) >> 4; 1811 if (l4len < 5 || l4len > 0xf) 1812 return (-1); 1813 l4len *= 4; 1814 break; 1815 case IPPROTO_UDP: 1816 l4len = sizeof (struct udphdr); 1817 break; 1818 case IPPROTO_SCTP: 1819 l4len = sizeof (sctp_hdr_t); 1820 break; 1821 default: 1822 return (0); 1823 } 1824 1825 meoi->meoi_l4hlen = l4len; 1826 meoi->meoi_flags |= MEOI_L4INFO_SET; 1827 return (0); 1828 } 1829