/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2020 Joyent, Inc.
 * Copyright 2015 Garrett D'Amore <garrett@damore.org>
 * Copyright 2020 RackTop Systems, Inc.
 */

/*
 * MAC Services Module
 *
 * The GLDv3 framework locking - The MAC layer
 * --------------------------------------------
 *
 * The MAC layer is central to the GLD framework and can provide the locking
 * framework needed for itself and for the use of MAC clients. MAC end points
 * are fairly disjoint and don't share a lot of state. So a coarse-grained
 * multi-threading scheme is to single-thread all create/modify/delete or set
 * type of control operations on a per mac end point basis while allowing
 * data threads to proceed concurrently.
 *
 * Control operations (set) that modify a mac end point are always serialized
 * on a per mac end point basis; we have at most one such thread per mac end
 * point at a time.
 *
 * All other operations that are not serialized are essentially multi-threaded.
 * For example, a control operation (get) like getting statistics may not care
 * about reading values atomically, nor do data threads sending or receiving
 * data. Mostly these types of operations don't modify the control state. Any
 * state these operations care about is protected using traditional locks.
 *
 * The perimeter only serializes serial operations. It does not imply there
 * aren't any other concurrent operations. However, a serialized operation may
 * sometimes need to make sure it is the only thread. In this case it needs
 * to use reference counting mechanisms to cv_wait until any current data
 * threads are done.
 *
 * The mac layer itself does not hold any locks across a call to another layer.
 * The perimeter is, however, held across a down call to the driver to make the
 * whole control operation atomic with respect to other control operations.
 * Also the data path and get type control operations may proceed concurrently.
 * These operations synchronize with the single serial operation on a given mac
 * end point using regular locks. The perimeter ensures that conflicting
 * operations, like a mac_multicast_add and a mac_multicast_remove on the
 * same mac end point, don't interfere with each other, and also ensures that
 * the changes in the mac layer and the call to the underlying driver to, say,
 * add a multicast address are done atomically without interference from a
 * thread trying to delete the same address.
 *
 * For example, consider
 * mac_multicast_add()
 * {
 *	mac_perimeter_enter();		serialize all control operations
 *
 *	grab list lock			protect against access by data threads
 *	add to list
 *	drop list lock
 *
 *	call driver's mi_multicst
 *
 *	mac_perimeter_exit();
 * }
 *
 * To lessen the number of serialization locks and simplify the lock hierarchy,
 * we serialize all the control operations on a per mac end point basis by
 * using a single serialization lock called the perimeter. We allow recursive
 * entry into the perimeter to facilitate use of this mechanism by both the
 * mac client and the MAC layer itself.
 *
 * MAC client means an entity that does an operation on a mac handle
 * obtained from a mac_open/mac_client_open. Similarly MAC driver means
 * an entity that does an operation on a mac handle obtained from a
 * mac_register. An entity could be both client and driver but on different
 * handles, e.g. aggr, and should only make the corresponding mac interface
 * calls, i.e. mac driver interface or mac client interface, as appropriate
 * for that mac handle.
 *
 * General rules.
 * -------------
 *
 * R1. The lock order of upcall threads is naturally opposite to downcall
 * threads. Hence upcalls must not hold any locks across layers for fear of
 * recursive lock enter and lock order violation. This applies to all layers.
 *
 * R2. The perimeter is just another lock. Since it is held in the down
 * direction, acquiring the perimeter in an upcall is prohibited as it would
 * cause a deadlock. This applies to all layers.
 *
 * Note that upcalls that need to grab the mac perimeter (for example
 * mac_notify upcalls) can still achieve that by posting the request to a
 * thread, which can then grab all the required perimeters and locks in the
 * right global order. Note that in the above example the mac layer itself
 * won't grab the mac perimeter in the mac_notify upcall; instead, the upcall
 * to the client must do that. Please see the aggr code for an example.
 *
 * MAC client rules
 * ----------------
 *
 * R3. A MAC client may use the MAC provided perimeter facility to serialize
 * control operations on a per mac end point basis. It does this by acquiring
 * and holding the perimeter across a sequence of calls to the mac layer.
 * This ensures atomicity across the entire block of mac calls. In this
 * model the MAC client must not hold any client locks across the calls to
 * the mac layer. This model is the preferred solution.
 *
 * R4. However, if a MAC client has a lot of global state across all mac end
 * points, the per mac end point serialization may not be sufficient. In this
 * case the client may choose to use global locks or use its own serialization.
 * To avoid deadlocks, these client layer locks held across the mac calls
 * in the control path must never be acquired by the data path for the reason
 * mentioned below.
 *
 * (Assume that a control operation that holds a client lock blocks in the
 * mac layer waiting for upcall reference counts to drop to zero. If an upcall
 * data thread that holds this reference count subsequently tries to acquire
 * the same client lock, it will deadlock.)
 *
 * A MAC client may follow either the R3 model or the R4 model, but can't
 * mix both. In the former, the hierarchy is Perim -> client locks, but in
 * the latter it is client locks -> Perim.
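 *
 * As an illustrative sketch of the R3 model (hypothetical client code;
 * mac_perim_enter_by_mh/mac_perim_exit are the real entry points defined
 * later in this file, while the mac_unicast_add/mac_multicast_add calls
 * merely stand in for any sequence of mac calls):
 *
 *	mac_perim_handle_t mph;
 *
 *	mac_perim_enter_by_mh(mh, &mph);	serialize control operations
 *	err = mac_unicast_add(...);		this block of mac calls is
 *	err = mac_multicast_add(...);		atomic as a whole
 *	mac_perim_exit(mph);
 *
 * Per R3, the client holds none of its own locks across this block.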
 *
 * R5. MAC clients must make MAC calls (excluding data calls) in a cv_wait'able
 * context since they may block while trying to acquire the perimeter.
 * In addition some calls may block waiting for upcall refcnts to come down to
 * zero.
 *
 * R6. MAC clients must make sure that they are single threaded and all threads
 * from the top (in particular data threads) have finished before calling
 * mac_client_close. The MAC framework does not track the number of client
 * threads using the mac client handle. Also mac clients must make sure
 * they have undone all the control operations before calling mac_client_close.
 * For example mac_unicast_remove/mac_multicast_remove to undo the corresponding
 * mac_unicast_add/mac_multicast_add.
 *
 * MAC framework rules
 * -------------------
 *
 * R7. The mac layer itself must not hold any mac layer locks (except the mac
 * perimeter) across a call to any other layer from the mac layer. The call to
 * any other layer could be via mi_* entry points, classifier entry points into
 * the driver or via upcall pointers into layers above. The mac perimeter may
 * be acquired or held only in the down direction, e.g. when calling into
 * a mi_* driver entry point to provide atomicity of the operation.
 *
 * R8. Since it is not guaranteed (see R14) that drivers won't hold locks across
 * mac driver interfaces, the MAC layer must provide a cut out for control
 * interfaces like upcall notifications and start them in a separate thread.
 *
 * R9. Note that locking order also implies a plumbing order. For example
 * VNICs are allowed to be created over aggrs, but not vice-versa. An attempt
 * to plumb in any other order must be failed at mac_open time, otherwise it
 * could lead to deadlocks due to inverse locking order.
 *
 * R10. MAC driver interfaces must not block since the driver could call them
 * in interrupt context.
 *
 * R11. Walkers must preferably not hold any locks while calling walker
 * callbacks. Instead these can operate on reference counts. In simple
 * callbacks it may be ok to hold a lock and call the callbacks, but this is
 * harder to maintain in the general case of arbitrary callbacks.
 *
 * R12. The MAC layer must protect upcall notification callbacks using reference
 * counts rather than holding locks across the callbacks.
 *
 * R13. Given the variety of drivers, it is preferable if the MAC layer can make
 * sure that any pointers (such as mac ring pointers) it passes to the driver
 * remain valid until mac unregister time. Currently the mac layer achieves
 * this by using generation numbers for rings and freeing the mac rings only
 * at unregister time. The MAC layer must provide a layer of indirection and
 * must not expose underlying driver rings or driver data structures/pointers
 * directly to MAC clients.
 *
 * MAC driver rules
 * ----------------
 *
 * R14. It would be preferable if MAC drivers don't hold any locks across any
 * mac call. However at a minimum they must not hold any locks across data
 * upcalls. They must also make sure that all references to mac data structures
 * are cleaned up and that it is single threaded at mac_unregister time.
 *
 * R15. MAC driver interfaces don't block and so the action may be done
 * asynchronously in a separate thread as, for example, when handling
 * notifications. The driver must not assume that the action is complete when
 * the call returns.
 *
 * R16. Drivers must maintain a generation number per Rx ring, and pass it
 * back to mac_rx_ring(); they are expected to increment the generation
 * number whenever the ring's stop routine is invoked.
 * See comments in mac_rx_ring().
 *
 * R17. Similarly mi_stop is another synchronization point and the driver must
 * ensure that all upcalls are done and there won't be any future upcall
 * before returning from mi_stop.
 *
 * R18. The driver may assume that all set/modify control operations via
 * the mi_* entry points are single threaded on a per mac end point basis.
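 *
 * As a hedged driver-side sketch of R16 (the xx_* names are hypothetical
 * driver constructs; only mac_rx_ring() is the real interface):
 *
 *	static void
 *	xx_ring_stop(mac_ring_driver_t arg)
 *	{
 *		xx_ring_t *rp = (xx_ring_t *)arg;
 *
 *		rp->xr_gen_num++;	increment generation on ring stop
 *	}
 *
 * and in the driver's Rx path:
 *
 *	mac_rx_ring(rp->xr_mh, rp->xr_rh, mp_chain, rp->xr_gen_num);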
 *
 * Lock and Perimeter hierarchy scenarios
 * ---------------------------------------
 *
 * i_mac_impl_lock -> mi_rw_lock -> srs_lock -> s_ring_lock[i_mac_tx_srs_notify]
 *
 * ft_lock -> fe_lock [mac_flow_lookup]
 *
 * mi_rw_lock -> fe_lock [mac_bcast_send]
 *
 * srs_lock -> mac_bw_lock [mac_rx_srs_drain_bw]
 *
 * cpu_lock -> mac_srs_g_lock -> srs_lock -> s_ring_lock [mac_walk_srs_and_bind]
 *
 * i_dls_devnet_lock -> mac layer locks [dls_devnet_rename]
 *
 * Perimeters are ordered P1 -> P2 -> P3 from top to bottom in order of mac
 * client to driver. In the case of clients that explicitly use the mac
 * provided perimeter mechanism for their serialization, the hierarchy is
 * Perimeter -> mac layer locks, since the client never holds any locks across
 * the mac calls. In the case of clients that use their own locks the hierarchy
 * is Client locks -> Mac Perim -> Mac layer locks. The client never explicitly
 * calls mac_perim_enter/exit in this case.
 *
 * Subflow creation rules
 * ---------------------------
 * o In case of a user specified cpulist present on the underlying link and
 *   flows, the flow's cpulist must be a subset of the underlying link's.
 * o In case of a user specified fanout mode present on link and flow, the
 *   subflow fanout count has to be less than or equal to that of the
 *   underlying link. The cpu-bindings for the subflows will be a subset of
 *   the underlying link's.
 * o If no cpulist is specified on either the underlying link or the flow,
 *   the underlying link relies on a MAC tunable to provide an out of the box
 *   fanout. The subflow will have no cpulist (the subflow will be unbound).
 * o If no cpulist is specified on the underlying link, a subflow can
 *   carry either a user-specified cpulist or fanout count. The cpu-bindings
 *   for the subflow will not adhere to the restriction that they need to be
 *   a subset of the underlying link's.
 * o In the case where the underlying link is carrying either a user specified
 *   cpulist or fanout mode and the subflow is unspecified, the subflow will
 *   be created unbound.
 * o While creating unbound subflows, bandwidth mode changes attempt to
 *   figure out a right fanout count. In such cases the fanout count will
 *   override the unbound cpu-binding behavior.
 * o In addition to this, while cycling between flow and link properties, we
 *   impose a restriction that if a link property has a subflow with
 *   user-specified attributes, we will not allow changing the link property.
 *   The administrator needs to reset all the user specified properties for
 *   the subflows before attempting a link property change.
 *
 * Some of the above rules can be overridden by specifying additional command
 * line options while creating or modifying link or subflow properties.
 *
 * Datapath
 * --------
 *
 * For information on the datapath, the world of soft rings, hardware rings,
 * how it is structured, and the path of an mblk_t between a driver and a mac
 * client, see mac_sched.c.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/id_space.h>
#include <sys/esunddi.h>
#include <sys/stat.h>
#include <sys/mkdev.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/dlpi.h>
#include <sys/list.h>
#include <sys/modhash.h>
#include <sys/mac_provider.h>
#include <sys/mac_client_impl.h>
#include <sys/mac_soft_ring.h>
#include <sys/mac_stat.h>
#include <sys/mac_impl.h>
#include <sys/mac.h>
#include <sys/dls.h>
#include <sys/dld.h>
#include <sys/modctl.h>
#include <sys/fs/dv_node.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/callb.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <sys/bitmap.h>
#include <sys/sdt.h>
#include <sys/mac_flow.h>
#include <sys/ddi_intr_impl.h>
#include <sys/disp.h>
#include <sys/vnic.h>
#include <sys/vnic_impl.h>
#include <sys/vlan.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <sys/exacct.h>
#include <sys/exacct_impl.h>
#include <inet/nd.h>
#include <sys/ethernet.h>
#include <sys/pool.h>
#include <sys/pool_pset.h>
#include <sys/cpupart.h>
#include <inet/wifi_ioctl.h>
#include <net/wpa.h>

#define	IMPL_HASHSZ	67	/* prime */

kmem_cache_t		*i_mac_impl_cachep;
mod_hash_t		*i_mac_impl_hash;
krwlock_t		i_mac_impl_lock;
uint_t			i_mac_impl_count;
static kmem_cache_t	*mac_ring_cache;
static id_space_t	*minor_ids;
static uint32_t		minor_count;
static pool_event_cb_t	mac_pool_event_reg;

/*
 * Logging stuff. Perhaps mac_logging_interval could be broken into
 * mac_flow_log_interval and mac_link_log_interval if we want to be
 * able to schedule them differently.
 */
uint_t			mac_logging_interval;
boolean_t		mac_flow_log_enable;
boolean_t		mac_link_log_enable;
timeout_id_t		mac_logging_timer;

#define	MACTYPE_KMODDIR	"mac"
#define	MACTYPE_HASHSZ	67
static mod_hash_t	*i_mactype_hash;
/*
 * i_mactype_lock synchronizes threads that obtain references to mactype_t
 * structures through i_mactype_getplugin().
 */
static kmutex_t		i_mactype_lock;

/*
 * mac_tx_percpu_cnt
 *
 * Number of per cpu locks per mac_client_impl_t. Used by the transmit side
 * in mac_tx to reduce lock contention. This is sized at boot time in mac_init.
 * mac_tx_percpu_cnt_max is settable in /etc/system and must be a power of 2.
 * Per cpu locks may be disabled by setting mac_tx_percpu_cnt_max to 1.
 */
int mac_tx_percpu_cnt;
int mac_tx_percpu_cnt_max = 128;

/*
 * Callback functions for the bridge module. These are guaranteed to be valid
 * when holding a reference on a link or when holding mip->mi_bridge_lock and
 * mi_bridge_link is non-NULL.
 */
371 */ 372 mac_bridge_tx_t mac_bridge_tx_cb; 373 mac_bridge_rx_t mac_bridge_rx_cb; 374 mac_bridge_ref_t mac_bridge_ref_cb; 375 mac_bridge_ls_t mac_bridge_ls_cb; 376 377 static int i_mac_constructor(void *, void *, int); 378 static void i_mac_destructor(void *, void *); 379 static int i_mac_ring_ctor(void *, void *, int); 380 static void i_mac_ring_dtor(void *, void *); 381 static mblk_t *mac_rx_classify(mac_impl_t *, mac_resource_handle_t, mblk_t *); 382 void mac_tx_client_flush(mac_client_impl_t *); 383 void mac_tx_client_block(mac_client_impl_t *); 384 static void mac_rx_ring_quiesce(mac_ring_t *, uint_t); 385 static int mac_start_group_and_rings(mac_group_t *); 386 static void mac_stop_group_and_rings(mac_group_t *); 387 static void mac_pool_event_cb(pool_event_t, int, void *); 388 389 typedef struct netinfo_s { 390 list_node_t ni_link; 391 void *ni_record; 392 int ni_size; 393 int ni_type; 394 } netinfo_t; 395 396 /* 397 * Module initialization functions. 398 */ 399 400 void 401 mac_init(void) 402 { 403 mac_tx_percpu_cnt = ((boot_max_ncpus == -1) ? max_ncpus : 404 boot_max_ncpus); 405 406 /* Upper bound is mac_tx_percpu_cnt_max */ 407 if (mac_tx_percpu_cnt > mac_tx_percpu_cnt_max) 408 mac_tx_percpu_cnt = mac_tx_percpu_cnt_max; 409 410 if (mac_tx_percpu_cnt < 1) { 411 /* Someone set max_tx_percpu_cnt_max to 0 or less */ 412 mac_tx_percpu_cnt = 1; 413 } 414 415 ASSERT(mac_tx_percpu_cnt >= 1); 416 mac_tx_percpu_cnt = (1 << highbit(mac_tx_percpu_cnt - 1)); 417 /* 418 * Make it of the form 2**N - 1 in the range 419 * [0 .. mac_tx_percpu_cnt_max - 1] 420 */ 421 mac_tx_percpu_cnt--; 422 423 i_mac_impl_cachep = kmem_cache_create("mac_impl_cache", 424 sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor, 425 NULL, NULL, NULL, 0); 426 ASSERT(i_mac_impl_cachep != NULL); 427 428 mac_ring_cache = kmem_cache_create("mac_ring_cache", 429 sizeof (mac_ring_t), 0, i_mac_ring_ctor, i_mac_ring_dtor, NULL, 430 NULL, NULL, 0); 431 ASSERT(mac_ring_cache != NULL); 432 433 i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash", 434 IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor, 435 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 436 rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL); 437 438 mac_flow_init(); 439 mac_soft_ring_init(); 440 mac_bcast_init(); 441 mac_client_init(); 442 443 i_mac_impl_count = 0; 444 445 i_mactype_hash = mod_hash_create_extended("mactype_hash", 446 MACTYPE_HASHSZ, 447 mod_hash_null_keydtor, mod_hash_null_valdtor, 448 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 449 450 /* 451 * Allocate an id space to manage minor numbers. The range of the 452 * space will be from MAC_MAX_MINOR+1 to MAC_PRIVATE_MINOR-1. This 453 * leaves half of the 32-bit minors available for driver private use. 
	i_mac_impl_cachep = kmem_cache_create("mac_impl_cache",
	    sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor,
	    NULL, NULL, NULL, 0);
	ASSERT(i_mac_impl_cachep != NULL);

	mac_ring_cache = kmem_cache_create("mac_ring_cache",
	    sizeof (mac_ring_t), 0, i_mac_ring_ctor, i_mac_ring_dtor, NULL,
	    NULL, NULL, 0);
	ASSERT(mac_ring_cache != NULL);

	i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash",
	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
	rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL);

	mac_flow_init();
	mac_soft_ring_init();
	mac_bcast_init();
	mac_client_init();

	i_mac_impl_count = 0;

	i_mactype_hash = mod_hash_create_extended("mactype_hash",
	    MACTYPE_HASHSZ,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * Allocate an id space to manage minor numbers. The range of the
	 * space will be from MAC_MAX_MINOR+1 to MAC_PRIVATE_MINOR-1. This
	 * leaves half of the 32-bit minors available for driver private use.
	 */
	minor_ids = id_space_create("mac_minor_ids", MAC_MAX_MINOR+1,
	    MAC_PRIVATE_MINOR-1);
	ASSERT(minor_ids != NULL);
	minor_count = 0;

	/* Let's default to 20 seconds */
	mac_logging_interval = 20;
	mac_flow_log_enable = B_FALSE;
	mac_link_log_enable = B_FALSE;
	mac_logging_timer = NULL;

	/* Register to be notified of noteworthy pool events */
	mac_pool_event_reg.pec_func = mac_pool_event_cb;
	mac_pool_event_reg.pec_arg = NULL;
	pool_event_cb_register(&mac_pool_event_reg);
}

int
mac_fini(void)
{
	if (i_mac_impl_count > 0 || minor_count > 0)
		return (EBUSY);

	pool_event_cb_unregister(&mac_pool_event_reg);

	id_space_destroy(minor_ids);
	mac_flow_fini();

	mod_hash_destroy_hash(i_mac_impl_hash);
	rw_destroy(&i_mac_impl_lock);

	mac_client_fini();
	kmem_cache_destroy(mac_ring_cache);

	mod_hash_destroy_hash(i_mactype_hash);
	mac_soft_ring_finish();

	return (0);
}

/*
 * Initialize a GLDv3 driver's device ops. A driver that manages its own ops
 * (e.g. softmac) may pass in a NULL ops argument.
 */
void
mac_init_ops(struct dev_ops *ops, const char *name)
{
	major_t major = ddi_name_to_major((char *)name);

	/*
	 * By returning on error below, we are not letting the driver continue
	 * in an undefined context. The mac_register() function will fail if
	 * DN_GLDV3_DRIVER isn't set.
	 */
	if (major == DDI_MAJOR_T_NONE)
		return;
	LOCK_DEV_OPS(&devnamesp[major].dn_lock);
	devnamesp[major].dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER);
	UNLOCK_DEV_OPS(&devnamesp[major].dn_lock);
	if (ops != NULL)
		dld_init_ops(ops, name);
}

void
mac_fini_ops(struct dev_ops *ops)
{
	dld_fini_ops(ops);
}

/*ARGSUSED*/
static int
i_mac_constructor(void *buf, void *arg, int kmflag)
{
	mac_impl_t *mip = buf;

	bzero(buf, sizeof (mac_impl_t));

	mip->mi_linkstate = LINK_STATE_UNKNOWN;

	rw_init(&mip->mi_rw_lock, NULL, RW_DRIVER, NULL);
	mutex_init(&mip->mi_notify_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&mip->mi_promisc_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&mip->mi_ring_lock, NULL, MUTEX_DEFAULT, NULL);

	mip->mi_notify_cb_info.mcbi_lockp = &mip->mi_notify_lock;
	cv_init(&mip->mi_notify_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);
	mip->mi_promisc_cb_info.mcbi_lockp = &mip->mi_promisc_lock;
	cv_init(&mip->mi_promisc_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);

	mutex_init(&mip->mi_bridge_lock, NULL, MUTEX_DEFAULT, NULL);

	return (0);
}

/*ARGSUSED*/
static void
i_mac_destructor(void *buf, void *arg)
{
	mac_impl_t	*mip = buf;
	mac_cb_info_t	*mcbi;

	ASSERT(mip->mi_ref == 0);
	ASSERT(mip->mi_active == 0);
	ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN);
	ASSERT(mip->mi_devpromisc == 0);
	ASSERT(mip->mi_ksp == NULL);
	ASSERT(mip->mi_kstat_count == 0);
	ASSERT(mip->mi_nclients == 0);
	ASSERT(mip->mi_nactiveclients == 0);
	ASSERT(mip->mi_single_active_client == NULL);
	ASSERT(mip->mi_state_flags == 0);
	ASSERT(mip->mi_factory_addr == NULL);
	ASSERT(mip->mi_factory_addr_num == 0);
	ASSERT(mip->mi_default_tx_ring == NULL);

	mcbi = &mip->mi_notify_cb_info;
	ASSERT(mcbi->mcbi_del_cnt == 0 && mcbi->mcbi_walker_cnt == 0);
	ASSERT(mip->mi_notify_bits == 0);
	ASSERT(mip->mi_notify_thread == NULL);
	ASSERT(mcbi->mcbi_lockp == &mip->mi_notify_lock);
	mcbi->mcbi_lockp = NULL;

	mcbi = &mip->mi_promisc_cb_info;
	ASSERT(mcbi->mcbi_del_cnt == 0 && mip->mi_promisc_list == NULL);
	ASSERT(mip->mi_promisc_list == NULL);
	ASSERT(mcbi->mcbi_lockp == &mip->mi_promisc_lock);
	mcbi->mcbi_lockp = NULL;

	ASSERT(mip->mi_bcast_ngrps == 0 && mip->mi_bcast_grp == NULL);
	ASSERT(mip->mi_perim_owner == NULL && mip->mi_perim_ocnt == 0);

	rw_destroy(&mip->mi_rw_lock);

	mutex_destroy(&mip->mi_promisc_lock);
	cv_destroy(&mip->mi_promisc_cb_info.mcbi_cv);
	mutex_destroy(&mip->mi_notify_lock);
	cv_destroy(&mip->mi_notify_cb_info.mcbi_cv);
	mutex_destroy(&mip->mi_ring_lock);

	ASSERT(mip->mi_bridge_link == NULL);
}

/* ARGSUSED */
static int
i_mac_ring_ctor(void *buf, void *arg, int kmflag)
{
	mac_ring_t *ring = (mac_ring_t *)buf;

	bzero(ring, sizeof (mac_ring_t));
	cv_init(&ring->mr_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&ring->mr_lock, NULL, MUTEX_DEFAULT, NULL);
	ring->mr_state = MR_FREE;
	return (0);
}

/* ARGSUSED */
static void
i_mac_ring_dtor(void *buf, void *arg)
{
	mac_ring_t *ring = (mac_ring_t *)buf;

	cv_destroy(&ring->mr_cv);
	mutex_destroy(&ring->mr_lock);
}

/*
 * Common functions to do mac callback addition and deletion. Currently this is
 * used by promisc callbacks and notify callbacks. List addition and deletion
 * need to take care of list walkers. List walkers in general can't hold list
 * locks and make upcall callbacks due to potential lock order and recursive
 * reentry issues. Instead list walkers increment the list walker count to mark
 * the presence of a walker thread. Addition can be carefully done to ensure
 * that the list walker always sees either the old list or the new list.
 * However the deletion can't be done while the walker is active; instead the
 * deleting thread simply marks the entry as logically deleted. The last walker
 * physically deletes and frees up the logically deleted entries when the walk
 * is complete.
 */
void
mac_callback_add(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    mac_cb_t *mcb_elem)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;

	/* Verify it is not already in the list */
	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
		if (p == mcb_elem)
			break;
	}
	VERIFY(p == NULL);

	/*
	 * Add it to the head of the callback list. The membar ensures that
	 * the following list pointer manipulations reach global visibility
	 * in exactly the program order below.
	 */
	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));

	mcb_elem->mcb_nextp = *mcb_head;
	membar_producer();
	*mcb_head = mcb_elem;
}
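
/*
 * An illustrative sketch of the walker pattern described above (callers
 * vary; this is hypothetical consumer code, not a verbatim caller). The
 * walk itself runs without holding mcbi_lockp, relying on the walker
 * count to defer physical deletion of entries:
 *
 *	mac_callback_walker_enter(mcbi);
 *	for (mcb = *mcb_head; mcb != NULL; mcb = mcb->mcb_nextp) {
 *		if ((mcb->mcb_flags & MCB_CONDEMNED) == 0)
 *			<invoke the callback in mcb->mcb_objp>;
 *	}
 *	mac_callback_walker_exit(mcbi, mcb_head, B_FALSE);
 */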

/*
 * Mark the entry as logically deleted. If there aren't any walkers, unlink it
 * from the list. In either case return the corresponding status.
 */
boolean_t
mac_callback_remove(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    mac_cb_t *mcb_elem)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;

	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	/*
	 * Search the callback list for the entry to be removed
	 */
	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
		if (p == mcb_elem)
			break;
	}
	VERIFY(p != NULL);

	/*
	 * If there are walkers just mark it as deleted and the last walker
	 * will remove it from the list and free it.
	 */
	if (mcbi->mcbi_walker_cnt != 0) {
		p->mcb_flags |= MCB_CONDEMNED;
		mcbi->mcbi_del_cnt++;
		return (B_FALSE);
	}

	ASSERT(mcbi->mcbi_del_cnt == 0);
	*pp = p->mcb_nextp;
	p->mcb_nextp = NULL;
	return (B_TRUE);
}

/*
 * Wait for all pending callback removals to be completed
 */
void
mac_callback_remove_wait(mac_cb_info_t *mcbi)
{
	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	while (mcbi->mcbi_del_cnt != 0) {
		DTRACE_PROBE1(need_wait, mac_cb_info_t *, mcbi);
		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
	}
}

void
mac_callback_barrier(mac_cb_info_t *mcbi)
{
	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	ASSERT3U(mcbi->mcbi_barrier_cnt, <, UINT_MAX);

	if (mcbi->mcbi_walker_cnt == 0) {
		return;
	}

	mcbi->mcbi_barrier_cnt++;
	do {
		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
	} while (mcbi->mcbi_walker_cnt > 0);
	mcbi->mcbi_barrier_cnt--;
	cv_broadcast(&mcbi->mcbi_cv);
}

void
mac_callback_walker_enter(mac_cb_info_t *mcbi)
{
	mutex_enter(mcbi->mcbi_lockp);
	/*
	 * Incoming walkers should give precedence to timely clean-up of
	 * deleted callback entries and requested barriers.
	 */
	while (mcbi->mcbi_del_cnt > 0 || mcbi->mcbi_barrier_cnt > 0) {
		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
	}
	mcbi->mcbi_walker_cnt++;
	mutex_exit(mcbi->mcbi_lockp);
}

/*
 * The last mac callback walker does the cleanup. Walk the list and unlink
 * all the logically deleted entries and construct a temporary list of
 * removed entries. Return the list of removed entries to the caller.
 */
static mac_cb_t *
mac_callback_walker_cleanup(mac_cb_info_t *mcbi, mac_cb_t **mcb_head)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;
	mac_cb_t	*rmlist = NULL;		/* List of removed elements */
	int		cnt = 0;

	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	ASSERT(mcbi->mcbi_del_cnt != 0 && mcbi->mcbi_walker_cnt == 0);

	pp = mcb_head;
	while (*pp != NULL) {
		if ((*pp)->mcb_flags & MCB_CONDEMNED) {
			p = *pp;
			*pp = p->mcb_nextp;
			p->mcb_nextp = rmlist;
			rmlist = p;
			cnt++;
			continue;
		}
		pp = &(*pp)->mcb_nextp;
	}

	ASSERT(mcbi->mcbi_del_cnt == cnt);
	mcbi->mcbi_del_cnt = 0;
	return (rmlist);
}

void
mac_callback_walker_exit(mac_cb_info_t *mcbi, mac_cb_t **headp,
    boolean_t is_promisc)
{
	boolean_t do_wake = B_FALSE;

	mutex_enter(mcbi->mcbi_lockp);

	/* If walkers remain, nothing more can be done for now */
	if (--mcbi->mcbi_walker_cnt != 0) {
		mutex_exit(mcbi->mcbi_lockp);
		return;
	}

	if (mcbi->mcbi_del_cnt != 0) {
		mac_cb_t *rmlist;

		rmlist = mac_callback_walker_cleanup(mcbi, headp);

		if (!is_promisc) {
			/* The "normal" non-promisc callback clean-up */
			mac_callback_free(rmlist);
		} else {
			mac_cb_t *mcb, *mcb_next;

			/*
			 * The promisc callbacks are in 2 lists, one off the
			 * 'mip' and another off the 'mcip' threaded by
			 * mpi_mi_link and mpi_mci_link respectively. There
			 * is, however, only a single shared total walker
			 * count, and an entry cannot be physically unlinked if
			 * a walker is active on either list. The last walker
			 * does this cleanup of logically deleted entries.
			 *
			 * With a list of callbacks deleted from above from
			 * mi_promisc_list (headp), remove the corresponding
			 * entry from mci_promisc_list (headp_pair) and free
			 * the structure.
			 */
			for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
				mac_promisc_impl_t *mpip;
				mac_client_impl_t *mcip;

				mcb_next = mcb->mcb_nextp;
				mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
				mcip = mpip->mpi_mcip;

				ASSERT3P(&mcip->mci_mip->mi_promisc_cb_info,
				    ==, mcbi);
				ASSERT3P(&mcip->mci_mip->mi_promisc_list,
				    ==, headp);

				VERIFY(mac_callback_remove(mcbi,
				    &mcip->mci_promisc_list,
				    &mpip->mpi_mci_link));
				mcb->mcb_flags = 0;
				mcb->mcb_nextp = NULL;
				kmem_cache_free(mac_promisc_impl_cache, mpip);
			}
		}

		/*
		 * Wake any walker threads that could be waiting in
		 * mac_callback_walker_enter() until deleted items have been
		 * cleaned from the list.
		 */
		do_wake = B_TRUE;
	}

	if (mcbi->mcbi_barrier_cnt != 0) {
		/*
		 * One or more threads are waiting for all walkers to exit the
		 * callback list. Notify them, now that the list is clear.
		 */
		do_wake = B_TRUE;
	}

	if (do_wake) {
		cv_broadcast(&mcbi->mcbi_cv);
	}
	mutex_exit(mcbi->mcbi_lockp);
}

static boolean_t
mac_callback_lookup(mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
{
	mac_cb_t	*mcb;

	/* Search the list for the element */
	for (mcb = *mcb_headp; mcb != NULL; mcb = mcb->mcb_nextp) {
		if (mcb == mcb_elem)
			return (B_TRUE);
	}

	return (B_FALSE);
}

static boolean_t
mac_callback_find(mac_cb_info_t *mcbi, mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
{
	boolean_t	found;

	mutex_enter(mcbi->mcbi_lockp);
	found = mac_callback_lookup(mcb_headp, mcb_elem);
	mutex_exit(mcbi->mcbi_lockp);

	return (found);
}

/* Free the list of removed callbacks */
void
mac_callback_free(mac_cb_t *rmlist)
{
	mac_cb_t	*mcb;
	mac_cb_t	*mcb_next;

	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
		mcb_next = mcb->mcb_nextp;
		kmem_free(mcb->mcb_objp, mcb->mcb_objsize);
	}
}

void
i_mac_notify(mac_impl_t *mip, mac_notify_type_t type)
{
	mac_cb_info_t	*mcbi;

	/*
	 * Signal the notify thread even after mi_ref has become zero and
	 * mi_disabled is set. The synchronization with the notify thread
	 * happens in mac_unregister and that implies the driver must make
	 * sure it is single-threaded (with respect to mac calls) and that
	 * all pending mac calls have returned before it calls mac_unregister.
	 */
	rw_enter(&i_mac_impl_lock, RW_READER);
	if (mip->mi_state_flags & MIS_DISABLED)
		goto exit;

	/*
	 * Guard against incorrect notifications. (Running a newer
	 * mac client against an older implementation?)
	 */
	if (type >= MAC_NNOTE)
		goto exit;

	mcbi = &mip->mi_notify_cb_info;
	mutex_enter(mcbi->mcbi_lockp);
	mip->mi_notify_bits |= (1 << type);
	cv_broadcast(&mcbi->mcbi_cv);
	mutex_exit(mcbi->mcbi_lockp);

exit:
	rw_exit(&i_mac_impl_lock);
}

/*
 * Mac serialization primitives. Please see the block comment at the
 * top of the file.
 */
void
i_mac_perim_enter(mac_impl_t *mip)
{
	mac_client_impl_t	*mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Use the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}

	mutex_enter(&mip->mi_perim_lock);
	if (mip->mi_perim_owner == curthread) {
		mip->mi_perim_ocnt++;
		mutex_exit(&mip->mi_perim_lock);
		return;
	}

	while (mip->mi_perim_owner != NULL)
		cv_wait(&mip->mi_perim_cv, &mip->mi_perim_lock);

	mip->mi_perim_owner = curthread;
	ASSERT(mip->mi_perim_ocnt == 0);
	mip->mi_perim_ocnt++;
#ifdef DEBUG
	mip->mi_perim_stack_depth = getpcstack(mip->mi_perim_stack,
	    MAC_PERIM_STACK_DEPTH);
#endif
	mutex_exit(&mip->mi_perim_lock);
}

int
i_mac_perim_enter_nowait(mac_impl_t *mip)
{
	/*
	 * The vnic is a special case, since the serialization is done based
	 * on the lower mac. If the lower mac is busy, it does not imply the
	 * vnic can't be unregistered. But in the case of other drivers,
	 * a busy perimeter or open mac handles implies that the mac is busy
	 * and can't be unregistered.
	 */
	if (mip->mi_state_flags & MIS_IS_VNIC) {
		i_mac_perim_enter(mip);
		return (0);
	}

	mutex_enter(&mip->mi_perim_lock);
	if (mip->mi_perim_owner != NULL) {
		mutex_exit(&mip->mi_perim_lock);
		return (EBUSY);
	}
	ASSERT(mip->mi_perim_ocnt == 0);
	mip->mi_perim_owner = curthread;
	mip->mi_perim_ocnt++;
	mutex_exit(&mip->mi_perim_lock);

	return (0);
}

void
i_mac_perim_exit(mac_impl_t *mip)
{
	mac_client_impl_t *mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Use the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}

	ASSERT(mip->mi_perim_owner == curthread && mip->mi_perim_ocnt != 0);

	mutex_enter(&mip->mi_perim_lock);
	if (--mip->mi_perim_ocnt == 0) {
		mip->mi_perim_owner = NULL;
		cv_signal(&mip->mi_perim_cv);
	}
	mutex_exit(&mip->mi_perim_lock);
}

/*
 * Returns whether the current thread holds the mac perimeter. Used in making
 * assertions.
 */
boolean_t
mac_perim_held(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mac_client_impl_t *mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Use the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}
	return (mip->mi_perim_owner == curthread);
}

/*
 * mac client interfaces to enter the mac perimeter of a mac end point, given
 * its mac handle, macname, or linkid.
 */
void
mac_perim_enter_by_mh(mac_handle_t mh, mac_perim_handle_t *mphp)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	i_mac_perim_enter(mip);
	/*
	 * The mac_perim_handle_t returned encodes the 'mip' and whether a
	 * mac_open has been done internally while entering the perimeter.
	 * This information is used in mac_perim_exit.
	 */
	MAC_ENCODE_MPH(*mphp, mip, 0);
}

int
mac_perim_enter_by_macname(const char *name, mac_perim_handle_t *mphp)
{
	int		err;
	mac_handle_t	mh;

	if ((err = mac_open(name, &mh)) != 0)
		return (err);

	mac_perim_enter_by_mh(mh, mphp);
	MAC_ENCODE_MPH(*mphp, mh, 1);
	return (0);
}

int
mac_perim_enter_by_linkid(datalink_id_t linkid, mac_perim_handle_t *mphp)
{
	int		err;
	mac_handle_t	mh;

	if ((err = mac_open_by_linkid(linkid, &mh)) != 0)
		return (err);

	mac_perim_enter_by_mh(mh, mphp);
	MAC_ENCODE_MPH(*mphp, mh, 1);
	return (0);
}

void
mac_perim_exit(mac_perim_handle_t mph)
{
	mac_impl_t	*mip;
	boolean_t	need_close;

	MAC_DECODE_MPH(mph, mip, need_close);
	i_mac_perim_exit(mip);
	if (need_close)
		mac_close((mac_handle_t)mip);
}

int
mac_hold(const char *macname, mac_impl_t **pmip)
{
	mac_impl_t	*mip;
	int		err;

	/*
	 * Check the device name length to make sure it won't overflow our
	 * buffer.
	 */
	if (strlen(macname) >= MAXNAMELEN)
		return (EINVAL);

	/*
	 * Look up its entry in the global hash table.
	 */
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname,
	    (mod_hash_val_t *)&mip);

	if (err != 0) {
		rw_exit(&i_mac_impl_lock);
		return (ENOENT);
	}

	if (mip->mi_state_flags & MIS_DISABLED) {
		rw_exit(&i_mac_impl_lock);
		return (ENOENT);
	}

	if (mip->mi_state_flags & MIS_EXCLUSIVE_HELD) {
		rw_exit(&i_mac_impl_lock);
		return (EBUSY);
	}

	mip->mi_ref++;
	rw_exit(&i_mac_impl_lock);

	*pmip = mip;
	return (0);
}

void
mac_rele(mac_impl_t *mip)
{
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	ASSERT(mip->mi_ref != 0);
	if (--mip->mi_ref == 0) {
		ASSERT(mip->mi_nactiveclients == 0 &&
		    !(mip->mi_state_flags & MIS_EXCLUSIVE));
	}
	rw_exit(&i_mac_impl_lock);
}

/*
 * Private GLDv3 function to start a MAC instance.
 */
int
mac_start(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	int		err = 0;
	mac_group_t	*defgrp;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mip->mi_start != NULL);

	/*
	 * Check whether the device is already started.
	 */
	if (mip->mi_active++ == 0) {
		mac_ring_t *ring = NULL;

		/*
		 * Start the device.
		 */
		err = mip->mi_start(mip->mi_driver);
		if (err != 0) {
			mip->mi_active--;
			return (err);
		}

		/*
		 * Start the default tx ring.
		 */
		if (mip->mi_default_tx_ring != NULL) {
			ring = (mac_ring_t *)mip->mi_default_tx_ring;
			if (ring->mr_state != MR_INUSE) {
				err = mac_start_ring(ring);
				if (err != 0) {
					mip->mi_active--;
					return (err);
				}
			}
		}

		if ((defgrp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) {
			/*
			 * Start the default group which is responsible
			 * for receiving broadcast and multicast
			 * traffic for both primary and non-primary
			 * MAC clients.
			 */
			ASSERT(defgrp->mrg_state ==
			    MAC_GROUP_STATE_REGISTERED);
			err = mac_start_group_and_rings(defgrp);
			if (err != 0) {
				mip->mi_active--;
				if ((ring != NULL) &&
				    (ring->mr_state == MR_INUSE))
					mac_stop_ring(ring);
				return (err);
			}
			mac_set_group_state(defgrp, MAC_GROUP_STATE_SHARED);
		}
	}

	return (err);
}

/*
 * Private GLDv3 function to stop a MAC instance.
 */
void
mac_stop(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mac_group_t	*grp;

	ASSERT(mip->mi_stop != NULL);
	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	/*
	 * Check whether the device is still needed.
	 */
	ASSERT(mip->mi_active != 0);
	if (--mip->mi_active == 0) {
		if ((grp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) {
			/*
			 * There should be no more active clients since the
			 * MAC is being stopped. Stop the default RX group
			 * and transition it back to registered state.
			 *
			 * When clients are torn down, the groups
			 * are released via mac_release_rx_group which
			 * knows that the default group is always in
			 * started mode since broadcast uses it. So
			 * we can assert that there are no clients
			 * (since mac_bcast_add doesn't register itself
			 * as a client) and the group is in SHARED state.
			 */
			ASSERT(grp->mrg_state == MAC_GROUP_STATE_SHARED);
			ASSERT(MAC_GROUP_NO_CLIENT(grp) &&
			    mip->mi_nactiveclients == 0);
			mac_stop_group_and_rings(grp);
			mac_set_group_state(grp, MAC_GROUP_STATE_REGISTERED);
		}

		if (mip->mi_default_tx_ring != NULL) {
			mac_ring_t *ring;

			ring = (mac_ring_t *)mip->mi_default_tx_ring;
			if (ring->mr_state == MR_INUSE) {
				mac_stop_ring(ring);
				ring->mr_flag = 0;
			}
		}

		/*
		 * Stop the device.
		 */
		mip->mi_stop(mip->mi_driver);
	}
}

int
i_mac_promisc_set(mac_impl_t *mip, boolean_t on)
{
	int err = 0;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mip->mi_setpromisc != NULL);

	if (on) {
		/*
		 * Enable promiscuous mode on the device if not yet enabled.
		 */
		if (mip->mi_devpromisc++ == 0) {
			err = mip->mi_setpromisc(mip->mi_driver, B_TRUE);
			if (err != 0) {
				mip->mi_devpromisc--;
				return (err);
			}
			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
		}
	} else {
		if (mip->mi_devpromisc == 0)
			return (EPROTO);

		/*
		 * Disable promiscuous mode on the device if this is the last
		 * enabling.
		 */
		if (--mip->mi_devpromisc == 0) {
			err = mip->mi_setpromisc(mip->mi_driver, B_FALSE);
			if (err != 0) {
				mip->mi_devpromisc++;
				return (err);
			}
			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
		}
	}

	return (0);
}

/*
 * The promiscuity state can change at any time. If the caller needs to take
 * actions that are atomic with the promiscuity state, then the caller needs
 * to bracket the entire sequence with mac_perim_enter/exit.
 */
boolean_t
mac_promisc_get(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	/*
	 * Return the current promiscuity.
	 */
	return (mip->mi_devpromisc != 0);
}
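
/*
 * An illustrative sketch of the bracketing described above (hypothetical
 * caller code):
 *
 *	mac_perim_handle_t mph;
 *
 *	mac_perim_enter_by_mh(mh, &mph);
 *	if (mac_promisc_get(mh)) {
 *		... actions that must be atomic with promiscuity ...
 *	}
 *	mac_perim_exit(mph);
 */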

/*
 * Invoked at MAC instance attach time to initialize the list
 * of factory MAC addresses supported by a MAC instance. This function
 * builds a local cache in the mac_impl_t for the MAC addresses
 * supported by the underlying hardware. The MAC clients themselves
 * use the mac_addr_factory*() functions to query and reserve
 * factory MAC addresses.
 */
void
mac_addr_factory_init(mac_impl_t *mip)
{
	mac_capab_multifactaddr_t capab;
	uint8_t *addr;
	int i;

	/*
	 * First round to see how many factory MAC addresses are available.
	 */
	bzero(&capab, sizeof (capab));
	if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_MULTIFACTADDR,
	    &capab) || (capab.mcm_naddr == 0)) {
		/*
		 * The MAC instance doesn't support multiple factory
		 * MAC addresses, we're done here.
		 */
		return;
	}

	/*
	 * Allocate the space and get all the factory addresses.
	 */
	addr = kmem_alloc(capab.mcm_naddr * MAXMACADDRLEN, KM_SLEEP);
	capab.mcm_getaddr(mip->mi_driver, capab.mcm_naddr, addr);

	mip->mi_factory_addr_num = capab.mcm_naddr;
	mip->mi_factory_addr = kmem_zalloc(mip->mi_factory_addr_num *
	    sizeof (mac_factory_addr_t), KM_SLEEP);

	for (i = 0; i < capab.mcm_naddr; i++) {
		bcopy(addr + i * MAXMACADDRLEN,
		    mip->mi_factory_addr[i].mfa_addr,
		    mip->mi_type->mt_addr_length);
		mip->mi_factory_addr[i].mfa_in_use = B_FALSE;
	}

	kmem_free(addr, capab.mcm_naddr * MAXMACADDRLEN);
}

void
mac_addr_factory_fini(mac_impl_t *mip)
{
	if (mip->mi_factory_addr == NULL) {
		ASSERT(mip->mi_factory_addr_num == 0);
		return;
	}

	kmem_free(mip->mi_factory_addr, mip->mi_factory_addr_num *
	    sizeof (mac_factory_addr_t));

	mip->mi_factory_addr = NULL;
	mip->mi_factory_addr_num = 0;
}

/*
 * Reserve a factory MAC address. If *slot is set to -1, the function
 * attempts to reserve any of the available factory MAC addresses and
 * returns the reserved slot id. If no slots are available, the function
 * returns ENOSPC. If *slot is not set to -1, the function reserves
 * the specified slot if it is available, or returns EBUSY if the slot
 * is already used. Returns ENOTSUP if the underlying MAC does not
 * support multiple factory addresses. If the slot number is not -1 but
 * is invalid, returns EINVAL.
 */
int
mac_addr_factory_reserve(mac_client_handle_t mch, int *slot)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	mac_impl_t *mip = mcip->mci_mip;
	int i, ret = 0;

	i_mac_perim_enter(mip);
	/*
	 * Protect against concurrent readers that may need a self-consistent
	 * view of the factory addresses
	 */
	rw_enter(&mip->mi_rw_lock, RW_WRITER);

	if (mip->mi_factory_addr_num == 0) {
		ret = ENOTSUP;
		goto bail;
	}

	if (*slot != -1) {
		/* check the specified slot */
		if (*slot < 1 || *slot > mip->mi_factory_addr_num) {
			ret = EINVAL;
			goto bail;
		}
		if (mip->mi_factory_addr[*slot-1].mfa_in_use) {
			ret = EBUSY;
			goto bail;
		}
	} else {
		/* pick the next available slot */
		for (i = 0; i < mip->mi_factory_addr_num; i++) {
			if (!mip->mi_factory_addr[i].mfa_in_use)
				break;
		}

		if (i == mip->mi_factory_addr_num) {
			ret = ENOSPC;
			goto bail;
		}
		*slot = i+1;
	}

	mip->mi_factory_addr[*slot-1].mfa_in_use = B_TRUE;
	mip->mi_factory_addr[*slot-1].mfa_client = mcip;

bail:
	rw_exit(&mip->mi_rw_lock);
	i_mac_perim_exit(mip);
	return (ret);
}

/*
 * Release the specified factory MAC address slot.
 */
void
mac_addr_factory_release(mac_client_handle_t mch, uint_t slot)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	mac_impl_t *mip = mcip->mci_mip;

	i_mac_perim_enter(mip);
	/*
	 * Protect against concurrent readers that may need a self-consistent
	 * view of the factory addresses
	 */
	rw_enter(&mip->mi_rw_lock, RW_WRITER);

	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);
	ASSERT(mip->mi_factory_addr[slot-1].mfa_in_use);

	mip->mi_factory_addr[slot-1].mfa_in_use = B_FALSE;

	rw_exit(&mip->mi_rw_lock);
	i_mac_perim_exit(mip);
}

/*
 * Stores in mac_addr the value of the specified MAC address, along with its
 * length, and reports whether the slot is in use and, if so, the name of its
 * client. The slot number must be valid for the MAC. If client_name is
 * non-NULL, the caller must provide a buffer of at least MAXNAMELEN bytes.
 */
void
mac_addr_factory_value(mac_handle_t mh, int slot, uchar_t *mac_addr,
    uint_t *addr_len, char *client_name, boolean_t *in_use_arg)
{
	mac_impl_t *mip = (mac_impl_t *)mh;
	boolean_t in_use;

	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);

	/*
	 * Readers need to hold mi_rw_lock. Writers need to hold the mac
	 * perimeter and mi_rw_lock.
	 */
	rw_enter(&mip->mi_rw_lock, RW_READER);
	bcopy(mip->mi_factory_addr[slot-1].mfa_addr, mac_addr, MAXMACADDRLEN);
	*addr_len = mip->mi_type->mt_addr_length;
	in_use = mip->mi_factory_addr[slot-1].mfa_in_use;
	if (in_use && client_name != NULL) {
		bcopy(mip->mi_factory_addr[slot-1].mfa_client->mci_name,
		    client_name, MAXNAMELEN);
	}
	if (in_use_arg != NULL)
		*in_use_arg = in_use;
	rw_exit(&mip->mi_rw_lock);
}
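
/*
 * An illustrative sketch of the factory address interfaces above
 * (hypothetical client code):
 *
 *	int slot = -1;			request any available slot
 *
 *	if (mac_addr_factory_reserve(mch, &slot) == 0) {
 *		... use the factory address in 'slot' ...
 *		mac_addr_factory_release(mch, slot);
 *	}
 */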

/*
 * Returns the number of factory MAC addresses (in addition to the
 * primary MAC address), or 0 if the underlying MAC doesn't support
 * that feature.
 */
uint_t
mac_addr_factory_num(mac_handle_t mh)
{
	mac_impl_t *mip = (mac_impl_t *)mh;

	return (mip->mi_factory_addr_num);
}

void
mac_rx_group_unmark(mac_group_t *grp, uint_t flag)
{
	mac_ring_t	*ring;

	for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next)
		ring->mr_flag &= ~flag;
}

/*
 * The following mac_hwrings_xxx() functions are private mac client functions
 * used by the aggr driver to access and control the underlying HW Rx group
 * and rings. In this case, the aggr driver has exclusive control of the
 * underlying HW Rx group/rings; it calls the following functions to
 * start/stop the HW Rx rings, disable/enable polling, add/remove MAC
 * addresses, or set up the Rx callback.
 */
/* ARGSUSED */
static void
mac_hwrings_rx_process(void *arg, mac_resource_handle_t srs,
    mblk_t *mp_chain, boolean_t loopback)
{
	mac_soft_ring_set_t	*mac_srs = (mac_soft_ring_set_t *)srs;
	mac_srs_rx_t		*srs_rx = &mac_srs->srs_rx;
	mac_direct_rx_t		proc;
	void			*arg1;
	mac_resource_handle_t	arg2;

	proc = srs_rx->sr_func;
	arg1 = srs_rx->sr_arg1;
	arg2 = mac_srs->srs_mrh;

	proc(arg1, arg2, mp_chain, NULL);
}

/*
 * This function is called to get the list of HW rings that are reserved by
 * an exclusive mac client.
 *
 * Return value: the number of HW rings.
 */
int
mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh,
    mac_ring_handle_t *hwrh, mac_ring_type_t rtype)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	flow_entry_t *flent = mcip->mci_flent;
	mac_group_t *grp;
	mac_ring_t *ring;
	int cnt = 0;

	if (rtype == MAC_RING_TYPE_RX) {
		grp = flent->fe_rx_ring_group;
	} else if (rtype == MAC_RING_TYPE_TX) {
		grp = flent->fe_tx_ring_group;
	} else {
		ASSERT(B_FALSE);
		return (-1);
	}

	/*
	 * The MAC client did not reserve an Rx group, return directly.
	 * This is probably because the underlying MAC does not support
	 * any groups.
	 */
	if (hwgh != NULL)
		*hwgh = NULL;
	if (grp == NULL)
		return (0);
	/*
	 * This group must be reserved by this MAC client.
	 */
	ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) &&
	    (mcip == MAC_GROUP_ONLY_CLIENT(grp)));

	for (ring = grp->mrg_rings; ring != NULL;
	    ring = ring->mr_next, cnt++) {
		ASSERT(cnt < MAX_RINGS_PER_GROUP);
		hwrh[cnt] = (mac_ring_handle_t)ring;
	}
	if (hwgh != NULL)
		*hwgh = (mac_group_handle_t)grp;

	return (cnt);
}
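
/*
 * An illustrative sketch of how an exclusive client such as aggr might
 * use mac_hwrings_get() (hypothetical caller code, error handling
 * elided):
 *
 *	mac_group_handle_t hwgh;
 *	mac_ring_handle_t hwrh[MAX_RINGS_PER_GROUP];
 *	int i, cnt;
 *
 *	cnt = mac_hwrings_get(mch, &hwgh, hwrh, MAC_RING_TYPE_RX);
 *	for (i = 0; i < cnt; i++)
 *		(void) mac_hwring_start(hwrh[i]);
 */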

/*
 * Get the HW ring handles of the given group index. If the MAC
 * doesn't have a group at this index, or any groups at all, then 0 is
 * returned and hwgh is set to NULL. This is a private client API. The
 * MAC perimeter must be held when calling this function.
 *
 * mh: A handle to the MAC that owns the group.
 *
 * idx: The index of the HW group to be read.
 *
 * hwgh: If non-NULL, contains a handle to the HW group on return.
 *
 * hwrh: An array of ring handles pointing to the HW rings in the
 * group. The array must be large enough to hold a handle to each ring
 * in the group. To be safe, this array should be of size
 * MAX_RINGS_PER_GROUP.
 *
 * rtype: Used to determine if we are fetching Rx or Tx rings.
 *
 * Returns the number of rings in the group.
 */
uint_t
mac_hwrings_idx_get(mac_handle_t mh, uint_t idx, mac_group_handle_t *hwgh,
    mac_ring_handle_t *hwrh, mac_ring_type_t rtype)
{
	mac_impl_t *mip = (mac_impl_t *)mh;
	mac_group_t *grp;
	mac_ring_t *ring;
	uint_t cnt = 0;

	/*
	 * The MAC perimeter must be held when accessing the
	 * mi_{rx,tx}_groups fields.
	 */
	ASSERT(MAC_PERIM_HELD(mh));
	ASSERT(rtype == MAC_RING_TYPE_RX || rtype == MAC_RING_TYPE_TX);

	if (rtype == MAC_RING_TYPE_RX) {
		grp = mip->mi_rx_groups;
	} else {
		ASSERT(rtype == MAC_RING_TYPE_TX);
		grp = mip->mi_tx_groups;
	}

	while (grp != NULL && grp->mrg_index != idx)
		grp = grp->mrg_next;

	/*
	 * If the MAC doesn't have a group at this index or doesn't
	 * implement the RINGS capab, then set hwgh to NULL and return 0.
	 */
	if (hwgh != NULL)
		*hwgh = NULL;

	if (grp == NULL)
		return (0);

	ASSERT3U(idx, ==, grp->mrg_index);

	for (ring = grp->mrg_rings; ring != NULL;
	    ring = ring->mr_next, cnt++) {
		ASSERT3U(cnt, <, MAX_RINGS_PER_GROUP);
		hwrh[cnt] = (mac_ring_handle_t)ring;
	}

	/* A group should always have at least one ring. */
	ASSERT3U(cnt, >, 0);

	if (hwgh != NULL)
		*hwgh = (mac_group_handle_t)grp;

	return (cnt);
}

/*
 * This function is called to get info about Tx/Rx rings.
 *
 * Return value: returns uint_t which will have various bits set
 * that indicate different properties of the ring.
 */
uint_t
mac_hwring_getinfo(mac_ring_handle_t rh)
{
	mac_ring_t *ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &ring->mr_info;

	return (info->mri_flags);
}

/*
 * Set the passthru callback on the hardware ring.
 */
void
mac_hwring_set_passthru(mac_ring_handle_t hwrh, mac_rx_t fn, void *arg1,
    mac_resource_handle_t arg2)
{
	mac_ring_t *hwring = (mac_ring_t *)hwrh;

	ASSERT3S(hwring->mr_type, ==, MAC_RING_TYPE_RX);

	hwring->mr_classify_type = MAC_PASSTHRU_CLASSIFIER;

	hwring->mr_pt_fn = fn;
	hwring->mr_pt_arg1 = arg1;
	hwring->mr_pt_arg2 = arg2;
}

/*
 * Clear the passthru callback on the hardware ring.
 */
void
mac_hwring_clear_passthru(mac_ring_handle_t hwrh)
{
	mac_ring_t *hwring = (mac_ring_t *)hwrh;

	ASSERT3S(hwring->mr_type, ==, MAC_RING_TYPE_RX);

	hwring->mr_classify_type = MAC_NO_CLASSIFIER;

	hwring->mr_pt_fn = NULL;
	hwring->mr_pt_arg1 = NULL;
	hwring->mr_pt_arg2 = NULL;
}

void
mac_client_set_flow_cb(mac_client_handle_t mch, mac_rx_t func, void *arg1)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
	flow_entry_t		*flent = mcip->mci_flent;

	mutex_enter(&flent->fe_lock);
	flent->fe_cb_fn = (flow_fn_t)func;
	flent->fe_cb_arg1 = arg1;
	flent->fe_cb_arg2 = NULL;
	flent->fe_flags &= ~FE_MC_NO_DATAPATH;
	mutex_exit(&flent->fe_lock);
}

void
mac_client_clear_flow_cb(mac_client_handle_t mch)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
	flow_entry_t		*flent = mcip->mci_flent;

	mutex_enter(&flent->fe_lock);
	flent->fe_cb_fn = (flow_fn_t)mac_rx_def;
	flent->fe_cb_arg1 = NULL;
	flent->fe_cb_arg2 = NULL;
	flent->fe_flags |= FE_MC_NO_DATAPATH;
	mutex_exit(&flent->fe_lock);
}

/*
 * Export ddi interrupt handles from the HW ring to the pseudo ring and
 * set up the Rx callback of the mac client which exclusively controls the
 * HW ring.
 */
void
mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh,
    mac_ring_handle_t pseudo_rh)
{
	mac_ring_t		*hw_ring = (mac_ring_t *)hwrh;
	mac_ring_t		*pseudo_ring;
	mac_soft_ring_set_t	*mac_srs = hw_ring->mr_srs;

	if (pseudo_rh != NULL) {
		pseudo_ring = (mac_ring_t *)pseudo_rh;
		/* Export the ddi handles to the pseudo ring */
		pseudo_ring->mr_info.mri_intr.mi_ddi_handle =
		    hw_ring->mr_info.mri_intr.mi_ddi_handle;
		pseudo_ring->mr_info.mri_intr.mi_ddi_shared =
		    hw_ring->mr_info.mri_intr.mi_ddi_shared;
		/*
		 * Save a pointer to the pseudo ring in the hw ring. If
		 * the interrupt handle changes, the hw ring will be
		 * notified of the change (see mac_ring_intr_set())
		 * and the appropriate change has to be made to
		 * the pseudo ring that has exported the ddi handle.
		 */
		hw_ring->mr_prh = pseudo_rh;
	}

	if (hw_ring->mr_type == MAC_RING_TYPE_RX) {
		ASSERT(!(mac_srs->srs_type & SRST_TX));
		mac_srs->srs_mrh = prh;
		mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process;
	}
}

void
mac_hwring_teardown(mac_ring_handle_t hwrh)
{
	mac_ring_t		*hw_ring = (mac_ring_t *)hwrh;
	mac_soft_ring_set_t	*mac_srs;

	if (hw_ring == NULL)
		return;
	hw_ring->mr_prh = NULL;
	if (hw_ring->mr_type == MAC_RING_TYPE_RX) {
		mac_srs = hw_ring->mr_srs;
		ASSERT(!(mac_srs->srs_type & SRST_TX));
		mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process;
		mac_srs->srs_mrh = NULL;
	}
}

int
mac_hwring_disable_intr(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;
	mac_intr_t *intr = &rr_ring->mr_info.mri_intr;

	return (intr->mi_disable(intr->mi_handle));
}

int
mac_hwring_enable_intr(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;
	mac_intr_t *intr = &rr_ring->mr_info.mri_intr;

	return (intr->mi_enable(intr->mi_handle));
}
1838 *
1839 * This is used by special MAC clients that are MAC themselves and
1840 * need to exert control over the underlying HW rings of the NIC.
1841 */
1842 int
1843 mac_hwring_start(mac_ring_handle_t rh)
1844 {
1845 	mac_ring_t *rr_ring = (mac_ring_t *)rh;
1846 	int rv = 0;
1847
1848 	if (rr_ring->mr_state != MR_INUSE)
1849 		rv = mac_start_ring(rr_ring);
1850
1851 	return (rv);
1852 }
1853
1854 /*
1855 * Stop the HW ring pointed to by rh. Also see mac_hwring_start().
1856 */
1857 void
1858 mac_hwring_stop(mac_ring_handle_t rh)
1859 {
1860 	mac_ring_t *rr_ring = (mac_ring_t *)rh;
1861
1862 	if (rr_ring->mr_state != MR_FREE)
1863 		mac_stop_ring(rr_ring);
1864 }
1865
1866 /*
1867 * Remove the quiesced flag from the HW ring pointed to by rh.
1868 *
1869 * This is used by special MAC clients that are MAC themselves and
1870 * need to exert control over the underlying HW rings of the NIC.
1871 */
1872 int
1873 mac_hwring_activate(mac_ring_handle_t rh)
1874 {
1875 	mac_ring_t *rr_ring = (mac_ring_t *)rh;
1876
1877 	MAC_RING_UNMARK(rr_ring, MR_QUIESCE);
1878 	return (0);
1879 }
1880
1881 /*
1882 * Quiesce the HW ring pointed to by rh. Also see mac_hwring_activate().
1883 */
1884 void
1885 mac_hwring_quiesce(mac_ring_handle_t rh)
1886 {
1887 	mac_ring_t *rr_ring = (mac_ring_t *)rh;
1888
1889 	mac_rx_ring_quiesce(rr_ring, MR_QUIESCE);
1890 }
1891
1892 mblk_t *
1893 mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup)
1894 {
1895 	mac_ring_t *rr_ring = (mac_ring_t *)rh;
1896 	mac_ring_info_t *info = &rr_ring->mr_info;
1897
1898 	return (info->mri_poll(info->mri_driver, bytes_to_pickup));
1899 }
1900
1901 /*
1902 * Send packets through a selected tx ring.
1903 */
1904 mblk_t *
1905 mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp)
1906 {
1907 	mac_ring_t *ring = (mac_ring_t *)rh;
1908 	mac_ring_info_t *info = &ring->mr_info;
1909
1910 	ASSERT(ring->mr_type == MAC_RING_TYPE_TX &&
1911 	    ring->mr_state >= MR_INUSE);
1912 	return (info->mri_tx(info->mri_driver, mp));
1913 }
1914
1915 /*
1916 * Query stats for a particular rx/tx ring
1917 */
1918 int
1919 mac_hwring_getstat(mac_ring_handle_t rh, uint_t stat, uint64_t *val)
1920 {
1921 	mac_ring_t *ring = (mac_ring_t *)rh;
1922 	mac_ring_info_t *info = &ring->mr_info;
1923
1924 	return (info->mri_stat(info->mri_driver, stat, val));
1925 }
1926
1927 /*
1928 * Private function that is only used by aggr to send packets through
1929 * a port/Tx ring. Since aggr exposes a pseudo Tx ring even for ports
1930 * that do not expose Tx rings, the aggr_ring_tx() entry point needs
1931 * access to mac_impl_t to send packets through the m_tx() entry point.
1932 * It accomplishes this by calling the mac_hwring_send_priv() function.
1933 */
1934 mblk_t *
1935 mac_hwring_send_priv(mac_client_handle_t mch, mac_ring_handle_t rh, mblk_t *mp)
1936 {
1937 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
1938 	mac_impl_t *mip = mcip->mci_mip;
1939
1940 	return (mac_provider_tx(mip, rh, mp, mcip));
1941 }
1942
1943 /*
1944 * Private function that is only used by aggr to update the default transmission
1945 * ring. Because aggr exposes a pseudo Tx ring even for ports that may
1946 * temporarily be down, it may need to update the default ring that is used by
1947 * MAC such that it refers to a link that can actively be used to send traffic.
1948 * Note that this is different from the case where the port has been removed
1949 * from the group. In those cases, all of the rings will be torn down because
1950 * the ring will no longer exist.
It's important to give aggr a case where the
1951 * rings can still exist such that it may be able to continue to send LACP PDUs
1952 * to potentially restore the link.
1953 */
1954 void
1955 mac_hwring_set_default(mac_handle_t mh, mac_ring_handle_t rh)
1956 {
1957 	mac_impl_t *mip = (mac_impl_t *)mh;
1958 	mac_ring_t *ring = (mac_ring_t *)rh;
1959
1960 	ASSERT(MAC_PERIM_HELD(mh));
1961 	VERIFY(mip->mi_state_flags & MIS_IS_AGGR);
1962
1963 	/*
1964 	 * We used to condition this assignment on the ring's
1965 	 * 'mr_state' being one of 'MR_INUSE'. However, there are
1966 	 * cases where this is called before the ring has any active
1967 	 * clients, and therefore is not marked as in use. Since the
1968 	 * sole purpose of this function is for aggr to make sure
1969 	 * 'mi_default_tx_ring' matches 'lg_tx_ports[0]', it's
1970 	 * imperative that we update its value regardless of ring
1971 	 * state. Otherwise, we can end up in a state where
1972 	 * 'mi_default_tx_ring' points to a pseudo ring of a downed
1973 	 * port, even when 'lg_tx_ports[0]' points to a port that is
1974 	 * up.
1975 	 */
1976 	mip->mi_default_tx_ring = rh;
1977 }
1978
1979 int
1980 mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr)
1981 {
1982 	mac_group_t *group = (mac_group_t *)gh;
1983
1984 	return (mac_group_addmac(group, addr));
1985 }
1986
1987 int
1988 mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr)
1989 {
1990 	mac_group_t *group = (mac_group_t *)gh;
1991
1992 	return (mac_group_remmac(group, addr));
1993 }
1994
1995 /*
1996 * Program the group's HW VLAN filter if it has such support.
1997 * Otherwise, the group will implicitly accept tagged traffic and
1998 * there is nothing to do.
1999 */
2000 int
2001 mac_hwgroup_addvlan(mac_group_handle_t gh, uint16_t vid)
2002 {
2003 	mac_group_t *group = (mac_group_t *)gh;
2004
2005 	if (!MAC_GROUP_HW_VLAN(group))
2006 		return (0);
2007
2008 	return (mac_group_addvlan(group, vid));
2009 }
2010
2011 int
2012 mac_hwgroup_remvlan(mac_group_handle_t gh, uint16_t vid)
2013 {
2014 	mac_group_t *group = (mac_group_t *)gh;
2015
2016 	if (!MAC_GROUP_HW_VLAN(group))
2017 		return (0);
2018
2019 	return (mac_group_remvlan(group, vid));
2020 }
2021
2022 /*
2023 * Determine if a MAC has HW VLAN support. This is a private API
2024 * consumed by aggr. In the future it might be nice to have a bitfield
2025 * in mac_capab_rings_t to track which forms of HW filtering are
2026 * supported by the MAC.
2027 */
2028 boolean_t
2029 mac_has_hw_vlan(mac_handle_t mh)
2030 {
2031 	mac_impl_t *mip = (mac_impl_t *)mh;
2032
2033 	return (MAC_GROUP_HW_VLAN(mip->mi_rx_groups));
2034 }
2035
2036 /*
2037 * Get the number of Rx HW groups on this MAC.
2038 */
2039 uint_t
2040 mac_get_num_rx_groups(mac_handle_t mh)
2041 {
2042 	mac_impl_t *mip = (mac_impl_t *)mh;
2043
2044 	ASSERT(MAC_PERIM_HELD(mh));
2045 	return (mip->mi_rx_group_count);
2046 }
2047
2048 int
2049 mac_set_promisc(mac_handle_t mh, boolean_t value)
2050 {
2051 	mac_impl_t *mip = (mac_impl_t *)mh;
2052
2053 	ASSERT(MAC_PERIM_HELD(mh));
2054 	return (i_mac_promisc_set(mip, value));
2055 }
2056
2057 /*
2058 * Set the RX group to be shared/reserved. Note that the group must be
2059 * started/stopped outside of this function.
2060 */
2061 void
2062 mac_set_group_state(mac_group_t *grp, mac_group_state_t state)
2063 {
2064 	/*
2065 	 * If there is no change in the group state, just return.
2066 	 */
2067 	if (grp->mrg_state == state)
2068 		return;
2069
2070 	switch (state) {
2071 	case MAC_GROUP_STATE_RESERVED:
2072 		/*
2073 		 * Successfully reserved the group.
2074 		 *
2075 		 * Given that there is an exclusive client controlling this
2076 		 * group, we enable the group level polling when available,
2077 		 * so that SRSs get to turn on/off individual rings they're
2078 		 * assigned to.
2079 		 */
2080 		ASSERT(MAC_PERIM_HELD(grp->mrg_mh));
2081
2082 		if (grp->mrg_type == MAC_RING_TYPE_RX &&
2083 		    GROUP_INTR_DISABLE_FUNC(grp) != NULL) {
2084 			GROUP_INTR_DISABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp));
2085 		}
2086 		break;
2087
2088 	case MAC_GROUP_STATE_SHARED:
2089 		/*
2090 		 * Set all rings of this group to software classified.
2091 		 * If the group has an overriding interrupt, then re-enable it.
2092 		 */
2093 		ASSERT(MAC_PERIM_HELD(grp->mrg_mh));
2094
2095 		if (grp->mrg_type == MAC_RING_TYPE_RX &&
2096 		    GROUP_INTR_ENABLE_FUNC(grp) != NULL) {
2097 			GROUP_INTR_ENABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp));
2098 		}
2099 		/* The ring is not available for reservations any more */
2100 		break;
2101
2102 	case MAC_GROUP_STATE_REGISTERED:
2103 		/* Also callable from mac_register, perim is not held */
2104 		break;
2105
2106 	default:
2107 		ASSERT(B_FALSE);
2108 		break;
2109 	}
2110
2111 	grp->mrg_state = state;
2112 }
2113
2114 /*
2115 * Quiesce future hardware classified packets for the specified Rx ring.
2116 */
2117 static void
2118 mac_rx_ring_quiesce(mac_ring_t *rx_ring, uint_t ring_flag)
2119 {
2120 	ASSERT(rx_ring->mr_classify_type == MAC_HW_CLASSIFIER);
2121 	ASSERT(ring_flag == MR_CONDEMNED || ring_flag == MR_QUIESCE);
2122
2123 	mutex_enter(&rx_ring->mr_lock);
2124 	rx_ring->mr_flag |= ring_flag;
2125 	while (rx_ring->mr_refcnt != 0)
2126 		cv_wait(&rx_ring->mr_cv, &rx_ring->mr_lock);
2127 	mutex_exit(&rx_ring->mr_lock);
2128 }
2129
2130 /*
2131 * Please see mac_tx for details about the per cpu locking scheme.
2132 */
2133 static void
2134 mac_tx_lock_all(mac_client_impl_t *mcip)
2135 {
2136 	int i;
2137
2138 	for (i = 0; i <= mac_tx_percpu_cnt; i++)
2139 		mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
2140 }
2141
2142 static void
2143 mac_tx_unlock_all(mac_client_impl_t *mcip)
2144 {
2145 	int i;
2146
2147 	for (i = mac_tx_percpu_cnt; i >= 0; i--)
2148 		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
2149 }
2150
2151 static void
2152 mac_tx_unlock_allbutzero(mac_client_impl_t *mcip)
2153 {
2154 	int i;
2155
2156 	for (i = mac_tx_percpu_cnt; i > 0; i--)
2157 		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
2158 }
2159
2160 static int
2161 mac_tx_sum_refcnt(mac_client_impl_t *mcip)
2162 {
2163 	int i;
2164 	int refcnt = 0;
2165
2166 	for (i = 0; i <= mac_tx_percpu_cnt; i++)
2167 		refcnt += mcip->mci_tx_pcpu[i].pcpu_tx_refcnt;
2168
2169 	return (refcnt);
2170 }
2171
2172 /*
2173 * Stop future Tx packets coming down from the client in preparation for
2174 * quiescing the Tx side. This is needed for dynamic reclaim and reassignment
2175 * of rings between clients.
2176 */
2177 void
2178 mac_tx_client_block(mac_client_impl_t *mcip)
2179 {
2180 	mac_tx_lock_all(mcip);
2181 	mcip->mci_tx_flag |= MCI_TX_QUIESCE;
2182 	while (mac_tx_sum_refcnt(mcip) != 0) {
2183 		mac_tx_unlock_allbutzero(mcip);
2184 		cv_wait(&mcip->mci_tx_cv, &mcip->mci_tx_pcpu[0].pcpu_tx_lock);
2185 		mutex_exit(&mcip->mci_tx_pcpu[0].pcpu_tx_lock);
2186 		mac_tx_lock_all(mcip);
2187 	}
2188 	mac_tx_unlock_all(mcip);
2189 }
2190
2191 void
2192 mac_tx_client_unblock(mac_client_impl_t *mcip)
2193 {
2194 	mac_tx_lock_all(mcip);
2195 	mcip->mci_tx_flag &= ~MCI_TX_QUIESCE;
2196 	mac_tx_unlock_all(mcip);
2197 	/*
2198 	 * We may fail to disable flow control for the last MAC_NOTE_TX
2199 	 * notification because the MAC client is quiesced. Send the
2200 	 * notification again.
2201 	 */
2202 	i_mac_notify(mcip->mci_mip, MAC_NOTE_TX);
2203 }
2204
2205 /*
2206 * Wait for an SRS to quiesce. The SRS worker will signal us when the
2207 * quiesce is done.
2208 */
2209 static void
2210 mac_srs_quiesce_wait(mac_soft_ring_set_t *srs, uint_t srs_flag)
2211 {
2212 	mutex_enter(&srs->srs_lock);
2213 	while (!(srs->srs_state & srs_flag))
2214 		cv_wait(&srs->srs_quiesce_done_cv, &srs->srs_lock);
2215 	mutex_exit(&srs->srs_lock);
2216 }
2217
2218 /*
2219 * Quiescing an Rx SRS is achieved by the following sequence. The protocol
2220 * works bottom up by cutting off packet flow from the bottommost point in the
2221 * mac, then the SRS, and then the soft rings. There are 2 use cases of this
2222 * mechanism. One is a temporary quiesce of the SRS, such as say while changing
2223 * the Rx callbacks. Another use case is Rx SRS teardown. In the former case
2224 * the QUIESCE prefix/suffix is used and in the latter the CONDEMNED is used
2225 * for the SRS and MR flags. In the former case the threads pause waiting for
2226 * a restart, while in the latter case the threads exit. The Tx SRS teardown
2227 * is also mostly similar to the above.
2228 *
2229 * 1. Stop future hardware classified packets at the lowest level in the mac.
2230 *    Remove any hardware classification rule (CONDEMNED case) and mark the
2231 *    rings as CONDEMNED or QUIESCE as appropriate. This prevents the mr_refcnt
2232 *    from increasing. Upcalls from the driver that come through hardware
2233 *    classification will be dropped in mac_rx from now on. Then we wait for
2234 *    the mr_refcnt to drop to zero. When the mr_refcnt reaches zero we are
2235 *    sure there aren't any upcall threads from the driver through hardware
2236 *    classification. In the case of SRS teardown we also remove the
2237 *    classification rule in the driver.
2238 *
2239 * 2. Stop future software classified packets by marking the flow entry with
2240 *    FE_QUIESCE or FE_CONDEMNED as appropriate which prevents the refcnt from
2241 *    increasing. We also remove the flow entry from the table in the latter
2242 *    case. Then wait for the fe_refcnt to reach an appropriate quiescent value
2243 *    that indicates there aren't any active threads using that flow entry.
2244 *
2245 * 3. Quiesce the SRS and softrings by signaling the SRS. The SRS poll thread,
2246 *    SRS worker thread, and the soft ring threads are quiesced in sequence
2247 *    with the SRS worker thread serving as a master controller. This
2248 *    mechanism is explained in mac_srs_worker_quiesce().
2249 *
2250 * The restart mechanism to reactivate the SRS and softrings is explained
2251 * in mac_srs_worker_restart(). Here we just signal the SRS worker to start the
2252 * restart sequence.
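*
* As an illustrative sketch (not an additional code path), a client driving
* a temporary quiesce followed by a restart goes through:
*
*	mac_rx_srs_quiesce(srs, SRS_QUIESCE);	steps 1-3 above
*	... change Rx callbacks, rings, etc ...
*	mac_rx_srs_restart(srs);
*
* while a teardown instead uses:
*
*	mac_rx_srs_quiesce(srs, SRS_CONDEMNED);
*	mac_srs_free(srs);
*
* as done in mac_rx_srs_remove() below.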
2253 */
2254 void
2255 mac_rx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag)
2256 {
2257 	flow_entry_t *flent = srs->srs_flent;
2258 	uint_t mr_flag, srs_done_flag;
2259
2260 	ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent)));
2261 	ASSERT(!(srs->srs_type & SRST_TX));
2262
2263 	if (srs_quiesce_flag == SRS_CONDEMNED) {
2264 		mr_flag = MR_CONDEMNED;
2265 		srs_done_flag = SRS_CONDEMNED_DONE;
2266 		if (srs->srs_type & SRST_CLIENT_POLL_ENABLED)
2267 			mac_srs_client_poll_disable(srs->srs_mcip, srs);
2268 	} else {
2269 		ASSERT(srs_quiesce_flag == SRS_QUIESCE);
2270 		mr_flag = MR_QUIESCE;
2271 		srs_done_flag = SRS_QUIESCE_DONE;
2272 		if (srs->srs_type & SRST_CLIENT_POLL_ENABLED)
2273 			mac_srs_client_poll_quiesce(srs->srs_mcip, srs);
2274 	}
2275
2276 	if (srs->srs_ring != NULL) {
2277 		mac_rx_ring_quiesce(srs->srs_ring, mr_flag);
2278 	} else {
2279 		/*
2280 		 * SRS is driven by software classification. In case
2281 		 * of CONDEMNED, the top level teardown functions will
2282 		 * deal with flow removal.
2283 		 */
2284 		if (srs_quiesce_flag != SRS_CONDEMNED) {
2285 			FLOW_MARK(flent, FE_QUIESCE);
2286 			mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
2287 		}
2288 	}
2289
2290 	/*
2291 	 * Signal the SRS to quiesce itself, and then cv_wait for the
2292 	 * SRS quiesce to complete. The SRS worker thread will wake us
2293 	 * up when the quiesce is complete.
2294 	 */
2295 	mac_srs_signal(srs, srs_quiesce_flag);
2296 	mac_srs_quiesce_wait(srs, srs_done_flag);
2297 }
2298
2299 /*
2300 * Remove an SRS.
2301 */
2302 void
2303 mac_rx_srs_remove(mac_soft_ring_set_t *srs)
2304 {
2305 	flow_entry_t *flent = srs->srs_flent;
2306 	int i;
2307
2308 	mac_rx_srs_quiesce(srs, SRS_CONDEMNED);
2309 	/*
2310 	 * Locate and remove our entry in the fe_rx_srs[] array, and
2311 	 * adjust the fe_rx_srs array entries and array count by
2312 	 * moving the last entry into the vacated spot.
2313 	 */
2314 	mutex_enter(&flent->fe_lock);
2315 	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
2316 		if (flent->fe_rx_srs[i] == srs)
2317 			break;
2318 	}
2319
2320 	ASSERT(i != 0 && i < flent->fe_rx_srs_cnt);
2321 	if (i != flent->fe_rx_srs_cnt - 1) {
2322 		flent->fe_rx_srs[i] =
2323 		    flent->fe_rx_srs[flent->fe_rx_srs_cnt - 1];
2324 		i = flent->fe_rx_srs_cnt - 1;
2325 	}
2326
2327 	flent->fe_rx_srs[i] = NULL;
2328 	flent->fe_rx_srs_cnt--;
2329 	mutex_exit(&flent->fe_lock);
2330
2331 	mac_srs_free(srs);
2332 }
2333
2334 static void
2335 mac_srs_clear_flag(mac_soft_ring_set_t *srs, uint_t flag)
2336 {
2337 	mutex_enter(&srs->srs_lock);
2338 	srs->srs_state &= ~flag;
2339 	mutex_exit(&srs->srs_lock);
2340 }
2341
2342 void
2343 mac_rx_srs_restart(mac_soft_ring_set_t *srs)
2344 {
2345 	flow_entry_t *flent = srs->srs_flent;
2346 	mac_ring_t *mr;
2347
2348 	ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent)));
2349 	ASSERT((srs->srs_type & SRST_TX) == 0);
2350
2351 	/*
2352 	 * This handles a change in the number of SRSs between the quiesce
2353 	 * and restart operation of a flow.
2354 	 */
2355 	if (!SRS_QUIESCED(srs))
2356 		return;
2357
2358 	/*
2359 	 * Signal the SRS to restart itself. Wait for the restart to complete.
2360 	 * Note that we only restart the SRS if it is not marked as
2361 	 * permanently quiesced.
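	 * (Permanent quiesce is the SRS_QUIESCE_PERM state toggled by
	 * mac_srs_perm_quiesce() below.)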
2362 */ 2363 if (!SRS_QUIESCED_PERMANENT(srs)) { 2364 mac_srs_signal(srs, SRS_RESTART); 2365 mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); 2366 mac_srs_clear_flag(srs, SRS_RESTART_DONE); 2367 2368 mac_srs_client_poll_restart(srs->srs_mcip, srs); 2369 } 2370 2371 /* Finally clear the flags to let the packets in */ 2372 mr = srs->srs_ring; 2373 if (mr != NULL) { 2374 MAC_RING_UNMARK(mr, MR_QUIESCE); 2375 /* In case the ring was stopped, safely restart it */ 2376 if (mr->mr_state != MR_INUSE) 2377 (void) mac_start_ring(mr); 2378 } else { 2379 FLOW_UNMARK(flent, FE_QUIESCE); 2380 } 2381 } 2382 2383 /* 2384 * Temporary quiesce of a flow and associated Rx SRS. 2385 * Please see block comment above mac_rx_classify_flow_rem. 2386 */ 2387 /* ARGSUSED */ 2388 int 2389 mac_rx_classify_flow_quiesce(flow_entry_t *flent, void *arg) 2390 { 2391 int i; 2392 2393 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 2394 mac_rx_srs_quiesce((mac_soft_ring_set_t *)flent->fe_rx_srs[i], 2395 SRS_QUIESCE); 2396 } 2397 return (0); 2398 } 2399 2400 /* 2401 * Restart a flow and associated Rx SRS that has been quiesced temporarily 2402 * Please see block comment above mac_rx_classify_flow_rem 2403 */ 2404 /* ARGSUSED */ 2405 int 2406 mac_rx_classify_flow_restart(flow_entry_t *flent, void *arg) 2407 { 2408 int i; 2409 2410 for (i = 0; i < flent->fe_rx_srs_cnt; i++) 2411 mac_rx_srs_restart((mac_soft_ring_set_t *)flent->fe_rx_srs[i]); 2412 2413 return (0); 2414 } 2415 2416 void 2417 mac_srs_perm_quiesce(mac_client_handle_t mch, boolean_t on) 2418 { 2419 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2420 flow_entry_t *flent = mcip->mci_flent; 2421 mac_impl_t *mip = mcip->mci_mip; 2422 mac_soft_ring_set_t *mac_srs; 2423 int i; 2424 2425 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 2426 2427 if (flent == NULL) 2428 return; 2429 2430 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 2431 mac_srs = flent->fe_rx_srs[i]; 2432 mutex_enter(&mac_srs->srs_lock); 2433 if (on) 2434 mac_srs->srs_state |= SRS_QUIESCE_PERM; 2435 else 2436 mac_srs->srs_state &= ~SRS_QUIESCE_PERM; 2437 mutex_exit(&mac_srs->srs_lock); 2438 } 2439 } 2440 2441 void 2442 mac_rx_client_quiesce(mac_client_handle_t mch) 2443 { 2444 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2445 mac_impl_t *mip = mcip->mci_mip; 2446 2447 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 2448 2449 if (MCIP_DATAPATH_SETUP(mcip)) { 2450 (void) mac_rx_classify_flow_quiesce(mcip->mci_flent, 2451 NULL); 2452 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2453 mac_rx_classify_flow_quiesce, NULL); 2454 } 2455 } 2456 2457 void 2458 mac_rx_client_restart(mac_client_handle_t mch) 2459 { 2460 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2461 mac_impl_t *mip = mcip->mci_mip; 2462 2463 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 2464 2465 if (MCIP_DATAPATH_SETUP(mcip)) { 2466 (void) mac_rx_classify_flow_restart(mcip->mci_flent, NULL); 2467 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2468 mac_rx_classify_flow_restart, NULL); 2469 } 2470 } 2471 2472 /* 2473 * This function only quiesces the Tx SRS and softring worker threads. Callers 2474 * need to make sure that there aren't any mac client threads doing current or 2475 * future transmits in the mac before calling this function. 
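*
* A sketch of the usual calling pattern, as used by the client quiesce
* entry points below (illustrative only):
*
*	mac_tx_client_block(mcip);		stop new transmits, wait
*						for in-flight ones to drain
*	mac_tx_srs_quiesce(srs, SRS_QUIESCE);	then quiesce the SRS
*	...
*	mac_tx_srs_restart(srs);
*	mac_tx_client_unblock(mcip);		let transmits resume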
2476 */
2477 void
2478 mac_tx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag)
2479 {
2480 	mac_client_impl_t *mcip = srs->srs_mcip;
2481
2482 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
2483
2484 	ASSERT(srs->srs_type & SRST_TX);
2485 	ASSERT(srs_quiesce_flag == SRS_CONDEMNED ||
2486 	    srs_quiesce_flag == SRS_QUIESCE);
2487
2488 	/*
2489 	 * Signal the SRS to quiesce itself, and then cv_wait for the
2490 	 * SRS quiesce to complete. The SRS worker thread will wake us
2491 	 * up when the quiesce is complete.
2492 	 */
2493 	mac_srs_signal(srs, srs_quiesce_flag);
2494 	mac_srs_quiesce_wait(srs, srs_quiesce_flag == SRS_QUIESCE ?
2495 	    SRS_QUIESCE_DONE : SRS_CONDEMNED_DONE);
2496 }
2497
2498 void
2499 mac_tx_srs_restart(mac_soft_ring_set_t *srs)
2500 {
2501 	/*
2502 	 * Resizing the fanout could result in creation of new SRSs.
2503 	 * They may not necessarily be in the quiesced state, in which
2504 	 * case they need not be restarted.
2505 	 */
2506 	if (!SRS_QUIESCED(srs))
2507 		return;
2508
2509 	mac_srs_signal(srs, SRS_RESTART);
2510 	mac_srs_quiesce_wait(srs, SRS_RESTART_DONE);
2511 	mac_srs_clear_flag(srs, SRS_RESTART_DONE);
2512 }
2513
2514 /*
2515 * Temporary quiesce of a flow and its associated Tx SRS.
2516 * Please see the block comment above mac_rx_srs_quiesce.
2517 */
2518 /* ARGSUSED */
2519 int
2520 mac_tx_flow_quiesce(flow_entry_t *flent, void *arg)
2521 {
2522 	/*
2523 	 * The fe_tx_srs is null for a subflow on an interface that is
2524 	 * not plumbed.
2525 	 */
2526 	if (flent->fe_tx_srs != NULL)
2527 		mac_tx_srs_quiesce(flent->fe_tx_srs, SRS_QUIESCE);
2528 	return (0);
2529 }
2530
2531 /* ARGSUSED */
2532 int
2533 mac_tx_flow_restart(flow_entry_t *flent, void *arg)
2534 {
2535 	/*
2536 	 * The fe_tx_srs is null for a subflow on an interface that is
2537 	 * not plumbed.
2538 	 */
2539 	if (flent->fe_tx_srs != NULL)
2540 		mac_tx_srs_restart(flent->fe_tx_srs);
2541 	return (0);
2542 }
2543
2544 static void
2545 i_mac_tx_client_quiesce(mac_client_handle_t mch, uint_t srs_quiesce_flag)
2546 {
2547 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
2548
2549 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
2550
2551 	mac_tx_client_block(mcip);
2552 	if (MCIP_TX_SRS(mcip) != NULL) {
2553 		mac_tx_srs_quiesce(MCIP_TX_SRS(mcip), srs_quiesce_flag);
2554 		(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
2555 		    mac_tx_flow_quiesce, NULL);
2556 	}
2557 }
2558
2559 void
2560 mac_tx_client_quiesce(mac_client_handle_t mch)
2561 {
2562 	i_mac_tx_client_quiesce(mch, SRS_QUIESCE);
2563 }
2564
2565 void
2566 mac_tx_client_condemn(mac_client_handle_t mch)
2567 {
2568 	i_mac_tx_client_quiesce(mch, SRS_CONDEMNED);
2569 }
2570
2571 void
2572 mac_tx_client_restart(mac_client_handle_t mch)
2573 {
2574 	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
2575
2576 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
2577
2578 	mac_tx_client_unblock(mcip);
2579 	if (MCIP_TX_SRS(mcip) != NULL) {
2580 		mac_tx_srs_restart(MCIP_TX_SRS(mcip));
2581 		(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
2582 		    mac_tx_flow_restart, NULL);
2583 	}
2584 }
2585
2586 void
2587 mac_tx_client_flush(mac_client_impl_t *mcip)
2588 {
2589 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip));
2590
2591 	mac_tx_client_quiesce((mac_client_handle_t)mcip);
2592 	mac_tx_client_restart((mac_client_handle_t)mcip);
2593 }
2594
2595 void
2596 mac_client_quiesce(mac_client_impl_t *mcip)
2597 {
2598 	mac_rx_client_quiesce((mac_client_handle_t)mcip);
2599 	mac_tx_client_quiesce((mac_client_handle_t)mcip);
2600 }
2601
2602 void
2603 mac_client_restart(mac_client_impl_t *mcip)
2604 {
2605
mac_rx_client_restart((mac_client_handle_t)mcip);
2606 	mac_tx_client_restart((mac_client_handle_t)mcip);
2607 }
2608
2609 /*
2610 * Allocate a minor number.
2611 */
2612 minor_t
2613 mac_minor_hold(boolean_t sleep)
2614 {
2615 	id_t id;
2616
2617 	/*
2618 	 * Grab a value from the arena.
2619 	 */
2620 	atomic_inc_32(&minor_count);
2621
2622 	if (sleep)
2623 		return ((uint_t)id_alloc(minor_ids));
2624
2625 	if ((id = id_alloc_nosleep(minor_ids)) == -1) {
2626 		atomic_dec_32(&minor_count);
2627 		return (0);
2628 	}
2629
2630 	return ((uint_t)id);
2631 }
2632
2633 /*
2634 * Release a previously allocated minor number.
2635 */
2636 void
2637 mac_minor_rele(minor_t minor)
2638 {
2639 	/*
2640 	 * Return the value to the arena.
2641 	 */
2642 	id_free(minor_ids, minor);
2643 	atomic_dec_32(&minor_count);
2644 }
2645
2646 uint32_t
2647 mac_no_notification(mac_handle_t mh)
2648 {
2649 	mac_impl_t *mip = (mac_impl_t *)mh;
2650
2651 	return (((mip->mi_state_flags & MIS_LEGACY) != 0) ?
2652 	    mip->mi_capab_legacy.ml_unsup_note : 0);
2653 }
2654
2655 /*
2656 * Prevent any new opens of this mac in preparation for unregister.
2657 */
2658 int
2659 i_mac_disable(mac_impl_t *mip)
2660 {
2661 	mac_client_impl_t *mcip;
2662
2663 	rw_enter(&i_mac_impl_lock, RW_WRITER);
2664 	if (mip->mi_state_flags & MIS_DISABLED) {
2665 		/* Already disabled, return success */
2666 		rw_exit(&i_mac_impl_lock);
2667 		return (0);
2668 	}
2669 	/*
2670 	 * See if there are any other references to this mac_t (e.g., VLANs).
2671 	 * If so return failure. If all the other checks below pass, then
2672 	 * set mi_disabled atomically under the i_mac_impl_lock to prevent
2673 	 * any new VLANs from being created or new mac client opens of this
2674 	 * mac end point.
2675 	 */
2676 	if (mip->mi_ref > 0) {
2677 		rw_exit(&i_mac_impl_lock);
2678 		return (EBUSY);
2679 	}
2680
2681 	/*
2682 	 * mac clients must delete all multicast groups they join before
2683 	 * closing. bcast groups are reference counted; the last client
2684 	 * to delete the group will wait till the group is physically
2685 	 * deleted. Since all clients have closed this mac end point,
2686 	 * mi_bcast_ngrps must be zero at this point.
2687 	 */
2688 	ASSERT(mip->mi_bcast_ngrps == 0);
2689
2690 	/*
2691 	 * Don't let go of this if it has some flows.
2692 	 * All other code guarantees no flows are added to a disabled
2693 	 * mac, therefore it is sufficient to check for the flow table
2694 	 * only here.
2695 	 */
2696 	mcip = mac_primary_client_handle(mip);
2697 	if ((mcip != NULL) && mac_link_has_flows((mac_client_handle_t)mcip)) {
2698 		rw_exit(&i_mac_impl_lock);
2699 		return (ENOTEMPTY);
2700 	}
2701
2702 	mip->mi_state_flags |= MIS_DISABLED;
2703 	rw_exit(&i_mac_impl_lock);
2704 	return (0);
2705 }
2706
2707 int
2708 mac_disable_nowait(mac_handle_t mh)
2709 {
2710 	mac_impl_t *mip = (mac_impl_t *)mh;
2711 	int err;
2712
2713 	if ((err = i_mac_perim_enter_nowait(mip)) != 0)
2714 		return (err);
2715 	err = i_mac_disable(mip);
2716 	i_mac_perim_exit(mip);
2717 	return (err);
2718 }
2719
2720 int
2721 mac_disable(mac_handle_t mh)
2722 {
2723 	mac_impl_t *mip = (mac_impl_t *)mh;
2724 	int err;
2725
2726 	i_mac_perim_enter(mip);
2727 	err = i_mac_disable(mip);
2728 	i_mac_perim_exit(mip);
2729
2730 	/*
2731 	 * Clean up notification thread and wait for it to exit.
2732 	 */
2733 	if (err == 0)
2734 		i_mac_notify_exit(mip);
2735
2736 	return (err);
2737 }
2738
2739 /*
2740 * Called when the MAC instance has a non-empty flow table, to de-multiplex
2741 * incoming packets to the right flow.
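*
* Returns NULL when the packet has been delivered to a flow's callback, or
* the original mblk when no matching flow (or receive function) is found,
* in which case ownership of the packet returns to the caller.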
2742 */
2743 /* ARGSUSED */
2744 static mblk_t *
2745 mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp)
2746 {
2747 	flow_entry_t *flent = NULL;
2748 	uint_t flags = FLOW_INBOUND;
2749 	int err;
2750
2751 	err = mac_flow_lookup(mip->mi_flow_tab, mp, flags, &flent);
2752 	if (err != 0) {
2753 		/* no registered receive function */
2754 		return (mp);
2755 	} else {
2756 		mac_client_impl_t *mcip;
2757
2758 		/*
2759 		 * This flent might just be an additional one on the MAC client,
2760 		 * i.e. for classification purposes (different fdesc); however,
2761 		 * the resources (SRS et al.) are in the mci_flent, so if
2762 		 * this isn't the mci_flent, we need to get it.
2763 		 */
2764 		if ((mcip = flent->fe_mcip) != NULL &&
2765 		    mcip->mci_flent != flent) {
2766 			FLOW_REFRELE(flent);
2767 			flent = mcip->mci_flent;
2768 			FLOW_TRY_REFHOLD(flent, err);
2769 			if (err != 0)
2770 				return (mp);
2771 		}
2772 		(flent->fe_cb_fn)(flent->fe_cb_arg1, flent->fe_cb_arg2, mp,
2773 		    B_FALSE);
2774 		FLOW_REFRELE(flent);
2775 	}
2776 	return (NULL);
2777 }
2778
2779 mblk_t *
2780 mac_rx_flow(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain)
2781 {
2782 	mac_impl_t *mip = (mac_impl_t *)mh;
2783 	mblk_t *bp, *bp1, **bpp, *list = NULL;
2784
2785 	/*
2786 	 * We walk the chain and attempt to classify each packet.
2787 	 * The packets that couldn't be classified are returned
2788 	 * to the caller.
2789 	 */
2790 	bp = mp_chain;
2791 	bpp = &list;
2792 	while (bp != NULL) {
2793 		bp1 = bp;
2794 		bp = bp->b_next;
2795 		bp1->b_next = NULL;
2796
2797 		if (mac_rx_classify(mip, mrh, bp1) != NULL) {
2798 			*bpp = bp1;
2799 			bpp = &bp1->b_next;
2800 		}
2801 	}
2802 	return (list);
2803 }
2804
2805 static int
2806 mac_tx_flow_srs_wakeup(flow_entry_t *flent, void *arg)
2807 {
2808 	mac_ring_handle_t ring = arg;
2809
2810 	if (flent->fe_tx_srs)
2811 		mac_tx_srs_wakeup(flent->fe_tx_srs, ring);
2812 	return (0);
2813 }
2814
2815 void
2816 i_mac_tx_srs_notify(mac_impl_t *mip, mac_ring_handle_t ring)
2817 {
2818 	mac_client_impl_t *cclient;
2819 	mac_soft_ring_set_t *mac_srs;
2820
2821 	/*
2822 	 * After grabbing the mi_rw_lock, the list of clients can't change.
2823 	 * If there are any clients mi_disabled must be B_FALSE and can't
2824 	 * get set since there are clients. If there aren't any clients we
2825 	 * don't do anything. In any case the mip has to be valid. The driver
2826 	 * must make sure that it goes single threaded (with respect to mac
2827 	 * calls) and wait for all pending mac calls to finish before calling
2828 	 * mac_unregister.
2829 	 */
2830 	rw_enter(&i_mac_impl_lock, RW_READER);
2831 	if (mip->mi_state_flags & MIS_DISABLED) {
2832 		rw_exit(&i_mac_impl_lock);
2833 		return;
2834 	}
2835
2836 	/*
2837 	 * Get the MAC Tx SRS by walking the mac_client_handle list.
2838 	 */
2839 	rw_enter(&mip->mi_rw_lock, RW_READER);
2840 	for (cclient = mip->mi_clients_list; cclient != NULL;
2841 	    cclient = cclient->mci_client_next) {
2842 		if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) {
2843 			mac_tx_srs_wakeup(mac_srs, ring);
2844 		} else {
2845 			/*
2846 			 * Aggr opens underlying ports in exclusive mode
2847 			 * and registers flow control callbacks using
2848 			 * mac_tx_client_notify(). When opened in
2849 			 * exclusive mode, Tx SRS won't be created
2850 			 * during mac_unicast_add().
2851 			 */
2852 			if (cclient->mci_state_flags & MCIS_EXCLUSIVE) {
2853 				mac_tx_invoke_callbacks(cclient,
2854 				    (mac_tx_cookie_t)ring);
2855 			}
2856 		}
2857 		(void) mac_flow_walk(cclient->mci_subflow_tab,
2858 		    mac_tx_flow_srs_wakeup, ring);
2859 	}
2860 	rw_exit(&mip->mi_rw_lock);
2861 	rw_exit(&i_mac_impl_lock);
2862 }
2863
2864 /* ARGSUSED */
2865 void
2866 mac_multicast_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg,
2867     boolean_t add)
2868 {
2869 	mac_impl_t *mip = (mac_impl_t *)mh;
2870
2871 	i_mac_perim_enter((mac_impl_t *)mh);
2872 	/*
2873 	 * If no specific refresh function was given then default to the
2874 	 * driver's m_multicst entry point.
2875 	 */
2876 	if (refresh == NULL) {
2877 		refresh = mip->mi_multicst;
2878 		arg = mip->mi_driver;
2879 	}
2880
2881 	mac_bcast_refresh(mip, refresh, arg, add);
2882 	i_mac_perim_exit((mac_impl_t *)mh);
2883 }
2884
2885 void
2886 mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg)
2887 {
2888 	mac_impl_t *mip = (mac_impl_t *)mh;
2889
2890 	/*
2891 	 * If no specific refresh function was given then default to the
2892 	 * driver's m_promisc entry point.
2893 	 */
2894 	if (refresh == NULL) {
2895 		refresh = mip->mi_setpromisc;
2896 		arg = mip->mi_driver;
2897 	}
2898 	ASSERT(refresh != NULL);
2899
2900 	/*
2901 	 * Call the refresh function with the current promiscuity.
2902 	 */
2903 	refresh(arg, (mip->mi_devpromisc != 0));
2904 }
2905
2906 /*
2907 * The mac client requests that the mac not change its margin size to
2908 * be less than the specified value. If "current" is B_TRUE, then the client
2909 * requests the mac not change its margin size to be smaller than the
2910 * current size. Further, return the current margin size value in this case.
2911 *
2912 * We keep every requested size in an ordered list from largest to smallest.
2913 */
2914 int
2915 mac_margin_add(mac_handle_t mh, uint32_t *marginp, boolean_t current)
2916 {
2917 	mac_impl_t *mip = (mac_impl_t *)mh;
2918 	mac_margin_req_t **pp, *p;
2919 	int err = 0;
2920
2921 	rw_enter(&(mip->mi_rw_lock), RW_WRITER);
2922 	if (current)
2923 		*marginp = mip->mi_margin;
2924
2925 	/*
2926 	 * If the current margin value cannot satisfy the margin requested,
2927 	 * return ENOTSUP directly.
2928 	 */
2929 	if (*marginp > mip->mi_margin) {
2930 		err = ENOTSUP;
2931 		goto done;
2932 	}
2933
2934 	/*
2935 	 * Check whether the given margin is already in the list. If so,
2936 	 * bump the reference count.
2937 	 */
2938 	for (pp = &mip->mi_mmrp; (p = *pp) != NULL; pp = &p->mmr_nextp) {
2939 		if (p->mmr_margin == *marginp) {
2940 			/*
2941 			 * The margin requested is already in the list,
2942 			 * so just bump the reference count.
2943 			 */
2944 			p->mmr_ref++;
2945 			goto done;
2946 		}
2947 		if (p->mmr_margin < *marginp)
2948 			break;
2949 	}
2950
2951
2952 	p = kmem_zalloc(sizeof (mac_margin_req_t), KM_SLEEP);
2953 	p->mmr_margin = *marginp;
2954 	p->mmr_ref++;
2955 	p->mmr_nextp = *pp;
2956 	*pp = p;
2957
2958 done:
2959 	rw_exit(&(mip->mi_rw_lock));
2960 	return (err);
2961 }
2962
2963 /*
2964 * The mac client requests to cancel its previous mac_margin_add() request.
2965 * We remove the requested margin size from the list.
2966 */
2967 int
2968 mac_margin_remove(mac_handle_t mh, uint32_t margin)
2969 {
2970 	mac_impl_t *mip = (mac_impl_t *)mh;
2971 	mac_margin_req_t **pp, *p;
2972 	int err = 0;
2973
2974 	rw_enter(&(mip->mi_rw_lock), RW_WRITER);
2975 	/*
2976 	 * Find the entry in the list for the given margin.
2977 	 */
2978 	for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) {
2979 		if (p->mmr_margin == margin) {
2980 			if (--p->mmr_ref == 0)
2981 				break;
2982
2983 			/*
2984 			 * There is still a reference to this margin so
2985 			 * there's nothing more to do.
2986 			 */
2987 			goto done;
2988 		}
2989 	}
2990
2991 	/*
2992 	 * We did not find an entry for the given margin.
2993 	 */
2994 	if (p == NULL) {
2995 		err = ENOENT;
2996 		goto done;
2997 	}
2998
2999 	ASSERT(p->mmr_ref == 0);
3000
3001 	/*
3002 	 * Remove it from the list.
3003 	 */
3004 	*pp = p->mmr_nextp;
3005 	kmem_free(p, sizeof (mac_margin_req_t));
3006 done:
3007 	rw_exit(&(mip->mi_rw_lock));
3008 	return (err);
3009 }
3010
3011 boolean_t
3012 mac_margin_update(mac_handle_t mh, uint32_t margin)
3013 {
3014 	mac_impl_t *mip = (mac_impl_t *)mh;
3015 	uint32_t margin_needed = 0;
3016
3017 	rw_enter(&(mip->mi_rw_lock), RW_WRITER);
3018
3019 	if (mip->mi_mmrp != NULL)
3020 		margin_needed = mip->mi_mmrp->mmr_margin;
3021
3022 	if (margin_needed <= margin)
3023 		mip->mi_margin = margin;
3024
3025 	rw_exit(&(mip->mi_rw_lock));
3026
3027 	if (margin_needed <= margin)
3028 		i_mac_notify(mip, MAC_NOTE_MARGIN);
3029
3030 	return (margin_needed <= margin);
3031 }
3032
3033 /*
3034 * MAC clients use this interface to request that a MAC device not change its
3035 * MTU below the specified amount. At this time, that amount must be within the
3036 * range of the device's current minimum and the device's current maximum; e.g.,
3037 * a client cannot request a 3000 byte MTU when the device's MTU is currently
3038 * 2000.
3039 *
3040 * If "current" is set to B_TRUE, then the request is simply to reserve the
3041 * current underlying mac's maximum for this mac client and return it in mtup.
3042 */
3043 int
3044 mac_mtu_add(mac_handle_t mh, uint32_t *mtup, boolean_t current)
3045 {
3046 	mac_impl_t *mip = (mac_impl_t *)mh;
3047 	mac_mtu_req_t *prev, *cur;
3048 	mac_propval_range_t mpr;
3049 	int err;
3050
3051 	i_mac_perim_enter(mip);
3052 	rw_enter(&mip->mi_rw_lock, RW_WRITER);
3053
3054 	if (current == B_TRUE)
3055 		*mtup = mip->mi_sdu_max;
3056 	mpr.mpr_count = 1;
3057 	err = mac_prop_info(mh, MAC_PROP_MTU, "mtu", NULL, 0, &mpr, NULL);
3058 	if (err != 0) {
3059 		rw_exit(&mip->mi_rw_lock);
3060 		i_mac_perim_exit(mip);
3061 		return (err);
3062 	}
3063
3064 	if (*mtup > mip->mi_sdu_max ||
3065 	    *mtup < mpr.mpr_range_uint32[0].mpur_min) {
3066 		rw_exit(&mip->mi_rw_lock);
3067 		i_mac_perim_exit(mip);
3068 		return (ENOTSUP);
3069 	}
3070
3071 	prev = NULL;
3072 	for (cur = mip->mi_mtrp; cur != NULL; cur = cur->mtr_nextp) {
3073 		if (*mtup == cur->mtr_mtu) {
3074 			cur->mtr_ref++;
3075 			rw_exit(&mip->mi_rw_lock);
3076 			i_mac_perim_exit(mip);
3077 			return (0);
3078 		}
3079
3080 		if (*mtup > cur->mtr_mtu)
3081 			break;
3082
3083 		prev = cur;
3084 	}
3085
3086 	cur = kmem_alloc(sizeof (mac_mtu_req_t), KM_SLEEP);
3087 	cur->mtr_mtu = *mtup;
3088 	cur->mtr_ref = 1;
3089 	if (prev != NULL) {
3090 		cur->mtr_nextp = prev->mtr_nextp;
3091 		prev->mtr_nextp = cur;
3092 	} else {
3093 		cur->mtr_nextp = mip->mi_mtrp;
3094 		mip->mi_mtrp = cur;
3095 	}
3096
3097 	rw_exit(&mip->mi_rw_lock);
3098 	i_mac_perim_exit(mip);
3099 	return (0);
3100 }
3101
3102 int
3103 mac_mtu_remove(mac_handle_t mh, uint32_t mtu)
3104 {
3105 	mac_impl_t *mip = (mac_impl_t *)mh;
3106 	mac_mtu_req_t *cur, *prev;
3107
3108 	i_mac_perim_enter(mip);
3109 	rw_enter(&mip->mi_rw_lock, RW_WRITER);
3110
3111 	prev = NULL;
3112 	for (cur = mip->mi_mtrp; cur != NULL; cur = cur->mtr_nextp) {
3113 		if (cur->mtr_mtu == mtu) {
3114 			ASSERT(cur->mtr_ref > 0);
3115 			cur->mtr_ref--;
3116 			if
(cur->mtr_ref == 0) { 3117 if (prev == NULL) { 3118 mip->mi_mtrp = cur->mtr_nextp; 3119 } else { 3120 prev->mtr_nextp = cur->mtr_nextp; 3121 } 3122 kmem_free(cur, sizeof (mac_mtu_req_t)); 3123 } 3124 rw_exit(&mip->mi_rw_lock); 3125 i_mac_perim_exit(mip); 3126 return (0); 3127 } 3128 3129 prev = cur; 3130 } 3131 3132 rw_exit(&mip->mi_rw_lock); 3133 i_mac_perim_exit(mip); 3134 return (ENOENT); 3135 } 3136 3137 /* 3138 * MAC Type Plugin functions. 3139 */ 3140 3141 mactype_t * 3142 mactype_getplugin(const char *pname) 3143 { 3144 mactype_t *mtype = NULL; 3145 boolean_t tried_modload = B_FALSE; 3146 3147 mutex_enter(&i_mactype_lock); 3148 3149 find_registered_mactype: 3150 if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname, 3151 (mod_hash_val_t *)&mtype) != 0) { 3152 if (!tried_modload) { 3153 /* 3154 * If the plugin has not yet been loaded, then 3155 * attempt to load it now. If modload() succeeds, 3156 * the plugin should have registered using 3157 * mactype_register(), in which case we can go back 3158 * and attempt to find it again. 3159 */ 3160 if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) { 3161 tried_modload = B_TRUE; 3162 goto find_registered_mactype; 3163 } 3164 } 3165 } else { 3166 /* 3167 * Note that there's no danger that the plugin we've loaded 3168 * could be unloaded between the modload() step and the 3169 * reference count bump here, as we're holding 3170 * i_mactype_lock, which mactype_unregister() also holds. 3171 */ 3172 atomic_inc_32(&mtype->mt_ref); 3173 } 3174 3175 mutex_exit(&i_mactype_lock); 3176 return (mtype); 3177 } 3178 3179 mactype_register_t * 3180 mactype_alloc(uint_t mactype_version) 3181 { 3182 mactype_register_t *mtrp; 3183 3184 /* 3185 * Make sure there isn't a version mismatch between the plugin and 3186 * the framework. In the future, if multiple versions are 3187 * supported, this check could become more sophisticated. 3188 */ 3189 if (mactype_version != MACTYPE_VERSION) 3190 return (NULL); 3191 3192 mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP); 3193 mtrp->mtr_version = mactype_version; 3194 return (mtrp); 3195 } 3196 3197 void 3198 mactype_free(mactype_register_t *mtrp) 3199 { 3200 kmem_free(mtrp, sizeof (mactype_register_t)); 3201 } 3202 3203 int 3204 mactype_register(mactype_register_t *mtrp) 3205 { 3206 mactype_t *mtp; 3207 mactype_ops_t *ops = mtrp->mtr_ops; 3208 3209 /* Do some sanity checking before we register this MAC type. */ 3210 if (mtrp->mtr_ident == NULL || ops == NULL) 3211 return (EINVAL); 3212 3213 /* 3214 * Verify that all mandatory callbacks are set in the ops 3215 * vector. 
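	 *
	 * (For reference, a plugin typically builds its registration from
	 * its module _init along these lines -- a sketch of expected usage,
	 * not a requirement imposed by this function:
	 *
	 *	mtrp = mactype_alloc(MACTYPE_VERSION);
	 *	mtrp->mtr_ident = ...;
	 *	mtrp->mtr_ops = ...;
	 *	mtrp->mtr_mactype = ...;
	 *	mtrp->mtr_addrlen = ...;
	 *	err = mactype_register(mtrp);
	 *	mactype_free(mtrp);
	 *
	 * where the ops vector must supply at least the callbacks checked
	 * below.)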
3216 	 */
3217 	if (ops->mtops_unicst_verify == NULL ||
3218 	    ops->mtops_multicst_verify == NULL ||
3219 	    ops->mtops_sap_verify == NULL ||
3220 	    ops->mtops_header == NULL ||
3221 	    ops->mtops_header_info == NULL) {
3222 		return (EINVAL);
3223 	}
3224
3225 	mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP);
3226 	mtp->mt_ident = mtrp->mtr_ident;
3227 	mtp->mt_ops = *ops;
3228 	mtp->mt_type = mtrp->mtr_mactype;
3229 	mtp->mt_nativetype = mtrp->mtr_nativetype;
3230 	mtp->mt_addr_length = mtrp->mtr_addrlen;
3231 	if (mtrp->mtr_brdcst_addr != NULL) {
3232 		mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP);
3233 		bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr,
3234 		    mtrp->mtr_addrlen);
3235 	}
3236
3237 	mtp->mt_stats = mtrp->mtr_stats;
3238 	mtp->mt_statcount = mtrp->mtr_statcount;
3239
3240 	mtp->mt_mapping = mtrp->mtr_mapping;
3241 	mtp->mt_mappingcount = mtrp->mtr_mappingcount;
3242
3243 	if (mod_hash_insert(i_mactype_hash,
3244 	    (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) {
3245 		kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
3246 		kmem_free(mtp, sizeof (*mtp));
3247 		return (EEXIST);
3248 	}
3249 	return (0);
3250 }
3251
3252 int
3253 mactype_unregister(const char *ident)
3254 {
3255 	mactype_t *mtp;
3256 	mod_hash_val_t val;
3257 	int err;
3258
3259 	/*
3260 	 * Let's not allow MAC drivers to use this plugin while we're
3261 	 * trying to unregister it. Holding i_mactype_lock also prevents a
3262 	 * plugin from unregistering while a MAC driver is attempting to
3263 	 * hold a reference to it in mactype_getplugin().
3264 	 */
3265 	mutex_enter(&i_mactype_lock);
3266
3267 	if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident,
3268 	    (mod_hash_val_t *)&mtp)) != 0) {
3269 		/* A plugin is trying to unregister, but it never registered. */
3270 		err = ENXIO;
3271 		goto done;
3272 	}
3273
3274 	if (mtp->mt_ref != 0) {
3275 		err = EBUSY;
3276 		goto done;
3277 	}
3278
3279 	err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val);
3280 	ASSERT(err == 0);
3281 	if (err != 0) {
3282 		/* This should never happen, thus the ASSERT() above. */
3283 		err = EINVAL;
3284 		goto done;
3285 	}
3286 	ASSERT(mtp == (mactype_t *)val);
3287
3288 	if (mtp->mt_brdcst_addr != NULL)
3289 		kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length);
3290 	kmem_free(mtp, sizeof (mactype_t));
3291 done:
3292 	mutex_exit(&i_mactype_lock);
3293 	return (err);
3294 }
3295
3296 /*
3297 * Checks the size of the value specified for a property as
3298 * part of a property operation. Returns B_TRUE if the size is
3299 * correct, B_FALSE otherwise.
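*
* For example, a MAC_PROP_MTU operation must supply at least
* sizeof (uint32_t) bytes, and any range operation (is_range == B_TRUE)
* must supply at least sizeof (mac_propval_range_t) bytes regardless of
* the property.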
3300 */ 3301 boolean_t 3302 mac_prop_check_size(mac_prop_id_t id, uint_t valsize, boolean_t is_range) 3303 { 3304 uint_t minsize = 0; 3305 3306 if (is_range) 3307 return (valsize >= sizeof (mac_propval_range_t)); 3308 3309 switch (id) { 3310 case MAC_PROP_ZONE: 3311 minsize = sizeof (dld_ioc_zid_t); 3312 break; 3313 case MAC_PROP_AUTOPUSH: 3314 if (valsize != 0) 3315 minsize = sizeof (struct dlautopush); 3316 break; 3317 case MAC_PROP_TAGMODE: 3318 minsize = sizeof (link_tagmode_t); 3319 break; 3320 case MAC_PROP_RESOURCE: 3321 case MAC_PROP_RESOURCE_EFF: 3322 minsize = sizeof (mac_resource_props_t); 3323 break; 3324 case MAC_PROP_DUPLEX: 3325 minsize = sizeof (link_duplex_t); 3326 break; 3327 case MAC_PROP_SPEED: 3328 minsize = sizeof (uint64_t); 3329 break; 3330 case MAC_PROP_STATUS: 3331 minsize = sizeof (link_state_t); 3332 break; 3333 case MAC_PROP_AUTONEG: 3334 case MAC_PROP_EN_AUTONEG: 3335 minsize = sizeof (uint8_t); 3336 break; 3337 case MAC_PROP_MTU: 3338 case MAC_PROP_LLIMIT: 3339 case MAC_PROP_LDECAY: 3340 minsize = sizeof (uint32_t); 3341 break; 3342 case MAC_PROP_FLOWCTRL: 3343 minsize = sizeof (link_flowctrl_t); 3344 break; 3345 case MAC_PROP_ADV_FEC_CAP: 3346 case MAC_PROP_EN_FEC_CAP: 3347 minsize = sizeof (link_fec_t); 3348 break; 3349 case MAC_PROP_ADV_5000FDX_CAP: 3350 case MAC_PROP_EN_5000FDX_CAP: 3351 case MAC_PROP_ADV_2500FDX_CAP: 3352 case MAC_PROP_EN_2500FDX_CAP: 3353 case MAC_PROP_ADV_100GFDX_CAP: 3354 case MAC_PROP_EN_100GFDX_CAP: 3355 case MAC_PROP_ADV_50GFDX_CAP: 3356 case MAC_PROP_EN_50GFDX_CAP: 3357 case MAC_PROP_ADV_40GFDX_CAP: 3358 case MAC_PROP_EN_40GFDX_CAP: 3359 case MAC_PROP_ADV_25GFDX_CAP: 3360 case MAC_PROP_EN_25GFDX_CAP: 3361 case MAC_PROP_ADV_10GFDX_CAP: 3362 case MAC_PROP_EN_10GFDX_CAP: 3363 case MAC_PROP_ADV_1000HDX_CAP: 3364 case MAC_PROP_EN_1000HDX_CAP: 3365 case MAC_PROP_ADV_100FDX_CAP: 3366 case MAC_PROP_EN_100FDX_CAP: 3367 case MAC_PROP_ADV_100HDX_CAP: 3368 case MAC_PROP_EN_100HDX_CAP: 3369 case MAC_PROP_ADV_10FDX_CAP: 3370 case MAC_PROP_EN_10FDX_CAP: 3371 case MAC_PROP_ADV_10HDX_CAP: 3372 case MAC_PROP_EN_10HDX_CAP: 3373 case MAC_PROP_ADV_100T4_CAP: 3374 case MAC_PROP_EN_100T4_CAP: 3375 minsize = sizeof (uint8_t); 3376 break; 3377 case MAC_PROP_PVID: 3378 minsize = sizeof (uint16_t); 3379 break; 3380 case MAC_PROP_IPTUN_HOPLIMIT: 3381 minsize = sizeof (uint32_t); 3382 break; 3383 case MAC_PROP_IPTUN_ENCAPLIMIT: 3384 minsize = sizeof (uint32_t); 3385 break; 3386 case MAC_PROP_MAX_TX_RINGS_AVAIL: 3387 case MAC_PROP_MAX_RX_RINGS_AVAIL: 3388 case MAC_PROP_MAX_RXHWCLNT_AVAIL: 3389 case MAC_PROP_MAX_TXHWCLNT_AVAIL: 3390 minsize = sizeof (uint_t); 3391 break; 3392 case MAC_PROP_WL_ESSID: 3393 minsize = sizeof (wl_linkstatus_t); 3394 break; 3395 case MAC_PROP_WL_BSSID: 3396 minsize = sizeof (wl_bssid_t); 3397 break; 3398 case MAC_PROP_WL_BSSTYPE: 3399 minsize = sizeof (wl_bss_type_t); 3400 break; 3401 case MAC_PROP_WL_LINKSTATUS: 3402 minsize = sizeof (wl_linkstatus_t); 3403 break; 3404 case MAC_PROP_WL_DESIRED_RATES: 3405 minsize = sizeof (wl_rates_t); 3406 break; 3407 case MAC_PROP_WL_SUPPORTED_RATES: 3408 minsize = sizeof (wl_rates_t); 3409 break; 3410 case MAC_PROP_WL_AUTH_MODE: 3411 minsize = sizeof (wl_authmode_t); 3412 break; 3413 case MAC_PROP_WL_ENCRYPTION: 3414 minsize = sizeof (wl_encryption_t); 3415 break; 3416 case MAC_PROP_WL_RSSI: 3417 minsize = sizeof (wl_rssi_t); 3418 break; 3419 case MAC_PROP_WL_PHY_CONFIG: 3420 minsize = sizeof (wl_phy_conf_t); 3421 break; 3422 case MAC_PROP_WL_CAPABILITY: 3423 minsize = sizeof (wl_capability_t); 
3424 break; 3425 case MAC_PROP_WL_WPA: 3426 minsize = sizeof (wl_wpa_t); 3427 break; 3428 case MAC_PROP_WL_SCANRESULTS: 3429 minsize = sizeof (wl_wpa_ess_t); 3430 break; 3431 case MAC_PROP_WL_POWER_MODE: 3432 minsize = sizeof (wl_ps_mode_t); 3433 break; 3434 case MAC_PROP_WL_RADIO: 3435 minsize = sizeof (wl_radio_t); 3436 break; 3437 case MAC_PROP_WL_ESS_LIST: 3438 minsize = sizeof (wl_ess_list_t); 3439 break; 3440 case MAC_PROP_WL_KEY_TAB: 3441 minsize = sizeof (wl_wep_key_tab_t); 3442 break; 3443 case MAC_PROP_WL_CREATE_IBSS: 3444 minsize = sizeof (wl_create_ibss_t); 3445 break; 3446 case MAC_PROP_WL_SETOPTIE: 3447 minsize = sizeof (wl_wpa_ie_t); 3448 break; 3449 case MAC_PROP_WL_DELKEY: 3450 minsize = sizeof (wl_del_key_t); 3451 break; 3452 case MAC_PROP_WL_KEY: 3453 minsize = sizeof (wl_key_t); 3454 break; 3455 case MAC_PROP_WL_MLME: 3456 minsize = sizeof (wl_mlme_t); 3457 break; 3458 case MAC_PROP_VN_PROMISC_FILTERED: 3459 minsize = sizeof (boolean_t); 3460 break; 3461 } 3462 3463 return (valsize >= minsize); 3464 } 3465 3466 /* 3467 * mac_set_prop() sets MAC or hardware driver properties: 3468 * 3469 * - MAC-managed properties such as resource properties include maxbw, 3470 * priority, and cpu binding list, as well as the default port VID 3471 * used by bridging. These properties are consumed by the MAC layer 3472 * itself and not passed down to the driver. For resource control 3473 * properties, this function invokes mac_set_resources() which will 3474 * cache the property value in mac_impl_t and may call 3475 * mac_client_set_resource() to update property value of the primary 3476 * mac client, if it exists. 3477 * 3478 * - Properties which act on the hardware and must be passed to the 3479 * driver, such as MTU, through the driver's mc_setprop() entry point. 3480 */ 3481 int 3482 mac_set_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, 3483 uint_t valsize) 3484 { 3485 int err = ENOTSUP; 3486 mac_impl_t *mip = (mac_impl_t *)mh; 3487 3488 ASSERT(MAC_PERIM_HELD(mh)); 3489 3490 switch (id) { 3491 case MAC_PROP_RESOURCE: { 3492 mac_resource_props_t *mrp; 3493 3494 /* call mac_set_resources() for MAC properties */ 3495 ASSERT(valsize >= sizeof (mac_resource_props_t)); 3496 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 3497 bcopy(val, mrp, sizeof (*mrp)); 3498 err = mac_set_resources(mh, mrp); 3499 kmem_free(mrp, sizeof (*mrp)); 3500 break; 3501 } 3502 3503 case MAC_PROP_PVID: 3504 ASSERT(valsize >= sizeof (uint16_t)); 3505 if (mip->mi_state_flags & MIS_IS_VNIC) 3506 return (EINVAL); 3507 err = mac_set_pvid(mh, *(uint16_t *)val); 3508 break; 3509 3510 case MAC_PROP_MTU: { 3511 uint32_t mtu; 3512 3513 ASSERT(valsize >= sizeof (uint32_t)); 3514 bcopy(val, &mtu, sizeof (mtu)); 3515 err = mac_set_mtu(mh, mtu, NULL); 3516 break; 3517 } 3518 3519 case MAC_PROP_LLIMIT: 3520 case MAC_PROP_LDECAY: { 3521 uint32_t learnval; 3522 3523 if (valsize < sizeof (learnval) || 3524 (mip->mi_state_flags & MIS_IS_VNIC)) 3525 return (EINVAL); 3526 bcopy(val, &learnval, sizeof (learnval)); 3527 if (learnval == 0 && id == MAC_PROP_LDECAY) 3528 return (EINVAL); 3529 if (id == MAC_PROP_LLIMIT) 3530 mip->mi_llimit = learnval; 3531 else 3532 mip->mi_ldecay = learnval; 3533 err = 0; 3534 break; 3535 } 3536 3537 case MAC_PROP_ADV_FEC_CAP: 3538 case MAC_PROP_EN_FEC_CAP: { 3539 link_fec_t fec; 3540 3541 ASSERT(valsize >= sizeof (link_fec_t)); 3542 3543 /* 3544 * fec cannot be zero, and auto must be set exclusively. 
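		 * For example, LINK_FEC_AUTO by itself is accepted, while 0
		 * or (LINK_FEC_AUTO | any other LINK_FEC_* bit) is rejected
		 * with EINVAL before the driver's mc_setprop() is invoked.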
3545 */ 3546 bcopy(val, &fec, sizeof (link_fec_t)); 3547 if (fec == 0) 3548 return (EINVAL); 3549 if ((fec & LINK_FEC_AUTO) != 0 && (fec & ~LINK_FEC_AUTO) != 0) 3550 return (EINVAL); 3551 3552 if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) { 3553 err = mip->mi_callbacks->mc_setprop(mip->mi_driver, 3554 name, id, valsize, val); 3555 } 3556 break; 3557 } 3558 3559 default: 3560 /* For other driver properties, call driver's callback */ 3561 if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) { 3562 err = mip->mi_callbacks->mc_setprop(mip->mi_driver, 3563 name, id, valsize, val); 3564 } 3565 } 3566 return (err); 3567 } 3568 3569 /* 3570 * mac_get_prop() gets MAC or device driver properties. 3571 * 3572 * If the property is a driver property, mac_get_prop() calls driver's callback 3573 * entry point to get it. 3574 * If the property is a MAC property, mac_get_prop() invokes mac_get_resources() 3575 * which returns the cached value in mac_impl_t. 3576 */ 3577 int 3578 mac_get_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, 3579 uint_t valsize) 3580 { 3581 int err = ENOTSUP; 3582 mac_impl_t *mip = (mac_impl_t *)mh; 3583 uint_t rings; 3584 uint_t vlinks; 3585 3586 bzero(val, valsize); 3587 3588 switch (id) { 3589 case MAC_PROP_RESOURCE: { 3590 mac_resource_props_t *mrp; 3591 3592 /* If mac property, read from cache */ 3593 ASSERT(valsize >= sizeof (mac_resource_props_t)); 3594 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 3595 mac_get_resources(mh, mrp); 3596 bcopy(mrp, val, sizeof (*mrp)); 3597 kmem_free(mrp, sizeof (*mrp)); 3598 return (0); 3599 } 3600 case MAC_PROP_RESOURCE_EFF: { 3601 mac_resource_props_t *mrp; 3602 3603 /* If mac effective property, read from client */ 3604 ASSERT(valsize >= sizeof (mac_resource_props_t)); 3605 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 3606 mac_get_effective_resources(mh, mrp); 3607 bcopy(mrp, val, sizeof (*mrp)); 3608 kmem_free(mrp, sizeof (*mrp)); 3609 return (0); 3610 } 3611 3612 case MAC_PROP_PVID: 3613 ASSERT(valsize >= sizeof (uint16_t)); 3614 if (mip->mi_state_flags & MIS_IS_VNIC) 3615 return (EINVAL); 3616 *(uint16_t *)val = mac_get_pvid(mh); 3617 return (0); 3618 3619 case MAC_PROP_LLIMIT: 3620 case MAC_PROP_LDECAY: 3621 ASSERT(valsize >= sizeof (uint32_t)); 3622 if (mip->mi_state_flags & MIS_IS_VNIC) 3623 return (EINVAL); 3624 if (id == MAC_PROP_LLIMIT) 3625 bcopy(&mip->mi_llimit, val, sizeof (mip->mi_llimit)); 3626 else 3627 bcopy(&mip->mi_ldecay, val, sizeof (mip->mi_ldecay)); 3628 return (0); 3629 3630 case MAC_PROP_MTU: { 3631 uint32_t sdu; 3632 3633 ASSERT(valsize >= sizeof (uint32_t)); 3634 mac_sdu_get2(mh, NULL, &sdu, NULL); 3635 bcopy(&sdu, val, sizeof (sdu)); 3636 3637 return (0); 3638 } 3639 case MAC_PROP_STATUS: { 3640 link_state_t link_state; 3641 3642 if (valsize < sizeof (link_state)) 3643 return (EINVAL); 3644 link_state = mac_link_get(mh); 3645 bcopy(&link_state, val, sizeof (link_state)); 3646 3647 return (0); 3648 } 3649 3650 case MAC_PROP_MAX_RX_RINGS_AVAIL: 3651 case MAC_PROP_MAX_TX_RINGS_AVAIL: 3652 ASSERT(valsize >= sizeof (uint_t)); 3653 rings = id == MAC_PROP_MAX_RX_RINGS_AVAIL ? 3654 mac_rxavail_get(mh) : mac_txavail_get(mh); 3655 bcopy(&rings, val, sizeof (uint_t)); 3656 return (0); 3657 3658 case MAC_PROP_MAX_RXHWCLNT_AVAIL: 3659 case MAC_PROP_MAX_TXHWCLNT_AVAIL: 3660 ASSERT(valsize >= sizeof (uint_t)); 3661 vlinks = id == MAC_PROP_MAX_RXHWCLNT_AVAIL ? 
3662 		    mac_rxhwlnksavail_get(mh) : mac_txhwlnksavail_get(mh);
3663 		bcopy(&vlinks, val, sizeof (uint_t));
3664 		return (0);
3665
3666 	case MAC_PROP_RXRINGSRANGE:
3667 	case MAC_PROP_TXRINGSRANGE:
3668 		/*
3669 		 * The values for these properties are returned through
3670 		 * the MAC_PROP_RESOURCE property.
3671 		 */
3672 		return (0);
3673
3674 	default:
3675 		break;
3676
3677 	}
3678
3679 	/* If driver property, request from driver */
3680 	if (mip->mi_callbacks->mc_callbacks & MC_GETPROP) {
3681 		err = mip->mi_callbacks->mc_getprop(mip->mi_driver, name, id,
3682 		    valsize, val);
3683 	}
3684
3685 	return (err);
3686 }
3687
3688 /*
3689 * Helper function to initialize the range structure for use in
3690 * mac_get_prop. If the type can be other than uint32, we can
3691 * pass that as an arg.
3692 */
3693 static void
3694 _mac_set_range(mac_propval_range_t *range, uint32_t min, uint32_t max)
3695 {
3696 	range->mpr_count = 1;
3697 	range->mpr_type = MAC_PROPVAL_UINT32;
3698 	range->mpr_range_uint32[0].mpur_min = min;
3699 	range->mpr_range_uint32[0].mpur_max = max;
3700 }
3701
3702 /*
3703 * Returns information about the specified property, such as default
3704 * values or permissions.
3705 */
3706 int
3707 mac_prop_info(mac_handle_t mh, mac_prop_id_t id, char *name,
3708     void *default_val, uint_t default_size, mac_propval_range_t *range,
3709     uint_t *perm)
3710 {
3711 	mac_prop_info_state_t state;
3712 	mac_impl_t *mip = (mac_impl_t *)mh;
3713 	uint_t max;
3714
3715 	/*
3716 	 * A property is read/write by default unless the driver says
3717 	 * otherwise.
3718 	 */
3719 	if (perm != NULL)
3720 		*perm = MAC_PROP_PERM_RW;
3721
3722 	if (default_val != NULL)
3723 		bzero(default_val, default_size);
3724
3725 	/*
3726 	 * First, handle framework properties for which we don't need to
3727 	 * involve the driver.
3728 	 */
3729 	switch (id) {
3730 	case MAC_PROP_RESOURCE:
3731 	case MAC_PROP_PVID:
3732 	case MAC_PROP_LLIMIT:
3733 	case MAC_PROP_LDECAY:
3734 		return (0);
3735
3736 	case MAC_PROP_MAX_RX_RINGS_AVAIL:
3737 	case MAC_PROP_MAX_TX_RINGS_AVAIL:
3738 	case MAC_PROP_MAX_RXHWCLNT_AVAIL:
3739 	case MAC_PROP_MAX_TXHWCLNT_AVAIL:
3740 		if (perm != NULL)
3741 			*perm = MAC_PROP_PERM_READ;
3742 		return (0);
3743
3744 	case MAC_PROP_RXRINGSRANGE:
3745 	case MAC_PROP_TXRINGSRANGE:
3746 		/*
3747 		 * Currently, we support ranges for the RX and TX rings
3748 		 * properties. When we extend this support to maxbw, cpus,
3749 		 * and priority, we should move this to mac_get_resources.
3750 		 * There is no default value for RX or TX rings.
3751 		 */
3752 		if ((mip->mi_state_flags & MIS_IS_VNIC) &&
3753 		    mac_is_vnic_primary(mh)) {
3754 			/*
3755 			 * We don't support setting rings for a VLAN
3756 			 * data link because it shares its ring with the
3757 			 * primary MAC client.
3758 			 */
3759 			if (perm != NULL)
3760 				*perm = MAC_PROP_PERM_READ;
3761 			if (range != NULL)
3762 				range->mpr_count = 0;
3763 		} else if (range != NULL) {
3764 			if (mip->mi_state_flags & MIS_IS_VNIC)
3765 				mh = mac_get_lower_mac_handle(mh);
3766 			mip = (mac_impl_t *)mh;
3767 			if ((id == MAC_PROP_RXRINGSRANGE &&
3768 			    mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) ||
3769 			    (id == MAC_PROP_TXRINGSRANGE &&
3770 			    mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC)) {
3771 				if (id == MAC_PROP_RXRINGSRANGE) {
3772 					if ((mac_rxhwlnksavail_get(mh) +
3773 					    mac_rxhwlnksrsvd_get(mh)) <= 1) {
3774 						/*
3775 						 * doesn't support groups or
3776 						 * rings
3777 						 */
3778 						range->mpr_count = 0;
3779 					} else {
3780 						/*
3781 						 * supports specifying groups,
3782 						 * but not rings
3783 						 */
3784 						_mac_set_range(range, 0, 0);
3785 					}
3786 				} else {
3787 					if ((mac_txhwlnksavail_get(mh) +
3788 					    mac_txhwlnksrsvd_get(mh)) <= 1) {
3789 						/*
3790 						 * doesn't support groups or
3791 						 * rings
3792 						 */
3793 						range->mpr_count = 0;
3794 					} else {
3795 						/*
3796 						 * supports specifying groups,
3797 						 * but not rings
3798 						 */
3799 						_mac_set_range(range, 0, 0);
3800 					}
3801 				}
3802 			} else {
3803 				max = id == MAC_PROP_RXRINGSRANGE ?
3804 				    mac_rxavail_get(mh) + mac_rxrsvd_get(mh) :
3805 				    mac_txavail_get(mh) + mac_txrsvd_get(mh);
3806 				if (max <= 1) {
3807 					/*
3808 					 * doesn't support groups or
3809 					 * rings
3810 					 */
3811 					range->mpr_count = 0;
3812 				} else {
3813 					/*
3814 					 * -1 because we have to leave out the
3815 					 * default ring.
3816 					 */
3817 					_mac_set_range(range, 1, max - 1);
3818 				}
3819 			}
3820 		}
3821 		return (0);
3822
3823 	case MAC_PROP_STATUS:
3824 		if (perm != NULL)
3825 			*perm = MAC_PROP_PERM_READ;
3826 		return (0);
3827 	}
3828
3829 	/*
3830 	 * Get the property info from the driver if it implements the
3831 	 * property info entry point.
3832 	 */
3833 	bzero(&state, sizeof (state));
3834
3835 	if (mip->mi_callbacks->mc_callbacks & MC_PROPINFO) {
3836 		state.pr_default = default_val;
3837 		state.pr_default_size = default_size;
3838
3839 		/*
3840 		 * The caller specifies the maximum number of ranges
3841 		 * it can accommodate using mpr_count. We don't touch
3842 		 * this value until the driver returns from its
3843 		 * mc_propinfo() callback, and ensure we don't exceed
3844 		 * this number of ranges as the driver defines the
3845 		 * supported ranges from its mc_propinfo().
3846 		 *
3847 		 * pr_range_cur_count keeps track of how many ranges
3848 		 * were defined by the driver from its mc_propinfo()
3849 		 * entry point.
3850 		 *
3851 		 * On exit, the user-specified range mpr_count returns
3852 		 * the number of ranges specified by the driver on
3853 		 * success, or the number of ranges it wanted to
3854 		 * define if that number of ranges could not be
3855 		 * accommodated by the specified range structure. In
3856 		 * the latter case, the caller will be able to
3857 		 * allocate a larger range structure, and query the
3858 		 * property again.
3859 		 */
3860 		state.pr_range_cur_count = 0;
3861 		state.pr_range = range;
3862
3863 		mip->mi_callbacks->mc_propinfo(mip->mi_driver, name, id,
3864 		    (mac_prop_info_handle_t)&state);
3865
3866 		if (state.pr_flags & MAC_PROP_INFO_RANGE)
3867 			range->mpr_count = state.pr_range_cur_count;
3868
3869 		/*
3870 		 * The operation could fail if the buffer supplied by
3871 		 * the user was too small for the range or default
3872 		 * value of the property.
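		 * A caller-side sketch of the resulting retry protocol
		 * (illustrative only; the errno returned for an undersized
		 * buffer is set by the driver through the prop info handle):
		 *
		 *	range->mpr_count = n;		space for n ranges
		 *	err = mac_prop_info(mh, id, name, NULL, 0,
		 *	    range, NULL);
		 *	if (err != 0 && range->mpr_count > n)
		 *		reallocate with room for mpr_count
		 *		ranges and query again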
3873 */
3874 if (state.pr_errno != 0)
3875 return (state.pr_errno);
3876
3877 if (perm != NULL && state.pr_flags & MAC_PROP_INFO_PERM)
3878 *perm = state.pr_perm;
3879 }
3880
3881 /*
3882 * The MAC layer may want to provide default values or allowed
3883 * ranges for properties if the driver does not provide a
3884 * property info entry point, or if that entry point exists but
3885 * did not provide a default value or allowed ranges for
3886 * that property.
3887 */
3888 switch (id) {
3889 case MAC_PROP_MTU: {
3890 uint32_t sdu;
3891
3892 mac_sdu_get2(mh, NULL, &sdu, NULL);
3893
3894 if (range != NULL && !(state.pr_flags &
3895 MAC_PROP_INFO_RANGE)) {
3896 /* MTU range */
3897 _mac_set_range(range, sdu, sdu);
3898 }
3899
3900 if (default_val != NULL && !(state.pr_flags &
3901 MAC_PROP_INFO_DEFAULT)) {
3902 if (mip->mi_info.mi_media == DL_ETHER)
3903 sdu = ETHERMTU;
3904 /* default MTU value */
3905 bcopy(&sdu, default_val, sizeof (sdu));
3906 }
3907 }
3908 }
3909
3910 return (0);
3911 }
3912
3913 int
3914 mac_fastpath_disable(mac_handle_t mh)
3915 {
3916 mac_impl_t *mip = (mac_impl_t *)mh;
3917
3918 if ((mip->mi_state_flags & MIS_LEGACY) == 0)
3919 return (0);
3920
3921 return (mip->mi_capab_legacy.ml_fastpath_disable(mip->mi_driver));
3922 }
3923
3924 void
3925 mac_fastpath_enable(mac_handle_t mh)
3926 {
3927 mac_impl_t *mip = (mac_impl_t *)mh;
3928
3929 if ((mip->mi_state_flags & MIS_LEGACY) == 0)
3930 return;
3931
3932 mip->mi_capab_legacy.ml_fastpath_enable(mip->mi_driver);
3933 }
3934
3935 void
3936 mac_register_priv_prop(mac_impl_t *mip, char **priv_props)
3937 {
3938 uint_t nprops, i;
3939
3940 if (priv_props == NULL)
3941 return;
3942
3943 nprops = 0;
3944 while (priv_props[nprops] != NULL)
3945 nprops++;
3946 if (nprops == 0)
3947 return;
3948
3949
3950 mip->mi_priv_prop = kmem_zalloc(nprops * sizeof (char *), KM_SLEEP);
3951
3952 for (i = 0; i < nprops; i++) {
3953 mip->mi_priv_prop[i] = kmem_zalloc(MAXLINKPROPNAME, KM_SLEEP);
3954 (void) strlcpy(mip->mi_priv_prop[i], priv_props[i],
3955 MAXLINKPROPNAME);
3956 }
3957
3958 mip->mi_priv_prop_count = nprops;
3959 }
3960
3961 void
3962 mac_unregister_priv_prop(mac_impl_t *mip)
3963 {
3964 uint_t i;
3965
3966 if (mip->mi_priv_prop_count == 0) {
3967 ASSERT(mip->mi_priv_prop == NULL);
3968 return;
3969 }
3970
3971 for (i = 0; i < mip->mi_priv_prop_count; i++)
3972 kmem_free(mip->mi_priv_prop[i], MAXLINKPROPNAME);
3973 kmem_free(mip->mi_priv_prop, mip->mi_priv_prop_count *
3974 sizeof (char *));
3975
3976 mip->mi_priv_prop = NULL;
3977 mip->mi_priv_prop_count = 0;
3978 }
3979
3980 /*
3981 * mac_ring_t 'mr' macros. Some rogue drivers may access the ring structure
3982 * (by invoking mac_rx()) even after processing mac_stop_ring(). In such
3983 * cases, if MAC frees the ring structure after mac_stop_ring(), any
3984 * illegal access to the ring structure coming from the driver will panic
3985 * the system. In order to protect the system from such inadvertent access,
3986 * we maintain a cache of rings in the mac_impl_t after they get freed.
3987 * When packets are received on freed rings, MAC (through the generation
3988 * count mechanism) will drop such packets.
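 *
 * An illustrative sketch of the mechanism (not the literal code):
 * the driver passes back the generation number it was handed at
 * mr_start time with every received chain, e.g. via
 * mac_rx_ring(mh, ring_handle, mp_chain, mr_gen_num), and the
 * framework compares it against the ring's current mr_gen_num.
 * A mismatch means the ring has since been stopped (and possibly
 * recycled), so the chain is dropped rather than delivered.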
3989 */ 3990 static mac_ring_t * 3991 mac_ring_alloc(mac_impl_t *mip) 3992 { 3993 mac_ring_t *ring; 3994 3995 mutex_enter(&mip->mi_ring_lock); 3996 if (mip->mi_ring_freelist != NULL) { 3997 ring = mip->mi_ring_freelist; 3998 mip->mi_ring_freelist = ring->mr_next; 3999 bzero(ring, sizeof (mac_ring_t)); 4000 mutex_exit(&mip->mi_ring_lock); 4001 } else { 4002 mutex_exit(&mip->mi_ring_lock); 4003 ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP); 4004 } 4005 ASSERT((ring != NULL) && (ring->mr_state == MR_FREE)); 4006 return (ring); 4007 } 4008 4009 static void 4010 mac_ring_free(mac_impl_t *mip, mac_ring_t *ring) 4011 { 4012 ASSERT(ring->mr_state == MR_FREE); 4013 4014 mutex_enter(&mip->mi_ring_lock); 4015 ring->mr_state = MR_FREE; 4016 ring->mr_flag = 0; 4017 ring->mr_next = mip->mi_ring_freelist; 4018 ring->mr_mip = NULL; 4019 mip->mi_ring_freelist = ring; 4020 mac_ring_stat_delete(ring); 4021 mutex_exit(&mip->mi_ring_lock); 4022 } 4023 4024 static void 4025 mac_ring_freeall(mac_impl_t *mip) 4026 { 4027 mac_ring_t *ring_next; 4028 mutex_enter(&mip->mi_ring_lock); 4029 mac_ring_t *ring = mip->mi_ring_freelist; 4030 while (ring != NULL) { 4031 ring_next = ring->mr_next; 4032 kmem_cache_free(mac_ring_cache, ring); 4033 ring = ring_next; 4034 } 4035 mip->mi_ring_freelist = NULL; 4036 mutex_exit(&mip->mi_ring_lock); 4037 } 4038 4039 int 4040 mac_start_ring(mac_ring_t *ring) 4041 { 4042 int rv = 0; 4043 4044 ASSERT(ring->mr_state == MR_FREE); 4045 4046 if (ring->mr_start != NULL) { 4047 rv = ring->mr_start(ring->mr_driver, ring->mr_gen_num); 4048 if (rv != 0) 4049 return (rv); 4050 } 4051 4052 ring->mr_state = MR_INUSE; 4053 return (rv); 4054 } 4055 4056 void 4057 mac_stop_ring(mac_ring_t *ring) 4058 { 4059 ASSERT(ring->mr_state == MR_INUSE); 4060 4061 if (ring->mr_stop != NULL) 4062 ring->mr_stop(ring->mr_driver); 4063 4064 ring->mr_state = MR_FREE; 4065 4066 /* 4067 * Increment the ring generation number for this ring. 4068 */ 4069 ring->mr_gen_num++; 4070 } 4071 4072 int 4073 mac_start_group(mac_group_t *group) 4074 { 4075 int rv = 0; 4076 4077 if (group->mrg_start != NULL) 4078 rv = group->mrg_start(group->mrg_driver); 4079 4080 return (rv); 4081 } 4082 4083 void 4084 mac_stop_group(mac_group_t *group) 4085 { 4086 if (group->mrg_stop != NULL) 4087 group->mrg_stop(group->mrg_driver); 4088 } 4089 4090 /* 4091 * Called from mac_start() on the default Rx group. Broadcast and multicast 4092 * packets are received only on the default group. Hence the default group 4093 * needs to be up even if the primary client is not up, for the other groups 4094 * to be functional. We do this by calling this function at mac_start time 4095 * itself. However the broadcast packets that are received can't make their 4096 * way beyond mac_rx until a mac client creates a broadcast flow. 4097 */ 4098 static int 4099 mac_start_group_and_rings(mac_group_t *group) 4100 { 4101 mac_ring_t *ring; 4102 int rv = 0; 4103 4104 ASSERT(group->mrg_state == MAC_GROUP_STATE_REGISTERED); 4105 if ((rv = mac_start_group(group)) != 0) 4106 return (rv); 4107 4108 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 4109 ASSERT(ring->mr_state == MR_FREE); 4110 4111 if ((rv = mac_start_ring(ring)) != 0) 4112 goto error; 4113 4114 /* 4115 * When aggr_set_port_sdu() is called, it will remove 4116 * the port client's unicast address. This will cause 4117 * MAC to stop the default group's rings on the port 4118 * MAC. After it modifies the SDU, it will then re-add 4119 * the unicast address. 
At which time, this function is
4120 * called to start the default group's rings. Normally
4121 * this function would set the classify type to
4122 * MAC_SW_CLASSIFIER; but that will break aggr which
4123 * relies on the passthru classify mode being set for
4124 * correct delivery (see mac_rx_common()). To avoid
4125 * that, we check for a passthru callback and set the
4126 * classify type to MAC_PASSTHRU_CLASSIFIER; as it was
4127 * before the rings were stopped.
4128 */
4129 ring->mr_classify_type = (ring->mr_pt_fn != NULL) ?
4130 MAC_PASSTHRU_CLASSIFIER : MAC_SW_CLASSIFIER;
4131 }
4132 return (0);
4133
4134 error:
4135 mac_stop_group_and_rings(group);
4136 return (rv);
4137 }
4138
4139 /* Called from mac_stop on the default Rx group */
4140 static void
4141 mac_stop_group_and_rings(mac_group_t *group)
4142 {
4143 mac_ring_t *ring;
4144
4145 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) {
4146 if (ring->mr_state != MR_FREE) {
4147 mac_stop_ring(ring);
4148 ring->mr_flag = 0;
4149 ring->mr_classify_type = MAC_NO_CLASSIFIER;
4150 }
4151 }
4152 mac_stop_group(group);
4153 }
4154
4155
4156 static mac_ring_t *
4157 mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index,
4158 mac_capab_rings_t *cap_rings)
4159 {
4160 mac_ring_t *ring, *rnext;
4161 mac_ring_info_t ring_info;
4162 ddi_intr_handle_t ddi_handle;
4163
4164 ring = mac_ring_alloc(mip);
4165
4166 /* Prepare basic information of ring */
4167
4168 /*
4169 * Ring index is numbered to be unique across a particular device.
4170 * Ring index computation makes the following assumptions:
4171 * - For drivers with static grouping (e.g. ixgbe, bge),
4172 * the ring index exchanged with the driver (e.g. during mr_rget)
4173 * is unique only across the group the ring belongs to.
4174 * - Drivers with dynamic grouping (e.g. nxge) start
4175 * with a single group (mrg_index = 0).
4176 */
4177 ring->mr_index = group->mrg_index * group->mrg_info.mgi_count + index;
4178 ring->mr_type = group->mrg_type;
4179 ring->mr_gh = (mac_group_handle_t)group;
4180
4181 /* Insert the new ring into the list. */
4182 ring->mr_next = group->mrg_rings;
4183 group->mrg_rings = ring;
4184
4185 /* Zero to reuse the info data structure */
4186 bzero(&ring_info, sizeof (ring_info));
4187
4188 /* Query ring information from driver */
4189 cap_rings->mr_rget(mip->mi_driver, group->mrg_type, group->mrg_index,
4190 index, &ring_info, (mac_ring_handle_t)ring);
4191
4192 ring->mr_info = ring_info;
4193
4194 /*
4195 * The interrupt handle could be shared among multiple rings.
4196 * Thus if there are a bunch of rings sharing an
4197 * interrupt, then only one ring among the bunch will be made
4198 * available for interrupt re-targeting; the rest will have
4199 * the ddi_shared flag set to TRUE and will not be available
4200 * for interrupt re-targeting.
4201 */
4202 if ((ddi_handle = ring_info.mri_intr.mi_ddi_handle) != NULL) {
4203 rnext = ring->mr_next;
4204 while (rnext != NULL) {
4205 if (rnext->mr_info.mri_intr.mi_ddi_handle ==
4206 ddi_handle) {
4207 /*
4208 * If default ring (mr_index == 0) is part
4209 * of a group of rings sharing an
4210 * interrupt, then set ddi_shared flag for
4211 * the default ring and give another ring
4212 * the chance to be re-targeted.
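 *
 * For example (a hypothetical driver, not from the source): with
 * rings 0, 1 and 2 of a group sharing one interrupt handle, two of
 * them, including the default ring 0, end up with mi_ddi_shared
 * set to B_TRUE, and exactly one ring of the bunch remains
 * eligible for interrupt re-targeting.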
4213 */ 4214 if (rnext->mr_index == 0 && 4215 !rnext->mr_info.mri_intr.mi_ddi_shared) { 4216 rnext->mr_info.mri_intr.mi_ddi_shared = 4217 B_TRUE; 4218 } else { 4219 ring->mr_info.mri_intr.mi_ddi_shared = 4220 B_TRUE; 4221 } 4222 break; 4223 } 4224 rnext = rnext->mr_next; 4225 } 4226 /* 4227 * If rnext is NULL, then no matching ddi_handle was found. 4228 * Rx rings get registered first. So if this is a Tx ring, 4229 * then go through all the Rx rings and see if there is a 4230 * matching ddi handle. 4231 */ 4232 if (rnext == NULL && ring->mr_type == MAC_RING_TYPE_TX) { 4233 mac_compare_ddi_handle(mip->mi_rx_groups, 4234 mip->mi_rx_group_count, ring); 4235 } 4236 } 4237 4238 /* Update ring's status */ 4239 ring->mr_state = MR_FREE; 4240 ring->mr_flag = 0; 4241 4242 /* Update the ring count of the group */ 4243 group->mrg_cur_count++; 4244 4245 /* Create per ring kstats */ 4246 if (ring->mr_stat != NULL) { 4247 ring->mr_mip = mip; 4248 mac_ring_stat_create(ring); 4249 } 4250 4251 return (ring); 4252 } 4253 4254 /* 4255 * Rings are chained together for easy regrouping. 4256 */ 4257 static void 4258 mac_init_group(mac_impl_t *mip, mac_group_t *group, int size, 4259 mac_capab_rings_t *cap_rings) 4260 { 4261 int index; 4262 4263 /* 4264 * Initialize all ring members of this group. Size of zero will not 4265 * enter the loop, so it's safe for initializing an empty group. 4266 */ 4267 for (index = size - 1; index >= 0; index--) 4268 (void) mac_init_ring(mip, group, index, cap_rings); 4269 } 4270 4271 int 4272 mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) 4273 { 4274 mac_capab_rings_t *cap_rings; 4275 mac_group_t *group; 4276 mac_group_t *groups; 4277 mac_group_info_t group_info; 4278 uint_t group_free = 0; 4279 uint_t ring_left; 4280 mac_ring_t *ring; 4281 int g; 4282 int err = 0; 4283 uint_t grpcnt; 4284 boolean_t pseudo_txgrp = B_FALSE; 4285 4286 switch (rtype) { 4287 case MAC_RING_TYPE_RX: 4288 ASSERT(mip->mi_rx_groups == NULL); 4289 4290 cap_rings = &mip->mi_rx_rings_cap; 4291 cap_rings->mr_type = MAC_RING_TYPE_RX; 4292 break; 4293 case MAC_RING_TYPE_TX: 4294 ASSERT(mip->mi_tx_groups == NULL); 4295 4296 cap_rings = &mip->mi_tx_rings_cap; 4297 cap_rings->mr_type = MAC_RING_TYPE_TX; 4298 break; 4299 default: 4300 ASSERT(B_FALSE); 4301 } 4302 4303 if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, cap_rings)) 4304 return (0); 4305 grpcnt = cap_rings->mr_gnum; 4306 4307 /* 4308 * If we have multiple TX rings, but only one TX group, we can 4309 * create pseudo TX groups (one per TX ring) in the MAC layer, 4310 * except for an aggr. For an aggr currently we maintain only 4311 * one group with all the rings (for all its ports), going 4312 * forwards we might change this. 4313 */ 4314 if (rtype == MAC_RING_TYPE_TX && 4315 cap_rings->mr_gnum == 0 && cap_rings->mr_rnum > 0 && 4316 (mip->mi_state_flags & MIS_IS_AGGR) == 0) { 4317 /* 4318 * The -1 here is because we create a default TX group 4319 * with all the rings in it. 4320 */ 4321 grpcnt = cap_rings->mr_rnum - 1; 4322 pseudo_txgrp = B_TRUE; 4323 } 4324 4325 /* 4326 * Allocate a contiguous buffer for all groups. 4327 */ 4328 groups = kmem_zalloc(sizeof (mac_group_t) * (grpcnt+ 1), KM_SLEEP); 4329 4330 ring_left = cap_rings->mr_rnum; 4331 4332 /* 4333 * Get all ring groups if any, and get their ring members 4334 * if any. 
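 *
 * As a worked example (hypothetical driver, not from the source):
 * with mr_rnum = 8 Tx rings and mr_gnum = 0, grpcnt becomes 7
 * pseudo groups; together with the additional default group
 * allocated above, kmem_zalloc() holds 8 mac_group_t structures,
 * and all 8 rings initially land in the default group built at
 * the end of this function.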
4335 */ 4336 for (g = 0; g < grpcnt; g++) { 4337 group = groups + g; 4338 4339 /* Prepare basic information of the group */ 4340 group->mrg_index = g; 4341 group->mrg_type = rtype; 4342 group->mrg_state = MAC_GROUP_STATE_UNINIT; 4343 group->mrg_mh = (mac_handle_t)mip; 4344 group->mrg_next = group + 1; 4345 4346 /* Zero to reuse the info data structure */ 4347 bzero(&group_info, sizeof (group_info)); 4348 4349 if (pseudo_txgrp) { 4350 /* 4351 * This is a pseudo group that we created, apart 4352 * from setting the state there is nothing to be 4353 * done. 4354 */ 4355 group->mrg_state = MAC_GROUP_STATE_REGISTERED; 4356 group_free++; 4357 continue; 4358 } 4359 /* Query group information from driver */ 4360 cap_rings->mr_gget(mip->mi_driver, rtype, g, &group_info, 4361 (mac_group_handle_t)group); 4362 4363 switch (cap_rings->mr_group_type) { 4364 case MAC_GROUP_TYPE_DYNAMIC: 4365 if (cap_rings->mr_gaddring == NULL || 4366 cap_rings->mr_gremring == NULL) { 4367 DTRACE_PROBE3( 4368 mac__init__rings_no_addremring, 4369 char *, mip->mi_name, 4370 mac_group_add_ring_t, 4371 cap_rings->mr_gaddring, 4372 mac_group_add_ring_t, 4373 cap_rings->mr_gremring); 4374 err = EINVAL; 4375 goto bail; 4376 } 4377 4378 switch (rtype) { 4379 case MAC_RING_TYPE_RX: 4380 /* 4381 * The first RX group must have non-zero 4382 * rings, and the following groups must 4383 * have zero rings. 4384 */ 4385 if (g == 0 && group_info.mgi_count == 0) { 4386 DTRACE_PROBE1( 4387 mac__init__rings__rx__def__zero, 4388 char *, mip->mi_name); 4389 err = EINVAL; 4390 goto bail; 4391 } 4392 if (g > 0 && group_info.mgi_count != 0) { 4393 DTRACE_PROBE3( 4394 mac__init__rings__rx__nonzero, 4395 char *, mip->mi_name, 4396 int, g, int, group_info.mgi_count); 4397 err = EINVAL; 4398 goto bail; 4399 } 4400 break; 4401 case MAC_RING_TYPE_TX: 4402 /* 4403 * All TX ring groups must have zero rings. 4404 */ 4405 if (group_info.mgi_count != 0) { 4406 DTRACE_PROBE3( 4407 mac__init__rings__tx__nonzero, 4408 char *, mip->mi_name, 4409 int, g, int, group_info.mgi_count); 4410 err = EINVAL; 4411 goto bail; 4412 } 4413 break; 4414 } 4415 break; 4416 case MAC_GROUP_TYPE_STATIC: 4417 /* 4418 * Note that an empty group is allowed, e.g., an aggr 4419 * would start with an empty group. 4420 */ 4421 break; 4422 default: 4423 /* unknown group type */ 4424 DTRACE_PROBE2(mac__init__rings__unknown__type, 4425 char *, mip->mi_name, 4426 int, cap_rings->mr_group_type); 4427 err = EINVAL; 4428 goto bail; 4429 } 4430 4431 4432 /* 4433 * The driver must register some form of hardware MAC 4434 * filter in order for Rx groups to support multiple 4435 * MAC addresses. 4436 */ 4437 if (rtype == MAC_RING_TYPE_RX && 4438 (group_info.mgi_addmac == NULL || 4439 group_info.mgi_remmac == NULL)) { 4440 DTRACE_PROBE1(mac__init__rings__no__mac__filter, 4441 char *, mip->mi_name); 4442 err = EINVAL; 4443 goto bail; 4444 } 4445 4446 /* Cache driver-supplied information */ 4447 group->mrg_info = group_info; 4448 4449 /* Update the group's status and group count. 
*/
4450 mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED);
4451 group_free++;
4452
4453 group->mrg_rings = NULL;
4454 group->mrg_cur_count = 0;
4455 mac_init_group(mip, group, group_info.mgi_count, cap_rings);
4456 ring_left -= group_info.mgi_count;
4457
4458 /* The current group size should be equal to default value */
4459 ASSERT(group->mrg_cur_count == group_info.mgi_count);
4460 }
4461
4462 /* Build up a dummy group for free resources as a pool */
4463 group = groups + grpcnt;
4464
4465 /* Prepare basic information of the group */
4466 group->mrg_index = -1;
4467 group->mrg_type = rtype;
4468 group->mrg_state = MAC_GROUP_STATE_UNINIT;
4469 group->mrg_mh = (mac_handle_t)mip;
4470 group->mrg_next = NULL;
4471
4472 /*
4473 * If there are ungrouped rings, allocate a contiguous buffer for
4474 * the remaining resources.
4475 */
4476 if (ring_left != 0) {
4477 group->mrg_rings = NULL;
4478 group->mrg_cur_count = 0;
4479 mac_init_group(mip, group, ring_left, cap_rings);
4480
4481 /* The current group size should be equal to ring_left */
4482 ASSERT(group->mrg_cur_count == ring_left);
4483
4484 ring_left = 0;
4485
4486 /* Update this group's status */
4487 mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED);
4488 } else {
4489 group->mrg_rings = NULL;
4490 }
4491
4492 ASSERT(ring_left == 0);
4493
4494 bail:
4495
4496 /* Cache other important information to finalize the initialization */
4497 switch (rtype) {
4498 case MAC_RING_TYPE_RX:
4499 mip->mi_rx_group_type = cap_rings->mr_group_type;
4500 mip->mi_rx_group_count = cap_rings->mr_gnum;
4501 mip->mi_rx_groups = groups;
4502 mip->mi_rx_donor_grp = groups;
4503 if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
4504 /*
4505 * The default ring is reserved since it is
4506 * used for broadcast etc. packets.
4507 */
4508 mip->mi_rxrings_avail =
4509 mip->mi_rx_groups->mrg_cur_count - 1;
4510 mip->mi_rxrings_rsvd = 1;
4511 }
4512 /*
4513 * The default group cannot be reserved. It is used by
4514 * all the clients that do not have an exclusive group.
4515 */
4516 mip->mi_rxhwclnt_avail = mip->mi_rx_group_count - 1;
4517 mip->mi_rxhwclnt_used = 1;
4518 break;
4519 case MAC_RING_TYPE_TX:
4520 mip->mi_tx_group_type = pseudo_txgrp ? MAC_GROUP_TYPE_DYNAMIC :
4521 cap_rings->mr_group_type;
4522 mip->mi_tx_group_count = grpcnt;
4523 mip->mi_tx_group_free = group_free;
4524 mip->mi_tx_groups = groups;
4525
4526 group = groups + grpcnt;
4527 ring = group->mrg_rings;
4528 /*
4529 * The ring can be NULL in the case of aggr. Aggr will
4530 * have an empty Tx group which will get populated
4531 * later when pseudo Tx rings are added after
4532 * mac_register() is done.
4533 */
4534 if (ring == NULL) {
4535 ASSERT(mip->mi_state_flags & MIS_IS_AGGR);
4536 /*
4537 * Pass the group to aggr so it can add Tx
4538 * rings to the group later.
4539 */
4540 cap_rings->mr_gget(mip->mi_driver, rtype, 0, NULL,
4541 (mac_group_handle_t)group);
4542 /*
4543 * Even though there are no rings at this time
4544 * (rings will come later), set the group
4545 * state to registered.
4546 */
4547 group->mrg_state = MAC_GROUP_STATE_REGISTERED;
4548 } else {
4549 /*
4550 * Ring 0 is used as the default one and it could be
4551 * assigned to a client as well.
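 *
 * Continuing the pseudo Tx group example above (illustrative):
 * with all 8 rings in this default group, ring 0 becomes
 * mi_default_tx_ring, and the dynamic-group accounting below
 * yields mi_txrings_avail = 7 and mi_txrings_rsvd = 1.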
4552 */
4553 while ((ring->mr_index != 0) && (ring->mr_next != NULL))
4554 ring = ring->mr_next;
4555 ASSERT(ring->mr_index == 0);
4556 mip->mi_default_tx_ring = (mac_ring_handle_t)ring;
4557 }
4558 if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
4559 mip->mi_txrings_avail = group->mrg_cur_count - 1;
4560 /*
4561 * The default ring cannot be reserved.
4562 */
4563 mip->mi_txrings_rsvd = 1;
4564 }
4565 /*
4566 * The default group cannot be reserved. It will be shared
4567 * by clients that do not have an exclusive group.
4568 */
4569 mip->mi_txhwclnt_avail = mip->mi_tx_group_count;
4570 mip->mi_txhwclnt_used = 1;
4571 break;
4572 default:
4573 ASSERT(B_FALSE);
4574 }
4575
4576 if (err != 0)
4577 mac_free_rings(mip, rtype);
4578
4579 return (err);
4580 }
4581
4582 /*
4583 * The ddi interrupt handle could be shared among rings. If so, compare
4584 * the new ring's ddi handle with the existing ones and set the ddi_shared
4585 * flag.
4586 */
4587 void
4588 mac_compare_ddi_handle(mac_group_t *groups, uint_t grpcnt, mac_ring_t *cring)
4589 {
4590 mac_group_t *group;
4591 mac_ring_t *ring;
4592 ddi_intr_handle_t ddi_handle;
4593 int g;
4594
4595 ddi_handle = cring->mr_info.mri_intr.mi_ddi_handle;
4596 for (g = 0; g < grpcnt; g++) {
4597 group = groups + g;
4598 for (ring = group->mrg_rings; ring != NULL;
4599 ring = ring->mr_next) {
4600 if (ring == cring)
4601 continue;
4602 if (ring->mr_info.mri_intr.mi_ddi_handle ==
4603 ddi_handle) {
4604 if (cring->mr_type == MAC_RING_TYPE_RX &&
4605 ring->mr_index == 0 &&
4606 !ring->mr_info.mri_intr.mi_ddi_shared) {
4607 ring->mr_info.mri_intr.mi_ddi_shared =
4608 B_TRUE;
4609 } else {
4610 cring->mr_info.mri_intr.mi_ddi_shared =
4611 B_TRUE;
4612 }
4613 return;
4614 }
4615 }
4616 }
4617 }
4618
4619 /*
4620 * Called to free all groups of a particular type (RX or TX). It's assumed
4621 * that no clients are using these groups.
4622 */
4623 void
4624 mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype)
4625 {
4626 mac_group_t *group, *groups;
4627 uint_t group_count;
4628
4629 switch (rtype) {
4630 case MAC_RING_TYPE_RX:
4631 if (mip->mi_rx_groups == NULL)
4632 return;
4633
4634 groups = mip->mi_rx_groups;
4635 group_count = mip->mi_rx_group_count;
4636
4637 mip->mi_rx_groups = NULL;
4638 mip->mi_rx_donor_grp = NULL;
4639 mip->mi_rx_group_count = 0;
4640 break;
4641 case MAC_RING_TYPE_TX:
4642 ASSERT(mip->mi_tx_group_count == mip->mi_tx_group_free);
4643
4644 if (mip->mi_tx_groups == NULL)
4645 return;
4646
4647 groups = mip->mi_tx_groups;
4648 group_count = mip->mi_tx_group_count;
4649
4650 mip->mi_tx_groups = NULL;
4651 mip->mi_tx_group_count = 0;
4652 mip->mi_tx_group_free = 0;
4653 mip->mi_default_tx_ring = NULL;
4654 break;
4655 default:
4656 ASSERT(B_FALSE);
4657 }
4658
4659 for (group = groups; group != NULL; group = group->mrg_next) {
4660 mac_ring_t *ring;
4661
4662 if (group->mrg_cur_count == 0)
4663 continue;
4664
4665 ASSERT(group->mrg_rings != NULL);
4666
4667 while ((ring = group->mrg_rings) != NULL) {
4668 group->mrg_rings = ring->mr_next;
4669 mac_ring_free(mip, ring);
4670 }
4671 }
4672
4673 /* Free all the cached rings */
4674 mac_ring_freeall(mip);
4675 /* Free the block of group data structures */
4676 kmem_free(groups, sizeof (mac_group_t) * (group_count + 1));
4677 }
4678
4679 /*
4680 * Associate the VLAN filter with the receive group.
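 *
 * Usage sketch (illustrative): callers program a VLAN filter while
 * the group is reserved for a client and undo it with
 * mac_group_remvlan() on teardown, or on a later failure, as
 * mac_add_macaddr_vlan() does in its bail path further below.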
4681 */
4682 int
4683 mac_group_addvlan(mac_group_t *group, uint16_t vlan)
4684 {
4685 VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX);
4686 VERIFY3P(group->mrg_info.mgi_addvlan, !=, NULL);
4687
4688 if (vlan > VLAN_ID_MAX)
4689 return (EINVAL);
4690
4691 vlan = MAC_VLAN_UNTAGGED_VID(vlan);
4692 return (group->mrg_info.mgi_addvlan(group->mrg_info.mgi_driver, vlan));
4693 }
4694
4695 /*
4696 * Dissociate the VLAN from the receive group.
4697 */
4698 int
4699 mac_group_remvlan(mac_group_t *group, uint16_t vlan)
4700 {
4701 VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX);
4702 VERIFY3P(group->mrg_info.mgi_remvlan, !=, NULL);
4703
4704 if (vlan > VLAN_ID_MAX)
4705 return (EINVAL);
4706
4707 vlan = MAC_VLAN_UNTAGGED_VID(vlan);
4708 return (group->mrg_info.mgi_remvlan(group->mrg_info.mgi_driver, vlan));
4709 }
4710
4711 /*
4712 * Associate a MAC address with a receive group.
4713 *
4714 * The return value of this function should always be checked properly,
4715 * because any type of failure could cause unexpected results. A MAC address
4716 * can be added to or removed from a group only after the group has been
4717 * reserved. Ideally, a successful reservation always leads to calling
4718 * mac_group_addmac() to steer desired traffic. Failure to add a unicast
4719 * MAC address doesn't always imply that the group is functioning abnormally.
4720 *
4721 * Currently this function is called everywhere, and it reflects assumptions
4722 * about MAC addresses in the implementation. CR 6735196.
4723 */
4724 int
4725 mac_group_addmac(mac_group_t *group, const uint8_t *addr)
4726 {
4727 VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX);
4728 VERIFY3P(group->mrg_info.mgi_addmac, !=, NULL);
4729
4730 return (group->mrg_info.mgi_addmac(group->mrg_info.mgi_driver, addr));
4731 }
4732
4733 /*
4734 * Remove the association between a MAC address and its receive group.
4735 */
4736 int
4737 mac_group_remmac(mac_group_t *group, const uint8_t *addr)
4738 {
4739 VERIFY3S(group->mrg_type, ==, MAC_RING_TYPE_RX);
4740 VERIFY3P(group->mrg_info.mgi_remmac, !=, NULL);
4741
4742 return (group->mrg_info.mgi_remmac(group->mrg_info.mgi_driver, addr));
4743 }
4744
4745 /*
4746 * This is the entry point for packets transmitted through the bridge
4747 * code. If no bridge is in place, mac_ring_tx() transmits via the tx
4748 * ring. The 'rh' pointer may be NULL to select the default ring.
4749 */
4750 mblk_t *
4751 mac_bridge_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp)
4752 {
4753 mac_handle_t mh;
4754
4755 /*
4756 * Once we take a reference on the bridge link, the bridge
4757 * module itself can't unload, so the callback pointers are
4758 * stable.
4759 */
4760 mutex_enter(&mip->mi_bridge_lock);
4761 if ((mh = mip->mi_bridge_link) != NULL)
4762 mac_bridge_ref_cb(mh, B_TRUE);
4763 mutex_exit(&mip->mi_bridge_lock);
4764 if (mh == NULL) {
4765 mp = mac_ring_tx((mac_handle_t)mip, rh, mp);
4766 } else {
4767 /*
4768 * The bridge may place this mblk on a provider's Tx
4769 * path, a mac's Rx path, or both. Since we don't have
4770 * enough information at this point, we can't be sure
4771 * that the destination(s) are capable of handling the
4772 * hardware offloads requested by the mblk. We emulate
4773 * them here as it is the safest choice. In the
4774 * future, if bridge performance becomes a priority,
4775 * we can elide the emulation here and leave the
4776 * choice up to bridge.
4777 *
4778 * We don't clear the DB_CKSUMFLAGS here because
4779 * HCK_IPV4_HDRCKSUM (Tx) and HCK_IPV4_HDRCKSUM_OK
4780 * (Rx) still have the same value.
If the bridge
4781 * receives a packet from a HCKSUM_IPHDRCKSUM NIC then
4782 * the mac(s) it is forwarded on may calculate the
4783 * checksum again, but incorrectly (because the
4784 * checksum field is not zero). Until the
4785 * HCK_IPV4_HDRCKSUM/HCK_IPV4_HDRCKSUM_OK issue is
4786 * resolved, we leave the flag clearing in bridge
4787 * itself.
4788 */
4789 if ((DB_CKSUMFLAGS(mp) & (HCK_TX_FLAGS | HW_LSO_FLAGS)) != 0) {
4790 mac_hw_emul(&mp, NULL, NULL, MAC_ALL_EMULS);
4791 }
4792
4793 mp = mac_bridge_tx_cb(mh, rh, mp);
4794 mac_bridge_ref_cb(mh, B_FALSE);
4795 }
4796
4797 return (mp);
4798 }
4799
4800 /*
4801 * Find a ring from its index.
4802 */
4803 mac_ring_handle_t
4804 mac_find_ring(mac_group_handle_t gh, int index)
4805 {
4806 mac_group_t *group = (mac_group_t *)gh;
4807 mac_ring_t *ring;
4808
4809 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next)
4810 if (ring->mr_index == index)
4811 break;
4812
4813 return ((mac_ring_handle_t)ring);
4814 }
4815 /*
4816 * Add a ring to an existing group.
4817 *
4818 * The ring must be either passed directly (for example if the ring
4819 * movement is initiated by the framework), or specified through a driver
4820 * index (for example when the ring is added by the driver).
4821 *
4822 * The caller needs to call mac_perim_enter() before calling this function.
4823 */
4824 int
4825 i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index)
4826 {
4827 mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
4828 mac_capab_rings_t *cap_rings;
4829 boolean_t driver_call = (ring == NULL);
4830 mac_group_type_t group_type;
4831 int ret = 0;
4832 flow_entry_t *flent;
4833
4834 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
4835
4836 switch (group->mrg_type) {
4837 case MAC_RING_TYPE_RX:
4838 cap_rings = &mip->mi_rx_rings_cap;
4839 group_type = mip->mi_rx_group_type;
4840 break;
4841 case MAC_RING_TYPE_TX:
4842 cap_rings = &mip->mi_tx_rings_cap;
4843 group_type = mip->mi_tx_group_type;
4844 break;
4845 default:
4846 ASSERT(B_FALSE);
4847 }
4848
4849 /*
4850 * There should be no ring with the same ring index in the target
4851 * group.
4852 */
4853 ASSERT(mac_find_ring((mac_group_handle_t)group,
4854 driver_call ? index : ring->mr_index) == NULL);
4855
4856 if (driver_call) {
4857 /*
4858 * The function is called as a result of a request from
4859 * a driver to add a ring to an existing group, for example
4860 * from the aggregation driver. Allocate a new mac_ring_t
4861 * for that ring.
4862 */
4863 ring = mac_init_ring(mip, group, index, cap_rings);
4864 ASSERT(group->mrg_state > MAC_GROUP_STATE_UNINIT);
4865 } else {
4866 /*
4867 * The function is called as a result of a MAC layer request
4868 * to add a ring to an existing group. In this case the
4869 * ring is being moved between groups, which requires
4870 * the underlying driver to support dynamic grouping,
4871 * and the mac_ring_t already exists.
4872 */
4873 ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC);
4874 ASSERT(group->mrg_driver == NULL ||
4875 cap_rings->mr_gaddring != NULL);
4876 ASSERT(ring->mr_gh == NULL);
4877 }
4878
4879 /*
4880 * At this point the ring should not be in use, and it should be
4881 * of the right type for the target group.
4882 */
4883 ASSERT(ring->mr_state < MR_INUSE);
4884 ASSERT(ring->mr_srs == NULL);
4885 ASSERT(ring->mr_type == group->mrg_type);
4886
4887 if (!driver_call) {
4888 /*
4889 * Add the driver level hardware ring if the process was not
4890 * initiated by the driver, and the target group is backed
4891 * by the driver (i.e. it is not a pseudo group).
4892 */
4893 if (group->mrg_driver != NULL) {
4894 cap_rings->mr_gaddring(group->mrg_driver,
4895 ring->mr_driver, ring->mr_type);
4896 }
4897
4898 /*
4899 * Insert the ring ahead of the existing rings.
4900 */
4901 ring->mr_next = group->mrg_rings;
4902 group->mrg_rings = ring;
4903 ring->mr_gh = (mac_group_handle_t)group;
4904 group->mrg_cur_count++;
4905 }
4906
4907 /*
4908 * If the group has not been actively used, we're done.
4909 */
4910 if (group->mrg_index != -1 &&
4911 group->mrg_state < MAC_GROUP_STATE_RESERVED)
4912 return (0);
4913
4914 /*
4915 * Start the ring if needed. On failure, undo the grouping action.
4916 */
4917 if (ring->mr_state != MR_INUSE) {
4918 if ((ret = mac_start_ring(ring)) != 0) {
4919 if (!driver_call) {
4920 cap_rings->mr_gremring(group->mrg_driver,
4921 ring->mr_driver, ring->mr_type);
4922 }
4923 group->mrg_cur_count--;
4924 group->mrg_rings = ring->mr_next;
4925
4926 ring->mr_gh = NULL;
4927
4928 if (driver_call)
4929 mac_ring_free(mip, ring);
4930
4931 return (ret);
4932 }
4933 }
4934
4935 /*
4936 * Set up SRS/SR according to the ring type.
4937 */
4938 switch (ring->mr_type) {
4939 case MAC_RING_TYPE_RX:
4940 /*
4941 * Set up an SRS on top of the new ring if the group is
4942 * reserved for someone's exclusive use.
4943 */
4944 if (group->mrg_state == MAC_GROUP_STATE_RESERVED) {
4945 mac_client_impl_t *mcip = MAC_GROUP_ONLY_CLIENT(group);
4946
4947 VERIFY3P(mcip, !=, NULL);
4948 flent = mcip->mci_flent;
4949 VERIFY3S(flent->fe_rx_srs_cnt, >, 0);
4950 mac_rx_srs_group_setup(mcip, flent, SRST_LINK);
4951 mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip),
4952 mac_rx_deliver, mcip, NULL, NULL);
4953 } else {
4954 ring->mr_classify_type = MAC_SW_CLASSIFIER;
4955 }
4956 break;
4957 case MAC_RING_TYPE_TX:
4958 {
4959 mac_grp_client_t *mgcp = group->mrg_clients;
4960 mac_client_impl_t *mcip;
4961 mac_soft_ring_set_t *mac_srs;
4962 mac_srs_tx_t *tx;
4963
4964 if (MAC_GROUP_NO_CLIENT(group)) {
4965 if (ring->mr_state == MR_INUSE)
4966 mac_stop_ring(ring);
4967 ring->mr_flag = 0;
4968 break;
4969 }
4970 /*
4971 * If the rings are being moved to a group that has
4972 * clients using it, then add the new rings to the
4973 * clients' SRS.
4974 */
4975 while (mgcp != NULL) {
4976 boolean_t is_aggr;
4977
4978 mcip = mgcp->mgc_client;
4979 flent = mcip->mci_flent;
4980 is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT);
4981 mac_srs = MCIP_TX_SRS(mcip);
4982 tx = &mac_srs->srs_tx;
4983 mac_tx_client_quiesce((mac_client_handle_t)mcip);
4984 /*
4985 * If we are growing from 1 to multiple rings.
4986 */
4987 if (tx->st_mode == SRS_TX_BW ||
4988 tx->st_mode == SRS_TX_SERIALIZE ||
4989 tx->st_mode == SRS_TX_DEFAULT) {
4990 mac_ring_t *tx_ring = tx->st_arg2;
4991
4992 tx->st_arg2 = NULL;
4993 mac_tx_srs_stat_recreate(mac_srs, B_TRUE);
4994 mac_tx_srs_add_ring(mac_srs, tx_ring);
4995 if (mac_srs->srs_type & SRST_BW_CONTROL) {
4996 tx->st_mode = is_aggr ? SRS_TX_BW_AGGR :
4997 SRS_TX_BW_FANOUT;
4998 } else {
4999 tx->st_mode = is_aggr ? SRS_TX_AGGR :
5000 SRS_TX_FANOUT;
5001 }
5002 tx->st_func = mac_tx_get_func(tx->st_mode);
5003 }
5004 mac_tx_srs_add_ring(mac_srs, ring);
5005 mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip),
5006 mac_rx_deliver, mcip, NULL, NULL);
5007 mac_tx_client_restart((mac_client_handle_t)mcip);
5008 mgcp = mgcp->mgc_next;
5009 }
5010 break;
5011 }
5012 default:
5013 ASSERT(B_FALSE);
5014 }
5015 /*
5016 * For aggr, the default ring will be NULL to begin with.
If it
5017 * is NULL, then pick the first ring that gets added as the
5018 * default ring. Any ring in an aggregation can be removed at
5019 * any time (by the user action of removing a link) and if the
5020 * current default ring gets removed, then a new one gets
5021 * picked (see i_mac_group_rem_ring()).
5022 */
5023 if (mip->mi_state_flags & MIS_IS_AGGR &&
5024 mip->mi_default_tx_ring == NULL &&
5025 ring->mr_type == MAC_RING_TYPE_TX) {
5026 mip->mi_default_tx_ring = (mac_ring_handle_t)ring;
5027 }
5028
5029 MAC_RING_UNMARK(ring, MR_INCIPIENT);
5030 return (0);
5031 }
5032
5033 /*
5034 * Remove a ring from its current group. MAC internal function for dynamic
5035 * grouping.
5036 *
5037 * The caller needs to call mac_perim_enter() before calling this function.
5038 */
5039 void
5040 i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring,
5041 boolean_t driver_call)
5042 {
5043 mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
5044 mac_capab_rings_t *cap_rings = NULL;
5045 mac_group_type_t group_type;
5046
5047 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
5048
5049 ASSERT(mac_find_ring((mac_group_handle_t)group,
5050 ring->mr_index) == (mac_ring_handle_t)ring);
5051 ASSERT((mac_group_t *)ring->mr_gh == group);
5052 ASSERT(ring->mr_type == group->mrg_type);
5053
5054 if (ring->mr_state == MR_INUSE)
5055 mac_stop_ring(ring);
5056 switch (ring->mr_type) {
5057 case MAC_RING_TYPE_RX:
5058 group_type = mip->mi_rx_group_type;
5059 cap_rings = &mip->mi_rx_rings_cap;
5060
5061 /*
5062 * Only hardware classified packets hold a reference to the
5063 * ring all the way up the Rx path. mac_rx_srs_remove()
5064 * will take care of quiescing the Rx path and removing the
5065 * SRS. The software classified path neither holds a reference
5066 * nor any association with the ring in mac_rx.
5067 */
5068 if (ring->mr_srs != NULL) {
5069 mac_rx_srs_remove(ring->mr_srs);
5070 ring->mr_srs = NULL;
5071 }
5072
5073 break;
5074 case MAC_RING_TYPE_TX:
5075 {
5076 mac_grp_client_t *mgcp;
5077 mac_client_impl_t *mcip;
5078 mac_soft_ring_set_t *mac_srs;
5079 mac_srs_tx_t *tx;
5080 mac_ring_t *rem_ring;
5081 mac_group_t *defgrp;
5082 uint_t ring_info = 0;
5083
5084 /*
5085 * For TX this function is invoked in three
5086 * cases:
5087 *
5088 * 1) In the case of a failure during the
5089 * initial creation of a group when a share is
5090 * associated with a MAC client. So the SRS is not
5091 * yet set up, and will be set up later after the
5092 * group has been reserved and populated.
5093 *
5094 * 2) From mac_release_tx_group() when freeing
5095 * a TX SRS.
5096 *
5097 * 3) In the case of aggr, when a port gets removed,
5098 * the pseudo Tx rings that it exposed get removed.
5099 *
5100 * In the first two cases the SRS and its soft
5101 * rings are already quiesced.
5102 */
5103 if (driver_call) {
5104 mac_client_impl_t *mcip;
5105 mac_soft_ring_set_t *mac_srs;
5106 mac_soft_ring_t *sringp;
5107 mac_srs_tx_t *srs_tx;
5108
5109 if (mip->mi_state_flags & MIS_IS_AGGR &&
5110 mip->mi_default_tx_ring ==
5111 (mac_ring_handle_t)ring) {
5112 /* pick a new default Tx ring */
5113 mip->mi_default_tx_ring =
5114 (group->mrg_rings != ring) ?
5115 (mac_ring_handle_t)group->mrg_rings :
5116 (mac_ring_handle_t)(ring->mr_next);
5117 }
5118 /* Presently only the aggr case comes here */
5119 if (group->mrg_state != MAC_GROUP_STATE_RESERVED)
5120 break;
5121
5122 mcip = MAC_GROUP_ONLY_CLIENT(group);
5123 ASSERT(mcip != NULL);
5124 ASSERT(mcip->mci_state_flags & MCIS_IS_AGGR_CLIENT);
5125 mac_srs = MCIP_TX_SRS(mcip);
5126 ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_AGGR ||
5127 mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR);
5128 srs_tx = &mac_srs->srs_tx;
5129 /*
5130 * Wake up any callers blocked on this
5131 * Tx ring due to flow control.
5132 */
5133 sringp = srs_tx->st_soft_rings[ring->mr_index];
5134 ASSERT(sringp != NULL);
5135 mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)sringp);
5136 mac_tx_client_quiesce((mac_client_handle_t)mcip);
5137 mac_tx_srs_del_ring(mac_srs, ring);
5138 mac_tx_client_restart((mac_client_handle_t)mcip);
5139 break;
5140 }
5141 ASSERT(ring != (mac_ring_t *)mip->mi_default_tx_ring);
5142 group_type = mip->mi_tx_group_type;
5143 cap_rings = &mip->mi_tx_rings_cap;
5144 /*
5145 * See if we need to take it out of the MAC clients using
5146 * this group.
5147 */
5148 if (MAC_GROUP_NO_CLIENT(group))
5149 break;
5150 mgcp = group->mrg_clients;
5151 defgrp = MAC_DEFAULT_TX_GROUP(mip);
5152 while (mgcp != NULL) {
5153 mcip = mgcp->mgc_client;
5154 mac_srs = MCIP_TX_SRS(mcip);
5155 tx = &mac_srs->srs_tx;
5156 mac_tx_client_quiesce((mac_client_handle_t)mcip);
5157 /*
5158 * If we are here when removing rings from the
5159 * defgroup, mac_reserve_tx_ring() would have
5160 * already deleted the ring from the MAC
5161 * clients in the group.
5162 */
5163 if (group != defgrp) {
5164 mac_tx_invoke_callbacks(mcip,
5165 (mac_tx_cookie_t)
5166 mac_tx_srs_get_soft_ring(mac_srs, ring));
5167 mac_tx_srs_del_ring(mac_srs, ring);
5168 }
5169 /*
5170 * Additionally, if we are left with only
5171 * one ring in the group after this, we need
5172 * to modify the mode, etc. accordingly. (We haven't
5173 * yet taken the ring out, so we check against 2.)
5174 */
5175 if (group->mrg_cur_count == 2) {
5176 if (ring->mr_next == NULL)
5177 rem_ring = group->mrg_rings;
5178 else
5179 rem_ring = ring->mr_next;
5180 mac_tx_invoke_callbacks(mcip,
5181 (mac_tx_cookie_t)
5182 mac_tx_srs_get_soft_ring(mac_srs,
5183 rem_ring));
5184 mac_tx_srs_del_ring(mac_srs, rem_ring);
5185 if (rem_ring->mr_state != MR_INUSE) {
5186 (void) mac_start_ring(rem_ring);
5187 }
5188 tx->st_arg2 = (void *)rem_ring;
5189 mac_tx_srs_stat_recreate(mac_srs, B_FALSE);
5190 ring_info = mac_hwring_getinfo(
5191 (mac_ring_handle_t)rem_ring);
5192 /*
5193 * We are shrinking from multiple
5194 * to 1 ring.
5195 */
5196 if (mac_srs->srs_type & SRST_BW_CONTROL) {
5197 tx->st_mode = SRS_TX_BW;
5198 } else if (mac_tx_serialize ||
5199 (ring_info & MAC_RING_TX_SERIALIZE)) {
5200 tx->st_mode = SRS_TX_SERIALIZE;
5201 } else {
5202 tx->st_mode = SRS_TX_DEFAULT;
5203 }
5204 tx->st_func = mac_tx_get_func(tx->st_mode);
5205 }
5206 mac_tx_client_restart((mac_client_handle_t)mcip);
5207 mgcp = mgcp->mgc_next;
5208 }
5209 break;
5210 }
5211 default:
5212 ASSERT(B_FALSE);
5213 }
5214
5215 /*
5216 * Remove the ring from the group.
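 *
 * To recap the Tx SRS mode transitions (an informal summary of
 * the logic above and in i_mac_group_add_ring()): growing from
 * one ring to many moves SRS_TX_DEFAULT, SRS_TX_SERIALIZE or
 * SRS_TX_BW to SRS_TX_FANOUT or SRS_TX_BW_FANOUT (or the AGGR
 * variants for an aggr client), while shrinking back to a single
 * ring reverts to SRS_TX_BW, SRS_TX_SERIALIZE or SRS_TX_DEFAULT
 * depending on bandwidth control and serialization settings.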
5217 */
5218 if (ring == group->mrg_rings)
5219 group->mrg_rings = ring->mr_next;
5220 else {
5221 mac_ring_t *pre;
5222
5223 pre = group->mrg_rings;
5224 while (pre->mr_next != ring)
5225 pre = pre->mr_next;
5226 pre->mr_next = ring->mr_next;
5227 }
5228 group->mrg_cur_count--;
5229
5230 if (!driver_call) {
5231 ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC);
5232 ASSERT(group->mrg_driver == NULL ||
5233 cap_rings->mr_gremring != NULL);
5234
5235 /*
5236 * Remove the driver level hardware ring.
5237 */
5238 if (group->mrg_driver != NULL) {
5239 cap_rings->mr_gremring(group->mrg_driver,
5240 ring->mr_driver, ring->mr_type);
5241 }
5242 }
5243
5244 ring->mr_gh = NULL;
5245 if (driver_call)
5246 mac_ring_free(mip, ring);
5247 else
5248 ring->mr_flag = 0;
5249 }
5250
5251 /*
5252 * Move a ring to the target group. If needed, remove the ring from the group
5253 * that it currently belongs to.
5254 *
5255 * The caller needs to enter MAC's perimeter by calling mac_perim_enter().
5256 */
5257 static int
5258 mac_group_mov_ring(mac_impl_t *mip, mac_group_t *d_group, mac_ring_t *ring)
5259 {
5260 mac_group_t *s_group = (mac_group_t *)ring->mr_gh;
5261 int rv;
5262
5263 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
5264 ASSERT(d_group != NULL);
5265 ASSERT(s_group == NULL || s_group->mrg_mh == d_group->mrg_mh);
5266
5267 if (s_group == d_group)
5268 return (0);
5269
5270 /*
5271 * Remove it from the current group first.
5272 */
5273 if (s_group != NULL)
5274 i_mac_group_rem_ring(s_group, ring, B_FALSE);
5275
5276 /*
5277 * Add it to the new group.
5278 */
5279 rv = i_mac_group_add_ring(d_group, ring, 0);
5280 if (rv != 0) {
5281 /*
5282 * Failed to add the ring to the destination group,
5283 * so try to add it back to the source group. If
5284 * that also fails, the ring is stuck in limbo; log
5285 * a message.
5286 */
5287 if (i_mac_group_add_ring(s_group, ring, 0)) {
5288 cmn_err(CE_WARN, "%s: failed to move ring %p\n",
5289 mip->mi_name, (void *)ring);
5290 }
5291 }
5292
5293 return (rv);
5294 }
5295
5296 /*
5297 * Find a MAC address according to its value.
5298 */
5299 mac_address_t *
5300 mac_find_macaddr(mac_impl_t *mip, uint8_t *mac_addr)
5301 {
5302 mac_address_t *map;
5303
5304 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
5305
5306 for (map = mip->mi_addresses; map != NULL; map = map->ma_next) {
5307 if (bcmp(mac_addr, map->ma_addr, map->ma_len) == 0)
5308 break;
5309 }
5310
5311 return (map);
5312 }
5313
5314 /*
5315 * Check whether the MAC address is shared by multiple clients.
5316 */
5317 boolean_t
5318 mac_check_macaddr_shared(mac_address_t *map)
5319 {
5320 ASSERT(MAC_PERIM_HELD((mac_handle_t)map->ma_mip));
5321
5322 return (map->ma_nusers > 1);
5323 }
5324
5325 /*
5326 * Remove the specified MAC address from the MAC address list and free it.
5325 */ 5326 static void 5327 mac_free_macaddr(mac_address_t *map) 5328 { 5329 mac_impl_t *mip = map->ma_mip; 5330 5331 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 5332 VERIFY3P(mip->mi_addresses, !=, NULL); 5333 5334 VERIFY3P(map, ==, mac_find_macaddr(mip, map->ma_addr)); 5335 VERIFY3P(map, !=, NULL); 5336 VERIFY3S(map->ma_nusers, ==, 0); 5337 VERIFY3P(map->ma_vlans, ==, NULL); 5338 5339 if (map == mip->mi_addresses) { 5340 mip->mi_addresses = map->ma_next; 5341 } else { 5342 mac_address_t *pre; 5343 5344 pre = mip->mi_addresses; 5345 while (pre->ma_next != map) 5346 pre = pre->ma_next; 5347 pre->ma_next = map->ma_next; 5348 } 5349 5350 kmem_free(map, sizeof (mac_address_t)); 5351 } 5352 5353 static mac_vlan_t * 5354 mac_find_vlan(mac_address_t *map, uint16_t vid) 5355 { 5356 mac_vlan_t *mvp; 5357 5358 for (mvp = map->ma_vlans; mvp != NULL; mvp = mvp->mv_next) { 5359 if (mvp->mv_vid == vid) 5360 return (mvp); 5361 } 5362 5363 return (NULL); 5364 } 5365 5366 static mac_vlan_t * 5367 mac_add_vlan(mac_address_t *map, uint16_t vid) 5368 { 5369 mac_vlan_t *mvp; 5370 5371 /* 5372 * We should never add the same {addr, VID} tuple more 5373 * than once, but let's be sure. 5374 */ 5375 for (mvp = map->ma_vlans; mvp != NULL; mvp = mvp->mv_next) 5376 VERIFY3U(mvp->mv_vid, !=, vid); 5377 5378 /* Add the VLAN to the head of the VLAN list. */ 5379 mvp = kmem_zalloc(sizeof (mac_vlan_t), KM_SLEEP); 5380 mvp->mv_vid = vid; 5381 mvp->mv_next = map->ma_vlans; 5382 map->ma_vlans = mvp; 5383 5384 return (mvp); 5385 } 5386 5387 static void 5388 mac_rem_vlan(mac_address_t *map, mac_vlan_t *mvp) 5389 { 5390 mac_vlan_t *pre; 5391 5392 if (map->ma_vlans == mvp) { 5393 map->ma_vlans = mvp->mv_next; 5394 } else { 5395 pre = map->ma_vlans; 5396 while (pre->mv_next != mvp) { 5397 pre = pre->mv_next; 5398 5399 /* 5400 * We've reached the end of the list without 5401 * finding mvp. 5402 */ 5403 VERIFY3P(pre, !=, NULL); 5404 } 5405 pre->mv_next = mvp->mv_next; 5406 } 5407 5408 kmem_free(mvp, sizeof (mac_vlan_t)); 5409 } 5410 5411 /* 5412 * Create a new mac_address_t if this is the first use of the address 5413 * or add a VID to an existing address. In either case, the 5414 * mac_address_t acts as a list of {addr, VID} tuples where each tuple 5415 * shares the same addr. If group is non-NULL then attempt to program 5416 * the MAC's HW filters for this group. Otherwise, if group is NULL, 5417 * then the MAC has no rings and there is nothing to program. 5418 */ 5419 int 5420 mac_add_macaddr_vlan(mac_impl_t *mip, mac_group_t *group, uint8_t *addr, 5421 uint16_t vid, boolean_t use_hw) 5422 { 5423 mac_address_t *map; 5424 mac_vlan_t *mvp; 5425 int err = 0; 5426 boolean_t allocated_map = B_FALSE; 5427 boolean_t hw_mac = B_FALSE; 5428 boolean_t hw_vlan = B_FALSE; 5429 5430 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 5431 5432 map = mac_find_macaddr(mip, addr); 5433 5434 /* 5435 * If this is the first use of this MAC address then allocate 5436 * and initialize a new structure. 5437 */ 5438 if (map == NULL) { 5439 map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP); 5440 map->ma_len = mip->mi_type->mt_addr_length; 5441 bcopy(addr, map->ma_addr, map->ma_len); 5442 map->ma_nusers = 0; 5443 map->ma_group = group; 5444 map->ma_mip = mip; 5445 map->ma_untagged = B_FALSE; 5446 5447 /* Add the new MAC address to the head of the address list. 
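 *
 * For example (illustrative): after adding {A, untagged} and then
 * {A, 100}, mi_addresses holds a single mac_address_t for A with
 * ma_untagged = B_TRUE, one mac_vlan_t entry for VID 100, and
 * ma_nusers counting both users.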
*/ 5448 map->ma_next = mip->mi_addresses; 5449 mip->mi_addresses = map; 5450 5451 allocated_map = B_TRUE; 5452 } 5453 5454 VERIFY(map->ma_group == NULL || map->ma_group == group); 5455 if (map->ma_group == NULL) 5456 map->ma_group = group; 5457 5458 if (vid == VLAN_ID_NONE) { 5459 map->ma_untagged = B_TRUE; 5460 mvp = NULL; 5461 } else { 5462 mvp = mac_add_vlan(map, vid); 5463 } 5464 5465 /* 5466 * Set the VLAN HW filter if: 5467 * 5468 * o the MAC's VLAN HW filtering is enabled, and 5469 * o the address does not currently rely on promisc mode. 5470 * 5471 * This is called even when the client specifies an untagged 5472 * address (VLAN_ID_NONE) because some MAC providers require 5473 * setting additional bits to accept untagged traffic when 5474 * VLAN HW filtering is enabled. 5475 */ 5476 if (MAC_GROUP_HW_VLAN(group) && 5477 map->ma_type != MAC_ADDRESS_TYPE_UNICAST_PROMISC) { 5478 if ((err = mac_group_addvlan(group, vid)) != 0) 5479 goto bail; 5480 5481 hw_vlan = B_TRUE; 5482 } 5483 5484 VERIFY3S(map->ma_nusers, >=, 0); 5485 map->ma_nusers++; 5486 5487 /* 5488 * If this MAC address already has a HW filter then simply 5489 * increment the counter. 5490 */ 5491 if (map->ma_nusers > 1) 5492 return (0); 5493 5494 /* 5495 * All logic from here on out is executed during initial 5496 * creation only. 5497 */ 5498 VERIFY3S(map->ma_nusers, ==, 1); 5499 5500 /* 5501 * Activate this MAC address by adding it to the reserved group. 5502 */ 5503 if (group != NULL) { 5504 err = mac_group_addmac(group, (const uint8_t *)addr); 5505 5506 /* 5507 * If the driver is out of filters then we can 5508 * continue and use promisc mode. For any other error, 5509 * assume the driver is in a state where we can't 5510 * program the filters or use promisc mode; so we must 5511 * bail. 5512 */ 5513 if (err != 0 && err != ENOSPC) { 5514 map->ma_nusers--; 5515 goto bail; 5516 } 5517 5518 hw_mac = (err == 0); 5519 } 5520 5521 if (hw_mac) { 5522 map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; 5523 return (0); 5524 } 5525 5526 /* 5527 * The MAC address addition failed. If the client requires a 5528 * hardware classified MAC address, fail the operation. This 5529 * feature is only used by sun4v vsw. 5530 */ 5531 if (use_hw && !hw_mac) { 5532 err = ENOSPC; 5533 map->ma_nusers--; 5534 goto bail; 5535 } 5536 5537 /* 5538 * If we reach this point then either the MAC doesn't have 5539 * RINGS capability or we are out of MAC address HW filters. 5540 * In any case we must put the MAC into promiscuous mode. 5541 */ 5542 VERIFY(group == NULL || !hw_mac); 5543 5544 /* 5545 * The one exception is the primary address. A non-RINGS 5546 * driver filters the primary address by default; promisc mode 5547 * is not needed. 5548 */ 5549 if ((group == NULL) && 5550 (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0)) { 5551 map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; 5552 return (0); 5553 } 5554 5555 /* 5556 * Enable promiscuous mode in order to receive traffic to the 5557 * new MAC address. All existing HW filters still send their 5558 * traffic to their respective group/SRSes. But with promisc 5559 * enabled all unknown traffic is delivered to the default 5560 * group where it is SW classified via mac_rx_classify(). 5561 */ 5562 if ((err = i_mac_promisc_set(mip, B_TRUE)) == 0) { 5563 map->ma_type = MAC_ADDRESS_TYPE_UNICAST_PROMISC; 5564 return (0); 5565 } 5566 5567 /* 5568 * We failed to set promisc mode and we are about to free 'map'. 
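 *
 * To summarize the outcomes of the logic above (informal): a
 * working HW filter yields MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;
 * no group plus the primary address also yields
 * UNICAST_CLASSIFIED, since a non-RINGS driver filters its
 * primary address by default; otherwise we fall back to promisc
 * and the address becomes MAC_ADDRESS_TYPE_UNICAST_PROMISC; and
 * if even that fails, everything is unwound below.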
5569 */
5570 map->ma_nusers = 0;
5571
5572 bail:
5573 if (hw_vlan) {
5574 int err2 = mac_group_remvlan(group, vid);
5575
5576 if (err2 != 0) {
5577 cmn_err(CE_WARN, "Failed to remove VLAN %u from group"
5578 " %d on MAC %s: %d.", vid, group->mrg_index,
5579 mip->mi_name, err2);
5580 }
5581 }
5582
5583 if (mvp != NULL)
5584 mac_rem_vlan(map, mvp);
5585
5586 if (allocated_map)
5587 mac_free_macaddr(map);
5588
5589 return (err);
5590 }
5591
5592 int
5593 mac_remove_macaddr_vlan(mac_address_t *map, uint16_t vid)
5594 {
5595 mac_vlan_t *mvp;
5596 mac_impl_t *mip = map->ma_mip;
5597 mac_group_t *group = map->ma_group;
5598 int err = 0;
5599
5600 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
5601 VERIFY3P(map, ==, mac_find_macaddr(mip, map->ma_addr));
5602
5603 if (vid == VLAN_ID_NONE) {
5604 map->ma_untagged = B_FALSE;
5605 mvp = NULL;
5606 } else {
5607 mvp = mac_find_vlan(map, vid);
5608 VERIFY3P(mvp, !=, NULL);
5609 }
5610
5611 if (MAC_GROUP_HW_VLAN(group) &&
5612 map->ma_type == MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED &&
5613 ((err = mac_group_remvlan(group, vid)) != 0))
5614 return (err);
5615
5616 if (mvp != NULL)
5617 mac_rem_vlan(map, mvp);
5618
5619 /*
5620 * If it's not the last client using this MAC address, only update
5621 * the MAC clients count.
5622 */
5623 map->ma_nusers--;
5624 if (map->ma_nusers > 0)
5625 return (0);
5626
5627 VERIFY3S(map->ma_nusers, ==, 0);
5628
5629 /*
5630 * The MAC address is no longer used by any MAC client, so
5631 * remove it from its associated group. Turn off promiscuous
5632 * mode if this is the last address relying on it.
5633 */
5634 switch (map->ma_type) {
5635 case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED:
5636 /*
5637 * Don't free the preset primary address for drivers that
5638 * don't advertise RINGS capability.
5639 */
5640 if (group == NULL)
5641 return (0);
5642
5643 if ((err = mac_group_remmac(group, map->ma_addr)) != 0) {
5644 if (vid == VLAN_ID_NONE)
5645 map->ma_untagged = B_TRUE;
5646 else
5647 (void) mac_add_vlan(map, vid);
5648
5649 /*
5650 * If we fail to remove the MAC address HW
5651 * filter but then also fail to re-add the
5652 * VLAN HW filter then we are in a busted
5653 * state. We do our best by logging a warning
5654 * and returning the original 'err' that got
5655 * us here. At this point, traffic for this
5656 * address + VLAN combination will be dropped
5657 * until the user reboots the system. In the
5658 * future, it would be nice to have a system
5659 * that can compare the state of expected
5660 * classification according to mac to the
5661 * actual state of the provider, and report
5662 * and fix any inconsistencies.
5663 */
5664 if (MAC_GROUP_HW_VLAN(group)) {
5665 int err2;
5666
5667 err2 = mac_group_addvlan(group, vid);
5668 if (err2 != 0) {
5669 cmn_err(CE_WARN, "Failed to re-add VLAN"
5670 " %u to group %d on MAC %s: %d.",
5671 vid, group->mrg_index, mip->mi_name,
5672 err2);
5673 }
5674 }
5675
5676 map->ma_nusers = 1;
5677 return (err);
5678 }
5679
5680 map->ma_group = NULL;
5681 break;
5682 case MAC_ADDRESS_TYPE_UNICAST_PROMISC:
5683 err = i_mac_promisc_set(mip, B_FALSE);
5684 break;
5685 default:
5686 panic("Unexpected ma_type 0x%x, file: %s, line %d",
5687 map->ma_type, __FILE__, __LINE__);
5688 }
5689
5690 if (err != 0) {
5691 map->ma_nusers = 1;
5692 return (err);
5693 }
5694
5695 /*
5696 * We created the MAC address for the primary address at registration,
5697 * so we won't free it here. mac_fini_macaddr() will take care of it.
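 *
 * (Illustrative lifecycle: mac_init_macaddr() creates that entry
 * for mi_addr when the driver registers, and mac_fini_macaddr()
 * frees it at unregistration; see both functions below.)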
5698 */
5699 if (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) != 0)
5700 mac_free_macaddr(map);
5701
5702 return (0);
5703 }
5704
5705 /*
5706 * Update an existing MAC address. The caller needs to make sure that the new
5707 * value has not been used.
5708 */
5709 int
5710 mac_update_macaddr(mac_address_t *map, uint8_t *mac_addr)
5711 {
5712 mac_impl_t *mip = map->ma_mip;
5713 int err = 0;
5714
5715 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
5716 ASSERT(mac_find_macaddr(mip, mac_addr) == NULL);
5717
5718 switch (map->ma_type) {
5719 case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED:
5720 /*
5721 * Update the primary address for drivers that are not
5722 * RINGS capable.
5723 */
5724 if (mip->mi_rx_groups == NULL) {
5725 err = mip->mi_unicst(mip->mi_driver, (const uint8_t *)
5726 mac_addr);
5727 if (err != 0)
5728 return (err);
5729 break;
5730 }
5731
5732 /*
5733 * If this MAC address is not currently in use,
5734 * simply break out and update the value.
5735 */
5736 if (map->ma_nusers == 0)
5737 break;
5738
5739 /*
5740 * Need to replace the MAC address associated with a group.
5741 */
5742 err = mac_group_remmac(map->ma_group, map->ma_addr);
5743 if (err != 0)
5744 return (err);
5745
5746 err = mac_group_addmac(map->ma_group, mac_addr);
5747
5748 /*
5749 * Failure hints at a hardware error. The MAC layer needs to
5750 * have an error notification facility to handle this.
5751 * For now, simply try to restore the value.
5752 */
5753 if (err != 0)
5754 (void) mac_group_addmac(map->ma_group, map->ma_addr);
5755
5756 break;
5757 case MAC_ADDRESS_TYPE_UNICAST_PROMISC:
5758 /*
5759 * Nothing more needs to be done if in promiscuous mode.
5760 */
5761 break;
5762 default:
5763 ASSERT(B_FALSE);
5764 }
5765
5766 /*
5767 * Successfully replaced the MAC address.
5768 */
5769 if (err == 0)
5770 bcopy(mac_addr, map->ma_addr, map->ma_len);
5771
5772 return (err);
5773 }
5774
5775 /*
5776 * Freshen the MAC address with a new value. The caller must have updated
5777 * the hardware MAC address before calling this function.
5778 * This function is meant to handle the MAC address change
5779 * notification from underlying drivers.
5780 */
5781 void
5782 mac_freshen_macaddr(mac_address_t *map, uint8_t *mac_addr)
5783 {
5784 mac_impl_t *mip = map->ma_mip;
5785
5786 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
5787 ASSERT(mac_find_macaddr(mip, mac_addr) == NULL);
5788
5789 /*
5790 * Freshen the MAC address with the new value.
5791 */
5792 bcopy(mac_addr, map->ma_addr, map->ma_len);
5793 bcopy(mac_addr, mip->mi_addr, map->ma_len);
5794
5795 /*
5796 * Update all MAC clients that share this MAC address.
5797 */
5798 mac_unicast_update_clients(mip, map);
5799 }
5800
5801 /*
5802 * Set up the primary MAC address.
5803 */
5804 void
5805 mac_init_macaddr(mac_impl_t *mip)
5806 {
5807 mac_address_t *map;
5808
5809 /*
5810 * The reference count is initialized to zero, until the address
5811 * is actually activated.
5812 */
5813 map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP);
5814 map->ma_len = mip->mi_type->mt_addr_length;
5815 bcopy(mip->mi_addr, map->ma_addr, map->ma_len);
5816
5817 /*
5818 * If the driver advertises RINGS capability, it shouldn't have
5819 * initialized its primary MAC address. For other drivers, including
5820 * VNIC, the primary address must work after registration.
5821 */
5822 if (mip->mi_rx_groups == NULL)
5823 map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;
5824
5825 map->ma_mip = mip;
5826
5827 mip->mi_addresses = map;
5828 }
5829
5830 /*
5831 * Clean up the primary MAC address.
Note, only one primary MAC address 5832 * is allowed. All other MAC addresses must have been freed appropriately. 5833 */ 5834 void 5835 mac_fini_macaddr(mac_impl_t *mip) 5836 { 5837 mac_address_t *map = mip->mi_addresses; 5838 5839 if (map == NULL) 5840 return; 5841 5842 /* 5843 * If mi_addresses is initialized, there should be exactly one 5844 * entry left on the list with no users. 5845 */ 5846 VERIFY3S(map->ma_nusers, ==, 0); 5847 VERIFY3P(map->ma_next, ==, NULL); 5848 VERIFY3P(map->ma_vlans, ==, NULL); 5849 5850 kmem_free(map, sizeof (mac_address_t)); 5851 mip->mi_addresses = NULL; 5852 } 5853 5854 /* 5855 * Logging related functions. 5856 * 5857 * Note that Kernel statistics have been extended to maintain fine 5858 * granularity of statistics viz. hardware lane, software lane, fanout 5859 * stats etc. However, extended accounting continues to support only 5860 * aggregate statistics like before. 5861 */ 5862 5863 /* Write the flow description to a netinfo_t record */ 5864 static netinfo_t * 5865 mac_write_flow_desc(flow_entry_t *flent, mac_client_impl_t *mcip) 5866 { 5867 netinfo_t *ninfo; 5868 net_desc_t *ndesc; 5869 flow_desc_t *fdesc; 5870 mac_resource_props_t *mrp; 5871 5872 ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP); 5873 if (ninfo == NULL) 5874 return (NULL); 5875 ndesc = kmem_zalloc(sizeof (net_desc_t), KM_NOSLEEP); 5876 if (ndesc == NULL) { 5877 kmem_free(ninfo, sizeof (netinfo_t)); 5878 return (NULL); 5879 } 5880 5881 /* 5882 * Grab the fe_lock to see a self-consistent fe_flow_desc. 5883 * Updates to the fe_flow_desc are done under the fe_lock 5884 */ 5885 mutex_enter(&flent->fe_lock); 5886 fdesc = &flent->fe_flow_desc; 5887 mrp = &flent->fe_resource_props; 5888 5889 ndesc->nd_name = flent->fe_flow_name; 5890 ndesc->nd_devname = mcip->mci_name; 5891 bcopy(fdesc->fd_src_mac, ndesc->nd_ehost, ETHERADDRL); 5892 bcopy(fdesc->fd_dst_mac, ndesc->nd_edest, ETHERADDRL); 5893 ndesc->nd_sap = htonl(fdesc->fd_sap); 5894 ndesc->nd_isv4 = (uint8_t)fdesc->fd_ipversion == IPV4_VERSION; 5895 ndesc->nd_bw_limit = mrp->mrp_maxbw; 5896 if (ndesc->nd_isv4) { 5897 ndesc->nd_saddr[3] = htonl(fdesc->fd_local_addr.s6_addr32[3]); 5898 ndesc->nd_daddr[3] = htonl(fdesc->fd_remote_addr.s6_addr32[3]); 5899 } else { 5900 bcopy(&fdesc->fd_local_addr, ndesc->nd_saddr, IPV6_ADDR_LEN); 5901 bcopy(&fdesc->fd_remote_addr, ndesc->nd_daddr, IPV6_ADDR_LEN); 5902 } 5903 ndesc->nd_sport = htons(fdesc->fd_local_port); 5904 ndesc->nd_dport = htons(fdesc->fd_remote_port); 5905 ndesc->nd_protocol = (uint8_t)fdesc->fd_protocol; 5906 mutex_exit(&flent->fe_lock); 5907 5908 ninfo->ni_record = ndesc; 5909 ninfo->ni_size = sizeof (net_desc_t); 5910 ninfo->ni_type = EX_NET_FLDESC_REC; 5911 5912 return (ninfo); 5913 } 5914 5915 /* Write the flow statistics to a netinfo_t record */ 5916 static netinfo_t * 5917 mac_write_flow_stats(flow_entry_t *flent) 5918 { 5919 netinfo_t *ninfo; 5920 net_stat_t *nstat; 5921 mac_soft_ring_set_t *mac_srs; 5922 mac_rx_stats_t *mac_rx_stat; 5923 mac_tx_stats_t *mac_tx_stat; 5924 int i; 5925 5926 ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP); 5927 if (ninfo == NULL) 5928 return (NULL); 5929 nstat = kmem_zalloc(sizeof (net_stat_t), KM_NOSLEEP); 5930 if (nstat == NULL) { 5931 kmem_free(ninfo, sizeof (netinfo_t)); 5932 return (NULL); 5933 } 5934 5935 nstat->ns_name = flent->fe_flow_name; 5936 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 5937 mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; 5938 mac_rx_stat = &mac_srs->srs_rx.sr_stat; 5939 5940 nstat->ns_ibytes += 

/* Write the flow statistics to a netinfo_t record */
static netinfo_t *
mac_write_flow_stats(flow_entry_t *flent)
{
	netinfo_t *ninfo;
	net_stat_t *nstat;
	mac_soft_ring_set_t *mac_srs;
	mac_rx_stats_t *mac_rx_stat;
	mac_tx_stats_t *mac_tx_stat;
	int i;

	ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP);
	if (ninfo == NULL)
		return (NULL);
	nstat = kmem_zalloc(sizeof (net_stat_t), KM_NOSLEEP);
	if (nstat == NULL) {
		kmem_free(ninfo, sizeof (netinfo_t));
		return (NULL);
	}

	nstat->ns_name = flent->fe_flow_name;
	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
		mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
		mac_rx_stat = &mac_srs->srs_rx.sr_stat;

		nstat->ns_ibytes += mac_rx_stat->mrs_intrbytes +
		    mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes;
		nstat->ns_ipackets += mac_rx_stat->mrs_intrcnt +
		    mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt;
		nstat->ns_oerrors += mac_rx_stat->mrs_ierrors;
	}

	mac_srs = (mac_soft_ring_set_t *)(flent->fe_tx_srs);
	if (mac_srs != NULL) {
		mac_tx_stat = &mac_srs->srs_tx.st_stat;

		nstat->ns_obytes = mac_tx_stat->mts_obytes;
		nstat->ns_opackets = mac_tx_stat->mts_opackets;
		nstat->ns_oerrors = mac_tx_stat->mts_oerrors;
	}

	ninfo->ni_record = nstat;
	ninfo->ni_size = sizeof (net_stat_t);
	ninfo->ni_type = EX_NET_FLSTAT_REC;

	return (ninfo);
}

/* Write the link description to a netinfo_t record */
static netinfo_t *
mac_write_link_desc(mac_client_impl_t *mcip)
{
	netinfo_t *ninfo;
	net_desc_t *ndesc;
	flow_entry_t *flent = mcip->mci_flent;

	ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP);
	if (ninfo == NULL)
		return (NULL);
	ndesc = kmem_zalloc(sizeof (net_desc_t), KM_NOSLEEP);
	if (ndesc == NULL) {
		kmem_free(ninfo, sizeof (netinfo_t));
		return (NULL);
	}

	ndesc->nd_name = mcip->mci_name;
	ndesc->nd_devname = mcip->mci_name;
	ndesc->nd_isv4 = B_TRUE;
	/*
	 * Grab the fe_lock to see a self-consistent fe_flow_desc.
	 * Updates to the fe_flow_desc are done under the fe_lock
	 * after removing the flent from the flow table.
	 */
	mutex_enter(&flent->fe_lock);
	bcopy(flent->fe_flow_desc.fd_src_mac, ndesc->nd_ehost, ETHERADDRL);
	mutex_exit(&flent->fe_lock);

	ninfo->ni_record = ndesc;
	ninfo->ni_size = sizeof (net_desc_t);
	ninfo->ni_type = EX_NET_LNDESC_REC;

	return (ninfo);
}

/* Write the link statistics to a netinfo_t record */
static netinfo_t *
mac_write_link_stats(mac_client_impl_t *mcip)
{
	netinfo_t *ninfo;
	net_stat_t *nstat;
	flow_entry_t *flent;
	mac_soft_ring_set_t *mac_srs;
	mac_rx_stats_t *mac_rx_stat;
	mac_tx_stats_t *mac_tx_stat;
	int i;

	ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP);
	if (ninfo == NULL)
		return (NULL);
	nstat = kmem_zalloc(sizeof (net_stat_t), KM_NOSLEEP);
	if (nstat == NULL) {
		kmem_free(ninfo, sizeof (netinfo_t));
		return (NULL);
	}

	nstat->ns_name = mcip->mci_name;
	flent = mcip->mci_flent;
	if (flent != NULL) {
		for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
			mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
			mac_rx_stat = &mac_srs->srs_rx.sr_stat;

			nstat->ns_ibytes += mac_rx_stat->mrs_intrbytes +
			    mac_rx_stat->mrs_pollbytes +
			    mac_rx_stat->mrs_lclbytes;
			nstat->ns_ipackets += mac_rx_stat->mrs_intrcnt +
			    mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt;
			nstat->ns_oerrors += mac_rx_stat->mrs_ierrors;
		}
	}

	mac_srs = (mac_soft_ring_set_t *)(mcip->mci_flent->fe_tx_srs);
	if (mac_srs != NULL) {
		mac_tx_stat = &mac_srs->srs_tx.st_stat;

		nstat->ns_obytes = mac_tx_stat->mts_obytes;
		nstat->ns_opackets = mac_tx_stat->mts_opackets;
		nstat->ns_oerrors = mac_tx_stat->mts_oerrors;
	}

	ninfo->ni_record = nstat;
	ninfo->ni_size = sizeof (net_stat_t);
	ninfo->ni_type = EX_NET_LNSTAT_REC;

	return (ninfo);
}
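
/*
 * Taken together, the four mac_write_*() helpers above produce the only two
 * exacct record shapes the logging path emits: descriptions
 * (EX_NET_FLDESC_REC/EX_NET_LNDESC_REC) and statistics
 * (EX_NET_FLSTAT_REC/EX_NET_LNSTAT_REC). A minimal sketch of a producer,
 * assuming a caller-provided list initialized like the ones below:
 *
 *	netinfo_t *ninfo = mac_write_link_stats(mcip);
 *	if (ninfo != NULL)
 *		list_insert_tail(log_list, ninfo);
 *	...
 *	later, with no mac locks held:
 *	(void) exacct_commit_netinfo(ninfo->ni_record, ninfo->ni_type);
 *	kmem_free(ninfo->ni_record, ninfo->ni_size);
 *	kmem_free(ninfo, sizeof (*ninfo));
 *
 * This is exactly the shape of i_mac_log_info() further down.
 */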

typedef struct i_mac_log_state_s {
	boolean_t	mi_last;
	int		mi_fenable;
	int		mi_lenable;
	list_t		*mi_list;
} i_mac_log_state_t;

/*
 * For a given flow, if the description has not been logged before, do it now.
 * If it is a VNIC, then we have collected information about it from the MAC
 * table, so skip it.
 *
 * Called through mac_flow_walk_nolock()
 *
 * Return 0 if successful.
 */
static int
mac_log_flowinfo(flow_entry_t *flent, void *arg)
{
	mac_client_impl_t *mcip = flent->fe_mcip;
	i_mac_log_state_t *lstate = arg;
	netinfo_t *ninfo;

	if (mcip == NULL)
		return (0);

	/*
	 * If the name starts with "vnic", and fe_user_generated is true (to
	 * exclude the mcast and active flow entries created implicitly for
	 * a vnic), it is a VNIC flow. I.e. vnic1 is a vnic flow,
	 * vnic/bge1/mcast1 is not and neither is vnic/bge1/active.
	 */
	if (strncasecmp(flent->fe_flow_name, "vnic", 4) == 0 &&
	    (flent->fe_type & FLOW_USER) != 0) {
		return (0);
	}

	if (!flent->fe_desc_logged) {
		/*
		 * We don't return an error because we want to continue the
		 * walk in case this is the last walk, which means we
		 * need to reset fe_desc_logged in all the flows.
		 */
		if ((ninfo = mac_write_flow_desc(flent, mcip)) == NULL)
			return (0);
		list_insert_tail(lstate->mi_list, ninfo);
		flent->fe_desc_logged = B_TRUE;
	}

	/*
	 * Regardless of the error, we want to proceed in case we have to
	 * reset fe_desc_logged.
	 */
	ninfo = mac_write_flow_stats(flent);
	if (ninfo == NULL)
		return (-1);

	list_insert_tail(lstate->mi_list, ninfo);

	if (mcip != NULL && !(mcip->mci_state_flags & MCIS_DESC_LOGGED))
		flent->fe_desc_logged = B_FALSE;

	return (0);
}
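
/*
 * The fe_desc_logged/MCIS_DESC_LOGGED handshake above implements a simple
 * "log the description once per logging session" protocol. Informally:
 *
 *	first interval:		description + stats recorded, flag set
 *	later intervals:	only stats recorded
 *	last walk (mi_last):	flag cleared again so that a future
 *				mac_start_logusage() session re-logs the
 *				descriptions
 */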

/*
 * Log the description for each mac client of this mac_impl_t, if it
 * hasn't already been done. Additionally, log statistics for the link as
 * well. Walk the flow table and log information for each flow as well.
 * If it is the last walk (mci_last), then we turn off mci_desc_logged (and
 * also fe_desc_logged, if flow logging is on) since we want to log the
 * description if and when logging is restarted.
 *
 * Return 0 upon success or -1 upon failure
 */
static int
i_mac_impl_log(mac_impl_t *mip, i_mac_log_state_t *lstate)
{
	mac_client_impl_t *mcip;
	netinfo_t *ninfo;

	i_mac_perim_enter(mip);
	/*
	 * Only walk the client list for NIC and etherstub.
	 */
	if ((mip->mi_state_flags & MIS_DISABLED) ||
	    ((mip->mi_state_flags & MIS_IS_VNIC) &&
	    (mac_get_lower_mac_handle((mac_handle_t)mip) != NULL))) {
		i_mac_perim_exit(mip);
		return (0);
	}

	for (mcip = mip->mi_clients_list; mcip != NULL;
	    mcip = mcip->mci_client_next) {
		if (!MCIP_DATAPATH_SETUP(mcip))
			continue;
		if (lstate->mi_lenable) {
			if (!(mcip->mci_state_flags & MCIS_DESC_LOGGED)) {
				ninfo = mac_write_link_desc(mcip);
				if (ninfo == NULL) {
					/*
					 * We can't terminate it if this is
					 * the last walk, else there might
					 * be some links with mi_desc_logged
					 * set to true, which means their
					 * description won't be logged the
					 * next time logging is started
					 * (similarly for the flows within
					 * such links). We can continue
					 * without walking the flow table
					 * (i.e. to set fe_desc_logged to
					 * false) because we won't have
					 * written any flow stuff for this
					 * link as we haven't logged the
					 * link itself.
					 */
					i_mac_perim_exit(mip);
					if (lstate->mi_last)
						return (0);
					else
						return (-1);
				}
				mcip->mci_state_flags |= MCIS_DESC_LOGGED;
				list_insert_tail(lstate->mi_list, ninfo);
			}
		}

		ninfo = mac_write_link_stats(mcip);
		if (ninfo == NULL && !lstate->mi_last) {
			i_mac_perim_exit(mip);
			return (-1);
		}
		/*
		 * On the last walk we must keep going even if the
		 * allocation failed, but then there is nothing to insert.
		 */
		if (ninfo != NULL)
			list_insert_tail(lstate->mi_list, ninfo);

		if (lstate->mi_last)
			mcip->mci_state_flags &= ~MCIS_DESC_LOGGED;

		if (lstate->mi_fenable) {
			if (mcip->mci_subflow_tab != NULL) {
				(void) mac_flow_walk_nolock(
				    mcip->mci_subflow_tab, mac_log_flowinfo,
				    lstate);
			}
		}
	}
	i_mac_perim_exit(mip);
	return (0);
}

/*
 * modhash walker function to add a mac_impl_t to a list.
 */
/*ARGSUSED*/
static uint_t
i_mac_impl_list_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	list_t *list = (list_t *)arg;
	mac_impl_t *mip = (mac_impl_t *)val;

	if ((mip->mi_state_flags & MIS_DISABLED) == 0) {
		list_insert_tail(list, mip);
		mip->mi_ref++;
	}

	return (MH_WALK_CONTINUE);
}

void
i_mac_log_info(list_t *net_log_list, i_mac_log_state_t *lstate)
{
	list_t mac_impl_list;
	mac_impl_t *mip;
	netinfo_t *ninfo;

	/* Create a list of mac_impls */
	ASSERT(RW_LOCK_HELD(&i_mac_impl_lock));
	list_create(&mac_impl_list, sizeof (mac_impl_t), offsetof(mac_impl_t,
	    mi_node));
	mod_hash_walk(i_mac_impl_hash, i_mac_impl_list_walker, &mac_impl_list);
	rw_exit(&i_mac_impl_lock);

	/* Create log entries for each mac_impl */
	for (mip = list_head(&mac_impl_list); mip != NULL;
	    mip = list_next(&mac_impl_list, mip)) {
		if (i_mac_impl_log(mip, lstate) != 0)
			continue;
	}

	/* Remove elements and destroy the list of mac_impls */
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	while ((mip = list_remove_tail(&mac_impl_list)) != NULL) {
		mip->mi_ref--;
	}
	rw_exit(&i_mac_impl_lock);
	list_destroy(&mac_impl_list);

	/*
	 * Write log entries to files outside of locks, free associated
	 * structures, and remove entries from the list.
	 */
	while ((ninfo = list_head(net_log_list)) != NULL) {
		(void) exacct_commit_netinfo(ninfo->ni_record, ninfo->ni_type);
		list_remove(net_log_list, ninfo);
		kmem_free(ninfo->ni_record, ninfo->ni_size);
		kmem_free(ninfo, sizeof (*ninfo));
	}
	list_destroy(net_log_list);
}
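
/*
 * The walk above is careful about lock ordering: the netinfo records are
 * built while holding only the per-mip perimeter, and exacct_commit_netinfo()
 * runs with no mac locks held at all. The mi_ref bump is what keeps each
 * mac_impl_t from disappearing during the window where i_mac_impl_lock is
 * dropped. Schematically:
 *
 *	rw_enter(&i_mac_impl_lock, ...)
 *	collect mips, mip->mi_ref++		prevents unregister
 *	rw_exit(&i_mac_impl_lock)
 *	per mip: i_mac_perim_enter/exit		build records
 *	rw_enter ... mip->mi_ref-- ... rw_exit
 *	commit records				no locks held
 */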

/*
 * The timer thread that runs every mac_logging_interval seconds and logs
 * link and/or flow information.
 */
/* ARGSUSED */
void
mac_log_linkinfo(void *arg)
{
	i_mac_log_state_t lstate;
	list_t net_log_list;

	list_create(&net_log_list, sizeof (netinfo_t),
	    offsetof(netinfo_t, ni_link));

	rw_enter(&i_mac_impl_lock, RW_READER);
	if (!mac_flow_log_enable && !mac_link_log_enable) {
		rw_exit(&i_mac_impl_lock);
		return;
	}
	lstate.mi_fenable = mac_flow_log_enable;
	lstate.mi_lenable = mac_link_log_enable;
	lstate.mi_last = B_FALSE;
	lstate.mi_list = &net_log_list;

	/* Write log entries for each mac_impl in the list */
	i_mac_log_info(&net_log_list, &lstate);

	if (mac_flow_log_enable || mac_link_log_enable) {
		mac_logging_timer = timeout(mac_log_linkinfo, NULL,
		    SEC_TO_TICK(mac_logging_interval));
	}
}

typedef struct i_mac_fastpath_state_s {
	boolean_t	mf_disable;
	int		mf_err;
} i_mac_fastpath_state_t;

/* modhash walker function to enable or disable fastpath */
/*ARGSUSED*/
static uint_t
i_mac_fastpath_walker(mod_hash_key_t key, mod_hash_val_t *val,
    void *arg)
{
	i_mac_fastpath_state_t *state = arg;
	mac_handle_t mh = (mac_handle_t)val;

	if (state->mf_disable)
		state->mf_err = mac_fastpath_disable(mh);
	else
		mac_fastpath_enable(mh);

	return (state->mf_err == 0 ? MH_WALK_CONTINUE : MH_WALK_TERMINATE);
}

/*
 * Start the logging timer.
 */
int
mac_start_logusage(mac_logtype_t type, uint_t interval)
{
	i_mac_fastpath_state_t dstate = {B_TRUE, 0};
	i_mac_fastpath_state_t estate = {B_FALSE, 0};
	int err;

	rw_enter(&i_mac_impl_lock, RW_WRITER);
	switch (type) {
	case MAC_LOGTYPE_FLOW:
		if (mac_flow_log_enable) {
			rw_exit(&i_mac_impl_lock);
			return (0);
		}
		/* FALLTHRU */
	case MAC_LOGTYPE_LINK:
		if (mac_link_log_enable) {
			rw_exit(&i_mac_impl_lock);
			return (0);
		}
		break;
	default:
		ASSERT(0);
	}

	/* Disable fastpath */
	mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &dstate);
	if ((err = dstate.mf_err) != 0) {
		/* Reenable fastpath */
		mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &estate);
		rw_exit(&i_mac_impl_lock);
		return (err);
	}

	switch (type) {
	case MAC_LOGTYPE_FLOW:
		mac_flow_log_enable = B_TRUE;
		/* FALLTHRU */
	case MAC_LOGTYPE_LINK:
		mac_link_log_enable = B_TRUE;
		break;
	}

	mac_logging_interval = interval;
	rw_exit(&i_mac_impl_lock);
	mac_log_linkinfo(NULL);
	return (0);
}
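
/*
 * A hypothetical consumer (e.g. the acctadm/exacct plumbing) would drive
 * this interface roughly as follows; the 20 second interval is just an
 * example value:
 *
 *	(void) mac_start_logusage(MAC_LOGTYPE_LINK, 20);
 *	... extended accounting records accumulate every interval ...
 *	mac_stop_logusage(MAC_LOGTYPE_LINK);
 *
 * Note that enabling MAC_LOGTYPE_FLOW implies link logging as well, as the
 * FALLTHRU above and the asserts in mac_stop_logusage() show.
 */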

/*
 * Stop the logging timer if both link and flow logging are turned off.
 */
void
mac_stop_logusage(mac_logtype_t type)
{
	i_mac_log_state_t lstate;
	i_mac_fastpath_state_t estate = {B_FALSE, 0};
	list_t net_log_list;

	list_create(&net_log_list, sizeof (netinfo_t),
	    offsetof(netinfo_t, ni_link));

	rw_enter(&i_mac_impl_lock, RW_WRITER);

	lstate.mi_fenable = mac_flow_log_enable;
	lstate.mi_lenable = mac_link_log_enable;
	lstate.mi_list = &net_log_list;

	/* Last walk */
	lstate.mi_last = B_TRUE;

	switch (type) {
	case MAC_LOGTYPE_FLOW:
		if (lstate.mi_fenable) {
			ASSERT(mac_link_log_enable);
			mac_flow_log_enable = B_FALSE;
			mac_link_log_enable = B_FALSE;
			break;
		}
		/* FALLTHRU */
	case MAC_LOGTYPE_LINK:
		if (!lstate.mi_lenable || mac_flow_log_enable) {
			rw_exit(&i_mac_impl_lock);
			return;
		}
		mac_link_log_enable = B_FALSE;
		break;
	default:
		ASSERT(0);
	}

	/* Reenable fastpath */
	mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &estate);

	(void) untimeout(mac_logging_timer);
	mac_logging_timer = NULL;

	/* Write log entries for each mac_impl in the list */
	i_mac_log_info(&net_log_list, &lstate);
}

/*
 * Walk the rx and tx SRS/SRs for a flow and update the priority value.
 */
void
mac_flow_update_priority(mac_client_impl_t *mcip, flow_entry_t *flent)
{
	pri_t pri;
	int count;
	mac_soft_ring_set_t *mac_srs;

	if (flent->fe_rx_srs_cnt <= 0)
		return;

	if (((mac_soft_ring_set_t *)flent->fe_rx_srs[0])->srs_type ==
	    SRST_FLOW) {
		pri = FLOW_PRIORITY(mcip->mci_min_pri,
		    mcip->mci_max_pri,
		    flent->fe_resource_props.mrp_priority);
	} else {
		pri = mcip->mci_max_pri;
	}

	for (count = 0; count < flent->fe_rx_srs_cnt; count++) {
		mac_srs = flent->fe_rx_srs[count];
		mac_update_srs_priority(mac_srs, pri);
	}
	/*
	 * If we have a Tx SRS, we need to modify all the threads associated
	 * with it.
	 */
	if (flent->fe_tx_srs != NULL)
		mac_update_srs_priority(flent->fe_tx_srs, pri);
}
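
/*
 * FLOW_PRIORITY() maps the flow's requested priority level into the
 * client's [mci_min_pri, mci_max_pri] range. As an illustrative example
 * (actual values depend on the client's configuration): with a client
 * range of [60, 99] and mrp_priority = MPL_HIGH, a flow SRS (SRST_FLOW)
 * gets a thread priority inside that range, while a link-level SRS simply
 * inherits mci_max_pri.
 */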

/*
 * RX and TX rings are reserved according to different semantics depending
 * on the requests from the MAC clients and the type of rings:
 *
 * On the Tx side, by default we reserve individual rings, independently from
 * the groups.
 *
 * On the Rx side, the reservation is at the granularity of the group
 * of rings, and used for v12n level 1 only. It has a special case for the
 * primary client.
 *
 * If a share is allocated to a MAC client, we allocate a TX group and an
 * RX group to the client, and assign TX rings and RX rings to these
 * groups according to information gathered from the driver through
 * the share capability.
 *
 * The foreseeable evolution of Rx rings will handle v12n level 2 and higher
 * to allocate individual rings out of a group and program the hw classifier
 * based on IP address or higher level criteria.
 */

/*
 * mac_reserve_tx_ring()
 * Reserve an unused ring by marking it with the MR_INUSE state.
 * As reserved, the ring is ready to function.
 *
 * Notes for Hybrid I/O:
 *
 * If a specific ring is needed, it is specified through the desired_ring
 * argument. Otherwise that argument is set to NULL.
 * If the desired ring was previously allocated to another client, this
 * function swaps it with a new ring from the group of unassigned rings.
 */
mac_ring_t *
mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring)
{
	mac_group_t *group;
	mac_grp_client_t *mgcp;
	mac_client_impl_t *mcip;
	mac_soft_ring_set_t *srs;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	/*
	 * Find an available ring and start it before changing its status.
	 * The unassigned rings are at the end of the mi_tx_groups
	 * array.
	 */
	group = MAC_DEFAULT_TX_GROUP(mip);

	/* Can't take the default ring out of the default group */
	ASSERT(desired_ring != (mac_ring_t *)mip->mi_default_tx_ring);

	if (desired_ring->mr_state == MR_FREE) {
		ASSERT(MAC_GROUP_NO_CLIENT(group));
		if (mac_start_ring(desired_ring) != 0)
			return (NULL);
		return (desired_ring);
	}
	/*
	 * There are clients using this ring, so let's move the clients
	 * away from using this ring.
	 */
	for (mgcp = group->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) {
		mcip = mgcp->mgc_client;
		mac_tx_client_quiesce((mac_client_handle_t)mcip);
		srs = MCIP_TX_SRS(mcip);
		ASSERT(mac_tx_srs_ring_present(srs, desired_ring));
		mac_tx_invoke_callbacks(mcip,
		    (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(srs,
		    desired_ring));
		mac_tx_srs_del_ring(srs, desired_ring);
		mac_tx_client_restart((mac_client_handle_t)mcip);
	}
	return (desired_ring);
}

/*
 * For a non-default group with multiple clients, return the primary client.
 */
static mac_client_impl_t *
mac_get_grp_primary(mac_group_t *grp)
{
	mac_grp_client_t *mgcp = grp->mrg_clients;
	mac_client_impl_t *mcip;

	while (mgcp != NULL) {
		mcip = mgcp->mgc_client;
		if (mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC)
			return (mcip);
		mgcp = mgcp->mgc_next;
	}
	return (NULL);
}

/*
 * Hybrid I/O specifies the ring that should be given to a share.
 * If the ring is already used by clients, then we need to release
 * the ring back to the default group so that we can give it to
 * the share. This means the clients using this ring now get a
 * replacement ring. If there aren't any replacement rings, this
 * function returns a failure.
 */
static int
mac_reclaim_ring_from_grp(mac_impl_t *mip, mac_ring_type_t ring_type,
    mac_ring_t *ring, mac_ring_t **rings, int nrings)
{
	mac_group_t *group = (mac_group_t *)ring->mr_gh;
	mac_resource_props_t *mrp;
	mac_client_impl_t *mcip;
	mac_group_t *defgrp;
	mac_ring_t *tring;
	mac_group_t *tgrp;
	int i;
	int j;

	mcip = MAC_GROUP_ONLY_CLIENT(group);
	if (mcip == NULL)
		mcip = mac_get_grp_primary(group);
	ASSERT(mcip != NULL);
	ASSERT(mcip->mci_share == 0);

	mrp = MCIP_RESOURCE_PROPS(mcip);
	if (ring_type == MAC_RING_TYPE_RX) {
		defgrp = mip->mi_rx_donor_grp;
		if ((mrp->mrp_mask & MRP_RX_RINGS) == 0) {
			/* Need to put this mac client in the default group */
			if (mac_rx_switch_group(mcip, group, defgrp) != 0)
				return (ENOSPC);
		} else {
			/*
			 * Switch this ring with some other ring from
			 * the default group.
			 */
			for (tring = defgrp->mrg_rings; tring != NULL;
			    tring = tring->mr_next) {
				if (tring->mr_index == 0)
					continue;
				for (j = 0; j < nrings; j++) {
					if (rings[j] == tring)
						break;
				}
				if (j >= nrings)
					break;
			}
			if (tring == NULL)
				return (ENOSPC);
			if (mac_group_mov_ring(mip, group, tring) != 0)
				return (ENOSPC);
			if (mac_group_mov_ring(mip, defgrp, ring) != 0) {
				(void) mac_group_mov_ring(mip, defgrp, tring);
				return (ENOSPC);
			}
		}
		ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp);
		return (0);
	}

	defgrp = MAC_DEFAULT_TX_GROUP(mip);
	if (ring == (mac_ring_t *)mip->mi_default_tx_ring) {
		/*
		 * See if we can get a spare ring to replace the default
		 * ring.
		 */
		if (defgrp->mrg_cur_count == 1) {
			/*
			 * Need to get a ring from another client, see if
			 * there are any clients that can be moved to
			 * the default group, thereby freeing some rings.
			 */
			for (i = 0; i < mip->mi_tx_group_count; i++) {
				tgrp = &mip->mi_tx_groups[i];
				if (tgrp->mrg_state ==
				    MAC_GROUP_STATE_REGISTERED) {
					continue;
				}
				mcip = MAC_GROUP_ONLY_CLIENT(tgrp);
				if (mcip == NULL)
					mcip = mac_get_grp_primary(tgrp);
				ASSERT(mcip != NULL);
				mrp = MCIP_RESOURCE_PROPS(mcip);
				if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) {
					ASSERT(tgrp->mrg_cur_count == 1);
					/*
					 * If this ring is part of the
					 * rings asked for by the share we
					 * cannot use it as the default ring.
					 */
					for (j = 0; j < nrings; j++) {
						if (rings[j] == tgrp->mrg_rings)
							break;
					}
					if (j < nrings)
						continue;
					mac_tx_client_quiesce(
					    (mac_client_handle_t)mcip);
					mac_tx_switch_group(mcip, tgrp,
					    defgrp);
					mac_tx_client_restart(
					    (mac_client_handle_t)mcip);
					break;
				}
			}
			/*
			 * All the rings are reserved, can't give up the
			 * default ring.
			 */
			if (defgrp->mrg_cur_count <= 1)
				return (ENOSPC);
		}
		/*
		 * Swap the default ring with another.
		 */
		for (tring = defgrp->mrg_rings; tring != NULL;
		    tring = tring->mr_next) {
			/*
			 * If this ring is part of the rings asked for by the
			 * share we cannot use it as the default ring.
			 */
			for (j = 0; j < nrings; j++) {
				if (rings[j] == tring)
					break;
			}
			if (j >= nrings)
				break;
		}
		ASSERT(tring != NULL);
		mip->mi_default_tx_ring = (mac_ring_handle_t)tring;
		return (0);
	}
	/*
	 * The Tx ring is with a group reserved by a MAC client. See if
	 * we can swap it.
	 */
	ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
	mcip = MAC_GROUP_ONLY_CLIENT(group);
	if (mcip == NULL)
		mcip = mac_get_grp_primary(group);
	ASSERT(mcip != NULL);
	mrp = MCIP_RESOURCE_PROPS(mcip);
	mac_tx_client_quiesce((mac_client_handle_t)mcip);
	if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) {
		ASSERT(group->mrg_cur_count == 1);
		/* Put this mac client in the default group */
		mac_tx_switch_group(mcip, group, defgrp);
	} else {
		/*
		 * Switch this ring with some other ring from
		 * the default group.
		 */
		for (tring = defgrp->mrg_rings; tring != NULL;
		    tring = tring->mr_next) {
			if (tring == (mac_ring_t *)mip->mi_default_tx_ring)
				continue;
			/*
			 * If this ring is part of the rings asked for by the
			 * share we cannot use it for swapping.
			 */
			for (j = 0; j < nrings; j++) {
				if (rings[j] == tring)
					break;
			}
			if (j >= nrings)
				break;
		}
		if (tring == NULL) {
			mac_tx_client_restart((mac_client_handle_t)mcip);
			return (ENOSPC);
		}
		if (mac_group_mov_ring(mip, group, tring) != 0) {
			mac_tx_client_restart((mac_client_handle_t)mcip);
			return (ENOSPC);
		}
		if (mac_group_mov_ring(mip, defgrp, ring) != 0) {
			(void) mac_group_mov_ring(mip, defgrp, tring);
			mac_tx_client_restart((mac_client_handle_t)mcip);
			return (ENOSPC);
		}
	}
	mac_tx_client_restart((mac_client_handle_t)mcip);
	ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp);
	return (0);
}
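
/*
 * To summarize the swap logic above (an informal sketch): reclaiming ring R
 * from group G for a share proceeds as
 *
 *	quiesce G's client
 *	pick tring in the default group, with tring not wanted by the share
 *	mov_ring(G, tring)		give the client a replacement
 *	mov_ring(defgrp, R)		free R into the default group
 *	restart G's client
 *
 * with each failure path undoing the previous move, so a client never ends
 * up with fewer rings than it started with.
 */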

/*
 * Populate a zero-ring group with rings. If the share is non-NULL,
 * the rings are chosen according to that share.
 * Invoked after allocating a new RX or TX group through
 * mac_reserve_rx_group() or mac_reserve_tx_group(), respectively.
 * Returns zero on success, an errno otherwise.
 */
int
i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type,
    mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share,
    uint32_t ringcnt)
{
	mac_ring_t **rings, *ring;
	uint_t nrings;
	int rv = 0, i = 0, j;

	ASSERT((ring_type == MAC_RING_TYPE_RX &&
	    mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) ||
	    (ring_type == MAC_RING_TYPE_TX &&
	    mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC));

	/*
	 * First find the rings to allocate to the group.
	 */
	if (share != 0) {
		/* get rings through ms_squery() */
		mip->mi_share_capab.ms_squery(share, ring_type, NULL, &nrings);
		ASSERT(nrings != 0);
		rings = kmem_alloc(nrings * sizeof (mac_ring_handle_t),
		    KM_SLEEP);
		mip->mi_share_capab.ms_squery(share, ring_type,
		    (mac_ring_handle_t *)rings, &nrings);
		for (i = 0; i < nrings; i++) {
			/*
			 * If we have given this ring to a non-default
			 * group, we need to check if we can get this
			 * ring back.
			 */
			ring = rings[i];
			if (ring->mr_gh != (mac_group_handle_t)src_group ||
			    ring == (mac_ring_t *)mip->mi_default_tx_ring) {
				if (mac_reclaim_ring_from_grp(mip, ring_type,
				    ring, rings, nrings) != 0) {
					rv = ENOSPC;
					goto bail;
				}
			}
		}
	} else {
		/*
		 * Pick one ring from the default group.
		 *
		 * For now pick the second ring, which requires the first ring
		 * at index 0 to stay in the default group, since it is the
		 * ring which carries the multicast traffic.
		 * We need a better way for a driver to indicate this,
		 * for example a per-ring flag.
		 */
		rings = kmem_alloc(ringcnt * sizeof (mac_ring_handle_t),
		    KM_SLEEP);
		for (ring = src_group->mrg_rings; ring != NULL;
		    ring = ring->mr_next) {
			if (ring_type == MAC_RING_TYPE_RX &&
			    ring->mr_index == 0) {
				continue;
			}
			if (ring_type == MAC_RING_TYPE_TX &&
			    ring == (mac_ring_t *)mip->mi_default_tx_ring) {
				continue;
			}
			rings[i++] = ring;
			if (i == ringcnt)
				break;
		}
		ASSERT(ring != NULL);
		/*
		 * Set nrings to the allocated size so that the free at
		 * "bail" matches the allocation even on failure.
		 */
		nrings = ringcnt;
		/* Not enough rings as required */
		if (i != ringcnt) {
			rv = ENOSPC;
			goto bail;
		}
	}

	switch (ring_type) {
	case MAC_RING_TYPE_RX:
		if (src_group->mrg_cur_count - nrings < 1) {
			/* we ran out of rings */
			rv = ENOSPC;
			goto bail;
		}

		/* move receive rings to the new group */
		for (i = 0; i < nrings; i++) {
			rv = mac_group_mov_ring(mip, new_group, rings[i]);
			if (rv != 0) {
				/* move rings back on failure */
				for (j = 0; j < i; j++) {
					(void) mac_group_mov_ring(mip,
					    src_group, rings[j]);
				}
				goto bail;
			}
		}
		break;

	case MAC_RING_TYPE_TX: {
		mac_ring_t *tmp_ring;

		/* move the TX rings to the new group */
		for (i = 0; i < nrings; i++) {
			/* get the desired ring */
			tmp_ring = mac_reserve_tx_ring(mip, rings[i]);
			if (tmp_ring == NULL) {
				rv = ENOSPC;
				goto bail;
			}
			ASSERT(tmp_ring == rings[i]);
			rv = mac_group_mov_ring(mip, new_group, rings[i]);
			if (rv != 0) {
				/* cleanup on failure */
				for (j = 0; j < i; j++) {
					(void) mac_group_mov_ring(mip,
					    MAC_DEFAULT_TX_GROUP(mip),
					    rings[j]);
				}
				goto bail;
			}
		}
		break;
	}
	}

	/* add group to share */
	if (share != 0)
		mip->mi_share_capab.ms_sadd(share, new_group->mrg_driver);

bail:
	/* free temporary array of rings */
	kmem_free(rings, nrings * sizeof (mac_ring_handle_t));

	return (rv);
}
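
/*
 * A typical caller pairs group reservation with ring population; roughly
 * (error handling elided, see mac_reserve_rx_group() below for the real
 * sequence):
 *
 *	grp = &mip->mi_rx_groups[i];		candidate group
 *	mac_start_group(grp);
 *	i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX,
 *	    mip->mi_rx_donor_grp, grp, share, nrings);
 */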

void
mac_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip)
{
	mac_grp_client_t *mgcp;

	for (mgcp = grp->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) {
		if (mgcp->mgc_client == mcip)
			break;
	}

	ASSERT(mgcp == NULL);

	mgcp = kmem_zalloc(sizeof (mac_grp_client_t), KM_SLEEP);
	mgcp->mgc_client = mcip;
	mgcp->mgc_next = grp->mrg_clients;
	grp->mrg_clients = mgcp;
}

void
mac_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip)
{
	mac_grp_client_t *mgcp, **pprev;

	for (pprev = &grp->mrg_clients, mgcp = *pprev; mgcp != NULL;
	    pprev = &mgcp->mgc_next, mgcp = *pprev) {
		if (mgcp->mgc_client == mcip)
			break;
	}

	ASSERT(mgcp != NULL);

	*pprev = mgcp->mgc_next;
	kmem_free(mgcp, sizeof (mac_grp_client_t));
}

/*
 * Return true if any client on this group explicitly asked for HW
 * rings (of type mask) or has a bound share.
 */
static boolean_t
i_mac_clients_hw(mac_group_t *grp, uint32_t mask)
{
	mac_grp_client_t *mgcip;
	mac_client_impl_t *mcip;
	mac_resource_props_t *mrp;

	for (mgcip = grp->mrg_clients; mgcip != NULL; mgcip = mgcip->mgc_next) {
		mcip = mgcip->mgc_client;
		mrp = MCIP_RESOURCE_PROPS(mcip);
		if (mcip->mci_share != 0 || (mrp->mrp_mask & mask) != 0)
			return (B_TRUE);
	}

	return (B_FALSE);
}

/*
 * Finds an available group and exclusively reserves it for a client.
 * The group is chosen to suit the flow's resource controls (bandwidth and
 * fanout requirements) and the address type.
 * If the requestor is the primary MAC then return the group with the
 * largest number of rings, otherwise the default ring when available.
 */
mac_group_t *
mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move)
{
	mac_share_handle_t share = mcip->mci_share;
	mac_impl_t *mip = mcip->mci_mip;
	mac_group_t *grp = NULL;
	int i;
	int err = 0;
	mac_address_t *map;
	mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
	int nrings;
	int donor_grp_rcnt;
	boolean_t need_exclgrp = B_FALSE;
	int need_rings = 0;
	mac_group_t *candidate_grp = NULL;
	mac_client_impl_t *gclient;
	mac_group_t *donorgrp = NULL;
	boolean_t rxhw = mrp->mrp_mask & MRP_RX_RINGS;
	boolean_t unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC;
	boolean_t isprimary;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC;

	/*
	 * Check if a group already has this MAC address (case of VLANs)
	 * unless we are moving this MAC client from one group to another.
	 */
	if (!move && (map = mac_find_macaddr(mip, mac_addr)) != NULL) {
		if (map->ma_group != NULL)
			return (map->ma_group);
	}

	if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0)
		return (NULL);

	/*
	 * If this client is requesting exclusive MAC access then
	 * return NULL to ensure the client uses the default group.
	 */
	if (mcip->mci_state_flags & MCIS_EXCLUSIVE)
		return (NULL);

	/* For dynamic groups, default unspecified to 1 */
	if (rxhw && unspec &&
	    mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
		mrp->mrp_nrxrings = 1;
	}

	/*
	 * For static grouping we allow only specifying rings=0 and
	 * unspecified.
	 */
	if (rxhw && mrp->mrp_nrxrings > 0 &&
	    mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) {
		return (NULL);
	}

	if (rxhw) {
		/*
		 * We have explicitly asked for a group (with nrxrings,
		 * if unspec).
		 */
		if (unspec || mrp->mrp_nrxrings > 0) {
			need_exclgrp = B_TRUE;
			need_rings = mrp->mrp_nrxrings;
		} else if (mrp->mrp_nrxrings == 0) {
			/*
			 * We have asked for a software group.
			 */
			return (NULL);
		}
	} else if (isprimary && mip->mi_nactiveclients == 1 &&
	    mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
		/*
		 * If the primary is the only active client on this
		 * mip and we have not asked for any rings, we give
		 * it the default group so that the primary gets to
		 * use all the rings.
		 */
		return (NULL);
	}

	/* The group that can donate rings */
	donorgrp = mip->mi_rx_donor_grp;

	/*
	 * The number of rings that the default group can donate.
	 * We need to leave at least one ring.
	 */
	donor_grp_rcnt = donorgrp->mrg_cur_count - 1;

	/*
	 * Try to exclusively reserve an RX group.
	 *
	 * For flows requiring HW_DEFAULT_RING (unicast flow of the primary
	 * client), try to reserve a non-default RX group and give
	 * it all the rings from the donor group, except the default ring.
	 *
	 * For flows requiring HW_RING (unicast flow of other clients), try
	 * to reserve a non-default RX group with the specified number of
	 * rings, if available.
	 *
	 * For flows that have not asked for software or hardware ring,
	 * try to reserve a non-default group with 1 ring, if available.
	 */
	for (i = 1; i < mip->mi_rx_group_count; i++) {
		grp = &mip->mi_rx_groups[i];

		DTRACE_PROBE3(rx__group__trying, char *, mip->mi_name,
		    int, grp->mrg_index, mac_group_state_t, grp->mrg_state);

		/*
		 * Check if this group could be a candidate group for
		 * eviction if we need a group for this MAC client,
		 * but there aren't any. A candidate group is one
		 * that didn't ask for an exclusive group, but got
		 * one and it has enough rings (combined with what
		 * the donor group can donate) for the new MAC
		 * client.
		 */
		if (grp->mrg_state >= MAC_GROUP_STATE_RESERVED) {
			/*
			 * If the donor group is not the default
			 * group, don't bother looking for a candidate
			 * group. If we don't have enough rings we
			 * will check if the primary group can be
			 * vacated.
			 */
			if (candidate_grp == NULL &&
			    donorgrp == MAC_DEFAULT_RX_GROUP(mip)) {
				if (!i_mac_clients_hw(grp, MRP_RX_RINGS) &&
				    (unspec ||
				    (grp->mrg_cur_count + donor_grp_rcnt >=
				    need_rings))) {
					candidate_grp = grp;
				}
			}
			continue;
		}
		/*
		 * This group could already be SHARED by other multicast
		 * flows on this client. In that case, the group would
		 * be shared and has already been started.
		 */
		ASSERT(grp->mrg_state != MAC_GROUP_STATE_UNINIT);

		if ((grp->mrg_state == MAC_GROUP_STATE_REGISTERED) &&
		    (mac_start_group(grp) != 0)) {
			continue;
		}

		if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC)
			break;
		ASSERT(grp->mrg_cur_count == 0);

		/*
		 * Populate the group. Rings should be taken
		 * from the donor group.
		 */
		nrings = rxhw ? need_rings : isprimary ? donor_grp_rcnt : 1;

		/*
		 * If the donor group can't donate, let's just walk and
		 * see if someone can vacate a group, so that we have
		 * enough rings for this, unless we have already
		 * identified a candidate group.
		 */
		if (nrings <= donor_grp_rcnt) {
			err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX,
			    donorgrp, grp, share, nrings);
			if (err == 0) {
				/*
				 * For a share, i_mac_group_allocate_rings
				 * gets the rings from the driver; let's
				 * populate the property for the client now.
				 */
				if (share != 0) {
					mac_client_set_rings(
					    (mac_client_handle_t)mcip,
					    grp->mrg_cur_count, -1);
				}
				if (mac_is_primary_client(mcip) && !rxhw)
					mip->mi_rx_donor_grp = grp;
				break;
			}
		}

		DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *,
		    mip->mi_name, int, grp->mrg_index, int, err);

		/*
		 * It's a dynamic group but the grouping operation
		 * failed.
		 */
		mac_stop_group(grp);
	}

	/* We didn't find an exclusive group for this MAC client */
	if (i >= mip->mi_rx_group_count) {

		if (!need_exclgrp)
			return (NULL);

		/*
		 * If we found a candidate group then move the
		 * existing MAC client from the candidate_group to the
		 * default group and give the candidate_group to the
		 * new MAC client. If we didn't find a candidate
		 * group, then check if the primary is in its own
		 * group and if it can make way for this MAC client.
		 */
		if (candidate_grp == NULL &&
		    donorgrp != MAC_DEFAULT_RX_GROUP(mip) &&
		    donorgrp->mrg_cur_count >= need_rings) {
			candidate_grp = donorgrp;
		}
		if (candidate_grp != NULL) {
			boolean_t prim_grp = B_FALSE;

			/*
			 * Switch the existing MAC client from the
			 * candidate group to the default group. If
			 * the candidate group is the donor group,
			 * then after the switch we need to update the
			 * donor group too.
			 */
			grp = candidate_grp;
			gclient = grp->mrg_clients->mgc_client;
			VERIFY3P(gclient, !=, NULL);
			if (grp == mip->mi_rx_donor_grp)
				prim_grp = B_TRUE;
			if (mac_rx_switch_group(gclient, grp,
			    MAC_DEFAULT_RX_GROUP(mip)) != 0) {
				return (NULL);
			}
			if (prim_grp) {
				mip->mi_rx_donor_grp =
				    MAC_DEFAULT_RX_GROUP(mip);
				donorgrp = MAC_DEFAULT_RX_GROUP(mip);
			}

			/*
			 * Now give this group with the required rings
			 * to this MAC client.
			 */
			ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED);
			if (mac_start_group(grp) != 0)
				return (NULL);

			if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC)
				return (grp);

			donor_grp_rcnt = donorgrp->mrg_cur_count - 1;
			ASSERT(grp->mrg_cur_count == 0);
			ASSERT(donor_grp_rcnt >= need_rings);
			err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX,
			    donorgrp, grp, share, need_rings);
			if (err == 0) {
				/*
				 * For a share, i_mac_group_allocate_rings
				 * gets the rings from the driver; let's
				 * populate the property for the client now.
				 */
				if (share != 0) {
					mac_client_set_rings(
					    (mac_client_handle_t)mcip,
					    grp->mrg_cur_count, -1);
				}
				DTRACE_PROBE2(rx__group__reserved,
				    char *, mip->mi_name, int, grp->mrg_index);
				return (grp);
			}
			DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *,
			    mip->mi_name, int, grp->mrg_index, int, err);
			mac_stop_group(grp);
		}
		return (NULL);
	}
	ASSERT(grp != NULL);

	DTRACE_PROBE2(rx__group__reserved,
	    char *, mip->mi_name, int, grp->mrg_index);
	return (grp);
}
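
/*
 * To make the decision tree above concrete, a few illustrative outcomes,
 * assuming a driver with dynamic group support:
 *
 *	primary, sole active client, no rings asked:	default group (NULL)
 *	client with rxrings=0:				software group (NULL)
 *	client with rxrings=2:				exclusive group, 2 rings
 *							taken from the donor
 *	no free group but a candidate exists:		candidate's client is
 *							moved to the default
 *							group first
 */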

/*
 * mac_release_rx_group()
 *
 * Release the group when it has no remaining clients. The group is
 * stopped and its shares are removed and all rings are assigned back
 * to the default group. This should never be called against the default
 * group.
 */
void
mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group)
{
	mac_impl_t *mip = mcip->mci_mip;
	mac_ring_t *ring;

	ASSERT(group != MAC_DEFAULT_RX_GROUP(mip));
	ASSERT(MAC_GROUP_NO_CLIENT(group) == B_TRUE);

	if (mip->mi_rx_donor_grp == group)
		mip->mi_rx_donor_grp = MAC_DEFAULT_RX_GROUP(mip);

	/*
	 * This is the case where there are no clients left. Any
	 * SRS etc. on this group have also been quiesced.
	 */
	for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) {
		if (ring->mr_classify_type == MAC_HW_CLASSIFIER) {
			ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
			/*
			 * Remove the SRS associated with the HW ring.
			 * As a result, polling will be disabled.
			 */
			ring->mr_srs = NULL;
		}
		ASSERT(group->mrg_state < MAC_GROUP_STATE_RESERVED ||
		    ring->mr_state == MR_INUSE);
		if (ring->mr_state == MR_INUSE) {
			mac_stop_ring(ring);
			ring->mr_flag = 0;
		}
	}

	/* remove group from share */
	if (mcip->mci_share != 0) {
		mip->mi_share_capab.ms_sremove(mcip->mci_share,
		    group->mrg_driver);
	}

	if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
		mac_ring_t *ring;

		/*
		 * Rings were dynamically allocated to the group.
		 * Move the rings back to the default group.
		 */
		while ((ring = group->mrg_rings) != NULL) {
			(void) mac_group_mov_ring(mip, mip->mi_rx_donor_grp,
			    ring);
		}
	}
	mac_stop_group(group);
	/*
	 * Possible improvement: See if we can assign the group just released
	 * to another client of the mip.
	 */
}

/*
 * Move the MAC address from fgrp to tgrp.
 */
static int
mac_rx_move_macaddr(mac_client_impl_t *mcip, mac_group_t *fgrp,
    mac_group_t *tgrp)
{
	mac_impl_t *mip = mcip->mci_mip;
	uint8_t maddr[MAXMACADDRLEN];
	int err = 0;
	uint16_t vid;
	mac_unicast_impl_t *muip;
	boolean_t use_hw;

	mac_rx_client_quiesce((mac_client_handle_t)mcip);
	VERIFY3P(mcip->mci_unicast, !=, NULL);
	bcopy(mcip->mci_unicast->ma_addr, maddr, mcip->mci_unicast->ma_len);

	/*
	 * Does the client require MAC address hardware classification?
	 */
	use_hw = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0;
	vid = i_mac_flow_vid(mcip->mci_flent);

	/*
	 * You can never move an address that is shared by multiple
	 * clients. mac_datapath_setup() ensures that clients sharing
	 * an address are placed on the default group. This guarantees
	 * that a non-default group will only ever have one client and
	 * thus make full use of HW filters.
	 */
	if (mac_check_macaddr_shared(mcip->mci_unicast)) {
		/* Undo the quiesce before failing */
		mac_rx_client_restart((mac_client_handle_t)mcip);
		return (EINVAL);
	}

	err = mac_remove_macaddr_vlan(mcip->mci_unicast, vid);

	if (err != 0) {
		mac_rx_client_restart((mac_client_handle_t)mcip);
		return (err);
	}

	/*
	 * If this isn't the primary MAC address then the
	 * mac_address_t has been freed by the last call to
	 * mac_remove_macaddr_vlan(). In any case, NULL the reference
	 * to avoid a dangling pointer.
	 */
	mcip->mci_unicast = NULL;

	/*
	 * We also have to NULL all the mui_map references -- sun4v
	 * strikes again!
	 */
	rw_enter(&mcip->mci_rw_lock, RW_WRITER);
	for (muip = mcip->mci_unicast_list; muip != NULL; muip = muip->mui_next)
		muip->mui_map = NULL;
	rw_exit(&mcip->mci_rw_lock);

	/*
	 * Program the H/W Classifier first; if this fails we need not
	 * proceed with the other stuff.
	 */
	if ((err = mac_add_macaddr_vlan(mip, tgrp, maddr, vid, use_hw)) != 0) {
		int err2;

		/* Revert back the H/W Classifier */
		err2 = mac_add_macaddr_vlan(mip, fgrp, maddr, vid, use_hw);

		if (err2 != 0) {
			cmn_err(CE_WARN, "Failed to revert HW classification"
			    " on MAC %s, for client %s: %d.", mip->mi_name,
			    mcip->mci_name, err2);
		}

		mac_rx_client_restart((mac_client_handle_t)mcip);
		return (err);
	}

	/*
	 * Get a reference to the new mac_address_t and update the
	 * client's reference. Then restart the client and add the
	 * other clients of this MAC addr (if they exist).
	 */
	mcip->mci_unicast = mac_find_macaddr(mip, maddr);
	rw_enter(&mcip->mci_rw_lock, RW_WRITER);
	for (muip = mcip->mci_unicast_list; muip != NULL; muip = muip->mui_next)
		muip->mui_map = mcip->mci_unicast;
	rw_exit(&mcip->mci_rw_lock);
	mac_rx_client_restart((mac_client_handle_t)mcip);
	return (0);
}
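
/*
 * The ordering above is the important part; informally:
 *
 *	quiesce client			stop inbound traffic
 *	remove addr/vid from fgrp	h/w filter gone
 *	add addr/vid to tgrp		h/w filter in the new group
 *	  on failure: re-add to fgrp	best-effort rollback
 *	re-resolve mci_unicast		the mac_address_t may be new memory
 *	restart client
 *
 * so at no point can two groups both claim the same unicast filter.
 */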

/*
 * Switch the MAC client from one group to another. This means we need
 * to remove the MAC address from the group, remove the MAC client,
 * teardown the SRSs and revert the group state. Then, we add the client
 * to the destination group, set the SRSs, and add the MAC address to the
 * group.
 */
int
mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp,
    mac_group_t *tgrp)
{
	int err;
	mac_group_state_t next_state;
	mac_client_impl_t *group_only_mcip;
	mac_client_impl_t *gmcip;
	mac_impl_t *mip = mcip->mci_mip;
	mac_grp_client_t *mgcp;

	VERIFY3P(fgrp, ==, mcip->mci_flent->fe_rx_ring_group);

	if ((err = mac_rx_move_macaddr(mcip, fgrp, tgrp)) != 0)
		return (err);

	/*
	 * If the group is marked as reserved and in use by a single
	 * client, then there is an SRS to teardown.
	 */
	if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED &&
	    MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) {
		mac_rx_srs_group_teardown(mcip->mci_flent, B_TRUE);
	}

	/*
	 * If we are moving the client from a non-default group, then
	 * we know that any additional clients on this group share the
	 * same MAC address. Since we moved the MAC address filter, we
	 * need to move these clients too.
	 *
	 * If we are moving the client from the default group and its
	 * MAC address has VLAN clients, then we must move those
	 * clients as well.
	 *
	 * In both cases the idea is the same: we moved the MAC
	 * address filter to the tgrp, so we must move all clients
	 * using that MAC address to tgrp as well.
	 */
	if (fgrp != MAC_DEFAULT_RX_GROUP(mip)) {
		mgcp = fgrp->mrg_clients;
		while (mgcp != NULL) {
			gmcip = mgcp->mgc_client;
			mgcp = mgcp->mgc_next;
			mac_group_remove_client(fgrp, gmcip);
			mac_group_add_client(tgrp, gmcip);
			gmcip->mci_flent->fe_rx_ring_group = tgrp;
		}
		mac_release_rx_group(mcip, fgrp);
		VERIFY3B(MAC_GROUP_NO_CLIENT(fgrp), ==, B_TRUE);
		mac_set_group_state(fgrp, MAC_GROUP_STATE_REGISTERED);
	} else {
		mac_group_remove_client(fgrp, mcip);
		mac_group_add_client(tgrp, mcip);
		mcip->mci_flent->fe_rx_ring_group = tgrp;

		/*
		 * If there are other clients (VLANs) sharing this address
		 * then move them too.
		 */
		if (mac_check_macaddr_shared(mcip->mci_unicast)) {
			/*
			 * We need to move all the clients that are using
			 * this MAC address.
			 */
			mgcp = fgrp->mrg_clients;
			while (mgcp != NULL) {
				gmcip = mgcp->mgc_client;
				mgcp = mgcp->mgc_next;
				if (mcip->mci_unicast == gmcip->mci_unicast) {
					mac_group_remove_client(fgrp, gmcip);
					mac_group_add_client(tgrp, gmcip);
					gmcip->mci_flent->fe_rx_ring_group =
					    tgrp;
				}
			}
		}

		/*
		 * The default group still handles multicast and
		 * broadcast traffic; it won't transition to
		 * MAC_GROUP_STATE_REGISTERED.
		 */
		if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED)
			mac_rx_group_unmark(fgrp, MR_CONDEMNED);
		mac_set_group_state(fgrp, MAC_GROUP_STATE_SHARED);
	}

	next_state = mac_group_next_state(tgrp, &group_only_mcip,
	    MAC_DEFAULT_RX_GROUP(mip), B_TRUE);
	mac_set_group_state(tgrp, next_state);

	/*
	 * If the destination group is reserved, then setup the SRSes.
	 * Otherwise make sure to use SW classification.
	 */
	if (tgrp->mrg_state == MAC_GROUP_STATE_RESERVED) {
		mac_rx_srs_group_setup(mcip, mcip->mci_flent, SRST_LINK);
		mac_fanout_setup(mcip, mcip->mci_flent,
		    MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, mcip, NULL,
		    NULL);
		mac_rx_group_unmark(tgrp, MR_INCIPIENT);
	} else {
		mac_rx_switch_grp_to_sw(tgrp);
	}

	return (0);
}

/*
 * Reserves a TX group for the specified share. Invoked by mac_tx_srs_setup()
 * when a share was allocated to the client.
 */
mac_group_t *
mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move)
{
	mac_impl_t *mip = mcip->mci_mip;
	mac_group_t *grp = NULL;
	int rv;
	int i;
	int err;
	mac_group_t *defgrp;
	mac_share_handle_t share = mcip->mci_share;
	mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
	int nrings;
	int defnrings;
	boolean_t need_exclgrp = B_FALSE;
	int need_rings = 0;
	mac_group_t *candidate_grp = NULL;
	mac_client_impl_t *gclient;
	mac_resource_props_t *gmrp;
	boolean_t txhw = mrp->mrp_mask & MRP_TX_RINGS;
	boolean_t unspec = mrp->mrp_mask & MRP_TXRINGS_UNSPEC;
	boolean_t isprimary;

	isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC;

	/*
	 * When we come here for a VLAN on the primary (dladm create-vlan),
	 * we need to pair it along with the primary (to keep it consistent
	 * with the RX side). So, we check if the primary is already assigned
	 * to a group and return the group if so. The other way is also
	 * true, i.e. the VLAN is already created and now we are plumbing
	 * the primary.
	 */
	if (!move && isprimary) {
		for (gclient = mip->mi_clients_list; gclient != NULL;
		    gclient = gclient->mci_client_next) {
			if (gclient->mci_flent->fe_type & FLOW_PRIMARY_MAC &&
			    gclient->mci_flent->fe_tx_ring_group != NULL) {
				return (gclient->mci_flent->fe_tx_ring_group);
			}
		}
	}

	if (mip->mi_tx_groups == NULL || mip->mi_tx_group_count == 0)
		return (NULL);

	/* For dynamic groups, default unspec to 1 */
	if (txhw && unspec &&
	    mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
		mrp->mrp_ntxrings = 1;
	}
	/*
	 * For static grouping we allow only specifying rings=0 and
	 * unspecified.
	 */
	if (txhw && mrp->mrp_ntxrings > 0 &&
	    mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC) {
		return (NULL);
	}

	if (txhw) {
		/*
		 * We have explicitly asked for a group (with ntxrings,
		 * if unspec).
		 */
		if (unspec || mrp->mrp_ntxrings > 0) {
			need_exclgrp = B_TRUE;
			need_rings = mrp->mrp_ntxrings;
		} else if (mrp->mrp_ntxrings == 0) {
			/*
			 * We have asked for a software group.
			 */
			return (NULL);
		}
	}
	defgrp = MAC_DEFAULT_TX_GROUP(mip);
	/*
	 * The number of rings that the default group can donate.
	 * We need to leave at least one ring - the default ring - in
	 * this group.
	 */
	defnrings = defgrp->mrg_cur_count - 1;

	/*
	 * The primary gets the default group unless explicitly told not
	 * to (i.e. rings > 0).
	 */
	if (isprimary && !need_exclgrp)
		return (NULL);

	nrings = (mrp->mrp_mask & MRP_TX_RINGS) != 0 ? mrp->mrp_ntxrings : 1;
	for (i = 0; i < mip->mi_tx_group_count; i++) {
		grp = &mip->mi_tx_groups[i];
		if ((grp->mrg_state == MAC_GROUP_STATE_RESERVED) ||
		    (grp->mrg_state == MAC_GROUP_STATE_UNINIT)) {
			/*
			 * Select a candidate for replacement if we don't
			 * get an exclusive group. A candidate group is one
			 * that didn't ask for an exclusive group, but got
			 * one and it has enough rings (combined with what
			 * the default group can donate) for the new MAC
			 * client.
			 */
			if (grp->mrg_state == MAC_GROUP_STATE_RESERVED &&
			    candidate_grp == NULL) {
				gclient = MAC_GROUP_ONLY_CLIENT(grp);
				VERIFY3P(gclient, !=, NULL);
				gmrp = MCIP_RESOURCE_PROPS(gclient);
				if (gclient->mci_share == 0 &&
				    (gmrp->mrp_mask & MRP_TX_RINGS) == 0 &&
				    (unspec ||
				    (grp->mrg_cur_count + defnrings) >=
				    need_rings)) {
					candidate_grp = grp;
				}
			}
			continue;
		}
		/*
		 * If the default group can't donate, let's just walk and
		 * see if someone can vacate a group, so that we have
		 * enough rings for this.
		 */
		if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC ||
		    nrings <= defnrings) {
			if (grp->mrg_state == MAC_GROUP_STATE_REGISTERED) {
				rv = mac_start_group(grp);
				ASSERT(rv == 0);
			}
			break;
		}
	}

	/* The default group */
	if (i >= mip->mi_tx_group_count) {
		/*
		 * If we need an exclusive group and have identified a
		 * candidate group we switch the MAC client from the
		 * candidate group to the default group and give the
		 * candidate group to this client.
		 */
		if (need_exclgrp && candidate_grp != NULL) {
			/*
			 * Switch the MAC client from the candidate
			 * group to the default group. We know the
			 * candidate_grp came from a reserved group
			 * and thus only has one client.
			 */
			grp = candidate_grp;
			gclient = MAC_GROUP_ONLY_CLIENT(grp);
			VERIFY3P(gclient, !=, NULL);
			mac_tx_client_quiesce((mac_client_handle_t)gclient);
			mac_tx_switch_group(gclient, grp, defgrp);
			mac_tx_client_restart((mac_client_handle_t)gclient);

			/*
			 * Give the candidate group with the specified number
			 * of rings to this MAC client.
			 */
			ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED);
			rv = mac_start_group(grp);
			ASSERT(rv == 0);

			if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC)
				return (grp);

			ASSERT(grp->mrg_cur_count == 0);
			ASSERT(defgrp->mrg_cur_count > need_rings);

			err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX,
			    defgrp, grp, share, need_rings);
			if (err == 0) {
				/*
				 * For a share, i_mac_group_allocate_rings
				 * gets the rings from the driver; let's
				 * populate the property for the client now.
				 */
				if (share != 0) {
					mac_client_set_rings(
					    (mac_client_handle_t)mcip, -1,
					    grp->mrg_cur_count);
				}
				mip->mi_tx_group_free--;
				return (grp);
			}
			DTRACE_PROBE3(tx__group__reserve__alloc__rings, char *,
			    mip->mi_name, int, grp->mrg_index, int, err);
			mac_stop_group(grp);
		}
		return (NULL);
	}
	/*
	 * We got an exclusive group, but it is not dynamic.
	 */
	if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) {
		mip->mi_tx_group_free--;
		return (grp);
	}

	rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, defgrp, grp,
	    share, nrings);
	if (rv != 0) {
		DTRACE_PROBE3(tx__group__reserve__alloc__rings,
		    char *, mip->mi_name, int, grp->mrg_index, int, rv);
		mac_stop_group(grp);
		return (NULL);
	}
	/*
	 * For a share, i_mac_group_allocate_rings gets the rings from the
	 * driver; let's populate the property for the client now.
	 */
	if (share != 0) {
		mac_client_set_rings((mac_client_handle_t)mcip, -1,
		    grp->mrg_cur_count);
	}
	mip->mi_tx_group_free--;
	return (grp);
}

void
mac_release_tx_group(mac_client_impl_t *mcip, mac_group_t *grp)
{
	mac_impl_t *mip = mcip->mci_mip;
	mac_share_handle_t share = mcip->mci_share;
	mac_ring_t *ring;
	mac_soft_ring_set_t *srs = MCIP_TX_SRS(mcip);
	mac_group_t *defgrp;

	defgrp = MAC_DEFAULT_TX_GROUP(mip);
	if (srs != NULL) {
		if (srs->srs_soft_ring_count > 0) {
			for (ring = grp->mrg_rings; ring != NULL;
			    ring = ring->mr_next) {
				ASSERT(mac_tx_srs_ring_present(srs, ring));
				mac_tx_invoke_callbacks(mcip,
				    (mac_tx_cookie_t)
				    mac_tx_srs_get_soft_ring(srs, ring));
				mac_tx_srs_del_ring(srs, ring);
			}
		} else {
			ASSERT(srs->srs_tx.st_arg2 != NULL);
			srs->srs_tx.st_arg2 = NULL;
			mac_srs_stat_delete(srs);
		}
	}
	if (share != 0)
		mip->mi_share_capab.ms_sremove(share, grp->mrg_driver);

	/* move the rings back to the pool */
	if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
		while ((ring = grp->mrg_rings) != NULL)
			(void) mac_group_mov_ring(mip, defgrp, ring);
	}
	mac_stop_group(grp);
	mip->mi_tx_group_free++;
}

/*
 * Disassociate a MAC client from a group, i.e. go through the rings in the
 * group and delete all the soft rings tied to them.
 */
static void
mac_tx_dismantle_soft_rings(mac_group_t *fgrp, flow_entry_t *flent)
{
	mac_client_impl_t *mcip = flent->fe_mcip;
	mac_soft_ring_set_t *tx_srs;
	mac_srs_tx_t *tx;
	mac_ring_t *ring;

	tx_srs = flent->fe_tx_srs;
	tx = &tx_srs->srs_tx;

	/* Single ring case: we haven't created any soft rings */
	if (tx->st_mode == SRS_TX_BW || tx->st_mode == SRS_TX_SERIALIZE ||
	    tx->st_mode == SRS_TX_DEFAULT) {
		tx->st_arg2 = NULL;
		mac_srs_stat_delete(tx_srs);
	/* Fanout case, where we have to dismantle the soft rings */
	} else {
		for (ring = fgrp->mrg_rings; ring != NULL;
		    ring = ring->mr_next) {
			ASSERT(mac_tx_srs_ring_present(tx_srs, ring));
			mac_tx_invoke_callbacks(mcip,
			    (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(tx_srs,
			    ring));
			mac_tx_srs_del_ring(tx_srs, ring);
		}
		ASSERT(tx->st_arg2 == NULL);
	}
}
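
/*
 * Which branch runs above is decided by the SRS transmit mode: in the
 * single-ring modes (SRS_TX_DEFAULT, SRS_TX_SERIALIZE, SRS_TX_BW) the SRS
 * drives one ring directly through st_arg2, while the fanout modes
 * (e.g. SRS_TX_FANOUT) hang one soft ring off the SRS per hardware ring,
 * and those are what get torn down ring by ring.
 */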

/*
 * Switch the MAC client from one group to another. This means we need
 * to remove the MAC client, teardown the SRSs and revert the group state.
 * Then, we add the client to the destination group, set the SRSs etc.
 */
void
mac_tx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp,
    mac_group_t *tgrp)
{
	mac_client_impl_t *group_only_mcip;
	mac_impl_t *mip = mcip->mci_mip;
	flow_entry_t *flent = mcip->mci_flent;
	mac_group_t *defgrp;
	mac_grp_client_t *mgcp;
	mac_client_impl_t *gmcip;
	flow_entry_t *gflent;

	defgrp = MAC_DEFAULT_TX_GROUP(mip);
	ASSERT(fgrp == flent->fe_tx_ring_group);

	if (fgrp == defgrp) {
		/*
		 * If this is the primary we need to find any VLANs on
		 * the primary and move them too.
		 */
		mac_group_remove_client(fgrp, mcip);
		mac_tx_dismantle_soft_rings(fgrp, flent);
		if (mac_check_macaddr_shared(mcip->mci_unicast)) {
			mgcp = fgrp->mrg_clients;
			while (mgcp != NULL) {
				gmcip = mgcp->mgc_client;
				mgcp = mgcp->mgc_next;
				if (mcip->mci_unicast != gmcip->mci_unicast)
					continue;
				mac_tx_client_quiesce(
				    (mac_client_handle_t)gmcip);

				gflent = gmcip->mci_flent;
				mac_group_remove_client(fgrp, gmcip);
				mac_tx_dismantle_soft_rings(fgrp, gflent);

				mac_group_add_client(tgrp, gmcip);
				gflent->fe_tx_ring_group = tgrp;
				/* We could directly set this to SHARED */
				tgrp->mrg_state = mac_group_next_state(tgrp,
				    &group_only_mcip, defgrp, B_FALSE);

				mac_tx_srs_group_setup(gmcip, gflent,
				    SRST_LINK);
				mac_fanout_setup(gmcip, gflent,
				    MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver,
				    gmcip, NULL, NULL);

				mac_tx_client_restart(
				    (mac_client_handle_t)gmcip);
			}
		}
		if (MAC_GROUP_NO_CLIENT(fgrp)) {
			mac_ring_t *ring;
			int cnt;
			int ringcnt;

			fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED;
			/*
			 * Additionally, we also need to stop all
			 * the rings in the default group, except
			 * the default ring. The reason is that
			 * this group won't be released since it is
			 * the default group, so the rings won't
			 * be stopped otherwise.
			 */
			ringcnt = fgrp->mrg_cur_count;
			ring = fgrp->mrg_rings;
			for (cnt = 0; cnt < ringcnt; cnt++) {
				if (ring->mr_state == MR_INUSE &&
				    ring !=
				    (mac_ring_t *)mip->mi_default_tx_ring) {
					mac_stop_ring(ring);
					ring->mr_flag = 0;
				}
				ring = ring->mr_next;
			}
		} else if (MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) {
			fgrp->mrg_state = MAC_GROUP_STATE_RESERVED;
		} else {
			ASSERT(fgrp->mrg_state == MAC_GROUP_STATE_SHARED);
		}
	} else {
		/*
		 * We could have VLANs sharing the non-default group with
		 * the primary.
		 */
7886 */ 7887 mgcp = fgrp->mrg_clients; 7888 while (mgcp != NULL) { 7889 gmcip = mgcp->mgc_client; 7890 mgcp = mgcp->mgc_next; 7891 if (gmcip == mcip) 7892 continue; 7893 mac_tx_client_quiesce((mac_client_handle_t)gmcip); 7894 gflent = gmcip->mci_flent; 7895 7896 mac_group_remove_client(fgrp, gmcip); 7897 mac_tx_dismantle_soft_rings(fgrp, gflent); 7898 7899 mac_group_add_client(tgrp, gmcip); 7900 gflent->fe_tx_ring_group = tgrp; 7901 /* We could directly set this to SHARED */ 7902 tgrp->mrg_state = mac_group_next_state(tgrp, 7903 &group_only_mcip, defgrp, B_FALSE); 7904 mac_tx_srs_group_setup(gmcip, gflent, SRST_LINK); 7905 mac_fanout_setup(gmcip, gflent, 7906 MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver, 7907 gmcip, NULL, NULL); 7908 7909 mac_tx_client_restart((mac_client_handle_t)gmcip); 7910 } 7911 mac_group_remove_client(fgrp, mcip); 7912 mac_release_tx_group(mcip, fgrp); 7913 fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED; 7914 } 7915 7916 /* Add it to the tgroup */ 7917 mac_group_add_client(tgrp, mcip); 7918 flent->fe_tx_ring_group = tgrp; 7919 tgrp->mrg_state = mac_group_next_state(tgrp, &group_only_mcip, 7920 defgrp, B_FALSE); 7921 7922 mac_tx_srs_group_setup(mcip, flent, SRST_LINK); 7923 mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), 7924 mac_rx_deliver, mcip, NULL, NULL); 7925 } 7926 7927 /* 7928 * This is a 1-time control path activity initiated by the client (IP). 7929 * The mac perimeter protects against other simultaneous control activities, 7930 * for example an ioctl that attempts to change the degree of fanout and 7931 * increase or decrease the number of softrings associated with this Tx SRS. 7932 */ 7933 static mac_tx_notify_cb_t * 7934 mac_client_tx_notify_add(mac_client_impl_t *mcip, 7935 mac_tx_notify_t notify, void *arg) 7936 { 7937 mac_cb_info_t *mcbi; 7938 mac_tx_notify_cb_t *mtnfp; 7939 7940 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 7941 7942 mtnfp = kmem_zalloc(sizeof (mac_tx_notify_cb_t), KM_SLEEP); 7943 mtnfp->mtnf_fn = notify; 7944 mtnfp->mtnf_arg = arg; 7945 mtnfp->mtnf_link.mcb_objp = mtnfp; 7946 mtnfp->mtnf_link.mcb_objsize = sizeof (mac_tx_notify_cb_t); 7947 mtnfp->mtnf_link.mcb_flags = MCB_TX_NOTIFY_CB_T; 7948 7949 mcbi = &mcip->mci_tx_notify_cb_info; 7950 mutex_enter(mcbi->mcbi_lockp); 7951 mac_callback_add(mcbi, &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link); 7952 mutex_exit(mcbi->mcbi_lockp); 7953 return (mtnfp); 7954 } 7955 7956 static void 7957 mac_client_tx_notify_remove(mac_client_impl_t *mcip, mac_tx_notify_cb_t *mtnfp) 7958 { 7959 mac_cb_info_t *mcbi; 7960 mac_cb_t **cblist; 7961 7962 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 7963 7964 if (!mac_callback_find(&mcip->mci_tx_notify_cb_info, 7965 &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link)) { 7966 cmn_err(CE_WARN, 7967 "mac_client_tx_notify_remove: callback not " 7968 "found, mcip 0x%p mtnfp 0x%p", (void *)mcip, (void *)mtnfp); 7969 return; 7970 } 7971 7972 mcbi = &mcip->mci_tx_notify_cb_info; 7973 cblist = &mcip->mci_tx_notify_cb_list; 7974 mutex_enter(mcbi->mcbi_lockp); 7975 if (mac_callback_remove(mcbi, cblist, &mtnfp->mtnf_link)) 7976 kmem_free(mtnfp, sizeof (mac_tx_notify_cb_t)); 7977 else 7978 mac_callback_remove_wait(&mcip->mci_tx_notify_cb_info); 7979 mutex_exit(mcbi->mcbi_lockp); 7980 } 7981 7982 /* 7983 * mac_client_tx_notify(): 7984 * call to add and remove flow control callback routine. 

/*
 * mac_client_tx_notify():
 * Called by a client to add or remove a Tx flow-control callback routine.
 * A non-NULL callb_func registers a callback and returns its handle; a
 * NULL callb_func removes the callback identified by the handle passed
 * in ptr. (An illustrative sketch follows the function.)
 */
mac_tx_notify_handle_t
mac_client_tx_notify(mac_client_handle_t mch, mac_tx_notify_t callb_func,
    void *ptr)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
	mac_tx_notify_cb_t	*mtnfp = NULL;

	i_mac_perim_enter(mcip->mci_mip);

	if (callb_func != NULL) {
		/* Add a notify callback */
		mtnfp = mac_client_tx_notify_add(mcip, callb_func, ptr);
	} else {
		mac_client_tx_notify_remove(mcip, (mac_tx_notify_cb_t *)ptr);
	}
	i_mac_perim_exit(mcip->mci_mip);

	return ((mac_tx_notify_handle_t)mtnfp);
}
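
/*
 * Illustrative sketch only, compiled out: how a hypothetical MAC client
 * might use mac_client_tx_notify() above. The MAC_EXAMPLES guard and the
 * example_* names are assumptions for illustration; only
 * mac_client_tx_notify() itself is the real interface.
 */
#ifdef	MAC_EXAMPLES
static void
example_tx_unblocked(void *arg, mac_tx_cookie_t cookie)
{
	/* The ring/SRS identified by 'cookie' can accept packets again. */
}

static mac_tx_notify_handle_t
example_register_tx_notify(mac_client_handle_t mch, void *arg)
{
	/* A non-NULL callback adds an entry and returns its handle. */
	return (mac_client_tx_notify(mch, example_tx_unblocked, arg));
}

static void
example_unregister_tx_notify(mac_client_handle_t mch,
    mac_tx_notify_handle_t hdl)
{
	/* A NULL callback removes the entry identified by 'ptr'. */
	(void) mac_client_tx_notify(mch, NULL, (void *)hdl);
}
#endif	/* MAC_EXAMPLES */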

void
mac_bridge_vectors(mac_bridge_tx_t txf, mac_bridge_rx_t rxf,
    mac_bridge_ref_t reff, mac_bridge_ls_t lsf)
{
	mac_bridge_tx_cb = txf;
	mac_bridge_rx_cb = rxf;
	mac_bridge_ref_cb = reff;
	mac_bridge_ls_cb = lsf;
}

int
mac_bridge_set(mac_handle_t mh, mac_handle_t link)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	int		retv;

	mutex_enter(&mip->mi_bridge_lock);
	if (mip->mi_bridge_link == NULL) {
		mip->mi_bridge_link = link;
		retv = 0;
	} else {
		retv = EBUSY;
	}
	mutex_exit(&mip->mi_bridge_lock);
	if (retv == 0) {
		mac_poll_state_change(mh, B_FALSE);
		mac_capab_update(mh);
	}
	return (retv);
}

/*
 * Disable bridging on the indicated link.
 */
void
mac_bridge_clear(mac_handle_t mh, mac_handle_t link)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	mutex_enter(&mip->mi_bridge_lock);
	ASSERT(mip->mi_bridge_link == link);
	mip->mi_bridge_link = NULL;
	mutex_exit(&mip->mi_bridge_lock);
	mac_poll_state_change(mh, B_TRUE);
	mac_capab_update(mh);
}
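
/*
 * Illustrative sketch only, compiled out: how a bridge module might bind to
 * a link while honoring the EBUSY contract of mac_bridge_set() above. The
 * MAC_EXAMPLES guard and the example_* name are assumptions for
 * illustration.
 */
#ifdef	MAC_EXAMPLES
static int
example_bridge_bind(mac_handle_t mh, mac_handle_t blink)
{
	int err;

	/* Only one bridge link may be bound to a mac at a time. */
	if ((err = mac_bridge_set(mh, blink)) == EBUSY)
		cmn_err(CE_NOTE, "!link already has a bridge attached");
	return (err);
}
#endif	/* MAC_EXAMPLES */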

void
mac_no_active(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	i_mac_perim_enter(mip);
	mip->mi_state_flags |= MIS_NO_ACTIVE;
	i_mac_perim_exit(mip);
}

/*
 * Walk the primary VLAN clients whenever the primary's rings property
 * changes and update the mac_resource_props_t for the VLAN's client.
 * We need to do this since we don't support setting these properties
 * on the primary's VLAN clients, but the VLAN clients have to
 * follow the primary w.r.t. the rings property.
 */
void
mac_set_prim_vlan_rings(mac_impl_t *mip, mac_resource_props_t *mrp)
{
	mac_client_impl_t	*vmcip;
	mac_resource_props_t	*vmrp;

	for (vmcip = mip->mi_clients_list; vmcip != NULL;
	    vmcip = vmcip->mci_client_next) {
		if (!(vmcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) ||
		    mac_client_vid((mac_client_handle_t)vmcip) ==
		    VLAN_ID_NONE) {
			continue;
		}
		vmrp = MCIP_RESOURCE_PROPS(vmcip);

		vmrp->mrp_nrxrings = mrp->mrp_nrxrings;
		if (mrp->mrp_mask & MRP_RX_RINGS)
			vmrp->mrp_mask |= MRP_RX_RINGS;
		else if (vmrp->mrp_mask & MRP_RX_RINGS)
			vmrp->mrp_mask &= ~MRP_RX_RINGS;

		vmrp->mrp_ntxrings = mrp->mrp_ntxrings;
		if (mrp->mrp_mask & MRP_TX_RINGS)
			vmrp->mrp_mask |= MRP_TX_RINGS;
		else if (vmrp->mrp_mask & MRP_TX_RINGS)
			vmrp->mrp_mask &= ~MRP_TX_RINGS;

		if (mrp->mrp_mask & MRP_RXRINGS_UNSPEC)
			vmrp->mrp_mask |= MRP_RXRINGS_UNSPEC;
		else
			vmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC;

		if (mrp->mrp_mask & MRP_TXRINGS_UNSPEC)
			vmrp->mrp_mask |= MRP_TXRINGS_UNSPEC;
		else
			vmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC;
	}
}

/*
 * We are adding or removing ring(s) from a group. The source for taking
 * rings is the default group. The destination for giving rings back is
 * the default group.
 */
int
mac_group_ring_modify(mac_client_impl_t *mcip, mac_group_t *group,
    mac_group_t *defgrp)
{
	mac_resource_props_t	*mrp = MCIP_RESOURCE_PROPS(mcip);
	uint_t			modify;
	int			count;
	mac_ring_t		*ring;
	mac_ring_t		*next;
	mac_impl_t		*mip = mcip->mci_mip;
	mac_ring_t		**rings;
	uint_t			ringcnt;
	int			i = 0;
	boolean_t		rx_group = group->mrg_type == MAC_RING_TYPE_RX;
	int			start;
	int			end;
	mac_group_t		*tgrp;
	int			j;
	int			rv = 0;

	/*
	 * If we are asked for just a group, we give 1 ring, else
	 * the specified number of rings.
	 */
	if (rx_group) {
		ringcnt = (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) ? 1 :
		    mrp->mrp_nrxrings;
	} else {
		ringcnt = (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) ? 1 :
		    mrp->mrp_ntxrings;
	}

	/* don't allow modifying rings for a share for now. */
	ASSERT(mcip->mci_share == 0);

	if (ringcnt == group->mrg_cur_count)
		return (0);

	if (group->mrg_cur_count > ringcnt) {
		modify = group->mrg_cur_count - ringcnt;
		if (rx_group) {
			if (mip->mi_rx_donor_grp == group) {
				ASSERT(mac_is_primary_client(mcip));
				mip->mi_rx_donor_grp = defgrp;
			} else {
				defgrp = mip->mi_rx_donor_grp;
			}
		}
		ring = group->mrg_rings;
		rings = kmem_alloc(modify * sizeof (mac_ring_handle_t),
		    KM_SLEEP);
		j = 0;
		for (count = 0; count < modify; count++) {
			next = ring->mr_next;
			rv = mac_group_mov_ring(mip, defgrp, ring);
			if (rv != 0) {
				/* cleanup on failure */
				for (j = 0; j < count; j++) {
					(void) mac_group_mov_ring(mip, group,
					    rings[j]);
				}
				break;
			}
			rings[j++] = ring;
			ring = next;
		}
		kmem_free(rings, modify * sizeof (mac_ring_handle_t));
		return (rv);
	}
	if (ringcnt >= MAX_RINGS_PER_GROUP)
		return (EINVAL);

	modify = ringcnt - group->mrg_cur_count;

	if (rx_group) {
		if (group != mip->mi_rx_donor_grp)
			defgrp = mip->mi_rx_donor_grp;
		else
			/*
			 * This is the donor group with all the remaining
			 * rings. Default group now gets to be the donor.
			 */
			mip->mi_rx_donor_grp = defgrp;
		start = 1;
		end = mip->mi_rx_group_count;
	} else {
		start = 0;
		end = mip->mi_tx_group_count - 1;
	}
	/*
	 * If the default doesn't have any rings, let's see if we can
	 * take rings given to an h/w client that doesn't need them.
	 * For now, we just see if there is any one client that can donate
	 * all the required rings.
	 */
	if (defgrp->mrg_cur_count < (modify + 1)) {
		for (i = start; i < end; i++) {
			if (rx_group) {
				tgrp = &mip->mi_rx_groups[i];
				if (tgrp == group || tgrp->mrg_state <
				    MAC_GROUP_STATE_RESERVED) {
					continue;
				}
				if (i_mac_clients_hw(tgrp, MRP_RX_RINGS))
					continue;
				mcip = tgrp->mrg_clients->mgc_client;
				VERIFY3P(mcip, !=, NULL);
				if ((tgrp->mrg_cur_count +
				    defgrp->mrg_cur_count) < (modify + 1)) {
					continue;
				}
				if (mac_rx_switch_group(mcip, tgrp,
				    defgrp) != 0) {
					return (ENOSPC);
				}
			} else {
				tgrp = &mip->mi_tx_groups[i];
				if (tgrp == group || tgrp->mrg_state <
				    MAC_GROUP_STATE_RESERVED) {
					continue;
				}
				if (i_mac_clients_hw(tgrp, MRP_TX_RINGS))
					continue;
				mcip = tgrp->mrg_clients->mgc_client;
				VERIFY3P(mcip, !=, NULL);
				if ((tgrp->mrg_cur_count +
				    defgrp->mrg_cur_count) < (modify + 1)) {
					continue;
				}
				/* OK, we can switch this to s/w */
				mac_tx_client_quiesce(
				    (mac_client_handle_t)mcip);
				mac_tx_switch_group(mcip, tgrp, defgrp);
				mac_tx_client_restart(
				    (mac_client_handle_t)mcip);
			}
		}
		if (defgrp->mrg_cur_count < (modify + 1))
			return (ENOSPC);
	}
	if ((rv = i_mac_group_allocate_rings(mip, group->mrg_type, defgrp,
	    group, mcip->mci_share, modify)) != 0) {
		return (rv);
	}
	return (0);
}

/*
 * Given the poolname in mac_resource_props, find the cpupart
 * that is associated with this pool. The cpupart will be used
 * later for finding the cpus to be bound to the networking threads.
 *
 * use_default is set B_TRUE if pools are enabled and pool_default
 * is returned. This avoids a second lookup to set the poolname
 * for pool-effective.
 *
 * returns:
 *
 *    NULL - pools are disabled or if the 'cpus' property is set.
 *    cpupart of pool_default - pools are enabled and the pool
 *             is not available or poolname is blank
 *    cpupart of named pool - pools are enabled and the pool
 *             is available.
 */
cpupart_t *
mac_pset_find(mac_resource_props_t *mrp, boolean_t *use_default)
{
	pool_t		*pool;
	cpupart_t	*cpupart;

	*use_default = B_FALSE;

	/* CPUs property is set */
	if (mrp->mrp_mask & MRP_CPUS)
		return (NULL);

	ASSERT(pool_lock_held());

	/* Pools are disabled, no pset */
	if (pool_state == POOL_DISABLED)
		return (NULL);

	/* Pools property is set */
	if (mrp->mrp_mask & MRP_POOL) {
		if ((pool = pool_lookup_pool_by_name(mrp->mrp_pool)) == NULL) {
			/* Pool not found */
			DTRACE_PROBE1(mac_pset_find_no_pool, char *,
			    mrp->mrp_pool);
			*use_default = B_TRUE;
			pool = pool_default;
		}
	/* Pools property is not set */
	} else {
		*use_default = B_TRUE;
		pool = pool_default;
	}

	/* Find the CPU pset that corresponds to the pool */
	mutex_enter(&cpu_lock);
	if ((cpupart = cpupart_find(pool->pool_pset->pset_id)) == NULL) {
		DTRACE_PROBE1(mac_find_pset_no_pset, psetid_t,
		    pool->pool_pset->pset_id);
	}
	mutex_exit(&cpu_lock);

	return (cpupart);
}

void
mac_set_pool_effective(boolean_t use_default, cpupart_t *cpupart,
    mac_resource_props_t *mrp, mac_resource_props_t *emrp)
{
	ASSERT(pool_lock_held());

	if (cpupart != NULL) {
		emrp->mrp_mask |= MRP_POOL;
		if (use_default) {
			(void) strcpy(emrp->mrp_pool, "pool_default");
		} else {
			ASSERT(strlen(mrp->mrp_pool) != 0);
			(void) strcpy(emrp->mrp_pool, mrp->mrp_pool);
		}
	} else {
		emrp->mrp_mask &= ~MRP_POOL;
		bzero(emrp->mrp_pool, MAXPATHLEN);
	}
}

struct mac_pool_arg {
	char		mpa_poolname[MAXPATHLEN];
	pool_event_t	mpa_what;
};

/*ARGSUSED*/
static uint_t
mac_pool_link_update(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	struct mac_pool_arg	*mpa = arg;
	mac_impl_t		*mip = (mac_impl_t *)val;
	mac_client_impl_t	*mcip;
	mac_resource_props_t	*mrp, *emrp;
	boolean_t		pool_update = B_FALSE;
	boolean_t		pool_clear = B_FALSE;
	boolean_t		use_default = B_FALSE;
	cpupart_t		*cpupart = NULL;

	mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP);
	i_mac_perim_enter(mip);
	for (mcip = mip->mi_clients_list; mcip != NULL;
	    mcip = mcip->mci_client_next) {
		pool_update = B_FALSE;
		pool_clear = B_FALSE;
		use_default = B_FALSE;
		mac_client_get_resources((mac_client_handle_t)mcip, mrp);
		emrp = MCIP_EFFECTIVE_PROPS(mcip);

		/*
		 * When pools are enabled
		 */
		if ((mpa->mpa_what == POOL_E_ENABLE) &&
		    ((mrp->mrp_mask & MRP_CPUS) == 0)) {
			mrp->mrp_mask |= MRP_POOL;
			pool_update = B_TRUE;
		}

		/*
		 * When pools are disabled
		 */
		if ((mpa->mpa_what == POOL_E_DISABLE) &&
		    ((mrp->mrp_mask & MRP_CPUS) == 0)) {
			mrp->mrp_mask |= MRP_POOL;
			pool_clear = B_TRUE;
		}

		/*
		 * Look for links with the pool property set and the poolname
		 * matching the one which is changing.
		 */
		if (strcmp(mrp->mrp_pool, mpa->mpa_poolname) == 0) {
			/*
			 * The pool associated with the link has changed.
			 */
			if (mpa->mpa_what == POOL_E_CHANGE) {
				mrp->mrp_mask |= MRP_POOL;
				pool_update = B_TRUE;
			}
		}

		/*
		 * This link is associated with pool_default and
		 * pool_default has changed.
		 */
		if ((mpa->mpa_what == POOL_E_CHANGE) &&
		    (strcmp(emrp->mrp_pool, "pool_default") == 0) &&
		    (strcmp(mpa->mpa_poolname, "pool_default") == 0)) {
			mrp->mrp_mask |= MRP_POOL;
			pool_update = B_TRUE;
		}

		/*
		 * Get new list of cpus for the pool, bind network
		 * threads to new list of cpus and update resources.
		 */
		if (pool_update) {
			if (MCIP_DATAPATH_SETUP(mcip)) {
				pool_lock();
				cpupart = mac_pset_find(mrp, &use_default);
				mac_fanout_setup(mcip, mcip->mci_flent, mrp,
				    mac_rx_deliver, mcip, NULL, cpupart);
				mac_set_pool_effective(use_default, cpupart,
				    mrp, emrp);
				pool_unlock();
			}
			mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip),
			    B_FALSE);
		}

		/*
		 * Clear the effective pool and bind network threads
		 * to any available CPU.
		 */
		if (pool_clear) {
			if (MCIP_DATAPATH_SETUP(mcip)) {
				emrp->mrp_mask &= ~MRP_POOL;
				bzero(emrp->mrp_pool, MAXPATHLEN);
				mac_fanout_setup(mcip, mcip->mci_flent, mrp,
				    mac_rx_deliver, mcip, NULL, NULL);
			}
			mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip),
			    B_FALSE);
		}
	}
	i_mac_perim_exit(mip);
	kmem_free(mrp, sizeof (*mrp));
	return (MH_WALK_CONTINUE);
}

static void
mac_pool_update(void *arg)
{
	mod_hash_walk(i_mac_impl_hash, mac_pool_link_update, arg);
	kmem_free(arg, sizeof (struct mac_pool_arg));
}

/*
 * Callback function to be executed when a noteworthy pool event
 * takes place.
 */
/* ARGSUSED */
static void
mac_pool_event_cb(pool_event_t what, poolid_t id, void *arg)
{
	pool_t			*pool;
	char			*poolname = NULL;
	struct mac_pool_arg	*mpa;

	pool_lock();
	mpa = kmem_zalloc(sizeof (struct mac_pool_arg), KM_SLEEP);

	switch (what) {
	case POOL_E_ENABLE:
	case POOL_E_DISABLE:
		break;

	case POOL_E_CHANGE:
		pool = pool_lookup_pool_by_id(id);
		if (pool == NULL) {
			kmem_free(mpa, sizeof (struct mac_pool_arg));
			pool_unlock();
			return;
		}
		pool_get_name(pool, &poolname);
		(void) strlcpy(mpa->mpa_poolname, poolname,
		    sizeof (mpa->mpa_poolname));
		break;

	default:
		kmem_free(mpa, sizeof (struct mac_pool_arg));
		pool_unlock();
		return;
	}
	pool_unlock();

	mpa->mpa_what = what;

	mac_pool_update(mpa);
}

/*
 * Set effective rings property. This could be called from datapath_setup/
 * datapath_teardown or set-linkprop.
 * If the group is reserved we just go ahead and set the effective rings.
 * Additionally, for TX this could mean the default group has lost/gained
 * some rings, so if the default group is reserved, we need to adjust the
 * effective rings for the default group clients. For RX, if we are working
 * with the non-default group, we just need to reset the effective props
 * for the default group clients.
 */
void
mac_set_rings_effective(mac_client_impl_t *mcip)
{
	mac_impl_t		*mip = mcip->mci_mip;
	mac_group_t		*grp;
	mac_group_t		*defgrp;
	flow_entry_t		*flent = mcip->mci_flent;
	mac_resource_props_t	*emrp = MCIP_EFFECTIVE_PROPS(mcip);
	mac_grp_client_t	*mgcp;
	mac_client_impl_t	*gmcip;

	grp = flent->fe_rx_ring_group;
	if (grp != NULL) {
		defgrp = MAC_DEFAULT_RX_GROUP(mip);
		/*
		 * If we have reserved a group, set the effective rings
		 * to the ring count in the group.
		 */
		if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) {
			emrp->mrp_mask |= MRP_RX_RINGS;
			emrp->mrp_nrxrings = grp->mrg_cur_count;
		}

		/*
		 * We go through the clients in the shared group and
		 * reset the effective properties. It is possible this
		 * might have already been done for some client (i.e.
		 * if some client is being moved to a group that is
		 * already shared). The case where the default group is
		 * RESERVED is taken care of above (note in the RX side if
		 * there is a non-default group, the default group is always
		 * SHARED).
		 */
		if (grp != defgrp || grp->mrg_state == MAC_GROUP_STATE_SHARED) {
			if (grp->mrg_state == MAC_GROUP_STATE_SHARED)
				mgcp = grp->mrg_clients;
			else
				mgcp = defgrp->mrg_clients;
			while (mgcp != NULL) {
				gmcip = mgcp->mgc_client;
				emrp = MCIP_EFFECTIVE_PROPS(gmcip);
				if (emrp->mrp_mask & MRP_RX_RINGS) {
					emrp->mrp_mask &= ~MRP_RX_RINGS;
					emrp->mrp_nrxrings = 0;
				}
				mgcp = mgcp->mgc_next;
			}
		}
	}

	/* Now the TX side */
	grp = flent->fe_tx_ring_group;
	if (grp != NULL) {
		defgrp = MAC_DEFAULT_TX_GROUP(mip);

		if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) {
			emrp->mrp_mask |= MRP_TX_RINGS;
			emrp->mrp_ntxrings = grp->mrg_cur_count;
		} else if (grp->mrg_state == MAC_GROUP_STATE_SHARED) {
			mgcp = grp->mrg_clients;
			while (mgcp != NULL) {
				gmcip = mgcp->mgc_client;
				emrp = MCIP_EFFECTIVE_PROPS(gmcip);
				if (emrp->mrp_mask & MRP_TX_RINGS) {
					emrp->mrp_mask &= ~MRP_TX_RINGS;
					emrp->mrp_ntxrings = 0;
				}
				mgcp = mgcp->mgc_next;
			}
		}

		/*
		 * If the group is not the default group and the default
		 * group is reserved, the ring count in the default group
		 * might have changed, update it.
		 */
		if (grp != defgrp &&
		    defgrp->mrg_state == MAC_GROUP_STATE_RESERVED) {
			gmcip = MAC_GROUP_ONLY_CLIENT(defgrp);
			emrp = MCIP_EFFECTIVE_PROPS(gmcip);
			emrp->mrp_ntxrings = defgrp->mrg_cur_count;
		}
	}
	emrp = MCIP_EFFECTIVE_PROPS(mcip);
}

/*
 * Check if the primary is in the default group. If so, see if we
 * can give it an exclusive group now that another client is
 * being configured. We take the primary out of the default group
 * because the multicast/broadcast packets for all the clients
 * will land in the default ring in the default group, which means
 * any client in the default group, even if it is the only one in
 * the group, will lose exclusive access to the rings and hence
 * polling.
 */
mac_client_impl_t *
mac_check_primary_relocation(mac_client_impl_t *mcip, boolean_t rxhw)
{
	mac_impl_t		*mip = mcip->mci_mip;
	mac_group_t		*defgrp = MAC_DEFAULT_RX_GROUP(mip);
	flow_entry_t		*flent = mcip->mci_flent;
	mac_resource_props_t	*mrp = MCIP_RESOURCE_PROPS(mcip);
	uint8_t			*mac_addr;
	mac_group_t		*ngrp;

	/*
	 * Return if the primary is not in the default group, or if it
	 * is explicitly configured to be in the default group or has
	 * the RX rings property set.
	 */
	if (flent->fe_rx_ring_group != defgrp || mrp->mrp_mask & MRP_RX_RINGS)
		return (NULL);

	/*
	 * If the new client needs an exclusive group and we
	 * don't have another for the primary, return.
	 */
	if (rxhw && mip->mi_rxhwclnt_avail < 2)
		return (NULL);

	mac_addr = flent->fe_flow_desc.fd_dst_mac;
	/*
	 * We call this when we are setting up the datapath for
	 * the first non-primary.
	 */
	ASSERT(mip->mi_nactiveclients == 2);

	/*
	 * OK, now we have the primary that needs to be relocated.
	 */
	ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE);
	if (ngrp == NULL)
		return (NULL);
	if (mac_rx_switch_group(mcip, defgrp, ngrp) != 0) {
		mac_stop_group(ngrp);
		return (NULL);
	}
	return (mcip);
}

void
mac_transceiver_init(mac_impl_t *mip)
{
	if (mac_capab_get((mac_handle_t)mip, MAC_CAPAB_TRANSCEIVER,
	    &mip->mi_transceiver)) {
		/*
		 * The driver set a flag that we don't know about. In
		 * this case, warn about it and ignore this capability.
		 */
		if (mip->mi_transceiver.mct_flags != 0) {
			dev_err(mip->mi_dip, CE_WARN, "driver set transceiver "
			    "flags to invalid value: 0x%x, ignoring "
			    "capability", mip->mi_transceiver.mct_flags);
			bzero(&mip->mi_transceiver,
			    sizeof (mac_capab_transceiver_t));
		}
	} else {
		bzero(&mip->mi_transceiver,
		    sizeof (mac_capab_transceiver_t));
	}
}

int
mac_transceiver_count(mac_handle_t mh, uint_t *countp)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	ASSERT(MAC_PERIM_HELD(mh));

	if (mip->mi_transceiver.mct_ntransceivers == 0)
		return (ENOTSUP);

	*countp = mip->mi_transceiver.mct_ntransceivers;
	return (0);
}

int
mac_transceiver_info(mac_handle_t mh, uint_t tranid, boolean_t *present,
    boolean_t *usable)
{
	int			ret;
	mac_transceiver_info_t	info;
	mac_impl_t		*mip = (mac_impl_t *)mh;

	ASSERT(MAC_PERIM_HELD(mh));

	if (mip->mi_transceiver.mct_info == NULL ||
	    mip->mi_transceiver.mct_ntransceivers == 0)
		return (ENOTSUP);

	if (tranid >= mip->mi_transceiver.mct_ntransceivers)
		return (EINVAL);

	bzero(&info, sizeof (mac_transceiver_info_t));
	if ((ret = mip->mi_transceiver.mct_info(mip->mi_driver, tranid,
	    &info)) != 0) {
		return (ret);
	}

	*present = info.mti_present;
	*usable = info.mti_usable;
	return (0);
}

int
mac_transceiver_read(mac_handle_t mh, uint_t tranid, uint_t page, void *buf,
    size_t nbytes, off_t offset, size_t *nread)
{
	int		ret;
	size_t		nr;
	mac_impl_t	*mip = (mac_impl_t *)mh;

	ASSERT(MAC_PERIM_HELD(mh));

	if (mip->mi_transceiver.mct_read == NULL)
		return (ENOTSUP);

	if (tranid >= mip->mi_transceiver.mct_ntransceivers)
		return (EINVAL);

	/*
	 * All supported pages today are 256 bytes wide. Make sure offset +
	 * nbytes never exceeds that.
	 */
	if (offset < 0 || offset >= 256 || nbytes > 256 ||
	    offset + nbytes > 256)
		return (EINVAL);

	if (nread == NULL)
		nread = &nr;
	ret = mip->mi_transceiver.mct_read(mip->mi_driver, tranid, page, buf,
	    nbytes, offset, nread);
	if (ret == 0 && *nread > nbytes) {
		dev_err(mip->mi_dip, CE_PANIC, "driver wrote %lu bytes into "
		    "%lu byte sized buffer, possible memory corruption",
		    *nread, nbytes);
	}

	return (ret);
}
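
/*
 * Illustrative sketch only, compiled out: reading the identifier byte of a
 * transceiver through mac_transceiver_read() above. The MAC_EXAMPLES guard
 * and the example_* name are assumptions, and the choice of page 0xa0,
 * byte 0 (the identifier byte for SFP-style modules) is illustrative rather
 * than something this file mandates.
 */
#ifdef	MAC_EXAMPLES
static int
example_transceiver_id(mac_handle_t mh, uint_t tranid, uint8_t *idp)
{
	size_t nread;

	ASSERT(MAC_PERIM_HELD(mh));
	/* A one byte read at offset 0 always passes the 256-byte bound. */
	return (mac_transceiver_read(mh, tranid, 0xa0, idp, 1, 0, &nread));
}
#endif	/* MAC_EXAMPLES */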

void
mac_led_init(mac_impl_t *mip)
{
	mip->mi_led_modes = MAC_LED_DEFAULT;

	if (!mac_capab_get((mac_handle_t)mip, MAC_CAPAB_LED, &mip->mi_led)) {
		bzero(&mip->mi_led, sizeof (mac_capab_led_t));
		return;
	}

	if (mip->mi_led.mcl_flags != 0) {
		dev_err(mip->mi_dip, CE_WARN, "driver set led capability "
		    "flags to invalid value: 0x%x, ignoring "
		    "capability", mip->mi_led.mcl_flags);
		bzero(&mip->mi_led, sizeof (mac_capab_led_t));
		return;
	}

	if ((mip->mi_led.mcl_modes & ~MAC_LED_ALL) != 0) {
		dev_err(mip->mi_dip, CE_WARN, "driver set led capability "
		    "supported modes to invalid value: 0x%x, ignoring "
		    "capability", mip->mi_led.mcl_modes);
		bzero(&mip->mi_led, sizeof (mac_capab_led_t));
		return;
	}
}

int
mac_led_get(mac_handle_t mh, mac_led_mode_t *supported, mac_led_mode_t *active)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	ASSERT(MAC_PERIM_HELD(mh));

	if (mip->mi_led.mcl_set == NULL)
		return (ENOTSUP);

	*supported = mip->mi_led.mcl_modes;
	*active = mip->mi_led_modes;

	return (0);
}

/*
 * Update and multiplex the various LED requests. We only ever send one LED to
 * the underlying driver at a time. As such, we end up multiplexing all
 * requested states and picking one to send down to the driver.
 */
int
mac_led_set(mac_handle_t mh, mac_led_mode_t desired)
{
	int		ret;
	mac_led_mode_t	driver;
	mac_impl_t	*mip = (mac_impl_t *)mh;

	ASSERT(MAC_PERIM_HELD(mh));

	/*
	 * A desired value of zero is a request to reset to our default
	 * value.
	 */
	if (desired == 0)
		desired = MAC_LED_DEFAULT;

	if (mip->mi_led.mcl_set == NULL)
		return (ENOTSUP);

	/*
	 * Catch both values that we don't know about and those that the driver
	 * doesn't support.
	 */
	if ((desired & ~MAC_LED_ALL) != 0)
		return (EINVAL);

	if ((desired & ~mip->mi_led.mcl_modes) != 0)
		return (ENOTSUP);

	/*
	 * If we have the same value, then there is nothing to do.
	 */
	if (desired == mip->mi_led_modes)
		return (0);

	/*
	 * Based on the desired value, determine what to send to the driver.
	 * We will only send a single bit to the driver at any given time.
	 * IDENT takes priority over OFF or ON. We also let OFF take priority
	 * over the rest.
	 */
	if (desired & MAC_LED_IDENT) {
		driver = MAC_LED_IDENT;
	} else if (desired & MAC_LED_OFF) {
		driver = MAC_LED_OFF;
	} else if (desired & MAC_LED_ON) {
		driver = MAC_LED_ON;
	} else {
		driver = MAC_LED_DEFAULT;
	}

	if ((ret = mip->mi_led.mcl_set(mip->mi_driver, driver, 0)) == 0) {
		mip->mi_led_modes = desired;
	}

	return (ret);
}
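
/*
 * Illustrative sketch only, compiled out: toggling the locate/identify
 * pattern through mac_led_set() above. The MAC_EXAMPLES guard and the
 * example_* name are assumptions for illustration.
 */
#ifdef	MAC_EXAMPLES
static int
example_led_locate(mac_handle_t mh, boolean_t on)
{
	ASSERT(MAC_PERIM_HELD(mh));
	/* Passing 0 resets the LED to MAC_LED_DEFAULT. */
	return (mac_led_set(mh, on ? MAC_LED_IDENT : 0));
}
#endif	/* MAC_EXAMPLES */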

/*
 * Send packets through the Tx ring ('mrh') or through the default
 * handler if no ring is specified. Before passing the packet down to
 * the MAC provider, emulate any hardware offloads which have been
 * requested but are not supported by the provider.
 */
mblk_t *
mac_ring_tx(mac_handle_t mh, mac_ring_handle_t mrh, mblk_t *mp)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	if (mrh == NULL)
		mrh = mip->mi_default_tx_ring;

	if (mrh == NULL)
		return (mip->mi_tx(mip->mi_driver, mp));
	else
		return (mac_hwring_tx(mrh, mp));
}

/*
 * This is the final stop before reaching the underlying MAC provider.
 * This is also where the bridging hook is inserted. Packets that are
 * bridged will return through mac_bridge_tx(), with rh nulled out if
 * the bridge chooses to send output on a different link due to
 * forwarding.
 */
mblk_t *
mac_provider_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp,
    mac_client_impl_t *mcip)
{
	/*
	 * If there is a bound Hybrid I/O share, send packets through
	 * the default tx ring. When there's a bound Hybrid I/O share,
	 * the tx rings of this client are mapped in the guest domain
	 * and not accessible from here.
	 */
	if (mcip->mci_state_flags & MCIS_SHARE_BOUND)
		rh = mip->mi_default_tx_ring;

	if (mip->mi_promisc_list != NULL)
		mac_promisc_dispatch(mip, mp, mcip, B_FALSE);

	if (mip->mi_bridge_link == NULL)
		return (mac_ring_tx((mac_handle_t)mip, rh, mp));
	else
		return (mac_bridge_tx(mip, rh, mp));
}
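
/*
 * Illustrative sketch only, compiled out: transmitting a chain through the
 * default Tx ring via mac_ring_tx() above. The MAC_EXAMPLES guard and the
 * example_* name are assumptions; anything the provider cannot accept is
 * handed back, and simply freeing it here is an illustrative policy, not
 * what real callers necessarily do.
 */
#ifdef	MAC_EXAMPLES
static void
example_default_tx(mac_handle_t mh, mblk_t *mp)
{
	mblk_t *rest;

	/* A NULL ring handle selects the default Tx ring or handler. */
	rest = mac_ring_tx(mh, NULL, mp);
	if (rest != NULL)
		freemsgchain(rest);
}
#endif	/* MAC_EXAMPLES */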