/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * MAC Services Module
 *
 * The GLDv3 framework locking - The MAC layer
 * --------------------------------------------
 *
 * The MAC layer is central to the GLD framework and can provide the locking
 * framework needed for itself and for the use of MAC clients. MAC end points
 * are fairly disjoint and don't share a lot of state. So a coarse grained
 * multi-threading scheme is to single-thread all create/modify/delete or set
 * type control operations per mac end point while allowing data threads to
 * proceed concurrently.
 *
 * Control operations (set) that modify a mac end point are always serialized
 * on a per mac end point basis; we have at most one such thread per mac end
 * point at a time.
 *
 * All other operations that are not serialized are essentially multi-threaded.
 * For example a control operation (get) like getting statistics which may not
 * care about reading values atomically, or data threads sending or receiving
 * data. Mostly these types of operations don't modify the control state. Any
 * state these operations care about is protected using traditional locks.
 *
 * The perimeter only serializes serial operations. It does not imply there
 * aren't any other concurrent operations. However a serialized operation may
 * sometimes need to make sure it is the only thread. In this case it needs
 * to use reference counting mechanisms to cv_wait until any current data
 * threads are done.
 *
 * The mac layer itself does not hold any locks across a call to another layer.
 * The perimeter is however held across a down call to the driver to make the
 * whole control operation atomic with respect to other control operations.
 * Also the data path and get type control operations may proceed concurrently.
 * These operations synchronize with the single serial operation on a given mac
 * end point using regular locks. The perimeter ensures that conflicting
 * operations like say a mac_multicast_add and a mac_multicast_remove on the
 * same mac end point don't interfere with each other and also ensures that the
 * changes in the mac layer and the call to the underlying driver to say add a
 * multicast address are done atomically without interference from a thread
 * trying to delete the same address.
 *
 * For example, consider
 * mac_multicast_add()
 * {
 *	mac_perimeter_enter();	serialize all control operations
 *
 *	grab list lock		protect against access by data threads
 *	add to list
 *	drop list lock
 *
 *	call driver's mi_multicst
 *
 *	mac_perimeter_exit();
 * }
 *
 * To lessen the number of serialization locks and simplify the lock hierarchy,
 * we serialize all the control operations on a per mac end point by using a
 * single serialization lock called the perimeter. We allow recursive entry
 * into the perimeter to facilitate use of this mechanism by both the mac
 * client and the MAC layer itself.
 *
 * MAC client means an entity that does an operation on a mac handle
 * obtained from a mac_open/mac_client_open. Similarly MAC driver means
 * an entity that does an operation on a mac handle obtained from a
 * mac_register. An entity could be both client and driver but on different
 * handles (e.g. aggr) and should only make the corresponding mac interface
 * calls, i.e. mac driver interface or mac client interface, as appropriate
 * for that mac handle.
 *
 * General rules.
 * -------------
 *
 * R1. The lock order of upcall threads is naturally opposite to downcall
 * threads. Hence upcalls must not hold any locks across layers for fear of
 * recursive lock enter and lock order violation. This applies to all layers.
 *
 * R2. The perimeter is just another lock. Since it is held in the down
 * direction, acquiring the perimeter in an upcall is prohibited as it would
 * cause a deadlock. This applies to all layers.
 *
 * Note that upcalls that need to grab the mac perimeter (for example
 * mac_notify upcalls) can still achieve that by posting the request to a
 * thread, which can then grab all the required perimeters and locks in the
 * right global order. Note that in the above example the mac layer itself
 * won't grab the mac perimeter in the mac_notify upcall, instead the upcall
 * to the client must do that. Please see the aggr code for an example.
 *
 * MAC client rules
 * ----------------
 *
 * R3. A MAC client may use the MAC provided perimeter facility to serialize
 * control operations on a per mac end point. It does this by acquiring
 * and holding the perimeter across a sequence of calls to the mac layer.
 * This ensures atomicity across the entire block of mac calls. In this
 * model the MAC client must not hold any client locks across the calls to
 * the mac layer. This model is the preferred solution; see the sketch below.
 *
 * R4. However if a MAC client has a lot of global state across all mac end
 * points the per mac end point serialization may not be sufficient. In this
 * case the client may choose to use global locks or use its own serialization.
 * To avoid deadlocks, these client layer locks held across the mac calls
 * in the control path must never be acquired by the data path for the reason
 * mentioned below.
 *
 * (Assume that a control operation that holds a client lock blocks in the
 * mac layer waiting for upcall reference counts to drop to zero. If an upcall
 * data thread that holds this reference count, tries to acquire the same
 * client lock subsequently it will deadlock).
 *
 * A MAC client may follow either the R3 model or the R4 model, but can't
 * mix both. In the former, the hierarchy is Perim -> client locks, but in
 * the latter it is client locks -> Perim.
 *
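 * As a minimal sketch of the R3 model (the client calls other than the
 * perimeter entry points are illustrative, not a fixed recipe), a client
 * that needs a unicast and a multicast address installed atomically with
 * respect to other control operations could do:
 *
 *	mac_perim_handle_t	mph;
 *
 *	mac_perim_enter_by_mh(mh, &mph);
 *	err = mac_unicast_add(...);
 *	if (err == 0)
 *		err = mac_multicast_add(...);
 *	mac_perim_exit(mph);
 *
 * No client lock is held across the block, per R3; the perimeter alone
 * provides the atomicity of the two calls.
 *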
 * R5. MAC clients must make MAC calls (excluding data calls) in a cv_wait'able
 * context since they may block while trying to acquire the perimeter.
 * In addition some calls may block waiting for upcall refcnts to come down to
 * zero.
 *
 * R6. MAC clients must make sure that they are single threaded and all threads
 * from the top (in particular data threads) have finished before calling
 * mac_client_close. The MAC framework does not track the number of client
 * threads using the mac client handle. Also mac clients must make sure
 * they have undone all the control operations before calling mac_client_close.
 * For example mac_unicast_remove/mac_multicast_remove to undo the corresponding
 * mac_unicast_add/mac_multicast_add.
 *
 * MAC framework rules
 * -------------------
 *
 * R7. The mac layer itself must not hold any mac layer locks (except the mac
 * perimeter) across a call to any other layer from the mac layer. The call to
 * any other layer could be via mi_* entry points, classifier entry points into
 * the driver or via upcall pointers into layers above. The mac perimeter may
 * be acquired or held only in the down direction, e.g. when calling into
 * a mi_* driver entry point to provide atomicity of the operation.
 *
 * R8. Since it is not guaranteed (see R14) that drivers won't hold locks across
 * mac driver interfaces, the MAC layer must provide a cut out for control
 * interfaces like upcall notifications and start them in a separate thread.
 *
 * R9. Note that locking order also implies a plumbing order. For example
 * VNICs are allowed to be created over aggrs, but not vice-versa. An attempt
 * to plumb in any other order must be failed at mac_open time, otherwise it
 * could lead to deadlocks due to inverse locking order.
 *
 * R10. MAC driver interfaces must not block since the driver could call them
 * in interrupt context.
 *
 * R11. Walkers must preferably not hold any locks while calling walker
 * callbacks. Instead these can operate on reference counts. In simple
 * callbacks it may be ok to hold a lock and call the callbacks, but this is
 * harder to maintain in the general case of arbitrary callbacks.
 *
 * R12. The MAC layer must protect upcall notification callbacks using reference
 * counts rather than holding locks across the callbacks.
 *
 * R13. Given the variety of drivers, it is preferable if the MAC layer can make
 * sure that any pointers (such as mac ring pointers) it passes to the driver
 * remain valid until mac unregister time. Currently the mac layer achieves
 * this by using generation numbers for rings and freeing the mac rings only
 * at unregister time. The MAC layer must provide a layer of indirection and
 * must not expose underlying driver rings or driver data structures/pointers
 * directly to MAC clients.
 *
 * MAC driver rules
 * ----------------
 *
 * R14. It would be preferable if MAC drivers don't hold any locks across any
 * mac call. However at a minimum they must not hold any locks across data
 * upcalls. They must also make sure that all references to mac data structures
 * are cleaned up and that it is single threaded at mac_unregister time.
 *
 * R15. MAC driver interfaces don't block and so the action may be done
 * asynchronously in a separate thread as for example handling notifications.
 * The driver must not assume that the action is complete when the call
 * returns.
 *
 * R16. Drivers must maintain a generation number per Rx ring, and pass it
 * back to mac_rx_ring(). They are expected to increment the generation
 * number whenever the ring's stop routine is invoked; see the sketch below
 * and the comments in mac_rx_ring().
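 *
 * As a minimal sketch of R16 (the generation-number bookkeeping is the
 * driver's own; the "xx" field and entry point names are illustrative
 * assumptions, only mac_rx_ring() itself is the fixed interface):
 *
 *	xx_ring_start(ring, mr_gen_num)		<- driver mr_start entry
 *	{
 *		ring->xx_gen_num = mr_gen_num;
 *		...
 *	}
 *
 *	xx_intr(...)
 *	{
 *		...
 *		mac_rx_ring(xx_mh, ring->xx_ring_handle, mp_chain,
 *		    ring->xx_gen_num);
 *	}
 *
 * A stale generation number lets the mac layer discard upcalls from a ring
 * that has since been stopped and restarted.
 *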
 * R17. Similarly mi_stop is another synchronization point and the driver must
 * ensure that all upcalls are done and there won't be any future upcall
 * before returning from mi_stop.
 *
 * R18. The driver may assume that all set/modify control operations via
 * the mi_* entry points are single threaded on a per mac end point.
 *
 * Lock and Perimeter hierarchy scenarios
 * ---------------------------------------
 *
 * i_mac_impl_lock -> mi_rw_lock -> srs_lock -> s_ring_lock[i_mac_tx_srs_notify]
 *
 * ft_lock -> fe_lock [mac_flow_lookup]
 *
 * mi_rw_lock -> fe_lock [mac_bcast_send]
 *
 * srs_lock -> mac_bw_lock [mac_rx_srs_drain_bw]
 *
 * cpu_lock -> mac_srs_g_lock -> srs_lock -> s_ring_lock [mac_walk_srs_and_bind]
 *
 * i_dls_devnet_lock -> mac layer locks [dls_devnet_rename]
 *
 * Perimeters are ordered P1 -> P2 -> P3 from top to bottom in order of mac
 * client to driver. In the case of clients that explicitly use the mac
 * provided perimeter mechanism for their serialization, the hierarchy is
 * Perimeter -> mac layer locks, since the client never holds any locks across
 * the mac calls. In the case of clients that use their own locks the hierarchy
 * is Client locks -> Mac Perim -> Mac layer locks. The client never explicitly
 * calls mac_perim_enter/exit in this case.
 *
 * Subflow creation rules
 * ---------------------------
 * o In case of a user specified cpulist present on the underlying link and
 *   flows, the flows cpulist must be a subset of the underlying link.
 * o In case of a user specified fanout mode present on link and flow, the
 *   subflow fanout count has to be less than or equal to that of the
 *   underlying link. The cpu-bindings for the subflows will be a subset of
 *   the underlying link.
 * o If no cpulist is specified on either the underlying link or the flow, the
 *   underlying link relies on a MAC tunable to provide out of the box fanout.
 *   The subflow will have no cpulist (the subflow will be unbound).
 * o If no cpulist is specified on the underlying link, a subflow can
 *   carry either a user-specified cpulist or fanout count. The cpu-bindings
 *   for the subflow will not adhere to the restriction that they need to be
 *   a subset of the underlying link.
 * o In case where the underlying link is carrying either a user specified
 *   cpulist or fanout mode and for an unspecified subflow, the subflow will
 *   be created unbound.
 * o While creating unbound subflows, bandwidth mode changes attempt to
 *   figure a right fanout count. In such cases the fanout count will override
 *   the unbound cpu-binding behavior.
 * o In addition to this, while cycling between flow and link properties, we
 *   impose a restriction that if a link property has a subflow with
 *   user-specified attributes, we will not allow changing the link property.
 *   The administrator needs to reset all the user specified properties for the
 *   subflows before attempting a link property change.
 * Some of the above rules can be overridden by specifying additional command
 * line options while creating or modifying link or subflow properties.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/id_space.h>
#include <sys/esunddi.h>
#include <sys/stat.h>
#include <sys/mkdev.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/dlpi.h>
#include <sys/list.h>
#include <sys/modhash.h>
#include <sys/mac_provider.h>
#include <sys/mac_client_impl.h>
#include <sys/mac_soft_ring.h>
#include <sys/mac_stat.h>
#include <sys/mac_impl.h>
#include <sys/mac.h>
#include <sys/dls.h>
#include <sys/dld.h>
#include <sys/modctl.h>
#include <sys/fs/dv_node.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/callb.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <sys/bitmap.h>
#include <sys/sdt.h>
#include <sys/mac_flow.h>
#include <sys/ddi_intr_impl.h>
#include <sys/disp.h>
#include <sys/vnic.h>
#include <sys/vnic_impl.h>
#include <sys/vlan.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <sys/exacct.h>
#include <sys/exacct_impl.h>
#include <inet/nd.h>
#include <sys/ethernet.h>
#include <sys/pool.h>
#include <sys/pool_pset.h>
#include <sys/cpupart.h>
#include <inet/wifi_ioctl.h>
#include <net/wpa.h>

#define	IMPL_HASHSZ	67	/* prime */

kmem_cache_t		*i_mac_impl_cachep;
mod_hash_t		*i_mac_impl_hash;
krwlock_t		i_mac_impl_lock;
uint_t			i_mac_impl_count;
static kmem_cache_t	*mac_ring_cache;
static id_space_t	*minor_ids;
static uint32_t		minor_count;
static pool_event_cb_t	mac_pool_event_reg;

/*
 * Logging stuff. Perhaps mac_logging_interval could be broken into
 * mac_flow_log_interval and mac_link_log_interval if we want to be
 * able to schedule them differently.
 */
uint_t			mac_logging_interval;
boolean_t		mac_flow_log_enable;
boolean_t		mac_link_log_enable;
timeout_id_t		mac_logging_timer;

/* for debugging, see MAC_DBG_PRT() in mac_impl.h */
int mac_dbg = 0;

#define	MACTYPE_KMODDIR	"mac"
#define	MACTYPE_HASHSZ	67
static mod_hash_t	*i_mactype_hash;
/*
 * i_mactype_lock synchronizes threads that obtain references to mactype_t
 * structures through i_mactype_getplugin().
 */
static kmutex_t		i_mactype_lock;

/*
 * mac_tx_percpu_cnt
 *
 * Number of per cpu locks per mac_client_impl_t. Used by the transmit side
 * in mac_tx to reduce lock contention. This is sized at boot time in mac_init.
 * mac_tx_percpu_cnt_max is settable in /etc/system and must be a power of 2.
 * Per cpu locks may be disabled by setting mac_tx_percpu_cnt_max to 1.
 */
int mac_tx_percpu_cnt;
int mac_tx_percpu_cnt_max = 128;

/*
 * Call back functions for the bridge module. These are guaranteed to be valid
 * when holding a reference on a link or when holding mip->mi_bridge_lock and
 * mi_bridge_link is non-NULL.
 */
mac_bridge_tx_t mac_bridge_tx_cb;
mac_bridge_rx_t mac_bridge_rx_cb;
mac_bridge_ref_t mac_bridge_ref_cb;
mac_bridge_ls_t mac_bridge_ls_cb;

static int i_mac_constructor(void *, void *, int);
static void i_mac_destructor(void *, void *);
static int i_mac_ring_ctor(void *, void *, int);
static void i_mac_ring_dtor(void *, void *);
static mblk_t *mac_rx_classify(mac_impl_t *, mac_resource_handle_t, mblk_t *);
void mac_tx_client_flush(mac_client_impl_t *);
void mac_tx_client_block(mac_client_impl_t *);
static void mac_rx_ring_quiesce(mac_ring_t *, uint_t);
static int mac_start_group_and_rings(mac_group_t *);
static void mac_stop_group_and_rings(mac_group_t *);
static void mac_pool_event_cb(pool_event_t, int, void *);

typedef struct netinfo_s {
	list_node_t	ni_link;
	void		*ni_record;
	int		ni_size;
	int		ni_type;
} netinfo_t;

/*
 * Module initialization functions.
 */

void
mac_init(void)
{
	mac_tx_percpu_cnt = ((boot_max_ncpus == -1) ? max_ncpus :
	    boot_max_ncpus);

	/* Upper bound is mac_tx_percpu_cnt_max */
	if (mac_tx_percpu_cnt > mac_tx_percpu_cnt_max)
		mac_tx_percpu_cnt = mac_tx_percpu_cnt_max;

	if (mac_tx_percpu_cnt < 1) {
		/* Someone set mac_tx_percpu_cnt_max to 0 or less */
		mac_tx_percpu_cnt = 1;
	}

	ASSERT(mac_tx_percpu_cnt >= 1);
	mac_tx_percpu_cnt = (1 << highbit(mac_tx_percpu_cnt - 1));
	/*
	 * Make it of the form 2**N - 1 in the range
	 * [0 .. mac_tx_percpu_cnt_max - 1]
	 */
	mac_tx_percpu_cnt--;

	i_mac_impl_cachep = kmem_cache_create("mac_impl_cache",
	    sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor,
	    NULL, NULL, NULL, 0);
	ASSERT(i_mac_impl_cachep != NULL);

	mac_ring_cache = kmem_cache_create("mac_ring_cache",
	    sizeof (mac_ring_t), 0, i_mac_ring_ctor, i_mac_ring_dtor, NULL,
	    NULL, NULL, 0);
	ASSERT(mac_ring_cache != NULL);

	i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash",
	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
	rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL);

	mac_flow_init();
	mac_soft_ring_init();
	mac_bcast_init();
	mac_client_init();

	i_mac_impl_count = 0;

	i_mactype_hash = mod_hash_create_extended("mactype_hash",
	    MACTYPE_HASHSZ,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * Allocate an id space to manage minor numbers. The range of the
	 * space will be from MAC_MAX_MINOR+1 to MAC_PRIVATE_MINOR-1. This
	 * leaves half of the 32-bit minors available for driver private use.
	 */
	minor_ids = id_space_create("mac_minor_ids", MAC_MAX_MINOR+1,
	    MAC_PRIVATE_MINOR-1);
	ASSERT(minor_ids != NULL);
	minor_count = 0;

	/* Let's default to 20 seconds */
	mac_logging_interval = 20;
	mac_flow_log_enable = B_FALSE;
	mac_link_log_enable = B_FALSE;
	mac_logging_timer = 0;

	/* Register to be notified of noteworthy pool events */
	mac_pool_event_reg.pec_func = mac_pool_event_cb;
	mac_pool_event_reg.pec_arg = NULL;
	pool_event_cb_register(&mac_pool_event_reg);
}

int
mac_fini(void)
{
	if (i_mac_impl_count > 0 || minor_count > 0)
		return (EBUSY);

	pool_event_cb_unregister(&mac_pool_event_reg);

	id_space_destroy(minor_ids);
	mac_flow_fini();

	mod_hash_destroy_hash(i_mac_impl_hash);
	rw_destroy(&i_mac_impl_lock);

	mac_client_fini();
	kmem_cache_destroy(mac_ring_cache);

	mod_hash_destroy_hash(i_mactype_hash);
	mac_soft_ring_finish();

	return (0);
}

/*
 * Initialize a GLDv3 driver's device ops. A driver that manages its own ops
 * (e.g. softmac) may pass in a NULL ops argument.
 */
void
mac_init_ops(struct dev_ops *ops, const char *name)
{
	major_t major = ddi_name_to_major((char *)name);

	/*
	 * By returning on error below, we are not letting the driver continue
	 * in an undefined context. The mac_register() function will fail if
	 * DN_GLDV3_DRIVER isn't set.
	 */
	if (major == DDI_MAJOR_T_NONE)
		return;
	LOCK_DEV_OPS(&devnamesp[major].dn_lock);
	devnamesp[major].dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER);
	UNLOCK_DEV_OPS(&devnamesp[major].dn_lock);
	if (ops != NULL)
		dld_init_ops(ops, name);
}

void
mac_fini_ops(struct dev_ops *ops)
{
	dld_fini_ops(ops);
}
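
/*
 * A minimal sketch of how a GLDv3 driver typically uses the two functions
 * above (the "xx" dev_ops and modlinkage names are illustrative assumptions,
 * not part of this file): mac_init_ops() is called from the driver's _init()
 * before mod_install(), and mac_fini_ops() both on _init() failure and in
 * _fini() after a successful mod_remove().
 *
 *	int
 *	_init(void)
 *	{
 *		int	err;
 *
 *		mac_init_ops(&xx_dev_ops, "xx");
 *		if ((err = mod_install(&xx_modlinkage)) != 0)
 *			mac_fini_ops(&xx_dev_ops);
 *		return (err);
 *	}
 *
 *	int
 *	_fini(void)
 *	{
 *		int	err;
 *
 *		if ((err = mod_remove(&xx_modlinkage)) == 0)
 *			mac_fini_ops(&xx_dev_ops);
 *		return (err);
 *	}
 */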

/*ARGSUSED*/
static int
i_mac_constructor(void *buf, void *arg, int kmflag)
{
	mac_impl_t *mip = buf;

	bzero(buf, sizeof (mac_impl_t));

	mip->mi_linkstate = LINK_STATE_UNKNOWN;

	rw_init(&mip->mi_rw_lock, NULL, RW_DRIVER, NULL);
	mutex_init(&mip->mi_notify_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&mip->mi_promisc_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&mip->mi_ring_lock, NULL, MUTEX_DEFAULT, NULL);

	mip->mi_notify_cb_info.mcbi_lockp = &mip->mi_notify_lock;
	cv_init(&mip->mi_notify_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);
	mip->mi_promisc_cb_info.mcbi_lockp = &mip->mi_promisc_lock;
	cv_init(&mip->mi_promisc_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);

	mutex_init(&mip->mi_bridge_lock, NULL, MUTEX_DEFAULT, NULL);

	return (0);
}

/*ARGSUSED*/
static void
i_mac_destructor(void *buf, void *arg)
{
	mac_impl_t *mip = buf;
	mac_cb_info_t *mcbi;

	ASSERT(mip->mi_ref == 0);
	ASSERT(mip->mi_active == 0);
	ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN);
	ASSERT(mip->mi_devpromisc == 0);
	ASSERT(mip->mi_ksp == NULL);
	ASSERT(mip->mi_kstat_count == 0);
	ASSERT(mip->mi_nclients == 0);
	ASSERT(mip->mi_nactiveclients == 0);
	ASSERT(mip->mi_single_active_client == NULL);
	ASSERT(mip->mi_state_flags == 0);
	ASSERT(mip->mi_factory_addr == NULL);
	ASSERT(mip->mi_factory_addr_num == 0);
	ASSERT(mip->mi_default_tx_ring == NULL);

	mcbi = &mip->mi_notify_cb_info;
	ASSERT(mcbi->mcbi_del_cnt == 0 && mcbi->mcbi_walker_cnt == 0);
	ASSERT(mip->mi_notify_bits == 0);
	ASSERT(mip->mi_notify_thread == NULL);
	ASSERT(mcbi->mcbi_lockp == &mip->mi_notify_lock);
	mcbi->mcbi_lockp = NULL;

	mcbi = &mip->mi_promisc_cb_info;
	ASSERT(mcbi->mcbi_del_cnt == 0 && mip->mi_promisc_list == NULL);
	ASSERT(mip->mi_promisc_list == NULL);
	ASSERT(mcbi->mcbi_lockp == &mip->mi_promisc_lock);
	mcbi->mcbi_lockp = NULL;

	ASSERT(mip->mi_bcast_ngrps == 0 && mip->mi_bcast_grp == NULL);
	ASSERT(mip->mi_perim_owner == NULL && mip->mi_perim_ocnt == 0);

	rw_destroy(&mip->mi_rw_lock);

	mutex_destroy(&mip->mi_promisc_lock);
	cv_destroy(&mip->mi_promisc_cb_info.mcbi_cv);
	mutex_destroy(&mip->mi_notify_lock);
	cv_destroy(&mip->mi_notify_cb_info.mcbi_cv);
	mutex_destroy(&mip->mi_ring_lock);

	ASSERT(mip->mi_bridge_link == NULL);
}

/* ARGSUSED */
static int
i_mac_ring_ctor(void *buf, void *arg, int kmflag)
{
	mac_ring_t *ring = (mac_ring_t *)buf;

	bzero(ring, sizeof (mac_ring_t));
	cv_init(&ring->mr_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&ring->mr_lock, NULL, MUTEX_DEFAULT, NULL);
	ring->mr_state = MR_FREE;
	return (0);
}

/* ARGSUSED */
static void
i_mac_ring_dtor(void *buf, void *arg)
{
	mac_ring_t *ring = (mac_ring_t *)buf;

	cv_destroy(&ring->mr_cv);
	mutex_destroy(&ring->mr_lock);
}

/*
 * Common functions to do mac callback addition and deletion. Currently this is
 * used by promisc callbacks and notify callbacks. List addition and deletion
 * need to take care of list walkers. List walkers in general, can't hold list
 * locks and make upcall callbacks due to potential lock order and recursive
 * reentry issues. Instead list walkers increment the list walker count to mark
 * the presence of a walker thread. Addition can be carefully done to ensure
 * that the list walker always sees either the old list or the new list.
 * However the deletion can't be done while the walker is active, instead the
 * deleting thread simply marks the entry as logically deleted. The last walker
 * physically deletes and frees up the logically deleted entries when the walk
 * is complete.
 */
void
mac_callback_add(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    mac_cb_t *mcb_elem)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;

	/* Verify it is not already in the list */
	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
		if (p == mcb_elem)
			break;
	}
	VERIFY(p == NULL);

	/*
	 * Add it to the head of the callback list. The membar ensures that
	 * the following list pointer manipulations reach global visibility
	 * in exactly the program order below.
	 */
	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));

	mcb_elem->mcb_nextp = *mcb_head;
	membar_producer();
	*mcb_head = mcb_elem;
}

/*
 * Mark the entry as logically deleted. If there aren't any walkers unlink
 * it from the list. In either case return the corresponding status.
 */
boolean_t
mac_callback_remove(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    mac_cb_t *mcb_elem)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;

	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	/*
	 * Search the callback list for the entry to be removed
	 */
	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
		if (p == mcb_elem)
			break;
	}
	VERIFY(p != NULL);

	/*
	 * If there are walkers just mark it as deleted and the last walker
	 * will remove from the list and free it.
	 */
	if (mcbi->mcbi_walker_cnt != 0) {
		p->mcb_flags |= MCB_CONDEMNED;
		mcbi->mcbi_del_cnt++;
		return (B_FALSE);
	}

	ASSERT(mcbi->mcbi_del_cnt == 0);
	*pp = p->mcb_nextp;
	p->mcb_nextp = NULL;
	return (B_TRUE);
}

/*
 * Wait for all pending callback removals to be completed
 */
void
mac_callback_remove_wait(mac_cb_info_t *mcbi)
{
	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	while (mcbi->mcbi_del_cnt != 0) {
		DTRACE_PROBE1(need_wait, mac_cb_info_t *, mcbi);
		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
	}
}

/*
 * The last mac callback walker does the cleanup. Walk the list and unlink
 * all the logically deleted entries and construct a temporary list of
 * removed entries. Return the list of removed entries to the caller.
 */
mac_cb_t *
mac_callback_walker_cleanup(mac_cb_info_t *mcbi, mac_cb_t **mcb_head)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;
	mac_cb_t	*rmlist = NULL;		/* List of removed elements */
	int	cnt = 0;

	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	ASSERT(mcbi->mcbi_del_cnt != 0 && mcbi->mcbi_walker_cnt == 0);

	pp = mcb_head;
	while (*pp != NULL) {
		if ((*pp)->mcb_flags & MCB_CONDEMNED) {
			p = *pp;
			*pp = p->mcb_nextp;
			p->mcb_nextp = rmlist;
			rmlist = p;
			cnt++;
			continue;
		}
		pp = &(*pp)->mcb_nextp;
	}

	ASSERT(mcbi->mcbi_del_cnt == cnt);
	mcbi->mcbi_del_cnt = 0;
	return (rmlist);
}

boolean_t
mac_callback_lookup(mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
{
	mac_cb_t	*mcb;

	/* Verify it is not already in the list */
	for (mcb = *mcb_headp; mcb != NULL; mcb = mcb->mcb_nextp) {
		if (mcb == mcb_elem)
			return (B_TRUE);
	}

	return (B_FALSE);
}

boolean_t
mac_callback_find(mac_cb_info_t *mcbi, mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
{
	boolean_t	found;

	mutex_enter(mcbi->mcbi_lockp);
	found = mac_callback_lookup(mcb_headp, mcb_elem);
	mutex_exit(mcbi->mcbi_lockp);

	return (found);
}

/* Free the list of removed callbacks */
void
mac_callback_free(mac_cb_t *rmlist)
{
	mac_cb_t	*mcb;
	mac_cb_t	*mcb_next;

	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
		mcb_next = mcb->mcb_nextp;
		kmem_free(mcb->mcb_objp, mcb->mcb_objsize);
	}
}
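
/*
 * A minimal sketch of the walker pattern the functions above are designed
 * for (a hypothetical walk, not a specific caller in this file): the walker
 * count is what allows the walk itself to run without the list lock held.
 *
 *	mac_cb_t	*rmlist = NULL;
 *
 *	mutex_enter(mcbi->mcbi_lockp);
 *	mcbi->mcbi_walker_cnt++;
 *	mutex_exit(mcbi->mcbi_lockp);
 *
 *	for (mcb = *mcb_head; mcb != NULL; mcb = mcb->mcb_nextp) {
 *		if (!(mcb->mcb_flags & MCB_CONDEMNED))
 *			invoke the callback;
 *	}
 *
 *	mutex_enter(mcbi->mcbi_lockp);
 *	if (--mcbi->mcbi_walker_cnt == 0 && mcbi->mcbi_del_cnt != 0) {
 *		rmlist = mac_callback_walker_cleanup(mcbi, mcb_head);
 *		cv_broadcast(&mcbi->mcbi_cv);	wakes remove_wait
 *	}
 *	mutex_exit(mcbi->mcbi_lockp);
 *	if (rmlist != NULL)
 *		mac_callback_free(rmlist);
 */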

/*
 * The promisc callbacks are in 2 lists, one off the 'mip' and another off the
 * 'mcip' threaded by mpi_mi_link and mpi_mci_link respectively. However there
 * is only a single shared total walker count, and an entry can't be physically
 * unlinked if a walker is active on either list. The last walker does this
 * cleanup of logically deleted entries.
 */
void
i_mac_promisc_walker_cleanup(mac_impl_t *mip)
{
	mac_cb_t	*rmlist;
	mac_cb_t	*mcb;
	mac_cb_t	*mcb_next;
	mac_promisc_impl_t	*mpip;

	/*
	 * Construct a temporary list of deleted callbacks by walking the
	 * mi_promisc_list. Then for each entry in the temporary list,
	 * remove it from the mci_promisc_list and free the entry.
	 */
	rmlist = mac_callback_walker_cleanup(&mip->mi_promisc_cb_info,
	    &mip->mi_promisc_list);

	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
		mcb_next = mcb->mcb_nextp;
		mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
		VERIFY(mac_callback_remove(&mip->mi_promisc_cb_info,
		    &mpip->mpi_mcip->mci_promisc_list, &mpip->mpi_mci_link));
		mcb->mcb_flags = 0;
		mcb->mcb_nextp = NULL;
		kmem_cache_free(mac_promisc_impl_cache, mpip);
	}
}

void
i_mac_notify(mac_impl_t *mip, mac_notify_type_t type)
{
	mac_cb_info_t *mcbi;

	/*
	 * Signal the notify thread even after mi_ref has become zero and
	 * mi_disabled is set. The synchronization with the notify thread
	 * happens in mac_unregister and that implies the driver must make
	 * sure it is single-threaded (with respect to mac calls) and that
	 * all pending mac calls have returned before it calls mac_unregister.
	 */
	rw_enter(&i_mac_impl_lock, RW_READER);
	if (mip->mi_state_flags & MIS_DISABLED)
		goto exit;

	/*
	 * Guard against incorrect notifications. (Running a newer
	 * mac client against an older implementation?)
	 */
	if (type >= MAC_NNOTE)
		goto exit;

	mcbi = &mip->mi_notify_cb_info;
	mutex_enter(mcbi->mcbi_lockp);
	mip->mi_notify_bits |= (1 << type);
	cv_broadcast(&mcbi->mcbi_cv);
	mutex_exit(mcbi->mcbi_lockp);

exit:
	rw_exit(&i_mac_impl_lock);
}

/*
 * Mac serialization primitives. Please see the block comment at the
 * top of the file.
 */
void
i_mac_perim_enter(mac_impl_t *mip)
{
	mac_client_impl_t	*mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Return the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}

	mutex_enter(&mip->mi_perim_lock);
	if (mip->mi_perim_owner == curthread) {
		mip->mi_perim_ocnt++;
		mutex_exit(&mip->mi_perim_lock);
		return;
	}

	while (mip->mi_perim_owner != NULL)
		cv_wait(&mip->mi_perim_cv, &mip->mi_perim_lock);

	mip->mi_perim_owner = curthread;
	ASSERT(mip->mi_perim_ocnt == 0);
	mip->mi_perim_ocnt++;
#ifdef DEBUG
	mip->mi_perim_stack_depth = getpcstack(mip->mi_perim_stack,
	    MAC_PERIM_STACK_DEPTH);
#endif
	mutex_exit(&mip->mi_perim_lock);
}

int
i_mac_perim_enter_nowait(mac_impl_t *mip)
{
	/*
	 * The vnic is a special case, since the serialization is done based
	 * on the lower mac. If the lower mac is busy, it does not imply the
	 * vnic can't be unregistered. But in the case of other drivers,
	 * a busy perimeter or open mac handles implies that the mac is busy
	 * and can't be unregistered.
	 */
	if (mip->mi_state_flags & MIS_IS_VNIC) {
		i_mac_perim_enter(mip);
		return (0);
	}

	mutex_enter(&mip->mi_perim_lock);
	if (mip->mi_perim_owner != NULL) {
		mutex_exit(&mip->mi_perim_lock);
		return (EBUSY);
	}
	ASSERT(mip->mi_perim_ocnt == 0);
	mip->mi_perim_owner = curthread;
	mip->mi_perim_ocnt++;
	mutex_exit(&mip->mi_perim_lock);

	return (0);
}
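
/*
 * A minimal sketch of the intended use of i_mac_perim_enter_nowait() (a
 * hypothetical unregister-style caller, for illustration only): the caller
 * backs off with EBUSY rather than blocking, since a blocked unregister
 * could deadlock against the thread currently holding the perimeter.
 *
 *	if (i_mac_perim_enter_nowait(mip) != 0)
 *		return (EBUSY);
 *	... tear down state that requires the perimeter ...
 *	i_mac_perim_exit(mip);
 */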

void
i_mac_perim_exit(mac_impl_t *mip)
{
	mac_client_impl_t *mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Return the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}

	ASSERT(mip->mi_perim_owner == curthread && mip->mi_perim_ocnt != 0);

	mutex_enter(&mip->mi_perim_lock);
	if (--mip->mi_perim_ocnt == 0) {
		mip->mi_perim_owner = NULL;
		cv_signal(&mip->mi_perim_cv);
	}
	mutex_exit(&mip->mi_perim_lock);
}

/*
 * Returns whether the current thread holds the mac perimeter. Used in making
 * assertions.
 */
boolean_t
mac_perim_held(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mac_client_impl_t *mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Return the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}
	return (mip->mi_perim_owner == curthread);
}

/*
 * mac client interfaces to enter the mac perimeter of a mac end point, given
 * its mac handle, or macname or linkid.
 */
void
mac_perim_enter_by_mh(mac_handle_t mh, mac_perim_handle_t *mphp)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	i_mac_perim_enter(mip);
	/*
	 * The mac_perim_handle_t returned encodes the 'mip' and whether a
	 * mac_open has been done internally while entering the perimeter.
	 * This information is used in mac_perim_exit.
	 */
	MAC_ENCODE_MPH(*mphp, mip, 0);
}

int
mac_perim_enter_by_macname(const char *name, mac_perim_handle_t *mphp)
{
	int	err;
	mac_handle_t	mh;

	if ((err = mac_open(name, &mh)) != 0)
		return (err);

	mac_perim_enter_by_mh(mh, mphp);
	MAC_ENCODE_MPH(*mphp, mh, 1);
	return (0);
}

int
mac_perim_enter_by_linkid(datalink_id_t linkid, mac_perim_handle_t *mphp)
{
	int	err;
	mac_handle_t	mh;

	if ((err = mac_open_by_linkid(linkid, &mh)) != 0)
		return (err);

	mac_perim_enter_by_mh(mh, mphp);
	MAC_ENCODE_MPH(*mphp, mh, 1);
	return (0);
}

void
mac_perim_exit(mac_perim_handle_t mph)
{
	mac_impl_t	*mip;
	boolean_t	need_close;

	MAC_DECODE_MPH(mph, mip, need_close);
	i_mac_perim_exit(mip);
	if (need_close)
		mac_close((mac_handle_t)mip);
}

int
mac_hold(const char *macname, mac_impl_t **pmip)
{
	mac_impl_t	*mip;
	int		err;

	/*
	 * Check the device name length to make sure it won't overflow our
	 * buffer.
	 */
	if (strlen(macname) >= MAXNAMELEN)
		return (EINVAL);

	/*
	 * Look up its entry in the global hash table.
	 */
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname,
	    (mod_hash_val_t *)&mip);

	if (err != 0) {
		rw_exit(&i_mac_impl_lock);
		return (ENOENT);
	}

	if (mip->mi_state_flags & MIS_DISABLED) {
		rw_exit(&i_mac_impl_lock);
		return (ENOENT);
	}

	if (mip->mi_state_flags & MIS_EXCLUSIVE_HELD) {
		rw_exit(&i_mac_impl_lock);
		return (EBUSY);
	}

	mip->mi_ref++;
	rw_exit(&i_mac_impl_lock);

	*pmip = mip;
	return (0);
}

void
mac_rele(mac_impl_t *mip)
{
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	ASSERT(mip->mi_ref != 0);
	if (--mip->mi_ref == 0) {
		ASSERT(mip->mi_nactiveclients == 0 &&
		    !(mip->mi_state_flags & MIS_EXCLUSIVE));
	}
	rw_exit(&i_mac_impl_lock);
}
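
/*
 * A minimal sketch of the mac_hold()/mac_rele() pairing (a hypothetical
 * caller; "net0" is an illustrative device name): the reference taken by
 * mac_hold() keeps the mac_impl_t from disappearing while it is in use,
 * and must be dropped with mac_rele() on every return path.
 *
 *	mac_impl_t	*mip;
 *
 *	if ((err = mac_hold("net0", &mip)) != 0)
 *		return (err);
 *	... operate on mip ...
 *	mac_rele(mip);
 */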

/*
 * Private GLDv3 function to start a MAC instance.
 */
int
mac_start(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	int		err = 0;
	mac_group_t	*defgrp;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mip->mi_start != NULL);

	/*
	 * Check whether the device is already started.
	 */
	if (mip->mi_active++ == 0) {
		mac_ring_t *ring = NULL;

		/*
		 * Start the device.
		 */
		err = mip->mi_start(mip->mi_driver);
		if (err != 0) {
			mip->mi_active--;
			return (err);
		}

		/*
		 * Start the default tx ring.
		 */
		if (mip->mi_default_tx_ring != NULL) {

			ring = (mac_ring_t *)mip->mi_default_tx_ring;
			if (ring->mr_state != MR_INUSE) {
				err = mac_start_ring(ring);
				if (err != 0) {
					mip->mi_active--;
					return (err);
				}
			}
		}

		if ((defgrp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) {
			/*
			 * Start the default ring, since it will be needed
			 * to receive broadcast and multicast traffic for
			 * both primary and non-primary MAC clients.
			 */
			ASSERT(defgrp->mrg_state == MAC_GROUP_STATE_REGISTERED);
			err = mac_start_group_and_rings(defgrp);
			if (err != 0) {
				mip->mi_active--;
				if ((ring != NULL) &&
				    (ring->mr_state == MR_INUSE))
					mac_stop_ring(ring);
				return (err);
			}
			mac_set_group_state(defgrp, MAC_GROUP_STATE_SHARED);
		}
	}

	return (err);
}

/*
 * Private GLDv3 function to stop a MAC instance.
 */
void
mac_stop(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mac_group_t	*grp;

	ASSERT(mip->mi_stop != NULL);
	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	/*
	 * Check whether the device is still needed.
	 */
	ASSERT(mip->mi_active != 0);
	if (--mip->mi_active == 0) {
		if ((grp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) {
			/*
			 * There should be no more active clients since the
			 * MAC is being stopped. Stop the default RX group
			 * and transition it back to registered state.
			 *
			 * When clients are torn down, the groups
			 * are released via mac_release_rx_group which
			 * knows that the default group is always in
			 * started mode since broadcast uses it. So
			 * we can assert that there are no clients
			 * (since mac_bcast_add doesn't register itself
			 * as a client) and the group is in SHARED state.
			 */
			ASSERT(grp->mrg_state == MAC_GROUP_STATE_SHARED);
			ASSERT(MAC_GROUP_NO_CLIENT(grp) &&
			    mip->mi_nactiveclients == 0);
			mac_stop_group_and_rings(grp);
			mac_set_group_state(grp, MAC_GROUP_STATE_REGISTERED);
		}

		if (mip->mi_default_tx_ring != NULL) {
			mac_ring_t *ring;

			ring = (mac_ring_t *)mip->mi_default_tx_ring;
			if (ring->mr_state == MR_INUSE) {
				mac_stop_ring(ring);
				ring->mr_flag = 0;
			}
		}

		/*
		 * Stop the device.
		 */
		mip->mi_stop(mip->mi_driver);
	}
}

int
i_mac_promisc_set(mac_impl_t *mip, boolean_t on)
{
	int err = 0;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mip->mi_setpromisc != NULL);

	if (on) {
		/*
		 * Enable promiscuous mode on the device if not yet enabled.
		 */
		if (mip->mi_devpromisc++ == 0) {
			err = mip->mi_setpromisc(mip->mi_driver, B_TRUE);
			if (err != 0) {
				mip->mi_devpromisc--;
				return (err);
			}
			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
		}
	} else {
		if (mip->mi_devpromisc == 0)
			return (EPROTO);

		/*
		 * Disable promiscuous mode on the device if this is the last
		 * enabling.
		 */
		if (--mip->mi_devpromisc == 0) {
			err = mip->mi_setpromisc(mip->mi_driver, B_FALSE);
			if (err != 0) {
				mip->mi_devpromisc++;
				return (err);
			}
			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
		}
	}

	return (0);
}

/*
 * The promiscuity state can change any time. If the caller needs to take
 * actions that are atomic with the promiscuity state, then the caller needs
 * to bracket the entire sequence with mac_perim_enter/exit.
 */
boolean_t
mac_promisc_get(mac_handle_t mh)
{
	mac_impl_t		*mip = (mac_impl_t *)mh;

	/*
	 * Return the current promiscuity.
	 */
	return (mip->mi_devpromisc != 0);
}

/*
 * Invoked at MAC instance attach time to initialize the list
 * of factory MAC addresses supported by a MAC instance. This function
 * builds a local cache in the mac_impl_t for the MAC addresses
 * supported by the underlying hardware. The MAC clients themselves
 * use the mac_addr_factory*() functions to query and reserve
 * factory MAC addresses.
 */
void
mac_addr_factory_init(mac_impl_t *mip)
{
	mac_capab_multifactaddr_t capab;
	uint8_t *addr;
	int i;

	/*
	 * First round to see how many factory MAC addresses are available.
	 */
	bzero(&capab, sizeof (capab));
	if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_MULTIFACTADDR,
	    &capab) || (capab.mcm_naddr == 0)) {
		/*
		 * The MAC instance doesn't support multiple factory
		 * MAC addresses, we're done here.
		 */
		return;
	}

	/*
	 * Allocate the space and get all the factory addresses.
	 */
	addr = kmem_alloc(capab.mcm_naddr * MAXMACADDRLEN, KM_SLEEP);
	capab.mcm_getaddr(mip->mi_driver, capab.mcm_naddr, addr);

	mip->mi_factory_addr_num = capab.mcm_naddr;
	mip->mi_factory_addr = kmem_zalloc(mip->mi_factory_addr_num *
	    sizeof (mac_factory_addr_t), KM_SLEEP);

	for (i = 0; i < capab.mcm_naddr; i++) {
		bcopy(addr + i * MAXMACADDRLEN,
		    mip->mi_factory_addr[i].mfa_addr,
		    mip->mi_type->mt_addr_length);
		mip->mi_factory_addr[i].mfa_in_use = B_FALSE;
	}

	kmem_free(addr, capab.mcm_naddr * MAXMACADDRLEN);
}

void
mac_addr_factory_fini(mac_impl_t *mip)
{
	if (mip->mi_factory_addr == NULL) {
		ASSERT(mip->mi_factory_addr_num == 0);
		return;
	}

	kmem_free(mip->mi_factory_addr, mip->mi_factory_addr_num *
	    sizeof (mac_factory_addr_t));

	mip->mi_factory_addr = NULL;
	mip->mi_factory_addr_num = 0;
}

/*
 * Reserve a factory MAC address. If *slot is set to -1, the function
 * attempts to reserve any of the available factory MAC addresses and
 * returns the reserved slot id. If no slots are available, the function
 * returns ENOSPC. If *slot is not set to -1, the function reserves
 * the specified slot if it is available, or returns EBUSY if the slot
 * is already used. Returns ENOTSUP if the underlying MAC does not
 * support multiple factory addresses. If the slot number is not -1 but
 * is invalid, returns EINVAL.
 */
int
mac_addr_factory_reserve(mac_client_handle_t mch, int *slot)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	mac_impl_t *mip = mcip->mci_mip;
	int i, ret = 0;

	i_mac_perim_enter(mip);
	/*
	 * Protect against concurrent readers that may need a self-consistent
	 * view of the factory addresses
	 */
	rw_enter(&mip->mi_rw_lock, RW_WRITER);

	if (mip->mi_factory_addr_num == 0) {
		ret = ENOTSUP;
		goto bail;
	}

	if (*slot != -1) {
		/* check the specified slot */
		if (*slot < 1 || *slot > mip->mi_factory_addr_num) {
			ret = EINVAL;
			goto bail;
		}
		if (mip->mi_factory_addr[*slot-1].mfa_in_use) {
			ret = EBUSY;
			goto bail;
		}
	} else {
		/* pick the next available slot */
		for (i = 0; i < mip->mi_factory_addr_num; i++) {
			if (!mip->mi_factory_addr[i].mfa_in_use)
				break;
		}

		if (i == mip->mi_factory_addr_num) {
			ret = ENOSPC;
			goto bail;
		}
		*slot = i+1;
	}

	mip->mi_factory_addr[*slot-1].mfa_in_use = B_TRUE;
	mip->mi_factory_addr[*slot-1].mfa_client = mcip;

bail:
	rw_exit(&mip->mi_rw_lock);
	i_mac_perim_exit(mip);
	return (ret);
}

/*
 * Release the specified factory MAC address slot.
 */
void
mac_addr_factory_release(mac_client_handle_t mch, uint_t slot)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	mac_impl_t *mip = mcip->mci_mip;

	i_mac_perim_enter(mip);
	/*
	 * Protect against concurrent readers that may need a self-consistent
	 * view of the factory addresses
	 */
	rw_enter(&mip->mi_rw_lock, RW_WRITER);

	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);
	ASSERT(mip->mi_factory_addr[slot-1].mfa_in_use);

	mip->mi_factory_addr[slot-1].mfa_in_use = B_FALSE;

	rw_exit(&mip->mi_rw_lock);
	i_mac_perim_exit(mip);
}
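
/*
 * A minimal sketch of the reserve/release pairing above (a hypothetical
 * client; slot numbering is 1-based as implemented above): passing -1 asks
 * for any free slot, and the chosen slot id is returned through *slot.
 *
 *	int	slot = -1;
 *
 *	if ((err = mac_addr_factory_reserve(mch, &slot)) != 0)
 *		return (err);	ENOTSUP, ENOSPC, EBUSY or EINVAL
 *	... use the factory address in the reserved slot ...
 *	mac_addr_factory_release(mch, slot);
 */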

/*
 * Stores in mac_addr the value of the specified MAC address. The slot number
 * must be valid for the MAC (the function asserts this rather than returning
 * an error). If client_name is non-NULL, the caller must provide a buffer of
 * at least MAXNAMELEN bytes to receive the name of the client currently
 * using the address.
 */
void
mac_addr_factory_value(mac_handle_t mh, int slot, uchar_t *mac_addr,
    uint_t *addr_len, char *client_name, boolean_t *in_use_arg)
{
	mac_impl_t *mip = (mac_impl_t *)mh;
	boolean_t in_use;

	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);

	/*
	 * Readers need to hold mi_rw_lock. Writers need to hold mac perimeter
	 * and mi_rw_lock.
	 */
	rw_enter(&mip->mi_rw_lock, RW_READER);
	bcopy(mip->mi_factory_addr[slot-1].mfa_addr, mac_addr, MAXMACADDRLEN);
	*addr_len = mip->mi_type->mt_addr_length;
	in_use = mip->mi_factory_addr[slot-1].mfa_in_use;
	if (in_use && client_name != NULL) {
		bcopy(mip->mi_factory_addr[slot-1].mfa_client->mci_name,
		    client_name, MAXNAMELEN);
	}
	if (in_use_arg != NULL)
		*in_use_arg = in_use;
	rw_exit(&mip->mi_rw_lock);
}

/*
 * Returns the number of factory MAC addresses (in addition to the
 * primary MAC address), or 0 if the underlying MAC doesn't support
 * that feature.
 */
uint_t
mac_addr_factory_num(mac_handle_t mh)
{
	mac_impl_t *mip = (mac_impl_t *)mh;

	return (mip->mi_factory_addr_num);
}

void
mac_rx_group_unmark(mac_group_t *grp, uint_t flag)
{
	mac_ring_t	*ring;

	for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next)
		ring->mr_flag &= ~flag;
}

/*
 * The following mac_hwrings_xxx() functions are private mac client functions
 * used by the aggr driver to access and control the underlying HW Rx group
 * and rings. In this case, the aggr driver has exclusive control of the
 * underlying HW Rx group/rings; it calls the following functions to
 * start/stop the HW Rx rings, disable/enable polling, add/remove MAC
 * addresses, or set up the Rx callback.
 */
/* ARGSUSED */
static void
mac_hwrings_rx_process(void *arg, mac_resource_handle_t srs,
    mblk_t *mp_chain, boolean_t loopback)
{
	mac_soft_ring_set_t	*mac_srs = (mac_soft_ring_set_t *)srs;
	mac_srs_rx_t		*srs_rx = &mac_srs->srs_rx;
	mac_direct_rx_t		proc;
	void			*arg1;
	mac_resource_handle_t	arg2;

	proc = srs_rx->sr_func;
	arg1 = srs_rx->sr_arg1;
	arg2 = mac_srs->srs_mrh;

	proc(arg1, arg2, mp_chain, NULL);
}

/*
 * This function is called to get the list of HW rings that are reserved by
 * an exclusive mac client.
 *
 * Return value: the number of HW rings.
 */
int
mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh,
    mac_ring_handle_t *hwrh, mac_ring_type_t rtype)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	flow_entry_t *flent = mcip->mci_flent;
	mac_group_t *grp;
	mac_ring_t *ring;
	int cnt = 0;

	if (rtype == MAC_RING_TYPE_RX) {
		grp = flent->fe_rx_ring_group;
	} else if (rtype == MAC_RING_TYPE_TX) {
		grp = flent->fe_tx_ring_group;
	} else {
		ASSERT(B_FALSE);
		return (-1);
	}
	/*
	 * The mac client did not reserve any RX group, return directly.
	 * This is probably because the underlying MAC does not support
	 * any groups.
	 */
	if (hwgh != NULL)
		*hwgh = NULL;
	if (grp == NULL)
		return (0);
	/*
	 * This group must be reserved by this mac client.
	 */
	ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) &&
	    (mcip == MAC_GROUP_ONLY_CLIENT(grp)));

	for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next, cnt++) {
		ASSERT(cnt < MAX_RINGS_PER_GROUP);
		hwrh[cnt] = (mac_ring_handle_t)ring;
	}
	if (hwgh != NULL)
		*hwgh = (mac_group_handle_t)grp;

	return (cnt);
}
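
/*
 * A minimal sketch of how an exclusive client such as aggr consumes the
 * function above (illustrative only; see the aggr code for the real usage):
 * the returned handles are then driven via the mac_hwring_* functions below.
 *
 *	mac_ring_handle_t	hwrh[MAX_RINGS_PER_GROUP];
 *	mac_group_handle_t	hwgh;
 *	int			cnt, i;
 *
 *	cnt = mac_hwrings_get(mch, &hwgh, hwrh, MAC_RING_TYPE_RX);
 *	for (i = 0; i < cnt; i++)
 *		(void) mac_hwring_start(hwrh[i]);
 */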

/*
 * This function is called to get info about Tx/Rx rings.
 *
 * Return value: returns uint_t which will have various bits set
 * that indicate different properties of the ring.
 */
uint_t
mac_hwring_getinfo(mac_ring_handle_t rh)
{
	mac_ring_t *ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &ring->mr_info;

	return (info->mri_flags);
}

/*
 * Export ddi interrupt handles from the HW ring to the pseudo ring and
 * set up the RX callback of the mac client which exclusively controls
 * the HW ring.
 */
void
mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh,
    mac_ring_handle_t pseudo_rh)
{
	mac_ring_t		*hw_ring = (mac_ring_t *)hwrh;
	mac_ring_t		*pseudo_ring;
	mac_soft_ring_set_t	*mac_srs = hw_ring->mr_srs;

	if (pseudo_rh != NULL) {
		pseudo_ring = (mac_ring_t *)pseudo_rh;
		/* Export the ddi handles to pseudo ring */
		pseudo_ring->mr_info.mri_intr.mi_ddi_handle =
		    hw_ring->mr_info.mri_intr.mi_ddi_handle;
		pseudo_ring->mr_info.mri_intr.mi_ddi_shared =
		    hw_ring->mr_info.mri_intr.mi_ddi_shared;
		/*
		 * Save a pointer to pseudo ring in the hw ring. If
		 * interrupt handle changes, the hw ring will be
		 * notified of the change (see mac_ring_intr_set())
		 * and the appropriate change has to be made to
		 * the pseudo ring that has exported the ddi handle.
		 */
		hw_ring->mr_prh = pseudo_rh;
	}

	if (hw_ring->mr_type == MAC_RING_TYPE_RX) {
		ASSERT(!(mac_srs->srs_type & SRST_TX));
		mac_srs->srs_mrh = prh;
		mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process;
	}
}

void
mac_hwring_teardown(mac_ring_handle_t hwrh)
{
	mac_ring_t		*hw_ring = (mac_ring_t *)hwrh;
	mac_soft_ring_set_t	*mac_srs;

	if (hw_ring == NULL)
		return;
	hw_ring->mr_prh = NULL;
	if (hw_ring->mr_type == MAC_RING_TYPE_RX) {
		mac_srs = hw_ring->mr_srs;
		ASSERT(!(mac_srs->srs_type & SRST_TX));
		mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process;
		mac_srs->srs_mrh = NULL;
	}
}

int
mac_hwring_disable_intr(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;
	mac_intr_t *intr = &rr_ring->mr_info.mri_intr;

	return (intr->mi_disable(intr->mi_handle));
}

int
mac_hwring_enable_intr(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;
	mac_intr_t *intr = &rr_ring->mr_info.mri_intr;

	return (intr->mi_enable(intr->mi_handle));
}

int
mac_hwring_start(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;

	MAC_RING_UNMARK(rr_ring, MR_QUIESCE);
	return (0);
}

void
mac_hwring_stop(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;

	mac_rx_ring_quiesce(rr_ring, MR_QUIESCE);
}

mblk_t *
mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &rr_ring->mr_info;

	return (info->mri_poll(info->mri_driver, bytes_to_pickup));
}

/*
 * Send packets through a selected tx ring.
 */
mblk_t *
mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp)
{
	mac_ring_t *ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &ring->mr_info;

	ASSERT(ring->mr_type == MAC_RING_TYPE_TX &&
	    ring->mr_state >= MR_INUSE);
	return (info->mri_tx(info->mri_driver, mp));
}

/*
 * Query stats for a particular rx/tx ring
 */
int
mac_hwring_getstat(mac_ring_handle_t rh, uint_t stat, uint64_t *val)
{
	mac_ring_t	*ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &ring->mr_info;

	return (info->mri_stat(info->mri_driver, stat, val));
}

/*
 * Private function that is only used by aggr to send packets through
 * a port/Tx ring. Since aggr exposes a pseudo Tx ring even for ports
 * that do not expose Tx rings, the aggr_ring_tx() entry point needs
 * access to the mac_impl_t to send packets through the m_tx() entry
 * point. It accomplishes this by calling mac_hwring_send_priv().
 */
mblk_t *
mac_hwring_send_priv(mac_client_handle_t mch, mac_ring_handle_t rh, mblk_t *mp)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	mac_impl_t *mip = mcip->mci_mip;

	MAC_TX(mip, rh, mp, mcip);
	return (mp);
}

int
mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr)
{
	mac_group_t *group = (mac_group_t *)gh;

	return (mac_group_addmac(group, addr));
}

int
mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr)
{
	mac_group_t *group = (mac_group_t *)gh;

	return (mac_group_remmac(group, addr));
}

/*
 * Set the RX group to be shared/reserved. Note that the group must be
 * started/stopped outside of this function.
 */
void
mac_set_group_state(mac_group_t *grp, mac_group_state_t state)
{
	/*
	 * If there is no change in the group state, just return.
	 */
	if (grp->mrg_state == state)
		return;

	switch (state) {
	case MAC_GROUP_STATE_RESERVED:
		/*
		 * Successfully reserved the group.
		 *
		 * Given that there is an exclusive client controlling this
		 * group, we enable the group level polling when available,
		 * so that SRSs get to turn on/off individual rings they're
		 * assigned to.
		 */
		ASSERT(MAC_PERIM_HELD(grp->mrg_mh));

		if (grp->mrg_type == MAC_RING_TYPE_RX &&
		    GROUP_INTR_DISABLE_FUNC(grp) != NULL) {
			GROUP_INTR_DISABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp));
		}
		break;

	case MAC_GROUP_STATE_SHARED:
		/*
		 * Set all rings of this group to software classified.
		 * If the group has an overriding interrupt, then re-enable it.
		 */
		ASSERT(MAC_PERIM_HELD(grp->mrg_mh));

		if (grp->mrg_type == MAC_RING_TYPE_RX &&
		    GROUP_INTR_ENABLE_FUNC(grp) != NULL) {
			GROUP_INTR_ENABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp));
		}
		/* The ring is not available for reservations any more */
		break;

	case MAC_GROUP_STATE_REGISTERED:
		/* Also callable from mac_register, perim is not held */
		break;

	default:
		ASSERT(B_FALSE);
		break;
	}

	grp->mrg_state = state;
}

/*
 * Quiesce future hardware classified packets for the specified Rx ring.
 */
static void
mac_rx_ring_quiesce(mac_ring_t *rx_ring, uint_t ring_flag)
{
	ASSERT(rx_ring->mr_classify_type == MAC_HW_CLASSIFIER);
	ASSERT(ring_flag == MR_CONDEMNED || ring_flag == MR_QUIESCE);

	mutex_enter(&rx_ring->mr_lock);
	rx_ring->mr_flag |= ring_flag;
	while (rx_ring->mr_refcnt != 0)
		cv_wait(&rx_ring->mr_cv, &rx_ring->mr_lock);
	mutex_exit(&rx_ring->mr_lock);
}

/*
 * Please see mac_tx for details about the per cpu locking scheme; a sketch
 * of the data path side of that scheme follows the functions below.
 */
static void
mac_tx_lock_all(mac_client_impl_t *mcip)
{
	int	i;

	for (i = 0; i <= mac_tx_percpu_cnt; i++)
		mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
}

static void
mac_tx_unlock_all(mac_client_impl_t *mcip)
{
	int	i;

	for (i = mac_tx_percpu_cnt; i >= 0; i--)
		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
}

static void
mac_tx_unlock_allbutzero(mac_client_impl_t *mcip)
{
	int	i;

	for (i = mac_tx_percpu_cnt; i > 0; i--)
		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
}

static int
mac_tx_sum_refcnt(mac_client_impl_t *mcip)
{
	int	i;
	int	refcnt = 0;

	for (i = 0; i <= mac_tx_percpu_cnt; i++)
		refcnt += mcip->mci_tx_pcpu[i].pcpu_tx_refcnt;

	return (refcnt);
}

/*
 * Stop future Tx packets coming down from the client in preparation for
 * quiescing the Tx side. This is needed for dynamic reclaim and reassignment
 * of rings between clients.
 */
void
mac_tx_client_block(mac_client_impl_t *mcip)
{
	mac_tx_lock_all(mcip);
	mcip->mci_tx_flag |= MCI_TX_QUIESCE;
	while (mac_tx_sum_refcnt(mcip) != 0) {
		mac_tx_unlock_allbutzero(mcip);
		cv_wait(&mcip->mci_tx_cv, &mcip->mci_tx_pcpu[0].pcpu_tx_lock);
		mutex_exit(&mcip->mci_tx_pcpu[0].pcpu_tx_lock);
		mac_tx_lock_all(mcip);
	}
	mac_tx_unlock_all(mcip);
}

void
mac_tx_client_unblock(mac_client_impl_t *mcip)
{
	mac_tx_lock_all(mcip);
	mcip->mci_tx_flag &= ~MCI_TX_QUIESCE;
	mac_tx_unlock_all(mcip);
	/*
	 * We may fail to disable flow control for the last MAC_NOTE_TX
	 * notification because the MAC client is quiesced. Send the
	 * notification again.
	 */
	i_mac_notify(mcip->mci_mip, MAC_NOTE_TX);
}
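
/*
 * A minimal sketch of the data path side of the per cpu scheme (this is
 * done in mac_tx; shown here for illustration only): a sender takes only
 * its own cpu's lock, so senders on different cpus don't contend, while
 * mac_tx_client_block() above takes all of the locks to get a consistent
 * view of the flag and the total refcnt.
 *
 *	myindex = CPU->cpu_seqid & mac_tx_percpu_cnt;
 *	mutex_enter(&mcip->mci_tx_pcpu[myindex].pcpu_tx_lock);
 *	if (mcip->mci_tx_flag & MCI_TX_QUIESCE) {
 *		mutex_exit(...);	client is blocked, drop the packet
 *		return;
 *	}
 *	mcip->mci_tx_pcpu[myindex].pcpu_tx_refcnt++;
 *	mutex_exit(&mcip->mci_tx_pcpu[myindex].pcpu_tx_lock);
 *	... transmit ...
 *	decrement pcpu_tx_refcnt under the same lock, and cv_signal
 *	mci_tx_cv when MCI_TX_QUIESCE is set and the refcnt drops to zero.
 */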
The protocol 1858 * works bottom up by cutting off packet flow from the bottommost point in the 1859 * mac, then the SRS, and then the soft rings. There are 2 use cases of this 1860 * mechanism. One is a temporary quiesce of the SRS, such as, say, while changing 1861 * the Rx callbacks. Another use case is Rx SRS teardown. In the former case 1862 * the QUIESCE prefix/suffix is used and in the latter CONDEMNED is used 1863 * for the SRS and MR flags. In the former case the threads pause waiting for 1864 * a restart, while in the latter case the threads exit. The Tx SRS teardown 1865 * is also mostly similar to the above. 1866 * 1867 * 1. Stop future hardware classified packets at the lowest level in the mac. 1868 * Remove any hardware classification rule (CONDEMNED case) and mark the 1869 * rings as CONDEMNED or QUIESCE as appropriate. This prevents the mr_refcnt 1870 * from increasing. Upcalls from the driver that come through hardware 1871 * classification will be dropped in mac_rx from now on. Then we wait for 1872 * the mr_refcnt to drop to zero. When the mr_refcnt reaches zero we are 1873 * sure there aren't any upcall threads from the driver through hardware 1874 * classification. In the case of SRS teardown we also remove the 1875 * classification rule in the driver. 1876 * 1877 * 2. Stop future software classified packets by marking the flow entry with 1878 * FE_QUIESCE or FE_CONDEMNED as appropriate which prevents the refcnt from 1879 * increasing. We also remove the flow entry from the table in the latter 1880 * case. Then wait for the fe_refcnt to reach an appropriate quiescent value 1881 * that indicates there aren't any active threads using that flow entry. 1882 * 1883 * 3. Quiesce the SRS and softrings by signaling the SRS. The SRS poll thread, 1884 * SRS worker thread, and the soft ring threads are quiesced in sequence 1885 * with the SRS worker thread serving as a master controller. This 1886 * mechanism is explained in mac_srs_worker_quiesce(). 1887 * 1888 * The restart mechanism to reactivate the SRS and softrings is explained 1889 * in mac_srs_worker_restart(). Here we just signal the SRS worker to start the 1890 * restart sequence. 1891 */ 1892 void 1893 mac_rx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) 1894 { 1895 flow_entry_t *flent = srs->srs_flent; 1896 uint_t mr_flag, srs_done_flag; 1897 1898 ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); 1899 ASSERT(!(srs->srs_type & SRST_TX)); 1900 1901 if (srs_quiesce_flag == SRS_CONDEMNED) { 1902 mr_flag = MR_CONDEMNED; 1903 srs_done_flag = SRS_CONDEMNED_DONE; 1904 if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) 1905 mac_srs_client_poll_disable(srs->srs_mcip, srs); 1906 } else { 1907 ASSERT(srs_quiesce_flag == SRS_QUIESCE); 1908 mr_flag = MR_QUIESCE; 1909 srs_done_flag = SRS_QUIESCE_DONE; 1910 if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) 1911 mac_srs_client_poll_quiesce(srs->srs_mcip, srs); 1912 } 1913 1914 if (srs->srs_ring != NULL) { 1915 mac_rx_ring_quiesce(srs->srs_ring, mr_flag); 1916 } else { 1917 /* 1918 * SRS is driven by software classification. In case 1919 * of CONDEMNED, the top level teardown functions will 1920 * deal with flow removal. 1921 */ 1922 if (srs_quiesce_flag != SRS_CONDEMNED) { 1923 FLOW_MARK(flent, FE_QUIESCE); 1924 mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 1925 } 1926 } 1927 1928 /* 1929 * Signal the SRS to quiesce itself, and then cv_wait for the 1930 * SRS quiesce to complete.
The SRS worker thread will wake us 1931 * up when the quiesce is complete. 1932 */ 1933 mac_srs_signal(srs, srs_quiesce_flag); 1934 mac_srs_quiesce_wait(srs, srs_done_flag); 1935 } 1936 1937 /* 1938 * Remove an SRS. 1939 */ 1940 void 1941 mac_rx_srs_remove(mac_soft_ring_set_t *srs) 1942 { 1943 flow_entry_t *flent = srs->srs_flent; 1944 int i; 1945 1946 mac_rx_srs_quiesce(srs, SRS_CONDEMNED); 1947 /* 1948 * Locate and remove our entry in the fe_rx_srs[] array, and 1949 * adjust the fe_rx_srs array entries and array count by 1950 * moving the last entry into the vacated spot. 1951 */ 1952 mutex_enter(&flent->fe_lock); 1953 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 1954 if (flent->fe_rx_srs[i] == srs) 1955 break; 1956 } 1957 1958 ASSERT(i != 0 && i < flent->fe_rx_srs_cnt); 1959 if (i != flent->fe_rx_srs_cnt - 1) { 1960 flent->fe_rx_srs[i] = 1961 flent->fe_rx_srs[flent->fe_rx_srs_cnt - 1]; 1962 i = flent->fe_rx_srs_cnt - 1; 1963 } 1964 1965 flent->fe_rx_srs[i] = NULL; 1966 flent->fe_rx_srs_cnt--; 1967 mutex_exit(&flent->fe_lock); 1968 1969 mac_srs_free(srs); 1970 } 1971 1972 static void 1973 mac_srs_clear_flag(mac_soft_ring_set_t *srs, uint_t flag) 1974 { 1975 mutex_enter(&srs->srs_lock); 1976 srs->srs_state &= ~flag; 1977 mutex_exit(&srs->srs_lock); 1978 } 1979 1980 void 1981 mac_rx_srs_restart(mac_soft_ring_set_t *srs) 1982 { 1983 flow_entry_t *flent = srs->srs_flent; 1984 mac_ring_t *mr; 1985 1986 ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); 1987 ASSERT((srs->srs_type & SRST_TX) == 0); 1988 1989 /* 1990 * This handles a change in the number of SRSs between the quiesce 1991 * and restart operations of a flow. 1992 */ 1993 if (!SRS_QUIESCED(srs)) 1994 return; 1995 1996 /* 1997 * Signal the SRS to restart itself. Wait for the restart to complete. 1998 * Note that we only restart the SRS if it is not marked as 1999 * permanently quiesced. 2000 */ 2001 if (!SRS_QUIESCED_PERMANENT(srs)) { 2002 mac_srs_signal(srs, SRS_RESTART); 2003 mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); 2004 mac_srs_clear_flag(srs, SRS_RESTART_DONE); 2005 2006 mac_srs_client_poll_restart(srs->srs_mcip, srs); 2007 } 2008 2009 /* Finally clear the flags to let the packets in */ 2010 mr = srs->srs_ring; 2011 if (mr != NULL) { 2012 MAC_RING_UNMARK(mr, MR_QUIESCE); 2013 /* In case the ring was stopped, safely restart it */ 2014 if (mr->mr_state != MR_INUSE) 2015 (void) mac_start_ring(mr); 2016 } else { 2017 FLOW_UNMARK(flent, FE_QUIESCE); 2018 } 2019 } 2020 2021 /* 2022 * Temporary quiesce of a flow and associated Rx SRS. 2023 * Please see block comment above mac_rx_classify_flow_rem.
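 *
 * For example, a temporary client-wide quiesce/restart cycle (an
 * illustrative sketch; the caller must hold the mac perimeter):
 *
 *	mac_rx_client_quiesce(mch);	pause Rx SRSes and subflows
 *	... change Rx callbacks, reassign rings, etc ...
 *	mac_rx_client_restart(mch);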
2024 */ 2025 /* ARGSUSED */ 2026 int 2027 mac_rx_classify_flow_quiesce(flow_entry_t *flent, void *arg) 2028 { 2029 int i; 2030 2031 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 2032 mac_rx_srs_quiesce((mac_soft_ring_set_t *)flent->fe_rx_srs[i], 2033 SRS_QUIESCE); 2034 } 2035 return (0); 2036 } 2037 2038 /* 2039 * Restart a flow and associated Rx SRS that has been quiesced temporarily 2040 * Please see block comment above mac_rx_classify_flow_rem 2041 */ 2042 /* ARGSUSED */ 2043 int 2044 mac_rx_classify_flow_restart(flow_entry_t *flent, void *arg) 2045 { 2046 int i; 2047 2048 for (i = 0; i < flent->fe_rx_srs_cnt; i++) 2049 mac_rx_srs_restart((mac_soft_ring_set_t *)flent->fe_rx_srs[i]); 2050 2051 return (0); 2052 } 2053 2054 void 2055 mac_srs_perm_quiesce(mac_client_handle_t mch, boolean_t on) 2056 { 2057 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2058 flow_entry_t *flent = mcip->mci_flent; 2059 mac_impl_t *mip = mcip->mci_mip; 2060 mac_soft_ring_set_t *mac_srs; 2061 int i; 2062 2063 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 2064 2065 if (flent == NULL) 2066 return; 2067 2068 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 2069 mac_srs = flent->fe_rx_srs[i]; 2070 mutex_enter(&mac_srs->srs_lock); 2071 if (on) 2072 mac_srs->srs_state |= SRS_QUIESCE_PERM; 2073 else 2074 mac_srs->srs_state &= ~SRS_QUIESCE_PERM; 2075 mutex_exit(&mac_srs->srs_lock); 2076 } 2077 } 2078 2079 void 2080 mac_rx_client_quiesce(mac_client_handle_t mch) 2081 { 2082 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2083 mac_impl_t *mip = mcip->mci_mip; 2084 2085 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 2086 2087 if (MCIP_DATAPATH_SETUP(mcip)) { 2088 (void) mac_rx_classify_flow_quiesce(mcip->mci_flent, 2089 NULL); 2090 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2091 mac_rx_classify_flow_quiesce, NULL); 2092 } 2093 } 2094 2095 void 2096 mac_rx_client_restart(mac_client_handle_t mch) 2097 { 2098 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2099 mac_impl_t *mip = mcip->mci_mip; 2100 2101 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 2102 2103 if (MCIP_DATAPATH_SETUP(mcip)) { 2104 (void) mac_rx_classify_flow_restart(mcip->mci_flent, NULL); 2105 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2106 mac_rx_classify_flow_restart, NULL); 2107 } 2108 } 2109 2110 /* 2111 * This function only quiesces the Tx SRS and softring worker threads. Callers 2112 * need to make sure that there aren't any mac client threads doing current or 2113 * future transmits in the mac before calling this function. 2114 */ 2115 void 2116 mac_tx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) 2117 { 2118 mac_client_impl_t *mcip = srs->srs_mcip; 2119 2120 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2121 2122 ASSERT(srs->srs_type & SRST_TX); 2123 ASSERT(srs_quiesce_flag == SRS_CONDEMNED || 2124 srs_quiesce_flag == SRS_QUIESCE); 2125 2126 /* 2127 * Signal the SRS to quiesce itself, and then cv_wait for the 2128 * SRS quiesce to complete. The SRS worker thread will wake us 2129 * up when the quiesce is complete 2130 */ 2131 mac_srs_signal(srs, srs_quiesce_flag); 2132 mac_srs_quiesce_wait(srs, srs_quiesce_flag == SRS_QUIESCE ? 2133 SRS_QUIESCE_DONE : SRS_CONDEMNED_DONE); 2134 } 2135 2136 void 2137 mac_tx_srs_restart(mac_soft_ring_set_t *srs) 2138 { 2139 /* 2140 * Resizing the fanout could result in creation of new SRSs. 
2141 * They may not necessarily be in the quiesced state, in which 2142 * case they need not be restarted. 2143 */ 2144 if (!SRS_QUIESCED(srs)) 2145 return; 2146 2147 mac_srs_signal(srs, SRS_RESTART); 2148 mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); 2149 mac_srs_clear_flag(srs, SRS_RESTART_DONE); 2150 } 2151 2152 /* 2153 * Temporary quiesce of a flow and its associated Tx SRS. 2154 * Please see the block comment above mac_rx_srs_quiesce. 2155 */ 2156 /* ARGSUSED */ 2157 int 2158 mac_tx_flow_quiesce(flow_entry_t *flent, void *arg) 2159 { 2160 /* 2161 * The fe_tx_srs is NULL for a subflow on an interface that is 2162 * not plumbed. 2163 */ 2164 if (flent->fe_tx_srs != NULL) 2165 mac_tx_srs_quiesce(flent->fe_tx_srs, SRS_QUIESCE); 2166 return (0); 2167 } 2168 2169 /* ARGSUSED */ 2170 int 2171 mac_tx_flow_restart(flow_entry_t *flent, void *arg) 2172 { 2173 /* 2174 * The fe_tx_srs is NULL for a subflow on an interface that is 2175 * not plumbed. 2176 */ 2177 if (flent->fe_tx_srs != NULL) 2178 mac_tx_srs_restart(flent->fe_tx_srs); 2179 return (0); 2180 } 2181 2182 static void 2183 i_mac_tx_client_quiesce(mac_client_handle_t mch, uint_t srs_quiesce_flag) 2184 { 2185 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2186 2187 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2188 2189 mac_tx_client_block(mcip); 2190 if (MCIP_TX_SRS(mcip) != NULL) { 2191 mac_tx_srs_quiesce(MCIP_TX_SRS(mcip), srs_quiesce_flag); 2192 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2193 mac_tx_flow_quiesce, NULL); 2194 } 2195 } 2196 2197 void 2198 mac_tx_client_quiesce(mac_client_handle_t mch) 2199 { 2200 i_mac_tx_client_quiesce(mch, SRS_QUIESCE); 2201 } 2202 2203 void 2204 mac_tx_client_condemn(mac_client_handle_t mch) 2205 { 2206 i_mac_tx_client_quiesce(mch, SRS_CONDEMNED); 2207 } 2208 2209 void 2210 mac_tx_client_restart(mac_client_handle_t mch) 2211 { 2212 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2213 2214 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2215 2216 mac_tx_client_unblock(mcip); 2217 if (MCIP_TX_SRS(mcip) != NULL) { 2218 mac_tx_srs_restart(MCIP_TX_SRS(mcip)); 2219 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2220 mac_tx_flow_restart, NULL); 2221 } 2222 } 2223 2224 void 2225 mac_tx_client_flush(mac_client_impl_t *mcip) 2226 { 2227 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2228 2229 mac_tx_client_quiesce((mac_client_handle_t)mcip); 2230 mac_tx_client_restart((mac_client_handle_t)mcip); 2231 } 2232 2233 void 2234 mac_client_quiesce(mac_client_impl_t *mcip) 2235 { 2236 mac_rx_client_quiesce((mac_client_handle_t)mcip); 2237 mac_tx_client_quiesce((mac_client_handle_t)mcip); 2238 } 2239 2240 void 2241 mac_client_restart(mac_client_impl_t *mcip) 2242 { 2243 mac_rx_client_restart((mac_client_handle_t)mcip); 2244 mac_tx_client_restart((mac_client_handle_t)mcip); 2245 } 2246 2247 /* 2248 * Allocate a minor number. 2249 */ 2250 minor_t 2251 mac_minor_hold(boolean_t sleep) 2252 { 2253 minor_t minor; 2254 2255 /* 2256 * Grab a value from the arena. 2257 */ 2258 atomic_add_32(&minor_count, 1); 2259 2260 if (sleep) 2261 minor = (uint_t)id_alloc(minor_ids); 2262 else 2263 minor = (uint_t)id_alloc_nosleep(minor_ids); 2264 2265 if (minor == 0) { 2266 atomic_add_32(&minor_count, -1); 2267 return (0); 2268 } 2269 2270 return (minor); 2271 } 2272 2273 /* 2274 * Release a previously allocated minor number. 2275 */ 2276 void 2277 mac_minor_rele(minor_t minor) 2278 { 2279 /* 2280 * Return the value to the arena.
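 * Holds and releases are expected to be paired; illustratively:
 *
 *	minor = mac_minor_hold(B_TRUE);		may sleep for an id
 *	... use the minor number ...
 *	mac_minor_rele(minor);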
2281 */ 2282 id_free(minor_ids, minor); 2283 atomic_add_32(&minor_count, -1); 2284 } 2285 2286 uint32_t 2287 mac_no_notification(mac_handle_t mh) 2288 { 2289 mac_impl_t *mip = (mac_impl_t *)mh; 2290 2291 return (((mip->mi_state_flags & MIS_LEGACY) != 0) ? 2292 mip->mi_capab_legacy.ml_unsup_note : 0); 2293 } 2294 2295 /* 2296 * Prevent any new opens of this mac in preparation for unregister 2297 */ 2298 int 2299 i_mac_disable(mac_impl_t *mip) 2300 { 2301 mac_client_impl_t *mcip; 2302 2303 rw_enter(&i_mac_impl_lock, RW_WRITER); 2304 if (mip->mi_state_flags & MIS_DISABLED) { 2305 /* Already disabled, return success */ 2306 rw_exit(&i_mac_impl_lock); 2307 return (0); 2308 } 2309 /* 2310 * See if there are any other references to this mac_t (e.g., VLANs). 2311 * If so, return failure. If all the other checks below pass, then 2312 * set mi_disabled atomically under the i_mac_impl_lock to prevent 2313 * any new VLANs from being created or new mac client opens of this 2314 * mac end point. 2315 */ 2316 if (mip->mi_ref > 0) { 2317 rw_exit(&i_mac_impl_lock); 2318 return (EBUSY); 2319 } 2320 2321 /* 2322 * MAC clients must delete all multicast groups they join before 2323 * closing. Bcast groups are reference counted; the last client 2324 * to delete the group will wait till the group is physically 2325 * deleted. Since all clients have closed this mac end point, 2326 * mi_bcast_ngrps must be zero at this point. 2327 */ 2328 ASSERT(mip->mi_bcast_ngrps == 0); 2329 2330 /* 2331 * Don't let go of this if it has some flows. 2332 * All other code guarantees no flows are added to a disabled 2333 * mac, therefore it is sufficient to check for the flow table 2334 * only here. 2335 */ 2336 mcip = mac_primary_client_handle(mip); 2337 if ((mcip != NULL) && mac_link_has_flows((mac_client_handle_t)mcip)) { 2338 rw_exit(&i_mac_impl_lock); 2339 return (ENOTEMPTY); 2340 } 2341 2342 mip->mi_state_flags |= MIS_DISABLED; 2343 rw_exit(&i_mac_impl_lock); 2344 return (0); 2345 } 2346 2347 int 2348 mac_disable_nowait(mac_handle_t mh) 2349 { 2350 mac_impl_t *mip = (mac_impl_t *)mh; 2351 int err; 2352 2353 if ((err = i_mac_perim_enter_nowait(mip)) != 0) 2354 return (err); 2355 err = i_mac_disable(mip); 2356 i_mac_perim_exit(mip); 2357 return (err); 2358 } 2359 2360 int 2361 mac_disable(mac_handle_t mh) 2362 { 2363 mac_impl_t *mip = (mac_impl_t *)mh; 2364 int err; 2365 2366 i_mac_perim_enter(mip); 2367 err = i_mac_disable(mip); 2368 i_mac_perim_exit(mip); 2369 2370 /* 2371 * Clean up notification thread and wait for it to exit. 2372 */ 2373 if (err == 0) 2374 i_mac_notify_exit(mip); 2375 2376 return (err); 2377 } 2378 2379 /* 2380 * Called when the MAC instance has a non-empty flow table, to de-multiplex 2381 * incoming packets to the right flow. 2382 * The MAC's rw lock is assumed held as a READER. 2383 */ 2384 /* ARGSUSED */ 2385 static mblk_t * 2386 mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp) 2387 { 2388 flow_entry_t *flent = NULL; 2389 uint_t flags = FLOW_INBOUND; 2390 int err; 2391 2392 /* 2393 * If the mac is a port of an aggregation, pass FLOW_IGNORE_VLAN 2394 * to mac_flow_lookup() so that the VLAN packets can be successfully 2395 * passed to the non-VLAN aggregation flows. 2396 * 2397 * Note that there is possibly a race between this and 2398 * mac_unicast_remove/add() and VLAN packets could be incorrectly 2399 * classified to non-VLAN flows of non-aggregation mac clients. These 2400 * VLAN packets will then be filtered out by the mac module.
2401 */ 2402 if ((mip->mi_state_flags & MIS_EXCLUSIVE) != 0) 2403 flags |= FLOW_IGNORE_VLAN; 2404 2405 err = mac_flow_lookup(mip->mi_flow_tab, mp, flags, &flent); 2406 if (err != 0) { 2407 /* no registered receive function */ 2408 return (mp); 2409 } else { 2410 mac_client_impl_t *mcip; 2411 2412 /* 2413 * This flent might just be an additional one on the MAC client, 2414 * i.e. for classification purposes (different fdesc); however, 2415 * the resources, SRS et al., are in the mci_flent, so if 2416 * this isn't the mci_flent, we need to get it. 2417 */ 2418 if ((mcip = flent->fe_mcip) != NULL && 2419 mcip->mci_flent != flent) { 2420 FLOW_REFRELE(flent); 2421 flent = mcip->mci_flent; 2422 FLOW_TRY_REFHOLD(flent, err); 2423 if (err != 0) 2424 return (mp); 2425 } 2426 (flent->fe_cb_fn)(flent->fe_cb_arg1, flent->fe_cb_arg2, mp, 2427 B_FALSE); 2428 FLOW_REFRELE(flent); 2429 } 2430 return (NULL); 2431 } 2432 2433 mblk_t * 2434 mac_rx_flow(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) 2435 { 2436 mac_impl_t *mip = (mac_impl_t *)mh; 2437 mblk_t *bp, *bp1, **bpp, *list = NULL; 2438 2439 /* 2440 * We walk the chain and attempt to classify each packet. 2441 * The packets that couldn't be classified will be returned 2442 * to the caller. 2443 */ 2444 bp = mp_chain; 2445 bpp = &list; 2446 while (bp != NULL) { 2447 bp1 = bp; 2448 bp = bp->b_next; 2449 bp1->b_next = NULL; 2450 2451 if (mac_rx_classify(mip, mrh, bp1) != NULL) { 2452 *bpp = bp1; 2453 bpp = &bp1->b_next; 2454 } 2455 } 2456 return (list); 2457 } 2458 2459 static int 2460 mac_tx_flow_srs_wakeup(flow_entry_t *flent, void *arg) 2461 { 2462 mac_ring_handle_t ring = arg; 2463 2464 if (flent->fe_tx_srs) 2465 mac_tx_srs_wakeup(flent->fe_tx_srs, ring); 2466 return (0); 2467 } 2468 2469 void 2470 i_mac_tx_srs_notify(mac_impl_t *mip, mac_ring_handle_t ring) 2471 { 2472 mac_client_impl_t *cclient; 2473 mac_soft_ring_set_t *mac_srs; 2474 2475 /* 2476 * After grabbing the mi_rw_lock, the list of clients can't change. 2477 * If there are any clients, mi_disabled must be B_FALSE and can't 2478 * become set since there are clients. If there aren't any clients we 2479 * don't do anything. In any case the mip has to be valid. The driver 2480 * must make sure that it goes single threaded (with respect to mac 2481 * calls) and wait for all pending mac calls to finish before calling 2482 * mac_unregister. 2483 */ 2484 rw_enter(&i_mac_impl_lock, RW_READER); 2485 if (mip->mi_state_flags & MIS_DISABLED) { 2486 rw_exit(&i_mac_impl_lock); 2487 return; 2488 } 2489 2490 /* 2491 * Get the MAC Tx SRSes by walking the mac client list. 2492 */ 2493 rw_enter(&mip->mi_rw_lock, RW_READER); 2494 for (cclient = mip->mi_clients_list; cclient != NULL; 2495 cclient = cclient->mci_client_next) { 2496 if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) { 2497 mac_tx_srs_wakeup(mac_srs, ring); 2498 } else { 2499 /* 2500 * Aggr opens underlying ports in exclusive mode 2501 * and registers flow control callbacks using 2502 * mac_tx_client_notify(). When opened in 2503 * exclusive mode, Tx SRS won't be created 2504 * during mac_unicast_add().
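 * In that case the wakeup is delivered through those callbacks
 * instead, with the ring handle passed along as the mac_tx_cookie_t
 * (see the mac_tx_invoke_callbacks() call below).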
2505 */ 2506 if (cclient->mci_state_flags & MCIS_EXCLUSIVE) { 2507 mac_tx_invoke_callbacks(cclient, 2508 (mac_tx_cookie_t)ring); 2509 } 2510 } 2511 (void) mac_flow_walk(cclient->mci_subflow_tab, 2512 mac_tx_flow_srs_wakeup, ring); 2513 } 2514 rw_exit(&mip->mi_rw_lock); 2515 rw_exit(&i_mac_impl_lock); 2516 } 2517 2518 /* ARGSUSED */ 2519 void 2520 mac_multicast_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg, 2521 boolean_t add) 2522 { 2523 mac_impl_t *mip = (mac_impl_t *)mh; 2524 2525 i_mac_perim_enter((mac_impl_t *)mh); 2526 /* 2527 * If no specific refresh function was given then default to the 2528 * driver's m_multicst entry point. 2529 */ 2530 if (refresh == NULL) { 2531 refresh = mip->mi_multicst; 2532 arg = mip->mi_driver; 2533 } 2534 2535 mac_bcast_refresh(mip, refresh, arg, add); 2536 i_mac_perim_exit((mac_impl_t *)mh); 2537 } 2538 2539 void 2540 mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg) 2541 { 2542 mac_impl_t *mip = (mac_impl_t *)mh; 2543 2544 /* 2545 * If no specific refresh function was given then default to the 2546 * driver's m_promisc entry point. 2547 */ 2548 if (refresh == NULL) { 2549 refresh = mip->mi_setpromisc; 2550 arg = mip->mi_driver; 2551 } 2552 ASSERT(refresh != NULL); 2553 2554 /* 2555 * Call the refresh function with the current promiscuity. 2556 */ 2557 refresh(arg, (mip->mi_devpromisc != 0)); 2558 } 2559 2560 /* 2561 * The mac client requests that the mac not change its margin size to 2562 * be less than the specified value. If "current" is B_TRUE, then the client 2563 * requests that the mac not change its margin size to be smaller than the 2564 * current size. Further, the current margin size value is returned in this case. 2565 * 2566 * We keep every requested size in an ordered list from largest to smallest. 2567 */ 2568 int 2569 mac_margin_add(mac_handle_t mh, uint32_t *marginp, boolean_t current) 2570 { 2571 mac_impl_t *mip = (mac_impl_t *)mh; 2572 mac_margin_req_t **pp, *p; 2573 int err = 0; 2574 2575 rw_enter(&(mip->mi_rw_lock), RW_WRITER); 2576 if (current) 2577 *marginp = mip->mi_margin; 2578 2579 /* 2580 * If the current margin value cannot satisfy the margin requested, 2581 * return ENOTSUP directly. 2582 */ 2583 if (*marginp > mip->mi_margin) { 2584 err = ENOTSUP; 2585 goto done; 2586 } 2587 2588 /* 2589 * Check whether the given margin is already in the list. If so, 2590 * bump the reference count. 2591 */ 2592 for (pp = &mip->mi_mmrp; (p = *pp) != NULL; pp = &p->mmr_nextp) { 2593 if (p->mmr_margin == *marginp) { 2594 /* 2595 * The margin requested is already in the list, 2596 * so just bump the reference count. 2597 */ 2598 p->mmr_ref++; 2599 goto done; 2600 } 2601 if (p->mmr_margin < *marginp) 2602 break; 2603 } 2604 2605 2606 p = kmem_zalloc(sizeof (mac_margin_req_t), KM_SLEEP); 2607 p->mmr_margin = *marginp; 2608 p->mmr_ref++; 2609 p->mmr_nextp = *pp; 2610 *pp = p; 2611 2612 done: 2613 rw_exit(&(mip->mi_rw_lock)); 2614 return (err); 2615 } 2616 2617 /* 2618 * The mac client requests to cancel its previous mac_margin_add() request. 2619 * We remove the requested margin size from the list. 2620 */ 2621 int 2622 mac_margin_remove(mac_handle_t mh, uint32_t margin) 2623 { 2624 mac_impl_t *mip = (mac_impl_t *)mh; 2625 mac_margin_req_t **pp, *p; 2626 int err = 0; 2627 2628 rw_enter(&(mip->mi_rw_lock), RW_WRITER); 2629 /* 2630 * Find the entry in the list for the given margin.
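 * Each mac_margin_add() must be balanced by a remove: mmr_ref counts
 * how many clients requested a given margin, and the entry is freed
 * only when that count drops to zero.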
2631 */ 2632 for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) { 2633 if (p->mmr_margin == margin) { 2634 if (--p->mmr_ref == 0) 2635 break; 2636 2637 /* 2638 * There is still a reference to this margin so 2639 * there's nothing more to do. 2640 */ 2641 goto done; 2642 } 2643 } 2644 2645 /* 2646 * We did not find an entry for the given margin. 2647 */ 2648 if (p == NULL) { 2649 err = ENOENT; 2650 goto done; 2651 } 2652 2653 ASSERT(p->mmr_ref == 0); 2654 2655 /* 2656 * Remove it from the list. 2657 */ 2658 *pp = p->mmr_nextp; 2659 kmem_free(p, sizeof (mac_margin_req_t)); 2660 done: 2661 rw_exit(&(mip->mi_rw_lock)); 2662 return (err); 2663 } 2664 2665 boolean_t 2666 mac_margin_update(mac_handle_t mh, uint32_t margin) 2667 { 2668 mac_impl_t *mip = (mac_impl_t *)mh; 2669 uint32_t margin_needed = 0; 2670 2671 rw_enter(&(mip->mi_rw_lock), RW_WRITER); 2672 2673 if (mip->mi_mmrp != NULL) 2674 margin_needed = mip->mi_mmrp->mmr_margin; 2675 2676 if (margin_needed <= margin) 2677 mip->mi_margin = margin; 2678 2679 rw_exit(&(mip->mi_rw_lock)); 2680 2681 if (margin_needed <= margin) 2682 i_mac_notify(mip, MAC_NOTE_MARGIN); 2683 2684 return (margin_needed <= margin); 2685 } 2686 2687 /* 2688 * MAC Type Plugin functions. 2689 */ 2690 2691 mactype_t * 2692 mactype_getplugin(const char *pname) 2693 { 2694 mactype_t *mtype = NULL; 2695 boolean_t tried_modload = B_FALSE; 2696 2697 mutex_enter(&i_mactype_lock); 2698 2699 find_registered_mactype: 2700 if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname, 2701 (mod_hash_val_t *)&mtype) != 0) { 2702 if (!tried_modload) { 2703 /* 2704 * If the plugin has not yet been loaded, then 2705 * attempt to load it now. If modload() succeeds, 2706 * the plugin should have registered using 2707 * mactype_register(), in which case we can go back 2708 * and attempt to find it again. 2709 */ 2710 if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) { 2711 tried_modload = B_TRUE; 2712 goto find_registered_mactype; 2713 } 2714 } 2715 } else { 2716 /* 2717 * Note that there's no danger that the plugin we've loaded 2718 * could be unloaded between the modload() step and the 2719 * reference count bump here, as we're holding 2720 * i_mactype_lock, which mactype_unregister() also holds. 2721 */ 2722 atomic_inc_32(&mtype->mt_ref); 2723 } 2724 2725 mutex_exit(&i_mactype_lock); 2726 return (mtype); 2727 } 2728 2729 mactype_register_t * 2730 mactype_alloc(uint_t mactype_version) 2731 { 2732 mactype_register_t *mtrp; 2733 2734 /* 2735 * Make sure there isn't a version mismatch between the plugin and 2736 * the framework. In the future, if multiple versions are 2737 * supported, this check could become more sophisticated. 2738 */ 2739 if (mactype_version != MACTYPE_VERSION) 2740 return (NULL); 2741 2742 mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP); 2743 mtrp->mtr_version = mactype_version; 2744 return (mtrp); 2745 } 2746 2747 void 2748 mactype_free(mactype_register_t *mtrp) 2749 { 2750 kmem_free(mtrp, sizeof (mactype_register_t)); 2751 } 2752 2753 int 2754 mactype_register(mactype_register_t *mtrp) 2755 { 2756 mactype_t *mtp; 2757 mactype_ops_t *ops = mtrp->mtr_ops; 2758 2759 /* Do some sanity checking before we register this MAC type. */ 2760 if (mtrp->mtr_ident == NULL || ops == NULL) 2761 return (EINVAL); 2762 2763 /* 2764 * Verify that all mandatory callbacks are set in the ops 2765 * vector.
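 *
 * For illustration, a plugin's _init() typically registers itself
 * along these lines (a sketch; "myplugin" and my_ops are hypothetical):
 *
 *	mtrp = mactype_alloc(MACTYPE_VERSION);
 *	mtrp->mtr_ident = "myplugin";
 *	mtrp->mtr_ops = &my_ops;
 *	err = mactype_register(mtrp);
 *	mactype_free(mtrp);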
2766 */ 2767 if (ops->mtops_unicst_verify == NULL || 2768 ops->mtops_multicst_verify == NULL || 2769 ops->mtops_sap_verify == NULL || 2770 ops->mtops_header == NULL || 2771 ops->mtops_header_info == NULL) { 2772 return (EINVAL); 2773 } 2774 2775 mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP); 2776 mtp->mt_ident = mtrp->mtr_ident; 2777 mtp->mt_ops = *ops; 2778 mtp->mt_type = mtrp->mtr_mactype; 2779 mtp->mt_nativetype = mtrp->mtr_nativetype; 2780 mtp->mt_addr_length = mtrp->mtr_addrlen; 2781 if (mtrp->mtr_brdcst_addr != NULL) { 2782 mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP); 2783 bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr, 2784 mtrp->mtr_addrlen); 2785 } 2786 2787 mtp->mt_stats = mtrp->mtr_stats; 2788 mtp->mt_statcount = mtrp->mtr_statcount; 2789 2790 mtp->mt_mapping = mtrp->mtr_mapping; 2791 mtp->mt_mappingcount = mtrp->mtr_mappingcount; 2792 2793 if (mod_hash_insert(i_mactype_hash, 2794 (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) { 2795 if (mtp->mt_brdcst_addr != NULL) kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); 2796 kmem_free(mtp, sizeof (*mtp)); 2797 return (EEXIST); 2798 } 2799 return (0); 2800 } 2801 2802 int 2803 mactype_unregister(const char *ident) 2804 { 2805 mactype_t *mtp; 2806 mod_hash_val_t val; 2807 int err; 2808 2809 /* 2810 * Let's not allow MAC drivers to use this plugin while we're 2811 * trying to unregister it. Holding i_mactype_lock also prevents a 2812 * plugin from unregistering while a MAC driver is attempting to 2813 * hold a reference to it in mactype_getplugin(). 2814 */ 2815 mutex_enter(&i_mactype_lock); 2816 2817 if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident, 2818 (mod_hash_val_t *)&mtp)) != 0) { 2819 /* A plugin is trying to unregister, but it never registered. */ 2820 err = ENXIO; 2821 goto done; 2822 } 2823 2824 if (mtp->mt_ref != 0) { 2825 err = EBUSY; 2826 goto done; 2827 } 2828 2829 err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val); 2830 ASSERT(err == 0); 2831 if (err != 0) { 2832 /* This should never happen, thus the ASSERT() above. */ 2833 err = EINVAL; 2834 goto done; 2835 } 2836 ASSERT(mtp == (mactype_t *)val); 2837 2838 if (mtp->mt_brdcst_addr != NULL) 2839 kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); 2840 kmem_free(mtp, sizeof (mactype_t)); 2841 done: 2842 mutex_exit(&i_mactype_lock); 2843 return (err); 2844 } 2845 2846 /* 2847 * Checks the size of the value specified for a property as 2848 * part of a property operation. Returns B_TRUE if the size is 2849 * correct, B_FALSE otherwise.
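 *
 * For example, mac_prop_check_size(MAC_PROP_MTU, sizeof (uint32_t),
 * B_FALSE) is B_TRUE, while a zero-sized buffer for the same property
 * fails the check. For ranges, only sizeof (mac_propval_range_t)
 * matters, regardless of the property id.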
2850 */ 2851 boolean_t 2852 mac_prop_check_size(mac_prop_id_t id, uint_t valsize, boolean_t is_range) 2853 { 2854 uint_t minsize = 0; 2855 2856 if (is_range) 2857 return (valsize >= sizeof (mac_propval_range_t)); 2858 2859 switch (id) { 2860 case MAC_PROP_ZONE: 2861 minsize = sizeof (dld_ioc_zid_t); 2862 break; 2863 case MAC_PROP_AUTOPUSH: 2864 if (valsize != 0) 2865 minsize = sizeof (struct dlautopush); 2866 break; 2867 case MAC_PROP_TAGMODE: 2868 minsize = sizeof (link_tagmode_t); 2869 break; 2870 case MAC_PROP_RESOURCE: 2871 case MAC_PROP_RESOURCE_EFF: 2872 minsize = sizeof (mac_resource_props_t); 2873 break; 2874 case MAC_PROP_DUPLEX: 2875 minsize = sizeof (link_duplex_t); 2876 break; 2877 case MAC_PROP_SPEED: 2878 minsize = sizeof (uint64_t); 2879 break; 2880 case MAC_PROP_STATUS: 2881 minsize = sizeof (link_state_t); 2882 break; 2883 case MAC_PROP_AUTONEG: 2884 case MAC_PROP_EN_AUTONEG: 2885 minsize = sizeof (uint8_t); 2886 break; 2887 case MAC_PROP_MTU: 2888 case MAC_PROP_LLIMIT: 2889 case MAC_PROP_LDECAY: 2890 minsize = sizeof (uint32_t); 2891 break; 2892 case MAC_PROP_FLOWCTRL: 2893 minsize = sizeof (link_flowctrl_t); 2894 break; 2895 case MAC_PROP_ADV_10GFDX_CAP: 2896 case MAC_PROP_EN_10GFDX_CAP: 2897 case MAC_PROP_ADV_1000HDX_CAP: 2898 case MAC_PROP_EN_1000HDX_CAP: 2899 case MAC_PROP_ADV_100FDX_CAP: 2900 case MAC_PROP_EN_100FDX_CAP: 2901 case MAC_PROP_ADV_100HDX_CAP: 2902 case MAC_PROP_EN_100HDX_CAP: 2903 case MAC_PROP_ADV_10FDX_CAP: 2904 case MAC_PROP_EN_10FDX_CAP: 2905 case MAC_PROP_ADV_10HDX_CAP: 2906 case MAC_PROP_EN_10HDX_CAP: 2907 case MAC_PROP_ADV_100T4_CAP: 2908 case MAC_PROP_EN_100T4_CAP: 2909 minsize = sizeof (uint8_t); 2910 break; 2911 case MAC_PROP_PVID: 2912 minsize = sizeof (uint16_t); 2913 break; 2914 case MAC_PROP_IPTUN_HOPLIMIT: 2915 minsize = sizeof (uint32_t); 2916 break; 2917 case MAC_PROP_IPTUN_ENCAPLIMIT: 2918 minsize = sizeof (uint32_t); 2919 break; 2920 case MAC_PROP_MAX_TX_RINGS_AVAIL: 2921 case MAC_PROP_MAX_RX_RINGS_AVAIL: 2922 case MAC_PROP_MAX_RXHWCLNT_AVAIL: 2923 case MAC_PROP_MAX_TXHWCLNT_AVAIL: 2924 minsize = sizeof (uint_t); 2925 break; 2926 case MAC_PROP_WL_ESSID: 2927 minsize = sizeof (wl_linkstatus_t); 2928 break; 2929 case MAC_PROP_WL_BSSID: 2930 minsize = sizeof (wl_bssid_t); 2931 break; 2932 case MAC_PROP_WL_BSSTYPE: 2933 minsize = sizeof (wl_bss_type_t); 2934 break; 2935 case MAC_PROP_WL_LINKSTATUS: 2936 minsize = sizeof (wl_linkstatus_t); 2937 break; 2938 case MAC_PROP_WL_DESIRED_RATES: 2939 minsize = sizeof (wl_rates_t); 2940 break; 2941 case MAC_PROP_WL_SUPPORTED_RATES: 2942 minsize = sizeof (wl_rates_t); 2943 break; 2944 case MAC_PROP_WL_AUTH_MODE: 2945 minsize = sizeof (wl_authmode_t); 2946 break; 2947 case MAC_PROP_WL_ENCRYPTION: 2948 minsize = sizeof (wl_encryption_t); 2949 break; 2950 case MAC_PROP_WL_RSSI: 2951 minsize = sizeof (wl_rssi_t); 2952 break; 2953 case MAC_PROP_WL_PHY_CONFIG: 2954 minsize = sizeof (wl_phy_conf_t); 2955 break; 2956 case MAC_PROP_WL_CAPABILITY: 2957 minsize = sizeof (wl_capability_t); 2958 break; 2959 case MAC_PROP_WL_WPA: 2960 minsize = sizeof (wl_wpa_t); 2961 break; 2962 case MAC_PROP_WL_SCANRESULTS: 2963 minsize = sizeof (wl_wpa_ess_t); 2964 break; 2965 case MAC_PROP_WL_POWER_MODE: 2966 minsize = sizeof (wl_ps_mode_t); 2967 break; 2968 case MAC_PROP_WL_RADIO: 2969 minsize = sizeof (wl_radio_t); 2970 break; 2971 case MAC_PROP_WL_ESS_LIST: 2972 minsize = sizeof (wl_ess_list_t); 2973 break; 2974 case MAC_PROP_WL_KEY_TAB: 2975 minsize = sizeof (wl_wep_key_tab_t); 2976 break; 2977 case 
MAC_PROP_WL_CREATE_IBSS: 2978 minsize = sizeof (wl_create_ibss_t); 2979 break; 2980 case MAC_PROP_WL_SETOPTIE: 2981 minsize = sizeof (wl_wpa_ie_t); 2982 break; 2983 case MAC_PROP_WL_DELKEY: 2984 minsize = sizeof (wl_del_key_t); 2985 break; 2986 case MAC_PROP_WL_KEY: 2987 minsize = sizeof (wl_key_t); 2988 break; 2989 case MAC_PROP_WL_MLME: 2990 minsize = sizeof (wl_mlme_t); 2991 break; 2992 } 2993 2994 return (valsize >= minsize); 2995 } 2996 2997 /* 2998 * mac_set_prop() sets MAC or hardware driver properties: 2999 * 3000 * - MAC-managed properties such as resource properties include maxbw, 3001 * priority, and cpu binding list, as well as the default port VID 3002 * used by bridging. These properties are consumed by the MAC layer 3003 * itself and not passed down to the driver. For resource control 3004 * properties, this function invokes mac_set_resources() which will 3005 * cache the property value in mac_impl_t and may call 3006 * mac_client_set_resource() to update property value of the primary 3007 * mac client, if it exists. 3008 * 3009 * - Properties which act on the hardware and must be passed to the 3010 * driver, such as MTU, through the driver's mc_setprop() entry point. 3011 */ 3012 int 3013 mac_set_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, 3014 uint_t valsize) 3015 { 3016 int err = ENOTSUP; 3017 mac_impl_t *mip = (mac_impl_t *)mh; 3018 3019 ASSERT(MAC_PERIM_HELD(mh)); 3020 3021 switch (id) { 3022 case MAC_PROP_RESOURCE: { 3023 mac_resource_props_t *mrp; 3024 3025 /* call mac_set_resources() for MAC properties */ 3026 ASSERT(valsize >= sizeof (mac_resource_props_t)); 3027 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 3028 bcopy(val, mrp, sizeof (*mrp)); 3029 err = mac_set_resources(mh, mrp); 3030 kmem_free(mrp, sizeof (*mrp)); 3031 break; 3032 } 3033 3034 case MAC_PROP_PVID: 3035 ASSERT(valsize >= sizeof (uint16_t)); 3036 if (mip->mi_state_flags & MIS_IS_VNIC) 3037 return (EINVAL); 3038 err = mac_set_pvid(mh, *(uint16_t *)val); 3039 break; 3040 3041 case MAC_PROP_MTU: { 3042 uint32_t mtu; 3043 3044 ASSERT(valsize >= sizeof (uint32_t)); 3045 bcopy(val, &mtu, sizeof (mtu)); 3046 err = mac_set_mtu(mh, mtu, NULL); 3047 break; 3048 } 3049 3050 case MAC_PROP_LLIMIT: 3051 case MAC_PROP_LDECAY: { 3052 uint32_t learnval; 3053 3054 if (valsize < sizeof (learnval) || 3055 (mip->mi_state_flags & MIS_IS_VNIC)) 3056 return (EINVAL); 3057 bcopy(val, &learnval, sizeof (learnval)); 3058 if (learnval == 0 && id == MAC_PROP_LDECAY) 3059 return (EINVAL); 3060 if (id == MAC_PROP_LLIMIT) 3061 mip->mi_llimit = learnval; 3062 else 3063 mip->mi_ldecay = learnval; 3064 err = 0; 3065 break; 3066 } 3067 3068 default: 3069 /* For other driver properties, call driver's callback */ 3070 if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) { 3071 err = mip->mi_callbacks->mc_setprop(mip->mi_driver, 3072 name, id, valsize, val); 3073 } 3074 } 3075 return (err); 3076 } 3077 3078 /* 3079 * mac_get_prop() gets MAC or device driver properties. 3080 * 3081 * If the property is a driver property, mac_get_prop() calls driver's callback 3082 * entry point to get it. 3083 * If the property is a MAC property, mac_get_prop() invokes mac_get_resources() 3084 * which returns the cached value in mac_impl_t. 
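 *
 * For example, reading the current MTU could look like this sketch
 * (the name argument is only significant for driver-private
 * properties):
 *
 *	uint32_t mtu;
 *
 *	err = mac_get_prop(mh, MAC_PROP_MTU, "mtu", &mtu, sizeof (mtu));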
3085 */ 3086 int 3087 mac_get_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, 3088 uint_t valsize) 3089 { 3090 int err = ENOTSUP; 3091 mac_impl_t *mip = (mac_impl_t *)mh; 3092 uint_t rings; 3093 uint_t vlinks; 3094 3095 bzero(val, valsize); 3096 3097 switch (id) { 3098 case MAC_PROP_RESOURCE: { 3099 mac_resource_props_t *mrp; 3100 3101 /* If mac property, read from cache */ 3102 ASSERT(valsize >= sizeof (mac_resource_props_t)); 3103 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 3104 mac_get_resources(mh, mrp); 3105 bcopy(mrp, val, sizeof (*mrp)); 3106 kmem_free(mrp, sizeof (*mrp)); 3107 return (0); 3108 } 3109 case MAC_PROP_RESOURCE_EFF: { 3110 mac_resource_props_t *mrp; 3111 3112 /* If mac effective property, read from client */ 3113 ASSERT(valsize >= sizeof (mac_resource_props_t)); 3114 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 3115 mac_get_effective_resources(mh, mrp); 3116 bcopy(mrp, val, sizeof (*mrp)); 3117 kmem_free(mrp, sizeof (*mrp)); 3118 return (0); 3119 } 3120 3121 case MAC_PROP_PVID: 3122 ASSERT(valsize >= sizeof (uint16_t)); 3123 if (mip->mi_state_flags & MIS_IS_VNIC) 3124 return (EINVAL); 3125 *(uint16_t *)val = mac_get_pvid(mh); 3126 return (0); 3127 3128 case MAC_PROP_LLIMIT: 3129 case MAC_PROP_LDECAY: 3130 ASSERT(valsize >= sizeof (uint32_t)); 3131 if (mip->mi_state_flags & MIS_IS_VNIC) 3132 return (EINVAL); 3133 if (id == MAC_PROP_LLIMIT) 3134 bcopy(&mip->mi_llimit, val, sizeof (mip->mi_llimit)); 3135 else 3136 bcopy(&mip->mi_ldecay, val, sizeof (mip->mi_ldecay)); 3137 return (0); 3138 3139 case MAC_PROP_MTU: { 3140 uint32_t sdu; 3141 3142 ASSERT(valsize >= sizeof (uint32_t)); 3143 mac_sdu_get2(mh, NULL, &sdu, NULL); 3144 bcopy(&sdu, val, sizeof (sdu)); 3145 3146 return (0); 3147 } 3148 case MAC_PROP_STATUS: { 3149 link_state_t link_state; 3150 3151 if (valsize < sizeof (link_state)) 3152 return (EINVAL); 3153 link_state = mac_link_get(mh); 3154 bcopy(&link_state, val, sizeof (link_state)); 3155 3156 return (0); 3157 } 3158 3159 case MAC_PROP_MAX_RX_RINGS_AVAIL: 3160 case MAC_PROP_MAX_TX_RINGS_AVAIL: 3161 ASSERT(valsize >= sizeof (uint_t)); 3162 rings = id == MAC_PROP_MAX_RX_RINGS_AVAIL ? 3163 mac_rxavail_get(mh) : mac_txavail_get(mh); 3164 bcopy(&rings, val, sizeof (uint_t)); 3165 return (0); 3166 3167 case MAC_PROP_MAX_RXHWCLNT_AVAIL: 3168 case MAC_PROP_MAX_TXHWCLNT_AVAIL: 3169 ASSERT(valsize >= sizeof (uint_t)); 3170 vlinks = id == MAC_PROP_MAX_RXHWCLNT_AVAIL ? 3171 mac_rxhwlnksavail_get(mh) : mac_txhwlnksavail_get(mh); 3172 bcopy(&vlinks, val, sizeof (uint_t)); 3173 return (0); 3174 3175 case MAC_PROP_RXRINGSRANGE: 3176 case MAC_PROP_TXRINGSRANGE: 3177 /* 3178 * The values for these properties are returned through 3179 * the MAC_PROP_RESOURCE property. 3180 */ 3181 return (0); 3182 3183 default: 3184 break; 3185 3186 } 3187 3188 /* If driver property, request from driver */ 3189 if (mip->mi_callbacks->mc_callbacks & MC_GETPROP) { 3190 err = mip->mi_callbacks->mc_getprop(mip->mi_driver, name, id, 3191 valsize, val); 3192 } 3193 3194 return (err); 3195 } 3196 3197 /* 3198 * Helper function to initialize the range structure for use in 3199 * mac_prop_info(). If the type can be other than uint32, we can 3200 * pass that as an arg.
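 * For example, _mac_set_range(range, 1, 7) describes a single uint32
 * span [1, 7] with an mpr_count of 1.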
3201 */ 3202 static void 3203 _mac_set_range(mac_propval_range_t *range, uint32_t min, uint32_t max) 3204 { 3205 range->mpr_count = 1; 3206 range->mpr_type = MAC_PROPVAL_UINT32; 3207 range->mpr_range_uint32[0].mpur_min = min; 3208 range->mpr_range_uint32[0].mpur_max = max; 3209 } 3210 3211 /* 3212 * Returns information about the specified property, such as default 3213 * values or permissions. 3214 */ 3215 int 3216 mac_prop_info(mac_handle_t mh, mac_prop_id_t id, char *name, 3217 void *default_val, uint_t default_size, mac_propval_range_t *range, 3218 uint_t *perm) 3219 { 3220 mac_prop_info_state_t state; 3221 mac_impl_t *mip = (mac_impl_t *)mh; 3222 uint_t max; 3223 3224 /* 3225 * A property is read/write by default unless the driver says 3226 * otherwise. 3227 */ 3228 if (perm != NULL) 3229 *perm = MAC_PROP_PERM_RW; 3230 3231 if (default_val != NULL) 3232 bzero(default_val, default_size); 3233 3234 /* 3235 * First, handle framework properties for which we don't need to 3236 * involve the driver. 3237 */ 3238 switch (id) { 3239 case MAC_PROP_RESOURCE: 3240 case MAC_PROP_PVID: 3241 case MAC_PROP_LLIMIT: 3242 case MAC_PROP_LDECAY: 3243 return (0); 3244 3245 case MAC_PROP_MAX_RX_RINGS_AVAIL: 3246 case MAC_PROP_MAX_TX_RINGS_AVAIL: 3247 case MAC_PROP_MAX_RXHWCLNT_AVAIL: 3248 case MAC_PROP_MAX_TXHWCLNT_AVAIL: 3249 if (perm != NULL) 3250 *perm = MAC_PROP_PERM_READ; 3251 return (0); 3252 3253 case MAC_PROP_RXRINGSRANGE: 3254 case MAC_PROP_TXRINGSRANGE: 3255 /* 3256 * Currently, we support range for RX and TX rings properties. 3257 * When we extend this support to maxbw, cpus and priority, 3258 * we should move this to mac_get_resources. 3259 * There is no default value for RX or TX rings. 3260 */ 3261 if ((mip->mi_state_flags & MIS_IS_VNIC) && 3262 mac_is_vnic_primary(mh)) { 3263 /* 3264 * We don't support setting rings for a VLAN 3265 * data link because it shares its ring with the 3266 * primary MAC client. 3267 */ 3268 if (perm != NULL) 3269 *perm = MAC_PROP_PERM_READ; 3270 if (range != NULL) 3271 range->mpr_count = 0; 3272 } else if (range != NULL) { 3273 if (mip->mi_state_flags & MIS_IS_VNIC) 3274 mh = mac_get_lower_mac_handle(mh); 3275 mip = (mac_impl_t *)mh; 3276 if ((id == MAC_PROP_RXRINGSRANGE && 3277 mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) || 3278 (id == MAC_PROP_TXRINGSRANGE && 3279 mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC)) { 3280 if (id == MAC_PROP_RXRINGSRANGE) { 3281 if ((mac_rxhwlnksavail_get(mh) + 3282 mac_rxhwlnksrsvd_get(mh)) <= 1) { 3283 /* 3284 * doesn't support groups or 3285 * rings 3286 */ 3287 range->mpr_count = 0; 3288 } else { 3289 /* 3290 * supports specifying groups, 3291 * but not rings 3292 */ 3293 _mac_set_range(range, 0, 0); 3294 } 3295 } else { 3296 if ((mac_txhwlnksavail_get(mh) + 3297 mac_txhwlnksrsvd_get(mh)) <= 1) { 3298 /* 3299 * doesn't support groups or 3300 * rings 3301 */ 3302 range->mpr_count = 0; 3303 } else { 3304 /* 3305 * supports specifying groups, 3306 * but not rings 3307 */ 3308 _mac_set_range(range, 0, 0); 3309 } 3310 } 3311 } else { 3312 max = id == MAC_PROP_RXRINGSRANGE ? 3313 mac_rxavail_get(mh) + mac_rxrsvd_get(mh) : 3314 mac_txavail_get(mh) + mac_txrsvd_get(mh); 3315 if (max <= 1) { 3316 /* 3317 * doesn't support groups or 3318 * rings 3319 */ 3320 range->mpr_count = 0; 3321 } else { 3322 /* 3323 * -1 because we have to leave out the 3324 * default ring. 
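 * For example, with 8 rings in total (max == 8),
 * the advertised range is [1, 7].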
3325 */ 3326 _mac_set_range(range, 1, max - 1); 3327 } 3328 } 3329 } 3330 return (0); 3331 3332 case MAC_PROP_STATUS: 3333 if (perm != NULL) 3334 *perm = MAC_PROP_PERM_READ; 3335 return (0); 3336 } 3337 3338 /* 3339 * Get the property info from the driver if it implements the 3340 * property info entry point. 3341 */ 3342 bzero(&state, sizeof (state)); 3343 3344 if (mip->mi_callbacks->mc_callbacks & MC_PROPINFO) { 3345 state.pr_default = default_val; 3346 state.pr_default_size = default_size; 3347 3348 /* 3349 * The caller specifies the maximum number of ranges 3350 * it can accommodate using mpr_count. We don't touch 3351 * this value until the driver returns from its 3352 * mc_propinfo() callback, and ensure we don't exceed 3353 * that number of ranges as the driver defines the 3354 * supported ranges from its mc_propinfo(). 3355 * 3356 * pr_range_cur_count keeps track of how many ranges 3357 * were defined by the driver from its mc_propinfo() 3358 * entry point. 3359 * 3360 * On exit, the user-specified range mpr_count returns 3361 * the number of ranges specified by the driver on 3362 * success, or the number of ranges it wanted to 3363 * define if that number of ranges could not be 3364 * accommodated by the specified range structure. In 3365 * the latter case, the caller will be able to 3366 * allocate a larger range structure, and query the 3367 * property again. 3368 */ 3369 state.pr_range_cur_count = 0; 3370 state.pr_range = range; 3371 3372 mip->mi_callbacks->mc_propinfo(mip->mi_driver, name, id, 3373 (mac_prop_info_handle_t)&state); 3374 3375 if (state.pr_flags & MAC_PROP_INFO_RANGE) 3376 range->mpr_count = state.pr_range_cur_count; 3377 3378 /* 3379 * The operation could fail if the buffer supplied by 3380 * the user was too small for the range or default 3381 * value of the property. 3382 */ 3383 if (state.pr_errno != 0) 3384 return (state.pr_errno); 3385 3386 if (perm != NULL && state.pr_flags & MAC_PROP_INFO_PERM) 3387 *perm = state.pr_perm; 3388 } 3389 3390 /* 3391 * The MAC layer may want to provide default values or allowed 3392 * ranges for properties if the driver does not provide a 3393 * property info entry point, or that entry point exists, but 3394 * it did not provide a default value or allowed ranges for 3395 * that property.
3396 */ 3397 switch (id) { 3398 case MAC_PROP_MTU: { 3399 uint32_t sdu; 3400 3401 mac_sdu_get2(mh, NULL, &sdu, NULL); 3402 3403 if (range != NULL && !(state.pr_flags & 3404 MAC_PROP_INFO_RANGE)) { 3405 /* MTU range */ 3406 _mac_set_range(range, sdu, sdu); 3407 } 3408 3409 if (default_val != NULL && !(state.pr_flags & 3410 MAC_PROP_INFO_DEFAULT)) { 3411 if (mip->mi_info.mi_media == DL_ETHER) 3412 sdu = ETHERMTU; 3413 /* default MTU value */ 3414 bcopy(&sdu, default_val, sizeof (sdu)); 3415 } 3416 } 3417 } 3418 3419 return (0); 3420 } 3421 3422 int 3423 mac_fastpath_disable(mac_handle_t mh) 3424 { 3425 mac_impl_t *mip = (mac_impl_t *)mh; 3426 3427 if ((mip->mi_state_flags & MIS_LEGACY) == 0) 3428 return (0); 3429 3430 return (mip->mi_capab_legacy.ml_fastpath_disable(mip->mi_driver)); 3431 } 3432 3433 void 3434 mac_fastpath_enable(mac_handle_t mh) 3435 { 3436 mac_impl_t *mip = (mac_impl_t *)mh; 3437 3438 if ((mip->mi_state_flags & MIS_LEGACY) == 0) 3439 return; 3440 3441 mip->mi_capab_legacy.ml_fastpath_enable(mip->mi_driver); 3442 } 3443 3444 void 3445 mac_register_priv_prop(mac_impl_t *mip, char **priv_props) 3446 { 3447 uint_t nprops, i; 3448 3449 if (priv_props == NULL) 3450 return; 3451 3452 nprops = 0; 3453 while (priv_props[nprops] != NULL) 3454 nprops++; 3455 if (nprops == 0) 3456 return; 3457 3458 3459 mip->mi_priv_prop = kmem_zalloc(nprops * sizeof (char *), KM_SLEEP); 3460 3461 for (i = 0; i < nprops; i++) { 3462 mip->mi_priv_prop[i] = kmem_zalloc(MAXLINKPROPNAME, KM_SLEEP); 3463 (void) strlcpy(mip->mi_priv_prop[i], priv_props[i], 3464 MAXLINKPROPNAME); 3465 } 3466 3467 mip->mi_priv_prop_count = nprops; 3468 } 3469 3470 void 3471 mac_unregister_priv_prop(mac_impl_t *mip) 3472 { 3473 uint_t i; 3474 3475 if (mip->mi_priv_prop_count == 0) { 3476 ASSERT(mip->mi_priv_prop == NULL); 3477 return; 3478 } 3479 3480 for (i = 0; i < mip->mi_priv_prop_count; i++) 3481 kmem_free(mip->mi_priv_prop[i], MAXLINKPROPNAME); 3482 kmem_free(mip->mi_priv_prop, mip->mi_priv_prop_count * 3483 sizeof (char *)); 3484 3485 mip->mi_priv_prop = NULL; 3486 mip->mi_priv_prop_count = 0; 3487 } 3488 3489 /* 3490 * mac_ring_t 'mr' macros. Some rogue drivers may access the ring structure 3491 * (by invoking mac_rx()) even after processing mac_stop_ring(). In such 3492 * cases if MAC frees the ring structure after mac_stop_ring(), any 3493 * illegal access to the ring structure coming from the driver will panic 3494 * the system. In order to protect the system from such inadvertent access, 3495 * we maintain a cache of rings in the mac_impl_t after they get freed up. 3496 * When packets are received on freed rings, MAC (through the generation 3497 * count mechanism) will drop such packets.
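 *
 * Illustratively (a sketch of the mechanism, not the exact receive
 * path code), a stale delivery can be recognized as:
 *
 *	if (gen_num != ring->mr_gen_num)
 *		drop the packet
 *
 * where gen_num is the generation the driver started the ring with
 * (see mac_start_ring() below); mac_stop_ring() bumps mr_gen_num,
 * which invalidates deliveries tagged with the old generation.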
3498 */ 3499 static mac_ring_t * 3500 mac_ring_alloc(mac_impl_t *mip) 3501 { 3502 mac_ring_t *ring; 3503 3504 mutex_enter(&mip->mi_ring_lock); 3505 if (mip->mi_ring_freelist != NULL) { 3506 ring = mip->mi_ring_freelist; 3507 mip->mi_ring_freelist = ring->mr_next; 3508 bzero(ring, sizeof (mac_ring_t)); 3509 mutex_exit(&mip->mi_ring_lock); 3510 } else { 3511 mutex_exit(&mip->mi_ring_lock); 3512 ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP); 3513 } 3514 ASSERT((ring != NULL) && (ring->mr_state == MR_FREE)); 3515 return (ring); 3516 } 3517 3518 static void 3519 mac_ring_free(mac_impl_t *mip, mac_ring_t *ring) 3520 { 3521 ASSERT(ring->mr_state == MR_FREE); 3522 3523 mutex_enter(&mip->mi_ring_lock); 3524 ring->mr_state = MR_FREE; 3525 ring->mr_flag = 0; 3526 ring->mr_next = mip->mi_ring_freelist; 3527 ring->mr_mip = NULL; 3528 mip->mi_ring_freelist = ring; 3529 mac_ring_stat_delete(ring); 3530 mutex_exit(&mip->mi_ring_lock); 3531 } 3532 3533 static void 3534 mac_ring_freeall(mac_impl_t *mip) 3535 { 3536 mac_ring_t *ring_next; 3537 mutex_enter(&mip->mi_ring_lock); 3538 mac_ring_t *ring = mip->mi_ring_freelist; 3539 while (ring != NULL) { 3540 ring_next = ring->mr_next; 3541 kmem_cache_free(mac_ring_cache, ring); 3542 ring = ring_next; 3543 } 3544 mip->mi_ring_freelist = NULL; 3545 mutex_exit(&mip->mi_ring_lock); 3546 } 3547 3548 int 3549 mac_start_ring(mac_ring_t *ring) 3550 { 3551 int rv = 0; 3552 3553 ASSERT(ring->mr_state == MR_FREE); 3554 3555 if (ring->mr_start != NULL) { 3556 rv = ring->mr_start(ring->mr_driver, ring->mr_gen_num); 3557 if (rv != 0) 3558 return (rv); 3559 } 3560 3561 ring->mr_state = MR_INUSE; 3562 return (rv); 3563 } 3564 3565 void 3566 mac_stop_ring(mac_ring_t *ring) 3567 { 3568 ASSERT(ring->mr_state == MR_INUSE); 3569 3570 if (ring->mr_stop != NULL) 3571 ring->mr_stop(ring->mr_driver); 3572 3573 ring->mr_state = MR_FREE; 3574 3575 /* 3576 * Increment the ring generation number for this ring. 3577 */ 3578 ring->mr_gen_num++; 3579 } 3580 3581 int 3582 mac_start_group(mac_group_t *group) 3583 { 3584 int rv = 0; 3585 3586 if (group->mrg_start != NULL) 3587 rv = group->mrg_start(group->mrg_driver); 3588 3589 return (rv); 3590 } 3591 3592 void 3593 mac_stop_group(mac_group_t *group) 3594 { 3595 if (group->mrg_stop != NULL) 3596 group->mrg_stop(group->mrg_driver); 3597 } 3598 3599 /* 3600 * Called from mac_start() on the default Rx group. Broadcast and multicast 3601 * packets are received only on the default group. Hence the default group 3602 * needs to be up even if the primary client is not up, for the other groups 3603 * to be functional. We do this by calling this function at mac_start time 3604 * itself. However the broadcast packets that are received can't make their 3605 * way beyond mac_rx until a mac client creates a broadcast flow. 
3606 */ 3607 static int 3608 mac_start_group_and_rings(mac_group_t *group) 3609 { 3610 mac_ring_t *ring; 3611 int rv = 0; 3612 3613 ASSERT(group->mrg_state == MAC_GROUP_STATE_REGISTERED); 3614 if ((rv = mac_start_group(group)) != 0) 3615 return (rv); 3616 3617 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 3618 ASSERT(ring->mr_state == MR_FREE); 3619 if ((rv = mac_start_ring(ring)) != 0) 3620 goto error; 3621 ring->mr_classify_type = MAC_SW_CLASSIFIER; 3622 } 3623 return (0); 3624 3625 error: 3626 mac_stop_group_and_rings(group); 3627 return (rv); 3628 } 3629 3630 /* Called from mac_stop on the default Rx group */ 3631 static void 3632 mac_stop_group_and_rings(mac_group_t *group) 3633 { 3634 mac_ring_t *ring; 3635 3636 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 3637 if (ring->mr_state != MR_FREE) { 3638 mac_stop_ring(ring); 3639 ring->mr_flag = 0; 3640 ring->mr_classify_type = MAC_NO_CLASSIFIER; 3641 } 3642 } 3643 mac_stop_group(group); 3644 } 3645 3646 3647 static mac_ring_t * 3648 mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index, 3649 mac_capab_rings_t *cap_rings) 3650 { 3651 mac_ring_t *ring, *rnext; 3652 mac_ring_info_t ring_info; 3653 ddi_intr_handle_t ddi_handle; 3654 3655 ring = mac_ring_alloc(mip); 3656 3657 /* Prepare basic information of ring */ 3658 3659 /* 3660 * The ring index is numbered to be unique across a particular device. 3661 * Ring index computation makes the following assumptions: 3662 * - For drivers with static grouping (e.g. ixgbe, bge), 3663 * the ring index exchanged with the driver (e.g. during mr_rget) 3664 * is unique only across the group the ring belongs to. 3665 * - Drivers with dynamic grouping (e.g. nxge) start 3666 * with a single group (mrg_index = 0). 3667 */ 3668 ring->mr_index = group->mrg_index * group->mrg_info.mgi_count + index; 3669 ring->mr_type = group->mrg_type; 3670 ring->mr_gh = (mac_group_handle_t)group; 3671 3672 /* Insert the new ring to the list. */ 3673 ring->mr_next = group->mrg_rings; 3674 group->mrg_rings = ring; 3675 3676 /* Zero to reuse the info data structure */ 3677 bzero(&ring_info, sizeof (ring_info)); 3678 3679 /* Query ring information from driver */ 3680 cap_rings->mr_rget(mip->mi_driver, group->mrg_type, group->mrg_index, 3681 index, &ring_info, (mac_ring_handle_t)ring); 3682 3683 ring->mr_info = ring_info; 3684 3685 /* 3686 * The interrupt handle could be shared among multiple rings. 3687 * Thus if there are a bunch of rings sharing an 3688 * interrupt, then only one ring among the bunch will be made 3689 * available for interrupt re-targeting; the rest will have the 3690 * ddi_shared flag set to TRUE and will not be available for 3691 * interrupt re-targeting. 3692 */ 3693 if ((ddi_handle = ring_info.mri_intr.mi_ddi_handle) != NULL) { 3694 rnext = ring->mr_next; 3695 while (rnext != NULL) { 3696 if (rnext->mr_info.mri_intr.mi_ddi_handle == 3697 ddi_handle) { 3698 /* 3699 * If the default ring (mr_index == 0) is part 3700 * of a group of rings sharing an 3701 * interrupt, then set the ddi_shared flag for 3702 * the default ring and give another ring 3703 * the chance to be re-targeted. 3704 */ 3705 if (rnext->mr_index == 0 && 3706 !rnext->mr_info.mri_intr.mi_ddi_shared) { 3707 rnext->mr_info.mri_intr.mi_ddi_shared = 3708 B_TRUE; 3709 } else { 3710 ring->mr_info.mri_intr.mi_ddi_shared = 3711 B_TRUE; 3712 } 3713 break; 3714 } 3715 rnext = rnext->mr_next; 3716 } 3717 /* 3718 * If rnext is NULL, then no matching ddi_handle was found. 3719 * Rx rings get registered first.
So if this is a Tx ring, 3720 * then go through all the Rx rings and see if there is a 3721 * matching ddi handle. 3722 */ 3723 if (rnext == NULL && ring->mr_type == MAC_RING_TYPE_TX) { 3724 mac_compare_ddi_handle(mip->mi_rx_groups, 3725 mip->mi_rx_group_count, ring); 3726 } 3727 } 3728 3729 /* Update ring's status */ 3730 ring->mr_state = MR_FREE; 3731 ring->mr_flag = 0; 3732 3733 /* Update the ring count of the group */ 3734 group->mrg_cur_count++; 3735 3736 /* Create per ring kstats */ 3737 if (ring->mr_stat != NULL) { 3738 ring->mr_mip = mip; 3739 mac_ring_stat_create(ring); 3740 } 3741 3742 return (ring); 3743 } 3744 3745 /* 3746 * Rings are chained together for easy regrouping. 3747 */ 3748 static void 3749 mac_init_group(mac_impl_t *mip, mac_group_t *group, int size, 3750 mac_capab_rings_t *cap_rings) 3751 { 3752 int index; 3753 3754 /* 3755 * Initialize all ring members of this group. Size of zero will not 3756 * enter the loop, so it's safe for initializing an empty group. 3757 */ 3758 for (index = size - 1; index >= 0; index--) 3759 (void) mac_init_ring(mip, group, index, cap_rings); 3760 } 3761 3762 int 3763 mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) 3764 { 3765 mac_capab_rings_t *cap_rings; 3766 mac_group_t *group; 3767 mac_group_t *groups; 3768 mac_group_info_t group_info; 3769 uint_t group_free = 0; 3770 uint_t ring_left; 3771 mac_ring_t *ring; 3772 int g; 3773 int err = 0; 3774 uint_t grpcnt; 3775 boolean_t pseudo_txgrp = B_FALSE; 3776 3777 switch (rtype) { 3778 case MAC_RING_TYPE_RX: 3779 ASSERT(mip->mi_rx_groups == NULL); 3780 3781 cap_rings = &mip->mi_rx_rings_cap; 3782 cap_rings->mr_type = MAC_RING_TYPE_RX; 3783 break; 3784 case MAC_RING_TYPE_TX: 3785 ASSERT(mip->mi_tx_groups == NULL); 3786 3787 cap_rings = &mip->mi_tx_rings_cap; 3788 cap_rings->mr_type = MAC_RING_TYPE_TX; 3789 break; 3790 default: 3791 ASSERT(B_FALSE); 3792 } 3793 3794 if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, cap_rings)) 3795 return (0); 3796 grpcnt = cap_rings->mr_gnum; 3797 3798 /* 3799 * If we have multiple TX rings, but only one TX group, we can 3800 * create pseudo TX groups (one per TX ring) in the MAC layer, 3801 * except for an aggr. For an aggr currently we maintain only 3802 * one group with all the rings (for all its ports); going 3803 * forward we might change this. 3804 */ 3805 if (rtype == MAC_RING_TYPE_TX && 3806 cap_rings->mr_gnum == 0 && cap_rings->mr_rnum > 0 && 3807 (mip->mi_state_flags & MIS_IS_AGGR) == 0) { 3808 /* 3809 * The -1 here is because we create a default TX group 3810 * with all the rings in it. 3811 */ 3812 grpcnt = cap_rings->mr_rnum - 1; 3813 pseudo_txgrp = B_TRUE; 3814 } 3815 3816 /* 3817 * Allocate a contiguous buffer for all groups. 3818 */ 3819 groups = kmem_zalloc(sizeof (mac_group_t) * (grpcnt + 1), KM_SLEEP); 3820 3821 ring_left = cap_rings->mr_rnum; 3822 3823 /* 3824 * Get all ring groups if any, and get their ring members 3825 * if any. 3826 */ 3827 for (g = 0; g < grpcnt; g++) { 3828 group = groups + g; 3829 3830 /* Prepare basic information of the group */ 3831 group->mrg_index = g; 3832 group->mrg_type = rtype; 3833 group->mrg_state = MAC_GROUP_STATE_UNINIT; 3834 group->mrg_mh = (mac_handle_t)mip; 3835 group->mrg_next = group + 1; 3836 3837 /* Zero to reuse the info data structure */ 3838 bzero(&group_info, sizeof (group_info)); 3839 3840 if (pseudo_txgrp) { 3841 /* 3842 * This is a pseudo group that we created; apart 3843 * from setting the state there is nothing to be 3844 * done.
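 * For example, a driver with 4 Tx rings and no Tx groups gets
 * grpcnt = 3 pseudo groups here, while the default group created
 * below initially keeps all 4 rings.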
             */
            group->mrg_state = MAC_GROUP_STATE_REGISTERED;
            group_free++;
            continue;
        }
        /* Query group information from driver */
        cap_rings->mr_gget(mip->mi_driver, rtype, g, &group_info,
            (mac_group_handle_t)group);

        switch (cap_rings->mr_group_type) {
        case MAC_GROUP_TYPE_DYNAMIC:
            if (cap_rings->mr_gaddring == NULL ||
                cap_rings->mr_gremring == NULL) {
                DTRACE_PROBE3(
                    mac__init__rings_no_addremring,
                    char *, mip->mi_name,
                    mac_group_add_ring_t,
                    cap_rings->mr_gaddring,
                    mac_group_add_ring_t,
                    cap_rings->mr_gremring);
                err = EINVAL;
                goto bail;
            }

            switch (rtype) {
            case MAC_RING_TYPE_RX:
                /*
                 * The first RX group must have non-zero
                 * rings, and the following groups must
                 * have zero rings.
                 */
                if (g == 0 && group_info.mgi_count == 0) {
                    DTRACE_PROBE1(
                        mac__init__rings__rx__def__zero,
                        char *, mip->mi_name);
                    err = EINVAL;
                    goto bail;
                }
                if (g > 0 && group_info.mgi_count != 0) {
                    DTRACE_PROBE3(
                        mac__init__rings__rx__nonzero,
                        char *, mip->mi_name,
                        int, g, int, group_info.mgi_count);
                    err = EINVAL;
                    goto bail;
                }
                break;
            case MAC_RING_TYPE_TX:
                /*
                 * All TX ring groups must have zero rings.
                 */
                if (group_info.mgi_count != 0) {
                    DTRACE_PROBE3(
                        mac__init__rings__tx__nonzero,
                        char *, mip->mi_name,
                        int, g, int, group_info.mgi_count);
                    err = EINVAL;
                    goto bail;
                }
                break;
            }
            break;
        case MAC_GROUP_TYPE_STATIC:
            /*
             * Note that an empty group is allowed, e.g., an aggr
             * would start with an empty group.
             */
            break;
        default:
            /* unknown group type */
            DTRACE_PROBE2(mac__init__rings__unknown__type,
                char *, mip->mi_name,
                int, cap_rings->mr_group_type);
            err = EINVAL;
            goto bail;
        }

        /*
         * Driver must register group->mgi_addmac/remmac() for rx groups
         * to support multiple MAC addresses.
         */
        if (rtype == MAC_RING_TYPE_RX) {
            if ((group_info.mgi_addmac == NULL) ||
                (group_info.mgi_remmac == NULL)) {
                err = EINVAL;
                goto bail;
            }
        }

        /* Cache driver-supplied information */
        group->mrg_info = group_info;

        /* Update the group's status and group count. */
        mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED);
        group_free++;

        group->mrg_rings = NULL;
        group->mrg_cur_count = 0;
        mac_init_group(mip, group, group_info.mgi_count, cap_rings);
        ring_left -= group_info.mgi_count;

        /* The current group size should be equal to the default value */
        ASSERT(group->mrg_cur_count == group_info.mgi_count);
    }

    /* Build up a dummy group for free resources as a pool */
    group = groups + grpcnt;

    /* Prepare basic information of the group */
    group->mrg_index = -1;
    group->mrg_type = rtype;
    group->mrg_state = MAC_GROUP_STATE_UNINIT;
    group->mrg_mh = (mac_handle_t)mip;
    group->mrg_next = NULL;

    /*
     * If there are ungrouped rings, allocate a contiguous buffer for
     * remaining resources.
3963 */ 3964 if (ring_left != 0) { 3965 group->mrg_rings = NULL; 3966 group->mrg_cur_count = 0; 3967 mac_init_group(mip, group, ring_left, cap_rings); 3968 3969 /* The current group size should be equal to ring_left */ 3970 ASSERT(group->mrg_cur_count == ring_left); 3971 3972 ring_left = 0; 3973 3974 /* Update this group's status */ 3975 mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED); 3976 } else 3977 group->mrg_rings = NULL; 3978 3979 ASSERT(ring_left == 0); 3980 3981 bail: 3982 3983 /* Cache other important information to finalize the initialization */ 3984 switch (rtype) { 3985 case MAC_RING_TYPE_RX: 3986 mip->mi_rx_group_type = cap_rings->mr_group_type; 3987 mip->mi_rx_group_count = cap_rings->mr_gnum; 3988 mip->mi_rx_groups = groups; 3989 mip->mi_rx_donor_grp = groups; 3990 if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 3991 /* 3992 * The default ring is reserved since it is 3993 * used for sending the broadcast etc. packets. 3994 */ 3995 mip->mi_rxrings_avail = 3996 mip->mi_rx_groups->mrg_cur_count - 1; 3997 mip->mi_rxrings_rsvd = 1; 3998 } 3999 /* 4000 * The default group cannot be reserved. It is used by 4001 * all the clients that do not have an exclusive group. 4002 */ 4003 mip->mi_rxhwclnt_avail = mip->mi_rx_group_count - 1; 4004 mip->mi_rxhwclnt_used = 1; 4005 break; 4006 case MAC_RING_TYPE_TX: 4007 mip->mi_tx_group_type = pseudo_txgrp ? MAC_GROUP_TYPE_DYNAMIC : 4008 cap_rings->mr_group_type; 4009 mip->mi_tx_group_count = grpcnt; 4010 mip->mi_tx_group_free = group_free; 4011 mip->mi_tx_groups = groups; 4012 4013 group = groups + grpcnt; 4014 ring = group->mrg_rings; 4015 /* 4016 * The ring can be NULL in the case of aggr. Aggr will 4017 * have an empty Tx group which will get populated 4018 * later when pseudo Tx rings are added after 4019 * mac_register() is done. 4020 */ 4021 if (ring == NULL) { 4022 ASSERT(mip->mi_state_flags & MIS_IS_AGGR); 4023 /* 4024 * pass the group to aggr so it can add Tx 4025 * rings to the group later. 4026 */ 4027 cap_rings->mr_gget(mip->mi_driver, rtype, 0, NULL, 4028 (mac_group_handle_t)group); 4029 /* 4030 * Even though there are no rings at this time 4031 * (rings will come later), set the group 4032 * state to registered. 4033 */ 4034 group->mrg_state = MAC_GROUP_STATE_REGISTERED; 4035 } else { 4036 /* 4037 * Ring 0 is used as the default one and it could be 4038 * assigned to a client as well. 4039 */ 4040 while ((ring->mr_index != 0) && (ring->mr_next != NULL)) 4041 ring = ring->mr_next; 4042 ASSERT(ring->mr_index == 0); 4043 mip->mi_default_tx_ring = (mac_ring_handle_t)ring; 4044 } 4045 if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) 4046 mip->mi_txrings_avail = group->mrg_cur_count - 1; 4047 /* 4048 * The default ring cannot be reserved. 4049 */ 4050 mip->mi_txrings_rsvd = 1; 4051 /* 4052 * The default group cannot be reserved. It will be shared 4053 * by clients that do not have an exclusive group. 4054 */ 4055 mip->mi_txhwclnt_avail = mip->mi_tx_group_count; 4056 mip->mi_txhwclnt_used = 1; 4057 break; 4058 default: 4059 ASSERT(B_FALSE); 4060 } 4061 4062 if (err != 0) 4063 mac_free_rings(mip, rtype); 4064 4065 return (err); 4066 } 4067 4068 /* 4069 * The ddi interrupt handle could be shared amoung rings. If so, compare 4070 * the new ring's ddi handle with the existing ones and set ddi_shared 4071 * flag. 
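 *
 * This is called from mac_init_ring() when a new Tx ring finds no match
 * among the Tx rings registered so far; since Rx rings are registered
 * first, the matching handle, if any, lives in the Rx groups.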
 */
void
mac_compare_ddi_handle(mac_group_t *groups, uint_t grpcnt, mac_ring_t *cring)
{
    mac_group_t *group;
    mac_ring_t *ring;
    ddi_intr_handle_t ddi_handle;
    int g;

    ddi_handle = cring->mr_info.mri_intr.mi_ddi_handle;
    for (g = 0; g < grpcnt; g++) {
        group = groups + g;
        for (ring = group->mrg_rings; ring != NULL;
            ring = ring->mr_next) {
            if (ring == cring)
                continue;
            if (ring->mr_info.mri_intr.mi_ddi_handle ==
                ddi_handle) {
                if (cring->mr_type == MAC_RING_TYPE_RX &&
                    ring->mr_index == 0 &&
                    !ring->mr_info.mri_intr.mi_ddi_shared) {
                    ring->mr_info.mri_intr.mi_ddi_shared =
                        B_TRUE;
                } else {
                    cring->mr_info.mri_intr.mi_ddi_shared =
                        B_TRUE;
                }
                return;
            }
        }
    }
}

/*
 * Called to free all groups of a particular type (RX or TX). It's assumed
 * that no clients are using these groups.
 */
void
mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype)
{
    mac_group_t *group, *groups;
    uint_t group_count;

    switch (rtype) {
    case MAC_RING_TYPE_RX:
        if (mip->mi_rx_groups == NULL)
            return;

        groups = mip->mi_rx_groups;
        group_count = mip->mi_rx_group_count;

        mip->mi_rx_groups = NULL;
        mip->mi_rx_donor_grp = NULL;
        mip->mi_rx_group_count = 0;
        break;
    case MAC_RING_TYPE_TX:
        ASSERT(mip->mi_tx_group_count == mip->mi_tx_group_free);

        if (mip->mi_tx_groups == NULL)
            return;

        groups = mip->mi_tx_groups;
        group_count = mip->mi_tx_group_count;

        mip->mi_tx_groups = NULL;
        mip->mi_tx_group_count = 0;
        mip->mi_tx_group_free = 0;
        mip->mi_default_tx_ring = NULL;
        break;
    default:
        ASSERT(B_FALSE);
    }

    for (group = groups; group != NULL; group = group->mrg_next) {
        mac_ring_t *ring;

        if (group->mrg_cur_count == 0)
            continue;

        ASSERT(group->mrg_rings != NULL);

        while ((ring = group->mrg_rings) != NULL) {
            group->mrg_rings = ring->mr_next;
            mac_ring_free(mip, ring);
        }
    }

    /* Free all the cached rings */
    mac_ring_freeall(mip);
    /* Free the block of group data structures */
    kmem_free(groups, sizeof (mac_group_t) * (group_count + 1));
}

/*
 * Associate a MAC address with a receive group.
 *
 * The return value of this function should always be checked properly, because
 * any type of failure could cause unexpected results. A MAC address can be
 * added to or removed from a group only after that group has been reserved.
 * Ideally, a successful reservation always leads to calling mac_group_addmac()
 * to steer desired traffic. Failure of adding a unicast MAC address doesn't
 * always imply that the group is functioning abnormally.
 *
 * Currently this function is called everywhere, and it reflects assumptions
 * about MAC addresses in the implementation. CR 6735196.
 */
int
mac_group_addmac(mac_group_t *group, const uint8_t *addr)
{
    ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
    ASSERT(group->mrg_info.mgi_addmac != NULL);

    return (group->mrg_info.mgi_addmac(group->mrg_info.mgi_driver, addr));
}

/*
 * Remove the association between MAC address and receive group.
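 *
 * A minimal sketch of the intended pairing with mac_group_addmac()
 * (hypothetical caller; assumes the MAC perimeter is held and the
 * group has already been reserved):
 *
 *     if ((err = mac_group_addmac(grp, addr)) != 0)
 *         return (err);              no hw classification slot
 *     ... traffic to addr is now steered to grp's rings ...
 *     (void) mac_group_remmac(grp, addr);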
 */
int
mac_group_remmac(mac_group_t *group, const uint8_t *addr)
{
    ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
    ASSERT(group->mrg_info.mgi_remmac != NULL);

    return (group->mrg_info.mgi_remmac(group->mrg_info.mgi_driver, addr));
}

/*
 * This is the entry point for packets transmitted through the bridging code.
 * If no bridge is in place, MAC_RING_TX transmits using the tx ring. The 'rh'
 * pointer may be NULL to select the default ring.
 */
mblk_t *
mac_bridge_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp)
{
    mac_handle_t mh;

    /*
     * Once we take a reference on the bridge link, the bridge
     * module itself can't unload, so the callback pointers are
     * stable.
     */
    mutex_enter(&mip->mi_bridge_lock);
    if ((mh = mip->mi_bridge_link) != NULL)
        mac_bridge_ref_cb(mh, B_TRUE);
    mutex_exit(&mip->mi_bridge_lock);
    if (mh == NULL) {
        MAC_RING_TX(mip, rh, mp, mp);
    } else {
        mp = mac_bridge_tx_cb(mh, rh, mp);
        mac_bridge_ref_cb(mh, B_FALSE);
    }

    return (mp);
}

/*
 * Find a ring from its index.
 */
mac_ring_handle_t
mac_find_ring(mac_group_handle_t gh, int index)
{
    mac_group_t *group = (mac_group_t *)gh;
    mac_ring_t *ring;

    for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next)
        if (ring->mr_index == index)
            break;

    return ((mac_ring_handle_t)ring);
}

/*
 * Add a ring to an existing group.
 *
 * The ring must be either passed directly (for example if the ring
 * movement is initiated by the framework), or specified through a driver
 * index (for example when the ring is added by the driver).
 *
 * The caller needs to call mac_perim_enter() before calling this function.
 */
int
i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index)
{
    mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
    mac_capab_rings_t *cap_rings;
    boolean_t driver_call = (ring == NULL);
    mac_group_type_t group_type;
    int ret = 0;
    flow_entry_t *flent;

    ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

    switch (group->mrg_type) {
    case MAC_RING_TYPE_RX:
        cap_rings = &mip->mi_rx_rings_cap;
        group_type = mip->mi_rx_group_type;
        break;
    case MAC_RING_TYPE_TX:
        cap_rings = &mip->mi_tx_rings_cap;
        group_type = mip->mi_tx_group_type;
        break;
    default:
        ASSERT(B_FALSE);
    }

    /*
     * There should be no ring with the same ring index in the target
     * group.
     */
    ASSERT(mac_find_ring((mac_group_handle_t)group,
        driver_call ? index : ring->mr_index) == NULL);

    if (driver_call) {
        /*
         * The function is called as a result of a request from
         * a driver to add a ring to an existing group, for example
         * from the aggregation driver. Allocate a new mac_ring_t
         * for that ring.
         */
        ring = mac_init_ring(mip, group, index, cap_rings);
        ASSERT(group->mrg_state > MAC_GROUP_STATE_UNINIT);
    } else {
        /*
         * The function is called as a result of a MAC layer request
         * to add a ring to an existing group. In this case the
         * ring is being moved between groups, which requires
         * the underlying driver to support dynamic grouping,
         * and the mac_ring_t already exists.
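         * Hence the ring must be orphaned at this point: its mr_gh
         * was cleared when i_mac_group_rem_ring() removed it from
         * its previous group.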
         */
        ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC);
        ASSERT(group->mrg_driver == NULL ||
            cap_rings->mr_gaddring != NULL);
        ASSERT(ring->mr_gh == NULL);
    }

    /*
     * At this point the ring should not be in use, and it should be
     * of the right type for the target group.
     */
    ASSERT(ring->mr_state < MR_INUSE);
    ASSERT(ring->mr_srs == NULL);
    ASSERT(ring->mr_type == group->mrg_type);

    if (!driver_call) {
        /*
         * Add the driver level hardware ring if the process was not
         * initiated by the driver, and the target group has driver
         * level state (pseudo groups have none).
         */
        if (group->mrg_driver != NULL) {
            cap_rings->mr_gaddring(group->mrg_driver,
                ring->mr_driver, ring->mr_type);
        }

        /*
         * Insert the ring ahead of the existing rings.
         */
        ring->mr_next = group->mrg_rings;
        group->mrg_rings = ring;
        ring->mr_gh = (mac_group_handle_t)group;
        group->mrg_cur_count++;
    }

    /*
     * If the group has not been actively used, we're done.
     */
    if (group->mrg_index != -1 &&
        group->mrg_state < MAC_GROUP_STATE_RESERVED)
        return (0);

    /*
     * Start the ring if needed. On failure, undo the grouping action.
     */
    if (ring->mr_state != MR_INUSE) {
        if ((ret = mac_start_ring(ring)) != 0) {
            if (!driver_call) {
                cap_rings->mr_gremring(group->mrg_driver,
                    ring->mr_driver, ring->mr_type);
            }
            group->mrg_cur_count--;
            group->mrg_rings = ring->mr_next;

            ring->mr_gh = NULL;

            if (driver_call)
                mac_ring_free(mip, ring);

            return (ret);
        }
    }

    /*
     * Set up SRS/SR according to the ring type.
     */
    switch (ring->mr_type) {
    case MAC_RING_TYPE_RX:
        /*
         * Setup SRS on top of the new ring if the group is
         * reserved for someone's exclusive use.
         */
        if (group->mrg_state == MAC_GROUP_STATE_RESERVED) {
            mac_client_impl_t *mcip;

            mcip = MAC_GROUP_ONLY_CLIENT(group);
            /*
             * Even though this group is reserved we might still
             * have multiple clients, i.e., a VLAN shares the
             * group with the primary mac client.
             */
            if (mcip != NULL) {
                flent = mcip->mci_flent;
                ASSERT(flent->fe_rx_srs_cnt > 0);
                mac_rx_srs_group_setup(mcip, flent, SRST_LINK);
                mac_fanout_setup(mcip, flent,
                    MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver,
                    mcip, NULL, NULL);
            } else {
                ring->mr_classify_type = MAC_SW_CLASSIFIER;
            }
        }
        break;
    case MAC_RING_TYPE_TX:
    {
        mac_grp_client_t *mgcp = group->mrg_clients;
        mac_client_impl_t *mcip;
        mac_soft_ring_set_t *mac_srs;
        mac_srs_tx_t *tx;

        if (MAC_GROUP_NO_CLIENT(group)) {
            if (ring->mr_state == MR_INUSE)
                mac_stop_ring(ring);
            ring->mr_flag = 0;
            break;
        }
        /*
         * If the rings are being moved to a group that has
         * clients using it, then add the new rings to the
         * clients' SRS.
         */
        while (mgcp != NULL) {
            boolean_t is_aggr;

            mcip = mgcp->mgc_client;
            flent = mcip->mci_flent;
            is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR);
            mac_srs = MCIP_TX_SRS(mcip);
            tx = &mac_srs->srs_tx;
            mac_tx_client_quiesce((mac_client_handle_t)mcip);
            /*
             * If we are growing from 1 to multiple rings.
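             * The single-ring Tx modes keep their ring in
             * st_arg2; once a second ring arrives, that ring is
             * handed over to a soft ring and the SRS switches to
             * a fanout (or aggr) mode.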
4422 */ 4423 if (tx->st_mode == SRS_TX_BW || 4424 tx->st_mode == SRS_TX_SERIALIZE || 4425 tx->st_mode == SRS_TX_DEFAULT) { 4426 mac_ring_t *tx_ring = tx->st_arg2; 4427 4428 tx->st_arg2 = NULL; 4429 mac_tx_srs_stat_recreate(mac_srs, B_TRUE); 4430 mac_tx_srs_add_ring(mac_srs, tx_ring); 4431 if (mac_srs->srs_type & SRST_BW_CONTROL) { 4432 tx->st_mode = is_aggr ? SRS_TX_BW_AGGR : 4433 SRS_TX_BW_FANOUT; 4434 } else { 4435 tx->st_mode = is_aggr ? SRS_TX_AGGR : 4436 SRS_TX_FANOUT; 4437 } 4438 tx->st_func = mac_tx_get_func(tx->st_mode); 4439 } 4440 mac_tx_srs_add_ring(mac_srs, ring); 4441 mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), 4442 mac_rx_deliver, mcip, NULL, NULL); 4443 mac_tx_client_restart((mac_client_handle_t)mcip); 4444 mgcp = mgcp->mgc_next; 4445 } 4446 break; 4447 } 4448 default: 4449 ASSERT(B_FALSE); 4450 } 4451 /* 4452 * For aggr, the default ring will be NULL to begin with. If it 4453 * is NULL, then pick the first ring that gets added as the 4454 * default ring. Any ring in an aggregation can be removed at 4455 * any time (by the user action of removing a link) and if the 4456 * current default ring gets removed, then a new one gets 4457 * picked (see i_mac_group_rem_ring()). 4458 */ 4459 if (mip->mi_state_flags & MIS_IS_AGGR && 4460 mip->mi_default_tx_ring == NULL && 4461 ring->mr_type == MAC_RING_TYPE_TX) { 4462 mip->mi_default_tx_ring = (mac_ring_handle_t)ring; 4463 } 4464 4465 MAC_RING_UNMARK(ring, MR_INCIPIENT); 4466 return (0); 4467 } 4468 4469 /* 4470 * Remove a ring from it's current group. MAC internal function for dynamic 4471 * grouping. 4472 * 4473 * The caller needs to call mac_perim_enter() before calling this function. 4474 */ 4475 void 4476 i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, 4477 boolean_t driver_call) 4478 { 4479 mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 4480 mac_capab_rings_t *cap_rings = NULL; 4481 mac_group_type_t group_type; 4482 4483 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 4484 4485 ASSERT(mac_find_ring((mac_group_handle_t)group, 4486 ring->mr_index) == (mac_ring_handle_t)ring); 4487 ASSERT((mac_group_t *)ring->mr_gh == group); 4488 ASSERT(ring->mr_type == group->mrg_type); 4489 4490 if (ring->mr_state == MR_INUSE) 4491 mac_stop_ring(ring); 4492 switch (ring->mr_type) { 4493 case MAC_RING_TYPE_RX: 4494 group_type = mip->mi_rx_group_type; 4495 cap_rings = &mip->mi_rx_rings_cap; 4496 4497 /* 4498 * Only hardware classified packets hold a reference to the 4499 * ring all the way up the Rx path. mac_rx_srs_remove() 4500 * will take care of quiescing the Rx path and removing the 4501 * SRS. The software classified path neither holds a reference 4502 * nor any association with the ring in mac_rx. 4503 */ 4504 if (ring->mr_srs != NULL) { 4505 mac_rx_srs_remove(ring->mr_srs); 4506 ring->mr_srs = NULL; 4507 } 4508 4509 break; 4510 case MAC_RING_TYPE_TX: 4511 { 4512 mac_grp_client_t *mgcp; 4513 mac_client_impl_t *mcip; 4514 mac_soft_ring_set_t *mac_srs; 4515 mac_srs_tx_t *tx; 4516 mac_ring_t *rem_ring; 4517 mac_group_t *defgrp; 4518 uint_t ring_info = 0; 4519 4520 /* 4521 * For TX this function is invoked in three 4522 * cases: 4523 * 4524 * 1) In the case of a failure during the 4525 * initial creation of a group when a share is 4526 * associated with a MAC client. So the SRS is not 4527 * yet setup, and will be setup later after the 4528 * group has been reserved and populated. 4529 * 4530 * 2) From mac_release_tx_group() when freeing 4531 * a TX SRS. 
4532 * 4533 * 3) In the case of aggr, when a port gets removed, 4534 * the pseudo Tx rings that it exposed gets removed. 4535 * 4536 * In the first two cases the SRS and its soft 4537 * rings are already quiesced. 4538 */ 4539 if (driver_call) { 4540 mac_client_impl_t *mcip; 4541 mac_soft_ring_set_t *mac_srs; 4542 mac_soft_ring_t *sringp; 4543 mac_srs_tx_t *srs_tx; 4544 4545 if (mip->mi_state_flags & MIS_IS_AGGR && 4546 mip->mi_default_tx_ring == 4547 (mac_ring_handle_t)ring) { 4548 /* pick a new default Tx ring */ 4549 mip->mi_default_tx_ring = 4550 (group->mrg_rings != ring) ? 4551 (mac_ring_handle_t)group->mrg_rings : 4552 (mac_ring_handle_t)(ring->mr_next); 4553 } 4554 /* Presently only aggr case comes here */ 4555 if (group->mrg_state != MAC_GROUP_STATE_RESERVED) 4556 break; 4557 4558 mcip = MAC_GROUP_ONLY_CLIENT(group); 4559 ASSERT(mcip != NULL); 4560 ASSERT(mcip->mci_state_flags & MCIS_IS_AGGR); 4561 mac_srs = MCIP_TX_SRS(mcip); 4562 ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_AGGR || 4563 mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR); 4564 srs_tx = &mac_srs->srs_tx; 4565 /* 4566 * Wakeup any callers blocked on this 4567 * Tx ring due to flow control. 4568 */ 4569 sringp = srs_tx->st_soft_rings[ring->mr_index]; 4570 ASSERT(sringp != NULL); 4571 mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)sringp); 4572 mac_tx_client_quiesce((mac_client_handle_t)mcip); 4573 mac_tx_srs_del_ring(mac_srs, ring); 4574 mac_tx_client_restart((mac_client_handle_t)mcip); 4575 break; 4576 } 4577 ASSERT(ring != (mac_ring_t *)mip->mi_default_tx_ring); 4578 group_type = mip->mi_tx_group_type; 4579 cap_rings = &mip->mi_tx_rings_cap; 4580 /* 4581 * See if we need to take it out of the MAC clients using 4582 * this group 4583 */ 4584 if (MAC_GROUP_NO_CLIENT(group)) 4585 break; 4586 mgcp = group->mrg_clients; 4587 defgrp = MAC_DEFAULT_TX_GROUP(mip); 4588 while (mgcp != NULL) { 4589 mcip = mgcp->mgc_client; 4590 mac_srs = MCIP_TX_SRS(mcip); 4591 tx = &mac_srs->srs_tx; 4592 mac_tx_client_quiesce((mac_client_handle_t)mcip); 4593 /* 4594 * If we are here when removing rings from the 4595 * defgroup, mac_reserve_tx_ring would have 4596 * already deleted the ring from the MAC 4597 * clients in the group. 4598 */ 4599 if (group != defgrp) { 4600 mac_tx_invoke_callbacks(mcip, 4601 (mac_tx_cookie_t) 4602 mac_tx_srs_get_soft_ring(mac_srs, ring)); 4603 mac_tx_srs_del_ring(mac_srs, ring); 4604 } 4605 /* 4606 * Additionally, if we are left with only 4607 * one ring in the group after this, we need 4608 * to modify the mode etc. to. (We haven't 4609 * yet taken the ring out, so we check with 2). 4610 */ 4611 if (group->mrg_cur_count == 2) { 4612 if (ring->mr_next == NULL) 4613 rem_ring = group->mrg_rings; 4614 else 4615 rem_ring = ring->mr_next; 4616 mac_tx_invoke_callbacks(mcip, 4617 (mac_tx_cookie_t) 4618 mac_tx_srs_get_soft_ring(mac_srs, 4619 rem_ring)); 4620 mac_tx_srs_del_ring(mac_srs, rem_ring); 4621 if (rem_ring->mr_state != MR_INUSE) { 4622 (void) mac_start_ring(rem_ring); 4623 } 4624 tx->st_arg2 = (void *)rem_ring; 4625 mac_tx_srs_stat_recreate(mac_srs, B_FALSE); 4626 ring_info = mac_hwring_getinfo( 4627 (mac_ring_handle_t)rem_ring); 4628 /* 4629 * We are shrinking from multiple 4630 * to 1 ring. 
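                 * The surviving ring goes back into st_arg2 and
                 * the mode drops to a single-ring mode: SRS_TX_BW
                 * under bandwidth control, otherwise
                 * SRS_TX_SERIALIZE if serialization is requested,
                 * else SRS_TX_DEFAULT.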
                 */
                if (mac_srs->srs_type & SRST_BW_CONTROL) {
                    tx->st_mode = SRS_TX_BW;
                } else if (mac_tx_serialize ||
                    (ring_info & MAC_RING_TX_SERIALIZE)) {
                    tx->st_mode = SRS_TX_SERIALIZE;
                } else {
                    tx->st_mode = SRS_TX_DEFAULT;
                }
                tx->st_func = mac_tx_get_func(tx->st_mode);
            }
            mac_tx_client_restart((mac_client_handle_t)mcip);
            mgcp = mgcp->mgc_next;
        }
        break;
    }
    default:
        ASSERT(B_FALSE);
    }

    /*
     * Remove the ring from the group.
     */
    if (ring == group->mrg_rings)
        group->mrg_rings = ring->mr_next;
    else {
        mac_ring_t *pre;

        pre = group->mrg_rings;
        while (pre->mr_next != ring)
            pre = pre->mr_next;
        pre->mr_next = ring->mr_next;
    }
    group->mrg_cur_count--;

    if (!driver_call) {
        ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC);
        ASSERT(group->mrg_driver == NULL ||
            cap_rings->mr_gremring != NULL);

        /*
         * Remove the driver level hardware ring.
         */
        if (group->mrg_driver != NULL) {
            cap_rings->mr_gremring(group->mrg_driver,
                ring->mr_driver, ring->mr_type);
        }
    }

    ring->mr_gh = NULL;
    if (driver_call)
        mac_ring_free(mip, ring);
    else
        ring->mr_flag = 0;
}

/*
 * Move a ring to the target group. If needed, remove the ring from the group
 * that it currently belongs to.
 *
 * The caller needs to enter the MAC perimeter by calling mac_perim_enter().
 */
static int
mac_group_mov_ring(mac_impl_t *mip, mac_group_t *d_group, mac_ring_t *ring)
{
    mac_group_t *s_group = (mac_group_t *)ring->mr_gh;
    int rv;

    ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
    ASSERT(d_group != NULL);
    ASSERT(s_group->mrg_mh == d_group->mrg_mh);

    if (s_group == d_group)
        return (0);

    /*
     * Remove it from the current group first.
     */
    if (s_group != NULL)
        i_mac_group_rem_ring(s_group, ring, B_FALSE);

    /*
     * Add it to the new group.
     */
    rv = i_mac_group_add_ring(d_group, ring, 0);
    if (rv != 0) {
        /*
         * Failed to add the ring to the destination group;
         * try to put it back in the source group. If that also
         * fails, the ring is stuck in limbo, so log a message.
         */
        if (i_mac_group_add_ring(s_group, ring, 0)) {
            cmn_err(CE_WARN, "%s: failed to move ring %p\n",
                mip->mi_name, (void *)ring);
        }
    }

    return (rv);
}

/*
 * Find a MAC address according to its value.
 */
mac_address_t *
mac_find_macaddr(mac_impl_t *mip, uint8_t *mac_addr)
{
    mac_address_t *map;

    ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

    for (map = mip->mi_addresses; map != NULL; map = map->ma_next) {
        if (bcmp(mac_addr, map->ma_addr, map->ma_len) == 0)
            break;
    }

    return (map);
}

/*
 * Check whether the MAC address is shared by multiple clients.
 */
boolean_t
mac_check_macaddr_shared(mac_address_t *map)
{
    ASSERT(MAC_PERIM_HELD((mac_handle_t)map->ma_mip));

    return (map->ma_nusers > 1);
}

/*
 * Remove the specified MAC address from the MAC address list and free it.
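 * The entry must be completely unreferenced (ma_nusers == 0) by the time
 * this is called; the pre-allocated primary address entry is instead
 * freed by mac_fini_macaddr().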
 */
static void
mac_free_macaddr(mac_address_t *map)
{
    mac_impl_t *mip = map->ma_mip;

    ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
    ASSERT(mip->mi_addresses != NULL);

    map = mac_find_macaddr(mip, map->ma_addr);

    ASSERT(map != NULL);
    ASSERT(map->ma_nusers == 0);

    if (map == mip->mi_addresses) {
        mip->mi_addresses = map->ma_next;
    } else {
        mac_address_t *pre;

        pre = mip->mi_addresses;
        while (pre->ma_next != map)
            pre = pre->ma_next;
        pre->ma_next = map->ma_next;
    }

    kmem_free(map, sizeof (mac_address_t));
}

/*
 * Add a MAC address reference for a client. If the desired MAC address
 * exists, add a reference to it. Otherwise, add the new address by adding
 * it to a reserved group or setting promiscuous mode. No other group is
 * tried if the given group is non-NULL, so the caller must explicitly pass
 * in the default group when it is to be shared.
 *
 * Note that the primary MAC address is initialized at registration time, so
 * adding it to the default group only requires activating it if its reference
 * count is still zero. Also, some drivers may not have advertised the RINGS
 * capability.
 */
int
mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr,
    boolean_t use_hw)
{
    mac_address_t *map;
    int err = 0;
    boolean_t allocated_map = B_FALSE;

    ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

    map = mac_find_macaddr(mip, mac_addr);

    /*
     * If the new MAC address has not been added yet, allocate a new
     * entry and set it up.
     */
    if (map == NULL) {
        map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP);
        map->ma_len = mip->mi_type->mt_addr_length;
        bcopy(mac_addr, map->ma_addr, map->ma_len);
        map->ma_nusers = 0;
        map->ma_group = group;
        map->ma_mip = mip;

        /* add the new MAC address to the head of the address list */
        map->ma_next = mip->mi_addresses;
        mip->mi_addresses = map;

        allocated_map = B_TRUE;
    }

    ASSERT(map->ma_group == NULL || map->ma_group == group);
    if (map->ma_group == NULL)
        map->ma_group = group;

    /*
     * If the MAC address is already in use, simply account for the
     * new client.
     */
    if (map->ma_nusers++ > 0)
        return (0);

    /*
     * Activate this MAC address by adding it to the reserved group.
     */
    if (group != NULL) {
        err = mac_group_addmac(group, (const uint8_t *)mac_addr);
        if (err == 0) {
            map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;
            return (0);
        }
    }

    /*
     * The MAC address addition failed. If the client requires a
     * hardware classified MAC address, fail the operation.
     */
    if (use_hw) {
        err = ENOSPC;
        goto bail;
    }

    /*
     * Try promiscuous mode.
     *
     * For drivers that don't advertise the RINGS capability, do
     * nothing for the primary address.
     */
    if ((group == NULL) &&
        (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0)) {
        map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;
        return (0);
    }

    /*
     * Enable promiscuous mode in order to receive traffic
     * to the new MAC address.
     */
    if ((err = i_mac_promisc_set(mip, B_TRUE)) == 0) {
        map->ma_type = MAC_ADDRESS_TYPE_UNICAST_PROMISC;
        return (0);
    }

    /*
     * Free the MAC address that could not be added.
     * Don't free a pre-existing address; it could have been the entry
     * for the primary MAC address, which was pre-allocated by
     * mac_init_macaddr() and which must remain on the list.
     */
bail:
    map->ma_nusers--;
    if (allocated_map)
        mac_free_macaddr(map);
    return (err);
}

/*
 * Remove a reference to a MAC address. This may cause the MAC address to
 * be removed from its associated group, or promiscuous mode to be turned
 * off. The caller needs to handle the failure properly.
 */
int
mac_remove_macaddr(mac_address_t *map)
{
    mac_impl_t *mip = map->ma_mip;
    int err = 0;

    ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

    ASSERT(map == mac_find_macaddr(mip, map->ma_addr));

    /*
     * If it's not the last client using this MAC address, only update
     * the MAC clients count.
     */
    if (--map->ma_nusers > 0)
        return (0);

    /*
     * The MAC address is no longer used by any MAC client, so remove
     * it from its associated group, or turn off promiscuous mode
     * if it was enabled for the MAC address.
     */
    switch (map->ma_type) {
    case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED:
        /*
         * Don't free the preset primary address for drivers that
         * don't advertise the RINGS capability.
         */
        if (map->ma_group == NULL)
            return (0);

        err = mac_group_remmac(map->ma_group, map->ma_addr);
        if (err == 0)
            map->ma_group = NULL;
        break;
    case MAC_ADDRESS_TYPE_UNICAST_PROMISC:
        err = i_mac_promisc_set(mip, B_FALSE);
        break;
    default:
        ASSERT(B_FALSE);
    }

    if (err != 0)
        return (err);

    /*
     * We created the entry for the primary MAC address at registration
     * time, so we don't free it here; mac_fini_macaddr() will take care
     * of it.
     */
    if (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) != 0)
        mac_free_macaddr(map);

    return (0);
}

/*
 * Update an existing MAC address. The caller needs to make sure that the new
 * value has not been used.
 */
int
mac_update_macaddr(mac_address_t *map, uint8_t *mac_addr)
{
    mac_impl_t *mip = map->ma_mip;
    int err = 0;

    ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
    ASSERT(mac_find_macaddr(mip, mac_addr) == NULL);

    switch (map->ma_type) {
    case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED:
        /*
         * Update the primary address for drivers that are not
         * RINGS capable.
         */
        if (mip->mi_rx_groups == NULL) {
            err = mip->mi_unicst(mip->mi_driver, (const uint8_t *)
                mac_addr);
            if (err != 0)
                return (err);
            break;
        }

        /*
         * If this MAC address is not currently in use,
         * simply break out and update the value.
         */
        if (map->ma_nusers == 0)
            break;

        /*
         * Need to replace the MAC address associated with a group.
         */
        err = mac_group_remmac(map->ma_group, map->ma_addr);
        if (err != 0)
            return (err);

        err = mac_group_addmac(map->ma_group, mac_addr);

        /*
         * A failure here hints at a hardware error; the MAC layer
         * needs an error notification facility to handle this
         * properly. For now, simply try to restore the old value.
         */
        if (err != 0)
            (void) mac_group_addmac(map->ma_group, map->ma_addr);

        break;
    case MAC_ADDRESS_TYPE_UNICAST_PROMISC:
        /*
         * Need to do nothing more if in promiscuous mode.
5012 */ 5013 break; 5014 default: 5015 ASSERT(B_FALSE); 5016 } 5017 5018 /* 5019 * Successfully replaced the MAC address. 5020 */ 5021 if (err == 0) 5022 bcopy(mac_addr, map->ma_addr, map->ma_len); 5023 5024 return (err); 5025 } 5026 5027 /* 5028 * Freshen the MAC address with new value. Its caller must have updated the 5029 * hardware MAC address before calling this function. 5030 * This funcitons is supposed to be used to handle the MAC address change 5031 * notification from underlying drivers. 5032 */ 5033 void 5034 mac_freshen_macaddr(mac_address_t *map, uint8_t *mac_addr) 5035 { 5036 mac_impl_t *mip = map->ma_mip; 5037 5038 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 5039 ASSERT(mac_find_macaddr(mip, mac_addr) == NULL); 5040 5041 /* 5042 * Freshen the MAC address with new value. 5043 */ 5044 bcopy(mac_addr, map->ma_addr, map->ma_len); 5045 bcopy(mac_addr, mip->mi_addr, map->ma_len); 5046 5047 /* 5048 * Update all MAC clients that share this MAC address. 5049 */ 5050 mac_unicast_update_clients(mip, map); 5051 } 5052 5053 /* 5054 * Set up the primary MAC address. 5055 */ 5056 void 5057 mac_init_macaddr(mac_impl_t *mip) 5058 { 5059 mac_address_t *map; 5060 5061 /* 5062 * The reference count is initialized to zero, until it's really 5063 * activated. 5064 */ 5065 map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP); 5066 map->ma_len = mip->mi_type->mt_addr_length; 5067 bcopy(mip->mi_addr, map->ma_addr, map->ma_len); 5068 5069 /* 5070 * If driver advertises RINGS capability, it shouldn't have initialized 5071 * its primary MAC address. For other drivers, including VNIC, the 5072 * primary address must work after registration. 5073 */ 5074 if (mip->mi_rx_groups == NULL) 5075 map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; 5076 5077 map->ma_mip = mip; 5078 5079 mip->mi_addresses = map; 5080 } 5081 5082 /* 5083 * Clean up the primary MAC address. Note, only one primary MAC address 5084 * is allowed. All other MAC addresses must have been freed appropriately. 5085 */ 5086 void 5087 mac_fini_macaddr(mac_impl_t *mip) 5088 { 5089 mac_address_t *map = mip->mi_addresses; 5090 5091 if (map == NULL) 5092 return; 5093 5094 /* 5095 * If mi_addresses is initialized, there should be exactly one 5096 * entry left on the list with no users. 5097 */ 5098 ASSERT(map->ma_nusers == 0); 5099 ASSERT(map->ma_next == NULL); 5100 5101 kmem_free(map, sizeof (mac_address_t)); 5102 mip->mi_addresses = NULL; 5103 } 5104 5105 /* 5106 * Logging related functions. 5107 * 5108 * Note that Kernel statistics have been extended to maintain fine 5109 * granularity of statistics viz. hardware lane, software lane, fanout 5110 * stats etc. However, extended accounting continues to support only 5111 * aggregate statistics like before. 5112 */ 5113 5114 /* Write the flow description to a netinfo_t record */ 5115 static netinfo_t * 5116 mac_write_flow_desc(flow_entry_t *flent, mac_client_impl_t *mcip) 5117 { 5118 netinfo_t *ninfo; 5119 net_desc_t *ndesc; 5120 flow_desc_t *fdesc; 5121 mac_resource_props_t *mrp; 5122 5123 ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP); 5124 if (ninfo == NULL) 5125 return (NULL); 5126 ndesc = kmem_zalloc(sizeof (net_desc_t), KM_NOSLEEP); 5127 if (ndesc == NULL) { 5128 kmem_free(ninfo, sizeof (netinfo_t)); 5129 return (NULL); 5130 } 5131 5132 /* 5133 * Grab the fe_lock to see a self-consistent fe_flow_desc. 
5134 * Updates to the fe_flow_desc are done under the fe_lock 5135 */ 5136 mutex_enter(&flent->fe_lock); 5137 fdesc = &flent->fe_flow_desc; 5138 mrp = &flent->fe_resource_props; 5139 5140 ndesc->nd_name = flent->fe_flow_name; 5141 ndesc->nd_devname = mcip->mci_name; 5142 bcopy(fdesc->fd_src_mac, ndesc->nd_ehost, ETHERADDRL); 5143 bcopy(fdesc->fd_dst_mac, ndesc->nd_edest, ETHERADDRL); 5144 ndesc->nd_sap = htonl(fdesc->fd_sap); 5145 ndesc->nd_isv4 = (uint8_t)fdesc->fd_ipversion == IPV4_VERSION; 5146 ndesc->nd_bw_limit = mrp->mrp_maxbw; 5147 if (ndesc->nd_isv4) { 5148 ndesc->nd_saddr[3] = htonl(fdesc->fd_local_addr.s6_addr32[3]); 5149 ndesc->nd_daddr[3] = htonl(fdesc->fd_remote_addr.s6_addr32[3]); 5150 } else { 5151 bcopy(&fdesc->fd_local_addr, ndesc->nd_saddr, IPV6_ADDR_LEN); 5152 bcopy(&fdesc->fd_remote_addr, ndesc->nd_daddr, IPV6_ADDR_LEN); 5153 } 5154 ndesc->nd_sport = htons(fdesc->fd_local_port); 5155 ndesc->nd_dport = htons(fdesc->fd_remote_port); 5156 ndesc->nd_protocol = (uint8_t)fdesc->fd_protocol; 5157 mutex_exit(&flent->fe_lock); 5158 5159 ninfo->ni_record = ndesc; 5160 ninfo->ni_size = sizeof (net_desc_t); 5161 ninfo->ni_type = EX_NET_FLDESC_REC; 5162 5163 return (ninfo); 5164 } 5165 5166 /* Write the flow statistics to a netinfo_t record */ 5167 static netinfo_t * 5168 mac_write_flow_stats(flow_entry_t *flent) 5169 { 5170 netinfo_t *ninfo; 5171 net_stat_t *nstat; 5172 mac_soft_ring_set_t *mac_srs; 5173 mac_rx_stats_t *mac_rx_stat; 5174 mac_tx_stats_t *mac_tx_stat; 5175 int i; 5176 5177 ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP); 5178 if (ninfo == NULL) 5179 return (NULL); 5180 nstat = kmem_zalloc(sizeof (net_stat_t), KM_NOSLEEP); 5181 if (nstat == NULL) { 5182 kmem_free(ninfo, sizeof (netinfo_t)); 5183 return (NULL); 5184 } 5185 5186 nstat->ns_name = flent->fe_flow_name; 5187 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 5188 mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; 5189 mac_rx_stat = &mac_srs->srs_rx.sr_stat; 5190 5191 nstat->ns_ibytes += mac_rx_stat->mrs_intrbytes + 5192 mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes; 5193 nstat->ns_ipackets += mac_rx_stat->mrs_intrcnt + 5194 mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; 5195 nstat->ns_oerrors += mac_rx_stat->mrs_ierrors; 5196 } 5197 5198 mac_srs = (mac_soft_ring_set_t *)(flent->fe_tx_srs); 5199 if (mac_srs != NULL) { 5200 mac_tx_stat = &mac_srs->srs_tx.st_stat; 5201 5202 nstat->ns_obytes = mac_tx_stat->mts_obytes; 5203 nstat->ns_opackets = mac_tx_stat->mts_opackets; 5204 nstat->ns_oerrors = mac_tx_stat->mts_oerrors; 5205 } 5206 5207 ninfo->ni_record = nstat; 5208 ninfo->ni_size = sizeof (net_stat_t); 5209 ninfo->ni_type = EX_NET_FLSTAT_REC; 5210 5211 return (ninfo); 5212 } 5213 5214 /* Write the link description to a netinfo_t record */ 5215 static netinfo_t * 5216 mac_write_link_desc(mac_client_impl_t *mcip) 5217 { 5218 netinfo_t *ninfo; 5219 net_desc_t *ndesc; 5220 flow_entry_t *flent = mcip->mci_flent; 5221 5222 ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP); 5223 if (ninfo == NULL) 5224 return (NULL); 5225 ndesc = kmem_zalloc(sizeof (net_desc_t), KM_NOSLEEP); 5226 if (ndesc == NULL) { 5227 kmem_free(ninfo, sizeof (netinfo_t)); 5228 return (NULL); 5229 } 5230 5231 ndesc->nd_name = mcip->mci_name; 5232 ndesc->nd_devname = mcip->mci_name; 5233 ndesc->nd_isv4 = B_TRUE; 5234 /* 5235 * Grab the fe_lock to see a self-consistent fe_flow_desc. 5236 * Updates to the fe_flow_desc are done under the fe_lock 5237 * after removing the flent from the flow table. 
     */
    mutex_enter(&flent->fe_lock);
    bcopy(flent->fe_flow_desc.fd_src_mac, ndesc->nd_ehost, ETHERADDRL);
    mutex_exit(&flent->fe_lock);

    ninfo->ni_record = ndesc;
    ninfo->ni_size = sizeof (net_desc_t);
    ninfo->ni_type = EX_NET_LNDESC_REC;

    return (ninfo);
}

/* Write the link statistics to a netinfo_t record */
static netinfo_t *
mac_write_link_stats(mac_client_impl_t *mcip)
{
    netinfo_t *ninfo;
    net_stat_t *nstat;
    flow_entry_t *flent;
    mac_soft_ring_set_t *mac_srs;
    mac_rx_stats_t *mac_rx_stat;
    mac_tx_stats_t *mac_tx_stat;
    int i;

    ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP);
    if (ninfo == NULL)
        return (NULL);
    nstat = kmem_zalloc(sizeof (net_stat_t), KM_NOSLEEP);
    if (nstat == NULL) {
        kmem_free(ninfo, sizeof (netinfo_t));
        return (NULL);
    }

    nstat->ns_name = mcip->mci_name;
    flent = mcip->mci_flent;
    if (flent != NULL) {
        for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
            mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
            mac_rx_stat = &mac_srs->srs_rx.sr_stat;

            nstat->ns_ibytes += mac_rx_stat->mrs_intrbytes +
                mac_rx_stat->mrs_pollbytes +
                mac_rx_stat->mrs_lclbytes;
            nstat->ns_ipackets += mac_rx_stat->mrs_intrcnt +
                mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt;
            nstat->ns_oerrors += mac_rx_stat->mrs_ierrors;
        }
    }

    mac_srs = (mac_soft_ring_set_t *)(mcip->mci_flent->fe_tx_srs);
    if (mac_srs != NULL) {
        mac_tx_stat = &mac_srs->srs_tx.st_stat;

        nstat->ns_obytes = mac_tx_stat->mts_obytes;
        nstat->ns_opackets = mac_tx_stat->mts_opackets;
        nstat->ns_oerrors = mac_tx_stat->mts_oerrors;
    }

    ninfo->ni_record = nstat;
    ninfo->ni_size = sizeof (net_stat_t);
    ninfo->ni_type = EX_NET_LNSTAT_REC;

    return (ninfo);
}

typedef struct i_mac_log_state_s {
    boolean_t mi_last;
    int mi_fenable;
    int mi_lenable;
    list_t *mi_list;
} i_mac_log_state_t;

/*
 * For a given flow, if the description has not been logged before, do it now.
 * If it is a VNIC, then we have collected information about it from the MAC
 * table, so skip it.
 *
 * Called through mac_flow_walk_nolock()
 *
 * Return 0 if successful.
 */
static int
mac_log_flowinfo(flow_entry_t *flent, void *arg)
{
    mac_client_impl_t *mcip = flent->fe_mcip;
    i_mac_log_state_t *lstate = arg;
    netinfo_t *ninfo;

    if (mcip == NULL)
        return (0);

    /*
     * If the name starts with "vnic" and the FLOW_USER type flag is set
     * (which excludes the mcast and active flow entries created
     * implicitly for a vnic), it is a VNIC flow; i.e. vnic1 is a VNIC
     * flow, while vnic/bge1/mcast1 and vnic/bge1/active are not.
     */
    if (strncasecmp(flent->fe_flow_name, "vnic", 4) == 0 &&
        (flent->fe_type & FLOW_USER) != 0) {
        return (0);
    }

    if (!flent->fe_desc_logged) {
        /*
         * We don't return an error here because we want the walk
         * to continue: if this is the last walk, we still need to
         * reset fe_desc_logged in all the flows.
         */
        if ((ninfo = mac_write_flow_desc(flent, mcip)) == NULL)
            return (0);
        list_insert_tail(lstate->mi_list, ninfo);
        flent->fe_desc_logged = B_TRUE;
    }

    /*
     * Regardless of the error, we want to proceed in case we have to
     * reset fe_desc_logged.
     */
    ninfo = mac_write_flow_stats(flent);
    if (ninfo == NULL)
        return (-1);

    list_insert_tail(lstate->mi_list, ninfo);

    if (mcip != NULL && !(mcip->mci_state_flags & MCIS_DESC_LOGGED))
        flent->fe_desc_logged = B_FALSE;

    return (0);
}

/*
 * Log the description for each mac client of this mac_impl_t, if it
 * hasn't already been done. Additionally, log statistics for the link.
 * Walk the flow table and log information for each flow as well.
 * If it is the last walk (mi_last), then we turn off MCIS_DESC_LOGGED (and
 * also fe_desc_logged, if flow logging is on) since we want to log the
 * description if and when logging is restarted.
 *
 * Return 0 upon success or -1 upon failure
 */
static int
i_mac_impl_log(mac_impl_t *mip, i_mac_log_state_t *lstate)
{
    mac_client_impl_t *mcip;
    netinfo_t *ninfo;

    i_mac_perim_enter(mip);
    /*
     * Only walk the client list for NIC and etherstub
     */
    if ((mip->mi_state_flags & MIS_DISABLED) ||
        ((mip->mi_state_flags & MIS_IS_VNIC) &&
        (mac_get_lower_mac_handle((mac_handle_t)mip) != NULL))) {
        i_mac_perim_exit(mip);
        return (0);
    }

    for (mcip = mip->mi_clients_list; mcip != NULL;
        mcip = mcip->mci_client_next) {
        if (!MCIP_DATAPATH_SETUP(mcip))
            continue;
        if (lstate->mi_lenable) {
            if (!(mcip->mci_state_flags & MCIS_DESC_LOGGED)) {
                ninfo = mac_write_link_desc(mcip);
                if (ninfo == NULL) {
                    /*
                     * We can't terminate the walk if this is the
                     * last walk, else there might be some links
                     * with MCIS_DESC_LOGGED set, which means
                     * their description won't be logged the next
                     * time logging is started (similarly for the
                     * flows within such links). We can continue
                     * without walking the flow table (i.e. to
                     * set fe_desc_logged to false) because we
                     * won't have written any flow stuff for this
                     * link as we haven't logged the link itself.
5414 */ 5415 i_mac_perim_exit(mip); 5416 if (lstate->mi_last) 5417 return (0); 5418 else 5419 return (-1); 5420 } 5421 mcip->mci_state_flags |= MCIS_DESC_LOGGED; 5422 list_insert_tail(lstate->mi_list, ninfo); 5423 } 5424 } 5425 5426 ninfo = mac_write_link_stats(mcip); 5427 if (ninfo == NULL && !lstate->mi_last) { 5428 i_mac_perim_exit(mip); 5429 return (-1); 5430 } 5431 list_insert_tail(lstate->mi_list, ninfo); 5432 5433 if (lstate->mi_last) 5434 mcip->mci_state_flags &= ~MCIS_DESC_LOGGED; 5435 5436 if (lstate->mi_fenable) { 5437 if (mcip->mci_subflow_tab != NULL) { 5438 (void) mac_flow_walk_nolock( 5439 mcip->mci_subflow_tab, mac_log_flowinfo, 5440 lstate); 5441 } 5442 } 5443 } 5444 i_mac_perim_exit(mip); 5445 return (0); 5446 } 5447 5448 /* 5449 * modhash walker function to add a mac_impl_t to a list 5450 */ 5451 /*ARGSUSED*/ 5452 static uint_t 5453 i_mac_impl_list_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 5454 { 5455 list_t *list = (list_t *)arg; 5456 mac_impl_t *mip = (mac_impl_t *)val; 5457 5458 if ((mip->mi_state_flags & MIS_DISABLED) == 0) { 5459 list_insert_tail(list, mip); 5460 mip->mi_ref++; 5461 } 5462 5463 return (MH_WALK_CONTINUE); 5464 } 5465 5466 void 5467 i_mac_log_info(list_t *net_log_list, i_mac_log_state_t *lstate) 5468 { 5469 list_t mac_impl_list; 5470 mac_impl_t *mip; 5471 netinfo_t *ninfo; 5472 5473 /* Create list of mac_impls */ 5474 ASSERT(RW_LOCK_HELD(&i_mac_impl_lock)); 5475 list_create(&mac_impl_list, sizeof (mac_impl_t), offsetof(mac_impl_t, 5476 mi_node)); 5477 mod_hash_walk(i_mac_impl_hash, i_mac_impl_list_walker, &mac_impl_list); 5478 rw_exit(&i_mac_impl_lock); 5479 5480 /* Create log entries for each mac_impl */ 5481 for (mip = list_head(&mac_impl_list); mip != NULL; 5482 mip = list_next(&mac_impl_list, mip)) { 5483 if (i_mac_impl_log(mip, lstate) != 0) 5484 continue; 5485 } 5486 5487 /* Remove elements and destroy list of mac_impls */ 5488 rw_enter(&i_mac_impl_lock, RW_WRITER); 5489 while ((mip = list_remove_tail(&mac_impl_list)) != NULL) { 5490 mip->mi_ref--; 5491 } 5492 rw_exit(&i_mac_impl_lock); 5493 list_destroy(&mac_impl_list); 5494 5495 /* 5496 * Write log entries to files outside of locks, free associated 5497 * structures, and remove entries from the list. 5498 */ 5499 while ((ninfo = list_head(net_log_list)) != NULL) { 5500 (void) exacct_commit_netinfo(ninfo->ni_record, ninfo->ni_type); 5501 list_remove(net_log_list, ninfo); 5502 kmem_free(ninfo->ni_record, ninfo->ni_size); 5503 kmem_free(ninfo, sizeof (*ninfo)); 5504 } 5505 list_destroy(net_log_list); 5506 } 5507 5508 /* 5509 * The timer thread that runs every mac_logging_interval seconds and logs 5510 * link and/or flow information. 
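 * The callback re-arms itself via timeout() for as long as either form
 * of logging remains enabled; mac_stop_logusage() clears the enable
 * flags and untimeout()s the pending callback.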
5511 */ 5512 /* ARGSUSED */ 5513 void 5514 mac_log_linkinfo(void *arg) 5515 { 5516 i_mac_log_state_t lstate; 5517 list_t net_log_list; 5518 5519 list_create(&net_log_list, sizeof (netinfo_t), 5520 offsetof(netinfo_t, ni_link)); 5521 5522 rw_enter(&i_mac_impl_lock, RW_READER); 5523 if (!mac_flow_log_enable && !mac_link_log_enable) { 5524 rw_exit(&i_mac_impl_lock); 5525 return; 5526 } 5527 lstate.mi_fenable = mac_flow_log_enable; 5528 lstate.mi_lenable = mac_link_log_enable; 5529 lstate.mi_last = B_FALSE; 5530 lstate.mi_list = &net_log_list; 5531 5532 /* Write log entries for each mac_impl in the list */ 5533 i_mac_log_info(&net_log_list, &lstate); 5534 5535 if (mac_flow_log_enable || mac_link_log_enable) { 5536 mac_logging_timer = timeout(mac_log_linkinfo, NULL, 5537 SEC_TO_TICK(mac_logging_interval)); 5538 } 5539 } 5540 5541 typedef struct i_mac_fastpath_state_s { 5542 boolean_t mf_disable; 5543 int mf_err; 5544 } i_mac_fastpath_state_t; 5545 5546 /* modhash walker function to enable or disable fastpath */ 5547 /*ARGSUSED*/ 5548 static uint_t 5549 i_mac_fastpath_walker(mod_hash_key_t key, mod_hash_val_t *val, 5550 void *arg) 5551 { 5552 i_mac_fastpath_state_t *state = arg; 5553 mac_handle_t mh = (mac_handle_t)val; 5554 5555 if (state->mf_disable) 5556 state->mf_err = mac_fastpath_disable(mh); 5557 else 5558 mac_fastpath_enable(mh); 5559 5560 return (state->mf_err == 0 ? MH_WALK_CONTINUE : MH_WALK_TERMINATE); 5561 } 5562 5563 /* 5564 * Start the logging timer. 5565 */ 5566 int 5567 mac_start_logusage(mac_logtype_t type, uint_t interval) 5568 { 5569 i_mac_fastpath_state_t dstate = {B_TRUE, 0}; 5570 i_mac_fastpath_state_t estate = {B_FALSE, 0}; 5571 int err; 5572 5573 rw_enter(&i_mac_impl_lock, RW_WRITER); 5574 switch (type) { 5575 case MAC_LOGTYPE_FLOW: 5576 if (mac_flow_log_enable) { 5577 rw_exit(&i_mac_impl_lock); 5578 return (0); 5579 } 5580 /* FALLTHRU */ 5581 case MAC_LOGTYPE_LINK: 5582 if (mac_link_log_enable) { 5583 rw_exit(&i_mac_impl_lock); 5584 return (0); 5585 } 5586 break; 5587 default: 5588 ASSERT(0); 5589 } 5590 5591 /* Disable fastpath */ 5592 mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &dstate); 5593 if ((err = dstate.mf_err) != 0) { 5594 /* Reenable fastpath */ 5595 mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &estate); 5596 rw_exit(&i_mac_impl_lock); 5597 return (err); 5598 } 5599 5600 switch (type) { 5601 case MAC_LOGTYPE_FLOW: 5602 mac_flow_log_enable = B_TRUE; 5603 /* FALLTHRU */ 5604 case MAC_LOGTYPE_LINK: 5605 mac_link_log_enable = B_TRUE; 5606 break; 5607 } 5608 5609 mac_logging_interval = interval; 5610 rw_exit(&i_mac_impl_lock); 5611 mac_log_linkinfo(NULL); 5612 return (0); 5613 } 5614 5615 /* 5616 * Stop the logging timer if both link and flow logging are turned off. 
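 * This is the last walk (mi_last is set), so i_mac_log_info() also
 * clears the DESC_LOGGED markers; descriptions are then logged afresh
 * when logging is next started.
 *
 * A sketch of the expected start/stop pairing (hypothetical caller;
 * the interval argument is in seconds):
 *
 *     (void) mac_start_logusage(MAC_LOGTYPE_LINK, 20);
 *     ...
 *     mac_stop_logusage(MAC_LOGTYPE_LINK);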
 */
void
mac_stop_logusage(mac_logtype_t type)
{
    i_mac_log_state_t lstate;
    i_mac_fastpath_state_t estate = {B_FALSE, 0};
    list_t net_log_list;

    list_create(&net_log_list, sizeof (netinfo_t),
        offsetof(netinfo_t, ni_link));

    rw_enter(&i_mac_impl_lock, RW_WRITER);

    lstate.mi_fenable = mac_flow_log_enable;
    lstate.mi_lenable = mac_link_log_enable;
    lstate.mi_list = &net_log_list;

    /* Last walk */
    lstate.mi_last = B_TRUE;

    switch (type) {
    case MAC_LOGTYPE_FLOW:
        if (lstate.mi_fenable) {
            ASSERT(mac_link_log_enable);
            mac_flow_log_enable = B_FALSE;
            mac_link_log_enable = B_FALSE;
            break;
        }
        /* FALLTHRU */
    case MAC_LOGTYPE_LINK:
        if (!lstate.mi_lenable || mac_flow_log_enable) {
            rw_exit(&i_mac_impl_lock);
            return;
        }
        mac_link_log_enable = B_FALSE;
        break;
    default:
        ASSERT(0);
    }

    /* Reenable fastpath */
    mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &estate);

    (void) untimeout(mac_logging_timer);
    mac_logging_timer = 0;

    /* Write log entries for each mac_impl in the list */
    i_mac_log_info(&net_log_list, &lstate);
}

/*
 * Walk the rx and tx SRS/SRs for a flow and update the priority value.
 */
void
mac_flow_update_priority(mac_client_impl_t *mcip, flow_entry_t *flent)
{
    pri_t pri;
    int count;
    mac_soft_ring_set_t *mac_srs;

    if (flent->fe_rx_srs_cnt <= 0)
        return;

    if (((mac_soft_ring_set_t *)flent->fe_rx_srs[0])->srs_type ==
        SRST_FLOW) {
        pri = FLOW_PRIORITY(mcip->mci_min_pri,
            mcip->mci_max_pri,
            flent->fe_resource_props.mrp_priority);
    } else {
        pri = mcip->mci_max_pri;
    }

    for (count = 0; count < flent->fe_rx_srs_cnt; count++) {
        mac_srs = flent->fe_rx_srs[count];
        mac_update_srs_priority(mac_srs, pri);
    }
    /*
     * If we have a Tx SRS, we need to modify all the threads associated
     * with it.
     */
    if (flent->fe_tx_srs != NULL)
        mac_update_srs_priority(flent->fe_tx_srs, pri);
}

/*
 * RX and TX rings are reserved according to different semantics depending
 * on the requests from the MAC clients and the type of rings:
 *
 * On the Tx side, by default we reserve individual rings, independently from
 * the groups.
 *
 * On the Rx side, the reservation is at the granularity of the group
 * of rings, and used for v12n level 1 only. It has a special case for the
 * primary client.
 *
 * If a share is allocated to a MAC client, we allocate a TX group and an
 * RX group to the client, and assign TX rings and RX rings to these
 * groups according to information gathered from the driver through
 * the share capability.
 *
 * The foreseeable evolution of Rx rings will handle v12n level 2 and higher
 * to allocate individual rings out of a group and program the hw classifier
 * based on IP address or higher level criteria.
 */

/*
 * mac_reserve_tx_ring()
 * Reserve an unused ring by marking it with the MR_INUSE state.
 * Once reserved, the ring is ready to function.
 *
 * Notes for Hybrid I/O:
 *
 * If a specific ring is needed, it is specified through the desired_ring
 * argument. Otherwise that argument is set to NULL.
5731 * If the desired ring was previous allocated to another client, this 5732 * function swaps it with a new ring from the group of unassigned rings. 5733 */ 5734 mac_ring_t * 5735 mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring) 5736 { 5737 mac_group_t *group; 5738 mac_grp_client_t *mgcp; 5739 mac_client_impl_t *mcip; 5740 mac_soft_ring_set_t *srs; 5741 5742 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 5743 5744 /* 5745 * Find an available ring and start it before changing its status. 5746 * The unassigned rings are at the end of the mi_tx_groups 5747 * array. 5748 */ 5749 group = MAC_DEFAULT_TX_GROUP(mip); 5750 5751 /* Can't take the default ring out of the default group */ 5752 ASSERT(desired_ring != (mac_ring_t *)mip->mi_default_tx_ring); 5753 5754 if (desired_ring->mr_state == MR_FREE) { 5755 ASSERT(MAC_GROUP_NO_CLIENT(group)); 5756 if (mac_start_ring(desired_ring) != 0) 5757 return (NULL); 5758 return (desired_ring); 5759 } 5760 /* 5761 * There are clients using this ring, so let's move the clients 5762 * away from using this ring. 5763 */ 5764 for (mgcp = group->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) { 5765 mcip = mgcp->mgc_client; 5766 mac_tx_client_quiesce((mac_client_handle_t)mcip); 5767 srs = MCIP_TX_SRS(mcip); 5768 ASSERT(mac_tx_srs_ring_present(srs, desired_ring)); 5769 mac_tx_invoke_callbacks(mcip, 5770 (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(srs, 5771 desired_ring)); 5772 mac_tx_srs_del_ring(srs, desired_ring); 5773 mac_tx_client_restart((mac_client_handle_t)mcip); 5774 } 5775 return (desired_ring); 5776 } 5777 5778 /* 5779 * For a reserved group with multiple clients, return the primary client. 5780 */ 5781 static mac_client_impl_t * 5782 mac_get_grp_primary(mac_group_t *grp) 5783 { 5784 mac_grp_client_t *mgcp = grp->mrg_clients; 5785 mac_client_impl_t *mcip; 5786 5787 while (mgcp != NULL) { 5788 mcip = mgcp->mgc_client; 5789 if (mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) 5790 return (mcip); 5791 mgcp = mgcp->mgc_next; 5792 } 5793 return (NULL); 5794 } 5795 5796 /* 5797 * Hybrid I/O specifies the ring that should be given to a share. 5798 * If the ring is already used by clients, then we need to release 5799 * the ring back to the default group so that we can give it to 5800 * the share. This means the clients using this ring now get a 5801 * replacement ring. If there aren't any replacement rings, this 5802 * function returns a failure. 5803 */ 5804 static int 5805 mac_reclaim_ring_from_grp(mac_impl_t *mip, mac_ring_type_t ring_type, 5806 mac_ring_t *ring, mac_ring_t **rings, int nrings) 5807 { 5808 mac_group_t *group = (mac_group_t *)ring->mr_gh; 5809 mac_resource_props_t *mrp; 5810 mac_client_impl_t *mcip; 5811 mac_group_t *defgrp; 5812 mac_ring_t *tring; 5813 mac_group_t *tgrp; 5814 int i; 5815 int j; 5816 5817 mcip = MAC_GROUP_ONLY_CLIENT(group); 5818 if (mcip == NULL) 5819 mcip = mac_get_grp_primary(group); 5820 ASSERT(mcip != NULL); 5821 ASSERT(mcip->mci_share == NULL); 5822 5823 mrp = MCIP_RESOURCE_PROPS(mcip); 5824 if (ring_type == MAC_RING_TYPE_RX) { 5825 defgrp = mip->mi_rx_donor_grp; 5826 if ((mrp->mrp_mask & MRP_RX_RINGS) == 0) { 5827 /* Need to put this mac client in the default group */ 5828 if (mac_rx_switch_group(mcip, group, defgrp) != 0) 5829 return (ENOSPC); 5830 } else { 5831 /* 5832 * Switch this ring with some other ring from 5833 * the default group. 
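 * A replacement candidate must not be the reserved default ring
 * (ring index 0) and must not itself be one of the rings requested
 * for the share.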
5834 */ 5835 for (tring = defgrp->mrg_rings; tring != NULL; 5836 tring = tring->mr_next) { 5837 if (tring->mr_index == 0) 5838 continue; 5839 for (j = 0; j < nrings; j++) { 5840 if (rings[j] == tring) 5841 break; 5842 } 5843 if (j >= nrings) 5844 break; 5845 } 5846 if (tring == NULL) 5847 return (ENOSPC); 5848 if (mac_group_mov_ring(mip, group, tring) != 0) 5849 return (ENOSPC); 5850 if (mac_group_mov_ring(mip, defgrp, ring) != 0) { 5851 (void) mac_group_mov_ring(mip, defgrp, tring); 5852 return (ENOSPC); 5853 } 5854 } 5855 ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp); 5856 return (0); 5857 } 5858 5859 defgrp = MAC_DEFAULT_TX_GROUP(mip); 5860 if (ring == (mac_ring_t *)mip->mi_default_tx_ring) { 5861 /* 5862 * See if we can get a spare ring to replace the default 5863 * ring. 5864 */ 5865 if (defgrp->mrg_cur_count == 1) { 5866 /* 5867 * Need to get a ring from another client, see if 5868 * there are any clients that can be moved to 5869 * the default group, thereby freeing some rings. 5870 */ 5871 for (i = 0; i < mip->mi_tx_group_count; i++) { 5872 tgrp = &mip->mi_tx_groups[i]; 5873 if (tgrp->mrg_state == 5874 MAC_GROUP_STATE_REGISTERED) { 5875 continue; 5876 } 5877 mcip = MAC_GROUP_ONLY_CLIENT(tgrp); 5878 if (mcip == NULL) 5879 mcip = mac_get_grp_primary(tgrp); 5880 ASSERT(mcip != NULL); 5881 mrp = MCIP_RESOURCE_PROPS(mcip); 5882 if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) { 5883 ASSERT(tgrp->mrg_cur_count == 1); 5884 /* 5885 * If this ring is part of the 5886 * rings asked by the share we cannot 5887 * use it as the default ring. 5888 */ 5889 for (j = 0; j < nrings; j++) { 5890 if (rings[j] == tgrp->mrg_rings) 5891 break; 5892 } 5893 if (j < nrings) 5894 continue; 5895 mac_tx_client_quiesce( 5896 (mac_client_handle_t)mcip); 5897 mac_tx_switch_group(mcip, tgrp, 5898 defgrp); 5899 mac_tx_client_restart( 5900 (mac_client_handle_t)mcip); 5901 break; 5902 } 5903 } 5904 /* 5905 * All the rings are reserved, can't give up the 5906 * default ring. 5907 */ 5908 if (defgrp->mrg_cur_count <= 1) 5909 return (ENOSPC); 5910 } 5911 /* 5912 * Swap the default ring with another. 5913 */ 5914 for (tring = defgrp->mrg_rings; tring != NULL; 5915 tring = tring->mr_next) { 5916 /* 5917 * If this ring is part of the rings asked by the 5918 * share we cannot use it as the default ring. 5919 */ 5920 for (j = 0; j < nrings; j++) { 5921 if (rings[j] == tring) 5922 break; 5923 } 5924 if (j >= nrings) 5925 break; 5926 } 5927 ASSERT(tring != NULL); 5928 mip->mi_default_tx_ring = (mac_ring_handle_t)tring; 5929 return (0); 5930 } 5931 /* 5932 * The Tx ring is with a group reserved by a MAC client. See if 5933 * we can swap it. 5934 */ 5935 ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); 5936 mcip = MAC_GROUP_ONLY_CLIENT(group); 5937 if (mcip == NULL) 5938 mcip = mac_get_grp_primary(group); 5939 ASSERT(mcip != NULL); 5940 mrp = MCIP_RESOURCE_PROPS(mcip); 5941 mac_tx_client_quiesce((mac_client_handle_t)mcip); 5942 if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) { 5943 ASSERT(group->mrg_cur_count == 1); 5944 /* Put this mac client in the default group */ 5945 mac_tx_switch_group(mcip, group, defgrp); 5946 } else { 5947 /* 5948 * Switch this ring with some other ring from 5949 * the default group. 5950 */ 5951 for (tring = defgrp->mrg_rings; tring != NULL; 5952 tring = tring->mr_next) { 5953 if (tring == (mac_ring_t *)mip->mi_default_tx_ring) 5954 continue; 5955 /* 5956 * If this ring is part of the rings asked by the 5957 * share we cannot use it for swapping. 
5958 */ 5959 for (j = 0; j < nrings; j++) { 5960 if (rings[j] == tring) 5961 break; 5962 } 5963 if (j >= nrings) 5964 break; 5965 } 5966 if (tring == NULL) { 5967 mac_tx_client_restart((mac_client_handle_t)mcip); 5968 return (ENOSPC); 5969 } 5970 if (mac_group_mov_ring(mip, group, tring) != 0) { 5971 mac_tx_client_restart((mac_client_handle_t)mcip); 5972 return (ENOSPC); 5973 } 5974 if (mac_group_mov_ring(mip, defgrp, ring) != 0) { 5975 (void) mac_group_mov_ring(mip, defgrp, tring); 5976 mac_tx_client_restart((mac_client_handle_t)mcip); 5977 return (ENOSPC); 5978 } 5979 } 5980 mac_tx_client_restart((mac_client_handle_t)mcip); 5981 ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp); 5982 return (0); 5983 } 5984 5985 /* 5986 * Populate a zero-ring group with rings. If the share is non-NULL, 5987 * the rings are chosen according to that share. 5988 * Invoked after allocating a new RX or TX group through 5989 * mac_reserve_rx_group() or mac_reserve_tx_group(), respectively. 5990 * Returns zero on success, an errno otherwise. 5991 */ 5992 int 5993 i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, 5994 mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share, 5995 uint32_t ringcnt) 5996 { 5997 mac_ring_t **rings, *ring; 5998 uint_t nrings; 5999 int rv = 0, i = 0, j; 6000 6001 ASSERT((ring_type == MAC_RING_TYPE_RX && 6002 mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) || 6003 (ring_type == MAC_RING_TYPE_TX && 6004 mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC)); 6005 6006 /* 6007 * First find the rings to allocate to the group. 6008 */ 6009 if (share != NULL) { 6010 /* get rings through ms_squery() */ 6011 mip->mi_share_capab.ms_squery(share, ring_type, NULL, &nrings); 6012 ASSERT(nrings != 0); 6013 rings = kmem_alloc(nrings * sizeof (mac_ring_handle_t), 6014 KM_SLEEP); 6015 mip->mi_share_capab.ms_squery(share, ring_type, 6016 (mac_ring_handle_t *)rings, &nrings); 6017 for (i = 0; i < nrings; i++) { 6018 /* 6019 * If we have given this ring to a non-default 6020 * group, we need to check if we can get this 6021 * ring. 6022 */ 6023 ring = rings[i]; 6024 if (ring->mr_gh != (mac_group_handle_t)src_group || 6025 ring == (mac_ring_t *)mip->mi_default_tx_ring) { 6026 if (mac_reclaim_ring_from_grp(mip, ring_type, 6027 ring, rings, nrings) != 0) { 6028 rv = ENOSPC; 6029 goto bail; 6030 } 6031 } 6032 } 6033 } else { 6034 /* 6035 * Pick one ring from default group. 6036 * 6037 * for now pick the second ring which requires the first ring 6038 * at index 0 to stay in the default group, since it is the 6039 * ring which carries the multicast traffic. 6040 * We need a better way for a driver to indicate this, 6041 * for example a per-ring flag. 
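 *
 * A hypothetical sketch of such a flag (MR_DEFGRP_ONLY is not part of
 * the current interface, it only illustrates the idea):
 *
 *	if (ring->mr_flag & MR_DEFGRP_ONLY)	ring carries multicast
 *		continue;			traffic, leave it alone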
6042 */ 6043 rings = kmem_alloc(ringcnt * sizeof (mac_ring_handle_t), 6044 KM_SLEEP); 6045 for (ring = src_group->mrg_rings; ring != NULL; 6046 ring = ring->mr_next) { 6047 if (ring_type == MAC_RING_TYPE_RX && 6048 ring->mr_index == 0) { 6049 continue; 6050 } 6051 if (ring_type == MAC_RING_TYPE_TX && 6052 ring == (mac_ring_t *)mip->mi_default_tx_ring) { 6053 continue; 6054 } 6055 rings[i++] = ring; 6056 if (i == ringcnt) 6057 break; 6058 } 6059 ASSERT(ring != NULL); 6060 nrings = i; 6061 /* Not as many rings as required */ 6062 if (nrings != ringcnt) { 6063 rv = ENOSPC; 6064 goto bail; 6065 } 6066 } 6067 6068 switch (ring_type) { 6069 case MAC_RING_TYPE_RX: 6070 if (src_group->mrg_cur_count - nrings < 1) { 6071 /* we ran out of rings */ 6072 rv = ENOSPC; 6073 goto bail; 6074 } 6075 6076 /* move receive rings to new group */ 6077 for (i = 0; i < nrings; i++) { 6078 rv = mac_group_mov_ring(mip, new_group, rings[i]); 6079 if (rv != 0) { 6080 /* move rings back on failure */ 6081 for (j = 0; j < i; j++) { 6082 (void) mac_group_mov_ring(mip, 6083 src_group, rings[j]); 6084 } 6085 goto bail; 6086 } 6087 } 6088 break; 6089 6090 case MAC_RING_TYPE_TX: { 6091 mac_ring_t *tmp_ring; 6092 6093 /* move the TX rings to the new group */ 6094 for (i = 0; i < nrings; i++) { 6095 /* get the desired ring */ 6096 tmp_ring = mac_reserve_tx_ring(mip, rings[i]); 6097 if (tmp_ring == NULL) { 6098 rv = ENOSPC; 6099 goto bail; 6100 } 6101 ASSERT(tmp_ring == rings[i]); 6102 rv = mac_group_mov_ring(mip, new_group, rings[i]); 6103 if (rv != 0) { 6104 /* cleanup on failure */ 6105 for (j = 0; j < i; j++) { 6106 (void) mac_group_mov_ring(mip, 6107 MAC_DEFAULT_TX_GROUP(mip), 6108 rings[j]); 6109 } 6110 goto bail; 6111 } 6112 } 6113 break; 6114 } 6115 } 6116 6117 /* add group to share */ 6118 if (share != NULL) 6119 mip->mi_share_capab.ms_sadd(share, new_group->mrg_driver); 6120 6121 bail: 6122 /* free temporary array of rings */ 6123 kmem_free(rings, nrings * sizeof (mac_ring_handle_t)); 6124 6125 return (rv); 6126 } 6127 6128 void 6129 mac_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) 6130 { 6131 mac_grp_client_t *mgcp; 6132 6133 for (mgcp = grp->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) { 6134 if (mgcp->mgc_client == mcip) 6135 break; 6136 } 6137 6138 VERIFY(mgcp == NULL); 6139 6140 mgcp = kmem_zalloc(sizeof (mac_grp_client_t), KM_SLEEP); 6141 mgcp->mgc_client = mcip; 6142 mgcp->mgc_next = grp->mrg_clients; 6143 grp->mrg_clients = mgcp; 6144 6145 } 6146 6147 void 6148 mac_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) 6149 { 6150 mac_grp_client_t *mgcp, **pprev; 6151 6152 for (pprev = &grp->mrg_clients, mgcp = *pprev; mgcp != NULL; 6153 pprev = &mgcp->mgc_next, mgcp = *pprev) { 6154 if (mgcp->mgc_client == mcip) 6155 break; 6156 } 6157 6158 ASSERT(mgcp != NULL); 6159 6160 *pprev = mgcp->mgc_next; 6161 kmem_free(mgcp, sizeof (mac_grp_client_t)); 6162 } 6163 6164 /* 6165 * mac_reserve_rx_group() 6166 * 6167 * Finds an available group and exclusively reserves it for a client. 6168 * The group is chosen to suit the flow's resource controls (bandwidth and 6169 * fanout requirements) and the address type. 6170 * If the requestor is the primary MAC then return the group with the 6171 * largest number of rings, otherwise the default ring when available.
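 *
 * A sketch of the expected usage; a NULL return tells the caller to
 * fall back to the default group:
 *
 *	grp = mac_reserve_rx_group(mcip, mac_addr, B_FALSE);
 *	if (grp == NULL)
 *		use the default RX group for this client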
6172 */ 6173 mac_group_t * 6174 mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) 6175 { 6176 mac_share_handle_t share = mcip->mci_share; 6177 mac_impl_t *mip = mcip->mci_mip; 6178 mac_group_t *grp = NULL; 6179 int i; 6180 int err = 0; 6181 mac_address_t *map; 6182 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 6183 int nrings; 6184 int donor_grp_rcnt; 6185 boolean_t need_exclgrp = B_FALSE; 6186 int need_rings = 0; 6187 mac_group_t *candidate_grp = NULL; 6188 mac_client_impl_t *gclient; 6189 mac_resource_props_t *gmrp; 6190 mac_group_t *donorgrp = NULL; 6191 boolean_t rxhw = mrp->mrp_mask & MRP_RX_RINGS; 6192 boolean_t unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC; 6193 boolean_t isprimary; 6194 6195 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 6196 6197 isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; 6198 6199 /* 6200 * Check if a group already has this mac address (case of VLANs) 6201 * unless we are moving this MAC client from one group to another. 6202 */ 6203 if (!move && (map = mac_find_macaddr(mip, mac_addr)) != NULL) { 6204 if (map->ma_group != NULL) 6205 return (map->ma_group); 6206 } 6207 if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0) 6208 return (NULL); 6209 /* 6210 * If exclusive open, return NULL which will enable the 6211 * caller to use the default group. 6212 */ 6213 if (mcip->mci_state_flags & MCIS_EXCLUSIVE) 6214 return (NULL); 6215 6216 /* For dynamic groups default unspecified to 1 */ 6217 if (rxhw && unspec && 6218 mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6219 mrp->mrp_nrxrings = 1; 6220 } 6221 /* 6222 * For static grouping we allow only specifying rings=0 and 6223 * unspecified. 6224 */ 6225 if (rxhw && mrp->mrp_nrxrings > 0 && 6226 mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) { 6227 return (NULL); 6228 } 6229 if (rxhw) { 6230 /* 6231 * We have explicitly asked for a group (with nrxrings, 6232 * if unspec). 6233 */ 6234 if (unspec || mrp->mrp_nrxrings > 0) { 6235 need_exclgrp = B_TRUE; 6236 need_rings = mrp->mrp_nrxrings; 6237 } else if (mrp->mrp_nrxrings == 0) { 6238 /* 6239 * We have asked for a software group. 6240 */ 6241 return (NULL); 6242 } 6243 } else if (isprimary && mip->mi_nactiveclients == 1 && 6244 mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6245 /* 6246 * If the primary is the only active client on this 6247 * mip and we have not asked for any rings, we give 6248 * it the default group so that the primary gets to 6249 * use all the rings. 6250 */ 6251 return (NULL); 6252 } 6253 6254 /* The group that can donate rings */ 6255 donorgrp = mip->mi_rx_donor_grp; 6256 6257 /* 6258 * The number of rings that the default group can donate. 6259 * We need to leave at least one ring. 6260 */ 6261 donor_grp_rcnt = donorgrp->mrg_cur_count - 1; 6262 6263 /* 6264 * Try to exclusively reserve a RX group. 6265 * 6266 * For flows requiring HW_DEFAULT_RING (unicast flow of the primary 6267 * client), try to reserve a non-default RX group and give 6268 * it all the rings from the donor group, except the default ring. 6269 * 6270 * For flows requiring HW_RING (unicast flow of other clients), try 6271 * to reserve non-default RX group with the specified number of 6272 * rings, if available. 6273 * 6274 * For flows that have not asked for software or hardware rings, 6275 * try to reserve a non-default group with 1 ring, if available.
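 *
 * In short, the ring count requested from the donor group below is:
 *
 *	explicit rings property (rxhw)	-> need_rings
 *	primary client, no property	-> donor_grp_rcnt (all but one)
 *	any other client		-> 1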
6276 */ 6277 for (i = 1; i < mip->mi_rx_group_count; i++) { 6278 grp = &mip->mi_rx_groups[i]; 6279 6280 DTRACE_PROBE3(rx__group__trying, char *, mip->mi_name, 6281 int, grp->mrg_index, mac_group_state_t, grp->mrg_state); 6282 6283 /* 6284 * Check if this group could be a candidate group for 6285 * eviction if we need a group for this MAC client, 6286 * but there aren't any. A candidate group is one 6287 * that didn't ask for an exclusive group, but got 6288 * one and it has enough rings (combined with what 6289 * the donor group can donate) for the new MAC 6290 * client. 6291 */ 6292 if (grp->mrg_state >= MAC_GROUP_STATE_RESERVED) { 6293 /* 6294 * If the primary/donor group is not the default 6295 * group, don't bother looking for a candidate group. 6296 * If we don't have enough rings we will check 6297 * if the primary group can be vacated. 6298 */ 6299 if (candidate_grp == NULL && 6300 donorgrp == MAC_DEFAULT_RX_GROUP(mip)) { 6301 ASSERT(!MAC_GROUP_NO_CLIENT(grp)); 6302 gclient = MAC_GROUP_ONLY_CLIENT(grp); 6303 if (gclient == NULL) 6304 gclient = mac_get_grp_primary(grp); 6305 ASSERT(gclient != NULL); 6306 gmrp = MCIP_RESOURCE_PROPS(gclient); 6307 if (gclient->mci_share == NULL && 6308 (gmrp->mrp_mask & MRP_RX_RINGS) == 0 && 6309 (unspec || 6310 (grp->mrg_cur_count + donor_grp_rcnt >= 6311 need_rings))) { 6312 candidate_grp = grp; 6313 } 6314 } 6315 continue; 6316 } 6317 /* 6318 * This group could already be SHARED by other multicast 6319 * flows on this client. In that case, the group would 6320 * be shared and has already been started. 6321 */ 6322 ASSERT(grp->mrg_state != MAC_GROUP_STATE_UNINIT); 6323 6324 if ((grp->mrg_state == MAC_GROUP_STATE_REGISTERED) && 6325 (mac_start_group(grp) != 0)) { 6326 continue; 6327 } 6328 6329 if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) 6330 break; 6331 ASSERT(grp->mrg_cur_count == 0); 6332 6333 /* 6334 * Populate the group. Rings should be taken 6335 * from the donor group. 6336 */ 6337 nrings = rxhw ? need_rings : isprimary ? donor_grp_rcnt: 1; 6338 6339 /* 6340 * If the donor group can't donate, let's just walk and 6341 * see if someone can vacate a group, so that we have 6342 * enough rings for this, unless we already have 6343 * identified a candidate group. 6344 */ 6345 if (nrings <= donor_grp_rcnt) { 6346 err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, 6347 donorgrp, grp, share, nrings); 6348 if (err == 0) { 6349 /* 6350 * For a share i_mac_group_allocate_rings gets 6351 * the rings from the driver, let's populate 6352 * the property for the client now. 6353 */ 6354 if (share != NULL) { 6355 mac_client_set_rings( 6356 (mac_client_handle_t)mcip, 6357 grp->mrg_cur_count, -1); 6358 } 6359 if (mac_is_primary_client(mcip) && !rxhw) 6360 mip->mi_rx_donor_grp = grp; 6361 break; 6362 } 6363 } 6364 6365 DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, 6366 mip->mi_name, int, grp->mrg_index, int, err); 6367 6368 /* 6369 * It's a dynamic group but the grouping operation 6370 * failed. 6371 */ 6372 mac_stop_group(grp); 6373 } 6374 /* We didn't find an exclusive group for this MAC client */ 6375 if (i >= mip->mi_rx_group_count) { 6376 6377 if (!need_exclgrp) 6378 return (NULL); 6379 6380 /* 6381 * If we found a candidate group then we switch the 6382 * MAC client from the candidate_group to the default 6383 * group and give the group to this MAC client. If 6384 * we didn't find a candidate_group, check if the 6385 * primary is in its own group and if it can make way 6386 * for this MAC client.
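 *
 * When the eviction happens, it is sketched by the calls below: the
 * candidate's client is switched to the default group, then the
 * vacated group is restarted and populated for the new client:
 *
 *	mac_rx_switch_group(gclient, grp, MAC_DEFAULT_RX_GROUP(mip));
 *	mac_start_group(grp);
 *	i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, donorgrp,
 *	    grp, share, need_rings);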
6387 */ 6388 if (candidate_grp == NULL && 6389 donorgrp != MAC_DEFAULT_RX_GROUP(mip) && 6390 donorgrp->mrg_cur_count >= need_rings) { 6391 candidate_grp = donorgrp; 6392 } 6393 if (candidate_grp != NULL) { 6394 boolean_t prim_grp = B_FALSE; 6395 6396 /* 6397 * Switch the MAC client from the candidate group 6398 * to the default group. If this group was the 6399 * donor group, then after the switch we need 6400 * to update the donor group too. 6401 */ 6402 grp = candidate_grp; 6403 gclient = MAC_GROUP_ONLY_CLIENT(grp); 6404 if (gclient == NULL) 6405 gclient = mac_get_grp_primary(grp); 6406 if (grp == mip->mi_rx_donor_grp) 6407 prim_grp = B_TRUE; 6408 if (mac_rx_switch_group(gclient, grp, 6409 MAC_DEFAULT_RX_GROUP(mip)) != 0) { 6410 return (NULL); 6411 } 6412 if (prim_grp) { 6413 mip->mi_rx_donor_grp = 6414 MAC_DEFAULT_RX_GROUP(mip); 6415 donorgrp = MAC_DEFAULT_RX_GROUP(mip); 6416 } 6417 6418 6419 /* 6420 * Now give this group with the required rings 6421 * to this MAC client. 6422 */ 6423 ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); 6424 if (mac_start_group(grp) != 0) 6425 return (NULL); 6426 6427 if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) 6428 return (grp); 6429 6430 donor_grp_rcnt = donorgrp->mrg_cur_count - 1; 6431 ASSERT(grp->mrg_cur_count == 0); 6432 ASSERT(donor_grp_rcnt >= need_rings); 6433 err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, 6434 donorgrp, grp, share, need_rings); 6435 if (err == 0) { 6436 /* 6437 * For a share i_mac_group_allocate_rings gets 6438 * the rings from the driver, let's populate 6439 * the property for the client now. 6440 */ 6441 if (share != NULL) { 6442 mac_client_set_rings( 6443 (mac_client_handle_t)mcip, 6444 grp->mrg_cur_count, -1); 6445 } 6446 DTRACE_PROBE2(rx__group__reserved, 6447 char *, mip->mi_name, int, grp->mrg_index); 6448 return (grp); 6449 } 6450 DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, 6451 mip->mi_name, int, grp->mrg_index, int, err); 6452 mac_stop_group(grp); 6453 } 6454 return (NULL); 6455 } 6456 ASSERT(grp != NULL); 6457 6458 DTRACE_PROBE2(rx__group__reserved, 6459 char *, mip->mi_name, int, grp->mrg_index); 6460 return (grp); 6461 } 6462 6463 /* 6464 * mac_release_rx_group() 6465 * 6466 * This is called when there are no clients left for the group. 6467 * The group is stopped and marked MAC_GROUP_STATE_REGISTERED, 6468 * and if it is a non-default group, the shares are removed and 6469 * all rings are assigned back to the default group. 6470 */ 6471 void 6472 mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) 6473 { 6474 mac_impl_t *mip = mcip->mci_mip; 6475 mac_ring_t *ring; 6476 6477 ASSERT(group != MAC_DEFAULT_RX_GROUP(mip)); 6478 6479 if (mip->mi_rx_donor_grp == group) 6480 mip->mi_rx_donor_grp = MAC_DEFAULT_RX_GROUP(mip); 6481 6482 /* 6483 * This is the case where there are no clients left. Any 6484 * SRS etc on this group have also been quiesced. 6485 */ 6486 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 6487 if (ring->mr_classify_type == MAC_HW_CLASSIFIER) { 6488 ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); 6489 /* 6490 * Remove the SRS associated with the HW ring. 6491 * As a result, polling will be disabled.
6492 */ 6493 ring->mr_srs = NULL; 6494 } 6495 ASSERT(group->mrg_state < MAC_GROUP_STATE_RESERVED || 6496 ring->mr_state == MR_INUSE); 6497 if (ring->mr_state == MR_INUSE) { 6498 mac_stop_ring(ring); 6499 ring->mr_flag = 0; 6500 } 6501 } 6502 6503 /* remove group from share */ 6504 if (mcip->mci_share != NULL) { 6505 mip->mi_share_capab.ms_sremove(mcip->mci_share, 6506 group->mrg_driver); 6507 } 6508 6509 if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6510 mac_ring_t *ring; 6511 6512 /* 6513 * Rings were dynamically allocated to group. 6514 * Move rings back to default group. 6515 */ 6516 while ((ring = group->mrg_rings) != NULL) { 6517 (void) mac_group_mov_ring(mip, mip->mi_rx_donor_grp, 6518 ring); 6519 } 6520 } 6521 mac_stop_group(group); 6522 /* 6523 * Possible improvement: See if we can assign the group just released 6524 * to another client of the mip. 6525 */ 6526 } 6527 6528 /* 6529 * When we move the primary's mac address between groups, we need to also 6530 * take all the clients sharing the same mac address along with it (VLANs). 6531 * We remove the mac address for such clients from the group after quiescing 6532 * them. When we add the mac address we restart the client. Note that 6533 * the primary's mac address is removed from the group after all the 6534 * other clients sharing the address are removed. Similarly, the primary's 6535 * mac address is added before all the other clients' mac addresses are 6536 * added. While grp is the group where the clients reside, tgrp is 6537 * the group where the addresses have to be added. 6538 */ 6539 static void 6540 mac_rx_move_macaddr_prim(mac_client_impl_t *mcip, mac_group_t *grp, 6541 mac_group_t *tgrp, uint8_t *maddr, boolean_t add) 6542 { 6543 mac_impl_t *mip = mcip->mci_mip; 6544 mac_grp_client_t *mgcp = grp->mrg_clients; 6545 mac_client_impl_t *gmcip; 6546 boolean_t prim; 6547 6548 prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; 6549 6550 /* 6551 * If the clients are in a non-default group, we just have to 6552 * walk the group's client list. If it is in the default group 6553 * (which will be shared by other clients as well), we need to 6554 * check if the unicast address matches mcip's unicast. 6555 */ 6556 while (mgcp != NULL) { 6557 gmcip = mgcp->mgc_client; 6558 if (gmcip != mcip && 6559 (grp != MAC_DEFAULT_RX_GROUP(mip) || 6560 mcip->mci_unicast == gmcip->mci_unicast)) { 6561 if (!add) { 6562 mac_rx_client_quiesce( 6563 (mac_client_handle_t)gmcip); 6564 (void) mac_remove_macaddr(mcip->mci_unicast); 6565 } else { 6566 (void) mac_add_macaddr(mip, tgrp, maddr, prim); 6567 mac_rx_client_restart( 6568 (mac_client_handle_t)gmcip); 6569 } 6570 } 6571 mgcp = mgcp->mgc_next; 6572 } 6573 } 6574 6575 6576 /* 6577 * Move the MAC address from fgrp to tgrp. If this is the primary client, 6578 * we need to take any VLANs etc. together too.
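 *
 * The resulting order of operations in mac_rx_move_macaddr() below is
 * roughly:
 *
 *	1. quiesce mcip and remove the VLAN clients' use of the
 *	   address from fgrp (mac_rx_move_macaddr_prim())
 *	2. remove the primary's address from fgrp
 *	3. program the address into tgrp's h/w classifier
 *	4. restart mcip and re-add the VLAN clients in tgrp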
6579 */ 6580 static int 6581 mac_rx_move_macaddr(mac_client_impl_t *mcip, mac_group_t *fgrp, 6582 mac_group_t *tgrp) 6583 { 6584 mac_impl_t *mip = mcip->mci_mip; 6585 uint8_t maddr[MAXMACADDRLEN]; 6586 int err = 0; 6587 boolean_t prim; 6588 boolean_t multiclnt = B_FALSE; 6589 6590 mac_rx_client_quiesce((mac_client_handle_t)mcip); 6591 ASSERT(mcip->mci_unicast != NULL); 6592 bcopy(mcip->mci_unicast->ma_addr, maddr, mcip->mci_unicast->ma_len); 6593 6594 prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; 6595 if (mcip->mci_unicast->ma_nusers > 1) { 6596 mac_rx_move_macaddr_prim(mcip, fgrp, NULL, maddr, B_FALSE); 6597 multiclnt = B_TRUE; 6598 } 6599 ASSERT(mcip->mci_unicast->ma_nusers == 1); 6600 err = mac_remove_macaddr(mcip->mci_unicast); 6601 if (err != 0) { 6602 mac_rx_client_restart((mac_client_handle_t)mcip); 6603 if (multiclnt) { 6604 mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, 6605 B_TRUE); 6606 } 6607 return (err); 6608 } 6609 /* 6610 * Program the H/W Classifier first, if this fails we need 6611 * not proceed with the other stuff. 6612 */ 6613 if ((err = mac_add_macaddr(mip, tgrp, maddr, prim)) != 0) { 6614 /* Revert back the H/W Classifier */ 6615 if ((err = mac_add_macaddr(mip, fgrp, maddr, prim)) != 0) { 6616 /* 6617 * This should not fail now since it worked earlier, 6618 * should we panic? 6619 */ 6620 cmn_err(CE_WARN, 6621 "mac_rx_switch_group: switching %p back" 6622 " to group %p failed!!", (void *)mcip, 6623 (void *)fgrp); 6624 } 6625 mac_rx_client_restart((mac_client_handle_t)mcip); 6626 if (multiclnt) { 6627 mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, 6628 B_TRUE); 6629 } 6630 return (err); 6631 } 6632 mcip->mci_unicast = mac_find_macaddr(mip, maddr); 6633 mac_rx_client_restart((mac_client_handle_t)mcip); 6634 if (multiclnt) 6635 mac_rx_move_macaddr_prim(mcip, fgrp, tgrp, maddr, B_TRUE); 6636 return (err); 6637 } 6638 6639 /* 6640 * Switch the MAC client from one group to another. This means we need 6641 * to remove the MAC address from the group, remove the MAC client, 6642 * teardown the SRSs and revert the group state. Then, we add the client 6643 * to the destination group, set the SRSs, and add the MAC address to the 6644 * group. 6645 */ 6646 int 6647 mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, 6648 mac_group_t *tgrp) 6649 { 6650 int err; 6651 mac_group_state_t next_state; 6652 mac_client_impl_t *group_only_mcip; 6653 mac_client_impl_t *gmcip; 6654 mac_impl_t *mip = mcip->mci_mip; 6655 mac_grp_client_t *mgcp; 6656 6657 ASSERT(fgrp == mcip->mci_flent->fe_rx_ring_group); 6658 6659 if ((err = mac_rx_move_macaddr(mcip, fgrp, tgrp)) != 0) 6660 return (err); 6661 6662 /* 6663 * The group might be reserved, but SRSs may not be set up, e.g. 6664 * primary and its vlans using a reserved group. 
6665 */ 6666 if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED && 6667 MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { 6668 mac_rx_srs_group_teardown(mcip->mci_flent, B_TRUE); 6669 } 6670 if (fgrp != MAC_DEFAULT_RX_GROUP(mip)) { 6671 mgcp = fgrp->mrg_clients; 6672 while (mgcp != NULL) { 6673 gmcip = mgcp->mgc_client; 6674 mgcp = mgcp->mgc_next; 6675 mac_group_remove_client(fgrp, gmcip); 6676 mac_group_add_client(tgrp, gmcip); 6677 gmcip->mci_flent->fe_rx_ring_group = tgrp; 6678 } 6679 mac_release_rx_group(mcip, fgrp); 6680 ASSERT(MAC_GROUP_NO_CLIENT(fgrp)); 6681 mac_set_group_state(fgrp, MAC_GROUP_STATE_REGISTERED); 6682 } else { 6683 mac_group_remove_client(fgrp, mcip); 6684 mac_group_add_client(tgrp, mcip); 6685 mcip->mci_flent->fe_rx_ring_group = tgrp; 6686 /* 6687 * If there are other clients (VLANs) sharing this address 6688 * we should be here only for the primary. 6689 */ 6690 if (mcip->mci_unicast->ma_nusers > 1) { 6691 /* 6692 * We need to move all the clients that are using 6693 * this h/w address. 6694 */ 6695 mgcp = fgrp->mrg_clients; 6696 while (mgcp != NULL) { 6697 gmcip = mgcp->mgc_client; 6698 mgcp = mgcp->mgc_next; 6699 if (mcip->mci_unicast == gmcip->mci_unicast) { 6700 mac_group_remove_client(fgrp, gmcip); 6701 mac_group_add_client(tgrp, gmcip); 6702 gmcip->mci_flent->fe_rx_ring_group = 6703 tgrp; 6704 } 6705 } 6706 } 6707 /* 6708 * The default group will still take the multicast, 6709 * broadcast traffic etc., so it won't go to 6710 * MAC_GROUP_STATE_REGISTERED. 6711 */ 6712 if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED) 6713 mac_rx_group_unmark(fgrp, MR_CONDEMNED); 6714 mac_set_group_state(fgrp, MAC_GROUP_STATE_SHARED); 6715 } 6716 next_state = mac_group_next_state(tgrp, &group_only_mcip, 6717 MAC_DEFAULT_RX_GROUP(mip), B_TRUE); 6718 mac_set_group_state(tgrp, next_state); 6719 /* 6720 * If the destination group is reserved, setup the SRSs etc. 6721 */ 6722 if (tgrp->mrg_state == MAC_GROUP_STATE_RESERVED) { 6723 mac_rx_srs_group_setup(mcip, mcip->mci_flent, SRST_LINK); 6724 mac_fanout_setup(mcip, mcip->mci_flent, 6725 MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, mcip, NULL, 6726 NULL); 6727 mac_rx_group_unmark(tgrp, MR_INCIPIENT); 6728 } else { 6729 mac_rx_switch_grp_to_sw(tgrp); 6730 } 6731 return (0); 6732 } 6733 6734 /* 6735 * Reserves a TX group for the specified share. Invoked by mac_tx_srs_setup() 6736 * when a share was allocated to the client. 6737 */ 6738 mac_group_t * 6739 mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move) 6740 { 6741 mac_impl_t *mip = mcip->mci_mip; 6742 mac_group_t *grp = NULL; 6743 int rv; 6744 int i; 6745 int err; 6746 mac_group_t *defgrp; 6747 mac_share_handle_t share = mcip->mci_share; 6748 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 6749 int nrings; 6750 int defnrings; 6751 boolean_t need_exclgrp = B_FALSE; 6752 int need_rings = 0; 6753 mac_group_t *candidate_grp = NULL; 6754 mac_client_impl_t *gclient; 6755 mac_resource_props_t *gmrp; 6756 boolean_t txhw = mrp->mrp_mask & MRP_TX_RINGS; 6757 boolean_t unspec = mrp->mrp_mask & MRP_TXRINGS_UNSPEC; 6758 boolean_t isprimary; 6759 6760 isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; 6761 /* 6762 * When we come here for a VLAN on the primary (dladm create-vlan), 6763 * we need to pair it along with the primary (to keep it consistent 6764 * with the RX side). So, we check if the primary is already assigned 6765 * to a group and return the group if so. The other way is also 6766 * true, i.e. the VLAN is already created and now we are plumbing 6767 * the primary. 
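 *
 * As a hypothetical example, either administrative order below should
 * leave the primary and its VLAN in the same TX group:
 *
 *	dladm create-vlan -l net0 -v 2		then plumb net0, or
 *	plumb net0				then create the VLAN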
6768 */ 6769 if (!move && isprimary) { 6770 for (gclient = mip->mi_clients_list; gclient != NULL; 6771 gclient = gclient->mci_client_next) { 6772 if (gclient->mci_flent->fe_type & FLOW_PRIMARY_MAC && 6773 gclient->mci_flent->fe_tx_ring_group != NULL) { 6774 return (gclient->mci_flent->fe_tx_ring_group); 6775 } 6776 } 6777 } 6778 6779 if (mip->mi_tx_groups == NULL || mip->mi_tx_group_count == 0) 6780 return (NULL); 6781 6782 /* For dynamic groups, default unspec to 1 */ 6783 if (txhw && unspec && 6784 mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6785 mrp->mrp_ntxrings = 1; 6786 } 6787 /* 6788 * For static grouping we allow only specifying rings=0 and 6789 * unspecified 6790 */ 6791 if (txhw && mrp->mrp_ntxrings > 0 && 6792 mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC) { 6793 return (NULL); 6794 } 6795 6796 if (txhw) { 6797 /* 6798 * We have explicitly asked for a group (with ntxrings, 6799 * if unspec). 6800 */ 6801 if (unspec || mrp->mrp_ntxrings > 0) { 6802 need_exclgrp = B_TRUE; 6803 need_rings = mrp->mrp_ntxrings; 6804 } else if (mrp->mrp_ntxrings == 0) { 6805 /* 6806 * We have asked for a software group. 6807 */ 6808 return (NULL); 6809 } 6810 } 6811 defgrp = MAC_DEFAULT_TX_GROUP(mip); 6812 /* 6813 * The number of rings that the default group can donate. 6814 * We need to leave at least one ring - the default ring - in 6815 * this group. 6816 */ 6817 defnrings = defgrp->mrg_cur_count - 1; 6818 6819 /* 6820 * Primary gets default group unless explicitly told not 6821 * to (i.e. rings > 0). 6822 */ 6823 if (isprimary && !need_exclgrp) 6824 return (NULL); 6825 6826 nrings = (mrp->mrp_mask & MRP_TX_RINGS) != 0 ? mrp->mrp_ntxrings : 1; 6827 for (i = 0; i < mip->mi_tx_group_count; i++) { 6828 grp = &mip->mi_tx_groups[i]; 6829 if ((grp->mrg_state == MAC_GROUP_STATE_RESERVED) || 6830 (grp->mrg_state == MAC_GROUP_STATE_UNINIT)) { 6831 /* 6832 * Select a candidate for replacement if we don't 6833 * get an exclusive group. A candidate group is one 6834 * that didn't ask for an exclusive group, but got 6835 * one and it has enough rings (combined with what 6836 * the default group can donate) for the new MAC 6837 * client. 6838 */ 6839 if (grp->mrg_state == MAC_GROUP_STATE_RESERVED && 6840 candidate_grp == NULL) { 6841 gclient = MAC_GROUP_ONLY_CLIENT(grp); 6842 if (gclient == NULL) 6843 gclient = mac_get_grp_primary(grp); 6844 gmrp = MCIP_RESOURCE_PROPS(gclient); 6845 if (gclient->mci_share == NULL && 6846 (gmrp->mrp_mask & MRP_TX_RINGS) == 0 && 6847 (unspec || 6848 (grp->mrg_cur_count + defnrings) >= 6849 need_rings)) { 6850 candidate_grp = grp; 6851 } 6852 } 6853 continue; 6854 } 6855 /* 6856 * If the default can't donate let's just walk and 6857 * see if someone can vacate a group, so that we have 6858 * enough rings for this. 6859 */ 6860 if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC || 6861 nrings <= defnrings) { 6862 if (grp->mrg_state == MAC_GROUP_STATE_REGISTERED) { 6863 rv = mac_start_group(grp); 6864 ASSERT(rv == 0); 6865 } 6866 break; 6867 } 6868 } 6869 6870 /* The default group */ 6871 if (i >= mip->mi_tx_group_count) { 6872 /* 6873 * If we need an exclusive group and have identified a 6874 * candidate group we switch the MAC client from the 6875 * candidate group to the default group and give the 6876 * candidate group to this client. 6877 */ 6878 if (need_exclgrp && candidate_grp != NULL) { 6879 /* 6880 * Switch the MAC client from the candidate group 6881 * to the default group. 
6882 */ 6883 grp = candidate_grp; 6884 gclient = MAC_GROUP_ONLY_CLIENT(grp); 6885 if (gclient == NULL) 6886 gclient = mac_get_grp_primary(grp); 6887 mac_tx_client_quiesce((mac_client_handle_t)gclient); 6888 mac_tx_switch_group(gclient, grp, defgrp); 6889 mac_tx_client_restart((mac_client_handle_t)gclient); 6890 6891 /* 6892 * Give the candidate group with the specified number 6893 * of rings to this MAC client. 6894 */ 6895 ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); 6896 rv = mac_start_group(grp); 6897 ASSERT(rv == 0); 6898 6899 if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) 6900 return (grp); 6901 6902 ASSERT(grp->mrg_cur_count == 0); 6903 ASSERT(defgrp->mrg_cur_count > need_rings); 6904 6905 err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, 6906 defgrp, grp, share, need_rings); 6907 if (err == 0) { 6908 /* 6909 * For a share i_mac_group_allocate_rings gets 6910 * the rings from the driver, let's populate 6911 * the property for the client now. 6912 */ 6913 if (share != NULL) { 6914 mac_client_set_rings( 6915 (mac_client_handle_t)mcip, -1, 6916 grp->mrg_cur_count); 6917 } 6918 mip->mi_tx_group_free--; 6919 return (grp); 6920 } 6921 DTRACE_PROBE3(tx__group__reserve__alloc__rings, char *, 6922 mip->mi_name, int, grp->mrg_index, int, err); 6923 mac_stop_group(grp); 6924 } 6925 return (NULL); 6926 } 6927 /* 6928 * We got an exclusive group, but it is not dynamic. 6929 */ 6930 if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) { 6931 mip->mi_tx_group_free--; 6932 return (grp); 6933 } 6934 6935 rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, defgrp, grp, 6936 share, nrings); 6937 if (rv != 0) { 6938 DTRACE_PROBE3(tx__group__reserve__alloc__rings, 6939 char *, mip->mi_name, int, grp->mrg_index, int, rv); 6940 mac_stop_group(grp); 6941 return (NULL); 6942 } 6943 /* 6944 * For a share i_mac_group_allocate_rings gets the rings from the 6945 * driver, let's populate the property for the client now. 6946 */ 6947 if (share != NULL) { 6948 mac_client_set_rings((mac_client_handle_t)mcip, -1, 6949 grp->mrg_cur_count); 6950 } 6951 mip->mi_tx_group_free--; 6952 return (grp); 6953 } 6954 6955 void 6956 mac_release_tx_group(mac_client_impl_t *mcip, mac_group_t *grp) 6957 { 6958 mac_impl_t *mip = mcip->mci_mip; 6959 mac_share_handle_t share = mcip->mci_share; 6960 mac_ring_t *ring; 6961 mac_soft_ring_set_t *srs = MCIP_TX_SRS(mcip); 6962 mac_group_t *defgrp; 6963 6964 defgrp = MAC_DEFAULT_TX_GROUP(mip); 6965 if (srs != NULL) { 6966 if (srs->srs_soft_ring_count > 0) { 6967 for (ring = grp->mrg_rings; ring != NULL; 6968 ring = ring->mr_next) { 6969 ASSERT(mac_tx_srs_ring_present(srs, ring)); 6970 mac_tx_invoke_callbacks(mcip, 6971 (mac_tx_cookie_t) 6972 mac_tx_srs_get_soft_ring(srs, ring)); 6973 mac_tx_srs_del_ring(srs, ring); 6974 } 6975 } else { 6976 ASSERT(srs->srs_tx.st_arg2 != NULL); 6977 srs->srs_tx.st_arg2 = NULL; 6978 mac_srs_stat_delete(srs); 6979 } 6980 } 6981 if (share != NULL) 6982 mip->mi_share_capab.ms_sremove(share, grp->mrg_driver); 6983 6984 /* move the ring back to the pool */ 6985 if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6986 while ((ring = grp->mrg_rings) != NULL) 6987 (void) mac_group_mov_ring(mip, defgrp, ring); 6988 } 6989 mac_stop_group(grp); 6990 mip->mi_tx_group_free++; 6991 } 6992 6993 /* 6994 * Disassociate a MAC client from a group, i.e go through the rings in the 6995 * group and delete all the soft rings tied to them. 
6996 */ 6997 static void 6998 mac_tx_dismantle_soft_rings(mac_group_t *fgrp, flow_entry_t *flent) 6999 { 7000 mac_client_impl_t *mcip = flent->fe_mcip; 7001 mac_soft_ring_set_t *tx_srs; 7002 mac_srs_tx_t *tx; 7003 mac_ring_t *ring; 7004 7005 tx_srs = flent->fe_tx_srs; 7006 tx = &tx_srs->srs_tx; 7007 7008 /* Single ring case, we haven't created any soft rings */ 7009 if (tx->st_mode == SRS_TX_BW || tx->st_mode == SRS_TX_SERIALIZE || 7010 tx->st_mode == SRS_TX_DEFAULT) { 7011 tx->st_arg2 = NULL; 7012 mac_srs_stat_delete(tx_srs); 7013 /* Fanout case, where we have to dismantle the soft rings */ 7014 } else { 7015 for (ring = fgrp->mrg_rings; ring != NULL; 7016 ring = ring->mr_next) { 7017 ASSERT(mac_tx_srs_ring_present(tx_srs, ring)); 7018 mac_tx_invoke_callbacks(mcip, 7019 (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(tx_srs, 7020 ring)); 7021 mac_tx_srs_del_ring(tx_srs, ring); 7022 } 7023 ASSERT(tx->st_arg2 == NULL); 7024 } 7025 } 7026 7027 /* 7028 * Switch the MAC client from one group to another. This means we need 7029 * to remove the MAC client, teardown the SRSs and revert the group state. 7030 * Then, we add the client to the destination group, set the SRSs etc. 7031 */ 7032 void 7033 mac_tx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, 7034 mac_group_t *tgrp) 7035 { 7036 mac_client_impl_t *group_only_mcip; 7037 mac_impl_t *mip = mcip->mci_mip; 7038 flow_entry_t *flent = mcip->mci_flent; 7039 mac_group_t *defgrp; 7040 mac_grp_client_t *mgcp; 7041 mac_client_impl_t *gmcip; 7042 flow_entry_t *gflent; 7043 7044 defgrp = MAC_DEFAULT_TX_GROUP(mip); 7045 ASSERT(fgrp == flent->fe_tx_ring_group); 7046 7047 if (fgrp == defgrp) { 7048 /* 7049 * If this is the primary we need to find any VLANs on 7050 * the primary and move them too. 7051 */ 7052 mac_group_remove_client(fgrp, mcip); 7053 mac_tx_dismantle_soft_rings(fgrp, flent); 7054 if (mcip->mci_unicast->ma_nusers > 1) { 7055 mgcp = fgrp->mrg_clients; 7056 while (mgcp != NULL) { 7057 gmcip = mgcp->mgc_client; 7058 mgcp = mgcp->mgc_next; 7059 if (mcip->mci_unicast != gmcip->mci_unicast) 7060 continue; 7061 mac_tx_client_quiesce( 7062 (mac_client_handle_t)gmcip); 7063 7064 gflent = gmcip->mci_flent; 7065 mac_group_remove_client(fgrp, gmcip); 7066 mac_tx_dismantle_soft_rings(fgrp, gflent); 7067 7068 mac_group_add_client(tgrp, gmcip); 7069 gflent->fe_tx_ring_group = tgrp; 7070 /* We could directly set this to SHARED */ 7071 tgrp->mrg_state = mac_group_next_state(tgrp, 7072 &group_only_mcip, defgrp, B_FALSE); 7073 7074 mac_tx_srs_group_setup(gmcip, gflent, 7075 SRST_LINK); 7076 mac_fanout_setup(gmcip, gflent, 7077 MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver, 7078 gmcip, NULL, NULL); 7079 7080 mac_tx_client_restart( 7081 (mac_client_handle_t)gmcip); 7082 } 7083 } 7084 if (MAC_GROUP_NO_CLIENT(fgrp)) { 7085 mac_ring_t *ring; 7086 int cnt; 7087 int ringcnt; 7088 7089 fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED; 7090 /* 7091 * Additionally, we also need to stop all 7092 * the rings in the default group, except 7093 * the default ring. The reason being 7094 * this group won't be released since it is 7095 * the default group, so the rings won't 7096 * be stopped otherwise.
7097 */ 7098 ringcnt = fgrp->mrg_cur_count; 7099 ring = fgrp->mrg_rings; 7100 for (cnt = 0; cnt < ringcnt; cnt++) { 7101 if (ring->mr_state == MR_INUSE && 7102 ring != 7103 (mac_ring_t *)mip->mi_default_tx_ring) { 7104 mac_stop_ring(ring); 7105 ring->mr_flag = 0; 7106 } 7107 ring = ring->mr_next; 7108 } 7109 } else if (MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { 7110 fgrp->mrg_state = MAC_GROUP_STATE_RESERVED; 7111 } else { 7112 ASSERT(fgrp->mrg_state == MAC_GROUP_STATE_SHARED); 7113 } 7114 } else { 7115 /* 7116 * We could have VLANs sharing the non-default group with 7117 * the primary. 7118 */ 7119 mgcp = fgrp->mrg_clients; 7120 while (mgcp != NULL) { 7121 gmcip = mgcp->mgc_client; 7122 mgcp = mgcp->mgc_next; 7123 if (gmcip == mcip) 7124 continue; 7125 mac_tx_client_quiesce((mac_client_handle_t)gmcip); 7126 gflent = gmcip->mci_flent; 7127 7128 mac_group_remove_client(fgrp, gmcip); 7129 mac_tx_dismantle_soft_rings(fgrp, gflent); 7130 7131 mac_group_add_client(tgrp, gmcip); 7132 gflent->fe_tx_ring_group = tgrp; 7133 /* We could directly set this to SHARED */ 7134 tgrp->mrg_state = mac_group_next_state(tgrp, 7135 &group_only_mcip, defgrp, B_FALSE); 7136 mac_tx_srs_group_setup(gmcip, gflent, SRST_LINK); 7137 mac_fanout_setup(gmcip, gflent, 7138 MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver, 7139 gmcip, NULL, NULL); 7140 7141 mac_tx_client_restart((mac_client_handle_t)gmcip); 7142 } 7143 mac_group_remove_client(fgrp, mcip); 7144 mac_release_tx_group(mcip, fgrp); 7145 fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED; 7146 } 7147 7148 /* Add it to the tgroup */ 7149 mac_group_add_client(tgrp, mcip); 7150 flent->fe_tx_ring_group = tgrp; 7151 tgrp->mrg_state = mac_group_next_state(tgrp, &group_only_mcip, 7152 defgrp, B_FALSE); 7153 7154 mac_tx_srs_group_setup(mcip, flent, SRST_LINK); 7155 mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), 7156 mac_rx_deliver, mcip, NULL, NULL); 7157 } 7158 7159 /* 7160 * This is a 1-time control path activity initiated by the client (IP). 7161 * The mac perimeter protects against other simultaneous control activities, 7162 * for example an ioctl that attempts to change the degree of fanout and 7163 * increase or decrease the number of softrings associated with this Tx SRS. 
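 *
 * A sketch of the client-side usage via mac_client_tx_notify() below;
 * my_notify_fn and my_arg are hypothetical names, and a NULL callback
 * removes the registration:
 *
 *	handle = mac_client_tx_notify(mch, my_notify_fn, my_arg);
 *	...
 *	(void) mac_client_tx_notify(mch, NULL, handle);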
7164 */ 7165 static mac_tx_notify_cb_t * 7166 mac_client_tx_notify_add(mac_client_impl_t *mcip, 7167 mac_tx_notify_t notify, void *arg) 7168 { 7169 mac_cb_info_t *mcbi; 7170 mac_tx_notify_cb_t *mtnfp; 7171 7172 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 7173 7174 mtnfp = kmem_zalloc(sizeof (mac_tx_notify_cb_t), KM_SLEEP); 7175 mtnfp->mtnf_fn = notify; 7176 mtnfp->mtnf_arg = arg; 7177 mtnfp->mtnf_link.mcb_objp = mtnfp; 7178 mtnfp->mtnf_link.mcb_objsize = sizeof (mac_tx_notify_cb_t); 7179 mtnfp->mtnf_link.mcb_flags = MCB_TX_NOTIFY_CB_T; 7180 7181 mcbi = &mcip->mci_tx_notify_cb_info; 7182 mutex_enter(mcbi->mcbi_lockp); 7183 mac_callback_add(mcbi, &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link); 7184 mutex_exit(mcbi->mcbi_lockp); 7185 return (mtnfp); 7186 } 7187 7188 static void 7189 mac_client_tx_notify_remove(mac_client_impl_t *mcip, mac_tx_notify_cb_t *mtnfp) 7190 { 7191 mac_cb_info_t *mcbi; 7192 mac_cb_t **cblist; 7193 7194 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 7195 7196 if (!mac_callback_find(&mcip->mci_tx_notify_cb_info, 7197 &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link)) { 7198 cmn_err(CE_WARN, 7199 "mac_client_tx_notify_remove: callback not " 7200 "found, mcip 0x%p mtnfp 0x%p", (void *)mcip, (void *)mtnfp); 7201 return; 7202 } 7203 7204 mcbi = &mcip->mci_tx_notify_cb_info; 7205 cblist = &mcip->mci_tx_notify_cb_list; 7206 mutex_enter(mcbi->mcbi_lockp); 7207 if (mac_callback_remove(mcbi, cblist, &mtnfp->mtnf_link)) 7208 kmem_free(mtnfp, sizeof (mac_tx_notify_cb_t)); 7209 else 7210 mac_callback_remove_wait(&mcip->mci_tx_notify_cb_info); 7211 mutex_exit(mcbi->mcbi_lockp); 7212 } 7213 7214 /* 7215 * mac_client_tx_notify(): 7216 * call to add and remove flow control callback routine. 7217 */ 7218 mac_tx_notify_handle_t 7219 mac_client_tx_notify(mac_client_handle_t mch, mac_tx_notify_t callb_func, 7220 void *ptr) 7221 { 7222 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 7223 mac_tx_notify_cb_t *mtnfp = NULL; 7224 7225 i_mac_perim_enter(mcip->mci_mip); 7226 7227 if (callb_func != NULL) { 7228 /* Add a notify callback */ 7229 mtnfp = mac_client_tx_notify_add(mcip, callb_func, ptr); 7230 } else { 7231 mac_client_tx_notify_remove(mcip, (mac_tx_notify_cb_t *)ptr); 7232 } 7233 i_mac_perim_exit(mcip->mci_mip); 7234 7235 return ((mac_tx_notify_handle_t)mtnfp); 7236 } 7237 7238 void 7239 mac_bridge_vectors(mac_bridge_tx_t txf, mac_bridge_rx_t rxf, 7240 mac_bridge_ref_t reff, mac_bridge_ls_t lsf) 7241 { 7242 mac_bridge_tx_cb = txf; 7243 mac_bridge_rx_cb = rxf; 7244 mac_bridge_ref_cb = reff; 7245 mac_bridge_ls_cb = lsf; 7246 } 7247 7248 int 7249 mac_bridge_set(mac_handle_t mh, mac_handle_t link) 7250 { 7251 mac_impl_t *mip = (mac_impl_t *)mh; 7252 int retv; 7253 7254 mutex_enter(&mip->mi_bridge_lock); 7255 if (mip->mi_bridge_link == NULL) { 7256 mip->mi_bridge_link = link; 7257 retv = 0; 7258 } else { 7259 retv = EBUSY; 7260 } 7261 mutex_exit(&mip->mi_bridge_lock); 7262 if (retv == 0) { 7263 mac_poll_state_change(mh, B_FALSE); 7264 mac_capab_update(mh); 7265 } 7266 return (retv); 7267 } 7268 7269 /* 7270 * Disable bridging on the indicated link. 
7271 */ 7272 void 7273 mac_bridge_clear(mac_handle_t mh, mac_handle_t link) 7274 { 7275 mac_impl_t *mip = (mac_impl_t *)mh; 7276 7277 mutex_enter(&mip->mi_bridge_lock); 7278 ASSERT(mip->mi_bridge_link == link); 7279 mip->mi_bridge_link = NULL; 7280 mutex_exit(&mip->mi_bridge_lock); 7281 mac_poll_state_change(mh, B_TRUE); 7282 mac_capab_update(mh); 7283 } 7284 7285 void 7286 mac_no_active(mac_handle_t mh) 7287 { 7288 mac_impl_t *mip = (mac_impl_t *)mh; 7289 7290 i_mac_perim_enter(mip); 7291 mip->mi_state_flags |= MIS_NO_ACTIVE; 7292 i_mac_perim_exit(mip); 7293 } 7294 7295 /* 7296 * Walk the primary VLAN clients whenever the primary's rings property 7297 * changes and update the mac_resource_props_t for the VLAN's client. 7298 * We need to do this since we don't support setting these properties 7299 * on the primary's VLAN clients, but the VLAN clients have to 7300 * follow the primary w.r.t the rings property; 7301 */ 7302 void 7303 mac_set_prim_vlan_rings(mac_impl_t *mip, mac_resource_props_t *mrp) 7304 { 7305 mac_client_impl_t *vmcip; 7306 mac_resource_props_t *vmrp; 7307 7308 for (vmcip = mip->mi_clients_list; vmcip != NULL; 7309 vmcip = vmcip->mci_client_next) { 7310 if (!(vmcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) || 7311 mac_client_vid((mac_client_handle_t)vmcip) == 7312 VLAN_ID_NONE) { 7313 continue; 7314 } 7315 vmrp = MCIP_RESOURCE_PROPS(vmcip); 7316 7317 vmrp->mrp_nrxrings = mrp->mrp_nrxrings; 7318 if (mrp->mrp_mask & MRP_RX_RINGS) 7319 vmrp->mrp_mask |= MRP_RX_RINGS; 7320 else if (vmrp->mrp_mask & MRP_RX_RINGS) 7321 vmrp->mrp_mask &= ~MRP_RX_RINGS; 7322 7323 vmrp->mrp_ntxrings = mrp->mrp_ntxrings; 7324 if (mrp->mrp_mask & MRP_TX_RINGS) 7325 vmrp->mrp_mask |= MRP_TX_RINGS; 7326 else if (vmrp->mrp_mask & MRP_TX_RINGS) 7327 vmrp->mrp_mask &= ~MRP_TX_RINGS; 7328 7329 if (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) 7330 vmrp->mrp_mask |= MRP_RXRINGS_UNSPEC; 7331 else 7332 vmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; 7333 7334 if (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) 7335 vmrp->mrp_mask |= MRP_TXRINGS_UNSPEC; 7336 else 7337 vmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; 7338 } 7339 } 7340 7341 /* 7342 * We are adding or removing ring(s) from a group. The source for taking 7343 * rings is the default group. The destination for giving rings back is 7344 * the default group. 7345 */ 7346 int 7347 mac_group_ring_modify(mac_client_impl_t *mcip, mac_group_t *group, 7348 mac_group_t *defgrp) 7349 { 7350 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 7351 uint_t modify; 7352 int count; 7353 mac_ring_t *ring; 7354 mac_ring_t *next; 7355 mac_impl_t *mip = mcip->mci_mip; 7356 mac_ring_t **rings; 7357 uint_t ringcnt; 7358 int i = 0; 7359 boolean_t rx_group = group->mrg_type == MAC_RING_TYPE_RX; 7360 int start; 7361 int end; 7362 mac_group_t *tgrp; 7363 int j; 7364 int rv = 0; 7365 7366 /* 7367 * If we are asked for just a group, we give 1 ring, else 7368 * the specified number of rings. 7369 */ 7370 if (rx_group) { 7371 ringcnt = (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) ? 1: 7372 mrp->mrp_nrxrings; 7373 } else { 7374 ringcnt = (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) ? 1: 7375 mrp->mrp_ntxrings; 7376 } 7377 7378 /* don't allow modifying rings for a share for now. 
*/ 7379 ASSERT(mcip->mci_share == NULL); 7380 7381 if (ringcnt == group->mrg_cur_count) 7382 return (0); 7383 7384 if (group->mrg_cur_count > ringcnt) { 7385 modify = group->mrg_cur_count - ringcnt; 7386 if (rx_group) { 7387 if (mip->mi_rx_donor_grp == group) { 7388 ASSERT(mac_is_primary_client(mcip)); 7389 mip->mi_rx_donor_grp = defgrp; 7390 } else { 7391 defgrp = mip->mi_rx_donor_grp; 7392 } 7393 } 7394 ring = group->mrg_rings; 7395 rings = kmem_alloc(modify * sizeof (mac_ring_handle_t), 7396 KM_SLEEP); 7397 j = 0; 7398 for (count = 0; count < modify; count++) { 7399 next = ring->mr_next; 7400 rv = mac_group_mov_ring(mip, defgrp, ring); 7401 if (rv != 0) { 7402 /* cleanup on failure */ 7403 for (j = 0; j < count; j++) { 7404 (void) mac_group_mov_ring(mip, group, 7405 rings[j]); 7406 } 7407 break; 7408 } 7409 rings[j++] = ring; 7410 ring = next; 7411 } 7412 kmem_free(rings, modify * sizeof (mac_ring_handle_t)); 7413 return (rv); 7414 } 7415 if (ringcnt >= MAX_RINGS_PER_GROUP) 7416 return (EINVAL); 7417 7418 modify = ringcnt - group->mrg_cur_count; 7419 7420 if (rx_group) { 7421 if (group != mip->mi_rx_donor_grp) 7422 defgrp = mip->mi_rx_donor_grp; 7423 else 7424 /* 7425 * This is the donor group with all the remaining 7426 * rings. Default group now gets to be the donor 7427 */ 7428 mip->mi_rx_donor_grp = defgrp; 7429 start = 1; 7430 end = mip->mi_rx_group_count; 7431 } else { 7432 start = 0; 7433 end = mip->mi_tx_group_count - 1; 7434 } 7435 /* 7436 * If the default doesn't have any rings, lets see if we can 7437 * take rings given to an h/w client that doesn't need it. 7438 * For now, we just see if there is any one client that can donate 7439 * all the required rings. 7440 */ 7441 if (defgrp->mrg_cur_count < (modify + 1)) { 7442 for (i = start; i < end; i++) { 7443 if (rx_group) { 7444 tgrp = &mip->mi_rx_groups[i]; 7445 if (tgrp == group || tgrp->mrg_state < 7446 MAC_GROUP_STATE_RESERVED) { 7447 continue; 7448 } 7449 mcip = MAC_GROUP_ONLY_CLIENT(tgrp); 7450 if (mcip == NULL) 7451 mcip = mac_get_grp_primary(tgrp); 7452 ASSERT(mcip != NULL); 7453 mrp = MCIP_RESOURCE_PROPS(mcip); 7454 if ((mrp->mrp_mask & MRP_RX_RINGS) != 0) 7455 continue; 7456 if ((tgrp->mrg_cur_count + 7457 defgrp->mrg_cur_count) < (modify + 1)) { 7458 continue; 7459 } 7460 if (mac_rx_switch_group(mcip, tgrp, 7461 defgrp) != 0) { 7462 return (ENOSPC); 7463 } 7464 } else { 7465 tgrp = &mip->mi_tx_groups[i]; 7466 if (tgrp == group || tgrp->mrg_state < 7467 MAC_GROUP_STATE_RESERVED) { 7468 continue; 7469 } 7470 mcip = MAC_GROUP_ONLY_CLIENT(tgrp); 7471 if (mcip == NULL) 7472 mcip = mac_get_grp_primary(tgrp); 7473 mrp = MCIP_RESOURCE_PROPS(mcip); 7474 if ((mrp->mrp_mask & MRP_TX_RINGS) != 0) 7475 continue; 7476 if ((tgrp->mrg_cur_count + 7477 defgrp->mrg_cur_count) < (modify + 1)) { 7478 continue; 7479 } 7480 /* OK, we can switch this to s/w */ 7481 mac_tx_client_quiesce( 7482 (mac_client_handle_t)mcip); 7483 mac_tx_switch_group(mcip, tgrp, defgrp); 7484 mac_tx_client_restart( 7485 (mac_client_handle_t)mcip); 7486 } 7487 } 7488 if (defgrp->mrg_cur_count < (modify + 1)) 7489 return (ENOSPC); 7490 } 7491 if ((rv = i_mac_group_allocate_rings(mip, group->mrg_type, defgrp, 7492 group, mcip->mci_share, modify)) != 0) { 7493 return (rv); 7494 } 7495 return (0); 7496 } 7497 7498 /* 7499 * Given the poolname in mac_resource_props, find the cpupart 7500 * that is associated with this pool. The cpupart will be used 7501 * later for finding the cpus to be bound to the networking threads. 
7502 * 7503 * use_default is set B_TRUE if pools are enabled and pool_default 7504 * is returned. This avoids a 2nd lookup to set the poolname 7505 * for pool-effective. 7506 * 7507 * returns: 7508 * 7509 * NULL - pools are disabled or if the 'cpus' property is set. 7510 * cpupart of pool_default - pools are enabled and the pool 7511 * is not available or poolname is blank 7512 * cpupart of named pool - pools are enabled and the pool 7513 * is available. 7514 */ 7515 cpupart_t * 7516 mac_pset_find(mac_resource_props_t *mrp, boolean_t *use_default) 7517 { 7518 pool_t *pool; 7519 cpupart_t *cpupart; 7520 7521 *use_default = B_FALSE; 7522 7523 /* CPUs property is set */ 7524 if (mrp->mrp_mask & MRP_CPUS) 7525 return (NULL); 7526 7527 ASSERT(pool_lock_held()); 7528 7529 /* Pools are disabled, no pset */ 7530 if (pool_state == POOL_DISABLED) 7531 return (NULL); 7532 7533 /* Pools property is set */ 7534 if (mrp->mrp_mask & MRP_POOL) { 7535 if ((pool = pool_lookup_pool_by_name(mrp->mrp_pool)) == NULL) { 7536 /* Pool not found */ 7537 DTRACE_PROBE1(mac_pset_find_no_pool, char *, 7538 mrp->mrp_pool); 7539 *use_default = B_TRUE; 7540 pool = pool_default; 7541 } 7542 /* Pools property is not set */ 7543 } else { 7544 *use_default = B_TRUE; 7545 pool = pool_default; 7546 } 7547 7548 /* Find the CPU pset that corresponds to the pool */ 7549 mutex_enter(&cpu_lock); 7550 if ((cpupart = cpupart_find(pool->pool_pset->pset_id)) == NULL) { 7551 DTRACE_PROBE1(mac_find_pset_no_pset, psetid_t, 7552 pool->pool_pset->pset_id); 7553 } 7554 mutex_exit(&cpu_lock); 7555 7556 return (cpupart); 7557 } 7558 7559 void 7560 mac_set_pool_effective(boolean_t use_default, cpupart_t *cpupart, 7561 mac_resource_props_t *mrp, mac_resource_props_t *emrp) 7562 { 7563 ASSERT(pool_lock_held()); 7564 7565 if (cpupart != NULL) { 7566 emrp->mrp_mask |= MRP_POOL; 7567 if (use_default) { 7568 (void) strcpy(emrp->mrp_pool, 7569 "pool_default"); 7570 } else { 7571 ASSERT(strlen(mrp->mrp_pool) != 0); 7572 (void) strcpy(emrp->mrp_pool, 7573 mrp->mrp_pool); 7574 } 7575 } else { 7576 emrp->mrp_mask &= ~MRP_POOL; 7577 bzero(emrp->mrp_pool, MAXPATHLEN); 7578 } 7579 } 7580 7581 struct mac_pool_arg { 7582 char mpa_poolname[MAXPATHLEN]; 7583 pool_event_t mpa_what; 7584 }; 7585 7586 /*ARGSUSED*/ 7587 static uint_t 7588 mac_pool_link_update(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 7589 { 7590 struct mac_pool_arg *mpa = arg; 7591 mac_impl_t *mip = (mac_impl_t *)val; 7592 mac_client_impl_t *mcip; 7593 mac_resource_props_t *mrp, *emrp; 7594 boolean_t pool_update = B_FALSE; 7595 boolean_t pool_clear = B_FALSE; 7596 boolean_t use_default = B_FALSE; 7597 cpupart_t *cpupart = NULL; 7598 7599 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 7600 i_mac_perim_enter(mip); 7601 for (mcip = mip->mi_clients_list; mcip != NULL; 7602 mcip = mcip->mci_client_next) { 7603 pool_update = B_FALSE; 7604 pool_clear = B_FALSE; 7605 use_default = B_FALSE; 7606 mac_client_get_resources((mac_client_handle_t)mcip, mrp); 7607 emrp = MCIP_EFFECTIVE_PROPS(mcip); 7608 7609 /* 7610 * When pools are enabled 7611 */ 7612 if ((mpa->mpa_what == POOL_E_ENABLE) && 7613 ((mrp->mrp_mask & MRP_CPUS) == 0)) { 7614 mrp->mrp_mask |= MRP_POOL; 7615 pool_update = B_TRUE; 7616 } 7617 7618 /* 7619 * When pools are disabled 7620 */ 7621 if ((mpa->mpa_what == POOL_E_DISABLE) && 7622 ((mrp->mrp_mask & MRP_CPUS) == 0)) { 7623 mrp->mrp_mask |= MRP_POOL; 7624 pool_clear = B_TRUE; 7625 } 7626 7627 /* 7628 * Look for links with the pool property set and the poolname 7629 * matching the one which is 
changing. 7630 */ 7631 if (strcmp(mrp->mrp_pool, mpa->mpa_poolname) == 0) { 7632 /* 7633 * The pool associated with the link has changed. 7634 */ 7635 if (mpa->mpa_what == POOL_E_CHANGE) { 7636 mrp->mrp_mask |= MRP_POOL; 7637 pool_update = B_TRUE; 7638 } 7639 } 7640 7641 /* 7642 * This link is associated with pool_default and 7643 * pool_default has changed. 7644 */ 7645 if ((mpa->mpa_what == POOL_E_CHANGE) && 7646 (strcmp(emrp->mrp_pool, "pool_default") == 0) && 7647 (strcmp(mpa->mpa_poolname, "pool_default") == 0)) { 7648 mrp->mrp_mask |= MRP_POOL; 7649 pool_update = B_TRUE; 7650 } 7651 7652 /* 7653 * Get new list of cpus for the pool, bind network 7654 * threads to new list of cpus and update resources. 7655 */ 7656 if (pool_update) { 7657 if (MCIP_DATAPATH_SETUP(mcip)) { 7658 pool_lock(); 7659 cpupart = mac_pset_find(mrp, &use_default); 7660 mac_fanout_setup(mcip, mcip->mci_flent, mrp, 7661 mac_rx_deliver, mcip, NULL, cpupart); 7662 mac_set_pool_effective(use_default, cpupart, 7663 mrp, emrp); 7664 pool_unlock(); 7665 } 7666 mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), 7667 B_FALSE); 7668 } 7669 7670 /* 7671 * Clear the effective pool and bind network threads 7672 * to any available CPU. 7673 */ 7674 if (pool_clear) { 7675 if (MCIP_DATAPATH_SETUP(mcip)) { 7676 emrp->mrp_mask &= ~MRP_POOL; 7677 bzero(emrp->mrp_pool, MAXPATHLEN); 7678 mac_fanout_setup(mcip, mcip->mci_flent, mrp, 7679 mac_rx_deliver, mcip, NULL, NULL); 7680 } 7681 mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), 7682 B_FALSE); 7683 } 7684 } 7685 i_mac_perim_exit(mip); 7686 kmem_free(mrp, sizeof (*mrp)); 7687 return (MH_WALK_CONTINUE); 7688 } 7689 7690 static void 7691 mac_pool_update(void *arg) 7692 { 7693 mod_hash_walk(i_mac_impl_hash, mac_pool_link_update, arg); 7694 kmem_free(arg, sizeof (struct mac_pool_arg)); 7695 } 7696 7697 /* 7698 * Callback function to be executed when a noteworthy pool event 7699 * takes place. 7700 */ 7701 /* ARGSUSED */ 7702 static void 7703 mac_pool_event_cb(pool_event_t what, poolid_t id, void *arg) 7704 { 7705 pool_t *pool; 7706 char *poolname = NULL; 7707 struct mac_pool_arg *mpa; 7708 7709 pool_lock(); 7710 mpa = kmem_zalloc(sizeof (struct mac_pool_arg), KM_SLEEP); 7711 7712 switch (what) { 7713 case POOL_E_ENABLE: 7714 case POOL_E_DISABLE: 7715 break; 7716 7717 case POOL_E_CHANGE: 7718 pool = pool_lookup_pool_by_id(id); 7719 if (pool == NULL) { 7720 kmem_free(mpa, sizeof (struct mac_pool_arg)); 7721 pool_unlock(); 7722 return; 7723 } 7724 pool_get_name(pool, &poolname); 7725 (void) strlcpy(mpa->mpa_poolname, poolname, 7726 sizeof (mpa->mpa_poolname)); 7727 break; 7728 7729 default: 7730 kmem_free(mpa, sizeof (struct mac_pool_arg)); 7731 pool_unlock(); 7732 return; 7733 } 7734 pool_unlock(); 7735 7736 mpa->mpa_what = what; 7737 7738 mac_pool_update(mpa); 7739 } 7740 7741 /* 7742 * Set effective rings property. This could be called from datapath_setup/ 7743 * datapath_teardown or set-linkprop. 7744 * If the group is reserved we just go ahead and set the effective rings. 7745 * Additionally, for TX this could mean the default group has lost/gained 7746 * some rings, so if the default group is reserved, we need to adjust the 7747 * effective rings for the default group clients. For RX, if we are working 7748 * with the non-default group, we just need to reset the effective props 7749 * for the default group clients.
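 *
 * For example (a sketch): a client that has exclusively reserved an
 * RX group currently holding 4 rings gets MRP_RX_RINGS set and
 * mrp_nrxrings = 4 in its effective props, while a client sharing a
 * group gets MRP_RX_RINGS cleared and mrp_nrxrings = 0.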
/*
 * Set effective rings property.  This could be called from
 * datapath_setup/datapath_teardown or set-linkprop.
 * If the group is reserved, we just go ahead and set the effective rings.
 * Additionally, for TX this could mean the default group has lost/gained
 * some rings, so if the default group is reserved, we need to adjust the
 * effective rings for the default group clients.  For RX, if we are
 * working with the non-default group, we just need to reset the
 * effective props for the default group clients.
 */
void
mac_set_rings_effective(mac_client_impl_t *mcip)
{
        mac_impl_t *mip = mcip->mci_mip;
        mac_group_t *grp;
        mac_group_t *defgrp;
        flow_entry_t *flent = mcip->mci_flent;
        mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip);
        mac_grp_client_t *mgcp;
        mac_client_impl_t *gmcip;

        grp = flent->fe_rx_ring_group;
        if (grp != NULL) {
                defgrp = MAC_DEFAULT_RX_GROUP(mip);
                /*
                 * If we have reserved a group, set the effective rings
                 * to the ring count in the group.
                 */
                if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) {
                        emrp->mrp_mask |= MRP_RX_RINGS;
                        emrp->mrp_nrxrings = grp->mrg_cur_count;
                }

                /*
                 * We go through the clients in the shared group and
                 * reset the effective properties.  It is possible this
                 * might have already been done for some client (i.e.
                 * if some client is being moved to a group that is
                 * already shared).  The case where the default group is
                 * RESERVED is taken care of above (note that on the RX
                 * side, if there is a non-default group, the default
                 * group is always SHARED).
                 */
                if (grp != defgrp ||
                    grp->mrg_state == MAC_GROUP_STATE_SHARED) {
                        if (grp->mrg_state == MAC_GROUP_STATE_SHARED)
                                mgcp = grp->mrg_clients;
                        else
                                mgcp = defgrp->mrg_clients;
                        while (mgcp != NULL) {
                                gmcip = mgcp->mgc_client;
                                emrp = MCIP_EFFECTIVE_PROPS(gmcip);
                                if (emrp->mrp_mask & MRP_RX_RINGS) {
                                        emrp->mrp_mask &= ~MRP_RX_RINGS;
                                        emrp->mrp_nrxrings = 0;
                                }
                                mgcp = mgcp->mgc_next;
                        }
                }
        }

        /*
         * Now the TX side.  Note that emrp may have been reassigned in
         * the RX loop above, so re-fetch this client's effective props.
         */
        emrp = MCIP_EFFECTIVE_PROPS(mcip);
        grp = flent->fe_tx_ring_group;
        if (grp != NULL) {
                defgrp = MAC_DEFAULT_TX_GROUP(mip);

                if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) {
                        emrp->mrp_mask |= MRP_TX_RINGS;
                        emrp->mrp_ntxrings = grp->mrg_cur_count;
                } else if (grp->mrg_state == MAC_GROUP_STATE_SHARED) {
                        mgcp = grp->mrg_clients;
                        while (mgcp != NULL) {
                                gmcip = mgcp->mgc_client;
                                emrp = MCIP_EFFECTIVE_PROPS(gmcip);
                                if (emrp->mrp_mask & MRP_TX_RINGS) {
                                        emrp->mrp_mask &= ~MRP_TX_RINGS;
                                        emrp->mrp_ntxrings = 0;
                                }
                                mgcp = mgcp->mgc_next;
                        }
                }

                /*
                 * If the group is not the default group and the default
                 * group is reserved, the ring count in the default group
                 * might have changed, update it.
                 */
                if (grp != defgrp &&
                    defgrp->mrg_state == MAC_GROUP_STATE_RESERVED) {
                        gmcip = MAC_GROUP_ONLY_CLIENT(defgrp);
                        emrp = MCIP_EFFECTIVE_PROPS(gmcip);
                        emrp->mrp_ntxrings = defgrp->mrg_cur_count;
                }
        }
}
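/*
 * A worked example of the above, with hypothetical ring counts: a
 * client that holds a RESERVED RX group of 4 rings and sits on a
 * SHARED TX group ends up with
 *
 *      emrp->mrp_mask:         MRP_RX_RINGS set, MRP_TX_RINGS clear
 *      emrp->mrp_nrxrings:     4
 *      emrp->mrp_ntxrings:     0
 *
 * and every other client on that SHARED TX group likewise has
 * MRP_TX_RINGS cleared, since none of them owns the rings exclusively.
 */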
/*
 * Check if the primary is in the default group.  If so, see if we
 * can give it an exclusive group now that another client is
 * being configured.  We take the primary out of the default group
 * because the multicast/broadcast packets for all the clients
 * will land in the default ring in the default group, which means
 * any client in the default group, even if it is the only one in
 * the group, will lose exclusive access to the rings and hence
 * the ability to poll them.
 */
mac_client_impl_t *
mac_check_primary_relocation(mac_client_impl_t *mcip, boolean_t rxhw)
{
        mac_impl_t *mip = mcip->mci_mip;
        mac_group_t *defgrp = MAC_DEFAULT_RX_GROUP(mip);
        flow_entry_t *flent = mcip->mci_flent;
        mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip);
        uint8_t *mac_addr;
        mac_group_t *ngrp;

        /*
         * If the primary is not in the default group, or if the RX
         * rings property has been set explicitly (pinning the primary
         * where it is), there is nothing to relocate; return.
         */
        if (flent->fe_rx_ring_group != defgrp || mrp->mrp_mask & MRP_RX_RINGS)
                return (NULL);

        /*
         * If the new client needs an exclusive group and we
         * don't have another for the primary, return.
         */
        if (rxhw && mip->mi_rxhwclnt_avail < 2)
                return (NULL);

        mac_addr = flent->fe_flow_desc.fd_dst_mac;
        /*
         * We call this when we are setting up the datapath for
         * the first non-primary.
         */
        ASSERT(mip->mi_nactiveclients == 2);
        /*
         * OK, now we have the primary that needs to be relocated.
         */
        ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE);
        if (ngrp == NULL)
                return (NULL);
        if (mac_rx_switch_group(mcip, defgrp, ngrp) != 0) {
                mac_stop_group(ngrp);
                return (NULL);
        }
        return (mcip);
}
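/*
 * A hypothetical caller sketch (names are illustrative): the datapath
 * setup code is expected to attempt the relocation when the second
 * active client comes up, and to fall back quietly when it fails:
 *
 *      if (mip->mi_nactiveclients == 2 &&
 *          mac_check_primary_relocation(prim_mcip, rxhw) != NULL) {
 *              ... the primary now owns an exclusive RX group ...
 *      }
 *
 * A NULL return leaves the primary in the default group, which is
 * always a safe, if slower, configuration.
 */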