/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * MAC Services Module
 *
 * The GLDv3 framework locking - The MAC layer
 * --------------------------------------------
 *
 * The MAC layer is central to the GLD framework and can provide the locking
 * framework needed for itself and for the use of MAC clients. MAC end points
 * are fairly disjoint and don't share a lot of state. So a coarse grained
 * multi-threading scheme is to single thread all create/modify/delete or set
 * type of control operations on a per mac end point basis while allowing
 * data threads to proceed concurrently.
 *
 * Control operations (set) that modify a mac end point are always serialized
 * on a per mac end point basis; we have at most one such thread per mac end
 * point at a time.
 *
 * All other operations that are not serialized are essentially multi-threaded.
 * Examples are control operations (get) like reading statistics, which may
 * not care about reading values atomically, and data threads sending or
 * receiving data. Mostly these types of operations don't modify the control
 * state. Any state these operations care about is protected using traditional
 * locks.
 *
 * The perimeter only serializes serial operations. It does not imply there
 * aren't any other concurrent operations. However a serialized operation may
 * sometimes need to make sure it is the only thread. In this case it needs
 * to use reference counting mechanisms to cv_wait until any current data
 * threads are done.
 *
 * The mac layer itself does not hold any locks across a call to another layer.
 * The perimeter is however held across a down call to the driver to make the
 * whole control operation atomic with respect to other control operations.
 * Also the data path and get type control operations may proceed concurrently.
 * These operations synchronize with the single serial operation on a given mac
 * end point using regular locks. The perimeter ensures that conflicting
 * operations like say a mac_multicast_add and a mac_multicast_remove on the
 * same mac end point don't interfere with each other and also ensures that the
 * changes in the mac layer and the call to the underlying driver to say add a
 * multicast address are done atomically without interference from a thread
 * trying to delete the same address.
 *
 * For example, consider
 * mac_multicst_add()
 * {
 *	mac_perimeter_enter();	serialize all control operations
 *
 *	grab list lock		protect against access by data threads
 *	add to list
 *	drop list lock
 *
 *	call driver's mi_multicst
 *
 *	mac_perimeter_exit();
 * }
 *
 * To lessen the number of serialization locks and simplify the lock hierarchy,
 * we serialize all the control operations on a per mac end point by using a
 * single serialization lock called the perimeter. We allow recursive entry
 * into the perimeter to facilitate use of this mechanism by both the mac
 * client and the MAC layer itself.
 *
 * MAC client means an entity that does an operation on a mac handle
 * obtained from a mac_open/mac_client_open. Similarly MAC driver means
 * an entity that does an operation on a mac handle obtained from a
 * mac_register. An entity could be both client and driver but on different
 * handles eg. aggr. and should only make the corresponding mac interface calls
 * i.e. mac driver interface or mac client interface as appropriate for that
 * mac handle.
 *
 * General rules.
 * -------------
 *
 * R1. The lock order of upcall threads is naturally opposite to downcall
 * threads. Hence upcalls must not hold any locks across layers for fear of
 * recursive lock enter and lock order violation. This applies to all layers.
 *
 * R2. The perimeter is just another lock. Since it is held in the down
 * direction, acquiring the perimeter in an upcall is prohibited as it would
 * cause a deadlock. This applies to all layers.
 *
 * Note that upcalls that need to grab the mac perimeter (for example
 * mac_notify upcalls) can still achieve that by posting the request to a
 * thread, which can then grab all the required perimeters and locks in the
 * right global order. Note that in the above example the mac layer itself
 * won't grab the mac perimeter in the mac_notify upcall, instead the upcall
 * to the client must do that. Please see the aggr code for an example.
 *
 * MAC client rules
 * ----------------
 *
 * R3. A MAC client may use the MAC provided perimeter facility to serialize
 * control operations on a per mac end point. It does this by acquiring
 * and holding the perimeter across a sequence of calls to the mac layer.
 * This ensures atomicity across the entire block of mac calls. In this
 * model the MAC client must not hold any client locks across the calls to
 * the mac layer. This model is the preferred solution.
 *
 * R4. However if a MAC client has a lot of global state across all mac end
 * points the per mac end point serialization may not be sufficient. In this
 * case the client may choose to use global locks or use its own serialization.
 * To avoid deadlocks, these client layer locks held across the mac calls
 * in the control path must never be acquired by the data path for the reason
 * mentioned below.
 *
 * (Assume that a control operation that holds a client lock blocks in the
 * mac layer waiting for upcall reference counts to drop to zero. If an upcall
 * data thread that holds this reference count then tries to acquire the same
 * client lock, it will deadlock.)
 *
 * A MAC client may follow either the R3 model or the R4 model, but can't
 * mix both. In the former, the hierarchy is Perim -> client locks, but in
 * the latter it is client locks -> Perim. A sketch of both models is shown
 * below.
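 *
 * For illustration only, a minimal sketch of the two models (not taken
 * verbatim from any client; the client lock name is hypothetical):
 *
 * R3 model (preferred):
 *	mac_perim_enter_by_mh(mh, &mph);	no client locks held
 *	... sequence of mac calls ...		atomic as a block
 *	mac_perim_exit(mph);
 *
 * R4 model:
 *	mutex_enter(&client_global_lock);	client's own serialization
 *	... mac calls; mac enters the perimeter internally ...
 *	mutex_exit(&client_global_lock);
 *
 * In the R4 model, client_global_lock must never be taken by the client's
 * data path, for the deadlock reason given above.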
 *
 * R5. MAC clients must make MAC calls (excluding data calls) in a cv_wait'able
 * context since they may block while trying to acquire the perimeter.
 * In addition some calls may block waiting for upcall refcnts to come down to
 * zero.
 *
 * R6. MAC clients must make sure that they are single threaded and all threads
 * from the top (in particular data threads) have finished before calling
 * mac_client_close. The MAC framework does not track the number of client
 * threads using the mac client handle. Also mac clients must make sure
 * they have undone all the control operations before calling mac_client_close.
 * For example mac_unicast_remove/mac_multicast_remove to undo the corresponding
 * mac_unicast_add/mac_multicast_add.
 *
 * MAC framework rules
 * -------------------
 *
 * R7. The mac layer itself must not hold any mac layer locks (except the mac
 * perimeter) across a call to any other layer from the mac layer. The call to
 * any other layer could be via mi_* entry points, classifier entry points into
 * the driver or via upcall pointers into layers above. The mac perimeter may
 * be acquired or held only in the down direction, e.g. when calling into
 * a mi_* driver entry point to provide atomicity of the operation.
 *
 * R8. Since it is not guaranteed (see R14) that drivers won't hold locks across
 * mac driver interfaces, the MAC layer must provide a cut out for control
 * interfaces like upcall notifications and start them in a separate thread.
 * (A sketch of this hand-off pattern follows at the end of this section.)
 *
 * R9. Note that locking order also implies a plumbing order. For example
 * VNICs are allowed to be created over aggrs, but not vice-versa. An attempt
 * to plumb in any other order must be failed at mac_open time, otherwise it
 * could lead to deadlocks due to inverse locking order.
 *
 * R10. MAC driver interfaces must not block since the driver could call them
 * in interrupt context.
 *
 * R11. Walkers must preferably not hold any locks while calling walker
 * callbacks. Instead these can operate on reference counts. In simple
 * callbacks it may be ok to hold a lock and call the callbacks, but this is
 * harder to maintain in the general case of arbitrary callbacks.
 *
 * R12. The MAC layer must protect upcall notification callbacks using reference
 * counts rather than holding locks across the callbacks.
 *
 * R13. Given the variety of drivers, it is preferable if the MAC layer can make
 * sure that any pointers (such as mac ring pointers) it passes to the driver
 * remain valid until mac unregister time. Currently the mac layer achieves
 * this by using generation numbers for rings and freeing the mac rings only
 * at unregister time. The MAC layer must provide a layer of indirection and
 * must not expose underlying driver rings or driver data structures/pointers
 * directly to MAC clients.
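 *
 * For illustration only, a condensed sketch of the R8 hand-off as implemented
 * by i_mac_notify() later in this file and the per-mac notify thread (the
 * notify_thread pseudo-function below is illustrative, not an actual name):
 *
 *	driver -> i_mac_notify(mip, type)
 *	{
 *		set bit in mip->mi_notify_bits	under mi_notify_lock
 *		cv_broadcast(...)		wake the notify thread
 *	}					returns without upcalling
 *
 *	notify_thread()
 *	{
 *		wait for bits; clear one bit;
 *		invoke client callbacks		no driver locks held here
 *	}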
 *
 * MAC driver rules
 * ----------------
 *
 * R14. It would be preferable if MAC drivers don't hold any locks across any
 * mac call. However at a minimum they must not hold any locks across data
 * upcalls. They must also make sure that all references to mac data structures
 * are cleaned up and that it is single threaded at mac_unregister time.
 *
 * R15. MAC driver interfaces don't block and so the action may be done
 * asynchronously in a separate thread as for example handling notifications.
 * The driver must not assume that the action is complete when the call
 * returns.
 *
 * R16. Drivers must maintain a generation number per Rx ring, and pass it
 * back to mac_rx_ring(); They are expected to increment the generation
 * number whenever the ring's stop routine is invoked.
 * See comments in mac_rx_ring();
 *
 * R17. Similarly mi_stop is another synchronization point and the driver must
 * ensure that all upcalls are done and there won't be any future upcall
 * before returning from mi_stop.
 *
 * R18. The driver may assume that all set/modify control operations via
 * the mi_* entry points are single threaded on a per mac end point.
 *
 * Lock and Perimeter hierarchy scenarios
 * ---------------------------------------
 *
 * i_mac_impl_lock -> mi_rw_lock -> srs_lock -> s_ring_lock[i_mac_tx_srs_notify]
 *
 * ft_lock -> fe_lock [mac_flow_lookup]
 *
 * mi_rw_lock -> fe_lock [mac_bcast_send]
 *
 * srs_lock -> mac_bw_lock [mac_rx_srs_drain_bw]
 *
 * cpu_lock -> mac_srs_g_lock -> srs_lock -> s_ring_lock [mac_walk_srs_and_bind]
 *
 * i_dls_devnet_lock -> mac layer locks [dls_devnet_rename]
 *
 * Perimeters are ordered P1 -> P2 -> P3 from top to bottom in order of mac
 * client to driver. In the case of clients that explicitly use the mac
 * provided perimeter mechanism for their serialization, the hierarchy is
 * Perimeter -> mac layer locks, since the client never holds any locks across
 * the mac calls. In the case of clients that use their own locks the hierarchy
 * is Client locks -> Mac Perim -> Mac layer locks. The client never explicitly
 * calls mac_perim_enter/exit in this case.
 *
 * Subflow creation rules
 * ---------------------------
 * o In case of a user specified cpulist present on the underlying link and
 *   flows, the flow's cpulist must be a subset of the underlying link's.
 * o In case of a user specified fanout mode present on link and flow, the
 *   subflow fanout count has to be less than or equal to that of the
 *   underlying link. The cpu-bindings for the subflows will be a subset of
 *   the underlying link.
 * o In case no cpulist is specified on either the underlying link or the
 *   flow, the underlying link relies on a MAC tunable to provide out of box
 *   fanout. The subflow will have no cpulist (the subflow will be unbound).
 * o In case no cpulist is specified on the underlying link, a subflow can
 *   carry either a user-specified cpulist or fanout count. The cpu-bindings
 *   for the subflow will not adhere to the restriction that they need to be
 *   a subset of the underlying link's.
 * o In the case where the underlying link carries either a user specified
 *   cpulist or fanout mode and the subflow is unspecified, the subflow will
 *   be created unbound.
 * o While creating unbound subflows, bandwidth mode changes attempt to
 *   figure a right fanout count. In such cases the fanout count will override
 *   the unbound cpu-binding behavior.
 * o In addition to this, while cycling between flow and link properties, we
 *   impose a restriction that if a link property has a subflow with
 *   user-specified attributes, we will not allow changing the link property.
 *   The administrator needs to reset all the user specified properties for the
 *   subflows before attempting a link property change.
 * Some of the above rules can be overridden by specifying additional command
 * line options while creating or modifying link or subflow properties.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/id_space.h>
#include <sys/esunddi.h>
#include <sys/stat.h>
#include <sys/mkdev.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/dlpi.h>
#include <sys/modhash.h>
#include <sys/mac_provider.h>
#include <sys/mac_client_impl.h>
#include <sys/mac_soft_ring.h>
#include <sys/mac_stat.h>
#include <sys/mac_impl.h>
#include <sys/mac.h>
#include <sys/dls.h>
#include <sys/dld.h>
#include <sys/modctl.h>
#include <sys/fs/dv_node.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/callb.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <sys/bitmap.h>
#include <sys/sdt.h>
#include <sys/mac_flow.h>
#include <sys/ddi_intr_impl.h>
#include <sys/disp.h>
#include <sys/sdt.h>
#include <sys/vnic.h>
#include <sys/vnic_impl.h>
#include <sys/vlan.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <sys/exacct.h>
#include <sys/exacct_impl.h>
#include <inet/nd.h>
#include <sys/ethernet.h>
#include <sys/pool.h>
#include <sys/pool_pset.h>
#include <sys/cpupart.h>
#include <inet/wifi_ioctl.h>
#include <net/wpa.h>

#define	IMPL_HASHSZ	67	/* prime */

kmem_cache_t		*i_mac_impl_cachep;
mod_hash_t		*i_mac_impl_hash;
krwlock_t		i_mac_impl_lock;
uint_t			i_mac_impl_count;
static kmem_cache_t	*mac_ring_cache;
static id_space_t	*minor_ids;
static uint32_t		minor_count;
static pool_event_cb_t	mac_pool_event_reg;

/*
 * Logging stuff. Perhaps mac_logging_interval could be broken into
 * mac_flow_log_interval and mac_link_log_interval if we want to be
 * able to schedule them differently.
 */
uint_t			mac_logging_interval;
boolean_t		mac_flow_log_enable;
boolean_t		mac_link_log_enable;
timeout_id_t		mac_logging_timer;

/* for debugging, see MAC_DBG_PRT() in mac_impl.h */
int mac_dbg = 0;

#define	MACTYPE_KMODDIR	"mac"
#define	MACTYPE_HASHSZ	67
static mod_hash_t	*i_mactype_hash;
/*
 * i_mactype_lock synchronizes threads that obtain references to mactype_t
 * structures through i_mactype_getplugin().
 */
static kmutex_t		i_mactype_lock;

/*
 * mac_tx_percpu_cnt
 *
 * Number of per cpu locks per mac_client_impl_t. Used by the transmit side
 * in mac_tx to reduce lock contention. This is sized at boot time in mac_init.
 * mac_tx_percpu_cnt_max is settable in /etc/system and must be a power of 2.
 * Per cpu locks may be disabled by setting mac_tx_percpu_cnt_max to 1.
 */
int mac_tx_percpu_cnt;
int mac_tx_percpu_cnt_max = 128;

/*
 * Call back functions for the bridge module. These are guaranteed to be valid
 * when holding a reference on a link or when holding mip->mi_bridge_lock and
 * mi_bridge_link is non-NULL.
 */
mac_bridge_tx_t mac_bridge_tx_cb;
mac_bridge_rx_t mac_bridge_rx_cb;
mac_bridge_ref_t mac_bridge_ref_cb;
mac_bridge_ls_t mac_bridge_ls_cb;

static int i_mac_constructor(void *, void *, int);
static void i_mac_destructor(void *, void *);
static int i_mac_ring_ctor(void *, void *, int);
static void i_mac_ring_dtor(void *, void *);
static mblk_t *mac_rx_classify(mac_impl_t *, mac_resource_handle_t, mblk_t *);
void mac_tx_client_flush(mac_client_impl_t *);
void mac_tx_client_block(mac_client_impl_t *);
static void mac_rx_ring_quiesce(mac_ring_t *, uint_t);
static int mac_start_group_and_rings(mac_group_t *);
static void mac_stop_group_and_rings(mac_group_t *);
static void mac_pool_event_cb(pool_event_t, int, void *);

/*
 * Module initialization functions.
 */

void
mac_init(void)
{
	mac_tx_percpu_cnt = ((boot_max_ncpus == -1) ? max_ncpus :
	    boot_max_ncpus);

	/* Upper bound is mac_tx_percpu_cnt_max */
	if (mac_tx_percpu_cnt > mac_tx_percpu_cnt_max)
		mac_tx_percpu_cnt = mac_tx_percpu_cnt_max;

	if (mac_tx_percpu_cnt < 1) {
		/* Someone set mac_tx_percpu_cnt_max to 0 or less */
		mac_tx_percpu_cnt = 1;
	}

	ASSERT(mac_tx_percpu_cnt >= 1);
	mac_tx_percpu_cnt = (1 << highbit(mac_tx_percpu_cnt - 1));
	/*
	 * Make it of the form 2**N - 1 in the range
	 * [0 .. mac_tx_percpu_cnt_max - 1]
	 */
	mac_tx_percpu_cnt--;

	i_mac_impl_cachep = kmem_cache_create("mac_impl_cache",
	    sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor,
	    NULL, NULL, NULL, 0);
	ASSERT(i_mac_impl_cachep != NULL);

	mac_ring_cache = kmem_cache_create("mac_ring_cache",
	    sizeof (mac_ring_t), 0, i_mac_ring_ctor, i_mac_ring_dtor, NULL,
	    NULL, NULL, 0);
	ASSERT(mac_ring_cache != NULL);

	i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash",
	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
	rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL);

	mac_flow_init();
	mac_soft_ring_init();
	mac_bcast_init();
	mac_client_init();

	i_mac_impl_count = 0;

	i_mactype_hash = mod_hash_create_extended("mactype_hash",
	    MACTYPE_HASHSZ,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
	/*
	 * Allocate an id space to manage minor numbers. The range of the
	 * space will be from MAC_MAX_MINOR+1 to MAC_PRIVATE_MINOR-1. This
	 * leaves half of the 32-bit minors available for driver private use.
	 */
	minor_ids = id_space_create("mac_minor_ids", MAC_MAX_MINOR+1,
	    MAC_PRIVATE_MINOR-1);
	ASSERT(minor_ids != NULL);
	minor_count = 0;

	/* Let's default to 20 seconds */
	mac_logging_interval = 20;
	mac_flow_log_enable = B_FALSE;
	mac_link_log_enable = B_FALSE;
	mac_logging_timer = 0;

	/* Register to be notified of noteworthy pools events */
	mac_pool_event_reg.pec_func = mac_pool_event_cb;
	mac_pool_event_reg.pec_arg = NULL;
	pool_event_cb_register(&mac_pool_event_reg);
}

int
mac_fini(void)
{

	if (i_mac_impl_count > 0 || minor_count > 0)
		return (EBUSY);

	pool_event_cb_unregister(&mac_pool_event_reg);

	id_space_destroy(minor_ids);
	mac_flow_fini();

	mod_hash_destroy_hash(i_mac_impl_hash);
	rw_destroy(&i_mac_impl_lock);

	mac_client_fini();
	kmem_cache_destroy(mac_ring_cache);

	mod_hash_destroy_hash(i_mactype_hash);
	mac_soft_ring_finish();

	return (0);
}

/*
 * Initialize a GLDv3 driver's device ops. A driver that manages its own ops
 * (e.g. softmac) may pass in a NULL ops argument.
 */
void
mac_init_ops(struct dev_ops *ops, const char *name)
{
	major_t major = ddi_name_to_major((char *)name);

	/*
	 * By returning on error below, we are not letting the driver continue
	 * in an undefined context. The mac_register() function will fail if
	 * DN_GLDV3_DRIVER isn't set.
	 */
	if (major == DDI_MAJOR_T_NONE)
		return;
	LOCK_DEV_OPS(&devnamesp[major].dn_lock);
	devnamesp[major].dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER);
	UNLOCK_DEV_OPS(&devnamesp[major].dn_lock);
	if (ops != NULL)
		dld_init_ops(ops, name);
}

void
mac_fini_ops(struct dev_ops *ops)
{
	dld_fini_ops(ops);
}

/*ARGSUSED*/
static int
i_mac_constructor(void *buf, void *arg, int kmflag)
{
	mac_impl_t *mip = buf;

	bzero(buf, sizeof (mac_impl_t));

	mip->mi_linkstate = LINK_STATE_UNKNOWN;

	rw_init(&mip->mi_rw_lock, NULL, RW_DRIVER, NULL);
	mutex_init(&mip->mi_notify_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&mip->mi_promisc_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&mip->mi_ring_lock, NULL, MUTEX_DEFAULT, NULL);

	mip->mi_notify_cb_info.mcbi_lockp = &mip->mi_notify_lock;
	cv_init(&mip->mi_notify_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);
	mip->mi_promisc_cb_info.mcbi_lockp = &mip->mi_promisc_lock;
	cv_init(&mip->mi_promisc_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);

	mutex_init(&mip->mi_bridge_lock, NULL, MUTEX_DEFAULT, NULL);

	return (0);
}
/*ARGSUSED*/
static void
i_mac_destructor(void *buf, void *arg)
{
	mac_impl_t	*mip = buf;
	mac_cb_info_t	*mcbi;

	ASSERT(mip->mi_ref == 0);
	ASSERT(mip->mi_active == 0);
	ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN);
	ASSERT(mip->mi_devpromisc == 0);
	ASSERT(mip->mi_ksp == NULL);
	ASSERT(mip->mi_kstat_count == 0);
	ASSERT(mip->mi_nclients == 0);
	ASSERT(mip->mi_nactiveclients == 0);
	ASSERT(mip->mi_single_active_client == NULL);
	ASSERT(mip->mi_state_flags == 0);
	ASSERT(mip->mi_factory_addr == NULL);
	ASSERT(mip->mi_factory_addr_num == 0);
	ASSERT(mip->mi_default_tx_ring == NULL);

	mcbi = &mip->mi_notify_cb_info;
	ASSERT(mcbi->mcbi_del_cnt == 0 && mcbi->mcbi_walker_cnt == 0);
	ASSERT(mip->mi_notify_bits == 0);
	ASSERT(mip->mi_notify_thread == NULL);
	ASSERT(mcbi->mcbi_lockp == &mip->mi_notify_lock);
	mcbi->mcbi_lockp = NULL;

	mcbi = &mip->mi_promisc_cb_info;
	ASSERT(mcbi->mcbi_del_cnt == 0 && mip->mi_promisc_list == NULL);
	ASSERT(mip->mi_promisc_list == NULL);
	ASSERT(mcbi->mcbi_lockp == &mip->mi_promisc_lock);
	mcbi->mcbi_lockp = NULL;

	ASSERT(mip->mi_bcast_ngrps == 0 && mip->mi_bcast_grp == NULL);
	ASSERT(mip->mi_perim_owner == NULL && mip->mi_perim_ocnt == 0);

	rw_destroy(&mip->mi_rw_lock);

	mutex_destroy(&mip->mi_promisc_lock);
	cv_destroy(&mip->mi_promisc_cb_info.mcbi_cv);
	mutex_destroy(&mip->mi_notify_lock);
	cv_destroy(&mip->mi_notify_cb_info.mcbi_cv);
	mutex_destroy(&mip->mi_ring_lock);

	ASSERT(mip->mi_bridge_link == NULL);
}

/* ARGSUSED */
static int
i_mac_ring_ctor(void *buf, void *arg, int kmflag)
{
	mac_ring_t *ring = (mac_ring_t *)buf;

	bzero(ring, sizeof (mac_ring_t));
	cv_init(&ring->mr_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&ring->mr_lock, NULL, MUTEX_DEFAULT, NULL);
	ring->mr_state = MR_FREE;
	return (0);
}

/* ARGSUSED */
static void
i_mac_ring_dtor(void *buf, void *arg)
{
	mac_ring_t *ring = (mac_ring_t *)buf;

	cv_destroy(&ring->mr_cv);
	mutex_destroy(&ring->mr_lock);
}

/*
 * Common functions to do mac callback addition and deletion. Currently this is
 * used by promisc callbacks and notify callbacks. List addition and deletion
 * need to take care of list walkers. List walkers, in general, can't hold list
 * locks and make upcall callbacks due to potential lock order and recursive
 * reentry issues. Instead list walkers increment the list walker count to mark
 * the presence of a walker thread. Addition can be carefully done to ensure
 * that the list walker always sees either the old list or the new list.
 * However the deletion can't be done while the walker is active; instead the
 * deleting thread simply marks the entry as logically deleted. The last walker
 * physically deletes and frees up the logically deleted entries when the walk
 * is complete.
 */
void
mac_callback_add(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    mac_cb_t *mcb_elem)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;

	/* Verify it is not already in the list */
	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
		if (p == mcb_elem)
			break;
	}
	VERIFY(p == NULL);

	/*
	 * Add it to the head of the callback list. The membar ensures that
	 * the following list pointer manipulations reach global visibility
	 * in exactly the program order below.
	 */
	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));

	mcb_elem->mcb_nextp = *mcb_head;
	membar_producer();
	*mcb_head = mcb_elem;
}
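/*
 * For illustration only: a minimal sketch (not an existing mac function) of
 * the reader side that the membar_producer() above pairs with. A walker
 * traversing the list concurrently with an insertion would need a matching
 * membar_consumer() to be guaranteed to see a fully initialized element:
 *
 *	for (p = *mcb_head; p != NULL; p = p->mcb_nextp) {
 *		membar_consumer();	pairs with membar_producer()
 *		... examine *p ...
 *	}
 *
 * In this file walkers additionally announce themselves via mcbi_walker_cnt
 * under mcbi_lockp, so the membar is an ordering guarantee for the insertion
 * itself rather than the sole synchronization mechanism.
 */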
/*
 * Mark the entry as logically deleted. If there aren't any walkers unlink
 * from the list. In either case return the corresponding status.
 */
boolean_t
mac_callback_remove(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    mac_cb_t *mcb_elem)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;

	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	/*
	 * Search the callback list for the entry to be removed
	 */
	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
		if (p == mcb_elem)
			break;
	}
	VERIFY(p != NULL);

	/*
	 * If there are walkers just mark it as deleted and the last walker
	 * will remove from the list and free it.
	 */
	if (mcbi->mcbi_walker_cnt != 0) {
		p->mcb_flags |= MCB_CONDEMNED;
		mcbi->mcbi_del_cnt++;
		return (B_FALSE);
	}

	ASSERT(mcbi->mcbi_del_cnt == 0);
	*pp = p->mcb_nextp;
	p->mcb_nextp = NULL;
	return (B_TRUE);
}

/*
 * Wait for all pending callback removals to be completed
 */
void
mac_callback_remove_wait(mac_cb_info_t *mcbi)
{
	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	while (mcbi->mcbi_del_cnt != 0) {
		DTRACE_PROBE1(need_wait, mac_cb_info_t *, mcbi);
		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
	}
}

/*
 * The last mac callback walker does the cleanup. Walk the list and unlink
 * all the logically deleted entries and construct a temporary list of
 * removed entries. Return the list of removed entries to the caller.
 */
mac_cb_t *
mac_callback_walker_cleanup(mac_cb_info_t *mcbi, mac_cb_t **mcb_head)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;
	mac_cb_t	*rmlist = NULL;	/* List of removed elements */
	int		cnt = 0;

	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	ASSERT(mcbi->mcbi_del_cnt != 0 && mcbi->mcbi_walker_cnt == 0);

	pp = mcb_head;
	while (*pp != NULL) {
		if ((*pp)->mcb_flags & MCB_CONDEMNED) {
			p = *pp;
			*pp = p->mcb_nextp;
			p->mcb_nextp = rmlist;
			rmlist = p;
			cnt++;
			continue;
		}
		pp = &(*pp)->mcb_nextp;
	}

	ASSERT(mcbi->mcbi_del_cnt == cnt);
	mcbi->mcbi_del_cnt = 0;
	return (rmlist);
}

boolean_t
mac_callback_lookup(mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
{
	mac_cb_t	*mcb;

	/* Search the list for the entry */
	for (mcb = *mcb_headp; mcb != NULL; mcb = mcb->mcb_nextp) {
		if (mcb == mcb_elem)
			return (B_TRUE);
	}

	return (B_FALSE);
}

boolean_t
mac_callback_find(mac_cb_info_t *mcbi, mac_cb_t **mcb_headp,
    mac_cb_t *mcb_elem)
{
	boolean_t	found;

	mutex_enter(mcbi->mcbi_lockp);
	found = mac_callback_lookup(mcb_headp, mcb_elem);
	mutex_exit(mcbi->mcbi_lockp);

	return (found);
}

/* Free the list of removed callbacks */
void
mac_callback_free(mac_cb_t *rmlist)
{
	mac_cb_t	*mcb;
	mac_cb_t	*mcb_next;

	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
		mcb_next = mcb->mcb_nextp;
		kmem_free(mcb->mcb_objp, mcb->mcb_objsize);
	}
}
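/*
 * For illustration only: a minimal sketch of how a walker is expected to use
 * the walker count together with mac_callback_walker_cleanup() and
 * mac_callback_free() above (a generic outline, not an actual mac function):
 *
 *	mutex_enter(mcbi->mcbi_lockp);
 *	mcbi->mcbi_walker_cnt++;
 *	mutex_exit(mcbi->mcbi_lockp);
 *
 *	... walk the list and make upcalls without holding the lock ...
 *
 *	mutex_enter(mcbi->mcbi_lockp);
 *	if (--mcbi->mcbi_walker_cnt == 0 && mcbi->mcbi_del_cnt != 0) {
 *		rmlist = mac_callback_walker_cleanup(mcbi, mcb_head);
 *		cv_broadcast(&mcbi->mcbi_cv);	wake remove_wait'ers
 *	}
 *	mutex_exit(mcbi->mcbi_lockp);
 *	mac_callback_free(rmlist);
 */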
/*
 * The promisc callbacks are in 2 lists, one off the 'mip' and another off the
 * 'mcip' threaded by mpi_mi_link and mpi_mci_link respectively. However there
 * is only a single shared total walker count, and an entry can't be physically
 * unlinked if a walker is active on either list. The last walker does this
 * cleanup of logically deleted entries.
 */
void
i_mac_promisc_walker_cleanup(mac_impl_t *mip)
{
	mac_cb_t		*rmlist;
	mac_cb_t		*mcb;
	mac_cb_t		*mcb_next;
	mac_promisc_impl_t	*mpip;

	/*
	 * Construct a temporary list of deleted callbacks by walking the
	 * mi_promisc_list. Then for each entry in the temporary list,
	 * remove it from the mci_promisc_list and free the entry.
	 */
	rmlist = mac_callback_walker_cleanup(&mip->mi_promisc_cb_info,
	    &mip->mi_promisc_list);

	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
		mcb_next = mcb->mcb_nextp;
		mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
		VERIFY(mac_callback_remove(&mip->mi_promisc_cb_info,
		    &mpip->mpi_mcip->mci_promisc_list, &mpip->mpi_mci_link));
		mcb->mcb_flags = 0;
		mcb->mcb_nextp = NULL;
		kmem_cache_free(mac_promisc_impl_cache, mpip);
	}
}

void
i_mac_notify(mac_impl_t *mip, mac_notify_type_t type)
{
	mac_cb_info_t *mcbi;

	/*
	 * Signal the notify thread even after mi_ref has become zero and
	 * mi_disabled is set. The synchronization with the notify thread
	 * happens in mac_unregister and that implies the driver must make
	 * sure it is single-threaded (with respect to mac calls) and that
	 * all pending mac calls have returned before it calls mac_unregister.
	 */
	rw_enter(&i_mac_impl_lock, RW_READER);
	if (mip->mi_state_flags & MIS_DISABLED)
		goto exit;

	/*
	 * Guard against incorrect notifications. (Running a newer
	 * mac client against an older implementation?)
	 */
	if (type >= MAC_NNOTE)
		goto exit;

	mcbi = &mip->mi_notify_cb_info;
	mutex_enter(mcbi->mcbi_lockp);
	mip->mi_notify_bits |= (1 << type);
	cv_broadcast(&mcbi->mcbi_cv);
	mutex_exit(mcbi->mcbi_lockp);

exit:
	rw_exit(&i_mac_impl_lock);
}

/*
 * Mac serialization primitives. Please see the block comment at the
 * top of the file.
 */
void
i_mac_perim_enter(mac_impl_t *mip)
{
	mac_client_impl_t	*mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Use the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}

	mutex_enter(&mip->mi_perim_lock);
	if (mip->mi_perim_owner == curthread) {
		mip->mi_perim_ocnt++;
		mutex_exit(&mip->mi_perim_lock);
		return;
	}

	while (mip->mi_perim_owner != NULL)
		cv_wait(&mip->mi_perim_cv, &mip->mi_perim_lock);

	mip->mi_perim_owner = curthread;
	ASSERT(mip->mi_perim_ocnt == 0);
	mip->mi_perim_ocnt++;
#ifdef DEBUG
	mip->mi_perim_stack_depth = getpcstack(mip->mi_perim_stack,
	    MAC_PERIM_STACK_DEPTH);
#endif
	mutex_exit(&mip->mi_perim_lock);
}

int
i_mac_perim_enter_nowait(mac_impl_t *mip)
{
	/*
	 * The vnic is a special case, since the serialization is done based
	 * on the lower mac. If the lower mac is busy, it does not imply the
	 * vnic can't be unregistered. But in the case of other drivers,
	 * a busy perimeter or open mac handles implies that the mac is busy
	 * and can't be unregistered.
	 */
	if (mip->mi_state_flags & MIS_IS_VNIC) {
		i_mac_perim_enter(mip);
		return (0);
	}

	mutex_enter(&mip->mi_perim_lock);
	if (mip->mi_perim_owner != NULL) {
		mutex_exit(&mip->mi_perim_lock);
		return (EBUSY);
	}
	ASSERT(mip->mi_perim_ocnt == 0);
	mip->mi_perim_owner = curthread;
	mip->mi_perim_ocnt++;
	mutex_exit(&mip->mi_perim_lock);

	return (0);
}
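/*
 * For illustration only: the nowait variant is intended for paths that must
 * not block, such as unregister-time checks. A minimal sketch of the
 * expected calling pattern (error handling elided):
 *
 *	if (i_mac_perim_enter_nowait(mip) != 0)
 *		return (EBUSY);		someone else owns the perimeter
 *	... tear-down work ...
 *	i_mac_perim_exit(mip);
 *
 * Recursive entry by the owner thread (i_mac_perim_enter) simply bumps
 * mi_perim_ocnt, so a helper that enters the perimeter may be called both
 * from inside and outside the perimeter.
 */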
void
i_mac_perim_exit(mac_impl_t *mip)
{
	mac_client_impl_t	*mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Use the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}

	ASSERT(mip->mi_perim_owner == curthread && mip->mi_perim_ocnt != 0);

	mutex_enter(&mip->mi_perim_lock);
	if (--mip->mi_perim_ocnt == 0) {
		mip->mi_perim_owner = NULL;
		cv_signal(&mip->mi_perim_cv);
	}
	mutex_exit(&mip->mi_perim_lock);
}

/*
 * Returns whether the current thread holds the mac perimeter. Used in making
 * assertions.
 */
boolean_t
mac_perim_held(mac_handle_t mh)
{
	mac_impl_t		*mip = (mac_impl_t *)mh;
	mac_client_impl_t	*mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Use the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}
	return (mip->mi_perim_owner == curthread);
}

/*
 * mac client interfaces to enter the mac perimeter of a mac end point, given
 * its mac handle, or macname or linkid.
 */
void
mac_perim_enter_by_mh(mac_handle_t mh, mac_perim_handle_t *mphp)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	i_mac_perim_enter(mip);
	/*
	 * The mac_perim_handle_t returned encodes the 'mip' and whether a
	 * mac_open has been done internally while entering the perimeter.
	 * This information is used in mac_perim_exit.
	 */
	MAC_ENCODE_MPH(*mphp, mip, 0);
}

int
mac_perim_enter_by_macname(const char *name, mac_perim_handle_t *mphp)
{
	int		err;
	mac_handle_t	mh;

	if ((err = mac_open(name, &mh)) != 0)
		return (err);

	mac_perim_enter_by_mh(mh, mphp);
	MAC_ENCODE_MPH(*mphp, mh, 1);
	return (0);
}

int
mac_perim_enter_by_linkid(datalink_id_t linkid, mac_perim_handle_t *mphp)
{
	int		err;
	mac_handle_t	mh;

	if ((err = mac_open_by_linkid(linkid, &mh)) != 0)
		return (err);

	mac_perim_enter_by_mh(mh, mphp);
	MAC_ENCODE_MPH(*mphp, mh, 1);
	return (0);
}

void
mac_perim_exit(mac_perim_handle_t mph)
{
	mac_impl_t	*mip;
	boolean_t	need_close;

	MAC_DECODE_MPH(mph, mip, need_close);
	i_mac_perim_exit(mip);
	if (need_close)
		mac_close((mac_handle_t)mip);
}

int
mac_hold(const char *macname, mac_impl_t **pmip)
{
	mac_impl_t	*mip;
	int		err;

	/*
	 * Check the device name length to make sure it won't overflow our
	 * buffer.
	 */
	if (strlen(macname) >= MAXNAMELEN)
		return (EINVAL);

	/*
	 * Look up its entry in the global hash table.
	 */
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname,
	    (mod_hash_val_t *)&mip);

	if (err != 0) {
		rw_exit(&i_mac_impl_lock);
		return (ENOENT);
	}

	if (mip->mi_state_flags & MIS_DISABLED) {
		rw_exit(&i_mac_impl_lock);
		return (ENOENT);
	}

	if (mip->mi_state_flags & MIS_EXCLUSIVE_HELD) {
		rw_exit(&i_mac_impl_lock);
		return (EBUSY);
	}

	mip->mi_ref++;
	rw_exit(&i_mac_impl_lock);

	*pmip = mip;
	return (0);
}

void
mac_rele(mac_impl_t *mip)
{
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	ASSERT(mip->mi_ref != 0);
	if (--mip->mi_ref == 0) {
		ASSERT(mip->mi_nactiveclients == 0 &&
		    !(mip->mi_state_flags & MIS_EXCLUSIVE));
	}
	rw_exit(&i_mac_impl_lock);
}
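/*
 * For illustration only: a typical consumer (e.g. the dls layer) brackets a
 * sequence of control calls with the by-linkid variant; a minimal sketch:
 *
 *	mac_perim_handle_t	mph;
 *
 *	if ((err = mac_perim_enter_by_linkid(linkid, &mph)) != 0)
 *		return (err);
 *	... sequence of mac control calls, atomic w.r.t. other control ops ...
 *	mac_perim_exit(mph);	also closes the internally opened handle
 */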
/*
 * Private GLDv3 function to start a MAC instance.
 */
int
mac_start(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	int		err = 0;
	mac_group_t	*defgrp;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mip->mi_start != NULL);

	/*
	 * Check whether the device is already started.
	 */
	if (mip->mi_active++ == 0) {
		mac_ring_t *ring = NULL;

		/*
		 * Start the device.
		 */
		err = mip->mi_start(mip->mi_driver);
		if (err != 0) {
			mip->mi_active--;
			return (err);
		}

		/*
		 * Start the default tx ring.
		 */
		if (mip->mi_default_tx_ring != NULL) {

			ring = (mac_ring_t *)mip->mi_default_tx_ring;
			if (ring->mr_state != MR_INUSE) {
				err = mac_start_ring(ring);
				if (err != 0) {
					mip->mi_active--;
					return (err);
				}
			}
		}

		if ((defgrp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) {
			/*
			 * Start the default group, since it will be needed
			 * to receive broadcast and multicast traffic for
			 * both primary and non-primary MAC clients.
			 */
			ASSERT(defgrp->mrg_state ==
			    MAC_GROUP_STATE_REGISTERED);
			err = mac_start_group_and_rings(defgrp);
			if (err != 0) {
				mip->mi_active--;
				if ((ring != NULL) &&
				    (ring->mr_state == MR_INUSE))
					mac_stop_ring(ring);
				return (err);
			}
			mac_set_group_state(defgrp, MAC_GROUP_STATE_SHARED);
		}
	}

	return (err);
}

/*
 * Private GLDv3 function to stop a MAC instance.
 */
void
mac_stop(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mac_group_t	*grp;

	ASSERT(mip->mi_stop != NULL);
	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	/*
	 * Check whether the device is still needed.
	 */
	ASSERT(mip->mi_active != 0);
	if (--mip->mi_active == 0) {
		if ((grp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) {
			/*
			 * There should be no more active clients since the
			 * MAC is being stopped. Stop the default RX group
			 * and transition it back to registered state.
			 *
			 * When clients are torn down, the groups
			 * are released via mac_release_rx_group which
			 * knows that the default group is always in
			 * started mode since broadcast uses it. So
			 * we can assert that there are no clients
			 * (since mac_bcast_add doesn't register itself
			 * as a client) and that the group is in SHARED state.
			 */
			ASSERT(grp->mrg_state == MAC_GROUP_STATE_SHARED);
			ASSERT(MAC_GROUP_NO_CLIENT(grp) &&
			    mip->mi_nactiveclients == 0);
			mac_stop_group_and_rings(grp);
			mac_set_group_state(grp, MAC_GROUP_STATE_REGISTERED);
		}

		if (mip->mi_default_tx_ring != NULL) {
			mac_ring_t *ring;

			ring = (mac_ring_t *)mip->mi_default_tx_ring;
			if (ring->mr_state == MR_INUSE) {
				mac_stop_ring(ring);
				ring->mr_flag = 0;
			}
		}

		/*
		 * Stop the device.
		 */
		mip->mi_stop(mip->mi_driver);
	}
}
int
i_mac_promisc_set(mac_impl_t *mip, boolean_t on)
{
	int	err = 0;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mip->mi_setpromisc != NULL);

	if (on) {
		/*
		 * Enable promiscuous mode on the device if not yet enabled.
		 */
		if (mip->mi_devpromisc++ == 0) {
			err = mip->mi_setpromisc(mip->mi_driver, B_TRUE);
			if (err != 0) {
				mip->mi_devpromisc--;
				return (err);
			}
			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
		}
	} else {
		if (mip->mi_devpromisc == 0)
			return (EPROTO);

		/*
		 * Disable promiscuous mode on the device if this is the last
		 * enabling.
		 */
		if (--mip->mi_devpromisc == 0) {
			err = mip->mi_setpromisc(mip->mi_driver, B_FALSE);
			if (err != 0) {
				mip->mi_devpromisc++;
				return (err);
			}
			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
		}
	}

	return (0);
}

/*
 * The promiscuity state can change any time. If the caller needs to take
 * actions that are atomic with the promiscuity state, then the caller needs
 * to bracket the entire sequence with mac_perim_enter/exit.
 */
boolean_t
mac_promisc_get(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	/*
	 * Return the current promiscuity.
	 */
	return (mip->mi_devpromisc != 0);
}

/*
 * Invoked at MAC instance attach time to initialize the list
 * of factory MAC addresses supported by a MAC instance. This function
 * builds a local cache in the mac_impl_t for the MAC addresses
 * supported by the underlying hardware. The MAC clients themselves
 * use the mac_addr_factory*() functions to query and reserve
 * factory MAC addresses.
 */
void
mac_addr_factory_init(mac_impl_t *mip)
{
	mac_capab_multifactaddr_t	capab;
	uint8_t				*addr;
	int				i;

	/*
	 * First round to see how many factory MAC addresses are available.
	 */
	bzero(&capab, sizeof (capab));
	if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_MULTIFACTADDR,
	    &capab) || (capab.mcm_naddr == 0)) {
		/*
		 * The MAC instance doesn't support multiple factory
		 * MAC addresses, we're done here.
		 */
		return;
	}

	/*
	 * Allocate the space and get all the factory addresses.
	 */
	addr = kmem_alloc(capab.mcm_naddr * MAXMACADDRLEN, KM_SLEEP);
	capab.mcm_getaddr(mip->mi_driver, capab.mcm_naddr, addr);

	mip->mi_factory_addr_num = capab.mcm_naddr;
	mip->mi_factory_addr = kmem_zalloc(mip->mi_factory_addr_num *
	    sizeof (mac_factory_addr_t), KM_SLEEP);

	for (i = 0; i < capab.mcm_naddr; i++) {
		bcopy(addr + i * MAXMACADDRLEN,
		    mip->mi_factory_addr[i].mfa_addr,
		    mip->mi_type->mt_addr_length);
		mip->mi_factory_addr[i].mfa_in_use = B_FALSE;
	}

	kmem_free(addr, capab.mcm_naddr * MAXMACADDRLEN);
}

void
mac_addr_factory_fini(mac_impl_t *mip)
{
	if (mip->mi_factory_addr == NULL) {
		ASSERT(mip->mi_factory_addr_num == 0);
		return;
	}

	kmem_free(mip->mi_factory_addr, mip->mi_factory_addr_num *
	    sizeof (mac_factory_addr_t));

	mip->mi_factory_addr = NULL;
	mip->mi_factory_addr_num = 0;
}
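/*
 * For illustration only: a client reserving "any available" factory address
 * via mac_addr_factory_reserve() below (*slot == -1 selects the next free
 * slot); a minimal sketch:
 *
 *	int slot = -1;
 *
 *	if ((err = mac_addr_factory_reserve(mch, &slot)) == 0) {
 *		... use the address in the returned slot ...
 *		mac_addr_factory_release(mch, slot);
 *	}
 */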
/*
 * Reserve a factory MAC address. If *slot is set to -1, the function
 * attempts to reserve any of the available factory MAC addresses and
 * returns the reserved slot id. If no slots are available, the function
 * returns ENOSPC. If *slot is not set to -1, the function reserves
 * the specified slot if it is available, or returns EBUSY if the slot
 * is already used. Returns ENOTSUP if the underlying MAC does not
 * support multiple factory addresses. If the slot number is not -1 but
 * is invalid, returns EINVAL.
 */
int
mac_addr_factory_reserve(mac_client_handle_t mch, int *slot)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
	mac_impl_t		*mip = mcip->mci_mip;
	int			i, ret = 0;

	i_mac_perim_enter(mip);
	/*
	 * Protect against concurrent readers that may need a self-consistent
	 * view of the factory addresses.
	 */
	rw_enter(&mip->mi_rw_lock, RW_WRITER);

	if (mip->mi_factory_addr_num == 0) {
		ret = ENOTSUP;
		goto bail;
	}

	if (*slot != -1) {
		/* check the specified slot */
		if (*slot < 1 || *slot > mip->mi_factory_addr_num) {
			ret = EINVAL;
			goto bail;
		}
		if (mip->mi_factory_addr[*slot-1].mfa_in_use) {
			ret = EBUSY;
			goto bail;
		}
	} else {
		/* pick the next available slot */
		for (i = 0; i < mip->mi_factory_addr_num; i++) {
			if (!mip->mi_factory_addr[i].mfa_in_use)
				break;
		}

		if (i == mip->mi_factory_addr_num) {
			ret = ENOSPC;
			goto bail;
		}
		*slot = i+1;
	}

	mip->mi_factory_addr[*slot-1].mfa_in_use = B_TRUE;
	mip->mi_factory_addr[*slot-1].mfa_client = mcip;

bail:
	rw_exit(&mip->mi_rw_lock);
	i_mac_perim_exit(mip);
	return (ret);
}

/*
 * Release the specified factory MAC address slot.
 */
void
mac_addr_factory_release(mac_client_handle_t mch, uint_t slot)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
	mac_impl_t		*mip = mcip->mci_mip;

	i_mac_perim_enter(mip);
	/*
	 * Protect against concurrent readers that may need a self-consistent
	 * view of the factory addresses.
	 */
	rw_enter(&mip->mi_rw_lock, RW_WRITER);

	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);
	ASSERT(mip->mi_factory_addr[slot-1].mfa_in_use);

	mip->mi_factory_addr[slot-1].mfa_in_use = B_FALSE;

	rw_exit(&mip->mi_rw_lock);
	i_mac_perim_exit(mip);
}

/*
 * Stores in mac_addr the value of the specified factory MAC address and its
 * length in addr_len. If the address is in use and client_name is non-NULL,
 * the name of the client using it is also copied out; the caller must
 * provide a buffer of at least MAXNAMELEN bytes for it.
 */
void
mac_addr_factory_value(mac_handle_t mh, int slot, uchar_t *mac_addr,
    uint_t *addr_len, char *client_name, boolean_t *in_use_arg)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	boolean_t	in_use;

	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);

	/*
	 * Readers need to hold mi_rw_lock. Writers need to hold the mac
	 * perimeter and mi_rw_lock.
	 */
	rw_enter(&mip->mi_rw_lock, RW_READER);
	bcopy(mip->mi_factory_addr[slot-1].mfa_addr, mac_addr, MAXMACADDRLEN);
	*addr_len = mip->mi_type->mt_addr_length;
	in_use = mip->mi_factory_addr[slot-1].mfa_in_use;
	if (in_use && client_name != NULL) {
		bcopy(mip->mi_factory_addr[slot-1].mfa_client->mci_name,
		    client_name, MAXNAMELEN);
	}
	if (in_use_arg != NULL)
		*in_use_arg = in_use;
	rw_exit(&mip->mi_rw_lock);
}

/*
 * Returns the number of factory MAC addresses (in addition to the
 * primary MAC address), 0 if the underlying MAC doesn't support
 * that feature.
 */
uint_t
mac_addr_factory_num(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	return (mip->mi_factory_addr_num);
}


void
mac_rx_group_unmark(mac_group_t *grp, uint_t flag)
{
	mac_ring_t	*ring;

	for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next)
		ring->mr_flag &= ~flag;
}

/*
 * The following mac_hwrings_xxx() functions are private mac client functions
 * used by the aggr driver to access and control the underlying HW Rx group
 * and rings. In this case, the aggr driver has exclusive control of the
 * underlying HW Rx group/rings. It calls the following functions to
 * start/stop the HW Rx rings, disable/enable polling, add/remove MAC
 * addresses, or set up the Rx callback.
 */
/* ARGSUSED */
static void
mac_hwrings_rx_process(void *arg, mac_resource_handle_t srs,
    mblk_t *mp_chain, boolean_t loopback)
{
	mac_soft_ring_set_t	*mac_srs = (mac_soft_ring_set_t *)srs;
	mac_srs_rx_t		*srs_rx = &mac_srs->srs_rx;
	mac_direct_rx_t		proc;
	void			*arg1;
	mac_resource_handle_t	arg2;

	proc = srs_rx->sr_func;
	arg1 = srs_rx->sr_arg1;
	arg2 = mac_srs->srs_mrh;

	proc(arg1, arg2, mp_chain, NULL);
}

/*
 * This function is called to get the list of HW rings that are reserved by
 * an exclusive mac client.
 *
 * Return value: the number of HW rings.
 */
int
mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh,
    mac_ring_handle_t *hwrh, mac_ring_type_t rtype)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
	flow_entry_t		*flent = mcip->mci_flent;
	mac_group_t		*grp;
	mac_ring_t		*ring;
	int			cnt = 0;

	if (rtype == MAC_RING_TYPE_RX) {
		grp = flent->fe_rx_ring_group;
	} else if (rtype == MAC_RING_TYPE_TX) {
		grp = flent->fe_tx_ring_group;
	} else {
		ASSERT(B_FALSE);
		return (-1);
	}
	/*
	 * The mac client did not reserve any RX group, return directly.
	 * This is probably because the underlying MAC does not support
	 * any groups.
	 */
	if (hwgh != NULL)
		*hwgh = NULL;
	if (grp == NULL)
		return (0);
	/*
	 * This group must be reserved by this mac client.
	 */
	ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) &&
	    (mcip == MAC_GROUP_ONLY_CLIENT(grp)));

	for (ring = grp->mrg_rings; ring != NULL;
	    ring = ring->mr_next, cnt++) {
		ASSERT(cnt < MAX_RINGS_PER_GROUP);
		hwrh[cnt] = (mac_ring_handle_t)ring;
	}
	if (hwgh != NULL)
		*hwgh = (mac_group_handle_t)grp;

	return (cnt);
}

/*
 * This function is called to get info about Tx/Rx rings.
 *
 * Return value: returns uint_t which will have various bits set
 * that indicate different properties of the ring.
 */
uint_t
mac_hwring_getinfo(mac_ring_handle_t rh)
{
	mac_ring_t	*ring = (mac_ring_t *)rh;
	mac_ring_info_t	*info = &ring->mr_info;

	return (info->mri_flags);
}
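/*
 * For illustration only: an exclusive client such as aggr might retrieve and
 * quiesce the reserved Rx rings as sketched below (error handling elided;
 * see the aggr driver for actual usage):
 *
 *	mac_ring_handle_t	hwrh[MAX_RINGS_PER_GROUP];
 *	mac_group_handle_t	hwgh;
 *	int			i, cnt;
 *
 *	cnt = mac_hwrings_get(mch, &hwgh, hwrh, MAC_RING_TYPE_RX);
 *	for (i = 0; i < cnt; i++)
 *		mac_hwring_stop(hwrh[i]);
 */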
/*
 * Export ddi interrupt handles from the HW ring to the pseudo ring and
 * set up the RX callback of the mac client which exclusively controls the
 * HW ring.
 */
void
mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh,
    mac_ring_handle_t pseudo_rh)
{
	mac_ring_t		*hw_ring = (mac_ring_t *)hwrh;
	mac_ring_t		*pseudo_ring;
	mac_soft_ring_set_t	*mac_srs = hw_ring->mr_srs;

	if (pseudo_rh != NULL) {
		pseudo_ring = (mac_ring_t *)pseudo_rh;
		/* Export the ddi handles to pseudo ring */
		pseudo_ring->mr_info.mri_intr.mi_ddi_handle =
		    hw_ring->mr_info.mri_intr.mi_ddi_handle;
		pseudo_ring->mr_info.mri_intr.mi_ddi_shared =
		    hw_ring->mr_info.mri_intr.mi_ddi_shared;
		/*
		 * Save a pointer to pseudo ring in the hw ring. If
		 * interrupt handle changes, the hw ring will be
		 * notified of the change (see mac_ring_intr_set())
		 * and the appropriate change has to be made to
		 * the pseudo ring that has exported the ddi handle.
		 */
		hw_ring->mr_prh = pseudo_rh;
	}

	if (hw_ring->mr_type == MAC_RING_TYPE_RX) {
		ASSERT(!(mac_srs->srs_type & SRST_TX));
		mac_srs->srs_mrh = prh;
		mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process;
	}
}

void
mac_hwring_teardown(mac_ring_handle_t hwrh)
{
	mac_ring_t		*hw_ring = (mac_ring_t *)hwrh;
	mac_soft_ring_set_t	*mac_srs;

	if (hw_ring == NULL)
		return;
	hw_ring->mr_prh = NULL;
	if (hw_ring->mr_type == MAC_RING_TYPE_RX) {
		mac_srs = hw_ring->mr_srs;
		ASSERT(!(mac_srs->srs_type & SRST_TX));
		mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process;
		mac_srs->srs_mrh = NULL;
	}
}

int
mac_hwring_disable_intr(mac_ring_handle_t rh)
{
	mac_ring_t	*rr_ring = (mac_ring_t *)rh;
	mac_intr_t	*intr = &rr_ring->mr_info.mri_intr;

	return (intr->mi_disable(intr->mi_handle));
}

int
mac_hwring_enable_intr(mac_ring_handle_t rh)
{
	mac_ring_t	*rr_ring = (mac_ring_t *)rh;
	mac_intr_t	*intr = &rr_ring->mr_info.mri_intr;

	return (intr->mi_enable(intr->mi_handle));
}

int
mac_hwring_start(mac_ring_handle_t rh)
{
	mac_ring_t	*rr_ring = (mac_ring_t *)rh;

	MAC_RING_UNMARK(rr_ring, MR_QUIESCE);
	return (0);
}

void
mac_hwring_stop(mac_ring_handle_t rh)
{
	mac_ring_t	*rr_ring = (mac_ring_t *)rh;

	mac_rx_ring_quiesce(rr_ring, MR_QUIESCE);
}

mblk_t *
mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup)
{
	mac_ring_t	*rr_ring = (mac_ring_t *)rh;
	mac_ring_info_t	*info = &rr_ring->mr_info;

	return (info->mri_poll(info->mri_driver, bytes_to_pickup));
}

/*
 * Send packets through a selected tx ring.
 */
mblk_t *
mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp)
{
	mac_ring_t	*ring = (mac_ring_t *)rh;
	mac_ring_info_t	*info = &ring->mr_info;

	ASSERT(ring->mr_type == MAC_RING_TYPE_TX &&
	    ring->mr_state >= MR_INUSE);
	return (info->mri_tx(info->mri_driver, mp));
}

/*
 * Query stats for a particular rx/tx ring
 */
int
mac_hwring_getstat(mac_ring_handle_t rh, uint_t stat, uint64_t *val)
{
	mac_ring_t	*ring = (mac_ring_t *)rh;
	mac_ring_info_t	*info = &ring->mr_info;

	return (info->mri_stat(info->mri_driver, stat, val));
}
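/*
 * For illustration only: with ring interrupts disabled, an exclusive client
 * can drive a ring in poll mode; a minimal sketch of the idiom (the
 * bytes_budget variable and chain handling are illustrative):
 *
 *	(void) mac_hwring_disable_intr(rh);
 *	while (polling) {
 *		mblk_t *mp = mac_hwring_poll(rh, bytes_budget);
 *		if (mp != NULL)
 *			... process the chain ...
 *	}
 *	(void) mac_hwring_enable_intr(rh);
 */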
/*
 * Private function that is only used by aggr to send packets through
 * a port/Tx ring. Since aggr exposes a pseudo Tx ring even for ports
 * that do not expose Tx rings, the aggr_ring_tx() entry point needs
 * access to the mac_impl_t to send packets through the m_tx() entry point.
 * It accomplishes this by calling the mac_hwring_send_priv() function.
 */
mblk_t *
mac_hwring_send_priv(mac_client_handle_t mch, mac_ring_handle_t rh, mblk_t *mp)
{
	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
	mac_impl_t		*mip = mcip->mci_mip;

	MAC_TX(mip, rh, mp, mcip);
	return (mp);
}

int
mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr)
{
	mac_group_t *group = (mac_group_t *)gh;

	return (mac_group_addmac(group, addr));
}

int
mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr)
{
	mac_group_t *group = (mac_group_t *)gh;

	return (mac_group_remmac(group, addr));
}

/*
 * Set the RX group to be shared/reserved. Note that the group must be
 * started/stopped outside of this function.
 */
void
mac_set_group_state(mac_group_t *grp, mac_group_state_t state)
{
	/*
	 * If there is no change in the group state, just return.
	 */
	if (grp->mrg_state == state)
		return;

	switch (state) {
	case MAC_GROUP_STATE_RESERVED:
		/*
		 * Successfully reserved the group.
		 *
		 * Given that there is an exclusive client controlling this
		 * group, we enable the group level polling when available,
		 * so that SRSs get to turn on/off individual rings they're
		 * assigned to.
		 */
		ASSERT(MAC_PERIM_HELD(grp->mrg_mh));

		if (grp->mrg_type == MAC_RING_TYPE_RX &&
		    GROUP_INTR_DISABLE_FUNC(grp) != NULL) {
			GROUP_INTR_DISABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp));
		}
		break;

	case MAC_GROUP_STATE_SHARED:
		/*
		 * Set all rings of this group to software classified.
		 * If the group has an overriding interrupt, then re-enable it.
		 */
		ASSERT(MAC_PERIM_HELD(grp->mrg_mh));

		if (grp->mrg_type == MAC_RING_TYPE_RX &&
		    GROUP_INTR_ENABLE_FUNC(grp) != NULL) {
			GROUP_INTR_ENABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp));
		}
		/* The ring is not available for reservations any more */
		break;

	case MAC_GROUP_STATE_REGISTERED:
		/* Also callable from mac_register, perim is not held */
		break;

	default:
		ASSERT(B_FALSE);
		break;
	}

	grp->mrg_state = state;
}

/*
 * Quiesce future hardware classified packets for the specified Rx ring
 */
static void
mac_rx_ring_quiesce(mac_ring_t *rx_ring, uint_t ring_flag)
{
	ASSERT(rx_ring->mr_classify_type == MAC_HW_CLASSIFIER);
	ASSERT(ring_flag == MR_CONDEMNED || ring_flag == MR_QUIESCE);

	mutex_enter(&rx_ring->mr_lock);
	rx_ring->mr_flag |= ring_flag;
	while (rx_ring->mr_refcnt != 0)
		cv_wait(&rx_ring->mr_cv, &rx_ring->mr_lock);
	mutex_exit(&rx_ring->mr_lock);
}

/*
 * Please see mac_tx for details about the per cpu locking scheme
 */
static void
mac_tx_lock_all(mac_client_impl_t *mcip)
{
	int	i;

	for (i = 0; i <= mac_tx_percpu_cnt; i++)
		mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
}

static void
mac_tx_unlock_all(mac_client_impl_t *mcip)
{
	int	i;

	for (i = mac_tx_percpu_cnt; i >= 0; i--)
		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
}

static void
mac_tx_unlock_allbutzero(mac_client_impl_t *mcip)
{
	int	i;

	for (i = mac_tx_percpu_cnt; i > 0; i--)
		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
}

static int
mac_tx_sum_refcnt(mac_client_impl_t *mcip)
{
	int	i;
	int	refcnt = 0;

	for (i = 0; i <= mac_tx_percpu_cnt; i++)
		refcnt += mcip->mci_tx_pcpu[i].pcpu_tx_refcnt;

	return (refcnt);
}

/*
 * Stop future Tx packets coming down from the client in preparation for
 * quiescing the Tx side. This is needed for dynamic reclaim and reassignment
 * of rings between clients
 */
void
mac_tx_client_block(mac_client_impl_t *mcip)
{
	mac_tx_lock_all(mcip);
	mcip->mci_tx_flag |= MCI_TX_QUIESCE;
	while (mac_tx_sum_refcnt(mcip) != 0) {
		mac_tx_unlock_allbutzero(mcip);
		cv_wait(&mcip->mci_tx_cv,
		    &mcip->mci_tx_pcpu[0].pcpu_tx_lock);
		mutex_exit(&mcip->mci_tx_pcpu[0].pcpu_tx_lock);
		mac_tx_lock_all(mcip);
	}
	mac_tx_unlock_all(mcip);
}

void
mac_tx_client_unblock(mac_client_impl_t *mcip)
{
	mac_tx_lock_all(mcip);
	mcip->mci_tx_flag &= ~MCI_TX_QUIESCE;
	mac_tx_unlock_all(mcip);
	/*
	 * We may fail to disable flow control for the last MAC_NOTE_TX
	 * notification because the MAC client is quiesced. Send the
	 * notification again.
	 */
	i_mac_notify(mcip->mci_mip, MAC_NOTE_TX);
}
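/*
 * For illustration only: a condensed sketch of the data path side that
 * mac_tx_client_block() waits on. The index computation is illustrative;
 * see mac_tx for the real protocol:
 *
 *	i = CPU->cpu_seqid & mac_tx_percpu_cnt;	mac_tx_percpu_cnt is 2**N - 1
 *	mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
 *	if (mcip->mci_tx_flag & MCI_TX_QUIESCE) {
 *		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
 *		return;				Tx is blocked
 *	}
 *	mcip->mci_tx_pcpu[i].pcpu_tx_refcnt++;
 *	mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
 *	... transmit ...
 *	decrement pcpu_tx_refcnt under the same lock; if the count drops to
 *	zero while MCI_TX_QUIESCE is set, cv_signal(&mcip->mci_tx_cv).
 */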
The protocol 1850 * works bottom up by cutting off packet flow from the bottommost point in the 1851 * mac, then the SRS, and then the soft rings. There are 2 use cases of this 1852 * mechanism. One is a temporary quiesce of the SRS, such as say while changing 1853 * the Rx callbacks. Another use case is Rx SRS teardown. In the former case 1854 * the QUIESCE prefix/suffix is used and in the latter the CONDEMNED is used 1855 * for the SRS and MR flags. In the former case the threads pause waiting for 1856 * a restart, while in the latter case the threads exit. The Tx SRS teardown 1857 * is also mostly similar to the above. 1858 * 1859 * 1. Stop future hardware classified packets at the lowest level in the mac. 1860 * Remove any hardware classification rule (CONDEMNED case) and mark the 1861 * rings as CONDEMNED or QUIESCE as appropriate. This prevents the mr_refcnt 1862 * from increasing. Upcalls from the driver that come through hardware 1863 * classification will be dropped in mac_rx from now on. Then we wait for 1864 * the mr_refcnt to drop to zero. When the mr_refcnt reaches zero we are 1865 * sure there aren't any upcall threads from the driver through hardware 1866 * classification. In the case of SRS teardown we also remove the 1867 * classification rule in the driver. 1868 * 1869 * 2. Stop future software classified packets by marking the flow entry with 1870 * FE_QUIESCE or FE_CONDEMNED as appropriate which prevents the refcnt from 1871 * increasing. We also remove the flow entry from the table in the latter 1872 * case. Then wait for the fe_refcnt to reach an appropriate quiescent value 1873 * that indicates there aren't any active threads using that flow entry. 1874 * 1875 * 3. Quiesce the SRS and softrings by signaling the SRS. The SRS poll thread, 1876 * SRS worker thread, and the soft ring threads are quiesced in sequence 1877 * with the SRS worker thread serving as a master controller. This 1878 * mechanism is explained in mac_srs_worker_quiesce(). 1879 * 1880 * The restart mechanism to reactivate the SRS and softrings is explained 1881 * in mac_srs_worker_restart(). Here we just signal the SRS worker to start the 1882 * restart sequence. 1883 */ 1884 void 1885 mac_rx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) 1886 { 1887 flow_entry_t *flent = srs->srs_flent; 1888 uint_t mr_flag, srs_done_flag; 1889 1890 ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); 1891 ASSERT(!(srs->srs_type & SRST_TX)); 1892 1893 if (srs_quiesce_flag == SRS_CONDEMNED) { 1894 mr_flag = MR_CONDEMNED; 1895 srs_done_flag = SRS_CONDEMNED_DONE; 1896 if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) 1897 mac_srs_client_poll_disable(srs->srs_mcip, srs); 1898 } else { 1899 ASSERT(srs_quiesce_flag == SRS_QUIESCE); 1900 mr_flag = MR_QUIESCE; 1901 srs_done_flag = SRS_QUIESCE_DONE; 1902 if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) 1903 mac_srs_client_poll_quiesce(srs->srs_mcip, srs); 1904 } 1905 1906 if (srs->srs_ring != NULL) { 1907 mac_rx_ring_quiesce(srs->srs_ring, mr_flag); 1908 } else { 1909 /* 1910 * SRS is driven by software classification. In case 1911 * of CONDEMNED, the top level teardown functions will 1912 * deal with flow removal. 1913 */ 1914 if (srs_quiesce_flag != SRS_CONDEMNED) { 1915 FLOW_MARK(flent, FE_QUIESCE); 1916 mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 1917 } 1918 } 1919 1920 /* 1921 * Signal the SRS to quiesce itself, and then cv_wait for the 1922 * SRS quiesce to complete.
The SRS worker thread will wake us 1923 * up when the quiesce is complete 1924 */ 1925 mac_srs_signal(srs, srs_quiesce_flag); 1926 mac_srs_quiesce_wait(srs, srs_done_flag); 1927 } 1928 1929 /* 1930 * Remove an SRS. 1931 */ 1932 void 1933 mac_rx_srs_remove(mac_soft_ring_set_t *srs) 1934 { 1935 flow_entry_t *flent = srs->srs_flent; 1936 int i; 1937 1938 mac_rx_srs_quiesce(srs, SRS_CONDEMNED); 1939 /* 1940 * Locate and remove our entry in the fe_rx_srs[] array, and 1941 * adjust the fe_rx_srs array entries and array count by 1942 * moving the last entry into the vacated spot. 1943 */ 1944 mutex_enter(&flent->fe_lock); 1945 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 1946 if (flent->fe_rx_srs[i] == srs) 1947 break; 1948 } 1949 1950 ASSERT(i != 0 && i < flent->fe_rx_srs_cnt); 1951 if (i != flent->fe_rx_srs_cnt - 1) { 1952 flent->fe_rx_srs[i] = 1953 flent->fe_rx_srs[flent->fe_rx_srs_cnt - 1]; 1954 i = flent->fe_rx_srs_cnt - 1; 1955 } 1956 1957 flent->fe_rx_srs[i] = NULL; 1958 flent->fe_rx_srs_cnt--; 1959 mutex_exit(&flent->fe_lock); 1960 1961 mac_srs_free(srs); 1962 } 1963 1964 static void 1965 mac_srs_clear_flag(mac_soft_ring_set_t *srs, uint_t flag) 1966 { 1967 mutex_enter(&srs->srs_lock); 1968 srs->srs_state &= ~flag; 1969 mutex_exit(&srs->srs_lock); 1970 } 1971 1972 void 1973 mac_rx_srs_restart(mac_soft_ring_set_t *srs) 1974 { 1975 flow_entry_t *flent = srs->srs_flent; 1976 mac_ring_t *mr; 1977 1978 ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); 1979 ASSERT((srs->srs_type & SRST_TX) == 0); 1980 1981 /* 1982 * This handles a change in the number of SRSs between the quiesce 1983 * and restart operations of a flow. 1984 */ 1985 if (!SRS_QUIESCED(srs)) 1986 return; 1987 1988 /* 1989 * Signal the SRS to restart itself. Wait for the restart to complete. 1990 * Note that we only restart the SRS if it is not marked as 1991 * permanently quiesced. 1992 */ 1993 if (!SRS_QUIESCED_PERMANENT(srs)) { 1994 mac_srs_signal(srs, SRS_RESTART); 1995 mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); 1996 mac_srs_clear_flag(srs, SRS_RESTART_DONE); 1997 1998 mac_srs_client_poll_restart(srs->srs_mcip, srs); 1999 } 2000 2001 /* Finally clear the flags to let the packets in */ 2002 mr = srs->srs_ring; 2003 if (mr != NULL) { 2004 MAC_RING_UNMARK(mr, MR_QUIESCE); 2005 /* In case the ring was stopped, safely restart it */ 2006 if (mr->mr_state != MR_INUSE) 2007 (void) mac_start_ring(mr); 2008 } else { 2009 FLOW_UNMARK(flent, FE_QUIESCE); 2010 } 2011 } 2012 2013 /* 2014 * Temporary quiesce of a flow and associated Rx SRS. 2015 * Please see block comment above mac_rx_classify_flow_rem.
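 *
 * A minimal sketch of the intended pairing (hypothetical caller, not a
 * sequence taken from this file):
 *
 *	(void) mac_rx_classify_flow_quiesce(flent, NULL);
 *	... modify the flow's Rx callbacks or SRS state ...
 *	(void) mac_rx_classify_flow_restart(flent, NULL);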
2016 */ 2017 /* ARGSUSED */ 2018 int 2019 mac_rx_classify_flow_quiesce(flow_entry_t *flent, void *arg) 2020 { 2021 int i; 2022 2023 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 2024 mac_rx_srs_quiesce((mac_soft_ring_set_t *)flent->fe_rx_srs[i], 2025 SRS_QUIESCE); 2026 } 2027 return (0); 2028 } 2029 2030 /* 2031 * Restart a flow and associated Rx SRS that has been quiesced temporarily 2032 * Please see block comment above mac_rx_classify_flow_rem 2033 */ 2034 /* ARGSUSED */ 2035 int 2036 mac_rx_classify_flow_restart(flow_entry_t *flent, void *arg) 2037 { 2038 int i; 2039 2040 for (i = 0; i < flent->fe_rx_srs_cnt; i++) 2041 mac_rx_srs_restart((mac_soft_ring_set_t *)flent->fe_rx_srs[i]); 2042 2043 return (0); 2044 } 2045 2046 void 2047 mac_srs_perm_quiesce(mac_client_handle_t mch, boolean_t on) 2048 { 2049 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2050 flow_entry_t *flent = mcip->mci_flent; 2051 mac_impl_t *mip = mcip->mci_mip; 2052 mac_soft_ring_set_t *mac_srs; 2053 int i; 2054 2055 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 2056 2057 if (flent == NULL) 2058 return; 2059 2060 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 2061 mac_srs = flent->fe_rx_srs[i]; 2062 mutex_enter(&mac_srs->srs_lock); 2063 if (on) 2064 mac_srs->srs_state |= SRS_QUIESCE_PERM; 2065 else 2066 mac_srs->srs_state &= ~SRS_QUIESCE_PERM; 2067 mutex_exit(&mac_srs->srs_lock); 2068 } 2069 } 2070 2071 void 2072 mac_rx_client_quiesce(mac_client_handle_t mch) 2073 { 2074 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2075 mac_impl_t *mip = mcip->mci_mip; 2076 2077 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 2078 2079 if (MCIP_DATAPATH_SETUP(mcip)) { 2080 (void) mac_rx_classify_flow_quiesce(mcip->mci_flent, 2081 NULL); 2082 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2083 mac_rx_classify_flow_quiesce, NULL); 2084 } 2085 } 2086 2087 void 2088 mac_rx_client_restart(mac_client_handle_t mch) 2089 { 2090 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2091 mac_impl_t *mip = mcip->mci_mip; 2092 2093 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 2094 2095 if (MCIP_DATAPATH_SETUP(mcip)) { 2096 (void) mac_rx_classify_flow_restart(mcip->mci_flent, NULL); 2097 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2098 mac_rx_classify_flow_restart, NULL); 2099 } 2100 } 2101 2102 /* 2103 * This function only quiesces the Tx SRS and softring worker threads. Callers 2104 * need to make sure that there aren't any mac client threads doing current or 2105 * future transmits in the mac before calling this function. 2106 */ 2107 void 2108 mac_tx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) 2109 { 2110 mac_client_impl_t *mcip = srs->srs_mcip; 2111 2112 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2113 2114 ASSERT(srs->srs_type & SRST_TX); 2115 ASSERT(srs_quiesce_flag == SRS_CONDEMNED || 2116 srs_quiesce_flag == SRS_QUIESCE); 2117 2118 /* 2119 * Signal the SRS to quiesce itself, and then cv_wait for the 2120 * SRS quiesce to complete. The SRS worker thread will wake us 2121 * up when the quiesce is complete 2122 */ 2123 mac_srs_signal(srs, srs_quiesce_flag); 2124 mac_srs_quiesce_wait(srs, srs_quiesce_flag == SRS_QUIESCE ? 2125 SRS_QUIESCE_DONE : SRS_CONDEMNED_DONE); 2126 } 2127 2128 void 2129 mac_tx_srs_restart(mac_soft_ring_set_t *srs) 2130 { 2131 /* 2132 * Resizing the fanout could result in creation of new SRSs. 
2133 * They may not necessarily be in the quiesced state in which 2134 * case they need not be restarted 2135 */ 2136 if (!SRS_QUIESCED(srs)) 2137 return; 2138 2139 mac_srs_signal(srs, SRS_RESTART); 2140 mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); 2141 mac_srs_clear_flag(srs, SRS_RESTART_DONE); 2142 } 2143 2144 /* 2145 * Temporary quiesce of a flow and associated Tx SRS. 2146 * Please see block comment above mac_rx_srs_quiesce 2147 */ 2148 /* ARGSUSED */ 2149 int 2150 mac_tx_flow_quiesce(flow_entry_t *flent, void *arg) 2151 { 2152 /* 2153 * The fe_tx_srs is null for a subflow on an interface that is 2154 * not plumbed 2155 */ 2156 if (flent->fe_tx_srs != NULL) 2157 mac_tx_srs_quiesce(flent->fe_tx_srs, SRS_QUIESCE); 2158 return (0); 2159 } 2160 2161 /* ARGSUSED */ 2162 int 2163 mac_tx_flow_restart(flow_entry_t *flent, void *arg) 2164 { 2165 /* 2166 * The fe_tx_srs is null for a subflow on an interface that is 2167 * not plumbed 2168 */ 2169 if (flent->fe_tx_srs != NULL) 2170 mac_tx_srs_restart(flent->fe_tx_srs); 2171 return (0); 2172 } 2173 2174 static void 2175 i_mac_tx_client_quiesce(mac_client_handle_t mch, uint_t srs_quiesce_flag) 2176 { 2177 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2178 2179 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2180 2181 mac_tx_client_block(mcip); 2182 if (MCIP_TX_SRS(mcip) != NULL) { 2183 mac_tx_srs_quiesce(MCIP_TX_SRS(mcip), srs_quiesce_flag); 2184 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2185 mac_tx_flow_quiesce, NULL); 2186 } 2187 } 2188 2189 void 2190 mac_tx_client_quiesce(mac_client_handle_t mch) 2191 { 2192 i_mac_tx_client_quiesce(mch, SRS_QUIESCE); 2193 } 2194 2195 void 2196 mac_tx_client_condemn(mac_client_handle_t mch) 2197 { 2198 i_mac_tx_client_quiesce(mch, SRS_CONDEMNED); 2199 } 2200 2201 void 2202 mac_tx_client_restart(mac_client_handle_t mch) 2203 { 2204 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2205 2206 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2207 2208 mac_tx_client_unblock(mcip); 2209 if (MCIP_TX_SRS(mcip) != NULL) { 2210 mac_tx_srs_restart(MCIP_TX_SRS(mcip)); 2211 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2212 mac_tx_flow_restart, NULL); 2213 } 2214 } 2215 2216 void 2217 mac_tx_client_flush(mac_client_impl_t *mcip) 2218 { 2219 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2220 2221 mac_tx_client_quiesce((mac_client_handle_t)mcip); 2222 mac_tx_client_restart((mac_client_handle_t)mcip); 2223 } 2224 2225 void 2226 mac_client_quiesce(mac_client_impl_t *mcip) 2227 { 2228 mac_rx_client_quiesce((mac_client_handle_t)mcip); 2229 mac_tx_client_quiesce((mac_client_handle_t)mcip); 2230 } 2231 2232 void 2233 mac_client_restart(mac_client_impl_t *mcip) 2234 { 2235 mac_rx_client_restart((mac_client_handle_t)mcip); 2236 mac_tx_client_restart((mac_client_handle_t)mcip); 2237 } 2238 2239 /* 2240 * Allocate a minor number. 2241 */ 2242 minor_t 2243 mac_minor_hold(boolean_t sleep) 2244 { 2245 minor_t minor; 2246 2247 /* 2248 * Grab a value from the arena. 2249 */ 2250 atomic_add_32(&minor_count, 1); 2251 2252 if (sleep) 2253 minor = (uint_t)id_alloc(minor_ids); 2254 else 2255 minor = (uint_t)id_alloc_nosleep(minor_ids); 2256 2257 if (minor == 0) { 2258 atomic_add_32(&minor_count, -1); 2259 return (0); 2260 } 2261 2262 return (minor); 2263 } 2264 2265 /* 2266 * Release a previously allocated minor number. 2267 */ 2268 void 2269 mac_minor_rele(minor_t minor) 2270 { 2271 /* 2272 * Return the value to the arena.
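 *
 * Illustratively, this pairs with mac_minor_hold() above:
 *
 *	minor = mac_minor_hold(B_TRUE);		may sleep for an id
 *	... create and use the minor node ...
 *	mac_minor_rele(minor);			on teardown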
2273 */ 2274 id_free(minor_ids, minor); 2275 atomic_add_32(&minor_count, -1); 2276 } 2277 2278 uint32_t 2279 mac_no_notification(mac_handle_t mh) 2280 { 2281 mac_impl_t *mip = (mac_impl_t *)mh; 2282 2283 return (((mip->mi_state_flags & MIS_LEGACY) != 0) ? 2284 mip->mi_capab_legacy.ml_unsup_note : 0); 2285 } 2286 2287 /* 2288 * Prevent any new opens of this mac in preparation for unregister 2289 */ 2290 int 2291 i_mac_disable(mac_impl_t *mip) 2292 { 2293 mac_client_impl_t *mcip; 2294 2295 rw_enter(&i_mac_impl_lock, RW_WRITER); 2296 if (mip->mi_state_flags & MIS_DISABLED) { 2297 /* Already disabled, return success */ 2298 rw_exit(&i_mac_impl_lock); 2299 return (0); 2300 } 2301 /* 2302 * See if there are any other references to this mac_t (e.g., VLANs). 2303 * If so, return failure. If all the other checks below pass, then 2304 * set mi_disabled atomically under the i_mac_impl_lock to prevent 2305 * any new VLANs from being created or new mac client opens of this 2306 * mac end point. 2307 */ 2308 if (mip->mi_ref > 0) { 2309 rw_exit(&i_mac_impl_lock); 2310 return (EBUSY); 2311 } 2312 2313 /* 2314 * MAC clients must delete all multicast groups they join before 2315 * closing. Broadcast groups are reference counted; the last client 2316 * to delete the group will wait till the group is physically 2317 * deleted. Since all clients have closed this mac end point, 2318 * mi_bcast_ngrps must be zero at this point 2319 */ 2320 ASSERT(mip->mi_bcast_ngrps == 0); 2321 2322 /* 2323 * Don't let go of this if it has some flows. 2324 * All other code guarantees no flows are added to a disabled 2325 * mac, therefore it is sufficient to check for the flow table 2326 * only here. 2327 */ 2328 mcip = mac_primary_client_handle(mip); 2329 if ((mcip != NULL) && mac_link_has_flows((mac_client_handle_t)mcip)) { 2330 rw_exit(&i_mac_impl_lock); 2331 return (ENOTEMPTY); 2332 } 2333 2334 mip->mi_state_flags |= MIS_DISABLED; 2335 rw_exit(&i_mac_impl_lock); 2336 return (0); 2337 } 2338 2339 int 2340 mac_disable_nowait(mac_handle_t mh) 2341 { 2342 mac_impl_t *mip = (mac_impl_t *)mh; 2343 int err; 2344 2345 if ((err = i_mac_perim_enter_nowait(mip)) != 0) 2346 return (err); 2347 err = i_mac_disable(mip); 2348 i_mac_perim_exit(mip); 2349 return (err); 2350 } 2351 2352 int 2353 mac_disable(mac_handle_t mh) 2354 { 2355 mac_impl_t *mip = (mac_impl_t *)mh; 2356 int err; 2357 2358 i_mac_perim_enter(mip); 2359 err = i_mac_disable(mip); 2360 i_mac_perim_exit(mip); 2361 2362 /* 2363 * Clean up notification thread and wait for it to exit. 2364 */ 2365 if (err == 0) 2366 i_mac_notify_exit(mip); 2367 2368 return (err); 2369 } 2370 2371 /* 2372 * Called when the MAC instance has a non-empty flow table, to de-multiplex 2373 * incoming packets to the right flow. 2374 * The MAC's rw lock is assumed held as a READER. 2375 */ 2376 /* ARGSUSED */ 2377 static mblk_t * 2378 mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp) 2379 { 2380 flow_entry_t *flent = NULL; 2381 uint_t flags = FLOW_INBOUND; 2382 int err; 2383 2384 /* 2385 * If the mac is a port of an aggregation, pass FLOW_IGNORE_VLAN 2386 * to mac_flow_lookup() so that the VLAN packets can be successfully 2387 * passed to the non-VLAN aggregation flows. 2388 * 2389 * Note that there is possibly a race between this and 2390 * mac_unicast_remove/add() and VLAN packets could be incorrectly 2391 * classified to non-VLAN flows of non-aggregation mac clients. These 2392 * VLAN packets will then be filtered out by the mac module.
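 *
 * In outline (illustrative only), the classification step below is:
 *
 *	mac_flow_lookup(mi_flow_tab, mp, flags, &flent)
 *	flent->fe_cb_fn(fe_cb_arg1, fe_cb_arg2, mp, B_FALSE)	deliver
 *	FLOW_REFRELE(flent)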
2393 */ 2394 if ((mip->mi_state_flags & MIS_EXCLUSIVE) != 0) 2395 flags |= FLOW_IGNORE_VLAN; 2396 2397 err = mac_flow_lookup(mip->mi_flow_tab, mp, flags, &flent); 2398 if (err != 0) { 2399 /* no registered receive function */ 2400 return (mp); 2401 } else { 2402 mac_client_impl_t *mcip; 2403 2404 /* 2405 * This flent might just be an additional one on the MAC client, 2406 * i.e. for classification purposes (different fdesc); however, 2407 * the resources, SRS et al., are in the mci_flent, so if 2408 * this isn't the mci_flent, we need to get it. 2409 */ 2410 if ((mcip = flent->fe_mcip) != NULL && 2411 mcip->mci_flent != flent) { 2412 FLOW_REFRELE(flent); 2413 flent = mcip->mci_flent; 2414 FLOW_TRY_REFHOLD(flent, err); 2415 if (err != 0) 2416 return (mp); 2417 } 2418 (flent->fe_cb_fn)(flent->fe_cb_arg1, flent->fe_cb_arg2, mp, 2419 B_FALSE); 2420 FLOW_REFRELE(flent); 2421 } 2422 return (NULL); 2423 } 2424 2425 mblk_t * 2426 mac_rx_flow(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) 2427 { 2428 mac_impl_t *mip = (mac_impl_t *)mh; 2429 mblk_t *bp, *bp1, **bpp, *list = NULL; 2430 2431 /* 2432 * We walk the chain and attempt to classify each packet. 2433 * The packets that couldn't be classified will be returned 2434 * to the caller. 2435 */ 2436 bp = mp_chain; 2437 bpp = &list; 2438 while (bp != NULL) { 2439 bp1 = bp; 2440 bp = bp->b_next; 2441 bp1->b_next = NULL; 2442 2443 if (mac_rx_classify(mip, mrh, bp1) != NULL) { 2444 *bpp = bp1; 2445 bpp = &bp1->b_next; 2446 } 2447 } 2448 return (list); 2449 } 2450 2451 static int 2452 mac_tx_flow_srs_wakeup(flow_entry_t *flent, void *arg) 2453 { 2454 mac_ring_handle_t ring = arg; 2455 2456 if (flent->fe_tx_srs) 2457 mac_tx_srs_wakeup(flent->fe_tx_srs, ring); 2458 return (0); 2459 } 2460 2461 void 2462 i_mac_tx_srs_notify(mac_impl_t *mip, mac_ring_handle_t ring) 2463 { 2464 mac_client_impl_t *cclient; 2465 mac_soft_ring_set_t *mac_srs; 2466 2467 /* 2468 * After grabbing the mi_rw_lock, the list of clients can't change. 2469 * If there are any clients, mi_disabled must be B_FALSE and can't 2470 * get set since there are clients. If there aren't any clients we 2471 * don't do anything. In any case the mip has to be valid. The driver 2472 * must make sure that it goes single threaded (with respect to mac 2473 * calls) and wait for all pending mac calls to finish before calling 2474 * mac_unregister. 2475 */ 2476 rw_enter(&i_mac_impl_lock, RW_READER); 2477 if (mip->mi_state_flags & MIS_DISABLED) { 2478 rw_exit(&i_mac_impl_lock); 2479 return; 2480 } 2481 2482 /* 2483 * Get MAC tx srs from walking mac_client_handle list. 2484 */ 2485 rw_enter(&mip->mi_rw_lock, RW_READER); 2486 for (cclient = mip->mi_clients_list; cclient != NULL; 2487 cclient = cclient->mci_client_next) { 2488 if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) { 2489 mac_tx_srs_wakeup(mac_srs, ring); 2490 } else { 2491 /* 2492 * Aggr opens underlying ports in exclusive mode 2493 * and registers flow control callbacks using 2494 * mac_tx_client_notify(). When opened in 2495 * exclusive mode, Tx SRS won't be created 2496 * during mac_unicast_add().
2497 */ 2498 if (cclient->mci_state_flags & MCIS_EXCLUSIVE) { 2499 mac_tx_invoke_callbacks(cclient, 2500 (mac_tx_cookie_t)ring); 2501 } 2502 } 2503 (void) mac_flow_walk(cclient->mci_subflow_tab, 2504 mac_tx_flow_srs_wakeup, ring); 2505 } 2506 rw_exit(&mip->mi_rw_lock); 2507 rw_exit(&i_mac_impl_lock); 2508 } 2509 2510 /* ARGSUSED */ 2511 void 2512 mac_multicast_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg, 2513 boolean_t add) 2514 { 2515 mac_impl_t *mip = (mac_impl_t *)mh; 2516 2517 i_mac_perim_enter((mac_impl_t *)mh); 2518 /* 2519 * If no specific refresh function was given then default to the 2520 * driver's m_multicst entry point. 2521 */ 2522 if (refresh == NULL) { 2523 refresh = mip->mi_multicst; 2524 arg = mip->mi_driver; 2525 } 2526 2527 mac_bcast_refresh(mip, refresh, arg, add); 2528 i_mac_perim_exit((mac_impl_t *)mh); 2529 } 2530 2531 void 2532 mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg) 2533 { 2534 mac_impl_t *mip = (mac_impl_t *)mh; 2535 2536 /* 2537 * If no specific refresh function was given then default to the 2538 * driver's m_promisc entry point. 2539 */ 2540 if (refresh == NULL) { 2541 refresh = mip->mi_setpromisc; 2542 arg = mip->mi_driver; 2543 } 2544 ASSERT(refresh != NULL); 2545 2546 /* 2547 * Call the refresh function with the current promiscuity. 2548 */ 2549 refresh(arg, (mip->mi_devpromisc != 0)); 2550 } 2551 2552 /* 2553 * The mac client requests that the mac not change its margin size to 2554 * less than the specified value. If "current" is B_TRUE, then the client 2555 * requests the mac not to change its margin size to be smaller than the 2556 * current size. Further, return the current margin size value in this case. 2557 * 2558 * We keep every requested size in an ordered list from largest to smallest. 2559 */ 2560 int 2561 mac_margin_add(mac_handle_t mh, uint32_t *marginp, boolean_t current) 2562 { 2563 mac_impl_t *mip = (mac_impl_t *)mh; 2564 mac_margin_req_t **pp, *p; 2565 int err = 0; 2566 2567 rw_enter(&(mip->mi_rw_lock), RW_WRITER); 2568 if (current) 2569 *marginp = mip->mi_margin; 2570 2571 /* 2572 * If the current margin value cannot satisfy the margin requested, 2573 * return ENOTSUP directly. 2574 */ 2575 if (*marginp > mip->mi_margin) { 2576 err = ENOTSUP; 2577 goto done; 2578 } 2579 2580 /* 2581 * Check whether the given margin is already in the list. If so, 2582 * bump the reference count. 2583 */ 2584 for (pp = &mip->mi_mmrp; (p = *pp) != NULL; pp = &p->mmr_nextp) { 2585 if (p->mmr_margin == *marginp) { 2586 /* 2587 * The margin requested is already in the list, 2588 * so just bump the reference count. 2589 */ 2590 p->mmr_ref++; 2591 goto done; 2592 } 2593 if (p->mmr_margin < *marginp) 2594 break; 2595 } 2596 2597 2598 p = kmem_zalloc(sizeof (mac_margin_req_t), KM_SLEEP); 2599 p->mmr_margin = *marginp; 2600 p->mmr_ref++; 2601 p->mmr_nextp = *pp; 2602 *pp = p; 2603 2604 done: 2605 rw_exit(&(mip->mi_rw_lock)); 2606 return (err); 2607 } 2608 2609 /* 2610 * The mac client requests to cancel its previous mac_margin_add() request. 2611 * We remove the requested margin size from the list. 2612 */ 2613 int 2614 mac_margin_remove(mac_handle_t mh, uint32_t margin) 2615 { 2616 mac_impl_t *mip = (mac_impl_t *)mh; 2617 mac_margin_req_t **pp, *p; 2618 int err = 0; 2619 2620 rw_enter(&(mip->mi_rw_lock), RW_WRITER); 2621 /* 2622 * Find the entry in the list for the given margin.
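 *
 * Since the list is kept ordered from largest to smallest, requests
 * of, say, 4, 8 and 8 would be stored as 8 (mmr_ref == 2) followed by
 * 4 (mmr_ref == 1), so mi_mmrp->mmr_margin is always the largest
 * outstanding request (illustrative values).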
2623 */ 2624 for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) { 2625 if (p->mmr_margin == margin) { 2626 if (--p->mmr_ref == 0) 2627 break; 2628 2629 /* 2630 * There is still a reference to this margin so 2631 * there's nothing more to do. 2632 */ 2633 goto done; 2634 } 2635 } 2636 2637 /* 2638 * We did not find an entry for the given margin. 2639 */ 2640 if (p == NULL) { 2641 err = ENOENT; 2642 goto done; 2643 } 2644 2645 ASSERT(p->mmr_ref == 0); 2646 2647 /* 2648 * Remove it from the list. 2649 */ 2650 *pp = p->mmr_nextp; 2651 kmem_free(p, sizeof (mac_margin_req_t)); 2652 done: 2653 rw_exit(&(mip->mi_rw_lock)); 2654 return (err); 2655 } 2656 2657 boolean_t 2658 mac_margin_update(mac_handle_t mh, uint32_t margin) 2659 { 2660 mac_impl_t *mip = (mac_impl_t *)mh; 2661 uint32_t margin_needed = 0; 2662 2663 rw_enter(&(mip->mi_rw_lock), RW_WRITER); 2664 2665 if (mip->mi_mmrp != NULL) 2666 margin_needed = mip->mi_mmrp->mmr_margin; 2667 2668 if (margin_needed <= margin) 2669 mip->mi_margin = margin; 2670 2671 rw_exit(&(mip->mi_rw_lock)); 2672 2673 if (margin_needed <= margin) 2674 i_mac_notify(mip, MAC_NOTE_MARGIN); 2675 2676 return (margin_needed <= margin); 2677 } 2678 2679 /* 2680 * MAC Type Plugin functions. 2681 */ 2682 2683 mactype_t * 2684 mactype_getplugin(const char *pname) 2685 { 2686 mactype_t *mtype = NULL; 2687 boolean_t tried_modload = B_FALSE; 2688 2689 mutex_enter(&i_mactype_lock); 2690 2691 find_registered_mactype: 2692 if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname, 2693 (mod_hash_val_t *)&mtype) != 0) { 2694 if (!tried_modload) { 2695 /* 2696 * If the plugin has not yet been loaded, then 2697 * attempt to load it now. If modload() succeeds, 2698 * the plugin should have registered using 2699 * mactype_register(), in which case we can go back 2700 * and attempt to find it again. 2701 */ 2702 if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) { 2703 tried_modload = B_TRUE; 2704 goto find_registered_mactype; 2705 } 2706 } 2707 } else { 2708 /* 2709 * Note that there's no danger that the plugin we've loaded 2710 * could be unloaded between the modload() step and the 2711 * reference count bump here, as we're holding 2712 * i_mactype_lock, which mactype_unregister() also holds. 2713 */ 2714 atomic_inc_32(&mtype->mt_ref); 2715 } 2716 2717 mutex_exit(&i_mactype_lock); 2718 return (mtype); 2719 } 2720 2721 mactype_register_t * 2722 mactype_alloc(uint_t mactype_version) 2723 { 2724 mactype_register_t *mtrp; 2725 2726 /* 2727 * Make sure there isn't a version mismatch between the plugin and 2728 * the framework. In the future, if multiple versions are 2729 * supported, this check could become more sophisticated. 2730 */ 2731 if (mactype_version != MACTYPE_VERSION) 2732 return (NULL); 2733 2734 mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP); 2735 mtrp->mtr_version = mactype_version; 2736 return (mtrp); 2737 } 2738 2739 void 2740 mactype_free(mactype_register_t *mtrp) 2741 { 2742 kmem_free(mtrp, sizeof (mactype_register_t)); 2743 } 2744 2745 int 2746 mactype_register(mactype_register_t *mtrp) 2747 { 2748 mactype_t *mtp; 2749 mactype_ops_t *ops = mtrp->mtr_ops; 2750 2751 /* Do some sanity checking before we register this MAC type. */ 2752 if (mtrp->mtr_ident == NULL || ops == NULL) 2753 return (EINVAL); 2754 2755 /* 2756 * Verify that all mandatory callbacks are set in the ops 2757 * vector.
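 *
 * A plugin typically reaches this point via a sequence like the
 * following sketch ("myident" and my_ops are hypothetical):
 *
 *	mtrp = mactype_alloc(MACTYPE_VERSION);
 *	mtrp->mtr_ident = "myident";
 *	mtrp->mtr_ops = &my_ops;
 *	err = mactype_register(mtrp);
 *	mactype_free(mtrp);		registration copied what it needs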
2758 */ 2759 if (ops->mtops_unicst_verify == NULL || 2760 ops->mtops_multicst_verify == NULL || 2761 ops->mtops_sap_verify == NULL || 2762 ops->mtops_header == NULL || 2763 ops->mtops_header_info == NULL) { 2764 return (EINVAL); 2765 } 2766 2767 mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP); 2768 mtp->mt_ident = mtrp->mtr_ident; 2769 mtp->mt_ops = *ops; 2770 mtp->mt_type = mtrp->mtr_mactype; 2771 mtp->mt_nativetype = mtrp->mtr_nativetype; 2772 mtp->mt_addr_length = mtrp->mtr_addrlen; 2773 if (mtrp->mtr_brdcst_addr != NULL) { 2774 mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP); 2775 bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr, 2776 mtrp->mtr_addrlen); 2777 } 2778 2779 mtp->mt_stats = mtrp->mtr_stats; 2780 mtp->mt_statcount = mtrp->mtr_statcount; 2781 2782 mtp->mt_mapping = mtrp->mtr_mapping; 2783 mtp->mt_mappingcount = mtrp->mtr_mappingcount; 2784 2785 if (mod_hash_insert(i_mactype_hash, 2786 (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) { 2787 /* mt_brdcst_addr is NULL if no broadcast address was given */ if (mtp->mt_brdcst_addr != NULL) kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); 2788 kmem_free(mtp, sizeof (*mtp)); 2789 return (EEXIST); 2790 } 2791 return (0); 2792 } 2793 2794 int 2795 mactype_unregister(const char *ident) 2796 { 2797 mactype_t *mtp; 2798 mod_hash_val_t val; 2799 int err; 2800 2801 /* 2802 * Let's not allow MAC drivers to use this plugin while we're 2803 * trying to unregister it. Holding i_mactype_lock also prevents a 2804 * plugin from unregistering while a MAC driver is attempting to 2805 * hold a reference to it in mactype_getplugin(). 2806 */ 2807 mutex_enter(&i_mactype_lock); 2808 2809 if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident, 2810 (mod_hash_val_t *)&mtp)) != 0) { 2811 /* A plugin is trying to unregister, but it never registered. */ 2812 err = ENXIO; 2813 goto done; 2814 } 2815 2816 if (mtp->mt_ref != 0) { 2817 err = EBUSY; 2818 goto done; 2819 } 2820 2821 err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val); 2822 ASSERT(err == 0); 2823 if (err != 0) { 2824 /* This should never happen, thus the ASSERT() above. */ 2825 err = EINVAL; 2826 goto done; 2827 } 2828 ASSERT(mtp == (mactype_t *)val); 2829 2830 if (mtp->mt_brdcst_addr != NULL) 2831 kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); 2832 kmem_free(mtp, sizeof (mactype_t)); 2833 done: 2834 mutex_exit(&i_mactype_lock); 2835 return (err); 2836 } 2837 2838 /* 2839 * Checks the size of the value specified for a property as 2840 * part of a property operation. Returns B_TRUE if the size is 2841 * correct, B_FALSE otherwise.
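 *
 * For example (illustrative), a MAC_PROP_MTU operation with a valsize
 * smaller than sizeof (uint32_t) is rejected here before any value is
 * copied.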
2842 */ 2843 boolean_t 2844 mac_prop_check_size(mac_prop_id_t id, uint_t valsize, boolean_t is_range) 2845 { 2846 uint_t minsize = 0; 2847 2848 if (is_range) 2849 return (valsize >= sizeof (mac_propval_range_t)); 2850 2851 switch (id) { 2852 case MAC_PROP_ZONE: 2853 minsize = sizeof (dld_ioc_zid_t); 2854 break; 2855 case MAC_PROP_AUTOPUSH: 2856 if (valsize != 0) 2857 minsize = sizeof (struct dlautopush); 2858 break; 2859 case MAC_PROP_TAGMODE: 2860 minsize = sizeof (link_tagmode_t); 2861 break; 2862 case MAC_PROP_RESOURCE: 2863 case MAC_PROP_RESOURCE_EFF: 2864 minsize = sizeof (mac_resource_props_t); 2865 break; 2866 case MAC_PROP_DUPLEX: 2867 minsize = sizeof (link_duplex_t); 2868 break; 2869 case MAC_PROP_SPEED: 2870 minsize = sizeof (uint64_t); 2871 break; 2872 case MAC_PROP_STATUS: 2873 minsize = sizeof (link_state_t); 2874 break; 2875 case MAC_PROP_AUTONEG: 2876 case MAC_PROP_EN_AUTONEG: 2877 minsize = sizeof (uint8_t); 2878 break; 2879 case MAC_PROP_MTU: 2880 case MAC_PROP_LLIMIT: 2881 case MAC_PROP_LDECAY: 2882 minsize = sizeof (uint32_t); 2883 break; 2884 case MAC_PROP_FLOWCTRL: 2885 minsize = sizeof (link_flowctrl_t); 2886 break; 2887 case MAC_PROP_ADV_10GFDX_CAP: 2888 case MAC_PROP_EN_10GFDX_CAP: 2889 case MAC_PROP_ADV_1000HDX_CAP: 2890 case MAC_PROP_EN_1000HDX_CAP: 2891 case MAC_PROP_ADV_100FDX_CAP: 2892 case MAC_PROP_EN_100FDX_CAP: 2893 case MAC_PROP_ADV_100HDX_CAP: 2894 case MAC_PROP_EN_100HDX_CAP: 2895 case MAC_PROP_ADV_10FDX_CAP: 2896 case MAC_PROP_EN_10FDX_CAP: 2897 case MAC_PROP_ADV_10HDX_CAP: 2898 case MAC_PROP_EN_10HDX_CAP: 2899 case MAC_PROP_ADV_100T4_CAP: 2900 case MAC_PROP_EN_100T4_CAP: 2901 minsize = sizeof (uint8_t); 2902 break; 2903 case MAC_PROP_PVID: 2904 minsize = sizeof (uint16_t); 2905 break; 2906 case MAC_PROP_IPTUN_HOPLIMIT: 2907 minsize = sizeof (uint32_t); 2908 break; 2909 case MAC_PROP_IPTUN_ENCAPLIMIT: 2910 minsize = sizeof (uint32_t); 2911 break; 2912 case MAC_PROP_MAX_TX_RINGS_AVAIL: 2913 case MAC_PROP_MAX_RX_RINGS_AVAIL: 2914 case MAC_PROP_MAX_RXHWCLNT_AVAIL: 2915 case MAC_PROP_MAX_TXHWCLNT_AVAIL: 2916 minsize = sizeof (uint_t); 2917 break; 2918 case MAC_PROP_WL_ESSID: 2919 minsize = sizeof (wl_linkstatus_t); 2920 break; 2921 case MAC_PROP_WL_BSSID: 2922 minsize = sizeof (wl_bssid_t); 2923 break; 2924 case MAC_PROP_WL_BSSTYPE: 2925 minsize = sizeof (wl_bss_type_t); 2926 break; 2927 case MAC_PROP_WL_LINKSTATUS: 2928 minsize = sizeof (wl_linkstatus_t); 2929 break; 2930 case MAC_PROP_WL_DESIRED_RATES: 2931 minsize = sizeof (wl_rates_t); 2932 break; 2933 case MAC_PROP_WL_SUPPORTED_RATES: 2934 minsize = sizeof (wl_rates_t); 2935 break; 2936 case MAC_PROP_WL_AUTH_MODE: 2937 minsize = sizeof (wl_authmode_t); 2938 break; 2939 case MAC_PROP_WL_ENCRYPTION: 2940 minsize = sizeof (wl_encryption_t); 2941 break; 2942 case MAC_PROP_WL_RSSI: 2943 minsize = sizeof (wl_rssi_t); 2944 break; 2945 case MAC_PROP_WL_PHY_CONFIG: 2946 minsize = sizeof (wl_phy_conf_t); 2947 break; 2948 case MAC_PROP_WL_CAPABILITY: 2949 minsize = sizeof (wl_capability_t); 2950 break; 2951 case MAC_PROP_WL_WPA: 2952 minsize = sizeof (wl_wpa_t); 2953 break; 2954 case MAC_PROP_WL_SCANRESULTS: 2955 minsize = sizeof (wl_wpa_ess_t); 2956 break; 2957 case MAC_PROP_WL_POWER_MODE: 2958 minsize = sizeof (wl_ps_mode_t); 2959 break; 2960 case MAC_PROP_WL_RADIO: 2961 minsize = sizeof (wl_radio_t); 2962 break; 2963 case MAC_PROP_WL_ESS_LIST: 2964 minsize = sizeof (wl_ess_list_t); 2965 break; 2966 case MAC_PROP_WL_KEY_TAB: 2967 minsize = sizeof (wl_wep_key_tab_t); 2968 break; 2969 case 
MAC_PROP_WL_CREATE_IBSS: 2970 minsize = sizeof (wl_create_ibss_t); 2971 break; 2972 case MAC_PROP_WL_SETOPTIE: 2973 minsize = sizeof (wl_wpa_ie_t); 2974 break; 2975 case MAC_PROP_WL_DELKEY: 2976 minsize = sizeof (wl_del_key_t); 2977 break; 2978 case MAC_PROP_WL_KEY: 2979 minsize = sizeof (wl_key_t); 2980 break; 2981 case MAC_PROP_WL_MLME: 2982 minsize = sizeof (wl_mlme_t); 2983 break; 2984 } 2985 2986 return (valsize >= minsize); 2987 } 2988 2989 /* 2990 * mac_set_prop() sets MAC or hardware driver properties: 2991 * 2992 * - MAC-managed properties such as resource properties include maxbw, 2993 * priority, and cpu binding list, as well as the default port VID 2994 * used by bridging. These properties are consumed by the MAC layer 2995 * itself and not passed down to the driver. For resource control 2996 * properties, this function invokes mac_set_resources() which will 2997 * cache the property value in mac_impl_t and may call 2998 * mac_client_set_resource() to update property value of the primary 2999 * mac client, if it exists. 3000 * 3001 * - Properties which act on the hardware and must be passed to the 3002 * driver, such as MTU, through the driver's mc_setprop() entry point. 3003 */ 3004 int 3005 mac_set_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, 3006 uint_t valsize) 3007 { 3008 int err = ENOTSUP; 3009 mac_impl_t *mip = (mac_impl_t *)mh; 3010 3011 ASSERT(MAC_PERIM_HELD(mh)); 3012 3013 switch (id) { 3014 case MAC_PROP_RESOURCE: { 3015 mac_resource_props_t *mrp; 3016 3017 /* call mac_set_resources() for MAC properties */ 3018 ASSERT(valsize >= sizeof (mac_resource_props_t)); 3019 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 3020 bcopy(val, mrp, sizeof (*mrp)); 3021 err = mac_set_resources(mh, mrp); 3022 kmem_free(mrp, sizeof (*mrp)); 3023 break; 3024 } 3025 3026 case MAC_PROP_PVID: 3027 ASSERT(valsize >= sizeof (uint16_t)); 3028 if (mip->mi_state_flags & MIS_IS_VNIC) 3029 return (EINVAL); 3030 err = mac_set_pvid(mh, *(uint16_t *)val); 3031 break; 3032 3033 case MAC_PROP_MTU: { 3034 uint32_t mtu; 3035 3036 ASSERT(valsize >= sizeof (uint32_t)); 3037 bcopy(val, &mtu, sizeof (mtu)); 3038 err = mac_set_mtu(mh, mtu, NULL); 3039 break; 3040 } 3041 3042 case MAC_PROP_LLIMIT: 3043 case MAC_PROP_LDECAY: { 3044 uint32_t learnval; 3045 3046 if (valsize < sizeof (learnval) || 3047 (mip->mi_state_flags & MIS_IS_VNIC)) 3048 return (EINVAL); 3049 bcopy(val, &learnval, sizeof (learnval)); 3050 if (learnval == 0 && id == MAC_PROP_LDECAY) 3051 return (EINVAL); 3052 if (id == MAC_PROP_LLIMIT) 3053 mip->mi_llimit = learnval; 3054 else 3055 mip->mi_ldecay = learnval; 3056 err = 0; 3057 break; 3058 } 3059 3060 default: 3061 /* For other driver properties, call driver's callback */ 3062 if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) { 3063 err = mip->mi_callbacks->mc_setprop(mip->mi_driver, 3064 name, id, valsize, val); 3065 } 3066 } 3067 return (err); 3068 } 3069 3070 /* 3071 * mac_get_prop() gets MAC or device driver properties. 3072 * 3073 * If the property is a driver property, mac_get_prop() calls driver's callback 3074 * entry point to get it. 3075 * If the property is a MAC property, mac_get_prop() invokes mac_get_resources() 3076 * which returns the cached value in mac_impl_t. 
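 *
 * E.g. reading the current MTU (illustrative only; error handling
 * elided, and the name argument is unused for this property):
 *
 *	uint32_t mtu;
 *	err = mac_get_prop(mh, MAC_PROP_MTU, NULL, &mtu, sizeof (mtu));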
3077 */ 3078 int 3079 mac_get_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, 3080 uint_t valsize) 3081 { 3082 int err = ENOTSUP; 3083 mac_impl_t *mip = (mac_impl_t *)mh; 3084 uint_t rings; 3085 uint_t vlinks; 3086 3087 bzero(val, valsize); 3088 3089 switch (id) { 3090 case MAC_PROP_RESOURCE: { 3091 mac_resource_props_t *mrp; 3092 3093 /* If mac property, read from cache */ 3094 ASSERT(valsize >= sizeof (mac_resource_props_t)); 3095 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 3096 mac_get_resources(mh, mrp); 3097 bcopy(mrp, val, sizeof (*mrp)); 3098 kmem_free(mrp, sizeof (*mrp)); 3099 return (0); 3100 } 3101 case MAC_PROP_RESOURCE_EFF: { 3102 mac_resource_props_t *mrp; 3103 3104 /* If mac effective property, read from client */ 3105 ASSERT(valsize >= sizeof (mac_resource_props_t)); 3106 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 3107 mac_get_effective_resources(mh, mrp); 3108 bcopy(mrp, val, sizeof (*mrp)); 3109 kmem_free(mrp, sizeof (*mrp)); 3110 return (0); 3111 } 3112 3113 case MAC_PROP_PVID: 3114 ASSERT(valsize >= sizeof (uint16_t)); 3115 if (mip->mi_state_flags & MIS_IS_VNIC) 3116 return (EINVAL); 3117 *(uint16_t *)val = mac_get_pvid(mh); 3118 return (0); 3119 3120 case MAC_PROP_LLIMIT: 3121 case MAC_PROP_LDECAY: 3122 ASSERT(valsize >= sizeof (uint32_t)); 3123 if (mip->mi_state_flags & MIS_IS_VNIC) 3124 return (EINVAL); 3125 if (id == MAC_PROP_LLIMIT) 3126 bcopy(&mip->mi_llimit, val, sizeof (mip->mi_llimit)); 3127 else 3128 bcopy(&mip->mi_ldecay, val, sizeof (mip->mi_ldecay)); 3129 return (0); 3130 3131 case MAC_PROP_MTU: { 3132 uint32_t sdu; 3133 3134 ASSERT(valsize >= sizeof (uint32_t)); 3135 mac_sdu_get(mh, NULL, &sdu); 3136 bcopy(&sdu, val, sizeof (sdu)); 3137 3138 return (0); 3139 } 3140 case MAC_PROP_STATUS: { 3141 link_state_t link_state; 3142 3143 if (valsize < sizeof (link_state)) 3144 return (EINVAL); 3145 link_state = mac_link_get(mh); 3146 bcopy(&link_state, val, sizeof (link_state)); 3147 3148 return (0); 3149 } 3150 3151 case MAC_PROP_MAX_RX_RINGS_AVAIL: 3152 case MAC_PROP_MAX_TX_RINGS_AVAIL: 3153 ASSERT(valsize >= sizeof (uint_t)); 3154 rings = id == MAC_PROP_MAX_RX_RINGS_AVAIL ? 3155 mac_rxavail_get(mh) : mac_txavail_get(mh); 3156 bcopy(&rings, val, sizeof (uint_t)); 3157 return (0); 3158 3159 case MAC_PROP_MAX_RXHWCLNT_AVAIL: 3160 case MAC_PROP_MAX_TXHWCLNT_AVAIL: 3161 ASSERT(valsize >= sizeof (uint_t)); 3162 vlinks = id == MAC_PROP_MAX_RXHWCLNT_AVAIL ? 3163 mac_rxhwlnksavail_get(mh) : mac_txhwlnksavail_get(mh); 3164 bcopy(&vlinks, val, sizeof (uint_t)); 3165 return (0); 3166 3167 case MAC_PROP_RXRINGSRANGE: 3168 case MAC_PROP_TXRINGSRANGE: 3169 /* 3170 * The values for these properties are returned through 3171 * the MAC_PROP_RESOURCE property. 3172 */ 3173 return (0); 3174 3175 default: 3176 break; 3177 3178 } 3179 3180 /* If driver property, request from driver */ 3181 if (mip->mi_callbacks->mc_callbacks & MC_GETPROP) { 3182 err = mip->mi_callbacks->mc_getprop(mip->mi_driver, name, id, 3183 valsize, val); 3184 } 3185 3186 return (err); 3187 } 3188 3189 /* 3190 * Helper function to initialize the range structure for use in 3191 * mac_prop_info(). If the type can be other than uint32, we can 3192 * pass that as an arg.
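 *
 * E.g. _mac_set_range(range, 1, 7) publishes the single uint32 range
 * [1, 7] in the caller-supplied mac_propval_range_t (illustrative).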
3193 */ 3194 static void 3195 _mac_set_range(mac_propval_range_t *range, uint32_t min, uint32_t max) 3196 { 3197 range->mpr_count = 1; 3198 range->mpr_type = MAC_PROPVAL_UINT32; 3199 range->mpr_range_uint32[0].mpur_min = min; 3200 range->mpr_range_uint32[0].mpur_max = max; 3201 } 3202 3203 /* 3204 * Returns information about the specified property, such as default 3205 * values or permissions. 3206 */ 3207 int 3208 mac_prop_info(mac_handle_t mh, mac_prop_id_t id, char *name, 3209 void *default_val, uint_t default_size, mac_propval_range_t *range, 3210 uint_t *perm) 3211 { 3212 mac_prop_info_state_t state; 3213 mac_impl_t *mip = (mac_impl_t *)mh; 3214 uint_t max; 3215 3216 /* 3217 * A property is read/write by default unless the driver says 3218 * otherwise. 3219 */ 3220 if (perm != NULL) 3221 *perm = MAC_PROP_PERM_RW; 3222 3223 if (default_val != NULL) 3224 bzero(default_val, default_size); 3225 3226 /* 3227 * First, handle framework properties for which we don't need to 3228 * involve the driver. 3229 */ 3230 switch (id) { 3231 case MAC_PROP_RESOURCE: 3232 case MAC_PROP_PVID: 3233 case MAC_PROP_LLIMIT: 3234 case MAC_PROP_LDECAY: 3235 return (0); 3236 3237 case MAC_PROP_MAX_RX_RINGS_AVAIL: 3238 case MAC_PROP_MAX_TX_RINGS_AVAIL: 3239 case MAC_PROP_MAX_RXHWCLNT_AVAIL: 3240 case MAC_PROP_MAX_TXHWCLNT_AVAIL: 3241 if (perm != NULL) 3242 *perm = MAC_PROP_PERM_READ; 3243 return (0); 3244 3245 case MAC_PROP_RXRINGSRANGE: 3246 case MAC_PROP_TXRINGSRANGE: 3247 /* 3248 * Currently, we support range for RX and TX rings properties. 3249 * When we extend this support to maxbw, cpus and priority, 3250 * we should move this to mac_get_resources. 3251 * There is no default value for RX or TX rings. 3252 */ 3253 if ((mip->mi_state_flags & MIS_IS_VNIC) && 3254 mac_is_vnic_primary(mh)) { 3255 /* 3256 * We don't support setting rings for a VLAN 3257 * data link because it shares its ring with the 3258 * primary MAC client. 3259 */ 3260 if (perm != NULL) 3261 *perm = MAC_PROP_PERM_READ; 3262 if (range != NULL) 3263 range->mpr_count = 0; 3264 } else if (range != NULL) { 3265 if (mip->mi_state_flags & MIS_IS_VNIC) 3266 mh = mac_get_lower_mac_handle(mh); 3267 mip = (mac_impl_t *)mh; 3268 if ((id == MAC_PROP_RXRINGSRANGE && 3269 mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) || 3270 (id == MAC_PROP_TXRINGSRANGE && 3271 mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC)) { 3272 if (id == MAC_PROP_RXRINGSRANGE) { 3273 if ((mac_rxhwlnksavail_get(mh) + 3274 mac_rxhwlnksrsvd_get(mh)) <= 1) { 3275 /* 3276 * doesn't support groups or 3277 * rings 3278 */ 3279 range->mpr_count = 0; 3280 } else { 3281 /* 3282 * supports specifying groups, 3283 * but not rings 3284 */ 3285 _mac_set_range(range, 0, 0); 3286 } 3287 } else { 3288 if ((mac_txhwlnksavail_get(mh) + 3289 mac_txhwlnksrsvd_get(mh)) <= 1) { 3290 /* 3291 * doesn't support groups or 3292 * rings 3293 */ 3294 range->mpr_count = 0; 3295 } else { 3296 /* 3297 * supports specifying groups, 3298 * but not rings 3299 */ 3300 _mac_set_range(range, 0, 0); 3301 } 3302 } 3303 } else { 3304 max = id == MAC_PROP_RXRINGSRANGE ? 3305 mac_rxavail_get(mh) + mac_rxrsvd_get(mh) : 3306 mac_txavail_get(mh) + mac_txrsvd_get(mh); 3307 if (max <= 1) { 3308 /* 3309 * doesn't support groups or 3310 * rings 3311 */ 3312 range->mpr_count = 0; 3313 } else { 3314 /* 3315 * -1 because we have to leave out the 3316 * default ring. 
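 * E.g. with 8 rings exposed and none
 * reserved (max == 8), the range
 * advertised here is [1, 7]
 * (illustrative values).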
3317 */ 3318 _mac_set_range(range, 1, max - 1); 3319 } 3320 } 3321 } 3322 return (0); 3323 3324 case MAC_PROP_STATUS: 3325 if (perm != NULL) 3326 *perm = MAC_PROP_PERM_READ; 3327 return (0); 3328 } 3329 3330 /* 3331 * Get the property info from the driver if it implements the 3332 * property info entry point. 3333 */ 3334 bzero(&state, sizeof (state)); 3335 3336 if (mip->mi_callbacks->mc_callbacks & MC_PROPINFO) { 3337 state.pr_default = default_val; 3338 state.pr_default_size = default_size; 3339 3340 /* 3341 * The caller specifies the maximum number of ranges 3342 * it can accommodate using mpr_count. We don't touch 3343 * this value until the driver returns from its 3344 * mc_propinfo() callback, and ensure we don't exceed 3345 * this number of ranges as the driver defines its 3346 * supported ranges from its mc_propinfo(). 3347 * 3348 * pr_range_cur_count keeps track of how many ranges 3349 * were defined by the driver from its mc_propinfo() 3350 * entry point. 3351 * 3352 * On exit, the user-specified range mpr_count returns 3353 * the number of ranges specified by the driver on 3354 * success, or the number of ranges it wanted to 3355 * define if that number of ranges could not be 3356 * accommodated by the specified range structure. In 3357 * the latter case, the caller will be able to 3358 * allocate a larger range structure, and query the 3359 * property again. 3360 */ 3361 state.pr_range_cur_count = 0; 3362 state.pr_range = range; 3363 3364 mip->mi_callbacks->mc_propinfo(mip->mi_driver, name, id, 3365 (mac_prop_info_handle_t)&state); 3366 3367 if (state.pr_flags & MAC_PROP_INFO_RANGE) 3368 range->mpr_count = state.pr_range_cur_count; 3369 3370 /* 3371 * The operation could fail if the buffer supplied by 3372 * the user was too small for the range or default 3373 * value of the property. 3374 */ 3375 if (state.pr_errno != 0) 3376 return (state.pr_errno); 3377 3378 if (perm != NULL && state.pr_flags & MAC_PROP_INFO_PERM) 3379 *perm = state.pr_perm; 3380 } 3381 3382 /* 3383 * The MAC layer may want to provide default values or allowed 3384 * ranges for properties if the driver does not provide a 3385 * property info entry point, or that entry point exists, but 3386 * it did not provide a default value or allowed ranges for 3387 * that property.
3388 */ 3389 switch (id) { 3390 case MAC_PROP_MTU: { 3391 uint32_t sdu; 3392 3393 mac_sdu_get(mh, NULL, &sdu); 3394 3395 if (range != NULL && !(state.pr_flags & 3396 MAC_PROP_INFO_RANGE)) { 3397 /* MTU range */ 3398 _mac_set_range(range, sdu, sdu); 3399 } 3400 3401 if (default_val != NULL && !(state.pr_flags & 3402 MAC_PROP_INFO_DEFAULT)) { 3403 if (mip->mi_info.mi_media == DL_ETHER) 3404 sdu = ETHERMTU; 3405 /* default MTU value */ 3406 bcopy(&sdu, default_val, sizeof (sdu)); 3407 } 3408 } 3409 } 3410 3411 return (0); 3412 } 3413 3414 int 3415 mac_fastpath_disable(mac_handle_t mh) 3416 { 3417 mac_impl_t *mip = (mac_impl_t *)mh; 3418 3419 if ((mip->mi_state_flags & MIS_LEGACY) == 0) 3420 return (0); 3421 3422 return (mip->mi_capab_legacy.ml_fastpath_disable(mip->mi_driver)); 3423 } 3424 3425 void 3426 mac_fastpath_enable(mac_handle_t mh) 3427 { 3428 mac_impl_t *mip = (mac_impl_t *)mh; 3429 3430 if ((mip->mi_state_flags & MIS_LEGACY) == 0) 3431 return; 3432 3433 mip->mi_capab_legacy.ml_fastpath_enable(mip->mi_driver); 3434 } 3435 3436 void 3437 mac_register_priv_prop(mac_impl_t *mip, char **priv_props) 3438 { 3439 uint_t nprops, i; 3440 3441 if (priv_props == NULL) 3442 return; 3443 3444 nprops = 0; 3445 while (priv_props[nprops] != NULL) 3446 nprops++; 3447 if (nprops == 0) 3448 return; 3449 3450 3451 mip->mi_priv_prop = kmem_zalloc(nprops * sizeof (char *), KM_SLEEP); 3452 3453 for (i = 0; i < nprops; i++) { 3454 mip->mi_priv_prop[i] = kmem_zalloc(MAXLINKPROPNAME, KM_SLEEP); 3455 (void) strlcpy(mip->mi_priv_prop[i], priv_props[i], 3456 MAXLINKPROPNAME); 3457 } 3458 3459 mip->mi_priv_prop_count = nprops; 3460 } 3461 3462 void 3463 mac_unregister_priv_prop(mac_impl_t *mip) 3464 { 3465 uint_t i; 3466 3467 if (mip->mi_priv_prop_count == 0) { 3468 ASSERT(mip->mi_priv_prop == NULL); 3469 return; 3470 } 3471 3472 for (i = 0; i < mip->mi_priv_prop_count; i++) 3473 kmem_free(mip->mi_priv_prop[i], MAXLINKPROPNAME); 3474 kmem_free(mip->mi_priv_prop, mip->mi_priv_prop_count * 3475 sizeof (char *)); 3476 3477 mip->mi_priv_prop = NULL; 3478 mip->mi_priv_prop_count = 0; 3479 } 3480 3481 /* 3482 * mac_ring_t 'mr' macros. Some rogue drivers may access the ring structure 3483 * (by invoking mac_rx()) even after processing mac_stop_ring(). In such 3484 * cases, if MAC frees the ring structure after mac_stop_ring(), any 3485 * illegal access to the ring structure coming from the driver will panic 3486 * the system. In order to protect the system from such inadvertent access, 3487 * we maintain a cache of rings in the mac_impl_t after they get freed up. 3488 * When packets are received on freed-up rings, MAC (through the generation 3489 * count mechanism) will drop such packets.
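 *
 * Illustratively: mac_start_ring() below passes mr_gen_num down to the
 * driver via mr_start(), and mac_stop_ring() increments it, so a late
 * mac_rx() tagged with the stale generation number no longer matches
 * and its packets are dropped.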
3490 */ 3491 static mac_ring_t * 3492 mac_ring_alloc(mac_impl_t *mip) 3493 { 3494 mac_ring_t *ring; 3495 3496 mutex_enter(&mip->mi_ring_lock); 3497 if (mip->mi_ring_freelist != NULL) { 3498 ring = mip->mi_ring_freelist; 3499 mip->mi_ring_freelist = ring->mr_next; 3500 bzero(ring, sizeof (mac_ring_t)); 3501 mutex_exit(&mip->mi_ring_lock); 3502 } else { 3503 mutex_exit(&mip->mi_ring_lock); 3504 ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP); 3505 } 3506 ASSERT((ring != NULL) && (ring->mr_state == MR_FREE)); 3507 return (ring); 3508 } 3509 3510 static void 3511 mac_ring_free(mac_impl_t *mip, mac_ring_t *ring) 3512 { 3513 ASSERT(ring->mr_state == MR_FREE); 3514 3515 mutex_enter(&mip->mi_ring_lock); 3516 ring->mr_state = MR_FREE; 3517 ring->mr_flag = 0; 3518 ring->mr_next = mip->mi_ring_freelist; 3519 ring->mr_mip = NULL; 3520 mip->mi_ring_freelist = ring; 3521 mac_ring_stat_delete(ring); 3522 mutex_exit(&mip->mi_ring_lock); 3523 } 3524 3525 static void 3526 mac_ring_freeall(mac_impl_t *mip) 3527 { 3528 mac_ring_t *ring_next; 3529 mutex_enter(&mip->mi_ring_lock); 3530 mac_ring_t *ring = mip->mi_ring_freelist; 3531 while (ring != NULL) { 3532 ring_next = ring->mr_next; 3533 kmem_cache_free(mac_ring_cache, ring); 3534 ring = ring_next; 3535 } 3536 mip->mi_ring_freelist = NULL; 3537 mutex_exit(&mip->mi_ring_lock); 3538 } 3539 3540 int 3541 mac_start_ring(mac_ring_t *ring) 3542 { 3543 int rv = 0; 3544 3545 ASSERT(ring->mr_state == MR_FREE); 3546 3547 if (ring->mr_start != NULL) { 3548 rv = ring->mr_start(ring->mr_driver, ring->mr_gen_num); 3549 if (rv != 0) 3550 return (rv); 3551 } 3552 3553 ring->mr_state = MR_INUSE; 3554 return (rv); 3555 } 3556 3557 void 3558 mac_stop_ring(mac_ring_t *ring) 3559 { 3560 ASSERT(ring->mr_state == MR_INUSE); 3561 3562 if (ring->mr_stop != NULL) 3563 ring->mr_stop(ring->mr_driver); 3564 3565 ring->mr_state = MR_FREE; 3566 3567 /* 3568 * Increment the ring generation number for this ring. 3569 */ 3570 ring->mr_gen_num++; 3571 } 3572 3573 int 3574 mac_start_group(mac_group_t *group) 3575 { 3576 int rv = 0; 3577 3578 if (group->mrg_start != NULL) 3579 rv = group->mrg_start(group->mrg_driver); 3580 3581 return (rv); 3582 } 3583 3584 void 3585 mac_stop_group(mac_group_t *group) 3586 { 3587 if (group->mrg_stop != NULL) 3588 group->mrg_stop(group->mrg_driver); 3589 } 3590 3591 /* 3592 * Called from mac_start() on the default Rx group. Broadcast and multicast 3593 * packets are received only on the default group. Hence the default group 3594 * needs to be up even if the primary client is not up, for the other groups 3595 * to be functional. We do this by calling this function at mac_start time 3596 * itself. However the broadcast packets that are received can't make their 3597 * way beyond mac_rx until a mac client creates a broadcast flow. 
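 *
 * In outline (illustrative), the function below does:
 *
 *	mac_start_group(group);
 *	for each ring in group:
 *		mac_start_ring(ring);
 *		ring->mr_classify_type = MAC_SW_CLASSIFIER;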
3598 */ 3599 static int 3600 mac_start_group_and_rings(mac_group_t *group) 3601 { 3602 mac_ring_t *ring; 3603 int rv = 0; 3604 3605 ASSERT(group->mrg_state == MAC_GROUP_STATE_REGISTERED); 3606 if ((rv = mac_start_group(group)) != 0) 3607 return (rv); 3608 3609 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 3610 ASSERT(ring->mr_state == MR_FREE); 3611 if ((rv = mac_start_ring(ring)) != 0) 3612 goto error; 3613 ring->mr_classify_type = MAC_SW_CLASSIFIER; 3614 } 3615 return (0); 3616 3617 error: 3618 mac_stop_group_and_rings(group); 3619 return (rv); 3620 } 3621 3622 /* Called from mac_stop on the default Rx group */ 3623 static void 3624 mac_stop_group_and_rings(mac_group_t *group) 3625 { 3626 mac_ring_t *ring; 3627 3628 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 3629 if (ring->mr_state != MR_FREE) { 3630 mac_stop_ring(ring); 3631 ring->mr_flag = 0; 3632 ring->mr_classify_type = MAC_NO_CLASSIFIER; 3633 } 3634 } 3635 mac_stop_group(group); 3636 } 3637 3638 3639 static mac_ring_t * 3640 mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index, 3641 mac_capab_rings_t *cap_rings) 3642 { 3643 mac_ring_t *ring, *rnext; 3644 mac_ring_info_t ring_info; 3645 ddi_intr_handle_t ddi_handle; 3646 3647 ring = mac_ring_alloc(mip); 3648 3649 /* Prepare basic information of ring */ 3650 3651 /* 3652 * Ring index is numbered to be unique across a particular device. 3653 * Ring index computation makes the following assumptions: 3654 * - For drivers with static grouping (e.g. ixgbe, bge), the 3655 * ring index exchanged with the driver (e.g. during mr_rget) 3656 * is unique only across the group the ring belongs to. 3657 * - Drivers with dynamic grouping (e.g. nxge) start 3658 * with a single group (mrg_index = 0). 3659 */ 3660 ring->mr_index = group->mrg_index * group->mrg_info.mgi_count + index; 3661 ring->mr_type = group->mrg_type; 3662 ring->mr_gh = (mac_group_handle_t)group; 3663 3664 /* Insert the new ring to the list. */ 3665 ring->mr_next = group->mrg_rings; 3666 group->mrg_rings = ring; 3667 3668 /* Zero to reuse the info data structure */ 3669 bzero(&ring_info, sizeof (ring_info)); 3670 3671 /* Query ring information from driver */ 3672 cap_rings->mr_rget(mip->mi_driver, group->mrg_type, group->mrg_index, 3673 index, &ring_info, (mac_ring_handle_t)ring); 3674 3675 ring->mr_info = ring_info; 3676 3677 /* 3678 * The interrupt handle could be shared among multiple rings. 3679 * Thus if there is a bunch of rings that are sharing an 3680 * interrupt, then only one ring among the bunch will be made 3681 * available for interrupt re-targeting; the rest will have the 3682 * ddi_shared flag set to TRUE and will not be available for 3683 * interrupt re-targeting. 3684 */ 3685 if ((ddi_handle = ring_info.mri_intr.mi_ddi_handle) != NULL) { 3686 rnext = ring->mr_next; 3687 while (rnext != NULL) { 3688 if (rnext->mr_info.mri_intr.mi_ddi_handle == 3689 ddi_handle) { 3690 /* 3691 * If default ring (mr_index == 0) is part 3692 * of a group of rings sharing an 3693 * interrupt, then set ddi_shared flag for 3694 * the default ring and give another ring 3695 * the chance to be re-targeted. 3696 */ 3697 if (rnext->mr_index == 0 && 3698 !rnext->mr_info.mri_intr.mi_ddi_shared) { 3699 rnext->mr_info.mri_intr.mi_ddi_shared = 3700 B_TRUE; 3701 } else { 3702 ring->mr_info.mri_intr.mi_ddi_shared = 3703 B_TRUE; 3704 } 3705 break; 3706 } 3707 rnext = rnext->mr_next; 3708 } 3709 /* 3710 * If rnext is NULL, then no matching ddi_handle was found. 3711 * Rx rings get registered first.
So if this is a Tx ring, 3712 * then go through all the Rx rings and see if there is a 3713 * matching ddi handle. 3714 */ 3715 if (rnext == NULL && ring->mr_type == MAC_RING_TYPE_TX) { 3716 mac_compare_ddi_handle(mip->mi_rx_groups, 3717 mip->mi_rx_group_count, ring); 3718 } 3719 } 3720 3721 /* Update ring's status */ 3722 ring->mr_state = MR_FREE; 3723 ring->mr_flag = 0; 3724 3725 /* Update the ring count of the group */ 3726 group->mrg_cur_count++; 3727 3728 /* Create per ring kstats */ 3729 if (ring->mr_stat != NULL) { 3730 ring->mr_mip = mip; 3731 mac_ring_stat_create(ring); 3732 } 3733 3734 return (ring); 3735 } 3736 3737 /* 3738 * Rings are chained together for easy regrouping. 3739 */ 3740 static void 3741 mac_init_group(mac_impl_t *mip, mac_group_t *group, int size, 3742 mac_capab_rings_t *cap_rings) 3743 { 3744 int index; 3745 3746 /* 3747 * Initialize all ring members of this group. Size of zero will not 3748 * enter the loop, so it's safe for initializing an empty group. 3749 */ 3750 for (index = size - 1; index >= 0; index--) 3751 (void) mac_init_ring(mip, group, index, cap_rings); 3752 } 3753 3754 int 3755 mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) 3756 { 3757 mac_capab_rings_t *cap_rings; 3758 mac_group_t *group; 3759 mac_group_t *groups; 3760 mac_group_info_t group_info; 3761 uint_t group_free = 0; 3762 uint_t ring_left; 3763 mac_ring_t *ring; 3764 int g; 3765 int err = 0; 3766 uint_t grpcnt; 3767 boolean_t pseudo_txgrp = B_FALSE; 3768 3769 switch (rtype) { 3770 case MAC_RING_TYPE_RX: 3771 ASSERT(mip->mi_rx_groups == NULL); 3772 3773 cap_rings = &mip->mi_rx_rings_cap; 3774 cap_rings->mr_type = MAC_RING_TYPE_RX; 3775 break; 3776 case MAC_RING_TYPE_TX: 3777 ASSERT(mip->mi_tx_groups == NULL); 3778 3779 cap_rings = &mip->mi_tx_rings_cap; 3780 cap_rings->mr_type = MAC_RING_TYPE_TX; 3781 break; 3782 default: 3783 ASSERT(B_FALSE); 3784 } 3785 3786 if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, cap_rings)) 3787 return (0); 3788 grpcnt = cap_rings->mr_gnum; 3789 3790 /* 3791 * If we have multiple TX rings, but only one TX group, we can 3792 * create pseudo TX groups (one per TX ring) in the MAC layer, 3793 * except for an aggr. For an aggr currently we maintain only 3794 * one group with all the rings (for all its ports), going 3795 * forwards we might change this. 3796 */ 3797 if (rtype == MAC_RING_TYPE_TX && 3798 cap_rings->mr_gnum == 0 && cap_rings->mr_rnum > 0 && 3799 (mip->mi_state_flags & MIS_IS_AGGR) == 0) { 3800 /* 3801 * The -1 here is because we create a default TX group 3802 * with all the rings in it. 3803 */ 3804 grpcnt = cap_rings->mr_rnum - 1; 3805 pseudo_txgrp = B_TRUE; 3806 } 3807 3808 /* 3809 * Allocate a contiguous buffer for all groups. 3810 */ 3811 groups = kmem_zalloc(sizeof (mac_group_t) * (grpcnt+ 1), KM_SLEEP); 3812 3813 ring_left = cap_rings->mr_rnum; 3814 3815 /* 3816 * Get all ring groups if any, and get their ring members 3817 * if any. 3818 */ 3819 for (g = 0; g < grpcnt; g++) { 3820 group = groups + g; 3821 3822 /* Prepare basic information of the group */ 3823 group->mrg_index = g; 3824 group->mrg_type = rtype; 3825 group->mrg_state = MAC_GROUP_STATE_UNINIT; 3826 group->mrg_mh = (mac_handle_t)mip; 3827 group->mrg_next = group + 1; 3828 3829 /* Zero to reuse the info data structure */ 3830 bzero(&group_info, sizeof (group_info)); 3831 3832 if (pseudo_txgrp) { 3833 /* 3834 * This is a pseudo group that we created, apart 3835 * from setting the state there is nothing to be 3836 * done. 
int
mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype)
{
	mac_capab_rings_t	*cap_rings;
	mac_group_t		*group;
	mac_group_t		*groups;
	mac_group_info_t	group_info;
	uint_t			group_free = 0;
	uint_t			ring_left;
	mac_ring_t		*ring;
	int			g;
	int			err = 0;
	uint_t			grpcnt;
	boolean_t		pseudo_txgrp = B_FALSE;

	switch (rtype) {
	case MAC_RING_TYPE_RX:
		ASSERT(mip->mi_rx_groups == NULL);

		cap_rings = &mip->mi_rx_rings_cap;
		cap_rings->mr_type = MAC_RING_TYPE_RX;
		break;
	case MAC_RING_TYPE_TX:
		ASSERT(mip->mi_tx_groups == NULL);

		cap_rings = &mip->mi_tx_rings_cap;
		cap_rings->mr_type = MAC_RING_TYPE_TX;
		break;
	default:
		ASSERT(B_FALSE);
	}

	if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, cap_rings))
		return (0);
	grpcnt = cap_rings->mr_gnum;

	/*
	 * If we have multiple TX rings, but only one TX group, we can
	 * create pseudo TX groups (one per TX ring) in the MAC layer,
	 * except for an aggr. For an aggr we currently maintain only
	 * one group with all the rings (for all its ports); going
	 * forward we might change this.
	 */
	if (rtype == MAC_RING_TYPE_TX &&
	    cap_rings->mr_gnum == 0 && cap_rings->mr_rnum > 0 &&
	    (mip->mi_state_flags & MIS_IS_AGGR) == 0) {
		/*
		 * The -1 here is because we create a default TX group
		 * with all the rings in it.
		 */
		grpcnt = cap_rings->mr_rnum - 1;
		pseudo_txgrp = B_TRUE;
	}

	/*
	 * Allocate a contiguous buffer for all groups.
	 */
	groups = kmem_zalloc(sizeof (mac_group_t) * (grpcnt + 1), KM_SLEEP);

	ring_left = cap_rings->mr_rnum;

	/*
	 * Get all ring groups if any, and get their ring members
	 * if any.
	 */
	for (g = 0; g < grpcnt; g++) {
		group = groups + g;

		/* Prepare basic information of the group */
		group->mrg_index = g;
		group->mrg_type = rtype;
		group->mrg_state = MAC_GROUP_STATE_UNINIT;
		group->mrg_mh = (mac_handle_t)mip;
		group->mrg_next = group + 1;

		/* Zero to reuse the info data structure */
		bzero(&group_info, sizeof (group_info));

		if (pseudo_txgrp) {
			/*
			 * This is a pseudo group that we created; apart
			 * from setting the state there is nothing to be
			 * done.
			 */
			group->mrg_state = MAC_GROUP_STATE_REGISTERED;
			group_free++;
			continue;
		}
		/* Query group information from driver */
		cap_rings->mr_gget(mip->mi_driver, rtype, g, &group_info,
		    (mac_group_handle_t)group);

		switch (cap_rings->mr_group_type) {
		case MAC_GROUP_TYPE_DYNAMIC:
			if (cap_rings->mr_gaddring == NULL ||
			    cap_rings->mr_gremring == NULL) {
				DTRACE_PROBE3(
				    mac__init__rings_no_addremring,
				    char *, mip->mi_name,
				    mac_group_add_ring_t,
				    cap_rings->mr_gaddring,
				    mac_group_add_ring_t,
				    cap_rings->mr_gremring);
				err = EINVAL;
				goto bail;
			}

			switch (rtype) {
			case MAC_RING_TYPE_RX:
				/*
				 * The first RX group must have non-zero
				 * rings, and the following groups must
				 * have zero rings.
				 */
				if (g == 0 && group_info.mgi_count == 0) {
					DTRACE_PROBE1(
					    mac__init__rings__rx__def__zero,
					    char *, mip->mi_name);
					err = EINVAL;
					goto bail;
				}
				if (g > 0 && group_info.mgi_count != 0) {
					DTRACE_PROBE3(
					    mac__init__rings__rx__nonzero,
					    char *, mip->mi_name,
					    int, g, int, group_info.mgi_count);
					err = EINVAL;
					goto bail;
				}
				break;
			case MAC_RING_TYPE_TX:
				/*
				 * All TX ring groups must have zero rings.
				 */
				if (group_info.mgi_count != 0) {
					DTRACE_PROBE3(
					    mac__init__rings__tx__nonzero,
					    char *, mip->mi_name,
					    int, g, int, group_info.mgi_count);
					err = EINVAL;
					goto bail;
				}
				break;
			}
			break;
		case MAC_GROUP_TYPE_STATIC:
			/*
			 * Note that an empty group is allowed, e.g., an aggr
			 * would start with an empty group.
			 */
			break;
		default:
			/* unknown group type */
			DTRACE_PROBE2(mac__init__rings__unknown__type,
			    char *, mip->mi_name,
			    int, cap_rings->mr_group_type);
			err = EINVAL;
			goto bail;
		}

		/*
		 * The driver must register group->mgi_addmac/remmac() for
		 * rx groups to support multiple MAC addresses.
		 */
		if (rtype == MAC_RING_TYPE_RX) {
			if (group_info.mgi_addmac == NULL ||
			    group_info.mgi_remmac == NULL) {
				err = EINVAL;
				goto bail;
			}
		}

		/* Cache driver-supplied information */
		group->mrg_info = group_info;

		/* Update the group's status and group count. */
		mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED);
		group_free++;

		group->mrg_rings = NULL;
		group->mrg_cur_count = 0;
		mac_init_group(mip, group, group_info.mgi_count, cap_rings);
		ring_left -= group_info.mgi_count;

		/* The current group size should be equal to default value */
		ASSERT(group->mrg_cur_count == group_info.mgi_count);
	}

	/* Build up a dummy group for free resources as a pool */
	group = groups + grpcnt;

	/* Prepare basic information of the group */
	group->mrg_index = -1;
	group->mrg_type = rtype;
	group->mrg_state = MAC_GROUP_STATE_UNINIT;
	group->mrg_mh = (mac_handle_t)mip;
	group->mrg_next = NULL;

	/*
	 * If there are ungrouped rings, allocate a contiguous buffer for
	 * remaining resources.
	 */
	if (ring_left != 0) {
		group->mrg_rings = NULL;
		group->mrg_cur_count = 0;
		mac_init_group(mip, group, ring_left, cap_rings);

		/* The current group size should be equal to ring_left */
		ASSERT(group->mrg_cur_count == ring_left);

		ring_left = 0;

		/* Update this group's status */
		mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED);
	} else
		group->mrg_rings = NULL;

	ASSERT(ring_left == 0);

bail:

	/* Cache other important information to finalize the initialization */
	switch (rtype) {
	case MAC_RING_TYPE_RX:
		mip->mi_rx_group_type = cap_rings->mr_group_type;
		mip->mi_rx_group_count = cap_rings->mr_gnum;
		mip->mi_rx_groups = groups;
		mip->mi_rx_donor_grp = groups;
		if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
			/*
			 * The default ring is reserved since it is
			 * used for sending the broadcast etc. packets.
			 */
			mip->mi_rxrings_avail =
			    mip->mi_rx_groups->mrg_cur_count - 1;
			mip->mi_rxrings_rsvd = 1;
		}
		/*
		 * The default group cannot be reserved. It is used by
		 * all the clients that do not have an exclusive group.
		 */
		mip->mi_rxhwclnt_avail = mip->mi_rx_group_count - 1;
		mip->mi_rxhwclnt_used = 1;
		break;
	case MAC_RING_TYPE_TX:
		mip->mi_tx_group_type = pseudo_txgrp ? MAC_GROUP_TYPE_DYNAMIC :
		    cap_rings->mr_group_type;
		mip->mi_tx_group_count = grpcnt;
		mip->mi_tx_group_free = group_free;
		mip->mi_tx_groups = groups;

		group = groups + grpcnt;
		ring = group->mrg_rings;
		/*
		 * The ring can be NULL in the case of aggr. Aggr will
		 * have an empty Tx group which will get populated
		 * later when pseudo Tx rings are added after
		 * mac_register() is done.
		 */
		if (ring == NULL) {
			ASSERT(mip->mi_state_flags & MIS_IS_AGGR);
			/*
			 * Pass the group to aggr so it can add Tx
			 * rings to the group later.
			 */
			cap_rings->mr_gget(mip->mi_driver, rtype, 0, NULL,
			    (mac_group_handle_t)group);
			/*
			 * Even though there are no rings at this time
			 * (rings will come later), set the group
			 * state to registered.
			 */
			group->mrg_state = MAC_GROUP_STATE_REGISTERED;
		} else {
			/*
			 * Ring 0 is used as the default one and it could be
			 * assigned to a client as well.
			 */
			while ((ring->mr_index != 0) && (ring->mr_next != NULL))
				ring = ring->mr_next;
			ASSERT(ring->mr_index == 0);
			mip->mi_default_tx_ring = (mac_ring_handle_t)ring;
		}
		if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC)
			mip->mi_txrings_avail = group->mrg_cur_count - 1;
		/*
		 * The default ring cannot be reserved.
		 */
		mip->mi_txrings_rsvd = 1;
		/*
		 * The default group cannot be reserved. It will be shared
		 * by clients that do not have an exclusive group.
		 */
		mip->mi_txhwclnt_avail = mip->mi_tx_group_count;
		mip->mi_txhwclnt_used = 1;
		break;
	default:
		ASSERT(B_FALSE);
	}

	if (err != 0)
		mac_free_rings(mip, rtype);

	return (err);
}
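/*
 * To illustrate the pseudo Tx group accounting above with made-up
 * numbers: a driver advertising mr_rnum = 8 Tx rings and mr_gnum = 0
 * groups (and not an aggr) ends up with grpcnt = 7 pseudo groups plus
 * the default group at index grpcnt, which is why the kmem_zalloc()
 * above always sizes the array as grpcnt + 1 mac_group_t entries.
 */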
/*
 * The ddi interrupt handle could be shared among rings. If so, compare
 * the new ring's ddi handle with the existing ones and set the
 * ddi_shared flag.
 */
void
mac_compare_ddi_handle(mac_group_t *groups, uint_t grpcnt, mac_ring_t *cring)
{
	mac_group_t *group;
	mac_ring_t *ring;
	ddi_intr_handle_t ddi_handle;
	int g;

	ddi_handle = cring->mr_info.mri_intr.mi_ddi_handle;
	for (g = 0; g < grpcnt; g++) {
		group = groups + g;
		for (ring = group->mrg_rings; ring != NULL;
		    ring = ring->mr_next) {
			if (ring == cring)
				continue;
			if (ring->mr_info.mri_intr.mi_ddi_handle ==
			    ddi_handle) {
				if (cring->mr_type == MAC_RING_TYPE_RX &&
				    ring->mr_index == 0 &&
				    !ring->mr_info.mri_intr.mi_ddi_shared) {
					ring->mr_info.mri_intr.mi_ddi_shared =
					    B_TRUE;
				} else {
					cring->mr_info.mri_intr.mi_ddi_shared =
					    B_TRUE;
				}
				return;
			}
		}
	}
}

/*
 * Called to free all groups of a particular type (RX or TX). It's assumed
 * that no clients are using these groups.
 */
void
mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype)
{
	mac_group_t *group, *groups;
	uint_t group_count;

	switch (rtype) {
	case MAC_RING_TYPE_RX:
		if (mip->mi_rx_groups == NULL)
			return;

		groups = mip->mi_rx_groups;
		group_count = mip->mi_rx_group_count;

		mip->mi_rx_groups = NULL;
		mip->mi_rx_donor_grp = NULL;
		mip->mi_rx_group_count = 0;
		break;
	case MAC_RING_TYPE_TX:
		ASSERT(mip->mi_tx_group_count == mip->mi_tx_group_free);

		if (mip->mi_tx_groups == NULL)
			return;

		groups = mip->mi_tx_groups;
		group_count = mip->mi_tx_group_count;

		mip->mi_tx_groups = NULL;
		mip->mi_tx_group_count = 0;
		mip->mi_tx_group_free = 0;
		mip->mi_default_tx_ring = NULL;
		break;
	default:
		ASSERT(B_FALSE);
	}

	for (group = groups; group != NULL; group = group->mrg_next) {
		mac_ring_t *ring;

		if (group->mrg_cur_count == 0)
			continue;

		ASSERT(group->mrg_rings != NULL);

		while ((ring = group->mrg_rings) != NULL) {
			group->mrg_rings = ring->mr_next;
			mac_ring_free(mip, ring);
		}
	}

	/* Free all the cached rings */
	mac_ring_freeall(mip);
	/* Free the block of group data structures */
	kmem_free(groups, sizeof (mac_group_t) * (group_count + 1));
}

/*
 * Associate a MAC address with a receive group.
 *
 * The return value of this function should always be checked properly,
 * because any type of failure could cause unexpected results. A MAC
 * address can be added to or removed from a group only after the group
 * has been reserved. Ideally, a successful reservation always leads to
 * calling mac_group_addmac() to steer the desired traffic. Failure to
 * add a unicast MAC address doesn't always imply that the group is
 * functioning abnormally.
 *
 * Currently this function is called everywhere, and it reflects assumptions
 * about MAC addresses in the implementation. CR 6735196.
 */
int
mac_group_addmac(mac_group_t *group, const uint8_t *addr)
{
	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
	ASSERT(group->mrg_info.mgi_addmac != NULL);

	return (group->mrg_info.mgi_addmac(group->mrg_info.mgi_driver, addr));
}
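/*
 * For illustration only, a minimal sketch of the check the comment above
 * asks for; the fallback behavior is an assumption of the example, not
 * something mandated by this file:
 *
 *	if ((err = mac_group_addmac(grp, mcip->mci_unicast->ma_addr)) != 0) {
 *		// couldn't program the hw classifier; the caller decides
 *		// whether to fail or fall back (e.g. promiscuous mode)
 *		return (err);
 *	}
 */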
/*
 * Remove the association between a MAC address and a receive group.
 */
int
mac_group_remmac(mac_group_t *group, const uint8_t *addr)
{
	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
	ASSERT(group->mrg_info.mgi_remmac != NULL);

	return (group->mrg_info.mgi_remmac(group->mrg_info.mgi_driver, addr));
}

/*
 * This is the entry point for packets transmitted through the bridging
 * code. If no bridge is in place, MAC_RING_TX transmits using the tx ring.
 * The 'rh' pointer may be NULL to select the default ring.
 */
mblk_t *
mac_bridge_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp)
{
	mac_handle_t mh;

	/*
	 * Once we take a reference on the bridge link, the bridge
	 * module itself can't unload, so the callback pointers are
	 * stable.
	 */
	mutex_enter(&mip->mi_bridge_lock);
	if ((mh = mip->mi_bridge_link) != NULL)
		mac_bridge_ref_cb(mh, B_TRUE);
	mutex_exit(&mip->mi_bridge_lock);
	if (mh == NULL) {
		MAC_RING_TX(mip, rh, mp, mp);
	} else {
		mp = mac_bridge_tx_cb(mh, rh, mp);
		mac_bridge_ref_cb(mh, B_FALSE);
	}

	return (mp);
}

/*
 * Find a ring from its index.
 */
mac_ring_handle_t
mac_find_ring(mac_group_handle_t gh, int index)
{
	mac_group_t *group = (mac_group_t *)gh;
	mac_ring_t *ring;

	for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next)
		if (ring->mr_index == index)
			break;

	return ((mac_ring_handle_t)ring);
}
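/*
 * For example (illustrative only, with assumed variable names), a caller
 * that wants the default ring of a group might do:
 *
 *	mac_ring_handle_t rh;
 *
 *	if ((rh = mac_find_ring((mac_group_handle_t)grp, 0)) == NULL) {
 *		// no ring with index 0 in this group
 *	}
 */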
/*
 * Add a ring to an existing group.
 *
 * The ring must either be passed directly (for example if the ring
 * movement is initiated by the framework), or specified through a driver
 * index (for example when the ring is added by the driver).
 *
 * The caller needs to call mac_perim_enter() before calling this function.
 */
int
i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index)
{
	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
	mac_capab_rings_t *cap_rings;
	boolean_t driver_call = (ring == NULL);
	mac_group_type_t group_type;
	int ret = 0;
	flow_entry_t *flent;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	switch (group->mrg_type) {
	case MAC_RING_TYPE_RX:
		cap_rings = &mip->mi_rx_rings_cap;
		group_type = mip->mi_rx_group_type;
		break;
	case MAC_RING_TYPE_TX:
		cap_rings = &mip->mi_tx_rings_cap;
		group_type = mip->mi_tx_group_type;
		break;
	default:
		ASSERT(B_FALSE);
	}

	/*
	 * There should be no ring with the same ring index in the target
	 * group.
	 */
	ASSERT(mac_find_ring((mac_group_handle_t)group,
	    driver_call ? index : ring->mr_index) == NULL);

	if (driver_call) {
		/*
		 * The function is called as a result of a request from
		 * a driver to add a ring to an existing group, for example
		 * from the aggregation driver. Allocate a new mac_ring_t
		 * for that ring.
		 */
		ring = mac_init_ring(mip, group, index, cap_rings);
		ASSERT(group->mrg_state > MAC_GROUP_STATE_UNINIT);
	} else {
		/*
		 * The function is called as a result of a MAC layer request
		 * to add a ring to an existing group. In this case the
		 * ring is being moved between groups, which requires
		 * the underlying driver to support dynamic grouping,
		 * and the mac_ring_t already exists.
		 */
		ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC);
		ASSERT(group->mrg_driver == NULL ||
		    cap_rings->mr_gaddring != NULL);
		ASSERT(ring->mr_gh == NULL);
	}

	/*
	 * At this point the ring should not be in use, and it should be
	 * of the right type for the target group.
	 */
	ASSERT(ring->mr_state < MR_INUSE);
	ASSERT(ring->mr_srs == NULL);
	ASSERT(ring->mr_type == group->mrg_type);

	if (!driver_call) {
		/*
		 * Add the driver level hardware ring if the process was not
		 * initiated by the driver, and the target group is not the
		 * default group.
		 */
		if (group->mrg_driver != NULL) {
			cap_rings->mr_gaddring(group->mrg_driver,
			    ring->mr_driver, ring->mr_type);
		}

		/*
		 * Insert the ring ahead of the existing rings.
		 */
		ring->mr_next = group->mrg_rings;
		group->mrg_rings = ring;
		ring->mr_gh = (mac_group_handle_t)group;
		group->mrg_cur_count++;
	}

	/*
	 * If the group has not been actively used, we're done.
	 */
	if (group->mrg_index != -1 &&
	    group->mrg_state < MAC_GROUP_STATE_RESERVED)
		return (0);

	/*
	 * Start the ring if needed. On failure, undo the grouping action.
	 */
	if (ring->mr_state != MR_INUSE) {
		if ((ret = mac_start_ring(ring)) != 0) {
			if (!driver_call) {
				cap_rings->mr_gremring(group->mrg_driver,
				    ring->mr_driver, ring->mr_type);
			}
			group->mrg_cur_count--;
			group->mrg_rings = ring->mr_next;

			ring->mr_gh = NULL;

			if (driver_call)
				mac_ring_free(mip, ring);

			return (ret);
		}
	}

	/*
	 * Set up the SRS/SR according to the ring type.
	 */
	switch (ring->mr_type) {
	case MAC_RING_TYPE_RX:
		/*
		 * Set up an SRS on top of the new ring if the group is
		 * reserved for someone's exclusive use.
		 */
		if (group->mrg_state == MAC_GROUP_STATE_RESERVED) {
			mac_client_impl_t *mcip;

			mcip = MAC_GROUP_ONLY_CLIENT(group);
			/*
			 * Even though this group is reserved we might still
			 * have multiple clients, i.e. a VLAN shares the
			 * group with the primary mac client.
			 */
			if (mcip != NULL) {
				flent = mcip->mci_flent;
				ASSERT(flent->fe_rx_srs_cnt > 0);
				mac_rx_srs_group_setup(mcip, flent, SRST_LINK);
				mac_fanout_setup(mcip, flent,
				    MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver,
				    mcip, NULL, NULL);
			} else {
				ring->mr_classify_type = MAC_SW_CLASSIFIER;
			}
		}
		break;
	case MAC_RING_TYPE_TX:
	{
		mac_grp_client_t	*mgcp = group->mrg_clients;
		mac_client_impl_t	*mcip;
		mac_soft_ring_set_t	*mac_srs;
		mac_srs_tx_t		*tx;

		if (MAC_GROUP_NO_CLIENT(group)) {
			if (ring->mr_state == MR_INUSE)
				mac_stop_ring(ring);
			ring->mr_flag = 0;
			break;
		}
		/*
		 * If the rings are being moved to a group that has
		 * clients using it, then add the new rings to the
		 * clients' SRS.
		 */
		while (mgcp != NULL) {
			boolean_t	is_aggr;

			mcip = mgcp->mgc_client;
			flent = mcip->mci_flent;
			is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR);
			mac_srs = MCIP_TX_SRS(mcip);
			tx = &mac_srs->srs_tx;
			mac_tx_client_quiesce((mac_client_handle_t)mcip);
			/*
			 * If we are growing from 1 ring to multiple rings,
			 * convert the SRS to a fanout mode.
			 */
			if (tx->st_mode == SRS_TX_BW ||
			    tx->st_mode == SRS_TX_SERIALIZE ||
			    tx->st_mode == SRS_TX_DEFAULT) {
				mac_ring_t	*tx_ring = tx->st_arg2;

				tx->st_arg2 = NULL;
				mac_tx_srs_stat_recreate(mac_srs, B_TRUE);
				mac_tx_srs_add_ring(mac_srs, tx_ring);
				if (mac_srs->srs_type & SRST_BW_CONTROL) {
					tx->st_mode = is_aggr ? SRS_TX_BW_AGGR :
					    SRS_TX_BW_FANOUT;
				} else {
					tx->st_mode = is_aggr ? SRS_TX_AGGR :
					    SRS_TX_FANOUT;
				}
				tx->st_func = mac_tx_get_func(tx->st_mode);
			}
			mac_tx_srs_add_ring(mac_srs, ring);
			mac_fanout_setup(mcip, flent,
			    MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver,
			    mcip, NULL, NULL);
			mac_tx_client_restart((mac_client_handle_t)mcip);
			mgcp = mgcp->mgc_next;
		}
		break;
	}
	default:
		ASSERT(B_FALSE);
	}
	/*
	 * For aggr, the default ring will be NULL to begin with. If it
	 * is NULL, then pick the first ring that gets added as the
	 * default ring. Any ring in an aggregation can be removed at
	 * any time (by the user action of removing a link) and if the
	 * current default ring gets removed, then a new one gets
	 * picked (see i_mac_group_rem_ring()).
	 */
	if (mip->mi_state_flags & MIS_IS_AGGR &&
	    mip->mi_default_tx_ring == NULL &&
	    ring->mr_type == MAC_RING_TYPE_TX) {
		mip->mi_default_tx_ring = (mac_ring_handle_t)ring;
	}

	MAC_RING_UNMARK(ring, MR_INCIPIENT);
	return (0);
}
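/*
 * To make the Tx SRS mode transitions above concrete, an illustrative
 * (not exhaustive) summary; the ring counts are assumptions of the
 * example:
 *
 *	1 ring,  no bw limit:	SRS_TX_DEFAULT, st_arg2 = the ring
 *	N rings, no bw limit:	SRS_TX_FANOUT (SRS_TX_AGGR for aggr)
 *	N rings, bw limit:	SRS_TX_BW_FANOUT (SRS_TX_BW_AGGR for aggr)
 *
 * Growing from 1 to N therefore moves the single ring out of st_arg2 and
 * into the SRS's soft ring list before the new ring is added.
 */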
/*
 * Remove a ring from its current group. MAC internal function for dynamic
 * grouping.
 *
 * The caller needs to call mac_perim_enter() before calling this function.
 */
void
i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring,
    boolean_t driver_call)
{
	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
	mac_capab_rings_t *cap_rings = NULL;
	mac_group_type_t group_type;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	ASSERT(mac_find_ring((mac_group_handle_t)group,
	    ring->mr_index) == (mac_ring_handle_t)ring);
	ASSERT((mac_group_t *)ring->mr_gh == group);
	ASSERT(ring->mr_type == group->mrg_type);

	if (ring->mr_state == MR_INUSE)
		mac_stop_ring(ring);
	switch (ring->mr_type) {
	case MAC_RING_TYPE_RX:
		group_type = mip->mi_rx_group_type;
		cap_rings = &mip->mi_rx_rings_cap;

		/*
		 * Only hardware classified packets hold a reference to the
		 * ring all the way up the Rx path. mac_rx_srs_remove()
		 * will take care of quiescing the Rx path and removing the
		 * SRS. The software classified path neither holds a reference
		 * nor any association with the ring in mac_rx.
		 */
		if (ring->mr_srs != NULL) {
			mac_rx_srs_remove(ring->mr_srs);
			ring->mr_srs = NULL;
		}

		break;
	case MAC_RING_TYPE_TX:
	{
		mac_grp_client_t	*mgcp;
		mac_client_impl_t	*mcip;
		mac_soft_ring_set_t	*mac_srs;
		mac_srs_tx_t		*tx;
		mac_ring_t		*rem_ring;
		mac_group_t		*defgrp;
		uint_t			ring_info = 0;

		/*
		 * For TX this function is invoked in three
		 * cases:
		 *
		 * 1) In the case of a failure during the
		 * initial creation of a group when a share is
		 * associated with a MAC client. So the SRS is not
		 * yet set up, and will be set up later after the
		 * group has been reserved and populated.
		 *
		 * 2) From mac_release_tx_group() when freeing
		 * a TX SRS.
		 *
		 * 3) In the case of aggr, when a port gets removed,
		 * the pseudo Tx rings that it exposed get removed.
		 *
		 * In the first two cases the SRS and its soft
		 * rings are already quiesced.
		 */
		if (driver_call) {
			mac_client_impl_t *mcip;
			mac_soft_ring_set_t *mac_srs;
			mac_soft_ring_t *sringp;
			mac_srs_tx_t *srs_tx;

			if (mip->mi_state_flags & MIS_IS_AGGR &&
			    mip->mi_default_tx_ring ==
			    (mac_ring_handle_t)ring) {
				/* pick a new default Tx ring */
				mip->mi_default_tx_ring =
				    (group->mrg_rings != ring) ?
				    (mac_ring_handle_t)group->mrg_rings :
				    (mac_ring_handle_t)(ring->mr_next);
			}
			/* Presently only the aggr case comes here */
			if (group->mrg_state != MAC_GROUP_STATE_RESERVED)
				break;

			mcip = MAC_GROUP_ONLY_CLIENT(group);
			ASSERT(mcip != NULL);
			ASSERT(mcip->mci_state_flags & MCIS_IS_AGGR);
			mac_srs = MCIP_TX_SRS(mcip);
			ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_AGGR ||
			    mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR);
			srs_tx = &mac_srs->srs_tx;
			/*
			 * Wake up any callers blocked on this
			 * Tx ring due to flow control.
			 */
			sringp = srs_tx->st_soft_rings[ring->mr_index];
			ASSERT(sringp != NULL);
			mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)sringp);
			mac_tx_client_quiesce((mac_client_handle_t)mcip);
			mac_tx_srs_del_ring(mac_srs, ring);
			mac_tx_client_restart((mac_client_handle_t)mcip);
			break;
		}
		ASSERT(ring != (mac_ring_t *)mip->mi_default_tx_ring);
		group_type = mip->mi_tx_group_type;
		cap_rings = &mip->mi_tx_rings_cap;
		/*
		 * See if we need to take it out of the MAC clients using
		 * this group.
		 */
		if (MAC_GROUP_NO_CLIENT(group))
			break;
		mgcp = group->mrg_clients;
		defgrp = MAC_DEFAULT_TX_GROUP(mip);
		while (mgcp != NULL) {
			mcip = mgcp->mgc_client;
			mac_srs = MCIP_TX_SRS(mcip);
			tx = &mac_srs->srs_tx;
			mac_tx_client_quiesce((mac_client_handle_t)mcip);
			/*
			 * If we are here when removing rings from the
			 * defgroup, mac_reserve_tx_ring would have
			 * already deleted the ring from the MAC
			 * clients in the group.
			 */
			if (group != defgrp) {
				mac_tx_invoke_callbacks(mcip,
				    (mac_tx_cookie_t)
				    mac_tx_srs_get_soft_ring(mac_srs, ring));
				mac_tx_srs_del_ring(mac_srs, ring);
			}
			/*
			 * Additionally, if we are left with only
			 * one ring in the group after this, we need
			 * to modify the mode etc. accordingly. (We haven't
			 * yet taken the ring out, so we check with 2.)
			 */
			if (group->mrg_cur_count == 2) {
				if (ring->mr_next == NULL)
					rem_ring = group->mrg_rings;
				else
					rem_ring = ring->mr_next;
				mac_tx_invoke_callbacks(mcip,
				    (mac_tx_cookie_t)
				    mac_tx_srs_get_soft_ring(mac_srs,
				    rem_ring));
				mac_tx_srs_del_ring(mac_srs, rem_ring);
				if (rem_ring->mr_state != MR_INUSE) {
					(void) mac_start_ring(rem_ring);
				}
				tx->st_arg2 = (void *)rem_ring;
				mac_tx_srs_stat_recreate(mac_srs, B_FALSE);
				ring_info = mac_hwring_getinfo(
				    (mac_ring_handle_t)rem_ring);
				/*
				 * We are shrinking from multiple
				 * to 1 ring.
				 */
				if (mac_srs->srs_type & SRST_BW_CONTROL) {
					tx->st_mode = SRS_TX_BW;
				} else if (mac_tx_serialize ||
				    (ring_info & MAC_RING_TX_SERIALIZE)) {
					tx->st_mode = SRS_TX_SERIALIZE;
				} else {
					tx->st_mode = SRS_TX_DEFAULT;
				}
				tx->st_func = mac_tx_get_func(tx->st_mode);
			}
			mac_tx_client_restart((mac_client_handle_t)mcip);
			mgcp = mgcp->mgc_next;
		}
		break;
	}
	default:
		ASSERT(B_FALSE);
	}

	/*
	 * Remove the ring from the group.
	 */
	if (ring == group->mrg_rings)
		group->mrg_rings = ring->mr_next;
	else {
		mac_ring_t *pre;

		pre = group->mrg_rings;
		while (pre->mr_next != ring)
			pre = pre->mr_next;
		pre->mr_next = ring->mr_next;
	}
	group->mrg_cur_count--;

	if (!driver_call) {
		ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC);
		ASSERT(group->mrg_driver == NULL ||
		    cap_rings->mr_gremring != NULL);

		/*
		 * Remove the driver level hardware ring.
		 */
		if (group->mrg_driver != NULL) {
			cap_rings->mr_gremring(group->mrg_driver,
			    ring->mr_driver, ring->mr_type);
		}
	}

	ring->mr_gh = NULL;
	if (driver_call)
		mac_ring_free(mip, ring);
	else
		ring->mr_flag = 0;
}

/*
 * Move a ring to the target group. If needed, remove the ring from the
 * group that it currently belongs to.
 *
 * The caller needs to enter the MAC perimeter by calling mac_perim_enter().
 */
static int
mac_group_mov_ring(mac_impl_t *mip, mac_group_t *d_group, mac_ring_t *ring)
{
	mac_group_t *s_group = (mac_group_t *)ring->mr_gh;
	int rv;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(d_group != NULL);
	ASSERT(s_group->mrg_mh == d_group->mrg_mh);

	if (s_group == d_group)
		return (0);

	/*
	 * Remove it from the current group first.
	 */
	if (s_group != NULL)
		i_mac_group_rem_ring(s_group, ring, B_FALSE);

	/*
	 * Add it to the new group.
	 */
	rv = i_mac_group_add_ring(d_group, ring, 0);
	if (rv != 0) {
		/*
		 * Failed to add the ring to the destination group; try to
		 * put it back in the source group. If that also fails, the
		 * ring is stuck in limbo, so log a message.
		 */
		if (i_mac_group_add_ring(s_group, ring, 0)) {
			cmn_err(CE_WARN, "%s: failed to move ring %p\n",
			    mip->mi_name, (void *)ring);
		}
	}

	return (rv);
}

/*
 * Find a MAC address according to its value.
 */
mac_address_t *
mac_find_macaddr(mac_impl_t *mip, uint8_t *mac_addr)
{
	mac_address_t *map;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	for (map = mip->mi_addresses; map != NULL; map = map->ma_next) {
		if (bcmp(mac_addr, map->ma_addr, map->ma_len) == 0)
			break;
	}

	return (map);
}

/*
 * Check whether the MAC address is shared by multiple clients.
 */
boolean_t
mac_check_macaddr_shared(mac_address_t *map)
{
	ASSERT(MAC_PERIM_HELD((mac_handle_t)map->ma_mip));

	return (map->ma_nusers > 1);
}
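/*
 * Illustrative only: looking up an address under the perimeter. The
 * surrounding perimeter calls are the caller's responsibility, and the
 * variable names are assumptions of the example:
 *
 *	i_mac_perim_enter(mip);
 *	if ((map = mac_find_macaddr(mip, addr)) != NULL &&
 *	    mac_check_macaddr_shared(map)) {
 *		// address is in use by more than one client
 *	}
 *	i_mac_perim_exit(mip);
 */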
/*
 * Remove the specified MAC address from the MAC address list and free it.
 */
static void
mac_free_macaddr(mac_address_t *map)
{
	mac_impl_t *mip = map->ma_mip;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mip->mi_addresses != NULL);

	map = mac_find_macaddr(mip, map->ma_addr);

	ASSERT(map != NULL);
	ASSERT(map->ma_nusers == 0);

	if (map == mip->mi_addresses) {
		mip->mi_addresses = map->ma_next;
	} else {
		mac_address_t *pre;

		pre = mip->mi_addresses;
		while (pre->ma_next != map)
			pre = pre->ma_next;
		pre->ma_next = map->ma_next;
	}

	kmem_free(map, sizeof (mac_address_t));
}

/*
 * Add a MAC address reference for a client. If the desired MAC address
 * exists, add a reference to it. Otherwise, add the new address by adding
 * it to a reserved group or setting promiscuous mode. Won't try a
 * different group if the given group is non-NULL, so the caller must
 * explicitly share the default group when needed.
 *
 * Note, the primary MAC address is initialized at registration time, so
 * adding it to the default group only needs to activate it if its
 * reference count is still zero. Also, some drivers may not have
 * advertised the RINGS capability.
 */
int
mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr,
    boolean_t use_hw)
{
	mac_address_t *map;
	int err = 0;
	boolean_t allocated_map = B_FALSE;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	map = mac_find_macaddr(mip, mac_addr);

	/*
	 * If the new MAC address has not been added, allocate a new one
	 * and set it up.
	 */
	if (map == NULL) {
		map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP);
		map->ma_len = mip->mi_type->mt_addr_length;
		bcopy(mac_addr, map->ma_addr, map->ma_len);
		map->ma_nusers = 0;
		map->ma_group = group;
		map->ma_mip = mip;

		/* add the new MAC address to the head of the address list */
		map->ma_next = mip->mi_addresses;
		mip->mi_addresses = map;

		allocated_map = B_TRUE;
	}

	ASSERT(map->ma_group == NULL || map->ma_group == group);
	if (map->ma_group == NULL)
		map->ma_group = group;

	/*
	 * If the MAC address is already in use, simply account for the
	 * new client.
	 */
	if (map->ma_nusers++ > 0)
		return (0);

	/*
	 * Activate this MAC address by adding it to the reserved group.
	 */
	if (group != NULL) {
		err = mac_group_addmac(group, (const uint8_t *)mac_addr);
		if (err == 0) {
			map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;
			return (0);
		}
	}

	/*
	 * The MAC address addition failed. If the client requires a
	 * hardware classified MAC address, fail the operation.
	 */
	if (use_hw) {
		err = ENOSPC;
		goto bail;
	}

	/*
	 * Try promiscuous mode.
	 *
	 * For drivers that don't advertise the RINGS capability, do
	 * nothing for the primary address.
	 */
	if ((group == NULL) &&
	    (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0)) {
		map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;
		return (0);
	}

	/*
	 * Enable promiscuous mode in order to receive traffic
	 * to the new MAC address.
	 */
	if ((err = i_mac_promisc_set(mip, B_TRUE)) == 0) {
		map->ma_type = MAC_ADDRESS_TYPE_UNICAST_PROMISC;
		return (0);
	}
	/*
	 * Free the MAC address that could not be added. Don't free
	 * a pre-existing address; it could have been the entry
	 * for the primary MAC address which was pre-allocated by
	 * mac_init_macaddr(), and which must remain on the list.
	 */
bail:
	map->ma_nusers--;
	if (allocated_map)
		mac_free_macaddr(map);
	return (err);
}

/*
 * Remove a reference to a MAC address. This may cause the MAC address to
 * be removed from its associated group, or promiscuous mode to be turned
 * off. The caller needs to handle any failure properly.
 */
int
mac_remove_macaddr(mac_address_t *map)
{
	mac_impl_t *mip = map->ma_mip;
	int err = 0;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	ASSERT(map == mac_find_macaddr(mip, map->ma_addr));

	/*
	 * If it's not the last client using this MAC address, only update
	 * the MAC clients count.
	 */
	if (--map->ma_nusers > 0)
		return (0);

	/*
	 * The MAC address is no longer used by any MAC client, so remove
	 * it from its associated group, or turn off promiscuous mode
	 * if it was enabled for the MAC address.
	 */
	switch (map->ma_type) {
	case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED:
		/*
		 * Don't free the preset primary address for drivers that
		 * don't advertise the RINGS capability.
		 */
		if (map->ma_group == NULL)
			return (0);

		err = mac_group_remmac(map->ma_group, map->ma_addr);
		if (err == 0)
			map->ma_group = NULL;
		break;
	case MAC_ADDRESS_TYPE_UNICAST_PROMISC:
		err = i_mac_promisc_set(mip, B_FALSE);
		break;
	default:
		ASSERT(B_FALSE);
	}

	if (err != 0)
		return (err);

	/*
	 * We created the MAC address for the primary one at registration,
	 * so we won't free it here. mac_fini_macaddr() will take care of it.
	 */
	if (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) != 0)
		mac_free_macaddr(map);

	return (0);
}
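/*
 * A summary of the fallback chain in mac_add_macaddr() above, shown as an
 * illustrative sketch only (the function itself is authoritative):
 *
 *	if (group != NULL && mac_group_addmac(group, addr) == 0)
 *		-> MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED
 *	else if (use_hw)
 *		-> fail with ENOSPC
 *	else if (group == NULL && addr is the primary address)
 *		-> MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED (driver handles it)
 *	else if (i_mac_promisc_set(mip, B_TRUE) == 0)
 *		-> MAC_ADDRESS_TYPE_UNICAST_PROMISC
 *	else
 *		-> undo the reference and fail
 */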
/*
 * Update an existing MAC address. The caller needs to make sure that the
 * new value has not been used.
 */
int
mac_update_macaddr(mac_address_t *map, uint8_t *mac_addr)
{
	mac_impl_t *mip = map->ma_mip;
	int err = 0;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mac_find_macaddr(mip, mac_addr) == NULL);

	switch (map->ma_type) {
	case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED:
		/*
		 * Update the primary address for drivers that are not
		 * RINGS capable.
		 */
		if (mip->mi_rx_groups == NULL) {
			err = mip->mi_unicst(mip->mi_driver, (const uint8_t *)
			    mac_addr);
			if (err != 0)
				return (err);
			break;
		}

		/*
		 * If this MAC address is not currently in use,
		 * simply break out and update the value.
		 */
		if (map->ma_nusers == 0)
			break;

		/*
		 * Need to replace the MAC address associated with a group.
		 */
		err = mac_group_remmac(map->ma_group, map->ma_addr);
		if (err != 0)
			return (err);

		err = mac_group_addmac(map->ma_group, mac_addr);

		/*
		 * A failure hints at a hardware error. The MAC layer needs
		 * an error notification facility to handle this. For now,
		 * simply try to restore the value.
		 */
		if (err != 0)
			(void) mac_group_addmac(map->ma_group, map->ma_addr);

		break;
	case MAC_ADDRESS_TYPE_UNICAST_PROMISC:
		/*
		 * Nothing more to do if in promiscuous mode.
		 */
		break;
	default:
		ASSERT(B_FALSE);
	}

	/*
	 * Successfully replaced the MAC address.
	 */
	if (err == 0)
		bcopy(mac_addr, map->ma_addr, map->ma_len);

	return (err);
}

/*
 * Freshen the MAC address with a new value. The caller must have updated
 * the hardware MAC address before calling this function.
 * This function is used to handle the MAC address change notification
 * from underlying drivers.
 */
void
mac_freshen_macaddr(mac_address_t *map, uint8_t *mac_addr)
{
	mac_impl_t *mip = map->ma_mip;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mac_find_macaddr(mip, mac_addr) == NULL);

	/*
	 * Freshen the MAC address with the new value.
	 */
	bcopy(mac_addr, map->ma_addr, map->ma_len);
	bcopy(mac_addr, mip->mi_addr, map->ma_len);

	/*
	 * Update all MAC clients that share this MAC address.
	 */
	mac_unicast_update_clients(mip, map);
}

/*
 * Set up the primary MAC address.
 */
void
mac_init_macaddr(mac_impl_t *mip)
{
	mac_address_t *map;

	/*
	 * The reference count is initialized to zero, until it's really
	 * activated.
	 */
	map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP);
	map->ma_len = mip->mi_type->mt_addr_length;
	bcopy(mip->mi_addr, map->ma_addr, map->ma_len);

	/*
	 * If the driver advertises the RINGS capability, it shouldn't have
	 * initialized its primary MAC address. For other drivers, including
	 * VNIC, the primary address must work after registration.
	 */
	if (mip->mi_rx_groups == NULL)
		map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;

	map->ma_mip = mip;

	mip->mi_addresses = map;
}

/*
 * Clean up the primary MAC address. Note, only one primary MAC address
 * is allowed. All other MAC addresses must have been freed appropriately.
 */
void
mac_fini_macaddr(mac_impl_t *mip)
{
	mac_address_t *map = mip->mi_addresses;

	if (map == NULL)
		return;

	/*
	 * If mi_addresses is initialized, there should be exactly one
	 * entry left on the list with no users.
	 */
	ASSERT(map->ma_nusers == 0);
	ASSERT(map->ma_next == NULL);

	kmem_free(map, sizeof (mac_address_t));
	mip->mi_addresses = NULL;
}
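/*
 * Illustrative lifecycle of the primary address entry (a sketch; the
 * calling contexts are stated as assumptions rather than quoted call
 * paths):
 *
 *	mac_register()		-> mac_init_macaddr()	ma_nusers = 0
 *	client bringup		-> mac_add_macaddr()	ma_nusers = 1
 *	client teardown		-> mac_remove_macaddr()	ma_nusers = 0
 *	mac_unregister()	-> mac_fini_macaddr()	entry freed
 */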
/*
 * Logging related functions.
 *
 * Note that kernel statistics have been extended to maintain fine
 * granularity of statistics viz. hardware lane, software lane, fanout
 * stats etc. However, extended accounting continues to support only
 * aggregate statistics like before.
 */

/* Write the flow description to the log file */
int
mac_write_flow_desc(flow_entry_t *flent, mac_client_impl_t *mcip)
{
	flow_desc_t *fdesc;
	mac_resource_props_t *mrp;
	net_desc_t ndesc;

	bzero(&ndesc, sizeof (net_desc_t));

	/*
	 * Grab the fe_lock to see a self-consistent fe_flow_desc.
	 * Updates to the fe_flow_desc are done under the fe_lock.
	 */
	mutex_enter(&flent->fe_lock);
	fdesc = &flent->fe_flow_desc;
	mrp = &flent->fe_resource_props;

	ndesc.nd_name = flent->fe_flow_name;
	ndesc.nd_devname = mcip->mci_name;
	bcopy(fdesc->fd_src_mac, ndesc.nd_ehost, ETHERADDRL);
	bcopy(fdesc->fd_dst_mac, ndesc.nd_edest, ETHERADDRL);
	ndesc.nd_sap = htonl(fdesc->fd_sap);
	ndesc.nd_isv4 = (uint8_t)fdesc->fd_ipversion == IPV4_VERSION;
	ndesc.nd_bw_limit = mrp->mrp_maxbw;
	if (ndesc.nd_isv4) {
		ndesc.nd_saddr[3] = htonl(fdesc->fd_local_addr.s6_addr32[3]);
		ndesc.nd_daddr[3] = htonl(fdesc->fd_remote_addr.s6_addr32[3]);
	} else {
		bcopy(&fdesc->fd_local_addr, ndesc.nd_saddr, IPV6_ADDR_LEN);
		bcopy(&fdesc->fd_remote_addr, ndesc.nd_daddr, IPV6_ADDR_LEN);
	}
	ndesc.nd_sport = htons(fdesc->fd_local_port);
	ndesc.nd_dport = htons(fdesc->fd_remote_port);
	ndesc.nd_protocol = (uint8_t)fdesc->fd_protocol;
	mutex_exit(&flent->fe_lock);

	return (exacct_commit_netinfo((void *)&ndesc, EX_NET_FLDESC_REC));
}

/* Write the flow statistics to the log file */
int
mac_write_flow_stats(flow_entry_t *flent)
{
	net_stat_t nstat;
	mac_soft_ring_set_t *mac_srs;
	mac_rx_stats_t *mac_rx_stat;
	mac_tx_stats_t *mac_tx_stat;
	int i;

	bzero(&nstat, sizeof (net_stat_t));
	nstat.ns_name = flent->fe_flow_name;
	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
		mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
		mac_rx_stat = &mac_srs->srs_rx.sr_stat;

		nstat.ns_ibytes += mac_rx_stat->mrs_intrbytes +
		    mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes;
		nstat.ns_ipackets += mac_rx_stat->mrs_intrcnt +
		    mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt;
		nstat.ns_ierrors += mac_rx_stat->mrs_ierrors;
	}

	mac_srs = (mac_soft_ring_set_t *)(flent->fe_tx_srs);
	if (mac_srs != NULL) {
		mac_tx_stat = &mac_srs->srs_tx.st_stat;

		nstat.ns_obytes = mac_tx_stat->mts_obytes;
		nstat.ns_opackets = mac_tx_stat->mts_opackets;
		nstat.ns_oerrors = mac_tx_stat->mts_oerrors;
	}
	return (exacct_commit_netinfo((void *)&nstat, EX_NET_FLSTAT_REC));
}
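/*
 * Usage note (an assumption about the typical consumer, not something
 * this file enforces): these records are normally consumed through the
 * extended accounting framework, e.g. enabled from userland with
 * something like `acctadm -e extended -f /var/log/net.exacct net`,
 * which ultimately drives mac_start_logusage() below.
 */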
/* Write the link description to the log file */
int
mac_write_link_desc(mac_client_impl_t *mcip)
{
	net_desc_t ndesc;
	flow_entry_t *flent = mcip->mci_flent;

	bzero(&ndesc, sizeof (net_desc_t));

	ndesc.nd_name = mcip->mci_name;
	ndesc.nd_devname = mcip->mci_name;
	ndesc.nd_isv4 = B_TRUE;
	/*
	 * Grab the fe_lock to see a self-consistent fe_flow_desc.
	 * Updates to the fe_flow_desc are done under the fe_lock
	 * after removing the flent from the flow table.
	 */
	mutex_enter(&flent->fe_lock);
	bcopy(flent->fe_flow_desc.fd_src_mac, ndesc.nd_ehost, ETHERADDRL);
	mutex_exit(&flent->fe_lock);

	return (exacct_commit_netinfo((void *)&ndesc, EX_NET_LNDESC_REC));
}

/* Write the link statistics to the log file */
int
mac_write_link_stats(mac_client_impl_t *mcip)
{
	net_stat_t nstat;
	flow_entry_t *flent;
	mac_soft_ring_set_t *mac_srs;
	mac_rx_stats_t *mac_rx_stat;
	mac_tx_stats_t *mac_tx_stat;
	int i;

	bzero(&nstat, sizeof (net_stat_t));
	nstat.ns_name = mcip->mci_name;
	flent = mcip->mci_flent;
	if (flent != NULL) {
		for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
			mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
			mac_rx_stat = &mac_srs->srs_rx.sr_stat;

			nstat.ns_ibytes += mac_rx_stat->mrs_intrbytes +
			    mac_rx_stat->mrs_pollbytes +
			    mac_rx_stat->mrs_lclbytes;
			nstat.ns_ipackets += mac_rx_stat->mrs_intrcnt +
			    mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt;
			nstat.ns_ierrors += mac_rx_stat->mrs_ierrors;
		}
	}

	mac_srs = (mac_soft_ring_set_t *)(mcip->mci_flent->fe_tx_srs);
	if (mac_srs != NULL) {
		mac_tx_stat = &mac_srs->srs_tx.st_stat;

		nstat.ns_obytes = mac_tx_stat->mts_obytes;
		nstat.ns_opackets = mac_tx_stat->mts_opackets;
		nstat.ns_oerrors = mac_tx_stat->mts_oerrors;
	}
	return (exacct_commit_netinfo((void *)&nstat, EX_NET_LNSTAT_REC));
}

/*
 * For a given flow, if the description has not been logged before, do it
 * now. If it is a VNIC, then we have collected information about it from
 * the MAC table, so skip it.
 */
/*ARGSUSED*/
static int
mac_log_flowinfo(flow_entry_t *flent, void *args)
{
	mac_client_impl_t *mcip = flent->fe_mcip;

	if (mcip == NULL)
		return (0);

	/*
	 * If the name starts with "vnic", and fe_user_generated is true (to
	 * exclude the mcast and active flow entries created implicitly for
	 * a vnic), it is a VNIC flow. I.e. vnic1 is a vnic flow,
	 * vnic/bge1/mcast1 is not and neither is vnic/bge1/active.
	 */
	if (strncasecmp(flent->fe_flow_name, "vnic", 4) == 0 &&
	    (flent->fe_type & FLOW_USER) != 0) {
		return (0);
	}

	if (!flent->fe_desc_logged) {
		/*
		 * We don't return an error because we want to continue the
		 * walk in case this is the last walk, which means we
		 * need to reset fe_desc_logged in all the flows.
		 */
		if (mac_write_flow_desc(flent, mcip) != 0)
			return (0);
		flent->fe_desc_logged = B_TRUE;
	}

	/*
	 * Regardless of the error, we want to proceed in case we have to
	 * reset fe_desc_logged.
	 */
	(void) mac_write_flow_stats(flent);

	if (mcip != NULL && !(mcip->mci_state_flags & MCIS_DESC_LOGGED))
		flent->fe_desc_logged = B_FALSE;

	return (0);
}

typedef struct i_mac_log_state_s {
	boolean_t	mi_last;
	int		mi_fenable;
	int		mi_lenable;
} i_mac_log_state_t;
/*
 * Walk the mac_impl_ts and log the description for each mac client of
 * this mac, if it hasn't already been done. Additionally, log statistics
 * for the link as well. Walk the flow table and log information for each
 * flow as well. If it is the last walk (mi_last), then we turn off
 * mci_desc_logged (and also fe_desc_logged, if flow logging is on) since
 * we want to log the description if and when logging is restarted.
 */
/*ARGSUSED*/
static uint_t
i_mac_log_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	mac_impl_t		*mip = (mac_impl_t *)val;
	i_mac_log_state_t	*lstate = (i_mac_log_state_t *)arg;
	int			ret;
	mac_client_impl_t	*mcip;

	/*
	 * Only walk the client list for NIC and etherstub.
	 */
	if ((mip->mi_state_flags & MIS_DISABLED) ||
	    ((mip->mi_state_flags & MIS_IS_VNIC) &&
	    (mac_get_lower_mac_handle((mac_handle_t)mip) != NULL)))
		return (MH_WALK_CONTINUE);

	for (mcip = mip->mi_clients_list; mcip != NULL;
	    mcip = mcip->mci_client_next) {
		if (!MCIP_DATAPATH_SETUP(mcip))
			continue;
		if (lstate->mi_lenable) {
			if (!(mcip->mci_state_flags & MCIS_DESC_LOGGED)) {
				ret = mac_write_link_desc(mcip);
				if (ret != 0) {
					/*
					 * We can't terminate it if this is
					 * the last walk, else there might
					 * be some links with mi_desc_logged
					 * set to true, which means their
					 * description won't be logged the
					 * next time logging is started
					 * (similarly for the flows within
					 * such links). We can continue
					 * without walking the flow table
					 * (i.e. to set fe_desc_logged to
					 * false) because we won't have
					 * written any flow stuff for this
					 * link as we haven't logged the
					 * link itself.
					 */
					if (lstate->mi_last)
						return (MH_WALK_CONTINUE);
					else
						return (MH_WALK_TERMINATE);
				}
				mcip->mci_state_flags |= MCIS_DESC_LOGGED;
			}
		}

		if (mac_write_link_stats(mcip) != 0 && !lstate->mi_last)
			return (MH_WALK_TERMINATE);

		if (lstate->mi_last)
			mcip->mci_state_flags &= ~MCIS_DESC_LOGGED;

		if (lstate->mi_fenable) {
			if (mcip->mci_subflow_tab != NULL) {
				(void) mac_flow_walk(mcip->mci_subflow_tab,
				    mac_log_flowinfo, mip);
			}
		}
	}
	return (MH_WALK_CONTINUE);
}

/*
 * The timer thread that runs every mac_logging_interval seconds and logs
 * link and/or flow information.
 */
/* ARGSUSED */
void
mac_log_linkinfo(void *arg)
{
	i_mac_log_state_t	lstate;

	rw_enter(&i_mac_impl_lock, RW_READER);
	if (!mac_flow_log_enable && !mac_link_log_enable) {
		rw_exit(&i_mac_impl_lock);
		return;
	}
	lstate.mi_fenable = mac_flow_log_enable;
	lstate.mi_lenable = mac_link_log_enable;
	lstate.mi_last = B_FALSE;
	rw_exit(&i_mac_impl_lock);

	mod_hash_walk(i_mac_impl_hash, i_mac_log_walker, &lstate);

	rw_enter(&i_mac_impl_lock, RW_WRITER);
	if (mac_flow_log_enable || mac_link_log_enable) {
		mac_logging_timer = timeout(mac_log_linkinfo, NULL,
		    SEC_TO_TICK(mac_logging_interval));
	}
	rw_exit(&i_mac_impl_lock);
}
typedef struct i_mac_fastpath_state_s {
	boolean_t	mf_disable;
	int		mf_err;
} i_mac_fastpath_state_t;

/*ARGSUSED*/
static uint_t
i_mac_fastpath_disable_walker(mod_hash_key_t key, mod_hash_val_t *val,
    void *arg)
{
	i_mac_fastpath_state_t	*state = arg;
	mac_handle_t		mh = (mac_handle_t)val;

	if (state->mf_disable)
		state->mf_err = mac_fastpath_disable(mh);
	else
		mac_fastpath_enable(mh);

	return (state->mf_err == 0 ? MH_WALK_CONTINUE : MH_WALK_TERMINATE);
}

/*
 * Start the logging timer.
 */
int
mac_start_logusage(mac_logtype_t type, uint_t interval)
{
	i_mac_fastpath_state_t	state = {B_TRUE, 0};
	int			err;

	rw_enter(&i_mac_impl_lock, RW_WRITER);
	switch (type) {
	case MAC_LOGTYPE_FLOW:
		if (mac_flow_log_enable) {
			rw_exit(&i_mac_impl_lock);
			return (0);
		}
		/* FALLTHRU */
	case MAC_LOGTYPE_LINK:
		if (mac_link_log_enable) {
			rw_exit(&i_mac_impl_lock);
			return (0);
		}
		break;
	default:
		ASSERT(0);
	}

	/* Disable fastpath */
	mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_disable_walker, &state);
	if ((err = state.mf_err) != 0) {
		/* Reenable fastpath */
		state.mf_disable = B_FALSE;
		state.mf_err = 0;
		mod_hash_walk(i_mac_impl_hash,
		    i_mac_fastpath_disable_walker, &state);
		rw_exit(&i_mac_impl_lock);
		return (err);
	}

	switch (type) {
	case MAC_LOGTYPE_FLOW:
		mac_flow_log_enable = B_TRUE;
		/* FALLTHRU */
	case MAC_LOGTYPE_LINK:
		mac_link_log_enable = B_TRUE;
		break;
	}

	mac_logging_interval = interval;
	rw_exit(&i_mac_impl_lock);
	mac_log_linkinfo(NULL);
	return (0);
}

/*
 * Stop the logging timer if both link and flow logging are turned off.
 */
void
mac_stop_logusage(mac_logtype_t type)
{
	i_mac_log_state_t	lstate;
	i_mac_fastpath_state_t	state = {B_FALSE, 0};

	rw_enter(&i_mac_impl_lock, RW_WRITER);
	lstate.mi_fenable = mac_flow_log_enable;
	lstate.mi_lenable = mac_link_log_enable;

	/* Last walk */
	lstate.mi_last = B_TRUE;

	switch (type) {
	case MAC_LOGTYPE_FLOW:
		if (lstate.mi_fenable) {
			ASSERT(mac_link_log_enable);
			mac_flow_log_enable = B_FALSE;
			mac_link_log_enable = B_FALSE;
			break;
		}
		/* FALLTHRU */
	case MAC_LOGTYPE_LINK:
		if (!lstate.mi_lenable || mac_flow_log_enable) {
			rw_exit(&i_mac_impl_lock);
			return;
		}
		mac_link_log_enable = B_FALSE;
		break;
	default:
		ASSERT(0);
	}

	/* Reenable fastpath */
	mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_disable_walker, &state);

	rw_exit(&i_mac_impl_lock);
	(void) untimeout(mac_logging_timer);
	mac_logging_timer = 0;

	/* Last walk */
	mod_hash_walk(i_mac_impl_hash, i_mac_log_walker, &lstate);
}
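/*
 * For illustration only: a consumer (e.g. the exacct support code) would
 * drive the pair above as sketched below; the 20 second interval is an
 * arbitrary example value, not a default defined here.
 *
 *	if ((err = mac_start_logusage(MAC_LOGTYPE_FLOW, 20)) != 0)
 *		return (err);		// fastpath couldn't be disabled
 *	...
 *	mac_stop_logusage(MAC_LOGTYPE_FLOW);
 */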
/*
 * Walk the rx and tx SRS/SRs for a flow and update the priority value.
 */
void
mac_flow_update_priority(mac_client_impl_t *mcip, flow_entry_t *flent)
{
	pri_t			pri;
	int			count;
	mac_soft_ring_set_t	*mac_srs;

	if (flent->fe_rx_srs_cnt <= 0)
		return;

	if (((mac_soft_ring_set_t *)flent->fe_rx_srs[0])->srs_type ==
	    SRST_FLOW) {
		pri = FLOW_PRIORITY(mcip->mci_min_pri,
		    mcip->mci_max_pri,
		    flent->fe_resource_props.mrp_priority);
	} else {
		pri = mcip->mci_max_pri;
	}

	for (count = 0; count < flent->fe_rx_srs_cnt; count++) {
		mac_srs = flent->fe_rx_srs[count];
		mac_update_srs_priority(mac_srs, pri);
	}
	/*
	 * If we have a Tx SRS, we need to modify all the threads associated
	 * with it.
	 */
	if (flent->fe_tx_srs != NULL)
		mac_update_srs_priority(flent->fe_tx_srs, pri);
}

/*
 * RX and TX rings are reserved according to different semantics depending
 * on the requests from the MAC clients and the type of rings:
 *
 * On the Tx side, by default we reserve individual rings, independently
 * from the groups.
 *
 * On the Rx side, the reservation is at the granularity of the group
 * of rings, and used for v12n level 1 only. It has a special case for the
 * primary client.
 *
 * If a share is allocated to a MAC client, we allocate a TX group and an
 * RX group to the client, and assign TX rings and RX rings to these
 * groups according to information gathered from the driver through
 * the share capability.
 *
 * The foreseeable evolution of Rx rings will handle v12n level 2 and
 * higher to allocate individual rings out of a group and program the hw
 * classifier based on IP address or higher level criteria.
 */

/*
 * mac_reserve_tx_ring()
 * Reserve an unused ring by marking it with the MR_INUSE state.
 * Once reserved, the ring is ready to function.
 *
 * Notes for Hybrid I/O:
 *
 * If a specific ring is needed, it is specified through the desired_ring
 * argument. Otherwise that argument is set to NULL.
 * If the desired ring was previously allocated to another client, this
 * function swaps it with a new ring from the group of unassigned rings.
 */
mac_ring_t *
mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring)
{
	mac_group_t		*group;
	mac_grp_client_t	*mgcp;
	mac_client_impl_t	*mcip;
	mac_soft_ring_set_t	*srs;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	/*
	 * Find an available ring and start it before changing its status.
	 * The unassigned rings are at the end of the mi_tx_groups
	 * array.
	 */
	group = MAC_DEFAULT_TX_GROUP(mip);

	/* Can't take the default ring out of the default group */
	ASSERT(desired_ring != (mac_ring_t *)mip->mi_default_tx_ring);

	if (desired_ring->mr_state == MR_FREE) {
		ASSERT(MAC_GROUP_NO_CLIENT(group));
		if (mac_start_ring(desired_ring) != 0)
			return (NULL);
		return (desired_ring);
	}
	/*
	 * There are clients using this ring, so let's move the clients
	 * away from using this ring.
	 */
	for (mgcp = group->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) {
		mcip = mgcp->mgc_client;
		mac_tx_client_quiesce((mac_client_handle_t)mcip);
		srs = MCIP_TX_SRS(mcip);
		ASSERT(mac_tx_srs_ring_present(srs, desired_ring));
		mac_tx_invoke_callbacks(mcip,
		    (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(srs,
		    desired_ring));
		mac_tx_srs_del_ring(srs, desired_ring);
		mac_tx_client_restart((mac_client_handle_t)mcip);
	}
	return (desired_ring);
}

/*
 * For a reserved group with multiple clients, return the primary client.
 */
static mac_client_impl_t *
mac_get_grp_primary(mac_group_t *grp)
{
	mac_grp_client_t	*mgcp = grp->mrg_clients;
	mac_client_impl_t	*mcip;

	while (mgcp != NULL) {
		mcip = mgcp->mgc_client;
		if (mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC)
			return (mcip);
		mgcp = mgcp->mgc_next;
	}
	return (NULL);
}
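/*
 * Illustrative only: the Hybrid I/O path in i_mac_group_allocate_rings()
 * below uses the reservation like this (simplified from that function):
 *
 *	tmp_ring = mac_reserve_tx_ring(mip, rings[i]);
 *	if (tmp_ring == NULL)
 *		return (ENOSPC);	// couldn't start the ring
 *	ASSERT(tmp_ring == rings[i]);
 *	rv = mac_group_mov_ring(mip, new_group, rings[i]);
 */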
/*
 * Hybrid I/O specifies the ring that should be given to a share.
 * If the ring is already used by clients, then we need to release
 * the ring back to the default group so that we can give it to
 * the share. This means the clients using this ring now get a
 * replacement ring. If there aren't any replacement rings, this
 * function returns a failure.
 */
static int
mac_reclaim_ring_from_grp(mac_impl_t *mip, mac_ring_type_t ring_type,
    mac_ring_t *ring, mac_ring_t **rings, int nrings)
{
	mac_group_t		*group = (mac_group_t *)ring->mr_gh;
	mac_resource_props_t	*mrp;
	mac_client_impl_t	*mcip;
	mac_group_t		*defgrp;
	mac_ring_t		*tring;
	mac_group_t		*tgrp;
	int			i;
	int			j;

	mcip = MAC_GROUP_ONLY_CLIENT(group);
	if (mcip == NULL)
		mcip = mac_get_grp_primary(group);
	ASSERT(mcip != NULL);
	ASSERT(mcip->mci_share == NULL);

	mrp = MCIP_RESOURCE_PROPS(mcip);
	if (ring_type == MAC_RING_TYPE_RX) {
		defgrp = mip->mi_rx_donor_grp;
		if ((mrp->mrp_mask & MRP_RX_RINGS) == 0) {
			/* Need to put this mac client in the default group */
			if (mac_rx_switch_group(mcip, group, defgrp) != 0)
				return (ENOSPC);
		} else {
			/*
			 * Switch this ring with some other ring from
			 * the default group.
			 */
			for (tring = defgrp->mrg_rings; tring != NULL;
			    tring = tring->mr_next) {
				if (tring->mr_index == 0)
					continue;
				for (j = 0; j < nrings; j++) {
					if (rings[j] == tring)
						break;
				}
				if (j >= nrings)
					break;
			}
			if (tring == NULL)
				return (ENOSPC);
			if (mac_group_mov_ring(mip, group, tring) != 0)
				return (ENOSPC);
			if (mac_group_mov_ring(mip, defgrp, ring) != 0) {
				(void) mac_group_mov_ring(mip, defgrp, tring);
				return (ENOSPC);
			}
		}
		ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp);
		return (0);
	}

	defgrp = MAC_DEFAULT_TX_GROUP(mip);
	if (ring == (mac_ring_t *)mip->mi_default_tx_ring) {
		/*
		 * See if we can get a spare ring to replace the default
		 * ring.
		 */
		if (defgrp->mrg_cur_count == 1) {
			/*
			 * Need to get a ring from another client, see if
			 * there are any clients that can be moved to
			 * the default group, thereby freeing some rings.
			 */
			for (i = 0; i < mip->mi_tx_group_count; i++) {
				tgrp = &mip->mi_tx_groups[i];
				if (tgrp->mrg_state ==
				    MAC_GROUP_STATE_REGISTERED) {
					continue;
				}
				mcip = MAC_GROUP_ONLY_CLIENT(tgrp);
				if (mcip == NULL)
					mcip = mac_get_grp_primary(tgrp);
				ASSERT(mcip != NULL);
				mrp = MCIP_RESOURCE_PROPS(mcip);
				if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) {
					ASSERT(tgrp->mrg_cur_count == 1);
					/*
					 * If this ring is part of the
					 * rings asked for by the share we
					 * cannot use it as the default ring.
					 */
					for (j = 0; j < nrings; j++) {
						if (rings[j] == tgrp->mrg_rings)
							break;
					}
					if (j < nrings)
						continue;
					mac_tx_client_quiesce(
					    (mac_client_handle_t)mcip);
					mac_tx_switch_group(mcip, tgrp,
					    defgrp);
					mac_tx_client_restart(
					    (mac_client_handle_t)mcip);
					break;
				}
			}
			/*
			 * All the rings are reserved, can't give up the
			 * default ring.
			 */
			if (defgrp->mrg_cur_count <= 1)
				return (ENOSPC);
		}
		/*
		 * Swap the default ring with another.
		 */
		for (tring = defgrp->mrg_rings; tring != NULL;
		    tring = tring->mr_next) {
			/*
			 * If this ring is part of the rings asked for by the
			 * share we cannot use it as the default ring.
			 */
			for (j = 0; j < nrings; j++) {
				if (rings[j] == tring)
					break;
			}
			if (j >= nrings)
				break;
		}
		ASSERT(tring != NULL);
		mip->mi_default_tx_ring = (mac_ring_handle_t)tring;
		return (0);
	}
	/*
	 * The Tx ring is with a group reserved by a MAC client. See if
	 * we can swap it.
	 */
	ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED);
	mcip = MAC_GROUP_ONLY_CLIENT(group);
	if (mcip == NULL)
		mcip = mac_get_grp_primary(group);
	ASSERT(mcip != NULL);
	mrp = MCIP_RESOURCE_PROPS(mcip);
	mac_tx_client_quiesce((mac_client_handle_t)mcip);
	if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) {
		ASSERT(group->mrg_cur_count == 1);
		/* Put this mac client in the default group */
		mac_tx_switch_group(mcip, group, defgrp);
	} else {
		/*
		 * Switch this ring with some other ring from
		 * the default group.
		 */
		for (tring = defgrp->mrg_rings; tring != NULL;
		    tring = tring->mr_next) {
			if (tring == (mac_ring_t *)mip->mi_default_tx_ring)
				continue;
			/*
			 * If this ring is part of the rings asked for by the
			 * share we cannot use it for swapping.
			 */
			for (j = 0; j < nrings; j++) {
				if (rings[j] == tring)
					break;
			}
			if (j >= nrings)
				break;
		}
		if (tring == NULL) {
			mac_tx_client_restart((mac_client_handle_t)mcip);
			return (ENOSPC);
		}
		if (mac_group_mov_ring(mip, group, tring) != 0) {
			mac_tx_client_restart((mac_client_handle_t)mcip);
			return (ENOSPC);
		}
		if (mac_group_mov_ring(mip, defgrp, ring) != 0) {
			(void) mac_group_mov_ring(mip, defgrp, tring);
			mac_tx_client_restart((mac_client_handle_t)mcip);
			return (ENOSPC);
		}
	}
	mac_tx_client_restart((mac_client_handle_t)mcip);
	ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp);
	return (0);
}

/*
 * Populate a zero-ring group with rings. If the share is non-NULL,
 * the rings are chosen according to that share.
 * Invoked after allocating a new RX or TX group through
 * mac_reserve_rx_group() or mac_reserve_tx_group(), respectively.
 * Returns zero on success, an errno otherwise.
 */
int
i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type,
    mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share,
    uint32_t ringcnt)
{
	mac_ring_t **rings, *ring;
	uint_t nrings;
	int rv = 0, i = 0, j;

	ASSERT((ring_type == MAC_RING_TYPE_RX &&
	    mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) ||
	    (ring_type == MAC_RING_TYPE_TX &&
	    mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC));

	/*
	 * First find the rings to allocate to the group.
	 */
	if (share != NULL) {
		/* get rings through ms_squery() */
		mip->mi_share_capab.ms_squery(share, ring_type, NULL, &nrings);
		ASSERT(nrings != 0);
		rings = kmem_alloc(nrings * sizeof (mac_ring_handle_t),
		    KM_SLEEP);
		mip->mi_share_capab.ms_squery(share, ring_type,
		    (mac_ring_handle_t *)rings, &nrings);
		for (i = 0; i < nrings; i++) {
			/*
			 * If we have given this ring to a non-default
			 * group, we need to check if we can get this
			 * ring back.
			 */
5874 */ 5875 ring = rings[i]; 5876 if (ring->mr_gh != (mac_group_handle_t)src_group || 5877 ring == (mac_ring_t *)mip->mi_default_tx_ring) { 5878 if (mac_reclaim_ring_from_grp(mip, ring_type, 5879 ring, rings, nrings) != 0) { 5880 rv = ENOSPC; 5881 goto bail; 5882 } 5883 } 5884 } 5885 } else { 5886 /* 5887 * Pick one ring from default group. 5888 * 5889 * for now pick the second ring which requires the first ring 5890 * at index 0 to stay in the default group, since it is the 5891 * ring which carries the multicast traffic. 5892 * We need a better way for a driver to indicate this, 5893 * for example a per-ring flag. 5894 */ 5895 rings = kmem_alloc(ringcnt * sizeof (mac_ring_handle_t), 5896 KM_SLEEP); 5897 for (ring = src_group->mrg_rings; ring != NULL; 5898 ring = ring->mr_next) { 5899 if (ring_type == MAC_RING_TYPE_RX && 5900 ring->mr_index == 0) { 5901 continue; 5902 } 5903 if (ring_type == MAC_RING_TYPE_TX && 5904 ring == (mac_ring_t *)mip->mi_default_tx_ring) { 5905 continue; 5906 } 5907 rings[i++] = ring; 5908 if (i == ringcnt) 5909 break; 5910 } 5911 ASSERT(ring != NULL); 5912 nrings = i; 5913 /* Not enough rings as required */ 5914 if (nrings != ringcnt) { 5915 rv = ENOSPC; 5916 goto bail; 5917 } 5918 } 5919 5920 switch (ring_type) { 5921 case MAC_RING_TYPE_RX: 5922 if (src_group->mrg_cur_count - nrings < 1) { 5923 /* we ran out of rings */ 5924 rv = ENOSPC; 5925 goto bail; 5926 } 5927 5928 /* move receive rings to new group */ 5929 for (i = 0; i < nrings; i++) { 5930 rv = mac_group_mov_ring(mip, new_group, rings[i]); 5931 if (rv != 0) { 5932 /* move rings back on failure */ 5933 for (j = 0; j < i; j++) { 5934 (void) mac_group_mov_ring(mip, 5935 src_group, rings[j]); 5936 } 5937 goto bail; 5938 } 5939 } 5940 break; 5941 5942 case MAC_RING_TYPE_TX: { 5943 mac_ring_t *tmp_ring; 5944 5945 /* move the TX rings to the new group */ 5946 for (i = 0; i < nrings; i++) { 5947 /* get the desired ring */ 5948 tmp_ring = mac_reserve_tx_ring(mip, rings[i]); 5949 if (tmp_ring == NULL) { 5950 rv = ENOSPC; 5951 goto bail; 5952 } 5953 ASSERT(tmp_ring == rings[i]); 5954 rv = mac_group_mov_ring(mip, new_group, rings[i]); 5955 if (rv != 0) { 5956 /* cleanup on failure */ 5957 for (j = 0; j < i; j++) { 5958 (void) mac_group_mov_ring(mip, 5959 MAC_DEFAULT_TX_GROUP(mip), 5960 rings[j]); 5961 } 5962 goto bail; 5963 } 5964 } 5965 break; 5966 } 5967 } 5968 5969 /* add group to share */ 5970 if (share != NULL) 5971 mip->mi_share_capab.ms_sadd(share, new_group->mrg_driver); 5972 5973 bail: 5974 /* free temporary array of rings */ 5975 kmem_free(rings, nrings * sizeof (mac_ring_handle_t)); 5976 5977 return (rv); 5978 } 5979 5980 void 5981 mac_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) 5982 { 5983 mac_grp_client_t *mgcp; 5984 5985 for (mgcp = grp->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) { 5986 if (mgcp->mgc_client == mcip) 5987 break; 5988 } 5989 5990 VERIFY(mgcp == NULL); 5991 5992 mgcp = kmem_zalloc(sizeof (mac_grp_client_t), KM_SLEEP); 5993 mgcp->mgc_client = mcip; 5994 mgcp->mgc_next = grp->mrg_clients; 5995 grp->mrg_clients = mgcp; 5996 5997 } 5998 5999 void 6000 mac_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) 6001 { 6002 mac_grp_client_t *mgcp, **pprev; 6003 6004 for (pprev = &grp->mrg_clients, mgcp = *pprev; mgcp != NULL; 6005 pprev = &mgcp->mgc_next, mgcp = *pprev) { 6006 if (mgcp->mgc_client == mcip) 6007 break; 6008 } 6009 6010 ASSERT(mgcp != NULL); 6011 6012 *pprev = mgcp->mgc_next; 6013 kmem_free(mgcp, sizeof (mac_grp_client_t)); 6014 } 6015 6016 /* 6017 
 * mac_reserve_rx_group()
 *
 * Finds an available group and exclusively reserves it for a client.
 * The group is chosen to suit the flow's resource controls (bandwidth and
 * fanout requirements) and the address type.
 * If the requestor is the primary MAC then return the group with the
 * largest number of rings, otherwise the default group when available.
 */
mac_group_t *
mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move)
{
	mac_share_handle_t	share = mcip->mci_share;
	mac_impl_t		*mip = mcip->mci_mip;
	mac_group_t		*grp = NULL;
	int			i;
	int			err = 0;
	mac_address_t		*map;
	mac_resource_props_t	*mrp = MCIP_RESOURCE_PROPS(mcip);
	int			nrings;
	int			donor_grp_rcnt;
	boolean_t		need_exclgrp = B_FALSE;
	int			need_rings = 0;
	mac_group_t		*candidate_grp = NULL;
	mac_client_impl_t	*gclient;
	mac_resource_props_t	*gmrp;
	mac_group_t		*donorgrp = NULL;
	boolean_t		rxhw = mrp->mrp_mask & MRP_RX_RINGS;
	boolean_t		unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC;
	boolean_t		isprimary;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC;

	/*
	 * Check if a group already has this mac address (case of VLANs)
	 * unless we are moving this MAC client from one group to another.
	 */
	if (!move && (map = mac_find_macaddr(mip, mac_addr)) != NULL) {
		if (map->ma_group != NULL)
			return (map->ma_group);
	}
	if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0)
		return (NULL);
	/*
	 * If exclusive open, return NULL which will enable the
	 * caller to use the default group.
	 */
	if (mcip->mci_state_flags & MCIS_EXCLUSIVE)
		return (NULL);

	/* For dynamic groups, default an unspecified ring count to 1 */
	if (rxhw && unspec &&
	    mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
		mrp->mrp_nrxrings = 1;
	}
	/*
	 * For static grouping we allow only rings=0 or leaving the
	 * ring count unspecified.
	 */
	if (rxhw && mrp->mrp_nrxrings > 0 &&
	    mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) {
		return (NULL);
	}
	if (rxhw) {
		/*
		 * We have explicitly asked for a group (with nrxrings,
		 * if unspec).
		 */
		if (unspec || mrp->mrp_nrxrings > 0) {
			need_exclgrp = B_TRUE;
			need_rings = mrp->mrp_nrxrings;
		} else if (mrp->mrp_nrxrings == 0) {
			/*
			 * We have asked for a software group.
			 */
			return (NULL);
		}
	} else if (isprimary && mip->mi_nactiveclients == 1 &&
	    mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
		/*
		 * If the primary is the only active client on this
		 * mip and we have not asked for any rings, we give
		 * it the default group so that the primary gets to
		 * use all the rings.
		 */
		return (NULL);
	}

	/* The group that can donate rings */
	donorgrp = mip->mi_rx_donor_grp;

	/*
	 * The number of rings that the default group can donate.
	 * We need to leave at least one ring.
	 */
	donor_grp_rcnt = donorgrp->mrg_cur_count - 1;

	/*
	 * Try to exclusively reserve a RX group.
6117 * 6118 * For flows requiring HW_DEFAULT_RING (unicast flow of the primary 6119 * client), try to reserve the a non-default RX group and give 6120 * it all the rings from the donor group, except the default ring 6121 * 6122 * For flows requiring HW_RING (unicast flow of other clients), try 6123 * to reserve non-default RX group with the specified number of 6124 * rings, if available. 6125 * 6126 * For flows that have not asked for software or hardware ring, 6127 * try to reserve a non-default group with 1 ring, if available. 6128 */ 6129 for (i = 1; i < mip->mi_rx_group_count; i++) { 6130 grp = &mip->mi_rx_groups[i]; 6131 6132 DTRACE_PROBE3(rx__group__trying, char *, mip->mi_name, 6133 int, grp->mrg_index, mac_group_state_t, grp->mrg_state); 6134 6135 /* 6136 * Check if this group could be a candidate group for 6137 * eviction if we need a group for this MAC client, 6138 * but there aren't any. A candidate group is one 6139 * that didn't ask for an exclusive group, but got 6140 * one and it has enough rings (combined with what 6141 * the donor group can donate) for the new MAC 6142 * client 6143 */ 6144 if (grp->mrg_state >= MAC_GROUP_STATE_RESERVED) { 6145 /* 6146 * If the primary/donor group is not the default 6147 * group, don't bother looking for a candidate group. 6148 * If we don't have enough rings we will check 6149 * if the primary group can be vacated. 6150 */ 6151 if (candidate_grp == NULL && 6152 donorgrp == MAC_DEFAULT_RX_GROUP(mip)) { 6153 ASSERT(!MAC_GROUP_NO_CLIENT(grp)); 6154 gclient = MAC_GROUP_ONLY_CLIENT(grp); 6155 if (gclient == NULL) 6156 gclient = mac_get_grp_primary(grp); 6157 ASSERT(gclient != NULL); 6158 gmrp = MCIP_RESOURCE_PROPS(gclient); 6159 if (gclient->mci_share == NULL && 6160 (gmrp->mrp_mask & MRP_RX_RINGS) == 0 && 6161 (unspec || 6162 (grp->mrg_cur_count + donor_grp_rcnt >= 6163 need_rings))) { 6164 candidate_grp = grp; 6165 } 6166 } 6167 continue; 6168 } 6169 /* 6170 * This group could already be SHARED by other multicast 6171 * flows on this client. In that case, the group would 6172 * be shared and has already been started. 6173 */ 6174 ASSERT(grp->mrg_state != MAC_GROUP_STATE_UNINIT); 6175 6176 if ((grp->mrg_state == MAC_GROUP_STATE_REGISTERED) && 6177 (mac_start_group(grp) != 0)) { 6178 continue; 6179 } 6180 6181 if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) 6182 break; 6183 ASSERT(grp->mrg_cur_count == 0); 6184 6185 /* 6186 * Populate the group. Rings should be taken 6187 * from the donor group. 6188 */ 6189 nrings = rxhw ? need_rings : isprimary ? donor_grp_rcnt: 1; 6190 6191 /* 6192 * If the donor group can't donate, let's just walk and 6193 * see if someone can vacate a group, so that we have 6194 * enough rings for this, unless we already have 6195 * identified a candiate group.. 6196 */ 6197 if (nrings <= donor_grp_rcnt) { 6198 err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, 6199 donorgrp, grp, share, nrings); 6200 if (err == 0) { 6201 /* 6202 * For a share i_mac_group_allocate_rings gets 6203 * the rings from the driver, let's populate 6204 * the property for the client now. 6205 */ 6206 if (share != NULL) { 6207 mac_client_set_rings( 6208 (mac_client_handle_t)mcip, 6209 grp->mrg_cur_count, -1); 6210 } 6211 if (mac_is_primary_client(mcip) && !rxhw) 6212 mip->mi_rx_donor_grp = grp; 6213 break; 6214 } 6215 } 6216 6217 DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, 6218 mip->mi_name, int, grp->mrg_index, int, err); 6219 6220 /* 6221 * It's a dynamic group but the grouping operation 6222 * failed. 
6223 */ 6224 mac_stop_group(grp); 6225 } 6226 /* We didn't find an exclusive group for this MAC client */ 6227 if (i >= mip->mi_rx_group_count) { 6228 6229 if (!need_exclgrp) 6230 return (NULL); 6231 6232 /* 6233 * If we found a candidate group then we switch the 6234 * MAC client from the candidate_group to the default 6235 * group and give the group to this MAC client. If 6236 * we didn't find a candidate_group, check if the 6237 * primary is in its own group and if it can make way 6238 * for this MAC client. 6239 */ 6240 if (candidate_grp == NULL && 6241 donorgrp != MAC_DEFAULT_RX_GROUP(mip) && 6242 donorgrp->mrg_cur_count >= need_rings) { 6243 candidate_grp = donorgrp; 6244 } 6245 if (candidate_grp != NULL) { 6246 boolean_t prim_grp = B_FALSE; 6247 6248 /* 6249 * Switch the MAC client from the candidate group 6250 * to the default group.. If this group was the 6251 * donor group, then after the switch we need 6252 * to update the donor group too. 6253 */ 6254 grp = candidate_grp; 6255 gclient = MAC_GROUP_ONLY_CLIENT(grp); 6256 if (gclient == NULL) 6257 gclient = mac_get_grp_primary(grp); 6258 if (grp == mip->mi_rx_donor_grp) 6259 prim_grp = B_TRUE; 6260 if (mac_rx_switch_group(gclient, grp, 6261 MAC_DEFAULT_RX_GROUP(mip)) != 0) { 6262 return (NULL); 6263 } 6264 if (prim_grp) { 6265 mip->mi_rx_donor_grp = 6266 MAC_DEFAULT_RX_GROUP(mip); 6267 donorgrp = MAC_DEFAULT_RX_GROUP(mip); 6268 } 6269 6270 6271 /* 6272 * Now give this group with the required rings 6273 * to this MAC client. 6274 */ 6275 ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); 6276 if (mac_start_group(grp) != 0) 6277 return (NULL); 6278 6279 if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) 6280 return (grp); 6281 6282 donor_grp_rcnt = donorgrp->mrg_cur_count - 1; 6283 ASSERT(grp->mrg_cur_count == 0); 6284 ASSERT(donor_grp_rcnt >= need_rings); 6285 err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, 6286 donorgrp, grp, share, need_rings); 6287 if (err == 0) { 6288 /* 6289 * For a share i_mac_group_allocate_rings gets 6290 * the rings from the driver, let's populate 6291 * the property for the client now. 6292 */ 6293 if (share != NULL) { 6294 mac_client_set_rings( 6295 (mac_client_handle_t)mcip, 6296 grp->mrg_cur_count, -1); 6297 } 6298 DTRACE_PROBE2(rx__group__reserved, 6299 char *, mip->mi_name, int, grp->mrg_index); 6300 return (grp); 6301 } 6302 DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, 6303 mip->mi_name, int, grp->mrg_index, int, err); 6304 mac_stop_group(grp); 6305 } 6306 return (NULL); 6307 } 6308 ASSERT(grp != NULL); 6309 6310 DTRACE_PROBE2(rx__group__reserved, 6311 char *, mip->mi_name, int, grp->mrg_index); 6312 return (grp); 6313 } 6314 6315 /* 6316 * mac_rx_release_group() 6317 * 6318 * This is called when there are no clients left for the group. 6319 * The group is stopped and marked MAC_GROUP_STATE_REGISTERED, 6320 * and if it is a non default group, the shares are removed and 6321 * all rings are assigned back to default group. 6322 */ 6323 void 6324 mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) 6325 { 6326 mac_impl_t *mip = mcip->mci_mip; 6327 mac_ring_t *ring; 6328 6329 ASSERT(group != MAC_DEFAULT_RX_GROUP(mip)); 6330 6331 if (mip->mi_rx_donor_grp == group) 6332 mip->mi_rx_donor_grp = MAC_DEFAULT_RX_GROUP(mip); 6333 6334 /* 6335 * This is the case where there are no clients left. Any 6336 * SRS etc on this group have also be quiesced. 
6337 */ 6338 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 6339 if (ring->mr_classify_type == MAC_HW_CLASSIFIER) { 6340 ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); 6341 /* 6342 * Remove the SRS associated with the HW ring. 6343 * As a result, polling will be disabled. 6344 */ 6345 ring->mr_srs = NULL; 6346 } 6347 ASSERT(group->mrg_state < MAC_GROUP_STATE_RESERVED || 6348 ring->mr_state == MR_INUSE); 6349 if (ring->mr_state == MR_INUSE) { 6350 mac_stop_ring(ring); 6351 ring->mr_flag = 0; 6352 } 6353 } 6354 6355 /* remove group from share */ 6356 if (mcip->mci_share != NULL) { 6357 mip->mi_share_capab.ms_sremove(mcip->mci_share, 6358 group->mrg_driver); 6359 } 6360 6361 if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6362 mac_ring_t *ring; 6363 6364 /* 6365 * Rings were dynamically allocated to group. 6366 * Move rings back to default group. 6367 */ 6368 while ((ring = group->mrg_rings) != NULL) { 6369 (void) mac_group_mov_ring(mip, mip->mi_rx_donor_grp, 6370 ring); 6371 } 6372 } 6373 mac_stop_group(group); 6374 /* 6375 * Possible improvement: See if we can assign the group just released 6376 * to a another client of the mip 6377 */ 6378 } 6379 6380 /* 6381 * When we move the primary's mac address between groups, we need to also 6382 * take all the clients sharing the same mac address along with it (VLANs) 6383 * We remove the mac address for such clients from the group after quiescing 6384 * them. When we add the mac address we restart the client. Note that 6385 * the primary's mac address is removed from the group after all the 6386 * other clients sharing the address are removed. Similarly, the primary's 6387 * mac address is added before all the other client's mac address are 6388 * added. While grp is the group where the clients reside, tgrp is 6389 * the group where the addresses have to be added. 6390 */ 6391 static void 6392 mac_rx_move_macaddr_prim(mac_client_impl_t *mcip, mac_group_t *grp, 6393 mac_group_t *tgrp, uint8_t *maddr, boolean_t add) 6394 { 6395 mac_impl_t *mip = mcip->mci_mip; 6396 mac_grp_client_t *mgcp = grp->mrg_clients; 6397 mac_client_impl_t *gmcip; 6398 boolean_t prim; 6399 6400 prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; 6401 6402 /* 6403 * If the clients are in a non-default group, we just have to 6404 * walk the group's client list. If it is in the default group 6405 * (which will be shared by other clients as well, we need to 6406 * check if the unicast address matches mcip's unicast. 6407 */ 6408 while (mgcp != NULL) { 6409 gmcip = mgcp->mgc_client; 6410 if (gmcip != mcip && 6411 (grp != MAC_DEFAULT_RX_GROUP(mip) || 6412 mcip->mci_unicast == gmcip->mci_unicast)) { 6413 if (!add) { 6414 mac_rx_client_quiesce( 6415 (mac_client_handle_t)gmcip); 6416 (void) mac_remove_macaddr(mcip->mci_unicast); 6417 } else { 6418 (void) mac_add_macaddr(mip, tgrp, maddr, prim); 6419 mac_rx_client_restart( 6420 (mac_client_handle_t)gmcip); 6421 } 6422 } 6423 mgcp = mgcp->mgc_next; 6424 } 6425 } 6426 6427 6428 /* 6429 * Move the MAC address from fgrp to tgrp. If this is the primary client, 6430 * we need to take any VLANs etc. together too. 
6431 */ 6432 static int 6433 mac_rx_move_macaddr(mac_client_impl_t *mcip, mac_group_t *fgrp, 6434 mac_group_t *tgrp) 6435 { 6436 mac_impl_t *mip = mcip->mci_mip; 6437 uint8_t maddr[MAXMACADDRLEN]; 6438 int err = 0; 6439 boolean_t prim; 6440 boolean_t multiclnt = B_FALSE; 6441 6442 mac_rx_client_quiesce((mac_client_handle_t)mcip); 6443 ASSERT(mcip->mci_unicast != NULL); 6444 bcopy(mcip->mci_unicast->ma_addr, maddr, mcip->mci_unicast->ma_len); 6445 6446 prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; 6447 if (mcip->mci_unicast->ma_nusers > 1) { 6448 mac_rx_move_macaddr_prim(mcip, fgrp, NULL, maddr, B_FALSE); 6449 multiclnt = B_TRUE; 6450 } 6451 ASSERT(mcip->mci_unicast->ma_nusers == 1); 6452 err = mac_remove_macaddr(mcip->mci_unicast); 6453 if (err != 0) { 6454 mac_rx_client_restart((mac_client_handle_t)mcip); 6455 if (multiclnt) { 6456 mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, 6457 B_TRUE); 6458 } 6459 return (err); 6460 } 6461 /* 6462 * Program the H/W Classifier first, if this fails we need 6463 * not proceed with the other stuff. 6464 */ 6465 if ((err = mac_add_macaddr(mip, tgrp, maddr, prim)) != 0) { 6466 /* Revert back the H/W Classifier */ 6467 if ((err = mac_add_macaddr(mip, fgrp, maddr, prim)) != 0) { 6468 /* 6469 * This should not fail now since it worked earlier, 6470 * should we panic? 6471 */ 6472 cmn_err(CE_WARN, 6473 "mac_rx_switch_group: switching %p back" 6474 " to group %p failed!!", (void *)mcip, 6475 (void *)fgrp); 6476 } 6477 mac_rx_client_restart((mac_client_handle_t)mcip); 6478 if (multiclnt) { 6479 mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, 6480 B_TRUE); 6481 } 6482 return (err); 6483 } 6484 mcip->mci_unicast = mac_find_macaddr(mip, maddr); 6485 mac_rx_client_restart((mac_client_handle_t)mcip); 6486 if (multiclnt) 6487 mac_rx_move_macaddr_prim(mcip, fgrp, tgrp, maddr, B_TRUE); 6488 return (err); 6489 } 6490 6491 /* 6492 * Switch the MAC client from one group to another. This means we need 6493 * to remove the MAC address from the group, remove the MAC client, 6494 * teardown the SRSs and revert the group state. Then, we add the client 6495 * to the destination group, set the SRSs, and add the MAC address to the 6496 * group. 6497 */ 6498 int 6499 mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, 6500 mac_group_t *tgrp) 6501 { 6502 int err; 6503 mac_group_state_t next_state; 6504 mac_client_impl_t *group_only_mcip; 6505 mac_client_impl_t *gmcip; 6506 mac_impl_t *mip = mcip->mci_mip; 6507 mac_grp_client_t *mgcp; 6508 6509 ASSERT(fgrp == mcip->mci_flent->fe_rx_ring_group); 6510 6511 if ((err = mac_rx_move_macaddr(mcip, fgrp, tgrp)) != 0) 6512 return (err); 6513 6514 /* 6515 * The group might be reserved, but SRSs may not be set up, e.g. 6516 * primary and its vlans using a reserved group. 
6517 */ 6518 if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED && 6519 MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { 6520 mac_rx_srs_group_teardown(mcip->mci_flent, B_TRUE); 6521 } 6522 if (fgrp != MAC_DEFAULT_RX_GROUP(mip)) { 6523 mgcp = fgrp->mrg_clients; 6524 while (mgcp != NULL) { 6525 gmcip = mgcp->mgc_client; 6526 mgcp = mgcp->mgc_next; 6527 mac_group_remove_client(fgrp, gmcip); 6528 mac_group_add_client(tgrp, gmcip); 6529 gmcip->mci_flent->fe_rx_ring_group = tgrp; 6530 } 6531 mac_release_rx_group(mcip, fgrp); 6532 ASSERT(MAC_GROUP_NO_CLIENT(fgrp)); 6533 mac_set_group_state(fgrp, MAC_GROUP_STATE_REGISTERED); 6534 } else { 6535 mac_group_remove_client(fgrp, mcip); 6536 mac_group_add_client(tgrp, mcip); 6537 mcip->mci_flent->fe_rx_ring_group = tgrp; 6538 /* 6539 * If there are other clients (VLANs) sharing this address 6540 * we should be here only for the primary. 6541 */ 6542 if (mcip->mci_unicast->ma_nusers > 1) { 6543 /* 6544 * We need to move all the clients that are using 6545 * this h/w address. 6546 */ 6547 mgcp = fgrp->mrg_clients; 6548 while (mgcp != NULL) { 6549 gmcip = mgcp->mgc_client; 6550 mgcp = mgcp->mgc_next; 6551 if (mcip->mci_unicast == gmcip->mci_unicast) { 6552 mac_group_remove_client(fgrp, gmcip); 6553 mac_group_add_client(tgrp, gmcip); 6554 gmcip->mci_flent->fe_rx_ring_group = 6555 tgrp; 6556 } 6557 } 6558 } 6559 /* 6560 * The default group will still take the multicast, 6561 * broadcast traffic etc., so it won't go to 6562 * MAC_GROUP_STATE_REGISTERED. 6563 */ 6564 if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED) 6565 mac_rx_group_unmark(fgrp, MR_CONDEMNED); 6566 mac_set_group_state(fgrp, MAC_GROUP_STATE_SHARED); 6567 } 6568 next_state = mac_group_next_state(tgrp, &group_only_mcip, 6569 MAC_DEFAULT_RX_GROUP(mip), B_TRUE); 6570 mac_set_group_state(tgrp, next_state); 6571 /* 6572 * If the destination group is reserved, setup the SRSs etc. 6573 */ 6574 if (tgrp->mrg_state == MAC_GROUP_STATE_RESERVED) { 6575 mac_rx_srs_group_setup(mcip, mcip->mci_flent, SRST_LINK); 6576 mac_fanout_setup(mcip, mcip->mci_flent, 6577 MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, mcip, NULL, 6578 NULL); 6579 mac_rx_group_unmark(tgrp, MR_INCIPIENT); 6580 } else { 6581 mac_rx_switch_grp_to_sw(tgrp); 6582 } 6583 return (0); 6584 } 6585 6586 /* 6587 * Reserves a TX group for the specified share. Invoked by mac_tx_srs_setup() 6588 * when a share was allocated to the client. 6589 */ 6590 mac_group_t * 6591 mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move) 6592 { 6593 mac_impl_t *mip = mcip->mci_mip; 6594 mac_group_t *grp = NULL; 6595 int rv; 6596 int i; 6597 int err; 6598 mac_group_t *defgrp; 6599 mac_share_handle_t share = mcip->mci_share; 6600 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 6601 int nrings; 6602 int defnrings; 6603 boolean_t need_exclgrp = B_FALSE; 6604 int need_rings = 0; 6605 mac_group_t *candidate_grp = NULL; 6606 mac_client_impl_t *gclient; 6607 mac_resource_props_t *gmrp; 6608 boolean_t txhw = mrp->mrp_mask & MRP_TX_RINGS; 6609 boolean_t unspec = mrp->mrp_mask & MRP_TXRINGS_UNSPEC; 6610 boolean_t isprimary; 6611 6612 isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; 6613 /* 6614 * When we come here for a VLAN on the primary (dladm create-vlan), 6615 * we need to pair it along with the primary (to keep it consistent 6616 * with the RX side). So, we check if the primary is already assigned 6617 * to a group and return the group if so. The other way is also 6618 * true, i.e. the VLAN is already created and now we are plumbing 6619 * the primary. 
6620 */ 6621 if (!move && isprimary) { 6622 for (gclient = mip->mi_clients_list; gclient != NULL; 6623 gclient = gclient->mci_client_next) { 6624 if (gclient->mci_flent->fe_type & FLOW_PRIMARY_MAC && 6625 gclient->mci_flent->fe_tx_ring_group != NULL) { 6626 return (gclient->mci_flent->fe_tx_ring_group); 6627 } 6628 } 6629 } 6630 6631 if (mip->mi_tx_groups == NULL || mip->mi_tx_group_count == 0) 6632 return (NULL); 6633 6634 /* For dynamic groups, default unspec to 1 */ 6635 if (txhw && unspec && 6636 mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6637 mrp->mrp_ntxrings = 1; 6638 } 6639 /* 6640 * For static grouping we allow only specifying rings=0 and 6641 * unspecified 6642 */ 6643 if (txhw && mrp->mrp_ntxrings > 0 && 6644 mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC) { 6645 return (NULL); 6646 } 6647 6648 if (txhw) { 6649 /* 6650 * We have explicitly asked for a group (with ntxrings, 6651 * if unspec). 6652 */ 6653 if (unspec || mrp->mrp_ntxrings > 0) { 6654 need_exclgrp = B_TRUE; 6655 need_rings = mrp->mrp_ntxrings; 6656 } else if (mrp->mrp_ntxrings == 0) { 6657 /* 6658 * We have asked for a software group. 6659 */ 6660 return (NULL); 6661 } 6662 } 6663 defgrp = MAC_DEFAULT_TX_GROUP(mip); 6664 /* 6665 * The number of rings that the default group can donate. 6666 * We need to leave at least one ring - the default ring - in 6667 * this group. 6668 */ 6669 defnrings = defgrp->mrg_cur_count - 1; 6670 6671 /* 6672 * Primary gets default group unless explicitly told not 6673 * to (i.e. rings > 0). 6674 */ 6675 if (isprimary && !need_exclgrp) 6676 return (NULL); 6677 6678 nrings = (mrp->mrp_mask & MRP_TX_RINGS) != 0 ? mrp->mrp_ntxrings : 1; 6679 for (i = 0; i < mip->mi_tx_group_count; i++) { 6680 grp = &mip->mi_tx_groups[i]; 6681 if ((grp->mrg_state == MAC_GROUP_STATE_RESERVED) || 6682 (grp->mrg_state == MAC_GROUP_STATE_UNINIT)) { 6683 /* 6684 * Select a candidate for replacement if we don't 6685 * get an exclusive group. A candidate group is one 6686 * that didn't ask for an exclusive group, but got 6687 * one and it has enough rings (combined with what 6688 * the default group can donate) for the new MAC 6689 * client. 6690 */ 6691 if (grp->mrg_state == MAC_GROUP_STATE_RESERVED && 6692 candidate_grp == NULL) { 6693 gclient = MAC_GROUP_ONLY_CLIENT(grp); 6694 if (gclient == NULL) 6695 gclient = mac_get_grp_primary(grp); 6696 gmrp = MCIP_RESOURCE_PROPS(gclient); 6697 if (gclient->mci_share == NULL && 6698 (gmrp->mrp_mask & MRP_TX_RINGS) == 0 && 6699 (unspec || 6700 (grp->mrg_cur_count + defnrings) >= 6701 need_rings)) { 6702 candidate_grp = grp; 6703 } 6704 } 6705 continue; 6706 } 6707 /* 6708 * If the default can't donate let's just walk and 6709 * see if someone can vacate a group, so that we have 6710 * enough rings for this. 6711 */ 6712 if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC || 6713 nrings <= defnrings) { 6714 if (grp->mrg_state == MAC_GROUP_STATE_REGISTERED) { 6715 rv = mac_start_group(grp); 6716 ASSERT(rv == 0); 6717 } 6718 break; 6719 } 6720 } 6721 6722 /* The default group */ 6723 if (i >= mip->mi_tx_group_count) { 6724 /* 6725 * If we need an exclusive group and have identified a 6726 * candidate group we switch the MAC client from the 6727 * candidate group to the default group and give the 6728 * candidate group to this client. 6729 */ 6730 if (need_exclgrp && candidate_grp != NULL) { 6731 /* 6732 * Switch the MAC client from the candidate group 6733 * to the default group. 
6734 */ 6735 grp = candidate_grp; 6736 gclient = MAC_GROUP_ONLY_CLIENT(grp); 6737 if (gclient == NULL) 6738 gclient = mac_get_grp_primary(grp); 6739 mac_tx_client_quiesce((mac_client_handle_t)gclient); 6740 mac_tx_switch_group(gclient, grp, defgrp); 6741 mac_tx_client_restart((mac_client_handle_t)gclient); 6742 6743 /* 6744 * Give the candidate group with the specified number 6745 * of rings to this MAC client. 6746 */ 6747 ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); 6748 rv = mac_start_group(grp); 6749 ASSERT(rv == 0); 6750 6751 if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) 6752 return (grp); 6753 6754 ASSERT(grp->mrg_cur_count == 0); 6755 ASSERT(defgrp->mrg_cur_count > need_rings); 6756 6757 err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, 6758 defgrp, grp, share, need_rings); 6759 if (err == 0) { 6760 /* 6761 * For a share i_mac_group_allocate_rings gets 6762 * the rings from the driver, let's populate 6763 * the property for the client now. 6764 */ 6765 if (share != NULL) { 6766 mac_client_set_rings( 6767 (mac_client_handle_t)mcip, -1, 6768 grp->mrg_cur_count); 6769 } 6770 mip->mi_tx_group_free--; 6771 return (grp); 6772 } 6773 DTRACE_PROBE3(tx__group__reserve__alloc__rings, char *, 6774 mip->mi_name, int, grp->mrg_index, int, err); 6775 mac_stop_group(grp); 6776 } 6777 return (NULL); 6778 } 6779 /* 6780 * We got an exclusive group, but it is not dynamic. 6781 */ 6782 if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) { 6783 mip->mi_tx_group_free--; 6784 return (grp); 6785 } 6786 6787 rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, defgrp, grp, 6788 share, nrings); 6789 if (rv != 0) { 6790 DTRACE_PROBE3(tx__group__reserve__alloc__rings, 6791 char *, mip->mi_name, int, grp->mrg_index, int, rv); 6792 mac_stop_group(grp); 6793 return (NULL); 6794 } 6795 /* 6796 * For a share i_mac_group_allocate_rings gets the rings from the 6797 * driver, let's populate the property for the client now. 6798 */ 6799 if (share != NULL) { 6800 mac_client_set_rings((mac_client_handle_t)mcip, -1, 6801 grp->mrg_cur_count); 6802 } 6803 mip->mi_tx_group_free--; 6804 return (grp); 6805 } 6806 6807 void 6808 mac_release_tx_group(mac_client_impl_t *mcip, mac_group_t *grp) 6809 { 6810 mac_impl_t *mip = mcip->mci_mip; 6811 mac_share_handle_t share = mcip->mci_share; 6812 mac_ring_t *ring; 6813 mac_soft_ring_set_t *srs = MCIP_TX_SRS(mcip); 6814 mac_group_t *defgrp; 6815 6816 defgrp = MAC_DEFAULT_TX_GROUP(mip); 6817 if (srs != NULL) { 6818 if (srs->srs_soft_ring_count > 0) { 6819 for (ring = grp->mrg_rings; ring != NULL; 6820 ring = ring->mr_next) { 6821 ASSERT(mac_tx_srs_ring_present(srs, ring)); 6822 mac_tx_invoke_callbacks(mcip, 6823 (mac_tx_cookie_t) 6824 mac_tx_srs_get_soft_ring(srs, ring)); 6825 mac_tx_srs_del_ring(srs, ring); 6826 } 6827 } else { 6828 ASSERT(srs->srs_tx.st_arg2 != NULL); 6829 srs->srs_tx.st_arg2 = NULL; 6830 mac_srs_stat_delete(srs); 6831 } 6832 } 6833 if (share != NULL) 6834 mip->mi_share_capab.ms_sremove(share, grp->mrg_driver); 6835 6836 /* move the ring back to the pool */ 6837 if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6838 while ((ring = grp->mrg_rings) != NULL) 6839 (void) mac_group_mov_ring(mip, defgrp, ring); 6840 } 6841 mac_stop_group(grp); 6842 mip->mi_tx_group_free++; 6843 } 6844 6845 /* 6846 * Disassociate a MAC client from a group, i.e go through the rings in the 6847 * group and delete all the soft rings tied to them. 
6848 */ 6849 static void 6850 mac_tx_dismantle_soft_rings(mac_group_t *fgrp, flow_entry_t *flent) 6851 { 6852 mac_client_impl_t *mcip = flent->fe_mcip; 6853 mac_soft_ring_set_t *tx_srs; 6854 mac_srs_tx_t *tx; 6855 mac_ring_t *ring; 6856 6857 tx_srs = flent->fe_tx_srs; 6858 tx = &tx_srs->srs_tx; 6859 6860 /* Single ring case we haven't created any soft rings */ 6861 if (tx->st_mode == SRS_TX_BW || tx->st_mode == SRS_TX_SERIALIZE || 6862 tx->st_mode == SRS_TX_DEFAULT) { 6863 tx->st_arg2 = NULL; 6864 mac_srs_stat_delete(tx_srs); 6865 /* Fanout case, where we have to dismantle the soft rings */ 6866 } else { 6867 for (ring = fgrp->mrg_rings; ring != NULL; 6868 ring = ring->mr_next) { 6869 ASSERT(mac_tx_srs_ring_present(tx_srs, ring)); 6870 mac_tx_invoke_callbacks(mcip, 6871 (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(tx_srs, 6872 ring)); 6873 mac_tx_srs_del_ring(tx_srs, ring); 6874 } 6875 ASSERT(tx->st_arg2 == NULL); 6876 } 6877 } 6878 6879 /* 6880 * Switch the MAC client from one group to another. This means we need 6881 * to remove the MAC client, teardown the SRSs and revert the group state. 6882 * Then, we add the client to the destination roup, set the SRSs etc. 6883 */ 6884 void 6885 mac_tx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, 6886 mac_group_t *tgrp) 6887 { 6888 mac_client_impl_t *group_only_mcip; 6889 mac_impl_t *mip = mcip->mci_mip; 6890 flow_entry_t *flent = mcip->mci_flent; 6891 mac_group_t *defgrp; 6892 mac_grp_client_t *mgcp; 6893 mac_client_impl_t *gmcip; 6894 flow_entry_t *gflent; 6895 6896 defgrp = MAC_DEFAULT_TX_GROUP(mip); 6897 ASSERT(fgrp == flent->fe_tx_ring_group); 6898 6899 if (fgrp == defgrp) { 6900 /* 6901 * If this is the primary we need to find any VLANs on 6902 * the primary and move them too. 6903 */ 6904 mac_group_remove_client(fgrp, mcip); 6905 mac_tx_dismantle_soft_rings(fgrp, flent); 6906 if (mcip->mci_unicast->ma_nusers > 1) { 6907 mgcp = fgrp->mrg_clients; 6908 while (mgcp != NULL) { 6909 gmcip = mgcp->mgc_client; 6910 mgcp = mgcp->mgc_next; 6911 if (mcip->mci_unicast != gmcip->mci_unicast) 6912 continue; 6913 mac_tx_client_quiesce( 6914 (mac_client_handle_t)gmcip); 6915 6916 gflent = gmcip->mci_flent; 6917 mac_group_remove_client(fgrp, gmcip); 6918 mac_tx_dismantle_soft_rings(fgrp, gflent); 6919 6920 mac_group_add_client(tgrp, gmcip); 6921 gflent->fe_tx_ring_group = tgrp; 6922 /* We could directly set this to SHARED */ 6923 tgrp->mrg_state = mac_group_next_state(tgrp, 6924 &group_only_mcip, defgrp, B_FALSE); 6925 6926 mac_tx_srs_group_setup(gmcip, gflent, 6927 SRST_LINK); 6928 mac_fanout_setup(gmcip, gflent, 6929 MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver, 6930 gmcip, NULL, NULL); 6931 6932 mac_tx_client_restart( 6933 (mac_client_handle_t)gmcip); 6934 } 6935 } 6936 if (MAC_GROUP_NO_CLIENT(fgrp)) { 6937 mac_ring_t *ring; 6938 int cnt; 6939 int ringcnt; 6940 6941 fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED; 6942 /* 6943 * Additionally, we also need to stop all 6944 * the rings in the default group, except 6945 * the default ring. The reason being 6946 * this group won't be released since it is 6947 * the default group, so the rings won't 6948 * be stopped otherwise. 
6949 */ 6950 ringcnt = fgrp->mrg_cur_count; 6951 ring = fgrp->mrg_rings; 6952 for (cnt = 0; cnt < ringcnt; cnt++) { 6953 if (ring->mr_state == MR_INUSE && 6954 ring != 6955 (mac_ring_t *)mip->mi_default_tx_ring) { 6956 mac_stop_ring(ring); 6957 ring->mr_flag = 0; 6958 } 6959 ring = ring->mr_next; 6960 } 6961 } else if (MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { 6962 fgrp->mrg_state = MAC_GROUP_STATE_RESERVED; 6963 } else { 6964 ASSERT(fgrp->mrg_state == MAC_GROUP_STATE_SHARED); 6965 } 6966 } else { 6967 /* 6968 * We could have VLANs sharing the non-default group with 6969 * the primary. 6970 */ 6971 mgcp = fgrp->mrg_clients; 6972 while (mgcp != NULL) { 6973 gmcip = mgcp->mgc_client; 6974 mgcp = mgcp->mgc_next; 6975 if (gmcip == mcip) 6976 continue; 6977 mac_tx_client_quiesce((mac_client_handle_t)gmcip); 6978 gflent = gmcip->mci_flent; 6979 6980 mac_group_remove_client(fgrp, gmcip); 6981 mac_tx_dismantle_soft_rings(fgrp, gflent); 6982 6983 mac_group_add_client(tgrp, gmcip); 6984 gflent->fe_tx_ring_group = tgrp; 6985 /* We could directly set this to SHARED */ 6986 tgrp->mrg_state = mac_group_next_state(tgrp, 6987 &group_only_mcip, defgrp, B_FALSE); 6988 mac_tx_srs_group_setup(gmcip, gflent, SRST_LINK); 6989 mac_fanout_setup(gmcip, gflent, 6990 MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver, 6991 gmcip, NULL, NULL); 6992 6993 mac_tx_client_restart((mac_client_handle_t)gmcip); 6994 } 6995 mac_group_remove_client(fgrp, mcip); 6996 mac_release_tx_group(mcip, fgrp); 6997 fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED; 6998 } 6999 7000 /* Add it to the tgroup */ 7001 mac_group_add_client(tgrp, mcip); 7002 flent->fe_tx_ring_group = tgrp; 7003 tgrp->mrg_state = mac_group_next_state(tgrp, &group_only_mcip, 7004 defgrp, B_FALSE); 7005 7006 mac_tx_srs_group_setup(mcip, flent, SRST_LINK); 7007 mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), 7008 mac_rx_deliver, mcip, NULL, NULL); 7009 } 7010 7011 /* 7012 * This is a 1-time control path activity initiated by the client (IP). 7013 * The mac perimeter protects against other simultaneous control activities, 7014 * for example an ioctl that attempts to change the degree of fanout and 7015 * increase or decrease the number of softrings associated with this Tx SRS. 
7016 */ 7017 static mac_tx_notify_cb_t * 7018 mac_client_tx_notify_add(mac_client_impl_t *mcip, 7019 mac_tx_notify_t notify, void *arg) 7020 { 7021 mac_cb_info_t *mcbi; 7022 mac_tx_notify_cb_t *mtnfp; 7023 7024 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 7025 7026 mtnfp = kmem_zalloc(sizeof (mac_tx_notify_cb_t), KM_SLEEP); 7027 mtnfp->mtnf_fn = notify; 7028 mtnfp->mtnf_arg = arg; 7029 mtnfp->mtnf_link.mcb_objp = mtnfp; 7030 mtnfp->mtnf_link.mcb_objsize = sizeof (mac_tx_notify_cb_t); 7031 mtnfp->mtnf_link.mcb_flags = MCB_TX_NOTIFY_CB_T; 7032 7033 mcbi = &mcip->mci_tx_notify_cb_info; 7034 mutex_enter(mcbi->mcbi_lockp); 7035 mac_callback_add(mcbi, &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link); 7036 mutex_exit(mcbi->mcbi_lockp); 7037 return (mtnfp); 7038 } 7039 7040 static void 7041 mac_client_tx_notify_remove(mac_client_impl_t *mcip, mac_tx_notify_cb_t *mtnfp) 7042 { 7043 mac_cb_info_t *mcbi; 7044 mac_cb_t **cblist; 7045 7046 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 7047 7048 if (!mac_callback_find(&mcip->mci_tx_notify_cb_info, 7049 &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link)) { 7050 cmn_err(CE_WARN, 7051 "mac_client_tx_notify_remove: callback not " 7052 "found, mcip 0x%p mtnfp 0x%p", (void *)mcip, (void *)mtnfp); 7053 return; 7054 } 7055 7056 mcbi = &mcip->mci_tx_notify_cb_info; 7057 cblist = &mcip->mci_tx_notify_cb_list; 7058 mutex_enter(mcbi->mcbi_lockp); 7059 if (mac_callback_remove(mcbi, cblist, &mtnfp->mtnf_link)) 7060 kmem_free(mtnfp, sizeof (mac_tx_notify_cb_t)); 7061 else 7062 mac_callback_remove_wait(&mcip->mci_tx_notify_cb_info); 7063 mutex_exit(mcbi->mcbi_lockp); 7064 } 7065 7066 /* 7067 * mac_client_tx_notify(): 7068 * call to add and remove flow control callback routine. 7069 */ 7070 mac_tx_notify_handle_t 7071 mac_client_tx_notify(mac_client_handle_t mch, mac_tx_notify_t callb_func, 7072 void *ptr) 7073 { 7074 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 7075 mac_tx_notify_cb_t *mtnfp = NULL; 7076 7077 i_mac_perim_enter(mcip->mci_mip); 7078 7079 if (callb_func != NULL) { 7080 /* Add a notify callback */ 7081 mtnfp = mac_client_tx_notify_add(mcip, callb_func, ptr); 7082 } else { 7083 mac_client_tx_notify_remove(mcip, (mac_tx_notify_cb_t *)ptr); 7084 } 7085 i_mac_perim_exit(mcip->mci_mip); 7086 7087 return ((mac_tx_notify_handle_t)mtnfp); 7088 } 7089 7090 void 7091 mac_bridge_vectors(mac_bridge_tx_t txf, mac_bridge_rx_t rxf, 7092 mac_bridge_ref_t reff, mac_bridge_ls_t lsf) 7093 { 7094 mac_bridge_tx_cb = txf; 7095 mac_bridge_rx_cb = rxf; 7096 mac_bridge_ref_cb = reff; 7097 mac_bridge_ls_cb = lsf; 7098 } 7099 7100 int 7101 mac_bridge_set(mac_handle_t mh, mac_handle_t link) 7102 { 7103 mac_impl_t *mip = (mac_impl_t *)mh; 7104 int retv; 7105 7106 mutex_enter(&mip->mi_bridge_lock); 7107 if (mip->mi_bridge_link == NULL) { 7108 mip->mi_bridge_link = link; 7109 retv = 0; 7110 } else { 7111 retv = EBUSY; 7112 } 7113 mutex_exit(&mip->mi_bridge_lock); 7114 if (retv == 0) { 7115 mac_poll_state_change(mh, B_FALSE); 7116 mac_capab_update(mh); 7117 } 7118 return (retv); 7119 } 7120 7121 /* 7122 * Disable bridging on the indicated link. 
7123 */ 7124 void 7125 mac_bridge_clear(mac_handle_t mh, mac_handle_t link) 7126 { 7127 mac_impl_t *mip = (mac_impl_t *)mh; 7128 7129 mutex_enter(&mip->mi_bridge_lock); 7130 ASSERT(mip->mi_bridge_link == link); 7131 mip->mi_bridge_link = NULL; 7132 mutex_exit(&mip->mi_bridge_lock); 7133 mac_poll_state_change(mh, B_TRUE); 7134 mac_capab_update(mh); 7135 } 7136 7137 void 7138 mac_no_active(mac_handle_t mh) 7139 { 7140 mac_impl_t *mip = (mac_impl_t *)mh; 7141 7142 i_mac_perim_enter(mip); 7143 mip->mi_state_flags |= MIS_NO_ACTIVE; 7144 i_mac_perim_exit(mip); 7145 } 7146 7147 /* 7148 * Walk the primary VLAN clients whenever the primary's rings property 7149 * changes and update the mac_resource_props_t for the VLAN's client. 7150 * We need to do this since we don't support setting these properties 7151 * on the primary's VLAN clients, but the VLAN clients have to 7152 * follow the primary w.r.t the rings property; 7153 */ 7154 void 7155 mac_set_prim_vlan_rings(mac_impl_t *mip, mac_resource_props_t *mrp) 7156 { 7157 mac_client_impl_t *vmcip; 7158 mac_resource_props_t *vmrp; 7159 7160 for (vmcip = mip->mi_clients_list; vmcip != NULL; 7161 vmcip = vmcip->mci_client_next) { 7162 if (!(vmcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) || 7163 mac_client_vid((mac_client_handle_t)vmcip) == 7164 VLAN_ID_NONE) { 7165 continue; 7166 } 7167 vmrp = MCIP_RESOURCE_PROPS(vmcip); 7168 7169 vmrp->mrp_nrxrings = mrp->mrp_nrxrings; 7170 if (mrp->mrp_mask & MRP_RX_RINGS) 7171 vmrp->mrp_mask |= MRP_RX_RINGS; 7172 else if (vmrp->mrp_mask & MRP_RX_RINGS) 7173 vmrp->mrp_mask &= ~MRP_RX_RINGS; 7174 7175 vmrp->mrp_ntxrings = mrp->mrp_ntxrings; 7176 if (mrp->mrp_mask & MRP_TX_RINGS) 7177 vmrp->mrp_mask |= MRP_TX_RINGS; 7178 else if (vmrp->mrp_mask & MRP_TX_RINGS) 7179 vmrp->mrp_mask &= ~MRP_TX_RINGS; 7180 7181 if (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) 7182 vmrp->mrp_mask |= MRP_RXRINGS_UNSPEC; 7183 else 7184 vmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; 7185 7186 if (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) 7187 vmrp->mrp_mask |= MRP_TXRINGS_UNSPEC; 7188 else 7189 vmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; 7190 } 7191 } 7192 7193 /* 7194 * We are adding or removing ring(s) from a group. The source for taking 7195 * rings is the default group. The destination for giving rings back is 7196 * the default group. 7197 */ 7198 int 7199 mac_group_ring_modify(mac_client_impl_t *mcip, mac_group_t *group, 7200 mac_group_t *defgrp) 7201 { 7202 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 7203 uint_t modify; 7204 int count; 7205 mac_ring_t *ring; 7206 mac_ring_t *next; 7207 mac_impl_t *mip = mcip->mci_mip; 7208 mac_ring_t **rings; 7209 uint_t ringcnt; 7210 int i = 0; 7211 boolean_t rx_group = group->mrg_type == MAC_RING_TYPE_RX; 7212 int start; 7213 int end; 7214 mac_group_t *tgrp; 7215 int j; 7216 int rv = 0; 7217 7218 /* 7219 * If we are asked for just a group, we give 1 ring, else 7220 * the specified number of rings. 7221 */ 7222 if (rx_group) { 7223 ringcnt = (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) ? 1: 7224 mrp->mrp_nrxrings; 7225 } else { 7226 ringcnt = (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) ? 1: 7227 mrp->mrp_ntxrings; 7228 } 7229 7230 /* don't allow modifying rings for a share for now. 
	 */
	ASSERT(mcip->mci_share == NULL);

	if (ringcnt == group->mrg_cur_count)
		return (0);

	if (group->mrg_cur_count > ringcnt) {
		modify = group->mrg_cur_count - ringcnt;
		if (rx_group) {
			if (mip->mi_rx_donor_grp == group) {
				ASSERT(mac_is_primary_client(mcip));
				mip->mi_rx_donor_grp = defgrp;
			} else {
				defgrp = mip->mi_rx_donor_grp;
			}
		}
		ring = group->mrg_rings;
		rings = kmem_alloc(modify * sizeof (mac_ring_handle_t),
		    KM_SLEEP);
		j = 0;
		for (count = 0; count < modify; count++) {
			next = ring->mr_next;
			rv = mac_group_mov_ring(mip, defgrp, ring);
			if (rv != 0) {
				/* cleanup on failure */
				for (j = 0; j < count; j++) {
					(void) mac_group_mov_ring(mip, group,
					    rings[j]);
				}
				break;
			}
			rings[j++] = ring;
			ring = next;
		}
		kmem_free(rings, modify * sizeof (mac_ring_handle_t));
		return (rv);
	}
	if (ringcnt >= MAX_RINGS_PER_GROUP)
		return (EINVAL);

	modify = ringcnt - group->mrg_cur_count;

	if (rx_group) {
		if (group != mip->mi_rx_donor_grp)
			defgrp = mip->mi_rx_donor_grp;
		else
			/*
			 * This is the donor group with all the remaining
			 * rings. The default group now gets to be the donor.
			 */
			mip->mi_rx_donor_grp = defgrp;
		start = 1;
		end = mip->mi_rx_group_count;
	} else {
		start = 0;
		end = mip->mi_tx_group_count - 1;
	}
	/*
	 * If the default group doesn't have enough rings, let's see if we
	 * can take rings given to an h/w client that doesn't need them.
	 * For now, we just see if there is any one client that can donate
	 * all the required rings.
	 */
	if (defgrp->mrg_cur_count < (modify + 1)) {
		for (i = start; i < end; i++) {
			if (rx_group) {
				tgrp = &mip->mi_rx_groups[i];
				if (tgrp == group || tgrp->mrg_state <
				    MAC_GROUP_STATE_RESERVED) {
					continue;
				}
				mcip = MAC_GROUP_ONLY_CLIENT(tgrp);
				if (mcip == NULL)
					mcip = mac_get_grp_primary(tgrp);
				ASSERT(mcip != NULL);
				mrp = MCIP_RESOURCE_PROPS(mcip);
				if ((mrp->mrp_mask & MRP_RX_RINGS) != 0)
					continue;
				if ((tgrp->mrg_cur_count +
				    defgrp->mrg_cur_count) < (modify + 1)) {
					continue;
				}
				if (mac_rx_switch_group(mcip, tgrp,
				    defgrp) != 0) {
					return (ENOSPC);
				}
			} else {
				tgrp = &mip->mi_tx_groups[i];
				if (tgrp == group || tgrp->mrg_state <
				    MAC_GROUP_STATE_RESERVED) {
					continue;
				}
				mcip = MAC_GROUP_ONLY_CLIENT(tgrp);
				if (mcip == NULL)
					mcip = mac_get_grp_primary(tgrp);
				mrp = MCIP_RESOURCE_PROPS(mcip);
				if ((mrp->mrp_mask & MRP_TX_RINGS) != 0)
					continue;
				if ((tgrp->mrg_cur_count +
				    defgrp->mrg_cur_count) < (modify + 1)) {
					continue;
				}
				/* OK, we can switch this to s/w */
				mac_tx_client_quiesce(
				    (mac_client_handle_t)mcip);
				mac_tx_switch_group(mcip, tgrp, defgrp);
				mac_tx_client_restart(
				    (mac_client_handle_t)mcip);
			}
		}
		if (defgrp->mrg_cur_count < (modify + 1))
			return (ENOSPC);
	}
	if ((rv = i_mac_group_allocate_rings(mip, group->mrg_type, defgrp,
	    group, mcip->mci_share, modify)) != 0) {
		return (rv);
	}
	return (0);
}

/*
 * Given the poolname in mac_resource_props, find the cpupart
 * that is associated with this pool. The cpupart will be used
 * later for finding the cpus to be bound to the networking threads.
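 * The caller must hold pool_lock() across the call (asserted below),
 * so that the pool configuration cannot change during the lookup.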
7354 * 7355 * use_default is set B_TRUE if pools are enabled and pool_default 7356 * is returned. This avoids a 2nd lookup to set the poolname 7357 * for pool-effective. 7358 * 7359 * returns: 7360 * 7361 * NULL - pools are disabled or if the 'cpus' property is set. 7362 * cpupart of pool_default - pools are enabled and the pool 7363 * is not available or poolname is blank 7364 * cpupart of named pool - pools are enabled and the pool 7365 * is available. 7366 */ 7367 cpupart_t * 7368 mac_pset_find(mac_resource_props_t *mrp, boolean_t *use_default) 7369 { 7370 pool_t *pool; 7371 cpupart_t *cpupart; 7372 7373 *use_default = B_FALSE; 7374 7375 /* CPUs property is set */ 7376 if (mrp->mrp_mask & MRP_CPUS) 7377 return (NULL); 7378 7379 ASSERT(pool_lock_held()); 7380 7381 /* Pools are disabled, no pset */ 7382 if (pool_state == POOL_DISABLED) 7383 return (NULL); 7384 7385 /* Pools property is set */ 7386 if (mrp->mrp_mask & MRP_POOL) { 7387 if ((pool = pool_lookup_pool_by_name(mrp->mrp_pool)) == NULL) { 7388 /* Pool not found */ 7389 DTRACE_PROBE1(mac_pset_find_no_pool, char *, 7390 mrp->mrp_pool); 7391 *use_default = B_TRUE; 7392 pool = pool_default; 7393 } 7394 /* Pools property is not set */ 7395 } else { 7396 *use_default = B_TRUE; 7397 pool = pool_default; 7398 } 7399 7400 /* Find the CPU pset that corresponds to the pool */ 7401 mutex_enter(&cpu_lock); 7402 if ((cpupart = cpupart_find(pool->pool_pset->pset_id)) == NULL) { 7403 DTRACE_PROBE1(mac_find_pset_no_pset, psetid_t, 7404 pool->pool_pset->pset_id); 7405 } 7406 mutex_exit(&cpu_lock); 7407 7408 return (cpupart); 7409 } 7410 7411 void 7412 mac_set_pool_effective(boolean_t use_default, cpupart_t *cpupart, 7413 mac_resource_props_t *mrp, mac_resource_props_t *emrp) 7414 { 7415 ASSERT(pool_lock_held()); 7416 7417 if (cpupart != NULL) { 7418 emrp->mrp_mask |= MRP_POOL; 7419 if (use_default) { 7420 (void) strcpy(emrp->mrp_pool, 7421 "pool_default"); 7422 } else { 7423 ASSERT(strlen(mrp->mrp_pool) != 0); 7424 (void) strcpy(emrp->mrp_pool, 7425 mrp->mrp_pool); 7426 } 7427 } else { 7428 emrp->mrp_mask &= ~MRP_POOL; 7429 bzero(emrp->mrp_pool, MAXPATHLEN); 7430 } 7431 } 7432 7433 struct mac_pool_arg { 7434 char mpa_poolname[MAXPATHLEN]; 7435 pool_event_t mpa_what; 7436 }; 7437 7438 /*ARGSUSED*/ 7439 static uint_t 7440 mac_pool_link_update(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 7441 { 7442 struct mac_pool_arg *mpa = arg; 7443 mac_impl_t *mip = (mac_impl_t *)val; 7444 mac_client_impl_t *mcip; 7445 mac_resource_props_t *mrp, *emrp; 7446 boolean_t pool_update = B_FALSE; 7447 boolean_t pool_clear = B_FALSE; 7448 boolean_t use_default = B_FALSE; 7449 cpupart_t *cpupart = NULL; 7450 7451 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 7452 i_mac_perim_enter(mip); 7453 for (mcip = mip->mi_clients_list; mcip != NULL; 7454 mcip = mcip->mci_client_next) { 7455 pool_update = B_FALSE; 7456 pool_clear = B_FALSE; 7457 use_default = B_FALSE; 7458 mac_client_get_resources((mac_client_handle_t)mcip, mrp); 7459 emrp = MCIP_EFFECTIVE_PROPS(mcip); 7460 7461 /* 7462 * When pools are enabled 7463 */ 7464 if ((mpa->mpa_what == POOL_E_ENABLE) && 7465 ((mrp->mrp_mask & MRP_CPUS) == 0)) { 7466 mrp->mrp_mask |= MRP_POOL; 7467 pool_update = B_TRUE; 7468 } 7469 7470 /* 7471 * When pools are disabled 7472 */ 7473 if ((mpa->mpa_what == POOL_E_DISABLE) && 7474 ((mrp->mrp_mask & MRP_CPUS) == 0)) { 7475 mrp->mrp_mask |= MRP_POOL; 7476 pool_clear = B_TRUE; 7477 } 7478 7479 /* 7480 * Look for links with the pool property set and the poolname 7481 * matching the one which is 
changing.
		 */
		if (strcmp(mrp->mrp_pool, mpa->mpa_poolname) == 0) {
			/*
			 * The pool associated with the link has changed.
			 */
			if (mpa->mpa_what == POOL_E_CHANGE) {
				mrp->mrp_mask |= MRP_POOL;
				pool_update = B_TRUE;
			}
		}

		/*
		 * This link is associated with pool_default and
		 * pool_default has changed.
		 */
		if ((mpa->mpa_what == POOL_E_CHANGE) &&
		    (strcmp(emrp->mrp_pool, "pool_default") == 0) &&
		    (strcmp(mpa->mpa_poolname, "pool_default") == 0)) {
			mrp->mrp_mask |= MRP_POOL;
			pool_update = B_TRUE;
		}

		/*
		 * Get new list of cpus for the pool, bind network
		 * threads to new list of cpus and update resources.
		 */
		if (pool_update) {
			if (MCIP_DATAPATH_SETUP(mcip)) {
				pool_lock();
				cpupart = mac_pset_find(mrp, &use_default);
				mac_fanout_setup(mcip, mcip->mci_flent, mrp,
				    mac_rx_deliver, mcip, NULL, cpupart);
				mac_set_pool_effective(use_default, cpupart,
				    mrp, emrp);
				pool_unlock();
			}
			mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip),
			    B_FALSE);
		}

		/*
		 * Clear the effective pool and bind network threads
		 * to any available CPU.
		 */
		if (pool_clear) {
			if (MCIP_DATAPATH_SETUP(mcip)) {
				emrp->mrp_mask &= ~MRP_POOL;
				bzero(emrp->mrp_pool, MAXPATHLEN);
				mac_fanout_setup(mcip, mcip->mci_flent, mrp,
				    mac_rx_deliver, mcip, NULL, NULL);
			}
			mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip),
			    B_FALSE);
		}
	}
	i_mac_perim_exit(mip);
	kmem_free(mrp, sizeof (*mrp));
	return (MH_WALK_CONTINUE);
}

static void
mac_pool_update(void *arg)
{
	mod_hash_walk(i_mac_impl_hash, mac_pool_link_update, arg);
	kmem_free(arg, sizeof (struct mac_pool_arg));
}

/*
 * Callback function to be executed when a noteworthy pool event
 * takes place.
 */
/* ARGSUSED */
static void
mac_pool_event_cb(pool_event_t what, poolid_t id, void *arg)
{
	pool_t			*pool;
	char			*poolname = NULL;
	struct mac_pool_arg	*mpa;

	pool_lock();
	mpa = kmem_zalloc(sizeof (struct mac_pool_arg), KM_SLEEP);

	switch (what) {
	case POOL_E_ENABLE:
	case POOL_E_DISABLE:
		break;

	case POOL_E_CHANGE:
		pool = pool_lookup_pool_by_id(id);
		if (pool == NULL) {
			kmem_free(mpa, sizeof (struct mac_pool_arg));
			pool_unlock();
			return;
		}
		pool_get_name(pool, &poolname);
		(void) strlcpy(mpa->mpa_poolname, poolname,
		    sizeof (mpa->mpa_poolname));
		break;

	default:
		kmem_free(mpa, sizeof (struct mac_pool_arg));
		pool_unlock();
		return;
	}
	pool_unlock();

	mpa->mpa_what = what;

	mac_pool_update(mpa);
}

/*
 * Set the effective rings property. This could be called from
 * datapath_setup/datapath_teardown or set-linkprop.
 * If the group is reserved we just go ahead and set the effective rings.
 * Additionally, for TX this could mean the default group has lost/gained
 * some rings, so if the default group is reserved, we need to adjust the
 * effective rings for the default group clients. For RX, if we are working
 * with the non-default group, we just need to reset the effective props
 * for the default group clients.
7602 */ 7603 void 7604 mac_set_rings_effective(mac_client_impl_t *mcip) 7605 { 7606 mac_impl_t *mip = mcip->mci_mip; 7607 mac_group_t *grp; 7608 mac_group_t *defgrp; 7609 flow_entry_t *flent = mcip->mci_flent; 7610 mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip); 7611 mac_grp_client_t *mgcp; 7612 mac_client_impl_t *gmcip; 7613 7614 grp = flent->fe_rx_ring_group; 7615 if (grp != NULL) { 7616 defgrp = MAC_DEFAULT_RX_GROUP(mip); 7617 /* 7618 * If we have reserved a group, set the effective rings 7619 * to the ring count in the group. 7620 */ 7621 if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) { 7622 emrp->mrp_mask |= MRP_RX_RINGS; 7623 emrp->mrp_nrxrings = grp->mrg_cur_count; 7624 } 7625 7626 /* 7627 * We go through the clients in the shared group and 7628 * reset the effective properties. It is possible this 7629 * might have already been done for some client (i.e. 7630 * if some client is being moved to a group that is 7631 * already shared). The case where the default group is 7632 * RESERVED is taken care of above (note in the RX side if 7633 * there is a non-default group, the default group is always 7634 * SHARED). 7635 */ 7636 if (grp != defgrp || grp->mrg_state == MAC_GROUP_STATE_SHARED) { 7637 if (grp->mrg_state == MAC_GROUP_STATE_SHARED) 7638 mgcp = grp->mrg_clients; 7639 else 7640 mgcp = defgrp->mrg_clients; 7641 while (mgcp != NULL) { 7642 gmcip = mgcp->mgc_client; 7643 emrp = MCIP_EFFECTIVE_PROPS(gmcip); 7644 if (emrp->mrp_mask & MRP_RX_RINGS) { 7645 emrp->mrp_mask &= ~MRP_RX_RINGS; 7646 emrp->mrp_nrxrings = 0; 7647 } 7648 mgcp = mgcp->mgc_next; 7649 } 7650 } 7651 } 7652 7653 /* Now the TX side */ 7654 grp = flent->fe_tx_ring_group; 7655 if (grp != NULL) { 7656 defgrp = MAC_DEFAULT_TX_GROUP(mip); 7657 7658 if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) { 7659 emrp->mrp_mask |= MRP_TX_RINGS; 7660 emrp->mrp_ntxrings = grp->mrg_cur_count; 7661 } else if (grp->mrg_state == MAC_GROUP_STATE_SHARED) { 7662 mgcp = grp->mrg_clients; 7663 while (mgcp != NULL) { 7664 gmcip = mgcp->mgc_client; 7665 emrp = MCIP_EFFECTIVE_PROPS(gmcip); 7666 if (emrp->mrp_mask & MRP_TX_RINGS) { 7667 emrp->mrp_mask &= ~MRP_TX_RINGS; 7668 emrp->mrp_ntxrings = 0; 7669 } 7670 mgcp = mgcp->mgc_next; 7671 } 7672 } 7673 7674 /* 7675 * If the group is not the default group and the default 7676 * group is reserved, the ring count in the default group 7677 * might have changed, update it. 7678 */ 7679 if (grp != defgrp && 7680 defgrp->mrg_state == MAC_GROUP_STATE_RESERVED) { 7681 gmcip = MAC_GROUP_ONLY_CLIENT(defgrp); 7682 emrp = MCIP_EFFECTIVE_PROPS(gmcip); 7683 emrp->mrp_ntxrings = defgrp->mrg_cur_count; 7684 } 7685 } 7686 emrp = MCIP_EFFECTIVE_PROPS(mcip); 7687 } 7688 7689 /* 7690 * Check if the primary is in the default group. If so, see if we 7691 * can give it a an exclusive group now that another client is 7692 * being configured. We take the primary out of the default group 7693 * because the multicast/broadcast packets for the all the clients 7694 * will land in the default ring in the default group which means 7695 * any client in the default group, even if it is the only on in 7696 * the group, will lose exclusive access to the rings, hence 7697 * polling. 
7698 */ 7699 mac_client_impl_t * 7700 mac_check_primary_relocation(mac_client_impl_t *mcip, boolean_t rxhw) 7701 { 7702 mac_impl_t *mip = mcip->mci_mip; 7703 mac_group_t *defgrp = MAC_DEFAULT_RX_GROUP(mip); 7704 flow_entry_t *flent = mcip->mci_flent; 7705 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 7706 uint8_t *mac_addr; 7707 mac_group_t *ngrp; 7708 7709 /* 7710 * Check if the primary is in the default group, if not 7711 * or if it is explicitly configured to be in the default 7712 * group OR set the RX rings property, return. 7713 */ 7714 if (flent->fe_rx_ring_group != defgrp || mrp->mrp_mask & MRP_RX_RINGS) 7715 return (NULL); 7716 7717 /* 7718 * If the new client needs an exclusive group and we 7719 * don't have another for the primary, return. 7720 */ 7721 if (rxhw && mip->mi_rxhwclnt_avail < 2) 7722 return (NULL); 7723 7724 mac_addr = flent->fe_flow_desc.fd_dst_mac; 7725 /* 7726 * We call this when we are setting up the datapath for 7727 * the first non-primary. 7728 */ 7729 ASSERT(mip->mi_nactiveclients == 2); 7730 /* 7731 * OK, now we have the primary that needs to be relocated. 7732 */ 7733 ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE); 7734 if (ngrp == NULL) 7735 return (NULL); 7736 if (mac_rx_switch_group(mcip, defgrp, ngrp) != 0) { 7737 mac_stop_group(ngrp); 7738 return (NULL); 7739 } 7740 return (mcip); 7741 } 7742