/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2015 Joyent, Inc.
 */

/*
 * MAC Services Module
 *
 * The GLDv3 framework locking - The MAC layer
 * --------------------------------------------
 *
 * The MAC layer is central to the GLD framework and can provide the locking
 * framework needed for itself and for the use of MAC clients. MAC end points
 * are fairly disjoint and don't share a lot of state. So a coarse grained
 * multi-threading scheme is to single thread all create/modify/delete or set
 * type of control operations on a per mac end point basis while allowing data
 * threads to run concurrently.
 *
 * Control operations (set) that modify a mac end point are always serialized
 * on a per mac end point basis; we have at most one such thread per mac end
 * point at a time.
 *
 * All other operations that are not serialized are essentially multi-threaded.
 * For example, a control operation (get) like getting statistics may not care
 * about reading values atomically, and data threads may send or receive data
 * concurrently. Mostly these types of operations don't modify the control
 * state. Any state these operations care about is protected using traditional
 * locks.
 *
 * The perimeter only serializes serial operations. It does not imply there
 * aren't any other concurrent operations. However a serialized operation may
 * sometimes need to make sure it is the only thread. In this case it needs
 * to use reference counting mechanisms to cv_wait until any current data
 * threads are done.
 *
 * The mac layer itself does not hold any locks across a call to another layer.
 * The perimeter is however held across a down call to the driver to make the
 * whole control operation atomic with respect to other control operations.
 * Also the data path and get type control operations may proceed concurrently.
 * These operations synchronize with the single serial operation on a given mac
 * end point using regular locks. The perimeter ensures that conflicting
 * operations, like say a mac_multicast_add and a mac_multicast_remove on the
 * same mac end point, don't interfere with each other and also ensures that
 * the changes in the mac layer and the call to the underlying driver to say
 * add a multicast address are done atomically without interference from a
 * thread trying to delete the same address.
 *
 * For example, consider
 * mac_multicst_add()
 * {
 *	mac_perimeter_enter();	    serialize all control operations
 *
 *	grab list lock		    protect against access by data threads
 *	add to list
 *	drop list lock
 *
 *	call driver's mi_multicst
 *
 *	mac_perimeter_exit();
 * }
 *
 * To lessen the number of serialization locks and simplify the lock hierarchy,
 * we serialize all the control operations on a per mac end point by using a
 * single serialization lock called the perimeter. We allow recursive entry
 * into the perimeter to facilitate use of this mechanism by both the mac
 * client and the MAC layer itself.
 *
 * MAC client means an entity that does an operation on a mac handle
 * obtained from a mac_open/mac_client_open. Similarly MAC driver means
 * an entity that does an operation on a mac handle obtained from a
 * mac_register. An entity could be both client and driver but on different
 * handles (e.g. aggr) and should only make the corresponding mac interface
 * calls, i.e. mac driver interface or mac client interface, as appropriate
 * for that mac handle.
 *
 * General rules.
 * -------------
 *
 * R1. The lock order of upcall threads is naturally opposite to downcall
 * threads. Hence upcalls must not hold any locks across layers for fear of
 * recursive lock enter and lock order violation. This applies to all layers.
 *
 * R2. The perimeter is just another lock. Since it is held in the down
 * direction, acquiring the perimeter in an upcall is prohibited as it would
 * cause a deadlock. This applies to all layers.
 *
 * Note that upcalls that need to grab the mac perimeter (for example
 * mac_notify upcalls) can still achieve that by posting the request to a
 * thread, which can then grab all the required perimeters and locks in the
 * right global order. Note that in the above example the mac layer itself
 * won't grab the mac perimeter in the mac_notify upcall, instead the upcall
 * to the client must do that. Please see the aggr code for an example.
 *
 * MAC client rules
 * ----------------
 *
 * R3. A MAC client may use the MAC provided perimeter facility to serialize
 * control operations on a per mac end point. It does this by acquiring
 * and holding the perimeter across a sequence of calls to the mac layer.
 * This ensures atomicity across the entire block of mac calls. In this
 * model the MAC client must not hold any client locks across the calls to
 * the mac layer. This model, sketched below, is the preferred solution.
 *
 * R4. However if a MAC client has a lot of global state across all mac end
 * points the per mac end point serialization may not be sufficient. In this
 * case the client may choose to use global locks or use its own serialization.
 * To avoid deadlocks, these client layer locks held across the mac calls
 * in the control path must never be acquired by the data path for the reason
 * mentioned below.
 *
 * (Assume that a control operation that holds a client lock blocks in the
 * mac layer waiting for upcall reference counts to drop to zero. If an upcall
 * data thread that holds this reference count then tries to acquire the same
 * client lock, it will deadlock.)
 *
 * A MAC client may follow either the R3 model or the R4 model, but can't
 * mix both. In the former, the hierarchy is Perim -> client locks, but in
 * the latter it is client locks -> Perim.
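 *
 * As an illustration of the R3 model, a hypothetical client could bracket
 * a block of mac calls as follows (mycp_t and the mycp_* names are
 * illustrative, not part of the framework):
 *
 *	mycp_reconfigure(mycp_t *cp)
 *	{
 *		mac_perim_handle_t	mph;
 *
 *		mac_perim_enter_by_mh(cp->cp_mh, &mph);
 *		... a sequence of mac calls, atomic as a block ...
 *		mac_perim_exit(mph);
 *	}
 *
 * No client locks are held across the mac calls; the perimeter alone
 * provides the serialization.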
 *
 * R5. MAC clients must make MAC calls (excluding data calls) in a cv_wait'able
 * context since they may block while trying to acquire the perimeter.
 * In addition some calls may block waiting for upcall refcnts to come down to
 * zero.
 *
 * R6. MAC clients must make sure that they are single threaded and all threads
 * from the top (in particular data threads) have finished before calling
 * mac_client_close. The MAC framework does not track the number of client
 * threads using the mac client handle. Also mac clients must make sure
 * they have undone all the control operations before calling mac_client_close.
 * For example mac_unicast_remove/mac_multicast_remove to undo the corresponding
 * mac_unicast_add/mac_multicast_add.
 *
 * MAC framework rules
 * -------------------
 *
 * R7. The mac layer itself must not hold any mac layer locks (except the mac
 * perimeter) across a call to any other layer from the mac layer. The call to
 * any other layer could be via mi_* entry points, classifier entry points into
 * the driver or via upcall pointers into layers above. The mac perimeter may
 * be acquired or held only in the down direction, e.g. when calling into
 * a mi_* driver entry point to provide atomicity of the operation.
 *
 * R8. Since it is not guaranteed (see R14) that drivers won't hold locks across
 * mac driver interfaces, the MAC layer must provide a cut out for control
 * interfaces like upcall notifications and start them in a separate thread.
 *
 * R9. Note that locking order also implies a plumbing order. For example
 * VNICs are allowed to be created over aggrs, but not vice-versa. An attempt
 * to plumb in any other order must be failed at mac_open time, otherwise it
 * could lead to deadlocks due to inverse locking order.
 *
 * R10. MAC driver interfaces must not block since the driver could call them
 * in interrupt context.
 *
 * R11. Walkers must preferably not hold any locks while calling walker
 * callbacks. Instead these can operate on reference counts. In simple
 * callbacks it may be ok to hold a lock and call the callbacks, but this is
 * harder to maintain in the general case of arbitrary callbacks.
 *
 * R12. The MAC layer must protect upcall notification callbacks using reference
 * counts rather than holding locks across the callbacks.
 *
 * R13. Given the variety of drivers, it is preferable if the MAC layer can make
 * sure that any pointers (such as mac ring pointers) it passes to the driver
 * remain valid until mac unregister time. Currently the mac layer achieves
 * this by using generation numbers for rings and freeing the mac rings only
 * at unregister time. The MAC layer must provide a layer of indirection and
 * must not expose underlying driver rings or driver data structures/pointers
 * directly to MAC clients.
 *
 * MAC driver rules
 * ----------------
 *
 * R14. It would be preferable if MAC drivers don't hold any locks across any
 * mac call. However at a minimum they must not hold any locks across data
 * upcalls. They must also make sure that all references to mac data structures
 * are cleaned up and that they are single threaded at mac_unregister time.
 *
 * R15. MAC driver interfaces don't block, so any lengthy action may be done
 * asynchronously in a separate thread, as for example when handling
 * notifications.
 * The driver must not assume that the action is complete when the call
 * returns.
 *
 * R16. Drivers must maintain a generation number per Rx ring, and pass it
 * back to mac_rx_ring(). They are expected to increment the generation
 * number whenever the ring's stop routine is invoked.
 * See comments in mac_rx_ring().
 *
 * R17. Similarly mi_stop is another synchronization point and the driver must
 * ensure that all upcalls are done and there won't be any future upcalls
 * before returning from mi_stop.
 *
 * R18. The driver may assume that all set/modify control operations via
 * the mi_* entry points are single threaded on a per mac end point basis.
 *
 * Lock and Perimeter hierarchy scenarios
 * ---------------------------------------
 *
 * i_mac_impl_lock -> mi_rw_lock -> srs_lock -> s_ring_lock[i_mac_tx_srs_notify]
 *
 * ft_lock -> fe_lock [mac_flow_lookup]
 *
 * mi_rw_lock -> fe_lock [mac_bcast_send]
 *
 * srs_lock -> mac_bw_lock [mac_rx_srs_drain_bw]
 *
 * cpu_lock -> mac_srs_g_lock -> srs_lock -> s_ring_lock [mac_walk_srs_and_bind]
 *
 * i_dls_devnet_lock -> mac layer locks [dls_devnet_rename]
 *
 * Perimeters are ordered P1 -> P2 -> P3 from top to bottom in order of mac
 * client to driver. In the case of clients that explicitly use the mac
 * provided perimeter mechanism for their serialization, the hierarchy is
 * Perimeter -> mac layer locks, since the client never holds any locks across
 * the mac calls. In the case of clients that use their own locks the hierarchy
 * is Client locks -> Mac Perim -> Mac layer locks. The client never explicitly
 * calls mac_perim_enter/exit in this case.
 *
 * Subflow creation rules
 * ---------------------------
 * o In case of a user specified cpulist present on the underlying link and
 *   flows, the flow's cpulist must be a subset of the underlying link's.
 * o In case of a user specified fanout mode present on link and flow, the
 *   subflow fanout count has to be less than or equal to that of the
 *   underlying link. The cpu-bindings for the subflows will be a subset of
 *   the underlying link.
 * o In case no cpulist is specified on either the underlying link or the
 *   flow, the underlying link relies on a MAC tunable to provide out of the
 *   box fanout. The subflow will have no cpulist (the subflow will be
 *   unbound).
 * o In case no cpulist is specified on the underlying link, a subflow can
 *   carry either a user-specified cpulist or fanout count. The cpu-bindings
 *   for the subflow will not adhere to the restriction that they need to be
 *   a subset of the underlying link's.
 * o In case the underlying link is carrying either a user specified cpulist
 *   or fanout mode and the subflow is unspecified, the subflow will be
 *   created unbound.
 * o While creating unbound subflows, bandwidth mode changes attempt to
 *   figure out a right fanout count. In such cases the fanout count will
 *   override the unbound cpu-binding behavior.
 * o In addition to this, while cycling between flow and link properties, we
 *   impose a restriction that if a link property has a subflow with
 *   user-specified attributes, we will not allow changing the link property.
 *   The administrator needs to reset all the user specified properties for
 *   the subflows before attempting a link property change.
 * Some of the above rules can be overridden by specifying additional command
 * line options while creating or modifying link or subflow properties.
 *
 * Datapath
 * --------
 *
 * For information on the datapath, the world of soft rings, hardware rings,
 * how it is structured, and the path of an mblk_t between a driver and a mac
 * client, see mac_sched.c.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/id_space.h>
#include <sys/esunddi.h>
#include <sys/stat.h>
#include <sys/mkdev.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/dlpi.h>
#include <sys/list.h>
#include <sys/modhash.h>
#include <sys/mac_provider.h>
#include <sys/mac_client_impl.h>
#include <sys/mac_soft_ring.h>
#include <sys/mac_stat.h>
#include <sys/mac_impl.h>
#include <sys/mac.h>
#include <sys/dls.h>
#include <sys/dld.h>
#include <sys/modctl.h>
#include <sys/fs/dv_node.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/callb.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <sys/bitmap.h>
#include <sys/sdt.h>
#include <sys/mac_flow.h>
#include <sys/ddi_intr_impl.h>
#include <sys/disp.h>
#include <sys/vnic.h>
#include <sys/vnic_impl.h>
#include <sys/vlan.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <sys/exacct.h>
#include <sys/exacct_impl.h>
#include <inet/nd.h>
#include <sys/ethernet.h>
#include <sys/pool.h>
#include <sys/pool_pset.h>
#include <sys/cpupart.h>
#include <inet/wifi_ioctl.h>
#include <net/wpa.h>

#define	IMPL_HASHSZ	67	/* prime */

kmem_cache_t		*i_mac_impl_cachep;
mod_hash_t		*i_mac_impl_hash;
krwlock_t		i_mac_impl_lock;
uint_t			i_mac_impl_count;
static kmem_cache_t	*mac_ring_cache;
static id_space_t	*minor_ids;
static uint32_t		minor_count;
static pool_event_cb_t	mac_pool_event_reg;

/*
 * Logging stuff. Perhaps mac_logging_interval could be broken into
 * mac_flow_log_interval and mac_link_log_interval if we want to be
 * able to schedule them differently.
 */
uint_t			mac_logging_interval;
boolean_t		mac_flow_log_enable;
boolean_t		mac_link_log_enable;
timeout_id_t		mac_logging_timer;

/* for debugging, see MAC_DBG_PRT() in mac_impl.h */
int mac_dbg = 0;

#define	MACTYPE_KMODDIR	"mac"
#define	MACTYPE_HASHSZ	67
static mod_hash_t	*i_mactype_hash;
/*
 * i_mactype_lock synchronizes threads that obtain references to mactype_t
 * structures through i_mactype_getplugin().
 */
static kmutex_t		i_mactype_lock;

/*
 * mac_tx_percpu_cnt
 *
 * Number of per cpu locks per mac_client_impl_t. Used by the transmit side
 * in mac_tx to reduce lock contention. This is sized at boot time in mac_init.
 * mac_tx_percpu_cnt_max is settable in /etc/system and must be a power of 2.
 * Per cpu locks may be disabled by setting mac_tx_percpu_cnt_max to 1.
 */
int mac_tx_percpu_cnt;
int mac_tx_percpu_cnt_max = 128;
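/*
 * For illustration, a Tx data thread takes a reference under one of these
 * per-cpu locks roughly as follows (a sketch only; the actual macros live
 * in mac_client_impl.h):
 *
 *	idx = CPU->cpu_seqid & mac_tx_percpu_cnt;
 *	mutex_enter(&mcip->mci_tx_pcpu[idx].pcpu_tx_lock);
 *	mcip->mci_tx_pcpu[idx].pcpu_tx_refcnt++;
 *	mutex_exit(&mcip->mci_tx_pcpu[idx].pcpu_tx_lock);
 *
 * mac_tx_percpu_cnt is kept of the form 2^N - 1 so that it doubles as the
 * index mask; that is why mac_init below rounds it up to a power of two
 * and then subtracts one (e.g. 12 CPUs rounds up to 16, giving a mask of
 * 15).
 */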
/*
 * Call back functions for the bridge module. These are guaranteed to be valid
 * when holding a reference on a link or when holding mip->mi_bridge_lock and
 * mi_bridge_link is non-NULL.
 */
mac_bridge_tx_t mac_bridge_tx_cb;
mac_bridge_rx_t mac_bridge_rx_cb;
mac_bridge_ref_t mac_bridge_ref_cb;
mac_bridge_ls_t mac_bridge_ls_cb;

static int i_mac_constructor(void *, void *, int);
static void i_mac_destructor(void *, void *);
static int i_mac_ring_ctor(void *, void *, int);
static void i_mac_ring_dtor(void *, void *);
static mblk_t *mac_rx_classify(mac_impl_t *, mac_resource_handle_t, mblk_t *);
void mac_tx_client_flush(mac_client_impl_t *);
void mac_tx_client_block(mac_client_impl_t *);
static void mac_rx_ring_quiesce(mac_ring_t *, uint_t);
static int mac_start_group_and_rings(mac_group_t *);
static void mac_stop_group_and_rings(mac_group_t *);
static void mac_pool_event_cb(pool_event_t, int, void *);

typedef struct netinfo_s {
	list_node_t	ni_link;
	void		*ni_record;
	int		ni_size;
	int		ni_type;
} netinfo_t;

/*
 * Module initialization functions.
 */

void
mac_init(void)
{
	mac_tx_percpu_cnt = ((boot_max_ncpus == -1) ? max_ncpus :
	    boot_max_ncpus);

	/* Upper bound is mac_tx_percpu_cnt_max */
	if (mac_tx_percpu_cnt > mac_tx_percpu_cnt_max)
		mac_tx_percpu_cnt = mac_tx_percpu_cnt_max;

	if (mac_tx_percpu_cnt < 1) {
		/* Someone set mac_tx_percpu_cnt_max to 0 or less */
		mac_tx_percpu_cnt = 1;
	}

	ASSERT(mac_tx_percpu_cnt >= 1);
	mac_tx_percpu_cnt = (1 << highbit(mac_tx_percpu_cnt - 1));
	/*
	 * Make it of the form 2**N - 1 in the range
	 * [0 .. mac_tx_percpu_cnt_max - 1]
	 */
	mac_tx_percpu_cnt--;

	i_mac_impl_cachep = kmem_cache_create("mac_impl_cache",
	    sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor,
	    NULL, NULL, NULL, 0);
	ASSERT(i_mac_impl_cachep != NULL);

	mac_ring_cache = kmem_cache_create("mac_ring_cache",
	    sizeof (mac_ring_t), 0, i_mac_ring_ctor, i_mac_ring_dtor, NULL,
	    NULL, NULL, 0);
	ASSERT(mac_ring_cache != NULL);

	i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash",
	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
	rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL);

	mac_flow_init();
	mac_soft_ring_init();
	mac_bcast_init();
	mac_client_init();

	i_mac_impl_count = 0;

	i_mactype_hash = mod_hash_create_extended("mactype_hash",
	    MACTYPE_HASHSZ,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * Allocate an id space to manage minor numbers. The range of the
	 * space will be from MAC_MAX_MINOR+1 to MAC_PRIVATE_MINOR-1. This
	 * leaves half of the 32-bit minors available for driver private use.
	 */
	minor_ids = id_space_create("mac_minor_ids", MAC_MAX_MINOR+1,
	    MAC_PRIVATE_MINOR-1);
	ASSERT(minor_ids != NULL);
	minor_count = 0;

	/* Let's default to 20 seconds */
	mac_logging_interval = 20;
	mac_flow_log_enable = B_FALSE;
	mac_link_log_enable = B_FALSE;
	mac_logging_timer = 0;

	/* Register to be notified of noteworthy pools events */
	mac_pool_event_reg.pec_func = mac_pool_event_cb;
	mac_pool_event_reg.pec_arg = NULL;
	pool_event_cb_register(&mac_pool_event_reg);
}

int
mac_fini(void)
{

	if (i_mac_impl_count > 0 || minor_count > 0)
		return (EBUSY);

	pool_event_cb_unregister(&mac_pool_event_reg);

	id_space_destroy(minor_ids);
	mac_flow_fini();

	mod_hash_destroy_hash(i_mac_impl_hash);
	rw_destroy(&i_mac_impl_lock);

	mac_client_fini();
	kmem_cache_destroy(mac_ring_cache);

	mod_hash_destroy_hash(i_mactype_hash);
	mac_soft_ring_finish();

	return (0);
}

/*
 * Initialize a GLDv3 driver's device ops. A driver that manages its own ops
 * (e.g. softmac) may pass in a NULL ops argument.
 */
void
mac_init_ops(struct dev_ops *ops, const char *name)
{
	major_t major = ddi_name_to_major((char *)name);

	/*
	 * By returning on error below, we are not letting the driver continue
	 * in an undefined context. The mac_register() function will fail if
	 * DN_GLDV3_DRIVER isn't set.
	 */
	if (major == DDI_MAJOR_T_NONE)
		return;
	LOCK_DEV_OPS(&devnamesp[major].dn_lock);
	devnamesp[major].dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER);
	UNLOCK_DEV_OPS(&devnamesp[major].dn_lock);
	if (ops != NULL)
		dld_init_ops(ops, name);
}

void
mac_fini_ops(struct dev_ops *ops)
{
	dld_fini_ops(ops);
}

/*ARGSUSED*/
static int
i_mac_constructor(void *buf, void *arg, int kmflag)
{
	mac_impl_t *mip = buf;

	bzero(buf, sizeof (mac_impl_t));

	mip->mi_linkstate = LINK_STATE_UNKNOWN;

	rw_init(&mip->mi_rw_lock, NULL, RW_DRIVER, NULL);
	mutex_init(&mip->mi_notify_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&mip->mi_promisc_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&mip->mi_ring_lock, NULL, MUTEX_DEFAULT, NULL);

	mip->mi_notify_cb_info.mcbi_lockp = &mip->mi_notify_lock;
	cv_init(&mip->mi_notify_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);
	mip->mi_promisc_cb_info.mcbi_lockp = &mip->mi_promisc_lock;
	cv_init(&mip->mi_promisc_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);

	mutex_init(&mip->mi_bridge_lock, NULL, MUTEX_DEFAULT, NULL);

	return (0);
}

/*ARGSUSED*/
static void
i_mac_destructor(void *buf, void *arg)
{
	mac_impl_t *mip = buf;
	mac_cb_info_t *mcbi;

	ASSERT(mip->mi_ref == 0);
	ASSERT(mip->mi_active == 0);
	ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN);
	ASSERT(mip->mi_devpromisc == 0);
	ASSERT(mip->mi_ksp == NULL);
	ASSERT(mip->mi_kstat_count == 0);
	ASSERT(mip->mi_nclients == 0);
	ASSERT(mip->mi_nactiveclients == 0);
	ASSERT(mip->mi_single_active_client == NULL);
	ASSERT(mip->mi_state_flags == 0);
	ASSERT(mip->mi_factory_addr == NULL);
	ASSERT(mip->mi_factory_addr_num == 0);
	ASSERT(mip->mi_default_tx_ring == NULL);

	mcbi = &mip->mi_notify_cb_info;
	ASSERT(mcbi->mcbi_del_cnt == 0 && mcbi->mcbi_walker_cnt == 0);
	ASSERT(mip->mi_notify_bits == 0);
	ASSERT(mip->mi_notify_thread == NULL);
	ASSERT(mcbi->mcbi_lockp == &mip->mi_notify_lock);
	mcbi->mcbi_lockp = NULL;

	mcbi = &mip->mi_promisc_cb_info;
	ASSERT(mcbi->mcbi_del_cnt == 0 && mip->mi_promisc_list == NULL);
	ASSERT(mip->mi_promisc_list == NULL);
	ASSERT(mcbi->mcbi_lockp == &mip->mi_promisc_lock);
	mcbi->mcbi_lockp = NULL;

	ASSERT(mip->mi_bcast_ngrps == 0 && mip->mi_bcast_grp == NULL);
	ASSERT(mip->mi_perim_owner == NULL && mip->mi_perim_ocnt == 0);

	rw_destroy(&mip->mi_rw_lock);

	mutex_destroy(&mip->mi_promisc_lock);
	cv_destroy(&mip->mi_promisc_cb_info.mcbi_cv);
	mutex_destroy(&mip->mi_notify_lock);
	cv_destroy(&mip->mi_notify_cb_info.mcbi_cv);
	mutex_destroy(&mip->mi_ring_lock);

	ASSERT(mip->mi_bridge_link == NULL);
}

/* ARGSUSED */
static int
i_mac_ring_ctor(void *buf, void *arg, int kmflag)
{
	mac_ring_t *ring = (mac_ring_t *)buf;

	bzero(ring, sizeof (mac_ring_t));
	cv_init(&ring->mr_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&ring->mr_lock, NULL, MUTEX_DEFAULT, NULL);
	ring->mr_state = MR_FREE;
	return (0);
}

/* ARGSUSED */
static void
i_mac_ring_dtor(void *buf, void *arg)
{
	mac_ring_t *ring = (mac_ring_t *)buf;

	cv_destroy(&ring->mr_cv);
	mutex_destroy(&ring->mr_lock);
}

/*
 * Common functions to do mac callback addition and deletion. Currently this is
 * used by promisc callbacks and notify callbacks. List addition and deletion
 * need to take care of list walkers. List walkers in general can't hold list
 * locks and make upcall callbacks due to potential lock order and recursive
 * reentry issues. Instead list walkers increment the list walker count to mark
 * the presence of a walker thread. Addition can be carefully done to ensure
 * that the list walker always sees either the old list or the new list.
 * However deletion can't be done while a walker is active; instead the
 * deleting thread simply marks the entry as logically deleted. The last walker
 * physically deletes and frees up the logically deleted entries when the walk
 * is complete.
 */
void
mac_callback_add(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    mac_cb_t *mcb_elem)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;

	/* Verify it is not already in the list */
	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
		if (p == mcb_elem)
			break;
	}
	VERIFY(p == NULL);

	/*
	 * Add it to the head of the callback list. The membar ensures that
	 * the following list pointer manipulations reach global visibility
	 * in exactly the program order below.
	 */
	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));

	mcb_elem->mcb_nextp = *mcb_head;
	membar_producer();
	*mcb_head = mcb_elem;
}
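/*
 * For illustration, a list walker cooperating with this scheme looks
 * roughly as follows (a sketch only; the notify and promisc walkers in
 * mac_client.c are the real instances of this pattern):
 *
 *	mutex_enter(mcbi->mcbi_lockp);
 *	mcbi->mcbi_walker_cnt++;
 *	mutex_exit(mcbi->mcbi_lockp);
 *
 *	walk the list locklessly, skipping entries marked MCB_CONDEMNED,
 *	and make the upcall callbacks
 *
 *	mutex_enter(mcbi->mcbi_lockp);
 *	if (--mcbi->mcbi_walker_cnt == 0 && mcbi->mcbi_del_cnt != 0) {
 *		rmlist = mac_callback_walker_cleanup(mcbi, mcb_head);
 *		mac_callback_free(rmlist);
 *		cv_broadcast(&mcbi->mcbi_cv);	wake remove_wait'ers
 *	}
 *	mutex_exit(mcbi->mcbi_lockp);
 */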
/*
 * Mark the entry as logically deleted. If there aren't any walkers unlink
 * from the list. In either case return the corresponding status.
 */
boolean_t
mac_callback_remove(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    mac_cb_t *mcb_elem)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;

	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	/*
	 * Search the callback list for the entry to be removed
	 */
	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
		if (p == mcb_elem)
			break;
	}
	VERIFY(p != NULL);

	/*
	 * If there are walkers just mark it as deleted and the last walker
	 * will remove from the list and free it.
	 */
	if (mcbi->mcbi_walker_cnt != 0) {
		p->mcb_flags |= MCB_CONDEMNED;
		mcbi->mcbi_del_cnt++;
		return (B_FALSE);
	}

	ASSERT(mcbi->mcbi_del_cnt == 0);
	*pp = p->mcb_nextp;
	p->mcb_nextp = NULL;
	return (B_TRUE);
}

/*
 * Wait for all pending callback removals to be completed
 */
void
mac_callback_remove_wait(mac_cb_info_t *mcbi)
{
	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	while (mcbi->mcbi_del_cnt != 0) {
		DTRACE_PROBE1(need_wait, mac_cb_info_t *, mcbi);
		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
	}
}

/*
 * The last mac callback walker does the cleanup. Walk the list and unlink
 * all the logically deleted entries and construct a temporary list of
 * removed entries. Return the list of removed entries to the caller.
 */
mac_cb_t *
mac_callback_walker_cleanup(mac_cb_info_t *mcbi, mac_cb_t **mcb_head)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;
	mac_cb_t	*rmlist = NULL;		/* List of removed elements */
	int	cnt = 0;

	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	ASSERT(mcbi->mcbi_del_cnt != 0 && mcbi->mcbi_walker_cnt == 0);

	pp = mcb_head;
	while (*pp != NULL) {
		if ((*pp)->mcb_flags & MCB_CONDEMNED) {
			p = *pp;
			*pp = p->mcb_nextp;
			p->mcb_nextp = rmlist;
			rmlist = p;
			cnt++;
			continue;
		}
		pp = &(*pp)->mcb_nextp;
	}

	ASSERT(mcbi->mcbi_del_cnt == cnt);
	mcbi->mcbi_del_cnt = 0;
	return (rmlist);
}

boolean_t
mac_callback_lookup(mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
{
	mac_cb_t	*mcb;

	/* Search the list for the element */
	for (mcb = *mcb_headp; mcb != NULL; mcb = mcb->mcb_nextp) {
		if (mcb == mcb_elem)
			return (B_TRUE);
	}

	return (B_FALSE);
}

boolean_t
mac_callback_find(mac_cb_info_t *mcbi, mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
{
	boolean_t	found;

	mutex_enter(mcbi->mcbi_lockp);
	found = mac_callback_lookup(mcb_headp, mcb_elem);
	mutex_exit(mcbi->mcbi_lockp);

	return (found);
}

/* Free the list of removed callbacks */
void
mac_callback_free(mac_cb_t *rmlist)
{
	mac_cb_t	*mcb;
	mac_cb_t	*mcb_next;

	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
		mcb_next = mcb->mcb_nextp;
		kmem_free(mcb->mcb_objp, mcb->mcb_objsize);
	}
}

/*
 * The promisc callbacks are in 2 lists, one off the 'mip' and another off the
 * 'mcip' threaded by mpi_mi_link and mpi_mci_link respectively. However there
 * is only a single shared total walker count, and an entry can't be physically
 * unlinked if a walker is active on either list. The last walker does this
 * cleanup of logically deleted entries.
 */
void
i_mac_promisc_walker_cleanup(mac_impl_t *mip)
{
	mac_cb_t	*rmlist;
	mac_cb_t	*mcb;
	mac_cb_t	*mcb_next;
	mac_promisc_impl_t	*mpip;

	/*
	 * Construct a temporary list of deleted callbacks by walking the
	 * mi_promisc_list. Then for each entry in the temporary list,
	 * remove it from the mci_promisc_list and free the entry.
	 */
	rmlist = mac_callback_walker_cleanup(&mip->mi_promisc_cb_info,
	    &mip->mi_promisc_list);

	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
		mcb_next = mcb->mcb_nextp;
		mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
		VERIFY(mac_callback_remove(&mip->mi_promisc_cb_info,
		    &mpip->mpi_mcip->mci_promisc_list, &mpip->mpi_mci_link));
		mcb->mcb_flags = 0;
		mcb->mcb_nextp = NULL;
		kmem_cache_free(mac_promisc_impl_cache, mpip);
	}
}

void
i_mac_notify(mac_impl_t *mip, mac_notify_type_t type)
{
	mac_cb_info_t	*mcbi;

	/*
	 * Signal the notify thread even after mi_ref has become zero and
	 * mi_disabled is set. The synchronization with the notify thread
	 * happens in mac_unregister and that implies the driver must make
	 * sure it is single-threaded (with respect to mac calls) and that
	 * all pending mac calls have returned before it calls mac_unregister.
	 */
	rw_enter(&i_mac_impl_lock, RW_READER);
	if (mip->mi_state_flags & MIS_DISABLED)
		goto exit;

	/*
	 * Guard against incorrect notifications. (Running a newer
	 * mac client against an older implementation?)
	 */
	if (type >= MAC_NNOTE)
		goto exit;

	mcbi = &mip->mi_notify_cb_info;
	mutex_enter(mcbi->mcbi_lockp);
	mip->mi_notify_bits |= (1 << type);
	cv_broadcast(&mcbi->mcbi_cv);
	mutex_exit(mcbi->mcbi_lockp);

exit:
	rw_exit(&i_mac_impl_lock);
}

/*
 * Mac serialization primitives. Please see the block comment at the
 * top of the file.
 */
void
i_mac_perim_enter(mac_impl_t *mip)
{
	mac_client_impl_t	*mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Use the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}

	mutex_enter(&mip->mi_perim_lock);
	if (mip->mi_perim_owner == curthread) {
		mip->mi_perim_ocnt++;
		mutex_exit(&mip->mi_perim_lock);
		return;
	}

	while (mip->mi_perim_owner != NULL)
		cv_wait(&mip->mi_perim_cv, &mip->mi_perim_lock);

	mip->mi_perim_owner = curthread;
	ASSERT(mip->mi_perim_ocnt == 0);
	mip->mi_perim_ocnt++;
#ifdef DEBUG
	mip->mi_perim_stack_depth = getpcstack(mip->mi_perim_stack,
	    MAC_PERIM_STACK_DEPTH);
#endif
	mutex_exit(&mip->mi_perim_lock);
}

int
i_mac_perim_enter_nowait(mac_impl_t *mip)
{
	/*
	 * The vnic is a special case, since the serialization is done based
	 * on the lower mac. If the lower mac is busy, it does not imply the
	 * vnic can't be unregistered. But in the case of other drivers,
	 * a busy perimeter or open mac handles implies that the mac is busy
	 * and can't be unregistered.
	 */
	if (mip->mi_state_flags & MIS_IS_VNIC) {
		i_mac_perim_enter(mip);
		return (0);
	}

	mutex_enter(&mip->mi_perim_lock);
	if (mip->mi_perim_owner != NULL) {
		mutex_exit(&mip->mi_perim_lock);
		return (EBUSY);
	}
	ASSERT(mip->mi_perim_ocnt == 0);
	mip->mi_perim_owner = curthread;
	mip->mi_perim_ocnt++;
	mutex_exit(&mip->mi_perim_lock);

	return (0);
}
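/*
 * For illustration, a teardown path that must not block would use the
 * nowait variant roughly as follows (a sketch only):
 *
 *	if (i_mac_perim_enter_nowait(mip) != 0)
 *		return (EBUSY);		mac end point is busy
 *	... teardown work that must be serialized ...
 *	i_mac_perim_exit(mip);
 */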
void
i_mac_perim_exit(mac_impl_t *mip)
{
	mac_client_impl_t	*mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Use the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}

	ASSERT(mip->mi_perim_owner == curthread && mip->mi_perim_ocnt != 0);

	mutex_enter(&mip->mi_perim_lock);
	if (--mip->mi_perim_ocnt == 0) {
		mip->mi_perim_owner = NULL;
		cv_signal(&mip->mi_perim_cv);
	}
	mutex_exit(&mip->mi_perim_lock);
}

/*
 * Returns whether the current thread holds the mac perimeter. Used in making
 * assertions.
 */
boolean_t
mac_perim_held(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mac_client_impl_t	*mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Use the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}
	return (mip->mi_perim_owner == curthread);
}

/*
 * mac client interfaces to enter the mac perimeter of a mac end point, given
 * its mac handle, or macname or linkid.
 */
void
mac_perim_enter_by_mh(mac_handle_t mh, mac_perim_handle_t *mphp)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	i_mac_perim_enter(mip);
	/*
	 * The mac_perim_handle_t returned encodes the 'mip' and whether a
	 * mac_open has been done internally while entering the perimeter.
	 * This information is used in mac_perim_exit.
	 */
	MAC_ENCODE_MPH(*mphp, mip, 0);
}

int
mac_perim_enter_by_macname(const char *name, mac_perim_handle_t *mphp)
{
	int	err;
	mac_handle_t	mh;

	if ((err = mac_open(name, &mh)) != 0)
		return (err);

	mac_perim_enter_by_mh(mh, mphp);
	MAC_ENCODE_MPH(*mphp, mh, 1);
	return (0);
}

int
mac_perim_enter_by_linkid(datalink_id_t linkid, mac_perim_handle_t *mphp)
{
	int	err;
	mac_handle_t	mh;

	if ((err = mac_open_by_linkid(linkid, &mh)) != 0)
		return (err);

	mac_perim_enter_by_mh(mh, mphp);
	MAC_ENCODE_MPH(*mphp, mh, 1);
	return (0);
}

void
mac_perim_exit(mac_perim_handle_t mph)
{
	mac_impl_t	*mip;
	boolean_t	need_close;

	MAC_DECODE_MPH(mph, mip, need_close);
	i_mac_perim_exit(mip);
	if (need_close)
		mac_close((mac_handle_t)mip);
}

int
mac_hold(const char *macname, mac_impl_t **pmip)
{
	mac_impl_t	*mip;
	int		err;

	/*
	 * Check the device name length to make sure it won't overflow our
	 * buffer.
	 */
	if (strlen(macname) >= MAXNAMELEN)
		return (EINVAL);

	/*
	 * Look up its entry in the global hash table.
	 */
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname,
	    (mod_hash_val_t *)&mip);

	if (err != 0) {
		rw_exit(&i_mac_impl_lock);
		return (ENOENT);
	}

	if (mip->mi_state_flags & MIS_DISABLED) {
		rw_exit(&i_mac_impl_lock);
		return (ENOENT);
	}

	if (mip->mi_state_flags & MIS_EXCLUSIVE_HELD) {
		rw_exit(&i_mac_impl_lock);
		return (EBUSY);
	}

	mip->mi_ref++;
	rw_exit(&i_mac_impl_lock);

	*pmip = mip;
	return (0);
}

void
mac_rele(mac_impl_t *mip)
{
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	ASSERT(mip->mi_ref != 0);
	if (--mip->mi_ref == 0) {
		ASSERT(mip->mi_nactiveclients == 0 &&
		    !(mip->mi_state_flags & MIS_EXCLUSIVE));
	}
	rw_exit(&i_mac_impl_lock);
}
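/*
 * For illustration, mac_hold/mac_rele bracket any use of a mac_impl_t
 * looked up by name, roughly as follows (a sketch only):
 *
 *	mac_impl_t *mip;
 *
 *	if ((err = mac_hold(macname, &mip)) != 0)
 *		return (err);
 *	... use mip; the mi_ref hold keeps it from being disabled ...
 *	mac_rele(mip);
 */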
/*
 * Private GLDv3 function to start a MAC instance.
 */
int
mac_start(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	int		err = 0;
	mac_group_t	*defgrp;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mip->mi_start != NULL);

	/*
	 * Check whether the device is already started.
	 */
	if (mip->mi_active++ == 0) {
		mac_ring_t *ring = NULL;

		/*
		 * Start the device.
		 */
		err = mip->mi_start(mip->mi_driver);
		if (err != 0) {
			mip->mi_active--;
			return (err);
		}

		/*
		 * Start the default tx ring.
		 */
		if (mip->mi_default_tx_ring != NULL) {

			ring = (mac_ring_t *)mip->mi_default_tx_ring;
			if (ring->mr_state != MR_INUSE) {
				err = mac_start_ring(ring);
				if (err != 0) {
					mip->mi_active--;
					return (err);
				}
			}
		}

		if ((defgrp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) {
			/*
			 * Start the default group, since it will be needed
			 * to receive broadcast and multicast traffic for
			 * both primary and non-primary MAC clients.
			 */
			ASSERT(defgrp->mrg_state == MAC_GROUP_STATE_REGISTERED);
			err = mac_start_group_and_rings(defgrp);
			if (err != 0) {
				mip->mi_active--;
				if ((ring != NULL) &&
				    (ring->mr_state == MR_INUSE))
					mac_stop_ring(ring);
				return (err);
			}
			mac_set_group_state(defgrp, MAC_GROUP_STATE_SHARED);
		}
	}

	return (err);
}

/*
 * Private GLDv3 function to stop a MAC instance.
 */
void
mac_stop(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mac_group_t	*grp;

	ASSERT(mip->mi_stop != NULL);
	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	/*
	 * Check whether the device is still needed.
	 */
	ASSERT(mip->mi_active != 0);
	if (--mip->mi_active == 0) {
		if ((grp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) {
			/*
			 * There should be no more active clients since the
			 * MAC is being stopped. Stop the default RX group
			 * and transition it back to registered state.
			 *
			 * When clients are torn down, the groups
			 * are released via mac_release_rx_group which
			 * knows that the default group is always in
			 * started mode since broadcast uses it. So
			 * we can assert that there are no clients
			 * (since mac_bcast_add doesn't register itself
			 * as a client) and that the group is in SHARED state.
			 */
			ASSERT(grp->mrg_state == MAC_GROUP_STATE_SHARED);
			ASSERT(MAC_GROUP_NO_CLIENT(grp) &&
			    mip->mi_nactiveclients == 0);
			mac_stop_group_and_rings(grp);
			mac_set_group_state(grp, MAC_GROUP_STATE_REGISTERED);
		}

		if (mip->mi_default_tx_ring != NULL) {
			mac_ring_t *ring;

			ring = (mac_ring_t *)mip->mi_default_tx_ring;
			if (ring->mr_state == MR_INUSE) {
				mac_stop_ring(ring);
				ring->mr_flag = 0;
			}
		}

		/*
		 * Stop the device.
		 */
		mip->mi_stop(mip->mi_driver);
	}
}
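/*
 * mac_start/mac_stop nest by way of the mi_active count, and both require
 * the perimeter. For illustration, a caller would bracket them roughly as
 * follows (a sketch only):
 *
 *	i_mac_perim_enter(mip);
 *	if ((err = mac_start((mac_handle_t)mip)) == 0) {
 *		... device is started; mi_active is non-zero ...
 *		mac_stop((mac_handle_t)mip);
 *	}
 *	i_mac_perim_exit(mip);
 */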
int
i_mac_promisc_set(mac_impl_t *mip, boolean_t on)
{
	int		err = 0;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mip->mi_setpromisc != NULL);

	if (on) {
		/*
		 * Enable promiscuous mode on the device if not yet enabled.
		 */
		if (mip->mi_devpromisc++ == 0) {
			err = mip->mi_setpromisc(mip->mi_driver, B_TRUE);
			if (err != 0) {
				mip->mi_devpromisc--;
				return (err);
			}
			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
		}
	} else {
		if (mip->mi_devpromisc == 0)
			return (EPROTO);

		/*
		 * Disable promiscuous mode on the device if this is the last
		 * enabling.
		 */
		if (--mip->mi_devpromisc == 0) {
			err = mip->mi_setpromisc(mip->mi_driver, B_FALSE);
			if (err != 0) {
				mip->mi_devpromisc++;
				return (err);
			}
			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
		}
	}

	return (0);
}

/*
 * The promiscuity state can change any time. If the caller needs to take
 * actions that are atomic with the promiscuity state, then the caller needs
 * to bracket the entire sequence with mac_perim_enter/exit.
 */
boolean_t
mac_promisc_get(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	/*
	 * Return the current promiscuity.
	 */
	return (mip->mi_devpromisc != 0);
}

/*
 * Invoked at MAC instance attach time to initialize the list
 * of factory MAC addresses supported by a MAC instance. This function
 * builds a local cache in the mac_impl_t for the MAC addresses
 * supported by the underlying hardware. The MAC clients themselves
 * use the mac_addr_factory*() functions to query and reserve
 * factory MAC addresses.
 */
void
mac_addr_factory_init(mac_impl_t *mip)
{
	mac_capab_multifactaddr_t capab;
	uint8_t *addr;
	int i;

	/*
	 * First round to see how many factory MAC addresses are available.
	 */
	bzero(&capab, sizeof (capab));
	if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_MULTIFACTADDR,
	    &capab) || (capab.mcm_naddr == 0)) {
		/*
		 * The MAC instance doesn't support multiple factory
		 * MAC addresses, we're done here.
		 */
		return;
	}

	/*
	 * Allocate the space and get all the factory addresses.
	 */
	addr = kmem_alloc(capab.mcm_naddr * MAXMACADDRLEN, KM_SLEEP);
	capab.mcm_getaddr(mip->mi_driver, capab.mcm_naddr, addr);

	mip->mi_factory_addr_num = capab.mcm_naddr;
	mip->mi_factory_addr = kmem_zalloc(mip->mi_factory_addr_num *
	    sizeof (mac_factory_addr_t), KM_SLEEP);

	for (i = 0; i < capab.mcm_naddr; i++) {
		bcopy(addr + i * MAXMACADDRLEN,
		    mip->mi_factory_addr[i].mfa_addr,
		    mip->mi_type->mt_addr_length);
		mip->mi_factory_addr[i].mfa_in_use = B_FALSE;
	}

	kmem_free(addr, capab.mcm_naddr * MAXMACADDRLEN);
}

void
mac_addr_factory_fini(mac_impl_t *mip)
{
	if (mip->mi_factory_addr == NULL) {
		ASSERT(mip->mi_factory_addr_num == 0);
		return;
	}

	kmem_free(mip->mi_factory_addr, mip->mi_factory_addr_num *
	    sizeof (mac_factory_addr_t));

	mip->mi_factory_addr = NULL;
	mip->mi_factory_addr_num = 0;
}

/*
 * Reserve a factory MAC address. If *slot is set to -1, the function
 * attempts to reserve any of the available factory MAC addresses and
 * returns the reserved slot id. If no slots are available, the function
 * returns ENOSPC. If *slot is not set to -1, the function reserves
 * the specified slot if it is available, or returns EBUSY if the slot
 * is already used. Returns ENOTSUP if the underlying MAC does not
 * support multiple factory addresses. If the slot number is not -1 but
 * is invalid, returns EINVAL.
 */
int
mac_addr_factory_reserve(mac_client_handle_t mch, int *slot)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	mac_impl_t *mip = mcip->mci_mip;
	int i, ret = 0;

	i_mac_perim_enter(mip);
	/*
	 * Protect against concurrent readers that may need a self-consistent
	 * view of the factory addresses
	 */
	rw_enter(&mip->mi_rw_lock, RW_WRITER);

	if (mip->mi_factory_addr_num == 0) {
		ret = ENOTSUP;
		goto bail;
	}

	if (*slot != -1) {
		/* check the specified slot */
		if (*slot < 1 || *slot > mip->mi_factory_addr_num) {
			ret = EINVAL;
			goto bail;
		}
		if (mip->mi_factory_addr[*slot-1].mfa_in_use) {
			ret = EBUSY;
			goto bail;
		}
	} else {
		/* pick the next available slot */
		for (i = 0; i < mip->mi_factory_addr_num; i++) {
			if (!mip->mi_factory_addr[i].mfa_in_use)
				break;
		}

		if (i == mip->mi_factory_addr_num) {
			ret = ENOSPC;
			goto bail;
		}
		*slot = i+1;
	}

	mip->mi_factory_addr[*slot-1].mfa_in_use = B_TRUE;
	mip->mi_factory_addr[*slot-1].mfa_client = mcip;

bail:
	rw_exit(&mip->mi_rw_lock);
	i_mac_perim_exit(mip);
	return (ret);
}

/*
 * Release the specified factory MAC address slot.
 */
void
mac_addr_factory_release(mac_client_handle_t mch, uint_t slot)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	mac_impl_t *mip = mcip->mci_mip;

	i_mac_perim_enter(mip);
	/*
	 * Protect against concurrent readers that may need a self-consistent
	 * view of the factory addresses
	 */
	rw_enter(&mip->mi_rw_lock, RW_WRITER);

	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);
	ASSERT(mip->mi_factory_addr[slot-1].mfa_in_use);

	mip->mi_factory_addr[slot-1].mfa_in_use = B_FALSE;

	rw_exit(&mip->mi_rw_lock);
	i_mac_perim_exit(mip);
}

/*
 * Stores in mac_addr the value of the specified MAC address. The slot
 * number must be valid for the MAC. If a client_name buffer is supplied,
 * the caller must provide a string of at least MAXNAMELEN bytes.
 */
void
mac_addr_factory_value(mac_handle_t mh, int slot, uchar_t *mac_addr,
    uint_t *addr_len, char *client_name, boolean_t *in_use_arg)
{
	mac_impl_t *mip = (mac_impl_t *)mh;
	boolean_t in_use;

	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);

	/*
	 * Readers need to hold mi_rw_lock. Writers need to hold mac perimeter
	 * and mi_rw_lock
	 */
	rw_enter(&mip->mi_rw_lock, RW_READER);
	bcopy(mip->mi_factory_addr[slot-1].mfa_addr, mac_addr, MAXMACADDRLEN);
	*addr_len = mip->mi_type->mt_addr_length;
	in_use = mip->mi_factory_addr[slot-1].mfa_in_use;
	if (in_use && client_name != NULL) {
		bcopy(mip->mi_factory_addr[slot-1].mfa_client->mci_name,
		    client_name, MAXNAMELEN);
	}
	if (in_use_arg != NULL)
		*in_use_arg = in_use;
	rw_exit(&mip->mi_rw_lock);
}
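/*
 * For illustration, a client reserves and later releases a factory
 * address roughly as follows (a sketch only):
 *
 *	int slot = -1;
 *
 *	if (mac_addr_factory_reserve(mch, &slot) == 0) {
 *		... slot now holds a 1-based reserved slot id ...
 *		mac_addr_factory_release(mch, slot);
 *	}
 */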
/*
 * Returns the number of factory MAC addresses (in addition to the
 * primary MAC address), 0 if the underlying MAC doesn't support
 * that feature.
 */
uint_t
mac_addr_factory_num(mac_handle_t mh)
{
	mac_impl_t *mip = (mac_impl_t *)mh;

	return (mip->mi_factory_addr_num);
}


void
mac_rx_group_unmark(mac_group_t *grp, uint_t flag)
{
	mac_ring_t	*ring;

	for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next)
		ring->mr_flag &= ~flag;
}

/*
 * The following mac_hwrings_xxx() functions are private mac client functions
 * used by the aggr driver to access and control the underlying HW Rx group
 * and rings. In this case, the aggr driver has exclusive control of the
 * underlying HW Rx group/rings. It calls the following functions to
 * start/stop the HW Rx rings, disable/enable polling, add/remove MAC
 * addresses, or set up the Rx callback.
 */
/* ARGSUSED */
static void
mac_hwrings_rx_process(void *arg, mac_resource_handle_t srs,
    mblk_t *mp_chain, boolean_t loopback)
{
	mac_soft_ring_set_t	*mac_srs = (mac_soft_ring_set_t *)srs;
	mac_srs_rx_t		*srs_rx = &mac_srs->srs_rx;
	mac_direct_rx_t		proc;
	void			*arg1;
	mac_resource_handle_t	arg2;

	proc = srs_rx->sr_func;
	arg1 = srs_rx->sr_arg1;
	arg2 = mac_srs->srs_mrh;

	proc(arg1, arg2, mp_chain, NULL);
}

/*
 * This function is called to get the list of HW rings that are reserved by
 * an exclusive mac client.
 *
 * Return value: the number of HW rings.
 */
int
mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh,
    mac_ring_handle_t *hwrh, mac_ring_type_t rtype)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	flow_entry_t *flent = mcip->mci_flent;
	mac_group_t *grp;
	mac_ring_t *ring;
	int cnt = 0;

	if (rtype == MAC_RING_TYPE_RX) {
		grp = flent->fe_rx_ring_group;
	} else if (rtype == MAC_RING_TYPE_TX) {
		grp = flent->fe_tx_ring_group;
	} else {
		ASSERT(B_FALSE);
		return (-1);
	}
	/*
	 * The mac client did not reserve any RX group, return directly.
	 * This is probably because the underlying MAC does not support
	 * any groups.
	 */
	if (hwgh != NULL)
		*hwgh = NULL;
	if (grp == NULL)
		return (0);
	/*
	 * This group must be reserved by this mac client.
	 */
	ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) &&
	    (mcip == MAC_GROUP_ONLY_CLIENT(grp)));

	for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next, cnt++) {
		ASSERT(cnt < MAX_RINGS_PER_GROUP);
		hwrh[cnt] = (mac_ring_handle_t)ring;
	}
	if (hwgh != NULL)
		*hwgh = (mac_group_handle_t)grp;

	return (cnt);
}

/*
 * This function is called to get info about Tx/Rx rings.
 *
 * Return value: returns uint_t which will have various bits set
 * that indicate different properties of the ring.
 */
uint_t
mac_hwring_getinfo(mac_ring_handle_t rh)
{
	mac_ring_t *ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &ring->mr_info;

	return (info->mri_flags);
}
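/*
 * For illustration, an exclusive client such as aggr would discover and
 * start its reserved Rx rings roughly as follows (a sketch only):
 *
 *	mac_ring_handle_t	hwrh[MAX_RINGS_PER_GROUP];
 *	mac_group_handle_t	hwgh;
 *	int			i, cnt;
 *
 *	cnt = mac_hwrings_get(mch, &hwgh, hwrh, MAC_RING_TYPE_RX);
 *	for (i = 0; i < cnt; i++)
 *		(void) mac_hwring_start(hwrh[i]);
 */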
/*
 * Export ddi interrupt handles from the HW ring to the pseudo ring and
 * set up the RX callback of the mac client which exclusively controls
 * the HW ring.
 */
void
mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh,
    mac_ring_handle_t pseudo_rh)
{
	mac_ring_t *hw_ring = (mac_ring_t *)hwrh;
	mac_ring_t *pseudo_ring;
	mac_soft_ring_set_t *mac_srs = hw_ring->mr_srs;

	if (pseudo_rh != NULL) {
		pseudo_ring = (mac_ring_t *)pseudo_rh;
		/* Export the ddi handles to pseudo ring */
		pseudo_ring->mr_info.mri_intr.mi_ddi_handle =
		    hw_ring->mr_info.mri_intr.mi_ddi_handle;
		pseudo_ring->mr_info.mri_intr.mi_ddi_shared =
		    hw_ring->mr_info.mri_intr.mi_ddi_shared;
		/*
		 * Save a pointer to pseudo ring in the hw ring. If
		 * interrupt handle changes, the hw ring will be
		 * notified of the change (see mac_ring_intr_set())
		 * and the appropriate change has to be made to
		 * the pseudo ring that has exported the ddi handle.
		 */
		hw_ring->mr_prh = pseudo_rh;
	}

	if (hw_ring->mr_type == MAC_RING_TYPE_RX) {
		ASSERT(!(mac_srs->srs_type & SRST_TX));
		mac_srs->srs_mrh = prh;
		mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process;
	}
}

void
mac_hwring_teardown(mac_ring_handle_t hwrh)
{
	mac_ring_t *hw_ring = (mac_ring_t *)hwrh;
	mac_soft_ring_set_t *mac_srs;

	if (hw_ring == NULL)
		return;
	hw_ring->mr_prh = NULL;
	if (hw_ring->mr_type == MAC_RING_TYPE_RX) {
		mac_srs = hw_ring->mr_srs;
		ASSERT(!(mac_srs->srs_type & SRST_TX));
		mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process;
		mac_srs->srs_mrh = NULL;
	}
}

int
mac_hwring_disable_intr(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;
	mac_intr_t *intr = &rr_ring->mr_info.mri_intr;

	return (intr->mi_disable(intr->mi_handle));
}

int
mac_hwring_enable_intr(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;
	mac_intr_t *intr = &rr_ring->mr_info.mri_intr;

	return (intr->mi_enable(intr->mi_handle));
}

int
mac_hwring_start(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;

	MAC_RING_UNMARK(rr_ring, MR_QUIESCE);
	return (0);
}

void
mac_hwring_stop(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;

	mac_rx_ring_quiesce(rr_ring, MR_QUIESCE);
}

mblk_t *
mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &rr_ring->mr_info;

	return (info->mri_poll(info->mri_driver, bytes_to_pickup));
}

/*
 * Send packets through a selected tx ring.
 */
mblk_t *
mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp)
{
	mac_ring_t *ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &ring->mr_info;

	ASSERT(ring->mr_type == MAC_RING_TYPE_TX &&
	    ring->mr_state >= MR_INUSE);
	return (info->mri_tx(info->mri_driver, mp));
}

/*
 * Query stats for a particular rx/tx ring
 */
int
mac_hwring_getstat(mac_ring_handle_t rh, uint_t stat, uint64_t *val)
{
	mac_ring_t *ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &ring->mr_info;

	return (info->mri_stat(info->mri_driver, stat, val));
}
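/*
 * For illustration, an exclusive client can switch a Rx ring to poll mode
 * and pull packets directly, roughly as follows (a sketch only; nbytes is
 * a caller-chosen byte budget):
 *
 *	(void) mac_hwring_disable_intr(rh);
 *	mp_chain = mac_hwring_poll(rh, nbytes);
 *	... process mp_chain ...
 *	(void) mac_hwring_enable_intr(rh);
 */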
/*
 * Private function that is only used by aggr to send packets through
 * a port/Tx ring. Since aggr exposes a pseudo Tx ring even for ports
 * that do not expose Tx rings, the aggr_ring_tx() entry point needs
 * access to the mac_impl_t to send packets through the m_tx() entry
 * point. It accomplishes this by calling mac_hwring_send_priv().
 */
mblk_t *
mac_hwring_send_priv(mac_client_handle_t mch, mac_ring_handle_t rh, mblk_t *mp)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	mac_impl_t *mip = mcip->mci_mip;

	MAC_TX(mip, rh, mp, mcip);
	return (mp);
}

/*
 * Private function that is only used by aggr to update the default
 * transmission ring. Because aggr exposes a pseudo Tx ring even for ports
 * that may temporarily be down, it may need to update the default ring that
 * is used by MAC such that it refers to a link that can actively be used to
 * send traffic. Note that this is different from the case where the port has
 * been removed from the group. In those cases, all of the rings will be torn
 * down because the ring will no longer exist. It's important to give aggr a
 * case where the rings can still exist such that it may be able to continue
 * to send LACP PDUs to potentially restore the link.
 *
 * Finally, we explicitly don't do anything if the ring hasn't been enabled
 * yet. This is to help out aggr which doesn't really know the internal state
 * that MAC does about the rings and can't know that it's not quite ready for
 * use yet.
 */
void
mac_hwring_set_default(mac_handle_t mh, mac_ring_handle_t rh)
{
	mac_impl_t *mip = (mac_impl_t *)mh;
	mac_ring_t *ring = (mac_ring_t *)rh;

	ASSERT(MAC_PERIM_HELD(mh));
	VERIFY(mip->mi_state_flags & MIS_IS_AGGR);

	if (ring->mr_state != MR_INUSE)
		return;

	mip->mi_default_tx_ring = rh;
}

int
mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr)
{
	mac_group_t *group = (mac_group_t *)gh;

	return (mac_group_addmac(group, addr));
}

int
mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr)
{
	mac_group_t *group = (mac_group_t *)gh;

	return (mac_group_remmac(group, addr));
}

/*
 * Set the RX group to be shared/reserved. Note that the group must be
 * started/stopped outside of this function.
 */
void
mac_set_group_state(mac_group_t *grp, mac_group_state_t state)
{
	/*
	 * If there is no change in the group state, just return.
	 */
	if (grp->mrg_state == state)
		return;

	switch (state) {
	case MAC_GROUP_STATE_RESERVED:
		/*
		 * Successfully reserved the group.
		 *
		 * Given that there is an exclusive client controlling this
		 * group, we enable the group level polling when available,
		 * so that SRSs get to turn on/off individual rings they're
		 * assigned to.
		 */
		ASSERT(MAC_PERIM_HELD(grp->mrg_mh));

		if (grp->mrg_type == MAC_RING_TYPE_RX &&
		    GROUP_INTR_DISABLE_FUNC(grp) != NULL) {
			GROUP_INTR_DISABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp));
		}
		break;

	case MAC_GROUP_STATE_SHARED:
		/*
		 * Set all rings of this group to software classified.
		 * If the group has an overriding interrupt, then re-enable it.
		 */
1769 */ 1770 ASSERT(MAC_PERIM_HELD(grp->mrg_mh)); 1771 1772 if (grp->mrg_type == MAC_RING_TYPE_RX && 1773 GROUP_INTR_ENABLE_FUNC(grp) != NULL) { 1774 GROUP_INTR_ENABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp)); 1775 } 1776 /* The ring is not available for reservations any more */ 1777 break; 1778 1779 case MAC_GROUP_STATE_REGISTERED: 1780 /* Also callable from mac_register, perim is not held */ 1781 break; 1782 1783 default: 1784 ASSERT(B_FALSE); 1785 break; 1786 } 1787 1788 grp->mrg_state = state; 1789 } 1790 1791 /* 1792 * Quiesce future hardware classified packets for the specified Rx ring 1793 */ 1794 static void 1795 mac_rx_ring_quiesce(mac_ring_t *rx_ring, uint_t ring_flag) 1796 { 1797 ASSERT(rx_ring->mr_classify_type == MAC_HW_CLASSIFIER); 1798 ASSERT(ring_flag == MR_CONDEMNED || ring_flag == MR_QUIESCE); 1799 1800 mutex_enter(&rx_ring->mr_lock); 1801 rx_ring->mr_flag |= ring_flag; 1802 while (rx_ring->mr_refcnt != 0) 1803 cv_wait(&rx_ring->mr_cv, &rx_ring->mr_lock); 1804 mutex_exit(&rx_ring->mr_lock); 1805 } 1806 1807 /* 1808 * Please see mac_tx for details about the per cpu locking scheme 1809 */ 1810 static void 1811 mac_tx_lock_all(mac_client_impl_t *mcip) 1812 { 1813 int i; 1814 1815 for (i = 0; i <= mac_tx_percpu_cnt; i++) 1816 mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock); 1817 } 1818 1819 static void 1820 mac_tx_unlock_all(mac_client_impl_t *mcip) 1821 { 1822 int i; 1823 1824 for (i = mac_tx_percpu_cnt; i >= 0; i--) 1825 mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock); 1826 } 1827 1828 static void 1829 mac_tx_unlock_allbutzero(mac_client_impl_t *mcip) 1830 { 1831 int i; 1832 1833 for (i = mac_tx_percpu_cnt; i > 0; i--) 1834 mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock); 1835 } 1836 1837 static int 1838 mac_tx_sum_refcnt(mac_client_impl_t *mcip) 1839 { 1840 int i; 1841 int refcnt = 0; 1842 1843 for (i = 0; i <= mac_tx_percpu_cnt; i++) 1844 refcnt += mcip->mci_tx_pcpu[i].pcpu_tx_refcnt; 1845 1846 return (refcnt); 1847 } 1848 1849 /* 1850 * Stop future Tx packets coming down from the client in preparation for 1851 * quiescing the Tx side. This is needed for dynamic reclaim and reassignment 1852 * of rings between clients 1853 */ 1854 void 1855 mac_tx_client_block(mac_client_impl_t *mcip) 1856 { 1857 mac_tx_lock_all(mcip); 1858 mcip->mci_tx_flag |= MCI_TX_QUIESCE; 1859 while (mac_tx_sum_refcnt(mcip) != 0) { 1860 mac_tx_unlock_allbutzero(mcip); 1861 cv_wait(&mcip->mci_tx_cv, &mcip->mci_tx_pcpu[0].pcpu_tx_lock); 1862 mutex_exit(&mcip->mci_tx_pcpu[0].pcpu_tx_lock); 1863 mac_tx_lock_all(mcip); 1864 } 1865 mac_tx_unlock_all(mcip); 1866 } 1867 1868 void 1869 mac_tx_client_unblock(mac_client_impl_t *mcip) 1870 { 1871 mac_tx_lock_all(mcip); 1872 mcip->mci_tx_flag &= ~MCI_TX_QUIESCE; 1873 mac_tx_unlock_all(mcip); 1874 /* 1875 * We may fail to disable flow control for the last MAC_NOTE_TX 1876 * notification because the MAC client is quiesced. Send the 1877 * notification again. 1878 */ 1879 i_mac_notify(mcip->mci_mip, MAC_NOTE_TX); 1880 } 1881 1882 /* 1883 * Wait for an SRS to quiesce. The SRS worker will signal us when the 1884 * quiesce is done. 1885 */ 1886 static void 1887 mac_srs_quiesce_wait(mac_soft_ring_set_t *srs, uint_t srs_flag) 1888 { 1889 mutex_enter(&srs->srs_lock); 1890 while (!(srs->srs_state & srs_flag)) 1891 cv_wait(&srs->srs_quiesce_done_cv, &srs->srs_lock); 1892 mutex_exit(&srs->srs_lock); 1893 } 1894 1895 /* 1896 * Quiescing an Rx SRS is achieved by the following sequence. 
The protocol 1897 * works bottom up by cutting off packet flow from the bottommost point in the 1898 * mac, then the SRS, and then the soft rings. There are two use cases of this 1899 * mechanism. One is a temporary quiesce of the SRS, such as while changing 1900 * the Rx callbacks. Another use case is Rx SRS teardown. In the former case 1901 * the QUIESCE prefix/suffix is used and in the latter CONDEMNED is used 1902 * for the SRS and MR flags. In the former case the threads pause waiting for 1903 * a restart, while in the latter case the threads exit. The Tx SRS teardown 1904 * is also mostly similar to the above. 1905 * 1906 * 1. Stop future hardware classified packets at the lowest level in the mac. 1907 * Remove any hardware classification rule (CONDEMNED case) and mark the 1908 * rings as CONDEMNED or QUIESCE as appropriate. This prevents the mr_refcnt 1909 * from increasing. Upcalls from the driver that come through hardware 1910 * classification will be dropped in mac_rx from now on. Then we wait for 1911 * the mr_refcnt to drop to zero. When the mr_refcnt reaches zero we are 1912 * sure there aren't any upcall threads from the driver through hardware 1913 * classification. In the case of SRS teardown we also remove the 1914 * classification rule in the driver. 1915 * 1916 * 2. Stop future software classified packets by marking the flow entry with 1917 * FE_QUIESCE or FE_CONDEMNED as appropriate which prevents the refcnt from 1918 * increasing. We also remove the flow entry from the table in the latter 1919 * case. Then wait for the fe_refcnt to reach an appropriate quiescent value 1920 * that indicates there aren't any active threads using that flow entry. 1921 * 1922 * 3. Quiesce the SRS and softrings by signaling the SRS. The SRS poll thread, 1923 * SRS worker thread, and the soft ring threads are quiesced in sequence 1924 * with the SRS worker thread serving as a master controller. This 1925 * mechanism is explained in mac_srs_worker_quiesce(). 1926 * 1927 * The restart mechanism to reactivate the SRS and softrings is explained 1928 * in mac_srs_worker_restart(). Here we just signal the SRS worker to start the 1929 * restart sequence. 1930 */ 1931 void 1932 mac_rx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) 1933 { 1934 flow_entry_t *flent = srs->srs_flent; 1935 uint_t mr_flag, srs_done_flag; 1936 1937 ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); 1938 ASSERT(!(srs->srs_type & SRST_TX)); 1939 1940 if (srs_quiesce_flag == SRS_CONDEMNED) { 1941 mr_flag = MR_CONDEMNED; 1942 srs_done_flag = SRS_CONDEMNED_DONE; 1943 if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) 1944 mac_srs_client_poll_disable(srs->srs_mcip, srs); 1945 } else { 1946 ASSERT(srs_quiesce_flag == SRS_QUIESCE); 1947 mr_flag = MR_QUIESCE; 1948 srs_done_flag = SRS_QUIESCE_DONE; 1949 if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) 1950 mac_srs_client_poll_quiesce(srs->srs_mcip, srs); 1951 } 1952 1953 if (srs->srs_ring != NULL) { 1954 mac_rx_ring_quiesce(srs->srs_ring, mr_flag); 1955 } else { 1956 /* 1957 * SRS is driven by software classification. In case 1958 * of CONDEMNED, the top level teardown functions will 1959 * deal with flow removal. 1960 */ 1961 if (srs_quiesce_flag != SRS_CONDEMNED) { 1962 FLOW_MARK(flent, FE_QUIESCE); 1963 mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 1964 } 1965 } 1966 1967 /* 1968 * Signal the SRS to quiesce itself, and then cv_wait for the 1969 * SRS quiesce to complete.
The SRS worker thread will wake us 1970 * up when the quiesce is complete. 1971 */ 1972 mac_srs_signal(srs, srs_quiesce_flag); 1973 mac_srs_quiesce_wait(srs, srs_done_flag); 1974 } 1975 1976 /* 1977 * Remove an SRS. 1978 */ 1979 void 1980 mac_rx_srs_remove(mac_soft_ring_set_t *srs) 1981 { 1982 flow_entry_t *flent = srs->srs_flent; 1983 int i; 1984 1985 mac_rx_srs_quiesce(srs, SRS_CONDEMNED); 1986 /* 1987 * Locate and remove our entry in the fe_rx_srs[] array, and 1988 * adjust the fe_rx_srs array entries and array count by 1989 * moving the last entry into the vacated spot. 1990 */ 1991 mutex_enter(&flent->fe_lock); 1992 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 1993 if (flent->fe_rx_srs[i] == srs) 1994 break; 1995 } 1996 1997 ASSERT(i != 0 && i < flent->fe_rx_srs_cnt); 1998 if (i != flent->fe_rx_srs_cnt - 1) { 1999 flent->fe_rx_srs[i] = 2000 flent->fe_rx_srs[flent->fe_rx_srs_cnt - 1]; 2001 i = flent->fe_rx_srs_cnt - 1; 2002 } 2003 2004 flent->fe_rx_srs[i] = NULL; 2005 flent->fe_rx_srs_cnt--; 2006 mutex_exit(&flent->fe_lock); 2007 2008 mac_srs_free(srs); 2009 } 2010 2011 static void 2012 mac_srs_clear_flag(mac_soft_ring_set_t *srs, uint_t flag) 2013 { 2014 mutex_enter(&srs->srs_lock); 2015 srs->srs_state &= ~flag; 2016 mutex_exit(&srs->srs_lock); 2017 } 2018 2019 void 2020 mac_rx_srs_restart(mac_soft_ring_set_t *srs) 2021 { 2022 flow_entry_t *flent = srs->srs_flent; 2023 mac_ring_t *mr; 2024 2025 ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); 2026 ASSERT((srs->srs_type & SRST_TX) == 0); 2027 2028 /* 2029 * This handles a change in the number of SRSs between the quiesce 2030 * and restart operations of a flow. 2031 */ 2032 if (!SRS_QUIESCED(srs)) 2033 return; 2034 2035 /* 2036 * Signal the SRS to restart itself. Wait for the restart to complete. 2037 * Note that we only restart the SRS if it is not marked as 2038 * permanently quiesced. 2039 */ 2040 if (!SRS_QUIESCED_PERMANENT(srs)) { 2041 mac_srs_signal(srs, SRS_RESTART); 2042 mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); 2043 mac_srs_clear_flag(srs, SRS_RESTART_DONE); 2044 2045 mac_srs_client_poll_restart(srs->srs_mcip, srs); 2046 } 2047 2048 /* Finally clear the flags to let the packets in */ 2049 mr = srs->srs_ring; 2050 if (mr != NULL) { 2051 MAC_RING_UNMARK(mr, MR_QUIESCE); 2052 /* In case the ring was stopped, safely restart it */ 2053 if (mr->mr_state != MR_INUSE) 2054 (void) mac_start_ring(mr); 2055 } else { 2056 FLOW_UNMARK(flent, FE_QUIESCE); 2057 } 2058 } 2059 2060 /* 2061 * Temporary quiesce of a flow and associated Rx SRS. 2062 * Please see block comment above mac_rx_classify_flow_rem.
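 *
 * This function is written as a flow-walker callback: see
 * mac_rx_client_quiesce() below, which applies it both to the client's
 * own flow entry and, via mac_flow_walk_nolock(), to every entry in
 * mci_subflow_tab.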
2063 */ 2064 /* ARGSUSED */ 2065 int 2066 mac_rx_classify_flow_quiesce(flow_entry_t *flent, void *arg) 2067 { 2068 int i; 2069 2070 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 2071 mac_rx_srs_quiesce((mac_soft_ring_set_t *)flent->fe_rx_srs[i], 2072 SRS_QUIESCE); 2073 } 2074 return (0); 2075 } 2076 2077 /* 2078 * Restart a flow and associated Rx SRS that has been quiesced temporarily 2079 * Please see block comment above mac_rx_classify_flow_rem 2080 */ 2081 /* ARGSUSED */ 2082 int 2083 mac_rx_classify_flow_restart(flow_entry_t *flent, void *arg) 2084 { 2085 int i; 2086 2087 for (i = 0; i < flent->fe_rx_srs_cnt; i++) 2088 mac_rx_srs_restart((mac_soft_ring_set_t *)flent->fe_rx_srs[i]); 2089 2090 return (0); 2091 } 2092 2093 void 2094 mac_srs_perm_quiesce(mac_client_handle_t mch, boolean_t on) 2095 { 2096 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2097 flow_entry_t *flent = mcip->mci_flent; 2098 mac_impl_t *mip = mcip->mci_mip; 2099 mac_soft_ring_set_t *mac_srs; 2100 int i; 2101 2102 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 2103 2104 if (flent == NULL) 2105 return; 2106 2107 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 2108 mac_srs = flent->fe_rx_srs[i]; 2109 mutex_enter(&mac_srs->srs_lock); 2110 if (on) 2111 mac_srs->srs_state |= SRS_QUIESCE_PERM; 2112 else 2113 mac_srs->srs_state &= ~SRS_QUIESCE_PERM; 2114 mutex_exit(&mac_srs->srs_lock); 2115 } 2116 } 2117 2118 void 2119 mac_rx_client_quiesce(mac_client_handle_t mch) 2120 { 2121 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2122 mac_impl_t *mip = mcip->mci_mip; 2123 2124 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 2125 2126 if (MCIP_DATAPATH_SETUP(mcip)) { 2127 (void) mac_rx_classify_flow_quiesce(mcip->mci_flent, 2128 NULL); 2129 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2130 mac_rx_classify_flow_quiesce, NULL); 2131 } 2132 } 2133 2134 void 2135 mac_rx_client_restart(mac_client_handle_t mch) 2136 { 2137 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2138 mac_impl_t *mip = mcip->mci_mip; 2139 2140 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 2141 2142 if (MCIP_DATAPATH_SETUP(mcip)) { 2143 (void) mac_rx_classify_flow_restart(mcip->mci_flent, NULL); 2144 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2145 mac_rx_classify_flow_restart, NULL); 2146 } 2147 } 2148 2149 /* 2150 * This function only quiesces the Tx SRS and softring worker threads. Callers 2151 * need to make sure that there aren't any mac client threads doing current or 2152 * future transmits in the mac before calling this function. 2153 */ 2154 void 2155 mac_tx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) 2156 { 2157 mac_client_impl_t *mcip = srs->srs_mcip; 2158 2159 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2160 2161 ASSERT(srs->srs_type & SRST_TX); 2162 ASSERT(srs_quiesce_flag == SRS_CONDEMNED || 2163 srs_quiesce_flag == SRS_QUIESCE); 2164 2165 /* 2166 * Signal the SRS to quiesce itself, and then cv_wait for the 2167 * SRS quiesce to complete. The SRS worker thread will wake us 2168 * up when the quiesce is complete 2169 */ 2170 mac_srs_signal(srs, srs_quiesce_flag); 2171 mac_srs_quiesce_wait(srs, srs_quiesce_flag == SRS_QUIESCE ? 2172 SRS_QUIESCE_DONE : SRS_CONDEMNED_DONE); 2173 } 2174 2175 void 2176 mac_tx_srs_restart(mac_soft_ring_set_t *srs) 2177 { 2178 /* 2179 * Resizing the fanout could result in creation of new SRSs. 
2180 * They may not necessarily be in the quiesced state, in which 2181 * case they need to be restarted. 2182 */ 2183 if (!SRS_QUIESCED(srs)) 2184 return; 2185 2186 mac_srs_signal(srs, SRS_RESTART); 2187 mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); 2188 mac_srs_clear_flag(srs, SRS_RESTART_DONE); 2189 } 2190 2191 /* 2192 * Temporary quiesce of a flow and its associated Tx SRS. 2193 * Please see the block comment above mac_rx_srs_quiesce. 2194 */ 2195 /* ARGSUSED */ 2196 int 2197 mac_tx_flow_quiesce(flow_entry_t *flent, void *arg) 2198 { 2199 /* 2200 * The fe_tx_srs is null for a subflow on an interface that is 2201 * not plumbed. 2202 */ 2203 if (flent->fe_tx_srs != NULL) 2204 mac_tx_srs_quiesce(flent->fe_tx_srs, SRS_QUIESCE); 2205 return (0); 2206 } 2207 2208 /* ARGSUSED */ 2209 int 2210 mac_tx_flow_restart(flow_entry_t *flent, void *arg) 2211 { 2212 /* 2213 * The fe_tx_srs is null for a subflow on an interface that is 2214 * not plumbed. 2215 */ 2216 if (flent->fe_tx_srs != NULL) 2217 mac_tx_srs_restart(flent->fe_tx_srs); 2218 return (0); 2219 } 2220 2221 static void 2222 i_mac_tx_client_quiesce(mac_client_handle_t mch, uint_t srs_quiesce_flag) 2223 { 2224 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2225 2226 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2227 2228 mac_tx_client_block(mcip); 2229 if (MCIP_TX_SRS(mcip) != NULL) { 2230 mac_tx_srs_quiesce(MCIP_TX_SRS(mcip), srs_quiesce_flag); 2231 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2232 mac_tx_flow_quiesce, NULL); 2233 } 2234 } 2235 2236 void 2237 mac_tx_client_quiesce(mac_client_handle_t mch) 2238 { 2239 i_mac_tx_client_quiesce(mch, SRS_QUIESCE); 2240 } 2241 2242 void 2243 mac_tx_client_condemn(mac_client_handle_t mch) 2244 { 2245 i_mac_tx_client_quiesce(mch, SRS_CONDEMNED); 2246 } 2247 2248 void 2249 mac_tx_client_restart(mac_client_handle_t mch) 2250 { 2251 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2252 2253 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2254 2255 mac_tx_client_unblock(mcip); 2256 if (MCIP_TX_SRS(mcip) != NULL) { 2257 mac_tx_srs_restart(MCIP_TX_SRS(mcip)); 2258 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2259 mac_tx_flow_restart, NULL); 2260 } 2261 } 2262 2263 void 2264 mac_tx_client_flush(mac_client_impl_t *mcip) 2265 { 2266 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2267 2268 mac_tx_client_quiesce((mac_client_handle_t)mcip); 2269 mac_tx_client_restart((mac_client_handle_t)mcip); 2270 } 2271 2272 void 2273 mac_client_quiesce(mac_client_impl_t *mcip) 2274 { 2275 mac_rx_client_quiesce((mac_client_handle_t)mcip); 2276 mac_tx_client_quiesce((mac_client_handle_t)mcip); 2277 } 2278 2279 void 2280 mac_client_restart(mac_client_impl_t *mcip) 2281 { 2282 mac_rx_client_restart((mac_client_handle_t)mcip); 2283 mac_tx_client_restart((mac_client_handle_t)mcip); 2284 } 2285 2286 /* 2287 * Allocate a minor number. 2288 */ 2289 minor_t 2290 mac_minor_hold(boolean_t sleep) 2291 { 2292 minor_t minor; 2293 2294 /* 2295 * Grab a value from the arena. 2296 */ 2297 atomic_inc_32(&minor_count); 2298 2299 if (sleep) 2300 minor = (uint_t)id_alloc(minor_ids); 2301 else 2302 minor = (uint_t)id_alloc_nosleep(minor_ids); 2303 2304 if (minor == 0) { 2305 atomic_dec_32(&minor_count); 2306 return (0); 2307 } 2308 2309 return (minor); 2310 } 2311 2312 /* 2313 * Release a previously allocated minor number. 2314 */ 2315 void 2316 mac_minor_rele(minor_t minor) 2317 { 2318 /* 2319 * Return the value to the arena.
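 *
 * A minimal usage sketch (illustrative only; the error value chosen
 * here is an assumption), pairing this with mac_minor_hold() above:
 *
 *	minor_t minor = mac_minor_hold(B_FALSE);
 *	if (minor == 0)
 *		return (ENOMEM);
 *	...
 *	mac_minor_rele(minor);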
2320 */ 2321 id_free(minor_ids, minor); 2322 atomic_dec_32(&minor_count); 2323 } 2324 2325 uint32_t 2326 mac_no_notification(mac_handle_t mh) 2327 { 2328 mac_impl_t *mip = (mac_impl_t *)mh; 2329 2330 return (((mip->mi_state_flags & MIS_LEGACY) != 0) ? 2331 mip->mi_capab_legacy.ml_unsup_note : 0); 2332 } 2333 2334 /* 2335 * Prevent any new opens of this mac in preparation for unregister. 2336 */ 2337 int 2338 i_mac_disable(mac_impl_t *mip) 2339 { 2340 mac_client_impl_t *mcip; 2341 2342 rw_enter(&i_mac_impl_lock, RW_WRITER); 2343 if (mip->mi_state_flags & MIS_DISABLED) { 2344 /* Already disabled, return success */ 2345 rw_exit(&i_mac_impl_lock); 2346 return (0); 2347 } 2348 /* 2349 * See if there are any other references to this mac_t (e.g., VLANs). 2350 * If so return failure. If all the other checks below pass, then 2351 * set mi_disabled atomically under the i_mac_impl_lock to prevent 2352 * any new VLANs from being created or new mac client opens of this 2353 * mac end point. 2354 */ 2355 if (mip->mi_ref > 0) { 2356 rw_exit(&i_mac_impl_lock); 2357 return (EBUSY); 2358 } 2359 2360 /* 2361 * MAC clients must delete all multicast groups they join before 2362 * closing. Broadcast groups are reference counted; the last client 2363 * to delete the group will wait till the group is physically 2364 * deleted. Since all clients have closed this mac end point, 2365 * mi_bcast_ngrps must be zero at this point. 2366 */ 2367 ASSERT(mip->mi_bcast_ngrps == 0); 2368 2369 /* 2370 * Don't let go of this if it has some flows. 2371 * All other code guarantees no flows are added to a disabled 2372 * mac, therefore it is sufficient to check for the flow table 2373 * only here. 2374 */ 2375 mcip = mac_primary_client_handle(mip); 2376 if ((mcip != NULL) && mac_link_has_flows((mac_client_handle_t)mcip)) { 2377 rw_exit(&i_mac_impl_lock); 2378 return (ENOTEMPTY); 2379 } 2380 2381 mip->mi_state_flags |= MIS_DISABLED; 2382 rw_exit(&i_mac_impl_lock); 2383 return (0); 2384 } 2385 2386 int 2387 mac_disable_nowait(mac_handle_t mh) 2388 { 2389 mac_impl_t *mip = (mac_impl_t *)mh; 2390 int err; 2391 2392 if ((err = i_mac_perim_enter_nowait(mip)) != 0) 2393 return (err); 2394 err = i_mac_disable(mip); 2395 i_mac_perim_exit(mip); 2396 return (err); 2397 } 2398 2399 int 2400 mac_disable(mac_handle_t mh) 2401 { 2402 mac_impl_t *mip = (mac_impl_t *)mh; 2403 int err; 2404 2405 i_mac_perim_enter(mip); 2406 err = i_mac_disable(mip); 2407 i_mac_perim_exit(mip); 2408 2409 /* 2410 * Clean up notification thread and wait for it to exit. 2411 */ 2412 if (err == 0) 2413 i_mac_notify_exit(mip); 2414 2415 return (err); 2416 } 2417 2418 /* 2419 * Called when the MAC instance has a non-empty flow table, to de-multiplex 2420 * incoming packets to the right flow. 2421 * The MAC's rw lock is assumed held as a READER. 2422 */ 2423 /* ARGSUSED */ 2424 static mblk_t * 2425 mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp) 2426 { 2427 flow_entry_t *flent = NULL; 2428 uint_t flags = FLOW_INBOUND; 2429 int err; 2430 2431 /* 2432 * If the mac is a port of an aggregation, pass FLOW_IGNORE_VLAN 2433 * to mac_flow_lookup() so that the VLAN packets can be successfully 2434 * passed to the non-VLAN aggregation flows. 2435 * 2436 * Note that there is possibly a race between this and 2437 * mac_unicast_remove/add() and VLAN packets could be incorrectly 2438 * classified to non-VLAN flows of non-aggregation mac clients. These 2439 * VLAN packets will then be filtered out by the mac module.
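 *
 * Return convention (see the body below): the mblk is handed back to
 * the caller if it could not be classified to a flow, so the caller
 * can fall back to its default delivery path; NULL is returned once
 * the packet has been passed to the flow's callback.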
2440 */ 2441 if ((mip->mi_state_flags & MIS_EXCLUSIVE) != 0) 2442 flags |= FLOW_IGNORE_VLAN; 2443 2444 err = mac_flow_lookup(mip->mi_flow_tab, mp, flags, &flent); 2445 if (err != 0) { 2446 /* no registered receive function */ 2447 return (mp); 2448 } else { 2449 mac_client_impl_t *mcip; 2450 2451 /* 2452 * This flent might just be an additional one on the MAC client, 2453 * i.e., for classification purposes (different fdesc); however, 2454 * the resources, SRS et al., are in the mci_flent, so if 2455 * this isn't the mci_flent, we need to get it. 2456 */ 2457 if ((mcip = flent->fe_mcip) != NULL && 2458 mcip->mci_flent != flent) { 2459 FLOW_REFRELE(flent); 2460 flent = mcip->mci_flent; 2461 FLOW_TRY_REFHOLD(flent, err); 2462 if (err != 0) 2463 return (mp); 2464 } 2465 (flent->fe_cb_fn)(flent->fe_cb_arg1, flent->fe_cb_arg2, mp, 2466 B_FALSE); 2467 FLOW_REFRELE(flent); 2468 } 2469 return (NULL); 2470 } 2471 2472 mblk_t * 2473 mac_rx_flow(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) 2474 { 2475 mac_impl_t *mip = (mac_impl_t *)mh; 2476 mblk_t *bp, *bp1, **bpp, *list = NULL; 2477 2478 /* 2479 * We walk the chain and attempt to classify each packet. 2480 * The packets that couldn't be classified will be returned 2481 * back to the caller. 2482 */ 2483 bp = mp_chain; 2484 bpp = &list; 2485 while (bp != NULL) { 2486 bp1 = bp; 2487 bp = bp->b_next; 2488 bp1->b_next = NULL; 2489 2490 if (mac_rx_classify(mip, mrh, bp1) != NULL) { 2491 *bpp = bp1; 2492 bpp = &bp1->b_next; 2493 } 2494 } 2495 return (list); 2496 } 2497 2498 static int 2499 mac_tx_flow_srs_wakeup(flow_entry_t *flent, void *arg) 2500 { 2501 mac_ring_handle_t ring = arg; 2502 2503 if (flent->fe_tx_srs) 2504 mac_tx_srs_wakeup(flent->fe_tx_srs, ring); 2505 return (0); 2506 } 2507 2508 void 2509 i_mac_tx_srs_notify(mac_impl_t *mip, mac_ring_handle_t ring) 2510 { 2511 mac_client_impl_t *cclient; 2512 mac_soft_ring_set_t *mac_srs; 2513 2514 /* 2515 * After grabbing the mi_rw_lock, the list of clients can't change. 2516 * If there are any clients, mi_disabled must be B_FALSE and can't 2517 * get set while there are clients. If there aren't any clients we 2518 * don't do anything. In any case the mip has to be valid. The driver 2519 * must make sure that it goes single threaded (with respect to mac 2520 * calls) and wait for all pending mac calls to finish before calling 2521 * mac_unregister. 2522 */ 2523 rw_enter(&i_mac_impl_lock, RW_READER); 2524 if (mip->mi_state_flags & MIS_DISABLED) { 2525 rw_exit(&i_mac_impl_lock); 2526 return; 2527 } 2528 2529 /* 2530 * Get the MAC Tx SRS by walking the mac_client_handle list. 2531 */ 2532 rw_enter(&mip->mi_rw_lock, RW_READER); 2533 for (cclient = mip->mi_clients_list; cclient != NULL; 2534 cclient = cclient->mci_client_next) { 2535 if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) { 2536 mac_tx_srs_wakeup(mac_srs, ring); 2537 } else { 2538 /* 2539 * Aggr opens underlying ports in exclusive mode 2540 * and registers flow control callbacks using 2541 * mac_tx_client_notify(). When opened in 2542 * exclusive mode, Tx SRS won't be created 2543 * during mac_unicast_add().
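 *
 * A sketch of that registration (illustrative only; the callback
 * and argument names here are assumptions):
 *
 *	mte = mac_tx_client_notify(port_mch,
 *	    aggr_tx_notify_cb, port);
 *
 * which is why the wakeup below is delivered through
 * mac_tx_invoke_callbacks() instead of a Tx SRS.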
2544 */ 2545 if (cclient->mci_state_flags & MCIS_EXCLUSIVE) { 2546 mac_tx_invoke_callbacks(cclient, 2547 (mac_tx_cookie_t)ring); 2548 } 2549 } 2550 (void) mac_flow_walk(cclient->mci_subflow_tab, 2551 mac_tx_flow_srs_wakeup, ring); 2552 } 2553 rw_exit(&mip->mi_rw_lock); 2554 rw_exit(&i_mac_impl_lock); 2555 } 2556 2557 /* ARGSUSED */ 2558 void 2559 mac_multicast_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg, 2560 boolean_t add) 2561 { 2562 mac_impl_t *mip = (mac_impl_t *)mh; 2563 2564 i_mac_perim_enter((mac_impl_t *)mh); 2565 /* 2566 * If no specific refresh function was given then default to the 2567 * driver's m_multicst entry point. 2568 */ 2569 if (refresh == NULL) { 2570 refresh = mip->mi_multicst; 2571 arg = mip->mi_driver; 2572 } 2573 2574 mac_bcast_refresh(mip, refresh, arg, add); 2575 i_mac_perim_exit((mac_impl_t *)mh); 2576 } 2577 2578 void 2579 mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg) 2580 { 2581 mac_impl_t *mip = (mac_impl_t *)mh; 2582 2583 /* 2584 * If no specific refresh function was given then default to the 2585 * driver's m_promisc entry point. 2586 */ 2587 if (refresh == NULL) { 2588 refresh = mip->mi_setpromisc; 2589 arg = mip->mi_driver; 2590 } 2591 ASSERT(refresh != NULL); 2592 2593 /* 2594 * Call the refresh function with the current promiscuity. 2595 */ 2596 refresh(arg, (mip->mi_devpromisc != 0)); 2597 } 2598 2599 /* 2600 * The mac client requests that the mac not change its margin size to 2601 * be less than the specified value. If "current" is B_TRUE, then the client 2602 * requests the mac not to change its margin size to be smaller than the 2603 * current size. Further, return the current margin size value in this case. 2604 * 2605 * We keep every requested size in an ordered list from largest to smallest. 2606 */ 2607 int 2608 mac_margin_add(mac_handle_t mh, uint32_t *marginp, boolean_t current) 2609 { 2610 mac_impl_t *mip = (mac_impl_t *)mh; 2611 mac_margin_req_t **pp, *p; 2612 int err = 0; 2613 2614 rw_enter(&(mip->mi_rw_lock), RW_WRITER); 2615 if (current) 2616 *marginp = mip->mi_margin; 2617 2618 /* 2619 * If the current margin value cannot satisfy the margin requested, 2620 * return ENOTSUP directly. 2621 */ 2622 if (*marginp > mip->mi_margin) { 2623 err = ENOTSUP; 2624 goto done; 2625 } 2626 2627 /* 2628 * Check whether the given margin is already in the list. If so, 2629 * bump the reference count. 2630 */ 2631 for (pp = &mip->mi_mmrp; (p = *pp) != NULL; pp = &p->mmr_nextp) { 2632 if (p->mmr_margin == *marginp) { 2633 /* 2634 * The margin requested is already in the list, 2635 * so just bump the reference count. 2636 */ 2637 p->mmr_ref++; 2638 goto done; 2639 } 2640 if (p->mmr_margin < *marginp) 2641 break; 2642 } 2643 2644 2645 p = kmem_zalloc(sizeof (mac_margin_req_t), KM_SLEEP); 2646 p->mmr_margin = *marginp; 2647 p->mmr_ref++; 2648 p->mmr_nextp = *pp; 2649 *pp = p; 2650 2651 done: 2652 rw_exit(&(mip->mi_rw_lock)); 2653 return (err); 2654 } 2655 2656 /* 2657 * The mac client requests to cancel its previous mac_margin_add() request. 2658 * We remove the requested margin size from the list. 2659 */ 2660 int 2661 mac_margin_remove(mac_handle_t mh, uint32_t margin) 2662 { 2663 mac_impl_t *mip = (mac_impl_t *)mh; 2664 mac_margin_req_t **pp, *p; 2665 int err = 0; 2666 2667 rw_enter(&(mip->mi_rw_lock), RW_WRITER); 2668 /* 2669 * Find the entry in the list for the given margin.
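 *
 * The list is kept sorted from largest to smallest margin (see
 * mac_margin_add() above), so the head of the list is always the
 * binding request; mac_margin_update() below relies on this when it
 * checks only mi_mmrp->mmr_margin.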
2670 */ 2671 for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) { 2672 if (p->mmr_margin == margin) { 2673 if (--p->mmr_ref == 0) 2674 break; 2675 2676 /* 2677 * There is still a reference to this margin so 2678 * there's nothing more to do. 2679 */ 2680 goto done; 2681 } 2682 } 2683 2684 /* 2685 * We did not find an entry for the given margin. 2686 */ 2687 if (p == NULL) { 2688 err = ENOENT; 2689 goto done; 2690 } 2691 2692 ASSERT(p->mmr_ref == 0); 2693 2694 /* 2695 * Remove it from the list. 2696 */ 2697 *pp = p->mmr_nextp; 2698 kmem_free(p, sizeof (mac_margin_req_t)); 2699 done: 2700 rw_exit(&(mip->mi_rw_lock)); 2701 return (err); 2702 } 2703 2704 boolean_t 2705 mac_margin_update(mac_handle_t mh, uint32_t margin) 2706 { 2707 mac_impl_t *mip = (mac_impl_t *)mh; 2708 uint32_t margin_needed = 0; 2709 2710 rw_enter(&(mip->mi_rw_lock), RW_WRITER); 2711 2712 if (mip->mi_mmrp != NULL) 2713 margin_needed = mip->mi_mmrp->mmr_margin; 2714 2715 if (margin_needed <= margin) 2716 mip->mi_margin = margin; 2717 2718 rw_exit(&(mip->mi_rw_lock)); 2719 2720 if (margin_needed <= margin) 2721 i_mac_notify(mip, MAC_NOTE_MARGIN); 2722 2723 return (margin_needed <= margin); 2724 } 2725 2726 /* 2727 * MAC clients use this interface to request that a MAC device not change its 2728 * MTU below the specified amount. At this time, that amount must be within the 2729 * range of the device's current minimum and the device's current maximum. E.g., a 2730 * client cannot request a 3000-byte MTU when the device's MTU is currently 2731 * 2000. 2732 * 2733 * If "current" is set to B_TRUE, then the request is simply to reserve the 2734 * current underlying mac's maximum for this mac client and return it in mtup. 2735 */ 2736 int 2737 mac_mtu_add(mac_handle_t mh, uint32_t *mtup, boolean_t current) 2738 { 2739 mac_impl_t *mip = (mac_impl_t *)mh; 2740 mac_mtu_req_t *prev, *cur; 2741 mac_propval_range_t mpr; 2742 int err; 2743 2744 i_mac_perim_enter(mip); 2745 rw_enter(&mip->mi_rw_lock, RW_WRITER); 2746 2747 if (current == B_TRUE) 2748 *mtup = mip->mi_sdu_max; 2749 mpr.mpr_count = 1; 2750 err = mac_prop_info(mh, MAC_PROP_MTU, "mtu", NULL, 0, &mpr, NULL); 2751 if (err != 0) { 2752 rw_exit(&mip->mi_rw_lock); 2753 i_mac_perim_exit(mip); 2754 return (err); 2755 } 2756 2757 if (*mtup > mip->mi_sdu_max || 2758 *mtup < mpr.mpr_range_uint32[0].mpur_min) { 2759 rw_exit(&mip->mi_rw_lock); 2760 i_mac_perim_exit(mip); 2761 return (ENOTSUP); 2762 } 2763 2764 prev = NULL; 2765 for (cur = mip->mi_mtrp; cur != NULL; cur = cur->mtr_nextp) { 2766 if (*mtup == cur->mtr_mtu) { 2767 cur->mtr_ref++; 2768 rw_exit(&mip->mi_rw_lock); 2769 i_mac_perim_exit(mip); 2770 return (0); 2771 } 2772 2773 if (*mtup > cur->mtr_mtu) 2774 break; 2775 2776 prev = cur; 2777 } 2778 2779 cur = kmem_alloc(sizeof (mac_mtu_req_t), KM_SLEEP); 2780 cur->mtr_mtu = *mtup; 2781 cur->mtr_ref = 1; 2782 if (prev != NULL) { 2783 cur->mtr_nextp = prev->mtr_nextp; 2784 prev->mtr_nextp = cur; 2785 } else { 2786 cur->mtr_nextp = mip->mi_mtrp; 2787 mip->mi_mtrp = cur; 2788 } 2789 2790 rw_exit(&mip->mi_rw_lock); 2791 i_mac_perim_exit(mip); 2792 return (0); 2793 } 2794 2795 int 2796 mac_mtu_remove(mac_handle_t mh, uint32_t mtu) 2797 { 2798 mac_impl_t *mip = (mac_impl_t *)mh; 2799 mac_mtu_req_t *cur, *prev; 2800 2801 i_mac_perim_enter(mip); 2802 rw_enter(&mip->mi_rw_lock, RW_WRITER); 2803 2804 prev = NULL; 2805 for (cur = mip->mi_mtrp; cur != NULL; cur = cur->mtr_nextp) { 2806 if (cur->mtr_mtu == mtu) { 2807 ASSERT(cur->mtr_ref > 0); 2808 cur->mtr_ref--; 2809 if
(cur->mtr_ref == 0) { 2810 if (prev == NULL) { 2811 mip->mi_mtrp = cur->mtr_nextp; 2812 } else { 2813 prev->mtr_nextp = cur->mtr_nextp; 2814 } 2815 kmem_free(cur, sizeof (mac_mtu_req_t)); 2816 } 2817 rw_exit(&mip->mi_rw_lock); 2818 i_mac_perim_exit(mip); 2819 return (0); 2820 } 2821 2822 prev = cur; 2823 } 2824 2825 rw_exit(&mip->mi_rw_lock); 2826 i_mac_perim_exit(mip); 2827 return (ENOENT); 2828 } 2829 2830 /* 2831 * MAC Type Plugin functions. 2832 */ 2833 2834 mactype_t * 2835 mactype_getplugin(const char *pname) 2836 { 2837 mactype_t *mtype = NULL; 2838 boolean_t tried_modload = B_FALSE; 2839 2840 mutex_enter(&i_mactype_lock); 2841 2842 find_registered_mactype: 2843 if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname, 2844 (mod_hash_val_t *)&mtype) != 0) { 2845 if (!tried_modload) { 2846 /* 2847 * If the plugin has not yet been loaded, then 2848 * attempt to load it now. If modload() succeeds, 2849 * the plugin should have registered using 2850 * mactype_register(), in which case we can go back 2851 * and attempt to find it again. 2852 */ 2853 if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) { 2854 tried_modload = B_TRUE; 2855 goto find_registered_mactype; 2856 } 2857 } 2858 } else { 2859 /* 2860 * Note that there's no danger that the plugin we've loaded 2861 * could be unloaded between the modload() step and the 2862 * reference count bump here, as we're holding 2863 * i_mactype_lock, which mactype_unregister() also holds. 2864 */ 2865 atomic_inc_32(&mtype->mt_ref); 2866 } 2867 2868 mutex_exit(&i_mactype_lock); 2869 return (mtype); 2870 } 2871 2872 mactype_register_t * 2873 mactype_alloc(uint_t mactype_version) 2874 { 2875 mactype_register_t *mtrp; 2876 2877 /* 2878 * Make sure there isn't a version mismatch between the plugin and 2879 * the framework. In the future, if multiple versions are 2880 * supported, this check could become more sophisticated. 2881 */ 2882 if (mactype_version != MACTYPE_VERSION) 2883 return (NULL); 2884 2885 mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP); 2886 mtrp->mtr_version = mactype_version; 2887 return (mtrp); 2888 } 2889 2890 void 2891 mactype_free(mactype_register_t *mtrp) 2892 { 2893 kmem_free(mtrp, sizeof (mactype_register_t)); 2894 } 2895 2896 int 2897 mactype_register(mactype_register_t *mtrp) 2898 { 2899 mactype_t *mtp; 2900 mactype_ops_t *ops = mtrp->mtr_ops; 2901 2902 /* Do some sanity checking before we register this MAC type. */ 2903 if (mtrp->mtr_ident == NULL || ops == NULL) 2904 return (EINVAL); 2905 2906 /* 2907 * Verify that all mandatory callbacks are set in the ops 2908 * vector. 
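 *
 * For context, a plugin typically arrives here via a sequence like
 * the following sketch (the ident string and the ops vector name are
 * illustrative assumptions):
 *
 *	mactype_register_t *mtrp = mactype_alloc(MACTYPE_VERSION);
 *	mtrp->mtr_ident = "myplugin";
 *	mtrp->mtr_ops = &my_mactype_ops;
 *	err = mactype_register(mtrp);
 *	mactype_free(mtrp);
 *
 * and it is that mtr_ops vector which is checked below.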
2909 */ 2910 if (ops->mtops_unicst_verify == NULL || 2911 ops->mtops_multicst_verify == NULL || 2912 ops->mtops_sap_verify == NULL || 2913 ops->mtops_header == NULL || 2914 ops->mtops_header_info == NULL) { 2915 return (EINVAL); 2916 } 2917 2918 mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP); 2919 mtp->mt_ident = mtrp->mtr_ident; 2920 mtp->mt_ops = *ops; 2921 mtp->mt_type = mtrp->mtr_mactype; 2922 mtp->mt_nativetype = mtrp->mtr_nativetype; 2923 mtp->mt_addr_length = mtrp->mtr_addrlen; 2924 if (mtrp->mtr_brdcst_addr != NULL) { 2925 mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP); 2926 bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr, 2927 mtrp->mtr_addrlen); 2928 } 2929 2930 mtp->mt_stats = mtrp->mtr_stats; 2931 mtp->mt_statcount = mtrp->mtr_statcount; 2932 2933 mtp->mt_mapping = mtrp->mtr_mapping; 2934 mtp->mt_mappingcount = mtrp->mtr_mappingcount; 2935 2936 if (mod_hash_insert(i_mactype_hash, 2937 (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) { 2938 if (mtp->mt_brdcst_addr != NULL) kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); 2939 kmem_free(mtp, sizeof (*mtp)); 2940 return (EEXIST); 2941 } 2942 return (0); 2943 } 2944 2945 int 2946 mactype_unregister(const char *ident) 2947 { 2948 mactype_t *mtp; 2949 mod_hash_val_t val; 2950 int err; 2951 2952 /* 2953 * Let's not allow MAC drivers to use this plugin while we're 2954 * trying to unregister it. Holding i_mactype_lock also prevents a 2955 * plugin from unregistering while a MAC driver is attempting to 2956 * hold a reference to it in mactype_getplugin(). 2957 */ 2958 mutex_enter(&i_mactype_lock); 2959 2960 if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident, 2961 (mod_hash_val_t *)&mtp)) != 0) { 2962 /* A plugin is trying to unregister, but it never registered. */ 2963 err = ENXIO; 2964 goto done; 2965 } 2966 2967 if (mtp->mt_ref != 0) { 2968 err = EBUSY; 2969 goto done; 2970 } 2971 2972 err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val); 2973 ASSERT(err == 0); 2974 if (err != 0) { 2975 /* This should never happen, thus the ASSERT() above. */ 2976 err = EINVAL; 2977 goto done; 2978 } 2979 ASSERT(mtp == (mactype_t *)val); 2980 2981 if (mtp->mt_brdcst_addr != NULL) 2982 kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); 2983 kmem_free(mtp, sizeof (mactype_t)); 2984 done: 2985 mutex_exit(&i_mactype_lock); 2986 return (err); 2987 } 2988 2989 /* 2990 * Checks the size of the value specified for a property as 2991 * part of a property operation. Returns B_TRUE if the size is 2992 * correct, B_FALSE otherwise.
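 *
 * For example, a MAC_PROP_MTU operation must supply at least
 * sizeof (uint32_t) bytes of value buffer, and any range query
 * (is_range == B_TRUE) must be able to hold a mac_propval_range_t.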
2993 */ 2994 boolean_t 2995 mac_prop_check_size(mac_prop_id_t id, uint_t valsize, boolean_t is_range) 2996 { 2997 uint_t minsize = 0; 2998 2999 if (is_range) 3000 return (valsize >= sizeof (mac_propval_range_t)); 3001 3002 switch (id) { 3003 case MAC_PROP_ZONE: 3004 minsize = sizeof (dld_ioc_zid_t); 3005 break; 3006 case MAC_PROP_AUTOPUSH: 3007 if (valsize != 0) 3008 minsize = sizeof (struct dlautopush); 3009 break; 3010 case MAC_PROP_TAGMODE: 3011 minsize = sizeof (link_tagmode_t); 3012 break; 3013 case MAC_PROP_RESOURCE: 3014 case MAC_PROP_RESOURCE_EFF: 3015 minsize = sizeof (mac_resource_props_t); 3016 break; 3017 case MAC_PROP_DUPLEX: 3018 minsize = sizeof (link_duplex_t); 3019 break; 3020 case MAC_PROP_SPEED: 3021 minsize = sizeof (uint64_t); 3022 break; 3023 case MAC_PROP_STATUS: 3024 minsize = sizeof (link_state_t); 3025 break; 3026 case MAC_PROP_AUTONEG: 3027 case MAC_PROP_EN_AUTONEG: 3028 minsize = sizeof (uint8_t); 3029 break; 3030 case MAC_PROP_MTU: 3031 case MAC_PROP_LLIMIT: 3032 case MAC_PROP_LDECAY: 3033 minsize = sizeof (uint32_t); 3034 break; 3035 case MAC_PROP_FLOWCTRL: 3036 minsize = sizeof (link_flowctrl_t); 3037 break; 3038 case MAC_PROP_ADV_10GFDX_CAP: 3039 case MAC_PROP_EN_10GFDX_CAP: 3040 case MAC_PROP_ADV_1000HDX_CAP: 3041 case MAC_PROP_EN_1000HDX_CAP: 3042 case MAC_PROP_ADV_100FDX_CAP: 3043 case MAC_PROP_EN_100FDX_CAP: 3044 case MAC_PROP_ADV_100HDX_CAP: 3045 case MAC_PROP_EN_100HDX_CAP: 3046 case MAC_PROP_ADV_10FDX_CAP: 3047 case MAC_PROP_EN_10FDX_CAP: 3048 case MAC_PROP_ADV_10HDX_CAP: 3049 case MAC_PROP_EN_10HDX_CAP: 3050 case MAC_PROP_ADV_100T4_CAP: 3051 case MAC_PROP_EN_100T4_CAP: 3052 minsize = sizeof (uint8_t); 3053 break; 3054 case MAC_PROP_PVID: 3055 minsize = sizeof (uint16_t); 3056 break; 3057 case MAC_PROP_IPTUN_HOPLIMIT: 3058 minsize = sizeof (uint32_t); 3059 break; 3060 case MAC_PROP_IPTUN_ENCAPLIMIT: 3061 minsize = sizeof (uint32_t); 3062 break; 3063 case MAC_PROP_MAX_TX_RINGS_AVAIL: 3064 case MAC_PROP_MAX_RX_RINGS_AVAIL: 3065 case MAC_PROP_MAX_RXHWCLNT_AVAIL: 3066 case MAC_PROP_MAX_TXHWCLNT_AVAIL: 3067 minsize = sizeof (uint_t); 3068 break; 3069 case MAC_PROP_WL_ESSID: 3070 minsize = sizeof (wl_linkstatus_t); 3071 break; 3072 case MAC_PROP_WL_BSSID: 3073 minsize = sizeof (wl_bssid_t); 3074 break; 3075 case MAC_PROP_WL_BSSTYPE: 3076 minsize = sizeof (wl_bss_type_t); 3077 break; 3078 case MAC_PROP_WL_LINKSTATUS: 3079 minsize = sizeof (wl_linkstatus_t); 3080 break; 3081 case MAC_PROP_WL_DESIRED_RATES: 3082 minsize = sizeof (wl_rates_t); 3083 break; 3084 case MAC_PROP_WL_SUPPORTED_RATES: 3085 minsize = sizeof (wl_rates_t); 3086 break; 3087 case MAC_PROP_WL_AUTH_MODE: 3088 minsize = sizeof (wl_authmode_t); 3089 break; 3090 case MAC_PROP_WL_ENCRYPTION: 3091 minsize = sizeof (wl_encryption_t); 3092 break; 3093 case MAC_PROP_WL_RSSI: 3094 minsize = sizeof (wl_rssi_t); 3095 break; 3096 case MAC_PROP_WL_PHY_CONFIG: 3097 minsize = sizeof (wl_phy_conf_t); 3098 break; 3099 case MAC_PROP_WL_CAPABILITY: 3100 minsize = sizeof (wl_capability_t); 3101 break; 3102 case MAC_PROP_WL_WPA: 3103 minsize = sizeof (wl_wpa_t); 3104 break; 3105 case MAC_PROP_WL_SCANRESULTS: 3106 minsize = sizeof (wl_wpa_ess_t); 3107 break; 3108 case MAC_PROP_WL_POWER_MODE: 3109 minsize = sizeof (wl_ps_mode_t); 3110 break; 3111 case MAC_PROP_WL_RADIO: 3112 minsize = sizeof (wl_radio_t); 3113 break; 3114 case MAC_PROP_WL_ESS_LIST: 3115 minsize = sizeof (wl_ess_list_t); 3116 break; 3117 case MAC_PROP_WL_KEY_TAB: 3118 minsize = sizeof (wl_wep_key_tab_t); 3119 break; 3120 case 
MAC_PROP_WL_CREATE_IBSS: 3121 minsize = sizeof (wl_create_ibss_t); 3122 break; 3123 case MAC_PROP_WL_SETOPTIE: 3124 minsize = sizeof (wl_wpa_ie_t); 3125 break; 3126 case MAC_PROP_WL_DELKEY: 3127 minsize = sizeof (wl_del_key_t); 3128 break; 3129 case MAC_PROP_WL_KEY: 3130 minsize = sizeof (wl_key_t); 3131 break; 3132 case MAC_PROP_WL_MLME: 3133 minsize = sizeof (wl_mlme_t); 3134 break; 3135 } 3136 3137 return (valsize >= minsize); 3138 } 3139 3140 /* 3141 * mac_set_prop() sets MAC or hardware driver properties: 3142 * 3143 * - MAC-managed properties such as resource properties include maxbw, 3144 * priority, and cpu binding list, as well as the default port VID 3145 * used by bridging. These properties are consumed by the MAC layer 3146 * itself and not passed down to the driver. For resource control 3147 * properties, this function invokes mac_set_resources() which will 3148 * cache the property value in mac_impl_t and may call 3149 * mac_client_set_resource() to update property value of the primary 3150 * mac client, if it exists. 3151 * 3152 * - Properties which act on the hardware and must be passed to the 3153 * driver, such as MTU, through the driver's mc_setprop() entry point. 3154 */ 3155 int 3156 mac_set_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, 3157 uint_t valsize) 3158 { 3159 int err = ENOTSUP; 3160 mac_impl_t *mip = (mac_impl_t *)mh; 3161 3162 ASSERT(MAC_PERIM_HELD(mh)); 3163 3164 switch (id) { 3165 case MAC_PROP_RESOURCE: { 3166 mac_resource_props_t *mrp; 3167 3168 /* call mac_set_resources() for MAC properties */ 3169 ASSERT(valsize >= sizeof (mac_resource_props_t)); 3170 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 3171 bcopy(val, mrp, sizeof (*mrp)); 3172 err = mac_set_resources(mh, mrp); 3173 kmem_free(mrp, sizeof (*mrp)); 3174 break; 3175 } 3176 3177 case MAC_PROP_PVID: 3178 ASSERT(valsize >= sizeof (uint16_t)); 3179 if (mip->mi_state_flags & MIS_IS_VNIC) 3180 return (EINVAL); 3181 err = mac_set_pvid(mh, *(uint16_t *)val); 3182 break; 3183 3184 case MAC_PROP_MTU: { 3185 uint32_t mtu; 3186 3187 ASSERT(valsize >= sizeof (uint32_t)); 3188 bcopy(val, &mtu, sizeof (mtu)); 3189 err = mac_set_mtu(mh, mtu, NULL); 3190 break; 3191 } 3192 3193 case MAC_PROP_LLIMIT: 3194 case MAC_PROP_LDECAY: { 3195 uint32_t learnval; 3196 3197 if (valsize < sizeof (learnval) || 3198 (mip->mi_state_flags & MIS_IS_VNIC)) 3199 return (EINVAL); 3200 bcopy(val, &learnval, sizeof (learnval)); 3201 if (learnval == 0 && id == MAC_PROP_LDECAY) 3202 return (EINVAL); 3203 if (id == MAC_PROP_LLIMIT) 3204 mip->mi_llimit = learnval; 3205 else 3206 mip->mi_ldecay = learnval; 3207 err = 0; 3208 break; 3209 } 3210 3211 default: 3212 /* For other driver properties, call driver's callback */ 3213 if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) { 3214 err = mip->mi_callbacks->mc_setprop(mip->mi_driver, 3215 name, id, valsize, val); 3216 } 3217 } 3218 return (err); 3219 } 3220 3221 /* 3222 * mac_get_prop() gets MAC or device driver properties. 3223 * 3224 * If the property is a driver property, mac_get_prop() calls driver's callback 3225 * entry point to get it. 3226 * If the property is a MAC property, mac_get_prop() invokes mac_get_resources() 3227 * which returns the cached value in mac_impl_t. 
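 *
 * A usage sketch (illustrative only), reading the current MTU of a
 * mac into a caller-supplied buffer:
 *
 *	uint32_t mtu;
 *	int err;
 *
 *	err = mac_get_prop(mh, MAC_PROP_MTU, "mtu", &mtu, sizeof (mtu));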
3228 */ 3229 int 3230 mac_get_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, 3231 uint_t valsize) 3232 { 3233 int err = ENOTSUP; 3234 mac_impl_t *mip = (mac_impl_t *)mh; 3235 uint_t rings; 3236 uint_t vlinks; 3237 3238 bzero(val, valsize); 3239 3240 switch (id) { 3241 case MAC_PROP_RESOURCE: { 3242 mac_resource_props_t *mrp; 3243 3244 /* If mac property, read from cache */ 3245 ASSERT(valsize >= sizeof (mac_resource_props_t)); 3246 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 3247 mac_get_resources(mh, mrp); 3248 bcopy(mrp, val, sizeof (*mrp)); 3249 kmem_free(mrp, sizeof (*mrp)); 3250 return (0); 3251 } 3252 case MAC_PROP_RESOURCE_EFF: { 3253 mac_resource_props_t *mrp; 3254 3255 /* If mac effective property, read from client */ 3256 ASSERT(valsize >= sizeof (mac_resource_props_t)); 3257 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 3258 mac_get_effective_resources(mh, mrp); 3259 bcopy(mrp, val, sizeof (*mrp)); 3260 kmem_free(mrp, sizeof (*mrp)); 3261 return (0); 3262 } 3263 3264 case MAC_PROP_PVID: 3265 ASSERT(valsize >= sizeof (uint16_t)); 3266 if (mip->mi_state_flags & MIS_IS_VNIC) 3267 return (EINVAL); 3268 *(uint16_t *)val = mac_get_pvid(mh); 3269 return (0); 3270 3271 case MAC_PROP_LLIMIT: 3272 case MAC_PROP_LDECAY: 3273 ASSERT(valsize >= sizeof (uint32_t)); 3274 if (mip->mi_state_flags & MIS_IS_VNIC) 3275 return (EINVAL); 3276 if (id == MAC_PROP_LLIMIT) 3277 bcopy(&mip->mi_llimit, val, sizeof (mip->mi_llimit)); 3278 else 3279 bcopy(&mip->mi_ldecay, val, sizeof (mip->mi_ldecay)); 3280 return (0); 3281 3282 case MAC_PROP_MTU: { 3283 uint32_t sdu; 3284 3285 ASSERT(valsize >= sizeof (uint32_t)); 3286 mac_sdu_get2(mh, NULL, &sdu, NULL); 3287 bcopy(&sdu, val, sizeof (sdu)); 3288 3289 return (0); 3290 } 3291 case MAC_PROP_STATUS: { 3292 link_state_t link_state; 3293 3294 if (valsize < sizeof (link_state)) 3295 return (EINVAL); 3296 link_state = mac_link_get(mh); 3297 bcopy(&link_state, val, sizeof (link_state)); 3298 3299 return (0); 3300 } 3301 3302 case MAC_PROP_MAX_RX_RINGS_AVAIL: 3303 case MAC_PROP_MAX_TX_RINGS_AVAIL: 3304 ASSERT(valsize >= sizeof (uint_t)); 3305 rings = id == MAC_PROP_MAX_RX_RINGS_AVAIL ? 3306 mac_rxavail_get(mh) : mac_txavail_get(mh); 3307 bcopy(&rings, val, sizeof (uint_t)); 3308 return (0); 3309 3310 case MAC_PROP_MAX_RXHWCLNT_AVAIL: 3311 case MAC_PROP_MAX_TXHWCLNT_AVAIL: 3312 ASSERT(valsize >= sizeof (uint_t)); 3313 vlinks = id == MAC_PROP_MAX_RXHWCLNT_AVAIL ? 3314 mac_rxhwlnksavail_get(mh) : mac_txhwlnksavail_get(mh); 3315 bcopy(&vlinks, val, sizeof (uint_t)); 3316 return (0); 3317 3318 case MAC_PROP_RXRINGSRANGE: 3319 case MAC_PROP_TXRINGSRANGE: 3320 /* 3321 * The values for these properties are returned through 3322 * the MAC_PROP_RESOURCE property. 3323 */ 3324 return (0); 3325 3326 default: 3327 break; 3328 3329 } 3330 3331 /* If driver property, request from driver */ 3332 if (mip->mi_callbacks->mc_callbacks & MC_GETPROP) { 3333 err = mip->mi_callbacks->mc_getprop(mip->mi_driver, name, id, 3334 valsize, val); 3335 } 3336 3337 return (err); 3338 } 3339 3340 /* 3341 * Helper function to initialize the range structure for use in 3342 * mac_prop_info(). If a type other than uint32 is needed, it can 3343 * be passed as an additional argument.
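 *
 * For example, the rings-range code in mac_prop_info() below
 * publishes the range [1, max - 1] with
 * _mac_set_range(range, 1, max - 1).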
3344 */ 3345 static void 3346 _mac_set_range(mac_propval_range_t *range, uint32_t min, uint32_t max) 3347 { 3348 range->mpr_count = 1; 3349 range->mpr_type = MAC_PROPVAL_UINT32; 3350 range->mpr_range_uint32[0].mpur_min = min; 3351 range->mpr_range_uint32[0].mpur_max = max; 3352 } 3353 3354 /* 3355 * Returns information about the specified property, such as default 3356 * values or permissions. 3357 */ 3358 int 3359 mac_prop_info(mac_handle_t mh, mac_prop_id_t id, char *name, 3360 void *default_val, uint_t default_size, mac_propval_range_t *range, 3361 uint_t *perm) 3362 { 3363 mac_prop_info_state_t state; 3364 mac_impl_t *mip = (mac_impl_t *)mh; 3365 uint_t max; 3366 3367 /* 3368 * A property is read/write by default unless the driver says 3369 * otherwise. 3370 */ 3371 if (perm != NULL) 3372 *perm = MAC_PROP_PERM_RW; 3373 3374 if (default_val != NULL) 3375 bzero(default_val, default_size); 3376 3377 /* 3378 * First, handle framework properties for which we don't need to 3379 * involve the driver. 3380 */ 3381 switch (id) { 3382 case MAC_PROP_RESOURCE: 3383 case MAC_PROP_PVID: 3384 case MAC_PROP_LLIMIT: 3385 case MAC_PROP_LDECAY: 3386 return (0); 3387 3388 case MAC_PROP_MAX_RX_RINGS_AVAIL: 3389 case MAC_PROP_MAX_TX_RINGS_AVAIL: 3390 case MAC_PROP_MAX_RXHWCLNT_AVAIL: 3391 case MAC_PROP_MAX_TXHWCLNT_AVAIL: 3392 if (perm != NULL) 3393 *perm = MAC_PROP_PERM_READ; 3394 return (0); 3395 3396 case MAC_PROP_RXRINGSRANGE: 3397 case MAC_PROP_TXRINGSRANGE: 3398 /* 3399 * Currently, we support ranges for the RX and TX rings properties. 3400 * When we extend this support to maxbw, cpus and priority, 3401 * we should move this to mac_get_resources. 3402 * There is no default value for RX or TX rings. 3403 */ 3404 if ((mip->mi_state_flags & MIS_IS_VNIC) && 3405 mac_is_vnic_primary(mh)) { 3406 /* 3407 * We don't support setting rings for a VLAN 3408 * data link because it shares its ring with the 3409 * primary MAC client. 3410 */ 3411 if (perm != NULL) 3412 *perm = MAC_PROP_PERM_READ; 3413 if (range != NULL) 3414 range->mpr_count = 0; 3415 } else if (range != NULL) { 3416 if (mip->mi_state_flags & MIS_IS_VNIC) 3417 mh = mac_get_lower_mac_handle(mh); 3418 mip = (mac_impl_t *)mh; 3419 if ((id == MAC_PROP_RXRINGSRANGE && 3420 mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) || 3421 (id == MAC_PROP_TXRINGSRANGE && 3422 mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC)) { 3423 if (id == MAC_PROP_RXRINGSRANGE) { 3424 if ((mac_rxhwlnksavail_get(mh) + 3425 mac_rxhwlnksrsvd_get(mh)) <= 1) { 3426 /* 3427 * doesn't support groups or 3428 * rings 3429 */ 3430 range->mpr_count = 0; 3431 } else { 3432 /* 3433 * supports specifying groups, 3434 * but not rings 3435 */ 3436 _mac_set_range(range, 0, 0); 3437 } 3438 } else { 3439 if ((mac_txhwlnksavail_get(mh) + 3440 mac_txhwlnksrsvd_get(mh)) <= 1) { 3441 /* 3442 * doesn't support groups or 3443 * rings 3444 */ 3445 range->mpr_count = 0; 3446 } else { 3447 /* 3448 * supports specifying groups, 3449 * but not rings 3450 */ 3451 _mac_set_range(range, 0, 0); 3452 } 3453 } 3454 } else { 3455 max = id == MAC_PROP_RXRINGSRANGE ? 3456 mac_rxavail_get(mh) + mac_rxrsvd_get(mh) : 3457 mac_txavail_get(mh) + mac_txrsvd_get(mh); 3458 if (max <= 1) { 3459 /* 3460 * doesn't support groups or 3461 * rings 3462 */ 3463 range->mpr_count = 0; 3464 } else { 3465 /* 3466 * -1 because we have to leave out the 3467 * default ring.
3468 */ 3469 _mac_set_range(range, 1, max - 1); 3470 } 3471 } 3472 } 3473 return (0); 3474 3475 case MAC_PROP_STATUS: 3476 if (perm != NULL) 3477 *perm = MAC_PROP_PERM_READ; 3478 return (0); 3479 } 3480 3481 /* 3482 * Get the property info from the driver if it implements the 3483 * property info entry point. 3484 */ 3485 bzero(&state, sizeof (state)); 3486 3487 if (mip->mi_callbacks->mc_callbacks & MC_PROPINFO) { 3488 state.pr_default = default_val; 3489 state.pr_default_size = default_size; 3490 3491 /* 3492 * The caller specifies the maximum number of ranges 3493 * it can accommodate using mpr_count. We don't touch 3494 * this value until the driver returns from its 3495 * mc_propinfo() callback, and ensure we don't exceed 3496 * this number of ranges as the driver defines its 3497 * supported ranges from its mc_propinfo(). 3498 * 3499 * pr_range_cur_count keeps track of how many ranges 3500 * were defined by the driver from its mc_propinfo() 3501 * entry point. 3502 * 3503 * On exit, the user-specified range mpr_count returns 3504 * the number of ranges specified by the driver on 3505 * success, or the number of ranges it wanted to 3506 * define if that number of ranges could not be 3507 * accommodated by the specified range structure. In 3508 * the latter case, the caller will be able to 3509 * allocate a larger range structure, and query the 3510 * property again. 3511 */ 3512 state.pr_range_cur_count = 0; 3513 state.pr_range = range; 3514 3515 mip->mi_callbacks->mc_propinfo(mip->mi_driver, name, id, 3516 (mac_prop_info_handle_t)&state); 3517 3518 if (state.pr_flags & MAC_PROP_INFO_RANGE) 3519 range->mpr_count = state.pr_range_cur_count; 3520 3521 /* 3522 * The operation could fail if the buffer supplied by 3523 * the user was too small for the range or default 3524 * value of the property. 3525 */ 3526 if (state.pr_errno != 0) 3527 return (state.pr_errno); 3528 3529 if (perm != NULL && state.pr_flags & MAC_PROP_INFO_PERM) 3530 *perm = state.pr_perm; 3531 } 3532 3533 /* 3534 * The MAC layer may want to provide default values or allowed 3535 * ranges for properties if the driver does not provide a 3536 * property info entry point, or if that entry point exists but 3537 * does not provide a default value or allowed ranges for 3538 * that property.
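 *
 * A driver-side sketch of the mc_propinfo() entry point referenced
 * above (illustrative only; the function name and the 1500/9000
 * bounds are assumptions):
 *
 *	static void
 *	xx_m_propinfo(void *arg, const char *name, mac_prop_id_t id,
 *	    mac_prop_info_handle_t prh)
 *	{
 *		if (id == MAC_PROP_MTU)
 *			mac_prop_info_set_range_uint32(prh, 1500, 9000);
 *	}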
3539 */ 3540 switch (id) { 3541 case MAC_PROP_MTU: { 3542 uint32_t sdu; 3543 3544 mac_sdu_get2(mh, NULL, &sdu, NULL); 3545 3546 if (range != NULL && !(state.pr_flags & 3547 MAC_PROP_INFO_RANGE)) { 3548 /* MTU range */ 3549 _mac_set_range(range, sdu, sdu); 3550 } 3551 3552 if (default_val != NULL && !(state.pr_flags & 3553 MAC_PROP_INFO_DEFAULT)) { 3554 if (mip->mi_info.mi_media == DL_ETHER) 3555 sdu = ETHERMTU; 3556 /* default MTU value */ 3557 bcopy(&sdu, default_val, sizeof (sdu)); 3558 } 3559 } 3560 } 3561 3562 return (0); 3563 } 3564 3565 int 3566 mac_fastpath_disable(mac_handle_t mh) 3567 { 3568 mac_impl_t *mip = (mac_impl_t *)mh; 3569 3570 if ((mip->mi_state_flags & MIS_LEGACY) == 0) 3571 return (0); 3572 3573 return (mip->mi_capab_legacy.ml_fastpath_disable(mip->mi_driver)); 3574 } 3575 3576 void 3577 mac_fastpath_enable(mac_handle_t mh) 3578 { 3579 mac_impl_t *mip = (mac_impl_t *)mh; 3580 3581 if ((mip->mi_state_flags & MIS_LEGACY) == 0) 3582 return; 3583 3584 mip->mi_capab_legacy.ml_fastpath_enable(mip->mi_driver); 3585 } 3586 3587 void 3588 mac_register_priv_prop(mac_impl_t *mip, char **priv_props) 3589 { 3590 uint_t nprops, i; 3591 3592 if (priv_props == NULL) 3593 return; 3594 3595 nprops = 0; 3596 while (priv_props[nprops] != NULL) 3597 nprops++; 3598 if (nprops == 0) 3599 return; 3600 3601 3602 mip->mi_priv_prop = kmem_zalloc(nprops * sizeof (char *), KM_SLEEP); 3603 3604 for (i = 0; i < nprops; i++) { 3605 mip->mi_priv_prop[i] = kmem_zalloc(MAXLINKPROPNAME, KM_SLEEP); 3606 (void) strlcpy(mip->mi_priv_prop[i], priv_props[i], 3607 MAXLINKPROPNAME); 3608 } 3609 3610 mip->mi_priv_prop_count = nprops; 3611 } 3612 3613 void 3614 mac_unregister_priv_prop(mac_impl_t *mip) 3615 { 3616 uint_t i; 3617 3618 if (mip->mi_priv_prop_count == 0) { 3619 ASSERT(mip->mi_priv_prop == NULL); 3620 return; 3621 } 3622 3623 for (i = 0; i < mip->mi_priv_prop_count; i++) 3624 kmem_free(mip->mi_priv_prop[i], MAXLINKPROPNAME); 3625 kmem_free(mip->mi_priv_prop, mip->mi_priv_prop_count * 3626 sizeof (char *)); 3627 3628 mip->mi_priv_prop = NULL; 3629 mip->mi_priv_prop_count = 0; 3630 } 3631 3632 /* 3633 * mac_ring_t 'mr' macros. Some rogue drivers may access the ring structure 3634 * (by invoking mac_rx()) even after processing mac_stop_ring(). In such 3635 * cases, if MAC frees the ring structure after mac_stop_ring(), any 3636 * illegal access to the ring structure coming from the driver will panic 3637 * the system. In order to protect the system from such inadvertent access, 3638 * we maintain a cache of rings in the mac_impl_t after they get freed up. 3639 * When packets are received on freed-up rings, MAC (through the generation 3640 * count mechanism) will drop such packets.
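 *
 * Concretely, mac_stop_ring() below bumps mr_gen_num every time a
 * ring is stopped, so a driver upcall that carries a stale generation
 * number can be detected and its packets dropped before they touch
 * recycled ring state.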
3641 */ 3642 static mac_ring_t * 3643 mac_ring_alloc(mac_impl_t *mip) 3644 { 3645 mac_ring_t *ring; 3646 3647 mutex_enter(&mip->mi_ring_lock); 3648 if (mip->mi_ring_freelist != NULL) { 3649 ring = mip->mi_ring_freelist; 3650 mip->mi_ring_freelist = ring->mr_next; 3651 bzero(ring, sizeof (mac_ring_t)); 3652 mutex_exit(&mip->mi_ring_lock); 3653 } else { 3654 mutex_exit(&mip->mi_ring_lock); 3655 ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP); 3656 } 3657 ASSERT((ring != NULL) && (ring->mr_state == MR_FREE)); 3658 return (ring); 3659 } 3660 3661 static void 3662 mac_ring_free(mac_impl_t *mip, mac_ring_t *ring) 3663 { 3664 ASSERT(ring->mr_state == MR_FREE); 3665 3666 mutex_enter(&mip->mi_ring_lock); 3667 ring->mr_state = MR_FREE; 3668 ring->mr_flag = 0; 3669 ring->mr_next = mip->mi_ring_freelist; 3670 ring->mr_mip = NULL; 3671 mip->mi_ring_freelist = ring; 3672 mac_ring_stat_delete(ring); 3673 mutex_exit(&mip->mi_ring_lock); 3674 } 3675 3676 static void 3677 mac_ring_freeall(mac_impl_t *mip) 3678 { 3679 mac_ring_t *ring_next; 3680 mutex_enter(&mip->mi_ring_lock); 3681 mac_ring_t *ring = mip->mi_ring_freelist; 3682 while (ring != NULL) { 3683 ring_next = ring->mr_next; 3684 kmem_cache_free(mac_ring_cache, ring); 3685 ring = ring_next; 3686 } 3687 mip->mi_ring_freelist = NULL; 3688 mutex_exit(&mip->mi_ring_lock); 3689 } 3690 3691 int 3692 mac_start_ring(mac_ring_t *ring) 3693 { 3694 int rv = 0; 3695 3696 ASSERT(ring->mr_state == MR_FREE); 3697 3698 if (ring->mr_start != NULL) { 3699 rv = ring->mr_start(ring->mr_driver, ring->mr_gen_num); 3700 if (rv != 0) 3701 return (rv); 3702 } 3703 3704 ring->mr_state = MR_INUSE; 3705 return (rv); 3706 } 3707 3708 void 3709 mac_stop_ring(mac_ring_t *ring) 3710 { 3711 ASSERT(ring->mr_state == MR_INUSE); 3712 3713 if (ring->mr_stop != NULL) 3714 ring->mr_stop(ring->mr_driver); 3715 3716 ring->mr_state = MR_FREE; 3717 3718 /* 3719 * Increment the ring generation number for this ring. 3720 */ 3721 ring->mr_gen_num++; 3722 } 3723 3724 int 3725 mac_start_group(mac_group_t *group) 3726 { 3727 int rv = 0; 3728 3729 if (group->mrg_start != NULL) 3730 rv = group->mrg_start(group->mrg_driver); 3731 3732 return (rv); 3733 } 3734 3735 void 3736 mac_stop_group(mac_group_t *group) 3737 { 3738 if (group->mrg_stop != NULL) 3739 group->mrg_stop(group->mrg_driver); 3740 } 3741 3742 /* 3743 * Called from mac_start() on the default Rx group. Broadcast and multicast 3744 * packets are received only on the default group. Hence the default group 3745 * needs to be up even if the primary client is not up, for the other groups 3746 * to be functional. We do this by calling this function at mac_start time 3747 * itself. However the broadcast packets that are received can't make their 3748 * way beyond mac_rx until a mac client creates a broadcast flow. 
3749 */ 3750 static int 3751 mac_start_group_and_rings(mac_group_t *group) 3752 { 3753 mac_ring_t *ring; 3754 int rv = 0; 3755 3756 ASSERT(group->mrg_state == MAC_GROUP_STATE_REGISTERED); 3757 if ((rv = mac_start_group(group)) != 0) 3758 return (rv); 3759 3760 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 3761 ASSERT(ring->mr_state == MR_FREE); 3762 if ((rv = mac_start_ring(ring)) != 0) 3763 goto error; 3764 ring->mr_classify_type = MAC_SW_CLASSIFIER; 3765 } 3766 return (0); 3767 3768 error: 3769 mac_stop_group_and_rings(group); 3770 return (rv); 3771 } 3772 3773 /* Called from mac_stop on the default Rx group */ 3774 static void 3775 mac_stop_group_and_rings(mac_group_t *group) 3776 { 3777 mac_ring_t *ring; 3778 3779 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 3780 if (ring->mr_state != MR_FREE) { 3781 mac_stop_ring(ring); 3782 ring->mr_flag = 0; 3783 ring->mr_classify_type = MAC_NO_CLASSIFIER; 3784 } 3785 } 3786 mac_stop_group(group); 3787 } 3788 3789 3790 static mac_ring_t * 3791 mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index, 3792 mac_capab_rings_t *cap_rings) 3793 { 3794 mac_ring_t *ring, *rnext; 3795 mac_ring_info_t ring_info; 3796 ddi_intr_handle_t ddi_handle; 3797 3798 ring = mac_ring_alloc(mip); 3799 3800 /* Prepare basic information of ring */ 3801 3802 /* 3803 * Ring index is numbered to be unique across a particular device. 3804 * Ring index computation makes the following assumptions: 3805 * - For drivers with static grouping (e.g. ixgbe, bge), 3806 * ring index exchanged with the driver (e.g. during mr_rget) 3807 * is unique only across the group the ring belongs to. 3808 * - Drivers with dynamic grouping (e.g. nxge) start 3809 * with a single group (mrg_index = 0). 3810 */ 3811 ring->mr_index = group->mrg_index * group->mrg_info.mgi_count + index; 3812 ring->mr_type = group->mrg_type; 3813 ring->mr_gh = (mac_group_handle_t)group; 3814 3815 /* Insert the new ring to the list. */ 3816 ring->mr_next = group->mrg_rings; 3817 group->mrg_rings = ring; 3818 3819 /* Zero to reuse the info data structure */ 3820 bzero(&ring_info, sizeof (ring_info)); 3821 3822 /* Query ring information from driver */ 3823 cap_rings->mr_rget(mip->mi_driver, group->mrg_type, group->mrg_index, 3824 index, &ring_info, (mac_ring_handle_t)ring); 3825 3826 ring->mr_info = ring_info; 3827 3828 /* 3829 * The interrupt handle could be shared among multiple rings. 3830 * Thus if there is a bunch of rings that are sharing an 3831 * interrupt, then only one ring among the bunch will be made 3832 * available for interrupt re-targeting; the rest will have 3833 * the ddi_shared flag set to B_TRUE and will not be available 3834 * for interrupt re-targeting. 3835 */ 3836 if ((ddi_handle = ring_info.mri_intr.mi_ddi_handle) != NULL) { 3837 rnext = ring->mr_next; 3838 while (rnext != NULL) { 3839 if (rnext->mr_info.mri_intr.mi_ddi_handle == 3840 ddi_handle) { 3841 /* 3842 * If the default ring (mr_index == 0) is part 3843 * of a group of rings sharing an 3844 * interrupt, then set the ddi_shared flag for 3845 * the default ring and give another ring 3846 * the chance to be re-targeted. 3847 */ 3848 if (rnext->mr_index == 0 && 3849 !rnext->mr_info.mri_intr.mi_ddi_shared) { 3850 rnext->mr_info.mri_intr.mi_ddi_shared = 3851 B_TRUE; 3852 } else { 3853 ring->mr_info.mri_intr.mi_ddi_shared = 3854 B_TRUE; 3855 } 3856 break; 3857 } 3858 rnext = rnext->mr_next; 3859 } 3860 /* 3861 * If rnext is NULL, then no matching ddi_handle was found. 3862 * Rx rings get registered first.

static mac_ring_t *
mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index,
    mac_capab_rings_t *cap_rings)
{
	mac_ring_t *ring, *rnext;
	mac_ring_info_t ring_info;
	ddi_intr_handle_t ddi_handle;

	ring = mac_ring_alloc(mip);

	/* Prepare basic information of ring */

	/*
	 * Ring index is numbered to be unique across a particular device.
	 * Ring index computation makes the following assumptions:
	 *	- For drivers with static grouping (e.g. ixgbe, bge),
	 *	the ring index exchanged with the driver (e.g. during mr_rget)
	 *	is unique only across the group the ring belongs to.
	 *	- Drivers with dynamic grouping (e.g. nxge) start
	 *	with a single group (mrg_index = 0).
	 */
	ring->mr_index = group->mrg_index * group->mrg_info.mgi_count + index;
	ring->mr_type = group->mrg_type;
	ring->mr_gh = (mac_group_handle_t)group;

	/* Insert the new ring to the list. */
	ring->mr_next = group->mrg_rings;
	group->mrg_rings = ring;

	/* Zero to reuse the info data structure */
	bzero(&ring_info, sizeof (ring_info));

	/* Query ring information from driver */
	cap_rings->mr_rget(mip->mi_driver, group->mrg_type, group->mrg_index,
	    index, &ring_info, (mac_ring_handle_t)ring);

	ring->mr_info = ring_info;

	/*
	 * The interrupt handle could be shared among multiple rings.
	 * Thus if there is a bunch of rings that are sharing an
	 * interrupt, then only one ring among the bunch will be made
	 * available for interrupt re-targeting; the rest will have
	 * the ddi_shared flag set to TRUE and would not be available
	 * for interrupt re-targeting.
	 */
	if ((ddi_handle = ring_info.mri_intr.mi_ddi_handle) != NULL) {
		rnext = ring->mr_next;
		while (rnext != NULL) {
			if (rnext->mr_info.mri_intr.mi_ddi_handle ==
			    ddi_handle) {
				/*
				 * If the default ring (mr_index == 0) is part
				 * of a group of rings sharing an
				 * interrupt, then set the ddi_shared flag for
				 * the default ring and give another ring
				 * the chance to be re-targeted.
				 */
				if (rnext->mr_index == 0 &&
				    !rnext->mr_info.mri_intr.mi_ddi_shared) {
					rnext->mr_info.mri_intr.mi_ddi_shared =
					    B_TRUE;
				} else {
					ring->mr_info.mri_intr.mi_ddi_shared =
					    B_TRUE;
				}
				break;
			}
			rnext = rnext->mr_next;
		}
		/*
		 * If rnext is NULL, then no matching ddi_handle was found.
		 * Rx rings get registered first. So if this is a Tx ring,
		 * then go through all the Rx rings and see if there is a
		 * matching ddi handle.
		 */
		if (rnext == NULL && ring->mr_type == MAC_RING_TYPE_TX) {
			mac_compare_ddi_handle(mip->mi_rx_groups,
			    mip->mi_rx_group_count, ring);
		}
	}

	/* Update ring's status */
	ring->mr_state = MR_FREE;
	ring->mr_flag = 0;

	/* Update the ring count of the group */
	group->mrg_cur_count++;

	/* Create per ring kstats */
	if (ring->mr_stat != NULL) {
		ring->mr_mip = mip;
		mac_ring_stat_create(ring);
	}

	return (ring);
}
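
/*
 * EXAMPLE (editorial sketch, not part of the original source): the ring
 * index arithmetic performed by mac_init_ring() above for drivers with
 * static grouping. With mgi_count rings per group, ring i of group g is
 * assigned the device-unique index g * mgi_count + i; e.g. ring 2 of
 * group 1 on a device with 4-ring groups gets index 6. The ex_* name is
 * hypothetical.
 */
static int
ex_unique_ring_index(int group_index, int rings_per_group, int ring_index)
{
	return (group_index * rings_per_group + ring_index);
}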

/*
 * Rings are chained together for easy regrouping.
 */
static void
mac_init_group(mac_impl_t *mip, mac_group_t *group, int size,
    mac_capab_rings_t *cap_rings)
{
	int index;

	/*
	 * Initialize all ring members of this group. Size of zero will not
	 * enter the loop, so it's safe for initializing an empty group.
	 */
	for (index = size - 1; index >= 0; index--)
		(void) mac_init_ring(mip, group, index, cap_rings);
}

int
mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype)
{
	mac_capab_rings_t *cap_rings;
	mac_group_t *group;
	mac_group_t *groups;
	mac_group_info_t group_info;
	uint_t group_free = 0;
	uint_t ring_left;
	mac_ring_t *ring;
	int g;
	int err = 0;
	uint_t grpcnt;
	boolean_t pseudo_txgrp = B_FALSE;

	switch (rtype) {
	case MAC_RING_TYPE_RX:
		ASSERT(mip->mi_rx_groups == NULL);

		cap_rings = &mip->mi_rx_rings_cap;
		cap_rings->mr_type = MAC_RING_TYPE_RX;
		break;
	case MAC_RING_TYPE_TX:
		ASSERT(mip->mi_tx_groups == NULL);

		cap_rings = &mip->mi_tx_rings_cap;
		cap_rings->mr_type = MAC_RING_TYPE_TX;
		break;
	default:
		ASSERT(B_FALSE);
	}

	if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, cap_rings))
		return (0);
	grpcnt = cap_rings->mr_gnum;

	/*
	 * If we have multiple TX rings, but only one TX group, we can
	 * create pseudo TX groups (one per TX ring) in the MAC layer,
	 * except for an aggr. For an aggr currently we maintain only
	 * one group with all the rings (for all its ports); going
	 * forward we might change this.
	 */
	if (rtype == MAC_RING_TYPE_TX &&
	    cap_rings->mr_gnum == 0 && cap_rings->mr_rnum > 0 &&
	    (mip->mi_state_flags & MIS_IS_AGGR) == 0) {
		/*
		 * The -1 here is because we create a default TX group
		 * with all the rings in it.
		 */
		grpcnt = cap_rings->mr_rnum - 1;
		pseudo_txgrp = B_TRUE;
	}

	/*
	 * Allocate a contiguous buffer for all groups.
	 */
	groups = kmem_zalloc(sizeof (mac_group_t) * (grpcnt + 1), KM_SLEEP);

	ring_left = cap_rings->mr_rnum;

	/*
	 * Get all ring groups if any, and get their ring members
	 * if any.
	 */
	for (g = 0; g < grpcnt; g++) {
		group = groups + g;

		/* Prepare basic information of the group */
		group->mrg_index = g;
		group->mrg_type = rtype;
		group->mrg_state = MAC_GROUP_STATE_UNINIT;
		group->mrg_mh = (mac_handle_t)mip;
		group->mrg_next = group + 1;

		/* Zero to reuse the info data structure */
		bzero(&group_info, sizeof (group_info));

		if (pseudo_txgrp) {
			/*
			 * This is a pseudo group that we created; apart
			 * from setting the state there is nothing to be
			 * done.
			 */
			group->mrg_state = MAC_GROUP_STATE_REGISTERED;
			group_free++;
			continue;
		}
		/* Query group information from driver */
		cap_rings->mr_gget(mip->mi_driver, rtype, g, &group_info,
		    (mac_group_handle_t)group);

		switch (cap_rings->mr_group_type) {
		case MAC_GROUP_TYPE_DYNAMIC:
			if (cap_rings->mr_gaddring == NULL ||
			    cap_rings->mr_gremring == NULL) {
				DTRACE_PROBE3(
				    mac__init__rings_no_addremring,
				    char *, mip->mi_name,
				    mac_group_add_ring_t,
				    cap_rings->mr_gaddring,
				    mac_group_add_ring_t,
				    cap_rings->mr_gremring);
				err = EINVAL;
				goto bail;
			}

			switch (rtype) {
			case MAC_RING_TYPE_RX:
				/*
				 * The first RX group must have non-zero
				 * rings, and the following groups must
				 * have zero rings.
				 */
				if (g == 0 && group_info.mgi_count == 0) {
					DTRACE_PROBE1(
					    mac__init__rings__rx__def__zero,
					    char *, mip->mi_name);
					err = EINVAL;
					goto bail;
				}
				if (g > 0 && group_info.mgi_count != 0) {
					DTRACE_PROBE3(
					    mac__init__rings__rx__nonzero,
					    char *, mip->mi_name,
					    int, g, int, group_info.mgi_count);
					err = EINVAL;
					goto bail;
				}
				break;
			case MAC_RING_TYPE_TX:
				/*
				 * All TX ring groups must have zero rings.
				 */
				if (group_info.mgi_count != 0) {
					DTRACE_PROBE3(
					    mac__init__rings__tx__nonzero,
					    char *, mip->mi_name,
					    int, g, int, group_info.mgi_count);
					err = EINVAL;
					goto bail;
				}
				break;
			}
			break;
		case MAC_GROUP_TYPE_STATIC:
			/*
			 * Note that an empty group is allowed, e.g., an aggr
			 * would start with an empty group.
			 */
			break;
		default:
			/* unknown group type */
			DTRACE_PROBE2(mac__init__rings__unknown__type,
			    char *, mip->mi_name,
			    int, cap_rings->mr_group_type);
			err = EINVAL;
			goto bail;
		}

		/*
		 * Driver must register group->mgi_addmac/remmac() for rx groups
		 * to support multiple MAC addresses.
		 */
		if (rtype == MAC_RING_TYPE_RX &&
		    ((group_info.mgi_addmac == NULL) ||
		    (group_info.mgi_remmac == NULL))) {
			err = EINVAL;
			goto bail;
		}

		/* Cache driver-supplied information */
		group->mrg_info = group_info;

		/* Update the group's status and group count. */
		mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED);
		group_free++;

		group->mrg_rings = NULL;
		group->mrg_cur_count = 0;
		mac_init_group(mip, group, group_info.mgi_count, cap_rings);
		ring_left -= group_info.mgi_count;

		/* The current group size should be equal to default value */
		ASSERT(group->mrg_cur_count == group_info.mgi_count);
	}

	/* Build up a dummy group for free resources as a pool */
	group = groups + grpcnt;

	/* Prepare basic information of the group */
	group->mrg_index = -1;
	group->mrg_type = rtype;
	group->mrg_state = MAC_GROUP_STATE_UNINIT;
	group->mrg_mh = (mac_handle_t)mip;
	group->mrg_next = NULL;

	/*
	 * If there are ungrouped rings, allocate a contiguous buffer for
	 * the remaining resources.
	 */
	if (ring_left != 0) {
		group->mrg_rings = NULL;
		group->mrg_cur_count = 0;
		mac_init_group(mip, group, ring_left, cap_rings);

		/* The current group size should be equal to ring_left */
		ASSERT(group->mrg_cur_count == ring_left);

		ring_left = 0;

		/* Update this group's status */
		mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED);
	} else
		group->mrg_rings = NULL;

	ASSERT(ring_left == 0);

bail:

	/* Cache other important information to finalize the initialization */
	switch (rtype) {
	case MAC_RING_TYPE_RX:
		mip->mi_rx_group_type = cap_rings->mr_group_type;
		mip->mi_rx_group_count = cap_rings->mr_gnum;
		mip->mi_rx_groups = groups;
		mip->mi_rx_donor_grp = groups;
		if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) {
			/*
			 * The default ring is reserved since it is
			 * used for the broadcast etc. packets.
			 */
			mip->mi_rxrings_avail =
			    mip->mi_rx_groups->mrg_cur_count - 1;
			mip->mi_rxrings_rsvd = 1;
		}
		/*
		 * The default group cannot be reserved. It is used by
		 * all the clients that do not have an exclusive group.
		 */
		mip->mi_rxhwclnt_avail = mip->mi_rx_group_count - 1;
		mip->mi_rxhwclnt_used = 1;
		break;
	case MAC_RING_TYPE_TX:
		mip->mi_tx_group_type = pseudo_txgrp ? MAC_GROUP_TYPE_DYNAMIC :
		    cap_rings->mr_group_type;
		mip->mi_tx_group_count = grpcnt;
		mip->mi_tx_group_free = group_free;
		mip->mi_tx_groups = groups;

		group = groups + grpcnt;
		ring = group->mrg_rings;
		/*
		 * The ring can be NULL in the case of aggr. Aggr will
		 * have an empty Tx group which will get populated
		 * later when pseudo Tx rings are added after
		 * mac_register() is done.
		 */
		if (ring == NULL) {
			ASSERT(mip->mi_state_flags & MIS_IS_AGGR);
			/*
			 * Pass the group to aggr so it can add Tx
			 * rings to the group later.
			 */
			cap_rings->mr_gget(mip->mi_driver, rtype, 0, NULL,
			    (mac_group_handle_t)group);
			/*
			 * Even though there are no rings at this time
			 * (rings will come later), set the group
			 * state to registered.
			 */
			group->mrg_state = MAC_GROUP_STATE_REGISTERED;
		} else {
			/*
			 * Ring 0 is used as the default one and it could be
			 * assigned to a client as well.
			 */
			while ((ring->mr_index != 0) && (ring->mr_next != NULL))
				ring = ring->mr_next;
			ASSERT(ring->mr_index == 0);
			mip->mi_default_tx_ring = (mac_ring_handle_t)ring;
		}
		if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC)
			mip->mi_txrings_avail = group->mrg_cur_count - 1;
		/*
		 * The default ring cannot be reserved.
		 */
		mip->mi_txrings_rsvd = 1;
		/*
		 * The default group cannot be reserved. It will be shared
		 * by clients that do not have an exclusive group.
		 */
		mip->mi_txhwclnt_avail = mip->mi_tx_group_count;
		mip->mi_txhwclnt_used = 1;
		break;
	default:
		ASSERT(B_FALSE);
	}

	if (err != 0)
		mac_free_rings(mip, rtype);

	return (err);
}
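
/*
 * EXAMPLE (editorial sketch, not part of the original source): the layout
 * of the group array built by mac_init_rings() above, which is grpcnt
 * driver-visible groups followed by one trailing pool group, marked with
 * index -1, that holds otherwise ungrouped rings. A walker that should
 * skip the pool can key off that index. The ex_* type is a hypothetical
 * stand-in for the mac_group_t fields used here.
 */
typedef struct ex_group {
	int	exg_index;		/* -1 identifies the trailing pool */
} ex_group_t;

static int
ex_count_driver_groups(const ex_group_t *groups, int total_with_pool)
{
	int g, n = 0;

	for (g = 0; g < total_with_pool; g++) {
		if (groups[g].exg_index != -1)
			n++;		/* a real driver group */
	}
	return (n);
}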

/*
 * The ddi interrupt handle could be shared among rings. If so, compare
 * the new ring's ddi handle with the existing ones and set the ddi_shared
 * flag.
 */
void
mac_compare_ddi_handle(mac_group_t *groups, uint_t grpcnt, mac_ring_t *cring)
{
	mac_group_t *group;
	mac_ring_t *ring;
	ddi_intr_handle_t ddi_handle;
	int g;

	ddi_handle = cring->mr_info.mri_intr.mi_ddi_handle;
	for (g = 0; g < grpcnt; g++) {
		group = groups + g;
		for (ring = group->mrg_rings; ring != NULL;
		    ring = ring->mr_next) {
			if (ring == cring)
				continue;
			if (ring->mr_info.mri_intr.mi_ddi_handle ==
			    ddi_handle) {
				if (cring->mr_type == MAC_RING_TYPE_RX &&
				    ring->mr_index == 0 &&
				    !ring->mr_info.mri_intr.mi_ddi_shared) {
					ring->mr_info.mri_intr.mi_ddi_shared =
					    B_TRUE;
				} else {
					cring->mr_info.mri_intr.mi_ddi_shared =
					    B_TRUE;
				}
				return;
			}
		}
	}
}

/*
 * Called to free all groups of a particular type (RX or TX). It's assumed
 * that no clients are using these groups.
 */
void
mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype)
{
	mac_group_t *group, *groups;
	uint_t group_count;

	switch (rtype) {
	case MAC_RING_TYPE_RX:
		if (mip->mi_rx_groups == NULL)
			return;

		groups = mip->mi_rx_groups;
		group_count = mip->mi_rx_group_count;

		mip->mi_rx_groups = NULL;
		mip->mi_rx_donor_grp = NULL;
		mip->mi_rx_group_count = 0;
		break;
	case MAC_RING_TYPE_TX:
		ASSERT(mip->mi_tx_group_count == mip->mi_tx_group_free);

		if (mip->mi_tx_groups == NULL)
			return;

		groups = mip->mi_tx_groups;
		group_count = mip->mi_tx_group_count;

		mip->mi_tx_groups = NULL;
		mip->mi_tx_group_count = 0;
		mip->mi_tx_group_free = 0;
		mip->mi_default_tx_ring = NULL;
		break;
	default:
		ASSERT(B_FALSE);
	}

	for (group = groups; group != NULL; group = group->mrg_next) {
		mac_ring_t *ring;

		if (group->mrg_cur_count == 0)
			continue;

		ASSERT(group->mrg_rings != NULL);

		while ((ring = group->mrg_rings) != NULL) {
			group->mrg_rings = ring->mr_next;
			mac_ring_free(mip, ring);
		}
	}

	/* Free all the cached rings */
	mac_ring_freeall(mip);
	/* Free the block of group data structures */
	kmem_free(groups, sizeof (mac_group_t) * (group_count + 1));
}

/*
 * Associate a MAC address with a receive group.
 *
 * The return value of this function should always be checked properly, because
 * any type of failure could cause unexpected results. A MAC address can be
 * added to or removed from a group only after the group has been reserved.
 * Ideally, a successful reservation always leads to calling mac_group_addmac()
 * to steer desired traffic. Failure of adding a unicast MAC address doesn't
 * always imply that the group is functioning abnormally.
 *
 * Currently this function is called everywhere, and it reflects assumptions
 * about MAC addresses in the implementation. CR 6735196.
 */
int
mac_group_addmac(mac_group_t *group, const uint8_t *addr)
{
	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
	ASSERT(group->mrg_info.mgi_addmac != NULL);

	return (group->mrg_info.mgi_addmac(group->mrg_info.mgi_driver, addr));
}
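
/*
 * EXAMPLE (editorial sketch, not part of the original source): per the
 * comment on mac_group_addmac() above, its return value must always be
 * checked. One common shape for a caller, modeled on mac_add_macaddr()
 * further below, is to fall back to a coarser mechanism such as
 * promiscuous mode when programming the classifier fails. The function
 * pointers and the ex_* name are hypothetical.
 */
static int
ex_steer_or_fallback(int (*addmac)(const unsigned char *),
    int (*enable_promisc)(void), const unsigned char *addr)
{
	int err = addmac(addr);

	if (err == 0)
		return (0);		/* hardware classification in place */
	return (enable_promisc());	/* degrade gracefully */
}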

/*
 * Remove the association between a MAC address and a receive group.
 */
int
mac_group_remmac(mac_group_t *group, const uint8_t *addr)
{
	ASSERT(group->mrg_type == MAC_RING_TYPE_RX);
	ASSERT(group->mrg_info.mgi_remmac != NULL);

	return (group->mrg_info.mgi_remmac(group->mrg_info.mgi_driver, addr));
}

/*
 * This is the entry point for packets transmitted through the bridging code.
 * If no bridge is in place, MAC_RING_TX transmits using the tx ring. The 'rh'
 * pointer may be NULL to select the default ring.
 */
mblk_t *
mac_bridge_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp)
{
	mac_handle_t mh;

	/*
	 * Once we take a reference on the bridge link, the bridge
	 * module itself can't unload, so the callback pointers are
	 * stable.
	 */
	mutex_enter(&mip->mi_bridge_lock);
	if ((mh = mip->mi_bridge_link) != NULL)
		mac_bridge_ref_cb(mh, B_TRUE);
	mutex_exit(&mip->mi_bridge_lock);
	if (mh == NULL) {
		MAC_RING_TX(mip, rh, mp, mp);
	} else {
		mp = mac_bridge_tx_cb(mh, rh, mp);
		mac_bridge_ref_cb(mh, B_FALSE);
	}

	return (mp);
}

/*
 * Find a ring from its index.
 */
mac_ring_handle_t
mac_find_ring(mac_group_handle_t gh, int index)
{
	mac_group_t *group = (mac_group_t *)gh;
	mac_ring_t *ring;

	for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next)
		if (ring->mr_index == index)
			break;

	return ((mac_ring_handle_t)ring);
}
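
/*
 * EXAMPLE (editorial sketch, not part of the original source): the
 * sample-and-hold pattern used by mac_bridge_tx() above, modeled in
 * user space. The pointer is read and a reference taken under the lock;
 * the lock is dropped before the potentially long call; the reference
 * keeps the callee from disappearing meanwhile. ex_* names are
 * hypothetical and pthread mutexes stand in for kmutex_t.
 */
#include <pthread.h>

typedef struct ex_link {
	int	exl_refs;		/* pins the link while > 0 */
} ex_link_t;

typedef struct ex_bridge {
	pthread_mutex_t	exb_lock;
	ex_link_t	*exb_link;	/* NULL when no bridge is attached */
} ex_bridge_t;

static void
ex_bridge_tx(ex_bridge_t *b)
{
	ex_link_t *l;

	pthread_mutex_lock(&b->exb_lock);
	if ((l = b->exb_link) != NULL)
		l->exl_refs++;		/* take the hold under the lock */
	pthread_mutex_unlock(&b->exb_lock);

	if (l != NULL) {
		/* ... forward the packet; the hold keeps 'l' valid ... */
		pthread_mutex_lock(&b->exb_lock);
		l->exl_refs--;		/* drop the hold afterwards */
		pthread_mutex_unlock(&b->exb_lock);
	}
}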

/*
 * Add a ring to an existing group.
 *
 * The ring must be either passed directly (for example if the ring
 * movement is initiated by the framework), or specified through a driver
 * index (for example when the ring is added by the driver).
 *
 * The caller needs to call mac_perim_enter() before calling this function.
 */
int
i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index)
{
	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
	mac_capab_rings_t *cap_rings;
	boolean_t driver_call = (ring == NULL);
	mac_group_type_t group_type;
	int ret = 0;
	flow_entry_t *flent;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	switch (group->mrg_type) {
	case MAC_RING_TYPE_RX:
		cap_rings = &mip->mi_rx_rings_cap;
		group_type = mip->mi_rx_group_type;
		break;
	case MAC_RING_TYPE_TX:
		cap_rings = &mip->mi_tx_rings_cap;
		group_type = mip->mi_tx_group_type;
		break;
	default:
		ASSERT(B_FALSE);
	}

	/*
	 * There should be no ring with the same ring index in the target
	 * group.
	 */
	ASSERT(mac_find_ring((mac_group_handle_t)group,
	    driver_call ? index : ring->mr_index) == NULL);

	if (driver_call) {
		/*
		 * The function is called as a result of a request from
		 * a driver to add a ring to an existing group, for example
		 * from the aggregation driver. Allocate a new mac_ring_t
		 * for that ring.
		 */
		ring = mac_init_ring(mip, group, index, cap_rings);
		ASSERT(group->mrg_state > MAC_GROUP_STATE_UNINIT);
	} else {
		/*
		 * The function is called as a result of a MAC layer request
		 * to add a ring to an existing group. In this case the
		 * ring is being moved between groups, which requires
		 * the underlying driver to support dynamic grouping,
		 * and the mac_ring_t already exists.
		 */
		ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC);
		ASSERT(group->mrg_driver == NULL ||
		    cap_rings->mr_gaddring != NULL);
		ASSERT(ring->mr_gh == NULL);
	}

	/*
	 * At this point the ring should not be in use, and it should be
	 * of the right type for the target group.
	 */
	ASSERT(ring->mr_state < MR_INUSE);
	ASSERT(ring->mr_srs == NULL);
	ASSERT(ring->mr_type == group->mrg_type);

	if (!driver_call) {
		/*
		 * Add the driver level hardware ring if the process was not
		 * initiated by the driver, and the target group is backed
		 * by a driver-level group.
		 */
		if (group->mrg_driver != NULL) {
			cap_rings->mr_gaddring(group->mrg_driver,
			    ring->mr_driver, ring->mr_type);
		}

		/*
		 * Insert the ring ahead of the existing rings.
		 */
		ring->mr_next = group->mrg_rings;
		group->mrg_rings = ring;
		ring->mr_gh = (mac_group_handle_t)group;
		group->mrg_cur_count++;
	}

	/*
	 * If the group has not been actively used, we're done.
	 */
	if (group->mrg_index != -1 &&
	    group->mrg_state < MAC_GROUP_STATE_RESERVED)
		return (0);

	/*
	 * Start the ring if needed. Failure causes the grouping action to
	 * be undone.
	 */
	if (ring->mr_state != MR_INUSE) {
		if ((ret = mac_start_ring(ring)) != 0) {
			if (!driver_call) {
				cap_rings->mr_gremring(group->mrg_driver,
				    ring->mr_driver, ring->mr_type);
			}
			group->mrg_cur_count--;
			group->mrg_rings = ring->mr_next;

			ring->mr_gh = NULL;

			if (driver_call)
				mac_ring_free(mip, ring);

			return (ret);
		}
	}

	/*
	 * Set up SRS/SR according to the ring type.
	 */
	switch (ring->mr_type) {
	case MAC_RING_TYPE_RX:
		/*
		 * Set up an SRS on top of the new ring if the group is
		 * reserved for someone's exclusive use.
		 */
		if (group->mrg_state == MAC_GROUP_STATE_RESERVED) {
			mac_client_impl_t *mcip;

			mcip = MAC_GROUP_ONLY_CLIENT(group);
			/*
			 * Even though this group is reserved we might still
			 * have multiple clients, i.e., a VLAN shares the
			 * group with the primary mac client.
			 */
			if (mcip != NULL) {
				flent = mcip->mci_flent;
				ASSERT(flent->fe_rx_srs_cnt > 0);
				mac_rx_srs_group_setup(mcip, flent, SRST_LINK);
				mac_fanout_setup(mcip, flent,
				    MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver,
				    mcip, NULL, NULL);
			} else {
				ring->mr_classify_type = MAC_SW_CLASSIFIER;
			}
		}
		break;
	case MAC_RING_TYPE_TX:
	{
		mac_grp_client_t *mgcp = group->mrg_clients;
		mac_client_impl_t *mcip;
		mac_soft_ring_set_t *mac_srs;
		mac_srs_tx_t *tx;

		if (MAC_GROUP_NO_CLIENT(group)) {
			if (ring->mr_state == MR_INUSE)
				mac_stop_ring(ring);
			ring->mr_flag = 0;
			break;
		}
		/*
		 * If the rings are being moved to a group that has
		 * clients using it, then add the new rings to the
		 * clients' SRS.
		 */
		while (mgcp != NULL) {
			boolean_t is_aggr;

			mcip = mgcp->mgc_client;
			flent = mcip->mci_flent;
			is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR);
			mac_srs = MCIP_TX_SRS(mcip);
			tx = &mac_srs->srs_tx;
			mac_tx_client_quiesce((mac_client_handle_t)mcip);
			/*
			 * If we are growing from 1 to multiple rings.
			 */
			if (tx->st_mode == SRS_TX_BW ||
			    tx->st_mode == SRS_TX_SERIALIZE ||
			    tx->st_mode == SRS_TX_DEFAULT) {
				mac_ring_t *tx_ring = tx->st_arg2;

				tx->st_arg2 = NULL;
				mac_tx_srs_stat_recreate(mac_srs, B_TRUE);
				mac_tx_srs_add_ring(mac_srs, tx_ring);
				if (mac_srs->srs_type & SRST_BW_CONTROL) {
					tx->st_mode = is_aggr ? SRS_TX_BW_AGGR :
					    SRS_TX_BW_FANOUT;
				} else {
					tx->st_mode = is_aggr ? SRS_TX_AGGR :
					    SRS_TX_FANOUT;
				}
				tx->st_func = mac_tx_get_func(tx->st_mode);
			}
			mac_tx_srs_add_ring(mac_srs, ring);
			mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip),
			    mac_rx_deliver, mcip, NULL, NULL);
			mac_tx_client_restart((mac_client_handle_t)mcip);
			mgcp = mgcp->mgc_next;
		}
		break;
	}
	default:
		ASSERT(B_FALSE);
	}
	/*
	 * For aggr, the default ring will be NULL to begin with. If it
	 * is NULL, then pick the first ring that gets added as the
	 * default ring. Any ring in an aggregation can be removed at
	 * any time (by the user action of removing a link) and if the
	 * current default ring gets removed, then a new one gets
	 * picked (see i_mac_group_rem_ring()).
	 */
	if (mip->mi_state_flags & MIS_IS_AGGR &&
	    mip->mi_default_tx_ring == NULL &&
	    ring->mr_type == MAC_RING_TYPE_TX) {
		mip->mi_default_tx_ring = (mac_ring_handle_t)ring;
	}

	MAC_RING_UNMARK(ring, MR_INCIPIENT);
	return (0);
}
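
/*
 * EXAMPLE (editorial sketch, not part of the original source): the Tx
 * SRS mode transition that i_mac_group_add_ring() performs above when a
 * group grows from one ring to many. Bandwidth-controlled SRSes move to
 * a bandwidth fanout mode and the rest to plain fanout, with the aggr
 * variants chosen for aggr clients. The ex_* enum is a hypothetical
 * stand-in for the SRS_TX_* constants.
 */
typedef enum {
	EX_TX_DEFAULT, EX_TX_SERIALIZE, EX_TX_BW,
	EX_TX_FANOUT, EX_TX_BW_FANOUT, EX_TX_AGGR, EX_TX_BW_AGGR
} ex_tx_mode_t;

static ex_tx_mode_t
ex_tx_mode_grow(int bw_controlled, int is_aggr)
{
	if (bw_controlled)
		return (is_aggr ? EX_TX_BW_AGGR : EX_TX_BW_FANOUT);
	return (is_aggr ? EX_TX_AGGR : EX_TX_FANOUT);
}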

/*
 * Remove a ring from its current group. MAC internal function for dynamic
 * grouping.
 *
 * The caller needs to call mac_perim_enter() before calling this function.
 */
void
i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring,
    boolean_t driver_call)
{
	mac_impl_t *mip = (mac_impl_t *)group->mrg_mh;
	mac_capab_rings_t *cap_rings = NULL;
	mac_group_type_t group_type;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	ASSERT(mac_find_ring((mac_group_handle_t)group,
	    ring->mr_index) == (mac_ring_handle_t)ring);
	ASSERT((mac_group_t *)ring->mr_gh == group);
	ASSERT(ring->mr_type == group->mrg_type);

	if (ring->mr_state == MR_INUSE)
		mac_stop_ring(ring);
	switch (ring->mr_type) {
	case MAC_RING_TYPE_RX:
		group_type = mip->mi_rx_group_type;
		cap_rings = &mip->mi_rx_rings_cap;

		/*
		 * Only hardware classified packets hold a reference to the
		 * ring all the way up the Rx path. mac_rx_srs_remove()
		 * will take care of quiescing the Rx path and removing the
		 * SRS. The software classified path neither holds a reference
		 * nor any association with the ring in mac_rx.
		 */
		if (ring->mr_srs != NULL) {
			mac_rx_srs_remove(ring->mr_srs);
			ring->mr_srs = NULL;
		}

		break;
	case MAC_RING_TYPE_TX:
	{
		mac_grp_client_t *mgcp;
		mac_client_impl_t *mcip;
		mac_soft_ring_set_t *mac_srs;
		mac_srs_tx_t *tx;
		mac_ring_t *rem_ring;
		mac_group_t *defgrp;
		uint_t ring_info = 0;

		/*
		 * For TX this function is invoked in three
		 * cases:
		 *
		 * 1) In the case of a failure during the
		 * initial creation of a group when a share is
		 * associated with a MAC client. So the SRS is not
		 * yet set up, and will be set up later after the
		 * group has been reserved and populated.
		 *
		 * 2) From mac_release_tx_group() when freeing
		 * a TX SRS.
		 *
		 * 3) In the case of aggr, when a port gets removed,
		 * the pseudo Tx rings that it exposed get removed.
		 *
		 * In the first two cases the SRS and its soft
		 * rings are already quiesced.
		 */
		if (driver_call) {
			mac_client_impl_t *mcip;
			mac_soft_ring_set_t *mac_srs;
			mac_soft_ring_t *sringp;
			mac_srs_tx_t *srs_tx;

			if (mip->mi_state_flags & MIS_IS_AGGR &&
			    mip->mi_default_tx_ring ==
			    (mac_ring_handle_t)ring) {
				/* pick a new default Tx ring */
				mip->mi_default_tx_ring =
				    (group->mrg_rings != ring) ?
				    (mac_ring_handle_t)group->mrg_rings :
				    (mac_ring_handle_t)(ring->mr_next);
			}
			/* Presently only the aggr case comes here */
			if (group->mrg_state != MAC_GROUP_STATE_RESERVED)
				break;

			mcip = MAC_GROUP_ONLY_CLIENT(group);
			ASSERT(mcip != NULL);
			ASSERT(mcip->mci_state_flags & MCIS_IS_AGGR);
			mac_srs = MCIP_TX_SRS(mcip);
			ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_AGGR ||
			    mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR);
			srs_tx = &mac_srs->srs_tx;
			/*
			 * Wake up any callers blocked on this
			 * Tx ring due to flow control.
			 */
			sringp = srs_tx->st_soft_rings[ring->mr_index];
			ASSERT(sringp != NULL);
			mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)sringp);
			mac_tx_client_quiesce((mac_client_handle_t)mcip);
			mac_tx_srs_del_ring(mac_srs, ring);
			mac_tx_client_restart((mac_client_handle_t)mcip);
			break;
		}
		ASSERT(ring != (mac_ring_t *)mip->mi_default_tx_ring);
		group_type = mip->mi_tx_group_type;
		cap_rings = &mip->mi_tx_rings_cap;
		/*
		 * See if we need to take it out of the MAC clients using
		 * this group.
		 */
		if (MAC_GROUP_NO_CLIENT(group))
			break;
		mgcp = group->mrg_clients;
		defgrp = MAC_DEFAULT_TX_GROUP(mip);
		while (mgcp != NULL) {
			mcip = mgcp->mgc_client;
			mac_srs = MCIP_TX_SRS(mcip);
			tx = &mac_srs->srs_tx;
			mac_tx_client_quiesce((mac_client_handle_t)mcip);
			/*
			 * If we are here when removing rings from the
			 * defgroup, mac_reserve_tx_ring would have
			 * already deleted the ring from the MAC
			 * clients in the group.
			 */
			if (group != defgrp) {
				mac_tx_invoke_callbacks(mcip,
				    (mac_tx_cookie_t)
				    mac_tx_srs_get_soft_ring(mac_srs, ring));
				mac_tx_srs_del_ring(mac_srs, ring);
			}
			/*
			 * Additionally, if we are left with only
			 * one ring in the group after this, we need
			 * to modify the mode, etc. (We haven't
			 * yet taken the ring out, so we check with 2.)
			 */
			if (group->mrg_cur_count == 2) {
				if (ring->mr_next == NULL)
					rem_ring = group->mrg_rings;
				else
					rem_ring = ring->mr_next;
				mac_tx_invoke_callbacks(mcip,
				    (mac_tx_cookie_t)
				    mac_tx_srs_get_soft_ring(mac_srs,
				    rem_ring));
				mac_tx_srs_del_ring(mac_srs, rem_ring);
				if (rem_ring->mr_state != MR_INUSE) {
					(void) mac_start_ring(rem_ring);
				}
				tx->st_arg2 = (void *)rem_ring;
				mac_tx_srs_stat_recreate(mac_srs, B_FALSE);
				ring_info = mac_hwring_getinfo(
				    (mac_ring_handle_t)rem_ring);
				/*
				 * We are shrinking from multiple
				 * to 1 ring.
				 */
				if (mac_srs->srs_type & SRST_BW_CONTROL) {
					tx->st_mode = SRS_TX_BW;
				} else if (mac_tx_serialize ||
				    (ring_info & MAC_RING_TX_SERIALIZE)) {
					tx->st_mode = SRS_TX_SERIALIZE;
				} else {
					tx->st_mode = SRS_TX_DEFAULT;
				}
				tx->st_func = mac_tx_get_func(tx->st_mode);
			}
			mac_tx_client_restart((mac_client_handle_t)mcip);
			mgcp = mgcp->mgc_next;
		}
		break;
	}
	default:
		ASSERT(B_FALSE);
	}

	/*
	 * Remove the ring from the group.
	 */
	if (ring == group->mrg_rings)
		group->mrg_rings = ring->mr_next;
	else {
		mac_ring_t *pre;

		pre = group->mrg_rings;
		while (pre->mr_next != ring)
			pre = pre->mr_next;
		pre->mr_next = ring->mr_next;
	}
	group->mrg_cur_count--;

	if (!driver_call) {
		ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC);
		ASSERT(group->mrg_driver == NULL ||
		    cap_rings->mr_gremring != NULL);

		/*
		 * Remove the driver level hardware ring.
		 */
		if (group->mrg_driver != NULL) {
			cap_rings->mr_gremring(group->mrg_driver,
			    ring->mr_driver, ring->mr_type);
		}
	}

	ring->mr_gh = NULL;
	if (driver_call)
		mac_ring_free(mip, ring);
	else
		ring->mr_flag = 0;
}

/*
 * Move a ring to the target group. If needed, remove the ring from the group
 * that it currently belongs to.
 *
 * The caller needs to enter MAC's perimeter by calling mac_perim_enter().
 */
static int
mac_group_mov_ring(mac_impl_t *mip, mac_group_t *d_group, mac_ring_t *ring)
{
	mac_group_t *s_group = (mac_group_t *)ring->mr_gh;
	int rv;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(d_group != NULL);
	ASSERT(s_group->mrg_mh == d_group->mrg_mh);

	if (s_group == d_group)
		return (0);

	/*
	 * Remove it from the current group first.
	 */
	if (s_group != NULL)
		i_mac_group_rem_ring(s_group, ring, B_FALSE);

	/*
	 * Add it to the new group.
	 */
	rv = i_mac_group_add_ring(d_group, ring, 0);
	if (rv != 0) {
		/*
		 * Failed to add the ring to the destination group.
		 * Try to put it back in the source group; if that
		 * also fails, the ring is stuck in limbo, so log a
		 * message.
		 */
		if (i_mac_group_add_ring(s_group, ring, 0)) {
			cmn_err(CE_WARN, "%s: failed to move ring %p\n",
			    mip->mi_name, (void *)ring);
		}
	}

	return (rv);
}

/*
 * Find a MAC address according to its value.
 */
mac_address_t *
mac_find_macaddr(mac_impl_t *mip, uint8_t *mac_addr)
{
	mac_address_t *map;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	for (map = mip->mi_addresses; map != NULL; map = map->ma_next) {
		if (bcmp(mac_addr, map->ma_addr, map->ma_len) == 0)
			break;
	}

	return (map);
}

/*
 * Check whether the MAC address is shared by multiple clients.
 */
boolean_t
mac_check_macaddr_shared(mac_address_t *map)
{
	ASSERT(MAC_PERIM_HELD((mac_handle_t)map->ma_mip));

	return (map->ma_nusers > 1);
}
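
/*
 * EXAMPLE (editorial sketch, not part of the original source): the
 * move-with-rollback pattern of mac_group_mov_ring() above. If the ring
 * cannot be added to the destination group, we attempt to restore it to
 * the source group; only when that also fails is the ring truly
 * stranded and worth a warning. The function pointers and ex_* names
 * are hypothetical.
 */
static int
ex_move_ring(int (*rem)(int, int), int (*add)(int, int),
    int src_grp, int dst_grp, int ring)
{
	int rv;

	if (src_grp == dst_grp)
		return (0);
	(void) rem(src_grp, ring);
	if ((rv = add(dst_grp, ring)) != 0) {
		/* Roll back; a second failure leaves the ring in limbo. */
		if (add(src_grp, ring) != 0) {
			/* log a warning here, as mac_group_mov_ring() does */
		}
	}
	return (rv);
}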

/*
 * Remove the specified MAC address from the MAC address list and free it.
 */
static void
mac_free_macaddr(mac_address_t *map)
{
	mac_impl_t *mip = map->ma_mip;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mip->mi_addresses != NULL);

	map = mac_find_macaddr(mip, map->ma_addr);

	ASSERT(map != NULL);
	ASSERT(map->ma_nusers == 0);

	if (map == mip->mi_addresses) {
		mip->mi_addresses = map->ma_next;
	} else {
		mac_address_t *pre;

		pre = mip->mi_addresses;
		while (pre->ma_next != map)
			pre = pre->ma_next;
		pre->ma_next = map->ma_next;
	}

	kmem_free(map, sizeof (mac_address_t));
}

/*
 * Add a MAC address reference for a client. If the desired MAC address
 * exists, add a reference to it. Otherwise, add the new address by adding
 * it to a reserved group or setting promiscuous mode. Won't try a different
 * group if the given group is non-NULL, so the caller must explicitly share
 * the default group when needed.
 *
 * Note, the primary MAC address is initialized at registration time, so
 * to add it to the default group we only need to activate it if its
 * reference count is still zero. Also, some drivers may not have advertised
 * the RINGS capability.
 */
int
mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr,
    boolean_t use_hw)
{
	mac_address_t *map;
	int err = 0;
	boolean_t allocated_map = B_FALSE;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	map = mac_find_macaddr(mip, mac_addr);

	/*
	 * If the new MAC address has not been added, allocate a new one
	 * and set it up.
	 */
	if (map == NULL) {
		map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP);
		map->ma_len = mip->mi_type->mt_addr_length;
		bcopy(mac_addr, map->ma_addr, map->ma_len);
		map->ma_nusers = 0;
		map->ma_group = group;
		map->ma_mip = mip;

		/* add the new MAC address to the head of the address list */
		map->ma_next = mip->mi_addresses;
		mip->mi_addresses = map;

		allocated_map = B_TRUE;
	}

	ASSERT(map->ma_group == NULL || map->ma_group == group);
	if (map->ma_group == NULL)
		map->ma_group = group;

	/*
	 * If the MAC address is already in use, simply account for the
	 * new client.
	 */
	if (map->ma_nusers++ > 0)
		return (0);

	/*
	 * Activate this MAC address by adding it to the reserved group.
	 */
	if (group != NULL) {
		err = mac_group_addmac(group, (const uint8_t *)mac_addr);
		if (err == 0) {
			map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;
			return (0);
		}
	}

	/*
	 * The MAC address addition failed. If the client requires a
	 * hardware classified MAC address, fail the operation.
	 */
	if (use_hw) {
		err = ENOSPC;
		goto bail;
	}

	/*
	 * Try promiscuous mode.
	 *
	 * For drivers that don't advertise the RINGS capability, do
	 * nothing for the primary address.
	 */
	if ((group == NULL) &&
	    (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0)) {
		map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;
		return (0);
	}

	/*
	 * Enable promiscuous mode in order to receive traffic
	 * to the new MAC address.
	 */
	if ((err = i_mac_promisc_set(mip, B_TRUE)) == 0) {
		map->ma_type = MAC_ADDRESS_TYPE_UNICAST_PROMISC;
		return (0);
	}

	/*
	 * Free the MAC address that could not be added.
	 * Don't free a pre-existing address, it could have been the entry
	 * for the primary MAC address which was pre-allocated by
	 * mac_init_macaddr(), and which must remain on the list.
	 */
bail:
	map->ma_nusers--;
	if (allocated_map)
		mac_free_macaddr(map);
	return (err);
}

/*
 * Remove a reference to a MAC address. This may cause the MAC address to
 * be removed from an associated group, or promiscuous mode to be turned
 * off. The caller needs to handle the failure properly.
 */
int
mac_remove_macaddr(mac_address_t *map)
{
	mac_impl_t *mip = map->ma_mip;
	int err = 0;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	ASSERT(map == mac_find_macaddr(mip, map->ma_addr));

	/*
	 * If it's not the last client using this MAC address, only update
	 * the MAC clients count.
	 */
	if (--map->ma_nusers > 0)
		return (0);

	/*
	 * The MAC address is no longer used by any MAC client, so remove
	 * it from its associated group, or turn off promiscuous mode
	 * if it was enabled for the MAC address.
	 */
	switch (map->ma_type) {
	case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED:
		/*
		 * Don't free the preset primary address for drivers that
		 * don't advertise the RINGS capability.
		 */
		if (map->ma_group == NULL)
			return (0);

		err = mac_group_remmac(map->ma_group, map->ma_addr);
		if (err == 0)
			map->ma_group = NULL;
		break;
	case MAC_ADDRESS_TYPE_UNICAST_PROMISC:
		err = i_mac_promisc_set(mip, B_FALSE);
		break;
	default:
		ASSERT(B_FALSE);
	}

	if (err != 0)
		return (err);

	/*
	 * We created the MAC address for the primary one at registration, so
	 * we won't free it here. mac_fini_macaddr() will take care of it.
	 */
	if (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) != 0)
		mac_free_macaddr(map);

	return (0);
}
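
/*
 * EXAMPLE (editorial sketch, not part of the original source): the
 * reference-counting discipline of mac_add_macaddr() and
 * mac_remove_macaddr() above. Only the 0 -> 1 transition activates the
 * address and only the 1 -> 0 transition deactivates it; every other
 * transition is pure bookkeeping. The callbacks and ex_* names are
 * hypothetical.
 */
typedef struct ex_addr {
	int	exa_nusers;		/* clients referencing this address */
} ex_addr_t;

static int
ex_addr_hold(ex_addr_t *map, int (*activate)(void))
{
	if (map->exa_nusers++ > 0)
		return (0);		/* already active; just account */
	return (activate());		/* first user programs the HW */
}

static int
ex_addr_rele(ex_addr_t *map, int (*deactivate)(void))
{
	if (--map->exa_nusers > 0)
		return (0);		/* still in use by other clients */
	return (deactivate());		/* last user tears it down */
}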

/*
 * Update an existing MAC address. The caller needs to make sure that the new
 * value has not been used.
 */
int
mac_update_macaddr(mac_address_t *map, uint8_t *mac_addr)
{
	mac_impl_t *mip = map->ma_mip;
	int err = 0;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mac_find_macaddr(mip, mac_addr) == NULL);

	switch (map->ma_type) {
	case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED:
		/*
		 * Update the primary address for drivers that are not
		 * RINGS capable.
		 */
		if (mip->mi_rx_groups == NULL) {
			err = mip->mi_unicst(mip->mi_driver, (const uint8_t *)
			    mac_addr);
			if (err != 0)
				return (err);
			break;
		}

		/*
		 * If this MAC address is not currently in use,
		 * simply break out and update the value.
		 */
		if (map->ma_nusers == 0)
			break;

		/*
		 * Need to replace the MAC address associated with a group.
		 */
		err = mac_group_remmac(map->ma_group, map->ma_addr);
		if (err != 0)
			return (err);

		err = mac_group_addmac(map->ma_group, mac_addr);

		/*
		 * A failure hints at a hardware error. The MAC layer needs
		 * to have an error notification facility to handle this.
		 * For now, simply try to restore the value.
		 */
		if (err != 0)
			(void) mac_group_addmac(map->ma_group, map->ma_addr);

		break;
	case MAC_ADDRESS_TYPE_UNICAST_PROMISC:
		/*
		 * Nothing more needs to be done in promiscuous mode.
		 */
		break;
	default:
		ASSERT(B_FALSE);
	}

	/*
	 * Successfully replaced the MAC address.
	 */
	if (err == 0)
		bcopy(mac_addr, map->ma_addr, map->ma_len);

	return (err);
}

/*
 * Freshen the MAC address with the new value. The caller must have updated
 * the hardware MAC address before calling this function.
 * This function is supposed to be used to handle the MAC address change
 * notification from underlying drivers.
 */
void
mac_freshen_macaddr(mac_address_t *map, uint8_t *mac_addr)
{
	mac_impl_t *mip = map->ma_mip;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mac_find_macaddr(mip, mac_addr) == NULL);

	/*
	 * Freshen the MAC address with the new value.
	 */
	bcopy(mac_addr, map->ma_addr, map->ma_len);
	bcopy(mac_addr, mip->mi_addr, map->ma_len);

	/*
	 * Update all MAC clients that share this MAC address.
	 */
	mac_unicast_update_clients(mip, map);
}

/*
 * Set up the primary MAC address.
 */
void
mac_init_macaddr(mac_impl_t *mip)
{
	mac_address_t *map;

	/*
	 * The reference count is initialized to zero, until it's really
	 * activated.
	 */
	map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP);
	map->ma_len = mip->mi_type->mt_addr_length;
	bcopy(mip->mi_addr, map->ma_addr, map->ma_len);

	/*
	 * If the driver advertises the RINGS capability, it shouldn't have
	 * initialized its primary MAC address. For other drivers, including
	 * VNIC, the primary address must work after registration.
	 */
	if (mip->mi_rx_groups == NULL)
		map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED;

	map->ma_mip = mip;

	mip->mi_addresses = map;
}

/*
 * Clean up the primary MAC address. Note, only one primary MAC address
 * is allowed. All other MAC addresses must have been freed appropriately.
 */
void
mac_fini_macaddr(mac_impl_t *mip)
{
	mac_address_t *map = mip->mi_addresses;

	if (map == NULL)
		return;

	/*
	 * If mi_addresses is initialized, there should be exactly one
	 * entry left on the list with no users.
	 */
	ASSERT(map->ma_nusers == 0);
	ASSERT(map->ma_next == NULL);

	kmem_free(map, sizeof (mac_address_t));
	mip->mi_addresses = NULL;
}

/*
 * Logging related functions.
 *
 * Note that Kernel statistics have been extended to maintain fine
 * granularity of statistics viz. hardware lane, software lane, fanout
 * stats etc. However, extended accounting continues to support only
 * aggregate statistics like before.
 */

/* Write the flow description to a netinfo_t record */
static netinfo_t *
mac_write_flow_desc(flow_entry_t *flent, mac_client_impl_t *mcip)
{
	netinfo_t *ninfo;
	net_desc_t *ndesc;
	flow_desc_t *fdesc;
	mac_resource_props_t *mrp;

	ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP);
	if (ninfo == NULL)
		return (NULL);
	ndesc = kmem_zalloc(sizeof (net_desc_t), KM_NOSLEEP);
	if (ndesc == NULL) {
		kmem_free(ninfo, sizeof (netinfo_t));
		return (NULL);
	}

	/*
	 * Grab the fe_lock to see a self-consistent fe_flow_desc.
	 * Updates to the fe_flow_desc are done under the fe_lock.
	 */
	mutex_enter(&flent->fe_lock);
	fdesc = &flent->fe_flow_desc;
	mrp = &flent->fe_resource_props;

	ndesc->nd_name = flent->fe_flow_name;
	ndesc->nd_devname = mcip->mci_name;
	bcopy(fdesc->fd_src_mac, ndesc->nd_ehost, ETHERADDRL);
	bcopy(fdesc->fd_dst_mac, ndesc->nd_edest, ETHERADDRL);
	ndesc->nd_sap = htonl(fdesc->fd_sap);
	ndesc->nd_isv4 = (uint8_t)fdesc->fd_ipversion == IPV4_VERSION;
	ndesc->nd_bw_limit = mrp->mrp_maxbw;
	if (ndesc->nd_isv4) {
		ndesc->nd_saddr[3] = htonl(fdesc->fd_local_addr.s6_addr32[3]);
		ndesc->nd_daddr[3] = htonl(fdesc->fd_remote_addr.s6_addr32[3]);
	} else {
		bcopy(&fdesc->fd_local_addr, ndesc->nd_saddr, IPV6_ADDR_LEN);
		bcopy(&fdesc->fd_remote_addr, ndesc->nd_daddr, IPV6_ADDR_LEN);
	}
	ndesc->nd_sport = htons(fdesc->fd_local_port);
	ndesc->nd_dport = htons(fdesc->fd_remote_port);
	ndesc->nd_protocol = (uint8_t)fdesc->fd_protocol;
	mutex_exit(&flent->fe_lock);

	ninfo->ni_record = ndesc;
	ninfo->ni_size = sizeof (net_desc_t);
	ninfo->ni_type = EX_NET_FLDESC_REC;

	return (ninfo);
}

/* Write the flow statistics to a netinfo_t record */
static netinfo_t *
mac_write_flow_stats(flow_entry_t *flent)
{
	netinfo_t *ninfo;
	net_stat_t *nstat;
	mac_soft_ring_set_t *mac_srs;
	mac_rx_stats_t *mac_rx_stat;
	mac_tx_stats_t *mac_tx_stat;
	int i;

	ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP);
	if (ninfo == NULL)
		return (NULL);
	nstat = kmem_zalloc(sizeof (net_stat_t), KM_NOSLEEP);
	if (nstat == NULL) {
		kmem_free(ninfo, sizeof (netinfo_t));
		return (NULL);
	}

	nstat->ns_name = flent->fe_flow_name;
	for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
		mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
		mac_rx_stat = &mac_srs->srs_rx.sr_stat;

		nstat->ns_ibytes += mac_rx_stat->mrs_intrbytes +
		    mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes;
		nstat->ns_ipackets += mac_rx_stat->mrs_intrcnt +
		    mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt;
		nstat->ns_oerrors += mac_rx_stat->mrs_ierrors;
	}

	mac_srs = (mac_soft_ring_set_t *)(flent->fe_tx_srs);
	if (mac_srs != NULL) {
		mac_tx_stat = &mac_srs->srs_tx.st_stat;

		nstat->ns_obytes = mac_tx_stat->mts_obytes;
		nstat->ns_opackets = mac_tx_stat->mts_opackets;
		nstat->ns_oerrors = mac_tx_stat->mts_oerrors;
	}

	ninfo->ni_record = nstat;
	ninfo->ni_size = sizeof (net_stat_t);
	ninfo->ni_type = EX_NET_FLSTAT_REC;

	return (ninfo);
}
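
/*
 * EXAMPLE (editorial sketch, not part of the original source): the
 * two-stage no-sleep allocation used by the mac_write_*() helpers
 * above. When the second allocation fails, the first must be unwound
 * before returning NULL so nothing leaks. calloc() stands in for
 * kmem_zalloc(..., KM_NOSLEEP) and the ex_* names are hypothetical.
 */
#include <stdlib.h>

typedef struct ex_rec { int exr_unused; } ex_rec_t;
typedef struct ex_info { ex_rec_t *exi_record; } ex_info_t;

static ex_info_t *
ex_write_record(void)
{
	ex_info_t *info;
	ex_rec_t *rec;

	if ((info = calloc(1, sizeof (*info))) == NULL)
		return (NULL);
	if ((rec = calloc(1, sizeof (*rec))) == NULL) {
		free(info);		/* unwind the partial allocation */
		return (NULL);
	}
	info->exi_record = rec;
	return (info);
}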

/* Write the link description to a netinfo_t record */
static netinfo_t *
mac_write_link_desc(mac_client_impl_t *mcip)
{
	netinfo_t *ninfo;
	net_desc_t *ndesc;
	flow_entry_t *flent = mcip->mci_flent;

	ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP);
	if (ninfo == NULL)
		return (NULL);
	ndesc = kmem_zalloc(sizeof (net_desc_t), KM_NOSLEEP);
	if (ndesc == NULL) {
		kmem_free(ninfo, sizeof (netinfo_t));
		return (NULL);
	}

	ndesc->nd_name = mcip->mci_name;
	ndesc->nd_devname = mcip->mci_name;
	ndesc->nd_isv4 = B_TRUE;
	/*
	 * Grab the fe_lock to see a self-consistent fe_flow_desc.
	 * Updates to the fe_flow_desc are done under the fe_lock
	 * after removing the flent from the flow table.
	 */
	mutex_enter(&flent->fe_lock);
	bcopy(flent->fe_flow_desc.fd_src_mac, ndesc->nd_ehost, ETHERADDRL);
	mutex_exit(&flent->fe_lock);

	ninfo->ni_record = ndesc;
	ninfo->ni_size = sizeof (net_desc_t);
	ninfo->ni_type = EX_NET_LNDESC_REC;

	return (ninfo);
}

/* Write the link statistics to a netinfo_t record */
static netinfo_t *
mac_write_link_stats(mac_client_impl_t *mcip)
{
	netinfo_t *ninfo;
	net_stat_t *nstat;
	flow_entry_t *flent;
	mac_soft_ring_set_t *mac_srs;
	mac_rx_stats_t *mac_rx_stat;
	mac_tx_stats_t *mac_tx_stat;
	int i;

	ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP);
	if (ninfo == NULL)
		return (NULL);
	nstat = kmem_zalloc(sizeof (net_stat_t), KM_NOSLEEP);
	if (nstat == NULL) {
		kmem_free(ninfo, sizeof (netinfo_t));
		return (NULL);
	}

	nstat->ns_name = mcip->mci_name;
	flent = mcip->mci_flent;
	if (flent != NULL) {
		for (i = 0; i < flent->fe_rx_srs_cnt; i++) {
			mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i];
			mac_rx_stat = &mac_srs->srs_rx.sr_stat;

			nstat->ns_ibytes += mac_rx_stat->mrs_intrbytes +
			    mac_rx_stat->mrs_pollbytes +
			    mac_rx_stat->mrs_lclbytes;
			nstat->ns_ipackets += mac_rx_stat->mrs_intrcnt +
			    mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt;
			nstat->ns_oerrors += mac_rx_stat->mrs_ierrors;
		}
	}

	mac_srs = (mac_soft_ring_set_t *)(mcip->mci_flent->fe_tx_srs);
	if (mac_srs != NULL) {
		mac_tx_stat = &mac_srs->srs_tx.st_stat;

		nstat->ns_obytes = mac_tx_stat->mts_obytes;
		nstat->ns_opackets = mac_tx_stat->mts_opackets;
		nstat->ns_oerrors = mac_tx_stat->mts_oerrors;
	}

	ninfo->ni_record = nstat;
	ninfo->ni_size = sizeof (net_stat_t);
	ninfo->ni_type = EX_NET_LNSTAT_REC;

	return (ninfo);
}

typedef struct i_mac_log_state_s {
	boolean_t	mi_last;
	int		mi_fenable;
	int		mi_lenable;
	list_t		*mi_list;
} i_mac_log_state_t;

/*
 * For a given flow, if the description has not been logged before, do it now.
 * If it is a VNIC, then we have collected information about it from the MAC
 * table, so skip it.
 *
 * Called through mac_flow_walk_nolock()
 *
 * Return 0 if successful.
 */
static int
mac_log_flowinfo(flow_entry_t *flent, void *arg)
{
	mac_client_impl_t *mcip = flent->fe_mcip;
	i_mac_log_state_t *lstate = arg;
	netinfo_t *ninfo;

	if (mcip == NULL)
		return (0);

	/*
	 * If the name starts with "vnic", and FLOW_USER is set (to
	 * exclude the mcast and active flow entries created implicitly for
	 * a vnic), it is a VNIC flow; i.e. vnic1 is a vnic flow,
	 * vnic/bge1/mcast1 is not and neither is vnic/bge1/active.
	 */
	if (strncasecmp(flent->fe_flow_name, "vnic", 4) == 0 &&
	    (flent->fe_type & FLOW_USER) != 0) {
		return (0);
	}

	if (!flent->fe_desc_logged) {
		/*
		 * We don't return an error because we want to continue the
		 * walk in case this is the last walk, which means we
		 * need to reset fe_desc_logged in all the flows.
		 */
		if ((ninfo = mac_write_flow_desc(flent, mcip)) == NULL)
			return (0);
		list_insert_tail(lstate->mi_list, ninfo);
		flent->fe_desc_logged = B_TRUE;
	}

	/*
	 * Regardless of the error, we want to proceed in case we have to
	 * reset fe_desc_logged.
	 */
	ninfo = mac_write_flow_stats(flent);
	if (ninfo == NULL)
		return (-1);

	list_insert_tail(lstate->mi_list, ninfo);

	if (mcip != NULL && !(mcip->mci_state_flags & MCIS_DESC_LOGGED))
		flent->fe_desc_logged = B_FALSE;

	return (0);
}

/*
 * Log the description for each mac client of this mac_impl_t, if it
 * hasn't already been done. Additionally, log statistics for the link as
 * well. Walk the flow table and log information for each flow as well.
 * If it is the last walk (mci_last), then we turn off mci_desc_logged (and
 * also fe_desc_logged, if flow logging is on) since we want to log the
 * description if and when logging is restarted.
 *
 * Return 0 upon success or -1 upon failure
 */
static int
i_mac_impl_log(mac_impl_t *mip, i_mac_log_state_t *lstate)
{
	mac_client_impl_t *mcip;
	netinfo_t *ninfo;

	i_mac_perim_enter(mip);
	/*
	 * Only walk the client list for NIC and etherstub
	 */
	if ((mip->mi_state_flags & MIS_DISABLED) ||
	    ((mip->mi_state_flags & MIS_IS_VNIC) &&
	    (mac_get_lower_mac_handle((mac_handle_t)mip) != NULL))) {
		i_mac_perim_exit(mip);
		return (0);
	}

	for (mcip = mip->mi_clients_list; mcip != NULL;
	    mcip = mcip->mci_client_next) {
		if (!MCIP_DATAPATH_SETUP(mcip))
			continue;
		if (lstate->mi_lenable) {
			if (!(mcip->mci_state_flags & MCIS_DESC_LOGGED)) {
				ninfo = mac_write_link_desc(mcip);
				if (ninfo == NULL) {
				/*
				 * We can't terminate it if this is the last
				 * walk, else there might be some links with
				 * mi_desc_logged set to true, which means
				 * their description won't be logged the next
				 * time logging is started (similarly for the
				 * flows within such links). We can continue
				 * without walking the flow table (i.e. to
				 * set fe_desc_logged to false) because we
				 * won't have written any flow stuff for this
				 * link as we haven't logged the link itself.
				 */
					i_mac_perim_exit(mip);
					if (lstate->mi_last)
						return (0);
					else
						return (-1);
				}
				mcip->mci_state_flags |= MCIS_DESC_LOGGED;
				list_insert_tail(lstate->mi_list, ninfo);
			}
		}

		ninfo = mac_write_link_stats(mcip);
		if (ninfo == NULL && !lstate->mi_last) {
			i_mac_perim_exit(mip);
			return (-1);
		}
		list_insert_tail(lstate->mi_list, ninfo);

		if (lstate->mi_last)
			mcip->mci_state_flags &= ~MCIS_DESC_LOGGED;

		if (lstate->mi_fenable) {
			if (mcip->mci_subflow_tab != NULL) {
				(void) mac_flow_walk_nolock(
				    mcip->mci_subflow_tab, mac_log_flowinfo,
				    lstate);
			}
		}
	}
	i_mac_perim_exit(mip);
	return (0);
}

/*
 * modhash walker function to add a mac_impl_t to a list
 */
/*ARGSUSED*/
static uint_t
i_mac_impl_list_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	list_t *list = (list_t *)arg;
	mac_impl_t *mip = (mac_impl_t *)val;

	if ((mip->mi_state_flags & MIS_DISABLED) == 0) {
		list_insert_tail(list, mip);
		mip->mi_ref++;
	}

	return (MH_WALK_CONTINUE);
}

void
i_mac_log_info(list_t *net_log_list, i_mac_log_state_t *lstate)
{
	list_t mac_impl_list;
	mac_impl_t *mip;
	netinfo_t *ninfo;

	/* Create a list of mac_impls */
	ASSERT(RW_LOCK_HELD(&i_mac_impl_lock));
	list_create(&mac_impl_list, sizeof (mac_impl_t), offsetof(mac_impl_t,
	    mi_node));
	mod_hash_walk(i_mac_impl_hash, i_mac_impl_list_walker, &mac_impl_list);
	rw_exit(&i_mac_impl_lock);

	/* Create log entries for each mac_impl */
	for (mip = list_head(&mac_impl_list); mip != NULL;
	    mip = list_next(&mac_impl_list, mip)) {
		if (i_mac_impl_log(mip, lstate) != 0)
			continue;
	}

	/* Remove elements and destroy the list of mac_impls */
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	while ((mip = list_remove_tail(&mac_impl_list)) != NULL) {
		mip->mi_ref--;
	}
	rw_exit(&i_mac_impl_lock);
	list_destroy(&mac_impl_list);

	/*
	 * Write log entries to files outside of locks, free associated
	 * structures, and remove entries from the list.
	 */
	while ((ninfo = list_head(net_log_list)) != NULL) {
		(void) exacct_commit_netinfo(ninfo->ni_record, ninfo->ni_type);
		list_remove(net_log_list, ninfo);
		kmem_free(ninfo->ni_record, ninfo->ni_size);
		kmem_free(ninfo, sizeof (*ninfo));
	}
	list_destroy(net_log_list);
}

/*
 * The timer thread that runs every mac_logging_interval seconds and logs
 * link and/or flow information.
 */
/* ARGSUSED */
void
mac_log_linkinfo(void *arg)
{
	i_mac_log_state_t lstate;
	list_t net_log_list;

	list_create(&net_log_list, sizeof (netinfo_t),
	    offsetof(netinfo_t, ni_link));

	rw_enter(&i_mac_impl_lock, RW_READER);
	if (!mac_flow_log_enable && !mac_link_log_enable) {
		rw_exit(&i_mac_impl_lock);
		return;
	}
	lstate.mi_fenable = mac_flow_log_enable;
	lstate.mi_lenable = mac_link_log_enable;
	lstate.mi_last = B_FALSE;
	lstate.mi_list = &net_log_list;

	/* Write log entries for each mac_impl in the list */
	i_mac_log_info(&net_log_list, &lstate);

	if (mac_flow_log_enable || mac_link_log_enable) {
		mac_logging_timer = timeout(mac_log_linkinfo, NULL,
		    SEC_TO_TICK(mac_logging_interval));
	}
}

typedef struct i_mac_fastpath_state_s {
	boolean_t	mf_disable;
	int		mf_err;
} i_mac_fastpath_state_t;

/* modhash walker function to enable or disable fastpath */
/*ARGSUSED*/
static uint_t
i_mac_fastpath_walker(mod_hash_key_t key, mod_hash_val_t *val,
    void *arg)
{
	i_mac_fastpath_state_t *state = arg;
	mac_handle_t mh = (mac_handle_t)val;

	if (state->mf_disable)
		state->mf_err = mac_fastpath_disable(mh);
	else
		mac_fastpath_enable(mh);

	return (state->mf_err == 0 ? MH_WALK_CONTINUE : MH_WALK_TERMINATE);
}

/*
 * Start the logging timer.
 */
int
mac_start_logusage(mac_logtype_t type, uint_t interval)
{
	i_mac_fastpath_state_t dstate = {B_TRUE, 0};
	i_mac_fastpath_state_t estate = {B_FALSE, 0};
	int err;

	rw_enter(&i_mac_impl_lock, RW_WRITER);
	switch (type) {
	case MAC_LOGTYPE_FLOW:
		if (mac_flow_log_enable) {
			rw_exit(&i_mac_impl_lock);
			return (0);
		}
		/* FALLTHRU */
	case MAC_LOGTYPE_LINK:
		if (mac_link_log_enable) {
			rw_exit(&i_mac_impl_lock);
			return (0);
		}
		break;
	default:
		ASSERT(0);
	}

	/* Disable fastpath */
	mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &dstate);
	if ((err = dstate.mf_err) != 0) {
		/* Reenable fastpath */
		mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &estate);
		rw_exit(&i_mac_impl_lock);
		return (err);
	}

	switch (type) {
	case MAC_LOGTYPE_FLOW:
		mac_flow_log_enable = B_TRUE;
		/* FALLTHRU */
	case MAC_LOGTYPE_LINK:
		mac_link_log_enable = B_TRUE;
		break;
	}

	mac_logging_interval = interval;
	rw_exit(&i_mac_impl_lock);
	mac_log_linkinfo(NULL);
	return (0);
}
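
/*
 * EXAMPLE (editorial sketch, not part of the original source): the
 * self-rearming timer pattern of mac_log_linkinfo() above. Each tick
 * re-checks the enable flags and only reschedules itself while at least
 * one log type is still enabled, so clearing the flags lets the chain
 * die out. ex_schedule() is a hypothetical stand-in for timeout(); all
 * ex_* names are assumptions.
 */
static int ex_flow_log_enable, ex_link_log_enable;

static void ex_log_tick(void *);

/* Stand-in for timeout(); a real version would defer the callback. */
static void
ex_schedule(void (*fn)(void *), void *arg, int seconds)
{
	(void) fn;
	(void) arg;
	(void) seconds;
}

static void
ex_log_tick(void *arg)
{
	if (!ex_flow_log_enable && !ex_link_log_enable)
		return;			/* logging stopped; don't re-arm */

	/* ... write link/flow records here ... */

	/* Re-arm only while at least one log type remains enabled. */
	if (ex_flow_log_enable || ex_link_log_enable)
		ex_schedule(ex_log_tick, arg, 20);
}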

/*
 * Stop the logging timer if both link and flow logging are turned off.
 */
void
mac_stop_logusage(mac_logtype_t type)
{
	i_mac_log_state_t lstate;
	i_mac_fastpath_state_t estate = {B_FALSE, 0};
	list_t net_log_list;

	list_create(&net_log_list, sizeof (netinfo_t),
	    offsetof(netinfo_t, ni_link));

	rw_enter(&i_mac_impl_lock, RW_WRITER);

	lstate.mi_fenable = mac_flow_log_enable;
	lstate.mi_lenable = mac_link_log_enable;
	lstate.mi_list = &net_log_list;

	/* Last walk */
	lstate.mi_last = B_TRUE;

	switch (type) {
	case MAC_LOGTYPE_FLOW:
		if (lstate.mi_fenable) {
			ASSERT(mac_link_log_enable);
			mac_flow_log_enable = B_FALSE;
			mac_link_log_enable = B_FALSE;
			break;
		}
		/* FALLTHRU */
	case MAC_LOGTYPE_LINK:
		if (!lstate.mi_lenable || mac_flow_log_enable) {
			rw_exit(&i_mac_impl_lock);
			return;
		}
		mac_link_log_enable = B_FALSE;
		break;
	default:
		ASSERT(0);
	}

	/* Reenable fastpath */
	mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &estate);

	(void) untimeout(mac_logging_timer);
	mac_logging_timer = 0;

	/* Write log entries for each mac_impl in the list */
	i_mac_log_info(&net_log_list, &lstate);
}

/*
 * Walk the rx and tx SRS/SRs for a flow and update the priority value.
 */
void
mac_flow_update_priority(mac_client_impl_t *mcip, flow_entry_t *flent)
{
	pri_t pri;
	int count;
	mac_soft_ring_set_t *mac_srs;

	if (flent->fe_rx_srs_cnt <= 0)
		return;

	if (((mac_soft_ring_set_t *)flent->fe_rx_srs[0])->srs_type ==
	    SRST_FLOW) {
		pri = FLOW_PRIORITY(mcip->mci_min_pri,
		    mcip->mci_max_pri,
		    flent->fe_resource_props.mrp_priority);
	} else {
		pri = mcip->mci_max_pri;
	}

	for (count = 0; count < flent->fe_rx_srs_cnt; count++) {
		mac_srs = flent->fe_rx_srs[count];
		mac_update_srs_priority(mac_srs, pri);
	}
	/*
	 * If we have a Tx SRS, we need to modify all the threads associated
	 * with it.
	 */
	if (flent->fe_tx_srs != NULL)
		mac_update_srs_priority(flent->fe_tx_srs, pri);
}
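
/*
 * EXAMPLE (editorial sketch, not part of the original source): choosing
 * the effective priority as mac_flow_update_priority() does above. A
 * flow SRS maps the user-configured flow priority into the client's
 * [min, max] range (FLOW_PRIORITY() is modeled here as a simple clamp,
 * which is an assumption, not that macro's actual definition); a
 * link-level SRS simply uses the client's maximum. The ex_* name is
 * hypothetical.
 */
static int
ex_effective_pri(int is_flow_srs, int min_pri, int max_pri, int flow_pri)
{
	if (!is_flow_srs)
		return (max_pri);
	if (flow_pri < min_pri)
		return (min_pri);
	if (flow_pri > max_pri)
		return (max_pri);
	return (flow_pri);
}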
5874 * If the desired ring was previously allocated to another client, this 5875 * function swaps it with a new ring from the group of unassigned rings. 5876 */ 5877 mac_ring_t * 5878 mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring) 5879 { 5880 mac_group_t *group; 5881 mac_grp_client_t *mgcp; 5882 mac_client_impl_t *mcip; 5883 mac_soft_ring_set_t *srs; 5884 5885 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 5886 5887 /* 5888 * Find an available ring and start it before changing its status. 5889 * The unassigned rings are at the end of the mi_tx_groups 5890 * array. 5891 */ 5892 group = MAC_DEFAULT_TX_GROUP(mip); 5893 5894 /* Can't take the default ring out of the default group */ 5895 ASSERT(desired_ring != (mac_ring_t *)mip->mi_default_tx_ring); 5896 5897 if (desired_ring->mr_state == MR_FREE) { 5898 ASSERT(MAC_GROUP_NO_CLIENT(group)); 5899 if (mac_start_ring(desired_ring) != 0) 5900 return (NULL); 5901 return (desired_ring); 5902 } 5903 /* 5904 * There are clients using this ring, so let's move the clients 5905 * away from using this ring. 5906 */ 5907 for (mgcp = group->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) { 5908 mcip = mgcp->mgc_client; 5909 mac_tx_client_quiesce((mac_client_handle_t)mcip); 5910 srs = MCIP_TX_SRS(mcip); 5911 ASSERT(mac_tx_srs_ring_present(srs, desired_ring)); 5912 mac_tx_invoke_callbacks(mcip, 5913 (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(srs, 5914 desired_ring)); 5915 mac_tx_srs_del_ring(srs, desired_ring); 5916 mac_tx_client_restart((mac_client_handle_t)mcip); 5917 } 5918 return (desired_ring); 5919 } 5920 5921 /* 5922 * For a reserved group with multiple clients, return the primary client. 5923 */ 5924 static mac_client_impl_t * 5925 mac_get_grp_primary(mac_group_t *grp) 5926 { 5927 mac_grp_client_t *mgcp = grp->mrg_clients; 5928 mac_client_impl_t *mcip; 5929 5930 while (mgcp != NULL) { 5931 mcip = mgcp->mgc_client; 5932 if (mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) 5933 return (mcip); 5934 mgcp = mgcp->mgc_next; 5935 } 5936 return (NULL); 5937 } 5938 5939 /* 5940 * Hybrid I/O specifies the ring that should be given to a share. 5941 * If the ring is already used by clients, then we need to release 5942 * the ring back to the default group so that we can give it to 5943 * the share. This means the clients using this ring now get a 5944 * replacement ring. If there aren't any replacement rings, this 5945 * function returns a failure. 5946 */ 5947 static int 5948 mac_reclaim_ring_from_grp(mac_impl_t *mip, mac_ring_type_t ring_type, 5949 mac_ring_t *ring, mac_ring_t **rings, int nrings) 5950 { 5951 mac_group_t *group = (mac_group_t *)ring->mr_gh; 5952 mac_resource_props_t *mrp; 5953 mac_client_impl_t *mcip; 5954 mac_group_t *defgrp; 5955 mac_ring_t *tring; 5956 mac_group_t *tgrp; 5957 int i; 5958 int j; 5959 5960 mcip = MAC_GROUP_ONLY_CLIENT(group); 5961 if (mcip == NULL) 5962 mcip = mac_get_grp_primary(group); 5963 ASSERT(mcip != NULL); 5964 ASSERT(mcip->mci_share == NULL); 5965 5966 mrp = MCIP_RESOURCE_PROPS(mcip); 5967 if (ring_type == MAC_RING_TYPE_RX) { 5968 defgrp = mip->mi_rx_donor_grp; 5969 if ((mrp->mrp_mask & MRP_RX_RINGS) == 0) { 5970 /* Need to put this mac client in the default group */ 5971 if (mac_rx_switch_group(mcip, group, defgrp) != 0) 5972 return (ENOSPC); 5973 } else { 5974 /* 5975 * Switch this ring with some other ring from 5976 * the default group.
5977 */ 5978 for (tring = defgrp->mrg_rings; tring != NULL; 5979 tring = tring->mr_next) { 5980 if (tring->mr_index == 0) 5981 continue; 5982 for (j = 0; j < nrings; j++) { 5983 if (rings[j] == tring) 5984 break; 5985 } 5986 if (j >= nrings) 5987 break; 5988 } 5989 if (tring == NULL) 5990 return (ENOSPC); 5991 if (mac_group_mov_ring(mip, group, tring) != 0) 5992 return (ENOSPC); 5993 if (mac_group_mov_ring(mip, defgrp, ring) != 0) { 5994 (void) mac_group_mov_ring(mip, defgrp, tring); 5995 return (ENOSPC); 5996 } 5997 } 5998 ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp); 5999 return (0); 6000 } 6001 6002 defgrp = MAC_DEFAULT_TX_GROUP(mip); 6003 if (ring == (mac_ring_t *)mip->mi_default_tx_ring) { 6004 /* 6005 * See if we can get a spare ring to replace the default 6006 * ring. 6007 */ 6008 if (defgrp->mrg_cur_count == 1) { 6009 /* 6010 * Need to get a ring from another client, see if 6011 * there are any clients that can be moved to 6012 * the default group, thereby freeing some rings. 6013 */ 6014 for (i = 0; i < mip->mi_tx_group_count; i++) { 6015 tgrp = &mip->mi_tx_groups[i]; 6016 if (tgrp->mrg_state == 6017 MAC_GROUP_STATE_REGISTERED) { 6018 continue; 6019 } 6020 mcip = MAC_GROUP_ONLY_CLIENT(tgrp); 6021 if (mcip == NULL) 6022 mcip = mac_get_grp_primary(tgrp); 6023 ASSERT(mcip != NULL); 6024 mrp = MCIP_RESOURCE_PROPS(mcip); 6025 if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) { 6026 ASSERT(tgrp->mrg_cur_count == 1); 6027 /* 6028 * If this ring is part of the 6029 * rings asked by the share we cannot 6030 * use it as the default ring. 6031 */ 6032 for (j = 0; j < nrings; j++) { 6033 if (rings[j] == tgrp->mrg_rings) 6034 break; 6035 } 6036 if (j < nrings) 6037 continue; 6038 mac_tx_client_quiesce( 6039 (mac_client_handle_t)mcip); 6040 mac_tx_switch_group(mcip, tgrp, 6041 defgrp); 6042 mac_tx_client_restart( 6043 (mac_client_handle_t)mcip); 6044 break; 6045 } 6046 } 6047 /* 6048 * All the rings are reserved, can't give up the 6049 * default ring. 6050 */ 6051 if (defgrp->mrg_cur_count <= 1) 6052 return (ENOSPC); 6053 } 6054 /* 6055 * Swap the default ring with another. 6056 */ 6057 for (tring = defgrp->mrg_rings; tring != NULL; 6058 tring = tring->mr_next) { 6059 /* 6060 * If this ring is part of the rings asked by the 6061 * share we cannot use it as the default ring. 6062 */ 6063 for (j = 0; j < nrings; j++) { 6064 if (rings[j] == tring) 6065 break; 6066 } 6067 if (j >= nrings) 6068 break; 6069 } 6070 ASSERT(tring != NULL); 6071 mip->mi_default_tx_ring = (mac_ring_handle_t)tring; 6072 return (0); 6073 } 6074 /* 6075 * The Tx ring is with a group reserved by a MAC client. See if 6076 * we can swap it. 6077 */ 6078 ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); 6079 mcip = MAC_GROUP_ONLY_CLIENT(group); 6080 if (mcip == NULL) 6081 mcip = mac_get_grp_primary(group); 6082 ASSERT(mcip != NULL); 6083 mrp = MCIP_RESOURCE_PROPS(mcip); 6084 mac_tx_client_quiesce((mac_client_handle_t)mcip); 6085 if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) { 6086 ASSERT(group->mrg_cur_count == 1); 6087 /* Put this mac client in the default group */ 6088 mac_tx_switch_group(mcip, group, defgrp); 6089 } else { 6090 /* 6091 * Switch this ring with some other ring from 6092 * the default group. 6093 */ 6094 for (tring = defgrp->mrg_rings; tring != NULL; 6095 tring = tring->mr_next) { 6096 if (tring == (mac_ring_t *)mip->mi_default_tx_ring) 6097 continue; 6098 /* 6099 * If this ring is part of the rings asked by the 6100 * share we cannot use it for swapping. 
6101 */ 6102 for (j = 0; j < nrings; j++) { 6103 if (rings[j] == tring) 6104 break; 6105 } 6106 if (j >= nrings) 6107 break; 6108 } 6109 if (tring == NULL) { 6110 mac_tx_client_restart((mac_client_handle_t)mcip); 6111 return (ENOSPC); 6112 } 6113 if (mac_group_mov_ring(mip, group, tring) != 0) { 6114 mac_tx_client_restart((mac_client_handle_t)mcip); 6115 return (ENOSPC); 6116 } 6117 if (mac_group_mov_ring(mip, defgrp, ring) != 0) { 6118 (void) mac_group_mov_ring(mip, defgrp, tring); 6119 mac_tx_client_restart((mac_client_handle_t)mcip); 6120 return (ENOSPC); 6121 } 6122 } 6123 mac_tx_client_restart((mac_client_handle_t)mcip); 6124 ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp); 6125 return (0); 6126 } 6127 6128 /* 6129 * Populate a zero-ring group with rings. If the share is non-NULL, 6130 * the rings are chosen according to that share. 6131 * Invoked after allocating a new RX or TX group through 6132 * mac_reserve_rx_group() or mac_reserve_tx_group(), respectively. 6133 * Returns zero on success, an errno otherwise. 6134 */ 6135 int 6136 i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, 6137 mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share, 6138 uint32_t ringcnt) 6139 { 6140 mac_ring_t **rings, *ring; 6141 uint_t nrings; 6142 int rv = 0, i = 0, j; 6143 6144 ASSERT((ring_type == MAC_RING_TYPE_RX && 6145 mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) || 6146 (ring_type == MAC_RING_TYPE_TX && 6147 mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC)); 6148 6149 /* 6150 * First find the rings to allocate to the group. 6151 */ 6152 if (share != NULL) { 6153 /* get rings through ms_squery() */ 6154 mip->mi_share_capab.ms_squery(share, ring_type, NULL, &nrings); 6155 ASSERT(nrings != 0); 6156 rings = kmem_alloc(nrings * sizeof (mac_ring_handle_t), 6157 KM_SLEEP); 6158 mip->mi_share_capab.ms_squery(share, ring_type, 6159 (mac_ring_handle_t *)rings, &nrings); 6160 for (i = 0; i < nrings; i++) { 6161 /* 6162 * If we have given this ring to a non-default 6163 * group, we need to check if we can get this 6164 * ring. 6165 */ 6166 ring = rings[i]; 6167 if (ring->mr_gh != (mac_group_handle_t)src_group || 6168 ring == (mac_ring_t *)mip->mi_default_tx_ring) { 6169 if (mac_reclaim_ring_from_grp(mip, ring_type, 6170 ring, rings, nrings) != 0) { 6171 rv = ENOSPC; 6172 goto bail; 6173 } 6174 } 6175 } 6176 } else { 6177 /* 6178 * Pick one ring from default group. 6179 * 6180 * for now pick the second ring which requires the first ring 6181 * at index 0 to stay in the default group, since it is the 6182 * ring which carries the multicast traffic. 6183 * We need a better way for a driver to indicate this, 6184 * for example a per-ring flag. 
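 *
 * Sketch of the selection rule implemented below (purely illustrative):
 *
 *	RX: skip the ring at mr_index 0 (it stays in the default group
 *	    and carries the multicast traffic), take the next ringcnt rings.
 *	TX: skip mi_default_tx_ring, take the next ringcnt rings.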
6185 */ 6186 rings = kmem_alloc(ringcnt * sizeof (mac_ring_handle_t), 6187 KM_SLEEP); 6188 for (ring = src_group->mrg_rings; ring != NULL; 6189 ring = ring->mr_next) { 6190 if (ring_type == MAC_RING_TYPE_RX && 6191 ring->mr_index == 0) { 6192 continue; 6193 } 6194 if (ring_type == MAC_RING_TYPE_TX && 6195 ring == (mac_ring_t *)mip->mi_default_tx_ring) { 6196 continue; 6197 } 6198 rings[i++] = ring; 6199 if (i == ringcnt) 6200 break; 6201 } 6202 ASSERT(ring != NULL); 6203 nrings = i; 6204 /* Not as many rings as required */ 6205 if (nrings != ringcnt) { 6206 rv = ENOSPC; 6207 goto bail; 6208 } 6209 } 6210 6211 switch (ring_type) { 6212 case MAC_RING_TYPE_RX: 6213 if (src_group->mrg_cur_count - nrings < 1) { 6214 /* we ran out of rings */ 6215 rv = ENOSPC; 6216 goto bail; 6217 } 6218 6219 /* move receive rings to new group */ 6220 for (i = 0; i < nrings; i++) { 6221 rv = mac_group_mov_ring(mip, new_group, rings[i]); 6222 if (rv != 0) { 6223 /* move rings back on failure */ 6224 for (j = 0; j < i; j++) { 6225 (void) mac_group_mov_ring(mip, 6226 src_group, rings[j]); 6227 } 6228 goto bail; 6229 } 6230 } 6231 break; 6232 6233 case MAC_RING_TYPE_TX: { 6234 mac_ring_t *tmp_ring; 6235 6236 /* move the TX rings to the new group */ 6237 for (i = 0; i < nrings; i++) { 6238 /* get the desired ring */ 6239 tmp_ring = mac_reserve_tx_ring(mip, rings[i]); 6240 if (tmp_ring == NULL) { 6241 rv = ENOSPC; 6242 goto bail; 6243 } 6244 ASSERT(tmp_ring == rings[i]); 6245 rv = mac_group_mov_ring(mip, new_group, rings[i]); 6246 if (rv != 0) { 6247 /* cleanup on failure */ 6248 for (j = 0; j < i; j++) { 6249 (void) mac_group_mov_ring(mip, 6250 MAC_DEFAULT_TX_GROUP(mip), 6251 rings[j]); 6252 } 6253 goto bail; 6254 } 6255 } 6256 break; 6257 } 6258 } 6259 6260 /* add group to share */ 6261 if (share != NULL) 6262 mip->mi_share_capab.ms_sadd(share, new_group->mrg_driver); 6263 6264 bail: 6265 /* free temporary array of rings */ 6266 kmem_free(rings, nrings * sizeof (mac_ring_handle_t)); 6267 6268 return (rv); 6269 } 6270 6271 void 6272 mac_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) 6273 { 6274 mac_grp_client_t *mgcp; 6275 6276 for (mgcp = grp->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) { 6277 if (mgcp->mgc_client == mcip) 6278 break; 6279 } 6280 6281 VERIFY(mgcp == NULL); 6282 6283 mgcp = kmem_zalloc(sizeof (mac_grp_client_t), KM_SLEEP); 6284 mgcp->mgc_client = mcip; 6285 mgcp->mgc_next = grp->mrg_clients; 6286 grp->mrg_clients = mgcp; 6287 6288 } 6289 6290 void 6291 mac_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) 6292 { 6293 mac_grp_client_t *mgcp, **pprev; 6294 6295 for (pprev = &grp->mrg_clients, mgcp = *pprev; mgcp != NULL; 6296 pprev = &mgcp->mgc_next, mgcp = *pprev) { 6297 if (mgcp->mgc_client == mcip) 6298 break; 6299 } 6300 6301 ASSERT(mgcp != NULL); 6302 6303 *pprev = mgcp->mgc_next; 6304 kmem_free(mgcp, sizeof (mac_grp_client_t)); 6305 } 6306 6307 /* 6308 * mac_reserve_rx_group() 6309 * 6310 * Finds an available group and exclusively reserves it for a client. 6311 * The group is chosen to suit the flow's resource controls (bandwidth and 6312 * fanout requirements) and the address type. 6313 * If the requestor is the primary MAC then return the group with the 6314 * largest number of rings, otherwise the default ring when available.
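 *
 * Returning NULL is not an error; it tells the caller to fall back to
 * the default (shared) group. A hypothetical caller thus looks roughly
 * like:
 *
 *	grp = mac_reserve_rx_group(mcip, mac_addr, B_FALSE);
 *	if (grp == NULL)
 *		use MAC_DEFAULT_RX_GROUP(mip), shared with other clients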
6315 */ 6316 mac_group_t * 6317 mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) 6318 { 6319 mac_share_handle_t share = mcip->mci_share; 6320 mac_impl_t *mip = mcip->mci_mip; 6321 mac_group_t *grp = NULL; 6322 int i; 6323 int err = 0; 6324 mac_address_t *map; 6325 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 6326 int nrings; 6327 int donor_grp_rcnt; 6328 boolean_t need_exclgrp = B_FALSE; 6329 int need_rings = 0; 6330 mac_group_t *candidate_grp = NULL; 6331 mac_client_impl_t *gclient; 6332 mac_resource_props_t *gmrp; 6333 mac_group_t *donorgrp = NULL; 6334 boolean_t rxhw = mrp->mrp_mask & MRP_RX_RINGS; 6335 boolean_t unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC; 6336 boolean_t isprimary; 6337 6338 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 6339 6340 isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; 6341 6342 /* 6343 * Check if a group already has this mac address (case of VLANs) 6344 * unless we are moving this MAC client from one group to another. 6345 */ 6346 if (!move && (map = mac_find_macaddr(mip, mac_addr)) != NULL) { 6347 if (map->ma_group != NULL) 6348 return (map->ma_group); 6349 } 6350 if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0) 6351 return (NULL); 6352 /* 6353 * If exclusive open, return NULL which will enable the 6354 * caller to use the default group. 6355 */ 6356 if (mcip->mci_state_flags & MCIS_EXCLUSIVE) 6357 return (NULL); 6358 6359 /* For dynamic groups default unspecified to 1 */ 6360 if (rxhw && unspec && 6361 mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6362 mrp->mrp_nrxrings = 1; 6363 } 6364 /* 6365 * For static grouping we allow only specifying rings=0 and 6366 * unspecified 6367 */ 6368 if (rxhw && mrp->mrp_nrxrings > 0 && 6369 mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) { 6370 return (NULL); 6371 } 6372 if (rxhw) { 6373 /* 6374 * We have explicitly asked for a group (with nrxrings, 6375 * if unspec). 6376 */ 6377 if (unspec || mrp->mrp_nrxrings > 0) { 6378 need_exclgrp = B_TRUE; 6379 need_rings = mrp->mrp_nrxrings; 6380 } else if (mrp->mrp_nrxrings == 0) { 6381 /* 6382 * We have asked for a software group. 6383 */ 6384 return (NULL); 6385 } 6386 } else if (isprimary && mip->mi_nactiveclients == 1 && 6387 mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6388 /* 6389 * If the primary is the only active client on this 6390 * mip and we have not asked for any rings, we give 6391 * it the default group so that the primary gets to 6392 * use all the rings. 6393 */ 6394 return (NULL); 6395 } 6396 6397 /* The group that can donate rings */ 6398 donorgrp = mip->mi_rx_donor_grp; 6399 6400 /* 6401 * The number of rings that the default group can donate. 6402 * We need to leave at least one ring. 6403 */ 6404 donor_grp_rcnt = donorgrp->mrg_cur_count - 1; 6405 6406 /* 6407 * Try to exclusively reserve a RX group. 6408 * 6409 * For flows requiring HW_DEFAULT_RING (unicast flow of the primary 6410 * client), try to reserve a non-default RX group and give 6411 * it all the rings from the donor group, except the default ring. 6412 * 6413 * For flows requiring HW_RING (unicast flow of other clients), try 6414 * to reserve non-default RX group with the specified number of 6415 * rings, if available. 6416 * 6417 * For flows that have not asked for software or hardware ring, 6418 * try to reserve a non-default group with 1 ring, if available.
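 *
 * Summarizing the ring counts requested below (sketch; this mirrors the
 * nrings computation inside the loop):
 *
 *	primary, no explicit request	-> donor_grp_rcnt (all but one)
 *	rxhw (MRP_RX_RINGS set)		-> need_rings
 *	neither				-> 1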
6419 */ 6420 for (i = 1; i < mip->mi_rx_group_count; i++) { 6421 grp = &mip->mi_rx_groups[i]; 6422 6423 DTRACE_PROBE3(rx__group__trying, char *, mip->mi_name, 6424 int, grp->mrg_index, mac_group_state_t, grp->mrg_state); 6425 6426 /* 6427 * Check if this group could be a candidate group for 6428 * eviction if we need a group for this MAC client, 6429 * but there aren't any. A candidate group is one 6430 * that didn't ask for an exclusive group, but got 6431 * one and it has enough rings (combined with what 6432 * the donor group can donate) for the new MAC 6433 * client. 6434 */ 6435 if (grp->mrg_state >= MAC_GROUP_STATE_RESERVED) { 6436 /* 6437 * If the primary/donor group is not the default 6438 * group, don't bother looking for a candidate group. 6439 * If we don't have enough rings we will check 6440 * if the primary group can be vacated. 6441 */ 6442 if (candidate_grp == NULL && 6443 donorgrp == MAC_DEFAULT_RX_GROUP(mip)) { 6444 ASSERT(!MAC_GROUP_NO_CLIENT(grp)); 6445 gclient = MAC_GROUP_ONLY_CLIENT(grp); 6446 if (gclient == NULL) 6447 gclient = mac_get_grp_primary(grp); 6448 ASSERT(gclient != NULL); 6449 gmrp = MCIP_RESOURCE_PROPS(gclient); 6450 if (gclient->mci_share == NULL && 6451 (gmrp->mrp_mask & MRP_RX_RINGS) == 0 && 6452 (unspec || 6453 (grp->mrg_cur_count + donor_grp_rcnt >= 6454 need_rings))) { 6455 candidate_grp = grp; 6456 } 6457 } 6458 continue; 6459 } 6460 /* 6461 * This group could already be SHARED by other multicast 6462 * flows on this client. In that case, the group would 6463 * be shared and has already been started. 6464 */ 6465 ASSERT(grp->mrg_state != MAC_GROUP_STATE_UNINIT); 6466 6467 if ((grp->mrg_state == MAC_GROUP_STATE_REGISTERED) && 6468 (mac_start_group(grp) != 0)) { 6469 continue; 6470 } 6471 6472 if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) 6473 break; 6474 ASSERT(grp->mrg_cur_count == 0); 6475 6476 /* 6477 * Populate the group. Rings should be taken 6478 * from the donor group. 6479 */ 6480 nrings = rxhw ? need_rings : isprimary ? donor_grp_rcnt: 1; 6481 6482 /* 6483 * If the donor group can't donate, let's just walk and 6484 * see if someone can vacate a group, so that we have 6485 * enough rings for this, unless we already have 6486 * identified a candidate group. 6487 */ 6488 if (nrings <= donor_grp_rcnt) { 6489 err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, 6490 donorgrp, grp, share, nrings); 6491 if (err == 0) { 6492 /* 6493 * For a share i_mac_group_allocate_rings gets 6494 * the rings from the driver, let's populate 6495 * the property for the client now. 6496 */ 6497 if (share != NULL) { 6498 mac_client_set_rings( 6499 (mac_client_handle_t)mcip, 6500 grp->mrg_cur_count, -1); 6501 } 6502 if (mac_is_primary_client(mcip) && !rxhw) 6503 mip->mi_rx_donor_grp = grp; 6504 break; 6505 } 6506 } 6507 6508 DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, 6509 mip->mi_name, int, grp->mrg_index, int, err); 6510 6511 /* 6512 * It's a dynamic group but the grouping operation 6513 * failed. 6514 */ 6515 mac_stop_group(grp); 6516 } 6517 /* We didn't find an exclusive group for this MAC client */ 6518 if (i >= mip->mi_rx_group_count) { 6519 6520 if (!need_exclgrp) 6521 return (NULL); 6522 6523 /* 6524 * If we found a candidate group then we switch the 6525 * MAC client from the candidate_group to the default 6526 * group and give the group to this MAC client. If 6527 * we didn't find a candidate_group, check if the 6528 * primary is in its own group and if it can make way 6529 * for this MAC client.
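 *
 * When an exclusive group is required, the fallback order is thus
 * (sketch):
 *
 *	1. a candidate_grp found during the scan is vacated to the
 *	   default group and handed over;
 *	2. else a non-default donor group with enough rings is treated
 *	   as the candidate;
 *	3. else return NULL and the caller uses the default group.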
6530 */ 6531 if (candidate_grp == NULL && 6532 donorgrp != MAC_DEFAULT_RX_GROUP(mip) && 6533 donorgrp->mrg_cur_count >= need_rings) { 6534 candidate_grp = donorgrp; 6535 } 6536 if (candidate_grp != NULL) { 6537 boolean_t prim_grp = B_FALSE; 6538 6539 /* 6540 * Switch the MAC client from the candidate group 6541 * to the default group. If this group was the 6542 * donor group, then after the switch we need 6543 * to update the donor group too. 6544 */ 6545 grp = candidate_grp; 6546 gclient = MAC_GROUP_ONLY_CLIENT(grp); 6547 if (gclient == NULL) 6548 gclient = mac_get_grp_primary(grp); 6549 if (grp == mip->mi_rx_donor_grp) 6550 prim_grp = B_TRUE; 6551 if (mac_rx_switch_group(gclient, grp, 6552 MAC_DEFAULT_RX_GROUP(mip)) != 0) { 6553 return (NULL); 6554 } 6555 if (prim_grp) { 6556 mip->mi_rx_donor_grp = 6557 MAC_DEFAULT_RX_GROUP(mip); 6558 donorgrp = MAC_DEFAULT_RX_GROUP(mip); 6559 } 6560 6561 6562 /* 6563 * Now give this group with the required rings 6564 * to this MAC client. 6565 */ 6566 ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); 6567 if (mac_start_group(grp) != 0) 6568 return (NULL); 6569 6570 if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) 6571 return (grp); 6572 6573 donor_grp_rcnt = donorgrp->mrg_cur_count - 1; 6574 ASSERT(grp->mrg_cur_count == 0); 6575 ASSERT(donor_grp_rcnt >= need_rings); 6576 err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, 6577 donorgrp, grp, share, need_rings); 6578 if (err == 0) { 6579 /* 6580 * For a share i_mac_group_allocate_rings gets 6581 * the rings from the driver, let's populate 6582 * the property for the client now. 6583 */ 6584 if (share != NULL) { 6585 mac_client_set_rings( 6586 (mac_client_handle_t)mcip, 6587 grp->mrg_cur_count, -1); 6588 } 6589 DTRACE_PROBE2(rx__group__reserved, 6590 char *, mip->mi_name, int, grp->mrg_index); 6591 return (grp); 6592 } 6593 DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, 6594 mip->mi_name, int, grp->mrg_index, int, err); 6595 mac_stop_group(grp); 6596 } 6597 return (NULL); 6598 } 6599 ASSERT(grp != NULL); 6600 6601 DTRACE_PROBE2(rx__group__reserved, 6602 char *, mip->mi_name, int, grp->mrg_index); 6603 return (grp); 6604 } 6605 6606 /* 6607 * mac_release_rx_group() 6608 * 6609 * This is called when there are no clients left for the group. 6610 * The group is stopped and marked MAC_GROUP_STATE_REGISTERED, 6611 * and if it is a non-default group, the shares are removed and 6612 * all rings are assigned back to the default group. 6613 */ 6614 void 6615 mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) 6616 { 6617 mac_impl_t *mip = mcip->mci_mip; 6618 mac_ring_t *ring; 6619 6620 ASSERT(group != MAC_DEFAULT_RX_GROUP(mip)); 6621 6622 if (mip->mi_rx_donor_grp == group) 6623 mip->mi_rx_donor_grp = MAC_DEFAULT_RX_GROUP(mip); 6624 6625 /* 6626 * This is the case where there are no clients left. Any 6627 * SRS etc. on this group has also been quiesced. 6628 */ 6629 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 6630 if (ring->mr_classify_type == MAC_HW_CLASSIFIER) { 6631 ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); 6632 /* 6633 * Remove the SRS associated with the HW ring. 6634 * As a result, polling will be disabled.
6635 */ 6636 ring->mr_srs = NULL; 6637 } 6638 ASSERT(group->mrg_state < MAC_GROUP_STATE_RESERVED || 6639 ring->mr_state == MR_INUSE); 6640 if (ring->mr_state == MR_INUSE) { 6641 mac_stop_ring(ring); 6642 ring->mr_flag = 0; 6643 } 6644 } 6645 6646 /* remove group from share */ 6647 if (mcip->mci_share != NULL) { 6648 mip->mi_share_capab.ms_sremove(mcip->mci_share, 6649 group->mrg_driver); 6650 } 6651 6652 if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6653 mac_ring_t *ring; 6654 6655 /* 6656 * Rings were dynamically allocated to the group. 6657 * Move the rings back to the default group. 6658 */ 6659 while ((ring = group->mrg_rings) != NULL) { 6660 (void) mac_group_mov_ring(mip, mip->mi_rx_donor_grp, 6661 ring); 6662 } 6663 } 6664 mac_stop_group(group); 6665 /* 6666 * Possible improvement: See if we can assign the group just released 6667 * to another client of the mip. 6668 */ 6669 } 6670 6671 /* 6672 * When we move the primary's mac address between groups, we need to also 6673 * take all the clients sharing the same mac address along with it (VLANs). 6674 * We remove the mac address for such clients from the group after quiescing 6675 * them. When we add the mac address we restart the client. Note that 6676 * the primary's mac address is removed from the group after all the 6677 * other clients sharing the address are removed. Similarly, the primary's 6678 * mac address is added before all the other clients' mac addresses are 6679 * added. While grp is the group where the clients reside, tgrp is 6680 * the group where the addresses have to be added. 6681 */ 6682 static void 6683 mac_rx_move_macaddr_prim(mac_client_impl_t *mcip, mac_group_t *grp, 6684 mac_group_t *tgrp, uint8_t *maddr, boolean_t add) 6685 { 6686 mac_impl_t *mip = mcip->mci_mip; 6687 mac_grp_client_t *mgcp = grp->mrg_clients; 6688 mac_client_impl_t *gmcip; 6689 boolean_t prim; 6690 6691 prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; 6692 6693 /* 6694 * If the clients are in a non-default group, we just have to 6695 * walk the group's client list. If it is in the default group 6696 * (which will be shared by other clients as well), we need to 6697 * check if the unicast address matches mcip's unicast. 6698 */ 6699 while (mgcp != NULL) { 6700 gmcip = mgcp->mgc_client; 6701 if (gmcip != mcip && 6702 (grp != MAC_DEFAULT_RX_GROUP(mip) || 6703 mcip->mci_unicast == gmcip->mci_unicast)) { 6704 if (!add) { 6705 mac_rx_client_quiesce( 6706 (mac_client_handle_t)gmcip); 6707 (void) mac_remove_macaddr(mcip->mci_unicast); 6708 } else { 6709 (void) mac_add_macaddr(mip, tgrp, maddr, prim); 6710 mac_rx_client_restart( 6711 (mac_client_handle_t)gmcip); 6712 } 6713 } 6714 mgcp = mgcp->mgc_next; 6715 } 6716 } 6717 6718 6719 /* 6720 * Move the MAC address from fgrp to tgrp. If this is the primary client, 6721 * we need to take any VLANs etc. together too.
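 *
 * The overall sequence is (sketch; failures unwind to the old state):
 *
 *	mac_rx_client_quiesce(mcip)
 *	mac_rx_move_macaddr_prim(..., B_FALSE)	quiesce/remove VLAN users
 *	mac_remove_macaddr(old)			unprogram classifier (fgrp)
 *	mac_add_macaddr(mip, tgrp, ...)		program classifier (tgrp)
 *	mac_rx_client_restart(mcip)
 *	mac_rx_move_macaddr_prim(..., B_TRUE)	re-add/restart VLAN users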
6722 */ 6723 static int 6724 mac_rx_move_macaddr(mac_client_impl_t *mcip, mac_group_t *fgrp, 6725 mac_group_t *tgrp) 6726 { 6727 mac_impl_t *mip = mcip->mci_mip; 6728 uint8_t maddr[MAXMACADDRLEN]; 6729 int err = 0; 6730 boolean_t prim; 6731 boolean_t multiclnt = B_FALSE; 6732 6733 mac_rx_client_quiesce((mac_client_handle_t)mcip); 6734 ASSERT(mcip->mci_unicast != NULL); 6735 bcopy(mcip->mci_unicast->ma_addr, maddr, mcip->mci_unicast->ma_len); 6736 6737 prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; 6738 if (mcip->mci_unicast->ma_nusers > 1) { 6739 mac_rx_move_macaddr_prim(mcip, fgrp, NULL, maddr, B_FALSE); 6740 multiclnt = B_TRUE; 6741 } 6742 ASSERT(mcip->mci_unicast->ma_nusers == 1); 6743 err = mac_remove_macaddr(mcip->mci_unicast); 6744 if (err != 0) { 6745 mac_rx_client_restart((mac_client_handle_t)mcip); 6746 if (multiclnt) { 6747 mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, 6748 B_TRUE); 6749 } 6750 return (err); 6751 } 6752 /* 6753 * Program the H/W Classifier first; if this fails we need 6754 * not proceed with the rest. 6755 */ 6756 if ((err = mac_add_macaddr(mip, tgrp, maddr, prim)) != 0) { 6757 /* Revert the H/W Classifier */ 6758 if ((err = mac_add_macaddr(mip, fgrp, maddr, prim)) != 0) { 6759 /* 6760 * This should not fail now since it worked earlier, 6761 * should we panic? 6762 */ 6763 cmn_err(CE_WARN, 6764 "mac_rx_switch_group: switching %p back" 6765 " to group %p failed!!", (void *)mcip, 6766 (void *)fgrp); 6767 } 6768 mac_rx_client_restart((mac_client_handle_t)mcip); 6769 if (multiclnt) { 6770 mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, 6771 B_TRUE); 6772 } 6773 return (err); 6774 } 6775 mcip->mci_unicast = mac_find_macaddr(mip, maddr); 6776 mac_rx_client_restart((mac_client_handle_t)mcip); 6777 if (multiclnt) 6778 mac_rx_move_macaddr_prim(mcip, fgrp, tgrp, maddr, B_TRUE); 6779 return (err); 6780 } 6781 6782 /* 6783 * Switch the MAC client from one group to another. This means we need 6784 * to remove the MAC address from the group, remove the MAC client, 6785 * tear down the SRSs and revert the group state. Then, we add the client 6786 * to the destination group, set up the SRSs, and add the MAC address to the 6787 * group. 6788 */ 6789 int 6790 mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, 6791 mac_group_t *tgrp) 6792 { 6793 int err; 6794 mac_group_state_t next_state; 6795 mac_client_impl_t *group_only_mcip; 6796 mac_client_impl_t *gmcip; 6797 mac_impl_t *mip = mcip->mci_mip; 6798 mac_grp_client_t *mgcp; 6799 6800 ASSERT(fgrp == mcip->mci_flent->fe_rx_ring_group); 6801 6802 if ((err = mac_rx_move_macaddr(mcip, fgrp, tgrp)) != 0) 6803 return (err); 6804 6805 /* 6806 * The group might be reserved, but SRSs may not be set up, e.g. 6807 * primary and its vlans using a reserved group.
6808 */ 6809 if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED && 6810 MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { 6811 mac_rx_srs_group_teardown(mcip->mci_flent, B_TRUE); 6812 } 6813 if (fgrp != MAC_DEFAULT_RX_GROUP(mip)) { 6814 mgcp = fgrp->mrg_clients; 6815 while (mgcp != NULL) { 6816 gmcip = mgcp->mgc_client; 6817 mgcp = mgcp->mgc_next; 6818 mac_group_remove_client(fgrp, gmcip); 6819 mac_group_add_client(tgrp, gmcip); 6820 gmcip->mci_flent->fe_rx_ring_group = tgrp; 6821 } 6822 mac_release_rx_group(mcip, fgrp); 6823 ASSERT(MAC_GROUP_NO_CLIENT(fgrp)); 6824 mac_set_group_state(fgrp, MAC_GROUP_STATE_REGISTERED); 6825 } else { 6826 mac_group_remove_client(fgrp, mcip); 6827 mac_group_add_client(tgrp, mcip); 6828 mcip->mci_flent->fe_rx_ring_group = tgrp; 6829 /* 6830 * If there are other clients (VLANs) sharing this address 6831 * we should be here only for the primary. 6832 */ 6833 if (mcip->mci_unicast->ma_nusers > 1) { 6834 /* 6835 * We need to move all the clients that are using 6836 * this h/w address. 6837 */ 6838 mgcp = fgrp->mrg_clients; 6839 while (mgcp != NULL) { 6840 gmcip = mgcp->mgc_client; 6841 mgcp = mgcp->mgc_next; 6842 if (mcip->mci_unicast == gmcip->mci_unicast) { 6843 mac_group_remove_client(fgrp, gmcip); 6844 mac_group_add_client(tgrp, gmcip); 6845 gmcip->mci_flent->fe_rx_ring_group = 6846 tgrp; 6847 } 6848 } 6849 } 6850 /* 6851 * The default group will still take the multicast, 6852 * broadcast traffic etc., so it won't go to 6853 * MAC_GROUP_STATE_REGISTERED. 6854 */ 6855 if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED) 6856 mac_rx_group_unmark(fgrp, MR_CONDEMNED); 6857 mac_set_group_state(fgrp, MAC_GROUP_STATE_SHARED); 6858 } 6859 next_state = mac_group_next_state(tgrp, &group_only_mcip, 6860 MAC_DEFAULT_RX_GROUP(mip), B_TRUE); 6861 mac_set_group_state(tgrp, next_state); 6862 /* 6863 * If the destination group is reserved, setup the SRSs etc. 6864 */ 6865 if (tgrp->mrg_state == MAC_GROUP_STATE_RESERVED) { 6866 mac_rx_srs_group_setup(mcip, mcip->mci_flent, SRST_LINK); 6867 mac_fanout_setup(mcip, mcip->mci_flent, 6868 MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, mcip, NULL, 6869 NULL); 6870 mac_rx_group_unmark(tgrp, MR_INCIPIENT); 6871 } else { 6872 mac_rx_switch_grp_to_sw(tgrp); 6873 } 6874 return (0); 6875 } 6876 6877 /* 6878 * Reserves a TX group for the specified share. Invoked by mac_tx_srs_setup() 6879 * when a share was allocated to the client. 6880 */ 6881 mac_group_t * 6882 mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move) 6883 { 6884 mac_impl_t *mip = mcip->mci_mip; 6885 mac_group_t *grp = NULL; 6886 int rv; 6887 int i; 6888 int err; 6889 mac_group_t *defgrp; 6890 mac_share_handle_t share = mcip->mci_share; 6891 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 6892 int nrings; 6893 int defnrings; 6894 boolean_t need_exclgrp = B_FALSE; 6895 int need_rings = 0; 6896 mac_group_t *candidate_grp = NULL; 6897 mac_client_impl_t *gclient; 6898 mac_resource_props_t *gmrp; 6899 boolean_t txhw = mrp->mrp_mask & MRP_TX_RINGS; 6900 boolean_t unspec = mrp->mrp_mask & MRP_TXRINGS_UNSPEC; 6901 boolean_t isprimary; 6902 6903 isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; 6904 /* 6905 * When we come here for a VLAN on the primary (dladm create-vlan), 6906 * we need to pair it along with the primary (to keep it consistent 6907 * with the RX side). So, we check if the primary is already assigned 6908 * to a group and return the group if so. The other way is also 6909 * true, i.e. the VLAN is already created and now we are plumbing 6910 * the primary. 
6911 */ 6912 if (!move && isprimary) { 6913 for (gclient = mip->mi_clients_list; gclient != NULL; 6914 gclient = gclient->mci_client_next) { 6915 if (gclient->mci_flent->fe_type & FLOW_PRIMARY_MAC && 6916 gclient->mci_flent->fe_tx_ring_group != NULL) { 6917 return (gclient->mci_flent->fe_tx_ring_group); 6918 } 6919 } 6920 } 6921 6922 if (mip->mi_tx_groups == NULL || mip->mi_tx_group_count == 0) 6923 return (NULL); 6924 6925 /* For dynamic groups, default unspec to 1 */ 6926 if (txhw && unspec && 6927 mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6928 mrp->mrp_ntxrings = 1; 6929 } 6930 /* 6931 * For static grouping we allow only specifying rings=0 and 6932 * unspecified 6933 */ 6934 if (txhw && mrp->mrp_ntxrings > 0 && 6935 mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC) { 6936 return (NULL); 6937 } 6938 6939 if (txhw) { 6940 /* 6941 * We have explicitly asked for a group (with ntxrings, 6942 * if unspec). 6943 */ 6944 if (unspec || mrp->mrp_ntxrings > 0) { 6945 need_exclgrp = B_TRUE; 6946 need_rings = mrp->mrp_ntxrings; 6947 } else if (mrp->mrp_ntxrings == 0) { 6948 /* 6949 * We have asked for a software group. 6950 */ 6951 return (NULL); 6952 } 6953 } 6954 defgrp = MAC_DEFAULT_TX_GROUP(mip); 6955 /* 6956 * The number of rings that the default group can donate. 6957 * We need to leave at least one ring - the default ring - in 6958 * this group. 6959 */ 6960 defnrings = defgrp->mrg_cur_count - 1; 6961 6962 /* 6963 * Primary gets default group unless explicitly told not 6964 * to (i.e. rings > 0). 6965 */ 6966 if (isprimary && !need_exclgrp) 6967 return (NULL); 6968 6969 nrings = (mrp->mrp_mask & MRP_TX_RINGS) != 0 ? mrp->mrp_ntxrings : 1; 6970 for (i = 0; i < mip->mi_tx_group_count; i++) { 6971 grp = &mip->mi_tx_groups[i]; 6972 if ((grp->mrg_state == MAC_GROUP_STATE_RESERVED) || 6973 (grp->mrg_state == MAC_GROUP_STATE_UNINIT)) { 6974 /* 6975 * Select a candidate for replacement if we don't 6976 * get an exclusive group. A candidate group is one 6977 * that didn't ask for an exclusive group, but got 6978 * one and it has enough rings (combined with what 6979 * the default group can donate) for the new MAC 6980 * client. 6981 */ 6982 if (grp->mrg_state == MAC_GROUP_STATE_RESERVED && 6983 candidate_grp == NULL) { 6984 gclient = MAC_GROUP_ONLY_CLIENT(grp); 6985 if (gclient == NULL) 6986 gclient = mac_get_grp_primary(grp); 6987 gmrp = MCIP_RESOURCE_PROPS(gclient); 6988 if (gclient->mci_share == NULL && 6989 (gmrp->mrp_mask & MRP_TX_RINGS) == 0 && 6990 (unspec || 6991 (grp->mrg_cur_count + defnrings) >= 6992 need_rings)) { 6993 candidate_grp = grp; 6994 } 6995 } 6996 continue; 6997 } 6998 /* 6999 * If the default can't donate let's just walk and 7000 * see if someone can vacate a group, so that we have 7001 * enough rings for this. 7002 */ 7003 if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC || 7004 nrings <= defnrings) { 7005 if (grp->mrg_state == MAC_GROUP_STATE_REGISTERED) { 7006 rv = mac_start_group(grp); 7007 ASSERT(rv == 0); 7008 } 7009 break; 7010 } 7011 } 7012 7013 /* The default group */ 7014 if (i >= mip->mi_tx_group_count) { 7015 /* 7016 * If we need an exclusive group and have identified a 7017 * candidate group we switch the MAC client from the 7018 * candidate group to the default group and give the 7019 * candidate group to this client. 7020 */ 7021 if (need_exclgrp && candidate_grp != NULL) { 7022 /* 7023 * Switch the MAC client from the candidate group 7024 * to the default group. 
7025 */ 7026 grp = candidate_grp; 7027 gclient = MAC_GROUP_ONLY_CLIENT(grp); 7028 if (gclient == NULL) 7029 gclient = mac_get_grp_primary(grp); 7030 mac_tx_client_quiesce((mac_client_handle_t)gclient); 7031 mac_tx_switch_group(gclient, grp, defgrp); 7032 mac_tx_client_restart((mac_client_handle_t)gclient); 7033 7034 /* 7035 * Give the candidate group with the specified number 7036 * of rings to this MAC client. 7037 */ 7038 ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); 7039 rv = mac_start_group(grp); 7040 ASSERT(rv == 0); 7041 7042 if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) 7043 return (grp); 7044 7045 ASSERT(grp->mrg_cur_count == 0); 7046 ASSERT(defgrp->mrg_cur_count > need_rings); 7047 7048 err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, 7049 defgrp, grp, share, need_rings); 7050 if (err == 0) { 7051 /* 7052 * For a share i_mac_group_allocate_rings gets 7053 * the rings from the driver, let's populate 7054 * the property for the client now. 7055 */ 7056 if (share != NULL) { 7057 mac_client_set_rings( 7058 (mac_client_handle_t)mcip, -1, 7059 grp->mrg_cur_count); 7060 } 7061 mip->mi_tx_group_free--; 7062 return (grp); 7063 } 7064 DTRACE_PROBE3(tx__group__reserve__alloc__rings, char *, 7065 mip->mi_name, int, grp->mrg_index, int, err); 7066 mac_stop_group(grp); 7067 } 7068 return (NULL); 7069 } 7070 /* 7071 * We got an exclusive group, but it is not dynamic. 7072 */ 7073 if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) { 7074 mip->mi_tx_group_free--; 7075 return (grp); 7076 } 7077 7078 rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, defgrp, grp, 7079 share, nrings); 7080 if (rv != 0) { 7081 DTRACE_PROBE3(tx__group__reserve__alloc__rings, 7082 char *, mip->mi_name, int, grp->mrg_index, int, rv); 7083 mac_stop_group(grp); 7084 return (NULL); 7085 } 7086 /* 7087 * For a share i_mac_group_allocate_rings gets the rings from the 7088 * driver, let's populate the property for the client now. 7089 */ 7090 if (share != NULL) { 7091 mac_client_set_rings((mac_client_handle_t)mcip, -1, 7092 grp->mrg_cur_count); 7093 } 7094 mip->mi_tx_group_free--; 7095 return (grp); 7096 } 7097 7098 void 7099 mac_release_tx_group(mac_client_impl_t *mcip, mac_group_t *grp) 7100 { 7101 mac_impl_t *mip = mcip->mci_mip; 7102 mac_share_handle_t share = mcip->mci_share; 7103 mac_ring_t *ring; 7104 mac_soft_ring_set_t *srs = MCIP_TX_SRS(mcip); 7105 mac_group_t *defgrp; 7106 7107 defgrp = MAC_DEFAULT_TX_GROUP(mip); 7108 if (srs != NULL) { 7109 if (srs->srs_soft_ring_count > 0) { 7110 for (ring = grp->mrg_rings; ring != NULL; 7111 ring = ring->mr_next) { 7112 ASSERT(mac_tx_srs_ring_present(srs, ring)); 7113 mac_tx_invoke_callbacks(mcip, 7114 (mac_tx_cookie_t) 7115 mac_tx_srs_get_soft_ring(srs, ring)); 7116 mac_tx_srs_del_ring(srs, ring); 7117 } 7118 } else { 7119 ASSERT(srs->srs_tx.st_arg2 != NULL); 7120 srs->srs_tx.st_arg2 = NULL; 7121 mac_srs_stat_delete(srs); 7122 } 7123 } 7124 if (share != NULL) 7125 mip->mi_share_capab.ms_sremove(share, grp->mrg_driver); 7126 7127 /* move the rings back to the pool */ 7128 if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 7129 while ((ring = grp->mrg_rings) != NULL) 7130 (void) mac_group_mov_ring(mip, defgrp, ring); 7131 } 7132 mac_stop_group(grp); 7133 mip->mi_tx_group_free++; 7134 } 7135 7136 /* 7137 * Disassociate a MAC client from a group, i.e. go through the rings in the 7138 * group and delete all the soft rings tied to them.
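 *
 * Sketch of the two cases handled below:
 *
 *	SRS_TX_DEFAULT/SRS_TX_SERIALIZE/SRS_TX_BW: a single ring and no
 *	    soft rings were created; just clear st_arg2 and the SRS stats.
 *	fanout modes: one soft ring per h/w ring in fgrp; invoke the
 *	    client's tx callbacks and delete each soft ring.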
7139 */ 7140 static void 7141 mac_tx_dismantle_soft_rings(mac_group_t *fgrp, flow_entry_t *flent) 7142 { 7143 mac_client_impl_t *mcip = flent->fe_mcip; 7144 mac_soft_ring_set_t *tx_srs; 7145 mac_srs_tx_t *tx; 7146 mac_ring_t *ring; 7147 7148 tx_srs = flent->fe_tx_srs; 7149 tx = &tx_srs->srs_tx; 7150 7151 /* Single ring case, we haven't created any soft rings */ 7152 if (tx->st_mode == SRS_TX_BW || tx->st_mode == SRS_TX_SERIALIZE || 7153 tx->st_mode == SRS_TX_DEFAULT) { 7154 tx->st_arg2 = NULL; 7155 mac_srs_stat_delete(tx_srs); 7156 /* Fanout case, where we have to dismantle the soft rings */ 7157 } else { 7158 for (ring = fgrp->mrg_rings; ring != NULL; 7159 ring = ring->mr_next) { 7160 ASSERT(mac_tx_srs_ring_present(tx_srs, ring)); 7161 mac_tx_invoke_callbacks(mcip, 7162 (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(tx_srs, 7163 ring)); 7164 mac_tx_srs_del_ring(tx_srs, ring); 7165 } 7166 ASSERT(tx->st_arg2 == NULL); 7167 } 7168 } 7169 7170 /* 7171 * Switch the MAC client from one group to another. This means we need 7172 * to remove the MAC client, tear down the SRSs and revert the group state. 7173 * Then, we add the client to the destination group, set up the SRSs etc. 7174 */ 7175 void 7176 mac_tx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, 7177 mac_group_t *tgrp) 7178 { 7179 mac_client_impl_t *group_only_mcip; 7180 mac_impl_t *mip = mcip->mci_mip; 7181 flow_entry_t *flent = mcip->mci_flent; 7182 mac_group_t *defgrp; 7183 mac_grp_client_t *mgcp; 7184 mac_client_impl_t *gmcip; 7185 flow_entry_t *gflent; 7186 7187 defgrp = MAC_DEFAULT_TX_GROUP(mip); 7188 ASSERT(fgrp == flent->fe_tx_ring_group); 7189 7190 if (fgrp == defgrp) { 7191 /* 7192 * If this is the primary we need to find any VLANs on 7193 * the primary and move them too. 7194 */ 7195 mac_group_remove_client(fgrp, mcip); 7196 mac_tx_dismantle_soft_rings(fgrp, flent); 7197 if (mcip->mci_unicast->ma_nusers > 1) { 7198 mgcp = fgrp->mrg_clients; 7199 while (mgcp != NULL) { 7200 gmcip = mgcp->mgc_client; 7201 mgcp = mgcp->mgc_next; 7202 if (mcip->mci_unicast != gmcip->mci_unicast) 7203 continue; 7204 mac_tx_client_quiesce( 7205 (mac_client_handle_t)gmcip); 7206 7207 gflent = gmcip->mci_flent; 7208 mac_group_remove_client(fgrp, gmcip); 7209 mac_tx_dismantle_soft_rings(fgrp, gflent); 7210 7211 mac_group_add_client(tgrp, gmcip); 7212 gflent->fe_tx_ring_group = tgrp; 7213 /* We could directly set this to SHARED */ 7214 tgrp->mrg_state = mac_group_next_state(tgrp, 7215 &group_only_mcip, defgrp, B_FALSE); 7216 7217 mac_tx_srs_group_setup(gmcip, gflent, 7218 SRST_LINK); 7219 mac_fanout_setup(gmcip, gflent, 7220 MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver, 7221 gmcip, NULL, NULL); 7222 7223 mac_tx_client_restart( 7224 (mac_client_handle_t)gmcip); 7225 } 7226 } 7227 if (MAC_GROUP_NO_CLIENT(fgrp)) { 7228 mac_ring_t *ring; 7229 int cnt; 7230 int ringcnt; 7231 7232 fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED; 7233 /* 7234 * Additionally, we also need to stop all 7235 * the rings in the default group, except 7236 * the default ring. The reason being 7237 * this group won't be released since it is 7238 * the default group, so the rings won't 7239 * be stopped otherwise.
7240 */ 7241 ringcnt = fgrp->mrg_cur_count; 7242 ring = fgrp->mrg_rings; 7243 for (cnt = 0; cnt < ringcnt; cnt++) { 7244 if (ring->mr_state == MR_INUSE && 7245 ring != 7246 (mac_ring_t *)mip->mi_default_tx_ring) { 7247 mac_stop_ring(ring); 7248 ring->mr_flag = 0; 7249 } 7250 ring = ring->mr_next; 7251 } 7252 } else if (MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { 7253 fgrp->mrg_state = MAC_GROUP_STATE_RESERVED; 7254 } else { 7255 ASSERT(fgrp->mrg_state == MAC_GROUP_STATE_SHARED); 7256 } 7257 } else { 7258 /* 7259 * We could have VLANs sharing the non-default group with 7260 * the primary. 7261 */ 7262 mgcp = fgrp->mrg_clients; 7263 while (mgcp != NULL) { 7264 gmcip = mgcp->mgc_client; 7265 mgcp = mgcp->mgc_next; 7266 if (gmcip == mcip) 7267 continue; 7268 mac_tx_client_quiesce((mac_client_handle_t)gmcip); 7269 gflent = gmcip->mci_flent; 7270 7271 mac_group_remove_client(fgrp, gmcip); 7272 mac_tx_dismantle_soft_rings(fgrp, gflent); 7273 7274 mac_group_add_client(tgrp, gmcip); 7275 gflent->fe_tx_ring_group = tgrp; 7276 /* We could directly set this to SHARED */ 7277 tgrp->mrg_state = mac_group_next_state(tgrp, 7278 &group_only_mcip, defgrp, B_FALSE); 7279 mac_tx_srs_group_setup(gmcip, gflent, SRST_LINK); 7280 mac_fanout_setup(gmcip, gflent, 7281 MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver, 7282 gmcip, NULL, NULL); 7283 7284 mac_tx_client_restart((mac_client_handle_t)gmcip); 7285 } 7286 mac_group_remove_client(fgrp, mcip); 7287 mac_release_tx_group(mcip, fgrp); 7288 fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED; 7289 } 7290 7291 /* Add it to the tgroup */ 7292 mac_group_add_client(tgrp, mcip); 7293 flent->fe_tx_ring_group = tgrp; 7294 tgrp->mrg_state = mac_group_next_state(tgrp, &group_only_mcip, 7295 defgrp, B_FALSE); 7296 7297 mac_tx_srs_group_setup(mcip, flent, SRST_LINK); 7298 mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), 7299 mac_rx_deliver, mcip, NULL, NULL); 7300 } 7301 7302 /* 7303 * This is a 1-time control path activity initiated by the client (IP). 7304 * The mac perimeter protects against other simultaneous control activities, 7305 * for example an ioctl that attempts to change the degree of fanout and 7306 * increase or decrease the number of softrings associated with this Tx SRS. 
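 *
 * A hypothetical client adds and later removes a callback through the
 * public wrapper mac_client_tx_notify() below (sketch):
 *
 *	handle = mac_client_tx_notify(mch, my_notify_fn, my_arg);
 *	...
 *	(void) mac_client_tx_notify(mch, NULL, handle);	removes it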
7307 */ 7308 static mac_tx_notify_cb_t * 7309 mac_client_tx_notify_add(mac_client_impl_t *mcip, 7310 mac_tx_notify_t notify, void *arg) 7311 { 7312 mac_cb_info_t *mcbi; 7313 mac_tx_notify_cb_t *mtnfp; 7314 7315 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 7316 7317 mtnfp = kmem_zalloc(sizeof (mac_tx_notify_cb_t), KM_SLEEP); 7318 mtnfp->mtnf_fn = notify; 7319 mtnfp->mtnf_arg = arg; 7320 mtnfp->mtnf_link.mcb_objp = mtnfp; 7321 mtnfp->mtnf_link.mcb_objsize = sizeof (mac_tx_notify_cb_t); 7322 mtnfp->mtnf_link.mcb_flags = MCB_TX_NOTIFY_CB_T; 7323 7324 mcbi = &mcip->mci_tx_notify_cb_info; 7325 mutex_enter(mcbi->mcbi_lockp); 7326 mac_callback_add(mcbi, &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link); 7327 mutex_exit(mcbi->mcbi_lockp); 7328 return (mtnfp); 7329 } 7330 7331 static void 7332 mac_client_tx_notify_remove(mac_client_impl_t *mcip, mac_tx_notify_cb_t *mtnfp) 7333 { 7334 mac_cb_info_t *mcbi; 7335 mac_cb_t **cblist; 7336 7337 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 7338 7339 if (!mac_callback_find(&mcip->mci_tx_notify_cb_info, 7340 &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link)) { 7341 cmn_err(CE_WARN, 7342 "mac_client_tx_notify_remove: callback not " 7343 "found, mcip 0x%p mtnfp 0x%p", (void *)mcip, (void *)mtnfp); 7344 return; 7345 } 7346 7347 mcbi = &mcip->mci_tx_notify_cb_info; 7348 cblist = &mcip->mci_tx_notify_cb_list; 7349 mutex_enter(mcbi->mcbi_lockp); 7350 if (mac_callback_remove(mcbi, cblist, &mtnfp->mtnf_link)) 7351 kmem_free(mtnfp, sizeof (mac_tx_notify_cb_t)); 7352 else 7353 mac_callback_remove_wait(&mcip->mci_tx_notify_cb_info); 7354 mutex_exit(mcbi->mcbi_lockp); 7355 } 7356 7357 /* 7358 * mac_client_tx_notify(): 7359 * call to add and remove flow control callback routine. 7360 */ 7361 mac_tx_notify_handle_t 7362 mac_client_tx_notify(mac_client_handle_t mch, mac_tx_notify_t callb_func, 7363 void *ptr) 7364 { 7365 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 7366 mac_tx_notify_cb_t *mtnfp = NULL; 7367 7368 i_mac_perim_enter(mcip->mci_mip); 7369 7370 if (callb_func != NULL) { 7371 /* Add a notify callback */ 7372 mtnfp = mac_client_tx_notify_add(mcip, callb_func, ptr); 7373 } else { 7374 mac_client_tx_notify_remove(mcip, (mac_tx_notify_cb_t *)ptr); 7375 } 7376 i_mac_perim_exit(mcip->mci_mip); 7377 7378 return ((mac_tx_notify_handle_t)mtnfp); 7379 } 7380 7381 void 7382 mac_bridge_vectors(mac_bridge_tx_t txf, mac_bridge_rx_t rxf, 7383 mac_bridge_ref_t reff, mac_bridge_ls_t lsf) 7384 { 7385 mac_bridge_tx_cb = txf; 7386 mac_bridge_rx_cb = rxf; 7387 mac_bridge_ref_cb = reff; 7388 mac_bridge_ls_cb = lsf; 7389 } 7390 7391 int 7392 mac_bridge_set(mac_handle_t mh, mac_handle_t link) 7393 { 7394 mac_impl_t *mip = (mac_impl_t *)mh; 7395 int retv; 7396 7397 mutex_enter(&mip->mi_bridge_lock); 7398 if (mip->mi_bridge_link == NULL) { 7399 mip->mi_bridge_link = link; 7400 retv = 0; 7401 } else { 7402 retv = EBUSY; 7403 } 7404 mutex_exit(&mip->mi_bridge_lock); 7405 if (retv == 0) { 7406 mac_poll_state_change(mh, B_FALSE); 7407 mac_capab_update(mh); 7408 } 7409 return (retv); 7410 } 7411 7412 /* 7413 * Disable bridging on the indicated link. 
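 *
 * This undoes mac_bridge_set() above; a bridge instance is expected to
 * pair the two (sketch):
 *
 *	if (mac_bridge_set(mh, link) == 0) {
 *		bridged traffic flows, rx polling stays disabled
 *		...
 *		mac_bridge_clear(mh, link);
 *	}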
7414 */ 7415 void 7416 mac_bridge_clear(mac_handle_t mh, mac_handle_t link) 7417 { 7418 mac_impl_t *mip = (mac_impl_t *)mh; 7419 7420 mutex_enter(&mip->mi_bridge_lock); 7421 ASSERT(mip->mi_bridge_link == link); 7422 mip->mi_bridge_link = NULL; 7423 mutex_exit(&mip->mi_bridge_lock); 7424 mac_poll_state_change(mh, B_TRUE); 7425 mac_capab_update(mh); 7426 } 7427 7428 void 7429 mac_no_active(mac_handle_t mh) 7430 { 7431 mac_impl_t *mip = (mac_impl_t *)mh; 7432 7433 i_mac_perim_enter(mip); 7434 mip->mi_state_flags |= MIS_NO_ACTIVE; 7435 i_mac_perim_exit(mip); 7436 } 7437 7438 /* 7439 * Walk the primary VLAN clients whenever the primary's rings property 7440 * changes and update the mac_resource_props_t for the VLAN clients. 7441 * We need to do this since we don't support setting these properties 7442 * on the primary's VLAN clients, but the VLAN clients have to 7443 * follow the primary w.r.t. the rings property. 7444 */ 7445 void 7446 mac_set_prim_vlan_rings(mac_impl_t *mip, mac_resource_props_t *mrp) 7447 { 7448 mac_client_impl_t *vmcip; 7449 mac_resource_props_t *vmrp; 7450 7451 for (vmcip = mip->mi_clients_list; vmcip != NULL; 7452 vmcip = vmcip->mci_client_next) { 7453 if (!(vmcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) || 7454 mac_client_vid((mac_client_handle_t)vmcip) == 7455 VLAN_ID_NONE) { 7456 continue; 7457 } 7458 vmrp = MCIP_RESOURCE_PROPS(vmcip); 7459 7460 vmrp->mrp_nrxrings = mrp->mrp_nrxrings; 7461 if (mrp->mrp_mask & MRP_RX_RINGS) 7462 vmrp->mrp_mask |= MRP_RX_RINGS; 7463 else if (vmrp->mrp_mask & MRP_RX_RINGS) 7464 vmrp->mrp_mask &= ~MRP_RX_RINGS; 7465 7466 vmrp->mrp_ntxrings = mrp->mrp_ntxrings; 7467 if (mrp->mrp_mask & MRP_TX_RINGS) 7468 vmrp->mrp_mask |= MRP_TX_RINGS; 7469 else if (vmrp->mrp_mask & MRP_TX_RINGS) 7470 vmrp->mrp_mask &= ~MRP_TX_RINGS; 7471 7472 if (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) 7473 vmrp->mrp_mask |= MRP_RXRINGS_UNSPEC; 7474 else 7475 vmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; 7476 7477 if (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) 7478 vmrp->mrp_mask |= MRP_TXRINGS_UNSPEC; 7479 else 7480 vmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; 7481 } 7482 } 7483 7484 /* 7485 * We are adding or removing ring(s) from a group. The source for taking 7486 * rings is the default group. The destination for giving rings back is 7487 * the default group. 7488 */ 7489 int 7490 mac_group_ring_modify(mac_client_impl_t *mcip, mac_group_t *group, 7491 mac_group_t *defgrp) 7492 { 7493 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 7494 uint_t modify; 7495 int count; 7496 mac_ring_t *ring; 7497 mac_ring_t *next; 7498 mac_impl_t *mip = mcip->mci_mip; 7499 mac_ring_t **rings; 7500 uint_t ringcnt; 7501 int i = 0; 7502 boolean_t rx_group = group->mrg_type == MAC_RING_TYPE_RX; 7503 int start; 7504 int end; 7505 mac_group_t *tgrp; 7506 int j; 7507 int rv = 0; 7508 7509 /* 7510 * If we are asked for just a group, we give 1 ring, else 7511 * the specified number of rings. 7512 */ 7513 if (rx_group) { 7514 ringcnt = (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) ? 1: 7515 mrp->mrp_nrxrings; 7516 } else { 7517 ringcnt = (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) ? 1: 7518 mrp->mrp_ntxrings; 7519 } 7520 7521 /* Don't allow modifying rings for a share for now.
*/ 7522 ASSERT(mcip->mci_share == NULL); 7523 7524 if (ringcnt == group->mrg_cur_count) 7525 return (0); 7526 7527 if (group->mrg_cur_count > ringcnt) { 7528 modify = group->mrg_cur_count - ringcnt; 7529 if (rx_group) { 7530 if (mip->mi_rx_donor_grp == group) { 7531 ASSERT(mac_is_primary_client(mcip)); 7532 mip->mi_rx_donor_grp = defgrp; 7533 } else { 7534 defgrp = mip->mi_rx_donor_grp; 7535 } 7536 } 7537 ring = group->mrg_rings; 7538 rings = kmem_alloc(modify * sizeof (mac_ring_handle_t), 7539 KM_SLEEP); 7540 j = 0; 7541 for (count = 0; count < modify; count++) { 7542 next = ring->mr_next; 7543 rv = mac_group_mov_ring(mip, defgrp, ring); 7544 if (rv != 0) { 7545 /* cleanup on failure */ 7546 for (j = 0; j < count; j++) { 7547 (void) mac_group_mov_ring(mip, group, 7548 rings[j]); 7549 } 7550 break; 7551 } 7552 rings[j++] = ring; 7553 ring = next; 7554 } 7555 kmem_free(rings, modify * sizeof (mac_ring_handle_t)); 7556 return (rv); 7557 } 7558 if (ringcnt >= MAX_RINGS_PER_GROUP) 7559 return (EINVAL); 7560 7561 modify = ringcnt - group->mrg_cur_count; 7562 7563 if (rx_group) { 7564 if (group != mip->mi_rx_donor_grp) 7565 defgrp = mip->mi_rx_donor_grp; 7566 else 7567 /* 7568 * This is the donor group with all the remaining 7569 * rings. The default group now gets to be the donor. 7570 */ 7571 mip->mi_rx_donor_grp = defgrp; 7572 start = 1; 7573 end = mip->mi_rx_group_count; 7574 } else { 7575 start = 0; 7576 end = mip->mi_tx_group_count - 1; 7577 } 7578 /* 7579 * If the default doesn't have any rings, let's see if we can 7580 * take rings given to an h/w client that doesn't need them. 7581 * For now, we just see if there is any one client that can donate 7582 * all the required rings. 7583 */ 7584 if (defgrp->mrg_cur_count < (modify + 1)) { 7585 for (i = start; i < end; i++) { 7586 if (rx_group) { 7587 tgrp = &mip->mi_rx_groups[i]; 7588 if (tgrp == group || tgrp->mrg_state < 7589 MAC_GROUP_STATE_RESERVED) { 7590 continue; 7591 } 7592 mcip = MAC_GROUP_ONLY_CLIENT(tgrp); 7593 if (mcip == NULL) 7594 mcip = mac_get_grp_primary(tgrp); 7595 ASSERT(mcip != NULL); 7596 mrp = MCIP_RESOURCE_PROPS(mcip); 7597 if ((mrp->mrp_mask & MRP_RX_RINGS) != 0) 7598 continue; 7599 if ((tgrp->mrg_cur_count + 7600 defgrp->mrg_cur_count) < (modify + 1)) { 7601 continue; 7602 } 7603 if (mac_rx_switch_group(mcip, tgrp, 7604 defgrp) != 0) { 7605 return (ENOSPC); 7606 } 7607 } else { 7608 tgrp = &mip->mi_tx_groups[i]; 7609 if (tgrp == group || tgrp->mrg_state < 7610 MAC_GROUP_STATE_RESERVED) { 7611 continue; 7612 } 7613 mcip = MAC_GROUP_ONLY_CLIENT(tgrp); 7614 if (mcip == NULL) 7615 mcip = mac_get_grp_primary(tgrp); 7616 mrp = MCIP_RESOURCE_PROPS(mcip); 7617 if ((mrp->mrp_mask & MRP_TX_RINGS) != 0) 7618 continue; 7619 if ((tgrp->mrg_cur_count + 7620 defgrp->mrg_cur_count) < (modify + 1)) { 7621 continue; 7622 } 7623 /* OK, we can switch this to s/w */ 7624 mac_tx_client_quiesce( 7625 (mac_client_handle_t)mcip); 7626 mac_tx_switch_group(mcip, tgrp, defgrp); 7627 mac_tx_client_restart( 7628 (mac_client_handle_t)mcip); 7629 } 7630 } 7631 if (defgrp->mrg_cur_count < (modify + 1)) 7632 return (ENOSPC); 7633 } 7634 if ((rv = i_mac_group_allocate_rings(mip, group->mrg_type, defgrp, 7635 group, mcip->mci_share, modify)) != 0) { 7636 return (rv); 7637 } 7638 return (0); 7639 } 7640 7641 /* 7642 * Given the poolname in mac_resource_props, find the cpupart 7643 * that is associated with this pool. The cpupart will be used 7644 * later for finding the cpus to be bound to the networking threads.
7645 * 7646 * use_default is set B_TRUE if pools are enabled and pool_default 7647 * is returned. This avoids a 2nd lookup to set the poolname 7648 * for pool-effective. 7649 * 7650 * returns: 7651 * 7652 * NULL - pools are disabled or if the 'cpus' property is set. 7653 * cpupart of pool_default - pools are enabled and the pool 7654 * is not available or poolname is blank 7655 * cpupart of named pool - pools are enabled and the pool 7656 * is available. 7657 */ 7658 cpupart_t * 7659 mac_pset_find(mac_resource_props_t *mrp, boolean_t *use_default) 7660 { 7661 pool_t *pool; 7662 cpupart_t *cpupart; 7663 7664 *use_default = B_FALSE; 7665 7666 /* CPUs property is set */ 7667 if (mrp->mrp_mask & MRP_CPUS) 7668 return (NULL); 7669 7670 ASSERT(pool_lock_held()); 7671 7672 /* Pools are disabled, no pset */ 7673 if (pool_state == POOL_DISABLED) 7674 return (NULL); 7675 7676 /* Pools property is set */ 7677 if (mrp->mrp_mask & MRP_POOL) { 7678 if ((pool = pool_lookup_pool_by_name(mrp->mrp_pool)) == NULL) { 7679 /* Pool not found */ 7680 DTRACE_PROBE1(mac_pset_find_no_pool, char *, 7681 mrp->mrp_pool); 7682 *use_default = B_TRUE; 7683 pool = pool_default; 7684 } 7685 /* Pools property is not set */ 7686 } else { 7687 *use_default = B_TRUE; 7688 pool = pool_default; 7689 } 7690 7691 /* Find the CPU pset that corresponds to the pool */ 7692 mutex_enter(&cpu_lock); 7693 if ((cpupart = cpupart_find(pool->pool_pset->pset_id)) == NULL) { 7694 DTRACE_PROBE1(mac_find_pset_no_pset, psetid_t, 7695 pool->pool_pset->pset_id); 7696 } 7697 mutex_exit(&cpu_lock); 7698 7699 return (cpupart); 7700 } 7701 7702 void 7703 mac_set_pool_effective(boolean_t use_default, cpupart_t *cpupart, 7704 mac_resource_props_t *mrp, mac_resource_props_t *emrp) 7705 { 7706 ASSERT(pool_lock_held()); 7707 7708 if (cpupart != NULL) { 7709 emrp->mrp_mask |= MRP_POOL; 7710 if (use_default) { 7711 (void) strcpy(emrp->mrp_pool, 7712 "pool_default"); 7713 } else { 7714 ASSERT(strlen(mrp->mrp_pool) != 0); 7715 (void) strcpy(emrp->mrp_pool, 7716 mrp->mrp_pool); 7717 } 7718 } else { 7719 emrp->mrp_mask &= ~MRP_POOL; 7720 bzero(emrp->mrp_pool, MAXPATHLEN); 7721 } 7722 } 7723 7724 struct mac_pool_arg { 7725 char mpa_poolname[MAXPATHLEN]; 7726 pool_event_t mpa_what; 7727 }; 7728 7729 /*ARGSUSED*/ 7730 static uint_t 7731 mac_pool_link_update(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 7732 { 7733 struct mac_pool_arg *mpa = arg; 7734 mac_impl_t *mip = (mac_impl_t *)val; 7735 mac_client_impl_t *mcip; 7736 mac_resource_props_t *mrp, *emrp; 7737 boolean_t pool_update = B_FALSE; 7738 boolean_t pool_clear = B_FALSE; 7739 boolean_t use_default = B_FALSE; 7740 cpupart_t *cpupart = NULL; 7741 7742 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 7743 i_mac_perim_enter(mip); 7744 for (mcip = mip->mi_clients_list; mcip != NULL; 7745 mcip = mcip->mci_client_next) { 7746 pool_update = B_FALSE; 7747 pool_clear = B_FALSE; 7748 use_default = B_FALSE; 7749 mac_client_get_resources((mac_client_handle_t)mcip, mrp); 7750 emrp = MCIP_EFFECTIVE_PROPS(mcip); 7751 7752 /* 7753 * When pools are enabled 7754 */ 7755 if ((mpa->mpa_what == POOL_E_ENABLE) && 7756 ((mrp->mrp_mask & MRP_CPUS) == 0)) { 7757 mrp->mrp_mask |= MRP_POOL; 7758 pool_update = B_TRUE; 7759 } 7760 7761 /* 7762 * When pools are disabled 7763 */ 7764 if ((mpa->mpa_what == POOL_E_DISABLE) && 7765 ((mrp->mrp_mask & MRP_CPUS) == 0)) { 7766 mrp->mrp_mask |= MRP_POOL; 7767 pool_clear = B_TRUE; 7768 } 7769 7770 /* 7771 * Look for links with the pool property set and the poolname 7772 * matching the one which is 
struct mac_pool_arg {
	char		mpa_poolname[MAXPATHLEN];
	pool_event_t	mpa_what;
};

/*ARGSUSED*/
static uint_t
mac_pool_link_update(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
{
	struct mac_pool_arg	*mpa = arg;
	mac_impl_t		*mip = (mac_impl_t *)val;
	mac_client_impl_t	*mcip;
	mac_resource_props_t	*mrp, *emrp;
	boolean_t		pool_update = B_FALSE;
	boolean_t		pool_clear = B_FALSE;
	boolean_t		use_default = B_FALSE;
	cpupart_t		*cpupart = NULL;

	mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP);
	i_mac_perim_enter(mip);
	for (mcip = mip->mi_clients_list; mcip != NULL;
	    mcip = mcip->mci_client_next) {
		pool_update = B_FALSE;
		pool_clear = B_FALSE;
		use_default = B_FALSE;
		mac_client_get_resources((mac_client_handle_t)mcip, mrp);
		emrp = MCIP_EFFECTIVE_PROPS(mcip);

		/*
		 * When pools are enabled
		 */
		if ((mpa->mpa_what == POOL_E_ENABLE) &&
		    ((mrp->mrp_mask & MRP_CPUS) == 0)) {
			mrp->mrp_mask |= MRP_POOL;
			pool_update = B_TRUE;
		}

		/*
		 * When pools are disabled
		 */
		if ((mpa->mpa_what == POOL_E_DISABLE) &&
		    ((mrp->mrp_mask & MRP_CPUS) == 0)) {
			mrp->mrp_mask |= MRP_POOL;
			pool_clear = B_TRUE;
		}

		/*
		 * Look for links with the pool property set and the poolname
		 * matching the one which is changing.
		 */
		if (strcmp(mrp->mrp_pool, mpa->mpa_poolname) == 0) {
			/*
			 * The pool associated with the link has changed.
			 */
			if (mpa->mpa_what == POOL_E_CHANGE) {
				mrp->mrp_mask |= MRP_POOL;
				pool_update = B_TRUE;
			}
		}

		/*
		 * This link is associated with pool_default and
		 * pool_default has changed.
		 */
		if ((mpa->mpa_what == POOL_E_CHANGE) &&
		    (strcmp(emrp->mrp_pool, "pool_default") == 0) &&
		    (strcmp(mpa->mpa_poolname, "pool_default") == 0)) {
			mrp->mrp_mask |= MRP_POOL;
			pool_update = B_TRUE;
		}

		/*
		 * Get new list of cpus for the pool, bind network
		 * threads to new list of cpus and update resources.
		 */
		if (pool_update) {
			if (MCIP_DATAPATH_SETUP(mcip)) {
				pool_lock();
				cpupart = mac_pset_find(mrp, &use_default);
				mac_fanout_setup(mcip, mcip->mci_flent, mrp,
				    mac_rx_deliver, mcip, NULL, cpupart);
				mac_set_pool_effective(use_default, cpupart,
				    mrp, emrp);
				pool_unlock();
			}
			mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip),
			    B_FALSE);
		}

		/*
		 * Clear the effective pool and bind network threads
		 * to any available CPU.
		 */
		if (pool_clear) {
			if (MCIP_DATAPATH_SETUP(mcip)) {
				emrp->mrp_mask &= ~MRP_POOL;
				bzero(emrp->mrp_pool, MAXPATHLEN);
				mac_fanout_setup(mcip, mcip->mci_flent, mrp,
				    mac_rx_deliver, mcip, NULL, NULL);
			}
			mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip),
			    B_FALSE);
		}
	}
	i_mac_perim_exit(mip);
	kmem_free(mrp, sizeof (*mrp));
	return (MH_WALK_CONTINUE);
}

static void
mac_pool_update(void *arg)
{
	mod_hash_walk(i_mac_impl_hash, mac_pool_link_update, arg);
	kmem_free(arg, sizeof (struct mac_pool_arg));
}

/*
 * Callback function to be executed when a noteworthy pool event
 * takes place.
 */
/* ARGSUSED */
static void
mac_pool_event_cb(pool_event_t what, poolid_t id, void *arg)
{
	pool_t			*pool;
	char			*poolname = NULL;
	struct mac_pool_arg	*mpa;

	pool_lock();
	mpa = kmem_zalloc(sizeof (struct mac_pool_arg), KM_SLEEP);

	switch (what) {
	case POOL_E_ENABLE:
	case POOL_E_DISABLE:
		break;

	case POOL_E_CHANGE:
		pool = pool_lookup_pool_by_id(id);
		if (pool == NULL) {
			kmem_free(mpa, sizeof (struct mac_pool_arg));
			pool_unlock();
			return;
		}
		pool_get_name(pool, &poolname);
		(void) strlcpy(mpa->mpa_poolname, poolname,
		    sizeof (mpa->mpa_poolname));
		break;

	default:
		kmem_free(mpa, sizeof (struct mac_pool_arg));
		pool_unlock();
		return;
	}
	pool_unlock();

	mpa->mpa_what = what;

	mac_pool_update(mpa);
}
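/*
 * To summarize the flow for a pool change (a sketch of the call chain
 * implemented above, not additional code):
 *
 *	pool framework event
 *	    -> mac_pool_event_cb(POOL_E_CHANGE, id)
 *		allocates a struct mac_pool_arg carrying the pool name
 *	    -> mac_pool_update(mpa)
 *		walks i_mac_impl_hash, then frees mpa
 *	    -> mac_pool_link_update(.., mip, mpa)
 *		for each client of the mip, recomputes the cpu
 *		bindings and the pool-effective properties
 *
 * Note the ownership contract: mac_pool_event_cb() allocates the
 * struct mac_pool_arg and mac_pool_update() frees it after the walk;
 * for POOL_E_ENABLE/POOL_E_DISABLE the poolname is left blank.
 */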
/*
 * Set effective rings property. This could be called from datapath_setup/
 * datapath_teardown or set-linkprop.
 * If the group is reserved we just go ahead and set the effective rings.
 * Additionally, for TX this could mean the default group has lost/gained
 * some rings, so if the default group is reserved, we need to adjust the
 * effective rings for the default group clients. For RX, if we are working
 * with the non-default group, we just need to reset the effective props
 * for the default group clients.
 */
void
mac_set_rings_effective(mac_client_impl_t *mcip)
{
	mac_impl_t		*mip = mcip->mci_mip;
	mac_group_t		*grp;
	mac_group_t		*defgrp;
	flow_entry_t		*flent = mcip->mci_flent;
	mac_resource_props_t	*emrp = MCIP_EFFECTIVE_PROPS(mcip);
	mac_grp_client_t	*mgcp;
	mac_client_impl_t	*gmcip;

	grp = flent->fe_rx_ring_group;
	if (grp != NULL) {
		defgrp = MAC_DEFAULT_RX_GROUP(mip);
		/*
		 * If we have reserved a group, set the effective rings
		 * to the ring count in the group.
		 */
		if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) {
			emrp->mrp_mask |= MRP_RX_RINGS;
			emrp->mrp_nrxrings = grp->mrg_cur_count;
		}

		/*
		 * We go through the clients in the shared group and
		 * reset the effective properties. It is possible this
		 * might have already been done for some client (i.e.
		 * if some client is being moved to a group that is
		 * already shared). The case where the default group is
		 * RESERVED is taken care of above (note that on the RX
		 * side, if there is a non-default group, the default
		 * group is always SHARED).
		 */
		if (grp != defgrp || grp->mrg_state == MAC_GROUP_STATE_SHARED) {
			if (grp->mrg_state == MAC_GROUP_STATE_SHARED)
				mgcp = grp->mrg_clients;
			else
				mgcp = defgrp->mrg_clients;
			while (mgcp != NULL) {
				gmcip = mgcp->mgc_client;
				emrp = MCIP_EFFECTIVE_PROPS(gmcip);
				if (emrp->mrp_mask & MRP_RX_RINGS) {
					emrp->mrp_mask &= ~MRP_RX_RINGS;
					emrp->mrp_nrxrings = 0;
				}
				mgcp = mgcp->mgc_next;
			}
		}
	}

	/* Now the TX side */
	grp = flent->fe_tx_ring_group;
	if (grp != NULL) {
		defgrp = MAC_DEFAULT_TX_GROUP(mip);

		/*
		 * The RX pass above may have re-pointed emrp at another
		 * client's properties; reset it to this client's props
		 * before updating the TX side.
		 */
		emrp = MCIP_EFFECTIVE_PROPS(mcip);
		if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) {
			emrp->mrp_mask |= MRP_TX_RINGS;
			emrp->mrp_ntxrings = grp->mrg_cur_count;
		} else if (grp->mrg_state == MAC_GROUP_STATE_SHARED) {
			mgcp = grp->mrg_clients;
			while (mgcp != NULL) {
				gmcip = mgcp->mgc_client;
				emrp = MCIP_EFFECTIVE_PROPS(gmcip);
				if (emrp->mrp_mask & MRP_TX_RINGS) {
					emrp->mrp_mask &= ~MRP_TX_RINGS;
					emrp->mrp_ntxrings = 0;
				}
				mgcp = mgcp->mgc_next;
			}
		}

		/*
		 * If the group is not the default group and the default
		 * group is reserved, the ring count in the default group
		 * might have changed, update it.
		 */
		if (grp != defgrp &&
		    defgrp->mrg_state == MAC_GROUP_STATE_RESERVED) {
			gmcip = MAC_GROUP_ONLY_CLIENT(defgrp);
			emrp = MCIP_EFFECTIVE_PROPS(gmcip);
			emrp->mrp_ntxrings = defgrp->mrg_cur_count;
		}
	}
}
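/*
 * As an illustration of the above (a hypothetical configuration, not code):
 * suppose a client owns a RESERVED RX group with 4 rings and shares the
 * default TX group. mac_set_rings_effective() then leaves its effective
 * properties as
 *
 *	emrp->mrp_mask:		MRP_RX_RINGS set, MRP_TX_RINGS clear
 *	emrp->mrp_nrxrings:	4
 *	emrp->mrp_ntxrings:	0
 *
 * If the RX group later degrades to SHARED (another client moves in), a
 * subsequent call clears MRP_RX_RINGS and zeroes mrp_nrxrings for every
 * client in that group, since no client can then claim the rings
 * exclusively.
 */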
/*
 * Check if the primary is in the default group. If so, see if we
 * can give it an exclusive group now that another client is
 * being configured. We take the primary out of the default group
 * because the multicast/broadcast packets for all the clients
 * will land in the default ring in the default group, which means
 * any client in the default group, even if it is the only one in
 * the group, will lose exclusive access to the rings, hence
 * polling.
 */
mac_client_impl_t *
mac_check_primary_relocation(mac_client_impl_t *mcip, boolean_t rxhw)
{
	mac_impl_t		*mip = mcip->mci_mip;
	mac_group_t		*defgrp = MAC_DEFAULT_RX_GROUP(mip);
	flow_entry_t		*flent = mcip->mci_flent;
	mac_resource_props_t	*mrp = MCIP_RESOURCE_PROPS(mcip);
	uint8_t			*mac_addr;
	mac_group_t		*ngrp;

	/*
	 * If the primary is not in the default group, or if it has
	 * been explicitly configured to be in the default group or
	 * has set the RX rings property, return.
	 */
	if (flent->fe_rx_ring_group != defgrp || mrp->mrp_mask & MRP_RX_RINGS)
		return (NULL);

	/*
	 * If the new client needs an exclusive group and we
	 * don't have another for the primary, return.
	 */
	if (rxhw && mip->mi_rxhwclnt_avail < 2)
		return (NULL);

	mac_addr = flent->fe_flow_desc.fd_dst_mac;
	/*
	 * We call this when we are setting up the datapath for
	 * the first non-primary.
	 */
	ASSERT(mip->mi_nactiveclients == 2);

	/*
	 * OK, now we have the primary that needs to be relocated.
	 */
	ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE);
	if (ngrp == NULL)
		return (NULL);
	if (mac_rx_switch_group(mcip, defgrp, ngrp) != 0) {
		mac_stop_group(ngrp);
		return (NULL);
	}
	return (mcip);
}
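/*
 * A sketch of the expected calling pattern (the caller shown here is
 * illustrative; in practice this is invoked from the RX datapath setup
 * of the first non-primary client, with mcip being the primary client's
 * handle and rxhw indicating that the new client wants exclusive RX
 * hardware):
 *
 *	if (mac_check_primary_relocation(mcip, rxhw) != NULL) {
 *		the primary now owns an exclusive RX group and
 *		the default group is free for the new client
 *	}
 *
 * A NULL return means the primary stays put: it was not in the default
 * group, its ring usage was explicitly configured, no spare hardware
 * client slot was available, or the group reservation/switch failed.
 */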