/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
 */

/*
 * MAC Services Module
 *
 * The GLDv3 framework locking - The MAC layer
 * --------------------------------------------
 *
 * The MAC layer is central to the GLD framework and can provide the locking
 * framework needed for itself and for the use of MAC clients. MAC end points
 * are fairly disjoint and don't share a lot of state. So a coarse grained
 * multi-threading scheme is to single thread all create/modify/delete or set
 * type of control operations on a per mac end point while allowing data
 * threads to run concurrently.
 *
 * Control operations (set) that modify a mac end point are always serialized
 * on a per mac end point basis. We have at most one such thread per mac end
 * point at a time.
 *
 * All other operations that are not serialized are essentially multi-threaded.
 * For example a control operation (get) like getting statistics which may not
 * care about reading values atomically, or data threads sending or receiving
 * data. Mostly these types of operations don't modify the control state. Any
 * state these operations care about is protected using traditional locks.
 *
 * The perimeter only serializes serial operations. It does not imply there
 * aren't any other concurrent operations. However a serialized operation may
 * sometimes need to make sure it is the only thread. In this case it needs
 * to use reference counting mechanisms to cv_wait until any current data
 * threads are done.
 *
 * The mac layer itself does not hold any locks across a call to another layer.
 * The perimeter is however held across a down call to the driver to make the
 * whole control operation atomic with respect to other control operations.
 * Also the data path and get type control operations may proceed concurrently.
 * These operations synchronize with the single serial operation on a given mac
 * end point using regular locks. The perimeter ensures that conflicting
 * operations like say a mac_multicast_add and a mac_multicast_remove on the
 * same mac end point don't interfere with each other and also ensures that the
 * changes in the mac layer and the call to the underlying driver to say add a
 * multicast address are done atomically without interference from a thread
 * trying to delete the same address.
 *
 * For example, consider
 * mac_multicst_add()
 * {
 *	mac_perimeter_enter();	serialize all control operations
 *
 *	grab list lock		protect against access by data threads
 *	add to list
 *	drop list lock
 *
 *	call driver's mi_multicst
 *
 *	mac_perimeter_exit();
 * }
 *
 * To lessen the number of serialization locks and simplify the lock hierarchy,
 * we serialize all the control operations on a per mac end point by using a
 * single serialization lock called the perimeter. We allow recursive entry
 * into the perimeter to facilitate use of this mechanism by both the mac
 * client and the MAC layer itself.
 *
 * MAC client means an entity that does an operation on a mac handle
 * obtained from a mac_open/mac_client_open. Similarly MAC driver means
 * an entity that does an operation on a mac handle obtained from a
 * mac_register. An entity could be both client and driver but on different
 * handles (e.g. aggr) and should only make the corresponding mac interface
 * calls, i.e. mac driver interface or mac client interface, as appropriate
 * for that mac handle.
 *
 * General rules.
 * -------------
 *
 * R1. The lock order of upcall threads is naturally opposite to downcall
 * threads. Hence upcalls must not hold any locks across layers for fear of
 * recursive lock enter and lock order violation. This applies to all layers.
 *
 * R2. The perimeter is just another lock. Since it is held in the down
 * direction, acquiring the perimeter in an upcall is prohibited as it would
 * cause a deadlock. This applies to all layers.
 *
 * Note that upcalls that need to grab the mac perimeter (for example
 * mac_notify upcalls) can still achieve that by posting the request to a
 * thread, which can then grab all the required perimeters and locks in the
 * right global order. Note that in the above example the mac layer itself
 * won't grab the mac perimeter in the mac_notify upcall, instead the upcall
 * to the client must do that. Please see the aggr code for an example.
 *
 * MAC client rules
 * ----------------
 *
 * R3. A MAC client may use the MAC provided perimeter facility to serialize
 * control operations on a per mac end point. It does this by acquiring
 * and holding the perimeter across a sequence of calls to the mac layer.
 * This ensures atomicity across the entire block of mac calls. In this
 * model the MAC client must not hold any client locks across the calls to
 * the mac layer. This model is the preferred solution.
 *
 * R4. However if a MAC client has a lot of global state across all mac end
 * points the per mac end point serialization may not be sufficient. In this
 * case the client may choose to use global locks or use its own serialization.
 * To avoid deadlocks, these client layer locks held across the mac calls
 * in the control path must never be acquired by the data path for the reason
 * mentioned below.
 *
 * (Assume that a control operation that holds a client lock blocks in the
 * mac layer waiting for upcall reference counts to drop to zero. If an upcall
 * data thread that holds this reference count, tries to acquire the same
 * client lock subsequently it will deadlock).
 *
 * A MAC client may follow either the R3 model or the R4 model, but can't
 * mix both. In the former, the hierarchy is Perim -> client locks, but in
 * the latter it is client locks -> Perim.
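 *
 * As an illustrative sketch of the R3 model (not code from this file), a
 * client might bracket a sequence of mac calls as follows; the mac handle
 * "mh" and the calls inside the block are placeholders:
 *
 *	mac_perim_handle_t mph;
 *
 *	mac_perim_enter_by_mh(mh, &mph);   serialize control operations
 *	... a sequence of mac_*() control calls, atomic as a block ...
 *	mac_perim_exit(mph);
 *
 * No client locks may be held across the block in this model.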
 *
 * R5. MAC clients must make MAC calls (excluding data calls) in a cv_wait'able
 * context since they may block while trying to acquire the perimeter.
 * In addition some calls may block waiting for upcall refcnts to come down to
 * zero.
 *
 * R6. MAC clients must make sure that they are single threaded and all threads
 * from the top (in particular data threads) have finished before calling
 * mac_client_close. The MAC framework does not track the number of client
 * threads using the mac client handle. Also mac clients must make sure
 * they have undone all the control operations before calling mac_client_close.
 * For example mac_unicast_remove/mac_multicast_remove to undo the corresponding
 * mac_unicast_add/mac_multicast_add.
 *
 * MAC framework rules
 * -------------------
 *
 * R7. The mac layer itself must not hold any mac layer locks (except the mac
 * perimeter) across a call to any other layer from the mac layer. The call to
 * any other layer could be via mi_* entry points, classifier entry points into
 * the driver or via upcall pointers into layers above. The mac perimeter may
 * be acquired or held only in the down direction, e.g. when calling into
 * a mi_* driver entry point to provide atomicity of the operation.
 *
 * R8. Since it is not guaranteed (see R14) that drivers won't hold locks across
 * mac driver interfaces, the MAC layer must provide a cut out for control
 * interfaces like upcall notifications and start them in a separate thread.
 *
 * R9. Note that locking order also implies a plumbing order. For example
 * VNICs are allowed to be created over aggrs, but not vice-versa. An attempt
 * to plumb in any other order must be failed at mac_open time, otherwise it
 * could lead to deadlocks due to inverse locking order.
 *
 * R10. MAC driver interfaces must not block since the driver could call them
 * in interrupt context.
 *
 * R11. Walkers must preferably not hold any locks while calling walker
 * callbacks. Instead these can operate on reference counts. In simple
 * callbacks it may be ok to hold a lock and call the callbacks, but this is
 * harder to maintain in the general case of arbitrary callbacks.
 *
 * R12. The MAC layer must protect upcall notification callbacks using reference
 * counts rather than holding locks across the callbacks.
 *
 * R13. Given the variety of drivers, it is preferable if the MAC layer can make
 * sure that any pointers (such as mac ring pointers) it passes to the driver
 * remain valid until mac unregister time. Currently the mac layer achieves
 * this by using generation numbers for rings and freeing the mac rings only
 * at unregister time. The MAC layer must provide a layer of indirection and
 * must not expose underlying driver rings or driver data structures/pointers
 * directly to MAC clients.
 *
 * MAC driver rules
 * ----------------
 *
 * R14. It would be preferable if MAC drivers don't hold any locks across any
 * mac call. However at a minimum they must not hold any locks across data
 * upcalls. They must also make sure that all references to mac data structures
 * are cleaned up and that it is single threaded at mac_unregister time.
 *
 * R15. MAC driver interfaces don't block and so the action may be done
 * asynchronously in a separate thread as for example handling notifications.
 * The driver must not assume that the action is complete when the call
 * returns.
 *
 * R16. Drivers must maintain a generation number per Rx ring, and pass it
 * back to mac_rx_ring(). They are expected to increment the generation
 * number whenever the ring's stop routine is invoked.
 * See comments in mac_rx_ring().
 *
 * R17. Similarly mi_stop is another synchronization point and the driver must
 * ensure that all upcalls are done and there won't be any future upcall
 * before returning from mi_stop.
 *
 * R18. The driver may assume that all set/modify control operations via
 * the mi_* entry points are single threaded on a per mac end point.
 *
 * Lock and Perimeter hierarchy scenarios
 * ---------------------------------------
 *
 * i_mac_impl_lock -> mi_rw_lock -> srs_lock -> s_ring_lock[i_mac_tx_srs_notify]
 *
 * ft_lock -> fe_lock [mac_flow_lookup]
 *
 * mi_rw_lock -> fe_lock [mac_bcast_send]
 *
 * srs_lock -> mac_bw_lock [mac_rx_srs_drain_bw]
 *
 * cpu_lock -> mac_srs_g_lock -> srs_lock -> s_ring_lock [mac_walk_srs_and_bind]
 *
 * i_dls_devnet_lock -> mac layer locks [dls_devnet_rename]
 *
 * Perimeters are ordered P1 -> P2 -> P3 from top to bottom in order of mac
 * client to driver. In the case of clients that explicitly use the mac
 * provided perimeter mechanism for their serialization, the hierarchy is
 * Perimeter -> mac layer locks, since the client never holds any locks across
 * the mac calls. In the case of clients that use their own locks the hierarchy
 * is Client locks -> Mac Perim -> Mac layer locks. The client never explicitly
 * calls mac_perim_enter/exit in this case.
 *
 * Subflow creation rules
 * ---------------------------
 * o In case of a user specified cpulist present on the underlying link and
 *   flows, the flows cpulist must be a subset of the underlying link.
 * o In case of a user specified fanout mode present on link and flow, the
 *   subflow fanout count has to be less than or equal to that of the
 *   underlying link. The cpu-bindings for the subflows will be a subset of
 *   the underlying link.
 * o If no cpulist is specified on either the underlying link or the flow, the
 *   underlying link relies on a MAC tunable to provide out of the box fanout.
 *   The subflow will have no cpulist (the subflow will be unbound).
 * o If no cpulist is specified on the underlying link, a subflow can
 *   carry either a user-specified cpulist or fanout count. The cpu-bindings
 *   for the subflow will not adhere to the restriction that they need to be
 *   a subset of the underlying link.
 * o In case where the underlying link is carrying either a user specified
 *   cpulist or fanout mode and for an unspecified subflow, the subflow will
 *   be created unbound.
 * o While creating unbound subflows, bandwidth mode changes attempt to
 *   figure out a right fanout count. In such cases the fanout count will
 *   override the unbound cpu-binding behavior.
 * o In addition to this, while cycling between flow and link properties, we
 *   impose a restriction that if a link property has a subflow with
 *   user-specified attributes, we will not allow changing the link property.
 *   The administrator needs to reset all the user specified properties for the
 *   subflows before attempting a link property change.
 * Some of the above rules can be overridden by specifying additional command
 * line options while creating or modifying link or subflow properties.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/id_space.h>
#include <sys/esunddi.h>
#include <sys/stat.h>
#include <sys/mkdev.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/dlpi.h>
#include <sys/list.h>
#include <sys/modhash.h>
#include <sys/mac_provider.h>
#include <sys/mac_client_impl.h>
#include <sys/mac_soft_ring.h>
#include <sys/mac_stat.h>
#include <sys/mac_impl.h>
#include <sys/mac.h>
#include <sys/dls.h>
#include <sys/dld.h>
#include <sys/modctl.h>
#include <sys/fs/dv_node.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/callb.h>
#include <sys/cpuvar.h>
#include <sys/atomic.h>
#include <sys/bitmap.h>
#include <sys/sdt.h>
#include <sys/mac_flow.h>
#include <sys/ddi_intr_impl.h>
#include <sys/disp.h>
#include <sys/vnic.h>
#include <sys/vnic_impl.h>
#include <sys/vlan.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <sys/exacct.h>
#include <sys/exacct_impl.h>
#include <inet/nd.h>
#include <sys/ethernet.h>
#include <sys/pool.h>
#include <sys/pool_pset.h>
#include <sys/cpupart.h>
#include <inet/wifi_ioctl.h>
#include <net/wpa.h>

#define	IMPL_HASHSZ	67	/* prime */

kmem_cache_t		*i_mac_impl_cachep;
mod_hash_t		*i_mac_impl_hash;
krwlock_t		i_mac_impl_lock;
uint_t			i_mac_impl_count;
static kmem_cache_t	*mac_ring_cache;
static id_space_t	*minor_ids;
static uint32_t		minor_count;
static pool_event_cb_t	mac_pool_event_reg;

/*
 * Logging stuff. Perhaps mac_logging_interval could be broken into
 * mac_flow_log_interval and mac_link_log_interval if we want to be
 * able to schedule them differently.
 */
uint_t			mac_logging_interval;
boolean_t		mac_flow_log_enable;
boolean_t		mac_link_log_enable;
timeout_id_t		mac_logging_timer;

/* for debugging, see MAC_DBG_PRT() in mac_impl.h */
int mac_dbg = 0;

#define	MACTYPE_KMODDIR	"mac"
#define	MACTYPE_HASHSZ	67
static mod_hash_t	*i_mactype_hash;
/*
 * i_mactype_lock synchronizes threads that obtain references to mactype_t
 * structures through i_mactype_getplugin().
 */
static kmutex_t		i_mactype_lock;

/*
 * mac_tx_percpu_cnt
 *
 * Number of per cpu locks per mac_client_impl_t. Used by the transmit side
 * in mac_tx to reduce lock contention. This is sized at boot time in mac_init.
 * mac_tx_percpu_cnt_max is settable in /etc/system and must be a power of 2.
 * Per cpu locks may be disabled by setting mac_tx_percpu_cnt_max to 1.
 */
int mac_tx_percpu_cnt;
int mac_tx_percpu_cnt_max = 128;

/*
 * Call back functions for the bridge module. These are guaranteed to be valid
 * when holding a reference on a link or when holding mip->mi_bridge_lock and
 * mi_bridge_link is non-NULL.
 */
mac_bridge_tx_t mac_bridge_tx_cb;
mac_bridge_rx_t mac_bridge_rx_cb;
mac_bridge_ref_t mac_bridge_ref_cb;
mac_bridge_ls_t mac_bridge_ls_cb;

static int i_mac_constructor(void *, void *, int);
static void i_mac_destructor(void *, void *);
static int i_mac_ring_ctor(void *, void *, int);
static void i_mac_ring_dtor(void *, void *);
static mblk_t *mac_rx_classify(mac_impl_t *, mac_resource_handle_t, mblk_t *);
void mac_tx_client_flush(mac_client_impl_t *);
void mac_tx_client_block(mac_client_impl_t *);
static void mac_rx_ring_quiesce(mac_ring_t *, uint_t);
static int mac_start_group_and_rings(mac_group_t *);
static void mac_stop_group_and_rings(mac_group_t *);
static void mac_pool_event_cb(pool_event_t, int, void *);

typedef struct netinfo_s {
	list_node_t	ni_link;
	void		*ni_record;
	int		ni_size;
	int		ni_type;
} netinfo_t;

/*
 * Module initialization functions.
 */

void
mac_init(void)
{
	mac_tx_percpu_cnt = ((boot_max_ncpus == -1) ? max_ncpus :
	    boot_max_ncpus);

	/* Upper bound is mac_tx_percpu_cnt_max */
	if (mac_tx_percpu_cnt > mac_tx_percpu_cnt_max)
		mac_tx_percpu_cnt = mac_tx_percpu_cnt_max;

	if (mac_tx_percpu_cnt < 1) {
		/* Someone set mac_tx_percpu_cnt_max to 0 or less */
		mac_tx_percpu_cnt = 1;
	}

	ASSERT(mac_tx_percpu_cnt >= 1);
	mac_tx_percpu_cnt = (1 << highbit(mac_tx_percpu_cnt - 1));
	/*
	 * Make it of the form 2**N - 1 in the range
	 * [0 .. mac_tx_percpu_cnt_max - 1]
	 */
	mac_tx_percpu_cnt--;

	i_mac_impl_cachep = kmem_cache_create("mac_impl_cache",
	    sizeof (mac_impl_t), 0, i_mac_constructor, i_mac_destructor,
	    NULL, NULL, NULL, 0);
	ASSERT(i_mac_impl_cachep != NULL);

	mac_ring_cache = kmem_cache_create("mac_ring_cache",
	    sizeof (mac_ring_t), 0, i_mac_ring_ctor, i_mac_ring_dtor, NULL,
	    NULL, NULL, 0);
	ASSERT(mac_ring_cache != NULL);

	i_mac_impl_hash = mod_hash_create_extended("mac_impl_hash",
	    IMPL_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
	rw_init(&i_mac_impl_lock, NULL, RW_DEFAULT, NULL);

	mac_flow_init();
	mac_soft_ring_init();
	mac_bcast_init();
	mac_client_init();

	i_mac_impl_count = 0;

	i_mactype_hash = mod_hash_create_extended("mactype_hash",
	    MACTYPE_HASHSZ,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * Allocate an id space to manage minor numbers. The range of the
	 * space will be from MAC_MAX_MINOR+1 to MAC_PRIVATE_MINOR-1. This
	 * leaves half of the 32-bit minors available for driver private use.
	 */
	minor_ids = id_space_create("mac_minor_ids", MAC_MAX_MINOR+1,
	    MAC_PRIVATE_MINOR-1);
	ASSERT(minor_ids != NULL);
	minor_count = 0;

	/* Let's default to 20 seconds */
	mac_logging_interval = 20;
	mac_flow_log_enable = B_FALSE;
	mac_link_log_enable = B_FALSE;
	mac_logging_timer = 0;

	/* Register to be notified of noteworthy pools events */
	mac_pool_event_reg.pec_func = mac_pool_event_cb;
	mac_pool_event_reg.pec_arg = NULL;
	pool_event_cb_register(&mac_pool_event_reg);
}

int
mac_fini(void)
{

	if (i_mac_impl_count > 0 || minor_count > 0)
		return (EBUSY);

	pool_event_cb_unregister(&mac_pool_event_reg);

	id_space_destroy(minor_ids);
	mac_flow_fini();

	mod_hash_destroy_hash(i_mac_impl_hash);
	rw_destroy(&i_mac_impl_lock);

	mac_client_fini();
	kmem_cache_destroy(mac_ring_cache);

	mod_hash_destroy_hash(i_mactype_hash);
	mac_soft_ring_finish();

	return (0);
}

/*
 * Initialize a GLDv3 driver's device ops. A driver that manages its own ops
 * (e.g. softmac) may pass in a NULL ops argument.
 */
void
mac_init_ops(struct dev_ops *ops, const char *name)
{
	major_t major = ddi_name_to_major((char *)name);

	/*
	 * By returning on error below, we are not letting the driver continue
	 * in an undefined context. The mac_register() function will fail if
	 * DN_GLDV3_DRIVER isn't set.
	 */
	if (major == DDI_MAJOR_T_NONE)
		return;
	LOCK_DEV_OPS(&devnamesp[major].dn_lock);
	devnamesp[major].dn_flags |= (DN_GLDV3_DRIVER | DN_NETWORK_DRIVER);
	UNLOCK_DEV_OPS(&devnamesp[major].dn_lock);
	if (ops != NULL)
		dld_init_ops(ops, name);
}

void
mac_fini_ops(struct dev_ops *ops)
{
	dld_fini_ops(ops);
}
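/*
 * As an illustrative sketch (not code from this file), a GLDv3 driver
 * typically calls the two functions above from its _init(9E); "xx_dev_ops"
 * and "xx_modlinkage" are hypothetical names:
 *
 *	int
 *	_init(void)
 *	{
 *		int err;
 *
 *		mac_init_ops(&xx_dev_ops, "xx");
 *		if ((err = mod_install(&xx_modlinkage)) != 0)
 *			mac_fini_ops(&xx_dev_ops);
 *		return (err);
 *	}
 */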
/*ARGSUSED*/
static int
i_mac_constructor(void *buf, void *arg, int kmflag)
{
	mac_impl_t *mip = buf;

	bzero(buf, sizeof (mac_impl_t));

	mip->mi_linkstate = LINK_STATE_UNKNOWN;

	rw_init(&mip->mi_rw_lock, NULL, RW_DRIVER, NULL);
	mutex_init(&mip->mi_notify_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&mip->mi_promisc_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&mip->mi_ring_lock, NULL, MUTEX_DEFAULT, NULL);

	mip->mi_notify_cb_info.mcbi_lockp = &mip->mi_notify_lock;
	cv_init(&mip->mi_notify_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);
	mip->mi_promisc_cb_info.mcbi_lockp = &mip->mi_promisc_lock;
	cv_init(&mip->mi_promisc_cb_info.mcbi_cv, NULL, CV_DRIVER, NULL);

	mutex_init(&mip->mi_bridge_lock, NULL, MUTEX_DEFAULT, NULL);

	return (0);
}

/*ARGSUSED*/
static void
i_mac_destructor(void *buf, void *arg)
{
	mac_impl_t	*mip = buf;
	mac_cb_info_t	*mcbi;

	ASSERT(mip->mi_ref == 0);
	ASSERT(mip->mi_active == 0);
	ASSERT(mip->mi_linkstate == LINK_STATE_UNKNOWN);
	ASSERT(mip->mi_devpromisc == 0);
	ASSERT(mip->mi_ksp == NULL);
	ASSERT(mip->mi_kstat_count == 0);
	ASSERT(mip->mi_nclients == 0);
	ASSERT(mip->mi_nactiveclients == 0);
	ASSERT(mip->mi_single_active_client == NULL);
	ASSERT(mip->mi_state_flags == 0);
	ASSERT(mip->mi_factory_addr == NULL);
	ASSERT(mip->mi_factory_addr_num == 0);
	ASSERT(mip->mi_default_tx_ring == NULL);

	mcbi = &mip->mi_notify_cb_info;
	ASSERT(mcbi->mcbi_del_cnt == 0 && mcbi->mcbi_walker_cnt == 0);
	ASSERT(mip->mi_notify_bits == 0);
	ASSERT(mip->mi_notify_thread == NULL);
	ASSERT(mcbi->mcbi_lockp == &mip->mi_notify_lock);
	mcbi->mcbi_lockp = NULL;

	mcbi = &mip->mi_promisc_cb_info;
	ASSERT(mcbi->mcbi_del_cnt == 0 && mip->mi_promisc_list == NULL);
	ASSERT(mip->mi_promisc_list == NULL);
	ASSERT(mcbi->mcbi_lockp == &mip->mi_promisc_lock);
	mcbi->mcbi_lockp = NULL;

	ASSERT(mip->mi_bcast_ngrps == 0 && mip->mi_bcast_grp == NULL);
	ASSERT(mip->mi_perim_owner == NULL && mip->mi_perim_ocnt == 0);

	rw_destroy(&mip->mi_rw_lock);

	mutex_destroy(&mip->mi_promisc_lock);
	cv_destroy(&mip->mi_promisc_cb_info.mcbi_cv);
	mutex_destroy(&mip->mi_notify_lock);
	cv_destroy(&mip->mi_notify_cb_info.mcbi_cv);
	mutex_destroy(&mip->mi_ring_lock);

	ASSERT(mip->mi_bridge_link == NULL);
}

/* ARGSUSED */
static int
i_mac_ring_ctor(void *buf, void *arg, int kmflag)
{
	mac_ring_t *ring = (mac_ring_t *)buf;

	bzero(ring, sizeof (mac_ring_t));
	cv_init(&ring->mr_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&ring->mr_lock, NULL, MUTEX_DEFAULT, NULL);
	ring->mr_state = MR_FREE;
	return (0);
}

/* ARGSUSED */
static void
i_mac_ring_dtor(void *buf, void *arg)
{
	mac_ring_t *ring = (mac_ring_t *)buf;

	cv_destroy(&ring->mr_cv);
	mutex_destroy(&ring->mr_lock);
}

/*
 * Common functions to do mac callback addition and deletion. Currently this is
 * used by promisc callbacks and notify callbacks. List addition and deletion
 * need to take care of list walkers. List walkers in general, can't hold list
 * locks and make upcall callbacks due to potential lock order and recursive
 * reentry issues. Instead list walkers increment the list walker count to mark
 * the presence of a walker thread. Addition can be carefully done to ensure
 * that the list walker always sees either the old list or the new list.
 * However the deletion can't be done while the walker is active, instead the
 * deleting thread simply marks the entry as logically deleted. The last walker
 * physically deletes and frees up the logically deleted entries when the walk
 * is complete.
 */
void
mac_callback_add(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    mac_cb_t *mcb_elem)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;

	/* Verify it is not already in the list */
	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
		if (p == mcb_elem)
			break;
	}
	VERIFY(p == NULL);

	/*
	 * Add it to the head of the callback list. The membar ensures that
	 * the following list pointer manipulations reach global visibility
	 * in exactly the program order below.
	 */
	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));

	mcb_elem->mcb_nextp = *mcb_head;
	membar_producer();
	*mcb_head = mcb_elem;
}
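/*
 * As an illustrative sketch (not code from this file), a caller registers a
 * callback by linking its embedded mac_cb_t under the corresponding list
 * lock; "my_cb" and the list head are placeholders:
 *
 *	mutex_enter(mcbi->mcbi_lockp);
 *	mac_callback_add(mcbi, mcb_head, &my_cb);
 *	mutex_exit(mcbi->mcbi_lockp);
 */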
/*
 * Mark the entry as logically deleted. If there aren't any walkers unlink
 * from the list. In either case return the corresponding status.
 */
boolean_t
mac_callback_remove(mac_cb_info_t *mcbi, mac_cb_t **mcb_head,
    mac_cb_t *mcb_elem)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;

	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	/*
	 * Search the callback list for the entry to be removed
	 */
	for (pp = mcb_head; (p = *pp) != NULL; pp = &p->mcb_nextp) {
		if (p == mcb_elem)
			break;
	}
	VERIFY(p != NULL);

	/*
	 * If there are walkers just mark it as deleted and the last walker
	 * will remove from the list and free it.
	 */
	if (mcbi->mcbi_walker_cnt != 0) {
		p->mcb_flags |= MCB_CONDEMNED;
		mcbi->mcbi_del_cnt++;
		return (B_FALSE);
	}

	ASSERT(mcbi->mcbi_del_cnt == 0);
	*pp = p->mcb_nextp;
	p->mcb_nextp = NULL;
	return (B_TRUE);
}

/*
 * Wait for all pending callback removals to be completed
 */
void
mac_callback_remove_wait(mac_cb_info_t *mcbi)
{
	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	while (mcbi->mcbi_del_cnt != 0) {
		DTRACE_PROBE1(need_wait, mac_cb_info_t *, mcbi);
		cv_wait(&mcbi->mcbi_cv, mcbi->mcbi_lockp);
	}
}

/*
 * The last mac callback walker does the cleanup. Walk the list and unlink
 * all the logically deleted entries and construct a temporary list of
 * removed entries. Return the list of removed entries to the caller.
 */
mac_cb_t *
mac_callback_walker_cleanup(mac_cb_info_t *mcbi, mac_cb_t **mcb_head)
{
	mac_cb_t	*p;
	mac_cb_t	**pp;
	mac_cb_t	*rmlist = NULL;		/* List of removed elements */
	int	cnt = 0;

	ASSERT(MUTEX_HELD(mcbi->mcbi_lockp));
	ASSERT(mcbi->mcbi_del_cnt != 0 && mcbi->mcbi_walker_cnt == 0);

	pp = mcb_head;
	while (*pp != NULL) {
		if ((*pp)->mcb_flags & MCB_CONDEMNED) {
			p = *pp;
			*pp = p->mcb_nextp;
			p->mcb_nextp = rmlist;
			rmlist = p;
			cnt++;
			continue;
		}
		pp = &(*pp)->mcb_nextp;
	}

	ASSERT(mcbi->mcbi_del_cnt == cnt);
	mcbi->mcbi_del_cnt = 0;
	return (rmlist);
}

boolean_t
mac_callback_lookup(mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
{
	mac_cb_t	*mcb;

	/* Search the list for the given entry */
	for (mcb = *mcb_headp; mcb != NULL; mcb = mcb->mcb_nextp) {
		if (mcb == mcb_elem)
			return (B_TRUE);
	}

	return (B_FALSE);
}

boolean_t
mac_callback_find(mac_cb_info_t *mcbi, mac_cb_t **mcb_headp, mac_cb_t *mcb_elem)
{
	boolean_t	found;

	mutex_enter(mcbi->mcbi_lockp);
	found = mac_callback_lookup(mcb_headp, mcb_elem);
	mutex_exit(mcbi->mcbi_lockp);

	return (found);
}

/* Free the list of removed callbacks */
void
mac_callback_free(mac_cb_t *rmlist)
{
	mac_cb_t	*mcb;
	mac_cb_t	*mcb_next;

	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
		mcb_next = mcb->mcb_nextp;
		kmem_free(mcb->mcb_objp, mcb->mcb_objsize);
	}
}
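/*
 * As an illustrative sketch (not code from this file), a walker bumps
 * mcbi_walker_cnt under the list lock, walks without holding the lock, and
 * the last walker reaps any logically deleted entries; whether a wakeup of
 * mac_callback_remove_wait() threads is needed depends on the caller:
 *
 *	mutex_enter(mcbi->mcbi_lockp);
 *	mcbi->mcbi_walker_cnt++;
 *	mutex_exit(mcbi->mcbi_lockp);
 *	... invoke the callbacks on the list ...
 *	mutex_enter(mcbi->mcbi_lockp);
 *	if (--mcbi->mcbi_walker_cnt == 0 && mcbi->mcbi_del_cnt != 0)
 *		rmlist = mac_callback_walker_cleanup(mcbi, mcb_head);
 *	mutex_exit(mcbi->mcbi_lockp);
 *	mac_callback_free(rmlist);
 */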
/*
 * The promisc callbacks are in 2 lists, one off the 'mip' and another off the
 * 'mcip' threaded by mpi_mi_link and mpi_mci_link respectively. However there
 * is only a single shared total walker count, and an entry can't be physically
 * unlinked if a walker is active on either list. The last walker does this
 * cleanup of logically deleted entries.
 */
void
i_mac_promisc_walker_cleanup(mac_impl_t *mip)
{
	mac_cb_t	*rmlist;
	mac_cb_t	*mcb;
	mac_cb_t	*mcb_next;
	mac_promisc_impl_t	*mpip;

	/*
	 * Construct a temporary list of deleted callbacks by walking the
	 * mi_promisc_list. Then for each entry in the temporary list,
	 * remove it from the mci_promisc_list and free the entry.
	 */
	rmlist = mac_callback_walker_cleanup(&mip->mi_promisc_cb_info,
	    &mip->mi_promisc_list);

	for (mcb = rmlist; mcb != NULL; mcb = mcb_next) {
		mcb_next = mcb->mcb_nextp;
		mpip = (mac_promisc_impl_t *)mcb->mcb_objp;
		VERIFY(mac_callback_remove(&mip->mi_promisc_cb_info,
		    &mpip->mpi_mcip->mci_promisc_list, &mpip->mpi_mci_link));
		mcb->mcb_flags = 0;
		mcb->mcb_nextp = NULL;
		kmem_cache_free(mac_promisc_impl_cache, mpip);
	}
}

void
i_mac_notify(mac_impl_t *mip, mac_notify_type_t type)
{
	mac_cb_info_t	*mcbi;

	/*
	 * Signal the notify thread even after mi_ref has become zero and
	 * mi_disabled is set. The synchronization with the notify thread
	 * happens in mac_unregister and that implies the driver must make
	 * sure it is single-threaded (with respect to mac calls) and that
	 * all pending mac calls have returned before it calls mac_unregister.
	 */
	rw_enter(&i_mac_impl_lock, RW_READER);
	if (mip->mi_state_flags & MIS_DISABLED)
		goto exit;

	/*
	 * Guard against incorrect notifications. (Running a newer
	 * mac client against an older implementation?)
	 */
	if (type >= MAC_NNOTE)
		goto exit;

	mcbi = &mip->mi_notify_cb_info;
	mutex_enter(mcbi->mcbi_lockp);
	mip->mi_notify_bits |= (1 << type);
	cv_broadcast(&mcbi->mcbi_cv);
	mutex_exit(mcbi->mcbi_lockp);

exit:
	rw_exit(&i_mac_impl_lock);
}

/*
 * Mac serialization primitives. Please see the block comment at the
 * top of the file.
 */
void
i_mac_perim_enter(mac_impl_t *mip)
{
	mac_client_impl_t	*mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Use the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}

	mutex_enter(&mip->mi_perim_lock);
	if (mip->mi_perim_owner == curthread) {
		mip->mi_perim_ocnt++;
		mutex_exit(&mip->mi_perim_lock);
		return;
	}

	while (mip->mi_perim_owner != NULL)
		cv_wait(&mip->mi_perim_cv, &mip->mi_perim_lock);

	mip->mi_perim_owner = curthread;
	ASSERT(mip->mi_perim_ocnt == 0);
	mip->mi_perim_ocnt++;
#ifdef DEBUG
	mip->mi_perim_stack_depth = getpcstack(mip->mi_perim_stack,
	    MAC_PERIM_STACK_DEPTH);
#endif
	mutex_exit(&mip->mi_perim_lock);
}

int
i_mac_perim_enter_nowait(mac_impl_t *mip)
{
	/*
	 * The vnic is a special case, since the serialization is done based
	 * on the lower mac. If the lower mac is busy, it does not imply the
	 * vnic can't be unregistered. But in the case of other drivers,
	 * a busy perimeter or open mac handles implies that the mac is busy
	 * and can't be unregistered.
	 */
	if (mip->mi_state_flags & MIS_IS_VNIC) {
		i_mac_perim_enter(mip);
		return (0);
	}

	mutex_enter(&mip->mi_perim_lock);
	if (mip->mi_perim_owner != NULL) {
		mutex_exit(&mip->mi_perim_lock);
		return (EBUSY);
	}
	ASSERT(mip->mi_perim_ocnt == 0);
	mip->mi_perim_owner = curthread;
	mip->mi_perim_ocnt++;
	mutex_exit(&mip->mi_perim_lock);

	return (0);
}

void
i_mac_perim_exit(mac_impl_t *mip)
{
	mac_client_impl_t	*mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Use the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}

	ASSERT(mip->mi_perim_owner == curthread && mip->mi_perim_ocnt != 0);

	mutex_enter(&mip->mi_perim_lock);
	if (--mip->mi_perim_ocnt == 0) {
		mip->mi_perim_owner = NULL;
		cv_signal(&mip->mi_perim_cv);
	}
	mutex_exit(&mip->mi_perim_lock);
}

/*
 * Returns whether the current thread holds the mac perimeter. Used in making
 * assertions.
 */
boolean_t
mac_perim_held(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mac_client_impl_t	*mcip;

	if (mip->mi_state_flags & MIS_IS_VNIC) {
		/*
		 * This is a VNIC. Use the lower mac since that is what
		 * we want to serialize on.
		 */
		mcip = mac_vnic_lower(mip);
		mip = mcip->mci_mip;
	}
	return (mip->mi_perim_owner == curthread);
}

/*
 * mac client interfaces to enter the mac perimeter of a mac end point, given
 * its mac handle, or macname or linkid.
 */
void
mac_perim_enter_by_mh(mac_handle_t mh, mac_perim_handle_t *mphp)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	i_mac_perim_enter(mip);
	/*
	 * The mac_perim_handle_t returned encodes the 'mip' and whether a
	 * mac_open has been done internally while entering the perimeter.
	 * This information is used in mac_perim_exit.
	 */
	MAC_ENCODE_MPH(*mphp, mip, 0);
}

int
mac_perim_enter_by_macname(const char *name, mac_perim_handle_t *mphp)
{
	int	err;
	mac_handle_t	mh;

	if ((err = mac_open(name, &mh)) != 0)
		return (err);

	mac_perim_enter_by_mh(mh, mphp);
	MAC_ENCODE_MPH(*mphp, mh, 1);
	return (0);
}

int
mac_perim_enter_by_linkid(datalink_id_t linkid, mac_perim_handle_t *mphp)
{
	int	err;
	mac_handle_t	mh;

	if ((err = mac_open_by_linkid(linkid, &mh)) != 0)
		return (err);

	mac_perim_enter_by_mh(mh, mphp);
	MAC_ENCODE_MPH(*mphp, mh, 1);
	return (0);
}

void
mac_perim_exit(mac_perim_handle_t mph)
{
	mac_impl_t	*mip;
	boolean_t	need_close;

	MAC_DECODE_MPH(mph, mip, need_close);
	i_mac_perim_exit(mip);
	if (need_close)
		mac_close((mac_handle_t)mip);
}

int
mac_hold(const char *macname, mac_impl_t **pmip)
{
	mac_impl_t	*mip;
	int	err;

	/*
	 * Check the device name length to make sure it won't overflow our
	 * buffer.
	 */
	if (strlen(macname) >= MAXNAMELEN)
		return (EINVAL);

	/*
	 * Look up its entry in the global hash table.
	 */
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	err = mod_hash_find(i_mac_impl_hash, (mod_hash_key_t)macname,
	    (mod_hash_val_t *)&mip);

	if (err != 0) {
		rw_exit(&i_mac_impl_lock);
		return (ENOENT);
	}

	if (mip->mi_state_flags & MIS_DISABLED) {
		rw_exit(&i_mac_impl_lock);
		return (ENOENT);
	}

	if (mip->mi_state_flags & MIS_EXCLUSIVE_HELD) {
		rw_exit(&i_mac_impl_lock);
		return (EBUSY);
	}

	mip->mi_ref++;
	rw_exit(&i_mac_impl_lock);

	*pmip = mip;
	return (0);
}

void
mac_rele(mac_impl_t *mip)
{
	rw_enter(&i_mac_impl_lock, RW_WRITER);
	ASSERT(mip->mi_ref != 0);
	if (--mip->mi_ref == 0) {
		ASSERT(mip->mi_nactiveclients == 0 &&
		    !(mip->mi_state_flags & MIS_EXCLUSIVE));
	}
	rw_exit(&i_mac_impl_lock);
}
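/*
 * As an illustrative sketch (not code from this file), internal callers
 * pair the two functions above around use of the mac_impl_t; "macname" is
 * a placeholder:
 *
 *	mac_impl_t *mip;
 *
 *	if (mac_hold(macname, &mip) == 0) {
 *		... use mip; mi_ref keeps it from going away ...
 *		mac_rele(mip);
 *	}
 */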
/*
 * Private GLDv3 function to start a MAC instance.
 */
int
mac_start(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	int		err = 0;
	mac_group_t	*defgrp;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mip->mi_start != NULL);

	/*
	 * Check whether the device is already started.
	 */
	if (mip->mi_active++ == 0) {
		mac_ring_t *ring = NULL;

		/*
		 * Start the device.
		 */
		err = mip->mi_start(mip->mi_driver);
		if (err != 0) {
			mip->mi_active--;
			return (err);
		}

		/*
		 * Start the default tx ring.
		 */
		if (mip->mi_default_tx_ring != NULL) {

			ring = (mac_ring_t *)mip->mi_default_tx_ring;
			if (ring->mr_state != MR_INUSE) {
				err = mac_start_ring(ring);
				if (err != 0) {
					mip->mi_active--;
					return (err);
				}
			}
		}

		if ((defgrp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) {
			/*
			 * Start the default ring, since it will be needed
			 * to receive broadcast and multicast traffic for
			 * both primary and non-primary MAC clients.
			 */
			ASSERT(defgrp->mrg_state == MAC_GROUP_STATE_REGISTERED);
			err = mac_start_group_and_rings(defgrp);
			if (err != 0) {
				mip->mi_active--;
				if ((ring != NULL) &&
				    (ring->mr_state == MR_INUSE))
					mac_stop_ring(ring);
				return (err);
			}
			mac_set_group_state(defgrp, MAC_GROUP_STATE_SHARED);
		}
	}

	return (err);
}

/*
 * Private GLDv3 function to stop a MAC instance.
 */
void
mac_stop(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;
	mac_group_t	*grp;

	ASSERT(mip->mi_stop != NULL);
	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));

	/*
	 * Check whether the device is still needed.
	 */
	ASSERT(mip->mi_active != 0);
	if (--mip->mi_active == 0) {
		if ((grp = MAC_DEFAULT_RX_GROUP(mip)) != NULL) {
			/*
			 * There should be no more active clients since the
			 * MAC is being stopped. Stop the default RX group
			 * and transition it back to registered state.
			 *
			 * When clients are torn down, the groups
			 * are released via mac_release_rx_group which
			 * knows that the default group is always in
			 * started mode since broadcast uses it. So
			 * we can assert that there are no clients
			 * (since mac_bcast_add doesn't register itself
			 * as a client) and that the group is in SHARED state.
			 */
			ASSERT(grp->mrg_state == MAC_GROUP_STATE_SHARED);
			ASSERT(MAC_GROUP_NO_CLIENT(grp) &&
			    mip->mi_nactiveclients == 0);
			mac_stop_group_and_rings(grp);
			mac_set_group_state(grp, MAC_GROUP_STATE_REGISTERED);
		}

		if (mip->mi_default_tx_ring != NULL) {
			mac_ring_t *ring;

			ring = (mac_ring_t *)mip->mi_default_tx_ring;
			if (ring->mr_state == MR_INUSE) {
				mac_stop_ring(ring);
				ring->mr_flag = 0;
			}
		}

		/*
		 * Stop the device.
		 */
		mip->mi_stop(mip->mi_driver);
	}
}

int
i_mac_promisc_set(mac_impl_t *mip, boolean_t on)
{
	int err = 0;

	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
	ASSERT(mip->mi_setpromisc != NULL);

	if (on) {
		/*
		 * Enable promiscuous mode on the device if not yet enabled.
		 */
		if (mip->mi_devpromisc++ == 0) {
			err = mip->mi_setpromisc(mip->mi_driver, B_TRUE);
			if (err != 0) {
				mip->mi_devpromisc--;
				return (err);
			}
			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
		}
	} else {
		if (mip->mi_devpromisc == 0)
			return (EPROTO);

		/*
		 * Disable promiscuous mode on the device if this is the last
		 * enabling.
		 */
		if (--mip->mi_devpromisc == 0) {
			err = mip->mi_setpromisc(mip->mi_driver, B_FALSE);
			if (err != 0) {
				mip->mi_devpromisc++;
				return (err);
			}
			i_mac_notify(mip, MAC_NOTE_DEVPROMISC);
		}
	}

	return (0);
}

/*
 * The promiscuity state can change any time. If the caller needs to take
 * actions that are atomic with the promiscuity state, then the caller needs
 * to bracket the entire sequence with mac_perim_enter/exit.
 */
boolean_t
mac_promisc_get(mac_handle_t mh)
{
	mac_impl_t	*mip = (mac_impl_t *)mh;

	/*
	 * Return the current promiscuity.
	 */
	return (mip->mi_devpromisc != 0);
}

/*
 * Invoked at MAC instance attach time to initialize the list
 * of factory MAC addresses supported by a MAC instance. This function
 * builds a local cache in the mac_impl_t for the MAC addresses
 * supported by the underlying hardware. The MAC clients themselves
 * use the mac_addr_factory*() functions to query and reserve
 * factory MAC addresses.
 */
void
mac_addr_factory_init(mac_impl_t *mip)
{
	mac_capab_multifactaddr_t capab;
	uint8_t *addr;
	int i;

	/*
	 * First round to see how many factory MAC addresses are available.
	 */
	bzero(&capab, sizeof (capab));
	if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_MULTIFACTADDR,
	    &capab) || (capab.mcm_naddr == 0)) {
		/*
		 * The MAC instance doesn't support multiple factory
		 * MAC addresses, we're done here.
		 */
		return;
	}

	/*
	 * Allocate the space and get all the factory addresses.
	 */
	addr = kmem_alloc(capab.mcm_naddr * MAXMACADDRLEN, KM_SLEEP);
	capab.mcm_getaddr(mip->mi_driver, capab.mcm_naddr, addr);

	mip->mi_factory_addr_num = capab.mcm_naddr;
	mip->mi_factory_addr = kmem_zalloc(mip->mi_factory_addr_num *
	    sizeof (mac_factory_addr_t), KM_SLEEP);

	for (i = 0; i < capab.mcm_naddr; i++) {
		bcopy(addr + i * MAXMACADDRLEN,
		    mip->mi_factory_addr[i].mfa_addr,
		    mip->mi_type->mt_addr_length);
		mip->mi_factory_addr[i].mfa_in_use = B_FALSE;
	}

	kmem_free(addr, capab.mcm_naddr * MAXMACADDRLEN);
}

void
mac_addr_factory_fini(mac_impl_t *mip)
{
	if (mip->mi_factory_addr == NULL) {
		ASSERT(mip->mi_factory_addr_num == 0);
		return;
	}

	kmem_free(mip->mi_factory_addr, mip->mi_factory_addr_num *
	    sizeof (mac_factory_addr_t));

	mip->mi_factory_addr = NULL;
	mip->mi_factory_addr_num = 0;
}
/*
 * Reserve a factory MAC address. If *slot is set to -1, the function
 * attempts to reserve any of the available factory MAC addresses and
 * returns the reserved slot id. If no slots are available, the function
 * returns ENOSPC. If *slot is not set to -1, the function reserves
 * the specified slot if it is available, or returns EBUSY if the slot
 * is already used. Returns ENOTSUP if the underlying MAC does not
 * support multiple factory addresses. If the slot number is not -1 but
 * is invalid, returns EINVAL.
 */
int
mac_addr_factory_reserve(mac_client_handle_t mch, int *slot)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	mac_impl_t *mip = mcip->mci_mip;
	int i, ret = 0;

	i_mac_perim_enter(mip);
	/*
	 * Protect against concurrent readers that may need a self-consistent
	 * view of the factory addresses.
	 */
	rw_enter(&mip->mi_rw_lock, RW_WRITER);

	if (mip->mi_factory_addr_num == 0) {
		ret = ENOTSUP;
		goto bail;
	}

	if (*slot != -1) {
		/* check the specified slot */
		if (*slot < 1 || *slot > mip->mi_factory_addr_num) {
			ret = EINVAL;
			goto bail;
		}
		if (mip->mi_factory_addr[*slot-1].mfa_in_use) {
			ret = EBUSY;
			goto bail;
		}
	} else {
		/* pick the next available slot */
		for (i = 0; i < mip->mi_factory_addr_num; i++) {
			if (!mip->mi_factory_addr[i].mfa_in_use)
				break;
		}

		if (i == mip->mi_factory_addr_num) {
			ret = ENOSPC;
			goto bail;
		}
		*slot = i+1;
	}

	mip->mi_factory_addr[*slot-1].mfa_in_use = B_TRUE;
	mip->mi_factory_addr[*slot-1].mfa_client = mcip;

bail:
	rw_exit(&mip->mi_rw_lock);
	i_mac_perim_exit(mip);
	return (ret);
}

/*
 * Release the specified factory MAC address slot.
 */
void
mac_addr_factory_release(mac_client_handle_t mch, uint_t slot)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	mac_impl_t *mip = mcip->mci_mip;

	i_mac_perim_enter(mip);
	/*
	 * Protect against concurrent readers that may need a self-consistent
	 * view of the factory addresses.
	 */
	rw_enter(&mip->mi_rw_lock, RW_WRITER);

	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);
	ASSERT(mip->mi_factory_addr[slot-1].mfa_in_use);

	mip->mi_factory_addr[slot-1].mfa_in_use = B_FALSE;

	rw_exit(&mip->mi_rw_lock);
	i_mac_perim_exit(mip);
}

/*
 * Stores in mac_addr the value of the MAC address in the specified slot,
 * and in *addr_len its length. If the address is in use and client_name is
 * non-NULL, the name of the owning client is copied out as well; the caller
 * must provide a string of at least MAXNAMELEN bytes for it.
 */
void
mac_addr_factory_value(mac_handle_t mh, int slot, uchar_t *mac_addr,
    uint_t *addr_len, char *client_name, boolean_t *in_use_arg)
{
	mac_impl_t *mip = (mac_impl_t *)mh;
	boolean_t in_use;

	ASSERT(slot > 0 && slot <= mip->mi_factory_addr_num);

	/*
	 * Readers need to hold mi_rw_lock. Writers need to hold the mac
	 * perimeter and mi_rw_lock.
	 */
	rw_enter(&mip->mi_rw_lock, RW_READER);
	bcopy(mip->mi_factory_addr[slot-1].mfa_addr, mac_addr, MAXMACADDRLEN);
	*addr_len = mip->mi_type->mt_addr_length;
	in_use = mip->mi_factory_addr[slot-1].mfa_in_use;
	if (in_use && client_name != NULL) {
		bcopy(mip->mi_factory_addr[slot-1].mfa_client->mci_name,
		    client_name, MAXNAMELEN);
	}
	if (in_use_arg != NULL)
		*in_use_arg = in_use;
	rw_exit(&mip->mi_rw_lock);
}
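/*
 * As an illustrative sketch (not code from this file), a client reserves
 * any free factory address and releases it when done; "mch" is the client's
 * mac_client_handle_t:
 *
 *	int slot = -1;
 *
 *	if (mac_addr_factory_reserve(mch, &slot) == 0) {
 *		... use the address in the returned slot ...
 *		mac_addr_factory_release(mch, slot);
 *	}
 */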
/*
 * Returns the number of factory MAC addresses (in addition to the
 * primary MAC address), 0 if the underlying MAC doesn't support
 * that feature.
 */
uint_t
mac_addr_factory_num(mac_handle_t mh)
{
	mac_impl_t *mip = (mac_impl_t *)mh;

	return (mip->mi_factory_addr_num);
}

void
mac_rx_group_unmark(mac_group_t *grp, uint_t flag)
{
	mac_ring_t	*ring;

	for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next)
		ring->mr_flag &= ~flag;
}

/*
 * The following mac_hwrings_xxx() functions are private mac client functions
 * used by the aggr driver to access and control the underlying HW Rx group
 * and rings. In this case, the aggr driver has exclusive control of the
 * underlying HW Rx group/rings. It calls the following functions to
 * start/stop the HW Rx rings, disable/enable polling, add/remove MAC
 * addresses, or set up the Rx callback.
 */
/* ARGSUSED */
static void
mac_hwrings_rx_process(void *arg, mac_resource_handle_t srs,
    mblk_t *mp_chain, boolean_t loopback)
{
	mac_soft_ring_set_t	*mac_srs = (mac_soft_ring_set_t *)srs;
	mac_srs_rx_t		*srs_rx = &mac_srs->srs_rx;
	mac_direct_rx_t		proc;
	void			*arg1;
	mac_resource_handle_t	arg2;

	proc = srs_rx->sr_func;
	arg1 = srs_rx->sr_arg1;
	arg2 = mac_srs->srs_mrh;

	proc(arg1, arg2, mp_chain, NULL);
}

/*
 * This function is called to get the list of HW rings that are reserved by
 * an exclusive mac client.
 *
 * Return value: the number of HW rings.
 */
int
mac_hwrings_get(mac_client_handle_t mch, mac_group_handle_t *hwgh,
    mac_ring_handle_t *hwrh, mac_ring_type_t rtype)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	flow_entry_t *flent = mcip->mci_flent;
	mac_group_t *grp;
	mac_ring_t *ring;
	int cnt = 0;

	if (rtype == MAC_RING_TYPE_RX) {
		grp = flent->fe_rx_ring_group;
	} else if (rtype == MAC_RING_TYPE_TX) {
		grp = flent->fe_tx_ring_group;
	} else {
		ASSERT(B_FALSE);
		return (-1);
	}
	/*
	 * The mac client did not reserve any RX group, return directly.
	 * This is probably because the underlying MAC does not support
	 * any groups.
	 */
	if (hwgh != NULL)
		*hwgh = NULL;
	if (grp == NULL)
		return (0);
	/*
	 * This group must be reserved by this mac client.
	 */
	ASSERT((grp->mrg_state == MAC_GROUP_STATE_RESERVED) &&
	    (mcip == MAC_GROUP_ONLY_CLIENT(grp)));

	for (ring = grp->mrg_rings; ring != NULL; ring = ring->mr_next, cnt++) {
		ASSERT(cnt < MAX_RINGS_PER_GROUP);
		hwrh[cnt] = (mac_ring_handle_t)ring;
	}
	if (hwgh != NULL)
		*hwgh = (mac_group_handle_t)grp;

	return (cnt);
}
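/*
 * As an illustrative sketch (not code from this file), an exclusive client
 * such as aggr collects its reserved Rx rings as follows:
 *
 *	mac_group_handle_t hwgh;
 *	mac_ring_handle_t hwrh[MAX_RINGS_PER_GROUP];
 *	int i, cnt;
 *
 *	cnt = mac_hwrings_get(mch, &hwgh, hwrh, MAC_RING_TYPE_RX);
 *	for (i = 0; i < cnt; i++)
 *		... operate on hwrh[i], e.g. mac_hwring_start() ...
 */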
/*
 * This function is called to get info about Tx/Rx rings.
 *
 * Return value: returns uint_t which will have various bits set
 * that indicate different properties of the ring.
 */
uint_t
mac_hwring_getinfo(mac_ring_handle_t rh)
{
	mac_ring_t *ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &ring->mr_info;

	return (info->mri_flags);
}

/*
 * Export ddi interrupt handles from the HW ring to the pseudo ring and
 * set up the RX callback of the mac client which exclusively controls the
 * HW ring.
 */
void
mac_hwring_setup(mac_ring_handle_t hwrh, mac_resource_handle_t prh,
    mac_ring_handle_t pseudo_rh)
{
	mac_ring_t		*hw_ring = (mac_ring_t *)hwrh;
	mac_ring_t		*pseudo_ring;
	mac_soft_ring_set_t	*mac_srs = hw_ring->mr_srs;

	if (pseudo_rh != NULL) {
		pseudo_ring = (mac_ring_t *)pseudo_rh;
		/* Export the ddi handles to pseudo ring */
		pseudo_ring->mr_info.mri_intr.mi_ddi_handle =
		    hw_ring->mr_info.mri_intr.mi_ddi_handle;
		pseudo_ring->mr_info.mri_intr.mi_ddi_shared =
		    hw_ring->mr_info.mri_intr.mi_ddi_shared;
		/*
		 * Save a pointer to pseudo ring in the hw ring. If
		 * interrupt handle changes, the hw ring will be
		 * notified of the change (see mac_ring_intr_set())
		 * and the appropriate change has to be made to
		 * the pseudo ring that has exported the ddi handle.
		 */
		hw_ring->mr_prh = pseudo_rh;
	}

	if (hw_ring->mr_type == MAC_RING_TYPE_RX) {
		ASSERT(!(mac_srs->srs_type & SRST_TX));
		mac_srs->srs_mrh = prh;
		mac_srs->srs_rx.sr_lower_proc = mac_hwrings_rx_process;
	}
}

void
mac_hwring_teardown(mac_ring_handle_t hwrh)
{
	mac_ring_t		*hw_ring = (mac_ring_t *)hwrh;
	mac_soft_ring_set_t	*mac_srs;

	if (hw_ring == NULL)
		return;
	hw_ring->mr_prh = NULL;
	if (hw_ring->mr_type == MAC_RING_TYPE_RX) {
		mac_srs = hw_ring->mr_srs;
		ASSERT(!(mac_srs->srs_type & SRST_TX));
		mac_srs->srs_rx.sr_lower_proc = mac_rx_srs_process;
		mac_srs->srs_mrh = NULL;
	}
}

int
mac_hwring_disable_intr(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;
	mac_intr_t *intr = &rr_ring->mr_info.mri_intr;

	return (intr->mi_disable(intr->mi_handle));
}

int
mac_hwring_enable_intr(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;
	mac_intr_t *intr = &rr_ring->mr_info.mri_intr;

	return (intr->mi_enable(intr->mi_handle));
}

int
mac_hwring_start(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;

	MAC_RING_UNMARK(rr_ring, MR_QUIESCE);
	return (0);
}

void
mac_hwring_stop(mac_ring_handle_t rh)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;

	mac_rx_ring_quiesce(rr_ring, MR_QUIESCE);
}

mblk_t *
mac_hwring_poll(mac_ring_handle_t rh, int bytes_to_pickup)
{
	mac_ring_t *rr_ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &rr_ring->mr_info;

	return (info->mri_poll(info->mri_driver, bytes_to_pickup));
}

/*
 * Send packets through a selected tx ring.
 */
mblk_t *
mac_hwring_tx(mac_ring_handle_t rh, mblk_t *mp)
{
	mac_ring_t *ring = (mac_ring_t *)rh;
	mac_ring_info_t *info = &ring->mr_info;

	ASSERT(ring->mr_type == MAC_RING_TYPE_TX &&
	    ring->mr_state >= MR_INUSE);
	return (info->mri_tx(info->mri_driver, mp));
}

/*
 * Query stats for a particular rx/tx ring
 */
int
mac_hwring_getstat(mac_ring_handle_t rh, uint_t stat, uint64_t *val)
{
	mac_ring_t	*ring = (mac_ring_t *)rh;
	mac_ring_info_t	*info = &ring->mr_info;

	return (info->mri_stat(info->mri_driver, stat, val));
}
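/*
 * As an illustrative sketch (not code from this file), a client that owns
 * a ring exclusively can switch it from interrupt to poll mode and pull a
 * bounded number of bytes; "rh" is the ring handle obtained earlier and the
 * byte count is arbitrary:
 *
 *	mblk_t *mp;
 *
 *	if (mac_hwring_disable_intr(rh) == 0) {
 *		mp = mac_hwring_poll(rh, 128 * 1024);
 *		... process the chain ...
 *		(void) mac_hwring_enable_intr(rh);
 *	}
 */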
/*
 * Private function that is only used by aggr to send packets through
 * a port/Tx ring. Since aggr exposes a pseudo Tx ring even for ports
 * that do not expose Tx rings, the aggr_ring_tx() entry point needs
 * access to the mac_impl_t to send packets through the m_tx() entry
 * point. It accomplishes this by calling the mac_hwring_send_priv()
 * function.
 */
mblk_t *
mac_hwring_send_priv(mac_client_handle_t mch, mac_ring_handle_t rh, mblk_t *mp)
{
	mac_client_impl_t *mcip = (mac_client_impl_t *)mch;
	mac_impl_t *mip = mcip->mci_mip;

	MAC_TX(mip, rh, mp, mcip);
	return (mp);
}

int
mac_hwgroup_addmac(mac_group_handle_t gh, const uint8_t *addr)
{
	mac_group_t *group = (mac_group_t *)gh;

	return (mac_group_addmac(group, addr));
}

int
mac_hwgroup_remmac(mac_group_handle_t gh, const uint8_t *addr)
{
	mac_group_t *group = (mac_group_t *)gh;

	return (mac_group_remmac(group, addr));
}

/*
 * Set the RX group to be shared/reserved. Note that the group must be
 * started/stopped outside of this function.
 */
void
mac_set_group_state(mac_group_t *grp, mac_group_state_t state)
{
	/*
	 * If there is no change in the group state, just return.
	 */
	if (grp->mrg_state == state)
		return;

	switch (state) {
	case MAC_GROUP_STATE_RESERVED:
		/*
		 * Successfully reserved the group.
		 *
		 * Given that there is an exclusive client controlling this
		 * group, we enable the group level polling when available,
		 * so that SRSs get to turn on/off individual rings they're
		 * assigned to.
		 */
		ASSERT(MAC_PERIM_HELD(grp->mrg_mh));

		if (grp->mrg_type == MAC_RING_TYPE_RX &&
		    GROUP_INTR_DISABLE_FUNC(grp) != NULL) {
			GROUP_INTR_DISABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp));
		}
		break;

	case MAC_GROUP_STATE_SHARED:
		/*
		 * Set all rings of this group to software classified.
		 * If the group has an overriding interrupt, then re-enable it.
		 */
		ASSERT(MAC_PERIM_HELD(grp->mrg_mh));

		if (grp->mrg_type == MAC_RING_TYPE_RX &&
		    GROUP_INTR_ENABLE_FUNC(grp) != NULL) {
			GROUP_INTR_ENABLE_FUNC(grp)(GROUP_INTR_HANDLE(grp));
		}
		/* The ring is not available for reservations any more */
		break;

	case MAC_GROUP_STATE_REGISTERED:
		/* Also callable from mac_register, perim is not held */
		break;

	default:
		ASSERT(B_FALSE);
		break;
	}

	grp->mrg_state = state;
}

/*
 * Quiesce future hardware classified packets for the specified Rx ring
 */
static void
mac_rx_ring_quiesce(mac_ring_t *rx_ring, uint_t ring_flag)
{
	ASSERT(rx_ring->mr_classify_type == MAC_HW_CLASSIFIER);
	ASSERT(ring_flag == MR_CONDEMNED || ring_flag == MR_QUIESCE);

	mutex_enter(&rx_ring->mr_lock);
	rx_ring->mr_flag |= ring_flag;
	while (rx_ring->mr_refcnt != 0)
		cv_wait(&rx_ring->mr_cv, &rx_ring->mr_lock);
	mutex_exit(&rx_ring->mr_lock);
}

/*
 * Please see mac_tx for details about the per cpu locking scheme
 */
static void
mac_tx_lock_all(mac_client_impl_t *mcip)
{
	int	i;

	for (i = 0; i <= mac_tx_percpu_cnt; i++)
		mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
}

static void
mac_tx_unlock_all(mac_client_impl_t *mcip)
{
	int	i;

	for (i = mac_tx_percpu_cnt; i >= 0; i--)
		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
}

static void
mac_tx_unlock_allbutzero(mac_client_impl_t *mcip)
{
	int	i;

	for (i = mac_tx_percpu_cnt; i > 0; i--)
		mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
}

static int
mac_tx_sum_refcnt(mac_client_impl_t *mcip)
{
	int	i;
	int	refcnt = 0;

	for (i = 0; i <= mac_tx_percpu_cnt; i++)
		refcnt += mcip->mci_tx_pcpu[i].pcpu_tx_refcnt;

	return (refcnt);
}

/*
 * Stop future Tx packets coming down from the client in preparation for
 * quiescing the Tx side. This is needed for dynamic reclaim and reassignment
 * of rings between clients
 */
void
mac_tx_client_block(mac_client_impl_t *mcip)
{
	mac_tx_lock_all(mcip);
	mcip->mci_tx_flag |= MCI_TX_QUIESCE;
	while (mac_tx_sum_refcnt(mcip) != 0) {
		mac_tx_unlock_allbutzero(mcip);
		cv_wait(&mcip->mci_tx_cv, &mcip->mci_tx_pcpu[0].pcpu_tx_lock);
		mutex_exit(&mcip->mci_tx_pcpu[0].pcpu_tx_lock);
		mac_tx_lock_all(mcip);
	}
	mac_tx_unlock_all(mcip);
}

void
mac_tx_client_unblock(mac_client_impl_t *mcip)
{
	mac_tx_lock_all(mcip);
	mcip->mci_tx_flag &= ~MCI_TX_QUIESCE;
	mac_tx_unlock_all(mcip);
	/*
	 * We may fail to disable flow control for the last MAC_NOTE_TX
	 * notification because the MAC client is quiesced. Send the
	 * notification again.
	 */
	i_mac_notify(mcip->mci_mip, MAC_NOTE_TX);
}

/*
 * Wait for an SRS to quiesce. The SRS worker will signal us when the
 * quiesce is done.
 */
static void
mac_srs_quiesce_wait(mac_soft_ring_set_t *srs, uint_t srs_flag)
{
	mutex_enter(&srs->srs_lock);
	while (!(srs->srs_state & srs_flag))
		cv_wait(&srs->srs_quiesce_done_cv, &srs->srs_lock);
	mutex_exit(&srs->srs_lock);
}
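/*
 * As a rough sketch of the per cpu scheme referenced above (mac_tx itself
 * lives elsewhere in the mac client code; this is illustrative, not code
 * from this file), a data thread takes only its own cpu's lock and refcnt,
 * which is why mac_tx_client_block() must sum all the per cpu refcnts to
 * know the Tx side is drained:
 *
 *	i = CPU->cpu_seqid & mac_tx_percpu_cnt;
 *	mutex_enter(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
 *	if (mcip->mci_tx_flag & MCI_TX_QUIESCE)
 *		... the client is blocked, don't send ...
 *	else
 *		mcip->mci_tx_pcpu[i].pcpu_tx_refcnt++;
 *	mutex_exit(&mcip->mci_tx_pcpu[i].pcpu_tx_lock);
 */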
The protocol 1859 * works bottom up by cutting off packet flow from the bottommost point in the 1860 * mac, then the SRS, and then the soft rings. There are two use cases of this 1861 * mechanism. One is a temporary quiesce of the SRS, such as, say, while changing 1862 * the Rx callbacks. Another use case is Rx SRS teardown. In the former case 1863 * the QUIESCE prefix/suffix is used and in the latter CONDEMNED is used 1864 * for the SRS and MR flags. In the former case the threads pause waiting for 1865 * a restart, while in the latter case the threads exit. The Tx SRS teardown 1866 * is also mostly similar to the above. 1867 * 1868 * 1. Stop future hardware classified packets at the lowest level in the mac. 1869 * Remove any hardware classification rule (CONDEMNED case) and mark the 1870 * rings as CONDEMNED or QUIESCE as appropriate. This prevents the mr_refcnt 1871 * from increasing. Upcalls from the driver that come through hardware 1872 * classification will be dropped in mac_rx from now on. Then we wait for 1873 * the mr_refcnt to drop to zero. When the mr_refcnt reaches zero we are 1874 * sure there aren't any upcall threads from the driver through hardware 1875 * classification. In the case of SRS teardown we also remove the 1876 * classification rule in the driver. 1877 * 1878 * 2. Stop future software classified packets by marking the flow entry with 1879 * FE_QUIESCE or FE_CONDEMNED as appropriate, which prevents the refcnt from 1880 * increasing. We also remove the flow entry from the table in the latter 1881 * case. Then wait for the fe_refcnt to reach an appropriate quiescent value 1882 * that indicates there aren't any active threads using that flow entry. 1883 * 1884 * 3. Quiesce the SRS and softrings by signaling the SRS. The SRS poll thread, 1885 * SRS worker thread, and the soft ring threads are quiesced in sequence 1886 * with the SRS worker thread serving as a master controller. This 1887 * mechanism is explained in mac_srs_worker_quiesce(). 1888 * 1889 * The restart mechanism to reactivate the SRS and softrings is explained 1890 * in mac_srs_worker_restart(). Here we just signal the SRS worker to start the 1891 * restart sequence. 1892 */ 1893 void 1894 mac_rx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) 1895 { 1896 flow_entry_t *flent = srs->srs_flent; 1897 uint_t mr_flag, srs_done_flag; 1898 1899 ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); 1900 ASSERT(!(srs->srs_type & SRST_TX)); 1901 1902 if (srs_quiesce_flag == SRS_CONDEMNED) { 1903 mr_flag = MR_CONDEMNED; 1904 srs_done_flag = SRS_CONDEMNED_DONE; 1905 if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) 1906 mac_srs_client_poll_disable(srs->srs_mcip, srs); 1907 } else { 1908 ASSERT(srs_quiesce_flag == SRS_QUIESCE); 1909 mr_flag = MR_QUIESCE; 1910 srs_done_flag = SRS_QUIESCE_DONE; 1911 if (srs->srs_type & SRST_CLIENT_POLL_ENABLED) 1912 mac_srs_client_poll_quiesce(srs->srs_mcip, srs); 1913 } 1914 1915 if (srs->srs_ring != NULL) { 1916 mac_rx_ring_quiesce(srs->srs_ring, mr_flag); 1917 } else { 1918 /* 1919 * SRS is driven by software classification. In case 1920 * of CONDEMNED, the top level teardown functions will 1921 * deal with flow removal. 1922 */ 1923 if (srs_quiesce_flag != SRS_CONDEMNED) { 1924 FLOW_MARK(flent, FE_QUIESCE); 1925 mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 1926 } 1927 } 1928 1929 /* 1930 * Signal the SRS to quiesce itself, and then cv_wait for the 1931 * SRS quiesce to complete.
The SRS worker thread will wake us 1932 * up when the quiesce is complete. 1933 */ 1934 mac_srs_signal(srs, srs_quiesce_flag); 1935 mac_srs_quiesce_wait(srs, srs_done_flag); 1936 } 1937 1938 /* 1939 * Remove an SRS. 1940 */ 1941 void 1942 mac_rx_srs_remove(mac_soft_ring_set_t *srs) 1943 { 1944 flow_entry_t *flent = srs->srs_flent; 1945 int i; 1946 1947 mac_rx_srs_quiesce(srs, SRS_CONDEMNED); 1948 /* 1949 * Locate and remove our entry in the fe_rx_srs[] array, and 1950 * adjust the fe_rx_srs array entries and array count by 1951 * moving the last entry into the vacated spot. 1952 */ 1953 mutex_enter(&flent->fe_lock); 1954 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 1955 if (flent->fe_rx_srs[i] == srs) 1956 break; 1957 } 1958 1959 ASSERT(i != 0 && i < flent->fe_rx_srs_cnt); 1960 if (i != flent->fe_rx_srs_cnt - 1) { 1961 flent->fe_rx_srs[i] = 1962 flent->fe_rx_srs[flent->fe_rx_srs_cnt - 1]; 1963 i = flent->fe_rx_srs_cnt - 1; 1964 } 1965 1966 flent->fe_rx_srs[i] = NULL; 1967 flent->fe_rx_srs_cnt--; 1968 mutex_exit(&flent->fe_lock); 1969 1970 mac_srs_free(srs); 1971 } 1972 1973 static void 1974 mac_srs_clear_flag(mac_soft_ring_set_t *srs, uint_t flag) 1975 { 1976 mutex_enter(&srs->srs_lock); 1977 srs->srs_state &= ~flag; 1978 mutex_exit(&srs->srs_lock); 1979 } 1980 1981 void 1982 mac_rx_srs_restart(mac_soft_ring_set_t *srs) 1983 { 1984 flow_entry_t *flent = srs->srs_flent; 1985 mac_ring_t *mr; 1986 1987 ASSERT(MAC_PERIM_HELD((mac_handle_t)FLENT_TO_MIP(flent))); 1988 ASSERT((srs->srs_type & SRST_TX) == 0); 1989 1990 /* 1991 * This handles a change in the number of SRSs between the quiesce 1992 * and restart operation of a flow. 1993 */ 1994 if (!SRS_QUIESCED(srs)) 1995 return; 1996 1997 /* 1998 * Signal the SRS to restart itself. Wait for the restart to complete. 1999 * Note that we only restart the SRS if it is not marked as 2000 * permanently quiesced. 2001 */ 2002 if (!SRS_QUIESCED_PERMANENT(srs)) { 2003 mac_srs_signal(srs, SRS_RESTART); 2004 mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); 2005 mac_srs_clear_flag(srs, SRS_RESTART_DONE); 2006 2007 mac_srs_client_poll_restart(srs->srs_mcip, srs); 2008 } 2009 2010 /* Finally clear the flags to let the packets in */ 2011 mr = srs->srs_ring; 2012 if (mr != NULL) { 2013 MAC_RING_UNMARK(mr, MR_QUIESCE); 2014 /* In case the ring was stopped, safely restart it */ 2015 if (mr->mr_state != MR_INUSE) 2016 (void) mac_start_ring(mr); 2017 } else { 2018 FLOW_UNMARK(flent, FE_QUIESCE); 2019 } 2020 } 2021 2022 /* 2023 * Temporary quiesce of a flow and associated Rx SRS. 2024 * Please see the block comment above mac_rx_classify_flow_rem.
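 *
 * As a sketch of how the quiesce and restart walkers pair up in
 * practice (hypothetical caller; the real entry points are
 * mac_rx_client_quiesce() and mac_rx_client_restart() further below):
 *
 *	mac_rx_client_quiesce(mch);	quiesce every Rx SRS of the client
 *	... reconfigure classification state or Rx callbacks ...
 *	mac_rx_client_restart(mch);	restart them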
2025 */ 2026 /* ARGSUSED */ 2027 int 2028 mac_rx_classify_flow_quiesce(flow_entry_t *flent, void *arg) 2029 { 2030 int i; 2031 2032 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 2033 mac_rx_srs_quiesce((mac_soft_ring_set_t *)flent->fe_rx_srs[i], 2034 SRS_QUIESCE); 2035 } 2036 return (0); 2037 } 2038 2039 /* 2040 * Restart a flow and associated Rx SRS that has been quiesced temporarily 2041 * Please see block comment above mac_rx_classify_flow_rem 2042 */ 2043 /* ARGSUSED */ 2044 int 2045 mac_rx_classify_flow_restart(flow_entry_t *flent, void *arg) 2046 { 2047 int i; 2048 2049 for (i = 0; i < flent->fe_rx_srs_cnt; i++) 2050 mac_rx_srs_restart((mac_soft_ring_set_t *)flent->fe_rx_srs[i]); 2051 2052 return (0); 2053 } 2054 2055 void 2056 mac_srs_perm_quiesce(mac_client_handle_t mch, boolean_t on) 2057 { 2058 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2059 flow_entry_t *flent = mcip->mci_flent; 2060 mac_impl_t *mip = mcip->mci_mip; 2061 mac_soft_ring_set_t *mac_srs; 2062 int i; 2063 2064 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 2065 2066 if (flent == NULL) 2067 return; 2068 2069 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 2070 mac_srs = flent->fe_rx_srs[i]; 2071 mutex_enter(&mac_srs->srs_lock); 2072 if (on) 2073 mac_srs->srs_state |= SRS_QUIESCE_PERM; 2074 else 2075 mac_srs->srs_state &= ~SRS_QUIESCE_PERM; 2076 mutex_exit(&mac_srs->srs_lock); 2077 } 2078 } 2079 2080 void 2081 mac_rx_client_quiesce(mac_client_handle_t mch) 2082 { 2083 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2084 mac_impl_t *mip = mcip->mci_mip; 2085 2086 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 2087 2088 if (MCIP_DATAPATH_SETUP(mcip)) { 2089 (void) mac_rx_classify_flow_quiesce(mcip->mci_flent, 2090 NULL); 2091 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2092 mac_rx_classify_flow_quiesce, NULL); 2093 } 2094 } 2095 2096 void 2097 mac_rx_client_restart(mac_client_handle_t mch) 2098 { 2099 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2100 mac_impl_t *mip = mcip->mci_mip; 2101 2102 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 2103 2104 if (MCIP_DATAPATH_SETUP(mcip)) { 2105 (void) mac_rx_classify_flow_restart(mcip->mci_flent, NULL); 2106 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2107 mac_rx_classify_flow_restart, NULL); 2108 } 2109 } 2110 2111 /* 2112 * This function only quiesces the Tx SRS and softring worker threads. Callers 2113 * need to make sure that there aren't any mac client threads doing current or 2114 * future transmits in the mac before calling this function. 2115 */ 2116 void 2117 mac_tx_srs_quiesce(mac_soft_ring_set_t *srs, uint_t srs_quiesce_flag) 2118 { 2119 mac_client_impl_t *mcip = srs->srs_mcip; 2120 2121 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2122 2123 ASSERT(srs->srs_type & SRST_TX); 2124 ASSERT(srs_quiesce_flag == SRS_CONDEMNED || 2125 srs_quiesce_flag == SRS_QUIESCE); 2126 2127 /* 2128 * Signal the SRS to quiesce itself, and then cv_wait for the 2129 * SRS quiesce to complete. The SRS worker thread will wake us 2130 * up when the quiesce is complete 2131 */ 2132 mac_srs_signal(srs, srs_quiesce_flag); 2133 mac_srs_quiesce_wait(srs, srs_quiesce_flag == SRS_QUIESCE ? 2134 SRS_QUIESCE_DONE : SRS_CONDEMNED_DONE); 2135 } 2136 2137 void 2138 mac_tx_srs_restart(mac_soft_ring_set_t *srs) 2139 { 2140 /* 2141 * Resizing the fanout could result in creation of new SRSs. 
2142 * They may not necessarily be in the quiesced state, in which 2143 * case they need to be restarted. 2144 */ 2145 if (!SRS_QUIESCED(srs)) 2146 return; 2147 2148 mac_srs_signal(srs, SRS_RESTART); 2149 mac_srs_quiesce_wait(srs, SRS_RESTART_DONE); 2150 mac_srs_clear_flag(srs, SRS_RESTART_DONE); 2151 } 2152 2153 /* 2154 * Temporary quiesce of a flow and associated Tx SRS. 2155 * Please see the block comment above mac_rx_srs_quiesce. 2156 */ 2157 /* ARGSUSED */ 2158 int 2159 mac_tx_flow_quiesce(flow_entry_t *flent, void *arg) 2160 { 2161 /* 2162 * The fe_tx_srs is NULL for a subflow on an interface that is 2163 * not plumbed. 2164 */ 2165 if (flent->fe_tx_srs != NULL) 2166 mac_tx_srs_quiesce(flent->fe_tx_srs, SRS_QUIESCE); 2167 return (0); 2168 } 2169 2170 /* ARGSUSED */ 2171 int 2172 mac_tx_flow_restart(flow_entry_t *flent, void *arg) 2173 { 2174 /* 2175 * The fe_tx_srs is NULL for a subflow on an interface that is 2176 * not plumbed. 2177 */ 2178 if (flent->fe_tx_srs != NULL) 2179 mac_tx_srs_restart(flent->fe_tx_srs); 2180 return (0); 2181 } 2182 2183 static void 2184 i_mac_tx_client_quiesce(mac_client_handle_t mch, uint_t srs_quiesce_flag) 2185 { 2186 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2187 2188 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2189 2190 mac_tx_client_block(mcip); 2191 if (MCIP_TX_SRS(mcip) != NULL) { 2192 mac_tx_srs_quiesce(MCIP_TX_SRS(mcip), srs_quiesce_flag); 2193 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2194 mac_tx_flow_quiesce, NULL); 2195 } 2196 } 2197 2198 void 2199 mac_tx_client_quiesce(mac_client_handle_t mch) 2200 { 2201 i_mac_tx_client_quiesce(mch, SRS_QUIESCE); 2202 } 2203 2204 void 2205 mac_tx_client_condemn(mac_client_handle_t mch) 2206 { 2207 i_mac_tx_client_quiesce(mch, SRS_CONDEMNED); 2208 } 2209 2210 void 2211 mac_tx_client_restart(mac_client_handle_t mch) 2212 { 2213 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 2214 2215 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2216 2217 mac_tx_client_unblock(mcip); 2218 if (MCIP_TX_SRS(mcip) != NULL) { 2219 mac_tx_srs_restart(MCIP_TX_SRS(mcip)); 2220 (void) mac_flow_walk_nolock(mcip->mci_subflow_tab, 2221 mac_tx_flow_restart, NULL); 2222 } 2223 } 2224 2225 void 2226 mac_tx_client_flush(mac_client_impl_t *mcip) 2227 { 2228 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 2229 2230 mac_tx_client_quiesce((mac_client_handle_t)mcip); 2231 mac_tx_client_restart((mac_client_handle_t)mcip); 2232 } 2233 2234 void 2235 mac_client_quiesce(mac_client_impl_t *mcip) 2236 { 2237 mac_rx_client_quiesce((mac_client_handle_t)mcip); 2238 mac_tx_client_quiesce((mac_client_handle_t)mcip); 2239 } 2240 2241 void 2242 mac_client_restart(mac_client_impl_t *mcip) 2243 { 2244 mac_rx_client_restart((mac_client_handle_t)mcip); 2245 mac_tx_client_restart((mac_client_handle_t)mcip); 2246 } 2247 2248 /* 2249 * Allocate a minor number. 2250 */ 2251 minor_t 2252 mac_minor_hold(boolean_t sleep) 2253 { 2254 minor_t minor; 2255 2256 /* 2257 * Grab a value from the arena. 2258 */ 2259 atomic_inc_32(&minor_count); 2260 2261 if (sleep) 2262 minor = (uint_t)id_alloc(minor_ids); 2263 else 2264 minor = (uint_t)id_alloc_nosleep(minor_ids); 2265 2266 if (minor == 0) { 2267 atomic_dec_32(&minor_count); 2268 return (0); 2269 } 2270 2271 return (minor); 2272 } 2273 2274 /* 2275 * Release a previously allocated minor number. 2276 */ 2277 void 2278 mac_minor_rele(minor_t minor) 2279 { 2280 /* 2281 * Return the value to the arena.
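 *
 * A minimal usage sketch pairing this with mac_minor_hold() above
 * (hypothetical caller; error handling elided and the ENOSPC choice
 * is illustrative only):
 *
 *	minor_t minor;
 *
 *	if ((minor = mac_minor_hold(B_FALSE)) == 0)
 *		return (ENOSPC);	no minor number available
 *	... use the minor number ...
 *	mac_minor_rele(minor);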
2282 */ 2283 id_free(minor_ids, minor); 2284 atomic_dec_32(&minor_count); 2285 } 2286 2287 uint32_t 2288 mac_no_notification(mac_handle_t mh) 2289 { 2290 mac_impl_t *mip = (mac_impl_t *)mh; 2291 2292 return (((mip->mi_state_flags & MIS_LEGACY) != 0) ? 2293 mip->mi_capab_legacy.ml_unsup_note : 0); 2294 } 2295 2296 /* 2297 * Prevent any new opens of this mac in preparation for unregister. 2298 */ 2299 int 2300 i_mac_disable(mac_impl_t *mip) 2301 { 2302 mac_client_impl_t *mcip; 2303 2304 rw_enter(&i_mac_impl_lock, RW_WRITER); 2305 if (mip->mi_state_flags & MIS_DISABLED) { 2306 /* Already disabled, return success */ 2307 rw_exit(&i_mac_impl_lock); 2308 return (0); 2309 } 2310 /* 2311 * See if there are any other references to this mac_t (e.g., VLANs). 2312 * If so, return failure. If all the other checks below pass, then 2313 * set mi_disabled atomically under the i_mac_impl_lock to prevent 2314 * any new VLANs from being created or new mac client opens of this 2315 * mac end point. 2316 */ 2317 if (mip->mi_ref > 0) { 2318 rw_exit(&i_mac_impl_lock); 2319 return (EBUSY); 2320 } 2321 2322 /* 2323 * MAC clients must delete all multicast groups they joined before 2324 * closing. Broadcast groups are reference counted; the last client 2325 * to delete a group waits until the group is physically 2326 * deleted. Since all clients have closed this mac end point, 2327 * mi_bcast_ngrps must be zero at this point. 2328 */ 2329 ASSERT(mip->mi_bcast_ngrps == 0); 2330 2331 /* 2332 * Don't let go of this if it has some flows. 2333 * All other code guarantees no flows are added to a disabled 2334 * mac, therefore it is sufficient to check for the flow table 2335 * only here. 2336 */ 2337 mcip = mac_primary_client_handle(mip); 2338 if ((mcip != NULL) && mac_link_has_flows((mac_client_handle_t)mcip)) { 2339 rw_exit(&i_mac_impl_lock); 2340 return (ENOTEMPTY); 2341 } 2342 2343 mip->mi_state_flags |= MIS_DISABLED; 2344 rw_exit(&i_mac_impl_lock); 2345 return (0); 2346 } 2347 2348 int 2349 mac_disable_nowait(mac_handle_t mh) 2350 { 2351 mac_impl_t *mip = (mac_impl_t *)mh; 2352 int err; 2353 2354 if ((err = i_mac_perim_enter_nowait(mip)) != 0) 2355 return (err); 2356 err = i_mac_disable(mip); 2357 i_mac_perim_exit(mip); 2358 return (err); 2359 } 2360 2361 int 2362 mac_disable(mac_handle_t mh) 2363 { 2364 mac_impl_t *mip = (mac_impl_t *)mh; 2365 int err; 2366 2367 i_mac_perim_enter(mip); 2368 err = i_mac_disable(mip); 2369 i_mac_perim_exit(mip); 2370 2371 /* 2372 * Clean up notification thread and wait for it to exit. 2373 */ 2374 if (err == 0) 2375 i_mac_notify_exit(mip); 2376 2377 return (err); 2378 } 2379 2380 /* 2381 * Called when the MAC instance has a non-empty flow table, to de-multiplex 2382 * incoming packets to the right flow. 2383 * The MAC's rw lock is assumed held as a READER. 2384 */ 2385 /* ARGSUSED */ 2386 static mblk_t * 2387 mac_rx_classify(mac_impl_t *mip, mac_resource_handle_t mrh, mblk_t *mp) 2388 { 2389 flow_entry_t *flent = NULL; 2390 uint_t flags = FLOW_INBOUND; 2391 int err; 2392 2393 /* 2394 * If the mac is a port of an aggregation, pass FLOW_IGNORE_VLAN 2395 * to mac_flow_lookup() so that the VLAN packets can be successfully 2396 * passed to the non-VLAN aggregation flows. 2397 * 2398 * Note that there is possibly a race between this and 2399 * mac_unicast_remove/add() and VLAN packets could be incorrectly 2400 * classified to non-VLAN flows of non-aggregation mac clients. These 2401 * VLAN packets will then be filtered out by the mac module.
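 *
 * The return convention, which mac_rx_flow() below relies on, is (a
 * sketch of the caller's view): a non-NULL return hands back a packet
 * that could not be classified, while a NULL return means the packet
 * was delivered to the callback of the flow it classified to:
 *
 *	if ((mp = mac_rx_classify(mip, mrh, mp)) != NULL)
 *		keep mp on the list of unclassified packets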
2402 */ 2403 if ((mip->mi_state_flags & MIS_EXCLUSIVE) != 0) 2404 flags |= FLOW_IGNORE_VLAN; 2405 2406 err = mac_flow_lookup(mip->mi_flow_tab, mp, flags, &flent); 2407 if (err != 0) { 2408 /* no registered receive function */ 2409 return (mp); 2410 } else { 2411 mac_client_impl_t *mcip; 2412 2413 /* 2414 * This flent might just be an additional one on the MAC client, 2415 * i.e., for classification purposes (different fdesc); however, 2416 * the resources, SRS et al., are in the mci_flent, so if 2417 * this isn't the mci_flent, we need to get it. 2418 */ 2419 if ((mcip = flent->fe_mcip) != NULL && 2420 mcip->mci_flent != flent) { 2421 FLOW_REFRELE(flent); 2422 flent = mcip->mci_flent; 2423 FLOW_TRY_REFHOLD(flent, err); 2424 if (err != 0) 2425 return (mp); 2426 } 2427 (flent->fe_cb_fn)(flent->fe_cb_arg1, flent->fe_cb_arg2, mp, 2428 B_FALSE); 2429 FLOW_REFRELE(flent); 2430 } 2431 return (NULL); 2432 } 2433 2434 mblk_t * 2435 mac_rx_flow(mac_handle_t mh, mac_resource_handle_t mrh, mblk_t *mp_chain) 2436 { 2437 mac_impl_t *mip = (mac_impl_t *)mh; 2438 mblk_t *bp, *bp1, **bpp, *list = NULL; 2439 2440 /* 2441 * We walk the chain and attempt to classify each packet. 2442 * The packets that couldn't be classified will be returned 2443 * to the caller. 2444 */ 2445 bp = mp_chain; 2446 bpp = &list; 2447 while (bp != NULL) { 2448 bp1 = bp; 2449 bp = bp->b_next; 2450 bp1->b_next = NULL; 2451 2452 if (mac_rx_classify(mip, mrh, bp1) != NULL) { 2453 *bpp = bp1; 2454 bpp = &bp1->b_next; 2455 } 2456 } 2457 return (list); 2458 } 2459 2460 static int 2461 mac_tx_flow_srs_wakeup(flow_entry_t *flent, void *arg) 2462 { 2463 mac_ring_handle_t ring = arg; 2464 2465 if (flent->fe_tx_srs) 2466 mac_tx_srs_wakeup(flent->fe_tx_srs, ring); 2467 return (0); 2468 } 2469 2470 void 2471 i_mac_tx_srs_notify(mac_impl_t *mip, mac_ring_handle_t ring) 2472 { 2473 mac_client_impl_t *cclient; 2474 mac_soft_ring_set_t *mac_srs; 2475 2476 /* 2477 * After grabbing the mi_rw_lock, the list of clients can't change. 2478 * If there are any clients, mi_disabled must be B_FALSE and can't 2479 * get set while there are clients. If there aren't any clients we 2480 * don't do anything. In any case the mip has to be valid. The driver 2481 * must make sure that it goes single threaded (with respect to mac 2482 * calls) and wait for all pending mac calls to finish before calling 2483 * mac_unregister. 2484 */ 2485 rw_enter(&i_mac_impl_lock, RW_READER); 2486 if (mip->mi_state_flags & MIS_DISABLED) { 2487 rw_exit(&i_mac_impl_lock); 2488 return; 2489 } 2490 2491 /* 2492 * Get the MAC Tx SRS by walking the mac_client_handle list. 2493 */ 2494 rw_enter(&mip->mi_rw_lock, RW_READER); 2495 for (cclient = mip->mi_clients_list; cclient != NULL; 2496 cclient = cclient->mci_client_next) { 2497 if ((mac_srs = MCIP_TX_SRS(cclient)) != NULL) { 2498 mac_tx_srs_wakeup(mac_srs, ring); 2499 } else { 2500 /* 2501 * Aggr opens underlying ports in exclusive mode 2502 * and registers flow control callbacks using 2503 * mac_tx_client_notify(). When opened in 2504 * exclusive mode, Tx SRS won't be created 2505 * during mac_unicast_add().
2506 */ 2507 if (cclient->mci_state_flags & MCIS_EXCLUSIVE) { 2508 mac_tx_invoke_callbacks(cclient, 2509 (mac_tx_cookie_t)ring); 2510 } 2511 } 2512 (void) mac_flow_walk(cclient->mci_subflow_tab, 2513 mac_tx_flow_srs_wakeup, ring); 2514 } 2515 rw_exit(&mip->mi_rw_lock); 2516 rw_exit(&i_mac_impl_lock); 2517 } 2518 2519 /* ARGSUSED */ 2520 void 2521 mac_multicast_refresh(mac_handle_t mh, mac_multicst_t refresh, void *arg, 2522 boolean_t add) 2523 { 2524 mac_impl_t *mip = (mac_impl_t *)mh; 2525 2526 i_mac_perim_enter((mac_impl_t *)mh); 2527 /* 2528 * If no specific refresh function was given, then default to the 2529 * driver's m_multicst entry point. 2530 */ 2531 if (refresh == NULL) { 2532 refresh = mip->mi_multicst; 2533 arg = mip->mi_driver; 2534 } 2535 2536 mac_bcast_refresh(mip, refresh, arg, add); 2537 i_mac_perim_exit((mac_impl_t *)mh); 2538 } 2539 2540 void 2541 mac_promisc_refresh(mac_handle_t mh, mac_setpromisc_t refresh, void *arg) 2542 { 2543 mac_impl_t *mip = (mac_impl_t *)mh; 2544 2545 /* 2546 * If no specific refresh function was given, then default to the 2547 * driver's m_promisc entry point. 2548 */ 2549 if (refresh == NULL) { 2550 refresh = mip->mi_setpromisc; 2551 arg = mip->mi_driver; 2552 } 2553 ASSERT(refresh != NULL); 2554 2555 /* 2556 * Call the refresh function with the current promiscuity. 2557 */ 2558 refresh(arg, (mip->mi_devpromisc != 0)); 2559 } 2560 2561 /* 2562 * The mac client requests that the mac not change its margin size to 2563 * be less than the specified value. If "current" is B_TRUE, then the client 2564 * requests that the mac not change its margin size to be smaller than the 2565 * current size. Further, return the current margin size value in this case. 2566 * 2567 * We keep every requested size in an ordered list from largest to smallest. 2568 */ 2569 int 2570 mac_margin_add(mac_handle_t mh, uint32_t *marginp, boolean_t current) 2571 { 2572 mac_impl_t *mip = (mac_impl_t *)mh; 2573 mac_margin_req_t **pp, *p; 2574 int err = 0; 2575 2576 rw_enter(&(mip->mi_rw_lock), RW_WRITER); 2577 if (current) 2578 *marginp = mip->mi_margin; 2579 2580 /* 2581 * If the current margin value cannot satisfy the margin requested, 2582 * return ENOTSUP directly. 2583 */ 2584 if (*marginp > mip->mi_margin) { 2585 err = ENOTSUP; 2586 goto done; 2587 } 2588 2589 /* 2590 * Check whether the given margin is already in the list. If so, 2591 * bump the reference count. 2592 */ 2593 for (pp = &mip->mi_mmrp; (p = *pp) != NULL; pp = &p->mmr_nextp) { 2594 if (p->mmr_margin == *marginp) { 2595 /* 2596 * The margin requested is already in the list, 2597 * so just bump the reference count. 2598 */ 2599 p->mmr_ref++; 2600 goto done; 2601 } 2602 if (p->mmr_margin < *marginp) 2603 break; 2604 } 2605 2606 2607 p = kmem_zalloc(sizeof (mac_margin_req_t), KM_SLEEP); 2608 p->mmr_margin = *marginp; 2609 p->mmr_ref++; 2610 p->mmr_nextp = *pp; 2611 *pp = p; 2612 2613 done: 2614 rw_exit(&(mip->mi_rw_lock)); 2615 return (err); 2616 } 2617 2618 /* 2619 * The mac client requests to cancel its previous mac_margin_add() request. 2620 * We remove the requested margin size from the list. 2621 */ 2622 int 2623 mac_margin_remove(mac_handle_t mh, uint32_t margin) 2624 { 2625 mac_impl_t *mip = (mac_impl_t *)mh; 2626 mac_margin_req_t **pp, *p; 2627 int err = 0; 2628 2629 rw_enter(&(mip->mi_rw_lock), RW_WRITER); 2630 /* 2631 * Find the entry in the list for the given margin.
2632 */ 2633 for (pp = &(mip->mi_mmrp); (p = *pp) != NULL; pp = &(p->mmr_nextp)) { 2634 if (p->mmr_margin == margin) { 2635 if (--p->mmr_ref == 0) 2636 break; 2637 2638 /* 2639 * There is still a reference to this margin so 2640 * there's nothing more to do. 2641 */ 2642 goto done; 2643 } 2644 } 2645 2646 /* 2647 * We did not find an entry for the given margin. 2648 */ 2649 if (p == NULL) { 2650 err = ENOENT; 2651 goto done; 2652 } 2653 2654 ASSERT(p->mmr_ref == 0); 2655 2656 /* 2657 * Remove it from the list. 2658 */ 2659 *pp = p->mmr_nextp; 2660 kmem_free(p, sizeof (mac_margin_req_t)); 2661 done: 2662 rw_exit(&(mip->mi_rw_lock)); 2663 return (err); 2664 } 2665 2666 boolean_t 2667 mac_margin_update(mac_handle_t mh, uint32_t margin) 2668 { 2669 mac_impl_t *mip = (mac_impl_t *)mh; 2670 uint32_t margin_needed = 0; 2671 2672 rw_enter(&(mip->mi_rw_lock), RW_WRITER); 2673 2674 if (mip->mi_mmrp != NULL) 2675 margin_needed = mip->mi_mmrp->mmr_margin; 2676 2677 if (margin_needed <= margin) 2678 mip->mi_margin = margin; 2679 2680 rw_exit(&(mip->mi_rw_lock)); 2681 2682 if (margin_needed <= margin) 2683 i_mac_notify(mip, MAC_NOTE_MARGIN); 2684 2685 return (margin_needed <= margin); 2686 } 2687 2688 /* 2689 * MAC clients use this interface to request that a MAC device not change its 2690 * MTU below the specified amount. At this time, that amount must be within the 2691 * range of the device's current minimum and the device's current maximum; e.g., a 2692 * client cannot request a 3000-byte MTU when the device's MTU is currently 2693 * 2000. 2694 * 2695 * If "current" is set to B_TRUE, then the request is simply to reserve the 2696 * current underlying mac's maximum for this mac client and return it in mtup. 2697 */ 2698 int 2699 mac_mtu_add(mac_handle_t mh, uint32_t *mtup, boolean_t current) 2700 { 2701 mac_impl_t *mip = (mac_impl_t *)mh; 2702 mac_mtu_req_t *prev, *cur; 2703 mac_propval_range_t mpr; 2704 int err; 2705 2706 i_mac_perim_enter(mip); 2707 rw_enter(&mip->mi_rw_lock, RW_WRITER); 2708 2709 if (current == B_TRUE) 2710 *mtup = mip->mi_sdu_max; 2711 mpr.mpr_count = 1; 2712 err = mac_prop_info(mh, MAC_PROP_MTU, "mtu", NULL, 0, &mpr, NULL); 2713 if (err != 0) { 2714 rw_exit(&mip->mi_rw_lock); 2715 i_mac_perim_exit(mip); 2716 return (err); 2717 } 2718 2719 if (*mtup > mip->mi_sdu_max || 2720 *mtup < mpr.mpr_range_uint32[0].mpur_min) { 2721 rw_exit(&mip->mi_rw_lock); 2722 i_mac_perim_exit(mip); 2723 return (ENOTSUP); 2724 } 2725 2726 prev = NULL; 2727 for (cur = mip->mi_mtrp; cur != NULL; cur = cur->mtr_nextp) { 2728 if (*mtup == cur->mtr_mtu) { 2729 cur->mtr_ref++; 2730 rw_exit(&mip->mi_rw_lock); 2731 i_mac_perim_exit(mip); 2732 return (0); 2733 } 2734 2735 if (*mtup > cur->mtr_mtu) 2736 break; 2737 2738 prev = cur; 2739 } 2740 2741 cur = kmem_alloc(sizeof (mac_mtu_req_t), KM_SLEEP); 2742 cur->mtr_mtu = *mtup; 2743 cur->mtr_ref = 1; 2744 if (prev != NULL) { 2745 cur->mtr_nextp = prev->mtr_nextp; 2746 prev->mtr_nextp = cur; 2747 } else { 2748 cur->mtr_nextp = mip->mi_mtrp; 2749 mip->mi_mtrp = cur; 2750 } 2751 2752 rw_exit(&mip->mi_rw_lock); 2753 i_mac_perim_exit(mip); 2754 return (0); 2755 } 2756 2757 int 2758 mac_mtu_remove(mac_handle_t mh, uint32_t mtu) 2759 { 2760 mac_impl_t *mip = (mac_impl_t *)mh; 2761 mac_mtu_req_t *cur, *prev; 2762 2763 i_mac_perim_enter(mip); 2764 rw_enter(&mip->mi_rw_lock, RW_WRITER); 2765 2766 prev = NULL; 2767 for (cur = mip->mi_mtrp; cur != NULL; cur = cur->mtr_nextp) { 2768 if (cur->mtr_mtu == mtu) { 2769 ASSERT(cur->mtr_ref > 0); 2770 cur->mtr_ref--; 2771 if
(cur->mtr_ref == 0) { 2772 if (prev == NULL) { 2773 mip->mi_mtrp = cur->mtr_nextp; 2774 } else { 2775 prev->mtr_nextp = cur->mtr_nextp; 2776 } 2777 kmem_free(cur, sizeof (mac_mtu_req_t)); 2778 } 2779 rw_exit(&mip->mi_rw_lock); 2780 i_mac_perim_exit(mip); 2781 return (0); 2782 } 2783 2784 prev = cur; 2785 } 2786 2787 rw_exit(&mip->mi_rw_lock); 2788 i_mac_perim_exit(mip); 2789 return (ENOENT); 2790 } 2791 2792 /* 2793 * MAC Type Plugin functions. 2794 */ 2795 2796 mactype_t * 2797 mactype_getplugin(const char *pname) 2798 { 2799 mactype_t *mtype = NULL; 2800 boolean_t tried_modload = B_FALSE; 2801 2802 mutex_enter(&i_mactype_lock); 2803 2804 find_registered_mactype: 2805 if (mod_hash_find(i_mactype_hash, (mod_hash_key_t)pname, 2806 (mod_hash_val_t *)&mtype) != 0) { 2807 if (!tried_modload) { 2808 /* 2809 * If the plugin has not yet been loaded, then 2810 * attempt to load it now. If modload() succeeds, 2811 * the plugin should have registered using 2812 * mactype_register(), in which case we can go back 2813 * and attempt to find it again. 2814 */ 2815 if (modload(MACTYPE_KMODDIR, (char *)pname) != -1) { 2816 tried_modload = B_TRUE; 2817 goto find_registered_mactype; 2818 } 2819 } 2820 } else { 2821 /* 2822 * Note that there's no danger that the plugin we've loaded 2823 * could be unloaded between the modload() step and the 2824 * reference count bump here, as we're holding 2825 * i_mactype_lock, which mactype_unregister() also holds. 2826 */ 2827 atomic_inc_32(&mtype->mt_ref); 2828 } 2829 2830 mutex_exit(&i_mactype_lock); 2831 return (mtype); 2832 } 2833 2834 mactype_register_t * 2835 mactype_alloc(uint_t mactype_version) 2836 { 2837 mactype_register_t *mtrp; 2838 2839 /* 2840 * Make sure there isn't a version mismatch between the plugin and 2841 * the framework. In the future, if multiple versions are 2842 * supported, this check could become more sophisticated. 2843 */ 2844 if (mactype_version != MACTYPE_VERSION) 2845 return (NULL); 2846 2847 mtrp = kmem_zalloc(sizeof (mactype_register_t), KM_SLEEP); 2848 mtrp->mtr_version = mactype_version; 2849 return (mtrp); 2850 } 2851 2852 void 2853 mactype_free(mactype_register_t *mtrp) 2854 { 2855 kmem_free(mtrp, sizeof (mactype_register_t)); 2856 } 2857 2858 int 2859 mactype_register(mactype_register_t *mtrp) 2860 { 2861 mactype_t *mtp; 2862 mactype_ops_t *ops = mtrp->mtr_ops; 2863 2864 /* Do some sanity checking before we register this MAC type. */ 2865 if (mtrp->mtr_ident == NULL || ops == NULL) 2866 return (EINVAL); 2867 2868 /* 2869 * Verify that all mandatory callbacks are set in the ops 2870 * vector. 
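 *
 * For instance, a hypothetical "foo" plugin would have to supply at
 * least these five callbacks before calling mactype_register():
 *
 *	foo_ops.mtops_unicst_verify = foo_unicst_verify;
 *	foo_ops.mtops_multicst_verify = foo_multicst_verify;
 *	foo_ops.mtops_sap_verify = foo_sap_verify;
 *	foo_ops.mtops_header = foo_header;
 *	foo_ops.mtops_header_info = foo_header_info;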
2871 */ 2872 if (ops->mtops_unicst_verify == NULL || 2873 ops->mtops_multicst_verify == NULL || 2874 ops->mtops_sap_verify == NULL || 2875 ops->mtops_header == NULL || 2876 ops->mtops_header_info == NULL) { 2877 return (EINVAL); 2878 } 2879 2880 mtp = kmem_zalloc(sizeof (*mtp), KM_SLEEP); 2881 mtp->mt_ident = mtrp->mtr_ident; 2882 mtp->mt_ops = *ops; 2883 mtp->mt_type = mtrp->mtr_mactype; 2884 mtp->mt_nativetype = mtrp->mtr_nativetype; 2885 mtp->mt_addr_length = mtrp->mtr_addrlen; 2886 if (mtrp->mtr_brdcst_addr != NULL) { 2887 mtp->mt_brdcst_addr = kmem_alloc(mtrp->mtr_addrlen, KM_SLEEP); 2888 bcopy(mtrp->mtr_brdcst_addr, mtp->mt_brdcst_addr, 2889 mtrp->mtr_addrlen); 2890 } 2891 2892 mtp->mt_stats = mtrp->mtr_stats; 2893 mtp->mt_statcount = mtrp->mtr_statcount; 2894 2895 mtp->mt_mapping = mtrp->mtr_mapping; 2896 mtp->mt_mappingcount = mtrp->mtr_mappingcount; 2897 2898 if (mod_hash_insert(i_mactype_hash, 2899 (mod_hash_key_t)mtp->mt_ident, (mod_hash_val_t)mtp) != 0) { 2900 /* mt_brdcst_addr may be NULL if no broadcast address was given */ 2901 if (mtp->mt_brdcst_addr != NULL) 2902 kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); 2903 kmem_free(mtp, sizeof (*mtp)); 2904 return (EEXIST); 2905 } 2906 return (0); 2907 } 2908 2909 int 2910 mactype_unregister(const char *ident) 2911 { 2912 mactype_t *mtp; 2913 mod_hash_val_t val; 2914 int err; 2915 /* 2916 * Let's not allow MAC drivers to use this plugin while we're 2917 * trying to unregister it. Holding i_mactype_lock also prevents a 2918 * plugin from unregistering while a MAC driver is attempting to 2919 * hold a reference to it in mactype_getplugin(). 2920 */ 2921 mutex_enter(&i_mactype_lock); 2922 2923 if ((err = mod_hash_find(i_mactype_hash, (mod_hash_key_t)ident, 2924 (mod_hash_val_t *)&mtp)) != 0) { 2925 /* A plugin is trying to unregister, but it never registered. */ 2926 err = ENXIO; 2927 goto done; 2928 } 2929 2930 if (mtp->mt_ref != 0) { 2931 err = EBUSY; 2932 goto done; 2933 } 2934 2935 err = mod_hash_remove(i_mactype_hash, (mod_hash_key_t)ident, &val); 2936 ASSERT(err == 0); 2937 if (err != 0) { 2938 /* This should never happen, thus the ASSERT() above. */ 2939 err = EINVAL; 2940 goto done; 2941 } 2942 ASSERT(mtp == (mactype_t *)val); 2943 2944 if (mtp->mt_brdcst_addr != NULL) 2945 kmem_free(mtp->mt_brdcst_addr, mtp->mt_addr_length); 2946 kmem_free(mtp, sizeof (mactype_t)); 2947 done: 2948 mutex_exit(&i_mactype_lock); 2949 return (err); 2950 } 2951 2952 /* 2953 * Checks the size of the value specified for a property as 2954 * part of a property operation. Returns B_TRUE if the size is 2955 * correct, B_FALSE otherwise.
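 *
 * A sketch of typical caller usage (hypothetical handler of a
 * property set request):
 *
 *	if (!mac_prop_check_size(MAC_PROP_MTU, valsize, B_FALSE))
 *		return (EINVAL);	buffer smaller than a uint32_t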
2955 */ 2956 boolean_t 2957 mac_prop_check_size(mac_prop_id_t id, uint_t valsize, boolean_t is_range) 2958 { 2959 uint_t minsize = 0; 2960 2961 if (is_range) 2962 return (valsize >= sizeof (mac_propval_range_t)); 2963 2964 switch (id) { 2965 case MAC_PROP_ZONE: 2966 minsize = sizeof (dld_ioc_zid_t); 2967 break; 2968 case MAC_PROP_AUTOPUSH: 2969 if (valsize != 0) 2970 minsize = sizeof (struct dlautopush); 2971 break; 2972 case MAC_PROP_TAGMODE: 2973 minsize = sizeof (link_tagmode_t); 2974 break; 2975 case MAC_PROP_RESOURCE: 2976 case MAC_PROP_RESOURCE_EFF: 2977 minsize = sizeof (mac_resource_props_t); 2978 break; 2979 case MAC_PROP_DUPLEX: 2980 minsize = sizeof (link_duplex_t); 2981 break; 2982 case MAC_PROP_SPEED: 2983 minsize = sizeof (uint64_t); 2984 break; 2985 case MAC_PROP_STATUS: 2986 minsize = sizeof (link_state_t); 2987 break; 2988 case MAC_PROP_AUTONEG: 2989 case MAC_PROP_EN_AUTONEG: 2990 minsize = sizeof (uint8_t); 2991 break; 2992 case MAC_PROP_MTU: 2993 case MAC_PROP_LLIMIT: 2994 case MAC_PROP_LDECAY: 2995 minsize = sizeof (uint32_t); 2996 break; 2997 case MAC_PROP_FLOWCTRL: 2998 minsize = sizeof (link_flowctrl_t); 2999 break; 3000 case MAC_PROP_ADV_10GFDX_CAP: 3001 case MAC_PROP_EN_10GFDX_CAP: 3002 case MAC_PROP_ADV_1000HDX_CAP: 3003 case MAC_PROP_EN_1000HDX_CAP: 3004 case MAC_PROP_ADV_100FDX_CAP: 3005 case MAC_PROP_EN_100FDX_CAP: 3006 case MAC_PROP_ADV_100HDX_CAP: 3007 case MAC_PROP_EN_100HDX_CAP: 3008 case MAC_PROP_ADV_10FDX_CAP: 3009 case MAC_PROP_EN_10FDX_CAP: 3010 case MAC_PROP_ADV_10HDX_CAP: 3011 case MAC_PROP_EN_10HDX_CAP: 3012 case MAC_PROP_ADV_100T4_CAP: 3013 case MAC_PROP_EN_100T4_CAP: 3014 minsize = sizeof (uint8_t); 3015 break; 3016 case MAC_PROP_PVID: 3017 minsize = sizeof (uint16_t); 3018 break; 3019 case MAC_PROP_IPTUN_HOPLIMIT: 3020 minsize = sizeof (uint32_t); 3021 break; 3022 case MAC_PROP_IPTUN_ENCAPLIMIT: 3023 minsize = sizeof (uint32_t); 3024 break; 3025 case MAC_PROP_MAX_TX_RINGS_AVAIL: 3026 case MAC_PROP_MAX_RX_RINGS_AVAIL: 3027 case MAC_PROP_MAX_RXHWCLNT_AVAIL: 3028 case MAC_PROP_MAX_TXHWCLNT_AVAIL: 3029 minsize = sizeof (uint_t); 3030 break; 3031 case MAC_PROP_WL_ESSID: 3032 minsize = sizeof (wl_linkstatus_t); 3033 break; 3034 case MAC_PROP_WL_BSSID: 3035 minsize = sizeof (wl_bssid_t); 3036 break; 3037 case MAC_PROP_WL_BSSTYPE: 3038 minsize = sizeof (wl_bss_type_t); 3039 break; 3040 case MAC_PROP_WL_LINKSTATUS: 3041 minsize = sizeof (wl_linkstatus_t); 3042 break; 3043 case MAC_PROP_WL_DESIRED_RATES: 3044 minsize = sizeof (wl_rates_t); 3045 break; 3046 case MAC_PROP_WL_SUPPORTED_RATES: 3047 minsize = sizeof (wl_rates_t); 3048 break; 3049 case MAC_PROP_WL_AUTH_MODE: 3050 minsize = sizeof (wl_authmode_t); 3051 break; 3052 case MAC_PROP_WL_ENCRYPTION: 3053 minsize = sizeof (wl_encryption_t); 3054 break; 3055 case MAC_PROP_WL_RSSI: 3056 minsize = sizeof (wl_rssi_t); 3057 break; 3058 case MAC_PROP_WL_PHY_CONFIG: 3059 minsize = sizeof (wl_phy_conf_t); 3060 break; 3061 case MAC_PROP_WL_CAPABILITY: 3062 minsize = sizeof (wl_capability_t); 3063 break; 3064 case MAC_PROP_WL_WPA: 3065 minsize = sizeof (wl_wpa_t); 3066 break; 3067 case MAC_PROP_WL_SCANRESULTS: 3068 minsize = sizeof (wl_wpa_ess_t); 3069 break; 3070 case MAC_PROP_WL_POWER_MODE: 3071 minsize = sizeof (wl_ps_mode_t); 3072 break; 3073 case MAC_PROP_WL_RADIO: 3074 minsize = sizeof (wl_radio_t); 3075 break; 3076 case MAC_PROP_WL_ESS_LIST: 3077 minsize = sizeof (wl_ess_list_t); 3078 break; 3079 case MAC_PROP_WL_KEY_TAB: 3080 minsize = sizeof (wl_wep_key_tab_t); 3081 break; 3082 case 
MAC_PROP_WL_CREATE_IBSS: 3083 minsize = sizeof (wl_create_ibss_t); 3084 break; 3085 case MAC_PROP_WL_SETOPTIE: 3086 minsize = sizeof (wl_wpa_ie_t); 3087 break; 3088 case MAC_PROP_WL_DELKEY: 3089 minsize = sizeof (wl_del_key_t); 3090 break; 3091 case MAC_PROP_WL_KEY: 3092 minsize = sizeof (wl_key_t); 3093 break; 3094 case MAC_PROP_WL_MLME: 3095 minsize = sizeof (wl_mlme_t); 3096 break; 3097 } 3098 3099 return (valsize >= minsize); 3100 } 3101 3102 /* 3103 * mac_set_prop() sets MAC or hardware driver properties: 3104 * 3105 * - MAC-managed properties: resource properties such as maxbw, 3106 * priority, and the cpu binding list, as well as the default port VID 3107 * used by bridging. These properties are consumed by the MAC layer 3108 * itself and not passed down to the driver. For resource control 3109 * properties, this function invokes mac_set_resources() which will 3110 * cache the property value in mac_impl_t and may call 3111 * mac_client_set_resource() to update the property value of the primary 3112 * mac client, if it exists. 3113 * 3114 * - Properties which act on the hardware, such as MTU, and must be 3115 * passed to the driver through its mc_setprop() entry point. 3116 */ 3117 int 3118 mac_set_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, 3119 uint_t valsize) 3120 { 3121 int err = ENOTSUP; 3122 mac_impl_t *mip = (mac_impl_t *)mh; 3123 3124 ASSERT(MAC_PERIM_HELD(mh)); 3125 3126 switch (id) { 3127 case MAC_PROP_RESOURCE: { 3128 mac_resource_props_t *mrp; 3129 3130 /* call mac_set_resources() for MAC properties */ 3131 ASSERT(valsize >= sizeof (mac_resource_props_t)); 3132 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 3133 bcopy(val, mrp, sizeof (*mrp)); 3134 err = mac_set_resources(mh, mrp); 3135 kmem_free(mrp, sizeof (*mrp)); 3136 break; 3137 } 3138 3139 case MAC_PROP_PVID: 3140 ASSERT(valsize >= sizeof (uint16_t)); 3141 if (mip->mi_state_flags & MIS_IS_VNIC) 3142 return (EINVAL); 3143 err = mac_set_pvid(mh, *(uint16_t *)val); 3144 break; 3145 3146 case MAC_PROP_MTU: { 3147 uint32_t mtu; 3148 3149 ASSERT(valsize >= sizeof (uint32_t)); 3150 bcopy(val, &mtu, sizeof (mtu)); 3151 err = mac_set_mtu(mh, mtu, NULL); 3152 break; 3153 } 3154 3155 case MAC_PROP_LLIMIT: 3156 case MAC_PROP_LDECAY: { 3157 uint32_t learnval; 3158 3159 if (valsize < sizeof (learnval) || 3160 (mip->mi_state_flags & MIS_IS_VNIC)) 3161 return (EINVAL); 3162 bcopy(val, &learnval, sizeof (learnval)); 3163 if (learnval == 0 && id == MAC_PROP_LDECAY) 3164 return (EINVAL); 3165 if (id == MAC_PROP_LLIMIT) 3166 mip->mi_llimit = learnval; 3167 else 3168 mip->mi_ldecay = learnval; 3169 err = 0; 3170 break; 3171 } 3172 3173 default: 3174 /* For other driver properties, call the driver's callback */ 3175 if (mip->mi_callbacks->mc_callbacks & MC_SETPROP) { 3176 err = mip->mi_callbacks->mc_setprop(mip->mi_driver, 3177 name, id, valsize, val); 3178 } 3179 } 3180 return (err); 3181 } 3182 3183 /* 3184 * mac_get_prop() gets MAC or device driver properties. 3185 * 3186 * If the property is a driver property, mac_get_prop() calls the driver's 3187 * callback entry point to get it. 3188 * If the property is a MAC property, mac_get_prop() invokes mac_get_resources() 3189 * which returns the cached value in mac_impl_t.
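 *
 * For example, reading the current MTU of a mac (sketch; error
 * handling elided):
 *
 *	uint32_t mtu;
 *
 *	err = mac_get_prop(mh, MAC_PROP_MTU, "mtu", &mtu, sizeof (mtu));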
3190 */ 3191 int 3192 mac_get_prop(mac_handle_t mh, mac_prop_id_t id, char *name, void *val, 3193 uint_t valsize) 3194 { 3195 int err = ENOTSUP; 3196 mac_impl_t *mip = (mac_impl_t *)mh; 3197 uint_t rings; 3198 uint_t vlinks; 3199 3200 bzero(val, valsize); 3201 3202 switch (id) { 3203 case MAC_PROP_RESOURCE: { 3204 mac_resource_props_t *mrp; 3205 3206 /* If mac property, read from cache */ 3207 ASSERT(valsize >= sizeof (mac_resource_props_t)); 3208 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 3209 mac_get_resources(mh, mrp); 3210 bcopy(mrp, val, sizeof (*mrp)); 3211 kmem_free(mrp, sizeof (*mrp)); 3212 return (0); 3213 } 3214 case MAC_PROP_RESOURCE_EFF: { 3215 mac_resource_props_t *mrp; 3216 3217 /* If mac effective property, read from client */ 3218 ASSERT(valsize >= sizeof (mac_resource_props_t)); 3219 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 3220 mac_get_effective_resources(mh, mrp); 3221 bcopy(mrp, val, sizeof (*mrp)); 3222 kmem_free(mrp, sizeof (*mrp)); 3223 return (0); 3224 } 3225 3226 case MAC_PROP_PVID: 3227 ASSERT(valsize >= sizeof (uint16_t)); 3228 if (mip->mi_state_flags & MIS_IS_VNIC) 3229 return (EINVAL); 3230 *(uint16_t *)val = mac_get_pvid(mh); 3231 return (0); 3232 3233 case MAC_PROP_LLIMIT: 3234 case MAC_PROP_LDECAY: 3235 ASSERT(valsize >= sizeof (uint32_t)); 3236 if (mip->mi_state_flags & MIS_IS_VNIC) 3237 return (EINVAL); 3238 if (id == MAC_PROP_LLIMIT) 3239 bcopy(&mip->mi_llimit, val, sizeof (mip->mi_llimit)); 3240 else 3241 bcopy(&mip->mi_ldecay, val, sizeof (mip->mi_ldecay)); 3242 return (0); 3243 3244 case MAC_PROP_MTU: { 3245 uint32_t sdu; 3246 3247 ASSERT(valsize >= sizeof (uint32_t)); 3248 mac_sdu_get2(mh, NULL, &sdu, NULL); 3249 bcopy(&sdu, val, sizeof (sdu)); 3250 3251 return (0); 3252 } 3253 case MAC_PROP_STATUS: { 3254 link_state_t link_state; 3255 3256 if (valsize < sizeof (link_state)) 3257 return (EINVAL); 3258 link_state = mac_link_get(mh); 3259 bcopy(&link_state, val, sizeof (link_state)); 3260 3261 return (0); 3262 } 3263 3264 case MAC_PROP_MAX_RX_RINGS_AVAIL: 3265 case MAC_PROP_MAX_TX_RINGS_AVAIL: 3266 ASSERT(valsize >= sizeof (uint_t)); 3267 rings = id == MAC_PROP_MAX_RX_RINGS_AVAIL ? 3268 mac_rxavail_get(mh) : mac_txavail_get(mh); 3269 bcopy(&rings, val, sizeof (uint_t)); 3270 return (0); 3271 3272 case MAC_PROP_MAX_RXHWCLNT_AVAIL: 3273 case MAC_PROP_MAX_TXHWCLNT_AVAIL: 3274 ASSERT(valsize >= sizeof (uint_t)); 3275 vlinks = id == MAC_PROP_MAX_RXHWCLNT_AVAIL ? 3276 mac_rxhwlnksavail_get(mh) : mac_txhwlnksavail_get(mh); 3277 bcopy(&vlinks, val, sizeof (uint_t)); 3278 return (0); 3279 3280 case MAC_PROP_RXRINGSRANGE: 3281 case MAC_PROP_TXRINGSRANGE: 3282 /* 3283 * The values for these properties are returned through 3284 * the MAC_PROP_RESOURCE property. 3285 */ 3286 return (0); 3287 3288 default: 3289 break; 3290 3291 } 3292 3293 /* If driver property, request from driver */ 3294 if (mip->mi_callbacks->mc_callbacks & MC_GETPROP) { 3295 err = mip->mi_callbacks->mc_getprop(mip->mi_driver, name, id, 3296 valsize, val); 3297 } 3298 3299 return (err); 3300 } 3301 3302 /* 3303 * Helper function to initialize the range structure for use in 3304 * mac_get_prop. If the type can be other than uint32, we can 3305 * pass that as an arg.
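 *
 * For example (values illustrative only):
 *
 *	_mac_set_range(range, 1500, 9000);
 *
 * leaves mpr_count at 1 with a single uint32 span of [1500, 9000].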
3306 */ 3307 static void 3308 _mac_set_range(mac_propval_range_t *range, uint32_t min, uint32_t max) 3309 { 3310 range->mpr_count = 1; 3311 range->mpr_type = MAC_PROPVAL_UINT32; 3312 range->mpr_range_uint32[0].mpur_min = min; 3313 range->mpr_range_uint32[0].mpur_max = max; 3314 } 3315 3316 /* 3317 * Returns information about the specified property, such as default 3318 * values or permissions. 3319 */ 3320 int 3321 mac_prop_info(mac_handle_t mh, mac_prop_id_t id, char *name, 3322 void *default_val, uint_t default_size, mac_propval_range_t *range, 3323 uint_t *perm) 3324 { 3325 mac_prop_info_state_t state; 3326 mac_impl_t *mip = (mac_impl_t *)mh; 3327 uint_t max; 3328 3329 /* 3330 * A property is read/write by default unless the driver says 3331 * otherwise. 3332 */ 3333 if (perm != NULL) 3334 *perm = MAC_PROP_PERM_RW; 3335 3336 if (default_val != NULL) 3337 bzero(default_val, default_size); 3338 3339 /* 3340 * First, handle framework properties for which we don't need to 3341 * involve the driver. 3342 */ 3343 switch (id) { 3344 case MAC_PROP_RESOURCE: 3345 case MAC_PROP_PVID: 3346 case MAC_PROP_LLIMIT: 3347 case MAC_PROP_LDECAY: 3348 return (0); 3349 3350 case MAC_PROP_MAX_RX_RINGS_AVAIL: 3351 case MAC_PROP_MAX_TX_RINGS_AVAIL: 3352 case MAC_PROP_MAX_RXHWCLNT_AVAIL: 3353 case MAC_PROP_MAX_TXHWCLNT_AVAIL: 3354 if (perm != NULL) 3355 *perm = MAC_PROP_PERM_READ; 3356 return (0); 3357 3358 case MAC_PROP_RXRINGSRANGE: 3359 case MAC_PROP_TXRINGSRANGE: 3360 /* 3361 * Currently, we support ranges for the RX and TX rings properties. 3362 * When we extend this support to maxbw, cpus and priority, 3363 * we should move this to mac_get_resources. 3364 * There is no default value for RX or TX rings. 3365 */ 3366 if ((mip->mi_state_flags & MIS_IS_VNIC) && 3367 mac_is_vnic_primary(mh)) { 3368 /* 3369 * We don't support setting rings for a VLAN 3370 * data link because it shares its ring with the 3371 * primary MAC client. 3372 */ 3373 if (perm != NULL) 3374 *perm = MAC_PROP_PERM_READ; 3375 if (range != NULL) 3376 range->mpr_count = 0; 3377 } else if (range != NULL) { 3378 if (mip->mi_state_flags & MIS_IS_VNIC) 3379 mh = mac_get_lower_mac_handle(mh); 3380 mip = (mac_impl_t *)mh; 3381 if ((id == MAC_PROP_RXRINGSRANGE && 3382 mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) || 3383 (id == MAC_PROP_TXRINGSRANGE && 3384 mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC)) { 3385 if (id == MAC_PROP_RXRINGSRANGE) { 3386 if ((mac_rxhwlnksavail_get(mh) + 3387 mac_rxhwlnksrsvd_get(mh)) <= 1) { 3388 /* 3389 * doesn't support groups or 3390 * rings 3391 */ 3392 range->mpr_count = 0; 3393 } else { 3394 /* 3395 * supports specifying groups, 3396 * but not rings 3397 */ 3398 _mac_set_range(range, 0, 0); 3399 } 3400 } else { 3401 if ((mac_txhwlnksavail_get(mh) + 3402 mac_txhwlnksrsvd_get(mh)) <= 1) { 3403 /* 3404 * doesn't support groups or 3405 * rings 3406 */ 3407 range->mpr_count = 0; 3408 } else { 3409 /* 3410 * supports specifying groups, 3411 * but not rings 3412 */ 3413 _mac_set_range(range, 0, 0); 3414 } 3415 } 3416 } else { 3417 max = id == MAC_PROP_RXRINGSRANGE ? 3418 mac_rxavail_get(mh) + mac_rxrsvd_get(mh) : 3419 mac_txavail_get(mh) + mac_txrsvd_get(mh); 3420 if (max <= 1) { 3421 /* 3422 * doesn't support groups or 3423 * rings 3424 */ 3425 range->mpr_count = 0; 3426 } else { 3427 /* 3428 * -1 because we have to leave out the 3429 * default ring.
3430 */ 3431 _mac_set_range(range, 1, max - 1); 3432 } 3433 } 3434 } 3435 return (0); 3436 3437 case MAC_PROP_STATUS: 3438 if (perm != NULL) 3439 *perm = MAC_PROP_PERM_READ; 3440 return (0); 3441 } 3442 3443 /* 3444 * Get the property info from the driver if it implements the 3445 * property info entry point. 3446 */ 3447 bzero(&state, sizeof (state)); 3448 3449 if (mip->mi_callbacks->mc_callbacks & MC_PROPINFO) { 3450 state.pr_default = default_val; 3451 state.pr_default_size = default_size; 3452 3453 /* 3454 * The caller specifies the maximum number of ranges 3455 * it can accommodate using mpr_count. We don't touch 3456 * this value until the driver returns from its 3457 * mc_propinfo() callback, and ensure we don't exceed 3458 * this number of ranges as the driver defines the 3459 * supported ranges from its mc_propinfo(). 3460 * 3461 * pr_range_cur_count keeps track of how many ranges 3462 * were defined by the driver from its mc_propinfo() 3463 * entry point. 3464 * 3465 * On exit, the user-specified range mpr_count returns 3466 * the number of ranges specified by the driver on 3467 * success, or the number of ranges it wanted to 3468 * define if that number of ranges could not be 3469 * accommodated by the specified range structure. In 3470 * the latter case, the caller will be able to 3471 * allocate a larger range structure, and query the 3472 * property again. 3473 */ 3474 state.pr_range_cur_count = 0; 3475 state.pr_range = range; 3476 3477 mip->mi_callbacks->mc_propinfo(mip->mi_driver, name, id, 3478 (mac_prop_info_handle_t)&state); 3479 3480 if (state.pr_flags & MAC_PROP_INFO_RANGE) 3481 range->mpr_count = state.pr_range_cur_count; 3482 3483 /* 3484 * The operation could fail if the buffer supplied by 3485 * the user was too small for the range or default 3486 * value of the property. 3487 */ 3488 if (state.pr_errno != 0) 3489 return (state.pr_errno); 3490 3491 if (perm != NULL && state.pr_flags & MAC_PROP_INFO_PERM) 3492 *perm = state.pr_perm; 3493 } 3494 3495 /* 3496 * The MAC layer may want to provide default values or allowed 3497 * ranges for properties if the driver does not provide a 3498 * property info entry point, or that entry point exists, but 3499 * it did not provide a default value or allowed ranges for 3500 * that property.
3501 */ 3502 switch (id) { 3503 case MAC_PROP_MTU: { 3504 uint32_t sdu; 3505 3506 mac_sdu_get2(mh, NULL, &sdu, NULL); 3507 3508 if (range != NULL && !(state.pr_flags & 3509 MAC_PROP_INFO_RANGE)) { 3510 /* MTU range */ 3511 _mac_set_range(range, sdu, sdu); 3512 } 3513 3514 if (default_val != NULL && !(state.pr_flags & 3515 MAC_PROP_INFO_DEFAULT)) { 3516 if (mip->mi_info.mi_media == DL_ETHER) 3517 sdu = ETHERMTU; 3518 /* default MTU value */ 3519 bcopy(&sdu, default_val, sizeof (sdu)); 3520 } 3521 } 3522 } 3523 3524 return (0); 3525 } 3526 3527 int 3528 mac_fastpath_disable(mac_handle_t mh) 3529 { 3530 mac_impl_t *mip = (mac_impl_t *)mh; 3531 3532 if ((mip->mi_state_flags & MIS_LEGACY) == 0) 3533 return (0); 3534 3535 return (mip->mi_capab_legacy.ml_fastpath_disable(mip->mi_driver)); 3536 } 3537 3538 void 3539 mac_fastpath_enable(mac_handle_t mh) 3540 { 3541 mac_impl_t *mip = (mac_impl_t *)mh; 3542 3543 if ((mip->mi_state_flags & MIS_LEGACY) == 0) 3544 return; 3545 3546 mip->mi_capab_legacy.ml_fastpath_enable(mip->mi_driver); 3547 } 3548 3549 void 3550 mac_register_priv_prop(mac_impl_t *mip, char **priv_props) 3551 { 3552 uint_t nprops, i; 3553 3554 if (priv_props == NULL) 3555 return; 3556 3557 nprops = 0; 3558 while (priv_props[nprops] != NULL) 3559 nprops++; 3560 if (nprops == 0) 3561 return; 3562 3563 3564 mip->mi_priv_prop = kmem_zalloc(nprops * sizeof (char *), KM_SLEEP); 3565 3566 for (i = 0; i < nprops; i++) { 3567 mip->mi_priv_prop[i] = kmem_zalloc(MAXLINKPROPNAME, KM_SLEEP); 3568 (void) strlcpy(mip->mi_priv_prop[i], priv_props[i], 3569 MAXLINKPROPNAME); 3570 } 3571 3572 mip->mi_priv_prop_count = nprops; 3573 } 3574 3575 void 3576 mac_unregister_priv_prop(mac_impl_t *mip) 3577 { 3578 uint_t i; 3579 3580 if (mip->mi_priv_prop_count == 0) { 3581 ASSERT(mip->mi_priv_prop == NULL); 3582 return; 3583 } 3584 3585 for (i = 0; i < mip->mi_priv_prop_count; i++) 3586 kmem_free(mip->mi_priv_prop[i], MAXLINKPROPNAME); 3587 kmem_free(mip->mi_priv_prop, mip->mi_priv_prop_count * 3588 sizeof (char *)); 3589 3590 mip->mi_priv_prop = NULL; 3591 mip->mi_priv_prop_count = 0; 3592 } 3593 3594 /* 3595 * mac_ring_t 'mr' macros. Some rogue drivers may access the ring structure 3596 * (by invoking mac_rx()) even after processing mac_stop_ring(). In such 3597 * cases, if MAC frees the ring structure after mac_stop_ring(), any 3598 * illegal access to the ring structure coming from the driver will panic 3599 * the system. In order to protect the system from such inadvertent access, 3600 * we maintain a cache of rings in the mac_impl_t after they get freed. 3601 * When packets are received on freed rings, MAC (through the generation 3602 * count mechanism) will drop such packets.
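 *
 * Conceptually the generation check works like this (sketch only; the
 * actual comparison happens on the Rx path, not in the functions
 * below): mac_start_ring() hands the current mr_gen_num to the driver
 * and mac_stop_ring() increments it, so a packet tagged with a stale
 * generation number is dropped:
 *
 *	if (pkt_gen_num != ring->mr_gen_num)
 *		drop the packet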
3603 */ 3604 static mac_ring_t * 3605 mac_ring_alloc(mac_impl_t *mip) 3606 { 3607 mac_ring_t *ring; 3608 3609 mutex_enter(&mip->mi_ring_lock); 3610 if (mip->mi_ring_freelist != NULL) { 3611 ring = mip->mi_ring_freelist; 3612 mip->mi_ring_freelist = ring->mr_next; 3613 bzero(ring, sizeof (mac_ring_t)); 3614 mutex_exit(&mip->mi_ring_lock); 3615 } else { 3616 mutex_exit(&mip->mi_ring_lock); 3617 ring = kmem_cache_alloc(mac_ring_cache, KM_SLEEP); 3618 } 3619 ASSERT((ring != NULL) && (ring->mr_state == MR_FREE)); 3620 return (ring); 3621 } 3622 3623 static void 3624 mac_ring_free(mac_impl_t *mip, mac_ring_t *ring) 3625 { 3626 ASSERT(ring->mr_state == MR_FREE); 3627 3628 mutex_enter(&mip->mi_ring_lock); 3629 ring->mr_state = MR_FREE; 3630 ring->mr_flag = 0; 3631 ring->mr_next = mip->mi_ring_freelist; 3632 ring->mr_mip = NULL; 3633 mip->mi_ring_freelist = ring; 3634 mac_ring_stat_delete(ring); 3635 mutex_exit(&mip->mi_ring_lock); 3636 } 3637 3638 static void 3639 mac_ring_freeall(mac_impl_t *mip) 3640 { 3641 mac_ring_t *ring, *ring_next; 3642 3643 mutex_enter(&mip->mi_ring_lock); 3644 ring = mip->mi_ring_freelist; 3645 while (ring != NULL) { 3646 ring_next = ring->mr_next; 3647 kmem_cache_free(mac_ring_cache, ring); 3648 ring = ring_next; 3649 } 3650 mip->mi_ring_freelist = NULL; 3651 mutex_exit(&mip->mi_ring_lock); 3652 } 3653 int 3654 mac_start_ring(mac_ring_t *ring) 3655 { 3656 int rv = 0; 3657 3658 ASSERT(ring->mr_state == MR_FREE); 3659 3660 if (ring->mr_start != NULL) { 3661 rv = ring->mr_start(ring->mr_driver, ring->mr_gen_num); 3662 if (rv != 0) 3663 return (rv); 3664 } 3665 3666 ring->mr_state = MR_INUSE; 3667 return (rv); 3668 } 3669 3670 void 3671 mac_stop_ring(mac_ring_t *ring) 3672 { 3673 ASSERT(ring->mr_state == MR_INUSE); 3674 3675 if (ring->mr_stop != NULL) 3676 ring->mr_stop(ring->mr_driver); 3677 3678 ring->mr_state = MR_FREE; 3679 3680 /* 3681 * Increment the ring generation number for this ring. 3682 */ 3683 ring->mr_gen_num++; 3684 } 3685 3686 int 3687 mac_start_group(mac_group_t *group) 3688 { 3689 int rv = 0; 3690 3691 if (group->mrg_start != NULL) 3692 rv = group->mrg_start(group->mrg_driver); 3693 3694 return (rv); 3695 } 3696 3697 void 3698 mac_stop_group(mac_group_t *group) 3699 { 3700 if (group->mrg_stop != NULL) 3701 group->mrg_stop(group->mrg_driver); 3702 } 3703 3704 /* 3705 * Called from mac_start() on the default Rx group. Broadcast and multicast 3706 * packets are received only on the default group. Hence the default group 3707 * needs to be up even if the primary client is not up, for the other groups 3708 * to be functional. We do this by calling this function at mac_start time 3709 * itself. However, the broadcast packets that are received can't make their 3710 * way beyond mac_rx until a mac client creates a broadcast flow.
3711 */ 3712 static int 3713 mac_start_group_and_rings(mac_group_t *group) 3714 { 3715 mac_ring_t *ring; 3716 int rv = 0; 3717 3718 ASSERT(group->mrg_state == MAC_GROUP_STATE_REGISTERED); 3719 if ((rv = mac_start_group(group)) != 0) 3720 return (rv); 3721 3722 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 3723 ASSERT(ring->mr_state == MR_FREE); 3724 if ((rv = mac_start_ring(ring)) != 0) 3725 goto error; 3726 ring->mr_classify_type = MAC_SW_CLASSIFIER; 3727 } 3728 return (0); 3729 3730 error: 3731 mac_stop_group_and_rings(group); 3732 return (rv); 3733 } 3734 3735 /* Called from mac_stop on the default Rx group */ 3736 static void 3737 mac_stop_group_and_rings(mac_group_t *group) 3738 { 3739 mac_ring_t *ring; 3740 3741 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 3742 if (ring->mr_state != MR_FREE) { 3743 mac_stop_ring(ring); 3744 ring->mr_flag = 0; 3745 ring->mr_classify_type = MAC_NO_CLASSIFIER; 3746 } 3747 } 3748 mac_stop_group(group); 3749 } 3750 3751 3752 static mac_ring_t * 3753 mac_init_ring(mac_impl_t *mip, mac_group_t *group, int index, 3754 mac_capab_rings_t *cap_rings) 3755 { 3756 mac_ring_t *ring, *rnext; 3757 mac_ring_info_t ring_info; 3758 ddi_intr_handle_t ddi_handle; 3759 3760 ring = mac_ring_alloc(mip); 3761 3762 /* Prepare basic information of ring */ 3763 3764 /* 3765 * Ring index is numbered to be unique across a particular device. 3766 * Ring index computation makes the following assumptions: 3767 * - For drivers with static grouping (e.g. ixgbe, bge), the 3768 * ring index exchanged with the driver (e.g. during mr_rget) 3769 * is unique only across the group the ring belongs to. 3770 * - Drivers with dynamic grouping (e.g. nxge) start 3771 * with a single group (mrg_index = 0). 3772 */ 3773 ring->mr_index = group->mrg_index * group->mrg_info.mgi_count + index; 3774 ring->mr_type = group->mrg_type; 3775 ring->mr_gh = (mac_group_handle_t)group; 3776 3777 /* Insert the new ring to the list. */ 3778 ring->mr_next = group->mrg_rings; 3779 group->mrg_rings = ring; 3780 3781 /* Zero to reuse the info data structure */ 3782 bzero(&ring_info, sizeof (ring_info)); 3783 3784 /* Query ring information from driver */ 3785 cap_rings->mr_rget(mip->mi_driver, group->mrg_type, group->mrg_index, 3786 index, &ring_info, (mac_ring_handle_t)ring); 3787 3788 ring->mr_info = ring_info; 3789 3790 /* 3791 * The interrupt handle could be shared among multiple rings. 3792 * Thus if a bunch of rings share an 3793 * interrupt, then only one ring among the bunch will be made 3794 * available for interrupt re-targeting; the rest will have 3795 * the ddi_shared flag set to B_TRUE and will not be available 3796 * for interrupt re-targeting. 3797 */ 3798 if ((ddi_handle = ring_info.mri_intr.mi_ddi_handle) != NULL) { 3799 rnext = ring->mr_next; 3800 while (rnext != NULL) { 3801 if (rnext->mr_info.mri_intr.mi_ddi_handle == 3802 ddi_handle) { 3803 /* 3804 * If the default ring (mr_index == 0) is part 3805 * of a group of rings sharing an 3806 * interrupt, then set the ddi_shared flag for 3807 * the default ring and give another ring 3808 * the chance to be re-targeted. 3809 */ 3810 if (rnext->mr_index == 0 && 3811 !rnext->mr_info.mri_intr.mi_ddi_shared) { 3812 rnext->mr_info.mri_intr.mi_ddi_shared = 3813 B_TRUE; 3814 } else { 3815 ring->mr_info.mri_intr.mi_ddi_shared = 3816 B_TRUE; 3817 } 3818 break; 3819 } 3820 rnext = rnext->mr_next; 3821 } 3822 /* 3823 * If rnext is NULL, then no matching ddi_handle was found. 3824 * Rx rings get registered first.
So if this is a Tx ring, 3825 * then go through all the Rx rings and see if there is a 3826 * matching ddi handle. 3827 */ 3828 if (rnext == NULL && ring->mr_type == MAC_RING_TYPE_TX) { 3829 mac_compare_ddi_handle(mip->mi_rx_groups, 3830 mip->mi_rx_group_count, ring); 3831 } 3832 } 3833 3834 /* Update ring's status */ 3835 ring->mr_state = MR_FREE; 3836 ring->mr_flag = 0; 3837 3838 /* Update the ring count of the group */ 3839 group->mrg_cur_count++; 3840 3841 /* Create per ring kstats */ 3842 if (ring->mr_stat != NULL) { 3843 ring->mr_mip = mip; 3844 mac_ring_stat_create(ring); 3845 } 3846 3847 return (ring); 3848 } 3849 3850 /* 3851 * Rings are chained together for easy regrouping. 3852 */ 3853 static void 3854 mac_init_group(mac_impl_t *mip, mac_group_t *group, int size, 3855 mac_capab_rings_t *cap_rings) 3856 { 3857 int index; 3858 3859 /* 3860 * Initialize all ring members of this group. Size of zero will not 3861 * enter the loop, so it's safe for initializing an empty group. 3862 */ 3863 for (index = size - 1; index >= 0; index--) 3864 (void) mac_init_ring(mip, group, index, cap_rings); 3865 } 3866 3867 int 3868 mac_init_rings(mac_impl_t *mip, mac_ring_type_t rtype) 3869 { 3870 mac_capab_rings_t *cap_rings; 3871 mac_group_t *group; 3872 mac_group_t *groups; 3873 mac_group_info_t group_info; 3874 uint_t group_free = 0; 3875 uint_t ring_left; 3876 mac_ring_t *ring; 3877 int g; 3878 int err = 0; 3879 uint_t grpcnt; 3880 boolean_t pseudo_txgrp = B_FALSE; 3881 3882 switch (rtype) { 3883 case MAC_RING_TYPE_RX: 3884 ASSERT(mip->mi_rx_groups == NULL); 3885 3886 cap_rings = &mip->mi_rx_rings_cap; 3887 cap_rings->mr_type = MAC_RING_TYPE_RX; 3888 break; 3889 case MAC_RING_TYPE_TX: 3890 ASSERT(mip->mi_tx_groups == NULL); 3891 3892 cap_rings = &mip->mi_tx_rings_cap; 3893 cap_rings->mr_type = MAC_RING_TYPE_TX; 3894 break; 3895 default: 3896 ASSERT(B_FALSE); 3897 } 3898 3899 if (!i_mac_capab_get((mac_handle_t)mip, MAC_CAPAB_RINGS, cap_rings)) 3900 return (0); 3901 grpcnt = cap_rings->mr_gnum; 3902 3903 /* 3904 * If we have multiple TX rings, but only one TX group, we can 3905 * create pseudo TX groups (one per TX ring) in the MAC layer, 3906 * except for an aggr. For an aggr currently we maintain only 3907 * one group with all the rings (for all its ports); going 3908 * forward we might change this. 3909 */ 3910 if (rtype == MAC_RING_TYPE_TX && 3911 cap_rings->mr_gnum == 0 && cap_rings->mr_rnum > 0 && 3912 (mip->mi_state_flags & MIS_IS_AGGR) == 0) { 3913 /* 3914 * The -1 here is because we create a default TX group 3915 * with all the rings in it. 3916 */ 3917 grpcnt = cap_rings->mr_rnum - 1; 3918 pseudo_txgrp = B_TRUE; 3919 } 3920 3921 /* 3922 * Allocate a contiguous buffer for all groups. 3923 */ 3924 groups = kmem_zalloc(sizeof (mac_group_t) * (grpcnt + 1), KM_SLEEP); 3925 3926 ring_left = cap_rings->mr_rnum; 3927 3928 /* 3929 * Get all ring groups if any, and get their ring members 3930 * if any. 3931 */ 3932 for (g = 0; g < grpcnt; g++) { 3933 group = groups + g; 3934 3935 /* Prepare basic information of the group */ 3936 group->mrg_index = g; 3937 group->mrg_type = rtype; 3938 group->mrg_state = MAC_GROUP_STATE_UNINIT; 3939 group->mrg_mh = (mac_handle_t)mip; 3940 group->mrg_next = group + 1; 3941 3942 /* Zero to reuse the info data structure */ 3943 bzero(&group_info, sizeof (group_info)); 3944 3945 if (pseudo_txgrp) { 3946 /* 3947 * This is a pseudo group that we created; apart 3948 * from setting the state there is nothing to be 3949 * done.
3950 */ 3951 group->mrg_state = MAC_GROUP_STATE_REGISTERED; 3952 group_free++; 3953 continue; 3954 } 3955 /* Query group information from driver */ 3956 cap_rings->mr_gget(mip->mi_driver, rtype, g, &group_info, 3957 (mac_group_handle_t)group); 3958 3959 switch (cap_rings->mr_group_type) { 3960 case MAC_GROUP_TYPE_DYNAMIC: 3961 if (cap_rings->mr_gaddring == NULL || 3962 cap_rings->mr_gremring == NULL) { 3963 DTRACE_PROBE3( 3964 mac__init__rings_no_addremring, 3965 char *, mip->mi_name, 3966 mac_group_add_ring_t, 3967 cap_rings->mr_gaddring, 3968 mac_group_add_ring_t, 3969 cap_rings->mr_gremring); 3970 err = EINVAL; 3971 goto bail; 3972 } 3973 3974 switch (rtype) { 3975 case MAC_RING_TYPE_RX: 3976 /* 3977 * The first RX group must have non-zero 3978 * rings, and the following groups must 3979 * have zero rings. 3980 */ 3981 if (g == 0 && group_info.mgi_count == 0) { 3982 DTRACE_PROBE1( 3983 mac__init__rings__rx__def__zero, 3984 char *, mip->mi_name); 3985 err = EINVAL; 3986 goto bail; 3987 } 3988 if (g > 0 && group_info.mgi_count != 0) { 3989 DTRACE_PROBE3( 3990 mac__init__rings__rx__nonzero, 3991 char *, mip->mi_name, 3992 int, g, int, group_info.mgi_count); 3993 err = EINVAL; 3994 goto bail; 3995 } 3996 break; 3997 case MAC_RING_TYPE_TX: 3998 /* 3999 * All TX ring groups must have zero rings. 4000 */ 4001 if (group_info.mgi_count != 0) { 4002 DTRACE_PROBE3( 4003 mac__init__rings__tx__nonzero, 4004 char *, mip->mi_name, 4005 int, g, int, group_info.mgi_count); 4006 err = EINVAL; 4007 goto bail; 4008 } 4009 break; 4010 } 4011 break; 4012 case MAC_GROUP_TYPE_STATIC: 4013 /* 4014 * Note that an empty group is allowed, e.g., an aggr 4015 * would start with an empty group. 4016 */ 4017 break; 4018 default: 4019 /* unknown group type */ 4020 DTRACE_PROBE2(mac__init__rings__unknown__type, 4021 char *, mip->mi_name, 4022 int, cap_rings->mr_group_type); 4023 err = EINVAL; 4024 goto bail; 4025 } 4026 4027 4028 /* 4029 * The driver must register mgi_addmac()/mgi_remmac() for Rx groups 4030 * to support multiple MAC addresses. 4031 */ 4032 if (rtype == MAC_RING_TYPE_RX) { 4033 if ((group_info.mgi_addmac == NULL) || 4034 (group_info.mgi_remmac == NULL)) { err = EINVAL; 4035 goto bail; 4036 } 4037 } 4038 4039 /* Cache driver-supplied information */ 4040 group->mrg_info = group_info; 4041 4042 /* Update the group's status and group count. */ 4043 mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED); 4044 group_free++; 4045 4046 group->mrg_rings = NULL; 4047 group->mrg_cur_count = 0; 4048 mac_init_group(mip, group, group_info.mgi_count, cap_rings); 4049 ring_left -= group_info.mgi_count; 4050 4051 /* The current group size should be equal to default value */ 4052 ASSERT(group->mrg_cur_count == group_info.mgi_count); 4053 } 4054 4055 /* Build up a dummy group for free resources as a pool */ 4056 group = groups + grpcnt; 4057 4058 /* Prepare basic information of the group */ 4059 group->mrg_index = -1; 4060 group->mrg_type = rtype; 4061 group->mrg_state = MAC_GROUP_STATE_UNINIT; 4062 group->mrg_mh = (mac_handle_t)mip; 4063 group->mrg_next = NULL; 4064 4065 /* 4066 * If there are ungrouped rings, collect the remaining rings into 4067 * this dummy group.
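 * (For instance, a hypothetical static-grouping driver advertising
 * mr_rnum = 8 rings in mr_gnum = 2 groups of four rings each would
 * reach this point with ring_left == 0; a driver that left two rings
 * unclaimed by any group would see them parked in this dummy group.)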
4068 */ 4069 if (ring_left != 0) { 4070 group->mrg_rings = NULL; 4071 group->mrg_cur_count = 0; 4072 mac_init_group(mip, group, ring_left, cap_rings); 4073 4074 /* The current group size should be equal to ring_left */ 4075 ASSERT(group->mrg_cur_count == ring_left); 4076 4077 ring_left = 0; 4078 4079 /* Update this group's status */ 4080 mac_set_group_state(group, MAC_GROUP_STATE_REGISTERED); 4081 } else 4082 group->mrg_rings = NULL; 4083 4084 ASSERT(ring_left == 0); 4085 4086 bail: 4087 4088 /* Cache other important information to finalize the initialization */ 4089 switch (rtype) { 4090 case MAC_RING_TYPE_RX: 4091 mip->mi_rx_group_type = cap_rings->mr_group_type; 4092 mip->mi_rx_group_count = cap_rings->mr_gnum; 4093 mip->mi_rx_groups = groups; 4094 mip->mi_rx_donor_grp = groups; 4095 if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 4096 /* 4097 * The default ring is reserved since it is 4098 * used for sending broadcast etc. packets. 4099 */ 4100 mip->mi_rxrings_avail = 4101 mip->mi_rx_groups->mrg_cur_count - 1; 4102 mip->mi_rxrings_rsvd = 1; 4103 } 4104 /* 4105 * The default group cannot be reserved. It is used by 4106 * all the clients that do not have an exclusive group. 4107 */ 4108 mip->mi_rxhwclnt_avail = mip->mi_rx_group_count - 1; 4109 mip->mi_rxhwclnt_used = 1; 4110 break; 4111 case MAC_RING_TYPE_TX: 4112 mip->mi_tx_group_type = pseudo_txgrp ? MAC_GROUP_TYPE_DYNAMIC : 4113 cap_rings->mr_group_type; 4114 mip->mi_tx_group_count = grpcnt; 4115 mip->mi_tx_group_free = group_free; 4116 mip->mi_tx_groups = groups; 4117 4118 group = groups + grpcnt; 4119 ring = group->mrg_rings; 4120 /* 4121 * The ring can be NULL in the case of aggr. Aggr will 4122 * have an empty Tx group which will get populated 4123 * later when pseudo Tx rings are added after 4124 * mac_register() is done. 4125 */ 4126 if (ring == NULL) { 4127 ASSERT(mip->mi_state_flags & MIS_IS_AGGR); 4128 /* 4129 * Pass the group to aggr so it can add Tx 4130 * rings to the group later. 4131 */ 4132 cap_rings->mr_gget(mip->mi_driver, rtype, 0, NULL, 4133 (mac_group_handle_t)group); 4134 /* 4135 * Even though there are no rings at this time 4136 * (rings will come later), set the group 4137 * state to registered. 4138 */ 4139 group->mrg_state = MAC_GROUP_STATE_REGISTERED; 4140 } else { 4141 /* 4142 * Ring 0 is used as the default one and it could be 4143 * assigned to a client as well. 4144 */ 4145 while ((ring->mr_index != 0) && (ring->mr_next != NULL)) 4146 ring = ring->mr_next; 4147 ASSERT(ring->mr_index == 0); 4148 mip->mi_default_tx_ring = (mac_ring_handle_t)ring; 4149 } 4150 if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) 4151 mip->mi_txrings_avail = group->mrg_cur_count - 1; 4152 /* 4153 * The default ring cannot be reserved. 4154 */ 4155 mip->mi_txrings_rsvd = 1; 4156 /* 4157 * The default group cannot be reserved. It will be shared 4158 * by clients that do not have an exclusive group. 4159 */ 4160 mip->mi_txhwclnt_avail = mip->mi_tx_group_count; 4161 mip->mi_txhwclnt_used = 1; 4162 break; 4163 default: 4164 ASSERT(B_FALSE); 4165 } 4166 4167 if (err != 0) 4168 mac_free_rings(mip, rtype); 4169 4170 return (err); 4171 } 4172 4173 /* 4174 * The ddi interrupt handle could be shared among rings. If so, compare 4175 * the new ring's ddi handle with the existing ones and set the ddi_shared 4176 * flag.
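 * A sketch of the policy implemented below, where cring is the new
 * ring and ring is the first already-registered ring found with the
 * same handle:
 *
 *	if (ring is the default Rx ring (mr_index == 0) &&
 *	    !ring->mr_info.mri_intr.mi_ddi_shared)
 *		ring->mr_info.mri_intr.mi_ddi_shared = B_TRUE;
 *	else
 *		cring->mr_info.mri_intr.mi_ddi_shared = B_TRUE;
 *
 * so at most one ring per shared handle stays re-targetable.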
4177 */ 4178 void 4179 mac_compare_ddi_handle(mac_group_t *groups, uint_t grpcnt, mac_ring_t *cring) 4180 { 4181 mac_group_t *group; 4182 mac_ring_t *ring; 4183 ddi_intr_handle_t ddi_handle; 4184 int g; 4185 4186 ddi_handle = cring->mr_info.mri_intr.mi_ddi_handle; 4187 for (g = 0; g < grpcnt; g++) { 4188 group = groups + g; 4189 for (ring = group->mrg_rings; ring != NULL; 4190 ring = ring->mr_next) { 4191 if (ring == cring) 4192 continue; 4193 if (ring->mr_info.mri_intr.mi_ddi_handle == 4194 ddi_handle) { 4195 if (cring->mr_type == MAC_RING_TYPE_RX && 4196 ring->mr_index == 0 && 4197 !ring->mr_info.mri_intr.mi_ddi_shared) { 4198 ring->mr_info.mri_intr.mi_ddi_shared = 4199 B_TRUE; 4200 } else { 4201 cring->mr_info.mri_intr.mi_ddi_shared = 4202 B_TRUE; 4203 } 4204 return; 4205 } 4206 } 4207 } 4208 } 4209 4210 /* 4211 * Called to free all groups of a particular type (RX or TX). It's assumed 4212 * that no clients are using these groups. 4213 */ 4214 void 4215 mac_free_rings(mac_impl_t *mip, mac_ring_type_t rtype) 4216 { 4217 mac_group_t *group, *groups; 4218 uint_t group_count; 4219 4220 switch (rtype) { 4221 case MAC_RING_TYPE_RX: 4222 if (mip->mi_rx_groups == NULL) 4223 return; 4224 4225 groups = mip->mi_rx_groups; 4226 group_count = mip->mi_rx_group_count; 4227 4228 mip->mi_rx_groups = NULL; 4229 mip->mi_rx_donor_grp = NULL; 4230 mip->mi_rx_group_count = 0; 4231 break; 4232 case MAC_RING_TYPE_TX: 4233 ASSERT(mip->mi_tx_group_count == mip->mi_tx_group_free); 4234 4235 if (mip->mi_tx_groups == NULL) 4236 return; 4237 4238 groups = mip->mi_tx_groups; 4239 group_count = mip->mi_tx_group_count; 4240 4241 mip->mi_tx_groups = NULL; 4242 mip->mi_tx_group_count = 0; 4243 mip->mi_tx_group_free = 0; 4244 mip->mi_default_tx_ring = NULL; 4245 break; 4246 default: 4247 ASSERT(B_FALSE); 4248 } 4249 4250 for (group = groups; group != NULL; group = group->mrg_next) { 4251 mac_ring_t *ring; 4252 4253 if (group->mrg_cur_count == 0) 4254 continue; 4255 4256 ASSERT(group->mrg_rings != NULL); 4257 4258 while ((ring = group->mrg_rings) != NULL) { 4259 group->mrg_rings = ring->mr_next; 4260 mac_ring_free(mip, ring); 4261 } 4262 } 4263 4264 /* Free all the cached rings */ 4265 mac_ring_freeall(mip); 4266 /* Free the block of group data structures */ 4267 kmem_free(groups, sizeof (mac_group_t) * (group_count + 1)); 4268 } 4269 4270 /* 4271 * Associate a MAC address with a receive group. 4272 * 4273 * The return value of this function should always be checked properly, because 4274 * any type of failure could cause unexpected results. A MAC address can be 4275 * added to or removed from a group only after the group has been reserved. 4276 * Ideally, a successful reservation is always followed by a call to 4277 * mac_group_addmac() to steer the desired traffic. Failure to add a unicast 4278 * MAC address doesn't always imply that the group is functioning abnormally. 4279 * 4280 * Currently this function is called everywhere, and it reflects assumptions 4281 * about MAC addresses in the implementation. CR 6735196. 4282 */ 4283 int 4284 mac_group_addmac(mac_group_t *group, const uint8_t *addr) 4285 { 4286 ASSERT(group->mrg_type == MAC_RING_TYPE_RX); 4287 ASSERT(group->mrg_info.mgi_addmac != NULL); 4288 4289 return (group->mrg_info.mgi_addmac(group->mrg_info.mgi_driver, addr)); 4290 } 4291 4292 /* 4293 * Remove the association between a MAC address and a receive group.
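 * The same return-value discipline as mac_group_addmac() applies; a
 * typical caller (cf. mac_remove_macaddr() further below) only clears
 * its own state once the driver call has succeeded:
 *
 *	if ((err = mac_group_remmac(map->ma_group, map->ma_addr)) == 0)
 *		map->ma_group = NULL;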
4294 */ 4295 int 4296 mac_group_remmac(mac_group_t *group, const uint8_t *addr) 4297 { 4298 ASSERT(group->mrg_type == MAC_RING_TYPE_RX); 4299 ASSERT(group->mrg_info.mgi_remmac != NULL); 4300 4301 return (group->mrg_info.mgi_remmac(group->mrg_info.mgi_driver, addr)); 4302 } 4303 4304 /* 4305 * This is the entry point for packets transmitted through the bridging code. 4306 * If no bridge is in place, MAC_RING_TX transmits using tx ring. The 'rh' 4307 * pointer may be NULL to select the default ring. 4308 */ 4309 mblk_t * 4310 mac_bridge_tx(mac_impl_t *mip, mac_ring_handle_t rh, mblk_t *mp) 4311 { 4312 mac_handle_t mh; 4313 4314 /* 4315 * Once we take a reference on the bridge link, the bridge 4316 * module itself can't unload, so the callback pointers are 4317 * stable. 4318 */ 4319 mutex_enter(&mip->mi_bridge_lock); 4320 if ((mh = mip->mi_bridge_link) != NULL) 4321 mac_bridge_ref_cb(mh, B_TRUE); 4322 mutex_exit(&mip->mi_bridge_lock); 4323 if (mh == NULL) { 4324 MAC_RING_TX(mip, rh, mp, mp); 4325 } else { 4326 mp = mac_bridge_tx_cb(mh, rh, mp); 4327 mac_bridge_ref_cb(mh, B_FALSE); 4328 } 4329 4330 return (mp); 4331 } 4332 4333 /* 4334 * Find a ring from its index. 4335 */ 4336 mac_ring_handle_t 4337 mac_find_ring(mac_group_handle_t gh, int index) 4338 { 4339 mac_group_t *group = (mac_group_t *)gh; 4340 mac_ring_t *ring = group->mrg_rings; 4341 4342 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) 4343 if (ring->mr_index == index) 4344 break; 4345 4346 return ((mac_ring_handle_t)ring); 4347 } 4348 /* 4349 * Add a ring to an existing group. 4350 * 4351 * The ring must be either passed directly (for example if the ring 4352 * movement is initiated by the framework), or specified through a driver 4353 * index (for example when the ring is added by the driver. 4354 * 4355 * The caller needs to call mac_perim_enter() before calling this function. 4356 */ 4357 int 4358 i_mac_group_add_ring(mac_group_t *group, mac_ring_t *ring, int index) 4359 { 4360 mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 4361 mac_capab_rings_t *cap_rings; 4362 boolean_t driver_call = (ring == NULL); 4363 mac_group_type_t group_type; 4364 int ret = 0; 4365 flow_entry_t *flent; 4366 4367 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 4368 4369 switch (group->mrg_type) { 4370 case MAC_RING_TYPE_RX: 4371 cap_rings = &mip->mi_rx_rings_cap; 4372 group_type = mip->mi_rx_group_type; 4373 break; 4374 case MAC_RING_TYPE_TX: 4375 cap_rings = &mip->mi_tx_rings_cap; 4376 group_type = mip->mi_tx_group_type; 4377 break; 4378 default: 4379 ASSERT(B_FALSE); 4380 } 4381 4382 /* 4383 * There should be no ring with the same ring index in the target 4384 * group. 4385 */ 4386 ASSERT(mac_find_ring((mac_group_handle_t)group, 4387 driver_call ? index : ring->mr_index) == NULL); 4388 4389 if (driver_call) { 4390 /* 4391 * The function is called as a result of a request from 4392 * a driver to add a ring to an existing group, for example 4393 * from the aggregation driver. Allocate a new mac_ring_t 4394 * for that ring. 4395 */ 4396 ring = mac_init_ring(mip, group, index, cap_rings); 4397 ASSERT(group->mrg_state > MAC_GROUP_STATE_UNINIT); 4398 } else { 4399 /* 4400 * The function is called as a result of a MAC layer request 4401 * to add a ring to an existing group. In this case the 4402 * ring is being moved between groups, which requires 4403 * the underlying driver to support dynamic grouping, 4404 * and the mac_ring_t already exists. 
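 * (In short: a driver-initiated add, e.g. from aggr, passes
 * ring == NULL plus a driver index, while a framework-initiated
 * move passes the existing mac_ring_t and the index is ignored;
 * see mac_group_mov_ring() further below, which calls
 * i_mac_group_add_ring(d_group, ring, 0).)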
4405 */ 4406 ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC); 4407 ASSERT(group->mrg_driver == NULL || 4408 cap_rings->mr_gaddring != NULL); 4409 ASSERT(ring->mr_gh == NULL); 4410 } 4411 4412 /* 4413 * At this point the ring should not be in use, and it should be 4414 * of the right type for the target group. 4415 */ 4416 ASSERT(ring->mr_state < MR_INUSE); 4417 ASSERT(ring->mr_srs == NULL); 4418 ASSERT(ring->mr_type == group->mrg_type); 4419 4420 if (!driver_call) { 4421 /* 4422 * Add the driver level hardware ring if the process was not 4423 * initiated by the driver, and the target group is not the 4424 * default group. 4425 */ 4426 if (group->mrg_driver != NULL) { 4427 cap_rings->mr_gaddring(group->mrg_driver, 4428 ring->mr_driver, ring->mr_type); 4429 } 4430 4431 /* 4432 * Insert the ring ahead of the existing rings. 4433 */ 4434 ring->mr_next = group->mrg_rings; 4435 group->mrg_rings = ring; 4436 ring->mr_gh = (mac_group_handle_t)group; 4437 group->mrg_cur_count++; 4438 } 4439 4440 /* 4441 * If the group has not been actively used, we're done. 4442 */ 4443 if (group->mrg_index != -1 && 4444 group->mrg_state < MAC_GROUP_STATE_RESERVED) 4445 return (0); 4446 4447 /* 4448 * Start the ring if needed. On failure, undo the grouping action. 4449 */ 4450 if (ring->mr_state != MR_INUSE) { 4451 if ((ret = mac_start_ring(ring)) != 0) { 4452 if (!driver_call) { 4453 cap_rings->mr_gremring(group->mrg_driver, 4454 ring->mr_driver, ring->mr_type); 4455 } 4456 group->mrg_cur_count--; 4457 group->mrg_rings = ring->mr_next; 4458 4459 ring->mr_gh = NULL; 4460 4461 if (driver_call) 4462 mac_ring_free(mip, ring); 4463 4464 return (ret); 4465 } 4466 } 4467 4468 /* 4469 * Set up SRS/SR according to the ring type. 4470 */ 4471 switch (ring->mr_type) { 4472 case MAC_RING_TYPE_RX: 4473 /* 4474 * Set up an SRS on top of the new ring if the group is 4475 * reserved for someone's exclusive use. 4476 */ 4477 if (group->mrg_state == MAC_GROUP_STATE_RESERVED) { 4478 mac_client_impl_t *mcip; 4479 4480 mcip = MAC_GROUP_ONLY_CLIENT(group); 4481 /* 4482 * Even though this group is reserved we might still 4483 * have multiple clients, e.g. a VLAN shares the 4484 * group with the primary mac client. 4485 */ 4486 if (mcip != NULL) { 4487 flent = mcip->mci_flent; 4488 ASSERT(flent->fe_rx_srs_cnt > 0); 4489 mac_rx_srs_group_setup(mcip, flent, SRST_LINK); 4490 mac_fanout_setup(mcip, flent, 4491 MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, 4492 mcip, NULL, NULL); 4493 } else { 4494 ring->mr_classify_type = MAC_SW_CLASSIFIER; 4495 } 4496 } 4497 break; 4498 case MAC_RING_TYPE_TX: 4499 { 4500 mac_grp_client_t *mgcp = group->mrg_clients; 4501 mac_client_impl_t *mcip; 4502 mac_soft_ring_set_t *mac_srs; 4503 mac_srs_tx_t *tx; 4504 4505 if (MAC_GROUP_NO_CLIENT(group)) { 4506 if (ring->mr_state == MR_INUSE) 4507 mac_stop_ring(ring); 4508 ring->mr_flag = 0; 4509 break; 4510 } 4511 /* 4512 * If the rings are being moved to a group that has 4513 * clients using it, then add the new rings to the 4514 * clients' SRS. 4515 */ 4516 while (mgcp != NULL) { 4517 boolean_t is_aggr; 4518 4519 mcip = mgcp->mgc_client; 4520 flent = mcip->mci_flent; 4521 is_aggr = (mcip->mci_state_flags & MCIS_IS_AGGR); 4522 mac_srs = MCIP_TX_SRS(mcip); 4523 tx = &mac_srs->srs_tx; 4524 mac_tx_client_quiesce((mac_client_handle_t)mcip); 4525 /* 4526 * If we are growing from 1 to multiple rings.
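 * In sketch form, the transitions applied below are:
 *
 *	SRST_BW_CONTROL set -> SRS_TX_BW_FANOUT (SRS_TX_BW_AGGR for aggr)
 *	otherwise           -> SRS_TX_FANOUT    (SRS_TX_AGGR for aggr)
 *
 * with st_func re-derived from the new mode via mac_tx_get_func().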
4527 */ 4528 if (tx->st_mode == SRS_TX_BW || 4529 tx->st_mode == SRS_TX_SERIALIZE || 4530 tx->st_mode == SRS_TX_DEFAULT) { 4531 mac_ring_t *tx_ring = tx->st_arg2; 4532 4533 tx->st_arg2 = NULL; 4534 mac_tx_srs_stat_recreate(mac_srs, B_TRUE); 4535 mac_tx_srs_add_ring(mac_srs, tx_ring); 4536 if (mac_srs->srs_type & SRST_BW_CONTROL) { 4537 tx->st_mode = is_aggr ? SRS_TX_BW_AGGR : 4538 SRS_TX_BW_FANOUT; 4539 } else { 4540 tx->st_mode = is_aggr ? SRS_TX_AGGR : 4541 SRS_TX_FANOUT; 4542 } 4543 tx->st_func = mac_tx_get_func(tx->st_mode); 4544 } 4545 mac_tx_srs_add_ring(mac_srs, ring); 4546 mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), 4547 mac_rx_deliver, mcip, NULL, NULL); 4548 mac_tx_client_restart((mac_client_handle_t)mcip); 4549 mgcp = mgcp->mgc_next; 4550 } 4551 break; 4552 } 4553 default: 4554 ASSERT(B_FALSE); 4555 } 4556 /* 4557 * For aggr, the default ring will be NULL to begin with. If it 4558 * is NULL, then pick the first ring that gets added as the 4559 * default ring. Any ring in an aggregation can be removed at 4560 * any time (by the user action of removing a link) and if the 4561 * current default ring gets removed, then a new one gets 4562 * picked (see i_mac_group_rem_ring()). 4563 */ 4564 if (mip->mi_state_flags & MIS_IS_AGGR && 4565 mip->mi_default_tx_ring == NULL && 4566 ring->mr_type == MAC_RING_TYPE_TX) { 4567 mip->mi_default_tx_ring = (mac_ring_handle_t)ring; 4568 } 4569 4570 MAC_RING_UNMARK(ring, MR_INCIPIENT); 4571 return (0); 4572 } 4573 4574 /* 4575 * Remove a ring from it's current group. MAC internal function for dynamic 4576 * grouping. 4577 * 4578 * The caller needs to call mac_perim_enter() before calling this function. 4579 */ 4580 void 4581 i_mac_group_rem_ring(mac_group_t *group, mac_ring_t *ring, 4582 boolean_t driver_call) 4583 { 4584 mac_impl_t *mip = (mac_impl_t *)group->mrg_mh; 4585 mac_capab_rings_t *cap_rings = NULL; 4586 mac_group_type_t group_type; 4587 4588 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 4589 4590 ASSERT(mac_find_ring((mac_group_handle_t)group, 4591 ring->mr_index) == (mac_ring_handle_t)ring); 4592 ASSERT((mac_group_t *)ring->mr_gh == group); 4593 ASSERT(ring->mr_type == group->mrg_type); 4594 4595 if (ring->mr_state == MR_INUSE) 4596 mac_stop_ring(ring); 4597 switch (ring->mr_type) { 4598 case MAC_RING_TYPE_RX: 4599 group_type = mip->mi_rx_group_type; 4600 cap_rings = &mip->mi_rx_rings_cap; 4601 4602 /* 4603 * Only hardware classified packets hold a reference to the 4604 * ring all the way up the Rx path. mac_rx_srs_remove() 4605 * will take care of quiescing the Rx path and removing the 4606 * SRS. The software classified path neither holds a reference 4607 * nor any association with the ring in mac_rx. 4608 */ 4609 if (ring->mr_srs != NULL) { 4610 mac_rx_srs_remove(ring->mr_srs); 4611 ring->mr_srs = NULL; 4612 } 4613 4614 break; 4615 case MAC_RING_TYPE_TX: 4616 { 4617 mac_grp_client_t *mgcp; 4618 mac_client_impl_t *mcip; 4619 mac_soft_ring_set_t *mac_srs; 4620 mac_srs_tx_t *tx; 4621 mac_ring_t *rem_ring; 4622 mac_group_t *defgrp; 4623 uint_t ring_info = 0; 4624 4625 /* 4626 * For TX this function is invoked in three 4627 * cases: 4628 * 4629 * 1) In the case of a failure during the 4630 * initial creation of a group when a share is 4631 * associated with a MAC client. So the SRS is not 4632 * yet setup, and will be setup later after the 4633 * group has been reserved and populated. 4634 * 4635 * 2) From mac_release_tx_group() when freeing 4636 * a TX SRS. 
* 4637 * 4638 * 3) In the case of aggr, when a port gets removed, 4639 * the pseudo Tx rings that it exposed get removed. 4640 * 4641 * In the first two cases the SRS and its soft 4642 * rings are already quiesced. 4643 */ 4644 if (driver_call) { 4645 mac_client_impl_t *mcip; 4646 mac_soft_ring_set_t *mac_srs; 4647 mac_soft_ring_t *sringp; 4648 mac_srs_tx_t *srs_tx; 4649 4650 if (mip->mi_state_flags & MIS_IS_AGGR && 4651 mip->mi_default_tx_ring == 4652 (mac_ring_handle_t)ring) { 4653 /* pick a new default Tx ring */ 4654 mip->mi_default_tx_ring = 4655 (group->mrg_rings != ring) ? 4656 (mac_ring_handle_t)group->mrg_rings : 4657 (mac_ring_handle_t)(ring->mr_next); 4658 } 4659 /* Presently only the aggr case comes here */ 4660 if (group->mrg_state != MAC_GROUP_STATE_RESERVED) 4661 break; 4662 4663 mcip = MAC_GROUP_ONLY_CLIENT(group); 4664 ASSERT(mcip != NULL); 4665 ASSERT(mcip->mci_state_flags & MCIS_IS_AGGR); 4666 mac_srs = MCIP_TX_SRS(mcip); 4667 ASSERT(mac_srs->srs_tx.st_mode == SRS_TX_AGGR || 4668 mac_srs->srs_tx.st_mode == SRS_TX_BW_AGGR); 4669 srs_tx = &mac_srs->srs_tx; 4670 /* 4671 * Wake up any callers blocked on this 4672 * Tx ring due to flow control. 4673 */ 4674 sringp = srs_tx->st_soft_rings[ring->mr_index]; 4675 ASSERT(sringp != NULL); 4676 mac_tx_invoke_callbacks(mcip, (mac_tx_cookie_t)sringp); 4677 mac_tx_client_quiesce((mac_client_handle_t)mcip); 4678 mac_tx_srs_del_ring(mac_srs, ring); 4679 mac_tx_client_restart((mac_client_handle_t)mcip); 4680 break; 4681 } 4682 ASSERT(ring != (mac_ring_t *)mip->mi_default_tx_ring); 4683 group_type = mip->mi_tx_group_type; 4684 cap_rings = &mip->mi_tx_rings_cap; 4685 /* 4686 * See if we need to take the ring out of the MAC clients using 4687 * this group. 4688 */ 4689 if (MAC_GROUP_NO_CLIENT(group)) 4690 break; 4691 mgcp = group->mrg_clients; 4692 defgrp = MAC_DEFAULT_TX_GROUP(mip); 4693 while (mgcp != NULL) { 4694 mcip = mgcp->mgc_client; 4695 mac_srs = MCIP_TX_SRS(mcip); 4696 tx = &mac_srs->srs_tx; 4697 mac_tx_client_quiesce((mac_client_handle_t)mcip); 4698 /* 4699 * If we are here when removing rings from the 4700 * defgroup, mac_reserve_tx_ring would have 4701 * already deleted the ring from the MAC 4702 * clients in the group. 4703 */ 4704 if (group != defgrp) { 4705 mac_tx_invoke_callbacks(mcip, 4706 (mac_tx_cookie_t) 4707 mac_tx_srs_get_soft_ring(mac_srs, ring)); 4708 mac_tx_srs_del_ring(mac_srs, ring); 4709 } 4710 /* 4711 * Additionally, if we are left with only 4712 * one ring in the group after this, we need 4713 * to modify the mode etc. accordingly. (We haven't 4714 * yet taken the ring out, so we check with 2). 4715 */ 4716 if (group->mrg_cur_count == 2) { 4717 if (ring->mr_next == NULL) 4718 rem_ring = group->mrg_rings; 4719 else 4720 rem_ring = ring->mr_next; 4721 mac_tx_invoke_callbacks(mcip, 4722 (mac_tx_cookie_t) 4723 mac_tx_srs_get_soft_ring(mac_srs, 4724 rem_ring)); 4725 mac_tx_srs_del_ring(mac_srs, rem_ring); 4726 if (rem_ring->mr_state != MR_INUSE) { 4727 (void) mac_start_ring(rem_ring); 4728 } 4729 tx->st_arg2 = (void *)rem_ring; 4730 mac_tx_srs_stat_recreate(mac_srs, B_FALSE); 4731 ring_info = mac_hwring_getinfo( 4732 (mac_ring_handle_t)rem_ring); 4733 /* 4734 * We are shrinking from multiple 4735 * to 1 ring.
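 * The single-ring mode is re-derived below, in sketch form:
 *
 *	SRST_BW_CONTROL set                       -> SRS_TX_BW
 *	mac_tx_serialize or MAC_RING_TX_SERIALIZE -> SRS_TX_SERIALIZE
 *	otherwise                                 -> SRS_TX_DEFAULT
 *
 * mirroring, in reverse, the growth path in i_mac_group_add_ring().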
4736 */ 4737 if (mac_srs->srs_type & SRST_BW_CONTROL) { 4738 tx->st_mode = SRS_TX_BW; 4739 } else if (mac_tx_serialize || 4740 (ring_info & MAC_RING_TX_SERIALIZE)) { 4741 tx->st_mode = SRS_TX_SERIALIZE; 4742 } else { 4743 tx->st_mode = SRS_TX_DEFAULT; 4744 } 4745 tx->st_func = mac_tx_get_func(tx->st_mode); 4746 } 4747 mac_tx_client_restart((mac_client_handle_t)mcip); 4748 mgcp = mgcp->mgc_next; 4749 } 4750 break; 4751 } 4752 default: 4753 ASSERT(B_FALSE); 4754 } 4755 4756 /* 4757 * Remove the ring from the group. 4758 */ 4759 if (ring == group->mrg_rings) 4760 group->mrg_rings = ring->mr_next; 4761 else { 4762 mac_ring_t *pre; 4763 4764 pre = group->mrg_rings; 4765 while (pre->mr_next != ring) 4766 pre = pre->mr_next; 4767 pre->mr_next = ring->mr_next; 4768 } 4769 group->mrg_cur_count--; 4770 4771 if (!driver_call) { 4772 ASSERT(group_type == MAC_GROUP_TYPE_DYNAMIC); 4773 ASSERT(group->mrg_driver == NULL || 4774 cap_rings->mr_gremring != NULL); 4775 4776 /* 4777 * Remove the driver level hardware ring. 4778 */ 4779 if (group->mrg_driver != NULL) { 4780 cap_rings->mr_gremring(group->mrg_driver, 4781 ring->mr_driver, ring->mr_type); 4782 } 4783 } 4784 4785 ring->mr_gh = NULL; 4786 if (driver_call) 4787 mac_ring_free(mip, ring); 4788 else 4789 ring->mr_flag = 0; 4790 } 4791 4792 /* 4793 * Move a ring to the target group. If needed, remove the ring from the group 4794 * that it currently belongs to. 4795 * 4796 * The caller needs to enter the MAC perimeter by calling mac_perim_enter(). 4797 */ 4798 static int 4799 mac_group_mov_ring(mac_impl_t *mip, mac_group_t *d_group, mac_ring_t *ring) 4800 { 4801 mac_group_t *s_group = (mac_group_t *)ring->mr_gh; 4802 int rv; 4803 4804 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 4805 ASSERT(d_group != NULL); 4806 ASSERT(s_group->mrg_mh == d_group->mrg_mh); 4807 4808 if (s_group == d_group) 4809 return (0); 4810 4811 /* 4812 * Remove it from the current group first. 4813 */ 4814 if (s_group != NULL) 4815 i_mac_group_rem_ring(s_group, ring, B_FALSE); 4816 4817 /* 4818 * Add it to the new group. 4819 */ 4820 rv = i_mac_group_add_ring(d_group, ring, 0); 4821 if (rv != 0) { 4822 /* 4823 * Failed to add the ring to the destination group; try to put 4824 * it back in the source group. If that also fails, the ring is stuck in limbo, so log a message. 4825 */ 4826 if (i_mac_group_add_ring(s_group, ring, 0)) { 4827 cmn_err(CE_WARN, "%s: failed to move ring %p\n", 4828 mip->mi_name, (void *)ring); 4829 } 4830 } 4831 4832 return (rv); 4833 } 4834 4835 /* 4836 * Find a MAC address according to its value. 4837 */ 4838 mac_address_t * 4839 mac_find_macaddr(mac_impl_t *mip, uint8_t *mac_addr) 4840 { 4841 mac_address_t *map; 4842 4843 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 4844 4845 for (map = mip->mi_addresses; map != NULL; map = map->ma_next) { 4846 if (bcmp(mac_addr, map->ma_addr, map->ma_len) == 0) 4847 break; 4848 } 4849 4850 return (map); 4851 } 4852 4853 /* 4854 * Check whether the MAC address is shared by multiple clients. 4855 */ 4856 boolean_t 4857 mac_check_macaddr_shared(mac_address_t *map) 4858 { 4859 ASSERT(MAC_PERIM_HELD((mac_handle_t)map->ma_mip)); 4860 4861 return (map->ma_nusers > 1); 4862 } 4863 4864 /* 4865 * Remove the specified MAC address from the MAC address list and free it.
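 * (Life cycle sketch: mac_add_macaddr() below allocates the entry and
 * takes ma_nusers from 0 to 1, additional clients only bump the count,
 * and this function runs once mac_remove_macaddr() or the bail path of
 * mac_add_macaddr() has brought ma_nusers back down to 0.)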
4866 */ 4867 static void 4868 mac_free_macaddr(mac_address_t *map) 4869 { 4870 mac_impl_t *mip = map->ma_mip; 4871 4872 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 4873 ASSERT(mip->mi_addresses != NULL); 4874 4875 map = mac_find_macaddr(mip, map->ma_addr); 4876 4877 ASSERT(map != NULL); 4878 ASSERT(map->ma_nusers == 0); 4879 4880 if (map == mip->mi_addresses) { 4881 mip->mi_addresses = map->ma_next; 4882 } else { 4883 mac_address_t *pre; 4884 4885 pre = mip->mi_addresses; 4886 while (pre->ma_next != map) 4887 pre = pre->ma_next; 4888 pre->ma_next = map->ma_next; 4889 } 4890 4891 kmem_free(map, sizeof (mac_address_t)); 4892 } 4893 4894 /* 4895 * Add a MAC address reference for a client. If the desired MAC address 4896 * exists, add a reference to it. Otherwise, add the new address by adding 4897 * it to a reserved group or by setting promiscuous mode. This function won't 4898 * try a different group if the given group is non-NULL, so the caller must 4899 * explicitly share the default group when needed. 4900 * 4901 * Note that the primary MAC address is initialized at registration time, so 4902 * adding it to the default group only requires activating it if its reference 4903 * count is still zero. Also, some drivers may not have advertised the RINGS 4904 * capability. 4905 */ 4906 int 4907 mac_add_macaddr(mac_impl_t *mip, mac_group_t *group, uint8_t *mac_addr, 4908 boolean_t use_hw) 4909 { 4910 mac_address_t *map; 4911 int err = 0; 4912 boolean_t allocated_map = B_FALSE; 4913 4914 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 4915 4916 map = mac_find_macaddr(mip, mac_addr); 4917 4918 /* 4919 * If the new MAC address has not been added yet, allocate a new 4920 * entry and set it up. 4921 */ 4922 if (map == NULL) { 4923 map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP); 4924 map->ma_len = mip->mi_type->mt_addr_length; 4925 bcopy(mac_addr, map->ma_addr, map->ma_len); 4926 map->ma_nusers = 0; 4927 map->ma_group = group; 4928 map->ma_mip = mip; 4929 4930 /* add the new MAC address to the head of the address list */ 4931 map->ma_next = mip->mi_addresses; 4932 mip->mi_addresses = map; 4933 4934 allocated_map = B_TRUE; 4935 } 4936 4937 ASSERT(map->ma_group == NULL || map->ma_group == group); 4938 if (map->ma_group == NULL) 4939 map->ma_group = group; 4940 4941 /* 4942 * If the MAC address is already in use, simply account for the 4943 * new client. 4944 */ 4945 if (map->ma_nusers++ > 0) 4946 return (0); 4947 4948 /* 4949 * Activate this MAC address by adding it to the reserved group. 4950 */ 4951 if (group != NULL) { 4952 err = mac_group_addmac(group, (const uint8_t *)mac_addr); 4953 if (err == 0) { 4954 map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; 4955 return (0); 4956 } 4957 } 4958 4959 /* 4960 * The MAC address addition failed. If the client requires a 4961 * hardware classified MAC address, fail the operation. 4962 */ 4963 if (use_hw) { 4964 err = ENOSPC; 4965 goto bail; 4966 } 4967 4968 /* 4969 * Try promiscuous mode. 4970 * 4971 * For drivers that don't advertise RINGS capability, do 4972 * nothing for the primary address. 4973 */ 4974 if ((group == NULL) && 4975 (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) == 0)) { 4976 map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; 4977 return (0); 4978 } 4979 4980 /* 4981 * Enable promiscuous mode in order to receive traffic 4982 * to the new MAC address. 4983 */ 4984 if ((err = i_mac_promisc_set(mip, B_TRUE)) == 0) { 4985 map->ma_type = MAC_ADDRESS_TYPE_UNICAST_PROMISC; 4986 return (0); 4987 } 4988 4989 /* 4990 * Free the MAC address that could not be added.
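 * (To summarize the ladder that leads here:
 *
 *	mac_group_addmac() succeeded   -> MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED
 *	use_hw was set                 -> bail with ENOSPC
 *	primary address, group == NULL -> MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED
 *	i_mac_promisc_set() succeeded  -> MAC_ADDRESS_TYPE_UNICAST_PROMISC
 *	everything failed              -> bail with the promisc error.)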
Don't free 4991 * a pre-existing address; it could have been the entry 4992 * for the primary MAC address which was pre-allocated by 4993 * mac_init_macaddr(), and which must remain on the list. 4994 */ 4995 bail: 4996 map->ma_nusers--; 4997 if (allocated_map) 4998 mac_free_macaddr(map); 4999 return (err); 5000 } 5001 5002 /* 5003 * Remove a reference to a MAC address. This may cause the MAC address to be 5004 * removed from its associated group, or promiscuous mode to be turned off. 5005 * The caller needs to handle any failure properly. 5006 */ 5007 int 5008 mac_remove_macaddr(mac_address_t *map) 5009 { 5010 mac_impl_t *mip = map->ma_mip; 5011 int err = 0; 5012 5013 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 5014 5015 ASSERT(map == mac_find_macaddr(mip, map->ma_addr)); 5016 5017 /* 5018 * If it's not the last client using this MAC address, only update 5019 * the MAC clients count. 5020 */ 5021 if (--map->ma_nusers > 0) 5022 return (0); 5023 5024 /* 5025 * The MAC address is no longer used by any MAC client, so remove 5026 * it from its associated group, or turn off promiscuous mode 5027 * if it was enabled for the MAC address. 5028 */ 5029 switch (map->ma_type) { 5030 case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED: 5031 /* 5032 * Don't free the preset primary address for drivers that 5033 * don't advertise RINGS capability. 5034 */ 5035 if (map->ma_group == NULL) 5036 return (0); 5037 5038 err = mac_group_remmac(map->ma_group, map->ma_addr); 5039 if (err == 0) 5040 map->ma_group = NULL; 5041 break; 5042 case MAC_ADDRESS_TYPE_UNICAST_PROMISC: 5043 err = i_mac_promisc_set(mip, B_FALSE); 5044 break; 5045 default: 5046 ASSERT(B_FALSE); 5047 } 5048 5049 if (err != 0) 5050 return (err); 5051 5052 /* 5053 * We created the entry for the primary MAC address at registration, 5054 * so we don't free it here; mac_fini_macaddr() will take care of it. 5055 */ 5056 if (bcmp(map->ma_addr, mip->mi_addr, map->ma_len) != 0) 5057 mac_free_macaddr(map); 5058 5059 return (0); 5060 } 5061 5062 /* 5063 * Update an existing MAC address. The caller needs to make sure that the new 5064 * value is not currently in use. 5065 */ 5066 int 5067 mac_update_macaddr(mac_address_t *map, uint8_t *mac_addr) 5068 { 5069 mac_impl_t *mip = map->ma_mip; 5070 int err = 0; 5071 5072 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 5073 ASSERT(mac_find_macaddr(mip, mac_addr) == NULL); 5074 5075 switch (map->ma_type) { 5076 case MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED: 5077 /* 5078 * Update the primary address for drivers that are not 5079 * RINGS capable. 5080 */ 5081 if (mip->mi_rx_groups == NULL) { 5082 err = mip->mi_unicst(mip->mi_driver, (const uint8_t *) 5083 mac_addr); 5084 if (err != 0) 5085 return (err); 5086 break; 5087 } 5088 5089 /* 5090 * If this MAC address is not currently in use, 5091 * simply break out and update the value. 5092 */ 5093 if (map->ma_nusers == 0) 5094 break; 5095 5096 /* 5097 * Need to replace the MAC address associated with a group. 5098 */ 5099 err = mac_group_remmac(map->ma_group, map->ma_addr); 5100 if (err != 0) 5101 return (err); 5102 5103 err = mac_group_addmac(map->ma_group, mac_addr); 5104 5105 /* 5106 * Failure hints at a hardware error; the MAC layer needs an 5107 * error notification facility to handle this properly. 5108 * For now, simply try to restore the old value. 5109 */ 5110 if (err != 0) 5111 (void) mac_group_addmac(map->ma_group, map->ma_addr); 5112 5113 break; 5114 case MAC_ADDRESS_TYPE_UNICAST_PROMISC: 5115 /* 5116 * Nothing more needs to be done in promiscuous mode.
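 * (The NIC is already receiving all traffic in promiscuous mode, so
 * the address is matched in software and only the bcopy() of the new
 * value at the end of this function is required.)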
5117 */ 5118 break; 5119 default: 5120 ASSERT(B_FALSE); 5121 } 5122 5123 /* 5124 * Successfully replaced the MAC address. 5125 */ 5126 if (err == 0) 5127 bcopy(mac_addr, map->ma_addr, map->ma_len); 5128 5129 return (err); 5130 } 5131 5132 /* 5133 * Freshen the MAC address with new value. Its caller must have updated the 5134 * hardware MAC address before calling this function. 5135 * This funcitons is supposed to be used to handle the MAC address change 5136 * notification from underlying drivers. 5137 */ 5138 void 5139 mac_freshen_macaddr(mac_address_t *map, uint8_t *mac_addr) 5140 { 5141 mac_impl_t *mip = map->ma_mip; 5142 5143 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 5144 ASSERT(mac_find_macaddr(mip, mac_addr) == NULL); 5145 5146 /* 5147 * Freshen the MAC address with new value. 5148 */ 5149 bcopy(mac_addr, map->ma_addr, map->ma_len); 5150 bcopy(mac_addr, mip->mi_addr, map->ma_len); 5151 5152 /* 5153 * Update all MAC clients that share this MAC address. 5154 */ 5155 mac_unicast_update_clients(mip, map); 5156 } 5157 5158 /* 5159 * Set up the primary MAC address. 5160 */ 5161 void 5162 mac_init_macaddr(mac_impl_t *mip) 5163 { 5164 mac_address_t *map; 5165 5166 /* 5167 * The reference count is initialized to zero, until it's really 5168 * activated. 5169 */ 5170 map = kmem_zalloc(sizeof (mac_address_t), KM_SLEEP); 5171 map->ma_len = mip->mi_type->mt_addr_length; 5172 bcopy(mip->mi_addr, map->ma_addr, map->ma_len); 5173 5174 /* 5175 * If driver advertises RINGS capability, it shouldn't have initialized 5176 * its primary MAC address. For other drivers, including VNIC, the 5177 * primary address must work after registration. 5178 */ 5179 if (mip->mi_rx_groups == NULL) 5180 map->ma_type = MAC_ADDRESS_TYPE_UNICAST_CLASSIFIED; 5181 5182 map->ma_mip = mip; 5183 5184 mip->mi_addresses = map; 5185 } 5186 5187 /* 5188 * Clean up the primary MAC address. Note, only one primary MAC address 5189 * is allowed. All other MAC addresses must have been freed appropriately. 5190 */ 5191 void 5192 mac_fini_macaddr(mac_impl_t *mip) 5193 { 5194 mac_address_t *map = mip->mi_addresses; 5195 5196 if (map == NULL) 5197 return; 5198 5199 /* 5200 * If mi_addresses is initialized, there should be exactly one 5201 * entry left on the list with no users. 5202 */ 5203 ASSERT(map->ma_nusers == 0); 5204 ASSERT(map->ma_next == NULL); 5205 5206 kmem_free(map, sizeof (mac_address_t)); 5207 mip->mi_addresses = NULL; 5208 } 5209 5210 /* 5211 * Logging related functions. 5212 * 5213 * Note that Kernel statistics have been extended to maintain fine 5214 * granularity of statistics viz. hardware lane, software lane, fanout 5215 * stats etc. However, extended accounting continues to support only 5216 * aggregate statistics like before. 5217 */ 5218 5219 /* Write the flow description to a netinfo_t record */ 5220 static netinfo_t * 5221 mac_write_flow_desc(flow_entry_t *flent, mac_client_impl_t *mcip) 5222 { 5223 netinfo_t *ninfo; 5224 net_desc_t *ndesc; 5225 flow_desc_t *fdesc; 5226 mac_resource_props_t *mrp; 5227 5228 ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP); 5229 if (ninfo == NULL) 5230 return (NULL); 5231 ndesc = kmem_zalloc(sizeof (net_desc_t), KM_NOSLEEP); 5232 if (ndesc == NULL) { 5233 kmem_free(ninfo, sizeof (netinfo_t)); 5234 return (NULL); 5235 } 5236 5237 /* 5238 * Grab the fe_lock to see a self-consistent fe_flow_desc. 
5239 * Updates to the fe_flow_desc are done under the fe_lock. 5240 */ 5241 mutex_enter(&flent->fe_lock); 5242 fdesc = &flent->fe_flow_desc; 5243 mrp = &flent->fe_resource_props; 5244 5245 ndesc->nd_name = flent->fe_flow_name; 5246 ndesc->nd_devname = mcip->mci_name; 5247 bcopy(fdesc->fd_src_mac, ndesc->nd_ehost, ETHERADDRL); 5248 bcopy(fdesc->fd_dst_mac, ndesc->nd_edest, ETHERADDRL); 5249 ndesc->nd_sap = htonl(fdesc->fd_sap); 5250 ndesc->nd_isv4 = (uint8_t)fdesc->fd_ipversion == IPV4_VERSION; 5251 ndesc->nd_bw_limit = mrp->mrp_maxbw; 5252 if (ndesc->nd_isv4) { 5253 ndesc->nd_saddr[3] = htonl(fdesc->fd_local_addr.s6_addr32[3]); 5254 ndesc->nd_daddr[3] = htonl(fdesc->fd_remote_addr.s6_addr32[3]); 5255 } else { 5256 bcopy(&fdesc->fd_local_addr, ndesc->nd_saddr, IPV6_ADDR_LEN); 5257 bcopy(&fdesc->fd_remote_addr, ndesc->nd_daddr, IPV6_ADDR_LEN); 5258 } 5259 ndesc->nd_sport = htons(fdesc->fd_local_port); 5260 ndesc->nd_dport = htons(fdesc->fd_remote_port); 5261 ndesc->nd_protocol = (uint8_t)fdesc->fd_protocol; 5262 mutex_exit(&flent->fe_lock); 5263 5264 ninfo->ni_record = ndesc; 5265 ninfo->ni_size = sizeof (net_desc_t); 5266 ninfo->ni_type = EX_NET_FLDESC_REC; 5267 5268 return (ninfo); 5269 } 5270 5271 /* Write the flow statistics to a netinfo_t record */ 5272 static netinfo_t * 5273 mac_write_flow_stats(flow_entry_t *flent) 5274 { 5275 netinfo_t *ninfo; 5276 net_stat_t *nstat; 5277 mac_soft_ring_set_t *mac_srs; 5278 mac_rx_stats_t *mac_rx_stat; 5279 mac_tx_stats_t *mac_tx_stat; 5280 int i; 5281 5282 ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP); 5283 if (ninfo == NULL) 5284 return (NULL); 5285 nstat = kmem_zalloc(sizeof (net_stat_t), KM_NOSLEEP); 5286 if (nstat == NULL) { 5287 kmem_free(ninfo, sizeof (netinfo_t)); 5288 return (NULL); 5289 } 5290 5291 nstat->ns_name = flent->fe_flow_name; 5292 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 5293 mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; 5294 mac_rx_stat = &mac_srs->srs_rx.sr_stat; 5295 5296 nstat->ns_ibytes += mac_rx_stat->mrs_intrbytes + 5297 mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes; 5298 nstat->ns_ipackets += mac_rx_stat->mrs_intrcnt + 5299 mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; 5300 nstat->ns_ierrors += mac_rx_stat->mrs_ierrors; 5301 } 5302 5303 mac_srs = (mac_soft_ring_set_t *)(flent->fe_tx_srs); 5304 if (mac_srs != NULL) { 5305 mac_tx_stat = &mac_srs->srs_tx.st_stat; 5306 5307 nstat->ns_obytes = mac_tx_stat->mts_obytes; 5308 nstat->ns_opackets = mac_tx_stat->mts_opackets; 5309 nstat->ns_oerrors = mac_tx_stat->mts_oerrors; 5310 } 5311 5312 ninfo->ni_record = nstat; 5313 ninfo->ni_size = sizeof (net_stat_t); 5314 ninfo->ni_type = EX_NET_FLSTAT_REC; 5315 5316 return (ninfo); 5317 } 5318 5319 /* Write the link description to a netinfo_t record */ 5320 static netinfo_t * 5321 mac_write_link_desc(mac_client_impl_t *mcip) 5322 { 5323 netinfo_t *ninfo; 5324 net_desc_t *ndesc; 5325 flow_entry_t *flent = mcip->mci_flent; 5326 5327 ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP); 5328 if (ninfo == NULL) 5329 return (NULL); 5330 ndesc = kmem_zalloc(sizeof (net_desc_t), KM_NOSLEEP); 5331 if (ndesc == NULL) { 5332 kmem_free(ninfo, sizeof (netinfo_t)); 5333 return (NULL); 5334 } 5335 5336 ndesc->nd_name = mcip->mci_name; 5337 ndesc->nd_devname = mcip->mci_name; 5338 ndesc->nd_isv4 = B_TRUE; 5339 /* 5340 * Grab the fe_lock to see a self-consistent fe_flow_desc. 5341 * Updates to the fe_flow_desc are done under the fe_lock 5342 * after removing the flent from the flow table.
5343 */ 5344 mutex_enter(&flent->fe_lock); 5345 bcopy(flent->fe_flow_desc.fd_src_mac, ndesc->nd_ehost, ETHERADDRL); 5346 mutex_exit(&flent->fe_lock); 5347 5348 ninfo->ni_record = ndesc; 5349 ninfo->ni_size = sizeof (net_desc_t); 5350 ninfo->ni_type = EX_NET_LNDESC_REC; 5351 5352 return (ninfo); 5353 } 5354 5355 /* Write the link statistics to a netinfo_t record */ 5356 static netinfo_t * 5357 mac_write_link_stats(mac_client_impl_t *mcip) 5358 { 5359 netinfo_t *ninfo; 5360 net_stat_t *nstat; 5361 flow_entry_t *flent; 5362 mac_soft_ring_set_t *mac_srs; 5363 mac_rx_stats_t *mac_rx_stat; 5364 mac_tx_stats_t *mac_tx_stat; 5365 int i; 5366 5367 ninfo = kmem_zalloc(sizeof (netinfo_t), KM_NOSLEEP); 5368 if (ninfo == NULL) 5369 return (NULL); 5370 nstat = kmem_zalloc(sizeof (net_stat_t), KM_NOSLEEP); 5371 if (nstat == NULL) { 5372 kmem_free(ninfo, sizeof (netinfo_t)); 5373 return (NULL); 5374 } 5375 5376 nstat->ns_name = mcip->mci_name; 5377 flent = mcip->mci_flent; 5378 if (flent != NULL) { 5379 for (i = 0; i < flent->fe_rx_srs_cnt; i++) { 5380 mac_srs = (mac_soft_ring_set_t *)flent->fe_rx_srs[i]; 5381 mac_rx_stat = &mac_srs->srs_rx.sr_stat; 5382 5383 nstat->ns_ibytes += mac_rx_stat->mrs_intrbytes + 5384 mac_rx_stat->mrs_pollbytes + 5385 mac_rx_stat->mrs_lclbytes; 5386 nstat->ns_ipackets += mac_rx_stat->mrs_intrcnt + 5387 mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt; 5388 nstat->ns_ierrors += mac_rx_stat->mrs_ierrors; 5389 } 5390 } 5391 5392 mac_srs = (flent == NULL) ? NULL : (mac_soft_ring_set_t *)flent->fe_tx_srs; 5393 if (mac_srs != NULL) { 5394 mac_tx_stat = &mac_srs->srs_tx.st_stat; 5395 5396 nstat->ns_obytes = mac_tx_stat->mts_obytes; 5397 nstat->ns_opackets = mac_tx_stat->mts_opackets; 5398 nstat->ns_oerrors = mac_tx_stat->mts_oerrors; 5399 } 5400 5401 ninfo->ni_record = nstat; 5402 ninfo->ni_size = sizeof (net_stat_t); 5403 ninfo->ni_type = EX_NET_LNSTAT_REC; 5404 5405 return (ninfo); 5406 } 5407 5408 typedef struct i_mac_log_state_s { 5409 boolean_t mi_last; 5410 int mi_fenable; 5411 int mi_lenable; 5412 list_t *mi_list; 5413 } i_mac_log_state_t; 5414 5415 /* 5416 * For a given flow, if the description has not been logged before, do it now. 5417 * If it is a VNIC, then we have collected information about it from the MAC 5418 * table, so skip it. 5419 * 5420 * Called through mac_flow_walk_nolock() 5421 * 5422 * Return 0 if successful. 5423 */ 5424 static int 5425 mac_log_flowinfo(flow_entry_t *flent, void *arg) 5426 { 5427 mac_client_impl_t *mcip = flent->fe_mcip; 5428 i_mac_log_state_t *lstate = arg; 5429 netinfo_t *ninfo; 5430 5431 if (mcip == NULL) 5432 return (0); 5433 5434 /* 5435 * If the name starts with "vnic" and FLOW_USER is set (which excludes 5436 * the mcast and active flow entries created implicitly for a vnic), 5437 * it is a VNIC flow; i.e. vnic1 is a VNIC flow while 5438 * vnic/bge1/mcast1 and vnic/bge1/active are not. 5439 */ 5440 if (strncasecmp(flent->fe_flow_name, "vnic", 4) == 0 && 5441 (flent->fe_type & FLOW_USER) != 0) { 5442 return (0); 5443 } 5444 5445 if (!flent->fe_desc_logged) { 5446 /* 5447 * We don't return an error because we want to continue the 5448 * walk in case this is the last walk, which means we 5449 * need to reset fe_desc_logged in all the flows. 5450 */ 5451 if ((ninfo = mac_write_flow_desc(flent, mcip)) == NULL) 5452 return (0); 5453 list_insert_tail(lstate->mi_list, ninfo); 5454 flent->fe_desc_logged = B_TRUE; 5455 } 5456 5457 /* 5458 * Regardless of the error, we want to proceed in case we have to 5459 * reset fe_desc_logged.
5460 */ 5461 ninfo = mac_write_flow_stats(flent); 5462 if (ninfo == NULL) 5463 return (-1); 5464 5465 list_insert_tail(lstate->mi_list, ninfo); 5466 5467 if (mcip != NULL && !(mcip->mci_state_flags & MCIS_DESC_LOGGED)) 5468 flent->fe_desc_logged = B_FALSE; 5469 5470 return (0); 5471 } 5472 5473 /* 5474 * Log the description for each mac client of this mac_impl_t, if it 5475 * hasn't already been done. Additionally, log statistics for the link as 5476 * well. Walk the flow table and log information for each flow as well. 5477 * If it is the last walk (mci_last), then we turn off mci_desc_logged (and 5478 * also fe_desc_logged, if flow logging is on) since we want to log the 5479 * description if and when logging is restarted. 5480 * 5481 * Return 0 upon success or -1 upon failure 5482 */ 5483 static int 5484 i_mac_impl_log(mac_impl_t *mip, i_mac_log_state_t *lstate) 5485 { 5486 mac_client_impl_t *mcip; 5487 netinfo_t *ninfo; 5488 5489 i_mac_perim_enter(mip); 5490 /* 5491 * Only walk the client list for NIC and etherstub 5492 */ 5493 if ((mip->mi_state_flags & MIS_DISABLED) || 5494 ((mip->mi_state_flags & MIS_IS_VNIC) && 5495 (mac_get_lower_mac_handle((mac_handle_t)mip) != NULL))) { 5496 i_mac_perim_exit(mip); 5497 return (0); 5498 } 5499 5500 for (mcip = mip->mi_clients_list; mcip != NULL; 5501 mcip = mcip->mci_client_next) { 5502 if (!MCIP_DATAPATH_SETUP(mcip)) 5503 continue; 5504 if (lstate->mi_lenable) { 5505 if (!(mcip->mci_state_flags & MCIS_DESC_LOGGED)) { 5506 ninfo = mac_write_link_desc(mcip); 5507 if (ninfo == NULL) { 5508 /* 5509 * We can't terminate it if this is the last 5510 * walk, else there might be some links with 5511 * mi_desc_logged set to true, which means 5512 * their description won't be logged the next 5513 * time logging is started (similarly for the 5514 * flows within such links). We can continue 5515 * without walking the flow table (i.e. to 5516 * set fe_desc_logged to false) because we 5517 * won't have written any flow stuff for this 5518 * link as we haven't logged the link itself. 
5519 */ 5520 i_mac_perim_exit(mip); 5521 if (lstate->mi_last) 5522 return (0); 5523 else 5524 return (-1); 5525 } 5526 mcip->mci_state_flags |= MCIS_DESC_LOGGED; 5527 list_insert_tail(lstate->mi_list, ninfo); 5528 } 5529 } 5530 5531 ninfo = mac_write_link_stats(mcip); 5532 if (ninfo == NULL && !lstate->mi_last) { 5533 i_mac_perim_exit(mip); 5534 return (-1); 5535 } 5536 list_insert_tail(lstate->mi_list, ninfo); 5537 5538 if (lstate->mi_last) 5539 mcip->mci_state_flags &= ~MCIS_DESC_LOGGED; 5540 5541 if (lstate->mi_fenable) { 5542 if (mcip->mci_subflow_tab != NULL) { 5543 (void) mac_flow_walk_nolock( 5544 mcip->mci_subflow_tab, mac_log_flowinfo, 5545 lstate); 5546 } 5547 } 5548 } 5549 i_mac_perim_exit(mip); 5550 return (0); 5551 } 5552 5553 /* 5554 * modhash walker function to add a mac_impl_t to a list 5555 */ 5556 /*ARGSUSED*/ 5557 static uint_t 5558 i_mac_impl_list_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 5559 { 5560 list_t *list = (list_t *)arg; 5561 mac_impl_t *mip = (mac_impl_t *)val; 5562 5563 if ((mip->mi_state_flags & MIS_DISABLED) == 0) { 5564 list_insert_tail(list, mip); 5565 mip->mi_ref++; 5566 } 5567 5568 return (MH_WALK_CONTINUE); 5569 } 5570 5571 void 5572 i_mac_log_info(list_t *net_log_list, i_mac_log_state_t *lstate) 5573 { 5574 list_t mac_impl_list; 5575 mac_impl_t *mip; 5576 netinfo_t *ninfo; 5577 5578 /* Create list of mac_impls */ 5579 ASSERT(RW_LOCK_HELD(&i_mac_impl_lock)); 5580 list_create(&mac_impl_list, sizeof (mac_impl_t), offsetof(mac_impl_t, 5581 mi_node)); 5582 mod_hash_walk(i_mac_impl_hash, i_mac_impl_list_walker, &mac_impl_list); 5583 rw_exit(&i_mac_impl_lock); 5584 5585 /* Create log entries for each mac_impl */ 5586 for (mip = list_head(&mac_impl_list); mip != NULL; 5587 mip = list_next(&mac_impl_list, mip)) { 5588 if (i_mac_impl_log(mip, lstate) != 0) 5589 continue; 5590 } 5591 5592 /* Remove elements and destroy list of mac_impls */ 5593 rw_enter(&i_mac_impl_lock, RW_WRITER); 5594 while ((mip = list_remove_tail(&mac_impl_list)) != NULL) { 5595 mip->mi_ref--; 5596 } 5597 rw_exit(&i_mac_impl_lock); 5598 list_destroy(&mac_impl_list); 5599 5600 /* 5601 * Write log entries to files outside of locks, free associated 5602 * structures, and remove entries from the list. 5603 */ 5604 while ((ninfo = list_head(net_log_list)) != NULL) { 5605 (void) exacct_commit_netinfo(ninfo->ni_record, ninfo->ni_type); 5606 list_remove(net_log_list, ninfo); 5607 kmem_free(ninfo->ni_record, ninfo->ni_size); 5608 kmem_free(ninfo, sizeof (*ninfo)); 5609 } 5610 list_destroy(net_log_list); 5611 } 5612 5613 /* 5614 * The timer thread that runs every mac_logging_interval seconds and logs 5615 * link and/or flow information. 
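 * A sketch of the cycle: mac_start_logusage() makes the first call
 * directly, and each pass re-arms itself with
 *
 *	mac_logging_timer = timeout(mac_log_linkinfo, NULL,
 *	    SEC_TO_TICK(mac_logging_interval));
 *
 * for as long as either enable flag remains set; mac_stop_logusage()
 * clears the flags and untimeout()s the pending callout.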
5616 */ 5617 /* ARGSUSED */ 5618 void 5619 mac_log_linkinfo(void *arg) 5620 { 5621 i_mac_log_state_t lstate; 5622 list_t net_log_list; 5623 5624 list_create(&net_log_list, sizeof (netinfo_t), 5625 offsetof(netinfo_t, ni_link)); 5626 5627 rw_enter(&i_mac_impl_lock, RW_READER); 5628 if (!mac_flow_log_enable && !mac_link_log_enable) { 5629 rw_exit(&i_mac_impl_lock); 5630 return; 5631 } 5632 lstate.mi_fenable = mac_flow_log_enable; 5633 lstate.mi_lenable = mac_link_log_enable; 5634 lstate.mi_last = B_FALSE; 5635 lstate.mi_list = &net_log_list; 5636 5637 /* Write log entries for each mac_impl in the list */ 5638 i_mac_log_info(&net_log_list, &lstate); 5639 5640 if (mac_flow_log_enable || mac_link_log_enable) { 5641 mac_logging_timer = timeout(mac_log_linkinfo, NULL, 5642 SEC_TO_TICK(mac_logging_interval)); 5643 } 5644 } 5645 5646 typedef struct i_mac_fastpath_state_s { 5647 boolean_t mf_disable; 5648 int mf_err; 5649 } i_mac_fastpath_state_t; 5650 5651 /* modhash walker function to enable or disable fastpath */ 5652 /*ARGSUSED*/ 5653 static uint_t 5654 i_mac_fastpath_walker(mod_hash_key_t key, mod_hash_val_t *val, 5655 void *arg) 5656 { 5657 i_mac_fastpath_state_t *state = arg; 5658 mac_handle_t mh = (mac_handle_t)val; 5659 5660 if (state->mf_disable) 5661 state->mf_err = mac_fastpath_disable(mh); 5662 else 5663 mac_fastpath_enable(mh); 5664 5665 return (state->mf_err == 0 ? MH_WALK_CONTINUE : MH_WALK_TERMINATE); 5666 } 5667 5668 /* 5669 * Start the logging timer. 5670 */ 5671 int 5672 mac_start_logusage(mac_logtype_t type, uint_t interval) 5673 { 5674 i_mac_fastpath_state_t dstate = {B_TRUE, 0}; 5675 i_mac_fastpath_state_t estate = {B_FALSE, 0}; 5676 int err; 5677 5678 rw_enter(&i_mac_impl_lock, RW_WRITER); 5679 switch (type) { 5680 case MAC_LOGTYPE_FLOW: 5681 if (mac_flow_log_enable) { 5682 rw_exit(&i_mac_impl_lock); 5683 return (0); 5684 } 5685 /* FALLTHRU */ 5686 case MAC_LOGTYPE_LINK: 5687 if (mac_link_log_enable) { 5688 rw_exit(&i_mac_impl_lock); 5689 return (0); 5690 } 5691 break; 5692 default: 5693 ASSERT(0); 5694 } 5695 5696 /* Disable fastpath */ 5697 mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &dstate); 5698 if ((err = dstate.mf_err) != 0) { 5699 /* Reenable fastpath */ 5700 mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &estate); 5701 rw_exit(&i_mac_impl_lock); 5702 return (err); 5703 } 5704 5705 switch (type) { 5706 case MAC_LOGTYPE_FLOW: 5707 mac_flow_log_enable = B_TRUE; 5708 /* FALLTHRU */ 5709 case MAC_LOGTYPE_LINK: 5710 mac_link_log_enable = B_TRUE; 5711 break; 5712 } 5713 5714 mac_logging_interval = interval; 5715 rw_exit(&i_mac_impl_lock); 5716 mac_log_linkinfo(NULL); 5717 return (0); 5718 } 5719 5720 /* 5721 * Stop the logging timer if both link and flow logging are turned off. 
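 * Note the invariant set up by mac_start_logusage() above: flow
 * logging implies link logging. Stopping MAC_LOGTYPE_FLOW therefore
 * clears both flags, while a request to stop MAC_LOGTYPE_LINK alone
 * simply returns while flow logging is still active.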
5722 */ 5723 void 5724 mac_stop_logusage(mac_logtype_t type) 5725 { 5726 i_mac_log_state_t lstate; 5727 i_mac_fastpath_state_t estate = {B_FALSE, 0}; 5728 list_t net_log_list; 5729 5730 list_create(&net_log_list, sizeof (netinfo_t), 5731 offsetof(netinfo_t, ni_link)); 5732 5733 rw_enter(&i_mac_impl_lock, RW_WRITER); 5734 5735 lstate.mi_fenable = mac_flow_log_enable; 5736 lstate.mi_lenable = mac_link_log_enable; 5737 lstate.mi_list = &net_log_list; 5738 5739 /* Last walk */ 5740 lstate.mi_last = B_TRUE; 5741 5742 switch (type) { 5743 case MAC_LOGTYPE_FLOW: 5744 if (lstate.mi_fenable) { 5745 ASSERT(mac_link_log_enable); 5746 mac_flow_log_enable = B_FALSE; 5747 mac_link_log_enable = B_FALSE; 5748 break; 5749 } 5750 /* FALLTHRU */ 5751 case MAC_LOGTYPE_LINK: 5752 if (!lstate.mi_lenable || mac_flow_log_enable) { 5753 rw_exit(&i_mac_impl_lock); 5754 return; 5755 } 5756 mac_link_log_enable = B_FALSE; 5757 break; 5758 default: 5759 ASSERT(0); 5760 } 5761 5762 /* Reenable fastpath */ 5763 mod_hash_walk(i_mac_impl_hash, i_mac_fastpath_walker, &estate); 5764 5765 (void) untimeout(mac_logging_timer); 5766 mac_logging_timer = 0; 5767 5768 /* Write log entries for each mac_impl in the list */ 5769 i_mac_log_info(&net_log_list, &lstate); 5770 } 5771 5772 /* 5773 * Walk the rx and tx SRS/SRs for a flow and update the priority value. 5774 */ 5775 void 5776 mac_flow_update_priority(mac_client_impl_t *mcip, flow_entry_t *flent) 5777 { 5778 pri_t pri; 5779 int count; 5780 mac_soft_ring_set_t *mac_srs; 5781 5782 if (flent->fe_rx_srs_cnt <= 0) 5783 return; 5784 5785 if (((mac_soft_ring_set_t *)flent->fe_rx_srs[0])->srs_type == 5786 SRST_FLOW) { 5787 pri = FLOW_PRIORITY(mcip->mci_min_pri, 5788 mcip->mci_max_pri, 5789 flent->fe_resource_props.mrp_priority); 5790 } else { 5791 pri = mcip->mci_max_pri; 5792 } 5793 5794 for (count = 0; count < flent->fe_rx_srs_cnt; count++) { 5795 mac_srs = flent->fe_rx_srs[count]; 5796 mac_update_srs_priority(mac_srs, pri); 5797 } 5798 /* 5799 * If we have a Tx SRS, we need to modify all the threads associated 5800 * with it. 5801 */ 5802 if (flent->fe_tx_srs != NULL) 5803 mac_update_srs_priority(flent->fe_tx_srs, pri); 5804 } 5805 5806 /* 5807 * RX and TX rings are reserved according to different semantics depending 5808 * on the requests from the MAC clients and type of rings: 5809 * 5810 * On the Tx side, by default we reserve individual rings, independently from 5811 * the groups. 5812 * 5813 * On the Rx side, the reservation is at the granularity of the group 5814 * of rings, and used for v12n level 1 only. It has a special case for the 5815 * primary client. 5816 * 5817 * If a share is allocated to a MAC client, we allocate a TX group and an 5818 * RX group to the client, and assign TX rings and RX rings to these 5819 * groups according to information gathered from the driver through 5820 * the share capability. 5821 * 5822 * The foreseable evolution of Rx rings will handle v12n level 2 and higher 5823 * to allocate individual rings out of a group and program the hw classifier 5824 * based on IP address or higher level criteria. 5825 */ 5826 5827 /* 5828 * mac_reserve_tx_ring() 5829 * Reserve a unused ring by marking it with MR_INUSE state. 5830 * As reserved, the ring is ready to function. 5831 * 5832 * Notes for Hybrid I/O: 5833 * 5834 * If a specific ring is needed, it is specified through the desired_ring 5835 * argument. Otherwise that argument is set to NULL. 
5836 * If the desired ring was previous allocated to another client, this 5837 * function swaps it with a new ring from the group of unassigned rings. 5838 */ 5839 mac_ring_t * 5840 mac_reserve_tx_ring(mac_impl_t *mip, mac_ring_t *desired_ring) 5841 { 5842 mac_group_t *group; 5843 mac_grp_client_t *mgcp; 5844 mac_client_impl_t *mcip; 5845 mac_soft_ring_set_t *srs; 5846 5847 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 5848 5849 /* 5850 * Find an available ring and start it before changing its status. 5851 * The unassigned rings are at the end of the mi_tx_groups 5852 * array. 5853 */ 5854 group = MAC_DEFAULT_TX_GROUP(mip); 5855 5856 /* Can't take the default ring out of the default group */ 5857 ASSERT(desired_ring != (mac_ring_t *)mip->mi_default_tx_ring); 5858 5859 if (desired_ring->mr_state == MR_FREE) { 5860 ASSERT(MAC_GROUP_NO_CLIENT(group)); 5861 if (mac_start_ring(desired_ring) != 0) 5862 return (NULL); 5863 return (desired_ring); 5864 } 5865 /* 5866 * There are clients using this ring, so let's move the clients 5867 * away from using this ring. 5868 */ 5869 for (mgcp = group->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) { 5870 mcip = mgcp->mgc_client; 5871 mac_tx_client_quiesce((mac_client_handle_t)mcip); 5872 srs = MCIP_TX_SRS(mcip); 5873 ASSERT(mac_tx_srs_ring_present(srs, desired_ring)); 5874 mac_tx_invoke_callbacks(mcip, 5875 (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(srs, 5876 desired_ring)); 5877 mac_tx_srs_del_ring(srs, desired_ring); 5878 mac_tx_client_restart((mac_client_handle_t)mcip); 5879 } 5880 return (desired_ring); 5881 } 5882 5883 /* 5884 * For a reserved group with multiple clients, return the primary client. 5885 */ 5886 static mac_client_impl_t * 5887 mac_get_grp_primary(mac_group_t *grp) 5888 { 5889 mac_grp_client_t *mgcp = grp->mrg_clients; 5890 mac_client_impl_t *mcip; 5891 5892 while (mgcp != NULL) { 5893 mcip = mgcp->mgc_client; 5894 if (mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) 5895 return (mcip); 5896 mgcp = mgcp->mgc_next; 5897 } 5898 return (NULL); 5899 } 5900 5901 /* 5902 * Hybrid I/O specifies the ring that should be given to a share. 5903 * If the ring is already used by clients, then we need to release 5904 * the ring back to the default group so that we can give it to 5905 * the share. This means the clients using this ring now get a 5906 * replacement ring. If there aren't any replacement rings, this 5907 * function returns a failure. 5908 */ 5909 static int 5910 mac_reclaim_ring_from_grp(mac_impl_t *mip, mac_ring_type_t ring_type, 5911 mac_ring_t *ring, mac_ring_t **rings, int nrings) 5912 { 5913 mac_group_t *group = (mac_group_t *)ring->mr_gh; 5914 mac_resource_props_t *mrp; 5915 mac_client_impl_t *mcip; 5916 mac_group_t *defgrp; 5917 mac_ring_t *tring; 5918 mac_group_t *tgrp; 5919 int i; 5920 int j; 5921 5922 mcip = MAC_GROUP_ONLY_CLIENT(group); 5923 if (mcip == NULL) 5924 mcip = mac_get_grp_primary(group); 5925 ASSERT(mcip != NULL); 5926 ASSERT(mcip->mci_share == NULL); 5927 5928 mrp = MCIP_RESOURCE_PROPS(mcip); 5929 if (ring_type == MAC_RING_TYPE_RX) { 5930 defgrp = mip->mi_rx_donor_grp; 5931 if ((mrp->mrp_mask & MRP_RX_RINGS) == 0) { 5932 /* Need to put this mac client in the default group */ 5933 if (mac_rx_switch_group(mcip, group, defgrp) != 0) 5934 return (ENOSPC); 5935 } else { 5936 /* 5937 * Switch this ring with some other ring from 5938 * the default group. 
5939 */ 5940 for (tring = defgrp->mrg_rings; tring != NULL; 5941 tring = tring->mr_next) { 5942 if (tring->mr_index == 0) 5943 continue; 5944 for (j = 0; j < nrings; j++) { 5945 if (rings[j] == tring) 5946 break; 5947 } 5948 if (j >= nrings) 5949 break; 5950 } 5951 if (tring == NULL) 5952 return (ENOSPC); 5953 if (mac_group_mov_ring(mip, group, tring) != 0) 5954 return (ENOSPC); 5955 if (mac_group_mov_ring(mip, defgrp, ring) != 0) { 5956 (void) mac_group_mov_ring(mip, defgrp, tring); 5957 return (ENOSPC); 5958 } 5959 } 5960 ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp); 5961 return (0); 5962 } 5963 5964 defgrp = MAC_DEFAULT_TX_GROUP(mip); 5965 if (ring == (mac_ring_t *)mip->mi_default_tx_ring) { 5966 /* 5967 * See if we can get a spare ring to replace the default 5968 * ring. 5969 */ 5970 if (defgrp->mrg_cur_count == 1) { 5971 /* 5972 * Need to get a ring from another client, see if 5973 * there are any clients that can be moved to 5974 * the default group, thereby freeing some rings. 5975 */ 5976 for (i = 0; i < mip->mi_tx_group_count; i++) { 5977 tgrp = &mip->mi_tx_groups[i]; 5978 if (tgrp->mrg_state == 5979 MAC_GROUP_STATE_REGISTERED) { 5980 continue; 5981 } 5982 mcip = MAC_GROUP_ONLY_CLIENT(tgrp); 5983 if (mcip == NULL) 5984 mcip = mac_get_grp_primary(tgrp); 5985 ASSERT(mcip != NULL); 5986 mrp = MCIP_RESOURCE_PROPS(mcip); 5987 if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) { 5988 ASSERT(tgrp->mrg_cur_count == 1); 5989 /* 5990 * If this ring is part of the 5991 * rings asked by the share we cannot 5992 * use it as the default ring. 5993 */ 5994 for (j = 0; j < nrings; j++) { 5995 if (rings[j] == tgrp->mrg_rings) 5996 break; 5997 } 5998 if (j < nrings) 5999 continue; 6000 mac_tx_client_quiesce( 6001 (mac_client_handle_t)mcip); 6002 mac_tx_switch_group(mcip, tgrp, 6003 defgrp); 6004 mac_tx_client_restart( 6005 (mac_client_handle_t)mcip); 6006 break; 6007 } 6008 } 6009 /* 6010 * All the rings are reserved, can't give up the 6011 * default ring. 6012 */ 6013 if (defgrp->mrg_cur_count <= 1) 6014 return (ENOSPC); 6015 } 6016 /* 6017 * Swap the default ring with another. 6018 */ 6019 for (tring = defgrp->mrg_rings; tring != NULL; 6020 tring = tring->mr_next) { 6021 /* 6022 * If this ring is part of the rings asked by the 6023 * share we cannot use it as the default ring. 6024 */ 6025 for (j = 0; j < nrings; j++) { 6026 if (rings[j] == tring) 6027 break; 6028 } 6029 if (j >= nrings) 6030 break; 6031 } 6032 ASSERT(tring != NULL); 6033 mip->mi_default_tx_ring = (mac_ring_handle_t)tring; 6034 return (0); 6035 } 6036 /* 6037 * The Tx ring is with a group reserved by a MAC client. See if 6038 * we can swap it. 6039 */ 6040 ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); 6041 mcip = MAC_GROUP_ONLY_CLIENT(group); 6042 if (mcip == NULL) 6043 mcip = mac_get_grp_primary(group); 6044 ASSERT(mcip != NULL); 6045 mrp = MCIP_RESOURCE_PROPS(mcip); 6046 mac_tx_client_quiesce((mac_client_handle_t)mcip); 6047 if ((mrp->mrp_mask & MRP_TX_RINGS) == 0) { 6048 ASSERT(group->mrg_cur_count == 1); 6049 /* Put this mac client in the default group */ 6050 mac_tx_switch_group(mcip, group, defgrp); 6051 } else { 6052 /* 6053 * Switch this ring with some other ring from 6054 * the default group. 6055 */ 6056 for (tring = defgrp->mrg_rings; tring != NULL; 6057 tring = tring->mr_next) { 6058 if (tring == (mac_ring_t *)mip->mi_default_tx_ring) 6059 continue; 6060 /* 6061 * If this ring is part of the rings asked by the 6062 * share we cannot use it for swapping. 
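 * (The two mac_group_mov_ring() calls that follow are ordered so a
 * failure can be backed out: if moving the reclaimed ring into the
 * default group fails, tring is first moved back into the default
 * group, leaving both groups as they were.)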
6063 */ 6064 for (j = 0; j < nrings; j++) { 6065 if (rings[j] == tring) 6066 break; 6067 } 6068 if (j >= nrings) 6069 break; 6070 } 6071 if (tring == NULL) { 6072 mac_tx_client_restart((mac_client_handle_t)mcip); 6073 return (ENOSPC); 6074 } 6075 if (mac_group_mov_ring(mip, group, tring) != 0) { 6076 mac_tx_client_restart((mac_client_handle_t)mcip); 6077 return (ENOSPC); 6078 } 6079 if (mac_group_mov_ring(mip, defgrp, ring) != 0) { 6080 (void) mac_group_mov_ring(mip, defgrp, tring); 6081 mac_tx_client_restart((mac_client_handle_t)mcip); 6082 return (ENOSPC); 6083 } 6084 } 6085 mac_tx_client_restart((mac_client_handle_t)mcip); 6086 ASSERT(ring->mr_gh == (mac_group_handle_t)defgrp); 6087 return (0); 6088 } 6089 6090 /* 6091 * Populate a zero-ring group with rings. If the share is non-NULL, 6092 * the rings are chosen according to that share. 6093 * Invoked after allocating a new RX or TX group through 6094 * mac_reserve_rx_group() or mac_reserve_tx_group(), respectively. 6095 * Returns zero on success, an errno otherwise. 6096 */ 6097 int 6098 i_mac_group_allocate_rings(mac_impl_t *mip, mac_ring_type_t ring_type, 6099 mac_group_t *src_group, mac_group_t *new_group, mac_share_handle_t share, 6100 uint32_t ringcnt) 6101 { 6102 mac_ring_t **rings, *ring; 6103 uint_t nrings; 6104 int rv = 0, i = 0, j; 6105 6106 ASSERT((ring_type == MAC_RING_TYPE_RX && 6107 mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) || 6108 (ring_type == MAC_RING_TYPE_TX && 6109 mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC)); 6110 6111 /* 6112 * First find the rings to allocate to the group. 6113 */ 6114 if (share != NULL) { 6115 /* get rings through ms_squery() */ 6116 mip->mi_share_capab.ms_squery(share, ring_type, NULL, &nrings); 6117 ASSERT(nrings != 0); 6118 rings = kmem_alloc(nrings * sizeof (mac_ring_handle_t), 6119 KM_SLEEP); 6120 mip->mi_share_capab.ms_squery(share, ring_type, 6121 (mac_ring_handle_t *)rings, &nrings); 6122 for (i = 0; i < nrings; i++) { 6123 /* 6124 * If we have given this ring to a non-default 6125 * group, we need to check if we can get this 6126 * ring. 6127 */ 6128 ring = rings[i]; 6129 if (ring->mr_gh != (mac_group_handle_t)src_group || 6130 ring == (mac_ring_t *)mip->mi_default_tx_ring) { 6131 if (mac_reclaim_ring_from_grp(mip, ring_type, 6132 ring, rings, nrings) != 0) { 6133 rv = ENOSPC; 6134 goto bail; 6135 } 6136 } 6137 } 6138 } else { 6139 /* 6140 * Pick one ring from default group. 6141 * 6142 * for now pick the second ring which requires the first ring 6143 * at index 0 to stay in the default group, since it is the 6144 * ring which carries the multicast traffic. 6145 * We need a better way for a driver to indicate this, 6146 * for example a per-ring flag. 
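 * A hypothetical shape for such a flag (MR_DEFAULT_TRAFFIC does not
 * exist today; illustrative only):
 *
 *	if (ring->mr_flag & MR_DEFAULT_TRAFFIC)
 *		continue;	ring must stay in the default group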
6147 */ 6148 rings = kmem_alloc(ringcnt * sizeof (mac_ring_handle_t), 6149 KM_SLEEP); 6150 for (ring = src_group->mrg_rings; ring != NULL; 6151 ring = ring->mr_next) { 6152 if (ring_type == MAC_RING_TYPE_RX && 6153 ring->mr_index == 0) { 6154 continue; 6155 } 6156 if (ring_type == MAC_RING_TYPE_TX && 6157 ring == (mac_ring_t *)mip->mi_default_tx_ring) { 6158 continue; 6159 } 6160 rings[i++] = ring; 6161 if (i == ringcnt) 6162 break; 6163 } 6164 ASSERT(ring != NULL); 6165 nrings = i; 6166 /* Not enough rings as required */ 6167 if (nrings != ringcnt) { 6168 rv = ENOSPC; 6169 goto bail; 6170 } 6171 } 6172 6173 switch (ring_type) { 6174 case MAC_RING_TYPE_RX: 6175 if (src_group->mrg_cur_count - nrings < 1) { 6176 /* we ran out of rings */ 6177 rv = ENOSPC; 6178 goto bail; 6179 } 6180 6181 /* move receive rings to new group */ 6182 for (i = 0; i < nrings; i++) { 6183 rv = mac_group_mov_ring(mip, new_group, rings[i]); 6184 if (rv != 0) { 6185 /* move rings back on failure */ 6186 for (j = 0; j < i; j++) { 6187 (void) mac_group_mov_ring(mip, 6188 src_group, rings[j]); 6189 } 6190 goto bail; 6191 } 6192 } 6193 break; 6194 6195 case MAC_RING_TYPE_TX: { 6196 mac_ring_t *tmp_ring; 6197 6198 /* move the TX rings to the new group */ 6199 for (i = 0; i < nrings; i++) { 6200 /* get the desired ring */ 6201 tmp_ring = mac_reserve_tx_ring(mip, rings[i]); 6202 if (tmp_ring == NULL) { 6203 rv = ENOSPC; 6204 goto bail; 6205 } 6206 ASSERT(tmp_ring == rings[i]); 6207 rv = mac_group_mov_ring(mip, new_group, rings[i]); 6208 if (rv != 0) { 6209 /* cleanup on failure */ 6210 for (j = 0; j < i; j++) { 6211 (void) mac_group_mov_ring(mip, 6212 MAC_DEFAULT_TX_GROUP(mip), 6213 rings[j]); 6214 } 6215 goto bail; 6216 } 6217 } 6218 break; 6219 } 6220 } 6221 6222 /* add group to share */ 6223 if (share != NULL) 6224 mip->mi_share_capab.ms_sadd(share, new_group->mrg_driver); 6225 6226 bail: 6227 /* free temporary array of rings */ 6228 kmem_free(rings, nrings * sizeof (mac_ring_handle_t)); 6229 6230 return (rv); 6231 } 6232 6233 void 6234 mac_group_add_client(mac_group_t *grp, mac_client_impl_t *mcip) 6235 { 6236 mac_grp_client_t *mgcp; 6237 6238 for (mgcp = grp->mrg_clients; mgcp != NULL; mgcp = mgcp->mgc_next) { 6239 if (mgcp->mgc_client == mcip) 6240 break; 6241 } 6242 6243 VERIFY(mgcp == NULL); 6244 6245 mgcp = kmem_zalloc(sizeof (mac_grp_client_t), KM_SLEEP); 6246 mgcp->mgc_client = mcip; 6247 mgcp->mgc_next = grp->mrg_clients; 6248 grp->mrg_clients = mgcp; 6249 6250 } 6251 6252 void 6253 mac_group_remove_client(mac_group_t *grp, mac_client_impl_t *mcip) 6254 { 6255 mac_grp_client_t *mgcp, **pprev; 6256 6257 for (pprev = &grp->mrg_clients, mgcp = *pprev; mgcp != NULL; 6258 pprev = &mgcp->mgc_next, mgcp = *pprev) { 6259 if (mgcp->mgc_client == mcip) 6260 break; 6261 } 6262 6263 ASSERT(mgcp != NULL); 6264 6265 *pprev = mgcp->mgc_next; 6266 kmem_free(mgcp, sizeof (mac_grp_client_t)); 6267 } 6268 6269 /* 6270 * mac_reserve_rx_group() 6271 * 6272 * Finds an available group and exclusively reserves it for a client. 6273 * The group is chosen to suit the flow's resource controls (bandwidth and 6274 * fanout requirements) and the address type. 6275 * If the requestor is the primary MAC then return the group with the 6276 * largest number of rings, otherwise the default ring when available.
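 * Illustrative call from datapath setup (not compiled code):
 *
 *	grp = mac_reserve_rx_group(mcip, mac_addr, B_FALSE);
 *	if (grp == NULL)
 *		the client stays in the shared default group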
6277 */ 6278 mac_group_t * 6279 mac_reserve_rx_group(mac_client_impl_t *mcip, uint8_t *mac_addr, boolean_t move) 6280 { 6281 mac_share_handle_t share = mcip->mci_share; 6282 mac_impl_t *mip = mcip->mci_mip; 6283 mac_group_t *grp = NULL; 6284 int i; 6285 int err = 0; 6286 mac_address_t *map; 6287 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 6288 int nrings; 6289 int donor_grp_rcnt; 6290 boolean_t need_exclgrp = B_FALSE; 6291 int need_rings = 0; 6292 mac_group_t *candidate_grp = NULL; 6293 mac_client_impl_t *gclient; 6294 mac_resource_props_t *gmrp; 6295 mac_group_t *donorgrp = NULL; 6296 boolean_t rxhw = mrp->mrp_mask & MRP_RX_RINGS; 6297 boolean_t unspec = mrp->mrp_mask & MRP_RXRINGS_UNSPEC; 6298 boolean_t isprimary; 6299 6300 ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 6301 6302 isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; 6303 6304 /* 6305 * Check if a group already has this mac address (case of VLANs) 6306 * unless we are moving this MAC client from one group to another. 6307 */ 6308 if (!move && (map = mac_find_macaddr(mip, mac_addr)) != NULL) { 6309 if (map->ma_group != NULL) 6310 return (map->ma_group); 6311 } 6312 if (mip->mi_rx_groups == NULL || mip->mi_rx_group_count == 0) 6313 return (NULL); 6314 /* 6315 * If exclusive open, return NULL which will enable the 6316 * caller to use the default group. 6317 */ 6318 if (mcip->mci_state_flags & MCIS_EXCLUSIVE) 6319 return (NULL); 6320 6321 /* For dynamic groups default unspecified to 1 */ 6322 if (rxhw && unspec && 6323 mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6324 mrp->mrp_nrxrings = 1; 6325 } 6326 /* 6327 * For static grouping we allow only specifying rings=0 and 6328 * unspecified 6329 */ 6330 if (rxhw && mrp->mrp_nrxrings > 0 && 6331 mip->mi_rx_group_type == MAC_GROUP_TYPE_STATIC) { 6332 return (NULL); 6333 } 6334 if (rxhw) { 6335 /* 6336 * We have explicitly asked for a group (with nrxrings, 6337 * if unspec). 6338 */ 6339 if (unspec || mrp->mrp_nrxrings > 0) { 6340 need_exclgrp = B_TRUE; 6341 need_rings = mrp->mrp_nrxrings; 6342 } else if (mrp->mrp_nrxrings == 0) { 6343 /* 6344 * We have asked for a software group. 6345 */ 6346 return (NULL); 6347 } 6348 } else if (isprimary && mip->mi_nactiveclients == 1 && 6349 mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6350 /* 6351 * If the primary is the only active client on this 6352 * mip and we have not asked for any rings, we give 6353 * it the default group so that the primary gets to 6354 * use all the rings. 6355 */ 6356 return (NULL); 6357 } 6358 6359 /* The group that can donate rings */ 6360 donorgrp = mip->mi_rx_donor_grp; 6361 6362 /* 6363 * The number of rings that the default group can donate. 6364 * We need to leave at least one ring. 6365 */ 6366 donor_grp_rcnt = donorgrp->mrg_cur_count - 1; 6367 6368 /* 6369 * Try to exclusively reserve a RX group. 6370 * 6371 * For flows requiring HW_DEFAULT_RING (unicast flow of the primary 6372 * client), try to reserve a non-default RX group and give 6373 * it all the rings from the donor group, except the default ring. 6374 * 6375 * For flows requiring HW_RING (unicast flow of other clients), try 6376 * to reserve non-default RX group with the specified number of 6377 * rings, if available. 6378 * 6379 * For flows that have not asked for software or hardware rings, 6380 * try to reserve a non-default group with 1 ring, if available.
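 * Example with illustrative numbers: on a NIC whose donor group holds
 * 8 rings, donor_grp_rcnt below is 7. A client created with
 * rxrings=3 gets a new group populated with 3 donor rings, while a
 * primary client that asked for nothing would be given all 7, leaving
 * only the default ring behind in the donor group.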
6381 */ 6382 for (i = 1; i < mip->mi_rx_group_count; i++) { 6383 grp = &mip->mi_rx_groups[i]; 6384 6385 DTRACE_PROBE3(rx__group__trying, char *, mip->mi_name, 6386 int, grp->mrg_index, mac_group_state_t, grp->mrg_state); 6387 6388 /* 6389 * Check if this group could be a candidate group for 6390 * eviction if we need a group for this MAC client, 6391 * but there aren't any. A candidate group is one 6392 * that didn't ask for an exclusive group, but got 6393 * one and it has enough rings (combined with what 6394 * the donor group can donate) for the new MAC 6395 * client. 6396 */ 6397 if (grp->mrg_state >= MAC_GROUP_STATE_RESERVED) { 6398 /* 6399 * If the primary/donor group is not the default 6400 * group, don't bother looking for a candidate group. 6401 * If we don't have enough rings we will check 6402 * if the primary group can be vacated. 6403 */ 6404 if (candidate_grp == NULL && 6405 donorgrp == MAC_DEFAULT_RX_GROUP(mip)) { 6406 ASSERT(!MAC_GROUP_NO_CLIENT(grp)); 6407 gclient = MAC_GROUP_ONLY_CLIENT(grp); 6408 if (gclient == NULL) 6409 gclient = mac_get_grp_primary(grp); 6410 ASSERT(gclient != NULL); 6411 gmrp = MCIP_RESOURCE_PROPS(gclient); 6412 if (gclient->mci_share == NULL && 6413 (gmrp->mrp_mask & MRP_RX_RINGS) == 0 && 6414 (unspec || 6415 (grp->mrg_cur_count + donor_grp_rcnt >= 6416 need_rings))) { 6417 candidate_grp = grp; 6418 } 6419 } 6420 continue; 6421 } 6422 /* 6423 * This group could already be SHARED by other multicast 6424 * flows on this client. In that case, the group would 6425 * be shared and has already been started. 6426 */ 6427 ASSERT(grp->mrg_state != MAC_GROUP_STATE_UNINIT); 6428 6429 if ((grp->mrg_state == MAC_GROUP_STATE_REGISTERED) && 6430 (mac_start_group(grp) != 0)) { 6431 continue; 6432 } 6433 6434 if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) 6435 break; 6436 ASSERT(grp->mrg_cur_count == 0); 6437 6438 /* 6439 * Populate the group. Rings should be taken 6440 * from the donor group. 6441 */ 6442 nrings = rxhw ? need_rings : isprimary ? donor_grp_rcnt: 1; 6443 6444 /* 6445 * If the donor group can't donate, let's just walk and 6446 * see if someone can vacate a group, so that we have 6447 * enough rings for this, unless we already have 6448 * identified a candidate group. 6449 */ 6450 if (nrings <= donor_grp_rcnt) { 6451 err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, 6452 donorgrp, grp, share, nrings); 6453 if (err == 0) { 6454 /* 6455 * For a share i_mac_group_allocate_rings gets 6456 * the rings from the driver, let's populate 6457 * the property for the client now. 6458 */ 6459 if (share != NULL) { 6460 mac_client_set_rings( 6461 (mac_client_handle_t)mcip, 6462 grp->mrg_cur_count, -1); 6463 } 6464 if (mac_is_primary_client(mcip) && !rxhw) 6465 mip->mi_rx_donor_grp = grp; 6466 break; 6467 } 6468 } 6469 6470 DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, 6471 mip->mi_name, int, grp->mrg_index, int, err); 6472 6473 /* 6474 * It's a dynamic group but the grouping operation 6475 * failed. 6476 */ 6477 mac_stop_group(grp); 6478 } 6479 /* We didn't find an exclusive group for this MAC client */ 6480 if (i >= mip->mi_rx_group_count) { 6481 6482 if (!need_exclgrp) 6483 return (NULL); 6484 6485 /* 6486 * If we found a candidate group then we switch the 6487 * MAC client from the candidate_group to the default 6488 * group and give the group to this MAC client. If 6489 * we didn't find a candidate_group, check if the 6490 * primary is in its own group and if it can make way 6491 * for this MAC client.
6492 */ 6493 if (candidate_grp == NULL && 6494 donorgrp != MAC_DEFAULT_RX_GROUP(mip) && 6495 donorgrp->mrg_cur_count >= need_rings) { 6496 candidate_grp = donorgrp; 6497 } 6498 if (candidate_grp != NULL) { 6499 boolean_t prim_grp = B_FALSE; 6500 6501 /* 6502 * Switch the MAC client from the candidate group 6503 * to the default group. If this group was the 6504 * donor group, then after the switch we need 6505 * to update the donor group too. 6506 */ 6507 grp = candidate_grp; 6508 gclient = MAC_GROUP_ONLY_CLIENT(grp); 6509 if (gclient == NULL) 6510 gclient = mac_get_grp_primary(grp); 6511 if (grp == mip->mi_rx_donor_grp) 6512 prim_grp = B_TRUE; 6513 if (mac_rx_switch_group(gclient, grp, 6514 MAC_DEFAULT_RX_GROUP(mip)) != 0) { 6515 return (NULL); 6516 } 6517 if (prim_grp) { 6518 mip->mi_rx_donor_grp = 6519 MAC_DEFAULT_RX_GROUP(mip); 6520 donorgrp = MAC_DEFAULT_RX_GROUP(mip); 6521 } 6522 6523 6524 /* 6525 * Now give this group with the required rings 6526 * to this MAC client. 6527 */ 6528 ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); 6529 if (mac_start_group(grp) != 0) 6530 return (NULL); 6531 6532 if (mip->mi_rx_group_type != MAC_GROUP_TYPE_DYNAMIC) 6533 return (grp); 6534 6535 donor_grp_rcnt = donorgrp->mrg_cur_count - 1; 6536 ASSERT(grp->mrg_cur_count == 0); 6537 ASSERT(donor_grp_rcnt >= need_rings); 6538 err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_RX, 6539 donorgrp, grp, share, need_rings); 6540 if (err == 0) { 6541 /* 6542 * For a share i_mac_group_allocate_rings gets 6543 * the rings from the driver, let's populate 6544 * the property for the client now. 6545 */ 6546 if (share != NULL) { 6547 mac_client_set_rings( 6548 (mac_client_handle_t)mcip, 6549 grp->mrg_cur_count, -1); 6550 } 6551 DTRACE_PROBE2(rx__group__reserved, 6552 char *, mip->mi_name, int, grp->mrg_index); 6553 return (grp); 6554 } 6555 DTRACE_PROBE3(rx__group__reserve__alloc__rings, char *, 6556 mip->mi_name, int, grp->mrg_index, int, err); 6557 mac_stop_group(grp); 6558 } 6559 return (NULL); 6560 } 6561 ASSERT(grp != NULL); 6562 6563 DTRACE_PROBE2(rx__group__reserved, 6564 char *, mip->mi_name, int, grp->mrg_index); 6565 return (grp); 6566 } 6567 6568 /* 6569 * mac_release_rx_group() 6570 * 6571 * This is called when there are no clients left for the group. 6572 * The group is stopped and marked MAC_GROUP_STATE_REGISTERED, 6573 * and if it is a non-default group, the shares are removed and 6574 * all rings are assigned back to default group. 6575 */ 6576 void 6577 mac_release_rx_group(mac_client_impl_t *mcip, mac_group_t *group) 6578 { 6579 mac_impl_t *mip = mcip->mci_mip; 6580 mac_ring_t *ring; 6581 6582 ASSERT(group != MAC_DEFAULT_RX_GROUP(mip)); 6583 6584 if (mip->mi_rx_donor_grp == group) 6585 mip->mi_rx_donor_grp = MAC_DEFAULT_RX_GROUP(mip); 6586 6587 /* 6588 * This is the case where there are no clients left. Any 6589 * SRS etc. on this group have also been quiesced. 6590 */ 6591 for (ring = group->mrg_rings; ring != NULL; ring = ring->mr_next) { 6592 if (ring->mr_classify_type == MAC_HW_CLASSIFIER) { 6593 ASSERT(group->mrg_state == MAC_GROUP_STATE_RESERVED); 6594 /* 6595 * Remove the SRS associated with the HW ring. 6596 * As a result, polling will be disabled.
6597 */ 6598 ring->mr_srs = NULL; 6599 } 6600 ASSERT(group->mrg_state < MAC_GROUP_STATE_RESERVED || 6601 ring->mr_state == MR_INUSE); 6602 if (ring->mr_state == MR_INUSE) { 6603 mac_stop_ring(ring); 6604 ring->mr_flag = 0; 6605 } 6606 } 6607 6608 /* remove group from share */ 6609 if (mcip->mci_share != NULL) { 6610 mip->mi_share_capab.ms_sremove(mcip->mci_share, 6611 group->mrg_driver); 6612 } 6613 6614 if (mip->mi_rx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6615 mac_ring_t *ring; 6616 6617 /* 6618 * Rings were dynamically allocated to group. 6619 * Move rings back to default group. 6620 */ 6621 while ((ring = group->mrg_rings) != NULL) { 6622 (void) mac_group_mov_ring(mip, mip->mi_rx_donor_grp, 6623 ring); 6624 } 6625 } 6626 mac_stop_group(group); 6627 /* 6628 * Possible improvement: See if we can assign the group just released 6629 * to another client of the mip. 6630 */ 6631 } 6632 6633 /* 6634 * When we move the primary's mac address between groups, we need to also 6635 * take all the clients sharing the same mac address along with it (VLANs). 6636 * We remove the mac address for such clients from the group after quiescing 6637 * them. When we add the mac address we restart the client. Note that 6638 * the primary's mac address is removed from the group after all the 6639 * other clients sharing the address are removed. Similarly, the primary's 6640 * mac address is added before all the other clients' mac addresses are 6641 * added. While grp is the group where the clients reside, tgrp is 6642 * the group where the addresses have to be added. 6643 */ 6644 static void 6645 mac_rx_move_macaddr_prim(mac_client_impl_t *mcip, mac_group_t *grp, 6646 mac_group_t *tgrp, uint8_t *maddr, boolean_t add) 6647 { 6648 mac_impl_t *mip = mcip->mci_mip; 6649 mac_grp_client_t *mgcp = grp->mrg_clients; 6650 mac_client_impl_t *gmcip; 6651 boolean_t prim; 6652 6653 prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; 6654 6655 /* 6656 * If the clients are in a non-default group, we just have to 6657 * walk the group's client list. If it is in the default group 6658 * (which will be shared by other clients as well), we need to 6659 * check if the unicast address matches mcip's unicast. 6660 */ 6661 while (mgcp != NULL) { 6662 gmcip = mgcp->mgc_client; 6663 if (gmcip != mcip && 6664 (grp != MAC_DEFAULT_RX_GROUP(mip) || 6665 mcip->mci_unicast == gmcip->mci_unicast)) { 6666 if (!add) { 6667 mac_rx_client_quiesce( 6668 (mac_client_handle_t)gmcip); 6669 (void) mac_remove_macaddr(mcip->mci_unicast); 6670 } else { 6671 (void) mac_add_macaddr(mip, tgrp, maddr, prim); 6672 mac_rx_client_restart( 6673 (mac_client_handle_t)gmcip); 6674 } 6675 } 6676 mgcp = mgcp->mgc_next; 6677 } 6678 } 6679 6680 6681 /* 6682 * Move the MAC address from fgrp to tgrp. If this is the primary client, 6683 * we need to take any VLANs etc. together too.
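 * Ordering sketch (illustrative, condensed from the code below):
 *
 *	mac_rx_move_macaddr_prim(mcip, fgrp, NULL, maddr, B_FALSE);
 *					VLAN users are removed first
 *	mac_remove_macaddr(mcip->mci_unicast);
 *					the primary is removed last
 *	mac_add_macaddr(mip, tgrp, maddr, prim);
 *					the primary is added first
 *	mac_rx_move_macaddr_prim(mcip, fgrp, tgrp, maddr, B_TRUE);
 *					VLAN users are re-added after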
6684 */ 6685 static int 6686 mac_rx_move_macaddr(mac_client_impl_t *mcip, mac_group_t *fgrp, 6687 mac_group_t *tgrp) 6688 { 6689 mac_impl_t *mip = mcip->mci_mip; 6690 uint8_t maddr[MAXMACADDRLEN]; 6691 int err = 0; 6692 boolean_t prim; 6693 boolean_t multiclnt = B_FALSE; 6694 6695 mac_rx_client_quiesce((mac_client_handle_t)mcip); 6696 ASSERT(mcip->mci_unicast != NULL); 6697 bcopy(mcip->mci_unicast->ma_addr, maddr, mcip->mci_unicast->ma_len); 6698 6699 prim = (mcip->mci_state_flags & MCIS_UNICAST_HW) != 0; 6700 if (mcip->mci_unicast->ma_nusers > 1) { 6701 mac_rx_move_macaddr_prim(mcip, fgrp, NULL, maddr, B_FALSE); 6702 multiclnt = B_TRUE; 6703 } 6704 ASSERT(mcip->mci_unicast->ma_nusers == 1); 6705 err = mac_remove_macaddr(mcip->mci_unicast); 6706 if (err != 0) { 6707 mac_rx_client_restart((mac_client_handle_t)mcip); 6708 if (multiclnt) { 6709 mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, 6710 B_TRUE); 6711 } 6712 return (err); 6713 } 6714 /* 6715 * Program the H/W Classifier first, if this fails we need 6716 * not proceed with the other stuff. 6717 */ 6718 if ((err = mac_add_macaddr(mip, tgrp, maddr, prim)) != 0) { 6719 /* Revert back the H/W Classifier */ 6720 if ((err = mac_add_macaddr(mip, fgrp, maddr, prim)) != 0) { 6721 /* 6722 * This should not fail now since it worked earlier, 6723 * should we panic? 6724 */ 6725 cmn_err(CE_WARN, 6726 "mac_rx_switch_group: switching %p back" 6727 " to group %p failed!!", (void *)mcip, 6728 (void *)fgrp); 6729 } 6730 mac_rx_client_restart((mac_client_handle_t)mcip); 6731 if (multiclnt) { 6732 mac_rx_move_macaddr_prim(mcip, fgrp, fgrp, maddr, 6733 B_TRUE); 6734 } 6735 return (err); 6736 } 6737 mcip->mci_unicast = mac_find_macaddr(mip, maddr); 6738 mac_rx_client_restart((mac_client_handle_t)mcip); 6739 if (multiclnt) 6740 mac_rx_move_macaddr_prim(mcip, fgrp, tgrp, maddr, B_TRUE); 6741 return (err); 6742 } 6743 6744 /* 6745 * Switch the MAC client from one group to another. This means we need 6746 * to remove the MAC address from the group, remove the MAC client, 6747 * teardown the SRSs and revert the group state. Then, we add the client 6748 * to the destination group, set the SRSs, and add the MAC address to the 6749 * group. 6750 */ 6751 int 6752 mac_rx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, 6753 mac_group_t *tgrp) 6754 { 6755 int err; 6756 mac_group_state_t next_state; 6757 mac_client_impl_t *group_only_mcip; 6758 mac_client_impl_t *gmcip; 6759 mac_impl_t *mip = mcip->mci_mip; 6760 mac_grp_client_t *mgcp; 6761 6762 ASSERT(fgrp == mcip->mci_flent->fe_rx_ring_group); 6763 6764 if ((err = mac_rx_move_macaddr(mcip, fgrp, tgrp)) != 0) 6765 return (err); 6766 6767 /* 6768 * The group might be reserved, but SRSs may not be set up, e.g. 6769 * primary and its vlans using a reserved group. 
6770 */ 6771 if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED && 6772 MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { 6773 mac_rx_srs_group_teardown(mcip->mci_flent, B_TRUE); 6774 } 6775 if (fgrp != MAC_DEFAULT_RX_GROUP(mip)) { 6776 mgcp = fgrp->mrg_clients; 6777 while (mgcp != NULL) { 6778 gmcip = mgcp->mgc_client; 6779 mgcp = mgcp->mgc_next; 6780 mac_group_remove_client(fgrp, gmcip); 6781 mac_group_add_client(tgrp, gmcip); 6782 gmcip->mci_flent->fe_rx_ring_group = tgrp; 6783 } 6784 mac_release_rx_group(mcip, fgrp); 6785 ASSERT(MAC_GROUP_NO_CLIENT(fgrp)); 6786 mac_set_group_state(fgrp, MAC_GROUP_STATE_REGISTERED); 6787 } else { 6788 mac_group_remove_client(fgrp, mcip); 6789 mac_group_add_client(tgrp, mcip); 6790 mcip->mci_flent->fe_rx_ring_group = tgrp; 6791 /* 6792 * If there are other clients (VLANs) sharing this address 6793 * we should be here only for the primary. 6794 */ 6795 if (mcip->mci_unicast->ma_nusers > 1) { 6796 /* 6797 * We need to move all the clients that are using 6798 * this h/w address. 6799 */ 6800 mgcp = fgrp->mrg_clients; 6801 while (mgcp != NULL) { 6802 gmcip = mgcp->mgc_client; 6803 mgcp = mgcp->mgc_next; 6804 if (mcip->mci_unicast == gmcip->mci_unicast) { 6805 mac_group_remove_client(fgrp, gmcip); 6806 mac_group_add_client(tgrp, gmcip); 6807 gmcip->mci_flent->fe_rx_ring_group = 6808 tgrp; 6809 } 6810 } 6811 } 6812 /* 6813 * The default group will still take the multicast, 6814 * broadcast traffic etc., so it won't go to 6815 * MAC_GROUP_STATE_REGISTERED. 6816 */ 6817 if (fgrp->mrg_state == MAC_GROUP_STATE_RESERVED) 6818 mac_rx_group_unmark(fgrp, MR_CONDEMNED); 6819 mac_set_group_state(fgrp, MAC_GROUP_STATE_SHARED); 6820 } 6821 next_state = mac_group_next_state(tgrp, &group_only_mcip, 6822 MAC_DEFAULT_RX_GROUP(mip), B_TRUE); 6823 mac_set_group_state(tgrp, next_state); 6824 /* 6825 * If the destination group is reserved, setup the SRSs etc. 6826 */ 6827 if (tgrp->mrg_state == MAC_GROUP_STATE_RESERVED) { 6828 mac_rx_srs_group_setup(mcip, mcip->mci_flent, SRST_LINK); 6829 mac_fanout_setup(mcip, mcip->mci_flent, 6830 MCIP_RESOURCE_PROPS(mcip), mac_rx_deliver, mcip, NULL, 6831 NULL); 6832 mac_rx_group_unmark(tgrp, MR_INCIPIENT); 6833 } else { 6834 mac_rx_switch_grp_to_sw(tgrp); 6835 } 6836 return (0); 6837 } 6838 6839 /* 6840 * Reserves a TX group for the specified share. Invoked by mac_tx_srs_setup() 6841 * when a share was allocated to the client. 6842 */ 6843 mac_group_t * 6844 mac_reserve_tx_group(mac_client_impl_t *mcip, boolean_t move) 6845 { 6846 mac_impl_t *mip = mcip->mci_mip; 6847 mac_group_t *grp = NULL; 6848 int rv; 6849 int i; 6850 int err; 6851 mac_group_t *defgrp; 6852 mac_share_handle_t share = mcip->mci_share; 6853 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 6854 int nrings; 6855 int defnrings; 6856 boolean_t need_exclgrp = B_FALSE; 6857 int need_rings = 0; 6858 mac_group_t *candidate_grp = NULL; 6859 mac_client_impl_t *gclient; 6860 mac_resource_props_t *gmrp; 6861 boolean_t txhw = mrp->mrp_mask & MRP_TX_RINGS; 6862 boolean_t unspec = mrp->mrp_mask & MRP_TXRINGS_UNSPEC; 6863 boolean_t isprimary; 6864 6865 isprimary = mcip->mci_flent->fe_type & FLOW_PRIMARY_MAC; 6866 /* 6867 * When we come here for a VLAN on the primary (dladm create-vlan), 6868 * we need to pair it along with the primary (to keep it consistent 6869 * with the RX side). So, we check if the primary is already assigned 6870 * to a group and return the group if so. The other way is also 6871 * true, i.e. the VLAN is already created and now we are plumbing 6872 * the primary. 
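 * For example (illustrative): whether "dladm create-vlan -l net0
 * -v 100" runs before or after net0 itself is plumbed, the VLAN and
 * the primary end up sharing one TX group.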
6873 */ 6874 if (!move && isprimary) { 6875 for (gclient = mip->mi_clients_list; gclient != NULL; 6876 gclient = gclient->mci_client_next) { 6877 if (gclient->mci_flent->fe_type & FLOW_PRIMARY_MAC && 6878 gclient->mci_flent->fe_tx_ring_group != NULL) { 6879 return (gclient->mci_flent->fe_tx_ring_group); 6880 } 6881 } 6882 } 6883 6884 if (mip->mi_tx_groups == NULL || mip->mi_tx_group_count == 0) 6885 return (NULL); 6886 6887 /* For dynamic groups, default unspec to 1 */ 6888 if (txhw && unspec && 6889 mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 6890 mrp->mrp_ntxrings = 1; 6891 } 6892 /* 6893 * For static grouping we allow only specifying rings=0 and 6894 * unspecified 6895 */ 6896 if (txhw && mrp->mrp_ntxrings > 0 && 6897 mip->mi_tx_group_type == MAC_GROUP_TYPE_STATIC) { 6898 return (NULL); 6899 } 6900 6901 if (txhw) { 6902 /* 6903 * We have explicitly asked for a group (with ntxrings, 6904 * if unspec). 6905 */ 6906 if (unspec || mrp->mrp_ntxrings > 0) { 6907 need_exclgrp = B_TRUE; 6908 need_rings = mrp->mrp_ntxrings; 6909 } else if (mrp->mrp_ntxrings == 0) { 6910 /* 6911 * We have asked for a software group. 6912 */ 6913 return (NULL); 6914 } 6915 } 6916 defgrp = MAC_DEFAULT_TX_GROUP(mip); 6917 /* 6918 * The number of rings that the default group can donate. 6919 * We need to leave at least one ring - the default ring - in 6920 * this group. 6921 */ 6922 defnrings = defgrp->mrg_cur_count - 1; 6923 6924 /* 6925 * Primary gets default group unless explicitly told not 6926 * to (i.e. rings > 0). 6927 */ 6928 if (isprimary && !need_exclgrp) 6929 return (NULL); 6930 6931 nrings = (mrp->mrp_mask & MRP_TX_RINGS) != 0 ? mrp->mrp_ntxrings : 1; 6932 for (i = 0; i < mip->mi_tx_group_count; i++) { 6933 grp = &mip->mi_tx_groups[i]; 6934 if ((grp->mrg_state == MAC_GROUP_STATE_RESERVED) || 6935 (grp->mrg_state == MAC_GROUP_STATE_UNINIT)) { 6936 /* 6937 * Select a candidate for replacement if we don't 6938 * get an exclusive group. A candidate group is one 6939 * that didn't ask for an exclusive group, but got 6940 * one and it has enough rings (combined with what 6941 * the default group can donate) for the new MAC 6942 * client. 6943 */ 6944 if (grp->mrg_state == MAC_GROUP_STATE_RESERVED && 6945 candidate_grp == NULL) { 6946 gclient = MAC_GROUP_ONLY_CLIENT(grp); 6947 if (gclient == NULL) 6948 gclient = mac_get_grp_primary(grp); 6949 gmrp = MCIP_RESOURCE_PROPS(gclient); 6950 if (gclient->mci_share == NULL && 6951 (gmrp->mrp_mask & MRP_TX_RINGS) == 0 && 6952 (unspec || 6953 (grp->mrg_cur_count + defnrings) >= 6954 need_rings)) { 6955 candidate_grp = grp; 6956 } 6957 } 6958 continue; 6959 } 6960 /* 6961 * If the default can't donate let's just walk and 6962 * see if someone can vacate a group, so that we have 6963 * enough rings for this. 6964 */ 6965 if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC || 6966 nrings <= defnrings) { 6967 if (grp->mrg_state == MAC_GROUP_STATE_REGISTERED) { 6968 rv = mac_start_group(grp); 6969 ASSERT(rv == 0); 6970 } 6971 break; 6972 } 6973 } 6974 6975 /* The default group */ 6976 if (i >= mip->mi_tx_group_count) { 6977 /* 6978 * If we need an exclusive group and have identified a 6979 * candidate group we switch the MAC client from the 6980 * candidate group to the default group and give the 6981 * candidate group to this client. 6982 */ 6983 if (need_exclgrp && candidate_grp != NULL) { 6984 /* 6985 * Switch the MAC client from the candidate group 6986 * to the default group. 
6987 */ 6988 grp = candidate_grp; 6989 gclient = MAC_GROUP_ONLY_CLIENT(grp); 6990 if (gclient == NULL) 6991 gclient = mac_get_grp_primary(grp); 6992 mac_tx_client_quiesce((mac_client_handle_t)gclient); 6993 mac_tx_switch_group(gclient, grp, defgrp); 6994 mac_tx_client_restart((mac_client_handle_t)gclient); 6995 6996 /* 6997 * Give the candidate group with the specified number 6998 * of rings to this MAC client. 6999 */ 7000 ASSERT(grp->mrg_state == MAC_GROUP_STATE_REGISTERED); 7001 rv = mac_start_group(grp); 7002 ASSERT(rv == 0); 7003 7004 if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) 7005 return (grp); 7006 7007 ASSERT(grp->mrg_cur_count == 0); 7008 ASSERT(defgrp->mrg_cur_count > need_rings); 7009 7010 err = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, 7011 defgrp, grp, share, need_rings); 7012 if (err == 0) { 7013 /* 7014 * For a share i_mac_group_allocate_rings gets 7015 * the rings from the driver, let's populate 7016 * the property for the client now. 7017 */ 7018 if (share != NULL) { 7019 mac_client_set_rings( 7020 (mac_client_handle_t)mcip, -1, 7021 grp->mrg_cur_count); 7022 } 7023 mip->mi_tx_group_free--; 7024 return (grp); 7025 } 7026 DTRACE_PROBE3(tx__group__reserve__alloc__rings, char *, 7027 mip->mi_name, int, grp->mrg_index, int, err); 7028 mac_stop_group(grp); 7029 } 7030 return (NULL); 7031 } 7032 /* 7033 * We got an exclusive group, but it is not dynamic. 7034 */ 7035 if (mip->mi_tx_group_type != MAC_GROUP_TYPE_DYNAMIC) { 7036 mip->mi_tx_group_free--; 7037 return (grp); 7038 } 7039 7040 rv = i_mac_group_allocate_rings(mip, MAC_RING_TYPE_TX, defgrp, grp, 7041 share, nrings); 7042 if (rv != 0) { 7043 DTRACE_PROBE3(tx__group__reserve__alloc__rings, 7044 char *, mip->mi_name, int, grp->mrg_index, int, rv); 7045 mac_stop_group(grp); 7046 return (NULL); 7047 } 7048 /* 7049 * For a share i_mac_group_allocate_rings gets the rings from the 7050 * driver, let's populate the property for the client now. 7051 */ 7052 if (share != NULL) { 7053 mac_client_set_rings((mac_client_handle_t)mcip, -1, 7054 grp->mrg_cur_count); 7055 } 7056 mip->mi_tx_group_free--; 7057 return (grp); 7058 } 7059 7060 void 7061 mac_release_tx_group(mac_client_impl_t *mcip, mac_group_t *grp) 7062 { 7063 mac_impl_t *mip = mcip->mci_mip; 7064 mac_share_handle_t share = mcip->mci_share; 7065 mac_ring_t *ring; 7066 mac_soft_ring_set_t *srs = MCIP_TX_SRS(mcip); 7067 mac_group_t *defgrp; 7068 7069 defgrp = MAC_DEFAULT_TX_GROUP(mip); 7070 if (srs != NULL) { 7071 if (srs->srs_soft_ring_count > 0) { 7072 for (ring = grp->mrg_rings; ring != NULL; 7073 ring = ring->mr_next) { 7074 ASSERT(mac_tx_srs_ring_present(srs, ring)); 7075 mac_tx_invoke_callbacks(mcip, 7076 (mac_tx_cookie_t) 7077 mac_tx_srs_get_soft_ring(srs, ring)); 7078 mac_tx_srs_del_ring(srs, ring); 7079 } 7080 } else { 7081 ASSERT(srs->srs_tx.st_arg2 != NULL); 7082 srs->srs_tx.st_arg2 = NULL; 7083 mac_srs_stat_delete(srs); 7084 } 7085 } 7086 if (share != NULL) 7087 mip->mi_share_capab.ms_sremove(share, grp->mrg_driver); 7088 7089 /* move the ring back to the pool */ 7090 if (mip->mi_tx_group_type == MAC_GROUP_TYPE_DYNAMIC) { 7091 while ((ring = grp->mrg_rings) != NULL) 7092 (void) mac_group_mov_ring(mip, defgrp, ring); 7093 } 7094 mac_stop_group(grp); 7095 mip->mi_tx_group_free++; 7096 } 7097 7098 /* 7099 * Disassociate a MAC client from a group, i.e go through the rings in the 7100 * group and delete all the soft rings tied to them. 
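 * (In the single-ring transmit modes, SRS_TX_DEFAULT, SRS_TX_SERIALIZE
 * and SRS_TX_BW, no soft rings were created, so only st_arg2 and the
 * SRS stats need to be torn down; the fanout modes are the ones with a
 * soft ring per hardware ring to dismantle, as the code below shows.)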
7101 */ 7102 static void 7103 mac_tx_dismantle_soft_rings(mac_group_t *fgrp, flow_entry_t *flent) 7104 { 7105 mac_client_impl_t *mcip = flent->fe_mcip; 7106 mac_soft_ring_set_t *tx_srs; 7107 mac_srs_tx_t *tx; 7108 mac_ring_t *ring; 7109 7110 tx_srs = flent->fe_tx_srs; 7111 tx = &tx_srs->srs_tx; 7112 7113 /* Single ring case: we haven't created any soft rings */ 7114 if (tx->st_mode == SRS_TX_BW || tx->st_mode == SRS_TX_SERIALIZE || 7115 tx->st_mode == SRS_TX_DEFAULT) { 7116 tx->st_arg2 = NULL; 7117 mac_srs_stat_delete(tx_srs); 7118 /* Fanout case, where we have to dismantle the soft rings */ 7119 } else { 7120 for (ring = fgrp->mrg_rings; ring != NULL; 7121 ring = ring->mr_next) { 7122 ASSERT(mac_tx_srs_ring_present(tx_srs, ring)); 7123 mac_tx_invoke_callbacks(mcip, 7124 (mac_tx_cookie_t)mac_tx_srs_get_soft_ring(tx_srs, 7125 ring)); 7126 mac_tx_srs_del_ring(tx_srs, ring); 7127 } 7128 ASSERT(tx->st_arg2 == NULL); 7129 } 7130 } 7131 7132 /* 7133 * Switch the MAC client from one group to another. This means we need 7134 * to remove the MAC client, teardown the SRSs and revert the group state. 7135 * Then, we add the client to the destination group, set the SRSs etc. 7136 */ 7137 void 7138 mac_tx_switch_group(mac_client_impl_t *mcip, mac_group_t *fgrp, 7139 mac_group_t *tgrp) 7140 { 7141 mac_client_impl_t *group_only_mcip; 7142 mac_impl_t *mip = mcip->mci_mip; 7143 flow_entry_t *flent = mcip->mci_flent; 7144 mac_group_t *defgrp; 7145 mac_grp_client_t *mgcp; 7146 mac_client_impl_t *gmcip; 7147 flow_entry_t *gflent; 7148 7149 defgrp = MAC_DEFAULT_TX_GROUP(mip); 7150 ASSERT(fgrp == flent->fe_tx_ring_group); 7151 7152 if (fgrp == defgrp) { 7153 /* 7154 * If this is the primary we need to find any VLANs on 7155 * the primary and move them too. 7156 */ 7157 mac_group_remove_client(fgrp, mcip); 7158 mac_tx_dismantle_soft_rings(fgrp, flent); 7159 if (mcip->mci_unicast->ma_nusers > 1) { 7160 mgcp = fgrp->mrg_clients; 7161 while (mgcp != NULL) { 7162 gmcip = mgcp->mgc_client; 7163 mgcp = mgcp->mgc_next; 7164 if (mcip->mci_unicast != gmcip->mci_unicast) 7165 continue; 7166 mac_tx_client_quiesce( 7167 (mac_client_handle_t)gmcip); 7168 7169 gflent = gmcip->mci_flent; 7170 mac_group_remove_client(fgrp, gmcip); 7171 mac_tx_dismantle_soft_rings(fgrp, gflent); 7172 7173 mac_group_add_client(tgrp, gmcip); 7174 gflent->fe_tx_ring_group = tgrp; 7175 /* We could directly set this to SHARED */ 7176 tgrp->mrg_state = mac_group_next_state(tgrp, 7177 &group_only_mcip, defgrp, B_FALSE); 7178 7179 mac_tx_srs_group_setup(gmcip, gflent, 7180 SRST_LINK); 7181 mac_fanout_setup(gmcip, gflent, 7182 MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver, 7183 gmcip, NULL, NULL); 7184 7185 mac_tx_client_restart( 7186 (mac_client_handle_t)gmcip); 7187 } 7188 } 7189 if (MAC_GROUP_NO_CLIENT(fgrp)) { 7190 mac_ring_t *ring; 7191 int cnt; 7192 int ringcnt; 7193 7194 fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED; 7195 /* 7196 * Additionally, we also need to stop all 7197 * the rings in the default group, except 7198 * the default ring. The reason being 7199 * this group won't be released since it is 7200 * the default group, so the rings won't 7201 * be stopped otherwise.
7202 */ 7203 ringcnt = fgrp->mrg_cur_count; 7204 ring = fgrp->mrg_rings; 7205 for (cnt = 0; cnt < ringcnt; cnt++) { 7206 if (ring->mr_state == MR_INUSE && 7207 ring != 7208 (mac_ring_t *)mip->mi_default_tx_ring) { 7209 mac_stop_ring(ring); 7210 ring->mr_flag = 0; 7211 } 7212 ring = ring->mr_next; 7213 } 7214 } else if (MAC_GROUP_ONLY_CLIENT(fgrp) != NULL) { 7215 fgrp->mrg_state = MAC_GROUP_STATE_RESERVED; 7216 } else { 7217 ASSERT(fgrp->mrg_state == MAC_GROUP_STATE_SHARED); 7218 } 7219 } else { 7220 /* 7221 * We could have VLANs sharing the non-default group with 7222 * the primary. 7223 */ 7224 mgcp = fgrp->mrg_clients; 7225 while (mgcp != NULL) { 7226 gmcip = mgcp->mgc_client; 7227 mgcp = mgcp->mgc_next; 7228 if (gmcip == mcip) 7229 continue; 7230 mac_tx_client_quiesce((mac_client_handle_t)gmcip); 7231 gflent = gmcip->mci_flent; 7232 7233 mac_group_remove_client(fgrp, gmcip); 7234 mac_tx_dismantle_soft_rings(fgrp, gflent); 7235 7236 mac_group_add_client(tgrp, gmcip); 7237 gflent->fe_tx_ring_group = tgrp; 7238 /* We could directly set this to SHARED */ 7239 tgrp->mrg_state = mac_group_next_state(tgrp, 7240 &group_only_mcip, defgrp, B_FALSE); 7241 mac_tx_srs_group_setup(gmcip, gflent, SRST_LINK); 7242 mac_fanout_setup(gmcip, gflent, 7243 MCIP_RESOURCE_PROPS(gmcip), mac_rx_deliver, 7244 gmcip, NULL, NULL); 7245 7246 mac_tx_client_restart((mac_client_handle_t)gmcip); 7247 } 7248 mac_group_remove_client(fgrp, mcip); 7249 mac_release_tx_group(mcip, fgrp); 7250 fgrp->mrg_state = MAC_GROUP_STATE_REGISTERED; 7251 } 7252 7253 /* Add it to the tgroup */ 7254 mac_group_add_client(tgrp, mcip); 7255 flent->fe_tx_ring_group = tgrp; 7256 tgrp->mrg_state = mac_group_next_state(tgrp, &group_only_mcip, 7257 defgrp, B_FALSE); 7258 7259 mac_tx_srs_group_setup(mcip, flent, SRST_LINK); 7260 mac_fanout_setup(mcip, flent, MCIP_RESOURCE_PROPS(mcip), 7261 mac_rx_deliver, mcip, NULL, NULL); 7262 } 7263 7264 /* 7265 * This is a 1-time control path activity initiated by the client (IP). 7266 * The mac perimeter protects against other simultaneous control activities, 7267 * for example an ioctl that attempts to change the degree of fanout and 7268 * increase or decrease the number of softrings associated with this Tx SRS. 
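 * Illustrative use through the mac_client_tx_notify() wrapper further
 * below (my_tx_cb and my_arg are hypothetical client names):
 *
 *	handle = mac_client_tx_notify(mch, my_tx_cb, my_arg);	add
 *	(void) mac_client_tx_notify(mch, NULL, handle);		remove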
7269 */ 7270 static mac_tx_notify_cb_t * 7271 mac_client_tx_notify_add(mac_client_impl_t *mcip, 7272 mac_tx_notify_t notify, void *arg) 7273 { 7274 mac_cb_info_t *mcbi; 7275 mac_tx_notify_cb_t *mtnfp; 7276 7277 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 7278 7279 mtnfp = kmem_zalloc(sizeof (mac_tx_notify_cb_t), KM_SLEEP); 7280 mtnfp->mtnf_fn = notify; 7281 mtnfp->mtnf_arg = arg; 7282 mtnfp->mtnf_link.mcb_objp = mtnfp; 7283 mtnfp->mtnf_link.mcb_objsize = sizeof (mac_tx_notify_cb_t); 7284 mtnfp->mtnf_link.mcb_flags = MCB_TX_NOTIFY_CB_T; 7285 7286 mcbi = &mcip->mci_tx_notify_cb_info; 7287 mutex_enter(mcbi->mcbi_lockp); 7288 mac_callback_add(mcbi, &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link); 7289 mutex_exit(mcbi->mcbi_lockp); 7290 return (mtnfp); 7291 } 7292 7293 static void 7294 mac_client_tx_notify_remove(mac_client_impl_t *mcip, mac_tx_notify_cb_t *mtnfp) 7295 { 7296 mac_cb_info_t *mcbi; 7297 mac_cb_t **cblist; 7298 7299 ASSERT(MAC_PERIM_HELD((mac_handle_t)mcip->mci_mip)); 7300 7301 if (!mac_callback_find(&mcip->mci_tx_notify_cb_info, 7302 &mcip->mci_tx_notify_cb_list, &mtnfp->mtnf_link)) { 7303 cmn_err(CE_WARN, 7304 "mac_client_tx_notify_remove: callback not " 7305 "found, mcip 0x%p mtnfp 0x%p", (void *)mcip, (void *)mtnfp); 7306 return; 7307 } 7308 7309 mcbi = &mcip->mci_tx_notify_cb_info; 7310 cblist = &mcip->mci_tx_notify_cb_list; 7311 mutex_enter(mcbi->mcbi_lockp); 7312 if (mac_callback_remove(mcbi, cblist, &mtnfp->mtnf_link)) 7313 kmem_free(mtnfp, sizeof (mac_tx_notify_cb_t)); 7314 else 7315 mac_callback_remove_wait(&mcip->mci_tx_notify_cb_info); 7316 mutex_exit(mcbi->mcbi_lockp); 7317 } 7318 7319 /* 7320 * mac_client_tx_notify(): 7321 * call to add and remove flow control callback routine. 7322 */ 7323 mac_tx_notify_handle_t 7324 mac_client_tx_notify(mac_client_handle_t mch, mac_tx_notify_t callb_func, 7325 void *ptr) 7326 { 7327 mac_client_impl_t *mcip = (mac_client_impl_t *)mch; 7328 mac_tx_notify_cb_t *mtnfp = NULL; 7329 7330 i_mac_perim_enter(mcip->mci_mip); 7331 7332 if (callb_func != NULL) { 7333 /* Add a notify callback */ 7334 mtnfp = mac_client_tx_notify_add(mcip, callb_func, ptr); 7335 } else { 7336 mac_client_tx_notify_remove(mcip, (mac_tx_notify_cb_t *)ptr); 7337 } 7338 i_mac_perim_exit(mcip->mci_mip); 7339 7340 return ((mac_tx_notify_handle_t)mtnfp); 7341 } 7342 7343 void 7344 mac_bridge_vectors(mac_bridge_tx_t txf, mac_bridge_rx_t rxf, 7345 mac_bridge_ref_t reff, mac_bridge_ls_t lsf) 7346 { 7347 mac_bridge_tx_cb = txf; 7348 mac_bridge_rx_cb = rxf; 7349 mac_bridge_ref_cb = reff; 7350 mac_bridge_ls_cb = lsf; 7351 } 7352 7353 int 7354 mac_bridge_set(mac_handle_t mh, mac_handle_t link) 7355 { 7356 mac_impl_t *mip = (mac_impl_t *)mh; 7357 int retv; 7358 7359 mutex_enter(&mip->mi_bridge_lock); 7360 if (mip->mi_bridge_link == NULL) { 7361 mip->mi_bridge_link = link; 7362 retv = 0; 7363 } else { 7364 retv = EBUSY; 7365 } 7366 mutex_exit(&mip->mi_bridge_lock); 7367 if (retv == 0) { 7368 mac_poll_state_change(mh, B_FALSE); 7369 mac_capab_update(mh); 7370 } 7371 return (retv); 7372 } 7373 7374 /* 7375 * Disable bridging on the indicated link. 
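 * The caller must pass the same link it earlier handed to a
 * successful mac_bridge_set(); illustrative pairing:
 *
 *	if (mac_bridge_set(mh, link) == 0) {
 *		... bridging active ...
 *		mac_bridge_clear(mh, link);
 *	}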
7376 */ 7377 void 7378 mac_bridge_clear(mac_handle_t mh, mac_handle_t link) 7379 { 7380 mac_impl_t *mip = (mac_impl_t *)mh; 7381 7382 mutex_enter(&mip->mi_bridge_lock); 7383 ASSERT(mip->mi_bridge_link == link); 7384 mip->mi_bridge_link = NULL; 7385 mutex_exit(&mip->mi_bridge_lock); 7386 mac_poll_state_change(mh, B_TRUE); 7387 mac_capab_update(mh); 7388 } 7389 7390 void 7391 mac_no_active(mac_handle_t mh) 7392 { 7393 mac_impl_t *mip = (mac_impl_t *)mh; 7394 7395 i_mac_perim_enter(mip); 7396 mip->mi_state_flags |= MIS_NO_ACTIVE; 7397 i_mac_perim_exit(mip); 7398 } 7399 7400 /* 7401 * Walk the primary VLAN clients whenever the primary's rings property 7402 * changes and update the mac_resource_props_t for the VLAN's client. 7403 * We need to do this since we don't support setting these properties 7404 * on the primary's VLAN clients, but the VLAN clients have to 7405 * follow the primary w.r.t the rings property; 7406 */ 7407 void 7408 mac_set_prim_vlan_rings(mac_impl_t *mip, mac_resource_props_t *mrp) 7409 { 7410 mac_client_impl_t *vmcip; 7411 mac_resource_props_t *vmrp; 7412 7413 for (vmcip = mip->mi_clients_list; vmcip != NULL; 7414 vmcip = vmcip->mci_client_next) { 7415 if (!(vmcip->mci_flent->fe_type & FLOW_PRIMARY_MAC) || 7416 mac_client_vid((mac_client_handle_t)vmcip) == 7417 VLAN_ID_NONE) { 7418 continue; 7419 } 7420 vmrp = MCIP_RESOURCE_PROPS(vmcip); 7421 7422 vmrp->mrp_nrxrings = mrp->mrp_nrxrings; 7423 if (mrp->mrp_mask & MRP_RX_RINGS) 7424 vmrp->mrp_mask |= MRP_RX_RINGS; 7425 else if (vmrp->mrp_mask & MRP_RX_RINGS) 7426 vmrp->mrp_mask &= ~MRP_RX_RINGS; 7427 7428 vmrp->mrp_ntxrings = mrp->mrp_ntxrings; 7429 if (mrp->mrp_mask & MRP_TX_RINGS) 7430 vmrp->mrp_mask |= MRP_TX_RINGS; 7431 else if (vmrp->mrp_mask & MRP_TX_RINGS) 7432 vmrp->mrp_mask &= ~MRP_TX_RINGS; 7433 7434 if (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) 7435 vmrp->mrp_mask |= MRP_RXRINGS_UNSPEC; 7436 else 7437 vmrp->mrp_mask &= ~MRP_RXRINGS_UNSPEC; 7438 7439 if (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) 7440 vmrp->mrp_mask |= MRP_TXRINGS_UNSPEC; 7441 else 7442 vmrp->mrp_mask &= ~MRP_TXRINGS_UNSPEC; 7443 } 7444 } 7445 7446 /* 7447 * We are adding or removing ring(s) from a group. The source for taking 7448 * rings is the default group. The destination for giving rings back is 7449 * the default group. 7450 */ 7451 int 7452 mac_group_ring_modify(mac_client_impl_t *mcip, mac_group_t *group, 7453 mac_group_t *defgrp) 7454 { 7455 mac_resource_props_t *mrp = MCIP_RESOURCE_PROPS(mcip); 7456 uint_t modify; 7457 int count; 7458 mac_ring_t *ring; 7459 mac_ring_t *next; 7460 mac_impl_t *mip = mcip->mci_mip; 7461 mac_ring_t **rings; 7462 uint_t ringcnt; 7463 int i = 0; 7464 boolean_t rx_group = group->mrg_type == MAC_RING_TYPE_RX; 7465 int start; 7466 int end; 7467 mac_group_t *tgrp; 7468 int j; 7469 int rv = 0; 7470 7471 /* 7472 * If we are asked for just a group, we give 1 ring, else 7473 * the specified number of rings. 7474 */ 7475 if (rx_group) { 7476 ringcnt = (mrp->mrp_mask & MRP_RXRINGS_UNSPEC) ? 1: 7477 mrp->mrp_nrxrings; 7478 } else { 7479 ringcnt = (mrp->mrp_mask & MRP_TXRINGS_UNSPEC) ? 1: 7480 mrp->mrp_ntxrings; 7481 } 7482 7483 /* don't allow modifying rings for a share for now. 
*/ 7484 ASSERT(mcip->mci_share == NULL); 7485 7486 if (ringcnt == group->mrg_cur_count) 7487 return (0); 7488 7489 if (group->mrg_cur_count > ringcnt) { 7490 modify = group->mrg_cur_count - ringcnt; 7491 if (rx_group) { 7492 if (mip->mi_rx_donor_grp == group) { 7493 ASSERT(mac_is_primary_client(mcip)); 7494 mip->mi_rx_donor_grp = defgrp; 7495 } else { 7496 defgrp = mip->mi_rx_donor_grp; 7497 } 7498 } 7499 ring = group->mrg_rings; 7500 rings = kmem_alloc(modify * sizeof (mac_ring_handle_t), 7501 KM_SLEEP); 7502 j = 0; 7503 for (count = 0; count < modify; count++) { 7504 next = ring->mr_next; 7505 rv = mac_group_mov_ring(mip, defgrp, ring); 7506 if (rv != 0) { 7507 /* cleanup on failure */ 7508 for (j = 0; j < count; j++) { 7509 (void) mac_group_mov_ring(mip, group, 7510 rings[j]); 7511 } 7512 break; 7513 } 7514 rings[j++] = ring; 7515 ring = next; 7516 } 7517 kmem_free(rings, modify * sizeof (mac_ring_handle_t)); 7518 return (rv); 7519 } 7520 if (ringcnt >= MAX_RINGS_PER_GROUP) 7521 return (EINVAL); 7522 7523 modify = ringcnt - group->mrg_cur_count; 7524 7525 if (rx_group) { 7526 if (group != mip->mi_rx_donor_grp) 7527 defgrp = mip->mi_rx_donor_grp; 7528 else 7529 /* 7530 * This is the donor group with all the remaining 7531 * rings. Default group now gets to be the donor 7532 */ 7533 mip->mi_rx_donor_grp = defgrp; 7534 start = 1; 7535 end = mip->mi_rx_group_count; 7536 } else { 7537 start = 0; 7538 end = mip->mi_tx_group_count - 1; 7539 } 7540 /* 7541 * If the default doesn't have any rings, lets see if we can 7542 * take rings given to an h/w client that doesn't need it. 7543 * For now, we just see if there is any one client that can donate 7544 * all the required rings. 7545 */ 7546 if (defgrp->mrg_cur_count < (modify + 1)) { 7547 for (i = start; i < end; i++) { 7548 if (rx_group) { 7549 tgrp = &mip->mi_rx_groups[i]; 7550 if (tgrp == group || tgrp->mrg_state < 7551 MAC_GROUP_STATE_RESERVED) { 7552 continue; 7553 } 7554 mcip = MAC_GROUP_ONLY_CLIENT(tgrp); 7555 if (mcip == NULL) 7556 mcip = mac_get_grp_primary(tgrp); 7557 ASSERT(mcip != NULL); 7558 mrp = MCIP_RESOURCE_PROPS(mcip); 7559 if ((mrp->mrp_mask & MRP_RX_RINGS) != 0) 7560 continue; 7561 if ((tgrp->mrg_cur_count + 7562 defgrp->mrg_cur_count) < (modify + 1)) { 7563 continue; 7564 } 7565 if (mac_rx_switch_group(mcip, tgrp, 7566 defgrp) != 0) { 7567 return (ENOSPC); 7568 } 7569 } else { 7570 tgrp = &mip->mi_tx_groups[i]; 7571 if (tgrp == group || tgrp->mrg_state < 7572 MAC_GROUP_STATE_RESERVED) { 7573 continue; 7574 } 7575 mcip = MAC_GROUP_ONLY_CLIENT(tgrp); 7576 if (mcip == NULL) 7577 mcip = mac_get_grp_primary(tgrp); 7578 mrp = MCIP_RESOURCE_PROPS(mcip); 7579 if ((mrp->mrp_mask & MRP_TX_RINGS) != 0) 7580 continue; 7581 if ((tgrp->mrg_cur_count + 7582 defgrp->mrg_cur_count) < (modify + 1)) { 7583 continue; 7584 } 7585 /* OK, we can switch this to s/w */ 7586 mac_tx_client_quiesce( 7587 (mac_client_handle_t)mcip); 7588 mac_tx_switch_group(mcip, tgrp, defgrp); 7589 mac_tx_client_restart( 7590 (mac_client_handle_t)mcip); 7591 } 7592 } 7593 if (defgrp->mrg_cur_count < (modify + 1)) 7594 return (ENOSPC); 7595 } 7596 if ((rv = i_mac_group_allocate_rings(mip, group->mrg_type, defgrp, 7597 group, mcip->mci_share, modify)) != 0) { 7598 return (rv); 7599 } 7600 return (0); 7601 } 7602 7603 /* 7604 * Given the poolname in mac_resource_props, find the cpupart 7605 * that is associated with this pool. The cpupart will be used 7606 * later for finding the cpus to be bound to the networking threads. 
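 * For example (illustrative; "mypool" is a hypothetical pool name):
 * with pools enabled and the link's pool property set to "mypool",
 * the cpupart backing mypool's pset is returned; if mypool has since
 * been destroyed, the cpupart of pool_default is returned and
 * *use_default is set to B_TRUE.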
7607 * 7608 * use_default is set B_TRUE if pools are enabled and pool_default 7609 * is returned. This avoids a 2nd lookup to set the poolname 7610 * for pool-effective. 7611 * 7612 * returns: 7613 * 7614 * NULL - pools are disabled or if the 'cpus' property is set. 7615 * cpupart of pool_default - pools are enabled and the pool 7616 * is not available or poolname is blank 7617 * cpupart of named pool - pools are enabled and the pool 7618 * is available. 7619 */ 7620 cpupart_t * 7621 mac_pset_find(mac_resource_props_t *mrp, boolean_t *use_default) 7622 { 7623 pool_t *pool; 7624 cpupart_t *cpupart; 7625 7626 *use_default = B_FALSE; 7627 7628 /* CPUs property is set */ 7629 if (mrp->mrp_mask & MRP_CPUS) 7630 return (NULL); 7631 7632 ASSERT(pool_lock_held()); 7633 7634 /* Pools are disabled, no pset */ 7635 if (pool_state == POOL_DISABLED) 7636 return (NULL); 7637 7638 /* Pools property is set */ 7639 if (mrp->mrp_mask & MRP_POOL) { 7640 if ((pool = pool_lookup_pool_by_name(mrp->mrp_pool)) == NULL) { 7641 /* Pool not found */ 7642 DTRACE_PROBE1(mac_pset_find_no_pool, char *, 7643 mrp->mrp_pool); 7644 *use_default = B_TRUE; 7645 pool = pool_default; 7646 } 7647 /* Pools property is not set */ 7648 } else { 7649 *use_default = B_TRUE; 7650 pool = pool_default; 7651 } 7652 7653 /* Find the CPU pset that corresponds to the pool */ 7654 mutex_enter(&cpu_lock); 7655 if ((cpupart = cpupart_find(pool->pool_pset->pset_id)) == NULL) { 7656 DTRACE_PROBE1(mac_find_pset_no_pset, psetid_t, 7657 pool->pool_pset->pset_id); 7658 } 7659 mutex_exit(&cpu_lock); 7660 7661 return (cpupart); 7662 } 7663 7664 void 7665 mac_set_pool_effective(boolean_t use_default, cpupart_t *cpupart, 7666 mac_resource_props_t *mrp, mac_resource_props_t *emrp) 7667 { 7668 ASSERT(pool_lock_held()); 7669 7670 if (cpupart != NULL) { 7671 emrp->mrp_mask |= MRP_POOL; 7672 if (use_default) { 7673 (void) strcpy(emrp->mrp_pool, 7674 "pool_default"); 7675 } else { 7676 ASSERT(strlen(mrp->mrp_pool) != 0); 7677 (void) strcpy(emrp->mrp_pool, 7678 mrp->mrp_pool); 7679 } 7680 } else { 7681 emrp->mrp_mask &= ~MRP_POOL; 7682 bzero(emrp->mrp_pool, MAXPATHLEN); 7683 } 7684 } 7685 7686 struct mac_pool_arg { 7687 char mpa_poolname[MAXPATHLEN]; 7688 pool_event_t mpa_what; 7689 }; 7690 7691 /*ARGSUSED*/ 7692 static uint_t 7693 mac_pool_link_update(mod_hash_key_t key, mod_hash_val_t *val, void *arg) 7694 { 7695 struct mac_pool_arg *mpa = arg; 7696 mac_impl_t *mip = (mac_impl_t *)val; 7697 mac_client_impl_t *mcip; 7698 mac_resource_props_t *mrp, *emrp; 7699 boolean_t pool_update = B_FALSE; 7700 boolean_t pool_clear = B_FALSE; 7701 boolean_t use_default = B_FALSE; 7702 cpupart_t *cpupart = NULL; 7703 7704 mrp = kmem_zalloc(sizeof (*mrp), KM_SLEEP); 7705 i_mac_perim_enter(mip); 7706 for (mcip = mip->mi_clients_list; mcip != NULL; 7707 mcip = mcip->mci_client_next) { 7708 pool_update = B_FALSE; 7709 pool_clear = B_FALSE; 7710 use_default = B_FALSE; 7711 mac_client_get_resources((mac_client_handle_t)mcip, mrp); 7712 emrp = MCIP_EFFECTIVE_PROPS(mcip); 7713 7714 /* 7715 * When pools are enabled 7716 */ 7717 if ((mpa->mpa_what == POOL_E_ENABLE) && 7718 ((mrp->mrp_mask & MRP_CPUS) == 0)) { 7719 mrp->mrp_mask |= MRP_POOL; 7720 pool_update = B_TRUE; 7721 } 7722 7723 /* 7724 * When pools are disabled 7725 */ 7726 if ((mpa->mpa_what == POOL_E_DISABLE) && 7727 ((mrp->mrp_mask & MRP_CPUS) == 0)) { 7728 mrp->mrp_mask |= MRP_POOL; 7729 pool_clear = B_TRUE; 7730 } 7731 7732 /* 7733 * Look for links with the pool property set and the poolname 7734 * matching the one which is 
changing. 7735 */ 7736 if (strcmp(mrp->mrp_pool, mpa->mpa_poolname) == 0) { 7737 /* 7738 * The pool associated with the link has changed. 7739 */ 7740 if (mpa->mpa_what == POOL_E_CHANGE) { 7741 mrp->mrp_mask |= MRP_POOL; 7742 pool_update = B_TRUE; 7743 } 7744 } 7745 7746 /* 7747 * This link is associated with pool_default and 7748 * pool_default has changed. 7749 */ 7750 if ((mpa->mpa_what == POOL_E_CHANGE) && 7751 (strcmp(emrp->mrp_pool, "pool_default") == 0) && 7752 (strcmp(mpa->mpa_poolname, "pool_default") == 0)) { 7753 mrp->mrp_mask |= MRP_POOL; 7754 pool_update = B_TRUE; 7755 } 7756 7757 /* 7758 * Get new list of cpus for the pool, bind network 7759 * threads to new list of cpus and update resources. 7760 */ 7761 if (pool_update) { 7762 if (MCIP_DATAPATH_SETUP(mcip)) { 7763 pool_lock(); 7764 cpupart = mac_pset_find(mrp, &use_default); 7765 mac_fanout_setup(mcip, mcip->mci_flent, mrp, 7766 mac_rx_deliver, mcip, NULL, cpupart); 7767 mac_set_pool_effective(use_default, cpupart, 7768 mrp, emrp); 7769 pool_unlock(); 7770 } 7771 mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), 7772 B_FALSE); 7773 } 7774 7775 /* 7776 * Clear the effective pool and bind network threads 7777 * to any available CPU. 7778 */ 7779 if (pool_clear) { 7780 if (MCIP_DATAPATH_SETUP(mcip)) { 7781 emrp->mrp_mask &= ~MRP_POOL; 7782 bzero(emrp->mrp_pool, MAXPATHLEN); 7783 mac_fanout_setup(mcip, mcip->mci_flent, mrp, 7784 mac_rx_deliver, mcip, NULL, NULL); 7785 } 7786 mac_update_resources(mrp, MCIP_RESOURCE_PROPS(mcip), 7787 B_FALSE); 7788 } 7789 } 7790 i_mac_perim_exit(mip); 7791 kmem_free(mrp, sizeof (*mrp)); 7792 return (MH_WALK_CONTINUE); 7793 } 7794 7795 static void 7796 mac_pool_update(void *arg) 7797 { 7798 mod_hash_walk(i_mac_impl_hash, mac_pool_link_update, arg); 7799 kmem_free(arg, sizeof (struct mac_pool_arg)); 7800 } 7801 7802 /* 7803 * Callback function to be executed when a noteworthy pool event 7804 * takes place. 7805 */ 7806 /* ARGSUSED */ 7807 static void 7808 mac_pool_event_cb(pool_event_t what, poolid_t id, void *arg) 7809 { 7810 pool_t *pool; 7811 char *poolname = NULL; 7812 struct mac_pool_arg *mpa; 7813 7814 pool_lock(); 7815 mpa = kmem_zalloc(sizeof (struct mac_pool_arg), KM_SLEEP); 7816 7817 switch (what) { 7818 case POOL_E_ENABLE: 7819 case POOL_E_DISABLE: 7820 break; 7821 7822 case POOL_E_CHANGE: 7823 pool = pool_lookup_pool_by_id(id); 7824 if (pool == NULL) { 7825 kmem_free(mpa, sizeof (struct mac_pool_arg)); 7826 pool_unlock(); 7827 return; 7828 } 7829 pool_get_name(pool, &poolname); 7830 (void) strlcpy(mpa->mpa_poolname, poolname, 7831 sizeof (mpa->mpa_poolname)); 7832 break; 7833 7834 default: 7835 kmem_free(mpa, sizeof (struct mac_pool_arg)); 7836 pool_unlock(); 7837 return; 7838 } 7839 pool_unlock(); 7840 7841 mpa->mpa_what = what; 7842 7843 mac_pool_update(mpa); 7844 } 7845 7846 /* 7847 * Set effective rings property. This could be called from datapath_setup/ 7848 * datapath_teardown or set-linkprop. 7849 * If the group is reserved we just go ahead and set the effective rings. 7850 * Additionally, for TX this could mean the default group has lost/gained 7851 * some rings, so if the default group is reserved, we need to adjust the 7852 * effective rings for the default group clients. For RX, if we are working 7853 * with the non-default group, we just need * to reset the effective props 7854 * for the default group clients. 

/*
 * Set the effective rings property. This could be called from
 * datapath_setup/datapath_teardown or set-linkprop.
 * If the group is reserved we just go ahead and set the effective rings.
 * Additionally, for TX this could mean the default group has lost/gained
 * some rings, so if the default group is reserved, we need to adjust the
 * effective rings for the default group clients. For RX, if we are working
 * with the non-default group, we just need to reset the effective props
 * for the default group clients.
 */
void
mac_set_rings_effective(mac_client_impl_t *mcip)
{
	mac_impl_t		*mip = mcip->mci_mip;
	mac_group_t		*grp;
	mac_group_t		*defgrp;
	flow_entry_t		*flent = mcip->mci_flent;
	mac_resource_props_t	*emrp = MCIP_EFFECTIVE_PROPS(mcip);
	mac_grp_client_t	*mgcp;
	mac_client_impl_t	*gmcip;

	grp = flent->fe_rx_ring_group;
	if (grp != NULL) {
		defgrp = MAC_DEFAULT_RX_GROUP(mip);
		/*
		 * If we have reserved a group, set the effective rings
		 * to the ring count in the group.
		 */
		if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) {
			emrp->mrp_mask |= MRP_RX_RINGS;
			emrp->mrp_nrxrings = grp->mrg_cur_count;
		}

		/*
		 * We go through the clients in the shared group and
		 * reset the effective properties. It is possible this
		 * might have already been done for some client (i.e.
		 * if some client is being moved to a group that is
		 * already shared). The case where the default group is
		 * RESERVED is taken care of above (note that on the RX
		 * side, if there is a non-default group, the default
		 * group is always SHARED).
		 */
		if (grp != defgrp || grp->mrg_state == MAC_GROUP_STATE_SHARED) {
			if (grp->mrg_state == MAC_GROUP_STATE_SHARED)
				mgcp = grp->mrg_clients;
			else
				mgcp = defgrp->mrg_clients;
			while (mgcp != NULL) {
				gmcip = mgcp->mgc_client;
				emrp = MCIP_EFFECTIVE_PROPS(gmcip);
				if (emrp->mrp_mask & MRP_RX_RINGS) {
					emrp->mrp_mask &= ~MRP_RX_RINGS;
					emrp->mrp_nrxrings = 0;
				}
				mgcp = mgcp->mgc_next;
			}
		}
	}

	/* Now the TX side */
	grp = flent->fe_tx_ring_group;
	if (grp != NULL) {
		defgrp = MAC_DEFAULT_TX_GROUP(mip);

		if (grp->mrg_state == MAC_GROUP_STATE_RESERVED) {
			emrp->mrp_mask |= MRP_TX_RINGS;
			emrp->mrp_ntxrings = grp->mrg_cur_count;
		} else if (grp->mrg_state == MAC_GROUP_STATE_SHARED) {
			mgcp = grp->mrg_clients;
			while (mgcp != NULL) {
				gmcip = mgcp->mgc_client;
				emrp = MCIP_EFFECTIVE_PROPS(gmcip);
				if (emrp->mrp_mask & MRP_TX_RINGS) {
					emrp->mrp_mask &= ~MRP_TX_RINGS;
					emrp->mrp_ntxrings = 0;
				}
				mgcp = mgcp->mgc_next;
			}
		}

		/*
		 * If the group is not the default group and the default
		 * group is reserved, the ring count in the default group
		 * might have changed, so update it.
		 */
		if (grp != defgrp &&
		    defgrp->mrg_state == MAC_GROUP_STATE_RESERVED) {
			gmcip = MAC_GROUP_ONLY_CLIENT(defgrp);
			emrp = MCIP_EFFECTIVE_PROPS(gmcip);
			emrp->mrp_ntxrings = defgrp->mrg_cur_count;
		}
	}
}
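
/*
 * In summary, the effective ring properties set above track the group
 * state (same rules for RX and TX):
 *
 *	group state	MRP_RX_RINGS/MRP_TX_RINGS	effective ring count
 *	RESERVED	set				mrg_cur_count
 *	SHARED		cleared				0
 *
 * In addition, on the TX side, the only client of a reserved default
 * group has its effective ring count refreshed, since the default
 * group may have lost or gained rings.
 */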

/*
 * Check if the primary is in the default group. If so, see if we
 * can give it an exclusive group now that another client is
 * being configured. We take the primary out of the default group
 * because the multicast/broadcast packets for all the clients
 * will land in the default ring in the default group, which means
 * any client in the default group, even if it is the only one in
 * the group, will lose exclusive access to the rings and hence
 * polling.
 */
mac_client_impl_t *
mac_check_primary_relocation(mac_client_impl_t *mcip, boolean_t rxhw)
{
	mac_impl_t		*mip = mcip->mci_mip;
	mac_group_t		*defgrp = MAC_DEFAULT_RX_GROUP(mip);
	flow_entry_t		*flent = mcip->mci_flent;
	mac_resource_props_t	*mrp = MCIP_RESOURCE_PROPS(mcip);
	uint8_t			*mac_addr;
	mac_group_t		*ngrp;

	/*
	 * Return if the primary is not in the default group, or if it
	 * has explicitly set the RX rings property.
	 */
	if (flent->fe_rx_ring_group != defgrp || mrp->mrp_mask & MRP_RX_RINGS)
		return (NULL);

	/*
	 * If the new client needs an exclusive group and we
	 * don't have another for the primary, return.
	 */
	if (rxhw && mip->mi_rxhwclnt_avail < 2)
		return (NULL);

	mac_addr = flent->fe_flow_desc.fd_dst_mac;
	/*
	 * We call this when we are setting up the datapath for
	 * the first non-primary.
	 */
	ASSERT(mip->mi_nactiveclients == 2);

	/*
	 * OK, now we have the primary that needs to be relocated.
	 */
	ngrp = mac_reserve_rx_group(mcip, mac_addr, B_TRUE);
	if (ngrp == NULL)
		return (NULL);
	if (mac_rx_switch_group(mcip, defgrp, ngrp) != 0) {
		mac_stop_group(ngrp);
		return (NULL);
	}
	return (mcip);
}
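
/*
 * Decision summary for mac_check_primary_relocation() (a restatement of
 * the checks above):
 *
 *	primary not in default group, or rx-rings set	-> NULL, no move
 *	rxhw and fewer than 2 hw clients available	-> NULL, no move
 *	no exclusive rx group can be reserved		-> NULL, no move
 *	mac_rx_switch_group() fails			-> NULL, new group
 *							   stopped
 *	otherwise					-> mcip, primary now
 *							   in its own group
 */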