1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/note.h> 27 #include <sys/sysmacros.h> 28 #include <sys/types.h> 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/kmem.h> 32 #include <sys/cmn_err.h> 33 #include <sys/debug.h> 34 #include <sys/ddi.h> 35 #include <sys/sunndi.h> 36 #include <sys/ndi_impldefs.h> /* include prototypes */ 37 38 /* 39 * Interrupt Resource Management (IRM). 40 */ 41 42 #define DDI_IRM_BALANCE_DELAY (60) /* In seconds */ 43 44 #define DDI_IRM_HAS_CB(c) ((c) && (c->cb_flags & DDI_CB_FLAG_INTR)) 45 46 #define DDI_IRM_IS_REDUCIBLE(r) (((r->ireq_flags & DDI_IRM_FLAG_CALLBACK) && \ 47 (r->ireq_type == DDI_INTR_TYPE_MSIX)) || \ 48 (r->ireq_flags & DDI_IRM_FLAG_NEW)) 49 50 extern pri_t minclsyspri; 51 52 /* Global policies */ 53 int irm_enable = 1; 54 boolean_t irm_active = B_FALSE; 55 int irm_default_policy = DDI_IRM_POLICY_LARGE; 56 uint_t irm_balance_delay = DDI_IRM_BALANCE_DELAY; 57 58 /* Global list of interrupt pools */ 59 kmutex_t irm_pools_lock; 60 list_t irm_pools_list; 61 62 /* Global debug tunables */ 63 #ifdef DEBUG 64 int irm_debug_policy = 0; 65 uint_t irm_debug_size = 0; 66 #endif /* DEBUG */ 67 68 static void irm_balance_thread(ddi_irm_pool_t *); 69 static void i_ddi_irm_balance(ddi_irm_pool_t *); 70 static void i_ddi_irm_enqueue(ddi_irm_pool_t *, boolean_t); 71 static void i_ddi_irm_reduce(ddi_irm_pool_t *pool); 72 static int i_ddi_irm_reduce_by_policy(ddi_irm_pool_t *, int, int); 73 static void i_ddi_irm_reduce_new(ddi_irm_pool_t *, int); 74 static void i_ddi_irm_insertion_sort(list_t *, ddi_irm_req_t *); 75 static int i_ddi_irm_notify(ddi_irm_pool_t *, ddi_irm_req_t *); 76 77 /* 78 * OS Initialization Routines 79 */ 80 81 /* 82 * irm_init() 83 * 84 * Initialize IRM subsystem before any drivers are attached. 85 */ 86 void 87 irm_init(void) 88 { 89 /* Do nothing if IRM is disabled */ 90 if (!irm_enable) 91 return; 92 93 /* Verify that the default balancing policy is valid */ 94 if (!DDI_IRM_POLICY_VALID(irm_default_policy)) 95 irm_default_policy = DDI_IRM_POLICY_LARGE; 96 97 /* Initialize the global list of interrupt pools */ 98 mutex_init(&irm_pools_lock, NULL, MUTEX_DRIVER, NULL); 99 list_create(&irm_pools_list, sizeof (ddi_irm_pool_t), 100 offsetof(ddi_irm_pool_t, ipool_link)); 101 } 102 103 /* 104 * i_ddi_irm_poststartup() 105 * 106 * IRM is not activated until after the IO subsystem is initialized. 107 * When activated, per-pool balancing threads are spawned and a flag 108 * is set so that all future pools will be activated when created. 109 * 110 * NOTE: the global variable 'irm_enable' disables IRM if zero. 111 */ 112 void 113 i_ddi_irm_poststartup(void) 114 { 115 ddi_irm_pool_t *pool_p; 116 117 /* Do nothing if IRM is disabled */ 118 if (!irm_enable) 119 return; 120 121 /* Lock the global list */ 122 mutex_enter(&irm_pools_lock); 123 124 /* Activate all defined pools */ 125 for (pool_p = list_head(&irm_pools_list); pool_p; 126 pool_p = list_next(&irm_pools_list, pool_p)) 127 pool_p->ipool_thread = thread_create(NULL, 0, 128 irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri); 129 130 /* Set future pools to be active */ 131 irm_active = B_TRUE; 132 133 /* Unlock the global list */ 134 mutex_exit(&irm_pools_lock); 135 } 136 137 /* 138 * NDI interfaces for creating/destroying IRM pools. 139 */ 140 141 /* 142 * ndi_irm_create() 143 * 144 * Nexus interface to create an IRM pool. Create the new 145 * pool and add it to the global list of interrupt pools. 146 */ 147 int 148 ndi_irm_create(dev_info_t *dip, ddi_irm_params_t *paramsp, 149 ddi_irm_pool_t **pool_retp) 150 { 151 ddi_irm_pool_t *pool_p; 152 153 ASSERT(dip != NULL); 154 ASSERT(paramsp != NULL); 155 ASSERT(pool_retp != NULL); 156 ASSERT(paramsp->iparams_total >= 1); 157 ASSERT(paramsp->iparams_types != 0); 158 159 DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_create: dip %p\n", (void *)dip)); 160 161 /* Check if IRM is enabled */ 162 if (!irm_enable) 163 return (NDI_FAILURE); 164 165 /* Validate parameters */ 166 if ((dip == NULL) || (paramsp == NULL) || (pool_retp == NULL) || 167 (paramsp->iparams_total < 1) || (paramsp->iparams_types == 0)) 168 return (NDI_FAILURE); 169 170 /* Allocate and initialize the pool */ 171 pool_p = kmem_zalloc(sizeof (ddi_irm_pool_t), KM_SLEEP); 172 pool_p->ipool_owner = dip; 173 pool_p->ipool_policy = irm_default_policy; 174 pool_p->ipool_types = paramsp->iparams_types; 175 pool_p->ipool_totsz = paramsp->iparams_total; 176 pool_p->ipool_defsz = MIN(DDI_MAX_MSIX_ALLOC, MAX(DDI_MIN_MSIX_ALLOC, 177 paramsp->iparams_total / DDI_MSIX_ALLOC_DIVIDER)); 178 list_create(&pool_p->ipool_req_list, sizeof (ddi_irm_req_t), 179 offsetof(ddi_irm_req_t, ireq_link)); 180 list_create(&pool_p->ipool_scratch_list, sizeof (ddi_irm_req_t), 181 offsetof(ddi_irm_req_t, ireq_scratch_link)); 182 cv_init(&pool_p->ipool_cv, NULL, CV_DRIVER, NULL); 183 mutex_init(&pool_p->ipool_lock, NULL, MUTEX_DRIVER, NULL); 184 mutex_init(&pool_p->ipool_navail_lock, NULL, MUTEX_DRIVER, NULL); 185 186 /* Add to global list of pools */ 187 mutex_enter(&irm_pools_lock); 188 list_insert_tail(&irm_pools_list, pool_p); 189 mutex_exit(&irm_pools_lock); 190 191 /* If IRM is active, then activate the pool */ 192 if (irm_active) 193 pool_p->ipool_thread = thread_create(NULL, 0, 194 irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri); 195 196 *pool_retp = pool_p; 197 return (NDI_SUCCESS); 198 } 199 200 /* 201 * ndi_irm_destroy() 202 * 203 * Nexus interface to destroy an IRM pool. Destroy the pool 204 * and remove it from the global list of interrupt pools. 205 */ 206 int 207 ndi_irm_destroy(ddi_irm_pool_t *pool_p) 208 { 209 ASSERT(pool_p != NULL); 210 ASSERT(pool_p->ipool_resno == 0); 211 212 DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_destroy: pool_p %p\n", 213 (void *)pool_p)); 214 215 /* Validate parameters */ 216 if (pool_p == NULL) 217 return (NDI_FAILURE); 218 219 /* Validate that pool is empty */ 220 if (pool_p->ipool_resno != 0) 221 return (NDI_BUSY); 222 223 /* Remove the pool from the global list */ 224 mutex_enter(&irm_pools_lock); 225 list_remove(&irm_pools_list, pool_p); 226 mutex_exit(&irm_pools_lock); 227 228 /* Terminate the balancing thread */ 229 mutex_enter(&pool_p->ipool_lock); 230 if (pool_p->ipool_thread && 231 (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) { 232 pool_p->ipool_flags |= DDI_IRM_FLAG_EXIT; 233 cv_signal(&pool_p->ipool_cv); 234 mutex_exit(&pool_p->ipool_lock); 235 thread_join(pool_p->ipool_thread->t_did); 236 } else 237 mutex_exit(&pool_p->ipool_lock); 238 239 /* Destroy the pool */ 240 cv_destroy(&pool_p->ipool_cv); 241 mutex_destroy(&pool_p->ipool_lock); 242 mutex_destroy(&pool_p->ipool_navail_lock); 243 list_destroy(&pool_p->ipool_req_list); 244 list_destroy(&pool_p->ipool_scratch_list); 245 kmem_free(pool_p, sizeof (ddi_irm_pool_t)); 246 247 return (NDI_SUCCESS); 248 } 249 250 /* 251 * Insert/Modify/Remove Interrupt Requests 252 */ 253 254 /* 255 * i_ddi_irm_insert() 256 * 257 * Insert a new request into an interrupt pool, and balance the pool. 258 */ 259 int 260 i_ddi_irm_insert(dev_info_t *dip, int type, int count) 261 { 262 ddi_cb_t *cb_p; 263 ddi_irm_req_t *req_p; 264 devinfo_intr_t *intr_p; 265 ddi_irm_pool_t *pool_p; 266 uint_t nreq, nmin, npartial; 267 boolean_t irm_flag = B_FALSE; 268 269 ASSERT(dip != NULL); 270 ASSERT(DDI_INTR_TYPE_FLAG_VALID(type)); 271 ASSERT(count > 0); 272 273 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: dip %p type %d count %d\n", 274 (void *)dip, type, count)); 275 276 /* Validate parameters */ 277 if ((dip == NULL) || (count < 1) || !DDI_INTR_TYPE_FLAG_VALID(type)) { 278 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: invalid args\n")); 279 return (DDI_EINVAL); 280 } 281 282 /* Check for an existing request */ 283 if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) && 284 (intr_p->devi_irm_req_p != NULL)) 285 return (DDI_SUCCESS); 286 287 /* Check for IRM support from the system */ 288 if ((pool_p = i_ddi_intr_get_pool(dip, type)) == NULL) { 289 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: not supported\n")); 290 return (DDI_ENOTSUP); 291 } 292 293 /* Check for IRM support from the driver */ 294 if (((cb_p = DEVI(dip)->devi_cb_p) != NULL) && DDI_IRM_HAS_CB(cb_p) && 295 (type == DDI_INTR_TYPE_MSIX)) 296 irm_flag = B_TRUE; 297 298 /* Determine request size */ 299 nreq = (irm_flag) ? count : 300 MIN(count, i_ddi_intr_get_current_navail(dip, type)); 301 nmin = (irm_flag) ? 1 : nreq; 302 npartial = MIN(nreq, pool_p->ipool_defsz); 303 304 /* Allocate and initialize the request */ 305 req_p = kmem_zalloc(sizeof (ddi_irm_req_t), KM_SLEEP); 306 req_p->ireq_type = type; 307 req_p->ireq_dip = dip; 308 req_p->ireq_pool_p = pool_p; 309 req_p->ireq_nreq = nreq; 310 req_p->ireq_flags = DDI_IRM_FLAG_NEW; 311 if (DDI_IRM_HAS_CB(cb_p)) 312 req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK; 313 314 /* Lock the pool */ 315 mutex_enter(&pool_p->ipool_lock); 316 317 /* Check for minimal fit before inserting */ 318 if ((pool_p->ipool_minno + nmin) > pool_p->ipool_totsz) { 319 cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n", 320 ddi_driver_name(dip), ddi_get_instance(dip)); 321 mutex_exit(&pool_p->ipool_lock); 322 kmem_free(req_p, sizeof (ddi_irm_req_t)); 323 return (DDI_EAGAIN); 324 } 325 326 /* Insert the request into the pool */ 327 pool_p->ipool_reqno += nreq; 328 pool_p->ipool_minno += nmin; 329 i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p); 330 331 /* 332 * Try to fulfill the request. 333 * 334 * If all the interrupts are available, and either the request 335 * is static or the pool is active, then just take them directly. 336 * 337 * If only some of the interrupts are available, and the request 338 * can receive future callbacks, then take some now but queue the 339 * pool to be rebalanced later. 340 * 341 * Otherwise, immediately rebalance the pool and wait. 342 */ 343 if ((!irm_flag || (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) && 344 ((pool_p->ipool_resno + nreq) <= pool_p->ipool_totsz)) { 345 346 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: " 347 "request completely fulfilled.\n")); 348 pool_p->ipool_resno += nreq; 349 req_p->ireq_navail = nreq; 350 req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW); 351 352 } else if (irm_flag && 353 ((pool_p->ipool_resno + npartial) <= pool_p->ipool_totsz)) { 354 355 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: " 356 "request partially fulfilled.\n")); 357 pool_p->ipool_resno += npartial; 358 req_p->ireq_navail = npartial; 359 req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW); 360 i_ddi_irm_enqueue(pool_p, B_FALSE); 361 362 } else { 363 364 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: " 365 "request needs immediate rebalance.\n")); 366 i_ddi_irm_enqueue(pool_p, B_TRUE); 367 req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW); 368 } 369 370 /* Fail if the request cannot be fulfilled at all */ 371 if (req_p->ireq_navail == 0) { 372 cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n", 373 ddi_driver_name(dip), ddi_get_instance(dip)); 374 pool_p->ipool_reqno -= nreq; 375 pool_p->ipool_minno -= nmin; 376 list_remove(&pool_p->ipool_req_list, req_p); 377 mutex_exit(&pool_p->ipool_lock); 378 kmem_free(req_p, sizeof (ddi_irm_req_t)); 379 return (DDI_EAGAIN); 380 } 381 382 /* Unlock the pool */ 383 mutex_exit(&pool_p->ipool_lock); 384 385 intr_p->devi_irm_req_p = req_p; 386 return (DDI_SUCCESS); 387 } 388 389 /* 390 * i_ddi_irm_modify() 391 * 392 * Modify an existing request in an interrupt pool, and balance the pool. 393 */ 394 int 395 i_ddi_irm_modify(dev_info_t *dip, int nreq) 396 { 397 devinfo_intr_t *intr_p; 398 ddi_irm_req_t *req_p; 399 ddi_irm_pool_t *pool_p; 400 401 ASSERT(dip != NULL); 402 403 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: dip %p nreq %d\n", 404 (void *)dip, nreq)); 405 406 /* Validate parameters */ 407 if ((dip == NULL) || (nreq < 1)) { 408 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n")); 409 return (DDI_EINVAL); 410 } 411 412 /* Check that the operation is supported */ 413 if (!(intr_p = DEVI(dip)->devi_intr_p) || 414 !(req_p = intr_p->devi_irm_req_p) || 415 !DDI_IRM_IS_REDUCIBLE(req_p)) { 416 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: not supported\n")); 417 return (DDI_ENOTSUP); 418 } 419 420 /* Validate request size is not too large */ 421 if (nreq > intr_p->devi_intr_sup_nintrs) { 422 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n")); 423 return (DDI_EINVAL); 424 } 425 426 /* 427 * Modify request, but only if new size is different. 428 */ 429 if (nreq != req_p->ireq_nreq) { 430 431 /* Lock the pool */ 432 pool_p = req_p->ireq_pool_p; 433 mutex_enter(&pool_p->ipool_lock); 434 435 /* Update pool and request */ 436 pool_p->ipool_reqno -= req_p->ireq_nreq; 437 pool_p->ipool_reqno += nreq; 438 req_p->ireq_nreq = nreq; 439 440 /* Re-sort request in the pool */ 441 list_remove(&pool_p->ipool_req_list, req_p); 442 i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p); 443 444 /* Queue pool to be rebalanced */ 445 i_ddi_irm_enqueue(pool_p, B_FALSE); 446 447 /* Unlock the pool */ 448 mutex_exit(&pool_p->ipool_lock); 449 } 450 451 return (DDI_SUCCESS); 452 } 453 454 /* 455 * i_ddi_irm_remove() 456 * 457 * Remove a request from an interrupt pool, and balance the pool. 458 */ 459 int 460 i_ddi_irm_remove(dev_info_t *dip) 461 { 462 devinfo_intr_t *intr_p; 463 ddi_irm_pool_t *pool_p; 464 ddi_irm_req_t *req_p; 465 uint_t nmin; 466 467 ASSERT(dip != NULL); 468 469 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: dip %p\n", (void *)dip)); 470 471 /* Validate parameters */ 472 if (dip == NULL) { 473 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: invalid args\n")); 474 return (DDI_EINVAL); 475 } 476 477 /* Check if the device has a request */ 478 if (!(intr_p = DEVI(dip)->devi_intr_p) || 479 !(req_p = intr_p->devi_irm_req_p)) { 480 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: not found\n")); 481 return (DDI_EINVAL); 482 } 483 484 /* Lock the pool */ 485 pool_p = req_p->ireq_pool_p; 486 mutex_enter(&pool_p->ipool_lock); 487 488 /* Remove request */ 489 nmin = DDI_IRM_IS_REDUCIBLE(req_p) ? 1 : req_p->ireq_nreq; 490 pool_p->ipool_minno -= nmin; 491 pool_p->ipool_reqno -= req_p->ireq_nreq; 492 pool_p->ipool_resno -= req_p->ireq_navail; 493 list_remove(&pool_p->ipool_req_list, req_p); 494 495 /* Queue pool to be rebalanced */ 496 i_ddi_irm_enqueue(pool_p, B_FALSE); 497 498 /* Unlock the pool */ 499 mutex_exit(&pool_p->ipool_lock); 500 501 /* Destroy the request */ 502 intr_p->devi_irm_req_p = NULL; 503 kmem_free(req_p, sizeof (ddi_irm_req_t)); 504 505 return (DDI_SUCCESS); 506 } 507 508 /* 509 * i_ddi_irm_set_cb() 510 * 511 * Change the callback flag for a request, in response to 512 * a change in its callback registration. Then rebalance 513 * the interrupt pool. 514 * 515 * NOTE: the request is not locked because the navail value 516 * is not directly affected. The balancing thread may 517 * modify the navail value in the background after it 518 * locks the request itself. 519 */ 520 void 521 i_ddi_irm_set_cb(dev_info_t *dip, boolean_t has_cb_flag) 522 { 523 devinfo_intr_t *intr_p; 524 ddi_irm_pool_t *pool_p; 525 ddi_irm_req_t *req_p; 526 uint_t nreq; 527 528 ASSERT(dip != NULL); 529 530 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: dip %p has_cb_flag %d\n", 531 (void *)dip, (int)has_cb_flag)); 532 533 /* Validate parameters */ 534 if (dip == NULL) 535 return; 536 537 /* Check for association with interrupt pool */ 538 if (!(intr_p = DEVI(dip)->devi_intr_p) || 539 !(req_p = intr_p->devi_irm_req_p)) { 540 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: not in pool\n")); 541 return; 542 } 543 544 /* Lock the pool */ 545 pool_p = req_p->ireq_pool_p; 546 mutex_enter(&pool_p->ipool_lock); 547 548 /* 549 * Update the request and the pool 550 */ 551 if (has_cb_flag) { 552 553 /* Update pool statistics */ 554 if (req_p->ireq_type == DDI_INTR_TYPE_MSIX) 555 pool_p->ipool_minno -= (req_p->ireq_nreq - 1); 556 557 /* Update request */ 558 req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK; 559 560 /* Rebalance in background */ 561 i_ddi_irm_enqueue(pool_p, B_FALSE); 562 563 } else { 564 565 /* Determine new request size */ 566 nreq = MIN(req_p->ireq_nreq, pool_p->ipool_defsz); 567 568 /* Update pool statistics */ 569 pool_p->ipool_reqno -= req_p->ireq_nreq; 570 pool_p->ipool_reqno += nreq; 571 if (req_p->ireq_type == DDI_INTR_TYPE_MSIX) { 572 pool_p->ipool_minno -= 1; 573 pool_p->ipool_minno += nreq; 574 } else { 575 pool_p->ipool_minno -= req_p->ireq_nreq; 576 pool_p->ipool_minno += nreq; 577 } 578 579 /* Update request size, and re-sort in pool */ 580 req_p->ireq_nreq = nreq; 581 list_remove(&pool_p->ipool_req_list, req_p); 582 i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p); 583 584 /* Rebalance synchronously, before losing callback */ 585 i_ddi_irm_enqueue(pool_p, B_TRUE); 586 587 /* Remove callback flag */ 588 req_p->ireq_flags &= ~(DDI_IRM_FLAG_CALLBACK); 589 } 590 591 /* Unlock the pool */ 592 mutex_exit(&pool_p->ipool_lock); 593 } 594 595 /* 596 * Interrupt Pool Balancing 597 */ 598 599 /* 600 * irm_balance_thread() 601 * 602 * One instance of this thread operates per each defined IRM pool. 603 * It does the initial activation of the pool, as well as balancing 604 * any requests that were queued up before the pool was active. 605 * Once active, it waits forever to service balance operations. 606 */ 607 static void 608 irm_balance_thread(ddi_irm_pool_t *pool_p) 609 { 610 clock_t interval; 611 612 DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: pool_p %p\n", 613 (void *)pool_p)); 614 615 /* Lock the pool */ 616 mutex_enter(&pool_p->ipool_lock); 617 618 /* Perform initial balance if required */ 619 if (pool_p->ipool_reqno > pool_p->ipool_resno) 620 i_ddi_irm_balance(pool_p); 621 622 /* Activate the pool */ 623 pool_p->ipool_flags |= DDI_IRM_FLAG_ACTIVE; 624 625 /* Main loop */ 626 for (;;) { 627 628 /* Compute the delay interval */ 629 interval = drv_usectohz(irm_balance_delay * 1000000); 630 631 /* Sleep until queued */ 632 cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock); 633 634 DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: signaled.\n")); 635 636 /* Wait one interval, or until there are waiters */ 637 if ((interval > 0) && 638 !(pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) && 639 !(pool_p->ipool_flags & DDI_IRM_FLAG_EXIT)) { 640 (void) cv_reltimedwait(&pool_p->ipool_cv, 641 &pool_p->ipool_lock, interval, TR_CLOCK_TICK); 642 } 643 644 /* Check if awakened to exit */ 645 if (pool_p->ipool_flags & DDI_IRM_FLAG_EXIT) { 646 DDI_INTR_IRMDBG((CE_CONT, 647 "irm_balance_thread: exiting...\n")); 648 mutex_exit(&pool_p->ipool_lock); 649 thread_exit(); 650 } 651 652 /* Balance the pool */ 653 i_ddi_irm_balance(pool_p); 654 655 /* Notify waiters */ 656 if (pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) { 657 cv_broadcast(&pool_p->ipool_cv); 658 pool_p->ipool_flags &= ~(DDI_IRM_FLAG_WAITERS); 659 } 660 661 /* Clear QUEUED condition */ 662 pool_p->ipool_flags &= ~(DDI_IRM_FLAG_QUEUED); 663 } 664 } 665 666 /* 667 * i_ddi_irm_balance() 668 * 669 * Balance a pool. The general algorithm is to first reset all 670 * requests to their maximum size, use reduction algorithms to 671 * solve any imbalance, and then notify affected drivers. 672 */ 673 static void 674 i_ddi_irm_balance(ddi_irm_pool_t *pool_p) 675 { 676 ddi_irm_req_t *req_p; 677 678 #ifdef DEBUG 679 uint_t debug_totsz = 0; 680 int debug_policy = 0; 681 #endif /* DEBUG */ 682 683 ASSERT(pool_p != NULL); 684 ASSERT(MUTEX_HELD(&pool_p->ipool_lock)); 685 686 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: pool_p %p\n", 687 (void *)pool_p)); 688 689 #ifdef DEBUG /* Adjust size and policy settings */ 690 if (irm_debug_size > pool_p->ipool_minno) { 691 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: debug size %d\n", 692 irm_debug_size)); 693 debug_totsz = pool_p->ipool_totsz; 694 pool_p->ipool_totsz = irm_debug_size; 695 } 696 if (DDI_IRM_POLICY_VALID(irm_debug_policy)) { 697 DDI_INTR_IRMDBG((CE_CONT, 698 "i_ddi_irm_balance: debug policy %d\n", irm_debug_policy)); 699 debug_policy = pool_p->ipool_policy; 700 pool_p->ipool_policy = irm_debug_policy; 701 } 702 #endif /* DEBUG */ 703 704 /* Lock the availability lock */ 705 mutex_enter(&pool_p->ipool_navail_lock); 706 707 /* 708 * Put all of the reducible requests into a scratch list. 709 * Reset each one of them to their maximum availability. 710 */ 711 for (req_p = list_head(&pool_p->ipool_req_list); req_p; 712 req_p = list_next(&pool_p->ipool_req_list, req_p)) { 713 if (DDI_IRM_IS_REDUCIBLE(req_p)) { 714 pool_p->ipool_resno -= req_p->ireq_navail; 715 req_p->ireq_scratch = req_p->ireq_navail; 716 req_p->ireq_navail = req_p->ireq_nreq; 717 pool_p->ipool_resno += req_p->ireq_navail; 718 list_insert_tail(&pool_p->ipool_scratch_list, req_p); 719 } 720 } 721 722 /* Balance the requests */ 723 i_ddi_irm_reduce(pool_p); 724 725 /* Unlock the availability lock */ 726 mutex_exit(&pool_p->ipool_navail_lock); 727 728 /* 729 * Process REMOVE notifications. 730 * 731 * If a driver fails to release interrupts: exclude it from 732 * further processing, correct the resulting imbalance, and 733 * start over again at the head of the scratch list. 734 */ 735 req_p = list_head(&pool_p->ipool_scratch_list); 736 while (req_p) { 737 if ((req_p->ireq_navail < req_p->ireq_scratch) && 738 (i_ddi_irm_notify(pool_p, req_p) != DDI_SUCCESS)) { 739 list_remove(&pool_p->ipool_scratch_list, req_p); 740 mutex_enter(&pool_p->ipool_navail_lock); 741 i_ddi_irm_reduce(pool_p); 742 mutex_exit(&pool_p->ipool_navail_lock); 743 req_p = list_head(&pool_p->ipool_scratch_list); 744 } else { 745 req_p = list_next(&pool_p->ipool_scratch_list, req_p); 746 } 747 } 748 749 /* 750 * Process ADD notifications. 751 * 752 * This is the last use of the scratch list, so empty it. 753 */ 754 while (req_p = list_remove_head(&pool_p->ipool_scratch_list)) { 755 if (req_p->ireq_navail > req_p->ireq_scratch) { 756 (void) i_ddi_irm_notify(pool_p, req_p); 757 } 758 } 759 760 #ifdef DEBUG /* Restore size and policy settings */ 761 if (debug_totsz != 0) 762 pool_p->ipool_totsz = debug_totsz; 763 if (debug_policy != 0) 764 pool_p->ipool_policy = debug_policy; 765 #endif /* DEBUG */ 766 } 767 768 /* 769 * i_ddi_irm_reduce() 770 * 771 * Use reduction algorithms to correct an imbalance in a pool. 772 */ 773 static void 774 i_ddi_irm_reduce(ddi_irm_pool_t *pool_p) 775 { 776 int imbalance; 777 778 ASSERT(pool_p != NULL); 779 ASSERT(MUTEX_HELD(&pool_p->ipool_lock)); 780 ASSERT(DDI_IRM_POLICY_VALID(pool_p->ipool_policy)); 781 782 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_reduce: pool_p %p\n", 783 (void *)pool_p)); 784 785 /* Compute the imbalance. Do nothing if already balanced. */ 786 if ((imbalance = pool_p->ipool_resno - pool_p->ipool_totsz) <= 0) 787 return; 788 789 /* 790 * Try policy based reduction first. If it failed, then 791 * possibly reduce new requests as a last resort. 792 */ 793 if (i_ddi_irm_reduce_by_policy(pool_p, imbalance, pool_p->ipool_policy) 794 != DDI_SUCCESS) { 795 796 DDI_INTR_IRMDBG((CE_CONT, 797 "i_ddi_irm_reduce: policy reductions failed.\n")); 798 799 /* Compute remaining imbalance */ 800 imbalance = pool_p->ipool_resno - pool_p->ipool_totsz; 801 802 ASSERT(imbalance > 0); 803 804 i_ddi_irm_reduce_new(pool_p, imbalance); 805 } 806 } 807 808 /* 809 * i_ddi_irm_enqueue() 810 * 811 * Queue a pool to be balanced. Signals the balancing thread to wake 812 * up and process the pool. If 'wait_flag' is true, then the current 813 * thread becomes a waiter and blocks until the balance is completed. 814 */ 815 static void 816 i_ddi_irm_enqueue(ddi_irm_pool_t *pool_p, boolean_t wait_flag) 817 { 818 ASSERT(pool_p != NULL); 819 ASSERT(MUTEX_HELD(&pool_p->ipool_lock)); 820 821 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool_p %p wait_flag %d\n", 822 (void *)pool_p, (int)wait_flag)); 823 824 /* Do nothing if pool is already balanced */ 825 #ifndef DEBUG 826 if ((pool_p->ipool_reqno == pool_p->ipool_resno)) { 827 #else 828 if ((pool_p->ipool_reqno == pool_p->ipool_resno) && !irm_debug_size) { 829 #endif /* DEBUG */ 830 DDI_INTR_IRMDBG((CE_CONT, 831 "i_ddi_irm_enqueue: pool already balanced\n")); 832 return; 833 } 834 835 /* Avoid deadlocks when IRM is not active */ 836 if (!irm_active && wait_flag) { 837 DDI_INTR_IRMDBG((CE_CONT, 838 "i_ddi_irm_enqueue: pool not active.\n")); 839 return; 840 } 841 842 if (wait_flag) 843 pool_p->ipool_flags |= DDI_IRM_FLAG_WAITERS; 844 845 if (wait_flag || !(pool_p->ipool_flags & DDI_IRM_FLAG_QUEUED)) { 846 pool_p->ipool_flags |= DDI_IRM_FLAG_QUEUED; 847 cv_signal(&pool_p->ipool_cv); 848 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool queued.\n")); 849 } 850 851 if (wait_flag) { 852 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: waiting...\n")); 853 cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock); 854 } 855 } 856 857 /* 858 * i_ddi_irm_reduce_by_policy() 859 * 860 * Reduces requests based on reduction policies. 861 * 862 * For the DDI_IRM_POLICY_LARGE reduction policy, the algorithm 863 * generally reduces larger requests first, before advancing 864 * to smaller requests. 865 * For the DDI_IRM_POLICY_EVEN reduction policy, the algorithm 866 * reduces requests evenly, without giving a specific preference 867 * to smaller or larger requests. Each iteration reduces all 868 * reducible requests by the same amount until the imbalance is 869 * corrected. 870 * 871 * The scratch list is initially sorted in descending order by current 872 * navail values, which are maximized prior to reduction. This sorted 873 * order is preserved. It avoids reducing requests below the threshold 874 * of the interrupt pool's default allocation size. 875 * 876 * Optimizations in this algorithm include trying to reduce multiple 877 * requests together. And the algorithm attempts to reduce in larger 878 * increments when possible to minimize the total number of iterations. 879 */ 880 static int 881 i_ddi_irm_reduce_by_policy(ddi_irm_pool_t *pool_p, int imbalance, int policy) 882 { 883 ASSERT(pool_p != NULL); 884 ASSERT(imbalance > 0); 885 ASSERT(MUTEX_HELD(&pool_p->ipool_lock)); 886 887 while (imbalance > 0) { 888 list_t *slist_p = &pool_p->ipool_scratch_list; 889 ddi_irm_req_t *req_p = list_head(slist_p), *last_p; 890 uint_t nreduce = 0, nremain = 0, stop_navail; 891 uint_t pool_defsz = pool_p->ipool_defsz; 892 uint_t reduction, max_redu; 893 894 /* Fail if none are reducible */ 895 if (!req_p || req_p->ireq_navail <= pool_defsz) { 896 DDI_INTR_IRMDBG((CE_CONT, 897 "i_ddi_irm_reduce_by_policy: Failure. " 898 "All requests have downsized to low limit.\n")); 899 return (DDI_FAILURE); 900 } 901 902 /* Count reducible requests */ 903 stop_navail = (policy == DDI_IRM_POLICY_LARGE) ? 904 req_p->ireq_navail - 1 : pool_defsz; 905 for (; req_p; req_p = list_next(slist_p, req_p)) { 906 if (req_p->ireq_navail <= stop_navail) 907 break; 908 nreduce++; 909 } 910 911 /* Compute reduction */ 912 last_p = req_p ? list_prev(slist_p, req_p) : list_tail(slist_p); 913 if ((policy == DDI_IRM_POLICY_LARGE) && req_p && 914 req_p->ireq_navail > pool_defsz) 915 reduction = last_p->ireq_navail - req_p->ireq_navail; 916 else 917 reduction = last_p->ireq_navail - pool_defsz; 918 919 if ((max_redu = reduction * nreduce) > imbalance) { 920 reduction = imbalance / nreduce; 921 nremain = imbalance % nreduce; 922 pool_p->ipool_resno -= imbalance; 923 imbalance = 0; 924 } else { 925 pool_p->ipool_resno -= max_redu; 926 imbalance -= max_redu; 927 } 928 929 /* Reduce */ 930 for (req_p = list_head(slist_p); (reduction != 0) && nreduce--; 931 req_p = list_next(slist_p, req_p)) { 932 req_p->ireq_navail -= reduction; 933 } 934 935 for (req_p = last_p; nremain--; 936 req_p = list_prev(slist_p, req_p)) { 937 req_p->ireq_navail--; 938 } 939 } 940 941 return (DDI_SUCCESS); 942 } 943 944 /* 945 * i_ddi_irm_reduce_new() 946 * 947 * Reduces new requests. This is only used as a last resort 948 * after another reduction algorithm failed. 949 * 950 * NOTE: The pool locking in i_ddi_irm_insert() ensures 951 * there can be only one new request at a time in a pool. 952 */ 953 static void 954 i_ddi_irm_reduce_new(ddi_irm_pool_t *pool_p, int imbalance) 955 { 956 ddi_irm_req_t *req_p; 957 958 ASSERT(pool_p != NULL); 959 ASSERT(imbalance > 0); 960 ASSERT(MUTEX_HELD(&pool_p->ipool_lock)); 961 962 DDI_INTR_IRMDBG((CE_CONT, 963 "i_ddi_irm_reduce_new: pool_p %p imbalance %d\n", 964 (void *)pool_p, imbalance)); 965 966 for (req_p = list_head(&pool_p->ipool_scratch_list); req_p; 967 req_p = list_next(&pool_p->ipool_scratch_list, req_p)) { 968 if (req_p->ireq_flags & DDI_IRM_FLAG_NEW) { 969 ASSERT(req_p->ireq_navail >= imbalance); 970 req_p->ireq_navail -= imbalance; 971 pool_p->ipool_resno -= imbalance; 972 return; 973 } 974 } 975 976 /* should never go here */ 977 ASSERT(B_FALSE); 978 } 979 980 /* 981 * Miscellaneous Helper Functions 982 */ 983 984 /* 985 * i_ddi_intr_get_pool() 986 * 987 * Get an IRM pool that supplies interrupts of a specified type. 988 * Invokes a DDI_INTROP_GETPOOL to the bus nexus driver. Fails 989 * if no pool exists. 990 */ 991 ddi_irm_pool_t * 992 i_ddi_intr_get_pool(dev_info_t *dip, int type) 993 { 994 devinfo_intr_t *intr_p; 995 ddi_irm_pool_t *pool_p; 996 ddi_irm_req_t *req_p; 997 ddi_intr_handle_impl_t hdl; 998 999 ASSERT(dip != NULL); 1000 ASSERT(DDI_INTR_TYPE_FLAG_VALID(type)); 1001 1002 if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) && 1003 ((req_p = intr_p->devi_irm_req_p) != NULL) && 1004 ((pool_p = req_p->ireq_pool_p) != NULL) && 1005 (pool_p->ipool_types & type)) { 1006 return (pool_p); 1007 } 1008 1009 bzero(&hdl, sizeof (ddi_intr_handle_impl_t)); 1010 hdl.ih_dip = dip; 1011 hdl.ih_type = type; 1012 1013 if (i_ddi_intr_ops(dip, dip, DDI_INTROP_GETPOOL, 1014 &hdl, (void *)&pool_p) == DDI_SUCCESS) 1015 return (pool_p); 1016 1017 return (NULL); 1018 } 1019 1020 /* 1021 * i_ddi_irm_insertion_sort() 1022 * 1023 * Use the insertion sort method to insert a request into a list. 1024 * The list is sorted in descending order by request size. 1025 */ 1026 static void 1027 i_ddi_irm_insertion_sort(list_t *req_list, ddi_irm_req_t *req_p) 1028 { 1029 ddi_irm_req_t *next_p; 1030 1031 next_p = list_head(req_list); 1032 1033 while (next_p && (next_p->ireq_nreq > req_p->ireq_nreq)) 1034 next_p = list_next(req_list, next_p); 1035 1036 list_insert_before(req_list, next_p, req_p); 1037 } 1038 1039 /* 1040 * i_ddi_irm_notify() 1041 * 1042 * Notify a driver of changes to its interrupt request using the 1043 * generic callback mechanism. Checks for errors in processing. 1044 */ 1045 static int 1046 i_ddi_irm_notify(ddi_irm_pool_t *pool_p, ddi_irm_req_t *req_p) 1047 { 1048 ddi_cb_action_t action; 1049 ddi_cb_t *cb_p; 1050 uint_t nintrs; 1051 int ret, count; 1052 1053 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: pool_p %p req_p %p\n", 1054 (void *)pool_p, (void *)req_p)); 1055 1056 /* Do not notify new or unchanged requests */ 1057 if ((req_p->ireq_navail == req_p->ireq_scratch) || 1058 (req_p->ireq_flags & DDI_IRM_FLAG_NEW)) 1059 return (DDI_SUCCESS); 1060 1061 /* Determine action and count */ 1062 if (req_p->ireq_navail > req_p->ireq_scratch) { 1063 action = DDI_CB_INTR_ADD; 1064 count = req_p->ireq_navail - req_p->ireq_scratch; 1065 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: adding %d\n", 1066 count)); 1067 } else { 1068 action = DDI_CB_INTR_REMOVE; 1069 count = req_p->ireq_scratch - req_p->ireq_navail; 1070 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: removing %d\n", 1071 count)); 1072 } 1073 1074 /* Lookup driver callback */ 1075 if ((cb_p = DEVI(req_p->ireq_dip)->devi_cb_p) == NULL) { 1076 DDI_INTR_IRMDBG((CE_WARN, "i_ddi_irm_notify: no callback!\n")); 1077 return (DDI_FAILURE); 1078 } 1079 1080 /* Do callback */ 1081 ret = cb_p->cb_func(req_p->ireq_dip, action, (void *)(uintptr_t)count, 1082 cb_p->cb_arg1, cb_p->cb_arg2); 1083 1084 /* Log callback errors */ 1085 if (ret != DDI_SUCCESS) { 1086 cmn_err(CE_WARN, "%s%d: failed callback (action=%d, ret=%d)\n", 1087 ddi_driver_name(req_p->ireq_dip), 1088 ddi_get_instance(req_p->ireq_dip), (int)action, ret); 1089 } 1090 1091 /* Check if the driver exceeds its availability */ 1092 nintrs = i_ddi_intr_get_current_nintrs(req_p->ireq_dip); 1093 if (nintrs > req_p->ireq_navail) { 1094 cmn_err(CE_WARN, "%s%d: failed to release interrupts " 1095 "(nintrs=%d, navail=%d).\n", 1096 ddi_driver_name(req_p->ireq_dip), 1097 ddi_get_instance(req_p->ireq_dip), nintrs, 1098 req_p->ireq_navail); 1099 pool_p->ipool_resno += (nintrs - req_p->ireq_navail); 1100 req_p->ireq_navail = nintrs; 1101 return (DDI_FAILURE); 1102 } 1103 1104 /* Update request */ 1105 req_p->ireq_scratch = req_p->ireq_navail; 1106 1107 return (DDI_SUCCESS); 1108 } 1109 1110 /* 1111 * i_ddi_irm_debug_balance() 1112 * 1113 * A debug/test only routine to force the immediate, 1114 * synchronous rebalancing of an interrupt pool. 1115 */ 1116 #ifdef DEBUG 1117 void 1118 i_ddi_irm_debug_balance(dev_info_t *dip, boolean_t wait_flag) 1119 { 1120 ddi_irm_pool_t *pool_p; 1121 int type; 1122 1123 DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_debug_balance: dip %p wait %d\n", 1124 (void *)dip, (int)wait_flag)); 1125 1126 if (((type = i_ddi_intr_get_current_type(dip)) != 0) && 1127 ((pool_p = i_ddi_intr_get_pool(dip, type)) != NULL)) { 1128 mutex_enter(&pool_p->ipool_lock); 1129 i_ddi_irm_enqueue(pool_p, wait_flag); 1130 mutex_exit(&pool_p->ipool_lock); 1131 } 1132 } 1133 #endif 1134