/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/note.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/ddi.h>
#include <sys/sunndi.h>
#include <sys/ndi_impldefs.h>	/* include prototypes */

/*
 * Interrupt Resource Management (IRM).
 */

#define	DDI_IRM_BALANCE_DELAY	(60)	/* In seconds */

#define	DDI_IRM_HAS_CB(c)	((c) && ((c)->cb_flags & DDI_CB_FLAG_INTR))

#define	DDI_IRM_IS_REDUCIBLE(r)	((((r)->ireq_flags & DDI_IRM_FLAG_CALLBACK) && \
				((r)->ireq_type == DDI_INTR_TYPE_MSIX)) || \
				((r)->ireq_flags & DDI_IRM_FLAG_NEW))

extern pri_t	minclsyspri;

/* Global policies */
int		irm_enable = 1;
boolean_t	irm_active = B_FALSE;
int		irm_default_policy = DDI_IRM_POLICY_LARGE;
uint_t		irm_balance_delay = DDI_IRM_BALANCE_DELAY;

/* Global list of interrupt pools */
kmutex_t	irm_pools_lock;
list_t		irm_pools_list;

/* Global debug tunables */
#ifdef	DEBUG
int		irm_debug_policy = 0;
uint_t		irm_debug_size = 0;
#endif	/* DEBUG */

static void	irm_balance_thread(ddi_irm_pool_t *);
static void	i_ddi_irm_balance(ddi_irm_pool_t *);
static void	i_ddi_irm_enqueue(ddi_irm_pool_t *, boolean_t);
static void	i_ddi_irm_reduce(ddi_irm_pool_t *pool);
static int	i_ddi_irm_reduce_by_policy(ddi_irm_pool_t *, int, int);
static void	i_ddi_irm_reduce_new(ddi_irm_pool_t *, int);
static void	i_ddi_irm_insertion_sort(list_t *, ddi_irm_req_t *);
static int	i_ddi_irm_notify(ddi_irm_pool_t *, ddi_irm_req_t *);

/*
 * OS Initialization Routines
 */

/*
 * irm_init()
 *
 *	Initialize the IRM subsystem before any drivers are attached.
 */
void
irm_init(void)
{
	/* Do nothing if IRM is disabled */
	if (!irm_enable)
		return;

	/* Verify that the default balancing policy is valid */
	if (!DDI_IRM_POLICY_VALID(irm_default_policy))
		irm_default_policy = DDI_IRM_POLICY_LARGE;

	/* Initialize the global list of interrupt pools */
	mutex_init(&irm_pools_lock, NULL, MUTEX_DRIVER, NULL);
	list_create(&irm_pools_list, sizeof (ddi_irm_pool_t),
	    offsetof(ddi_irm_pool_t, ipool_link));
}

/*
 * i_ddi_irm_poststartup()
 *
 *	IRM is not activated until after the IO subsystem is initialized.
 *	When activated, per-pool balancing threads are spawned and a flag
 *	is set so that all future pools will be activated when created.
 *
 *	NOTE: the global variable 'irm_enable' disables IRM if zero.
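 *
 *	NOTE: 'irm_balance_delay' is likewise a global tunable; it sets
 *	the dwell time (in seconds) that the balancing threads below use
 *	to coalesce bursts of rebalance requests.  (For illustration
 *	only: on Solaris-derived systems such globals are typically
 *	adjusted via /etc/system, e.g. "set irm_enable = 0" to disable
 *	IRM entirely.)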
 */
void
i_ddi_irm_poststartup(void)
{
	ddi_irm_pool_t	*pool_p;

	/* Do nothing if IRM is disabled */
	if (!irm_enable)
		return;

	/* Lock the global list */
	mutex_enter(&irm_pools_lock);

	/* Activate all defined pools */
	for (pool_p = list_head(&irm_pools_list); pool_p;
	    pool_p = list_next(&irm_pools_list, pool_p))
		pool_p->ipool_thread = thread_create(NULL, 0,
		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);

	/* Set future pools to be active */
	irm_active = B_TRUE;

	/* Unlock the global list */
	mutex_exit(&irm_pools_lock);
}

/*
 * NDI interfaces for creating/destroying IRM pools.
 */

/*
 * ndi_irm_create()
 *
 *	Nexus interface to create an IRM pool.  Create the new
 *	pool and add it to the global list of interrupt pools.
 */
int
ndi_irm_create(dev_info_t *dip, ddi_irm_params_t *paramsp,
    ddi_irm_pool_t **pool_retp)
{
	ddi_irm_pool_t	*pool_p;

	ASSERT(dip != NULL);
	ASSERT(paramsp != NULL);
	ASSERT(pool_retp != NULL);
	ASSERT(paramsp->iparams_total >= 1);
	ASSERT(paramsp->iparams_types != 0);

	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_create: dip %p\n", (void *)dip));

	/* Check if IRM is enabled */
	if (!irm_enable)
		return (NDI_FAILURE);

	/* Validate parameters */
	if ((dip == NULL) || (paramsp == NULL) || (pool_retp == NULL) ||
	    (paramsp->iparams_total < 1) || (paramsp->iparams_types == 0))
		return (NDI_FAILURE);

	/* Allocate and initialize the pool */
	pool_p = kmem_zalloc(sizeof (ddi_irm_pool_t), KM_SLEEP);
	pool_p->ipool_owner = dip;
	pool_p->ipool_policy = irm_default_policy;
	pool_p->ipool_types = paramsp->iparams_types;
	pool_p->ipool_totsz = paramsp->iparams_total;
	pool_p->ipool_defsz = MIN(DDI_MAX_MSIX_ALLOC, MAX(DDI_MIN_MSIX_ALLOC,
	    paramsp->iparams_total / DDI_MSIX_ALLOC_DIVIDER));
	list_create(&pool_p->ipool_req_list, sizeof (ddi_irm_req_t),
	    offsetof(ddi_irm_req_t, ireq_link));
	list_create(&pool_p->ipool_scratch_list, sizeof (ddi_irm_req_t),
	    offsetof(ddi_irm_req_t, ireq_scratch_link));
	cv_init(&pool_p->ipool_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&pool_p->ipool_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&pool_p->ipool_navail_lock, NULL, MUTEX_DRIVER, NULL);

	/* Add to global list of pools */
	mutex_enter(&irm_pools_lock);
	list_insert_tail(&irm_pools_list, pool_p);
	mutex_exit(&irm_pools_lock);

	/* If IRM is active, then activate the pool */
	if (irm_active)
		pool_p->ipool_thread = thread_create(NULL, 0,
		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);

	*pool_retp = pool_p;
	return (NDI_SUCCESS);
}
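
/*
 * For illustration only (hypothetical nexus code, not part of this file):
 * a nexus driver that owns a block of MSI-X vectors would typically fill
 * in a ddi_irm_params_t and keep the returned pool handle for a matching
 * ndi_irm_destroy() call at detach time:
 *
 *	ddi_irm_params_t params;
 *	ddi_irm_pool_t *pool_p;
 *
 *	params.iparams_types = DDI_INTR_TYPE_MSIX;
 *	params.iparams_total = nvectors;     (vectors owned by this nexus)
 *	if (ndi_irm_create(dip, &params, &pool_p) != NDI_SUCCESS)
 *		... fall back to operating without an IRM pool ...
 */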

/*
 * ndi_irm_destroy()
 *
 *	Nexus interface to destroy an IRM pool.  Destroy the pool
 *	and remove it from the global list of interrupt pools.
 */
int
ndi_irm_destroy(ddi_irm_pool_t *pool_p)
{
	ASSERT(pool_p != NULL);
	ASSERT(pool_p->ipool_resno == 0);

	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_destroy: pool_p %p\n",
	    (void *)pool_p));

	/* Validate parameters */
	if (pool_p == NULL)
		return (NDI_FAILURE);

	/* Validate that the pool is empty */
	if (pool_p->ipool_resno != 0)
		return (NDI_BUSY);

	/* Remove the pool from the global list */
	mutex_enter(&irm_pools_lock);
	list_remove(&irm_pools_list, pool_p);
	mutex_exit(&irm_pools_lock);

	/* Terminate the balancing thread */
	mutex_enter(&pool_p->ipool_lock);
	if (pool_p->ipool_thread &&
	    (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) {
		pool_p->ipool_flags |= DDI_IRM_FLAG_EXIT;
		cv_signal(&pool_p->ipool_cv);
		mutex_exit(&pool_p->ipool_lock);
		thread_join(pool_p->ipool_thread->t_did);
	} else
		mutex_exit(&pool_p->ipool_lock);

	/* Destroy the pool */
	cv_destroy(&pool_p->ipool_cv);
	mutex_destroy(&pool_p->ipool_lock);
	mutex_destroy(&pool_p->ipool_navail_lock);
	list_destroy(&pool_p->ipool_req_list);
	list_destroy(&pool_p->ipool_scratch_list);
	kmem_free(pool_p, sizeof (ddi_irm_pool_t));

	return (NDI_SUCCESS);
}

/*
 * Insert/Modify/Remove Interrupt Requests
 */

/*
 * i_ddi_irm_insert()
 *
 *	Insert a new request into an interrupt pool, and balance the pool.
 */
int
i_ddi_irm_insert(dev_info_t *dip, int type, int count)
{
	ddi_cb_t	*cb_p;
	ddi_irm_req_t	*req_p;
	devinfo_intr_t	*intr_p;
	ddi_irm_pool_t	*pool_p;
	uint_t		nreq, nmin, npartial;
	boolean_t	irm_flag = B_FALSE;

	ASSERT(dip != NULL);
	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));
	ASSERT(count > 0);

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: dip %p type %d count %d\n",
	    (void *)dip, type, count));

	/* Validate parameters */
	if ((dip == NULL) || (count < 1) || !DDI_INTR_TYPE_FLAG_VALID(type)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: invalid args\n"));
		return (DDI_EINVAL);
	}

	/* Check for an existing request */
	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
	    (intr_p->devi_irm_req_p != NULL))
		return (DDI_SUCCESS);

	/* Check for IRM support from the system */
	if ((pool_p = i_ddi_intr_get_pool(dip, type)) == NULL) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: not supported\n"));
		return (DDI_ENOTSUP);
	}

	/* Check for IRM support from the driver */
	if (((cb_p = DEVI(dip)->devi_cb_p) != NULL) && DDI_IRM_HAS_CB(cb_p) &&
	    (type == DDI_INTR_TYPE_MSIX))
		irm_flag = B_TRUE;

	/* Determine request size */
	nreq = (irm_flag) ? count :
	    MIN(count, i_ddi_intr_get_current_navail(dip, type));
	nmin = (irm_flag) ? 1 : nreq;
	npartial = MIN(nreq, pool_p->ipool_defsz);
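
	/*
	 * Illustrative values only: in a pool whose ipool_defsz is 4, an
	 * IRM-aware MSI-X driver asking for count = 16 yields nreq = 16,
	 * nmin = 1 (the request can later be reduced via callbacks), and
	 * npartial = 4.  A driver without a callback is clamped up front
	 * to the currently available supply, and nmin = nreq because its
	 * allocation can never be reduced afterwards.
	 */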

	/* Allocate and initialize the request */
	req_p = kmem_zalloc(sizeof (ddi_irm_req_t), KM_SLEEP);
	req_p->ireq_type = type;
	req_p->ireq_dip = dip;
	req_p->ireq_pool_p = pool_p;
	req_p->ireq_nreq = nreq;
	req_p->ireq_flags = DDI_IRM_FLAG_NEW;
	if (DDI_IRM_HAS_CB(cb_p))
		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;

	/* Lock the pool */
	mutex_enter(&pool_p->ipool_lock);

	/* Check for minimal fit before inserting */
	if ((pool_p->ipool_minno + nmin) > pool_p->ipool_totsz) {
		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		mutex_exit(&pool_p->ipool_lock);
		kmem_free(req_p, sizeof (ddi_irm_req_t));
		return (DDI_EAGAIN);
	}

	/* Insert the request into the pool */
	pool_p->ipool_reqno += nreq;
	pool_p->ipool_minno += nmin;
	i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);

	/*
	 * Try to fulfill the request.
	 *
	 * If all the interrupts are available, and either the request
	 * is static or the pool is active, then just take them directly.
	 *
	 * If only some of the interrupts are available, and the request
	 * can receive future callbacks, then take some now but queue the
	 * pool to be rebalanced later.
	 *
	 * Otherwise, immediately rebalance the pool and wait.
	 */
	if ((!irm_flag || (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) &&
	    ((pool_p->ipool_resno + nreq) <= pool_p->ipool_totsz)) {

		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
		    "request completely fulfilled.\n"));
		pool_p->ipool_resno += nreq;
		req_p->ireq_navail = nreq;
		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);

	} else if (irm_flag &&
	    ((pool_p->ipool_resno + npartial) <= pool_p->ipool_totsz)) {

		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
		    "request partially fulfilled.\n"));
		pool_p->ipool_resno += npartial;
		req_p->ireq_navail = npartial;
		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
		i_ddi_irm_enqueue(pool_p, B_FALSE);

	} else {

		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
		    "request needs immediate rebalance.\n"));
		i_ddi_irm_enqueue(pool_p, B_TRUE);
		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
	}

	/* Fail if the request cannot be fulfilled at all */
	if (req_p->ireq_navail == 0) {
		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		pool_p->ipool_reqno -= nreq;
		pool_p->ipool_minno -= nmin;
		list_remove(&pool_p->ipool_req_list, req_p);
		mutex_exit(&pool_p->ipool_lock);
		kmem_free(req_p, sizeof (ddi_irm_req_t));
		return (DDI_EAGAIN);
	}

	/* Unlock the pool */
	mutex_exit(&pool_p->ipool_lock);

	intr_p->devi_irm_req_p = req_p;
	return (DDI_SUCCESS);
}
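
/*
 * Illustrative walk-through of the three paths above, using hypothetical
 * numbers for an active pool with ipool_totsz = 32 and ipool_defsz = 4:
 *
 *	- ipool_resno = 10, IRM-aware request for 16: 10 + 16 <= 32, so
 *	  the request is completely fulfilled on the spot.
 *	- ipool_resno = 20, IRM-aware request for 16: the full request no
 *	  longer fits, but 20 + 4 <= 32, so npartial = 4 vectors are taken
 *	  and the pool is queued for an asynchronous rebalance.
 *	- ipool_resno = 30: not even npartial fits, so the pool is
 *	  rebalanced synchronously and the request is re-evaluated; if
 *	  ireq_navail is still 0 afterwards, the insert fails with
 *	  DDI_EAGAIN.
 */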

/*
 * i_ddi_irm_modify()
 *
 *	Modify an existing request in an interrupt pool, and balance the pool.
 */
int
i_ddi_irm_modify(dev_info_t *dip, int nreq)
{
	devinfo_intr_t	*intr_p;
	ddi_irm_req_t	*req_p;
	ddi_irm_pool_t	*pool_p;

	ASSERT(dip != NULL);

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: dip %p nreq %d\n",
	    (void *)dip, nreq));

	/* Validate parameters */
	if ((dip == NULL) || (nreq < 1)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
		return (DDI_EINVAL);
	}

	/* Check that the operation is supported */
	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
	    !(req_p = intr_p->devi_irm_req_p) ||
	    !DDI_IRM_IS_REDUCIBLE(req_p)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: not supported\n"));
		return (DDI_ENOTSUP);
	}

	/* Validate that the request size is not too large */
	if (nreq > intr_p->devi_intr_sup_nintrs) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
		return (DDI_EINVAL);
	}

	/*
	 * Modify the request, but only if the new size is different.
	 */
	if (nreq != req_p->ireq_nreq) {

		/* Lock the pool */
		pool_p = req_p->ireq_pool_p;
		mutex_enter(&pool_p->ipool_lock);

		/* Update pool and request */
		pool_p->ipool_reqno -= req_p->ireq_nreq;
		pool_p->ipool_reqno += nreq;
		req_p->ireq_nreq = nreq;

		/* Re-sort request in the pool */
		list_remove(&pool_p->ipool_req_list, req_p);
		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);

		/* Queue pool to be rebalanced */
		i_ddi_irm_enqueue(pool_p, B_FALSE);

		/* Unlock the pool */
		mutex_exit(&pool_p->ipool_lock);
	}

	return (DDI_SUCCESS);
}

/*
 * i_ddi_irm_remove()
 *
 *	Remove a request from an interrupt pool, and balance the pool.
 */
int
i_ddi_irm_remove(dev_info_t *dip)
{
	devinfo_intr_t	*intr_p;
	ddi_irm_pool_t	*pool_p;
	ddi_irm_req_t	*req_p;
	uint_t		nmin;

	ASSERT(dip != NULL);

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: dip %p\n", (void *)dip));

	/* Validate parameters */
	if (dip == NULL) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: invalid args\n"));
		return (DDI_EINVAL);
	}

	/* Check if the device has a request */
	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
	    !(req_p = intr_p->devi_irm_req_p)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: not found\n"));
		return (DDI_EINVAL);
	}

	/* Lock the pool */
	pool_p = req_p->ireq_pool_p;
	mutex_enter(&pool_p->ipool_lock);

	/* Remove request */
	nmin = DDI_IRM_IS_REDUCIBLE(req_p) ? 1 : req_p->ireq_nreq;
	pool_p->ipool_minno -= nmin;
	pool_p->ipool_reqno -= req_p->ireq_nreq;
	pool_p->ipool_resno -= req_p->ireq_navail;
	list_remove(&pool_p->ipool_req_list, req_p);

	/* Queue pool to be rebalanced */
	i_ddi_irm_enqueue(pool_p, B_FALSE);

	/* Unlock the pool */
	mutex_exit(&pool_p->ipool_lock);

	/* Destroy the request */
	intr_p->devi_irm_req_p = NULL;
	kmem_free(req_p, sizeof (ddi_irm_req_t));

	return (DDI_SUCCESS);
}

/*
 * i_ddi_irm_set_cb()
 *
 *	Change the callback flag for a request, in response to
 *	a change in its callback registration.  Then rebalance
 *	the interrupt pool.
 *
 *	NOTE: the request is not locked because the navail value
 *	      is not directly affected.  The balancing thread may
 *	      modify the navail value in the background after it
 *	      locks the pool itself.
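 *
 *	Illustrative arithmetic (hypothetical values): for an MSI-X
 *	request with ireq_nreq = 8 in a pool whose ipool_defsz is 4,
 *	registering a callback lowers the request's contribution to
 *	ipool_minno from 8 to 1.  Unregistering the callback clamps
 *	the request to MIN(8, 4) = 4 and raises its minimum back to 4,
 *	after a final synchronous rebalance while the callback still
 *	exists.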
 */
void
i_ddi_irm_set_cb(dev_info_t *dip, boolean_t has_cb_flag)
{
	devinfo_intr_t	*intr_p;
	ddi_irm_pool_t	*pool_p;
	ddi_irm_req_t	*req_p;
	uint_t		nreq;

	ASSERT(dip != NULL);

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: dip %p has_cb_flag %d\n",
	    (void *)dip, (int)has_cb_flag));

	/* Validate parameters */
	if (dip == NULL)
		return;

	/* Check for association with an interrupt pool */
	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
	    !(req_p = intr_p->devi_irm_req_p)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: not in pool\n"));
		return;
	}

	/* Lock the pool */
	pool_p = req_p->ireq_pool_p;
	mutex_enter(&pool_p->ipool_lock);

	/*
	 * Update the request and the pool
	 */
	if (has_cb_flag) {

		/* Update pool statistics */
		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX)
			pool_p->ipool_minno -= (req_p->ireq_nreq - 1);

		/* Update request */
		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;

		/* Rebalance in background */
		i_ddi_irm_enqueue(pool_p, B_FALSE);

	} else {

		/* Determine new request size */
		nreq = MIN(req_p->ireq_nreq, pool_p->ipool_defsz);

		/* Update pool statistics */
		pool_p->ipool_reqno -= req_p->ireq_nreq;
		pool_p->ipool_reqno += nreq;
		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX) {
			pool_p->ipool_minno -= 1;
			pool_p->ipool_minno += nreq;
		} else {
			pool_p->ipool_minno -= req_p->ireq_nreq;
			pool_p->ipool_minno += nreq;
		}

		/* Update request size, and re-sort in the pool */
		req_p->ireq_nreq = nreq;
		list_remove(&pool_p->ipool_req_list, req_p);
		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);

		/* Rebalance synchronously, before losing the callback */
		i_ddi_irm_enqueue(pool_p, B_TRUE);

		/* Remove callback flag */
		req_p->ireq_flags &= ~(DDI_IRM_FLAG_CALLBACK);
	}

	/* Unlock the pool */
	mutex_exit(&pool_p->ipool_lock);
}

/*
 * Interrupt Pool Balancing
 */

/*
 * irm_balance_thread()
 *
 *	One instance of this thread operates for each defined IRM pool.
 *	It does the initial activation of the pool, as well as balancing
 *	any requests that were queued up before the pool was active.
 *	Once active, it waits forever to service balance operations.
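 *
 *	After being signaled, the thread dwells for up to
 *	irm_balance_delay seconds (unless a waiter or an exit request
 *	is pending) so that bursts of enqueue operations coalesce into
 *	a single balance pass.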
 */
static void
irm_balance_thread(ddi_irm_pool_t *pool_p)
{
	clock_t		interval, wakeup;

	DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: pool_p %p\n",
	    (void *)pool_p));

	/* Lock the pool */
	mutex_enter(&pool_p->ipool_lock);

	/* Perform initial balance if required */
	if (pool_p->ipool_reqno > pool_p->ipool_resno)
		i_ddi_irm_balance(pool_p);

	/* Activate the pool */
	pool_p->ipool_flags |= DDI_IRM_FLAG_ACTIVE;

	/* Main loop */
	for (;;) {

		/* Compute the delay interval */
		interval = drv_usectohz(irm_balance_delay * 1000000);

		/* Sleep until queued */
		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);

		DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: signaled.\n"));

		/* Wait one interval, or until there are waiters */
		if ((interval > 0) &&
		    !(pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) &&
		    !(pool_p->ipool_flags & DDI_IRM_FLAG_EXIT)) {
			wakeup = ddi_get_lbolt() + interval;
			(void) cv_timedwait(&pool_p->ipool_cv,
			    &pool_p->ipool_lock, wakeup);
		}

		/* Check if awakened to exit */
		if (pool_p->ipool_flags & DDI_IRM_FLAG_EXIT) {
			DDI_INTR_IRMDBG((CE_CONT,
			    "irm_balance_thread: exiting...\n"));
			mutex_exit(&pool_p->ipool_lock);
			thread_exit();
		}

		/* Balance the pool */
		i_ddi_irm_balance(pool_p);

		/* Notify waiters */
		if (pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) {
			cv_broadcast(&pool_p->ipool_cv);
			pool_p->ipool_flags &= ~(DDI_IRM_FLAG_WAITERS);
		}

		/* Clear QUEUED condition */
		pool_p->ipool_flags &= ~(DDI_IRM_FLAG_QUEUED);
	}
}

/*
 * i_ddi_irm_balance()
 *
 *	Balance a pool.  The general algorithm is to first reset all
 *	requests to their maximum size, use reduction algorithms to
 *	solve any imbalance, and then notify affected drivers.
 */
static void
i_ddi_irm_balance(ddi_irm_pool_t *pool_p)
{
	ddi_irm_req_t	*req_p;

#ifdef	DEBUG
	uint_t		debug_totsz = 0;
	int		debug_policy = 0;
#endif	/* DEBUG */

	ASSERT(pool_p != NULL);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: pool_p %p\n",
	    (void *)pool_p));

#ifdef	DEBUG	/* Adjust size and policy settings */
	if (irm_debug_size > pool_p->ipool_minno) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: debug size %d\n",
		    irm_debug_size));
		debug_totsz = pool_p->ipool_totsz;
		pool_p->ipool_totsz = irm_debug_size;
	}
	if (DDI_IRM_POLICY_VALID(irm_debug_policy)) {
		DDI_INTR_IRMDBG((CE_CONT,
		    "i_ddi_irm_balance: debug policy %d\n", irm_debug_policy));
		debug_policy = pool_p->ipool_policy;
		pool_p->ipool_policy = irm_debug_policy;
	}
#endif	/* DEBUG */

	/* Lock the availability lock */
	mutex_enter(&pool_p->ipool_navail_lock);

	/*
	 * Put all of the reducible requests into a scratch list.
	 * Reset each one of them to their maximum availability.
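	 *
	 * (Illustration: a callback-capable request holding 2 of the 8
	 * vectors it asked for re-enters the reduction below at
	 * ireq_navail = 8; its previous allocation is saved in
	 * ireq_scratch so that only real changes are notified later.)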
	 */
	for (req_p = list_head(&pool_p->ipool_req_list); req_p;
	    req_p = list_next(&pool_p->ipool_req_list, req_p)) {
		if (DDI_IRM_IS_REDUCIBLE(req_p)) {
			pool_p->ipool_resno -= req_p->ireq_navail;
			req_p->ireq_scratch = req_p->ireq_navail;
			req_p->ireq_navail = req_p->ireq_nreq;
			pool_p->ipool_resno += req_p->ireq_navail;
			list_insert_tail(&pool_p->ipool_scratch_list, req_p);
		}
	}

	/* Balance the requests */
	i_ddi_irm_reduce(pool_p);

	/* Unlock the availability lock */
	mutex_exit(&pool_p->ipool_navail_lock);

	/*
	 * Process REMOVE notifications.
	 *
	 * If a driver fails to release interrupts, exclude it from
	 * further processing, correct the resulting imbalance, and
	 * start over again at the head of the scratch list.
	 */
	req_p = list_head(&pool_p->ipool_scratch_list);
	while (req_p) {
		if ((req_p->ireq_navail < req_p->ireq_scratch) &&
		    (i_ddi_irm_notify(pool_p, req_p) != DDI_SUCCESS)) {
			list_remove(&pool_p->ipool_scratch_list, req_p);
			mutex_enter(&pool_p->ipool_navail_lock);
			i_ddi_irm_reduce(pool_p);
			mutex_exit(&pool_p->ipool_navail_lock);
			req_p = list_head(&pool_p->ipool_scratch_list);
		} else {
			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
		}
	}

	/*
	 * Process ADD notifications.
	 *
	 * This is the last use of the scratch list, so empty it.
	 */
	while ((req_p = list_remove_head(&pool_p->ipool_scratch_list)) !=
	    NULL) {
		if (req_p->ireq_navail > req_p->ireq_scratch) {
			(void) i_ddi_irm_notify(pool_p, req_p);
		}
	}

#ifdef	DEBUG	/* Restore size and policy settings */
	if (debug_totsz != 0)
		pool_p->ipool_totsz = debug_totsz;
	if (debug_policy != 0)
		pool_p->ipool_policy = debug_policy;
#endif	/* DEBUG */
}

/*
 * i_ddi_irm_reduce()
 *
 *	Use reduction algorithms to correct an imbalance in a pool.
 */
static void
i_ddi_irm_reduce(ddi_irm_pool_t *pool_p)
{
	int	imbalance;

	ASSERT(pool_p != NULL);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
	ASSERT(DDI_IRM_POLICY_VALID(pool_p->ipool_policy));

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_reduce: pool_p %p\n",
	    (void *)pool_p));

	/* Compute the imbalance.  Do nothing if already balanced. */
	if ((imbalance = pool_p->ipool_resno - pool_p->ipool_totsz) <= 0)
		return;

	/*
	 * Try policy based reduction first.  If it fails, then
	 * possibly reduce new requests as a last resort.
	 */
	if (i_ddi_irm_reduce_by_policy(pool_p, imbalance, pool_p->ipool_policy)
	    != DDI_SUCCESS) {

		DDI_INTR_IRMDBG((CE_CONT,
		    "i_ddi_irm_reduce: policy reductions failed.\n"));

		/* Compute the remaining imbalance */
		imbalance = pool_p->ipool_resno - pool_p->ipool_totsz;

		ASSERT(imbalance > 0);

		i_ddi_irm_reduce_new(pool_p, imbalance);
	}
}

/*
 * i_ddi_irm_enqueue()
 *
 *	Queue a pool to be balanced.  Signals the balancing thread to wake
 *	up and process the pool.  If 'wait_flag' is true, then the current
 *	thread becomes a waiter and blocks until the balance is completed.
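 *
 *	Callers must already hold ipool_lock (see the ASSERT below);
 *	when 'wait_flag' is true, cv_wait() drops and re-acquires that
 *	lock while the balance runs in the balancing thread.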
 */
static void
i_ddi_irm_enqueue(ddi_irm_pool_t *pool_p, boolean_t wait_flag)
{
	ASSERT(pool_p != NULL);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool_p %p wait_flag %d\n",
	    (void *)pool_p, (int)wait_flag));

	/* Do nothing if the pool is already balanced */
#ifndef	DEBUG
	if (pool_p->ipool_reqno == pool_p->ipool_resno) {
#else
	if ((pool_p->ipool_reqno == pool_p->ipool_resno) && !irm_debug_size) {
#endif	/* DEBUG */
		DDI_INTR_IRMDBG((CE_CONT,
		    "i_ddi_irm_enqueue: pool already balanced\n"));
		return;
	}

	/* Avoid deadlocks when IRM is not active */
	if (!irm_active && wait_flag) {
		DDI_INTR_IRMDBG((CE_CONT,
		    "i_ddi_irm_enqueue: pool not active.\n"));
		return;
	}

	if (wait_flag)
		pool_p->ipool_flags |= DDI_IRM_FLAG_WAITERS;

	if (wait_flag || !(pool_p->ipool_flags & DDI_IRM_FLAG_QUEUED)) {
		pool_p->ipool_flags |= DDI_IRM_FLAG_QUEUED;
		cv_signal(&pool_p->ipool_cv);
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool queued.\n"));
	}

	if (wait_flag) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: waiting...\n"));
		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);
	}
}

/*
 * i_ddi_irm_reduce_by_policy()
 *
 *	Reduces requests based on reduction policies.
 *
 *	For the DDI_IRM_POLICY_LARGE reduction policy, the algorithm
 *	generally reduces larger requests first, before advancing
 *	to smaller requests.
 *	For the DDI_IRM_POLICY_EVEN reduction policy, the algorithm
 *	reduces requests evenly, without giving a specific preference
 *	to smaller or larger requests.  Each iteration reduces all
 *	reducible requests by the same amount until the imbalance is
 *	corrected.
 *
 *	The scratch list is initially sorted in descending order by
 *	current navail values, which are maximized prior to reduction.
 *	This sorted order is preserved, and no request is ever reduced
 *	below the threshold of the interrupt pool's default allocation
 *	size.
 *
 *	To minimize the total number of iterations, the algorithm
 *	reduces multiple requests together and reduces in the largest
 *	increments possible.
 */
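
/*
 * Illustrative trace of the LARGE policy with hypothetical values:
 * scratch navails {8, 8, 6, 3}, ipool_defsz = 2, imbalance = 5.
 *
 *	Pass 1: nreduce = 2 (the two 8s), reduction = 8 - 6 = 2,
 *		max_redu = 4 <= 5, so the navails become {6, 6, 6, 3}
 *		and the imbalance drops to 1.
 *	Pass 2: nreduce = 3 (the three 6s), reduction = 6 - 3 = 3,
 *		max_redu = 9 > 1, so reduction = 1 / 3 = 0 with
 *		nremain = 1; one request is trimmed by 1 from the tail
 *		of the group, yielding {6, 6, 5, 3}.
 */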

static int
i_ddi_irm_reduce_by_policy(ddi_irm_pool_t *pool_p, int imbalance, int policy)
{
	ASSERT(pool_p != NULL);
	ASSERT(imbalance > 0);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	while (imbalance > 0) {
		list_t		*slist_p = &pool_p->ipool_scratch_list;
		ddi_irm_req_t	*req_p = list_head(slist_p), *last_p;
		uint_t		nreduce = 0, nremain = 0, stop_navail;
		uint_t		pool_defsz = pool_p->ipool_defsz;
		uint_t		reduction, max_redu;

		/* Fail if none are reducible */
		if (!req_p || req_p->ireq_navail <= pool_defsz) {
			DDI_INTR_IRMDBG((CE_CONT,
			    "i_ddi_irm_reduce_by_policy: Failure. "
			    "All requests have downsized to low limit.\n"));
			return (DDI_FAILURE);
		}

		/* Count reducible requests */
		stop_navail = (policy == DDI_IRM_POLICY_LARGE) ?
		    req_p->ireq_navail - 1 : pool_defsz;
		for (; req_p; req_p = list_next(slist_p, req_p)) {
			if (req_p->ireq_navail <= stop_navail)
				break;
			nreduce++;
		}

		/* Compute reduction */
		last_p = req_p ? list_prev(slist_p, req_p) :
		    list_tail(slist_p);
		if ((policy == DDI_IRM_POLICY_LARGE) && req_p &&
		    req_p->ireq_navail > pool_defsz)
			reduction = last_p->ireq_navail - req_p->ireq_navail;
		else
			reduction = last_p->ireq_navail - pool_defsz;

		if ((max_redu = reduction * nreduce) > imbalance) {
			reduction = imbalance / nreduce;
			nremain = imbalance % nreduce;
			pool_p->ipool_resno -= imbalance;
			imbalance = 0;
		} else {
			pool_p->ipool_resno -= max_redu;
			imbalance -= max_redu;
		}

		/* Reduce */
		for (req_p = list_head(slist_p); (reduction != 0) && nreduce--;
		    req_p = list_next(slist_p, req_p)) {
			req_p->ireq_navail -= reduction;
		}

		for (req_p = last_p; nremain--;
		    req_p = list_prev(slist_p, req_p)) {
			req_p->ireq_navail--;
		}
	}

	return (DDI_SUCCESS);
}

/*
 * i_ddi_irm_reduce_new()
 *
 *	Reduces new requests.  This is only used as a last resort,
 *	after policy-based reduction has failed.
 *
 *	NOTE: The pool locking in i_ddi_irm_insert() ensures
 *	      there can be only one new request at a time in a pool.
 */
static void
i_ddi_irm_reduce_new(ddi_irm_pool_t *pool_p, int imbalance)
{
	ddi_irm_req_t	*req_p;

	ASSERT(pool_p != NULL);
	ASSERT(imbalance > 0);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	DDI_INTR_IRMDBG((CE_CONT,
	    "i_ddi_irm_reduce_new: pool_p %p imbalance %d\n",
	    (void *)pool_p, imbalance));

	for (req_p = list_head(&pool_p->ipool_scratch_list); req_p;
	    req_p = list_next(&pool_p->ipool_scratch_list, req_p)) {
		if (req_p->ireq_flags & DDI_IRM_FLAG_NEW) {
			ASSERT(req_p->ireq_navail >= imbalance);
			req_p->ireq_navail -= imbalance;
			pool_p->ipool_resno -= imbalance;
			return;
		}
	}

	/* Should never get here */
	ASSERT(B_FALSE);
}

/*
 * Miscellaneous Helper Functions
 */

/*
 * i_ddi_intr_get_pool()
 *
 *	Get an IRM pool that supplies interrupts of a specified type.
 *	Issues a DDI_INTROP_GETPOOL operation to the bus nexus driver.
 *	Fails if no pool exists.
 */
ddi_irm_pool_t *
i_ddi_intr_get_pool(dev_info_t *dip, int type)
{
	devinfo_intr_t		*intr_p;
	ddi_irm_pool_t		*pool_p;
	ddi_irm_req_t		*req_p;
	ddi_intr_handle_impl_t	hdl;

	ASSERT(dip != NULL);
	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));

	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
	    ((req_p = intr_p->devi_irm_req_p) != NULL) &&
	    ((pool_p = req_p->ireq_pool_p) != NULL) &&
	    (pool_p->ipool_types & type)) {
		return (pool_p);
	}

	bzero(&hdl, sizeof (ddi_intr_handle_impl_t));
	hdl.ih_dip = dip;
	hdl.ih_type = type;

	if (i_ddi_intr_ops(dip, dip, DDI_INTROP_GETPOOL,
	    &hdl, (void *)&pool_p) == DDI_SUCCESS)
		return (pool_p);

	return (NULL);
}

/*
 * i_ddi_irm_insertion_sort()
 *
 *	Use the insertion sort method to insert a request into a list.
 *	The list is sorted in descending order by request size.
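 *
 *	For example, inserting a request with ireq_nreq = 5 into a list
 *	with sizes {8, 5, 3} places it ahead of the existing 5, giving
 *	{8, 5 (new), 5, 3}; ties go before existing entries because the
 *	scan below only skips strictly larger requests.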
 */
static void
i_ddi_irm_insertion_sort(list_t *req_list, ddi_irm_req_t *req_p)
{
	ddi_irm_req_t	*next_p;

	next_p = list_head(req_list);

	while (next_p && (next_p->ireq_nreq > req_p->ireq_nreq))
		next_p = list_next(req_list, next_p);

	list_insert_before(req_list, next_p, req_p);
}

/*
 * i_ddi_irm_notify()
 *
 *	Notify a driver of changes to its interrupt request using the
 *	generic callback mechanism.  Checks for errors in processing.
 */
static int
i_ddi_irm_notify(ddi_irm_pool_t *pool_p, ddi_irm_req_t *req_p)
{
	ddi_cb_action_t	action;
	ddi_cb_t	*cb_p;
	uint_t		nintrs;
	int		ret, count;

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: pool_p %p req_p %p\n",
	    (void *)pool_p, (void *)req_p));

	/* Do not notify new or unchanged requests */
	if ((req_p->ireq_navail == req_p->ireq_scratch) ||
	    (req_p->ireq_flags & DDI_IRM_FLAG_NEW))
		return (DDI_SUCCESS);

	/* Determine action and count */
	if (req_p->ireq_navail > req_p->ireq_scratch) {
		action = DDI_CB_INTR_ADD;
		count = req_p->ireq_navail - req_p->ireq_scratch;
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: adding %d\n",
		    count));
	} else {
		action = DDI_CB_INTR_REMOVE;
		count = req_p->ireq_scratch - req_p->ireq_navail;
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: removing %d\n",
		    count));
	}

	/* Look up the driver callback */
	if ((cb_p = DEVI(req_p->ireq_dip)->devi_cb_p) == NULL) {
		DDI_INTR_IRMDBG((CE_WARN, "i_ddi_irm_notify: no callback!\n"));
		return (DDI_FAILURE);
	}

	/* Do the callback */
	ret = cb_p->cb_func(req_p->ireq_dip, action, (void *)(uintptr_t)count,
	    cb_p->cb_arg1, cb_p->cb_arg2);

	/* Log callback errors */
	if (ret != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: failed callback (action=%d, ret=%d)\n",
		    ddi_driver_name(req_p->ireq_dip),
		    ddi_get_instance(req_p->ireq_dip), (int)action, ret);
	}

	/* Check if the driver exceeds its availability */
	nintrs = i_ddi_intr_get_current_nintrs(req_p->ireq_dip);
	if (nintrs > req_p->ireq_navail) {
		cmn_err(CE_WARN, "%s%d: failed to release interrupts "
		    "(nintrs=%d, navail=%d).\n",
		    ddi_driver_name(req_p->ireq_dip),
		    ddi_get_instance(req_p->ireq_dip), nintrs,
		    req_p->ireq_navail);
		pool_p->ipool_resno += (nintrs - req_p->ireq_navail);
		req_p->ireq_navail = nintrs;
		return (DDI_FAILURE);
	}

	/* Update the request */
	req_p->ireq_scratch = req_p->ireq_navail;

	return (DDI_SUCCESS);
}

/*
 * i_ddi_irm_debug_balance()
 *
 *	A debug/test only routine to force the immediate,
 *	synchronous rebalancing of an interrupt pool.
 */
#ifdef	DEBUG
void
i_ddi_irm_debug_balance(dev_info_t *dip, boolean_t wait_flag)
{
	ddi_irm_pool_t	*pool_p;
	int		type;

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_debug_balance: dip %p wait %d\n",
	    (void *)dip, (int)wait_flag));

	if (((type = i_ddi_intr_get_current_type(dip)) != 0) &&
	    ((pool_p = i_ddi_intr_get_pool(dip, type)) != NULL)) {
		mutex_enter(&pool_p->ipool_lock);
		i_ddi_irm_enqueue(pool_p, wait_flag);
		mutex_exit(&pool_p->ipool_lock);
	}
}
#endif