/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/note.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kmem.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/ddi.h>
#include <sys/sunndi.h>
#include <sys/ndi_impldefs.h>	/* include prototypes */

#if defined(__i386) || defined(__amd64)
/*
 * MSI-X allocation limit.
 */
extern uint_t		ddi_msix_alloc_limit;
#endif

/*
 * Interrupt Resource Management (IRM).
 */

#define	DDI_IRM_BALANCE_DELAY	(60)	/* In seconds */

#define	DDI_IRM_HAS_CB(c)	((c) && ((c)->cb_flags & DDI_CB_FLAG_INTR))

#define	DDI_IRM_IS_REDUCIBLE(r) ((((r)->ireq_flags & DDI_IRM_FLAG_CALLBACK) && \
				((r)->ireq_type == DDI_INTR_TYPE_MSIX)) || \
				((r)->ireq_flags & DDI_IRM_FLAG_NEW))

extern pri_t	minclsyspri;

/* Global policies */
int		irm_enable = 1;
boolean_t	irm_active = B_FALSE;
int		irm_default_policy = DDI_IRM_POLICY_LARGE;
uint_t		irm_balance_delay = DDI_IRM_BALANCE_DELAY;

/* Global list of interrupt pools */
kmutex_t	irm_pools_lock;
list_t		irm_pools_list;

/* Global debug tunables */
#ifdef	DEBUG
int		irm_debug_policy = 0;
uint_t		irm_debug_size = 0;
#endif	/* DEBUG */

static void	irm_balance_thread(ddi_irm_pool_t *);
static void	i_ddi_irm_balance(ddi_irm_pool_t *);
static void	i_ddi_irm_enqueue(ddi_irm_pool_t *, boolean_t);
static void	i_ddi_irm_reduce(ddi_irm_pool_t *pool);
static int	i_ddi_irm_reduce_by_policy(ddi_irm_pool_t *, int, int);
static void	i_ddi_irm_reduce_new(ddi_irm_pool_t *, int);
static void	i_ddi_irm_insertion_sort(list_t *, ddi_irm_req_t *);
static int	i_ddi_irm_notify(ddi_irm_pool_t *, ddi_irm_req_t *);
static int	i_ddi_irm_modify_increase(ddi_irm_req_t *, int);

/*
 * OS Initialization Routines
 */

/*
 * irm_init()
 *
 *	Initialize IRM subsystem before any drivers are attached.
 */
void
irm_init(void)
{
	/* Do nothing if IRM is disabled */
	if (!irm_enable)
		return;

	/* Verify that the default balancing policy is valid */
	if (!DDI_IRM_POLICY_VALID(irm_default_policy))
		irm_default_policy = DDI_IRM_POLICY_LARGE;

	/* Initialize the global list of interrupt pools */
	mutex_init(&irm_pools_lock, NULL, MUTEX_DRIVER, NULL);
	list_create(&irm_pools_list, sizeof (ddi_irm_pool_t),
	    offsetof(ddi_irm_pool_t, ipool_link));
}

/*
 * i_ddi_irm_poststartup()
 *
 *	IRM is not activated until after the IO subsystem is initialized.
 *	When activated, per-pool balancing threads are spawned and a flag
 *	is set so that all future pools will be activated when created.
 *
 *	NOTE: the global variable 'irm_enable' disables IRM if zero.
 */
void
i_ddi_irm_poststartup(void)
{
	ddi_irm_pool_t	*pool_p;

	/* Do nothing if IRM is disabled */
	if (!irm_enable)
		return;

	/* Lock the global list */
	mutex_enter(&irm_pools_lock);

	/* Activate all defined pools */
	for (pool_p = list_head(&irm_pools_list); pool_p;
	    pool_p = list_next(&irm_pools_list, pool_p))
		pool_p->ipool_thread = thread_create(NULL, 0,
		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);

	/* Set future pools to be active */
	irm_active = B_TRUE;

	/* Unlock the global list */
	mutex_exit(&irm_pools_lock);
}

/*
 * NDI interfaces for creating/destroying IRM pools.
 */

/*
 * ndi_irm_create()
 *
 *	Nexus interface to create an IRM pool.  Create the new
 *	pool and add it to the global list of interrupt pools.
 */
int
ndi_irm_create(dev_info_t *dip, ddi_irm_params_t *paramsp,
    ddi_irm_pool_t **pool_retp)
{
	ddi_irm_pool_t	*pool_p;

	ASSERT(dip != NULL);
	ASSERT(paramsp != NULL);
	ASSERT(pool_retp != NULL);
	ASSERT(paramsp->iparams_total >= 1);
	ASSERT(paramsp->iparams_types != 0);

	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_create: dip %p\n", (void *)dip));

	/* Check if IRM is enabled */
	if (!irm_enable)
		return (NDI_FAILURE);

	/* Validate parameters */
	if ((dip == NULL) || (paramsp == NULL) || (pool_retp == NULL) ||
	    (paramsp->iparams_total < 1) || (paramsp->iparams_types == 0))
		return (NDI_FAILURE);

	/* Allocate and initialize the pool */
	pool_p = kmem_zalloc(sizeof (ddi_irm_pool_t), KM_SLEEP);
	pool_p->ipool_owner = dip;
	pool_p->ipool_policy = irm_default_policy;
	pool_p->ipool_types = paramsp->iparams_types;
	pool_p->ipool_totsz = paramsp->iparams_total;
	pool_p->ipool_defsz = MIN(DDI_MAX_MSIX_ALLOC, MAX(DDI_MIN_MSIX_ALLOC,
	    paramsp->iparams_total / DDI_MSIX_ALLOC_DIVIDER));
	list_create(&pool_p->ipool_req_list, sizeof (ddi_irm_req_t),
	    offsetof(ddi_irm_req_t, ireq_link));
	list_create(&pool_p->ipool_scratch_list, sizeof (ddi_irm_req_t),
	    offsetof(ddi_irm_req_t, ireq_scratch_link));
	cv_init(&pool_p->ipool_cv, NULL, CV_DRIVER, NULL);
	mutex_init(&pool_p->ipool_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&pool_p->ipool_navail_lock, NULL, MUTEX_DRIVER, NULL);

	/* Add to global list of pools */
	mutex_enter(&irm_pools_lock);
	list_insert_tail(&irm_pools_list, pool_p);
	mutex_exit(&irm_pools_lock);

	/* If IRM is active, then activate the pool */
	if (irm_active)
		pool_p->ipool_thread = thread_create(NULL, 0,
		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);

	*pool_retp = pool_p;
	return (NDI_SUCCESS);
}
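
/*
 * Illustrative usage sketch (not part of the framework): a nexus driver
 * that owns 256 MSI-X vectors might create its pool roughly as follows.
 * The numbers are hypothetical.
 *
 *	ddi_irm_params_t	params;
 *	ddi_irm_pool_t		*pool_p;
 *
 *	params.iparams_types = DDI_INTR_TYPE_MSIX;
 *	params.iparams_total = 256;
 *	if (ndi_irm_create(dip, &params, &pool_p) != NDI_SUCCESS)
 *		cmn_err(CE_WARN, "failed to create IRM pool");
 */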

/*
 * ndi_irm_resize_pool()
 *
 *	Nexus interface to resize an IRM pool.  If the pool size drops
 *	below the number of allocated vectors, then a rebalance operation
 *	is initiated before the pool is resized.  If the rebalance fails
 *	to free enough vectors, NDI_FAILURE is returned.
 */
int
ndi_irm_resize_pool(ddi_irm_pool_t *pool_p, uint_t new_size)
{
	uint_t	prev_size;

	ASSERT(pool_p != NULL);

	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_resize_pool: pool_p %p"
	    " current-size 0x%x new-size 0x%x\n",
	    (void *)pool_p, pool_p->ipool_totsz, new_size));

	if (pool_p == NULL)
		return (NDI_EINVAL);

	/* Check if IRM is enabled */
	if (!irm_enable)
		return (NDI_FAILURE);

	mutex_enter(&pool_p->ipool_lock);

	/*
	 * If we are increasing the pool size, or if the reserved
	 * number of vectors is <= the new pool size, then simply
	 * update the pool size and enqueue a rebalance operation
	 * if necessary to use the new vectors.
	 */
	if ((pool_p->ipool_totsz < new_size) ||
	    (pool_p->ipool_resno <= new_size)) {
		/* set new pool size */
		pool_p->ipool_totsz = new_size;
		/* adjust the default allocation limit */
		pool_p->ipool_defsz = MIN(DDI_MAX_MSIX_ALLOC,
		    MAX(DDI_MIN_MSIX_ALLOC, new_size / DDI_MSIX_ALLOC_DIVIDER));
		/* queue a rebalance operation to use the new vectors */
		if (pool_p->ipool_reqno > pool_p->ipool_resno)
			i_ddi_irm_enqueue(pool_p, B_FALSE);
		mutex_exit(&pool_p->ipool_lock);
		return (NDI_SUCCESS);
	}

	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_resize_pool: pool_p %p"
	    " needs a rebalance operation\n", (void *)pool_p));

	/*
	 * Requires a rebalance operation.
	 */
	/* save the current pool size */
	prev_size = pool_p->ipool_totsz;
	/* set the pool size to the desired new value */
	pool_p->ipool_totsz = new_size;
	/* perform the rebalance operation */
	i_ddi_irm_enqueue(pool_p, B_TRUE);

	/*
	 * If the rebalance operation couldn't free up enough
	 * vectors, then fail the resize operation.
	 */
	if (pool_p->ipool_resno > new_size) {	/* rebalance failed */
		/* restore the pool size to the previous value */
		pool_p->ipool_totsz = prev_size;
		/* enqueue a rebalance operation for the original pool size */
		i_ddi_irm_enqueue(pool_p, B_FALSE);
		mutex_exit(&pool_p->ipool_lock);
		return (NDI_FAILURE);
	} else {	/* rebalance worked */
		/* adjust the default allocation limit */
		pool_p->ipool_defsz = MIN(DDI_MAX_MSIX_ALLOC,
		    MAX(DDI_MIN_MSIX_ALLOC, new_size / DDI_MSIX_ALLOC_DIVIDER));
		mutex_exit(&pool_p->ipool_lock);
		DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_resize_pool: pool_p %p"
		    " resized from %x to %x\n",
		    (void *)pool_p, prev_size, pool_p->ipool_totsz));
		return (NDI_SUCCESS);
	}
}
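
/*
 * Illustrative example (hypothetical numbers): consider a pool with
 * ipool_totsz = 128 and ipool_resno = 100.  Resizing to 112 takes the
 * simple path above, because the reserved vectors still fit.  Resizing
 * to 96 first performs a synchronous rebalance; the resize succeeds
 * only if the rebalance reduces ipool_resno to 96 or fewer vectors.
 */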

/*
 * ndi_irm_destroy()
 *
 *	Nexus interface to destroy an IRM pool.  Destroy the pool
 *	and remove it from the global list of interrupt pools.
 */
int
ndi_irm_destroy(ddi_irm_pool_t *pool_p)
{
	ASSERT(pool_p != NULL);
	ASSERT(pool_p->ipool_resno == 0);

	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_destroy: pool_p %p\n",
	    (void *)pool_p));

	/* Validate parameters */
	if (pool_p == NULL)
		return (NDI_FAILURE);

	/* Validate that pool is empty */
	if (pool_p->ipool_resno != 0)
		return (NDI_BUSY);

	/* Remove the pool from the global list */
	mutex_enter(&irm_pools_lock);
	list_remove(&irm_pools_list, pool_p);
	mutex_exit(&irm_pools_lock);

	/* Terminate the balancing thread */
	mutex_enter(&pool_p->ipool_lock);
	if (pool_p->ipool_thread &&
	    (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) {
		pool_p->ipool_flags |= DDI_IRM_FLAG_EXIT;
		cv_signal(&pool_p->ipool_cv);
		mutex_exit(&pool_p->ipool_lock);
		thread_join(pool_p->ipool_thread->t_did);
	} else
		mutex_exit(&pool_p->ipool_lock);

	/* Destroy the pool */
	cv_destroy(&pool_p->ipool_cv);
	mutex_destroy(&pool_p->ipool_lock);
	mutex_destroy(&pool_p->ipool_navail_lock);
	list_destroy(&pool_p->ipool_req_list);
	list_destroy(&pool_p->ipool_scratch_list);
	kmem_free(pool_p, sizeof (ddi_irm_pool_t));

	return (NDI_SUCCESS);
}

/*
 * Insert/Modify/Remove Interrupt Requests
 */

/*
 * i_ddi_irm_insert()
 *
 *	Insert a new request into an interrupt pool, and balance the pool.
 */
int
i_ddi_irm_insert(dev_info_t *dip, int type, int count)
{
	ddi_irm_req_t	*req_p;
	devinfo_intr_t	*intr_p;
	ddi_irm_pool_t	*pool_p;
	uint_t		nreq, nmin, npartial;
	boolean_t	irm_flag = B_FALSE;

	ASSERT(dip != NULL);
	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));
	ASSERT(count > 0);

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: dip %p type %d count %d\n",
	    (void *)dip, type, count));

	/* Validate parameters */
	if ((dip == NULL) || (count < 1) || !DDI_INTR_TYPE_FLAG_VALID(type)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: invalid args\n"));
		return (DDI_EINVAL);
	}

	/* Check for an existing request */
	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
	    (intr_p->devi_irm_req_p != NULL))
		return (DDI_SUCCESS);

	/* Check for IRM support from the system */
	if ((pool_p = i_ddi_intr_get_pool(dip, type)) == NULL) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: not supported\n"));
		return (DDI_ENOTSUP);
	}

	/* Check for IRM support from the driver */
	if (i_ddi_irm_supported(dip, type) == DDI_SUCCESS)
		irm_flag = B_TRUE;

	/* Determine request size */
	nreq = (irm_flag) ? count :
	    MIN(count, i_ddi_intr_get_limit(dip, type, pool_p));
	nmin = (irm_flag) ? 1 : nreq;
	npartial = MIN(nreq, pool_p->ipool_defsz);

	/* Allocate and initialize the request */
	req_p = kmem_zalloc(sizeof (ddi_irm_req_t), KM_SLEEP);
	req_p->ireq_type = type;
	req_p->ireq_dip = dip;
	req_p->ireq_pool_p = pool_p;
	req_p->ireq_nreq = nreq;
	req_p->ireq_flags = DDI_IRM_FLAG_NEW;
	if (irm_flag)
		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;

	/* Lock the pool */
	mutex_enter(&pool_p->ipool_lock);

	/* Check for minimal fit before inserting */
	if ((pool_p->ipool_minno + nmin) > pool_p->ipool_totsz) {
		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		mutex_exit(&pool_p->ipool_lock);
		kmem_free(req_p, sizeof (ddi_irm_req_t));
		return (DDI_EAGAIN);
	}

	/* Insert the request into the pool */
	pool_p->ipool_reqno += nreq;
	pool_p->ipool_minno += nmin;
	i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);

	/*
	 * Try to fulfill the request.
	 *
	 * If all the interrupts are available, and either the request
	 * is static or the pool is active, then just take them directly.
	 *
	 * If only some of the interrupts are available, and the request
	 * can receive future callbacks, then take some now but queue the
	 * pool to be rebalanced later.
	 *
	 * Otherwise, immediately rebalance the pool and wait.
	 */
	if ((!irm_flag || (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) &&
	    ((pool_p->ipool_resno + nreq) <= pool_p->ipool_totsz)) {

		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
		    "request completely fulfilled.\n"));
		pool_p->ipool_resno += nreq;
		req_p->ireq_navail = nreq;
		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);

	} else if (irm_flag &&
	    ((pool_p->ipool_resno + npartial) <= pool_p->ipool_totsz)) {

		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
		    "request partially fulfilled.\n"));
		pool_p->ipool_resno += npartial;
		req_p->ireq_navail = npartial;
		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
		i_ddi_irm_enqueue(pool_p, B_FALSE);

	} else {

		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
		    "request needs immediate rebalance.\n"));
		i_ddi_irm_enqueue(pool_p, B_TRUE);
		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
	}

	/* Fail if the request cannot be fulfilled at all */
	if (req_p->ireq_navail == 0) {
		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		pool_p->ipool_reqno -= nreq;
		pool_p->ipool_minno -= nmin;
		list_remove(&pool_p->ipool_req_list, req_p);
		mutex_exit(&pool_p->ipool_lock);
		kmem_free(req_p, sizeof (ddi_irm_req_t));
		return (DDI_EAGAIN);
	}

	/* Unlock the pool */
	mutex_exit(&pool_p->ipool_lock);

	intr_p->devi_irm_req_p = req_p;
	return (DDI_SUCCESS);
}
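
/*
 * Worked example (hypothetical numbers): in a pool with ipool_defsz = 8,
 * an IRM-aware driver (irm_flag set) asking for count = 32 yields
 * nreq = 32, nmin = 1, and npartial = MIN(32, 8) = 8.  If all 32 vectors
 * fit, the request is fulfilled outright; if only 8 fit, the driver gets
 * 8 now and the pool is queued for a later rebalance, which may grow the
 * allocation through the driver's callback.
 */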

/*
 * i_ddi_irm_modify()
 *
 *	Modify an existing request in an interrupt pool, and balance the pool.
 */
int
i_ddi_irm_modify(dev_info_t *dip, int nreq)
{
	devinfo_intr_t	*intr_p;
	ddi_irm_req_t	*req_p;
	ddi_irm_pool_t	*pool_p;
	int		type;
	int		retval = DDI_SUCCESS;

	ASSERT(dip != NULL);
	ASSERT(nreq > 0);

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: dip %p nreq %d\n",
	    (void *)dip, nreq));

	/* Validate parameters */
	if ((dip == NULL) || (nreq < 1)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
		return (DDI_EINVAL);
	}

	/* Do nothing if not mapped to an IRM pool */
	if (((intr_p = DEVI(dip)->devi_intr_p) == NULL) ||
	    ((req_p = intr_p->devi_irm_req_p) == NULL))
		return (DDI_SUCCESS);

	/* Do nothing if new size is the same */
	if (nreq == req_p->ireq_nreq)
		return (DDI_SUCCESS);

	/* Do not allow MSI requests to be resized */
	if ((type = req_p->ireq_type) == DDI_INTR_TYPE_MSI) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid type\n"));
		return (DDI_ENOTSUP);
	}

	/* Select the pool */
	if ((pool_p = req_p->ireq_pool_p) == NULL) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: missing pool\n"));
		return (DDI_FAILURE);
	}

	/* Validate request size is not too large */
	if (nreq > i_ddi_intr_get_limit(dip, type, pool_p)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
		return (DDI_EINVAL);
	}

	/* Lock the pool */
	mutex_enter(&pool_p->ipool_lock);

	/*
	 * Process the modification.
	 *
	 *	- To increase a non-IRM request, call the implementation in
	 *	  i_ddi_irm_modify_increase().
	 *
	 *	- To decrease a non-IRM request, directly update the pool and
	 *	  request, then queue the pool for later rebalancing.
	 *
	 *	- To modify an IRM request, always queue the pool for later
	 *	  rebalancing.  IRM consumers rely upon callbacks for changes.
	 */
	if ((nreq > req_p->ireq_nreq) &&
	    (i_ddi_irm_supported(dip, type) != DDI_SUCCESS)) {

		retval = i_ddi_irm_modify_increase(req_p, nreq);

	} else {

		/* Update pool and request */
		pool_p->ipool_reqno -= req_p->ireq_nreq;
		pool_p->ipool_reqno += nreq;
		if (i_ddi_irm_supported(dip, type) != DDI_SUCCESS) {
			pool_p->ipool_minno -= req_p->ireq_navail;
			pool_p->ipool_resno -= req_p->ireq_navail;
			pool_p->ipool_minno += nreq;
			pool_p->ipool_resno += nreq;
			req_p->ireq_navail = nreq;
		}
		req_p->ireq_nreq = nreq;

		/* Re-sort request into the pool */
		list_remove(&pool_p->ipool_req_list, req_p);
		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);

		/* Queue pool for asynchronous rebalance */
		i_ddi_irm_enqueue(pool_p, B_FALSE);
	}

	/* Unlock the pool */
	mutex_exit(&pool_p->ipool_lock);

	return (retval);
}

/*
 * i_ddi_irm_modify_increase()
 *
 *	Increase a non-IRM request.  The additional interrupts are
 *	directly taken from the pool when possible.  Otherwise, an
 *	immediate, synchronous rebalance is performed.  A temporary
 *	proxy request is used for any rebalance operation to ensure
 *	the request is not reduced below its current allocation.
 *
 *	NOTE: pool must already be locked.
 */
static int
i_ddi_irm_modify_increase(ddi_irm_req_t *req_p, int nreq)
{
	dev_info_t	*dip = req_p->ireq_dip;
	ddi_irm_pool_t	*pool_p = req_p->ireq_pool_p;
	ddi_irm_req_t	new_req;
	int		count, delta;

	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	/* Compute number of additional vectors */
	count = nreq - req_p->ireq_nreq;

	/* Check for minimal fit */
	if ((pool_p->ipool_minno + count) > pool_p->ipool_totsz) {
		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		return (DDI_EAGAIN);
	}

	/* Update the pool */
	pool_p->ipool_reqno += count;
	pool_p->ipool_minno += count;

	/* Attempt direct implementation */
	if ((pool_p->ipool_resno + count) <= pool_p->ipool_totsz) {
		req_p->ireq_nreq += count;
		req_p->ireq_navail += count;
		pool_p->ipool_resno += count;
		return (DDI_SUCCESS);
	}

	/* Rebalance required: fail if pool is not active */
	if ((pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE) == 0) {
		pool_p->ipool_reqno -= count;
		pool_p->ipool_minno -= count;
		return (DDI_EAGAIN);
	}

	/* Insert temporary proxy request */
	bzero(&new_req, sizeof (ddi_irm_req_t));
	new_req.ireq_dip = dip;
	new_req.ireq_nreq = count;
	new_req.ireq_pool_p = pool_p;
	new_req.ireq_type = req_p->ireq_type;
	new_req.ireq_flags = DDI_IRM_FLAG_NEW;
	i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, &new_req);

	/* Synchronously rebalance */
	i_ddi_irm_enqueue(pool_p, B_TRUE);

	/* Remove proxy request, and merge into original request */
	req_p->ireq_nreq += count;
	if ((delta = (count - new_req.ireq_navail)) > 0) {
		req_p->ireq_nreq -= delta;
		pool_p->ipool_reqno -= delta;
		pool_p->ipool_minno -= delta;
	}
	req_p->ireq_navail += new_req.ireq_navail;
	list_remove(&pool_p->ipool_req_list, req_p);
	list_remove(&pool_p->ipool_req_list, &new_req);
	i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);

	return (DDI_SUCCESS);
}
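
/*
 * Worked example (hypothetical numbers): a non-IRM request growing from
 * nreq = 4 to nreq = 8 computes count = 4.  If the pool has 4 free
 * vectors, they are taken directly.  Otherwise a proxy request for the
 * 4 extra vectors competes in a synchronous rebalance; whatever the
 * proxy wins (new_req.ireq_navail) is merged back, and the request's
 * size is trimmed by the shortfall, if any.
 */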

/*
 * i_ddi_irm_remove()
 *
 *	Remove a request from an interrupt pool, and balance the pool.
 */
int
i_ddi_irm_remove(dev_info_t *dip)
{
	devinfo_intr_t	*intr_p;
	ddi_irm_pool_t	*pool_p;
	ddi_irm_req_t	*req_p;
	uint_t		nmin;

	ASSERT(dip != NULL);

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: dip %p\n", (void *)dip));

	/* Validate parameters */
	if (dip == NULL) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: invalid args\n"));
		return (DDI_EINVAL);
	}

	/* Check if the device has a request */
	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
	    !(req_p = intr_p->devi_irm_req_p)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: not found\n"));
		return (DDI_EINVAL);
	}

	/* Lock the pool */
	pool_p = req_p->ireq_pool_p;
	mutex_enter(&pool_p->ipool_lock);

	/* Remove request */
	nmin = DDI_IRM_IS_REDUCIBLE(req_p) ? 1 : req_p->ireq_nreq;
	pool_p->ipool_minno -= nmin;
	pool_p->ipool_reqno -= req_p->ireq_nreq;
	pool_p->ipool_resno -= req_p->ireq_navail;
	list_remove(&pool_p->ipool_req_list, req_p);

	/* Queue pool to be rebalanced */
	i_ddi_irm_enqueue(pool_p, B_FALSE);

	/* Unlock the pool */
	mutex_exit(&pool_p->ipool_lock);

	/* Destroy the request */
	intr_p->devi_irm_req_p = NULL;
	kmem_free(req_p, sizeof (ddi_irm_req_t));

	return (DDI_SUCCESS);
}

/*
 * i_ddi_irm_set_cb()
 *
 *	Change the callback flag for a request, in response to
 *	a change in its callback registration.  Then rebalance
 *	the interrupt pool.
 *
 *	NOTE: the request is not locked because the navail value
 *	      is not directly affected.  The balancing thread may
 *	      modify the navail value in the background after it
 *	      locks the request itself.
 */
void
i_ddi_irm_set_cb(dev_info_t *dip, boolean_t has_cb_flag)
{
	devinfo_intr_t	*intr_p;
	ddi_irm_pool_t	*pool_p;
	ddi_irm_req_t	*req_p;
	uint_t		nreq;

	ASSERT(dip != NULL);

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: dip %p has_cb_flag %d\n",
	    (void *)dip, (int)has_cb_flag));

	/* Validate parameters */
	if (dip == NULL)
		return;

	/* Check for association with interrupt pool */
	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
	    !(req_p = intr_p->devi_irm_req_p)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: not in pool\n"));
		return;
	}

	/* Lock the pool */
	pool_p = req_p->ireq_pool_p;
	mutex_enter(&pool_p->ipool_lock);

	/*
	 * Update the request and the pool
	 */
	if (has_cb_flag) {

		/* Update pool statistics */
		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX)
			pool_p->ipool_minno -= (req_p->ireq_nreq - 1);

		/* Update request */
		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;

		/* Rebalance in background */
		i_ddi_irm_enqueue(pool_p, B_FALSE);

	} else {

		/* Determine new request size */
		nreq = MIN(req_p->ireq_nreq, pool_p->ipool_defsz);

#if defined(__i386) || defined(__amd64)
		/* Use the default static limit for non-IRM drivers */
		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX)
			nreq = MIN(nreq, ddi_msix_alloc_limit);
#endif

		/* Update pool statistics */
		pool_p->ipool_reqno -= req_p->ireq_nreq;
		pool_p->ipool_reqno += nreq;
		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX) {
			pool_p->ipool_minno -= 1;
			pool_p->ipool_minno += nreq;
		} else {
			pool_p->ipool_minno -= req_p->ireq_nreq;
			pool_p->ipool_minno += nreq;
		}

		/* Update request size, and re-sort in pool */
		req_p->ireq_nreq = nreq;
		list_remove(&pool_p->ipool_req_list, req_p);
		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);

		/* Rebalance synchronously, before losing callback */
		i_ddi_irm_enqueue(pool_p, B_TRUE);

		/* Remove callback flag */
		req_p->ireq_flags &= ~(DDI_IRM_FLAG_CALLBACK);
	}

	/* Unlock the pool */
	mutex_exit(&pool_p->ipool_lock);
}

/*
 * i_ddi_irm_supported()
 *
 *	Query if IRM is supported by a driver using a specific interrupt type.
 *	Notice that IRM is limited to MSI-X users with registered callbacks.
 */
int
i_ddi_irm_supported(dev_info_t *dip, int type)
{
	ddi_cb_t	*cb_p = DEVI(dip)->devi_cb_p;

	return ((DDI_IRM_HAS_CB(cb_p) && (type == DDI_INTR_TYPE_MSIX)) ?
	    DDI_SUCCESS : DDI_ENOTSUP);
}
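
/*
 * Illustrative sketch (not part of this file): a leaf driver becomes an
 * IRM participant by registering an interrupt callback before allocating
 * its MSI-X vectors, e.g. via ddi_cb_register(9F):
 *
 *	ddi_cb_handle_t	cb_hdl;
 *
 *	if (ddi_cb_register(dip, DDI_CB_FLAG_INTR, my_cb_func,
 *	    my_arg1, my_arg2, &cb_hdl) != DDI_SUCCESS)
 *		...
 *
 * Here my_cb_func, my_arg1, and my_arg2 are hypothetical driver-supplied
 * names.  A registration with DDI_CB_FLAG_INTR is what DDI_IRM_HAS_CB()
 * tests for above.
 */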

/*
 * Interrupt Pool Balancing
 */

/*
 * irm_balance_thread()
 *
 *	One instance of this thread operates per defined IRM pool.
 *	It does the initial activation of the pool, as well as balancing
 *	any requests that were queued up before the pool was active.
 *	Once active, it waits forever to service balance operations.
 */
static void
irm_balance_thread(ddi_irm_pool_t *pool_p)
{
	clock_t		interval;

	DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: pool_p %p\n",
	    (void *)pool_p));

	/* Lock the pool */
	mutex_enter(&pool_p->ipool_lock);

	/* Perform initial balance if required */
	if (pool_p->ipool_reqno > pool_p->ipool_resno)
		i_ddi_irm_balance(pool_p);

	/* Activate the pool */
	pool_p->ipool_flags |= DDI_IRM_FLAG_ACTIVE;

	/*
	 * Main loop.
	 * Iterate once before waiting on the condition variable, in case
	 * a signal was sent before this thread was created.
	 */
	for (;;) {

		/* Compute the delay interval */
		interval = drv_usectohz(irm_balance_delay * 1000000);

		/* Wait one interval, or until there are waiters */
		if ((interval > 0) &&
		    !(pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) &&
		    !(pool_p->ipool_flags & DDI_IRM_FLAG_EXIT)) {
			(void) cv_reltimedwait(&pool_p->ipool_cv,
			    &pool_p->ipool_lock, interval, TR_CLOCK_TICK);
		}

		/* Check if awakened to exit */
		if (pool_p->ipool_flags & DDI_IRM_FLAG_EXIT) {
			DDI_INTR_IRMDBG((CE_CONT,
			    "irm_balance_thread: exiting...\n"));
			mutex_exit(&pool_p->ipool_lock);
			thread_exit();
		}

		/* Balance the pool */
		i_ddi_irm_balance(pool_p);

		/* Notify waiters */
		if (pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) {
			cv_broadcast(&pool_p->ipool_cv);
			pool_p->ipool_flags &= ~(DDI_IRM_FLAG_WAITERS);
		}

		/* Clear QUEUED condition */
		pool_p->ipool_flags &= ~(DDI_IRM_FLAG_QUEUED);

		/* Sleep until queued */
		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);

		DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: signaled.\n"));
	}
}

/*
 * i_ddi_irm_balance()
 *
 *	Balance a pool.  The general algorithm is to first reset all
 *	requests to their maximum size, use reduction algorithms to
 *	solve any imbalance, and then notify affected drivers.
 */
static void
i_ddi_irm_balance(ddi_irm_pool_t *pool_p)
{
	ddi_irm_req_t	*req_p;

#ifdef	DEBUG
	uint_t		debug_totsz = 0;
	int		debug_policy = 0;
#endif	/* DEBUG */

	ASSERT(pool_p != NULL);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: pool_p %p\n",
	    (void *)pool_p));

#ifndef	DEBUG
	if ((pool_p->ipool_reqno == pool_p->ipool_resno)) {
#else
	if ((pool_p->ipool_reqno == pool_p->ipool_resno) && !irm_debug_size) {
#endif	/* DEBUG */
		DDI_INTR_IRMDBG((CE_CONT,
		    "i_ddi_irm_balance: pool already balanced\n"));
		return;
	}

#ifdef	DEBUG	/* Adjust size and policy settings */
	if (irm_debug_size > pool_p->ipool_minno) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: debug size %d\n",
		    irm_debug_size));
		debug_totsz = pool_p->ipool_totsz;
		pool_p->ipool_totsz = irm_debug_size;
	}
	if (DDI_IRM_POLICY_VALID(irm_debug_policy)) {
		DDI_INTR_IRMDBG((CE_CONT,
		    "i_ddi_irm_balance: debug policy %d\n", irm_debug_policy));
		debug_policy = pool_p->ipool_policy;
		pool_p->ipool_policy = irm_debug_policy;
	}
#endif	/* DEBUG */

	/* Lock the availability lock */
	mutex_enter(&pool_p->ipool_navail_lock);

	/*
	 * Put all of the reducible requests into a scratch list.
	 * Reset each one of them to their maximum availability.
	 */
	for (req_p = list_head(&pool_p->ipool_req_list); req_p;
	    req_p = list_next(&pool_p->ipool_req_list, req_p)) {
		if (DDI_IRM_IS_REDUCIBLE(req_p)) {
			pool_p->ipool_resno -= req_p->ireq_navail;
			req_p->ireq_scratch = req_p->ireq_navail;
			req_p->ireq_navail = req_p->ireq_nreq;
			pool_p->ipool_resno += req_p->ireq_navail;
			list_insert_tail(&pool_p->ipool_scratch_list, req_p);
		}
	}

	/* Balance the requests */
	i_ddi_irm_reduce(pool_p);

	/* Unlock the availability lock */
	mutex_exit(&pool_p->ipool_navail_lock);

	/*
	 * Process REMOVE notifications.
	 *
	 * If a driver fails to release interrupts: exclude it from
	 * further processing, correct the resulting imbalance, and
	 * start over again at the head of the scratch list.
	 */
	req_p = list_head(&pool_p->ipool_scratch_list);
	while (req_p) {
		if ((req_p->ireq_navail < req_p->ireq_scratch) &&
		    (i_ddi_irm_notify(pool_p, req_p) != DDI_SUCCESS)) {
			list_remove(&pool_p->ipool_scratch_list, req_p);
			mutex_enter(&pool_p->ipool_navail_lock);
			i_ddi_irm_reduce(pool_p);
			mutex_exit(&pool_p->ipool_navail_lock);
			req_p = list_head(&pool_p->ipool_scratch_list);
		} else {
			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
		}
	}

	/*
	 * Process ADD notifications.
	 *
	 * This is the last use of the scratch list, so empty it.
	 */
	while ((req_p = list_remove_head(&pool_p->ipool_scratch_list)) !=
	    NULL) {
		if (req_p->ireq_navail > req_p->ireq_scratch) {
			(void) i_ddi_irm_notify(pool_p, req_p);
		}
	}

#ifdef	DEBUG	/* Restore size and policy settings */
	if (debug_totsz != 0)
		pool_p->ipool_totsz = debug_totsz;
	if (debug_policy != 0)
		pool_p->ipool_policy = debug_policy;
#endif	/* DEBUG */
}

/*
 * i_ddi_irm_reduce()
 *
 *	Use reduction algorithms to correct an imbalance in a pool.
 */
static void
i_ddi_irm_reduce(ddi_irm_pool_t *pool_p)
{
	int	imbalance;

	ASSERT(pool_p != NULL);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
	ASSERT(DDI_IRM_POLICY_VALID(pool_p->ipool_policy));

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_reduce: pool_p %p\n",
	    (void *)pool_p));

	/* Compute the imbalance.  Do nothing if already balanced. */
	if ((imbalance = pool_p->ipool_resno - pool_p->ipool_totsz) <= 0)
		return;

	/*
	 * Try policy based reduction first.  If it failed, then
	 * possibly reduce new requests as a last resort.
	 */
	if (i_ddi_irm_reduce_by_policy(pool_p, imbalance, pool_p->ipool_policy)
	    != DDI_SUCCESS) {

		DDI_INTR_IRMDBG((CE_CONT,
		    "i_ddi_irm_reduce: policy reductions failed.\n"));

		/* Compute remaining imbalance */
		imbalance = pool_p->ipool_resno - pool_p->ipool_totsz;

		ASSERT(imbalance > 0);

		i_ddi_irm_reduce_new(pool_p, imbalance);
	}
}

/*
 * i_ddi_irm_enqueue()
 *
 *	Queue a pool to be balanced.  Signals the balancing thread to wake
 *	up and process the pool.  If 'wait_flag' is true, then the current
 *	thread becomes a waiter and blocks until the balance is completed.
 */
static void
i_ddi_irm_enqueue(ddi_irm_pool_t *pool_p, boolean_t wait_flag)
{
	ASSERT(pool_p != NULL);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool_p %p wait_flag %d\n",
	    (void *)pool_p, (int)wait_flag));

	/* Do nothing if pool is already balanced */
#ifndef	DEBUG
	if ((pool_p->ipool_reqno == pool_p->ipool_resno)) {
#else
	if ((pool_p->ipool_reqno == pool_p->ipool_resno) && !irm_debug_size) {
#endif	/* DEBUG */
		DDI_INTR_IRMDBG((CE_CONT,
		    "i_ddi_irm_enqueue: pool already balanced\n"));
		return;
	}

	/* Avoid deadlocks when IRM is not active */
	if (!irm_active && wait_flag) {
		DDI_INTR_IRMDBG((CE_CONT,
		    "i_ddi_irm_enqueue: pool not active.\n"));
		return;
	}

	if (wait_flag)
		pool_p->ipool_flags |= DDI_IRM_FLAG_WAITERS;

	if (wait_flag || !(pool_p->ipool_flags & DDI_IRM_FLAG_QUEUED)) {
		pool_p->ipool_flags |= DDI_IRM_FLAG_QUEUED;
		cv_signal(&pool_p->ipool_cv);
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool queued.\n"));
	}

	if (wait_flag) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: waiting...\n"));
		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);
	}
}

/*
 * i_ddi_irm_reduce_by_policy()
 *
 *	Reduces requests based on reduction policies.
 *
 *	For the DDI_IRM_POLICY_LARGE reduction policy, the algorithm
 *	generally reduces larger requests first, before advancing
 *	to smaller requests.
 *	For the DDI_IRM_POLICY_EVEN reduction policy, the algorithm
 *	reduces requests evenly, without giving a specific preference
 *	to smaller or larger requests.  Each iteration reduces all
 *	reducible requests by the same amount until the imbalance is
 *	corrected.
 *
 *	The scratch list is initially sorted in descending order by current
 *	navail values, which are maximized prior to reduction.  This sorted
 *	order is preserved, and the algorithm avoids reducing requests below
 *	the threshold of the interrupt pool's default allocation size.
 *
 *	Optimizations in this algorithm include reducing multiple requests
 *	together, and reducing in larger increments when possible, to
 *	minimize the total number of iterations.
 */
static int
i_ddi_irm_reduce_by_policy(ddi_irm_pool_t *pool_p, int imbalance, int policy)
{
	ASSERT(pool_p != NULL);
	ASSERT(imbalance > 0);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	while (imbalance > 0) {
		list_t		*slist_p = &pool_p->ipool_scratch_list;
		ddi_irm_req_t	*req_p = list_head(slist_p), *last_p;
		uint_t		nreduce = 0, nremain = 0, stop_navail;
		uint_t		pool_defsz = pool_p->ipool_defsz;
		uint_t		reduction, max_redu;

		/* Fail if none are reducible */
		if (!req_p || req_p->ireq_navail <= pool_defsz) {
			DDI_INTR_IRMDBG((CE_CONT,
			    "i_ddi_irm_reduce_by_policy: Failure. "
			    "All requests have downsized to low limit.\n"));
			return (DDI_FAILURE);
		}

		/* Count reducible requests */
		stop_navail = (policy == DDI_IRM_POLICY_LARGE) ?
		    req_p->ireq_navail - 1 : pool_defsz;
		for (; req_p; req_p = list_next(slist_p, req_p)) {
			if (req_p->ireq_navail <= stop_navail)
				break;
			nreduce++;
		}

		/* Compute reduction */
		last_p = req_p ? list_prev(slist_p, req_p) :
		    list_tail(slist_p);
		if ((policy == DDI_IRM_POLICY_LARGE) && req_p &&
		    req_p->ireq_navail > pool_defsz)
			reduction = last_p->ireq_navail -
			    req_p->ireq_navail;
		else
			reduction = last_p->ireq_navail - pool_defsz;

		if ((max_redu = reduction * nreduce) > imbalance) {
			reduction = imbalance / nreduce;
			nremain = imbalance % nreduce;
			pool_p->ipool_resno -= imbalance;
			imbalance = 0;
		} else {
			pool_p->ipool_resno -= max_redu;
			imbalance -= max_redu;
		}

		/* Reduce */
		for (req_p = list_head(slist_p); (reduction != 0) && nreduce--;
		    req_p = list_next(slist_p, req_p)) {
			req_p->ireq_navail -= reduction;
		}

		for (req_p = last_p; nremain--;
		    req_p = list_prev(slist_p, req_p)) {
			req_p->ireq_navail--;
		}
	}

	return (DDI_SUCCESS);
}
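
/*
 * Worked example of the LARGE policy (hypothetical numbers): with
 * pool_defsz = 8, an imbalance of 20, and a scratch list of navail
 * values {32, 16, 16}, the first iteration reduces only the largest
 * request, from 32 down to the next tier of 16 (max_redu = 16).  The
 * second iteration spreads the remaining imbalance of 4 across all
 * three requests, yielding {15, 15, 14} and a fully corrected pool.
 */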

/*
 * i_ddi_irm_reduce_new()
 *
 *	Reduces new requests.  This is only used as a last resort
 *	after another reduction algorithm failed.
 *
 *	NOTE: The pool locking in i_ddi_irm_insert() ensures
 *	      there can be only one new request at a time in a pool.
 */
static void
i_ddi_irm_reduce_new(ddi_irm_pool_t *pool_p, int imbalance)
{
	ddi_irm_req_t	*req_p;

	ASSERT(pool_p != NULL);
	ASSERT(imbalance > 0);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	DDI_INTR_IRMDBG((CE_CONT,
	    "i_ddi_irm_reduce_new: pool_p %p imbalance %d\n",
	    (void *)pool_p, imbalance));

	for (req_p = list_head(&pool_p->ipool_scratch_list); req_p;
	    req_p = list_next(&pool_p->ipool_scratch_list, req_p)) {
		if (req_p->ireq_flags & DDI_IRM_FLAG_NEW) {
			ASSERT(req_p->ireq_navail >= imbalance);
			req_p->ireq_navail -= imbalance;
			pool_p->ipool_resno -= imbalance;
			return;
		}
	}

	/* should never get here */
	ASSERT(B_FALSE);
}

/*
 * Miscellaneous Helper Functions
 */

/*
 * i_ddi_intr_get_pool()
 *
 *	Get an IRM pool that supplies interrupts of a specified type.
 *	Invokes a DDI_INTROP_GETPOOL to the bus nexus driver.  Fails
 *	if no pool exists.
 */
ddi_irm_pool_t *
i_ddi_intr_get_pool(dev_info_t *dip, int type)
{
	devinfo_intr_t		*intr_p;
	ddi_irm_pool_t		*pool_p;
	ddi_irm_req_t		*req_p;
	ddi_intr_handle_impl_t	hdl;

	ASSERT(dip != NULL);
	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));

	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
	    ((req_p = intr_p->devi_irm_req_p) != NULL) &&
	    ((pool_p = req_p->ireq_pool_p) != NULL) &&
	    (pool_p->ipool_types & type)) {
		return (pool_p);
	}

	bzero(&hdl, sizeof (ddi_intr_handle_impl_t));
	hdl.ih_dip = dip;
	hdl.ih_type = type;

	if (i_ddi_intr_ops(dip, dip, DDI_INTROP_GETPOOL,
	    &hdl, (void *)&pool_p) == DDI_SUCCESS)
		return (pool_p);

	return (NULL);
}

/*
 * i_ddi_irm_insertion_sort()
 *
 *	Use the insertion sort method to insert a request into a list.
 *	The list is sorted in descending order by request size.
 */
static void
i_ddi_irm_insertion_sort(list_t *req_list, ddi_irm_req_t *req_p)
{
	ddi_irm_req_t	*next_p;

	next_p = list_head(req_list);

	while (next_p && (next_p->ireq_nreq > req_p->ireq_nreq))
		next_p = list_next(req_list, next_p);

	list_insert_before(req_list, next_p, req_p);
}

/*
 * i_ddi_irm_notify()
 *
 *	Notify a driver of changes to its interrupt request using the
 *	generic callback mechanism.  Checks for errors in processing.
 */
static int
i_ddi_irm_notify(ddi_irm_pool_t *pool_p, ddi_irm_req_t *req_p)
{
	ddi_cb_action_t	action;
	ddi_cb_t	*cb_p;
	uint_t		nintrs;
	int		ret, count;

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: pool_p %p req_p %p\n",
	    (void *)pool_p, (void *)req_p));

	/* Do not notify new or unchanged requests */
	if ((req_p->ireq_navail == req_p->ireq_scratch) ||
	    (req_p->ireq_flags & DDI_IRM_FLAG_NEW))
		return (DDI_SUCCESS);

	/* Determine action and count */
	if (req_p->ireq_navail > req_p->ireq_scratch) {
		action = DDI_CB_INTR_ADD;
		count = req_p->ireq_navail - req_p->ireq_scratch;
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: adding %d\n",
		    count));
	} else {
		action = DDI_CB_INTR_REMOVE;
		count = req_p->ireq_scratch - req_p->ireq_navail;
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: removing %d\n",
		    count));
	}

	/* Lookup driver callback */
	if ((cb_p = DEVI(req_p->ireq_dip)->devi_cb_p) == NULL) {
		DDI_INTR_IRMDBG((CE_WARN, "i_ddi_irm_notify: no callback!\n"));
		return (DDI_FAILURE);
	}

	/* Do callback */
	ret = cb_p->cb_func(req_p->ireq_dip, action, (void *)(uintptr_t)count,
	    cb_p->cb_arg1, cb_p->cb_arg2);

	/* Log callback errors */
	if (ret != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: failed callback (action=%d, ret=%d)\n",
		    ddi_driver_name(req_p->ireq_dip),
		    ddi_get_instance(req_p->ireq_dip), (int)action, ret);
	}

	/* Check if the driver exceeds its availability */
	nintrs = i_ddi_intr_get_current_nintrs(req_p->ireq_dip);
	if (nintrs > req_p->ireq_navail) {
		cmn_err(CE_WARN, "%s%d: failed to release interrupts "
		    "(nintrs=%d, navail=%d).\n",
		    ddi_driver_name(req_p->ireq_dip),
		    ddi_get_instance(req_p->ireq_dip), nintrs,
		    req_p->ireq_navail);
		pool_p->ipool_resno += (nintrs - req_p->ireq_navail);
		req_p->ireq_navail = nintrs;
		return (DDI_FAILURE);
	}

	/* Update request */
	req_p->ireq_scratch = req_p->ireq_navail;

	return (DDI_SUCCESS);
}
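
/*
 * Illustrative sketch (hypothetical driver code): an IRM-aware driver's
 * callback receives the delta computed above and adjusts its vector
 * usage accordingly.  my_add_intrs()/my_remove_intrs() are assumed
 * driver-private helpers.
 *
 *	static int
 *	my_cb_func(dev_info_t *dip, ddi_cb_action_t action, void *cbarg,
 *	    void *arg1, void *arg2)
 *	{
 *		int	count = (int)(uintptr_t)cbarg;
 *
 *		switch (action) {
 *		case DDI_CB_INTR_ADD:
 *			return (my_add_intrs(dip, count));
 *		case DDI_CB_INTR_REMOVE:
 *			return (my_remove_intrs(dip, count));
 *		default:
 *			return (DDI_ENOTSUP);
 *		}
 *	}
 */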

/*
 * i_ddi_irm_debug_balance()
 *
 *	A debug/test only routine to force the immediate,
 *	synchronous rebalancing of an interrupt pool.
 */
#ifdef	DEBUG
void
i_ddi_irm_debug_balance(dev_info_t *dip, boolean_t wait_flag)
{
	ddi_irm_pool_t	*pool_p;
	int		type;

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_debug_balance: dip %p wait %d\n",
	    (void *)dip, (int)wait_flag));

	if (((type = i_ddi_intr_get_current_type(dip)) != 0) &&
	    ((pool_p = i_ddi_intr_get_pool(dip, type)) != NULL)) {
		mutex_enter(&pool_p->ipool_lock);
		i_ddi_irm_enqueue(pool_p, wait_flag);
		mutex_exit(&pool_p->ipool_lock);
	}
}
#endif