xref: /illumos-gate/usr/src/uts/common/os/ddi_intr_irm.c (revision 5df5713f81d69c1a0797f99b13e95e220da00ef9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/note.h>
27 #include <sys/sysmacros.h>
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kmem.h>
32 #include <sys/cmn_err.h>
33 #include <sys/debug.h>
34 #include <sys/ddi.h>
35 #include <sys/sunndi.h>
36 #include <sys/ndi_impldefs.h>	/* include prototypes */
37 
38 /*
39  * Interrupt Resource Management (IRM).
40  */
41 
#define	DDI_IRM_BALANCE_DELAY	(60)	/* In seconds */

/*
 * True when callback structure pointer 'c' is non-NULL and has
 * DDI_CB_FLAG_INTR set in its cb_flags.  The argument is fully
 * parenthesized so that any pointer expression may be passed.
 */
#define	DDI_IRM_HAS_CB(c)	((c) && ((c)->cb_flags & DDI_CB_FLAG_INTR))

/*
 * True when request 'r' may be resized by the balancing code: either
 * it is an MSI-X request flagged DDI_IRM_FLAG_CALLBACK, or it is still
 * flagged DDI_IRM_FLAG_NEW.  The argument is fully parenthesized so
 * that any pointer expression may be passed.
 */
#define	DDI_IRM_IS_REDUCIBLE(r)	\
	((((r)->ireq_flags & DDI_IRM_FLAG_CALLBACK) && \
	((r)->ireq_type == DDI_INTR_TYPE_MSIX)) || \
	((r)->ireq_flags & DDI_IRM_FLAG_NEW))
49 
/* Priority passed to thread_create() for the per-pool balancing threads */
extern pri_t	minclsyspri;

/*
 * Global policies
 *
 *	irm_enable		IRM is disabled when this is zero; irm_init(),
 *				i_ddi_irm_poststartup() and ndi_irm_create()
 *				all return early in that case.
 *	irm_active		Set to B_TRUE by i_ddi_irm_poststartup();
 *				consulted by ndi_irm_create() and
 *				i_ddi_irm_enqueue().
 *	irm_default_policy	Policy copied into each new pool; reset to
 *				DDI_IRM_POLICY_LARGE by irm_init() if invalid.
 *	irm_balance_delay	Delay, in seconds, that a balancing thread
 *				waits after being signaled before it balances
 *				(skipped when there are waiters or on exit).
 */
int		irm_enable = 1;
boolean_t	irm_active = B_FALSE;
int		irm_default_policy = DDI_IRM_POLICY_LARGE;
uint_t		irm_balance_delay = DDI_IRM_BALANCE_DELAY;

/* Global list of interrupt pools; list accesses hold irm_pools_lock */
kmutex_t	irm_pools_lock;
list_t		irm_pools_list;

/*
 * Global debug tunables
 *
 *	irm_debug_policy	If valid, temporarily replaces the pool's
 *				policy for the duration of a balance.
 *	irm_debug_size		If larger than the pool's minimum, temporarily
 *				replaces the pool's total size for the
 *				duration of a balance.  Any non-zero value
 *				also makes i_ddi_irm_enqueue() queue a pool
 *				that is already balanced.
 */
#ifdef	DEBUG
int		irm_debug_policy = 0;
uint_t		irm_debug_size = 0;
#endif	/* DEBUG */

/* Forward declarations of file-local routines */
static void	irm_balance_thread(ddi_irm_pool_t *);
static void	i_ddi_irm_balance(ddi_irm_pool_t *);
static void	i_ddi_irm_enqueue(ddi_irm_pool_t *, boolean_t);
static void	i_ddi_irm_reduce(ddi_irm_pool_t *pool);
static int	i_ddi_irm_reduce_large(ddi_irm_pool_t *, int);
static void	i_ddi_irm_reduce_large_resort(ddi_irm_pool_t *);
static int	i_ddi_irm_reduce_even(ddi_irm_pool_t *, int);
static void	i_ddi_irm_reduce_new(ddi_irm_pool_t *, int);
static void	i_ddi_irm_insertion_sort(list_t *, ddi_irm_req_t *);
static int	i_ddi_irm_notify(ddi_irm_pool_t *, ddi_irm_req_t *);
78 
79 /*
80  * OS Initialization Routines
81  */
82 
83 /*
84  * irm_init()
85  *
86  *	Initialize IRM subsystem before any drivers are attached.
87  */
88 void
89 irm_init(void)
90 {
91 	/* Do nothing if IRM is disabled */
92 	if (!irm_enable)
93 		return;
94 
95 	/* Verify that the default balancing policy is valid */
96 	if (!DDI_IRM_POLICY_VALID(irm_default_policy))
97 		irm_default_policy = DDI_IRM_POLICY_LARGE;
98 
99 	/* Initialize the global list of interrupt pools */
100 	mutex_init(&irm_pools_lock, NULL, MUTEX_DRIVER, NULL);
101 	list_create(&irm_pools_list, sizeof (ddi_irm_pool_t),
102 	    offsetof(ddi_irm_pool_t, ipool_link));
103 }
104 
105 /*
106  * i_ddi_irm_poststartup()
107  *
108  *	IRM is not activated until after the IO subsystem is initialized.
109  *	When activated, per-pool balancing threads are spawned and a flag
110  *	is set so that all future pools will be activated when created.
111  *
112  *	NOTE: the global variable 'irm_enable' disables IRM if zero.
113  */
114 void
115 i_ddi_irm_poststartup(void)
116 {
117 	ddi_irm_pool_t	*pool_p;
118 
119 	/* Do nothing if IRM is disabled */
120 	if (!irm_enable)
121 		return;
122 
123 	/* Lock the global list */
124 	mutex_enter(&irm_pools_lock);
125 
126 	/* Activate all defined pools */
127 	for (pool_p = list_head(&irm_pools_list); pool_p;
128 	    pool_p = list_next(&irm_pools_list, pool_p))
129 		pool_p->ipool_thread = thread_create(NULL, 0,
130 		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);
131 
132 	/* Set future pools to be active */
133 	irm_active = B_TRUE;
134 
135 	/* Unlock the global list */
136 	mutex_exit(&irm_pools_lock);
137 }
138 
139 /*
140  * NDI interfaces for creating/destroying IRM pools.
141  */
142 
143 /*
144  * ndi_irm_create()
145  *
146  *	Nexus interface to create an IRM pool.  Create the new
147  *	pool and add it to the global list of interrupt pools.
148  */
149 int
150 ndi_irm_create(dev_info_t *dip, ddi_irm_params_t *paramsp,
151     ddi_irm_pool_t **pool_retp)
152 {
153 	ddi_irm_pool_t	*pool_p;
154 
155 	ASSERT(dip != NULL);
156 	ASSERT(paramsp != NULL);
157 	ASSERT(pool_retp != NULL);
158 	ASSERT(paramsp->iparams_total >= 1);
159 	ASSERT(paramsp->iparams_types != 0);
160 
161 	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_create: dip %p\n", (void *)dip));
162 
163 	/* Check if IRM is enabled */
164 	if (!irm_enable)
165 		return (NDI_FAILURE);
166 
167 	/* Validate parameters */
168 	if ((dip == NULL) || (paramsp == NULL) || (pool_retp == NULL) ||
169 	    (paramsp->iparams_total < 1) || (paramsp->iparams_types == 0))
170 		return (NDI_FAILURE);
171 
172 	/* Allocate and initialize the pool */
173 	pool_p = kmem_zalloc(sizeof (ddi_irm_pool_t), KM_SLEEP);
174 	pool_p->ipool_owner = dip;
175 	pool_p->ipool_policy = irm_default_policy;
176 	pool_p->ipool_types = paramsp->iparams_types;
177 	pool_p->ipool_totsz = paramsp->iparams_total;
178 	pool_p->ipool_defsz = MIN(DDI_MAX_MSIX_ALLOC, MAX(DDI_MIN_MSIX_ALLOC,
179 	    paramsp->iparams_total / DDI_MSIX_ALLOC_DIVIDER));
180 	list_create(&pool_p->ipool_req_list, sizeof (ddi_irm_req_t),
181 	    offsetof(ddi_irm_req_t, ireq_link));
182 	list_create(&pool_p->ipool_scratch_list, sizeof (ddi_irm_req_t),
183 	    offsetof(ddi_irm_req_t, ireq_scratch_link));
184 	cv_init(&pool_p->ipool_cv, NULL, CV_DRIVER, NULL);
185 	mutex_init(&pool_p->ipool_lock, NULL, MUTEX_DRIVER, NULL);
186 	mutex_init(&pool_p->ipool_navail_lock, NULL, MUTEX_DRIVER, NULL);
187 
188 	/* Add to global list of pools */
189 	mutex_enter(&irm_pools_lock);
190 	list_insert_tail(&irm_pools_list, pool_p);
191 	mutex_exit(&irm_pools_lock);
192 
193 	/* If IRM is active, then activate the pool */
194 	if (irm_active)
195 		pool_p->ipool_thread = thread_create(NULL, 0,
196 		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);
197 
198 	*pool_retp = pool_p;
199 	return (NDI_SUCCESS);
200 }
201 
202 /*
203  * ndi_irm_destroy()
204  *
205  *	Nexus interface to destroy an IRM pool.  Destroy the pool
206  *	and remove it from the global list of interrupt pools.
207  */
208 int
209 ndi_irm_destroy(ddi_irm_pool_t *pool_p)
210 {
211 	ASSERT(pool_p != NULL);
212 	ASSERT(pool_p->ipool_resno == 0);
213 
214 	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_destroy: pool_p %p\n",
215 	    (void *)pool_p));
216 
217 	/* Validate parameters */
218 	if (pool_p == NULL)
219 		return (NDI_FAILURE);
220 
221 	/* Validate that pool is empty */
222 	if (pool_p->ipool_resno != 0)
223 		return (NDI_BUSY);
224 
225 	/* Remove the pool from the global list */
226 	mutex_enter(&irm_pools_lock);
227 	list_remove(&irm_pools_list, pool_p);
228 	mutex_exit(&irm_pools_lock);
229 
230 	/* Terminate the balancing thread */
231 	mutex_enter(&pool_p->ipool_lock);
232 	if (pool_p->ipool_thread &&
233 	    (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) {
234 		pool_p->ipool_flags |= DDI_IRM_FLAG_EXIT;
235 		cv_signal(&pool_p->ipool_cv);
236 		mutex_exit(&pool_p->ipool_lock);
237 		thread_join(pool_p->ipool_thread->t_did);
238 	} else
239 		mutex_exit(&pool_p->ipool_lock);
240 
241 	/* Destroy the pool */
242 	cv_destroy(&pool_p->ipool_cv);
243 	mutex_destroy(&pool_p->ipool_lock);
244 	mutex_destroy(&pool_p->ipool_navail_lock);
245 	list_destroy(&pool_p->ipool_req_list);
246 	list_destroy(&pool_p->ipool_scratch_list);
247 	kmem_free(pool_p, sizeof (ddi_irm_pool_t));
248 
249 	return (NDI_SUCCESS);
250 }
251 
252 /*
253  * Insert/Modify/Remove Interrupt Requests
254  */
255 
256 /*
257  * i_ddi_irm_insert()
258  *
259  *	Insert a new request into an interrupt pool, and balance the pool.
260  */
261 int
262 i_ddi_irm_insert(dev_info_t *dip, int type, int count)
263 {
264 	ddi_cb_t	*cb_p;
265 	ddi_irm_req_t	*req_p;
266 	devinfo_intr_t	*intr_p;
267 	ddi_irm_pool_t	*pool_p;
268 	uint_t		nreq, nmin, npartial;
269 	boolean_t	irm_flag = B_FALSE;
270 
271 	ASSERT(dip != NULL);
272 	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));
273 	ASSERT(count > 0);
274 
275 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: dip %p type %d count %d\n",
276 	    (void *)dip, type, count));
277 
278 	/* Validate parameters */
279 	if ((dip == NULL) || (count < 1) || !DDI_INTR_TYPE_FLAG_VALID(type)) {
280 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: invalid args\n"));
281 		return (DDI_EINVAL);
282 	}
283 
284 	/* Check for an existing request */
285 	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
286 	    (intr_p->devi_irm_req_p != NULL))
287 		return (DDI_SUCCESS);
288 
289 	/* Check for IRM support from the system */
290 	if ((pool_p = i_ddi_intr_get_pool(dip, type)) == NULL) {
291 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: not supported\n"));
292 		return (DDI_ENOTSUP);
293 	}
294 
295 	/* Check for IRM support from the driver */
296 	if (((cb_p = DEVI(dip)->devi_cb_p) != NULL) && DDI_IRM_HAS_CB(cb_p) &&
297 	    (type == DDI_INTR_TYPE_MSIX))
298 		irm_flag = B_TRUE;
299 
300 	/* Determine request size */
301 	nreq = (irm_flag) ? count :
302 	    MIN(count, i_ddi_intr_get_current_navail(dip, type));
303 	nmin = (irm_flag) ? 1 : nreq;
304 	npartial = MIN(nreq, pool_p->ipool_defsz);
305 
306 	/* Allocate and initialize the request */
307 	req_p = kmem_zalloc(sizeof (ddi_irm_req_t), KM_SLEEP);
308 	req_p->ireq_type = type;
309 	req_p->ireq_dip = dip;
310 	req_p->ireq_pool_p = pool_p;
311 	req_p->ireq_nreq = nreq;
312 	req_p->ireq_flags = DDI_IRM_FLAG_NEW;
313 	if (DDI_IRM_HAS_CB(cb_p))
314 		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;
315 
316 	/* Lock the pool */
317 	mutex_enter(&pool_p->ipool_lock);
318 
319 	/* Check for minimal fit before inserting */
320 	if ((pool_p->ipool_minno + nmin) > pool_p->ipool_totsz) {
321 		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
322 		    ddi_driver_name(dip), ddi_get_instance(dip));
323 		mutex_exit(&pool_p->ipool_lock);
324 		kmem_free(req_p, sizeof (ddi_irm_req_t));
325 		return (DDI_EAGAIN);
326 	}
327 
328 	/* Insert the request into the pool */
329 	pool_p->ipool_reqno += nreq;
330 	pool_p->ipool_minno += nmin;
331 	i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
332 
333 	/*
334 	 * Try to fulfill the request.
335 	 *
336 	 * If all the interrupts are available, and either the request
337 	 * is static or the pool is active, then just take them directly.
338 	 *
339 	 * If only some of the interrupts are available, and the request
340 	 * can receive future callbacks, then take some now but queue the
341 	 * pool to be rebalanced later.
342 	 *
343 	 * Otherwise, immediately rebalance the pool and wait.
344 	 */
345 	if ((!irm_flag || (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) &&
346 	    ((pool_p->ipool_resno + nreq) <= pool_p->ipool_totsz)) {
347 
348 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
349 		    "request completely fulfilled.\n"));
350 		pool_p->ipool_resno += nreq;
351 		req_p->ireq_navail = nreq;
352 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
353 
354 	} else if (irm_flag &&
355 	    ((pool_p->ipool_resno + npartial) <= pool_p->ipool_totsz)) {
356 
357 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
358 		    "request partially fulfilled.\n"));
359 		pool_p->ipool_resno += npartial;
360 		req_p->ireq_navail = npartial;
361 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
362 		i_ddi_irm_enqueue(pool_p, B_FALSE);
363 
364 	} else {
365 
366 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
367 		    "request needs immediate rebalance.\n"));
368 		i_ddi_irm_enqueue(pool_p, B_TRUE);
369 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
370 	}
371 
372 	/* Fail if the request cannot be fulfilled at all */
373 	if (req_p->ireq_navail == 0) {
374 		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
375 		    ddi_driver_name(dip), ddi_get_instance(dip));
376 		mutex_exit(&pool_p->ipool_lock);
377 		pool_p->ipool_reqno -= nreq;
378 		pool_p->ipool_minno -= nmin;
379 		list_remove(&pool_p->ipool_req_list, req_p);
380 		kmem_free(req_p, sizeof (ddi_irm_req_t));
381 		return (DDI_EAGAIN);
382 	}
383 
384 	/* Unlock the pool */
385 	mutex_exit(&pool_p->ipool_lock);
386 
387 	intr_p->devi_irm_req_p = req_p;
388 	return (DDI_SUCCESS);
389 }
390 
391 /*
392  * i_ddi_irm_modify()
393  *
394  *	Modify an existing request in an interrupt pool, and balance the pool.
395  */
396 int
397 i_ddi_irm_modify(dev_info_t *dip, int nreq)
398 {
399 	devinfo_intr_t	*intr_p;
400 	ddi_irm_req_t	*req_p;
401 	ddi_irm_pool_t	*pool_p;
402 
403 	ASSERT(dip != NULL);
404 
405 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: dip %p nreq %d\n",
406 	    (void *)dip, nreq));
407 
408 	/* Validate parameters */
409 	if ((dip == NULL) || (nreq < 1)) {
410 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
411 		return (DDI_EINVAL);
412 	}
413 
414 	/* Check that the operation is supported */
415 	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
416 	    !(req_p = intr_p->devi_irm_req_p) ||
417 	    !DDI_IRM_IS_REDUCIBLE(req_p)) {
418 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: not supported\n"));
419 		return (DDI_ENOTSUP);
420 	}
421 
422 	/* Validate request size is not too large */
423 	if (nreq > intr_p->devi_intr_sup_nintrs) {
424 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
425 		return (DDI_EINVAL);
426 	}
427 
428 	/*
429 	 * Modify request, but only if new size is different.
430 	 */
431 	if (nreq != req_p->ireq_nreq) {
432 
433 		/* Lock the pool */
434 		pool_p = req_p->ireq_pool_p;
435 		mutex_enter(&pool_p->ipool_lock);
436 
437 		/* Update pool and request */
438 		pool_p->ipool_reqno -= req_p->ireq_nreq;
439 		pool_p->ipool_reqno += nreq;
440 		req_p->ireq_nreq = nreq;
441 
442 		/* Re-sort request in the pool */
443 		list_remove(&pool_p->ipool_req_list, req_p);
444 		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
445 
446 		/* Queue pool to be rebalanced */
447 		i_ddi_irm_enqueue(pool_p, B_FALSE);
448 
449 		/* Unlock the pool */
450 		mutex_exit(&pool_p->ipool_lock);
451 	}
452 
453 	return (DDI_SUCCESS);
454 }
455 
456 /*
457  * i_ddi_irm_remove()
458  *
459  *	Remove a request from an interrupt pool, and balance the pool.
460  */
461 int
462 i_ddi_irm_remove(dev_info_t *dip)
463 {
464 	devinfo_intr_t	*intr_p;
465 	ddi_irm_pool_t	*pool_p;
466 	ddi_irm_req_t	*req_p;
467 	uint_t		nmin;
468 
469 	ASSERT(dip != NULL);
470 
471 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: dip %p\n", (void *)dip));
472 
473 	/* Validate parameters */
474 	if (dip == NULL) {
475 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: invalid args\n"));
476 		return (DDI_EINVAL);
477 	}
478 
479 	/* Check if the device has a request */
480 	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
481 	    !(req_p = intr_p->devi_irm_req_p)) {
482 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: not found\n"));
483 		return (DDI_EINVAL);
484 	}
485 
486 	/* Lock the pool */
487 	pool_p = req_p->ireq_pool_p;
488 	mutex_enter(&pool_p->ipool_lock);
489 
490 	/* Remove request */
491 	nmin = DDI_IRM_IS_REDUCIBLE(req_p) ? 1 : req_p->ireq_nreq;
492 	pool_p->ipool_minno -= nmin;
493 	pool_p->ipool_reqno -= req_p->ireq_nreq;
494 	pool_p->ipool_resno -= req_p->ireq_navail;
495 	list_remove(&pool_p->ipool_req_list, req_p);
496 
497 	/* Queue pool to be rebalanced */
498 	i_ddi_irm_enqueue(pool_p, B_FALSE);
499 
500 	/* Unlock the pool */
501 	mutex_exit(&pool_p->ipool_lock);
502 
503 	/* Destroy the request */
504 	intr_p->devi_irm_req_p = NULL;
505 	kmem_free(req_p, sizeof (ddi_irm_req_t));
506 
507 	return (DDI_SUCCESS);
508 }
509 
510 /*
511  * i_ddi_irm_set_cb()
512  *
513  *	Change the callback flag for a request, in response to
514  *	a change in its callback registration.  Then rebalance
515  *	the interrupt pool.
516  *
517  *	NOTE: the request is not locked because the navail value
518  *	      is not directly affected.  The balancing thread may
519  *	      modify the navail value in the background after it
520  *	      locks the request itself.
521  */
522 void
523 i_ddi_irm_set_cb(dev_info_t *dip, boolean_t has_cb_flag)
524 {
525 	devinfo_intr_t	*intr_p;
526 	ddi_irm_pool_t	*pool_p;
527 	ddi_irm_req_t	*req_p;
528 	uint_t		nreq;
529 
530 	ASSERT(dip != NULL);
531 
532 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: dip %p has_cb_flag %d\n",
533 	    (void *)dip, (int)has_cb_flag));
534 
535 	/* Validate parameters */
536 	if (dip == NULL)
537 		return;
538 
539 	/* Check for association with interrupt pool */
540 	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
541 	    !(req_p = intr_p->devi_irm_req_p)) {
542 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: not in pool\n"));
543 		return;
544 	}
545 
546 	/* Lock the pool */
547 	pool_p = req_p->ireq_pool_p;
548 	mutex_enter(&pool_p->ipool_lock);
549 
550 	/*
551 	 * Update the request and the pool
552 	 */
553 	if (has_cb_flag) {
554 
555 		/* Update pool statistics */
556 		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX)
557 			pool_p->ipool_minno -= (req_p->ireq_nreq - 1);
558 
559 		/* Update request */
560 		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;
561 
562 		/* Rebalance in background */
563 		i_ddi_irm_enqueue(pool_p, B_FALSE);
564 
565 	} else {
566 
567 		/* Determine new request size */
568 		nreq = MIN(req_p->ireq_nreq, pool_p->ipool_defsz);
569 
570 		/* Update pool statistics */
571 		pool_p->ipool_reqno -= req_p->ireq_nreq;
572 		pool_p->ipool_reqno += nreq;
573 		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX) {
574 			pool_p->ipool_minno -= 1;
575 			pool_p->ipool_minno += nreq;
576 		} else {
577 			pool_p->ipool_minno -= req_p->ireq_nreq;
578 			pool_p->ipool_minno += nreq;
579 		}
580 
581 		/* Update request size, and re-sort in pool */
582 		req_p->ireq_nreq = nreq;
583 		list_remove(&pool_p->ipool_req_list, req_p);
584 		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
585 
586 		/* Rebalance synchronously, before losing callback */
587 		i_ddi_irm_enqueue(pool_p, B_TRUE);
588 
589 		/* Remove callback flag */
590 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_CALLBACK);
591 	}
592 
593 	/* Unlock the pool */
594 	mutex_exit(&pool_p->ipool_lock);
595 }
596 
597 /*
598  * Interrupt Pool Balancing
599  */
600 
601 /*
602  * irm_balance_thread()
603  *
604  *	One instance of this thread operates per each defined IRM pool.
605  *	It does the initial activation of the pool, as well as balancing
606  *	any requests that were queued up before the pool was active.
607  *	Once active, it waits forever to service balance operations.
608  */
609 static void
610 irm_balance_thread(ddi_irm_pool_t *pool_p)
611 {
612 	clock_t		interval, wakeup;
613 
614 	DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: pool_p %p\n",
615 	    (void *)pool_p));
616 
617 	/* Lock the pool */
618 	mutex_enter(&pool_p->ipool_lock);
619 
620 	/* Perform initial balance if required */
621 	if (pool_p->ipool_reqno > pool_p->ipool_resno)
622 		i_ddi_irm_balance(pool_p);
623 
624 	/* Activate the pool */
625 	pool_p->ipool_flags |= DDI_IRM_FLAG_ACTIVE;
626 
627 	/* Main loop */
628 	for (;;) {
629 
630 		/* Compute the delay interval */
631 		interval = drv_usectohz(irm_balance_delay * 1000000);
632 
633 		/* Sleep until queued */
634 		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);
635 
636 		DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: signaled.\n"));
637 
638 		/* Wait one interval, or until there are waiters */
639 		if ((interval > 0) &&
640 		    !(pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) &&
641 		    !(pool_p->ipool_flags & DDI_IRM_FLAG_EXIT)) {
642 			wakeup = ddi_get_lbolt() + interval;
643 			(void) cv_timedwait(&pool_p->ipool_cv,
644 			    &pool_p->ipool_lock, wakeup);
645 		}
646 
647 		/* Check if awakened to exit */
648 		if (pool_p->ipool_flags & DDI_IRM_FLAG_EXIT) {
649 			DDI_INTR_IRMDBG((CE_CONT,
650 			    "irm_balance_thread: exiting...\n"));
651 			mutex_exit(&pool_p->ipool_lock);
652 			thread_exit();
653 		}
654 
655 		/* Balance the pool */
656 		i_ddi_irm_balance(pool_p);
657 
658 		/* Notify waiters */
659 		if (pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) {
660 			cv_broadcast(&pool_p->ipool_cv);
661 			pool_p->ipool_flags &= ~(DDI_IRM_FLAG_WAITERS);
662 		}
663 
664 		/* Clear QUEUED condition */
665 		pool_p->ipool_flags &= ~(DDI_IRM_FLAG_QUEUED);
666 	}
667 }
668 
669 /*
670  * i_ddi_irm_balance()
671  *
672  *	Balance a pool.  The general algorithm is to first reset all
673  *	requests to their maximum size, use reduction algorithms to
674  *	solve any imbalance, and then notify affected drivers.
675  */
676 static void
677 i_ddi_irm_balance(ddi_irm_pool_t *pool_p)
678 {
679 	ddi_irm_req_t	*req_p;
680 
681 #ifdef	DEBUG
682 	uint_t		debug_totsz = 0;
683 	int		debug_policy = 0;
684 #endif	/* DEBUG */
685 
686 	ASSERT(pool_p != NULL);
687 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
688 
689 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: pool_p %p\n",
690 	    (void *)pool_p));
691 
692 #ifdef	DEBUG	/* Adjust size and policy settings */
693 	if (irm_debug_size > pool_p->ipool_minno) {
694 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: debug size %d\n",
695 		    irm_debug_size));
696 		debug_totsz = pool_p->ipool_totsz;
697 		pool_p->ipool_totsz = irm_debug_size;
698 	}
699 	if (DDI_IRM_POLICY_VALID(irm_debug_policy)) {
700 		DDI_INTR_IRMDBG((CE_CONT,
701 		    "i_ddi_irm_balance: debug policy %d\n", irm_debug_policy));
702 		debug_policy = pool_p->ipool_policy;
703 		pool_p->ipool_policy = irm_debug_policy;
704 	}
705 #endif	/* DEBUG */
706 
707 	/* Lock the availability lock */
708 	mutex_enter(&pool_p->ipool_navail_lock);
709 
710 	/*
711 	 * Put all of the reducible requests into a scratch list.
712 	 * Reset each one of them to their maximum availability.
713 	 */
714 	for (req_p = list_head(&pool_p->ipool_req_list); req_p;
715 	    req_p = list_next(&pool_p->ipool_req_list, req_p)) {
716 		if (DDI_IRM_IS_REDUCIBLE(req_p)) {
717 			pool_p->ipool_resno -= req_p->ireq_navail;
718 			req_p->ireq_scratch = req_p->ireq_navail;
719 			req_p->ireq_navail = req_p->ireq_nreq;
720 			pool_p->ipool_resno += req_p->ireq_navail;
721 			list_insert_tail(&pool_p->ipool_scratch_list, req_p);
722 		}
723 	}
724 
725 	/* Balance the requests */
726 	i_ddi_irm_reduce(pool_p);
727 
728 	/* Unlock the availability lock */
729 	mutex_exit(&pool_p->ipool_navail_lock);
730 
731 	/*
732 	 * Process REMOVE notifications.
733 	 *
734 	 * If a driver fails to release interrupts: exclude it from
735 	 * further processing, correct the resulting imbalance, and
736 	 * start over again at the head of the scratch list.
737 	 */
738 	req_p = list_head(&pool_p->ipool_scratch_list);
739 	while (req_p) {
740 		if ((req_p->ireq_navail < req_p->ireq_scratch) &&
741 		    (i_ddi_irm_notify(pool_p, req_p) != DDI_SUCCESS)) {
742 			list_remove(&pool_p->ipool_scratch_list, req_p);
743 			mutex_enter(&pool_p->ipool_navail_lock);
744 			i_ddi_irm_reduce(pool_p);
745 			mutex_exit(&pool_p->ipool_navail_lock);
746 			req_p = list_head(&pool_p->ipool_scratch_list);
747 		} else {
748 			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
749 		}
750 	}
751 
752 	/*
753 	 * Process ADD notifications.
754 	 *
755 	 * This is the last use of the scratch list, so empty it.
756 	 */
757 	while (req_p = list_remove_head(&pool_p->ipool_scratch_list)) {
758 		if (req_p->ireq_navail > req_p->ireq_scratch) {
759 			(void) i_ddi_irm_notify(pool_p, req_p);
760 		}
761 	}
762 
763 #ifdef	DEBUG	/* Restore size and policy settings */
764 	if (debug_totsz != 0)
765 		pool_p->ipool_totsz = debug_totsz;
766 	if (debug_policy != 0)
767 		pool_p->ipool_policy = debug_policy;
768 #endif	/* DEBUG */
769 }
770 
771 /*
772  * i_ddi_irm_reduce()
773  *
774  *	Use reduction algorithms to correct an imbalance in a pool.
775  */
776 static void
777 i_ddi_irm_reduce(ddi_irm_pool_t *pool_p)
778 {
779 	int	ret, imbalance;
780 
781 	ASSERT(pool_p != NULL);
782 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
783 	ASSERT(DDI_IRM_POLICY_VALID(pool_p->ipool_policy));
784 
785 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_reduce: pool_p %p\n",
786 	    (void *)pool_p));
787 
788 	/* Compute the imbalance.  Do nothing if already balanced. */
789 	if ((imbalance = pool_p->ipool_resno - pool_p->ipool_totsz) <= 0)
790 		return;
791 
792 	/* Reduce by policy */
793 	switch (pool_p->ipool_policy) {
794 	case DDI_IRM_POLICY_LARGE:
795 		ret = i_ddi_irm_reduce_large(pool_p, imbalance);
796 		break;
797 	case DDI_IRM_POLICY_EVEN:
798 		ret = i_ddi_irm_reduce_even(pool_p, imbalance);
799 		break;
800 	}
801 
802 	/*
803 	 * If the policy based reductions failed, then
804 	 * possibly reduce new requests as a last resort.
805 	 */
806 	if (ret != DDI_SUCCESS) {
807 
808 		DDI_INTR_IRMDBG((CE_CONT,
809 		    "i_ddi_irm_reduce: policy reductions failed.\n"));
810 
811 		/* Compute remaining imbalance */
812 		imbalance = pool_p->ipool_resno - pool_p->ipool_totsz;
813 
814 		ASSERT(imbalance > 0);
815 
816 		i_ddi_irm_reduce_new(pool_p, imbalance);
817 	}
818 }
819 
820 /*
821  * i_ddi_irm_enqueue()
822  *
823  *	Queue a pool to be balanced.  Signals the balancing thread to wake
824  *	up and process the pool.  If 'wait_flag' is true, then the current
825  *	thread becomes a waiter and blocks until the balance is completed.
826  */
827 static void
828 i_ddi_irm_enqueue(ddi_irm_pool_t *pool_p, boolean_t wait_flag)
829 {
830 	ASSERT(pool_p != NULL);
831 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
832 
833 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool_p %p wait_flag %d\n",
834 	    (void *)pool_p, (int)wait_flag));
835 
836 	/* Do nothing if pool is already balanced */
837 #ifndef	DEBUG
838 	if ((pool_p->ipool_reqno == pool_p->ipool_resno)) {
839 #else
840 	if ((pool_p->ipool_reqno == pool_p->ipool_resno) && !irm_debug_size) {
841 #endif	/* DEBUG */
842 		DDI_INTR_IRMDBG((CE_CONT,
843 		    "i_ddi_irm_enqueue: pool already balanced\n"));
844 		return;
845 	}
846 
847 	/* Avoid deadlocks when IRM is not active */
848 	if (!irm_active && wait_flag) {
849 		DDI_INTR_IRMDBG((CE_CONT,
850 		    "i_ddi_irm_enqueue: pool not active.\n"));
851 		return;
852 	}
853 
854 	if (wait_flag)
855 		pool_p->ipool_flags |= DDI_IRM_FLAG_WAITERS;
856 
857 	if (wait_flag || !(pool_p->ipool_flags & DDI_IRM_FLAG_QUEUED)) {
858 		pool_p->ipool_flags |= DDI_IRM_FLAG_QUEUED;
859 		cv_signal(&pool_p->ipool_cv);
860 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool queued.\n"));
861 	}
862 
863 	if (wait_flag) {
864 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: waiting...\n"));
865 		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);
866 	}
867 }
868 
869 /*
870  * Reduction Algorithms, Used For Balancing
871  */
872 
873 /*
874  * i_ddi_irm_reduce_large()
875  *
876  *	Algorithm for the DDI_IRM_POLICY_LARGE reduction policy.
877  *
878  *	This algorithm generally reduces larger requests first, before
879  *	advancing to smaller requests.  The scratch list is initially
880  *	sorted in descending order by current navail values, which are
881  *	maximized prior to reduction.  This sorted order is preserved,
882  *	but within a range of equally sized requests they are secondarily
883  *	sorted in ascending order by initial nreq value.  The head of the
884  *	list is always selected for reduction, since it is the current
885  *	largest request.  After being reduced, it is sorted further into
886  *	the list before the next iteration.
887  *
888  *	Optimizations in this algorithm include trying to reduce multiple
889  *	requests together if they are equally sized.  And the algorithm
890  *	attempts to reduce in larger increments when possible to minimize
891  *	the total number of iterations.
892  */
893 static int
894 i_ddi_irm_reduce_large(ddi_irm_pool_t *pool_p, int imbalance)
895 {
896 	ddi_irm_req_t	*head_p, *next_p;
897 	int		next_navail, nreqs, reduction;
898 
899 	ASSERT(pool_p != NULL);
900 	ASSERT(imbalance > 0);
901 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
902 
903 	DDI_INTR_IRMDBG((CE_CONT,
904 	    "i_ddi_irm_reduce_large: pool_p %p imbalance %d\n", (void *)pool_p,
905 	    imbalance));
906 
907 	while (imbalance > 0) {
908 
909 		head_p = list_head(&pool_p->ipool_scratch_list);
910 
911 		/* Fail if nothing is reducible */
912 		if (head_p->ireq_navail <= pool_p->ipool_defsz) {
913 			DDI_INTR_IRMDBG((CE_CONT,
914 			    "i_ddi_irm_reduce_large: Failure. "
915 			    "All requests have downsized to low limit.\n"));
916 			return (DDI_FAILURE);
917 		}
918 
919 		/* Count the number of equally sized requests */
920 		for (nreqs = 1, next_p = head_p;
921 		    (next_p = list_next(&pool_p->ipool_scratch_list, next_p)) !=
922 		    NULL && (head_p->ireq_navail == next_p->ireq_navail);
923 		    nreqs++)
924 			;
925 
926 		next_navail = next_p ? next_p->ireq_navail : 0;
927 		reduction = head_p->ireq_navail -
928 		    MAX(next_navail, pool_p->ipool_defsz);
929 
930 		if ((reduction * nreqs) > imbalance) {
931 			reduction = imbalance / nreqs;
932 
933 			if (reduction == 0) {
934 				reduction = 1;
935 				nreqs = imbalance;
936 			}
937 		}
938 
939 		next_p = head_p;
940 		while (nreqs--) {
941 			imbalance -= reduction;
942 			next_p->ireq_navail -= reduction;
943 			pool_p->ipool_resno -= reduction;
944 			next_p = list_next(&pool_p->ipool_scratch_list, next_p);
945 		}
946 
947 		if (next_p && next_p->ireq_navail > head_p->ireq_navail) {
948 			ASSERT(imbalance == 0);
949 			i_ddi_irm_reduce_large_resort(pool_p);
950 		}
951 	}
952 
953 	return (DDI_SUCCESS);
954 }
955 
/*
 * i_ddi_irm_reduce_large_resort()
 *
 *	Helper function for i_ddi_irm_reduce_large().  Once a request
 *	is reduced, this resorts it further down into the list as necessary.
 *
 *	The run of equally sized requests at the head of the scratch
 *	list is moved, one request at a time, to just before the first
 *	later request whose navail is not larger than theirs.  The
 *	caller must hold the pool lock.
 *
 *	NOTE(review): the head of the scratch list is dereferenced
 *	without a NULL check, so the list is assumed to be non-empty --
 *	confirm against the caller.
 */
static void
i_ddi_irm_reduce_large_resort(ddi_irm_pool_t *pool_p)
{
	ddi_irm_req_t	*start_p, *end_p, *next_p;

	ASSERT(pool_p != NULL);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	/* Find the end of the run of requests sized like the head */
	start_p = list_head(&pool_p->ipool_scratch_list);
	end_p = list_next(&pool_p->ipool_scratch_list, start_p);
	while (end_p && start_p->ireq_navail == end_p->ireq_navail)
		end_p = list_next(&pool_p->ipool_scratch_list, end_p);

	/* Skip past the requests that are now larger than the head */
	next_p = end_p;
	while (next_p && (next_p->ireq_navail > start_p->ireq_navail))
		next_p = list_next(&pool_p->ipool_scratch_list, next_p);

	/*
	 * Move the head run in front of 'next_p'.  The loop ends when
	 * 'end_p' has become the head of the list.  When 'next_p' is
	 * NULL the request is presumably appended at the tail --
	 * confirm list_insert_before() semantics.
	 */
	while (start_p != end_p) {
		list_remove(&pool_p->ipool_scratch_list, start_p);
		list_insert_before(&pool_p->ipool_scratch_list, next_p,
		    start_p);
		start_p = list_head(&pool_p->ipool_scratch_list);
	}
}
986 
987 /*
988  * i_ddi_irm_reduce_even()
989  *
990  *	Algorithm for the DDI_IRM_POLICY_EVEN reduction policy.
991  *
992  *	This algorithm reduces requests evenly, without giving a
993  *	specific preference to smaller or larger requests.  Each
994  *	iteration reduces all reducible requests by the same amount
995  *	until the imbalance is corrected.  Although when possible,
996  *	it tries to avoid reducing requests below the threshold of
997  *	the interrupt pool's default allocation size.
998  *
999  *	An optimization in this algorithm is to reduce the requests
1000  *	in larger increments during each iteration, to minimize the
1001  *	total number of iterations required.
1002  */
1003 static int
1004 i_ddi_irm_reduce_even(ddi_irm_pool_t *pool_p, int imbalance)
1005 {
1006 	ddi_irm_req_t	*req_p, *last_p;
1007 	uint_t		nmin = pool_p->ipool_defsz;
1008 	uint_t		nreduce, reduction;
1009 
1010 	ASSERT(pool_p != NULL);
1011 	ASSERT(imbalance > 0);
1012 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
1013 
1014 	DDI_INTR_IRMDBG((CE_CONT,
1015 	    "i_ddi_irm_reduce_even: pool_p %p imbalance %d\n",
1016 	    (void *)pool_p, imbalance));
1017 
1018 	while (imbalance > 0) {
1019 
1020 		/* Count reducible requests */
1021 		nreduce = 0;
1022 		for (req_p = list_head(&pool_p->ipool_scratch_list); req_p;
1023 		    req_p = list_next(&pool_p->ipool_scratch_list, req_p)) {
1024 			if (req_p->ireq_navail <= nmin)
1025 				break;
1026 			last_p = req_p;
1027 			nreduce++;
1028 		}
1029 
1030 		/* Fail if none are reducible */
1031 		if (nreduce == 0) {
1032 			DDI_INTR_IRMDBG((CE_CONT,
1033 			    "i_ddi_irm_reduce_even: Failure. "
1034 			    "All requests have downsized to low limit.\n"));
1035 			return (DDI_FAILURE);
1036 		}
1037 
1038 		/* Compute reduction */
1039 		if (nreduce < imbalance) {
1040 			reduction = last_p->ireq_navail - nmin;
1041 			if ((reduction * nreduce) > imbalance) {
1042 				reduction = imbalance / nreduce;
1043 			}
1044 		} else {
1045 			reduction = 1;
1046 		}
1047 
1048 		/* Start at head of list, but skip excess */
1049 		req_p = list_head(&pool_p->ipool_scratch_list);
1050 		while (nreduce > imbalance) {
1051 			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
1052 			nreduce--;
1053 		}
1054 
1055 		/* Do reductions */
1056 		while (req_p && (nreduce > 0)) {
1057 			imbalance -= reduction;
1058 			req_p->ireq_navail -= reduction;
1059 			pool_p->ipool_resno -= reduction;
1060 			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
1061 			nreduce--;
1062 		}
1063 	}
1064 
1065 	return (DDI_SUCCESS);
1066 }
1067 
1068 /*
1069  * i_ddi_irm_reduce_new()
1070  *
1071  *	Reduces new requests.  This is only used as a last resort
1072  *	after another reduction algorithm failed.
1073  */
1074 static void
1075 i_ddi_irm_reduce_new(ddi_irm_pool_t *pool_p, int imbalance)
1076 {
1077 	ddi_irm_req_t	*req_p;
1078 	uint_t		nreduce;
1079 
1080 	ASSERT(pool_p != NULL);
1081 	ASSERT(imbalance > 0);
1082 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
1083 
1084 	while (imbalance > 0) {
1085 		nreduce = 0;
1086 		for (req_p = list_head(&pool_p->ipool_scratch_list);
1087 		    req_p && (imbalance > 0);
1088 		    req_p = list_next(&pool_p->ipool_scratch_list, req_p)) {
1089 			if (req_p->ireq_flags & DDI_IRM_FLAG_NEW &&
1090 			    req_p->ireq_navail > 1) {
1091 				req_p->ireq_navail--;
1092 				pool_p->ipool_resno--;
1093 				imbalance--;
1094 				nreduce++;
1095 			}
1096 		}
1097 
1098 		if (nreduce == 0)
1099 			break;
1100 	}
1101 
1102 	for (req_p = list_head(&pool_p->ipool_scratch_list);
1103 	    req_p && (imbalance > 0);
1104 	    req_p = list_next(&pool_p->ipool_scratch_list, req_p)) {
1105 		if (req_p->ireq_flags & DDI_IRM_FLAG_NEW) {
1106 			ASSERT(req_p->ireq_navail == 1);
1107 			req_p->ireq_navail--;
1108 			pool_p->ipool_resno--;
1109 			imbalance--;
1110 		}
1111 	}
1112 }
1113 
1114 /*
1115  * Miscellaneous Helper Functions
1116  */
1117 
1118 /*
1119  * i_ddi_intr_get_pool()
1120  *
1121  *	Get an IRM pool that supplies interrupts of a specified type.
1122  *	Invokes a DDI_INTROP_GETPOOL to the bus nexus driver.  Fails
1123  *	if no pool exists.
1124  */
1125 ddi_irm_pool_t *
1126 i_ddi_intr_get_pool(dev_info_t *dip, int type)
1127 {
1128 	devinfo_intr_t		*intr_p;
1129 	ddi_irm_pool_t		*pool_p;
1130 	ddi_irm_req_t		*req_p;
1131 	ddi_intr_handle_impl_t	hdl;
1132 
1133 	ASSERT(dip != NULL);
1134 	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));
1135 
1136 	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
1137 	    ((req_p = intr_p->devi_irm_req_p) != NULL) &&
1138 	    ((pool_p = req_p->ireq_pool_p) != NULL) &&
1139 	    (pool_p->ipool_types & type)) {
1140 		return (pool_p);
1141 	}
1142 
1143 	bzero(&hdl, sizeof (ddi_intr_handle_impl_t));
1144 	hdl.ih_dip = dip;
1145 	hdl.ih_type = type;
1146 
1147 	if (i_ddi_intr_ops(dip, dip, DDI_INTROP_GETPOOL,
1148 	    &hdl, (void *)&pool_p) == DDI_SUCCESS)
1149 		return (pool_p);
1150 
1151 	return (NULL);
1152 }
1153 
1154 /*
1155  * i_ddi_irm_insertion_sort()
1156  *
1157  *	Use the insertion sort method to insert a request into a list.
1158  *	The list is sorted in descending order by request size.
1159  */
1160 static void
1161 i_ddi_irm_insertion_sort(list_t *req_list, ddi_irm_req_t *req_p)
1162 {
1163 	ddi_irm_req_t	*next_p;
1164 
1165 	next_p = list_head(req_list);
1166 
1167 	while (next_p && (next_p->ireq_nreq > req_p->ireq_nreq))
1168 		next_p = list_next(req_list, next_p);
1169 
1170 	list_insert_before(req_list, next_p, req_p);
1171 }
1172 
1173 /*
1174  * i_ddi_irm_notify()
1175  *
1176  *	Notify a driver of changes to its interrupt request using the
1177  *	generic callback mechanism.  Checks for errors in processing.
1178  */
1179 static int
1180 i_ddi_irm_notify(ddi_irm_pool_t *pool_p, ddi_irm_req_t *req_p)
1181 {
1182 	ddi_cb_action_t	action;
1183 	ddi_cb_t	*cb_p;
1184 	uint_t		nintrs;
1185 	int		ret, count;
1186 
1187 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: pool_p %p req_p %p\n",
1188 	    (void *)pool_p, (void *)req_p));
1189 
1190 	/* Do not notify new or unchanged requests */
1191 	if ((req_p->ireq_navail == req_p->ireq_scratch) ||
1192 	    (req_p->ireq_flags & DDI_IRM_FLAG_NEW))
1193 		return (DDI_SUCCESS);
1194 
1195 	/* Determine action and count */
1196 	if (req_p->ireq_navail > req_p->ireq_scratch) {
1197 		action = DDI_CB_INTR_ADD;
1198 		count = req_p->ireq_navail - req_p->ireq_scratch;
1199 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: adding %d\n",
1200 		    count));
1201 	} else {
1202 		action = DDI_CB_INTR_REMOVE;
1203 		count = req_p->ireq_scratch - req_p->ireq_navail;
1204 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: removing %d\n",
1205 		    count));
1206 	}
1207 
1208 	/* Lookup driver callback */
1209 	if ((cb_p = DEVI(req_p->ireq_dip)->devi_cb_p) == NULL) {
1210 		DDI_INTR_IRMDBG((CE_WARN, "i_ddi_irm_notify: no callback!\n"));
1211 		return (DDI_FAILURE);
1212 	}
1213 
1214 	/* Do callback */
1215 	ret = cb_p->cb_func(req_p->ireq_dip, action, (void *)(uintptr_t)count,
1216 	    cb_p->cb_arg1, cb_p->cb_arg2);
1217 
1218 	/* Log callback errors */
1219 	if (ret != DDI_SUCCESS) {
1220 		cmn_err(CE_WARN, "%s%d: failed callback (action=%d, ret=%d)\n",
1221 		    ddi_driver_name(req_p->ireq_dip),
1222 		    ddi_get_instance(req_p->ireq_dip), (int)action, ret);
1223 	}
1224 
1225 	/* Check if the driver exceeds its availability */
1226 	nintrs = i_ddi_intr_get_current_nintrs(req_p->ireq_dip);
1227 	if (nintrs > req_p->ireq_navail) {
1228 		cmn_err(CE_WARN, "%s%d: failed to release interrupts "
1229 		    "(nintrs=%d, navail=%d).\n",
1230 		    ddi_driver_name(req_p->ireq_dip),
1231 		    ddi_get_instance(req_p->ireq_dip), nintrs,
1232 		    req_p->ireq_navail);
1233 		pool_p->ipool_resno += (nintrs - req_p->ireq_navail);
1234 		req_p->ireq_navail = nintrs;
1235 		return (DDI_FAILURE);
1236 	}
1237 
1238 	/* Update request */
1239 	req_p->ireq_scratch = req_p->ireq_navail;
1240 
1241 	return (DDI_SUCCESS);
1242 }
1243 
/*
 * i_ddi_irm_debug_balance()
 *
 *	A debug/test only routine (compiled under DEBUG) to force the
 *	rebalancing of an interrupt pool.  Looks up the pool for the
 *	device's current interrupt type and, with the pool lock held,
 *	hands it to i_ddi_irm_enqueue() along with wait_flag.
 *
 *	Does nothing if the device has no current interrupt type or
 *	no pool can be found for it.
 */
#ifdef	DEBUG
void
i_ddi_irm_debug_balance(dev_info_t *dip, boolean_t wait_flag)
{
	ddi_irm_pool_t	*pool_p;
	int		type;

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_debug_balance: dip %p wait %d\n",
	    (void *)dip, (int)wait_flag));

	/* The device must currently be using some interrupt type */
	type = i_ddi_intr_get_current_type(dip);
	if (type == 0)
		return;

	/* And a pool must exist for that type */
	pool_p = i_ddi_intr_get_pool(dip, type);
	if (pool_p == NULL)
		return;

	mutex_enter(&pool_p->ipool_lock);
	i_ddi_irm_enqueue(pool_p, wait_flag);
	mutex_exit(&pool_p->ipool_lock);
}
#endif
1268