xref: /titanic_51/usr/src/uts/common/os/ddi_intr_irm.c (revision 33f2fefd46350ca5992567761c46a5b70f864340)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/note.h>
27 #include <sys/sysmacros.h>
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kmem.h>
32 #include <sys/cmn_err.h>
33 #include <sys/debug.h>
34 #include <sys/ddi.h>
35 #include <sys/sunndi.h>
36 #include <sys/ndi_impldefs.h>	/* include prototypes */
37 
38 /*
39  * Interrupt Resource Management (IRM).
40  */
41 
42 #define	DDI_IRM_BALANCE_DELAY	(60)	/* In seconds */
43 
44 #define	DDI_IRM_HAS_CB(c)	((c) && (c->cb_flags & DDI_CB_FLAG_INTR))
45 
46 #define	DDI_IRM_IS_REDUCIBLE(r)	(((r->ireq_flags & DDI_IRM_FLAG_CALLBACK) && \
47 				(r->ireq_type == DDI_INTR_TYPE_MSIX)) || \
48 				(r->ireq_flags & DDI_IRM_FLAG_NEW))
49 
50 extern pri_t	minclsyspri;
51 
52 /* Global policies */
53 int		irm_enable = 1;
54 boolean_t	irm_active = B_FALSE;
55 int		irm_default_policy = DDI_IRM_POLICY_LARGE;
56 uint_t		irm_balance_delay = DDI_IRM_BALANCE_DELAY;
57 
58 /* Global list of interrupt pools */
59 kmutex_t	irm_pools_lock;
60 list_t		irm_pools_list;
61 
62 /* Global debug tunables */
63 #ifdef	DEBUG
64 int		irm_debug_policy = 0;
65 uint_t		irm_debug_size = 0;
66 #endif	/* DEBUG */
67 
68 static void	irm_balance_thread(ddi_irm_pool_t *);
69 static void	i_ddi_irm_balance(ddi_irm_pool_t *);
70 static void	i_ddi_irm_enqueue(ddi_irm_pool_t *, boolean_t);
71 static void	i_ddi_irm_reduce(ddi_irm_pool_t *pool);
72 static int	i_ddi_irm_reduce_large(ddi_irm_pool_t *, int);
73 static void	i_ddi_irm_reduce_large_resort(ddi_irm_pool_t *);
74 static int	i_ddi_irm_reduce_even(ddi_irm_pool_t *, int);
75 static void	i_ddi_irm_reduce_new(ddi_irm_pool_t *, int);
76 static void	i_ddi_irm_insertion_sort(list_t *, ddi_irm_req_t *);
77 static int	i_ddi_irm_notify(ddi_irm_pool_t *, ddi_irm_req_t *);
78 
79 /*
80  * OS Initialization Routines
81  */
82 
83 /*
84  * irm_init()
85  *
86  *	Initialize IRM subsystem before any drivers are attached.
87  */
88 void
89 irm_init(void)
90 {
91 	/* Do nothing if IRM is disabled */
92 	if (!irm_enable)
93 		return;
94 
95 	/* Verify that the default balancing policy is valid */
96 	if (!DDI_IRM_POLICY_VALID(irm_default_policy))
97 		irm_default_policy = DDI_IRM_POLICY_LARGE;
98 
99 	/* Initialize the global list of interrupt pools */
100 	mutex_init(&irm_pools_lock, NULL, MUTEX_DRIVER, NULL);
101 	list_create(&irm_pools_list, sizeof (ddi_irm_pool_t),
102 	    offsetof(ddi_irm_pool_t, ipool_link));
103 }
104 
105 /*
106  * i_ddi_irm_poststartup()
107  *
108  *	IRM is not activated until after the IO subsystem is initialized.
109  *	When activated, per-pool balancing threads are spawned and a flag
110  *	is set so that all future pools will be activated when created.
111  *
112  *	NOTE: the global variable 'irm_enable' disables IRM if zero.
113  */
114 void
115 i_ddi_irm_poststartup(void)
116 {
117 	ddi_irm_pool_t	*pool_p;
118 
119 	/* Do nothing if IRM is disabled */
120 	if (!irm_enable)
121 		return;
122 
123 	/* Lock the global list */
124 	mutex_enter(&irm_pools_lock);
125 
126 	/* Activate all defined pools */
127 	for (pool_p = list_head(&irm_pools_list); pool_p;
128 	    pool_p = list_next(&irm_pools_list, pool_p))
129 		pool_p->ipool_thread = thread_create(NULL, 0,
130 		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);
131 
132 	/* Set future pools to be active */
133 	irm_active = B_TRUE;
134 
135 	/* Unlock the global list */
136 	mutex_exit(&irm_pools_lock);
137 }
138 
139 /*
140  * NDI interfaces for creating/destroying IRM pools.
141  */
142 
143 /*
144  * ndi_irm_create()
145  *
146  *	Nexus interface to create an IRM pool.  Create the new
147  *	pool and add it to the global list of interrupt pools.
148  */
149 int
150 ndi_irm_create(dev_info_t *dip, ddi_irm_params_t *paramsp,
151     ddi_irm_pool_t **pool_retp)
152 {
153 	ddi_irm_pool_t	*pool_p;
154 
155 	ASSERT(dip != NULL);
156 	ASSERT(paramsp != NULL);
157 	ASSERT(pool_retp != NULL);
158 	ASSERT(paramsp->iparams_total >= 1);
159 	ASSERT(paramsp->iparams_types != 0);
160 	ASSERT(paramsp->iparams_default >= 1);
161 
162 	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_create: dip %p\n", (void *)dip));
163 
164 	/* Check if IRM is enabled */
165 	if (!irm_enable)
166 		return (NDI_FAILURE);
167 
168 	/* Validate parameters */
169 	if ((dip == NULL) || (paramsp == NULL) || (pool_retp == NULL) ||
170 	    (paramsp->iparams_total < 1) || (paramsp->iparams_types == 0) ||
171 	    (paramsp->iparams_default < 1))
172 		return (NDI_FAILURE);
173 
174 	/* Allocate and initialize the pool */
175 	pool_p = kmem_zalloc(sizeof (ddi_irm_pool_t), KM_SLEEP);
176 	pool_p->ipool_owner = dip;
177 	pool_p->ipool_policy = irm_default_policy;
178 	pool_p->ipool_types = paramsp->iparams_types;
179 	pool_p->ipool_totsz = paramsp->iparams_total;
180 	pool_p->ipool_defsz = paramsp->iparams_default;
181 	list_create(&pool_p->ipool_req_list, sizeof (ddi_irm_req_t),
182 	    offsetof(ddi_irm_req_t, ireq_link));
183 	list_create(&pool_p->ipool_scratch_list, sizeof (ddi_irm_req_t),
184 	    offsetof(ddi_irm_req_t, ireq_scratch_link));
185 	cv_init(&pool_p->ipool_cv, NULL, CV_DRIVER, NULL);
186 	mutex_init(&pool_p->ipool_lock, NULL, MUTEX_DRIVER, NULL);
187 	mutex_init(&pool_p->ipool_navail_lock, NULL, MUTEX_DRIVER, NULL);
188 
189 	/* Add to global list of pools */
190 	mutex_enter(&irm_pools_lock);
191 	list_insert_tail(&irm_pools_list, pool_p);
192 	mutex_exit(&irm_pools_lock);
193 
194 	/* If IRM is active, then activate the pool */
195 	if (irm_active)
196 		pool_p->ipool_thread = thread_create(NULL, 0,
197 		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);
198 
199 	*pool_retp = pool_p;
200 	return (NDI_SUCCESS);
201 }
202 
203 /*
204  * ndi_irm_destroy()
205  *
206  *	Nexus interface to destroy an IRM pool.  Destroy the pool
207  *	and remove it from the global list of interrupt pools.
208  */
209 int
210 ndi_irm_destroy(ddi_irm_pool_t *pool_p)
211 {
212 	ASSERT(pool_p != NULL);
213 	ASSERT(pool_p->ipool_resno == 0);
214 
215 	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_destroy: pool_p %p\n",
216 	    (void *)pool_p));
217 
218 	/* Validate parameters */
219 	if (pool_p == NULL)
220 		return (NDI_FAILURE);
221 
222 	/* Validate that pool is empty */
223 	if (pool_p->ipool_resno != 0)
224 		return (NDI_BUSY);
225 
226 	/* Remove the pool from the global list */
227 	mutex_enter(&irm_pools_lock);
228 	list_remove(&irm_pools_list, pool_p);
229 	mutex_exit(&irm_pools_lock);
230 
231 	/* Terminate the balancing thread */
232 	mutex_enter(&pool_p->ipool_lock);
233 	if (pool_p->ipool_thread &&
234 	    (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) {
235 		pool_p->ipool_flags |= DDI_IRM_FLAG_EXIT;
236 		cv_signal(&pool_p->ipool_cv);
237 		thread_join(pool_p->ipool_thread->t_did);
238 	}
239 	mutex_exit(&pool_p->ipool_lock);
240 
241 	/* Destroy the pool */
242 	cv_destroy(&pool_p->ipool_cv);
243 	mutex_destroy(&pool_p->ipool_lock);
244 	mutex_destroy(&pool_p->ipool_navail_lock);
245 	list_destroy(&pool_p->ipool_req_list);
246 	list_destroy(&pool_p->ipool_scratch_list);
247 	kmem_free(pool_p, sizeof (ddi_irm_pool_t));
248 
249 	return (NDI_SUCCESS);
250 }
251 
252 /*
253  * Insert/Modify/Remove Interrupt Requests
254  */
255 
256 /*
257  * i_ddi_irm_insert()
258  *
259  *	Insert a new request into an interrupt pool, and balance the pool.
260  */
261 int
262 i_ddi_irm_insert(dev_info_t *dip, int type, int count)
263 {
264 	ddi_cb_t	*cb_p;
265 	ddi_irm_req_t	*req_p;
266 	devinfo_intr_t	*intr_p;
267 	ddi_irm_pool_t	*pool_p;
268 	uint_t		nreq, nmin, npartial;
269 	boolean_t	irm_flag = B_FALSE;
270 
271 	ASSERT(dip != NULL);
272 	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));
273 	ASSERT(count > 0);
274 
275 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: dip %p type %d count %d\n",
276 	    (void *)dip, type, count));
277 
278 	/* Validate parameters */
279 	if ((dip == NULL) || (count < 1) || !DDI_INTR_TYPE_FLAG_VALID(type)) {
280 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: invalid args\n"));
281 		return (DDI_EINVAL);
282 	}
283 
284 	/* Check for an existing request */
285 	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
286 	    (intr_p->devi_irm_req_p != NULL))
287 		return (DDI_SUCCESS);
288 
289 	/* Check for IRM support from the system */
290 	if ((pool_p = i_ddi_intr_get_pool(dip, type)) == NULL) {
291 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: not supported\n"));
292 		return (DDI_ENOTSUP);
293 	}
294 
295 	/* Check for IRM support from the driver */
296 	if (((cb_p = DEVI(dip)->devi_cb_p) != NULL) && DDI_IRM_HAS_CB(cb_p) &&
297 	    (type == DDI_INTR_TYPE_MSIX))
298 		irm_flag = B_TRUE;
299 
300 	/* Determine request size */
301 	nreq = (irm_flag) ? count : i_ddi_intr_get_current_navail(dip, type);
302 	nmin = (irm_flag) ? 1 : nreq;
303 	npartial = MIN(nreq, pool_p->ipool_defsz);
304 
305 	/* Allocate and initialize the request */
306 	req_p = kmem_zalloc(sizeof (ddi_irm_req_t), KM_SLEEP);
307 	req_p->ireq_type = type;
308 	req_p->ireq_dip = dip;
309 	req_p->ireq_pool_p = pool_p;
310 	req_p->ireq_nreq = nreq;
311 	req_p->ireq_flags = DDI_IRM_FLAG_NEW;
312 	if (DDI_IRM_HAS_CB(cb_p))
313 		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;
314 
315 	/* Lock the pool */
316 	mutex_enter(&pool_p->ipool_lock);
317 
318 	/* Check for minimal fit before inserting */
319 	if ((pool_p->ipool_minno + nmin) > pool_p->ipool_totsz) {
320 		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
321 		    ddi_driver_name(dip), ddi_get_instance(dip));
322 		mutex_exit(&pool_p->ipool_lock);
323 		kmem_free(req_p, sizeof (ddi_irm_req_t));
324 		return (DDI_EAGAIN);
325 	}
326 
327 	/* Insert the request into the pool */
328 	pool_p->ipool_reqno += nreq;
329 	pool_p->ipool_minno += nmin;
330 	i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
331 
332 	/*
333 	 * Try to fulfill the request.
334 	 *
335 	 * If all the interrupts are available, and either the request
336 	 * is static or the pool is active, then just take them directly.
337 	 *
338 	 * If only some of the interrupts are available, and the request
339 	 * can receive future callbacks, then take some now but queue the
340 	 * pool to be rebalanced later.
341 	 *
342 	 * Otherwise, immediately rebalance the pool and wait.
343 	 */
344 	if ((!irm_flag || (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) &&
345 	    ((pool_p->ipool_resno + nreq) <= pool_p->ipool_totsz)) {
346 
347 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
348 		    "request completely fulfilled.\n"));
349 		pool_p->ipool_resno += nreq;
350 		req_p->ireq_navail = nreq;
351 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
352 
353 	} else if (irm_flag &&
354 	    ((pool_p->ipool_resno + npartial) <= pool_p->ipool_totsz)) {
355 
356 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
357 		    "request partially fulfilled.\n"));
358 		pool_p->ipool_resno += npartial;
359 		req_p->ireq_navail = npartial;
360 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
361 		i_ddi_irm_enqueue(pool_p, B_FALSE);
362 
363 	} else {
364 
365 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
366 		    "request needs immediate rebalance.\n"));
367 		i_ddi_irm_enqueue(pool_p, B_TRUE);
368 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
369 	}
370 
371 	/* Fail if the request cannot be fulfilled at all */
372 	if (req_p->ireq_navail == 0) {
373 		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
374 		    ddi_driver_name(dip), ddi_get_instance(dip));
375 		mutex_exit(&pool_p->ipool_lock);
376 		pool_p->ipool_reqno -= nreq;
377 		pool_p->ipool_minno -= nmin;
378 		list_remove(&pool_p->ipool_req_list, req_p);
379 		kmem_free(req_p, sizeof (ddi_irm_req_t));
380 		return (DDI_EAGAIN);
381 	}
382 
383 	/* Unlock the pool */
384 	mutex_exit(&pool_p->ipool_lock);
385 
386 	intr_p->devi_irm_req_p = req_p;
387 	return (DDI_SUCCESS);
388 }
389 
390 /*
391  * i_ddi_irm_modify()
392  *
393  *	Modify an existing request in an interrupt pool, and balance the pool.
394  */
395 int
396 i_ddi_irm_modify(dev_info_t *dip, int nreq)
397 {
398 	devinfo_intr_t	*intr_p;
399 	ddi_irm_req_t	*req_p;
400 	ddi_irm_pool_t	*pool_p;
401 
402 	ASSERT(dip != NULL);
403 
404 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: dip %p nreq %d\n",
405 	    (void *)dip, nreq));
406 
407 	/* Validate parameters */
408 	if ((dip == NULL) || (nreq < 1)) {
409 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
410 		return (DDI_EINVAL);
411 	}
412 
413 	/* Check that the operation is supported */
414 	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
415 	    !(req_p = intr_p->devi_irm_req_p) ||
416 	    !DDI_IRM_IS_REDUCIBLE(req_p)) {
417 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: not supported\n"));
418 		return (DDI_ENOTSUP);
419 	}
420 
421 	/* Validate request size is not too large */
422 	if (nreq > intr_p->devi_intr_sup_nintrs) {
423 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
424 		return (DDI_EINVAL);
425 	}
426 
427 	/*
428 	 * Modify request, but only if new size is different.
429 	 */
430 	if (nreq != req_p->ireq_nreq) {
431 
432 		/* Lock the pool */
433 		pool_p = req_p->ireq_pool_p;
434 		mutex_enter(&pool_p->ipool_lock);
435 
436 		/* Update pool and request */
437 		pool_p->ipool_reqno -= req_p->ireq_nreq;
438 		pool_p->ipool_reqno += nreq;
439 		req_p->ireq_nreq = nreq;
440 
441 		/* Re-sort request in the pool */
442 		list_remove(&pool_p->ipool_req_list, req_p);
443 		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
444 
445 		/* Queue pool to be rebalanced */
446 		i_ddi_irm_enqueue(pool_p, B_FALSE);
447 
448 		/* Unlock the pool */
449 		mutex_exit(&pool_p->ipool_lock);
450 	}
451 
452 	return (DDI_SUCCESS);
453 }
454 
455 /*
456  * i_ddi_irm_remove()
457  *
458  *	Remove a request from an interrupt pool, and balance the pool.
459  */
460 int
461 i_ddi_irm_remove(dev_info_t *dip)
462 {
463 	devinfo_intr_t	*intr_p;
464 	ddi_irm_pool_t	*pool_p;
465 	ddi_irm_req_t	*req_p;
466 	uint_t		nmin;
467 
468 	ASSERT(dip != NULL);
469 
470 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: dip %p\n", (void *)dip));
471 
472 	/* Validate parameters */
473 	if (dip == NULL) {
474 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: invalid args\n"));
475 		return (DDI_EINVAL);
476 	}
477 
478 	/* Check if the device has a request */
479 	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
480 	    !(req_p = intr_p->devi_irm_req_p)) {
481 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: not found\n"));
482 		return (DDI_EINVAL);
483 	}
484 
485 	/* Lock the pool */
486 	pool_p = req_p->ireq_pool_p;
487 	mutex_enter(&pool_p->ipool_lock);
488 
489 	/* Remove request */
490 	nmin = DDI_IRM_IS_REDUCIBLE(req_p) ? 1 : req_p->ireq_nreq;
491 	pool_p->ipool_minno -= nmin;
492 	pool_p->ipool_reqno -= req_p->ireq_nreq;
493 	pool_p->ipool_resno -= req_p->ireq_navail;
494 	list_remove(&pool_p->ipool_req_list, req_p);
495 
496 	/* Queue pool to be rebalanced */
497 	i_ddi_irm_enqueue(pool_p, B_FALSE);
498 
499 	/* Unlock the pool */
500 	mutex_exit(&pool_p->ipool_lock);
501 
502 	/* Destroy the request */
503 	intr_p->devi_irm_req_p = NULL;
504 	kmem_free(req_p, sizeof (ddi_irm_req_t));
505 
506 	return (DDI_SUCCESS);
507 }
508 
/*
 * i_ddi_irm_set_cb()
 *
 *	Change the callback flag for a request, in response to
 *	a change in its callback registration.  Then rebalance
 *	the interrupt pool.
 *
 *	NOTE: the request is not locked because the navail value
 *	      is not directly affected.  The balancing thread may
 *	      modify the navail value in the background after it
 *	      locks the request itself.
 */
void
i_ddi_irm_set_cb(dev_info_t *dip, boolean_t has_cb_flag)
{
	devinfo_intr_t	*intr_p;
	ddi_irm_pool_t	*pool_p;
	ddi_irm_req_t	*req_p;
	uint_t		nreq;

	ASSERT(dip != NULL);

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: dip %p has_cb_flag %d\n",
	    (void *)dip, (int)has_cb_flag));

	/* Validate parameters */
	if (dip == NULL)
		return;

	/* Check for association with interrupt pool; no-op otherwise */
	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
	    !(req_p = intr_p->devi_irm_req_p)) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: not in pool\n"));
		return;
	}

	/* Lock the pool */
	pool_p = req_p->ireq_pool_p;
	mutex_enter(&pool_p->ipool_lock);

	/*
	 * Update the request and the pool
	 */
	if (has_cb_flag) {

		/*
		 * Gaining a callback: an MSI-X request becomes reducible,
		 * so its contribution to ipool_minno drops from its full
		 * size down to the minimum of one vector.
		 */
		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX)
			pool_p->ipool_minno -= (req_p->ireq_nreq - 1);

		/* Update request */
		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;

		/* Rebalance in background */
		i_ddi_irm_enqueue(pool_p, B_FALSE);

	} else {

		/*
		 * Losing the callback: clamp the request to the pool's
		 * default allocation size, since it can no longer be
		 * asked to give vectors back later.
		 */
		nreq = MIN(req_p->ireq_nreq, pool_p->ipool_defsz);

		/*
		 * Update pool statistics.  An MSI-X request previously
		 * contributed only 1 to ipool_minno (it was reducible);
		 * other types contributed their full size.  Either way
		 * the new contribution is the clamped size 'nreq'.
		 */
		pool_p->ipool_reqno -= req_p->ireq_nreq;
		pool_p->ipool_reqno += nreq;
		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX) {
			pool_p->ipool_minno -= 1;
			pool_p->ipool_minno += nreq;
		} else {
			pool_p->ipool_minno -= req_p->ireq_nreq;
			pool_p->ipool_minno += nreq;
		}

		/* Update request size, and re-sort in pool */
		req_p->ireq_nreq = nreq;
		list_remove(&pool_p->ipool_req_list, req_p);
		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);

		/* Rebalance synchronously, before losing callback */
		i_ddi_irm_enqueue(pool_p, B_TRUE);

		/* Remove callback flag */
		req_p->ireq_flags &= ~(DDI_IRM_FLAG_CALLBACK);
	}

	/* Unlock the pool */
	mutex_exit(&pool_p->ipool_lock);
}
595 
596 /*
597  * Interrupt Pool Balancing
598  */
599 
/*
 * irm_balance_thread()
 *
 *	One instance of this thread operates per each defined IRM pool.
 *	It does the initial activation of the pool, as well as balancing
 *	any requests that were queued up before the pool was active.
 *	Once active, it waits forever to service balance operations.
 *
 *	Runs with ipool_lock held except while blocked in cv_wait /
 *	cv_timedwait; never returns (terminates only via thread_exit()
 *	when DDI_IRM_FLAG_EXIT is set by ndi_irm_destroy()).
 */
static void
irm_balance_thread(ddi_irm_pool_t *pool_p)
{
	clock_t		interval, wakeup;

	DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: pool_p %p\n",
	    (void *)pool_p));

	/* Lock the pool */
	mutex_enter(&pool_p->ipool_lock);

	/* Perform initial balance if required (demand exceeds supply) */
	if (pool_p->ipool_reqno > pool_p->ipool_resno)
		i_ddi_irm_balance(pool_p);

	/* Activate the pool */
	pool_p->ipool_flags |= DDI_IRM_FLAG_ACTIVE;

	/* Main loop */
	for (;;) {

		/*
		 * Compute the delay interval.  Recomputed every pass so
		 * changes to the 'irm_balance_delay' tunable take effect.
		 */
		interval = drv_usectohz(irm_balance_delay * 1000000);

		/* Sleep until queued */
		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);

		DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: signaled.\n"));

		/*
		 * Wait one interval, or until there are waiters.  The
		 * delay coalesces bursts of queue operations into a
		 * single balance; it is skipped when a waiter is blocked
		 * (synchronous request) or an exit has been requested.
		 */
		if ((interval > 0) &&
		    !(pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) &&
		    !(pool_p->ipool_flags & DDI_IRM_FLAG_EXIT)) {
			wakeup = ddi_get_lbolt() + interval;
			(void) cv_timedwait(&pool_p->ipool_cv,
			    &pool_p->ipool_lock, wakeup);
		}

		/* Check if awakened to exit */
		if (pool_p->ipool_flags & DDI_IRM_FLAG_EXIT) {
			DDI_INTR_IRMDBG((CE_CONT,
			    "irm_balance_thread: exiting...\n"));
			mutex_exit(&pool_p->ipool_lock);
			thread_exit();
		}

		/* Balance the pool */
		i_ddi_irm_balance(pool_p);

		/* Notify waiters blocked in i_ddi_irm_enqueue() */
		if (pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) {
			cv_broadcast(&pool_p->ipool_cv);
			pool_p->ipool_flags &= ~(DDI_IRM_FLAG_WAITERS);
		}

		/* Clear QUEUED condition */
		pool_p->ipool_flags &= ~(DDI_IRM_FLAG_QUEUED);
	}
}
667 
/*
 * i_ddi_irm_balance()
 *
 *	Balance a pool.  The general algorithm is to first reset all
 *	requests to their maximum size, use reduction algorithms to
 *	solve any imbalance, and then notify affected drivers.
 *
 *	Called with ipool_lock held (asserted below).
 */
static void
i_ddi_irm_balance(ddi_irm_pool_t *pool_p)
{
	ddi_irm_req_t	*req_p;

#ifdef	DEBUG
	uint_t		debug_totsz = 0;
	int		debug_policy = 0;
#endif	/* DEBUG */

	ASSERT(pool_p != NULL);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: pool_p %p\n",
	    (void *)pool_p));

#ifdef	DEBUG	/* Adjust size and policy settings */
	/*
	 * Debug kernels may temporarily override the pool size and
	 * policy with the irm_debug_size/irm_debug_policy tunables;
	 * the originals are saved and restored at the end.
	 */
	if (irm_debug_size > pool_p->ipool_minno) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: debug size %d\n",
		    irm_debug_size));
		debug_totsz = pool_p->ipool_totsz;
		pool_p->ipool_totsz = irm_debug_size;
	}
	if (DDI_IRM_POLICY_VALID(irm_debug_policy)) {
		DDI_INTR_IRMDBG((CE_CONT,
		    "i_ddi_irm_balance: debug policy %d\n", irm_debug_policy));
		debug_policy = pool_p->ipool_policy;
		pool_p->ipool_policy = irm_debug_policy;
	}
#endif	/* DEBUG */

	/* Lock the availability lock */
	mutex_enter(&pool_p->ipool_navail_lock);

	/*
	 * Put all of the reducible requests into a scratch list.
	 * Reset each one of them to their maximum availability.
	 * ipool_resno is kept consistent while doing so.
	 */
	for (req_p = list_head(&pool_p->ipool_req_list); req_p;
	    req_p = list_next(&pool_p->ipool_req_list, req_p)) {
		if (DDI_IRM_IS_REDUCIBLE(req_p)) {
			pool_p->ipool_resno -= req_p->ireq_navail;
			req_p->ireq_scratch = req_p->ireq_navail;
			req_p->ireq_navail = req_p->ireq_nreq;
			pool_p->ipool_resno += req_p->ireq_navail;
			list_insert_tail(&pool_p->ipool_scratch_list, req_p);
		}
	}

	/* Balance the requests */
	i_ddi_irm_reduce(pool_p);

	/* Unlock the availability lock */
	mutex_exit(&pool_p->ipool_navail_lock);

	/*
	 * Process REMOVE notifications.
	 *
	 * If a driver fails to release interrupts: exclude it from
	 * further processing, correct the resulting imbalance, and
	 * start over again at the head of the scratch list.
	 */
	req_p = list_head(&pool_p->ipool_scratch_list);
	while (req_p) {
		if ((req_p->ireq_navail < req_p->ireq_scratch) &&
		    (i_ddi_irm_notify(pool_p, req_p) != DDI_SUCCESS)) {
			list_remove(&pool_p->ipool_scratch_list, req_p);
			mutex_enter(&pool_p->ipool_navail_lock);
			i_ddi_irm_reduce(pool_p);
			mutex_exit(&pool_p->ipool_navail_lock);
			req_p = list_head(&pool_p->ipool_scratch_list);
		} else {
			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
		}
	}

	/*
	 * Process ADD notifications.
	 *
	 * This is the last use of the scratch list, so empty it.
	 * ADD failures are ignored; the driver simply keeps its
	 * previous allocation.
	 */
	while (req_p = list_remove_head(&pool_p->ipool_scratch_list)) {
		if (req_p->ireq_navail > req_p->ireq_scratch) {
			(void) i_ddi_irm_notify(pool_p, req_p);
		}
	}

#ifdef	DEBUG	/* Restore size and policy settings */
	if (debug_totsz != 0)
		pool_p->ipool_totsz = debug_totsz;
	if (debug_policy != 0)
		pool_p->ipool_policy = debug_policy;
#endif	/* DEBUG */
}
769 
770 /*
771  * i_ddi_irm_reduce()
772  *
773  *	Use reduction algorithms to correct an imbalance in a pool.
774  */
775 static void
776 i_ddi_irm_reduce(ddi_irm_pool_t *pool_p)
777 {
778 	int	ret, imbalance;
779 
780 	ASSERT(pool_p != NULL);
781 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
782 	ASSERT(DDI_IRM_POLICY_VALID(pool_p->ipool_policy));
783 
784 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_reduce: pool_p %p\n",
785 	    (void *)pool_p));
786 
787 	/* Compute the imbalance.  Do nothing if already balanced. */
788 	if ((imbalance = pool_p->ipool_resno - pool_p->ipool_totsz) <= 0)
789 		return;
790 
791 	/* Reduce by policy */
792 	switch (pool_p->ipool_policy) {
793 	case DDI_IRM_POLICY_LARGE:
794 		ret = i_ddi_irm_reduce_large(pool_p, imbalance);
795 		break;
796 	case DDI_IRM_POLICY_EVEN:
797 		ret = i_ddi_irm_reduce_even(pool_p, imbalance);
798 		break;
799 	}
800 
801 	/*
802 	 * If the policy based reductions failed, then
803 	 * possibly reduce new requests as a last resort.
804 	 */
805 	if (ret != DDI_SUCCESS) {
806 
807 		DDI_INTR_IRMDBG((CE_CONT,
808 		    "i_ddi_irm_reduce: policy reductions failed.\n"));
809 
810 		/* Compute remaining imbalance */
811 		imbalance = pool_p->ipool_resno - pool_p->ipool_totsz;
812 
813 		ASSERT(imbalance > 0);
814 
815 		i_ddi_irm_reduce_new(pool_p, imbalance);
816 	}
817 }
818 
819 /*
820  * i_ddi_irm_enqueue()
821  *
822  *	Queue a pool to be balanced.  Signals the balancing thread to wake
823  *	up and process the pool.  If 'wait_flag' is true, then the current
824  *	thread becomes a waiter and blocks until the balance is completed.
825  */
826 static void
827 i_ddi_irm_enqueue(ddi_irm_pool_t *pool_p, boolean_t wait_flag)
828 {
829 	ASSERT(pool_p != NULL);
830 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
831 
832 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool_p %p wait_flag %d\n",
833 	    (void *)pool_p, (int)wait_flag));
834 
835 	/* Do nothing if pool is already balanced */
836 #ifndef	DEBUG
837 	if ((pool_p->ipool_reqno == pool_p->ipool_resno)) {
838 #else
839 	if ((pool_p->ipool_reqno == pool_p->ipool_resno) && !irm_debug_size) {
840 #endif	/* DEBUG */
841 		DDI_INTR_IRMDBG((CE_CONT,
842 		    "i_ddi_irm_enqueue: pool already balanced\n"));
843 		return;
844 	}
845 
846 	/* Avoid deadlocks when IRM is not active */
847 	if (!irm_active && wait_flag) {
848 		DDI_INTR_IRMDBG((CE_CONT,
849 		    "i_ddi_irm_enqueue: pool not active.\n"));
850 		return;
851 	}
852 
853 	if (wait_flag)
854 		pool_p->ipool_flags |= DDI_IRM_FLAG_WAITERS;
855 
856 	if (wait_flag || !(pool_p->ipool_flags & DDI_IRM_FLAG_QUEUED)) {
857 		pool_p->ipool_flags |= DDI_IRM_FLAG_QUEUED;
858 		cv_signal(&pool_p->ipool_cv);
859 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool queued.\n"));
860 	}
861 
862 	if (wait_flag) {
863 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: waiting...\n"));
864 		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);
865 	}
866 }
867 
868 /*
869  * Reduction Algorithms, Used For Balancing
870  */
871 
/*
 * i_ddi_irm_reduce_large()
 *
 *	Algorithm for the DDI_IRM_POLICY_LARGE reduction policy.
 *
 *	This algorithm generally reduces larger requests first, before
 *	advancing to smaller requests.  The scratch list is initially
 *	sorted in descending order by current navail values, which are
 *	maximized prior to reduction.  This sorted order is preserved,
 *	but within a range of equally sized requests they are secondarily
 *	sorted in ascending order by initial nreq value.  The head of the
 *	list is always selected for reduction, since it is the current
 *	largest request.  After being reduced, it is sorted further into
 *	the list before the next iteration.
 *
 *	Optimizations in this algorithm include trying to reduce multiple
 *	requests together if they are equally sized.  And the algorithm
 *	attempts to reduce in larger increments when possible to minimize
 *	the total number of iterations.
 *
 *	Returns DDI_SUCCESS when the imbalance has been eliminated, or
 *	DDI_FAILURE when every request is already at its minimum of one.
 */
static int
i_ddi_irm_reduce_large(ddi_irm_pool_t *pool_p, int imbalance)
{
	ddi_irm_req_t	*req_p, *next_p;
	int		nreqs, reduction;

	ASSERT(pool_p != NULL);
	ASSERT(imbalance > 0);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	DDI_INTR_IRMDBG((CE_CONT,
	    "i_ddi_irm_reduce_large: pool_p %p imbalance %d\n", (void *)pool_p,
	    imbalance));

	while (imbalance > 0) {

		/*
		 * NOTE(review): assumes the scratch list is non-empty
		 * whenever imbalance > 0 — confirm with callers.
		 */
		req_p = list_head(&pool_p->ipool_scratch_list);
		next_p = list_next(&pool_p->ipool_scratch_list, req_p);

		/*
		 * Fail if nothing is reducible: the head is the largest
		 * request, so navail == 1 means all are at their minimum.
		 */
		if (req_p->ireq_navail == 1) {
			DDI_INTR_IRMDBG((CE_CONT,
			    "i_ddi_irm_reduce_large: failure.\n"));
			return (DDI_FAILURE);
		}

		/* Count the number of equally sized requests at the head */
		nreqs = 1;
		while (next_p && (req_p->ireq_navail == next_p->ireq_navail)) {
			next_p = list_next(&pool_p->ipool_scratch_list, next_p);
			nreqs++;
		}

		/*
		 * Try to reduce multiple requests together.  The whole
		 * group is lowered by the same amount: down to one above
		 * the next-smaller request (preserving sort order), or
		 * to 1 if the group is the tail of the list, capped so
		 * the group's total reduction does not exceed imbalance.
		 */
		if (nreqs > 1) {

			if (next_p) {
				reduction = req_p->ireq_navail -
				    (next_p->ireq_navail + 1);
			} else {
				reduction = req_p->ireq_navail - 1;
			}

			if ((reduction * nreqs) > imbalance)
				reduction = imbalance / nreqs;

			if (reduction > 0) {
				/* Apply the reduction across the group */
				while (req_p && (req_p != next_p)) {
					imbalance -= reduction;
					req_p->ireq_navail -= reduction;
					pool_p->ipool_resno -= reduction;
					req_p = list_next(
					    &pool_p->ipool_scratch_list, req_p);
				}
				continue;
			}
		}

		/*
		 * Or just reduce the current request: down to the size
		 * of the next request (capped by imbalance), or by one.
		 */
		next_p = list_next(&pool_p->ipool_scratch_list, req_p);
		if (next_p && (req_p->ireq_navail > next_p->ireq_navail)) {
			reduction = req_p->ireq_navail - next_p->ireq_navail;
			reduction = MIN(reduction, imbalance);
		} else {
			reduction = 1;
		}
		imbalance -= reduction;
		req_p->ireq_navail -= reduction;
		pool_p->ipool_resno -= reduction;

		/* Re-sort the scratch list if not yet finished */
		if (imbalance > 0) {
			i_ddi_irm_reduce_large_resort(pool_p);
		}
	}

	return (DDI_SUCCESS);
}
970 
971 /*
972  * i_ddi_irm_reduce_large_resort()
973  *
974  *	Helper function for i_ddi_irm_reduce_large().  Once a request
975  *	is reduced, this resorts it further down into the list as necessary.
976  */
977 static void
978 i_ddi_irm_reduce_large_resort(ddi_irm_pool_t *pool_p)
979 {
980 	ddi_irm_req_t	*req_p, *next_p;
981 
982 	ASSERT(pool_p != NULL);
983 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
984 
985 	req_p = list_remove_head(&pool_p->ipool_scratch_list);
986 	next_p = list_head(&pool_p->ipool_scratch_list);
987 
988 	while (next_p &&
989 	    ((next_p->ireq_navail > req_p->ireq_navail) ||
990 	    ((next_p->ireq_navail == req_p->ireq_navail) &&
991 	    (next_p->ireq_nreq < req_p->ireq_nreq))))
992 		next_p = list_next(&pool_p->ipool_scratch_list, next_p);
993 
994 	list_insert_before(&pool_p->ipool_scratch_list, next_p, req_p);
995 }
996 
997 /*
998  * i_ddi_irm_reduce_even()
999  *
1000  *	Algorithm for the DDI_IRM_POLICY_EVEN reduction policy.
1001  *
1002  *	This algorithm reduces requests evenly, without giving a
1003  *	specific preference to smaller or larger requests.  Each
1004  *	iteration reduces all reducible requests by the same amount
1005  *	until the imbalance is corrected.  Although when possible,
1006  *	it tries to avoid reducing requests below the threshold of
1007  *	the interrupt pool's default allocation size.
1008  *
1009  *	An optimization in this algorithm is to reduce the requests
1010  *	in larger increments during each iteration, to minimize the
1011  *	total number of iterations required.
1012  */
static int
i_ddi_irm_reduce_even(ddi_irm_pool_t *pool_p, int imbalance)
{
	ddi_irm_req_t	*req_p, *last_p;
	uint_t		nmin = pool_p->ipool_defsz;	/* current floor */
	uint_t		nreduce, reduction;

	ASSERT(pool_p != NULL);
	ASSERT(imbalance > 0);
	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));

	DDI_INTR_IRMDBG((CE_CONT,
	    "i_ddi_irm_reduce_even: pool_p %p imbalance %d\n",
	    (void *)pool_p, imbalance));

	while ((nmin > 0) && (imbalance > 0)) {

		/*
		 * Count reducible requests.  The scratch list is sorted
		 * in descending order of ireq_navail, so the scan can
		 * stop at the first entry already at or below the floor.
		 * When nreduce > 0, last_p is the smallest reducible
		 * request (it is only read in that case below).
		 */
		nreduce = 0;
		for (req_p = list_head(&pool_p->ipool_scratch_list); req_p;
		    req_p = list_next(&pool_p->ipool_scratch_list, req_p)) {
			if (req_p->ireq_navail <= nmin)
				break;
			last_p = req_p;
			nreduce++;
		}

		/* If none are reducible, try a lower minimum */
		if (nreduce == 0) {
			nmin--;
			continue;
		}

		/*
		 * Compute the per-request reduction.  When fewer requests
		 * are reducible than the remaining imbalance, take them
		 * all down toward the floor in one larger step, capped so
		 * the combined reduction never exceeds the imbalance.
		 * Otherwise fall back to reducing one vector at a time.
		 */
		if (nreduce < imbalance) {
			reduction = last_p->ireq_navail - nmin;
			if ((reduction * nreduce) > imbalance) {
				reduction = imbalance / nreduce;
			}
		} else {
			reduction = 1;
		}

		/*
		 * Start at head of list, but skip excess: with
		 * reduction == 1, only the last 'imbalance' reducible
		 * requests (the smallest ones) need to give up a vector.
		 */
		req_p = list_head(&pool_p->ipool_scratch_list);
		while (nreduce > imbalance) {
			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
			nreduce--;
		}

		/* Do reductions, updating the pool's usage accounting */
		while (req_p && (nreduce > 0)) {
			imbalance -= reduction;
			req_p->ireq_navail -= reduction;
			pool_p->ipool_resno -= reduction;
			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
			nreduce--;
		}
	}

	/* Floor hit zero before the imbalance was corrected */
	if (nmin == 0) {
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_reduce_even: failure.\n"));
		return (DDI_FAILURE);
	}

	return (DDI_SUCCESS);
}
1080 
1081 /*
1082  * i_ddi_irm_reduce_new()
1083  *
1084  *	Reduces new requests to zero.  This is only used as a
1085  *	last resort after another reduction algorithm failed.
1086  */
1087 static void
1088 i_ddi_irm_reduce_new(ddi_irm_pool_t *pool_p, int imbalance)
1089 {
1090 	ddi_irm_req_t	*req_p;
1091 
1092 	ASSERT(pool_p != NULL);
1093 	ASSERT(imbalance > 0);
1094 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
1095 
1096 	for (req_p = list_head(&pool_p->ipool_scratch_list);
1097 	    req_p && (imbalance > 0);
1098 	    req_p = list_next(&pool_p->ipool_scratch_list, req_p)) {
1099 		ASSERT(req_p->ireq_navail == 1);
1100 		if (req_p->ireq_flags & DDI_IRM_FLAG_NEW) {
1101 			req_p->ireq_navail--;
1102 			pool_p->ipool_resno--;
1103 			imbalance--;
1104 		}
1105 	}
1106 }
1107 
1108 /*
1109  * Miscellaneous Helper Functions
1110  */
1111 
1112 /*
1113  * i_ddi_intr_get_pool()
1114  *
1115  *	Get an IRM pool that supplies interrupts of a specified type.
1116  *	Invokes a DDI_INTROP_GETPOOL to the bus nexus driver.  Fails
1117  *	if no pool exists.
1118  */
1119 ddi_irm_pool_t *
1120 i_ddi_intr_get_pool(dev_info_t *dip, int type)
1121 {
1122 	devinfo_intr_t		*intr_p;
1123 	ddi_irm_pool_t		*pool_p;
1124 	ddi_irm_req_t		*req_p;
1125 	ddi_intr_handle_impl_t	hdl;
1126 
1127 	ASSERT(dip != NULL);
1128 	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));
1129 
1130 	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
1131 	    ((req_p = intr_p->devi_irm_req_p) != NULL) &&
1132 	    ((pool_p = req_p->ireq_pool_p) != NULL) &&
1133 	    (pool_p->ipool_types & type)) {
1134 		return (pool_p);
1135 	}
1136 
1137 	bzero(&hdl, sizeof (ddi_intr_handle_impl_t));
1138 	hdl.ih_dip = dip;
1139 	hdl.ih_type = type;
1140 
1141 	if (i_ddi_intr_ops(dip, dip, DDI_INTROP_GETPOOL,
1142 	    &hdl, (void *)&pool_p) == DDI_SUCCESS)
1143 		return (pool_p);
1144 
1145 	return (NULL);
1146 }
1147 
1148 /*
1149  * i_ddi_irm_insertion_sort()
1150  *
1151  *	Use the insertion sort method to insert a request into a list.
1152  *	The list is sorted in descending order by request size.
1153  */
1154 static void
1155 i_ddi_irm_insertion_sort(list_t *req_list, ddi_irm_req_t *req_p)
1156 {
1157 	ddi_irm_req_t	*next_p;
1158 
1159 	next_p = list_head(req_list);
1160 
1161 	while (next_p && (next_p->ireq_nreq > req_p->ireq_nreq))
1162 		next_p = list_next(req_list, next_p);
1163 
1164 	list_insert_before(req_list, next_p, req_p);
1165 }
1166 
1167 /*
1168  * i_ddi_irm_notify()
1169  *
1170  *	Notify a driver of changes to its interrupt request using the
1171  *	generic callback mechanism.  Checks for errors in processing.
1172  */
static int
i_ddi_irm_notify(ddi_irm_pool_t *pool_p, ddi_irm_req_t *req_p)
{
	ddi_cb_action_t	action;
	ddi_cb_t	*cb_p;
	uint_t		nintrs;
	int		ret, count;

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: pool_p %p req_p %p\n",
	    (void *)pool_p, (void *)req_p));

	/*
	 * Do not notify new or unchanged requests; ireq_scratch holds
	 * the allocation last communicated to the driver.
	 */
	if ((req_p->ireq_navail == req_p->ireq_scratch) ||
	    (req_p->ireq_flags & DDI_IRM_FLAG_NEW))
		return (DDI_SUCCESS);

	/* Determine action (add or remove) and the delta in vectors */
	if (req_p->ireq_navail > req_p->ireq_scratch) {
		action = DDI_CB_INTR_ADD;
		count = req_p->ireq_navail - req_p->ireq_scratch;
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: adding %d\n",
		    count));
	} else {
		action = DDI_CB_INTR_REMOVE;
		count = req_p->ireq_scratch - req_p->ireq_navail;
		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: removing %d\n",
		    count));
	}

	/* Lookup driver callback */
	if ((cb_p = DEVI(req_p->ireq_dip)->devi_cb_p) == NULL) {
		DDI_INTR_IRMDBG((CE_WARN, "i_ddi_irm_notify: no callback!\n"));
		return (DDI_FAILURE);
	}

	/* Do callback; count is passed in-band as the callback argument */
	ret = cb_p->cb_func(req_p->ireq_dip, action, (void *)(uintptr_t)count,
	    cb_p->cb_arg1, cb_p->cb_arg2);

	/* Log callback errors (not fatal by itself; see check below) */
	if (ret != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s%d: failed callback (action=%d, ret=%d)\n",
		    ddi_driver_name(req_p->ireq_dip),
		    ddi_get_instance(req_p->ireq_dip), (int)action, ret);
	}

	/*
	 * Check if the driver exceeds its availability.  If it failed
	 * to release interrupts down to its new allocation, charge the
	 * excess back to the pool and raise the request to match what
	 * the driver actually holds.
	 */
	nintrs = i_ddi_intr_get_current_nintrs(req_p->ireq_dip);
	if (nintrs > req_p->ireq_navail) {
		cmn_err(CE_WARN, "%s%d: failed to release interrupts "
		    "(nintrs=%d, navail=%d).\n",
		    ddi_driver_name(req_p->ireq_dip),
		    ddi_get_instance(req_p->ireq_dip), nintrs,
		    req_p->ireq_navail);
		pool_p->ipool_resno += (nintrs - req_p->ireq_navail);
		req_p->ireq_navail = nintrs;
		return (DDI_FAILURE);
	}

	/* Update request: record the allocation the driver now knows about */
	req_p->ireq_scratch = req_p->ireq_navail;

	return (DDI_SUCCESS);
}
1237 
1238 /*
1239  * i_ddi_irm_debug_balance()
1240  *
1241  *	A debug/test only routine to force the immediate,
1242  *	synchronous rebalancing of an interrupt pool.
1243  */
1244 #ifdef	DEBUG
1245 void
1246 i_ddi_irm_debug_balance(dev_info_t *dip, boolean_t wait_flag)
1247 {
1248 	ddi_irm_pool_t	*pool_p;
1249 	int		type;
1250 
1251 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_debug_balance: dip %p wait %d\n",
1252 	    (void *)dip, (int)wait_flag));
1253 
1254 	if (((type = i_ddi_intr_get_current_type(dip)) != 0) &&
1255 	    ((pool_p = i_ddi_intr_get_pool(dip, type)) != NULL)) {
1256 		mutex_enter(&pool_p->ipool_lock);
1257 		i_ddi_irm_enqueue(pool_p, wait_flag);
1258 		mutex_exit(&pool_p->ipool_lock);
1259 	}
1260 }
1261 #endif
1262