xref: /titanic_41/usr/src/uts/common/os/ddi_intr_irm.c (revision 9a5d73e03cd3312ddb571a748c40a63c58bd66e5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/note.h>
27 #include <sys/sysmacros.h>
28 #include <sys/types.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kmem.h>
32 #include <sys/cmn_err.h>
33 #include <sys/debug.h>
34 #include <sys/ddi.h>
35 #include <sys/sunndi.h>
36 #include <sys/ndi_impldefs.h>	/* include prototypes */
37 
38 /*
39  * Interrupt Resource Management (IRM).
40  */
41 
#define	DDI_IRM_BALANCE_DELAY	(60)	/* In seconds */

/*
 * True when callback registration 'c' is non-NULL and has the
 * DDI_CB_FLAG_INTR flag set.  The argument is fully parenthesized
 * so that any pointer expression may be passed.
 */
#define	DDI_IRM_HAS_CB(c)	((c) && ((c)->cb_flags & DDI_CB_FLAG_INTR))

/*
 * True when request 'r' may be resized by the balancing code: either
 * it is an MSI-X request with the callback flag set, or it is still
 * flagged as new.  The argument is fully parenthesized so that any
 * pointer expression may be passed.
 */
#define	DDI_IRM_IS_REDUCIBLE(r)	\
	((((r)->ireq_flags & DDI_IRM_FLAG_CALLBACK) && \
	((r)->ireq_type == DDI_INTR_TYPE_MSIX)) || \
	((r)->ireq_flags & DDI_IRM_FLAG_NEW))
49 
/* Priority passed to thread_create() for the balancing threads */
extern pri_t	minclsyspri;

/*
 * Global policies
 *
 *	irm_enable		IRM is disabled when this is zero.
 *	irm_active		Set by i_ddi_irm_poststartup(); pools created
 *				afterwards get a balancing thread immediately.
 *	irm_default_policy	Balancing policy copied into each new pool;
 *				reset to DDI_IRM_POLICY_LARGE by irm_init()
 *				if it is not a valid policy.
 *	irm_balance_delay	Delay, in seconds, used by the balancing
 *				thread between being signaled and balancing.
 */
int		irm_enable = 1;
boolean_t	irm_active = B_FALSE;
int		irm_default_policy = DDI_IRM_POLICY_LARGE;
uint_t		irm_balance_delay = DDI_IRM_BALANCE_DELAY;

/* Global list of interrupt pools; irm_pools_lock is held around list updates */
kmutex_t	irm_pools_lock;
list_t		irm_pools_list;

/*
 * Global debug tunables
 *
 *	irm_debug_policy	When a valid policy, temporarily replaces a
 *				pool's policy while it is being balanced.
 *	irm_debug_size		When larger than a pool's ipool_minno,
 *				temporarily replaces the pool's total size
 *				while it is being balanced.
 */
#ifdef	DEBUG
int		irm_debug_policy = 0;
uint_t		irm_debug_size = 0;
#endif	/* DEBUG */

/* Forward declarations of file-local routines */
static void	irm_balance_thread(ddi_irm_pool_t *);
static void	i_ddi_irm_balance(ddi_irm_pool_t *);
static void	i_ddi_irm_enqueue(ddi_irm_pool_t *, boolean_t);
static void	i_ddi_irm_reduce(ddi_irm_pool_t *pool);
static int	i_ddi_irm_reduce_large(ddi_irm_pool_t *, int);
static void	i_ddi_irm_reduce_large_resort(ddi_irm_pool_t *);
static int	i_ddi_irm_reduce_even(ddi_irm_pool_t *, int);
static void	i_ddi_irm_reduce_new(ddi_irm_pool_t *, int);
static void	i_ddi_irm_insertion_sort(list_t *, ddi_irm_req_t *);
static int	i_ddi_irm_notify(ddi_irm_pool_t *, ddi_irm_req_t *);
78 
79 /*
80  * OS Initialization Routines
81  */
82 
83 /*
84  * irm_init()
85  *
86  *	Initialize IRM subsystem before any drivers are attached.
87  */
88 void
89 irm_init(void)
90 {
91 	/* Do nothing if IRM is disabled */
92 	if (!irm_enable)
93 		return;
94 
95 	/* Verify that the default balancing policy is valid */
96 	if (!DDI_IRM_POLICY_VALID(irm_default_policy))
97 		irm_default_policy = DDI_IRM_POLICY_LARGE;
98 
99 	/* Initialize the global list of interrupt pools */
100 	mutex_init(&irm_pools_lock, NULL, MUTEX_DRIVER, NULL);
101 	list_create(&irm_pools_list, sizeof (ddi_irm_pool_t),
102 	    offsetof(ddi_irm_pool_t, ipool_link));
103 }
104 
105 /*
106  * i_ddi_irm_poststartup()
107  *
108  *	IRM is not activated until after the IO subsystem is initialized.
109  *	When activated, per-pool balancing threads are spawned and a flag
110  *	is set so that all future pools will be activated when created.
111  *
112  *	NOTE: the global variable 'irm_enable' disables IRM if zero.
113  */
114 void
115 i_ddi_irm_poststartup(void)
116 {
117 	ddi_irm_pool_t	*pool_p;
118 
119 	/* Do nothing if IRM is disabled */
120 	if (!irm_enable)
121 		return;
122 
123 	/* Lock the global list */
124 	mutex_enter(&irm_pools_lock);
125 
126 	/* Activate all defined pools */
127 	for (pool_p = list_head(&irm_pools_list); pool_p;
128 	    pool_p = list_next(&irm_pools_list, pool_p))
129 		pool_p->ipool_thread = thread_create(NULL, 0,
130 		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);
131 
132 	/* Set future pools to be active */
133 	irm_active = B_TRUE;
134 
135 	/* Unlock the global list */
136 	mutex_exit(&irm_pools_lock);
137 }
138 
139 /*
140  * NDI interfaces for creating/destroying IRM pools.
141  */
142 
143 /*
144  * ndi_irm_create()
145  *
146  *	Nexus interface to create an IRM pool.  Create the new
147  *	pool and add it to the global list of interrupt pools.
148  */
149 int
150 ndi_irm_create(dev_info_t *dip, ddi_irm_params_t *paramsp,
151     ddi_irm_pool_t **pool_retp)
152 {
153 	ddi_irm_pool_t	*pool_p;
154 
155 	ASSERT(dip != NULL);
156 	ASSERT(paramsp != NULL);
157 	ASSERT(pool_retp != NULL);
158 	ASSERT(paramsp->iparams_total >= 1);
159 	ASSERT(paramsp->iparams_types != 0);
160 	ASSERT(paramsp->iparams_default >= 1);
161 
162 	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_create: dip %p\n", (void *)dip));
163 
164 	/* Check if IRM is enabled */
165 	if (!irm_enable)
166 		return (NDI_FAILURE);
167 
168 	/* Validate parameters */
169 	if ((dip == NULL) || (paramsp == NULL) || (pool_retp == NULL) ||
170 	    (paramsp->iparams_total < 1) || (paramsp->iparams_types == 0) ||
171 	    (paramsp->iparams_default < 1))
172 		return (NDI_FAILURE);
173 
174 	/* Allocate and initialize the pool */
175 	pool_p = kmem_zalloc(sizeof (ddi_irm_pool_t), KM_SLEEP);
176 	pool_p->ipool_owner = dip;
177 	pool_p->ipool_policy = irm_default_policy;
178 	pool_p->ipool_types = paramsp->iparams_types;
179 	pool_p->ipool_totsz = paramsp->iparams_total;
180 	pool_p->ipool_defsz = paramsp->iparams_default;
181 	list_create(&pool_p->ipool_req_list, sizeof (ddi_irm_req_t),
182 	    offsetof(ddi_irm_req_t, ireq_link));
183 	list_create(&pool_p->ipool_scratch_list, sizeof (ddi_irm_req_t),
184 	    offsetof(ddi_irm_req_t, ireq_scratch_link));
185 	cv_init(&pool_p->ipool_cv, NULL, CV_DRIVER, NULL);
186 	mutex_init(&pool_p->ipool_lock, NULL, MUTEX_DRIVER, NULL);
187 	mutex_init(&pool_p->ipool_navail_lock, NULL, MUTEX_DRIVER, NULL);
188 
189 	/* Add to global list of pools */
190 	mutex_enter(&irm_pools_lock);
191 	list_insert_tail(&irm_pools_list, pool_p);
192 	mutex_exit(&irm_pools_lock);
193 
194 	/* If IRM is active, then activate the pool */
195 	if (irm_active)
196 		pool_p->ipool_thread = thread_create(NULL, 0,
197 		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);
198 
199 	*pool_retp = pool_p;
200 	return (NDI_SUCCESS);
201 }
202 
203 /*
204  * ndi_irm_destroy()
205  *
206  *	Nexus interface to destroy an IRM pool.  Destroy the pool
207  *	and remove it from the global list of interrupt pools.
208  */
209 int
210 ndi_irm_destroy(ddi_irm_pool_t *pool_p)
211 {
212 	ASSERT(pool_p != NULL);
213 	ASSERT(pool_p->ipool_resno == 0);
214 
215 	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_destroy: pool_p %p\n",
216 	    (void *)pool_p));
217 
218 	/* Validate parameters */
219 	if (pool_p == NULL)
220 		return (NDI_FAILURE);
221 
222 	/* Validate that pool is empty */
223 	if (pool_p->ipool_resno != 0)
224 		return (NDI_BUSY);
225 
226 	/* Remove the pool from the global list */
227 	mutex_enter(&irm_pools_lock);
228 	list_remove(&irm_pools_list, pool_p);
229 	mutex_exit(&irm_pools_lock);
230 
231 	/* Terminate the balancing thread */
232 	mutex_enter(&pool_p->ipool_lock);
233 	if (pool_p->ipool_thread &&
234 	    (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) {
235 		pool_p->ipool_flags |= DDI_IRM_FLAG_EXIT;
236 		cv_signal(&pool_p->ipool_cv);
237 		mutex_exit(&pool_p->ipool_lock);
238 		thread_join(pool_p->ipool_thread->t_did);
239 	} else
240 		mutex_exit(&pool_p->ipool_lock);
241 
242 	/* Destroy the pool */
243 	cv_destroy(&pool_p->ipool_cv);
244 	mutex_destroy(&pool_p->ipool_lock);
245 	mutex_destroy(&pool_p->ipool_navail_lock);
246 	list_destroy(&pool_p->ipool_req_list);
247 	list_destroy(&pool_p->ipool_scratch_list);
248 	kmem_free(pool_p, sizeof (ddi_irm_pool_t));
249 
250 	return (NDI_SUCCESS);
251 }
252 
253 /*
254  * Insert/Modify/Remove Interrupt Requests
255  */
256 
257 /*
258  * i_ddi_irm_insert()
259  *
260  *	Insert a new request into an interrupt pool, and balance the pool.
261  */
262 int
263 i_ddi_irm_insert(dev_info_t *dip, int type, int count)
264 {
265 	ddi_cb_t	*cb_p;
266 	ddi_irm_req_t	*req_p;
267 	devinfo_intr_t	*intr_p;
268 	ddi_irm_pool_t	*pool_p;
269 	uint_t		nreq, nmin, npartial;
270 	boolean_t	irm_flag = B_FALSE;
271 
272 	ASSERT(dip != NULL);
273 	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));
274 	ASSERT(count > 0);
275 
276 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: dip %p type %d count %d\n",
277 	    (void *)dip, type, count));
278 
279 	/* Validate parameters */
280 	if ((dip == NULL) || (count < 1) || !DDI_INTR_TYPE_FLAG_VALID(type)) {
281 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: invalid args\n"));
282 		return (DDI_EINVAL);
283 	}
284 
285 	/* Check for an existing request */
286 	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
287 	    (intr_p->devi_irm_req_p != NULL))
288 		return (DDI_SUCCESS);
289 
290 	/* Check for IRM support from the system */
291 	if ((pool_p = i_ddi_intr_get_pool(dip, type)) == NULL) {
292 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: not supported\n"));
293 		return (DDI_ENOTSUP);
294 	}
295 
296 	/* Check for IRM support from the driver */
297 	if (((cb_p = DEVI(dip)->devi_cb_p) != NULL) && DDI_IRM_HAS_CB(cb_p) &&
298 	    (type == DDI_INTR_TYPE_MSIX))
299 		irm_flag = B_TRUE;
300 
301 	/* Determine request size */
302 	nreq = (irm_flag) ? count : i_ddi_intr_get_current_navail(dip, type);
303 	nmin = (irm_flag) ? 1 : nreq;
304 	npartial = MIN(nreq, pool_p->ipool_defsz);
305 
306 	/* Allocate and initialize the request */
307 	req_p = kmem_zalloc(sizeof (ddi_irm_req_t), KM_SLEEP);
308 	req_p->ireq_type = type;
309 	req_p->ireq_dip = dip;
310 	req_p->ireq_pool_p = pool_p;
311 	req_p->ireq_nreq = nreq;
312 	req_p->ireq_flags = DDI_IRM_FLAG_NEW;
313 	if (DDI_IRM_HAS_CB(cb_p))
314 		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;
315 
316 	/* Lock the pool */
317 	mutex_enter(&pool_p->ipool_lock);
318 
319 	/* Check for minimal fit before inserting */
320 	if ((pool_p->ipool_minno + nmin) > pool_p->ipool_totsz) {
321 		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
322 		    ddi_driver_name(dip), ddi_get_instance(dip));
323 		mutex_exit(&pool_p->ipool_lock);
324 		kmem_free(req_p, sizeof (ddi_irm_req_t));
325 		return (DDI_EAGAIN);
326 	}
327 
328 	/* Insert the request into the pool */
329 	pool_p->ipool_reqno += nreq;
330 	pool_p->ipool_minno += nmin;
331 	i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
332 
333 	/*
334 	 * Try to fulfill the request.
335 	 *
336 	 * If all the interrupts are available, and either the request
337 	 * is static or the pool is active, then just take them directly.
338 	 *
339 	 * If only some of the interrupts are available, and the request
340 	 * can receive future callbacks, then take some now but queue the
341 	 * pool to be rebalanced later.
342 	 *
343 	 * Otherwise, immediately rebalance the pool and wait.
344 	 */
345 	if ((!irm_flag || (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) &&
346 	    ((pool_p->ipool_resno + nreq) <= pool_p->ipool_totsz)) {
347 
348 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
349 		    "request completely fulfilled.\n"));
350 		pool_p->ipool_resno += nreq;
351 		req_p->ireq_navail = nreq;
352 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
353 
354 	} else if (irm_flag &&
355 	    ((pool_p->ipool_resno + npartial) <= pool_p->ipool_totsz)) {
356 
357 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
358 		    "request partially fulfilled.\n"));
359 		pool_p->ipool_resno += npartial;
360 		req_p->ireq_navail = npartial;
361 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
362 		i_ddi_irm_enqueue(pool_p, B_FALSE);
363 
364 	} else {
365 
366 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
367 		    "request needs immediate rebalance.\n"));
368 		i_ddi_irm_enqueue(pool_p, B_TRUE);
369 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
370 	}
371 
372 	/* Fail if the request cannot be fulfilled at all */
373 	if (req_p->ireq_navail == 0) {
374 		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
375 		    ddi_driver_name(dip), ddi_get_instance(dip));
376 		mutex_exit(&pool_p->ipool_lock);
377 		pool_p->ipool_reqno -= nreq;
378 		pool_p->ipool_minno -= nmin;
379 		list_remove(&pool_p->ipool_req_list, req_p);
380 		kmem_free(req_p, sizeof (ddi_irm_req_t));
381 		return (DDI_EAGAIN);
382 	}
383 
384 	/* Unlock the pool */
385 	mutex_exit(&pool_p->ipool_lock);
386 
387 	intr_p->devi_irm_req_p = req_p;
388 	return (DDI_SUCCESS);
389 }
390 
391 /*
392  * i_ddi_irm_modify()
393  *
394  *	Modify an existing request in an interrupt pool, and balance the pool.
395  */
396 int
397 i_ddi_irm_modify(dev_info_t *dip, int nreq)
398 {
399 	devinfo_intr_t	*intr_p;
400 	ddi_irm_req_t	*req_p;
401 	ddi_irm_pool_t	*pool_p;
402 
403 	ASSERT(dip != NULL);
404 
405 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: dip %p nreq %d\n",
406 	    (void *)dip, nreq));
407 
408 	/* Validate parameters */
409 	if ((dip == NULL) || (nreq < 1)) {
410 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
411 		return (DDI_EINVAL);
412 	}
413 
414 	/* Check that the operation is supported */
415 	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
416 	    !(req_p = intr_p->devi_irm_req_p) ||
417 	    !DDI_IRM_IS_REDUCIBLE(req_p)) {
418 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: not supported\n"));
419 		return (DDI_ENOTSUP);
420 	}
421 
422 	/* Validate request size is not too large */
423 	if (nreq > intr_p->devi_intr_sup_nintrs) {
424 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
425 		return (DDI_EINVAL);
426 	}
427 
428 	/*
429 	 * Modify request, but only if new size is different.
430 	 */
431 	if (nreq != req_p->ireq_nreq) {
432 
433 		/* Lock the pool */
434 		pool_p = req_p->ireq_pool_p;
435 		mutex_enter(&pool_p->ipool_lock);
436 
437 		/* Update pool and request */
438 		pool_p->ipool_reqno -= req_p->ireq_nreq;
439 		pool_p->ipool_reqno += nreq;
440 		req_p->ireq_nreq = nreq;
441 
442 		/* Re-sort request in the pool */
443 		list_remove(&pool_p->ipool_req_list, req_p);
444 		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
445 
446 		/* Queue pool to be rebalanced */
447 		i_ddi_irm_enqueue(pool_p, B_FALSE);
448 
449 		/* Unlock the pool */
450 		mutex_exit(&pool_p->ipool_lock);
451 	}
452 
453 	return (DDI_SUCCESS);
454 }
455 
456 /*
457  * i_ddi_irm_remove()
458  *
459  *	Remove a request from an interrupt pool, and balance the pool.
460  */
461 int
462 i_ddi_irm_remove(dev_info_t *dip)
463 {
464 	devinfo_intr_t	*intr_p;
465 	ddi_irm_pool_t	*pool_p;
466 	ddi_irm_req_t	*req_p;
467 	uint_t		nmin;
468 
469 	ASSERT(dip != NULL);
470 
471 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: dip %p\n", (void *)dip));
472 
473 	/* Validate parameters */
474 	if (dip == NULL) {
475 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: invalid args\n"));
476 		return (DDI_EINVAL);
477 	}
478 
479 	/* Check if the device has a request */
480 	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
481 	    !(req_p = intr_p->devi_irm_req_p)) {
482 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: not found\n"));
483 		return (DDI_EINVAL);
484 	}
485 
486 	/* Lock the pool */
487 	pool_p = req_p->ireq_pool_p;
488 	mutex_enter(&pool_p->ipool_lock);
489 
490 	/* Remove request */
491 	nmin = DDI_IRM_IS_REDUCIBLE(req_p) ? 1 : req_p->ireq_nreq;
492 	pool_p->ipool_minno -= nmin;
493 	pool_p->ipool_reqno -= req_p->ireq_nreq;
494 	pool_p->ipool_resno -= req_p->ireq_navail;
495 	list_remove(&pool_p->ipool_req_list, req_p);
496 
497 	/* Queue pool to be rebalanced */
498 	i_ddi_irm_enqueue(pool_p, B_FALSE);
499 
500 	/* Unlock the pool */
501 	mutex_exit(&pool_p->ipool_lock);
502 
503 	/* Destroy the request */
504 	intr_p->devi_irm_req_p = NULL;
505 	kmem_free(req_p, sizeof (ddi_irm_req_t));
506 
507 	return (DDI_SUCCESS);
508 }
509 
510 /*
511  * i_ddi_irm_set_cb()
512  *
513  *	Change the callback flag for a request, in response to
514  *	a change in its callback registration.  Then rebalance
515  *	the interrupt pool.
516  *
517  *	NOTE: the request is not locked because the navail value
518  *	      is not directly affected.  The balancing thread may
519  *	      modify the navail value in the background after it
520  *	      locks the request itself.
521  */
522 void
523 i_ddi_irm_set_cb(dev_info_t *dip, boolean_t has_cb_flag)
524 {
525 	devinfo_intr_t	*intr_p;
526 	ddi_irm_pool_t	*pool_p;
527 	ddi_irm_req_t	*req_p;
528 	uint_t		nreq;
529 
530 	ASSERT(dip != NULL);
531 
532 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: dip %p has_cb_flag %d\n",
533 	    (void *)dip, (int)has_cb_flag));
534 
535 	/* Validate parameters */
536 	if (dip == NULL)
537 		return;
538 
539 	/* Check for association with interrupt pool */
540 	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
541 	    !(req_p = intr_p->devi_irm_req_p)) {
542 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: not in pool\n"));
543 		return;
544 	}
545 
546 	/* Lock the pool */
547 	pool_p = req_p->ireq_pool_p;
548 	mutex_enter(&pool_p->ipool_lock);
549 
550 	/*
551 	 * Update the request and the pool
552 	 */
553 	if (has_cb_flag) {
554 
555 		/* Update pool statistics */
556 		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX)
557 			pool_p->ipool_minno -= (req_p->ireq_nreq - 1);
558 
559 		/* Update request */
560 		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;
561 
562 		/* Rebalance in background */
563 		i_ddi_irm_enqueue(pool_p, B_FALSE);
564 
565 	} else {
566 
567 		/* Determine new request size */
568 		nreq = MIN(req_p->ireq_nreq, pool_p->ipool_defsz);
569 
570 		/* Update pool statistics */
571 		pool_p->ipool_reqno -= req_p->ireq_nreq;
572 		pool_p->ipool_reqno += nreq;
573 		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX) {
574 			pool_p->ipool_minno -= 1;
575 			pool_p->ipool_minno += nreq;
576 		} else {
577 			pool_p->ipool_minno -= req_p->ireq_nreq;
578 			pool_p->ipool_minno += nreq;
579 		}
580 
581 		/* Update request size, and re-sort in pool */
582 		req_p->ireq_nreq = nreq;
583 		list_remove(&pool_p->ipool_req_list, req_p);
584 		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
585 
586 		/* Rebalance synchronously, before losing callback */
587 		i_ddi_irm_enqueue(pool_p, B_TRUE);
588 
589 		/* Remove callback flag */
590 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_CALLBACK);
591 	}
592 
593 	/* Unlock the pool */
594 	mutex_exit(&pool_p->ipool_lock);
595 }
596 
597 /*
598  * Interrupt Pool Balancing
599  */
600 
601 /*
602  * irm_balance_thread()
603  *
604  *	One instance of this thread operates per each defined IRM pool.
605  *	It does the initial activation of the pool, as well as balancing
606  *	any requests that were queued up before the pool was active.
607  *	Once active, it waits forever to service balance operations.
608  */
609 static void
610 irm_balance_thread(ddi_irm_pool_t *pool_p)
611 {
612 	clock_t		interval, wakeup;
613 
614 	DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: pool_p %p\n",
615 	    (void *)pool_p));
616 
617 	/* Lock the pool */
618 	mutex_enter(&pool_p->ipool_lock);
619 
620 	/* Perform initial balance if required */
621 	if (pool_p->ipool_reqno > pool_p->ipool_resno)
622 		i_ddi_irm_balance(pool_p);
623 
624 	/* Activate the pool */
625 	pool_p->ipool_flags |= DDI_IRM_FLAG_ACTIVE;
626 
627 	/* Main loop */
628 	for (;;) {
629 
630 		/* Compute the delay interval */
631 		interval = drv_usectohz(irm_balance_delay * 1000000);
632 
633 		/* Sleep until queued */
634 		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);
635 
636 		DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: signaled.\n"));
637 
638 		/* Wait one interval, or until there are waiters */
639 		if ((interval > 0) &&
640 		    !(pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) &&
641 		    !(pool_p->ipool_flags & DDI_IRM_FLAG_EXIT)) {
642 			wakeup = ddi_get_lbolt() + interval;
643 			(void) cv_timedwait(&pool_p->ipool_cv,
644 			    &pool_p->ipool_lock, wakeup);
645 		}
646 
647 		/* Check if awakened to exit */
648 		if (pool_p->ipool_flags & DDI_IRM_FLAG_EXIT) {
649 			DDI_INTR_IRMDBG((CE_CONT,
650 			    "irm_balance_thread: exiting...\n"));
651 			mutex_exit(&pool_p->ipool_lock);
652 			thread_exit();
653 		}
654 
655 		/* Balance the pool */
656 		i_ddi_irm_balance(pool_p);
657 
658 		/* Notify waiters */
659 		if (pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) {
660 			cv_broadcast(&pool_p->ipool_cv);
661 			pool_p->ipool_flags &= ~(DDI_IRM_FLAG_WAITERS);
662 		}
663 
664 		/* Clear QUEUED condition */
665 		pool_p->ipool_flags &= ~(DDI_IRM_FLAG_QUEUED);
666 	}
667 }
668 
669 /*
670  * i_ddi_irm_balance()
671  *
672  *	Balance a pool.  The general algorithm is to first reset all
673  *	requests to their maximum size, use reduction algorithms to
674  *	solve any imbalance, and then notify affected drivers.
675  */
676 static void
677 i_ddi_irm_balance(ddi_irm_pool_t *pool_p)
678 {
679 	ddi_irm_req_t	*req_p;
680 
681 #ifdef	DEBUG
682 	uint_t		debug_totsz = 0;
683 	int		debug_policy = 0;
684 #endif	/* DEBUG */
685 
686 	ASSERT(pool_p != NULL);
687 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
688 
689 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: pool_p %p\n",
690 	    (void *)pool_p));
691 
692 #ifdef	DEBUG	/* Adjust size and policy settings */
693 	if (irm_debug_size > pool_p->ipool_minno) {
694 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: debug size %d\n",
695 		    irm_debug_size));
696 		debug_totsz = pool_p->ipool_totsz;
697 		pool_p->ipool_totsz = irm_debug_size;
698 	}
699 	if (DDI_IRM_POLICY_VALID(irm_debug_policy)) {
700 		DDI_INTR_IRMDBG((CE_CONT,
701 		    "i_ddi_irm_balance: debug policy %d\n", irm_debug_policy));
702 		debug_policy = pool_p->ipool_policy;
703 		pool_p->ipool_policy = irm_debug_policy;
704 	}
705 #endif	/* DEBUG */
706 
707 	/* Lock the availability lock */
708 	mutex_enter(&pool_p->ipool_navail_lock);
709 
710 	/*
711 	 * Put all of the reducible requests into a scratch list.
712 	 * Reset each one of them to their maximum availability.
713 	 */
714 	for (req_p = list_head(&pool_p->ipool_req_list); req_p;
715 	    req_p = list_next(&pool_p->ipool_req_list, req_p)) {
716 		if (DDI_IRM_IS_REDUCIBLE(req_p)) {
717 			pool_p->ipool_resno -= req_p->ireq_navail;
718 			req_p->ireq_scratch = req_p->ireq_navail;
719 			req_p->ireq_navail = req_p->ireq_nreq;
720 			pool_p->ipool_resno += req_p->ireq_navail;
721 			list_insert_tail(&pool_p->ipool_scratch_list, req_p);
722 		}
723 	}
724 
725 	/* Balance the requests */
726 	i_ddi_irm_reduce(pool_p);
727 
728 	/* Unlock the availability lock */
729 	mutex_exit(&pool_p->ipool_navail_lock);
730 
731 	/*
732 	 * Process REMOVE notifications.
733 	 *
734 	 * If a driver fails to release interrupts: exclude it from
735 	 * further processing, correct the resulting imbalance, and
736 	 * start over again at the head of the scratch list.
737 	 */
738 	req_p = list_head(&pool_p->ipool_scratch_list);
739 	while (req_p) {
740 		if ((req_p->ireq_navail < req_p->ireq_scratch) &&
741 		    (i_ddi_irm_notify(pool_p, req_p) != DDI_SUCCESS)) {
742 			list_remove(&pool_p->ipool_scratch_list, req_p);
743 			mutex_enter(&pool_p->ipool_navail_lock);
744 			i_ddi_irm_reduce(pool_p);
745 			mutex_exit(&pool_p->ipool_navail_lock);
746 			req_p = list_head(&pool_p->ipool_scratch_list);
747 		} else {
748 			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
749 		}
750 	}
751 
752 	/*
753 	 * Process ADD notifications.
754 	 *
755 	 * This is the last use of the scratch list, so empty it.
756 	 */
757 	while (req_p = list_remove_head(&pool_p->ipool_scratch_list)) {
758 		if (req_p->ireq_navail > req_p->ireq_scratch) {
759 			(void) i_ddi_irm_notify(pool_p, req_p);
760 		}
761 	}
762 
763 #ifdef	DEBUG	/* Restore size and policy settings */
764 	if (debug_totsz != 0)
765 		pool_p->ipool_totsz = debug_totsz;
766 	if (debug_policy != 0)
767 		pool_p->ipool_policy = debug_policy;
768 #endif	/* DEBUG */
769 }
770 
771 /*
772  * i_ddi_irm_reduce()
773  *
774  *	Use reduction algorithms to correct an imbalance in a pool.
775  */
776 static void
777 i_ddi_irm_reduce(ddi_irm_pool_t *pool_p)
778 {
779 	int	ret, imbalance;
780 
781 	ASSERT(pool_p != NULL);
782 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
783 	ASSERT(DDI_IRM_POLICY_VALID(pool_p->ipool_policy));
784 
785 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_reduce: pool_p %p\n",
786 	    (void *)pool_p));
787 
788 	/* Compute the imbalance.  Do nothing if already balanced. */
789 	if ((imbalance = pool_p->ipool_resno - pool_p->ipool_totsz) <= 0)
790 		return;
791 
792 	/* Reduce by policy */
793 	switch (pool_p->ipool_policy) {
794 	case DDI_IRM_POLICY_LARGE:
795 		ret = i_ddi_irm_reduce_large(pool_p, imbalance);
796 		break;
797 	case DDI_IRM_POLICY_EVEN:
798 		ret = i_ddi_irm_reduce_even(pool_p, imbalance);
799 		break;
800 	}
801 
802 	/*
803 	 * If the policy based reductions failed, then
804 	 * possibly reduce new requests as a last resort.
805 	 */
806 	if (ret != DDI_SUCCESS) {
807 
808 		DDI_INTR_IRMDBG((CE_CONT,
809 		    "i_ddi_irm_reduce: policy reductions failed.\n"));
810 
811 		/* Compute remaining imbalance */
812 		imbalance = pool_p->ipool_resno - pool_p->ipool_totsz;
813 
814 		ASSERT(imbalance > 0);
815 
816 		i_ddi_irm_reduce_new(pool_p, imbalance);
817 	}
818 }
819 
820 /*
821  * i_ddi_irm_enqueue()
822  *
823  *	Queue a pool to be balanced.  Signals the balancing thread to wake
824  *	up and process the pool.  If 'wait_flag' is true, then the current
825  *	thread becomes a waiter and blocks until the balance is completed.
826  */
827 static void
828 i_ddi_irm_enqueue(ddi_irm_pool_t *pool_p, boolean_t wait_flag)
829 {
830 	ASSERT(pool_p != NULL);
831 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
832 
833 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool_p %p wait_flag %d\n",
834 	    (void *)pool_p, (int)wait_flag));
835 
836 	/* Do nothing if pool is already balanced */
837 #ifndef	DEBUG
838 	if ((pool_p->ipool_reqno == pool_p->ipool_resno)) {
839 #else
840 	if ((pool_p->ipool_reqno == pool_p->ipool_resno) && !irm_debug_size) {
841 #endif	/* DEBUG */
842 		DDI_INTR_IRMDBG((CE_CONT,
843 		    "i_ddi_irm_enqueue: pool already balanced\n"));
844 		return;
845 	}
846 
847 	/* Avoid deadlocks when IRM is not active */
848 	if (!irm_active && wait_flag) {
849 		DDI_INTR_IRMDBG((CE_CONT,
850 		    "i_ddi_irm_enqueue: pool not active.\n"));
851 		return;
852 	}
853 
854 	if (wait_flag)
855 		pool_p->ipool_flags |= DDI_IRM_FLAG_WAITERS;
856 
857 	if (wait_flag || !(pool_p->ipool_flags & DDI_IRM_FLAG_QUEUED)) {
858 		pool_p->ipool_flags |= DDI_IRM_FLAG_QUEUED;
859 		cv_signal(&pool_p->ipool_cv);
860 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool queued.\n"));
861 	}
862 
863 	if (wait_flag) {
864 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: waiting...\n"));
865 		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);
866 	}
867 }
868 
869 /*
870  * Reduction Algorithms, Used For Balancing
871  */
872 
873 /*
874  * i_ddi_irm_reduce_large()
875  *
876  *	Algorithm for the DDI_IRM_POLICY_LARGE reduction policy.
877  *
878  *	This algorithm generally reduces larger requests first, before
879  *	advancing to smaller requests.  The scratch list is initially
880  *	sorted in descending order by current navail values, which are
881  *	maximized prior to reduction.  This sorted order is preserved,
882  *	but within a range of equally sized requests they are secondarily
883  *	sorted in ascending order by initial nreq value.  The head of the
884  *	list is always selected for reduction, since it is the current
885  *	largest request.  After being reduced, it is sorted further into
886  *	the list before the next iteration.
887  *
888  *	Optimizations in this algorithm include trying to reduce multiple
889  *	requests together if they are equally sized.  And the algorithm
890  *	attempts to reduce in larger increments when possible to minimize
891  *	the total number of iterations.
892  */
893 static int
894 i_ddi_irm_reduce_large(ddi_irm_pool_t *pool_p, int imbalance)
895 {
896 	ddi_irm_req_t	*req_p, *next_p;
897 	int		nreqs, reduction;
898 
899 	ASSERT(pool_p != NULL);
900 	ASSERT(imbalance > 0);
901 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
902 
903 	DDI_INTR_IRMDBG((CE_CONT,
904 	    "i_ddi_irm_reduce_large: pool_p %p imbalance %d\n", (void *)pool_p,
905 	    imbalance));
906 
907 	while (imbalance > 0) {
908 
909 		req_p = list_head(&pool_p->ipool_scratch_list);
910 		next_p = list_next(&pool_p->ipool_scratch_list, req_p);
911 
912 		/* Fail if nothing is reducible */
913 		if (req_p->ireq_navail == 1) {
914 			DDI_INTR_IRMDBG((CE_CONT,
915 			    "i_ddi_irm_reduce_large: failure.\n"));
916 			return (DDI_FAILURE);
917 		}
918 
919 		/* Count the number of equally sized requests */
920 		nreqs = 1;
921 		while (next_p && (req_p->ireq_navail == next_p->ireq_navail)) {
922 			next_p = list_next(&pool_p->ipool_scratch_list, next_p);
923 			nreqs++;
924 		}
925 
926 		/* Try to reduce multiple requests together */
927 		if (nreqs > 1) {
928 
929 			if (next_p) {
930 				reduction = req_p->ireq_navail -
931 				    (next_p->ireq_navail + 1);
932 			} else {
933 				reduction = req_p->ireq_navail - 1;
934 			}
935 
936 			if ((reduction * nreqs) > imbalance)
937 				reduction = imbalance / nreqs;
938 
939 			if (reduction > 0) {
940 				while (req_p && (req_p != next_p)) {
941 					imbalance -= reduction;
942 					req_p->ireq_navail -= reduction;
943 					pool_p->ipool_resno -= reduction;
944 					req_p = list_next(
945 					    &pool_p->ipool_scratch_list, req_p);
946 				}
947 				continue;
948 			}
949 		}
950 
951 		/* Or just reduce the current request */
952 		next_p = list_next(&pool_p->ipool_scratch_list, req_p);
953 		if (next_p && (req_p->ireq_navail > next_p->ireq_navail)) {
954 			reduction = req_p->ireq_navail - next_p->ireq_navail;
955 			reduction = MIN(reduction, imbalance);
956 		} else {
957 			reduction = 1;
958 		}
959 		imbalance -= reduction;
960 		req_p->ireq_navail -= reduction;
961 		pool_p->ipool_resno -= reduction;
962 
963 		/* Re-sort the scratch list if not yet finished */
964 		if (imbalance > 0) {
965 			i_ddi_irm_reduce_large_resort(pool_p);
966 		}
967 	}
968 
969 	return (DDI_SUCCESS);
970 }
971 
972 /*
973  * i_ddi_irm_reduce_large_resort()
974  *
975  *	Helper function for i_ddi_irm_reduce_large().  Once a request
976  *	is reduced, this resorts it further down into the list as necessary.
977  */
978 static void
979 i_ddi_irm_reduce_large_resort(ddi_irm_pool_t *pool_p)
980 {
981 	ddi_irm_req_t	*req_p, *next_p;
982 
983 	ASSERT(pool_p != NULL);
984 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
985 
986 	req_p = list_remove_head(&pool_p->ipool_scratch_list);
987 	next_p = list_head(&pool_p->ipool_scratch_list);
988 
989 	while (next_p &&
990 	    ((next_p->ireq_navail > req_p->ireq_navail) ||
991 	    ((next_p->ireq_navail == req_p->ireq_navail) &&
992 	    (next_p->ireq_nreq < req_p->ireq_nreq))))
993 		next_p = list_next(&pool_p->ipool_scratch_list, next_p);
994 
995 	list_insert_before(&pool_p->ipool_scratch_list, next_p, req_p);
996 }
997 
998 /*
999  * i_ddi_irm_reduce_even()
1000  *
1001  *	Algorithm for the DDI_IRM_POLICY_EVEN reduction policy.
1002  *
1003  *	This algorithm reduces requests evenly, without giving a
1004  *	specific preference to smaller or larger requests.  Each
1005  *	iteration reduces all reducible requests by the same amount
1006  *	until the imbalance is corrected.  Although when possible,
1007  *	it tries to avoid reducing requests below the threshold of
1008  *	the interrupt pool's default allocation size.
1009  *
1010  *	An optimization in this algorithm is to reduce the requests
1011  *	in larger increments during each iteration, to minimize the
1012  *	total number of iterations required.
1013  */
1014 static int
1015 i_ddi_irm_reduce_even(ddi_irm_pool_t *pool_p, int imbalance)
1016 {
1017 	ddi_irm_req_t	*req_p, *last_p;
1018 	uint_t		nmin = pool_p->ipool_defsz;
1019 	uint_t		nreduce, reduction;
1020 
1021 	ASSERT(pool_p != NULL);
1022 	ASSERT(imbalance > 0);
1023 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
1024 
1025 	DDI_INTR_IRMDBG((CE_CONT,
1026 	    "i_ddi_irm_reduce_even: pool_p %p imbalance %d\n",
1027 	    (void *)pool_p, imbalance));
1028 
1029 	while ((nmin > 0) && (imbalance > 0)) {
1030 
1031 		/* Count reducible requests */
1032 		nreduce = 0;
1033 		for (req_p = list_head(&pool_p->ipool_scratch_list); req_p;
1034 		    req_p = list_next(&pool_p->ipool_scratch_list, req_p)) {
1035 			if (req_p->ireq_navail <= nmin)
1036 				break;
1037 			last_p = req_p;
1038 			nreduce++;
1039 		}
1040 
1041 		/* If none are reducible, try a lower minimum */
1042 		if (nreduce == 0) {
1043 			nmin--;
1044 			continue;
1045 		}
1046 
1047 		/* Compute reduction */
1048 		if (nreduce < imbalance) {
1049 			reduction = last_p->ireq_navail - nmin;
1050 			if ((reduction * nreduce) > imbalance) {
1051 				reduction = imbalance / nreduce;
1052 			}
1053 		} else {
1054 			reduction = 1;
1055 		}
1056 
1057 		/* Start at head of list, but skip excess */
1058 		req_p = list_head(&pool_p->ipool_scratch_list);
1059 		while (nreduce > imbalance) {
1060 			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
1061 			nreduce--;
1062 		}
1063 
1064 		/* Do reductions */
1065 		while (req_p && (nreduce > 0)) {
1066 			imbalance -= reduction;
1067 			req_p->ireq_navail -= reduction;
1068 			pool_p->ipool_resno -= reduction;
1069 			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
1070 			nreduce--;
1071 		}
1072 	}
1073 
1074 	if (nmin == 0) {
1075 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_reduce_even: failure.\n"));
1076 		return (DDI_FAILURE);
1077 	}
1078 
1079 	return (DDI_SUCCESS);
1080 }
1081 
1082 /*
1083  * i_ddi_irm_reduce_new()
1084  *
1085  *	Reduces new requests to zero.  This is only used as a
1086  *	last resort after another reduction algorithm failed.
1087  */
1088 static void
1089 i_ddi_irm_reduce_new(ddi_irm_pool_t *pool_p, int imbalance)
1090 {
1091 	ddi_irm_req_t	*req_p;
1092 
1093 	ASSERT(pool_p != NULL);
1094 	ASSERT(imbalance > 0);
1095 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
1096 
1097 	for (req_p = list_head(&pool_p->ipool_scratch_list);
1098 	    req_p && (imbalance > 0);
1099 	    req_p = list_next(&pool_p->ipool_scratch_list, req_p)) {
1100 		ASSERT(req_p->ireq_navail == 1);
1101 		if (req_p->ireq_flags & DDI_IRM_FLAG_NEW) {
1102 			req_p->ireq_navail--;
1103 			pool_p->ipool_resno--;
1104 			imbalance--;
1105 		}
1106 	}
1107 }
1108 
1109 /*
1110  * Miscellaneous Helper Functions
1111  */
1112 
1113 /*
1114  * i_ddi_intr_get_pool()
1115  *
1116  *	Get an IRM pool that supplies interrupts of a specified type.
1117  *	Invokes a DDI_INTROP_GETPOOL to the bus nexus driver.  Fails
1118  *	if no pool exists.
1119  */
1120 ddi_irm_pool_t *
1121 i_ddi_intr_get_pool(dev_info_t *dip, int type)
1122 {
1123 	devinfo_intr_t		*intr_p;
1124 	ddi_irm_pool_t		*pool_p;
1125 	ddi_irm_req_t		*req_p;
1126 	ddi_intr_handle_impl_t	hdl;
1127 
1128 	ASSERT(dip != NULL);
1129 	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));
1130 
1131 	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
1132 	    ((req_p = intr_p->devi_irm_req_p) != NULL) &&
1133 	    ((pool_p = req_p->ireq_pool_p) != NULL) &&
1134 	    (pool_p->ipool_types & type)) {
1135 		return (pool_p);
1136 	}
1137 
1138 	bzero(&hdl, sizeof (ddi_intr_handle_impl_t));
1139 	hdl.ih_dip = dip;
1140 	hdl.ih_type = type;
1141 
1142 	if (i_ddi_intr_ops(dip, dip, DDI_INTROP_GETPOOL,
1143 	    &hdl, (void *)&pool_p) == DDI_SUCCESS)
1144 		return (pool_p);
1145 
1146 	return (NULL);
1147 }
1148 
1149 /*
1150  * i_ddi_irm_insertion_sort()
1151  *
1152  *	Use the insertion sort method to insert a request into a list.
1153  *	The list is sorted in descending order by request size.
1154  */
1155 static void
1156 i_ddi_irm_insertion_sort(list_t *req_list, ddi_irm_req_t *req_p)
1157 {
1158 	ddi_irm_req_t	*next_p;
1159 
1160 	next_p = list_head(req_list);
1161 
1162 	while (next_p && (next_p->ireq_nreq > req_p->ireq_nreq))
1163 		next_p = list_next(req_list, next_p);
1164 
1165 	list_insert_before(req_list, next_p, req_p);
1166 }
1167 
1168 /*
1169  * i_ddi_irm_notify()
1170  *
1171  *	Notify a driver of changes to its interrupt request using the
1172  *	generic callback mechanism.  Checks for errors in processing.
1173  */
1174 static int
1175 i_ddi_irm_notify(ddi_irm_pool_t *pool_p, ddi_irm_req_t *req_p)
1176 {
1177 	ddi_cb_action_t	action;
1178 	ddi_cb_t	*cb_p;
1179 	uint_t		nintrs;
1180 	int		ret, count;
1181 
1182 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: pool_p %p req_p %p\n",
1183 	    (void *)pool_p, (void *)req_p));
1184 
1185 	/* Do not notify new or unchanged requests */
1186 	if ((req_p->ireq_navail == req_p->ireq_scratch) ||
1187 	    (req_p->ireq_flags & DDI_IRM_FLAG_NEW))
1188 		return (DDI_SUCCESS);
1189 
1190 	/* Determine action and count */
1191 	if (req_p->ireq_navail > req_p->ireq_scratch) {
1192 		action = DDI_CB_INTR_ADD;
1193 		count = req_p->ireq_navail - req_p->ireq_scratch;
1194 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: adding %d\n",
1195 		    count));
1196 	} else {
1197 		action = DDI_CB_INTR_REMOVE;
1198 		count = req_p->ireq_scratch - req_p->ireq_navail;
1199 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: removing %d\n",
1200 		    count));
1201 	}
1202 
1203 	/* Lookup driver callback */
1204 	if ((cb_p = DEVI(req_p->ireq_dip)->devi_cb_p) == NULL) {
1205 		DDI_INTR_IRMDBG((CE_WARN, "i_ddi_irm_notify: no callback!\n"));
1206 		return (DDI_FAILURE);
1207 	}
1208 
1209 	/* Do callback */
1210 	ret = cb_p->cb_func(req_p->ireq_dip, action, (void *)(uintptr_t)count,
1211 	    cb_p->cb_arg1, cb_p->cb_arg2);
1212 
1213 	/* Log callback errors */
1214 	if (ret != DDI_SUCCESS) {
1215 		cmn_err(CE_WARN, "%s%d: failed callback (action=%d, ret=%d)\n",
1216 		    ddi_driver_name(req_p->ireq_dip),
1217 		    ddi_get_instance(req_p->ireq_dip), (int)action, ret);
1218 	}
1219 
1220 	/* Check if the driver exceeds its availability */
1221 	nintrs = i_ddi_intr_get_current_nintrs(req_p->ireq_dip);
1222 	if (nintrs > req_p->ireq_navail) {
1223 		cmn_err(CE_WARN, "%s%d: failed to release interrupts "
1224 		    "(nintrs=%d, navail=%d).\n",
1225 		    ddi_driver_name(req_p->ireq_dip),
1226 		    ddi_get_instance(req_p->ireq_dip), nintrs,
1227 		    req_p->ireq_navail);
1228 		pool_p->ipool_resno += (nintrs - req_p->ireq_navail);
1229 		req_p->ireq_navail = nintrs;
1230 		return (DDI_FAILURE);
1231 	}
1232 
1233 	/* Update request */
1234 	req_p->ireq_scratch = req_p->ireq_navail;
1235 
1236 	return (DDI_SUCCESS);
1237 }
1238 
/*
 * i_ddi_irm_debug_balance()
 *
 *	Debug/test only routine, compiled in DEBUG builds.  Forces a
 *	rebalance of the interrupt pool serving a device by queueing the
 *	pool through i_ddi_irm_enqueue() with the pool lock held.
 *	'wait_flag' is passed to that routine unchanged.
 *
 *	Does nothing when the device has no current interrupt type or
 *	no pool can be found for that type.
 */
#ifdef	DEBUG
void
i_ddi_irm_debug_balance(dev_info_t *dip, boolean_t wait_flag)
{
	ddi_irm_pool_t	*pool_p;
	int		type;

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_debug_balance: dip %p wait %d\n",
	    (void *)dip, (int)wait_flag));

	/* No interrupt type currently in use */
	type = i_ddi_intr_get_current_type(dip);
	if (type == 0)
		return;

	/* No pool supplies that type to this device */
	pool_p = i_ddi_intr_get_pool(dip, type);
	if (pool_p == NULL)
		return;

	mutex_enter(&pool_p->ipool_lock);
	i_ddi_irm_enqueue(pool_p, wait_flag);
	mutex_exit(&pool_p->ipool_lock);
}
#endif	/* DEBUG */
1263