xref: /titanic_44/usr/src/uts/common/os/ddi_intr_irm.c (revision e58a33b62cd4c9a6815fd752ce58b5f389289da1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/note.h>
26 #include <sys/sysmacros.h>
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kmem.h>
31 #include <sys/cmn_err.h>
32 #include <sys/debug.h>
33 #include <sys/ddi.h>
34 #include <sys/sunndi.h>
35 #include <sys/ndi_impldefs.h>	/* include prototypes */
36 
37 /*
38  * Interrupt Resource Management (IRM).
39  */
40 
/* Default value of the 'irm_balance_delay' tunable, in seconds. */
#define	DDI_IRM_BALANCE_DELAY	(60)

/*
 * True when 'c' is a non-NULL callback registration whose cb_flags
 * include DDI_CB_FLAG_INTR.  The argument is fully parenthesized so
 * that any pointer expression may be passed.
 */
#define	DDI_IRM_HAS_CB(c)	((c) && ((c)->cb_flags & DDI_CB_FLAG_INTR))

/*
 * True when request 'r' may be reduced by a rebalance: either it is an
 * MSI-X request with the CALLBACK flag set, or it still carries the NEW
 * flag.  The argument is fully parenthesized so that any pointer
 * expression may be passed.
 */
#define	DDI_IRM_IS_REDUCIBLE(r)	\
	((((r)->ireq_flags & DDI_IRM_FLAG_CALLBACK) && \
	((r)->ireq_type == DDI_INTR_TYPE_MSIX)) || \
	((r)->ireq_flags & DDI_IRM_FLAG_NEW))
48 
49 extern pri_t	minclsyspri;
50 
51 /* Global policies */
52 int		irm_enable = 1;
53 boolean_t	irm_active = B_FALSE;
54 int		irm_default_policy = DDI_IRM_POLICY_LARGE;
55 uint_t		irm_balance_delay = DDI_IRM_BALANCE_DELAY;
56 
57 /* Global list of interrupt pools */
58 kmutex_t	irm_pools_lock;
59 list_t		irm_pools_list;
60 
61 /* Global debug tunables */
62 #ifdef	DEBUG
63 int		irm_debug_policy = 0;
64 uint_t		irm_debug_size = 0;
65 #endif	/* DEBUG */
66 
67 static void	irm_balance_thread(ddi_irm_pool_t *);
68 static void	i_ddi_irm_balance(ddi_irm_pool_t *);
69 static void	i_ddi_irm_enqueue(ddi_irm_pool_t *, boolean_t);
70 static void	i_ddi_irm_reduce(ddi_irm_pool_t *pool);
71 static int	i_ddi_irm_reduce_by_policy(ddi_irm_pool_t *, int, int);
72 static void	i_ddi_irm_reduce_new(ddi_irm_pool_t *, int);
73 static void	i_ddi_irm_insertion_sort(list_t *, ddi_irm_req_t *);
74 static int	i_ddi_irm_notify(ddi_irm_pool_t *, ddi_irm_req_t *);
75 static int	i_ddi_irm_modify_increase(ddi_irm_req_t *, int);
76 
77 /*
78  * OS Initialization Routines
79  */
80 
81 /*
82  * irm_init()
83  *
84  *	Initialize IRM subsystem before any drivers are attached.
85  */
86 void
87 irm_init(void)
88 {
89 	/* Do nothing if IRM is disabled */
90 	if (!irm_enable)
91 		return;
92 
93 	/* Verify that the default balancing policy is valid */
94 	if (!DDI_IRM_POLICY_VALID(irm_default_policy))
95 		irm_default_policy = DDI_IRM_POLICY_LARGE;
96 
97 	/* Initialize the global list of interrupt pools */
98 	mutex_init(&irm_pools_lock, NULL, MUTEX_DRIVER, NULL);
99 	list_create(&irm_pools_list, sizeof (ddi_irm_pool_t),
100 	    offsetof(ddi_irm_pool_t, ipool_link));
101 }
102 
103 /*
104  * i_ddi_irm_poststartup()
105  *
106  *	IRM is not activated until after the IO subsystem is initialized.
107  *	When activated, per-pool balancing threads are spawned and a flag
108  *	is set so that all future pools will be activated when created.
109  *
110  *	NOTE: the global variable 'irm_enable' disables IRM if zero.
111  */
112 void
113 i_ddi_irm_poststartup(void)
114 {
115 	ddi_irm_pool_t	*pool_p;
116 
117 	/* Do nothing if IRM is disabled */
118 	if (!irm_enable)
119 		return;
120 
121 	/* Lock the global list */
122 	mutex_enter(&irm_pools_lock);
123 
124 	/* Activate all defined pools */
125 	for (pool_p = list_head(&irm_pools_list); pool_p;
126 	    pool_p = list_next(&irm_pools_list, pool_p))
127 		pool_p->ipool_thread = thread_create(NULL, 0,
128 		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);
129 
130 	/* Set future pools to be active */
131 	irm_active = B_TRUE;
132 
133 	/* Unlock the global list */
134 	mutex_exit(&irm_pools_lock);
135 }
136 
137 /*
138  * NDI interfaces for creating/destroying IRM pools.
139  */
140 
141 /*
142  * ndi_irm_create()
143  *
144  *	Nexus interface to create an IRM pool.  Create the new
145  *	pool and add it to the global list of interrupt pools.
146  */
147 int
148 ndi_irm_create(dev_info_t *dip, ddi_irm_params_t *paramsp,
149     ddi_irm_pool_t **pool_retp)
150 {
151 	ddi_irm_pool_t	*pool_p;
152 
153 	ASSERT(dip != NULL);
154 	ASSERT(paramsp != NULL);
155 	ASSERT(pool_retp != NULL);
156 	ASSERT(paramsp->iparams_total >= 1);
157 	ASSERT(paramsp->iparams_types != 0);
158 
159 	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_create: dip %p\n", (void *)dip));
160 
161 	/* Check if IRM is enabled */
162 	if (!irm_enable)
163 		return (NDI_FAILURE);
164 
165 	/* Validate parameters */
166 	if ((dip == NULL) || (paramsp == NULL) || (pool_retp == NULL) ||
167 	    (paramsp->iparams_total < 1) || (paramsp->iparams_types == 0))
168 		return (NDI_FAILURE);
169 
170 	/* Allocate and initialize the pool */
171 	pool_p = kmem_zalloc(sizeof (ddi_irm_pool_t), KM_SLEEP);
172 	pool_p->ipool_owner = dip;
173 	pool_p->ipool_policy = irm_default_policy;
174 	pool_p->ipool_types = paramsp->iparams_types;
175 	pool_p->ipool_totsz = paramsp->iparams_total;
176 	pool_p->ipool_defsz = MIN(DDI_MAX_MSIX_ALLOC, MAX(DDI_MIN_MSIX_ALLOC,
177 	    paramsp->iparams_total / DDI_MSIX_ALLOC_DIVIDER));
178 	list_create(&pool_p->ipool_req_list, sizeof (ddi_irm_req_t),
179 	    offsetof(ddi_irm_req_t, ireq_link));
180 	list_create(&pool_p->ipool_scratch_list, sizeof (ddi_irm_req_t),
181 	    offsetof(ddi_irm_req_t, ireq_scratch_link));
182 	cv_init(&pool_p->ipool_cv, NULL, CV_DRIVER, NULL);
183 	mutex_init(&pool_p->ipool_lock, NULL, MUTEX_DRIVER, NULL);
184 	mutex_init(&pool_p->ipool_navail_lock, NULL, MUTEX_DRIVER, NULL);
185 
186 	/* Add to global list of pools */
187 	mutex_enter(&irm_pools_lock);
188 	list_insert_tail(&irm_pools_list, pool_p);
189 	mutex_exit(&irm_pools_lock);
190 
191 	/* If IRM is active, then activate the pool */
192 	if (irm_active)
193 		pool_p->ipool_thread = thread_create(NULL, 0,
194 		    irm_balance_thread, pool_p, 0, &p0, TS_RUN, minclsyspri);
195 
196 	*pool_retp = pool_p;
197 	return (NDI_SUCCESS);
198 }
199 
200 /*
201  * ndi_irm_destroy()
202  *
203  *	Nexus interface to destroy an IRM pool.  Destroy the pool
204  *	and remove it from the global list of interrupt pools.
205  */
206 int
207 ndi_irm_destroy(ddi_irm_pool_t *pool_p)
208 {
209 	ASSERT(pool_p != NULL);
210 	ASSERT(pool_p->ipool_resno == 0);
211 
212 	DDI_INTR_IRMDBG((CE_CONT, "ndi_irm_destroy: pool_p %p\n",
213 	    (void *)pool_p));
214 
215 	/* Validate parameters */
216 	if (pool_p == NULL)
217 		return (NDI_FAILURE);
218 
219 	/* Validate that pool is empty */
220 	if (pool_p->ipool_resno != 0)
221 		return (NDI_BUSY);
222 
223 	/* Remove the pool from the global list */
224 	mutex_enter(&irm_pools_lock);
225 	list_remove(&irm_pools_list, pool_p);
226 	mutex_exit(&irm_pools_lock);
227 
228 	/* Terminate the balancing thread */
229 	mutex_enter(&pool_p->ipool_lock);
230 	if (pool_p->ipool_thread &&
231 	    (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) {
232 		pool_p->ipool_flags |= DDI_IRM_FLAG_EXIT;
233 		cv_signal(&pool_p->ipool_cv);
234 		mutex_exit(&pool_p->ipool_lock);
235 		thread_join(pool_p->ipool_thread->t_did);
236 	} else
237 		mutex_exit(&pool_p->ipool_lock);
238 
239 	/* Destroy the pool */
240 	cv_destroy(&pool_p->ipool_cv);
241 	mutex_destroy(&pool_p->ipool_lock);
242 	mutex_destroy(&pool_p->ipool_navail_lock);
243 	list_destroy(&pool_p->ipool_req_list);
244 	list_destroy(&pool_p->ipool_scratch_list);
245 	kmem_free(pool_p, sizeof (ddi_irm_pool_t));
246 
247 	return (NDI_SUCCESS);
248 }
249 
250 /*
251  * Insert/Modify/Remove Interrupt Requests
252  */
253 
254 /*
255  * i_ddi_irm_insert()
256  *
257  *	Insert a new request into an interrupt pool, and balance the pool.
258  */
259 int
260 i_ddi_irm_insert(dev_info_t *dip, int type, int count)
261 {
262 	ddi_irm_req_t	*req_p;
263 	devinfo_intr_t	*intr_p;
264 	ddi_irm_pool_t	*pool_p;
265 	uint_t		nreq, nmin, npartial;
266 	boolean_t	irm_flag = B_FALSE;
267 
268 	ASSERT(dip != NULL);
269 	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));
270 	ASSERT(count > 0);
271 
272 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: dip %p type %d count %d\n",
273 	    (void *)dip, type, count));
274 
275 	/* Validate parameters */
276 	if ((dip == NULL) || (count < 1) || !DDI_INTR_TYPE_FLAG_VALID(type)) {
277 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: invalid args\n"));
278 		return (DDI_EINVAL);
279 	}
280 
281 	/* Check for an existing request */
282 	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
283 	    (intr_p->devi_irm_req_p != NULL))
284 		return (DDI_SUCCESS);
285 
286 	/* Check for IRM support from the system */
287 	if ((pool_p = i_ddi_intr_get_pool(dip, type)) == NULL) {
288 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: not supported\n"));
289 		return (DDI_ENOTSUP);
290 	}
291 
292 	/* Check for IRM support from the driver */
293 	if (i_ddi_irm_supported(dip, type) == DDI_SUCCESS)
294 		irm_flag = B_TRUE;
295 
296 	/* Determine request size */
297 	nreq = (irm_flag) ? count :
298 	    MIN(count, i_ddi_intr_get_limit(dip, type, pool_p));
299 	nmin = (irm_flag) ? 1 : nreq;
300 	npartial = MIN(nreq, pool_p->ipool_defsz);
301 
302 	/* Allocate and initialize the request */
303 	req_p = kmem_zalloc(sizeof (ddi_irm_req_t), KM_SLEEP);
304 	req_p->ireq_type = type;
305 	req_p->ireq_dip = dip;
306 	req_p->ireq_pool_p = pool_p;
307 	req_p->ireq_nreq = nreq;
308 	req_p->ireq_flags = DDI_IRM_FLAG_NEW;
309 	if (irm_flag)
310 		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;
311 
312 	/* Lock the pool */
313 	mutex_enter(&pool_p->ipool_lock);
314 
315 	/* Check for minimal fit before inserting */
316 	if ((pool_p->ipool_minno + nmin) > pool_p->ipool_totsz) {
317 		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
318 		    ddi_driver_name(dip), ddi_get_instance(dip));
319 		mutex_exit(&pool_p->ipool_lock);
320 		kmem_free(req_p, sizeof (ddi_irm_req_t));
321 		return (DDI_EAGAIN);
322 	}
323 
324 	/* Insert the request into the pool */
325 	pool_p->ipool_reqno += nreq;
326 	pool_p->ipool_minno += nmin;
327 	i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
328 
329 	/*
330 	 * Try to fulfill the request.
331 	 *
332 	 * If all the interrupts are available, and either the request
333 	 * is static or the pool is active, then just take them directly.
334 	 *
335 	 * If only some of the interrupts are available, and the request
336 	 * can receive future callbacks, then take some now but queue the
337 	 * pool to be rebalanced later.
338 	 *
339 	 * Otherwise, immediately rebalance the pool and wait.
340 	 */
341 	if ((!irm_flag || (pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE)) &&
342 	    ((pool_p->ipool_resno + nreq) <= pool_p->ipool_totsz)) {
343 
344 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
345 		    "request completely fulfilled.\n"));
346 		pool_p->ipool_resno += nreq;
347 		req_p->ireq_navail = nreq;
348 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
349 
350 	} else if (irm_flag &&
351 	    ((pool_p->ipool_resno + npartial) <= pool_p->ipool_totsz)) {
352 
353 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
354 		    "request partially fulfilled.\n"));
355 		pool_p->ipool_resno += npartial;
356 		req_p->ireq_navail = npartial;
357 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
358 		i_ddi_irm_enqueue(pool_p, B_FALSE);
359 
360 	} else {
361 
362 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_insert: "
363 		    "request needs immediate rebalance.\n"));
364 		i_ddi_irm_enqueue(pool_p, B_TRUE);
365 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_NEW);
366 	}
367 
368 	/* Fail if the request cannot be fulfilled at all */
369 	if (req_p->ireq_navail == 0) {
370 		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
371 		    ddi_driver_name(dip), ddi_get_instance(dip));
372 		pool_p->ipool_reqno -= nreq;
373 		pool_p->ipool_minno -= nmin;
374 		list_remove(&pool_p->ipool_req_list, req_p);
375 		mutex_exit(&pool_p->ipool_lock);
376 		kmem_free(req_p, sizeof (ddi_irm_req_t));
377 		return (DDI_EAGAIN);
378 	}
379 
380 	/* Unlock the pool */
381 	mutex_exit(&pool_p->ipool_lock);
382 
383 	intr_p->devi_irm_req_p = req_p;
384 	return (DDI_SUCCESS);
385 }
386 
387 /*
388  * i_ddi_irm_modify()
389  *
390  *	Modify an existing request in an interrupt pool, and balance the pool.
391  */
392 int
393 i_ddi_irm_modify(dev_info_t *dip, int nreq)
394 {
395 	devinfo_intr_t	*intr_p;
396 	ddi_irm_req_t	*req_p;
397 	ddi_irm_pool_t	*pool_p;
398 	int		type;
399 	int		retval = DDI_SUCCESS;
400 
401 	ASSERT(dip != NULL);
402 	ASSERT(nreq > 0);
403 
404 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: dip %p nreq %d\n",
405 	    (void *)dip, nreq));
406 
407 	/* Validate parameters */
408 	if ((dip == NULL) || (nreq < 1)) {
409 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
410 		return (DDI_EINVAL);
411 	}
412 
413 	/* Do nothing if not mapped to an IRM pool */
414 	if (((intr_p = DEVI(dip)->devi_intr_p) == NULL) ||
415 	    ((req_p = intr_p->devi_irm_req_p) == NULL))
416 		return (DDI_SUCCESS);
417 
418 	/* Do nothing if new size is the same */
419 	if (nreq == req_p->ireq_nreq)
420 		return (DDI_SUCCESS);
421 
422 	/* Do not allow MSI requests to be resized */
423 	if ((type = req_p->ireq_type) == DDI_INTR_TYPE_MSI) {
424 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid type\n"));
425 		return (DDI_ENOTSUP);
426 	}
427 
428 	/* Select the pool */
429 	if ((pool_p = req_p->ireq_pool_p) == NULL) {
430 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: missing pool\n"));
431 		return (DDI_FAILURE);
432 	}
433 
434 	/* Validate request size is not too large */
435 	if (nreq > i_ddi_intr_get_limit(dip, type, pool_p)) {
436 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: invalid args\n"));
437 		return (DDI_EINVAL);
438 	}
439 
440 	/* Lock the pool */
441 	mutex_enter(&pool_p->ipool_lock);
442 
443 	/*
444 	 * Process the modification.
445 	 *
446 	 *	- To increase a non-IRM request, call the implementation in
447 	 *	  i_ddi_irm_modify_increase().
448 	 *
449 	 *	- To decrease a non-IRM request, directly update the pool and
450 	 *	  request, then queue the pool for later rebalancing.
451 	 *
452 	 *	- To modify an IRM request, always queue the pool for later
453 	 *	  rebalancing.  IRM consumers rely upon callbacks for changes.
454 	 */
455 	if ((nreq > req_p->ireq_nreq) &&
456 	    (i_ddi_irm_supported(dip, type) != DDI_SUCCESS)) {
457 
458 		retval = i_ddi_irm_modify_increase(req_p, nreq);
459 
460 	} else {
461 
462 		/* Update pool and request */
463 		pool_p->ipool_reqno -= req_p->ireq_nreq;
464 		pool_p->ipool_reqno += nreq;
465 		if (i_ddi_irm_supported(dip, type) != DDI_SUCCESS) {
466 			pool_p->ipool_minno -= req_p->ireq_navail;
467 			pool_p->ipool_resno -= req_p->ireq_navail;
468 			pool_p->ipool_minno += nreq;
469 			pool_p->ipool_resno += nreq;
470 			req_p->ireq_navail = nreq;
471 		}
472 		req_p->ireq_nreq = nreq;
473 
474 		/* Re-sort request into the pool */
475 		list_remove(&pool_p->ipool_req_list, req_p);
476 		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
477 
478 		/* Queue pool for asynchronous rebalance */
479 		i_ddi_irm_enqueue(pool_p, B_FALSE);
480 	}
481 
482 	/* Unlock the pool */
483 	mutex_exit(&pool_p->ipool_lock);
484 
485 	return (retval);
486 }
487 
488 /*
489  * i_ddi_irm_modify_increase()
490  *
491  *	Increase a non-IRM request.  The additional interrupts are
492  *	directly taken from the pool when possible.  Otherwise, an
493  *	immediate, synchronous rebalance is performed.  A temporary
494  *	proxy request is used for any rebalance operation to ensure
495  *	the request is not reduced below its current allocation.
496  *
497  *	NOTE: pool must already be locked.
498  */
499 static int
500 i_ddi_irm_modify_increase(ddi_irm_req_t *req_p, int nreq)
501 {
502 	dev_info_t	*dip = req_p->ireq_dip;
503 	ddi_irm_pool_t	*pool_p = req_p->ireq_pool_p;
504 	ddi_irm_req_t	new_req;
505 	int		count, delta;
506 
507 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
508 
509 	/* Compute number of additional vectors */
510 	count = nreq - req_p->ireq_nreq;
511 
512 	/* Check for minimal fit */
513 	if ((pool_p->ipool_minno + count) > pool_p->ipool_totsz) {
514 		cmn_err(CE_WARN, "%s%d: interrupt pool too full.\n",
515 		    ddi_driver_name(dip), ddi_get_instance(dip));
516 		return (DDI_EAGAIN);
517 	}
518 
519 	/* Update the pool */
520 	pool_p->ipool_reqno += count;
521 	pool_p->ipool_minno += count;
522 
523 	/* Attempt direct implementation */
524 	if ((pool_p->ipool_resno + count) <= pool_p->ipool_totsz) {
525 		req_p->ireq_nreq += count;
526 		req_p->ireq_navail += count;
527 		pool_p->ipool_resno += count;
528 		return (DDI_SUCCESS);
529 	}
530 
531 	/* Rebalance required: fail if pool is not active */
532 	if ((pool_p->ipool_flags & DDI_IRM_FLAG_ACTIVE) == 0) {
533 		pool_p->ipool_reqno -= count;
534 		pool_p->ipool_minno -= count;
535 		return (DDI_EAGAIN);
536 	}
537 
538 	/* Insert temporary proxy request */
539 	bzero(&new_req, sizeof (ddi_irm_req_t));
540 	new_req.ireq_dip = dip;
541 	new_req.ireq_nreq = count;
542 	new_req.ireq_pool_p = pool_p;
543 	new_req.ireq_type = req_p->ireq_type;
544 	new_req.ireq_flags = DDI_IRM_FLAG_NEW;
545 	i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, &new_req);
546 
547 	/* Synchronously rebalance */
548 	i_ddi_irm_enqueue(pool_p, B_TRUE);
549 
550 	/* Remove proxy request, and merge into original request */
551 	req_p->ireq_nreq += count;
552 	if ((delta = (count - new_req.ireq_navail)) > 0) {
553 		req_p->ireq_nreq -= delta;
554 		pool_p->ipool_reqno -= delta;
555 		pool_p->ipool_minno -= delta;
556 	}
557 	req_p->ireq_navail += new_req.ireq_navail;
558 	list_remove(&pool_p->ipool_req_list, req_p);
559 	list_remove(&pool_p->ipool_req_list, &new_req);
560 	i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
561 
562 	return (DDI_SUCCESS);
563 }
564 
565 /*
566  * i_ddi_irm_remove()
567  *
568  *	Remove a request from an interrupt pool, and balance the pool.
569  */
570 int
571 i_ddi_irm_remove(dev_info_t *dip)
572 {
573 	devinfo_intr_t	*intr_p;
574 	ddi_irm_pool_t	*pool_p;
575 	ddi_irm_req_t	*req_p;
576 	uint_t		nmin;
577 
578 	ASSERT(dip != NULL);
579 
580 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: dip %p\n", (void *)dip));
581 
582 	/* Validate parameters */
583 	if (dip == NULL) {
584 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_remove: invalid args\n"));
585 		return (DDI_EINVAL);
586 	}
587 
588 	/* Check if the device has a request */
589 	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
590 	    !(req_p = intr_p->devi_irm_req_p)) {
591 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_modify: not found\n"));
592 		return (DDI_EINVAL);
593 	}
594 
595 	/* Lock the pool */
596 	pool_p = req_p->ireq_pool_p;
597 	mutex_enter(&pool_p->ipool_lock);
598 
599 	/* Remove request */
600 	nmin = DDI_IRM_IS_REDUCIBLE(req_p) ? 1 : req_p->ireq_nreq;
601 	pool_p->ipool_minno -= nmin;
602 	pool_p->ipool_reqno -= req_p->ireq_nreq;
603 	pool_p->ipool_resno -= req_p->ireq_navail;
604 	list_remove(&pool_p->ipool_req_list, req_p);
605 
606 	/* Queue pool to be rebalanced */
607 	i_ddi_irm_enqueue(pool_p, B_FALSE);
608 
609 	/* Unlock the pool */
610 	mutex_exit(&pool_p->ipool_lock);
611 
612 	/* Destroy the request */
613 	intr_p->devi_irm_req_p = NULL;
614 	kmem_free(req_p, sizeof (ddi_irm_req_t));
615 
616 	return (DDI_SUCCESS);
617 }
618 
619 /*
620  * i_ddi_irm_set_cb()
621  *
622  *	Change the callback flag for a request, in response to
623  *	a change in its callback registration.  Then rebalance
624  *	the interrupt pool.
625  *
626  *	NOTE: the request is not locked because the navail value
627  *	      is not directly affected.  The balancing thread may
628  *	      modify the navail value in the background after it
629  *	      locks the request itself.
630  */
631 void
632 i_ddi_irm_set_cb(dev_info_t *dip, boolean_t has_cb_flag)
633 {
634 	devinfo_intr_t	*intr_p;
635 	ddi_irm_pool_t	*pool_p;
636 	ddi_irm_req_t	*req_p;
637 	uint_t		nreq;
638 
639 	ASSERT(dip != NULL);
640 
641 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: dip %p has_cb_flag %d\n",
642 	    (void *)dip, (int)has_cb_flag));
643 
644 	/* Validate parameters */
645 	if (dip == NULL)
646 		return;
647 
648 	/* Check for association with interrupt pool */
649 	if (!(intr_p = DEVI(dip)->devi_intr_p) ||
650 	    !(req_p = intr_p->devi_irm_req_p)) {
651 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_set_cb: not in pool\n"));
652 		return;
653 	}
654 
655 	/* Lock the pool */
656 	pool_p = req_p->ireq_pool_p;
657 	mutex_enter(&pool_p->ipool_lock);
658 
659 	/*
660 	 * Update the request and the pool
661 	 */
662 	if (has_cb_flag) {
663 
664 		/* Update pool statistics */
665 		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX)
666 			pool_p->ipool_minno -= (req_p->ireq_nreq - 1);
667 
668 		/* Update request */
669 		req_p->ireq_flags |= DDI_IRM_FLAG_CALLBACK;
670 
671 		/* Rebalance in background */
672 		i_ddi_irm_enqueue(pool_p, B_FALSE);
673 
674 	} else {
675 
676 		/* Determine new request size */
677 		nreq = MIN(req_p->ireq_nreq, pool_p->ipool_defsz);
678 
679 		/* Update pool statistics */
680 		pool_p->ipool_reqno -= req_p->ireq_nreq;
681 		pool_p->ipool_reqno += nreq;
682 		if (req_p->ireq_type == DDI_INTR_TYPE_MSIX) {
683 			pool_p->ipool_minno -= 1;
684 			pool_p->ipool_minno += nreq;
685 		} else {
686 			pool_p->ipool_minno -= req_p->ireq_nreq;
687 			pool_p->ipool_minno += nreq;
688 		}
689 
690 		/* Update request size, and re-sort in pool */
691 		req_p->ireq_nreq = nreq;
692 		list_remove(&pool_p->ipool_req_list, req_p);
693 		i_ddi_irm_insertion_sort(&pool_p->ipool_req_list, req_p);
694 
695 		/* Rebalance synchronously, before losing callback */
696 		i_ddi_irm_enqueue(pool_p, B_TRUE);
697 
698 		/* Remove callback flag */
699 		req_p->ireq_flags &= ~(DDI_IRM_FLAG_CALLBACK);
700 	}
701 
702 	/* Unlock the pool */
703 	mutex_exit(&pool_p->ipool_lock);
704 }
705 
706 /*
707  * i_ddi_irm_supported()
708  *
709  *	Query if IRM is supported by a driver using a specific interrupt type.
710  *	Notice that IRM is limited to MSI-X users with registered callbacks.
711  */
712 int
713 i_ddi_irm_supported(dev_info_t *dip, int type)
714 {
715 	ddi_cb_t	*cb_p = DEVI(dip)->devi_cb_p;
716 
717 	return ((DDI_IRM_HAS_CB(cb_p) && (type == DDI_INTR_TYPE_MSIX)) ?
718 	    DDI_SUCCESS : DDI_ENOTSUP);
719 }
720 
721 /*
722  * Interrupt Pool Balancing
723  */
724 
725 /*
726  * irm_balance_thread()
727  *
728  *	One instance of this thread operates per each defined IRM pool.
729  *	It does the initial activation of the pool, as well as balancing
730  *	any requests that were queued up before the pool was active.
731  *	Once active, it waits forever to service balance operations.
732  */
733 static void
734 irm_balance_thread(ddi_irm_pool_t *pool_p)
735 {
736 	clock_t		interval;
737 
738 	DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: pool_p %p\n",
739 	    (void *)pool_p));
740 
741 	/* Lock the pool */
742 	mutex_enter(&pool_p->ipool_lock);
743 
744 	/* Perform initial balance if required */
745 	if (pool_p->ipool_reqno > pool_p->ipool_resno)
746 		i_ddi_irm_balance(pool_p);
747 
748 	/* Activate the pool */
749 	pool_p->ipool_flags |= DDI_IRM_FLAG_ACTIVE;
750 
751 	/* Main loop */
752 	for (;;) {
753 
754 		/* Compute the delay interval */
755 		interval = drv_usectohz(irm_balance_delay * 1000000);
756 
757 		/* Sleep until queued */
758 		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);
759 
760 		DDI_INTR_IRMDBG((CE_CONT, "irm_balance_thread: signaled.\n"));
761 
762 		/* Wait one interval, or until there are waiters */
763 		if ((interval > 0) &&
764 		    !(pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) &&
765 		    !(pool_p->ipool_flags & DDI_IRM_FLAG_EXIT)) {
766 			(void) cv_reltimedwait(&pool_p->ipool_cv,
767 			    &pool_p->ipool_lock, interval, TR_CLOCK_TICK);
768 		}
769 
770 		/* Check if awakened to exit */
771 		if (pool_p->ipool_flags & DDI_IRM_FLAG_EXIT) {
772 			DDI_INTR_IRMDBG((CE_CONT,
773 			    "irm_balance_thread: exiting...\n"));
774 			mutex_exit(&pool_p->ipool_lock);
775 			thread_exit();
776 		}
777 
778 		/* Balance the pool */
779 		i_ddi_irm_balance(pool_p);
780 
781 		/* Notify waiters */
782 		if (pool_p->ipool_flags & DDI_IRM_FLAG_WAITERS) {
783 			cv_broadcast(&pool_p->ipool_cv);
784 			pool_p->ipool_flags &= ~(DDI_IRM_FLAG_WAITERS);
785 		}
786 
787 		/* Clear QUEUED condition */
788 		pool_p->ipool_flags &= ~(DDI_IRM_FLAG_QUEUED);
789 	}
790 }
791 
792 /*
793  * i_ddi_irm_balance()
794  *
795  *	Balance a pool.  The general algorithm is to first reset all
796  *	requests to their maximum size, use reduction algorithms to
797  *	solve any imbalance, and then notify affected drivers.
798  */
799 static void
800 i_ddi_irm_balance(ddi_irm_pool_t *pool_p)
801 {
802 	ddi_irm_req_t	*req_p;
803 
804 #ifdef	DEBUG
805 	uint_t		debug_totsz = 0;
806 	int		debug_policy = 0;
807 #endif	/* DEBUG */
808 
809 	ASSERT(pool_p != NULL);
810 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
811 
812 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: pool_p %p\n",
813 	    (void *)pool_p));
814 
815 #ifdef	DEBUG	/* Adjust size and policy settings */
816 	if (irm_debug_size > pool_p->ipool_minno) {
817 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_balance: debug size %d\n",
818 		    irm_debug_size));
819 		debug_totsz = pool_p->ipool_totsz;
820 		pool_p->ipool_totsz = irm_debug_size;
821 	}
822 	if (DDI_IRM_POLICY_VALID(irm_debug_policy)) {
823 		DDI_INTR_IRMDBG((CE_CONT,
824 		    "i_ddi_irm_balance: debug policy %d\n", irm_debug_policy));
825 		debug_policy = pool_p->ipool_policy;
826 		pool_p->ipool_policy = irm_debug_policy;
827 	}
828 #endif	/* DEBUG */
829 
830 	/* Lock the availability lock */
831 	mutex_enter(&pool_p->ipool_navail_lock);
832 
833 	/*
834 	 * Put all of the reducible requests into a scratch list.
835 	 * Reset each one of them to their maximum availability.
836 	 */
837 	for (req_p = list_head(&pool_p->ipool_req_list); req_p;
838 	    req_p = list_next(&pool_p->ipool_req_list, req_p)) {
839 		if (DDI_IRM_IS_REDUCIBLE(req_p)) {
840 			pool_p->ipool_resno -= req_p->ireq_navail;
841 			req_p->ireq_scratch = req_p->ireq_navail;
842 			req_p->ireq_navail = req_p->ireq_nreq;
843 			pool_p->ipool_resno += req_p->ireq_navail;
844 			list_insert_tail(&pool_p->ipool_scratch_list, req_p);
845 		}
846 	}
847 
848 	/* Balance the requests */
849 	i_ddi_irm_reduce(pool_p);
850 
851 	/* Unlock the availability lock */
852 	mutex_exit(&pool_p->ipool_navail_lock);
853 
854 	/*
855 	 * Process REMOVE notifications.
856 	 *
857 	 * If a driver fails to release interrupts: exclude it from
858 	 * further processing, correct the resulting imbalance, and
859 	 * start over again at the head of the scratch list.
860 	 */
861 	req_p = list_head(&pool_p->ipool_scratch_list);
862 	while (req_p) {
863 		if ((req_p->ireq_navail < req_p->ireq_scratch) &&
864 		    (i_ddi_irm_notify(pool_p, req_p) != DDI_SUCCESS)) {
865 			list_remove(&pool_p->ipool_scratch_list, req_p);
866 			mutex_enter(&pool_p->ipool_navail_lock);
867 			i_ddi_irm_reduce(pool_p);
868 			mutex_exit(&pool_p->ipool_navail_lock);
869 			req_p = list_head(&pool_p->ipool_scratch_list);
870 		} else {
871 			req_p = list_next(&pool_p->ipool_scratch_list, req_p);
872 		}
873 	}
874 
875 	/*
876 	 * Process ADD notifications.
877 	 *
878 	 * This is the last use of the scratch list, so empty it.
879 	 */
880 	while (req_p = list_remove_head(&pool_p->ipool_scratch_list)) {
881 		if (req_p->ireq_navail > req_p->ireq_scratch) {
882 			(void) i_ddi_irm_notify(pool_p, req_p);
883 		}
884 	}
885 
886 #ifdef	DEBUG	/* Restore size and policy settings */
887 	if (debug_totsz != 0)
888 		pool_p->ipool_totsz = debug_totsz;
889 	if (debug_policy != 0)
890 		pool_p->ipool_policy = debug_policy;
891 #endif	/* DEBUG */
892 }
893 
894 /*
895  * i_ddi_irm_reduce()
896  *
897  *	Use reduction algorithms to correct an imbalance in a pool.
898  */
899 static void
900 i_ddi_irm_reduce(ddi_irm_pool_t *pool_p)
901 {
902 	int	imbalance;
903 
904 	ASSERT(pool_p != NULL);
905 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
906 	ASSERT(DDI_IRM_POLICY_VALID(pool_p->ipool_policy));
907 
908 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_reduce: pool_p %p\n",
909 	    (void *)pool_p));
910 
911 	/* Compute the imbalance.  Do nothing if already balanced. */
912 	if ((imbalance = pool_p->ipool_resno - pool_p->ipool_totsz) <= 0)
913 		return;
914 
915 	/*
916 	 * Try policy based reduction first. If it failed, then
917 	 * possibly reduce new requests as a last resort.
918 	 */
919 	if (i_ddi_irm_reduce_by_policy(pool_p, imbalance, pool_p->ipool_policy)
920 	    != DDI_SUCCESS) {
921 
922 		DDI_INTR_IRMDBG((CE_CONT,
923 		    "i_ddi_irm_reduce: policy reductions failed.\n"));
924 
925 		/* Compute remaining imbalance */
926 		imbalance = pool_p->ipool_resno - pool_p->ipool_totsz;
927 
928 		ASSERT(imbalance > 0);
929 
930 		i_ddi_irm_reduce_new(pool_p, imbalance);
931 	}
932 }
933 
934 /*
935  * i_ddi_irm_enqueue()
936  *
937  *	Queue a pool to be balanced.  Signals the balancing thread to wake
938  *	up and process the pool.  If 'wait_flag' is true, then the current
939  *	thread becomes a waiter and blocks until the balance is completed.
940  */
941 static void
942 i_ddi_irm_enqueue(ddi_irm_pool_t *pool_p, boolean_t wait_flag)
943 {
944 	ASSERT(pool_p != NULL);
945 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
946 
947 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool_p %p wait_flag %d\n",
948 	    (void *)pool_p, (int)wait_flag));
949 
950 	/* Do nothing if pool is already balanced */
951 #ifndef	DEBUG
952 	if ((pool_p->ipool_reqno == pool_p->ipool_resno)) {
953 #else
954 	if ((pool_p->ipool_reqno == pool_p->ipool_resno) && !irm_debug_size) {
955 #endif	/* DEBUG */
956 		DDI_INTR_IRMDBG((CE_CONT,
957 		    "i_ddi_irm_enqueue: pool already balanced\n"));
958 		return;
959 	}
960 
961 	/* Avoid deadlocks when IRM is not active */
962 	if (!irm_active && wait_flag) {
963 		DDI_INTR_IRMDBG((CE_CONT,
964 		    "i_ddi_irm_enqueue: pool not active.\n"));
965 		return;
966 	}
967 
968 	if (wait_flag)
969 		pool_p->ipool_flags |= DDI_IRM_FLAG_WAITERS;
970 
971 	if (wait_flag || !(pool_p->ipool_flags & DDI_IRM_FLAG_QUEUED)) {
972 		pool_p->ipool_flags |= DDI_IRM_FLAG_QUEUED;
973 		cv_signal(&pool_p->ipool_cv);
974 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: pool queued.\n"));
975 	}
976 
977 	if (wait_flag) {
978 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_enqueue: waiting...\n"));
979 		cv_wait(&pool_p->ipool_cv, &pool_p->ipool_lock);
980 	}
981 }
982 
983 /*
984  * i_ddi_irm_reduce_by_policy()
985  *
986  *	Reduces requests based on reduction policies.
987  *
988  *	For the DDI_IRM_POLICY_LARGE reduction policy, the algorithm
989  *	generally reduces larger requests first, before advancing
990  *	to smaller requests.
991  *	For the DDI_IRM_POLICY_EVEN reduction policy, the algorithm
992  *	reduces requests evenly, without giving a specific preference
993  *	to smaller or larger requests. Each iteration reduces all
994  *	reducible requests by the same amount until the imbalance is
995  *	corrected.
996  *
997  *	The scratch list is initially sorted in descending order by current
998  *	navail values, which are maximized prior to reduction. This sorted
999  *	order is preserved.  It avoids reducing requests below the threshold
1000  *	of the interrupt pool's default allocation size.
1001  *
1002  *	Optimizations in this algorithm include trying to reduce multiple
1003  *	requests together.  And the algorithm attempts to reduce in larger
1004  *	increments when possible to minimize the total number of iterations.
1005  */
1006 static int
1007 i_ddi_irm_reduce_by_policy(ddi_irm_pool_t *pool_p, int imbalance, int policy)
1008 {
1009 	ASSERT(pool_p != NULL);
1010 	ASSERT(imbalance > 0);
1011 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
1012 
1013 	while (imbalance > 0) {
1014 		list_t		*slist_p = &pool_p->ipool_scratch_list;
1015 		ddi_irm_req_t	*req_p = list_head(slist_p), *last_p;
1016 		uint_t		nreduce = 0, nremain = 0, stop_navail;
1017 		uint_t		pool_defsz = pool_p->ipool_defsz;
1018 		uint_t		reduction, max_redu;
1019 
1020 		/* Fail if none are reducible */
1021 		if (!req_p || req_p->ireq_navail <= pool_defsz) {
1022 			DDI_INTR_IRMDBG((CE_CONT,
1023 			    "i_ddi_irm_reduce_by_policy: Failure. "
1024 			    "All requests have downsized to low limit.\n"));
1025 			return (DDI_FAILURE);
1026 		}
1027 
1028 		/* Count reducible requests */
1029 		stop_navail = (policy == DDI_IRM_POLICY_LARGE) ?
1030 		    req_p->ireq_navail - 1 : pool_defsz;
1031 		for (; req_p; req_p = list_next(slist_p, req_p)) {
1032 			if (req_p->ireq_navail <= stop_navail)
1033 				break;
1034 			nreduce++;
1035 		}
1036 
1037 		/* Compute reduction */
1038 		last_p = req_p ? list_prev(slist_p, req_p) : list_tail(slist_p);
1039 		if ((policy == DDI_IRM_POLICY_LARGE) && req_p &&
1040 		    req_p->ireq_navail > pool_defsz)
1041 			reduction = last_p->ireq_navail - req_p->ireq_navail;
1042 		else
1043 			reduction = last_p->ireq_navail - pool_defsz;
1044 
1045 		if ((max_redu = reduction * nreduce) > imbalance) {
1046 			reduction = imbalance / nreduce;
1047 			nremain = imbalance % nreduce;
1048 			pool_p->ipool_resno -= imbalance;
1049 			imbalance = 0;
1050 		} else {
1051 			pool_p->ipool_resno -= max_redu;
1052 			imbalance -= max_redu;
1053 		}
1054 
1055 		/* Reduce */
1056 		for (req_p = list_head(slist_p); (reduction != 0) && nreduce--;
1057 		    req_p = list_next(slist_p, req_p)) {
1058 			req_p->ireq_navail -= reduction;
1059 		}
1060 
1061 		for (req_p = last_p; nremain--;
1062 		    req_p = list_prev(slist_p, req_p)) {
1063 			req_p->ireq_navail--;
1064 		}
1065 	}
1066 
1067 	return (DDI_SUCCESS);
1068 }
1069 
1070 /*
1071  * i_ddi_irm_reduce_new()
1072  *
1073  *	Reduces new requests.  This is only used as a last resort
1074  *	after another reduction algorithm failed.
1075  *
1076  *	NOTE: The pool locking in i_ddi_irm_insert() ensures
1077  *	there can be only one new request at a time in a pool.
1078  */
1079 static void
1080 i_ddi_irm_reduce_new(ddi_irm_pool_t *pool_p, int imbalance)
1081 {
1082 	ddi_irm_req_t	*req_p;
1083 
1084 	ASSERT(pool_p != NULL);
1085 	ASSERT(imbalance > 0);
1086 	ASSERT(MUTEX_HELD(&pool_p->ipool_lock));
1087 
1088 	DDI_INTR_IRMDBG((CE_CONT,
1089 	    "i_ddi_irm_reduce_new: pool_p %p imbalance %d\n",
1090 	    (void *)pool_p, imbalance));
1091 
1092 	for (req_p = list_head(&pool_p->ipool_scratch_list); req_p;
1093 	    req_p = list_next(&pool_p->ipool_scratch_list, req_p)) {
1094 		if (req_p->ireq_flags & DDI_IRM_FLAG_NEW) {
1095 			ASSERT(req_p->ireq_navail >= imbalance);
1096 			req_p->ireq_navail -= imbalance;
1097 			pool_p->ipool_resno -= imbalance;
1098 			return;
1099 		}
1100 	}
1101 
1102 	/* should never go here */
1103 	ASSERT(B_FALSE);
1104 }
1105 
1106 /*
1107  * Miscellaneous Helper Functions
1108  */
1109 
1110 /*
1111  * i_ddi_intr_get_pool()
1112  *
1113  *	Get an IRM pool that supplies interrupts of a specified type.
1114  *	Invokes a DDI_INTROP_GETPOOL to the bus nexus driver.  Fails
1115  *	if no pool exists.
1116  */
1117 ddi_irm_pool_t *
1118 i_ddi_intr_get_pool(dev_info_t *dip, int type)
1119 {
1120 	devinfo_intr_t		*intr_p;
1121 	ddi_irm_pool_t		*pool_p;
1122 	ddi_irm_req_t		*req_p;
1123 	ddi_intr_handle_impl_t	hdl;
1124 
1125 	ASSERT(dip != NULL);
1126 	ASSERT(DDI_INTR_TYPE_FLAG_VALID(type));
1127 
1128 	if (((intr_p = DEVI(dip)->devi_intr_p) != NULL) &&
1129 	    ((req_p = intr_p->devi_irm_req_p) != NULL) &&
1130 	    ((pool_p = req_p->ireq_pool_p) != NULL) &&
1131 	    (pool_p->ipool_types & type)) {
1132 		return (pool_p);
1133 	}
1134 
1135 	bzero(&hdl, sizeof (ddi_intr_handle_impl_t));
1136 	hdl.ih_dip = dip;
1137 	hdl.ih_type = type;
1138 
1139 	if (i_ddi_intr_ops(dip, dip, DDI_INTROP_GETPOOL,
1140 	    &hdl, (void *)&pool_p) == DDI_SUCCESS)
1141 		return (pool_p);
1142 
1143 	return (NULL);
1144 }
1145 
1146 /*
1147  * i_ddi_irm_insertion_sort()
1148  *
1149  *	Use the insertion sort method to insert a request into a list.
1150  *	The list is sorted in descending order by request size.
1151  */
1152 static void
1153 i_ddi_irm_insertion_sort(list_t *req_list, ddi_irm_req_t *req_p)
1154 {
1155 	ddi_irm_req_t	*next_p;
1156 
1157 	next_p = list_head(req_list);
1158 
1159 	while (next_p && (next_p->ireq_nreq > req_p->ireq_nreq))
1160 		next_p = list_next(req_list, next_p);
1161 
1162 	list_insert_before(req_list, next_p, req_p);
1163 }
1164 
1165 /*
1166  * i_ddi_irm_notify()
1167  *
1168  *	Notify a driver of changes to its interrupt request using the
1169  *	generic callback mechanism.  Checks for errors in processing.
1170  */
1171 static int
1172 i_ddi_irm_notify(ddi_irm_pool_t *pool_p, ddi_irm_req_t *req_p)
1173 {
1174 	ddi_cb_action_t	action;
1175 	ddi_cb_t	*cb_p;
1176 	uint_t		nintrs;
1177 	int		ret, count;
1178 
1179 	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: pool_p %p req_p %p\n",
1180 	    (void *)pool_p, (void *)req_p));
1181 
1182 	/* Do not notify new or unchanged requests */
1183 	if ((req_p->ireq_navail == req_p->ireq_scratch) ||
1184 	    (req_p->ireq_flags & DDI_IRM_FLAG_NEW))
1185 		return (DDI_SUCCESS);
1186 
1187 	/* Determine action and count */
1188 	if (req_p->ireq_navail > req_p->ireq_scratch) {
1189 		action = DDI_CB_INTR_ADD;
1190 		count = req_p->ireq_navail - req_p->ireq_scratch;
1191 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: adding %d\n",
1192 		    count));
1193 	} else {
1194 		action = DDI_CB_INTR_REMOVE;
1195 		count = req_p->ireq_scratch - req_p->ireq_navail;
1196 		DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_notify: removing %d\n",
1197 		    count));
1198 	}
1199 
1200 	/* Lookup driver callback */
1201 	if ((cb_p = DEVI(req_p->ireq_dip)->devi_cb_p) == NULL) {
1202 		DDI_INTR_IRMDBG((CE_WARN, "i_ddi_irm_notify: no callback!\n"));
1203 		return (DDI_FAILURE);
1204 	}
1205 
1206 	/* Do callback */
1207 	ret = cb_p->cb_func(req_p->ireq_dip, action, (void *)(uintptr_t)count,
1208 	    cb_p->cb_arg1, cb_p->cb_arg2);
1209 
1210 	/* Log callback errors */
1211 	if (ret != DDI_SUCCESS) {
1212 		cmn_err(CE_WARN, "%s%d: failed callback (action=%d, ret=%d)\n",
1213 		    ddi_driver_name(req_p->ireq_dip),
1214 		    ddi_get_instance(req_p->ireq_dip), (int)action, ret);
1215 	}
1216 
1217 	/* Check if the driver exceeds its availability */
1218 	nintrs = i_ddi_intr_get_current_nintrs(req_p->ireq_dip);
1219 	if (nintrs > req_p->ireq_navail) {
1220 		cmn_err(CE_WARN, "%s%d: failed to release interrupts "
1221 		    "(nintrs=%d, navail=%d).\n",
1222 		    ddi_driver_name(req_p->ireq_dip),
1223 		    ddi_get_instance(req_p->ireq_dip), nintrs,
1224 		    req_p->ireq_navail);
1225 		pool_p->ipool_resno += (nintrs - req_p->ireq_navail);
1226 		req_p->ireq_navail = nintrs;
1227 		return (DDI_FAILURE);
1228 	}
1229 
1230 	/* Update request */
1231 	req_p->ireq_scratch = req_p->ireq_navail;
1232 
1233 	return (DDI_SUCCESS);
1234 }
1235 
/*
 * i_ddi_irm_debug_balance()
 *
 *	Debug/test only: request a rebalance of the interrupt pool that
 *	serves a device.
 *
 *	The pool is looked up from the device's current interrupt type
 *	and handed to i_ddi_irm_enqueue() with the pool lock held;
 *	'wait_flag' is passed through unchanged.  Nothing happens when
 *	the device has no current interrupt type or no pool is found.
 */
#ifdef	DEBUG
void
i_ddi_irm_debug_balance(dev_info_t *dip, boolean_t wait_flag)
{
	ddi_irm_pool_t	*pool_p;
	int		type;

	DDI_INTR_IRMDBG((CE_CONT, "i_ddi_irm_debug_balance: dip %p wait %d\n",
	    (void *)dip, (int)wait_flag));

	/* A device without a current interrupt type has no pool to balance */
	type = i_ddi_intr_get_current_type(dip);
	if (type == 0)
		return;

	pool_p = i_ddi_intr_get_pool(dip, type);
	if (pool_p == NULL)
		return;

	mutex_enter(&pool_p->ipool_lock);
	i_ddi_irm_enqueue(pool_p, wait_flag);
	mutex_exit(&pool_p->ipool_lock);
}
#endif	/* DEBUG */
1260