xref: /titanic_50/usr/src/uts/sun4/io/px/px_ib.c (revision f8d2de6bd2421da1926f3daa456d161670decdf7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * PX Interrupt Block implementation
31  */
32 
33 #include <sys/types.h>
34 #include <sys/kmem.h>
35 #include <sys/async.h>
36 #include <sys/systm.h>		/* panicstr */
37 #include <sys/spl.h>
38 #include <sys/sunddi.h>
39 #include <sys/machsystm.h>	/* intr_dist_add */
40 #include <sys/ddi_impldefs.h>
41 #include <sys/cpuvar.h>
42 #include "px_obj.h"
43 
44 /*LINTLIBRARY*/
45 
46 static void px_ib_intr_redist(void *arg, int32_t weight_max, int32_t weight);
47 static void px_ib_intr_dist_en(dev_info_t *dip, cpuid_t cpu_id, devino_t ino,
48     boolean_t wait_flag);
49 static uint_t px_ib_intr_reset(void *arg);
50 
51 int
52 px_ib_attach(px_t *px_p)
53 {
54 	dev_info_t	*dip = px_p->px_dip;
55 	px_ib_t		*ib_p;
56 	sysino_t	sysino;
57 	px_fault_t	*fault_p = &px_p->px_fault;
58 
59 	DBG(DBG_IB, dip, "px_ib_attach\n");
60 
61 	if (px_lib_intr_devino_to_sysino(px_p->px_dip,
62 	    px_p->px_inos[PX_INTR_PEC], &sysino) != DDI_SUCCESS)
63 		return (DDI_FAILURE);
64 
65 	/*
66 	 * Allocate interrupt block state structure and link it to
67 	 * the px state structure.
68 	 */
69 	ib_p = kmem_zalloc(sizeof (px_ib_t), KM_SLEEP);
70 	px_p->px_ib_p = ib_p;
71 	ib_p->ib_px_p = px_p;
72 	ib_p->ib_ino_lst = (px_ib_ino_info_t *)NULL;
73 
74 	mutex_init(&ib_p->ib_intr_lock, NULL, MUTEX_DRIVER, NULL);
75 	mutex_init(&ib_p->ib_ino_lst_mutex, NULL, MUTEX_DRIVER, NULL);
76 
77 	bus_func_register(BF_TYPE_RESINTR, px_ib_intr_reset, ib_p);
78 
79 	intr_dist_add_weighted(px_ib_intr_redist, ib_p);
80 
81 	/*
82 	 * Initialize PEC fault data structure
83 	 */
84 	fault_p->px_fh_dip = dip;
85 	fault_p->px_fh_sysino = sysino;
86 	fault_p->px_err_func = px_err_dmc_pec_intr;
87 	fault_p->px_intr_ino = px_p->px_inos[PX_INTR_PEC];
88 
89 	return (DDI_SUCCESS);
90 }
91 
92 void
93 px_ib_detach(px_t *px_p)
94 {
95 	px_ib_t		*ib_p = px_p->px_ib_p;
96 	dev_info_t	*dip = px_p->px_dip;
97 
98 	DBG(DBG_IB, dip, "px_ib_detach\n");
99 
100 	bus_func_unregister(BF_TYPE_RESINTR, px_ib_intr_reset, ib_p);
101 	intr_dist_rem_weighted(px_ib_intr_redist, ib_p);
102 
103 	mutex_destroy(&ib_p->ib_ino_lst_mutex);
104 	mutex_destroy(&ib_p->ib_intr_lock);
105 
106 	px_ib_free_ino_all(ib_p);
107 
108 	px_p->px_ib_p = NULL;
109 	kmem_free(ib_p, sizeof (px_ib_t));
110 }
111 
112 static struct {
113 	kstat_named_t ihks_name;
114 	kstat_named_t ihks_type;
115 	kstat_named_t ihks_cpu;
116 	kstat_named_t ihks_pil;
117 	kstat_named_t ihks_time;
118 	kstat_named_t ihks_ino;
119 	kstat_named_t ihks_cookie;
120 	kstat_named_t ihks_devpath;
121 	kstat_named_t ihks_buspath;
122 } px_ih_ks_template = {
123 	{ "name",	KSTAT_DATA_CHAR },
124 	{ "type",	KSTAT_DATA_CHAR },
125 	{ "cpu",	KSTAT_DATA_UINT64 },
126 	{ "pil",	KSTAT_DATA_UINT64 },
127 	{ "time",	KSTAT_DATA_UINT64 },
128 	{ "ino",	KSTAT_DATA_UINT64 },
129 	{ "cookie",	KSTAT_DATA_UINT64 },
130 	{ "devpath",	KSTAT_DATA_STRING },
131 	{ "buspath",	KSTAT_DATA_STRING },
132 };
133 
134 static uint32_t ih_instance;
135 static kmutex_t ih_ks_template_lock;
136 
137 int
138 ih_ks_update(kstat_t *ksp, int rw)
139 {
140 	px_ih_t *ih_p = ksp->ks_private;
141 	int maxlen = sizeof (px_ih_ks_template.ihks_name.value.c);
142 	px_ib_t *ib_p = ih_p->ih_ino_p->ino_ib_p;
143 	px_t *px_p = ib_p->ib_px_p;
144 	devino_t ino;
145 	sysino_t sysino;
146 	char ih_devpath[MAXPATHLEN];
147 	char ih_buspath[MAXPATHLEN];
148 
149 	ino = ih_p->ih_ino_p->ino_ino;
150 	(void) px_lib_intr_devino_to_sysino(px_p->px_dip, ino, &sysino);
151 
152 	(void) snprintf(px_ih_ks_template.ihks_name.value.c, maxlen, "%s%d",
153 	    ddi_driver_name(ih_p->ih_dip),
154 	    ddi_get_instance(ih_p->ih_dip));
155 
156 	(void) strcpy(px_ih_ks_template.ihks_type.value.c,
157 	    (ih_p->ih_rec_type == 0) ? "fixed" : "msi");
158 	px_ih_ks_template.ihks_cpu.value.ui64 = ih_p->ih_ino_p->ino_cpuid;
159 	px_ih_ks_template.ihks_pil.value.ui64 = ih_p->ih_ino_p->ino_pil;
160 	px_ih_ks_template.ihks_time.value.ui64 = ih_p->ih_nsec + (uint64_t)
161 	    tick2ns((hrtime_t)ih_p->ih_ticks, ih_p->ih_ino_p->ino_cpuid);
162 	px_ih_ks_template.ihks_ino.value.ui64 = ino;
163 	px_ih_ks_template.ihks_cookie.value.ui64 = sysino;
164 
165 	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
166 	(void) ddi_pathname(px_p->px_dip, ih_buspath);
167 	kstat_named_setstr(&px_ih_ks_template.ihks_devpath, ih_devpath);
168 	kstat_named_setstr(&px_ih_ks_template.ihks_buspath, ih_buspath);
169 
170 	return (0);
171 }
172 
173 void
174 px_ib_intr_enable(px_t *px_p, cpuid_t cpu_id, devino_t ino)
175 {
176 	px_ib_t		*ib_p = px_p->px_ib_p;
177 	sysino_t	sysino;
178 
179 	/*
180 	 * Determine the cpu for the interrupt
181 	 */
182 	mutex_enter(&ib_p->ib_intr_lock);
183 
184 	DBG(DBG_IB, px_p->px_dip,
185 	    "px_ib_intr_enable: ino=%x cpu_id=%x\n", ino, cpu_id);
186 
187 	if (px_lib_intr_devino_to_sysino(px_p->px_dip, ino,
188 	    &sysino) != DDI_SUCCESS) {
189 		DBG(DBG_IB, px_p->px_dip,
190 		    "px_ib_intr_enable: px_intr_devino_to_sysino() failed\n");
191 
192 		mutex_exit(&ib_p->ib_intr_lock);
193 		return;
194 	}
195 
196 	PX_INTR_ENABLE(px_p->px_dip, sysino, cpu_id);
197 
198 	mutex_exit(&ib_p->ib_intr_lock);
199 }
200 
201 /*ARGSUSED*/
202 void
203 px_ib_intr_disable(px_ib_t *ib_p, devino_t ino, int wait)
204 {
205 	sysino_t	sysino;
206 
207 	mutex_enter(&ib_p->ib_intr_lock);
208 
209 	DBG(DBG_IB, ib_p->ib_px_p->px_dip, "px_ib_intr_disable: ino=%x\n", ino);
210 
211 	/* Disable the interrupt */
212 	if (px_lib_intr_devino_to_sysino(ib_p->ib_px_p->px_dip, ino,
213 	    &sysino) != DDI_SUCCESS) {
214 		DBG(DBG_IB, ib_p->ib_px_p->px_dip,
215 		    "px_ib_intr_disable: px_intr_devino_to_sysino() failed\n");
216 
217 		mutex_exit(&ib_p->ib_intr_lock);
218 		return;
219 	}
220 
221 	PX_INTR_DISABLE(ib_p->ib_px_p->px_dip, sysino);
222 
223 	mutex_exit(&ib_p->ib_intr_lock);
224 }
225 
226 
227 static void
228 px_ib_intr_dist_en(dev_info_t *dip, cpuid_t cpu_id, devino_t ino,
229     boolean_t wait_flag)
230 {
231 	uint32_t	old_cpu_id;
232 	sysino_t	sysino;
233 	intr_valid_state_t	enabled = 0;
234 	hrtime_t	start_time;
235 	intr_state_t	intr_state;
236 	int		e = DDI_SUCCESS;
237 
238 	DBG(DBG_IB, dip, "px_ib_intr_dist_en: ino=0x%x\n", ino);
239 
240 	if (px_lib_intr_devino_to_sysino(dip, ino, &sysino) != DDI_SUCCESS) {
241 		DBG(DBG_IB, dip, "px_ib_intr_dist_en: "
242 		    "px_intr_devino_to_sysino() failed, ino 0x%x\n", ino);
243 		return;
244 	}
245 
246 	/* Skip enabling disabled interrupts */
247 	if (px_lib_intr_getvalid(dip, sysino, &enabled) != DDI_SUCCESS) {
248 		DBG(DBG_IB, dip, "px_ib_intr_dist_en: px_intr_getvalid() "
249 		    "failed, sysino 0x%x\n", sysino);
250 		return;
251 	}
252 	if (!enabled)
253 		return;
254 
255 	/* Done if redistributed onto the same cpuid */
256 	if (px_lib_intr_gettarget(dip, sysino, &old_cpu_id) != DDI_SUCCESS) {
257 		DBG(DBG_IB, dip, "px_ib_intr_dist_en: "
258 		    "px_intr_gettarget() failed\n");
259 		return;
260 	}
261 	if (cpu_id == old_cpu_id)
262 		return;
263 
264 	if (!wait_flag)
265 		goto done;
266 
267 	/* Busy wait on pending interrupts */
268 	PX_INTR_DISABLE(dip, sysino);
269 
270 	for (start_time = gethrtime(); !panicstr &&
271 	    ((e = px_lib_intr_getstate(dip, sysino, &intr_state)) ==
272 		DDI_SUCCESS) &&
273 	    (intr_state == INTR_DELIVERED_STATE); /* */) {
274 		if (gethrtime() - start_time > px_intrpend_timeout) {
275 			cmn_err(CE_WARN,
276 			    "%s%d: px_ib_intr_dist_en: sysino 0x%x(ino 0x%x) "
277 			    "from cpu id 0x%x to 0x%x timeout",
278 			    ddi_driver_name(dip), ddi_get_instance(dip),
279 			    sysino, ino, old_cpu_id, cpu_id);
280 
281 			e = DDI_FAILURE;
282 			break;
283 		}
284 	}
285 
286 	if (e != DDI_SUCCESS)
287 		DBG(DBG_IB, dip, "px_ib_intr_dist_en: failed, "
288 		    "ino 0x%x sysino 0x%x\n", ino, sysino);
289 
290 done:
291 	PX_INTR_ENABLE(dip, sysino, cpu_id);
292 }
293 
294 
295 /*
296  * Redistribute interrupts of the specified weight. The first call has a weight
297  * of weight_max, which can be used to trigger initialization for
298  * redistribution. The inos with weight [weight_max, inf.) should be processed
299  * on the "weight == weight_max" call.  This first call is followed by calls
300  * of decreasing weights, inos of that weight should be processed.  The final
301  * call specifies a weight of zero, this can be used to trigger processing of
302  * stragglers.
303  */
304 static void
305 px_ib_intr_redist(void *arg, int32_t weight_max, int32_t weight)
306 {
307 	px_ib_t		*ib_p = (px_ib_t *)arg;
308 	px_t		*px_p = ib_p->ib_px_p;
309 	dev_info_t	*dip = px_p->px_dip;
310 	px_ib_ino_info_t *ino_p;
311 	px_ih_t		*ih_lst;
312 	int32_t		dweight = 0;
313 	int		i;
314 
315 	/* Redistribute internal interrupts */
316 	if (weight == 0) {
317 		devino_t	ino_pec = px_p->px_inos[PX_INTR_PEC];
318 
319 		mutex_enter(&ib_p->ib_intr_lock);
320 		px_ib_intr_dist_en(dip, intr_dist_cpuid(), ino_pec, B_FALSE);
321 		mutex_exit(&ib_p->ib_intr_lock);
322 	}
323 
324 	/* Redistribute device interrupts */
325 	mutex_enter(&ib_p->ib_ino_lst_mutex);
326 
327 	for (ino_p = ib_p->ib_ino_lst; ino_p; ino_p = ino_p->ino_next) {
328 		uint32_t orig_cpuid;
329 
330 		/*
331 		 * Recomputes the sum of interrupt weights of devices that
332 		 * share the same ino upon first call marked by
333 		 * (weight == weight_max).
334 		 */
335 		if (weight == weight_max) {
336 			ino_p->ino_intr_weight = 0;
337 			for (i = 0, ih_lst = ino_p->ino_ih_head;
338 			    i < ino_p->ino_ih_size;
339 			    i++, ih_lst = ih_lst->ih_next) {
340 				dweight = i_ddi_get_intr_weight(ih_lst->ih_dip);
341 				if (dweight > 0)
342 					ino_p->ino_intr_weight += dweight;
343 			}
344 		}
345 
346 		/*
347 		 * As part of redistributing weighted interrupts over cpus,
348 		 * nexus redistributes device interrupts and updates
349 		 * cpu weight. The purpose is for the most light weighted
350 		 * cpu to take the next interrupt and gain weight, therefore
351 		 * attention demanding device gains more cpu attention by
352 		 * making itself heavy.
353 		 */
354 		if ((weight == ino_p->ino_intr_weight) ||
355 		    ((weight >= weight_max) &&
356 		    (ino_p->ino_intr_weight >= weight_max))) {
357 			orig_cpuid = ino_p->ino_cpuid;
358 			if (cpu[orig_cpuid] == NULL)
359 				orig_cpuid = CPU->cpu_id;
360 
361 			/* select cpuid to target and mark ino established */
362 			ino_p->ino_cpuid = intr_dist_cpuid();
363 
364 			/* Add device weight to targeted cpu. */
365 			for (i = 0, ih_lst = ino_p->ino_ih_head;
366 			    i < ino_p->ino_ih_size;
367 			    i++, ih_lst = ih_lst->ih_next) {
368 				hrtime_t ticks;
369 
370 				dweight = i_ddi_get_intr_weight(ih_lst->ih_dip);
371 				intr_dist_cpuid_add_device_weight(
372 				    ino_p->ino_cpuid, ih_lst->ih_dip, dweight);
373 
374 				/*
375 				 * different cpus may have different clock
376 				 * speeds. to account for this, whenever an
377 				 * interrupt is moved to a new CPU, we
378 				 * convert the accumulated ticks into nsec,
379 				 * based upon the clock rate of the prior
380 				 * CPU.
381 				 *
382 				 * It is possible that the prior CPU no longer
383 				 * exists. In this case, fall back to using
384 				 * this CPU's clock rate.
385 				 *
386 				 * Note that the value in ih_ticks has already
387 				 * been corrected for any power savings mode
388 				 * which might have been in effect.
389 				 *
390 				 * because we are updating two fields in
391 				 * ih_t we must lock ih_ks_template_lock to
392 				 * prevent someone from reading the kstats
393 				 * after we set ih_ticks to 0 and before we
394 				 * increment ih_nsec to compensate.
395 				 *
396 				 * we must also protect against the interrupt
397 				 * arriving and incrementing ih_ticks between
398 				 * the time we read it and when we reset it
399 				 * to 0. To do this we use atomic_swap.
400 				 */
401 
402 				mutex_enter(&ih_ks_template_lock);
403 				ticks = atomic_swap_64(&ih_lst->ih_ticks, 0);
404 				ih_lst->ih_nsec += (uint64_t)
405 				    tick2ns(ticks, orig_cpuid);
406 				mutex_exit(&ih_ks_template_lock);
407 			}
408 
409 			/* enable interrupt on new targeted cpu */
410 			px_ib_intr_dist_en(dip, ino_p->ino_cpuid,
411 			    ino_p->ino_ino, B_TRUE);
412 		}
413 	}
414 	mutex_exit(&ib_p->ib_ino_lst_mutex);
415 }
416 
417 /*
418  * Reset interrupts to IDLE.  This function is called during
419  * panic handling after redistributing interrupts; it's needed to
420  * support dumping to network devices after 'sync' from OBP.
421  *
422  * N.B.  This routine runs in a context where all other threads
423  * are permanently suspended.
424  */
425 static uint_t
426 px_ib_intr_reset(void *arg)
427 {
428 	px_ib_t		*ib_p = (px_ib_t *)arg;
429 
430 	DBG(DBG_IB, ib_p->ib_px_p->px_dip, "px_ib_intr_reset\n");
431 
432 	if (px_lib_intr_reset(ib_p->ib_px_p->px_dip) != DDI_SUCCESS)
433 		return (BF_FATAL);
434 
435 	return (BF_NONE);
436 }
437 
438 /*
439  * Locate ino_info structure on ib_p->ib_ino_lst according to ino#
440  * returns NULL if not found.
441  */
442 px_ib_ino_info_t *
443 px_ib_locate_ino(px_ib_t *ib_p, devino_t ino_num)
444 {
445 	px_ib_ino_info_t	*ino_p = ib_p->ib_ino_lst;
446 
447 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
448 
449 	for (; ino_p && ino_p->ino_ino != ino_num; ino_p = ino_p->ino_next);
450 
451 	return (ino_p);
452 }
453 
454 px_ib_ino_info_t *
455 px_ib_new_ino(px_ib_t *ib_p, devino_t ino_num, px_ih_t *ih_p)
456 {
457 	px_ib_ino_info_t	*ino_p = kmem_alloc(sizeof (px_ib_ino_info_t),
458 	    KM_SLEEP);
459 	sysino_t	sysino;
460 
461 	ino_p->ino_ino = ino_num;
462 	ino_p->ino_ib_p = ib_p;
463 	ino_p->ino_unclaimed = 0;
464 
465 	if (px_lib_intr_devino_to_sysino(ib_p->ib_px_p->px_dip, ino_p->ino_ino,
466 	    &sysino) != DDI_SUCCESS)
467 		return (NULL);
468 
469 	ino_p->ino_sysino = sysino;
470 
471 	/*
472 	 * Cannot disable interrupt since we might share slot
473 	 */
474 	ih_p->ih_next = ih_p;
475 	ino_p->ino_ih_head = ih_p;
476 	ino_p->ino_ih_tail = ih_p;
477 	ino_p->ino_ih_start = ih_p;
478 	ino_p->ino_ih_size = 1;
479 
480 	ino_p->ino_next = ib_p->ib_ino_lst;
481 	ib_p->ib_ino_lst = ino_p;
482 
483 	return (ino_p);
484 }
485 
486 /*
487  * The ino_p is retrieved by previous call to px_ib_locate_ino().
488  */
489 void
490 px_ib_delete_ino(px_ib_t *ib_p, px_ib_ino_info_t *ino_p)
491 {
492 	px_ib_ino_info_t	*list = ib_p->ib_ino_lst;
493 
494 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
495 
496 	if (list == ino_p)
497 		ib_p->ib_ino_lst = list->ino_next;
498 	else {
499 		for (; list->ino_next != ino_p; list = list->ino_next);
500 		list->ino_next = ino_p->ino_next;
501 	}
502 }
503 
504 /*
505  * Free all ino when we are detaching.
506  */
507 void
508 px_ib_free_ino_all(px_ib_t *ib_p)
509 {
510 	px_ib_ino_info_t	*tmp = ib_p->ib_ino_lst;
511 	px_ib_ino_info_t	*next = NULL;
512 
513 	while (tmp) {
514 		next = tmp->ino_next;
515 		kmem_free(tmp, sizeof (px_ib_ino_info_t));
516 		tmp = next;
517 	}
518 }
519 
520 int
521 px_ib_ino_add_intr(px_t *px_p, px_ib_ino_info_t *ino_p, px_ih_t *ih_p)
522 {
523 	px_ib_t		*ib_p = ino_p->ino_ib_p;
524 	devino_t	ino = ino_p->ino_ino;
525 	sysino_t	sysino = ino_p->ino_sysino;
526 	dev_info_t	*dip = px_p->px_dip;
527 	cpuid_t		curr_cpu;
528 	hrtime_t	start_time;
529 	intr_state_t	intr_state;
530 	int		ret = DDI_SUCCESS;
531 
532 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
533 	ASSERT(ib_p == px_p->px_ib_p);
534 
535 	DBG(DBG_IB, dip, "px_ib_ino_add_intr ino=%x\n", ino_p->ino_ino);
536 
537 	/* Disable the interrupt */
538 	if ((ret = px_lib_intr_gettarget(dip, sysino,
539 	    &curr_cpu)) != DDI_SUCCESS) {
540 		DBG(DBG_IB, dip,
541 		    "px_ib_ino_add_intr px_intr_gettarget() failed\n");
542 
543 		return (ret);
544 	}
545 
546 	PX_INTR_DISABLE(dip, sysino);
547 
548 	/* Busy wait on pending interrupt */
549 	for (start_time = gethrtime(); !panicstr &&
550 	    ((ret = px_lib_intr_getstate(dip, sysino, &intr_state))
551 	    == DDI_SUCCESS) && (intr_state == INTR_DELIVERED_STATE); /* */) {
552 		if (gethrtime() - start_time > px_intrpend_timeout) {
553 			cmn_err(CE_WARN, "%s%d: px_ib_ino_add_intr: pending "
554 			    "sysino 0x%x(ino 0x%x) timeout",
555 			    ddi_driver_name(dip), ddi_get_instance(dip),
556 			    sysino, ino);
557 
558 			ret = DDI_FAILURE;
559 			break;
560 		}
561 	}
562 
563 	if (ret != DDI_SUCCESS) {
564 		DBG(DBG_IB, dip, "px_ib_ino_add_intr: failed, "
565 		    "ino 0x%x sysino 0x%x\n", ino, sysino);
566 
567 		return (ret);
568 	}
569 
570 	/* Link up px_ispec_t portion of the ppd */
571 	ih_p->ih_next = ino_p->ino_ih_head;
572 	ino_p->ino_ih_tail->ih_next = ih_p;
573 	ino_p->ino_ih_tail = ih_p;
574 
575 	ino_p->ino_ih_start = ino_p->ino_ih_head;
576 	ino_p->ino_ih_size++;
577 
578 	/*
579 	 * If the interrupt was previously blocked (left in pending state)
580 	 * because of jabber we need to clear the pending state in case the
581 	 * jabber has gone away.
582 	 */
583 	if (ino_p->ino_unclaimed > px_unclaimed_intr_max) {
584 		cmn_err(CE_WARN,
585 		    "%s%d: px_ib_ino_add_intr: ino 0x%x has been unblocked",
586 		    ddi_driver_name(dip), ddi_get_instance(dip), ino);
587 
588 		ino_p->ino_unclaimed = 0;
589 		if ((ret = px_lib_intr_setstate(dip, sysino,
590 		    INTR_IDLE_STATE)) != DDI_SUCCESS) {
591 			DBG(DBG_IB, px_p->px_dip,
592 			    "px_ib_ino_add_intr px_intr_setstate failed\n");
593 
594 			return (ret);
595 		}
596 	}
597 
598 	/* Re-enable interrupt */
599 	PX_INTR_ENABLE(dip, sysino, curr_cpu);
600 
601 	return (ret);
602 }
603 
604 /*
605  * Removes px_ispec_t from the ino's link list.
606  * uses hardware mutex to lock out interrupt threads.
607  * Side effects: interrupt belongs to that ino is turned off on return.
608  * if we are sharing PX slot with other inos, the caller needs
609  * to turn it back on.
610  */
611 int
612 px_ib_ino_rem_intr(px_t *px_p, px_ib_ino_info_t *ino_p, px_ih_t *ih_p)
613 {
614 	devino_t	ino = ino_p->ino_ino;
615 	sysino_t	sysino = ino_p->ino_sysino;
616 	dev_info_t	*dip = px_p->px_dip;
617 	px_ih_t		*ih_lst = ino_p->ino_ih_head;
618 	hrtime_t	start_time;
619 	intr_state_t	intr_state;
620 	int		i, ret = DDI_SUCCESS;
621 
622 	ASSERT(MUTEX_HELD(&ino_p->ino_ib_p->ib_ino_lst_mutex));
623 
624 	DBG(DBG_IB, px_p->px_dip, "px_ib_ino_rem_intr ino=%x\n",
625 	    ino_p->ino_ino);
626 
627 	/* Disable the interrupt */
628 	PX_INTR_DISABLE(px_p->px_dip, sysino);
629 
630 	if (ino_p->ino_ih_size == 1) {
631 		if (ih_lst != ih_p)
632 			goto not_found;
633 
634 		/* No need to set head/tail as ino_p will be freed */
635 		goto reset;
636 	}
637 
638 	/* Busy wait on pending interrupt */
639 	for (start_time = gethrtime(); !panicstr &&
640 	    ((ret = px_lib_intr_getstate(dip, sysino, &intr_state))
641 	    == DDI_SUCCESS) && (intr_state == INTR_DELIVERED_STATE); /* */) {
642 		if (gethrtime() - start_time > px_intrpend_timeout) {
643 			cmn_err(CE_WARN, "%s%d: px_ib_ino_rem_intr: pending "
644 			    "sysino 0x%x(ino 0x%x) timeout",
645 			    ddi_driver_name(dip), ddi_get_instance(dip),
646 			    sysino, ino);
647 
648 			ret = DDI_FAILURE;
649 			break;
650 		}
651 	}
652 
653 	if (ret != DDI_SUCCESS) {
654 		DBG(DBG_IB, dip, "px_ib_ino_rem_intr: failed, "
655 		    "ino 0x%x sysino 0x%x\n", ino, sysino);
656 
657 		return (ret);
658 	}
659 
660 	/*
661 	 * If the interrupt was previously blocked (left in pending state)
662 	 * because of jabber we need to clear the pending state in case the
663 	 * jabber has gone away.
664 	 */
665 	if (ino_p->ino_unclaimed > px_unclaimed_intr_max) {
666 		cmn_err(CE_WARN, "%s%d: px_ib_ino_rem_intr: "
667 		    "ino 0x%x has been unblocked",
668 		    ddi_driver_name(dip), ddi_get_instance(dip), ino);
669 
670 		ino_p->ino_unclaimed = 0;
671 		if ((ret = px_lib_intr_setstate(dip, sysino,
672 		    INTR_IDLE_STATE)) != DDI_SUCCESS) {
673 			DBG(DBG_IB, px_p->px_dip,
674 			    "px_ib_ino_rem_intr px_intr_setstate failed\n");
675 
676 			return (ret);
677 		}
678 	}
679 
680 	/* Search the link list for ih_p */
681 	for (i = 0; (i < ino_p->ino_ih_size) &&
682 	    (ih_lst->ih_next != ih_p); i++, ih_lst = ih_lst->ih_next);
683 
684 	if (ih_lst->ih_next != ih_p)
685 		goto not_found;
686 
687 	/* Remove ih_p from the link list and maintain the head/tail */
688 	ih_lst->ih_next = ih_p->ih_next;
689 
690 	if (ino_p->ino_ih_head == ih_p)
691 		ino_p->ino_ih_head = ih_p->ih_next;
692 	if (ino_p->ino_ih_tail == ih_p)
693 		ino_p->ino_ih_tail = ih_lst;
694 
695 	ino_p->ino_ih_start = ino_p->ino_ih_head;
696 
697 reset:
698 	if (ih_p->ih_config_handle)
699 		pci_config_teardown(&ih_p->ih_config_handle);
700 	if (ih_p->ih_ksp != NULL)
701 		kstat_delete(ih_p->ih_ksp);
702 
703 	kmem_free(ih_p, sizeof (px_ih_t));
704 	ino_p->ino_ih_size--;
705 
706 	return (ret);
707 
708 not_found:
709 	DBG(DBG_R_INTX, ino_p->ino_ib_p->ib_px_p->px_dip,
710 		"ino_p=%x does not have ih_p=%x\n", ino_p, ih_p);
711 
712 	return (DDI_FAILURE);
713 }
714 
715 px_ih_t *
716 px_ib_ino_locate_intr(px_ib_ino_info_t *ino_p, dev_info_t *rdip,
717     uint32_t inum, msiq_rec_type_t rec_type, msgcode_t msg_code)
718 {
719 	px_ih_t	*ih_lst = ino_p->ino_ih_head;
720 	int	i;
721 
722 	for (i = 0; i < ino_p->ino_ih_size; i++, ih_lst = ih_lst->ih_next) {
723 		if ((ih_lst->ih_dip == rdip) && (ih_lst->ih_inum == inum) &&
724 		    (ih_lst->ih_rec_type == rec_type) &&
725 		    (ih_lst->ih_msg_code == msg_code))
726 			return (ih_lst);
727 	}
728 
729 	return ((px_ih_t *)NULL);
730 }
731 
732 px_ih_t *
733 px_ib_alloc_ih(dev_info_t *rdip, uint32_t inum,
734     uint_t (*int_handler)(caddr_t int_handler_arg1, caddr_t int_handler_arg2),
735     caddr_t int_handler_arg1, caddr_t int_handler_arg2,
736     msiq_rec_type_t rec_type, msgcode_t msg_code)
737 {
738 	px_ih_t	*ih_p;
739 
740 	ih_p = kmem_alloc(sizeof (px_ih_t), KM_SLEEP);
741 	ih_p->ih_dip = rdip;
742 	ih_p->ih_inum = inum;
743 	ih_p->ih_intr_state = PX_INTR_STATE_DISABLE;
744 	ih_p->ih_handler = int_handler;
745 	ih_p->ih_handler_arg1 = int_handler_arg1;
746 	ih_p->ih_handler_arg2 = int_handler_arg2;
747 	ih_p->ih_config_handle = NULL;
748 	ih_p->ih_rec_type = rec_type;
749 	ih_p->ih_msg_code = msg_code;
750 	ih_p->ih_nsec = 0;
751 	ih_p->ih_ticks = 0;
752 
753 	/*
754 	 * Create pci_intrs::: kstats for all ih types except messages,
755 	 * which represent unusual conditions and don't need to be tracked.
756 	 */
757 	ih_p->ih_ksp = NULL;
758 	if (rec_type == 0 || rec_type == MSI32_REC || rec_type == MSI64_REC) {
759 		ih_p->ih_ksp = kstat_create("pci_intrs",
760 		    atomic_inc_32_nv(&ih_instance), "config", "interrupts",
761 		    KSTAT_TYPE_NAMED,
762 		    sizeof (px_ih_ks_template) / sizeof (kstat_named_t),
763 		    KSTAT_FLAG_VIRTUAL);
764 	}
765 	if (ih_p->ih_ksp != NULL) {
766 		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
767 		ih_p->ih_ksp->ks_lock = &ih_ks_template_lock;
768 		ih_p->ih_ksp->ks_data = &px_ih_ks_template;
769 		ih_p->ih_ksp->ks_private = ih_p;
770 		ih_p->ih_ksp->ks_update = ih_ks_update;
771 	}
772 
773 	return (ih_p);
774 }
775 
776 /*
777  * Only used for fixed or legacy interrupts.
778  */
779 int
780 px_ib_update_intr_state(px_t *px_p, dev_info_t *rdip,
781     uint_t inum, devino_t ino, uint_t new_intr_state)
782 {
783 	px_ib_t		*ib_p = px_p->px_ib_p;
784 	px_ib_ino_info_t *ino_p;
785 	px_ih_t		*ih_p;
786 	int		ret = DDI_FAILURE;
787 
788 	DBG(DBG_IB, px_p->px_dip, "ib_update_intr_state: %s%d "
789 	    "inum %x devino %x state %x\n", ddi_driver_name(rdip),
790 	    ddi_get_instance(rdip), inum, ino, new_intr_state);
791 
792 	mutex_enter(&ib_p->ib_ino_lst_mutex);
793 
794 	if (ino_p = px_ib_locate_ino(ib_p, ino)) {
795 		if (ih_p = px_ib_ino_locate_intr(ino_p, rdip, inum, 0, 0)) {
796 			ih_p->ih_intr_state = new_intr_state;
797 			ret = DDI_SUCCESS;
798 		}
799 	}
800 
801 	mutex_exit(&ib_p->ib_ino_lst_mutex);
802 	return (ret);
803 }
804