xref: /titanic_41/usr/src/uts/sun4/io/px/px_ib.c (revision b509e89b2befbaa42939abad9da1d7f5a8c6aaae)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * PX Interrupt Block implementation
30  */
31 
32 #include <sys/types.h>
33 #include <sys/kmem.h>
34 #include <sys/async.h>
35 #include <sys/systm.h>		/* panicstr */
36 #include <sys/spl.h>
37 #include <sys/sunddi.h>
38 #include <sys/machsystm.h>	/* intr_dist_add */
39 #include <sys/ddi_impldefs.h>
40 #include <sys/cpuvar.h>
41 #include <sys/time.h>
42 #include "px_obj.h"
43 
44 /*LINTLIBRARY*/
45 
46 static void px_ib_intr_redist(void *arg, int32_t weight_max, int32_t weight);
47 static void px_ib_cpu_ticks_to_ih_nsec(px_ib_t *ib_p, px_ih_t *ih_p,
48     uint32_t cpu_id);
49 static uint_t px_ib_intr_reset(void *arg);
50 static void px_fill_in_intr_devs(pcitool_intr_dev_t *dev, char *driver_name,
51     char *path_name, int instance);
52 
53 extern uint64_t xc_tick_jump_limit;
54 
55 int
56 px_ib_attach(px_t *px_p)
57 {
58 	dev_info_t	*dip = px_p->px_dip;
59 	px_ib_t		*ib_p;
60 	sysino_t	sysino;
61 	px_fault_t	*fault_p = &px_p->px_fault;
62 
63 	DBG(DBG_IB, dip, "px_ib_attach\n");
64 
65 	if (px_lib_intr_devino_to_sysino(px_p->px_dip,
66 	    px_p->px_inos[PX_INTR_PEC], &sysino) != DDI_SUCCESS)
67 		return (DDI_FAILURE);
68 
69 	/*
70 	 * Allocate interrupt block state structure and link it to
71 	 * the px state structure.
72 	 */
73 	ib_p = kmem_zalloc(sizeof (px_ib_t), KM_SLEEP);
74 	px_p->px_ib_p = ib_p;
75 	ib_p->ib_px_p = px_p;
76 	ib_p->ib_ino_lst = (px_ino_t *)NULL;
77 
78 	mutex_init(&ib_p->ib_intr_lock, NULL, MUTEX_DRIVER, NULL);
79 	mutex_init(&ib_p->ib_ino_lst_mutex, NULL, MUTEX_DRIVER, NULL);
80 
81 	bus_func_register(BF_TYPE_RESINTR, px_ib_intr_reset, ib_p);
82 
83 	intr_dist_add_weighted(px_ib_intr_redist, ib_p);
84 
85 	/*
86 	 * Initialize PEC fault data structure
87 	 */
88 	fault_p->px_fh_dip = dip;
89 	fault_p->px_fh_sysino = sysino;
90 	fault_p->px_err_func = px_err_dmc_pec_intr;
91 	fault_p->px_intr_ino = px_p->px_inos[PX_INTR_PEC];
92 
93 	return (DDI_SUCCESS);
94 }
95 
96 void
97 px_ib_detach(px_t *px_p)
98 {
99 	px_ib_t		*ib_p = px_p->px_ib_p;
100 	dev_info_t	*dip = px_p->px_dip;
101 
102 	DBG(DBG_IB, dip, "px_ib_detach\n");
103 
104 	bus_func_unregister(BF_TYPE_RESINTR, px_ib_intr_reset, ib_p);
105 	intr_dist_rem_weighted(px_ib_intr_redist, ib_p);
106 
107 	mutex_destroy(&ib_p->ib_ino_lst_mutex);
108 	mutex_destroy(&ib_p->ib_intr_lock);
109 
110 	px_ib_free_ino_all(ib_p);
111 
112 	px_p->px_ib_p = NULL;
113 	kmem_free(ib_p, sizeof (px_ib_t));
114 }
115 
116 void
117 px_ib_intr_enable(px_t *px_p, cpuid_t cpu_id, devino_t ino)
118 {
119 	px_ib_t		*ib_p = px_p->px_ib_p;
120 	sysino_t	sysino;
121 
122 	/*
123 	 * Determine the cpu for the interrupt
124 	 */
125 	mutex_enter(&ib_p->ib_intr_lock);
126 
127 	DBG(DBG_IB, px_p->px_dip,
128 	    "px_ib_intr_enable: ino=%x cpu_id=%x\n", ino, cpu_id);
129 
130 	if (px_lib_intr_devino_to_sysino(px_p->px_dip, ino,
131 	    &sysino) != DDI_SUCCESS) {
132 		DBG(DBG_IB, px_p->px_dip,
133 		    "px_ib_intr_enable: px_intr_devino_to_sysino() failed\n");
134 
135 		mutex_exit(&ib_p->ib_intr_lock);
136 		return;
137 	}
138 
139 	PX_INTR_ENABLE(px_p->px_dip, sysino, cpu_id);
140 	px_lib_intr_setstate(px_p->px_dip, sysino, INTR_IDLE_STATE);
141 
142 	mutex_exit(&ib_p->ib_intr_lock);
143 }
144 
145 /*ARGSUSED*/
146 void
147 px_ib_intr_disable(px_ib_t *ib_p, devino_t ino, int wait)
148 {
149 	sysino_t	sysino;
150 
151 	mutex_enter(&ib_p->ib_intr_lock);
152 
153 	DBG(DBG_IB, ib_p->ib_px_p->px_dip, "px_ib_intr_disable: ino=%x\n", ino);
154 
155 	/* Disable the interrupt */
156 	if (px_lib_intr_devino_to_sysino(ib_p->ib_px_p->px_dip, ino,
157 	    &sysino) != DDI_SUCCESS) {
158 		DBG(DBG_IB, ib_p->ib_px_p->px_dip,
159 		    "px_ib_intr_disable: px_intr_devino_to_sysino() failed\n");
160 
161 		mutex_exit(&ib_p->ib_intr_lock);
162 		return;
163 	}
164 
165 	PX_INTR_DISABLE(ib_p->ib_px_p->px_dip, sysino);
166 
167 	mutex_exit(&ib_p->ib_intr_lock);
168 }
169 
170 
171 void
172 px_ib_intr_dist_en(dev_info_t *dip, cpuid_t cpu_id, devino_t ino,
173     boolean_t wait_flag)
174 {
175 	uint32_t	old_cpu_id;
176 	sysino_t	sysino;
177 	intr_valid_state_t	enabled = 0;
178 	hrtime_t	start_time, prev, curr, interval, jump;
179 	hrtime_t	intr_timeout;
180 	intr_state_t	intr_state;
181 	int		e = DDI_SUCCESS;
182 
183 	DBG(DBG_IB, dip, "px_ib_intr_dist_en: ino=0x%x\n", ino);
184 
185 	if (px_lib_intr_devino_to_sysino(dip, ino, &sysino) != DDI_SUCCESS) {
186 		DBG(DBG_IB, dip, "px_ib_intr_dist_en: "
187 		    "px_intr_devino_to_sysino() failed, ino 0x%x\n", ino);
188 		return;
189 	}
190 
191 	/* Skip enabling disabled interrupts */
192 	if (px_lib_intr_getvalid(dip, sysino, &enabled) != DDI_SUCCESS) {
193 		DBG(DBG_IB, dip, "px_ib_intr_dist_en: px_intr_getvalid() "
194 		    "failed, sysino 0x%x\n", sysino);
195 		return;
196 	}
197 	if (!enabled)
198 		return;
199 
200 	/* Done if redistributed onto the same cpuid */
201 	if (px_lib_intr_gettarget(dip, sysino, &old_cpu_id) != DDI_SUCCESS) {
202 		DBG(DBG_IB, dip, "px_ib_intr_dist_en: "
203 		    "px_intr_gettarget() failed\n");
204 		return;
205 	}
206 	if (cpu_id == old_cpu_id)
207 		return;
208 
209 	if (!wait_flag)
210 		goto done;
211 
212 	/* Busy wait on pending interrupts */
213 	PX_INTR_DISABLE(dip, sysino);
214 
215 	intr_timeout = px_intrpend_timeout;
216 	jump = TICK_TO_NSEC(xc_tick_jump_limit);
217 
218 	for (curr = start_time = gethrtime(); !panicstr &&
219 	    ((e = px_lib_intr_getstate(dip, sysino, &intr_state)) ==
220 	    DDI_SUCCESS) &&
221 	    (intr_state == INTR_DELIVERED_STATE); /* */) {
222 		/*
223 		 * If we have a really large jump in hrtime, it is most
224 		 * probably because we entered the debugger (or OBP,
225 		 * in general). So, we adjust the timeout accordingly
226 		 * to prevent declaring an interrupt timeout. The
227 		 * master-interrupt mechanism in OBP should deliver
228 		 * the interrupts properly.
229 		 */
230 		prev = curr;
231 		curr = gethrtime();
232 		interval = curr - prev;
233 		if (interval > jump)
234 			intr_timeout += interval;
235 		if (curr - start_time > intr_timeout) {
236 			cmn_err(CE_WARN,
237 			    "%s%d: px_ib_intr_dist_en: sysino 0x%lx(ino 0x%x) "
238 			    "from cpu id 0x%x to 0x%x timeout",
239 			    ddi_driver_name(dip), ddi_get_instance(dip),
240 			    sysino, ino, old_cpu_id, cpu_id);
241 
242 			e = DDI_FAILURE;
243 			break;
244 		}
245 	}
246 
247 	if (e != DDI_SUCCESS)
248 		DBG(DBG_IB, dip, "px_ib_intr_dist_en: failed, "
249 		    "ino 0x%x sysino 0x%x\n", ino, sysino);
250 
251 done:
252 	PX_INTR_ENABLE(dip, sysino, cpu_id);
253 }
254 
255 static void
256 px_ib_cpu_ticks_to_ih_nsec(px_ib_t *ib_p, px_ih_t *ih_p, uint32_t cpu_id)
257 {
258 	extern kmutex_t pxintr_ks_template_lock;
259 	hrtime_t ticks;
260 
261 	/*
262 	 * Because we are updating two fields in ih_t we must lock
263 	 * pxintr_ks_template_lock to prevent someone from reading the
264 	 * kstats after we set ih_ticks to 0 and before we increment
265 	 * ih_nsec to compensate.
266 	 *
267 	 * We must also protect against the interrupt arriving and incrementing
268 	 * ih_ticks between the time we read it and when we reset it to 0.
269 	 * To do this we use atomic_swap.
270 	 */
271 
272 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
273 
274 	mutex_enter(&pxintr_ks_template_lock);
275 	ticks = atomic_swap_64(&ih_p->ih_ticks, 0);
276 	ih_p->ih_nsec += (uint64_t)tick2ns(ticks, cpu_id);
277 	mutex_exit(&pxintr_ks_template_lock);
278 }
279 
280 
281 /*
282  * Redistribute interrupts of the specified weight. The first call has a weight
283  * of weight_max, which can be used to trigger initialization for
284  * redistribution. The inos with weight [weight_max, inf.) should be processed
285  * on the "weight == weight_max" call.  This first call is followed by calls
286  * of decreasing weights, inos of that weight should be processed.  The final
287  * call specifies a weight of zero, this can be used to trigger processing of
288  * stragglers.
289  */
290 static void
291 px_ib_intr_redist(void *arg, int32_t weight_max, int32_t weight)
292 {
293 	px_ib_t		*ib_p = (px_ib_t *)arg;
294 	px_t		*px_p = ib_p->ib_px_p;
295 	dev_info_t	*dip = px_p->px_dip;
296 	px_ino_t	*ino_p;
297 	px_ino_pil_t	*ipil_p;
298 	px_ih_t		*ih_lst;
299 	int32_t		dweight = 0;
300 	int		i;
301 
302 	/* Redistribute internal interrupts */
303 	if (weight == 0) {
304 		mutex_enter(&ib_p->ib_intr_lock);
305 		px_ib_intr_dist_en(dip, intr_dist_cpuid(),
306 		    px_p->px_inos[PX_INTR_PEC], B_FALSE);
307 		mutex_exit(&ib_p->ib_intr_lock);
308 
309 		px_hp_intr_redist(px_p);
310 	}
311 
312 	/* Redistribute device interrupts */
313 	mutex_enter(&ib_p->ib_ino_lst_mutex);
314 
315 	for (ino_p = ib_p->ib_ino_lst; ino_p; ino_p = ino_p->ino_next_p) {
316 		uint32_t orig_cpuid;
317 
318 		/*
319 		 * Recomputes the sum of interrupt weights of devices that
320 		 * share the same ino upon first call marked by
321 		 * (weight == weight_max).
322 		 */
323 		if (weight == weight_max) {
324 			ino_p->ino_intr_weight = 0;
325 
326 			for (ipil_p = ino_p->ino_ipil_p; ipil_p;
327 			    ipil_p = ipil_p->ipil_next_p) {
328 				for (i = 0, ih_lst = ipil_p->ipil_ih_head;
329 				    i < ipil_p->ipil_ih_size; i++,
330 				    ih_lst = ih_lst->ih_next) {
331 					dweight = i_ddi_get_intr_weight(
332 					    ih_lst->ih_dip);
333 					if (dweight > 0)
334 						ino_p->ino_intr_weight +=
335 						    dweight;
336 				}
337 			}
338 		}
339 
340 		/*
341 		 * As part of redistributing weighted interrupts over cpus,
342 		 * nexus redistributes device interrupts and updates
343 		 * cpu weight. The purpose is for the most light weighted
344 		 * cpu to take the next interrupt and gain weight, therefore
345 		 * attention demanding device gains more cpu attention by
346 		 * making itself heavy.
347 		 */
348 		if ((weight == ino_p->ino_intr_weight) ||
349 		    ((weight >= weight_max) &&
350 		    (ino_p->ino_intr_weight >= weight_max))) {
351 			orig_cpuid = ino_p->ino_cpuid;
352 			if (cpu[orig_cpuid] == NULL)
353 				orig_cpuid = CPU->cpu_id;
354 
355 			/* select cpuid to target and mark ino established */
356 			ino_p->ino_cpuid = intr_dist_cpuid();
357 
358 			/* Add device weight to targeted cpu. */
359 			for (ipil_p = ino_p->ino_ipil_p; ipil_p;
360 			    ipil_p = ipil_p->ipil_next_p) {
361 				for (i = 0, ih_lst = ipil_p->ipil_ih_head;
362 				    i < ipil_p->ipil_ih_size; i++,
363 				    ih_lst = ih_lst->ih_next) {
364 
365 					dweight = i_ddi_get_intr_weight(
366 					    ih_lst->ih_dip);
367 					intr_dist_cpuid_add_device_weight(
368 					    ino_p->ino_cpuid, ih_lst->ih_dip,
369 					    dweight);
370 
371 					/*
372 					 * Different cpus may have different
373 					 * clock speeds. to account for this,
374 					 * whenever an interrupt is moved to a
375 					 * new CPU, we convert the accumulated
376 					 * ticks into nsec, based upon the clock
377 					 * rate of the prior CPU.
378 					 *
379 					 * It is possible that the prior CPU no
380 					 * longer exists. In this case, fall
381 					 * back to using this CPU's clock rate.
382 					 *
383 					 * Note that the value in ih_ticks has
384 					 * already been corrected for any power
385 					 * savings mode which might have been
386 					 * in effect.
387 					 */
388 					px_ib_cpu_ticks_to_ih_nsec(ib_p, ih_lst,
389 					    orig_cpuid);
390 				}
391 			}
392 
393 			/* enable interrupt on new targeted cpu */
394 			px_ib_intr_dist_en(dip, ino_p->ino_cpuid,
395 			    ino_p->ino_ino, B_TRUE);
396 		}
397 	}
398 	mutex_exit(&ib_p->ib_ino_lst_mutex);
399 }
400 
401 /*
402  * Reset interrupts to IDLE.  This function is called during
403  * panic handling after redistributing interrupts; it's needed to
404  * support dumping to network devices after 'sync' from OBP.
405  *
406  * N.B.  This routine runs in a context where all other threads
407  * are permanently suspended.
408  */
409 static uint_t
410 px_ib_intr_reset(void *arg)
411 {
412 	px_ib_t		*ib_p = (px_ib_t *)arg;
413 
414 	DBG(DBG_IB, ib_p->ib_px_p->px_dip, "px_ib_intr_reset\n");
415 
416 	if (px_lib_intr_reset(ib_p->ib_px_p->px_dip) != DDI_SUCCESS)
417 		return (BF_FATAL);
418 
419 	return (BF_NONE);
420 }
421 
422 /*
423  * Locate px_ino_t structure on ib_p->ib_ino_lst according to ino#
424  * returns NULL if not found.
425  */
426 px_ino_t *
427 px_ib_locate_ino(px_ib_t *ib_p, devino_t ino_num)
428 {
429 	px_ino_t	*ino_p = ib_p->ib_ino_lst;
430 
431 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
432 
433 	for (; ino_p && ino_p->ino_ino != ino_num; ino_p = ino_p->ino_next_p)
434 		;
435 
436 	return (ino_p);
437 }
438 
439 px_ino_pil_t *
440 px_ib_new_ino_pil(px_ib_t *ib_p, devino_t ino_num, uint_t pil, px_ih_t *ih_p)
441 {
442 	px_ino_pil_t	*ipil_p = kmem_zalloc(sizeof (px_ino_pil_t), KM_SLEEP);
443 	px_ino_t	*ino_p;
444 
445 	if ((ino_p = px_ib_locate_ino(ib_p, ino_num)) == NULL) {
446 		sysino_t	sysino;
447 
448 		if (px_lib_intr_devino_to_sysino(ib_p->ib_px_p->px_dip,
449 		    ino_num, &sysino) != DDI_SUCCESS)
450 			return (NULL);
451 
452 		ino_p = kmem_zalloc(sizeof (px_ino_t), KM_SLEEP);
453 
454 		ino_p->ino_next_p = ib_p->ib_ino_lst;
455 		ib_p->ib_ino_lst = ino_p;
456 
457 		ino_p->ino_ino = ino_num;
458 		ino_p->ino_sysino = sysino;
459 		ino_p->ino_ib_p = ib_p;
460 		ino_p->ino_unclaimed_intrs = 0;
461 		ino_p->ino_lopil = pil;
462 	}
463 
464 	ih_p->ih_next = ih_p;
465 	ipil_p->ipil_pil = pil;
466 	ipil_p->ipil_ih_head = ih_p;
467 	ipil_p->ipil_ih_tail = ih_p;
468 	ipil_p->ipil_ih_start = ih_p;
469 	ipil_p->ipil_ih_size = 1;
470 	ipil_p->ipil_ino_p = ino_p;
471 
472 	ipil_p->ipil_next_p = ino_p->ino_ipil_p;
473 	ino_p->ino_ipil_p = ipil_p;
474 	ino_p->ino_ipil_size++;
475 
476 	if (ino_p->ino_lopil > pil)
477 		ino_p->ino_lopil = pil;
478 
479 	return (ipil_p);
480 }
481 
482 void
483 px_ib_delete_ino_pil(px_ib_t *ib_p, px_ino_pil_t *ipil_p)
484 {
485 	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
486 	ushort_t	pil = ipil_p->ipil_pil;
487 	px_ino_pil_t	*prev, *next;
488 
489 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
490 
491 	if (ino_p->ino_ipil_p == ipil_p)
492 		ino_p->ino_ipil_p = ipil_p->ipil_next_p;
493 	else {
494 		for (prev = next = ino_p->ino_ipil_p; next != ipil_p;
495 		    prev = next, next = next->ipil_next_p)
496 			;
497 
498 		if (prev)
499 			prev->ipil_next_p = ipil_p->ipil_next_p;
500 	}
501 
502 	kmem_free(ipil_p, sizeof (px_ino_pil_t));
503 
504 	if ((--ino_p->ino_ipil_size) && (ino_p->ino_lopil == pil)) {
505 		for (next = ino_p->ino_ipil_p, pil = next->ipil_pil;
506 		    next; next = next->ipil_next_p) {
507 
508 			if (pil > next->ipil_pil)
509 				pil = next->ipil_pil;
510 		}
511 		/*
512 		 * Value stored in pil should be the lowest pil.
513 		 */
514 		ino_p->ino_lopil = pil;
515 	}
516 
517 	if (ino_p->ino_ipil_size)
518 		return;
519 
520 	if (ib_p->ib_ino_lst == ino_p)
521 		ib_p->ib_ino_lst = ino_p->ino_next_p;
522 	else {
523 		px_ino_t	*list = ib_p->ib_ino_lst;
524 
525 		for (; list->ino_next_p != ino_p; list = list->ino_next_p)
526 			;
527 		list->ino_next_p = ino_p->ino_next_p;
528 	}
529 }
530 
531 /*
532  * Free all ino when we are detaching.
533  */
534 void
535 px_ib_free_ino_all(px_ib_t *ib_p)
536 {
537 	px_ino_t	*ino_p = ib_p->ib_ino_lst;
538 	px_ino_t	*next = NULL;
539 
540 	while (ino_p) {
541 		next = ino_p->ino_next_p;
542 		kmem_free(ino_p, sizeof (px_ino_t));
543 		ino_p = next;
544 	}
545 }
546 
547 /*
548  * Locate px_ino_pil_t structure on ino_p->ino_ipil_p according to ino#
549  * returns NULL if not found.
550  */
551 px_ino_pil_t *
552 px_ib_ino_locate_ipil(px_ino_t *ino_p, uint_t pil)
553 {
554 	px_ino_pil_t	*ipil_p = ino_p->ino_ipil_p;
555 
556 	for (; ipil_p && ipil_p->ipil_pil != pil; ipil_p = ipil_p->ipil_next_p)
557 		;
558 
559 	return (ipil_p);
560 }
561 
562 int
563 px_ib_ino_add_intr(px_t *px_p, px_ino_pil_t *ipil_p, px_ih_t *ih_p)
564 {
565 	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
566 	px_ib_t		*ib_p = ino_p->ino_ib_p;
567 	devino_t	ino = ino_p->ino_ino;
568 	sysino_t	sysino = ino_p->ino_sysino;
569 	dev_info_t	*dip = px_p->px_dip;
570 	cpuid_t		curr_cpu;
571 	hrtime_t	start_time;
572 	intr_state_t	intr_state;
573 	int		ret = DDI_SUCCESS;
574 
575 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
576 	ASSERT(ib_p == px_p->px_ib_p);
577 
578 	DBG(DBG_IB, dip, "px_ib_ino_add_intr ino=%x\n", ino_p->ino_ino);
579 
580 	/* Disable the interrupt */
581 	if ((ret = px_lib_intr_gettarget(dip, sysino,
582 	    &curr_cpu)) != DDI_SUCCESS) {
583 		DBG(DBG_IB, dip,
584 		    "px_ib_ino_add_intr px_intr_gettarget() failed\n");
585 
586 		return (ret);
587 	}
588 
589 	PX_INTR_DISABLE(dip, sysino);
590 
591 	/* Busy wait on pending interrupt */
592 	for (start_time = gethrtime(); !panicstr &&
593 	    ((ret = px_lib_intr_getstate(dip, sysino, &intr_state))
594 	    == DDI_SUCCESS) && (intr_state == INTR_DELIVERED_STATE); /* */) {
595 		if (gethrtime() - start_time > px_intrpend_timeout) {
596 			cmn_err(CE_WARN, "%s%d: px_ib_ino_add_intr: pending "
597 			    "sysino 0x%lx(ino 0x%x) timeout",
598 			    ddi_driver_name(dip), ddi_get_instance(dip),
599 			    sysino, ino);
600 
601 			ret = DDI_FAILURE;
602 			break;
603 		}
604 	}
605 
606 	/*
607 	 * If the interrupt was previously blocked (left in pending state)
608 	 * because of jabber we need to clear the pending state in case the
609 	 * jabber has gone away.
610 	 */
611 	if (ino_p->ino_unclaimed_intrs > px_unclaimed_intr_max) {
612 		cmn_err(CE_WARN,
613 		    "%s%d: px_ib_ino_add_intr: ino 0x%x has been unblocked",
614 		    ddi_driver_name(dip), ddi_get_instance(dip), ino);
615 
616 		ino_p->ino_unclaimed_intrs = 0;
617 		ret = px_lib_intr_setstate(dip, sysino, INTR_IDLE_STATE);
618 	}
619 
620 	if (ret != DDI_SUCCESS) {
621 		DBG(DBG_IB, dip, "px_ib_ino_add_intr: failed, "
622 		    "ino 0x%x sysino 0x%x\n", ino, sysino);
623 
624 		return (ret);
625 	}
626 
627 	/* Link up px_ih_t */
628 	ih_p->ih_next = ipil_p->ipil_ih_head;
629 	ipil_p->ipil_ih_tail->ih_next = ih_p;
630 	ipil_p->ipil_ih_tail = ih_p;
631 
632 	ipil_p->ipil_ih_start = ipil_p->ipil_ih_head;
633 	ipil_p->ipil_ih_size++;
634 
635 	/* Re-enable interrupt */
636 	PX_INTR_ENABLE(dip, sysino, curr_cpu);
637 
638 	return (ret);
639 }
640 
641 /*
642  * Removes px_ih_t from the ino's link list.
643  * uses hardware mutex to lock out interrupt threads.
644  * Side effects: interrupt belongs to that ino is turned off on return.
645  * if we are sharing PX slot with other inos, the caller needs
646  * to turn it back on.
647  */
648 int
649 px_ib_ino_rem_intr(px_t *px_p, px_ino_pil_t *ipil_p, px_ih_t *ih_p)
650 {
651 	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
652 	devino_t	ino = ino_p->ino_ino;
653 	sysino_t	sysino = ino_p->ino_sysino;
654 	dev_info_t	*dip = px_p->px_dip;
655 	px_ih_t		*ih_lst = ipil_p->ipil_ih_head;
656 	hrtime_t	start_time;
657 	intr_state_t	intr_state;
658 	int		i, ret = DDI_SUCCESS;
659 
660 	ASSERT(MUTEX_HELD(&ino_p->ino_ib_p->ib_ino_lst_mutex));
661 
662 	DBG(DBG_IB, px_p->px_dip, "px_ib_ino_rem_intr ino=%x\n",
663 	    ino_p->ino_ino);
664 
665 	/* Disable the interrupt */
666 	PX_INTR_DISABLE(px_p->px_dip, sysino);
667 
668 	if (ipil_p->ipil_ih_size == 1) {
669 		if (ih_lst != ih_p)
670 			goto not_found;
671 
672 		/* No need to set head/tail as ino_p will be freed */
673 		goto reset;
674 	}
675 
676 	/* Busy wait on pending interrupt */
677 	for (start_time = gethrtime(); !panicstr &&
678 	    ((ret = px_lib_intr_getstate(dip, sysino, &intr_state))
679 	    == DDI_SUCCESS) && (intr_state == INTR_DELIVERED_STATE); /* */) {
680 		if (gethrtime() - start_time > px_intrpend_timeout) {
681 			cmn_err(CE_WARN, "%s%d: px_ib_ino_rem_intr: pending "
682 			    "sysino 0x%lx(ino 0x%x) timeout",
683 			    ddi_driver_name(dip), ddi_get_instance(dip),
684 			    sysino, ino);
685 
686 			ret = DDI_FAILURE;
687 			break;
688 		}
689 	}
690 
691 	/*
692 	 * If the interrupt was previously blocked (left in pending state)
693 	 * because of jabber we need to clear the pending state in case the
694 	 * jabber has gone away.
695 	 */
696 	if (ino_p->ino_unclaimed_intrs > px_unclaimed_intr_max) {
697 		cmn_err(CE_WARN, "%s%d: px_ib_ino_rem_intr: "
698 		    "ino 0x%x has been unblocked",
699 		    ddi_driver_name(dip), ddi_get_instance(dip), ino);
700 
701 		ino_p->ino_unclaimed_intrs = 0;
702 		ret = px_lib_intr_setstate(dip, sysino, INTR_IDLE_STATE);
703 	}
704 
705 	if (ret != DDI_SUCCESS) {
706 		DBG(DBG_IB, dip, "px_ib_ino_rem_intr: failed, "
707 		    "ino 0x%x sysino 0x%x\n", ino, sysino);
708 
709 		return (ret);
710 	}
711 
712 	/* Search the link list for ih_p */
713 	for (i = 0; (i < ipil_p->ipil_ih_size) &&
714 	    (ih_lst->ih_next != ih_p); i++, ih_lst = ih_lst->ih_next)
715 		;
716 
717 	if (ih_lst->ih_next != ih_p)
718 		goto not_found;
719 
720 	/* Remove ih_p from the link list and maintain the head/tail */
721 	ih_lst->ih_next = ih_p->ih_next;
722 
723 	if (ipil_p->ipil_ih_head == ih_p)
724 		ipil_p->ipil_ih_head = ih_p->ih_next;
725 	if (ipil_p->ipil_ih_tail == ih_p)
726 		ipil_p->ipil_ih_tail = ih_lst;
727 
728 	ipil_p->ipil_ih_start = ipil_p->ipil_ih_head;
729 
730 reset:
731 	if (ih_p->ih_config_handle)
732 		pci_config_teardown(&ih_p->ih_config_handle);
733 	if (ih_p->ih_ksp != NULL)
734 		kstat_delete(ih_p->ih_ksp);
735 
736 	kmem_free(ih_p, sizeof (px_ih_t));
737 	ipil_p->ipil_ih_size--;
738 
739 	return (ret);
740 
741 not_found:
742 	DBG(DBG_R_INTX, ino_p->ino_ib_p->ib_px_p->px_dip,
743 	    "ino_p=%x does not have ih_p=%x\n", ino_p, ih_p);
744 
745 	return (DDI_FAILURE);
746 }
747 
748 px_ih_t *
749 px_ib_intr_locate_ih(px_ino_pil_t *ipil_p, dev_info_t *rdip,
750     uint32_t inum, msiq_rec_type_t rec_type, msgcode_t msg_code)
751 {
752 	px_ih_t	*ih_p = ipil_p->ipil_ih_head;
753 	int	i;
754 
755 	for (i = 0; i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next) {
756 		if ((ih_p->ih_dip == rdip) && (ih_p->ih_inum == inum) &&
757 		    (ih_p->ih_rec_type == rec_type) &&
758 		    (ih_p->ih_msg_code == msg_code))
759 			return (ih_p);
760 	}
761 
762 	return ((px_ih_t *)NULL);
763 }
764 
765 px_ih_t *
766 px_ib_alloc_ih(dev_info_t *rdip, uint32_t inum,
767     uint_t (*int_handler)(caddr_t int_handler_arg1, caddr_t int_handler_arg2),
768     caddr_t int_handler_arg1, caddr_t int_handler_arg2,
769     msiq_rec_type_t rec_type, msgcode_t msg_code)
770 {
771 	px_ih_t	*ih_p;
772 
773 	ih_p = kmem_alloc(sizeof (px_ih_t), KM_SLEEP);
774 	ih_p->ih_dip = rdip;
775 	ih_p->ih_inum = inum;
776 	ih_p->ih_intr_state = PX_INTR_STATE_DISABLE;
777 	ih_p->ih_handler = int_handler;
778 	ih_p->ih_handler_arg1 = int_handler_arg1;
779 	ih_p->ih_handler_arg2 = int_handler_arg2;
780 	ih_p->ih_config_handle = NULL;
781 	ih_p->ih_rec_type = rec_type;
782 	ih_p->ih_msg_code = msg_code;
783 	ih_p->ih_nsec = 0;
784 	ih_p->ih_ticks = 0;
785 	ih_p->ih_ksp = NULL;
786 
787 	return (ih_p);
788 }
789 
790 int
791 px_ib_update_intr_state(px_t *px_p, dev_info_t *rdip,
792     uint_t inum, devino_t ino, uint_t pil,
793     uint_t new_intr_state, msiq_rec_type_t rec_type,
794     msgcode_t msg_code)
795 {
796 	px_ib_t		*ib_p = px_p->px_ib_p;
797 	px_ino_t	*ino_p;
798 	px_ino_pil_t	*ipil_p;
799 	px_ih_t		*ih_p;
800 	int		ret = DDI_FAILURE;
801 
802 	DBG(DBG_IB, px_p->px_dip, "px_ib_update_intr_state: %s%d "
803 	    "inum %x devino %x pil %x state %x\n", ddi_driver_name(rdip),
804 	    ddi_get_instance(rdip), inum, ino, pil, new_intr_state);
805 
806 	mutex_enter(&ib_p->ib_ino_lst_mutex);
807 
808 	ino_p = px_ib_locate_ino(ib_p, ino);
809 	if (ino_p && (ipil_p = px_ib_ino_locate_ipil(ino_p, pil))) {
810 		if (ih_p = px_ib_intr_locate_ih(ipil_p, rdip, inum, rec_type,
811 		    msg_code)) {
812 			ih_p->ih_intr_state = new_intr_state;
813 			ret = DDI_SUCCESS;
814 		}
815 	}
816 
817 	mutex_exit(&ib_p->ib_ino_lst_mutex);
818 	return (ret);
819 }
820 
821 
822 static void
823 px_fill_in_intr_devs(pcitool_intr_dev_t *dev, char *driver_name,
824     char *path_name, int instance)
825 {
826 	(void) strncpy(dev->driver_name, driver_name, MAXMODCONFNAME-1);
827 	dev->driver_name[MAXMODCONFNAME] = '\0';
828 	(void) strncpy(dev->path, path_name, MAXPATHLEN-1);
829 	dev->dev_inst = instance;
830 }
831 
832 
833 /*
834  * Return the dips or number of dips associated with a given interrupt block.
835  * Size of dips array arg is passed in as dips_ret arg.
836  * Number of dips returned is returned in dips_ret arg.
837  * Array of dips gets returned in the dips argument.
838  * Function returns number of dips existing for the given interrupt block.
839  *
840  * Note: this function assumes an enabled/valid INO, which is why it returns
841  * the px node and (Internal) when it finds no other devices (and *devs_ret > 0)
842  */
843 uint8_t
844 pxtool_ib_get_ino_devs(
845     px_t *px_p, uint32_t ino, uint8_t *devs_ret, pcitool_intr_dev_t *devs)
846 {
847 	px_ib_t		*ib_p = px_p->px_ib_p;
848 	px_ino_t	*ino_p;
849 	px_ino_pil_t	*ipil_p;
850 	px_ih_t 	*ih_p;
851 	uint32_t 	num_devs = 0;
852 	char		pathname[MAXPATHLEN];
853 	int		i, j;
854 
855 	mutex_enter(&ib_p->ib_ino_lst_mutex);
856 	ino_p = px_ib_locate_ino(ib_p, ino);
857 	if (ino_p != NULL) {
858 		for (j = 0, ipil_p = ino_p->ino_ipil_p; ipil_p;
859 		    ipil_p = ipil_p->ipil_next_p) {
860 			num_devs += ipil_p->ipil_ih_size;
861 
862 			for (i = 0, ih_p = ipil_p->ipil_ih_head;
863 			    ((i < ipil_p->ipil_ih_size) && (i < *devs_ret));
864 			    i++, j++, ih_p = ih_p->ih_next) {
865 				(void) ddi_pathname(ih_p->ih_dip, pathname);
866 				px_fill_in_intr_devs(&devs[i],
867 				    (char *)ddi_driver_name(ih_p->ih_dip),
868 				    pathname, ddi_get_instance(ih_p->ih_dip));
869 			}
870 		}
871 
872 		*devs_ret = j;
873 	} else if (*devs_ret > 0) {
874 		(void) ddi_pathname(px_p->px_dip, pathname);
875 		strcat(pathname, " (Internal)");
876 		px_fill_in_intr_devs(&devs[0],
877 		    (char *)ddi_driver_name(px_p->px_dip),  pathname,
878 		    ddi_get_instance(px_p->px_dip));
879 		num_devs = *devs_ret = 1;
880 	}
881 
882 	mutex_exit(&ib_p->ib_ino_lst_mutex);
883 
884 	return (num_devs);
885 }
886 
887 
888 void
889 px_ib_log_new_cpu(px_ib_t *ib_p, uint32_t old_cpu_id, uint32_t new_cpu_id,
890     uint32_t ino)
891 {
892 	px_ino_t	*ino_p;
893 	px_ino_pil_t	*ipil_p;
894 	px_ih_t 	*ih_p;
895 	int		i;
896 
897 	mutex_enter(&ib_p->ib_ino_lst_mutex);
898 
899 	/* Log in OS data structures the new CPU. */
900 	if (ino_p = px_ib_locate_ino(ib_p, ino)) {
901 
902 		/* Log in OS data structures the new CPU. */
903 		ino_p->ino_cpuid = new_cpu_id;
904 
905 		for (ipil_p = ino_p->ino_ipil_p; ipil_p;
906 		    ipil_p = ipil_p->ipil_next_p) {
907 			for (i = 0, ih_p = ipil_p->ipil_ih_head;
908 			    (i < ipil_p->ipil_ih_size);
909 			    i++, ih_p = ih_p->ih_next) {
910 				/*
911 				 * Account for any residual time
912 				 * to be logged for old cpu.
913 				 */
914 				px_ib_cpu_ticks_to_ih_nsec(ib_p,
915 				    ih_p, old_cpu_id);
916 			}
917 		}
918 	}
919 
920 	mutex_exit(&ib_p->ib_ino_lst_mutex);
921 }
922