xref: /titanic_41/usr/src/uts/sun4u/io/pci/pci_ib.c (revision 28cdc3d776761766afeb198769d1b70ed7e0f2e1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * PCI Interrupt Block (RISCx) implementation
31  *	initialization
32  *	interrupt enable/disable/clear and mapping register manipulation
33  */
34 
35 #include <sys/types.h>
36 #include <sys/kmem.h>
37 #include <sys/async.h>
38 #include <sys/systm.h>		/* panicstr */
39 #include <sys/spl.h>
40 #include <sys/sunddi.h>
41 #include <sys/machsystm.h>	/* intr_dist_add */
42 #include <sys/ddi_impldefs.h>
43 #include <sys/clock.h>
44 #include <sys/cpuvar.h>
45 #include <sys/pci/pci_obj.h>
46 
47 #ifdef _STARFIRE
48 #include <sys/starfire.h>
49 #endif /* _STARFIRE */
50 
51 /*LINTLIBRARY*/
52 static uint_t ib_intr_reset(void *arg);
53 
54 void
55 ib_create(pci_t *pci_p)
56 {
57 	dev_info_t *dip = pci_p->pci_dip;
58 	ib_t *ib_p;
59 	uintptr_t a;
60 	int i;
61 
62 	/*
63 	 * Allocate interrupt block state structure and link it to
64 	 * the pci state structure.
65 	 */
66 	ib_p = kmem_zalloc(sizeof (ib_t), KM_SLEEP);
67 	pci_p->pci_ib_p = ib_p;
68 	ib_p->ib_pci_p = pci_p;
69 
70 	a = pci_ib_setup(ib_p);
71 
72 	/*
73 	 * Determine virtual addresses of interrupt mapping, clear and diag
74 	 * registers that have common offsets.
75 	 */
76 	ib_p->ib_slot_clear_intr_regs =
77 		a + COMMON_IB_SLOT_CLEAR_INTR_REG_OFFSET;
78 	ib_p->ib_intr_retry_timer_reg =
79 		(uint64_t *)(a + COMMON_IB_INTR_RETRY_TIMER_OFFSET);
80 	ib_p->ib_slot_intr_state_diag_reg =
81 		(uint64_t *)(a + COMMON_IB_SLOT_INTR_STATE_DIAG_REG);
82 	ib_p->ib_obio_intr_state_diag_reg =
83 		(uint64_t *)(a + COMMON_IB_OBIO_INTR_STATE_DIAG_REG);
84 
85 	if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
86 		ib_p->ib_upa_imr[0] = (volatile uint64_t *)
87 				(a + COMMON_IB_UPA0_INTR_MAP_REG_OFFSET);
88 		ib_p->ib_upa_imr[1] = (volatile uint64_t *)
89 				(a + COMMON_IB_UPA1_INTR_MAP_REG_OFFSET);
90 	}
91 
92 	DEBUG2(DBG_ATTACH, dip, "ib_create: slot_imr=%x, slot_cir=%x\n",
93 		ib_p->ib_slot_intr_map_regs, ib_p->ib_obio_intr_map_regs);
94 	DEBUG2(DBG_ATTACH, dip, "ib_create: obio_imr=%x, obio_cir=%x\n",
95 		ib_p->ib_slot_clear_intr_regs, ib_p->ib_obio_clear_intr_regs);
96 	DEBUG2(DBG_ATTACH, dip, "ib_create: upa0_imr=%x, upa1_imr=%x\n",
97 		ib_p->ib_upa_imr[0], ib_p->ib_upa_imr[1]);
98 	DEBUG3(DBG_ATTACH, dip,
99 		"ib_create: retry_timer=%x, obio_diag=%x slot_diag=%x\n",
100 		ib_p->ib_intr_retry_timer_reg,
101 		ib_p->ib_obio_intr_state_diag_reg,
102 		ib_p->ib_slot_intr_state_diag_reg);
103 
104 	ib_p->ib_ino_lst = (ib_ino_info_t *)NULL;
105 	mutex_init(&ib_p->ib_intr_lock, NULL, MUTEX_DRIVER, NULL);
106 	mutex_init(&ib_p->ib_ino_lst_mutex, NULL, MUTEX_DRIVER, NULL);
107 
108 	DEBUG1(DBG_ATTACH, dip, "ib_create: numproxy=%x\n",
109 		pci_p->pci_numproxy);
110 	for (i = 1; i <= pci_p->pci_numproxy; i++) {
111 		set_intr_mapping_reg(pci_p->pci_id,
112 			(uint64_t *)ib_p->ib_upa_imr[i - 1], i);
113 	}
114 
115 	ib_configure(ib_p);
116 	bus_func_register(BF_TYPE_RESINTR, ib_intr_reset, ib_p);
117 }
118 
119 void
120 ib_destroy(pci_t *pci_p)
121 {
122 	ib_t *ib_p = pci_p->pci_ib_p;
123 	dev_info_t *dip = pci_p->pci_dip;
124 
125 	DEBUG0(DBG_IB, dip, "ib_destroy\n");
126 	bus_func_unregister(BF_TYPE_RESINTR, ib_intr_reset, ib_p);
127 
128 	intr_dist_rem_weighted(ib_intr_dist_all, ib_p);
129 	mutex_destroy(&ib_p->ib_ino_lst_mutex);
130 	mutex_destroy(&ib_p->ib_intr_lock);
131 
132 	ib_free_ino_all(ib_p);
133 
134 	kmem_free(ib_p, sizeof (ib_t));
135 	pci_p->pci_ib_p = NULL;
136 }
137 
/*
 * Program the interrupt retry timer register of this interrupt block with
 * the value of pci_intr_retry_intv.  Called from ib_create() once the
 * register address has been set up.
 */
void
ib_configure(ib_t *ib_p)
{
	/* XXX could be different between psycho and schizo */
	*ib_p->ib_intr_retry_timer_reg = pci_intr_retry_intv;
}
144 
145 /*
146  * can only used for psycho internal interrupts thermal, power,
147  * ue, ce, pbm
148  */
149 void
150 ib_intr_enable(pci_t *pci_p, ib_ino_t ino)
151 {
152 	ib_t *ib_p = pci_p->pci_ib_p;
153 	ib_mondo_t mondo = IB_INO_TO_MONDO(ib_p, ino);
154 	volatile uint64_t *imr_p = ib_intr_map_reg_addr(ib_p, ino);
155 	uint_t cpu_id;
156 
157 	/*
158 	 * Determine the cpu for the interrupt.
159 	 */
160 	mutex_enter(&ib_p->ib_intr_lock);
161 	cpu_id = intr_dist_cpuid();
162 #ifdef _STARFIRE
163 	cpu_id = pc_translate_tgtid(IB2CB(ib_p)->cb_ittrans_cookie, cpu_id,
164 		IB_GET_MAPREG_INO(ino));
165 #endif /* _STARFIRE */
166 	DEBUG2(DBG_IB, pci_p->pci_dip,
167 		"ib_intr_enable: ino=%x cpu_id=%x\n", ino, cpu_id);
168 
169 	*imr_p = ib_get_map_reg(mondo, cpu_id);
170 	IB_INO_INTR_CLEAR(ib_clear_intr_reg_addr(ib_p, ino));
171 	mutex_exit(&ib_p->ib_intr_lock);
172 }
173 
174 /*
175  * Disable the interrupt via its interrupt mapping register.
176  * Can only be used for internal interrupts: thermal, power, ue, ce, pbm.
177  * If called under interrupt context, wait should be set to 0
178  */
179 void
180 ib_intr_disable(ib_t *ib_p, ib_ino_t ino, int wait)
181 {
182 	volatile uint64_t *imr_p = ib_intr_map_reg_addr(ib_p, ino);
183 	volatile uint64_t *state_reg_p = IB_INO_INTR_STATE_REG(ib_p, ino);
184 	hrtime_t start_time;
185 
186 	/* disable the interrupt */
187 	mutex_enter(&ib_p->ib_intr_lock);
188 	IB_INO_INTR_OFF(imr_p);
189 	*imr_p;	/* flush previous write */
190 	mutex_exit(&ib_p->ib_intr_lock);
191 
192 	if (!wait)
193 		goto wait_done;
194 
195 	start_time = gethrtime();
196 	/* busy wait if there is interrupt being processed */
197 	while (IB_INO_INTR_PENDING(state_reg_p, ino) && !panicstr) {
198 		if (gethrtime() - start_time > pci_intrpend_timeout) {
199 			pbm_t *pbm_p = ib_p->ib_pci_p->pci_pbm_p;
200 			cmn_err(CE_WARN, "%s:%s: ib_intr_disable timeout %x",
201 				pbm_p->pbm_nameinst_str,
202 				pbm_p->pbm_nameaddr_str, ino);
203 				break;
204 		}
205 	}
206 wait_done:
207 	IB_INO_INTR_PEND(ib_clear_intr_reg_addr(ib_p, ino));
208 #ifdef _STARFIRE
209 	pc_ittrans_cleanup(IB2CB(ib_p)->cb_ittrans_cookie,
210 	    (volatile uint64_t *)(uintptr_t)ino);
211 #endif /* _STARFIRE */
212 }
213 
214 /* can only used for psycho internal interrupts thermal, power, ue, ce, pbm */
215 void
216 ib_nintr_clear(ib_t *ib_p, ib_ino_t ino)
217 {
218 	uint64_t *clr_reg = ib_clear_intr_reg_addr(ib_p, ino);
219 	IB_INO_INTR_CLEAR(clr_reg);
220 }
221 
222 /*
223  * distribute PBM and UPA interrupts. ino is set to 0 by caller if we
224  * are dealing with UPA interrupts (without inos).
225  */
226 void
227 ib_intr_dist_nintr(ib_t *ib_p, ib_ino_t ino, volatile uint64_t *imr_p)
228 {
229 	volatile uint64_t imr = *imr_p;
230 	uint32_t cpu_id;
231 
232 	if (!IB_INO_INTR_ISON(imr))
233 		return;
234 
235 	cpu_id = intr_dist_cpuid();
236 
237 #ifdef _STARFIRE
238 	if (ino) {
239 		cpu_id = pc_translate_tgtid(IB2CB(ib_p)->cb_ittrans_cookie,
240 			cpu_id, IB_GET_MAPREG_INO(ino));
241 	}
242 #else /* _STARFIRE */
243 	if (ib_map_reg_get_cpu(*imr_p) == cpu_id)
244 		return;
245 #endif /* _STARFIRE */
246 
247 	*imr_p = ib_get_map_reg(IB_IMR2MONDO(imr), cpu_id);
248 	imr = *imr_p;	/* flush previous write */
249 }
250 
251 /*
252  * Converts into nsec, ticks logged with a given CPU.  Adds nsec to ih.
253  */
254 /*ARGSUSED*/
255 void
256 ib_cpu_ticks_to_ih_nsec(ib_t *ib_p, ih_t *ih_p, uint32_t cpu_id)
257 {
258 	extern kmutex_t pciintr_ks_template_lock;
259 	hrtime_t ticks;
260 
261 	/*
262 	 * Because we are updating two fields in ih_t we must lock
263 	 * pciintr_ks_template_lock to prevent someone from reading the
264 	 * kstats after we set ih_ticks to 0 and before we increment
265 	 * ih_nsec to compensate.
266 	 *
267 	 * We must also protect against the interrupt arriving and incrementing
268 	 * ih_ticks between the time we read it and when we reset it to 0.
269 	 * To do this we use atomic_swap.
270 	 */
271 
272 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
273 
274 	mutex_enter(&pciintr_ks_template_lock);
275 	ticks = atomic_swap_64(&ih_p->ih_ticks, 0);
276 	ih_p->ih_nsec += (uint64_t)tick2ns(ticks, cpu_id);
277 	mutex_exit(&pciintr_ks_template_lock);
278 }
279 
280 static void
281 ib_intr_dist(ib_t *ib_p, ib_ino_info_t *ino_p)
282 {
283 	uint32_t cpu_id = ino_p->ino_cpuid;
284 	ib_ino_t ino = ino_p->ino_ino;
285 	volatile uint64_t imr, *imr_p, *state_reg;
286 	hrtime_t start_time;
287 
288 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
289 	imr_p = ib_intr_map_reg_addr(ib_p, ino);
290 	state_reg = IB_INO_INTR_STATE_REG(ib_p, ino);
291 
292 #ifdef _STARFIRE
293 	/*
294 	 * For Starfire it is a pain to check the current target for
295 	 * the mondo since we have to read the PC asics ITTR slot
296 	 * assigned to this mondo. It will be much easier to assume
297 	 * the current target is always different and do the target
298 	 * reprogram all the time.
299 	 */
300 	cpu_id = pc_translate_tgtid(IB2CB(ib_p)->cb_ittrans_cookie, cpu_id,
301 		IB_GET_MAPREG_INO(ino));
302 #else
303 	if (ib_map_reg_get_cpu(*imr_p) == cpu_id) /* same cpu, no reprog */
304 		return;
305 #endif /* _STARFIRE */
306 
307 	/* disable interrupt, this could disrupt devices sharing our slot */
308 	IB_INO_INTR_OFF(imr_p);
309 	imr = *imr_p;	/* flush previous write */
310 
311 	/* busy wait if there is interrupt being processed */
312 	start_time = gethrtime();
313 	while (IB_INO_INTR_PENDING(state_reg, ino) && !panicstr) {
314 		if (gethrtime() - start_time > pci_intrpend_timeout) {
315 			pbm_t *pbm_p = ib_p->ib_pci_p->pci_pbm_p;
316 			cmn_err(CE_WARN, "%s:%s: ib_intr_dist(%p,%x) timeout",
317 				pbm_p->pbm_nameinst_str,
318 				pbm_p->pbm_nameaddr_str,
319 				imr_p, IB_INO_TO_MONDO(ib_p, ino));
320 			break;
321 		}
322 	}
323 	*imr_p = ib_get_map_reg(IB_IMR2MONDO(imr), cpu_id);
324 	imr = *imr_p;	/* flush previous write */
325 }
326 
/*
 * Redistribute interrupts of the specified weight. The first call has a weight
 * of weight_max, which can be used to trigger initialization for
 * redistribution. The inos with weight [weight_max, inf.) should be processed
 * on the "weight == weight_max" call.  This first call is followed by calls
 * of decreasing weights, inos of that weight should be processed.  The final
 * call specifies a weight of zero, this can be used to trigger processing of
 * stragglers.
 *
 * arg is the ib_t of the interrupt block (this function is the callback
 * removed with intr_dist_rem_weighted() in ib_destroy()).
 *
 * On the weight == 0 call the two UPA mapping registers are redistributed
 * under ib_intr_lock (non-XMITS chips only).  All ino list processing is
 * done under ib_ino_lst_mutex.
 */
void
ib_intr_dist_all(void *arg, int32_t weight_max, int32_t weight)
{
	ib_t *ib_p = (ib_t *)arg;
	pci_t *pci_p = ib_p->ib_pci_p;
	ib_ino_info_t *ino_p;
	ih_t *ih_lst;
	int32_t dweight;
	int i;

	/* final call: redistribute the UPA interrupts, which have no ino */
	if (weight == 0) {
		mutex_enter(&ib_p->ib_intr_lock);
		if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
			for (i = 0; i < 2; i++)
				ib_intr_dist_nintr(ib_p, 0,
				    ib_p->ib_upa_imr[i]);
		}
		mutex_exit(&ib_p->ib_intr_lock);
	}

	mutex_enter(&ib_p->ib_ino_lst_mutex);

	/* Perform special processing for first call of a redistribution. */
	if (weight == weight_max) {
		for (ino_p = ib_p->ib_ino_lst; ino_p; ino_p = ino_p->ino_next) {

			/*
			 * Clear ino_established of each ino on first call.
			 * The ino_established field may be used by a pci
			 * nexus driver's pci_intr_dist_cpuid implementation
			 * when detection of established pci slot-cpu binding
			 * for multi function pci cards.
			 */
			ino_p->ino_established = 0;

			/*
			 * recompute the ino_intr_weight based on the device
			 * weight of all devinfo nodes sharing the ino (this
			 * will allow us to pick up new weights established by
			 * i_ddi_set_intr_weight()).
			 */
			ino_p->ino_intr_weight = 0;
			for (i = 0, ih_lst = ino_p->ino_ih_head;
			    i < ino_p->ino_ih_size;
			    i++, ih_lst = ih_lst->ih_next) {
				dweight = i_ddi_get_intr_weight(ih_lst->ih_dip);
				/* only positive weights contribute */
				if (dweight > 0)
					ino_p->ino_intr_weight += dweight;
			}
		}
	}

	for (ino_p = ib_p->ib_ino_lst; ino_p; ino_p = ino_p->ino_next) {
		uint32_t orig_cpuid;

		/*
		 * Get the weight of the ino and determine if we are going to
		 * process call.  We wait until an ib_intr_dist_all call of
		 * the proper weight occurs to support redistribution of all
		 * heavy weighted interrupts first (across all nexus driver
		 * instances).  This is done to ensure optimal
		 * INTR_WEIGHTED_DIST behavior.
		 */
		if ((weight == ino_p->ino_intr_weight) ||
		    ((weight >= weight_max) &&
		    (ino_p->ino_intr_weight >= weight_max))) {
			/* select cpuid to target and mark ino established */
			orig_cpuid = ino_p->ino_cpuid;
			/* prior cpu is gone: account ticks against this cpu */
			if (cpu[orig_cpuid] == NULL)
				orig_cpuid = CPU->cpu_id;
			ino_p->ino_cpuid = pci_intr_dist_cpuid(ib_p, ino_p);
			ino_p->ino_established = 1;

			/* Add device weight of ino devinfos to targeted cpu. */
			for (i = 0, ih_lst = ino_p->ino_ih_head;
			    i < ino_p->ino_ih_size;
			    i++, ih_lst = ih_lst->ih_next) {

				dweight = i_ddi_get_intr_weight(ih_lst->ih_dip);
				intr_dist_cpuid_add_device_weight(
				    ino_p->ino_cpuid, ih_lst->ih_dip, dweight);

				/*
				 * different cpus may have different clock
				 * speeds. to account for this, whenever an
				 * interrupt is moved to a new CPU, we
				 * convert the accumulated ticks into nsec,
				 * based upon the clock rate of the prior
				 * CPU.
				 *
				 * It is possible that the prior CPU no longer
				 * exists. In this case, fall back to using
				 * this CPU's clock rate.
				 *
				 * Note that the value in ih_ticks has already
				 * been corrected for any power savings mode
				 * which might have been in effect.
				 */

				ib_cpu_ticks_to_ih_nsec(ib_p, ih_lst,
				    orig_cpuid);
			}

			/* program the hardware */
			ib_intr_dist(ib_p, ino_p);
		}
	}
	mutex_exit(&ib_p->ib_ino_lst_mutex);
}
445 
446 /*
447  * Reset interrupts to IDLE.  This function is called during
448  * panic handling after redistributing interrupts; it's needed to
449  * support dumping to network devices after 'sync' from OBP.
450  *
451  * N.B.  This routine runs in a context where all other threads
452  * are permanently suspended.
453  */
454 static uint_t
455 ib_intr_reset(void *arg)
456 {
457 	ib_t *ib_p = (ib_t *)arg;
458 	ib_ino_t ino;
459 	uint64_t *clr_reg;
460 
461 	/*
462 	 * Note that we only actually care about interrupts that are
463 	 * potentially from network devices.
464 	 */
465 	for (ino = 0; ino <= ib_p->ib_max_ino; ino++) {
466 		clr_reg = ib_clear_intr_reg_addr(ib_p, ino);
467 		IB_INO_INTR_CLEAR(clr_reg);
468 	}
469 
470 	return (BF_NONE);
471 }
472 
473 void
474 ib_suspend(ib_t *ib_p)
475 {
476 	ib_ino_info_t *ip;
477 	pci_t *pci_p = ib_p->ib_pci_p;
478 
479 	/* save ino_lst interrupts' mapping registers content */
480 	mutex_enter(&ib_p->ib_ino_lst_mutex);
481 	for (ip = ib_p->ib_ino_lst; ip; ip = ip->ino_next)
482 		ip->ino_map_reg_save = *ip->ino_map_reg;
483 	mutex_exit(&ib_p->ib_ino_lst_mutex);
484 
485 	if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
486 		ib_p->ib_upa_imr_state[0] = *ib_p->ib_upa_imr[0];
487 		ib_p->ib_upa_imr_state[1] = *ib_p->ib_upa_imr[1];
488 	}
489 }
490 
491 void
492 ib_resume(ib_t *ib_p)
493 {
494 	ib_ino_info_t *ip;
495 	pci_t *pci_p = ib_p->ib_pci_p;
496 
497 	/* restore ino_lst interrupts' mapping registers content */
498 	mutex_enter(&ib_p->ib_ino_lst_mutex);
499 	for (ip = ib_p->ib_ino_lst; ip; ip = ip->ino_next) {
500 		IB_INO_INTR_CLEAR(ip->ino_clr_reg);	 /* set intr to idle */
501 		*ip->ino_map_reg = ip->ino_map_reg_save; /* restore IMR */
502 	}
503 	mutex_exit(&ib_p->ib_ino_lst_mutex);
504 
505 	if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
506 		*ib_p->ib_upa_imr[0] = ib_p->ib_upa_imr_state[0];
507 		*ib_p->ib_upa_imr[1] = ib_p->ib_upa_imr_state[1];
508 	}
509 }
510 
511 /*
512  * locate ino_info structure on ib_p->ib_ino_lst according to ino#
513  * returns NULL if not found.
514  */
515 ib_ino_info_t *
516 ib_locate_ino(ib_t *ib_p, ib_ino_t ino_num)
517 {
518 	ib_ino_info_t *ino_p = ib_p->ib_ino_lst;
519 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
520 
521 	for (; ino_p && ino_p->ino_ino != ino_num; ino_p = ino_p->ino_next);
522 	return (ino_p);
523 }
524 
/*
 * Derive the slot number stored in an ino record: 0xff when the ino is an
 * onboard (OBIO) ino, otherwise bits 2 through 4 of the ino.
 */
#define	IB_INO_TO_SLOT(ino) (IB_IS_OBIO_INO(ino) ? 0xff : ((ino) & 0x1f) >> 2)
526 
527 ib_ino_info_t *
528 ib_new_ino(ib_t *ib_p, ib_ino_t ino_num, ih_t *ih_p)
529 {
530 	ib_ino_info_t *ino_p = kmem_alloc(sizeof (ib_ino_info_t), KM_SLEEP);
531 	ino_p->ino_ino = ino_num;
532 	ino_p->ino_slot_no = IB_INO_TO_SLOT(ino_num);
533 	ino_p->ino_ib_p = ib_p;
534 	ino_p->ino_clr_reg = ib_clear_intr_reg_addr(ib_p, ino_num);
535 	ino_p->ino_map_reg = ib_intr_map_reg_addr(ib_p, ino_num);
536 	ino_p->ino_unclaimed = 0;
537 
538 	/*
539 	 * cannot disable interrupt since we might share slot
540 	 * IB_INO_INTR_OFF(ino_p->ino_map_reg);
541 	 */
542 
543 	ih_p->ih_next = ih_p;
544 	ino_p->ino_ih_head = ih_p;
545 	ino_p->ino_ih_tail = ih_p;
546 	ino_p->ino_ih_start = ih_p;
547 	ino_p->ino_ih_size = 1;
548 
549 	ino_p->ino_next = ib_p->ib_ino_lst;
550 	ib_p->ib_ino_lst = ino_p;
551 	return (ino_p);
552 }
553 
554 /* the ino_p is retrieved by previous call to ib_locate_ino() */
555 void
556 ib_delete_ino(ib_t *ib_p, ib_ino_info_t *ino_p)
557 {
558 	ib_ino_info_t *list = ib_p->ib_ino_lst;
559 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
560 	if (list == ino_p)
561 		ib_p->ib_ino_lst = list->ino_next;
562 	else {
563 		for (; list->ino_next != ino_p; list = list->ino_next);
564 		list->ino_next = ino_p->ino_next;
565 	}
566 }
567 
568 /* free all ino when we are detaching */
569 void
570 ib_free_ino_all(ib_t *ib_p)
571 {
572 	ib_ino_info_t *tmp = ib_p->ib_ino_lst;
573 	ib_ino_info_t *next = NULL;
574 	while (tmp) {
575 		next = tmp->ino_next;
576 		kmem_free(tmp, sizeof (ib_ino_info_t));
577 		tmp = next;
578 	}
579 }
580 
581 void
582 ib_ino_add_intr(pci_t *pci_p, ib_ino_info_t *ino_p, ih_t *ih_p)
583 {
584 	ib_ino_t ino = ino_p->ino_ino;
585 	ib_t *ib_p = ino_p->ino_ib_p;
586 	volatile uint64_t *state_reg = IB_INO_INTR_STATE_REG(ib_p, ino);
587 	hrtime_t start_time;
588 
589 	ASSERT(ib_p == pci_p->pci_ib_p);
590 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
591 
592 	/* disable interrupt, this could disrupt devices sharing our slot */
593 	IB_INO_INTR_OFF(ino_p->ino_map_reg);
594 	*ino_p->ino_map_reg;
595 
596 	/* do NOT modify the link list until after the busy wait */
597 
598 	/*
599 	 * busy wait if there is interrupt being processed.
600 	 * either the pending state will be cleared by the interrupt wrapper
601 	 * or the interrupt will be marked as blocked indicating that it was
602 	 * jabbering.
603 	 */
604 	start_time = gethrtime();
605 	while ((ino_p->ino_unclaimed <= pci_unclaimed_intr_max) &&
606 		IB_INO_INTR_PENDING(state_reg, ino) && !panicstr) {
607 		if (gethrtime() - start_time > pci_intrpend_timeout) {
608 			pbm_t *pbm_p = pci_p->pci_pbm_p;
609 			cmn_err(CE_WARN, "%s:%s: ib_ino_add_intr %x timeout",
610 				pbm_p->pbm_nameinst_str,
611 				pbm_p->pbm_nameaddr_str, ino);
612 			break;
613 		}
614 	}
615 
616 	/* link up pci_ispec_t portion of the ppd */
617 	ih_p->ih_next = ino_p->ino_ih_head;
618 	ino_p->ino_ih_tail->ih_next = ih_p;
619 	ino_p->ino_ih_tail = ih_p;
620 
621 	ino_p->ino_ih_start = ino_p->ino_ih_head;
622 	ino_p->ino_ih_size++;
623 
624 	/*
625 	 * if the interrupt was previously blocked (left in pending state)
626 	 * because of jabber we need to clear the pending state in case the
627 	 * jabber has gone away.
628 	 */
629 	if (ino_p->ino_unclaimed > pci_unclaimed_intr_max) {
630 		cmn_err(CE_WARN,
631 		    "%s%d: ib_ino_add_intr: ino 0x%x has been unblocked",
632 		    ddi_driver_name(pci_p->pci_dip),
633 		    ddi_get_instance(pci_p->pci_dip),
634 		    ino_p->ino_ino);
635 		ino_p->ino_unclaimed = 0;
636 		IB_INO_INTR_CLEAR(ino_p->ino_clr_reg);
637 	}
638 
639 	/* re-enable interrupt */
640 	IB_INO_INTR_ON(ino_p->ino_map_reg);
641 	*ino_p->ino_map_reg;
642 }
643 
/*
 * Removes handler ih_p from the ino's circular handler list and frees it
 * (including its config handle and kstat, when present).
 * The caller must hold ib_ino_lst_mutex; the interrupt is turned off in
 * hardware and any in-flight interrupt is waited for before the list is
 * modified.
 * Side effects: interrupt belongs to that ino is turned off on return.
 * if we are sharing PCI slot with other inos, the caller needs
 * to turn it back on.
 * If ih_p is not on the list, nothing is freed and ino_ih_size is left
 * unchanged.
 */
void
ib_ino_rem_intr(pci_t *pci_p, ib_ino_info_t *ino_p, ih_t *ih_p)
{
	int i;
	ib_ino_t ino = ino_p->ino_ino;
	ih_t *ih_lst = ino_p->ino_ih_head;
	volatile uint64_t *state_reg =
		IB_INO_INTR_STATE_REG(ino_p->ino_ib_p, ino);
	hrtime_t start_time;

	ASSERT(MUTEX_HELD(&ino_p->ino_ib_p->ib_ino_lst_mutex));
	/* disable interrupt, this could disrupt devices sharing our slot */
	IB_INO_INTR_OFF(ino_p->ino_map_reg);
	*ino_p->ino_map_reg;

	/* do NOT modify the link list until after the busy wait */

	/*
	 * busy wait if there is interrupt being processed.
	 * either the pending state will be cleared by the interrupt wrapper
	 * or the interrupt will be marked as blocked indicating that it was
	 * jabbering.
	 */
	start_time = gethrtime();
	while ((ino_p->ino_unclaimed <= pci_unclaimed_intr_max) &&
		IB_INO_INTR_PENDING(state_reg, ino) && !panicstr) {
		if (gethrtime() - start_time > pci_intrpend_timeout) {
			pbm_t *pbm_p = pci_p->pci_pbm_p;
			cmn_err(CE_WARN, "%s:%s: ib_ino_rem_intr %x timeout",
				pbm_p->pbm_nameinst_str,
				pbm_p->pbm_nameaddr_str, ino);
			break;
		}
	}

	/* single-handler ino: ih_p must be that handler */
	if (ino_p->ino_ih_size == 1) {
		if (ih_lst != ih_p)
			goto not_found;
		/* no need to set head/tail as ino_p will be freed */
		goto reset;
	}

	/*
	 * if the interrupt was previously blocked (left in pending state)
	 * because of jabber we need to clear the pending state in case the
	 * jabber has gone away.
	 */
	if (ino_p->ino_unclaimed > pci_unclaimed_intr_max) {
		cmn_err(CE_WARN,
		    "%s%d: ib_ino_rem_intr: ino 0x%x has been unblocked",
		    ddi_driver_name(pci_p->pci_dip),
		    ddi_get_instance(pci_p->pci_dip),
		    ino_p->ino_ino);
		ino_p->ino_unclaimed = 0;
		IB_INO_INTR_CLEAR(ino_p->ino_clr_reg);
	}

	/*
	 * search the link list for ih_p; on exit ih_lst is the entry whose
	 * successor is ih_p (its predecessor in the ring), if there is one
	 */
	for (i = 0;
		(i < ino_p->ino_ih_size) && (ih_lst->ih_next != ih_p);
		i++, ih_lst = ih_lst->ih_next);
	if (ih_lst->ih_next != ih_p)
		goto not_found;

	/* remove ih_p from the link list and maintain the head/tail */
	ih_lst->ih_next = ih_p->ih_next;
	if (ino_p->ino_ih_head == ih_p)
		ino_p->ino_ih_head = ih_p->ih_next;
	if (ino_p->ino_ih_tail == ih_p)
		ino_p->ino_ih_tail = ih_lst;
	ino_p->ino_ih_start = ino_p->ino_ih_head;
reset:
	/* release everything owned by the handler, then the handler itself */
	if (ih_p->ih_config_handle)
		pci_config_teardown(&ih_p->ih_config_handle);
	if (ih_p->ih_ksp != NULL)
		kstat_delete(ih_p->ih_ksp);
	kmem_free(ih_p, sizeof (ih_t));
	ino_p->ino_ih_size--;

	return;
not_found:
	DEBUG2(DBG_R_INTX, ino_p->ino_ib_p->ib_pci_p->pci_dip,
		"ino_p=%x does not have ih_p=%x\n", ino_p, ih_p);
}
735 
736 ih_t *
737 ib_ino_locate_intr(ib_ino_info_t *ino_p, dev_info_t *rdip, uint32_t inum)
738 {
739 	ih_t *ih_lst = ino_p->ino_ih_head;
740 	int i;
741 	for (i = 0; i < ino_p->ino_ih_size; i++, ih_lst = ih_lst->ih_next) {
742 		if (ih_lst->ih_dip == rdip &&
743 		    ih_lst->ih_inum == inum)
744 			return (ih_lst);
745 	}
746 	return ((ih_t *)NULL);
747 }
748 
749 ih_t *
750 ib_alloc_ih(dev_info_t *rdip, uint32_t inum,
751 	uint_t (*int_handler)(caddr_t int_handler_arg1,
752 	caddr_t int_handler_arg2),
753 	caddr_t int_handler_arg1,
754 	caddr_t int_handler_arg2)
755 {
756 	ih_t *ih_p;
757 
758 	ih_p = kmem_alloc(sizeof (ih_t), KM_SLEEP);
759 	ih_p->ih_dip = rdip;
760 	ih_p->ih_inum = inum;
761 	ih_p->ih_intr_state = PCI_INTR_STATE_DISABLE;
762 	ih_p->ih_handler = int_handler;
763 	ih_p->ih_handler_arg1 = int_handler_arg1;
764 	ih_p->ih_handler_arg2 = int_handler_arg2;
765 	ih_p->ih_config_handle = NULL;
766 	ih_p->ih_nsec = 0;
767 	ih_p->ih_ticks = 0;
768 	ih_p->ih_ksp = NULL;
769 
770 	return (ih_p);
771 }
772 
773 int
774 ib_update_intr_state(pci_t *pci_p, dev_info_t *rdip,
775 	ddi_intr_handle_impl_t *hdlp, uint_t new_intr_state)
776 {
777 	ib_t		*ib_p = pci_p->pci_ib_p;
778 	ib_ino_info_t	*ino_p;
779 	ib_mondo_t	mondo;
780 	ih_t		*ih_p;
781 	int		ret = DDI_FAILURE;
782 
783 	/*
784 	 * For PULSE interrupts, pci driver don't allocate
785 	 * ib_ino_info_t and ih_t data structures and also,
786 	 * not maintains any interrupt state information.
787 	 * So, just return success from here.
788 	 */
789 	if (hdlp->ih_vector & PCI_PULSE_INO) {
790 		DEBUG0(DBG_IB, ib_p->ib_pci_p->pci_dip,
791 		    "ib_update_intr_state: PULSE interrupt, return success\n");
792 
793 		return (DDI_SUCCESS);
794 	}
795 
796 	mutex_enter(&ib_p->ib_ino_lst_mutex);
797 
798 	if ((mondo = pci_xlate_intr(pci_p->pci_dip, rdip, pci_p->pci_ib_p,
799 	    IB_MONDO_TO_INO(hdlp->ih_vector))) == 0) {
800 		mutex_exit(&ib_p->ib_ino_lst_mutex);
801 		return (ret);
802 	}
803 
804 	if (ino_p = ib_locate_ino(ib_p, IB_MONDO_TO_INO(mondo))) {
805 		if (ih_p = ib_ino_locate_intr(ino_p, rdip, hdlp->ih_inum)) {
806 			ih_p->ih_intr_state = new_intr_state;
807 			ret = DDI_SUCCESS;
808 		}
809 	}
810 
811 	mutex_exit(&ib_p->ib_ino_lst_mutex);
812 	return (ret);
813 }
814 
815 /*
816  * Return the dips or number of dips associated with a given interrupt block.
817  * Size of dips array arg is passed in as dips_ret arg.
818  * Number of dips returned is returned in dips_ret arg.
819  * Array of dips gets returned in the dips argument.
820  * Function returns number of dips existing for the given interrupt block.
821  *
822  */
823 uint8_t
824 ib_get_ino_devs(
825 	ib_t *ib_p, uint32_t ino, uint8_t *devs_ret, pcitool_intr_dev_t *devs)
826 {
827 	ib_ino_info_t *ino_p;
828 	ih_t *ih_p;
829 	uint32_t num_devs = 0;
830 	int i;
831 
832 	mutex_enter(&ib_p->ib_ino_lst_mutex);
833 	ino_p = ib_locate_ino(ib_p, ino);
834 	if (ino_p != NULL) {
835 		num_devs = ino_p->ino_ih_size;
836 		for (i = 0, ih_p = ino_p->ino_ih_head;
837 		    ((i < ino_p->ino_ih_size) && (i < *devs_ret));
838 		    i++, ih_p = ih_p->ih_next) {
839 			(void) strncpy(devs[i].driver_name,
840 			    ddi_driver_name(ih_p->ih_dip), MAXMODCONFNAME-1);
841 			devs[i].driver_name[MAXMODCONFNAME] = '\0';
842 			(void) ddi_pathname(ih_p->ih_dip, devs[i].path);
843 			devs[i].dev_inst = ddi_get_instance(ih_p->ih_dip);
844 		}
845 		*devs_ret = i;
846 	}
847 
848 	mutex_exit(&ib_p->ib_ino_lst_mutex);
849 
850 	return (num_devs);
851 }
852 
853 void ib_log_new_cpu(ib_t *ib_p, uint32_t old_cpu_id, uint32_t new_cpu_id,
854 	uint32_t ino)
855 {
856 	ib_ino_info_t *ino_p;
857 
858 	mutex_enter(&ib_p->ib_ino_lst_mutex);
859 
860 	/* Log in OS data structures the new CPU. */
861 	ino_p = ib_locate_ino(ib_p, ino);
862 	if (ino_p != NULL) {
863 
864 		/* Log in OS data structures the new CPU. */
865 		ino_p->ino_cpuid = new_cpu_id;
866 
867 		/* Account for any residual time to be logged for old cpu. */
868 		ib_cpu_ticks_to_ih_nsec(ib_p, ino_p->ino_ih_head, old_cpu_id);
869 	}
870 
871 	mutex_exit(&ib_p->ib_ino_lst_mutex);
872 }
873