xref: /titanic_41/usr/src/uts/sun4u/io/pci/pci_ib.c (revision 8e50dcc9f00b393d43e6aa42b820bcbf1d3e1ce4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * PCI Interrupt Block (RISCx) implementation
30  *	initialization
31  *	interrupt enable/disable/clear and mapping register manipulation
32  */
33 
34 #include <sys/types.h>
35 #include <sys/kmem.h>
36 #include <sys/async.h>
37 #include <sys/systm.h>		/* panicstr */
38 #include <sys/spl.h>
39 #include <sys/sunddi.h>
40 #include <sys/machsystm.h>	/* intr_dist_add */
41 #include <sys/ddi_impldefs.h>
42 #include <sys/clock.h>
43 #include <sys/cpuvar.h>
44 #include <sys/pci/pci_obj.h>
45 
46 #ifdef _STARFIRE
47 #include <sys/starfire.h>
48 #endif /* _STARFIRE */
49 
50 /*LINTLIBRARY*/
51 static uint_t ib_intr_reset(void *arg);
52 
53 void
54 ib_create(pci_t *pci_p)
55 {
56 	dev_info_t *dip = pci_p->pci_dip;
57 	ib_t *ib_p;
58 	uintptr_t a;
59 	int i;
60 
61 	/*
62 	 * Allocate interrupt block state structure and link it to
63 	 * the pci state structure.
64 	 */
65 	ib_p = kmem_zalloc(sizeof (ib_t), KM_SLEEP);
66 	pci_p->pci_ib_p = ib_p;
67 	ib_p->ib_pci_p = pci_p;
68 
69 	a = pci_ib_setup(ib_p);
70 
71 	/*
72 	 * Determine virtual addresses of interrupt mapping, clear and diag
73 	 * registers that have common offsets.
74 	 */
75 	ib_p->ib_slot_clear_intr_regs =
76 	    a + COMMON_IB_SLOT_CLEAR_INTR_REG_OFFSET;
77 	ib_p->ib_intr_retry_timer_reg =
78 	    (uint64_t *)(a + COMMON_IB_INTR_RETRY_TIMER_OFFSET);
79 	ib_p->ib_slot_intr_state_diag_reg =
80 	    (uint64_t *)(a + COMMON_IB_SLOT_INTR_STATE_DIAG_REG);
81 	ib_p->ib_obio_intr_state_diag_reg =
82 	    (uint64_t *)(a + COMMON_IB_OBIO_INTR_STATE_DIAG_REG);
83 
84 	if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
85 		ib_p->ib_upa_imr[0] = (volatile uint64_t *)
86 		    (a + COMMON_IB_UPA0_INTR_MAP_REG_OFFSET);
87 		ib_p->ib_upa_imr[1] = (volatile uint64_t *)
88 		    (a + COMMON_IB_UPA1_INTR_MAP_REG_OFFSET);
89 	}
90 
91 	DEBUG2(DBG_ATTACH, dip, "ib_create: slot_imr=%x, slot_cir=%x\n",
92 	    ib_p->ib_slot_intr_map_regs, ib_p->ib_obio_intr_map_regs);
93 	DEBUG2(DBG_ATTACH, dip, "ib_create: obio_imr=%x, obio_cir=%x\n",
94 	    ib_p->ib_slot_clear_intr_regs, ib_p->ib_obio_clear_intr_regs);
95 	DEBUG2(DBG_ATTACH, dip, "ib_create: upa0_imr=%x, upa1_imr=%x\n",
96 	    ib_p->ib_upa_imr[0], ib_p->ib_upa_imr[1]);
97 	DEBUG3(DBG_ATTACH, dip,
98 	    "ib_create: retry_timer=%x, obio_diag=%x slot_diag=%x\n",
99 	    ib_p->ib_intr_retry_timer_reg,
100 	    ib_p->ib_obio_intr_state_diag_reg,
101 	    ib_p->ib_slot_intr_state_diag_reg);
102 
103 	ib_p->ib_ino_lst = (ib_ino_info_t *)NULL;
104 	mutex_init(&ib_p->ib_intr_lock, NULL, MUTEX_DRIVER, NULL);
105 	mutex_init(&ib_p->ib_ino_lst_mutex, NULL, MUTEX_DRIVER, NULL);
106 
107 	DEBUG1(DBG_ATTACH, dip, "ib_create: numproxy=%x\n",
108 	    pci_p->pci_numproxy);
109 	for (i = 1; i <= pci_p->pci_numproxy; i++) {
110 		set_intr_mapping_reg(pci_p->pci_id,
111 		    (uint64_t *)ib_p->ib_upa_imr[i - 1], i);
112 	}
113 
114 	ib_configure(ib_p);
115 	bus_func_register(BF_TYPE_RESINTR, ib_intr_reset, ib_p);
116 }
117 
118 void
119 ib_destroy(pci_t *pci_p)
120 {
121 	ib_t *ib_p = pci_p->pci_ib_p;
122 	dev_info_t *dip = pci_p->pci_dip;
123 
124 	DEBUG0(DBG_IB, dip, "ib_destroy\n");
125 	bus_func_unregister(BF_TYPE_RESINTR, ib_intr_reset, ib_p);
126 
127 	intr_dist_rem_weighted(ib_intr_dist_all, ib_p);
128 	mutex_destroy(&ib_p->ib_ino_lst_mutex);
129 	mutex_destroy(&ib_p->ib_intr_lock);
130 
131 	ib_free_ino_all(ib_p);
132 
133 	kmem_free(ib_p, sizeof (ib_t));
134 	pci_p->pci_ib_p = NULL;
135 }
136 
137 void
138 ib_configure(ib_t *ib_p)
139 {
140 	/* XXX could be different between psycho and schizo */
141 	*ib_p->ib_intr_retry_timer_reg = pci_intr_retry_intv;
142 }
143 
144 /*
145  * can only used for psycho internal interrupts thermal, power,
146  * ue, ce, pbm
147  */
148 void
149 ib_intr_enable(pci_t *pci_p, ib_ino_t ino)
150 {
151 	ib_t *ib_p = pci_p->pci_ib_p;
152 	ib_mondo_t mondo = IB_INO_TO_MONDO(ib_p, ino);
153 	volatile uint64_t *imr_p = ib_intr_map_reg_addr(ib_p, ino);
154 	uint_t cpu_id;
155 
156 	/*
157 	 * Determine the cpu for the interrupt.
158 	 */
159 	mutex_enter(&ib_p->ib_intr_lock);
160 	cpu_id = intr_dist_cpuid();
161 #ifdef _STARFIRE
162 	cpu_id = pc_translate_tgtid(IB2CB(ib_p)->cb_ittrans_cookie, cpu_id,
163 	    IB_GET_MAPREG_INO(ino));
164 #endif /* _STARFIRE */
165 	DEBUG2(DBG_IB, pci_p->pci_dip,
166 	    "ib_intr_enable: ino=%x cpu_id=%x\n", ino, cpu_id);
167 
168 	*imr_p = ib_get_map_reg(mondo, cpu_id);
169 	IB_INO_INTR_CLEAR(ib_clear_intr_reg_addr(ib_p, ino));
170 	mutex_exit(&ib_p->ib_intr_lock);
171 }
172 
173 /*
174  * Disable the interrupt via its interrupt mapping register.
175  * Can only be used for internal interrupts: thermal, power, ue, ce, pbm.
176  * If called under interrupt context, wait should be set to 0
177  */
178 void
179 ib_intr_disable(ib_t *ib_p, ib_ino_t ino, int wait)
180 {
181 	volatile uint64_t *imr_p = ib_intr_map_reg_addr(ib_p, ino);
182 	volatile uint64_t *state_reg_p = IB_INO_INTR_STATE_REG(ib_p, ino);
183 	hrtime_t start_time;
184 
185 	/* disable the interrupt */
186 	mutex_enter(&ib_p->ib_intr_lock);
187 	IB_INO_INTR_OFF(imr_p);
188 	*imr_p;	/* flush previous write */
189 	mutex_exit(&ib_p->ib_intr_lock);
190 
191 	if (!wait)
192 		goto wait_done;
193 
194 	start_time = gethrtime();
195 	/* busy wait if there is interrupt being processed */
196 	while (IB_INO_INTR_PENDING(state_reg_p, ino) && !panicstr) {
197 		if (gethrtime() - start_time > pci_intrpend_timeout) {
198 			pbm_t *pbm_p = ib_p->ib_pci_p->pci_pbm_p;
199 			cmn_err(CE_WARN, "%s:%s: ib_intr_disable timeout %x",
200 			    pbm_p->pbm_nameinst_str,
201 			    pbm_p->pbm_nameaddr_str, ino);
202 				break;
203 		}
204 	}
205 wait_done:
206 	IB_INO_INTR_PEND(ib_clear_intr_reg_addr(ib_p, ino));
207 #ifdef _STARFIRE
208 	pc_ittrans_cleanup(IB2CB(ib_p)->cb_ittrans_cookie,
209 	    (volatile uint64_t *)(uintptr_t)ino);
210 #endif /* _STARFIRE */
211 }
212 
213 /* can only used for psycho internal interrupts thermal, power, ue, ce, pbm */
214 void
215 ib_nintr_clear(ib_t *ib_p, ib_ino_t ino)
216 {
217 	uint64_t *clr_reg = ib_clear_intr_reg_addr(ib_p, ino);
218 	IB_INO_INTR_CLEAR(clr_reg);
219 }
220 
221 /*
222  * distribute PBM and UPA interrupts. ino is set to 0 by caller if we
223  * are dealing with UPA interrupts (without inos).
224  */
225 void
226 ib_intr_dist_nintr(ib_t *ib_p, ib_ino_t ino, volatile uint64_t *imr_p)
227 {
228 	volatile uint64_t imr = *imr_p;
229 	uint32_t cpu_id;
230 
231 	if (!IB_INO_INTR_ISON(imr))
232 		return;
233 
234 	cpu_id = intr_dist_cpuid();
235 
236 #ifdef _STARFIRE
237 	if (ino) {
238 		cpu_id = pc_translate_tgtid(IB2CB(ib_p)->cb_ittrans_cookie,
239 		    cpu_id, IB_GET_MAPREG_INO(ino));
240 	}
241 #else /* _STARFIRE */
242 	if (ib_map_reg_get_cpu(*imr_p) == cpu_id)
243 		return;
244 #endif /* _STARFIRE */
245 
246 	*imr_p = ib_get_map_reg(IB_IMR2MONDO(imr), cpu_id);
247 	imr = *imr_p;	/* flush previous write */
248 }
249 
250 /*
251  * Converts into nsec, ticks logged with a given CPU.  Adds nsec to ih.
252  */
253 /*ARGSUSED*/
254 void
255 ib_cpu_ticks_to_ih_nsec(ib_t *ib_p, ih_t *ih_p, uint32_t cpu_id)
256 {
257 	extern kmutex_t pciintr_ks_template_lock;
258 	hrtime_t ticks;
259 
260 	/*
261 	 * Because we are updating two fields in ih_t we must lock
262 	 * pciintr_ks_template_lock to prevent someone from reading the
263 	 * kstats after we set ih_ticks to 0 and before we increment
264 	 * ih_nsec to compensate.
265 	 *
266 	 * We must also protect against the interrupt arriving and incrementing
267 	 * ih_ticks between the time we read it and when we reset it to 0.
268 	 * To do this we use atomic_swap.
269 	 */
270 
271 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
272 
273 	mutex_enter(&pciintr_ks_template_lock);
274 	ticks = atomic_swap_64(&ih_p->ih_ticks, 0);
275 	ih_p->ih_nsec += (uint64_t)tick2ns(ticks, cpu_id);
276 	mutex_exit(&pciintr_ks_template_lock);
277 }
278 
279 static void
280 ib_intr_dist(ib_t *ib_p, ib_ino_info_t *ino_p)
281 {
282 	uint32_t cpu_id = ino_p->ino_cpuid;
283 	ib_ino_t ino = ino_p->ino_ino;
284 	volatile uint64_t imr, *imr_p, *state_reg;
285 	hrtime_t start_time;
286 
287 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
288 	imr_p = ib_intr_map_reg_addr(ib_p, ino);
289 	state_reg = IB_INO_INTR_STATE_REG(ib_p, ino);
290 
291 #ifdef _STARFIRE
292 	/*
293 	 * For Starfire it is a pain to check the current target for
294 	 * the mondo since we have to read the PC asics ITTR slot
295 	 * assigned to this mondo. It will be much easier to assume
296 	 * the current target is always different and do the target
297 	 * reprogram all the time.
298 	 */
299 	cpu_id = pc_translate_tgtid(IB2CB(ib_p)->cb_ittrans_cookie, cpu_id,
300 	    IB_GET_MAPREG_INO(ino));
301 #else
302 	if (ib_map_reg_get_cpu(*imr_p) == cpu_id) /* same cpu, no reprog */
303 		return;
304 #endif /* _STARFIRE */
305 
306 	/* disable interrupt, this could disrupt devices sharing our slot */
307 	IB_INO_INTR_OFF(imr_p);
308 	imr = *imr_p;	/* flush previous write */
309 
310 	/* busy wait if there is interrupt being processed */
311 	start_time = gethrtime();
312 	while (IB_INO_INTR_PENDING(state_reg, ino) && !panicstr) {
313 		if (gethrtime() - start_time > pci_intrpend_timeout) {
314 			pbm_t *pbm_p = ib_p->ib_pci_p->pci_pbm_p;
315 			cmn_err(CE_WARN, "%s:%s: ib_intr_dist(%p,%x) timeout",
316 			    pbm_p->pbm_nameinst_str,
317 			    pbm_p->pbm_nameaddr_str,
318 			    imr_p, IB_INO_TO_MONDO(ib_p, ino));
319 			break;
320 		}
321 	}
322 	*imr_p = ib_get_map_reg(IB_IMR2MONDO(imr), cpu_id);
323 	imr = *imr_p;	/* flush previous write */
324 }
325 
326 /*
327  * Redistribute interrupts of the specified weight. The first call has a weight
328  * of weight_max, which can be used to trigger initialization for
329  * redistribution. The inos with weight [weight_max, inf.) should be processed
330  * on the "weight == weight_max" call.  This first call is followed by calls
331  * of decreasing weights, inos of that weight should be processed.  The final
332  * call specifies a weight of zero, this can be used to trigger processing of
333  * stragglers.
334  */
335 void
336 ib_intr_dist_all(void *arg, int32_t weight_max, int32_t weight)
337 {
338 	ib_t *ib_p = (ib_t *)arg;
339 	pci_t *pci_p = ib_p->ib_pci_p;
340 	ib_ino_info_t *ino_p;
341 	ib_ino_pil_t *ipil_p;
342 	ih_t *ih_lst;
343 	int32_t dweight;
344 	int i;
345 
346 	if (weight == 0) {
347 		mutex_enter(&ib_p->ib_intr_lock);
348 		if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
349 			for (i = 0; i < 2; i++)
350 				ib_intr_dist_nintr(ib_p, 0,
351 				    ib_p->ib_upa_imr[i]);
352 		}
353 		mutex_exit(&ib_p->ib_intr_lock);
354 	}
355 
356 	mutex_enter(&ib_p->ib_ino_lst_mutex);
357 
358 	/* Perform special processing for first call of a redistribution. */
359 	if (weight == weight_max) {
360 		for (ino_p = ib_p->ib_ino_lst; ino_p;
361 		    ino_p = ino_p->ino_next_p) {
362 
363 			/*
364 			 * Clear ino_established of each ino on first call.
365 			 * The ino_established field may be used by a pci
366 			 * nexus driver's pci_intr_dist_cpuid implementation
367 			 * when detection of established pci slot-cpu binding
368 			 * for multi function pci cards.
369 			 */
370 			ino_p->ino_established = 0;
371 
372 			/*
373 			 * recompute the ino_intr_weight based on the device
374 			 * weight of all devinfo nodes sharing the ino (this
375 			 * will allow us to pick up new weights established by
376 			 * i_ddi_set_intr_weight()).
377 			 */
378 			ino_p->ino_intr_weight = 0;
379 
380 			for (ipil_p = ino_p->ino_ipil_p; ipil_p;
381 			    ipil_p = ipil_p->ipil_next_p) {
382 				for (i = 0, ih_lst = ipil_p->ipil_ih_head;
383 				    i < ipil_p->ipil_ih_size; i++,
384 				    ih_lst = ih_lst->ih_next) {
385 					dweight = i_ddi_get_intr_weight
386 					    (ih_lst->ih_dip);
387 					if (dweight > 0)
388 						ino_p->ino_intr_weight +=
389 						    dweight;
390 				}
391 			}
392 		}
393 	}
394 
395 	for (ino_p = ib_p->ib_ino_lst; ino_p; ino_p = ino_p->ino_next_p) {
396 		uint32_t orig_cpuid;
397 
398 		/*
399 		 * Get the weight of the ino and determine if we are going to
400 		 * process call.  We wait until an ib_intr_dist_all call of
401 		 * the proper weight occurs to support redistribution of all
402 		 * heavy weighted interrupts first (across all nexus driver
403 		 * instances).  This is done to ensure optimal
404 		 * INTR_WEIGHTED_DIST behavior.
405 		 */
406 		if ((weight == ino_p->ino_intr_weight) ||
407 		    ((weight >= weight_max) &&
408 		    (ino_p->ino_intr_weight >= weight_max))) {
409 			/* select cpuid to target and mark ino established */
410 			orig_cpuid = ino_p->ino_cpuid;
411 			if (cpu[orig_cpuid] == NULL)
412 				orig_cpuid = CPU->cpu_id;
413 			ino_p->ino_cpuid = pci_intr_dist_cpuid(ib_p, ino_p);
414 			ino_p->ino_established = 1;
415 
416 			/* Add device weight of ino devinfos to targeted cpu. */
417 			for (ipil_p = ino_p->ino_ipil_p; ipil_p;
418 			    ipil_p = ipil_p->ipil_next_p) {
419 				for (i = 0, ih_lst = ipil_p->ipil_ih_head;
420 				    i < ipil_p->ipil_ih_size; i++,
421 				    ih_lst = ih_lst->ih_next) {
422 
423 					dweight = i_ddi_get_intr_weight(
424 					    ih_lst->ih_dip);
425 					intr_dist_cpuid_add_device_weight(
426 					    ino_p->ino_cpuid, ih_lst->ih_dip,
427 					    dweight);
428 
429 					/*
430 					 * Different cpus may have different
431 					 * clock speeds. to account for this,
432 					 * whenever an interrupt is moved to a
433 					 * new CPU, we convert the accumulated
434 					 * ticks into nsec, based upon the clock
435 					 * rate of the prior CPU.
436 					 *
437 					 * It is possible that the prior CPU no
438 					 * longer exists. In this case, fall
439 					 * back to using this CPU's clock rate.
440 					 *
441 					 * Note that the value in ih_ticks has
442 					 * already been corrected for any power
443 					 * savings mode which might have been
444 					 * in effect.
445 					 */
446 					ib_cpu_ticks_to_ih_nsec(ib_p, ih_lst,
447 					    orig_cpuid);
448 				}
449 			}
450 
451 			/* program the hardware */
452 			ib_intr_dist(ib_p, ino_p);
453 		}
454 	}
455 	mutex_exit(&ib_p->ib_ino_lst_mutex);
456 }
457 
458 /*
459  * Reset interrupts to IDLE.  This function is called during
460  * panic handling after redistributing interrupts; it's needed to
461  * support dumping to network devices after 'sync' from OBP.
462  *
463  * N.B.  This routine runs in a context where all other threads
464  * are permanently suspended.
465  */
466 static uint_t
467 ib_intr_reset(void *arg)
468 {
469 	ib_t *ib_p = (ib_t *)arg;
470 	ib_ino_t ino;
471 	uint64_t *clr_reg;
472 
473 	/*
474 	 * Note that we only actually care about interrupts that are
475 	 * potentially from network devices.
476 	 */
477 	for (ino = 0; ino <= ib_p->ib_max_ino; ino++) {
478 		clr_reg = ib_clear_intr_reg_addr(ib_p, ino);
479 		IB_INO_INTR_CLEAR(clr_reg);
480 	}
481 
482 	return (BF_NONE);
483 }
484 
485 void
486 ib_suspend(ib_t *ib_p)
487 {
488 	ib_ino_info_t *ip;
489 	pci_t *pci_p = ib_p->ib_pci_p;
490 
491 	/* save ino_lst interrupts' mapping registers content */
492 	mutex_enter(&ib_p->ib_ino_lst_mutex);
493 	for (ip = ib_p->ib_ino_lst; ip; ip = ip->ino_next_p)
494 		ip->ino_map_reg_save = *ip->ino_map_reg;
495 	mutex_exit(&ib_p->ib_ino_lst_mutex);
496 
497 	if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
498 		ib_p->ib_upa_imr_state[0] = *ib_p->ib_upa_imr[0];
499 		ib_p->ib_upa_imr_state[1] = *ib_p->ib_upa_imr[1];
500 	}
501 }
502 
503 void
504 ib_resume(ib_t *ib_p)
505 {
506 	ib_ino_info_t *ip;
507 	pci_t *pci_p = ib_p->ib_pci_p;
508 
509 	/* restore ino_lst interrupts' mapping registers content */
510 	mutex_enter(&ib_p->ib_ino_lst_mutex);
511 	for (ip = ib_p->ib_ino_lst; ip; ip = ip->ino_next_p) {
512 		IB_INO_INTR_CLEAR(ip->ino_clr_reg);	 /* set intr to idle */
513 		*ip->ino_map_reg = ip->ino_map_reg_save; /* restore IMR */
514 	}
515 	mutex_exit(&ib_p->ib_ino_lst_mutex);
516 
517 	if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
518 		*ib_p->ib_upa_imr[0] = ib_p->ib_upa_imr_state[0];
519 		*ib_p->ib_upa_imr[1] = ib_p->ib_upa_imr_state[1];
520 	}
521 }
522 
523 /*
524  * locate ino_info structure on ib_p->ib_ino_lst according to ino#
525  * returns NULL if not found.
526  */
527 ib_ino_info_t *
528 ib_locate_ino(ib_t *ib_p, ib_ino_t ino_num)
529 {
530 	ib_ino_info_t *ino_p = ib_p->ib_ino_lst;
531 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
532 
533 	for (; ino_p && ino_p->ino_ino != ino_num; ino_p = ino_p->ino_next_p)
534 		;
535 	return (ino_p);
536 }
537 
538 #define	IB_INO_TO_SLOT(ino) (IB_IS_OBIO_INO(ino) ? 0xff : ((ino) & 0x1f) >> 2)
539 
540 ib_ino_pil_t *
541 ib_new_ino_pil(ib_t *ib_p, ib_ino_t ino_num, uint_t pil, ih_t *ih_p)
542 {
543 	ib_ino_pil_t	*ipil_p = kmem_zalloc(sizeof (ib_ino_pil_t), KM_SLEEP);
544 	ib_ino_info_t	*ino_p;
545 
546 	if ((ino_p = ib_locate_ino(ib_p, ino_num)) == NULL) {
547 		ino_p = kmem_zalloc(sizeof (ib_ino_info_t), KM_SLEEP);
548 
549 		ino_p->ino_next_p = ib_p->ib_ino_lst;
550 		ib_p->ib_ino_lst = ino_p;
551 
552 		ino_p->ino_ino = ino_num;
553 		ino_p->ino_slot_no = IB_INO_TO_SLOT(ino_num);
554 		ino_p->ino_ib_p = ib_p;
555 		ino_p->ino_clr_reg = ib_clear_intr_reg_addr(ib_p, ino_num);
556 		ino_p->ino_map_reg = ib_intr_map_reg_addr(ib_p, ino_num);
557 		ino_p->ino_unclaimed_intrs = 0;
558 		ino_p->ino_lopil = pil;
559 	}
560 
561 	ih_p->ih_next = ih_p;
562 	ipil_p->ipil_pil = pil;
563 	ipil_p->ipil_ih_head = ih_p;
564 	ipil_p->ipil_ih_tail = ih_p;
565 	ipil_p->ipil_ih_start = ih_p;
566 	ipil_p->ipil_ih_size = 1;
567 	ipil_p->ipil_ino_p = ino_p;
568 
569 	ipil_p->ipil_next_p = ino_p->ino_ipil_p;
570 	ino_p->ino_ipil_p = ipil_p;
571 	ino_p->ino_ipil_size++;
572 
573 	if (ino_p->ino_lopil > pil)
574 		ino_p->ino_lopil = pil;
575 
576 	return (ipil_p);
577 }
578 
579 void
580 ib_delete_ino_pil(ib_t *ib_p, ib_ino_pil_t *ipil_p)
581 {
582 	ib_ino_info_t	*ino_p = ipil_p->ipil_ino_p;
583 	ib_ino_pil_t	*prev, *next;
584 	ushort_t	pil = ipil_p->ipil_pil;
585 
586 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
587 
588 	if (ino_p->ino_ipil_p == ipil_p)
589 		ino_p->ino_ipil_p = ipil_p->ipil_next_p;
590 	else {
591 		for (prev = next = ino_p->ino_ipil_p; next != ipil_p;
592 		    prev = next, next = next->ipil_next_p)
593 			;
594 
595 		if (prev)
596 			prev->ipil_next_p = ipil_p->ipil_next_p;
597 	}
598 
599 	kmem_free(ipil_p, sizeof (ib_ino_pil_t));
600 
601 	if ((--ino_p->ino_ipil_size) && (ino_p->ino_lopil == pil)) {
602 		for (next = ino_p->ino_ipil_p, pil = next->ipil_pil;
603 		    next; next = next->ipil_next_p) {
604 
605 			if (pil > next->ipil_pil)
606 				pil = next->ipil_pil;
607 		}
608 		/*
609 		 * Value stored in pil should be the lowest pil.
610 		 */
611 		ino_p->ino_lopil = pil;
612 	}
613 
614 	if (ino_p->ino_ipil_size)
615 		return;
616 
617 	if (ib_p->ib_ino_lst == ino_p)
618 		ib_p->ib_ino_lst = ino_p->ino_next_p;
619 	else {
620 		ib_ino_info_t	*list = ib_p->ib_ino_lst;
621 
622 		for (; list->ino_next_p != ino_p; list = list->ino_next_p)
623 			;
624 		list->ino_next_p = ino_p->ino_next_p;
625 	}
626 }
627 
628 /* free all ino when we are detaching */
629 void
630 ib_free_ino_all(ib_t *ib_p)
631 {
632 	ib_ino_info_t *ino_p = ib_p->ib_ino_lst;
633 	ib_ino_info_t *next = NULL;
634 
635 	while (ino_p) {
636 		next = ino_p->ino_next_p;
637 		kmem_free(ino_p, sizeof (ib_ino_info_t));
638 		ino_p = next;
639 	}
640 }
641 
642 /*
643  * Locate ib_ino_pil_t structure on ino_p->ino_ipil_p according to ino#
644  * returns NULL if not found.
645  */
646 ib_ino_pil_t *
647 ib_ino_locate_ipil(ib_ino_info_t *ino_p, uint_t pil)
648 {
649 	ib_ino_pil_t	*ipil_p = ino_p->ino_ipil_p;
650 
651 	for (; ipil_p && ipil_p->ipil_pil != pil; ipil_p = ipil_p->ipil_next_p)
652 		;
653 
654 	return (ipil_p);
655 }
656 
657 void
658 ib_ino_add_intr(pci_t *pci_p, ib_ino_pil_t *ipil_p, ih_t *ih_p)
659 {
660 	ib_ino_info_t *ino_p = ipil_p->ipil_ino_p;
661 	ib_ino_t ino = ino_p->ino_ino;
662 	ib_t *ib_p = ino_p->ino_ib_p;
663 	volatile uint64_t *state_reg = IB_INO_INTR_STATE_REG(ib_p, ino);
664 	hrtime_t start_time;
665 
666 	ASSERT(ib_p == pci_p->pci_ib_p);
667 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
668 
669 	/* disable interrupt, this could disrupt devices sharing our slot */
670 	IB_INO_INTR_OFF(ino_p->ino_map_reg);
671 	*ino_p->ino_map_reg;
672 
673 	/* do NOT modify the link list until after the busy wait */
674 
675 	/*
676 	 * busy wait if there is interrupt being processed.
677 	 * either the pending state will be cleared by the interrupt wrapper
678 	 * or the interrupt will be marked as blocked indicating that it was
679 	 * jabbering.
680 	 */
681 	start_time = gethrtime();
682 	while ((ino_p->ino_unclaimed_intrs <= pci_unclaimed_intr_max) &&
683 	    IB_INO_INTR_PENDING(state_reg, ino) && !panicstr) {
684 		if (gethrtime() - start_time > pci_intrpend_timeout) {
685 			pbm_t *pbm_p = pci_p->pci_pbm_p;
686 			cmn_err(CE_WARN, "%s:%s: ib_ino_add_intr %x timeout",
687 			    pbm_p->pbm_nameinst_str,
688 			    pbm_p->pbm_nameaddr_str, ino);
689 			break;
690 		}
691 	}
692 
693 	/* link up ih_t */
694 	ih_p->ih_next = ipil_p->ipil_ih_head;
695 	ipil_p->ipil_ih_tail->ih_next = ih_p;
696 	ipil_p->ipil_ih_tail = ih_p;
697 
698 	ipil_p->ipil_ih_start = ipil_p->ipil_ih_head;
699 	ipil_p->ipil_ih_size++;
700 
701 	/*
702 	 * if the interrupt was previously blocked (left in pending state)
703 	 * because of jabber we need to clear the pending state in case the
704 	 * jabber has gone away.
705 	 */
706 	if (ino_p->ino_unclaimed_intrs > pci_unclaimed_intr_max) {
707 		cmn_err(CE_WARN,
708 		    "%s%d: ib_ino_add_intr: ino 0x%x has been unblocked",
709 		    ddi_driver_name(pci_p->pci_dip),
710 		    ddi_get_instance(pci_p->pci_dip),
711 		    ino_p->ino_ino);
712 		ino_p->ino_unclaimed_intrs = 0;
713 		IB_INO_INTR_CLEAR(ino_p->ino_clr_reg);
714 	}
715 
716 	/* re-enable interrupt */
717 	IB_INO_INTR_ON(ino_p->ino_map_reg);
718 	*ino_p->ino_map_reg;
719 }
720 
721 /*
722  * removes pci_ispec_t from the ino's link list.
723  * uses hardware mutex to lock out interrupt threads.
724  * Side effects: interrupt belongs to that ino is turned off on return.
725  * if we are sharing PCI slot with other inos, the caller needs
726  * to turn it back on.
727  */
728 void
729 ib_ino_rem_intr(pci_t *pci_p, ib_ino_pil_t *ipil_p, ih_t *ih_p)
730 {
731 	ib_ino_info_t *ino_p = ipil_p->ipil_ino_p;
732 	int i;
733 	ib_ino_t ino = ino_p->ino_ino;
734 	ih_t *ih_lst = ipil_p->ipil_ih_head;
735 	volatile uint64_t *state_reg =
736 	    IB_INO_INTR_STATE_REG(ino_p->ino_ib_p, ino);
737 	hrtime_t start_time;
738 
739 	ASSERT(MUTEX_HELD(&ino_p->ino_ib_p->ib_ino_lst_mutex));
740 	/* disable interrupt, this could disrupt devices sharing our slot */
741 	IB_INO_INTR_OFF(ino_p->ino_map_reg);
742 	*ino_p->ino_map_reg;
743 
744 	/* do NOT modify the link list until after the busy wait */
745 
746 	/*
747 	 * busy wait if there is interrupt being processed.
748 	 * either the pending state will be cleared by the interrupt wrapper
749 	 * or the interrupt will be marked as blocked indicating that it was
750 	 * jabbering.
751 	 */
752 	start_time = gethrtime();
753 	while ((ino_p->ino_unclaimed_intrs <= pci_unclaimed_intr_max) &&
754 	    IB_INO_INTR_PENDING(state_reg, ino) && !panicstr) {
755 		if (gethrtime() - start_time > pci_intrpend_timeout) {
756 			pbm_t *pbm_p = pci_p->pci_pbm_p;
757 			cmn_err(CE_WARN, "%s:%s: ib_ino_rem_intr %x timeout",
758 			    pbm_p->pbm_nameinst_str,
759 			    pbm_p->pbm_nameaddr_str, ino);
760 			break;
761 		}
762 	}
763 
764 	if (ipil_p->ipil_ih_size == 1) {
765 		if (ih_lst != ih_p)
766 			goto not_found;
767 		/* no need to set head/tail as ino_p will be freed */
768 		goto reset;
769 	}
770 
771 	/*
772 	 * if the interrupt was previously blocked (left in pending state)
773 	 * because of jabber we need to clear the pending state in case the
774 	 * jabber has gone away.
775 	 */
776 	if (ino_p->ino_unclaimed_intrs > pci_unclaimed_intr_max) {
777 		cmn_err(CE_WARN,
778 		    "%s%d: ib_ino_rem_intr: ino 0x%x has been unblocked",
779 		    ddi_driver_name(pci_p->pci_dip),
780 		    ddi_get_instance(pci_p->pci_dip),
781 		    ino_p->ino_ino);
782 		ino_p->ino_unclaimed_intrs = 0;
783 		IB_INO_INTR_CLEAR(ino_p->ino_clr_reg);
784 	}
785 
786 	/* search the link list for ih_p */
787 	for (i = 0;
788 	    (i < ipil_p->ipil_ih_size) && (ih_lst->ih_next != ih_p);
789 	    i++, ih_lst = ih_lst->ih_next)
790 		;
791 	if (ih_lst->ih_next != ih_p)
792 		goto not_found;
793 
794 	/* remove ih_p from the link list and maintain the head/tail */
795 	ih_lst->ih_next = ih_p->ih_next;
796 	if (ipil_p->ipil_ih_head == ih_p)
797 		ipil_p->ipil_ih_head = ih_p->ih_next;
798 	if (ipil_p->ipil_ih_tail == ih_p)
799 		ipil_p->ipil_ih_tail = ih_lst;
800 	ipil_p->ipil_ih_start = ipil_p->ipil_ih_head;
801 reset:
802 	if (ih_p->ih_config_handle)
803 		pci_config_teardown(&ih_p->ih_config_handle);
804 	if (ih_p->ih_ksp != NULL)
805 		kstat_delete(ih_p->ih_ksp);
806 	kmem_free(ih_p, sizeof (ih_t));
807 	ipil_p->ipil_ih_size--;
808 
809 	return;
810 not_found:
811 	DEBUG2(DBG_R_INTX, ino_p->ino_ib_p->ib_pci_p->pci_dip,
812 	    "ino_p=%x does not have ih_p=%x\n", ino_p, ih_p);
813 }
814 
815 ih_t *
816 ib_intr_locate_ih(ib_ino_pil_t *ipil_p, dev_info_t *rdip, uint32_t inum)
817 {
818 	ih_t *ih_p = ipil_p->ipil_ih_head;
819 	int i;
820 
821 	for (i = 0; i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next) {
822 		if (ih_p->ih_dip == rdip && ih_p->ih_inum == inum)
823 			return (ih_p);
824 	}
825 
826 	return ((ih_t *)NULL);
827 }
828 
829 ih_t *
830 ib_alloc_ih(dev_info_t *rdip, uint32_t inum,
831 	uint_t (*int_handler)(caddr_t int_handler_arg1,
832 	caddr_t int_handler_arg2),
833 	caddr_t int_handler_arg1,
834 	caddr_t int_handler_arg2)
835 {
836 	ih_t *ih_p;
837 
838 	ih_p = kmem_alloc(sizeof (ih_t), KM_SLEEP);
839 	ih_p->ih_dip = rdip;
840 	ih_p->ih_inum = inum;
841 	ih_p->ih_intr_state = PCI_INTR_STATE_DISABLE;
842 	ih_p->ih_handler = int_handler;
843 	ih_p->ih_handler_arg1 = int_handler_arg1;
844 	ih_p->ih_handler_arg2 = int_handler_arg2;
845 	ih_p->ih_config_handle = NULL;
846 	ih_p->ih_nsec = 0;
847 	ih_p->ih_ticks = 0;
848 	ih_p->ih_ksp = NULL;
849 
850 	return (ih_p);
851 }
852 
853 int
854 ib_update_intr_state(pci_t *pci_p, dev_info_t *rdip,
855 	ddi_intr_handle_impl_t *hdlp, uint_t new_intr_state)
856 {
857 	ib_t		*ib_p = pci_p->pci_ib_p;
858 	ib_ino_info_t	*ino_p;
859 	ib_ino_pil_t	*ipil_p;
860 	ib_mondo_t	mondo;
861 	ih_t		*ih_p;
862 	int		ret = DDI_FAILURE;
863 
864 	/*
865 	 * For PULSE interrupts, pci driver don't allocate
866 	 * ib_ino_info_t and ih_t data structures and also,
867 	 * not maintains any interrupt state information.
868 	 * So, just return success from here.
869 	 */
870 	if (hdlp->ih_vector & PCI_PULSE_INO) {
871 		DEBUG0(DBG_IB, ib_p->ib_pci_p->pci_dip,
872 		    "ib_update_intr_state: PULSE interrupt, return success\n");
873 
874 		return (DDI_SUCCESS);
875 	}
876 
877 	mutex_enter(&ib_p->ib_ino_lst_mutex);
878 
879 	if ((mondo = pci_xlate_intr(pci_p->pci_dip, rdip, pci_p->pci_ib_p,
880 	    IB_MONDO_TO_INO(hdlp->ih_vector))) == 0) {
881 		mutex_exit(&ib_p->ib_ino_lst_mutex);
882 		return (ret);
883 	}
884 
885 	ino_p = ib_locate_ino(ib_p, IB_MONDO_TO_INO(mondo));
886 	if (ino_p && (ipil_p = ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
887 		if (ih_p = ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum)) {
888 			ih_p->ih_intr_state = new_intr_state;
889 			ret = DDI_SUCCESS;
890 		}
891 	}
892 
893 	mutex_exit(&ib_p->ib_ino_lst_mutex);
894 	return (ret);
895 }
896 
897 /*
898  * Return the dips or number of dips associated with a given interrupt block.
899  * Size of dips array arg is passed in as dips_ret arg.
900  * Number of dips returned is returned in dips_ret arg.
901  * Array of dips gets returned in the dips argument.
902  * Function returns number of dips existing for the given interrupt block.
903  *
904  */
905 uint8_t
906 ib_get_ino_devs(
907 	ib_t *ib_p, uint32_t ino, uint8_t *devs_ret, pcitool_intr_dev_t *devs)
908 {
909 	ib_ino_info_t	*ino_p;
910 	ib_ino_pil_t	*ipil_p;
911 	ih_t		*ih_p;
912 	uint32_t	num_devs = 0;
913 	int		i, j;
914 
915 	mutex_enter(&ib_p->ib_ino_lst_mutex);
916 	ino_p = ib_locate_ino(ib_p, ino);
917 	if (ino_p != NULL) {
918 		for (j = 0, ipil_p = ino_p->ino_ipil_p; ipil_p;
919 		    ipil_p = ipil_p->ipil_next_p) {
920 			num_devs += ipil_p->ipil_ih_size;
921 
922 			for (i = 0, ih_p = ipil_p->ipil_ih_head;
923 			    ((i < ipil_p->ipil_ih_size) && (i < *devs_ret));
924 			    i++, j++, ih_p = ih_p->ih_next) {
925 				(void) strncpy(devs[i].driver_name,
926 				    ddi_driver_name(ih_p->ih_dip),
927 				    MAXMODCONFNAME-1);
928 				devs[i].driver_name[MAXMODCONFNAME] = '\0';
929 				(void) ddi_pathname(ih_p->ih_dip, devs[i].path);
930 				devs[i].dev_inst =
931 				    ddi_get_instance(ih_p->ih_dip);
932 			}
933 		}
934 		*devs_ret = j;
935 	}
936 
937 	mutex_exit(&ib_p->ib_ino_lst_mutex);
938 
939 	return (num_devs);
940 }
941 
942 void ib_log_new_cpu(ib_t *ib_p, uint32_t old_cpu_id, uint32_t new_cpu_id,
943 	uint32_t ino)
944 {
945 	ib_ino_info_t	*ino_p;
946 	ib_ino_pil_t	*ipil_p;
947 	ih_t		*ih_p;
948 	int		i;
949 
950 	mutex_enter(&ib_p->ib_ino_lst_mutex);
951 
952 	/* Log in OS data structures the new CPU. */
953 	ino_p = ib_locate_ino(ib_p, ino);
954 	if (ino_p != NULL) {
955 
956 		/* Log in OS data structures the new CPU. */
957 		ino_p->ino_cpuid = new_cpu_id;
958 
959 		for (ipil_p = ino_p->ino_ipil_p; ipil_p;
960 		    ipil_p = ipil_p->ipil_next_p) {
961 			for (i = 0, ih_p = ipil_p->ipil_ih_head;
962 			    (i < ipil_p->ipil_ih_size);
963 			    i++, ih_p = ih_p->ih_next) {
964 				/*
965 				 * Account for any residual time
966 				 * to be logged for old cpu.
967 				 */
968 				ib_cpu_ticks_to_ih_nsec(ib_p,
969 				    ipil_p->ipil_ih_head, old_cpu_id);
970 			}
971 		}
972 	}
973 
974 	mutex_exit(&ib_p->ib_ino_lst_mutex);
975 }
976