xref: /titanic_41/usr/src/uts/sun4u/io/pci/pci_ib.c (revision f33c1cdb6d38eb0715f03cf492f31c3d4d395c98)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * PCI Interrupt Block (RISCx) implementation
28  *	initialization
29  *	interrupt enable/disable/clear and mapping register manipulation
30  */
31 
32 #include <sys/types.h>
33 #include <sys/kmem.h>
34 #include <sys/async.h>
35 #include <sys/systm.h>		/* panicstr */
36 #include <sys/spl.h>
37 #include <sys/sunddi.h>
38 #include <sys/machsystm.h>	/* intr_dist_add */
39 #include <sys/ddi_impldefs.h>
40 #include <sys/clock.h>
41 #include <sys/cpuvar.h>
42 #include <sys/pci/pci_obj.h>
43 
44 #ifdef _STARFIRE
45 #include <sys/starfire.h>
46 #endif /* _STARFIRE */
47 
48 /*LINTLIBRARY*/
49 static uint_t ib_intr_reset(void *arg);
50 
51 void
52 ib_create(pci_t *pci_p)
53 {
54 	dev_info_t *dip = pci_p->pci_dip;
55 	ib_t *ib_p;
56 	uintptr_t a;
57 	int i;
58 
59 	/*
60 	 * Allocate interrupt block state structure and link it to
61 	 * the pci state structure.
62 	 */
63 	ib_p = kmem_zalloc(sizeof (ib_t), KM_SLEEP);
64 	pci_p->pci_ib_p = ib_p;
65 	ib_p->ib_pci_p = pci_p;
66 
67 	a = pci_ib_setup(ib_p);
68 
69 	/*
70 	 * Determine virtual addresses of interrupt mapping, clear and diag
71 	 * registers that have common offsets.
72 	 */
73 	ib_p->ib_slot_clear_intr_regs =
74 	    a + COMMON_IB_SLOT_CLEAR_INTR_REG_OFFSET;
75 	ib_p->ib_intr_retry_timer_reg =
76 	    (uint64_t *)(a + COMMON_IB_INTR_RETRY_TIMER_OFFSET);
77 	ib_p->ib_slot_intr_state_diag_reg =
78 	    (uint64_t *)(a + COMMON_IB_SLOT_INTR_STATE_DIAG_REG);
79 	ib_p->ib_obio_intr_state_diag_reg =
80 	    (uint64_t *)(a + COMMON_IB_OBIO_INTR_STATE_DIAG_REG);
81 
82 	if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
83 		ib_p->ib_upa_imr[0] = (volatile uint64_t *)
84 		    (a + COMMON_IB_UPA0_INTR_MAP_REG_OFFSET);
85 		ib_p->ib_upa_imr[1] = (volatile uint64_t *)
86 		    (a + COMMON_IB_UPA1_INTR_MAP_REG_OFFSET);
87 	}
88 
89 	DEBUG2(DBG_ATTACH, dip, "ib_create: slot_imr=%x, slot_cir=%x\n",
90 	    ib_p->ib_slot_intr_map_regs, ib_p->ib_obio_intr_map_regs);
91 	DEBUG2(DBG_ATTACH, dip, "ib_create: obio_imr=%x, obio_cir=%x\n",
92 	    ib_p->ib_slot_clear_intr_regs, ib_p->ib_obio_clear_intr_regs);
93 	DEBUG2(DBG_ATTACH, dip, "ib_create: upa0_imr=%x, upa1_imr=%x\n",
94 	    ib_p->ib_upa_imr[0], ib_p->ib_upa_imr[1]);
95 	DEBUG3(DBG_ATTACH, dip,
96 	    "ib_create: retry_timer=%x, obio_diag=%x slot_diag=%x\n",
97 	    ib_p->ib_intr_retry_timer_reg,
98 	    ib_p->ib_obio_intr_state_diag_reg,
99 	    ib_p->ib_slot_intr_state_diag_reg);
100 
101 	ib_p->ib_ino_lst = (ib_ino_info_t *)NULL;
102 	mutex_init(&ib_p->ib_intr_lock, NULL, MUTEX_DRIVER, NULL);
103 	mutex_init(&ib_p->ib_ino_lst_mutex, NULL, MUTEX_DRIVER, NULL);
104 
105 	DEBUG1(DBG_ATTACH, dip, "ib_create: numproxy=%x\n",
106 	    pci_p->pci_numproxy);
107 	for (i = 1; i <= pci_p->pci_numproxy; i++) {
108 		set_intr_mapping_reg(pci_p->pci_id,
109 		    (uint64_t *)ib_p->ib_upa_imr[i - 1], i);
110 	}
111 
112 	ib_configure(ib_p);
113 	bus_func_register(BF_TYPE_RESINTR, ib_intr_reset, ib_p);
114 }
115 
116 void
117 ib_destroy(pci_t *pci_p)
118 {
119 	ib_t *ib_p = pci_p->pci_ib_p;
120 	dev_info_t *dip = pci_p->pci_dip;
121 
122 	DEBUG0(DBG_IB, dip, "ib_destroy\n");
123 	bus_func_unregister(BF_TYPE_RESINTR, ib_intr_reset, ib_p);
124 
125 	intr_dist_rem_weighted(ib_intr_dist_all, ib_p);
126 	mutex_destroy(&ib_p->ib_ino_lst_mutex);
127 	mutex_destroy(&ib_p->ib_intr_lock);
128 
129 	ib_free_ino_all(ib_p);
130 
131 	kmem_free(ib_p, sizeof (ib_t));
132 	pci_p->pci_ib_p = NULL;
133 }
134 
135 void
136 ib_configure(ib_t *ib_p)
137 {
138 	/* XXX could be different between psycho and schizo */
139 	*ib_p->ib_intr_retry_timer_reg = pci_intr_retry_intv;
140 }
141 
142 /*
143  * can only used for psycho internal interrupts thermal, power,
144  * ue, ce, pbm
145  */
146 void
147 ib_intr_enable(pci_t *pci_p, ib_ino_t ino)
148 {
149 	ib_t *ib_p = pci_p->pci_ib_p;
150 	ib_mondo_t mondo = IB_INO_TO_MONDO(ib_p, ino);
151 	volatile uint64_t *imr_p = ib_intr_map_reg_addr(ib_p, ino);
152 	uint_t cpu_id;
153 
154 	/*
155 	 * Determine the cpu for the interrupt.
156 	 */
157 	mutex_enter(&ib_p->ib_intr_lock);
158 	cpu_id = intr_dist_cpuid();
159 #ifdef _STARFIRE
160 	cpu_id = pc_translate_tgtid(IB2CB(ib_p)->cb_ittrans_cookie, cpu_id,
161 	    IB_GET_MAPREG_INO(ino));
162 #endif /* _STARFIRE */
163 	DEBUG2(DBG_IB, pci_p->pci_dip,
164 	    "ib_intr_enable: ino=%x cpu_id=%x\n", ino, cpu_id);
165 
166 	*imr_p = ib_get_map_reg(mondo, cpu_id);
167 	IB_INO_INTR_CLEAR(ib_clear_intr_reg_addr(ib_p, ino));
168 	mutex_exit(&ib_p->ib_intr_lock);
169 }
170 
171 /*
172  * Disable the interrupt via its interrupt mapping register.
173  * Can only be used for internal interrupts: thermal, power, ue, ce, pbm.
174  * If called under interrupt context, wait should be set to 0
175  */
176 void
177 ib_intr_disable(ib_t *ib_p, ib_ino_t ino, int wait)
178 {
179 	volatile uint64_t *imr_p = ib_intr_map_reg_addr(ib_p, ino);
180 	volatile uint64_t *state_reg_p = IB_INO_INTR_STATE_REG(ib_p, ino);
181 	hrtime_t start_time;
182 
183 	/* disable the interrupt */
184 	mutex_enter(&ib_p->ib_intr_lock);
185 	IB_INO_INTR_OFF(imr_p);
186 	*imr_p;	/* flush previous write */
187 	mutex_exit(&ib_p->ib_intr_lock);
188 
189 	if (!wait)
190 		goto wait_done;
191 
192 	start_time = gethrtime();
193 	/* busy wait if there is interrupt being processed */
194 	while (IB_INO_INTR_PENDING(state_reg_p, ino) && !panicstr) {
195 		if (gethrtime() - start_time > pci_intrpend_timeout) {
196 			pbm_t *pbm_p = ib_p->ib_pci_p->pci_pbm_p;
197 			cmn_err(CE_WARN, "%s:%s: ib_intr_disable timeout %x",
198 			    pbm_p->pbm_nameinst_str,
199 			    pbm_p->pbm_nameaddr_str, ino);
200 				break;
201 		}
202 	}
203 wait_done:
204 	IB_INO_INTR_PEND(ib_clear_intr_reg_addr(ib_p, ino));
205 #ifdef _STARFIRE
206 	pc_ittrans_cleanup(IB2CB(ib_p)->cb_ittrans_cookie,
207 	    (volatile uint64_t *)(uintptr_t)ino);
208 #endif /* _STARFIRE */
209 }
210 
211 /* can only used for psycho internal interrupts thermal, power, ue, ce, pbm */
212 void
213 ib_nintr_clear(ib_t *ib_p, ib_ino_t ino)
214 {
215 	uint64_t *clr_reg = ib_clear_intr_reg_addr(ib_p, ino);
216 	IB_INO_INTR_CLEAR(clr_reg);
217 }
218 
219 /*
220  * distribute PBM and UPA interrupts. ino is set to 0 by caller if we
221  * are dealing with UPA interrupts (without inos).
222  */
223 void
224 ib_intr_dist_nintr(ib_t *ib_p, ib_ino_t ino, volatile uint64_t *imr_p)
225 {
226 	volatile uint64_t imr = *imr_p;
227 	uint32_t cpu_id;
228 
229 	if (!IB_INO_INTR_ISON(imr))
230 		return;
231 
232 	cpu_id = intr_dist_cpuid();
233 
234 #ifdef _STARFIRE
235 	if (ino) {
236 		cpu_id = pc_translate_tgtid(IB2CB(ib_p)->cb_ittrans_cookie,
237 		    cpu_id, IB_GET_MAPREG_INO(ino));
238 	}
239 #else /* _STARFIRE */
240 	if (ib_map_reg_get_cpu(*imr_p) == cpu_id)
241 		return;
242 #endif /* _STARFIRE */
243 
244 	*imr_p = ib_get_map_reg(IB_IMR2MONDO(imr), cpu_id);
245 	imr = *imr_p;	/* flush previous write */
246 }
247 
248 /*
249  * Converts into nsec, ticks logged with a given CPU.  Adds nsec to ih.
250  */
251 /*ARGSUSED*/
252 void
253 ib_cpu_ticks_to_ih_nsec(ib_t *ib_p, ih_t *ih_p, uint32_t cpu_id)
254 {
255 	extern kmutex_t pciintr_ks_template_lock;
256 	hrtime_t ticks;
257 
258 	/*
259 	 * Because we are updating two fields in ih_t we must lock
260 	 * pciintr_ks_template_lock to prevent someone from reading the
261 	 * kstats after we set ih_ticks to 0 and before we increment
262 	 * ih_nsec to compensate.
263 	 *
264 	 * We must also protect against the interrupt arriving and incrementing
265 	 * ih_ticks between the time we read it and when we reset it to 0.
266 	 * To do this we use atomic_swap.
267 	 */
268 
269 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
270 
271 	mutex_enter(&pciintr_ks_template_lock);
272 	ticks = atomic_swap_64(&ih_p->ih_ticks, 0);
273 	ih_p->ih_nsec += (uint64_t)tick2ns(ticks, cpu_id);
274 	mutex_exit(&pciintr_ks_template_lock);
275 }
276 
277 static void
278 ib_intr_dist(ib_t *ib_p, ib_ino_info_t *ino_p)
279 {
280 	uint32_t cpu_id = ino_p->ino_cpuid;
281 	ib_ino_t ino = ino_p->ino_ino;
282 	volatile uint64_t imr, *imr_p, *state_reg;
283 	hrtime_t start_time;
284 
285 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
286 	imr_p = ib_intr_map_reg_addr(ib_p, ino);
287 	state_reg = IB_INO_INTR_STATE_REG(ib_p, ino);
288 
289 #ifdef _STARFIRE
290 	/*
291 	 * For Starfire it is a pain to check the current target for
292 	 * the mondo since we have to read the PC asics ITTR slot
293 	 * assigned to this mondo. It will be much easier to assume
294 	 * the current target is always different and do the target
295 	 * reprogram all the time.
296 	 */
297 	cpu_id = pc_translate_tgtid(IB2CB(ib_p)->cb_ittrans_cookie, cpu_id,
298 	    IB_GET_MAPREG_INO(ino));
299 #else
300 	if (ib_map_reg_get_cpu(*imr_p) == cpu_id) /* same cpu, no reprog */
301 		return;
302 #endif /* _STARFIRE */
303 
304 	/* disable interrupt, this could disrupt devices sharing our slot */
305 	IB_INO_INTR_OFF(imr_p);
306 	imr = *imr_p;	/* flush previous write */
307 
308 	/* busy wait if there is interrupt being processed */
309 	start_time = gethrtime();
310 	while (IB_INO_INTR_PENDING(state_reg, ino) && !panicstr) {
311 		if (gethrtime() - start_time > pci_intrpend_timeout) {
312 			pbm_t *pbm_p = ib_p->ib_pci_p->pci_pbm_p;
313 			cmn_err(CE_WARN, "%s:%s: ib_intr_dist(%p,%x) timeout",
314 			    pbm_p->pbm_nameinst_str,
315 			    pbm_p->pbm_nameaddr_str,
316 			    imr_p, IB_INO_TO_MONDO(ib_p, ino));
317 			break;
318 		}
319 	}
320 	*imr_p = ib_get_map_reg(IB_IMR2MONDO(imr), cpu_id);
321 	imr = *imr_p;	/* flush previous write */
322 }
323 
324 /*
325  * Redistribute interrupts of the specified weight. The first call has a weight
326  * of weight_max, which can be used to trigger initialization for
327  * redistribution. The inos with weight [weight_max, inf.) should be processed
328  * on the "weight == weight_max" call.  This first call is followed by calls
329  * of decreasing weights, inos of that weight should be processed.  The final
330  * call specifies a weight of zero, this can be used to trigger processing of
331  * stragglers.
332  */
333 void
334 ib_intr_dist_all(void *arg, int32_t weight_max, int32_t weight)
335 {
336 	ib_t *ib_p = (ib_t *)arg;
337 	pci_t *pci_p = ib_p->ib_pci_p;
338 	ib_ino_info_t *ino_p;
339 	ib_ino_pil_t *ipil_p;
340 	ih_t *ih_lst;
341 	int32_t dweight;
342 	int i;
343 
344 	if (weight == 0) {
345 		mutex_enter(&ib_p->ib_intr_lock);
346 		if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
347 			for (i = 0; i < 2; i++)
348 				ib_intr_dist_nintr(ib_p, 0,
349 				    ib_p->ib_upa_imr[i]);
350 		}
351 		mutex_exit(&ib_p->ib_intr_lock);
352 	}
353 
354 	mutex_enter(&ib_p->ib_ino_lst_mutex);
355 
356 	/* Perform special processing for first call of a redistribution. */
357 	if (weight == weight_max) {
358 		for (ino_p = ib_p->ib_ino_lst; ino_p;
359 		    ino_p = ino_p->ino_next_p) {
360 
361 			/*
362 			 * Clear ino_established of each ino on first call.
363 			 * The ino_established field may be used by a pci
364 			 * nexus driver's pci_intr_dist_cpuid implementation
365 			 * when detection of established pci slot-cpu binding
366 			 * for multi function pci cards.
367 			 */
368 			ino_p->ino_established = 0;
369 
370 			/*
371 			 * recompute the ino_intr_weight based on the device
372 			 * weight of all devinfo nodes sharing the ino (this
373 			 * will allow us to pick up new weights established by
374 			 * i_ddi_set_intr_weight()).
375 			 */
376 			ino_p->ino_intr_weight = 0;
377 
378 			for (ipil_p = ino_p->ino_ipil_p; ipil_p;
379 			    ipil_p = ipil_p->ipil_next_p) {
380 				for (i = 0, ih_lst = ipil_p->ipil_ih_head;
381 				    i < ipil_p->ipil_ih_size; i++,
382 				    ih_lst = ih_lst->ih_next) {
383 					dweight = i_ddi_get_intr_weight
384 					    (ih_lst->ih_dip);
385 					if (dweight > 0)
386 						ino_p->ino_intr_weight +=
387 						    dweight;
388 				}
389 			}
390 		}
391 	}
392 
393 	for (ino_p = ib_p->ib_ino_lst; ino_p; ino_p = ino_p->ino_next_p) {
394 		uint32_t orig_cpuid;
395 
396 		/*
397 		 * Get the weight of the ino and determine if we are going to
398 		 * process call.  We wait until an ib_intr_dist_all call of
399 		 * the proper weight occurs to support redistribution of all
400 		 * heavy weighted interrupts first (across all nexus driver
401 		 * instances).  This is done to ensure optimal
402 		 * INTR_WEIGHTED_DIST behavior.
403 		 */
404 		if ((weight == ino_p->ino_intr_weight) ||
405 		    ((weight >= weight_max) &&
406 		    (ino_p->ino_intr_weight >= weight_max))) {
407 			/* select cpuid to target and mark ino established */
408 			orig_cpuid = ino_p->ino_cpuid;
409 			if (cpu[orig_cpuid] == NULL)
410 				orig_cpuid = CPU->cpu_id;
411 			ino_p->ino_cpuid = pci_intr_dist_cpuid(ib_p, ino_p);
412 			ino_p->ino_established = 1;
413 
414 			/* Add device weight of ino devinfos to targeted cpu. */
415 			for (ipil_p = ino_p->ino_ipil_p; ipil_p;
416 			    ipil_p = ipil_p->ipil_next_p) {
417 				for (i = 0, ih_lst = ipil_p->ipil_ih_head;
418 				    i < ipil_p->ipil_ih_size; i++,
419 				    ih_lst = ih_lst->ih_next) {
420 
421 					dweight = i_ddi_get_intr_weight(
422 					    ih_lst->ih_dip);
423 					intr_dist_cpuid_add_device_weight(
424 					    ino_p->ino_cpuid, ih_lst->ih_dip,
425 					    dweight);
426 
427 					/*
428 					 * Different cpus may have different
429 					 * clock speeds. to account for this,
430 					 * whenever an interrupt is moved to a
431 					 * new CPU, we convert the accumulated
432 					 * ticks into nsec, based upon the clock
433 					 * rate of the prior CPU.
434 					 *
435 					 * It is possible that the prior CPU no
436 					 * longer exists. In this case, fall
437 					 * back to using this CPU's clock rate.
438 					 *
439 					 * Note that the value in ih_ticks has
440 					 * already been corrected for any power
441 					 * savings mode which might have been
442 					 * in effect.
443 					 */
444 					ib_cpu_ticks_to_ih_nsec(ib_p, ih_lst,
445 					    orig_cpuid);
446 				}
447 			}
448 
449 			/* program the hardware */
450 			ib_intr_dist(ib_p, ino_p);
451 		}
452 	}
453 	mutex_exit(&ib_p->ib_ino_lst_mutex);
454 }
455 
456 /*
457  * Reset interrupts to IDLE.  This function is called during
458  * panic handling after redistributing interrupts; it's needed to
459  * support dumping to network devices after 'sync' from OBP.
460  *
461  * N.B.  This routine runs in a context where all other threads
462  * are permanently suspended.
463  */
464 static uint_t
465 ib_intr_reset(void *arg)
466 {
467 	ib_t *ib_p = (ib_t *)arg;
468 	ib_ino_t ino;
469 	uint64_t *clr_reg;
470 
471 	/*
472 	 * Note that we only actually care about interrupts that are
473 	 * potentially from network devices.
474 	 */
475 	for (ino = 0; ino <= ib_p->ib_max_ino; ino++) {
476 		clr_reg = ib_clear_intr_reg_addr(ib_p, ino);
477 		IB_INO_INTR_CLEAR(clr_reg);
478 	}
479 
480 	return (BF_NONE);
481 }
482 
483 void
484 ib_suspend(ib_t *ib_p)
485 {
486 	ib_ino_info_t *ip;
487 	pci_t *pci_p = ib_p->ib_pci_p;
488 
489 	/* save ino_lst interrupts' mapping registers content */
490 	mutex_enter(&ib_p->ib_ino_lst_mutex);
491 	for (ip = ib_p->ib_ino_lst; ip; ip = ip->ino_next_p)
492 		ip->ino_map_reg_save = *ip->ino_map_reg;
493 	mutex_exit(&ib_p->ib_ino_lst_mutex);
494 
495 	if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
496 		ib_p->ib_upa_imr_state[0] = *ib_p->ib_upa_imr[0];
497 		ib_p->ib_upa_imr_state[1] = *ib_p->ib_upa_imr[1];
498 	}
499 }
500 
501 void
502 ib_resume(ib_t *ib_p)
503 {
504 	ib_ino_info_t *ip;
505 	pci_t *pci_p = ib_p->ib_pci_p;
506 
507 	/* restore ino_lst interrupts' mapping registers content */
508 	mutex_enter(&ib_p->ib_ino_lst_mutex);
509 	for (ip = ib_p->ib_ino_lst; ip; ip = ip->ino_next_p) {
510 		IB_INO_INTR_CLEAR(ip->ino_clr_reg);	 /* set intr to idle */
511 		*ip->ino_map_reg = ip->ino_map_reg_save; /* restore IMR */
512 	}
513 	mutex_exit(&ib_p->ib_ino_lst_mutex);
514 
515 	if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
516 		*ib_p->ib_upa_imr[0] = ib_p->ib_upa_imr_state[0];
517 		*ib_p->ib_upa_imr[1] = ib_p->ib_upa_imr_state[1];
518 	}
519 }
520 
521 /*
522  * locate ino_info structure on ib_p->ib_ino_lst according to ino#
523  * returns NULL if not found.
524  */
525 ib_ino_info_t *
526 ib_locate_ino(ib_t *ib_p, ib_ino_t ino_num)
527 {
528 	ib_ino_info_t *ino_p = ib_p->ib_ino_lst;
529 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
530 
531 	for (; ino_p && ino_p->ino_ino != ino_num; ino_p = ino_p->ino_next_p)
532 		;
533 	return (ino_p);
534 }
535 
536 #define	IB_INO_TO_SLOT(ino) (IB_IS_OBIO_INO(ino) ? 0xff : ((ino) & 0x1f) >> 2)
537 
538 ib_ino_pil_t *
539 ib_new_ino_pil(ib_t *ib_p, ib_ino_t ino_num, uint_t pil, ih_t *ih_p)
540 {
541 	ib_ino_pil_t	*ipil_p = kmem_zalloc(sizeof (ib_ino_pil_t), KM_SLEEP);
542 	ib_ino_info_t	*ino_p;
543 
544 	if ((ino_p = ib_locate_ino(ib_p, ino_num)) == NULL) {
545 		ino_p = kmem_zalloc(sizeof (ib_ino_info_t), KM_SLEEP);
546 
547 		ino_p->ino_next_p = ib_p->ib_ino_lst;
548 		ib_p->ib_ino_lst = ino_p;
549 
550 		ino_p->ino_ino = ino_num;
551 		ino_p->ino_slot_no = IB_INO_TO_SLOT(ino_num);
552 		ino_p->ino_ib_p = ib_p;
553 		ino_p->ino_clr_reg = ib_clear_intr_reg_addr(ib_p, ino_num);
554 		ino_p->ino_map_reg = ib_intr_map_reg_addr(ib_p, ino_num);
555 		ino_p->ino_unclaimed_intrs = 0;
556 		ino_p->ino_lopil = pil;
557 	}
558 
559 	ih_p->ih_next = ih_p;
560 	ipil_p->ipil_pil = pil;
561 	ipil_p->ipil_ih_head = ih_p;
562 	ipil_p->ipil_ih_tail = ih_p;
563 	ipil_p->ipil_ih_start = ih_p;
564 	ipil_p->ipil_ih_size = 1;
565 	ipil_p->ipil_ino_p = ino_p;
566 
567 	ipil_p->ipil_next_p = ino_p->ino_ipil_p;
568 	ino_p->ino_ipil_p = ipil_p;
569 	ino_p->ino_ipil_size++;
570 
571 	if (ino_p->ino_lopil > pil)
572 		ino_p->ino_lopil = pil;
573 
574 	return (ipil_p);
575 }
576 
577 void
578 ib_delete_ino_pil(ib_t *ib_p, ib_ino_pil_t *ipil_p)
579 {
580 	ib_ino_info_t	*ino_p = ipil_p->ipil_ino_p;
581 	ib_ino_pil_t	*prev, *next;
582 	ushort_t	pil = ipil_p->ipil_pil;
583 
584 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
585 
586 	if (ino_p->ino_ipil_p == ipil_p)
587 		ino_p->ino_ipil_p = ipil_p->ipil_next_p;
588 	else {
589 		for (prev = next = ino_p->ino_ipil_p; next != ipil_p;
590 		    prev = next, next = next->ipil_next_p)
591 			;
592 
593 		if (prev)
594 			prev->ipil_next_p = ipil_p->ipil_next_p;
595 	}
596 
597 	kmem_free(ipil_p, sizeof (ib_ino_pil_t));
598 
599 	if ((--ino_p->ino_ipil_size) && (ino_p->ino_lopil == pil)) {
600 		for (next = ino_p->ino_ipil_p, pil = next->ipil_pil;
601 		    next; next = next->ipil_next_p) {
602 
603 			if (pil > next->ipil_pil)
604 				pil = next->ipil_pil;
605 		}
606 		/*
607 		 * Value stored in pil should be the lowest pil.
608 		 */
609 		ino_p->ino_lopil = pil;
610 	}
611 
612 	if (ino_p->ino_ipil_size)
613 		return;
614 
615 	if (ib_p->ib_ino_lst == ino_p)
616 		ib_p->ib_ino_lst = ino_p->ino_next_p;
617 	else {
618 		ib_ino_info_t	*list = ib_p->ib_ino_lst;
619 
620 		for (; list->ino_next_p != ino_p; list = list->ino_next_p)
621 			;
622 		list->ino_next_p = ino_p->ino_next_p;
623 	}
624 }
625 
626 /* free all ino when we are detaching */
627 void
628 ib_free_ino_all(ib_t *ib_p)
629 {
630 	ib_ino_info_t *ino_p = ib_p->ib_ino_lst;
631 	ib_ino_info_t *next = NULL;
632 
633 	while (ino_p) {
634 		next = ino_p->ino_next_p;
635 		kmem_free(ino_p, sizeof (ib_ino_info_t));
636 		ino_p = next;
637 	}
638 }
639 
640 /*
641  * Locate ib_ino_pil_t structure on ino_p->ino_ipil_p according to ino#
642  * returns NULL if not found.
643  */
644 ib_ino_pil_t *
645 ib_ino_locate_ipil(ib_ino_info_t *ino_p, uint_t pil)
646 {
647 	ib_ino_pil_t	*ipil_p = ino_p->ino_ipil_p;
648 
649 	for (; ipil_p && ipil_p->ipil_pil != pil; ipil_p = ipil_p->ipil_next_p)
650 		;
651 
652 	return (ipil_p);
653 }
654 
655 void
656 ib_ino_add_intr(pci_t *pci_p, ib_ino_pil_t *ipil_p, ih_t *ih_p)
657 {
658 	ib_ino_info_t *ino_p = ipil_p->ipil_ino_p;
659 	ib_ino_t ino = ino_p->ino_ino;
660 	ib_t *ib_p = ino_p->ino_ib_p;
661 	volatile uint64_t *state_reg = IB_INO_INTR_STATE_REG(ib_p, ino);
662 	hrtime_t start_time;
663 
664 	ASSERT(ib_p == pci_p->pci_ib_p);
665 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
666 
667 	/* disable interrupt, this could disrupt devices sharing our slot */
668 	IB_INO_INTR_OFF(ino_p->ino_map_reg);
669 	*ino_p->ino_map_reg;
670 
671 	/* do NOT modify the link list until after the busy wait */
672 
673 	/*
674 	 * busy wait if there is interrupt being processed.
675 	 * either the pending state will be cleared by the interrupt wrapper
676 	 * or the interrupt will be marked as blocked indicating that it was
677 	 * jabbering.
678 	 */
679 	start_time = gethrtime();
680 	while ((ino_p->ino_unclaimed_intrs <= pci_unclaimed_intr_max) &&
681 	    IB_INO_INTR_PENDING(state_reg, ino) && !panicstr) {
682 		if (gethrtime() - start_time > pci_intrpend_timeout) {
683 			pbm_t *pbm_p = pci_p->pci_pbm_p;
684 			cmn_err(CE_WARN, "%s:%s: ib_ino_add_intr %x timeout",
685 			    pbm_p->pbm_nameinst_str,
686 			    pbm_p->pbm_nameaddr_str, ino);
687 			break;
688 		}
689 	}
690 
691 	/* link up ih_t */
692 	ih_p->ih_next = ipil_p->ipil_ih_head;
693 	ipil_p->ipil_ih_tail->ih_next = ih_p;
694 	ipil_p->ipil_ih_tail = ih_p;
695 
696 	ipil_p->ipil_ih_start = ipil_p->ipil_ih_head;
697 	ipil_p->ipil_ih_size++;
698 
699 	/*
700 	 * if the interrupt was previously blocked (left in pending state)
701 	 * because of jabber we need to clear the pending state in case the
702 	 * jabber has gone away.
703 	 */
704 	if (ino_p->ino_unclaimed_intrs > pci_unclaimed_intr_max) {
705 		cmn_err(CE_WARN,
706 		    "%s%d: ib_ino_add_intr: ino 0x%x has been unblocked",
707 		    ddi_driver_name(pci_p->pci_dip),
708 		    ddi_get_instance(pci_p->pci_dip),
709 		    ino_p->ino_ino);
710 		ino_p->ino_unclaimed_intrs = 0;
711 		IB_INO_INTR_CLEAR(ino_p->ino_clr_reg);
712 	}
713 
714 	/* re-enable interrupt */
715 	IB_INO_INTR_ON(ino_p->ino_map_reg);
716 	*ino_p->ino_map_reg;
717 }
718 
719 /*
720  * removes pci_ispec_t from the ino's link list.
721  * uses hardware mutex to lock out interrupt threads.
722  * Side effects: interrupt belongs to that ino is turned off on return.
723  * if we are sharing PCI slot with other inos, the caller needs
724  * to turn it back on.
725  */
726 void
727 ib_ino_rem_intr(pci_t *pci_p, ib_ino_pil_t *ipil_p, ih_t *ih_p)
728 {
729 	ib_ino_info_t *ino_p = ipil_p->ipil_ino_p;
730 	int i;
731 	ib_ino_t ino = ino_p->ino_ino;
732 	ih_t *ih_lst = ipil_p->ipil_ih_head;
733 	volatile uint64_t *state_reg =
734 	    IB_INO_INTR_STATE_REG(ino_p->ino_ib_p, ino);
735 	hrtime_t start_time;
736 
737 	ASSERT(MUTEX_HELD(&ino_p->ino_ib_p->ib_ino_lst_mutex));
738 	/* disable interrupt, this could disrupt devices sharing our slot */
739 	IB_INO_INTR_OFF(ino_p->ino_map_reg);
740 	*ino_p->ino_map_reg;
741 
742 	/* do NOT modify the link list until after the busy wait */
743 
744 	/*
745 	 * busy wait if there is interrupt being processed.
746 	 * either the pending state will be cleared by the interrupt wrapper
747 	 * or the interrupt will be marked as blocked indicating that it was
748 	 * jabbering.
749 	 */
750 	start_time = gethrtime();
751 	while ((ino_p->ino_unclaimed_intrs <= pci_unclaimed_intr_max) &&
752 	    IB_INO_INTR_PENDING(state_reg, ino) && !panicstr) {
753 		if (gethrtime() - start_time > pci_intrpend_timeout) {
754 			pbm_t *pbm_p = pci_p->pci_pbm_p;
755 			cmn_err(CE_WARN, "%s:%s: ib_ino_rem_intr %x timeout",
756 			    pbm_p->pbm_nameinst_str,
757 			    pbm_p->pbm_nameaddr_str, ino);
758 			break;
759 		}
760 	}
761 
762 	if (ipil_p->ipil_ih_size == 1) {
763 		if (ih_lst != ih_p)
764 			goto not_found;
765 		/* no need to set head/tail as ino_p will be freed */
766 		goto reset;
767 	}
768 
769 	/*
770 	 * if the interrupt was previously blocked (left in pending state)
771 	 * because of jabber we need to clear the pending state in case the
772 	 * jabber has gone away.
773 	 */
774 	if (ino_p->ino_unclaimed_intrs > pci_unclaimed_intr_max) {
775 		cmn_err(CE_WARN,
776 		    "%s%d: ib_ino_rem_intr: ino 0x%x has been unblocked",
777 		    ddi_driver_name(pci_p->pci_dip),
778 		    ddi_get_instance(pci_p->pci_dip),
779 		    ino_p->ino_ino);
780 		ino_p->ino_unclaimed_intrs = 0;
781 		IB_INO_INTR_CLEAR(ino_p->ino_clr_reg);
782 	}
783 
784 	/* search the link list for ih_p */
785 	for (i = 0;
786 	    (i < ipil_p->ipil_ih_size) && (ih_lst->ih_next != ih_p);
787 	    i++, ih_lst = ih_lst->ih_next)
788 		;
789 	if (ih_lst->ih_next != ih_p)
790 		goto not_found;
791 
792 	/* remove ih_p from the link list and maintain the head/tail */
793 	ih_lst->ih_next = ih_p->ih_next;
794 	if (ipil_p->ipil_ih_head == ih_p)
795 		ipil_p->ipil_ih_head = ih_p->ih_next;
796 	if (ipil_p->ipil_ih_tail == ih_p)
797 		ipil_p->ipil_ih_tail = ih_lst;
798 	ipil_p->ipil_ih_start = ipil_p->ipil_ih_head;
799 reset:
800 	if (ih_p->ih_config_handle)
801 		pci_config_teardown(&ih_p->ih_config_handle);
802 	if (ih_p->ih_ksp != NULL)
803 		kstat_delete(ih_p->ih_ksp);
804 	kmem_free(ih_p, sizeof (ih_t));
805 	ipil_p->ipil_ih_size--;
806 
807 	return;
808 not_found:
809 	DEBUG2(DBG_R_INTX, ino_p->ino_ib_p->ib_pci_p->pci_dip,
810 	    "ino_p=%x does not have ih_p=%x\n", ino_p, ih_p);
811 }
812 
813 ih_t *
814 ib_intr_locate_ih(ib_ino_pil_t *ipil_p, dev_info_t *rdip, uint32_t inum)
815 {
816 	ih_t *ih_p = ipil_p->ipil_ih_head;
817 	int i;
818 
819 	for (i = 0; i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next) {
820 		if (ih_p->ih_dip == rdip && ih_p->ih_inum == inum)
821 			return (ih_p);
822 	}
823 
824 	return ((ih_t *)NULL);
825 }
826 
827 ih_t *
828 ib_alloc_ih(dev_info_t *rdip, uint32_t inum,
829 	uint_t (*int_handler)(caddr_t int_handler_arg1,
830 	caddr_t int_handler_arg2),
831 	caddr_t int_handler_arg1,
832 	caddr_t int_handler_arg2)
833 {
834 	ih_t *ih_p;
835 
836 	ih_p = kmem_alloc(sizeof (ih_t), KM_SLEEP);
837 	ih_p->ih_dip = rdip;
838 	ih_p->ih_inum = inum;
839 	ih_p->ih_intr_state = PCI_INTR_STATE_DISABLE;
840 	ih_p->ih_handler = int_handler;
841 	ih_p->ih_handler_arg1 = int_handler_arg1;
842 	ih_p->ih_handler_arg2 = int_handler_arg2;
843 	ih_p->ih_config_handle = NULL;
844 	ih_p->ih_nsec = 0;
845 	ih_p->ih_ticks = 0;
846 	ih_p->ih_ksp = NULL;
847 
848 	return (ih_p);
849 }
850 
851 int
852 ib_update_intr_state(pci_t *pci_p, dev_info_t *rdip,
853 	ddi_intr_handle_impl_t *hdlp, uint_t new_intr_state)
854 {
855 	ib_t		*ib_p = pci_p->pci_ib_p;
856 	ib_ino_info_t	*ino_p;
857 	ib_ino_pil_t	*ipil_p;
858 	ib_mondo_t	mondo;
859 	ih_t		*ih_p;
860 	int		ret = DDI_FAILURE;
861 
862 	/*
863 	 * For PULSE interrupts, pci driver don't allocate
864 	 * ib_ino_info_t and ih_t data structures and also,
865 	 * not maintains any interrupt state information.
866 	 * So, just return success from here.
867 	 */
868 	if (hdlp->ih_vector & PCI_PULSE_INO) {
869 		DEBUG0(DBG_IB, ib_p->ib_pci_p->pci_dip,
870 		    "ib_update_intr_state: PULSE interrupt, return success\n");
871 
872 		return (DDI_SUCCESS);
873 	}
874 
875 	mutex_enter(&ib_p->ib_ino_lst_mutex);
876 
877 	if ((mondo = pci_xlate_intr(pci_p->pci_dip, rdip, pci_p->pci_ib_p,
878 	    IB_MONDO_TO_INO(hdlp->ih_vector))) == 0) {
879 		mutex_exit(&ib_p->ib_ino_lst_mutex);
880 		return (ret);
881 	}
882 
883 	ino_p = ib_locate_ino(ib_p, IB_MONDO_TO_INO(mondo));
884 	if (ino_p && (ipil_p = ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
885 		if (ih_p = ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum)) {
886 			ih_p->ih_intr_state = new_intr_state;
887 			ret = DDI_SUCCESS;
888 		}
889 	}
890 
891 	mutex_exit(&ib_p->ib_ino_lst_mutex);
892 	return (ret);
893 }
894 
895 /*
896  * Get interrupt CPU for a given ino.
897  * Return info only for inos which are already mapped to devices.
898  */
899 /*ARGSUSED*/
900 int
901 ib_get_intr_target(pci_t *pci_p, ib_ino_t ino, int *cpu_id_p)
902 {
903 	dev_info_t		*dip = pci_p->pci_dip;
904 	ib_t			*ib_p = pci_p->pci_ib_p;
905 	volatile uint64_t	*imregp;
906 	uint64_t		imregval;
907 
908 	DEBUG1(DBG_IB, dip, "ib_get_intr_target: ino %x\n", ino);
909 
910 	imregp = ib_intr_map_reg_addr(ib_p, ino);
911 	imregval = *imregp;
912 
913 	*cpu_id_p = ib_map_reg_get_cpu(imregval);
914 
915 	DEBUG1(DBG_IB, dip, "ib_get_intr_target: cpu_id %x\n", *cpu_id_p);
916 
917 	return (DDI_SUCCESS);
918 }
919 
920 /*
921  * Associate a new CPU with a given ino.
922  * Operate only on inos which are already mapped to devices.
923  */
924 int
925 ib_set_intr_target(pci_t *pci_p, ib_ino_t ino, int cpu_id)
926 {
927 	dev_info_t		*dip = pci_p->pci_dip;
928 	ib_t			*ib_p = pci_p->pci_ib_p;
929 	int			ret = DDI_SUCCESS;
930 	uint32_t		old_cpu_id;
931 	hrtime_t		start_time;
932 	uint64_t		imregval;
933 	uint64_t		new_imregval;
934 	volatile uint64_t	*imregp;
935 	volatile uint64_t	*idregp;
936 	extern const int	_ncpu;
937 	extern cpu_t		*cpu[];
938 
939 	DEBUG2(DBG_IB, dip, "ib_set_intr_target: ino %x cpu_id %x\n",
940 	    ino, cpu_id);
941 
942 	imregp = (uint64_t *)ib_intr_map_reg_addr(ib_p, ino);
943 	idregp = IB_INO_INTR_STATE_REG(ib_p, ino);
944 
945 	/* Save original mapreg value. */
946 	imregval = *imregp;
947 	DEBUG1(DBG_IB, dip, "ib_set_intr_target: orig mapreg value: 0x%llx\n",
948 	    imregval);
949 
950 	/* Operate only on inos which are already enabled. */
951 	if (!(imregval & COMMON_INTR_MAP_REG_VALID))
952 		return (DDI_FAILURE);
953 
954 	/* Is this request a noop? */
955 	if ((old_cpu_id = ib_map_reg_get_cpu(imregval)) == cpu_id)
956 		return (DDI_SUCCESS);
957 
958 	/* Clear the interrupt valid/enable bit for particular ino. */
959 	DEBUG0(DBG_IB, dip, "Clearing intr_enabled...\n");
960 	*imregp = imregval & ~COMMON_INTR_MAP_REG_VALID;
961 
962 	/* Wait until there are no more pending interrupts. */
963 	start_time = gethrtime();
964 
965 	DEBUG0(DBG_IB, dip, "About to check for pending interrupts...\n");
966 
967 	while (IB_INO_INTR_PENDING(idregp, ino)) {
968 		DEBUG0(DBG_IB, dip, "Waiting for pending ints to clear\n");
969 		if ((gethrtime() - start_time) < pci_intrpend_timeout) {
970 			continue;
971 		} else { /* Timed out waiting. */
972 			DEBUG0(DBG_IB, dip, "Timed out waiting \n");
973 			return (DDI_EPENDING);
974 		}
975 	}
976 
977 	new_imregval = *imregp;
978 
979 	DEBUG1(DBG_IB, dip,
980 	    "after disabling intr, mapreg value: 0x%llx\n", new_imregval);
981 
982 	/*
983 	 * Get lock, validate cpu and write new mapreg value.
984 	 */
985 	mutex_enter(&cpu_lock);
986 	if ((cpu_id < _ncpu) && (cpu[cpu_id] && cpu_is_online(cpu[cpu_id]))) {
987 		/* Prepare new mapreg value with intr enabled and new cpu_id. */
988 		new_imregval &=
989 		    COMMON_INTR_MAP_REG_IGN | COMMON_INTR_MAP_REG_INO;
990 		new_imregval = ib_get_map_reg(new_imregval, cpu_id);
991 
992 		DEBUG1(DBG_IB, dip, "Writing new mapreg value:0x%llx\n",
993 		    new_imregval);
994 
995 		*imregp = new_imregval;
996 
997 		ib_log_new_cpu(ib_p, old_cpu_id, cpu_id, ino);
998 	} else {	/* Invalid cpu.  Restore original register image. */
999 		DEBUG0(DBG_IB, dip,
1000 		    "Invalid cpuid: writing orig mapreg value\n");
1001 
1002 		*imregp = imregval;
1003 		ret = DDI_EINVAL;
1004 	}
1005 	mutex_exit(&cpu_lock);
1006 
1007 	return (ret);
1008 }
1009 
1010 
1011 /*
1012  * Return the dips or number of dips associated with a given interrupt block.
1013  * Size of dips array arg is passed in as dips_ret arg.
1014  * Number of dips returned is returned in dips_ret arg.
1015  * Array of dips gets returned in the dips argument.
1016  * Function returns number of dips existing for the given interrupt block.
1017  *
1018  */
1019 uint8_t
1020 ib_get_ino_devs(
1021 	ib_t *ib_p, uint32_t ino, uint8_t *devs_ret, pcitool_intr_dev_t *devs)
1022 {
1023 	ib_ino_info_t	*ino_p;
1024 	ib_ino_pil_t	*ipil_p;
1025 	ih_t		*ih_p;
1026 	uint32_t	num_devs = 0;
1027 	int		i, j;
1028 
1029 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1030 	ino_p = ib_locate_ino(ib_p, ino);
1031 	if (ino_p != NULL) {
1032 		for (j = 0, ipil_p = ino_p->ino_ipil_p; ipil_p;
1033 		    ipil_p = ipil_p->ipil_next_p) {
1034 			num_devs += ipil_p->ipil_ih_size;
1035 
1036 			for (i = 0, ih_p = ipil_p->ipil_ih_head;
1037 			    ((i < ipil_p->ipil_ih_size) && (i < *devs_ret));
1038 			    i++, j++, ih_p = ih_p->ih_next) {
1039 				(void) strncpy(devs[i].driver_name,
1040 				    ddi_driver_name(ih_p->ih_dip),
1041 				    MAXMODCONFNAME-1);
1042 				devs[i].driver_name[MAXMODCONFNAME] = '\0';
1043 				(void) ddi_pathname(ih_p->ih_dip, devs[i].path);
1044 				devs[i].dev_inst =
1045 				    ddi_get_instance(ih_p->ih_dip);
1046 			}
1047 		}
1048 		*devs_ret = j;
1049 	}
1050 
1051 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1052 
1053 	return (num_devs);
1054 }
1055 
1056 void ib_log_new_cpu(ib_t *ib_p, uint32_t old_cpu_id, uint32_t new_cpu_id,
1057 	uint32_t ino)
1058 {
1059 	ib_ino_info_t	*ino_p;
1060 	ib_ino_pil_t	*ipil_p;
1061 	ih_t		*ih_p;
1062 	int		i;
1063 
1064 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1065 
1066 	/* Log in OS data structures the new CPU. */
1067 	ino_p = ib_locate_ino(ib_p, ino);
1068 	if (ino_p != NULL) {
1069 
1070 		/* Log in OS data structures the new CPU. */
1071 		ino_p->ino_cpuid = new_cpu_id;
1072 
1073 		for (ipil_p = ino_p->ino_ipil_p; ipil_p;
1074 		    ipil_p = ipil_p->ipil_next_p) {
1075 			for (i = 0, ih_p = ipil_p->ipil_ih_head;
1076 			    (i < ipil_p->ipil_ih_size);
1077 			    i++, ih_p = ih_p->ih_next) {
1078 				/*
1079 				 * Account for any residual time
1080 				 * to be logged for old cpu.
1081 				 */
1082 				ib_cpu_ticks_to_ih_nsec(ib_p,
1083 				    ipil_p->ipil_ih_head, old_cpu_id);
1084 			}
1085 		}
1086 	}
1087 
1088 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1089 }
1090