xref: /illumos-gate/usr/src/uts/sun4u/io/pci/pci_ib.c (revision 99ea293e719ac006d413e4fde6ac0d5cd4dd6c59)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright 2019 Peter Tribble.
27  */
28 
29 /*
30  * PCI Interrupt Block (RISCx) implementation
31  *	initialization
32  *	interrupt enable/disable/clear and mapping register manipulation
33  */
34 
35 #include <sys/types.h>
36 #include <sys/kmem.h>
37 #include <sys/async.h>
38 #include <sys/systm.h>		/* panicstr */
39 #include <sys/spl.h>
40 #include <sys/sunddi.h>
41 #include <sys/machsystm.h>	/* intr_dist_add */
42 #include <sys/ddi_impldefs.h>
43 #include <sys/clock.h>
44 #include <sys/cpuvar.h>
45 #include <sys/pci/pci_obj.h>
46 
47 /*LINTLIBRARY*/
48 static uint_t ib_intr_reset(void *arg);
49 
50 void
51 ib_create(pci_t *pci_p)
52 {
53 	dev_info_t *dip = pci_p->pci_dip;
54 	ib_t *ib_p;
55 	uintptr_t a;
56 	int i;
57 
58 	/*
59 	 * Allocate interrupt block state structure and link it to
60 	 * the pci state structure.
61 	 */
62 	ib_p = kmem_zalloc(sizeof (ib_t), KM_SLEEP);
63 	pci_p->pci_ib_p = ib_p;
64 	ib_p->ib_pci_p = pci_p;
65 
66 	a = pci_ib_setup(ib_p);
67 
68 	/*
69 	 * Determine virtual addresses of interrupt mapping, clear and diag
70 	 * registers that have common offsets.
71 	 */
72 	ib_p->ib_slot_clear_intr_regs =
73 	    a + COMMON_IB_SLOT_CLEAR_INTR_REG_OFFSET;
74 	ib_p->ib_intr_retry_timer_reg =
75 	    (uint64_t *)(a + COMMON_IB_INTR_RETRY_TIMER_OFFSET);
76 	ib_p->ib_slot_intr_state_diag_reg =
77 	    (uint64_t *)(a + COMMON_IB_SLOT_INTR_STATE_DIAG_REG);
78 	ib_p->ib_obio_intr_state_diag_reg =
79 	    (uint64_t *)(a + COMMON_IB_OBIO_INTR_STATE_DIAG_REG);
80 
81 	if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
82 		ib_p->ib_upa_imr[0] = (volatile uint64_t *)
83 		    (a + COMMON_IB_UPA0_INTR_MAP_REG_OFFSET);
84 		ib_p->ib_upa_imr[1] = (volatile uint64_t *)
85 		    (a + COMMON_IB_UPA1_INTR_MAP_REG_OFFSET);
86 	}
87 
88 	DEBUG2(DBG_ATTACH, dip, "ib_create: slot_imr=%x, slot_cir=%x\n",
89 	    ib_p->ib_slot_intr_map_regs, ib_p->ib_obio_intr_map_regs);
90 	DEBUG2(DBG_ATTACH, dip, "ib_create: obio_imr=%x, obio_cir=%x\n",
91 	    ib_p->ib_slot_clear_intr_regs, ib_p->ib_obio_clear_intr_regs);
92 	DEBUG2(DBG_ATTACH, dip, "ib_create: upa0_imr=%x, upa1_imr=%x\n",
93 	    ib_p->ib_upa_imr[0], ib_p->ib_upa_imr[1]);
94 	DEBUG3(DBG_ATTACH, dip,
95 	    "ib_create: retry_timer=%x, obio_diag=%x slot_diag=%x\n",
96 	    ib_p->ib_intr_retry_timer_reg,
97 	    ib_p->ib_obio_intr_state_diag_reg,
98 	    ib_p->ib_slot_intr_state_diag_reg);
99 
100 	ib_p->ib_ino_lst = (ib_ino_info_t *)NULL;
101 	mutex_init(&ib_p->ib_intr_lock, NULL, MUTEX_DRIVER, NULL);
102 	mutex_init(&ib_p->ib_ino_lst_mutex, NULL, MUTEX_DRIVER, NULL);
103 
104 	DEBUG1(DBG_ATTACH, dip, "ib_create: numproxy=%x\n",
105 	    pci_p->pci_numproxy);
106 	for (i = 1; i <= pci_p->pci_numproxy; i++) {
107 		set_intr_mapping_reg(pci_p->pci_id,
108 		    (uint64_t *)ib_p->ib_upa_imr[i - 1], i);
109 	}
110 
111 	ib_configure(ib_p);
112 	bus_func_register(BF_TYPE_RESINTR, ib_intr_reset, ib_p);
113 }
114 
115 void
116 ib_destroy(pci_t *pci_p)
117 {
118 	ib_t *ib_p = pci_p->pci_ib_p;
119 	dev_info_t *dip = pci_p->pci_dip;
120 
121 	DEBUG0(DBG_IB, dip, "ib_destroy\n");
122 	bus_func_unregister(BF_TYPE_RESINTR, ib_intr_reset, ib_p);
123 
124 	intr_dist_rem_weighted(ib_intr_dist_all, ib_p);
125 	mutex_destroy(&ib_p->ib_ino_lst_mutex);
126 	mutex_destroy(&ib_p->ib_intr_lock);
127 
128 	ib_free_ino_all(ib_p);
129 
130 	kmem_free(ib_p, sizeof (ib_t));
131 	pci_p->pci_ib_p = NULL;
132 }
133 
134 void
135 ib_configure(ib_t *ib_p)
136 {
137 	/* XXX could be different between psycho and schizo */
138 	*ib_p->ib_intr_retry_timer_reg = pci_intr_retry_intv;
139 }
140 
141 /*
142  * can only used for psycho internal interrupts thermal, power,
143  * ue, ce, pbm
144  */
145 void
146 ib_intr_enable(pci_t *pci_p, ib_ino_t ino)
147 {
148 	ib_t *ib_p = pci_p->pci_ib_p;
149 	ib_mondo_t mondo = IB_INO_TO_MONDO(ib_p, ino);
150 	volatile uint64_t *imr_p = ib_intr_map_reg_addr(ib_p, ino);
151 	uint_t cpu_id;
152 
153 	/*
154 	 * Determine the cpu for the interrupt.
155 	 */
156 	mutex_enter(&ib_p->ib_intr_lock);
157 	cpu_id = intr_dist_cpuid();
158 	DEBUG2(DBG_IB, pci_p->pci_dip,
159 	    "ib_intr_enable: ino=%x cpu_id=%x\n", ino, cpu_id);
160 
161 	*imr_p = ib_get_map_reg(mondo, cpu_id);
162 	IB_INO_INTR_CLEAR(ib_clear_intr_reg_addr(ib_p, ino));
163 	mutex_exit(&ib_p->ib_intr_lock);
164 }
165 
166 /*
167  * Disable the interrupt via its interrupt mapping register.
168  * Can only be used for internal interrupts: thermal, power, ue, ce, pbm.
169  * If called under interrupt context, wait should be set to 0
170  */
171 void
172 ib_intr_disable(ib_t *ib_p, ib_ino_t ino, int wait)
173 {
174 	volatile uint64_t *imr_p = ib_intr_map_reg_addr(ib_p, ino);
175 	volatile uint64_t *state_reg_p = IB_INO_INTR_STATE_REG(ib_p, ino);
176 	hrtime_t start_time;
177 
178 	/* disable the interrupt */
179 	mutex_enter(&ib_p->ib_intr_lock);
180 	IB_INO_INTR_OFF(imr_p);
181 	*imr_p;	/* flush previous write */
182 	mutex_exit(&ib_p->ib_intr_lock);
183 
184 	if (!wait)
185 		goto wait_done;
186 
187 	start_time = gethrtime();
188 	/* busy wait if there is interrupt being processed */
189 	while (IB_INO_INTR_PENDING(state_reg_p, ino) && !panicstr) {
190 		if (gethrtime() - start_time > pci_intrpend_timeout) {
191 			pbm_t *pbm_p = ib_p->ib_pci_p->pci_pbm_p;
192 			cmn_err(CE_WARN, "%s:%s: ib_intr_disable timeout %x",
193 			    pbm_p->pbm_nameinst_str,
194 			    pbm_p->pbm_nameaddr_str, ino);
195 				break;
196 		}
197 	}
198 wait_done:
199 	IB_INO_INTR_PEND(ib_clear_intr_reg_addr(ib_p, ino));
200 }
201 
202 /* can only used for psycho internal interrupts thermal, power, ue, ce, pbm */
203 void
204 ib_nintr_clear(ib_t *ib_p, ib_ino_t ino)
205 {
206 	uint64_t *clr_reg = ib_clear_intr_reg_addr(ib_p, ino);
207 	IB_INO_INTR_CLEAR(clr_reg);
208 }
209 
210 /*
211  * distribute PBM and UPA interrupts. ino is set to 0 by caller if we
212  * are dealing with UPA interrupts (without inos).
213  */
214 void
215 ib_intr_dist_nintr(ib_t *ib_p, ib_ino_t ino, volatile uint64_t *imr_p)
216 {
217 	volatile uint64_t imr = *imr_p;
218 	uint32_t cpu_id;
219 
220 	if (!IB_INO_INTR_ISON(imr))
221 		return;
222 
223 	cpu_id = intr_dist_cpuid();
224 
225 	if (ib_map_reg_get_cpu(*imr_p) == cpu_id)
226 		return;
227 
228 	*imr_p = ib_get_map_reg(IB_IMR2MONDO(imr), cpu_id);
229 	imr = *imr_p;	/* flush previous write */
230 }
231 
232 /*
233  * Converts into nsec, ticks logged with a given CPU.  Adds nsec to ih.
234  */
235 /*ARGSUSED*/
236 void
237 ib_cpu_ticks_to_ih_nsec(ib_t *ib_p, ih_t *ih_p, uint32_t cpu_id)
238 {
239 	extern kmutex_t pciintr_ks_template_lock;
240 	hrtime_t ticks;
241 
242 	/*
243 	 * Because we are updating two fields in ih_t we must lock
244 	 * pciintr_ks_template_lock to prevent someone from reading the
245 	 * kstats after we set ih_ticks to 0 and before we increment
246 	 * ih_nsec to compensate.
247 	 *
248 	 * We must also protect against the interrupt arriving and incrementing
249 	 * ih_ticks between the time we read it and when we reset it to 0.
250 	 * To do this we use atomic_swap.
251 	 */
252 
253 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
254 
255 	mutex_enter(&pciintr_ks_template_lock);
256 	ticks = atomic_swap_64(&ih_p->ih_ticks, 0);
257 	ih_p->ih_nsec += (uint64_t)tick2ns(ticks, cpu_id);
258 	mutex_exit(&pciintr_ks_template_lock);
259 }
260 
261 static void
262 ib_intr_dist(ib_t *ib_p, ib_ino_info_t *ino_p)
263 {
264 	uint32_t cpu_id = ino_p->ino_cpuid;
265 	ib_ino_t ino = ino_p->ino_ino;
266 	volatile uint64_t imr, *imr_p, *state_reg;
267 	hrtime_t start_time;
268 
269 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
270 	imr_p = ib_intr_map_reg_addr(ib_p, ino);
271 	state_reg = IB_INO_INTR_STATE_REG(ib_p, ino);
272 
273 	if (ib_map_reg_get_cpu(*imr_p) == cpu_id) /* same cpu, no reprog */
274 		return;
275 
276 	/* disable interrupt, this could disrupt devices sharing our slot */
277 	IB_INO_INTR_OFF(imr_p);
278 	imr = *imr_p;	/* flush previous write */
279 
280 	/* busy wait if there is interrupt being processed */
281 	start_time = gethrtime();
282 	while (IB_INO_INTR_PENDING(state_reg, ino) && !panicstr) {
283 		if (gethrtime() - start_time > pci_intrpend_timeout) {
284 			pbm_t *pbm_p = ib_p->ib_pci_p->pci_pbm_p;
285 			cmn_err(CE_WARN, "%s:%s: ib_intr_dist(%p,%x) timeout",
286 			    pbm_p->pbm_nameinst_str,
287 			    pbm_p->pbm_nameaddr_str,
288 			    imr_p, IB_INO_TO_MONDO(ib_p, ino));
289 			break;
290 		}
291 	}
292 	*imr_p = ib_get_map_reg(IB_IMR2MONDO(imr), cpu_id);
293 	imr = *imr_p;	/* flush previous write */
294 }
295 
296 /*
297  * Redistribute interrupts of the specified weight. The first call has a weight
298  * of weight_max, which can be used to trigger initialization for
299  * redistribution. The inos with weight [weight_max, inf.) should be processed
300  * on the "weight == weight_max" call.  This first call is followed by calls
301  * of decreasing weights, inos of that weight should be processed.  The final
302  * call specifies a weight of zero, this can be used to trigger processing of
303  * stragglers.
304  */
305 void
306 ib_intr_dist_all(void *arg, int32_t weight_max, int32_t weight)
307 {
308 	ib_t *ib_p = (ib_t *)arg;
309 	pci_t *pci_p = ib_p->ib_pci_p;
310 	ib_ino_info_t *ino_p;
311 	ib_ino_pil_t *ipil_p;
312 	ih_t *ih_lst;
313 	int32_t dweight;
314 	int i;
315 
316 	if (weight == 0) {
317 		mutex_enter(&ib_p->ib_intr_lock);
318 		if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
319 			for (i = 0; i < 2; i++)
320 				ib_intr_dist_nintr(ib_p, 0,
321 				    ib_p->ib_upa_imr[i]);
322 		}
323 		mutex_exit(&ib_p->ib_intr_lock);
324 	}
325 
326 	mutex_enter(&ib_p->ib_ino_lst_mutex);
327 
328 	/* Perform special processing for first call of a redistribution. */
329 	if (weight == weight_max) {
330 		for (ino_p = ib_p->ib_ino_lst; ino_p;
331 		    ino_p = ino_p->ino_next_p) {
332 
333 			/*
334 			 * Clear ino_established of each ino on first call.
335 			 * The ino_established field may be used by a pci
336 			 * nexus driver's pci_intr_dist_cpuid implementation
337 			 * when detection of established pci slot-cpu binding
338 			 * for multi function pci cards.
339 			 */
340 			ino_p->ino_established = 0;
341 
342 			/*
343 			 * recompute the ino_intr_weight based on the device
344 			 * weight of all devinfo nodes sharing the ino (this
345 			 * will allow us to pick up new weights established by
346 			 * i_ddi_set_intr_weight()).
347 			 */
348 			ino_p->ino_intr_weight = 0;
349 
350 			for (ipil_p = ino_p->ino_ipil_p; ipil_p;
351 			    ipil_p = ipil_p->ipil_next_p) {
352 				for (i = 0, ih_lst = ipil_p->ipil_ih_head;
353 				    i < ipil_p->ipil_ih_size; i++,
354 				    ih_lst = ih_lst->ih_next) {
355 					dweight = i_ddi_get_intr_weight
356 					    (ih_lst->ih_dip);
357 					if (dweight > 0)
358 						ino_p->ino_intr_weight +=
359 						    dweight;
360 				}
361 			}
362 		}
363 	}
364 
365 	for (ino_p = ib_p->ib_ino_lst; ino_p; ino_p = ino_p->ino_next_p) {
366 		uint32_t orig_cpuid;
367 
368 		/*
369 		 * Get the weight of the ino and determine if we are going to
370 		 * process call.  We wait until an ib_intr_dist_all call of
371 		 * the proper weight occurs to support redistribution of all
372 		 * heavy weighted interrupts first (across all nexus driver
373 		 * instances).  This is done to ensure optimal
374 		 * INTR_WEIGHTED_DIST behavior.
375 		 */
376 		if ((weight == ino_p->ino_intr_weight) ||
377 		    ((weight >= weight_max) &&
378 		    (ino_p->ino_intr_weight >= weight_max))) {
379 			/* select cpuid to target and mark ino established */
380 			orig_cpuid = ino_p->ino_cpuid;
381 			if (cpu[orig_cpuid] == NULL)
382 				orig_cpuid = CPU->cpu_id;
383 			ino_p->ino_cpuid = pci_intr_dist_cpuid(ib_p, ino_p);
384 			ino_p->ino_established = 1;
385 
386 			/* Add device weight of ino devinfos to targeted cpu. */
387 			for (ipil_p = ino_p->ino_ipil_p; ipil_p;
388 			    ipil_p = ipil_p->ipil_next_p) {
389 				for (i = 0, ih_lst = ipil_p->ipil_ih_head;
390 				    i < ipil_p->ipil_ih_size; i++,
391 				    ih_lst = ih_lst->ih_next) {
392 
393 					dweight = i_ddi_get_intr_weight(
394 					    ih_lst->ih_dip);
395 					intr_dist_cpuid_add_device_weight(
396 					    ino_p->ino_cpuid, ih_lst->ih_dip,
397 					    dweight);
398 
399 					/*
400 					 * Different cpus may have different
401 					 * clock speeds. to account for this,
402 					 * whenever an interrupt is moved to a
403 					 * new CPU, we convert the accumulated
404 					 * ticks into nsec, based upon the clock
405 					 * rate of the prior CPU.
406 					 *
407 					 * It is possible that the prior CPU no
408 					 * longer exists. In this case, fall
409 					 * back to using this CPU's clock rate.
410 					 *
411 					 * Note that the value in ih_ticks has
412 					 * already been corrected for any power
413 					 * savings mode which might have been
414 					 * in effect.
415 					 */
416 					ib_cpu_ticks_to_ih_nsec(ib_p, ih_lst,
417 					    orig_cpuid);
418 				}
419 			}
420 
421 			/* program the hardware */
422 			ib_intr_dist(ib_p, ino_p);
423 		}
424 	}
425 	mutex_exit(&ib_p->ib_ino_lst_mutex);
426 }
427 
428 /*
429  * Reset interrupts to IDLE.  This function is called during
430  * panic handling after redistributing interrupts; it's needed to
431  * support dumping to network devices after 'sync' from OBP.
432  *
433  * N.B.  This routine runs in a context where all other threads
434  * are permanently suspended.
435  */
436 static uint_t
437 ib_intr_reset(void *arg)
438 {
439 	ib_t *ib_p = (ib_t *)arg;
440 	ib_ino_t ino;
441 	uint64_t *clr_reg;
442 
443 	/*
444 	 * Note that we only actually care about interrupts that are
445 	 * potentially from network devices.
446 	 */
447 	for (ino = 0; ino <= ib_p->ib_max_ino; ino++) {
448 		clr_reg = ib_clear_intr_reg_addr(ib_p, ino);
449 		IB_INO_INTR_CLEAR(clr_reg);
450 	}
451 
452 	return (BF_NONE);
453 }
454 
455 void
456 ib_suspend(ib_t *ib_p)
457 {
458 	ib_ino_info_t *ip;
459 	pci_t *pci_p = ib_p->ib_pci_p;
460 
461 	/* save ino_lst interrupts' mapping registers content */
462 	mutex_enter(&ib_p->ib_ino_lst_mutex);
463 	for (ip = ib_p->ib_ino_lst; ip; ip = ip->ino_next_p)
464 		ip->ino_map_reg_save = *ip->ino_map_reg;
465 	mutex_exit(&ib_p->ib_ino_lst_mutex);
466 
467 	if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
468 		ib_p->ib_upa_imr_state[0] = *ib_p->ib_upa_imr[0];
469 		ib_p->ib_upa_imr_state[1] = *ib_p->ib_upa_imr[1];
470 	}
471 }
472 
473 void
474 ib_resume(ib_t *ib_p)
475 {
476 	ib_ino_info_t *ip;
477 	pci_t *pci_p = ib_p->ib_pci_p;
478 
479 	/* restore ino_lst interrupts' mapping registers content */
480 	mutex_enter(&ib_p->ib_ino_lst_mutex);
481 	for (ip = ib_p->ib_ino_lst; ip; ip = ip->ino_next_p) {
482 		IB_INO_INTR_CLEAR(ip->ino_clr_reg);	 /* set intr to idle */
483 		*ip->ino_map_reg = ip->ino_map_reg_save; /* restore IMR */
484 	}
485 	mutex_exit(&ib_p->ib_ino_lst_mutex);
486 
487 	if (CHIP_TYPE(pci_p) != PCI_CHIP_XMITS) {
488 		*ib_p->ib_upa_imr[0] = ib_p->ib_upa_imr_state[0];
489 		*ib_p->ib_upa_imr[1] = ib_p->ib_upa_imr_state[1];
490 	}
491 }
492 
493 /*
494  * locate ino_info structure on ib_p->ib_ino_lst according to ino#
495  * returns NULL if not found.
496  */
497 ib_ino_info_t *
498 ib_locate_ino(ib_t *ib_p, ib_ino_t ino_num)
499 {
500 	ib_ino_info_t *ino_p = ib_p->ib_ino_lst;
501 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
502 
503 	for (; ino_p && ino_p->ino_ino != ino_num; ino_p = ino_p->ino_next_p)
504 		;
505 	return (ino_p);
506 }
507 
508 #define	IB_INO_TO_SLOT(ino) (IB_IS_OBIO_INO(ino) ? 0xff : ((ino) & 0x1f) >> 2)
509 
510 ib_ino_pil_t *
511 ib_new_ino_pil(ib_t *ib_p, ib_ino_t ino_num, uint_t pil, ih_t *ih_p)
512 {
513 	ib_ino_pil_t	*ipil_p = kmem_zalloc(sizeof (ib_ino_pil_t), KM_SLEEP);
514 	ib_ino_info_t	*ino_p;
515 
516 	if ((ino_p = ib_locate_ino(ib_p, ino_num)) == NULL) {
517 		ino_p = kmem_zalloc(sizeof (ib_ino_info_t), KM_SLEEP);
518 
519 		ino_p->ino_next_p = ib_p->ib_ino_lst;
520 		ib_p->ib_ino_lst = ino_p;
521 
522 		ino_p->ino_ino = ino_num;
523 		ino_p->ino_slot_no = IB_INO_TO_SLOT(ino_num);
524 		ino_p->ino_ib_p = ib_p;
525 		ino_p->ino_clr_reg = ib_clear_intr_reg_addr(ib_p, ino_num);
526 		ino_p->ino_map_reg = ib_intr_map_reg_addr(ib_p, ino_num);
527 		ino_p->ino_unclaimed_intrs = 0;
528 		ino_p->ino_lopil = pil;
529 	}
530 
531 	ih_p->ih_next = ih_p;
532 	ipil_p->ipil_pil = pil;
533 	ipil_p->ipil_ih_head = ih_p;
534 	ipil_p->ipil_ih_tail = ih_p;
535 	ipil_p->ipil_ih_start = ih_p;
536 	ipil_p->ipil_ih_size = 1;
537 	ipil_p->ipil_ino_p = ino_p;
538 
539 	ipil_p->ipil_next_p = ino_p->ino_ipil_p;
540 	ino_p->ino_ipil_p = ipil_p;
541 	ino_p->ino_ipil_size++;
542 
543 	if (ino_p->ino_lopil > pil)
544 		ino_p->ino_lopil = pil;
545 
546 	return (ipil_p);
547 }
548 
549 void
550 ib_delete_ino_pil(ib_t *ib_p, ib_ino_pil_t *ipil_p)
551 {
552 	ib_ino_info_t	*ino_p = ipil_p->ipil_ino_p;
553 	ib_ino_pil_t	*prev, *next;
554 	ushort_t	pil = ipil_p->ipil_pil;
555 
556 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
557 
558 	if (ino_p->ino_ipil_p == ipil_p)
559 		ino_p->ino_ipil_p = ipil_p->ipil_next_p;
560 	else {
561 		for (prev = next = ino_p->ino_ipil_p; next != ipil_p;
562 		    prev = next, next = next->ipil_next_p)
563 			;
564 
565 		if (prev)
566 			prev->ipil_next_p = ipil_p->ipil_next_p;
567 	}
568 
569 	kmem_free(ipil_p, sizeof (ib_ino_pil_t));
570 
571 	if ((--ino_p->ino_ipil_size) && (ino_p->ino_lopil == pil)) {
572 		for (next = ino_p->ino_ipil_p, pil = next->ipil_pil;
573 		    next; next = next->ipil_next_p) {
574 
575 			if (pil > next->ipil_pil)
576 				pil = next->ipil_pil;
577 		}
578 		/*
579 		 * Value stored in pil should be the lowest pil.
580 		 */
581 		ino_p->ino_lopil = pil;
582 	}
583 
584 	if (ino_p->ino_ipil_size)
585 		return;
586 
587 	if (ib_p->ib_ino_lst == ino_p)
588 		ib_p->ib_ino_lst = ino_p->ino_next_p;
589 	else {
590 		ib_ino_info_t	*list = ib_p->ib_ino_lst;
591 
592 		for (; list->ino_next_p != ino_p; list = list->ino_next_p)
593 			;
594 		list->ino_next_p = ino_p->ino_next_p;
595 	}
596 }
597 
598 /* free all ino when we are detaching */
599 void
600 ib_free_ino_all(ib_t *ib_p)
601 {
602 	ib_ino_info_t *ino_p = ib_p->ib_ino_lst;
603 	ib_ino_info_t *next = NULL;
604 
605 	while (ino_p) {
606 		next = ino_p->ino_next_p;
607 		kmem_free(ino_p, sizeof (ib_ino_info_t));
608 		ino_p = next;
609 	}
610 }
611 
612 /*
613  * Locate ib_ino_pil_t structure on ino_p->ino_ipil_p according to ino#
614  * returns NULL if not found.
615  */
616 ib_ino_pil_t *
617 ib_ino_locate_ipil(ib_ino_info_t *ino_p, uint_t pil)
618 {
619 	ib_ino_pil_t	*ipil_p = ino_p->ino_ipil_p;
620 
621 	for (; ipil_p && ipil_p->ipil_pil != pil; ipil_p = ipil_p->ipil_next_p)
622 		;
623 
624 	return (ipil_p);
625 }
626 
627 void
628 ib_ino_add_intr(pci_t *pci_p, ib_ino_pil_t *ipil_p, ih_t *ih_p)
629 {
630 	ib_ino_info_t *ino_p = ipil_p->ipil_ino_p;
631 	ib_ino_t ino = ino_p->ino_ino;
632 	ib_t *ib_p = ino_p->ino_ib_p;
633 	volatile uint64_t *state_reg = IB_INO_INTR_STATE_REG(ib_p, ino);
634 	hrtime_t start_time;
635 
636 	ASSERT(ib_p == pci_p->pci_ib_p);
637 	ASSERT(MUTEX_HELD(&ib_p->ib_ino_lst_mutex));
638 
639 	/* disable interrupt, this could disrupt devices sharing our slot */
640 	IB_INO_INTR_OFF(ino_p->ino_map_reg);
641 	*ino_p->ino_map_reg;
642 
643 	/* do NOT modify the link list until after the busy wait */
644 
645 	/*
646 	 * busy wait if there is interrupt being processed.
647 	 * either the pending state will be cleared by the interrupt wrapper
648 	 * or the interrupt will be marked as blocked indicating that it was
649 	 * jabbering.
650 	 */
651 	start_time = gethrtime();
652 	while ((ino_p->ino_unclaimed_intrs <= pci_unclaimed_intr_max) &&
653 	    IB_INO_INTR_PENDING(state_reg, ino) && !panicstr) {
654 		if (gethrtime() - start_time > pci_intrpend_timeout) {
655 			pbm_t *pbm_p = pci_p->pci_pbm_p;
656 			cmn_err(CE_WARN, "%s:%s: ib_ino_add_intr %x timeout",
657 			    pbm_p->pbm_nameinst_str,
658 			    pbm_p->pbm_nameaddr_str, ino);
659 			break;
660 		}
661 	}
662 
663 	/* link up ih_t */
664 	ih_p->ih_next = ipil_p->ipil_ih_head;
665 	ipil_p->ipil_ih_tail->ih_next = ih_p;
666 	ipil_p->ipil_ih_tail = ih_p;
667 
668 	ipil_p->ipil_ih_start = ipil_p->ipil_ih_head;
669 	ipil_p->ipil_ih_size++;
670 
671 	/*
672 	 * if the interrupt was previously blocked (left in pending state)
673 	 * because of jabber we need to clear the pending state in case the
674 	 * jabber has gone away.
675 	 */
676 	if (ino_p->ino_unclaimed_intrs > pci_unclaimed_intr_max) {
677 		cmn_err(CE_WARN,
678 		    "%s%d: ib_ino_add_intr: ino 0x%x has been unblocked",
679 		    ddi_driver_name(pci_p->pci_dip),
680 		    ddi_get_instance(pci_p->pci_dip),
681 		    ino_p->ino_ino);
682 		ino_p->ino_unclaimed_intrs = 0;
683 		IB_INO_INTR_CLEAR(ino_p->ino_clr_reg);
684 	}
685 
686 	/* re-enable interrupt */
687 	IB_INO_INTR_ON(ino_p->ino_map_reg);
688 	*ino_p->ino_map_reg;
689 }
690 
691 /*
692  * removes pci_ispec_t from the ino's link list.
693  * uses hardware mutex to lock out interrupt threads.
694  * Side effects: interrupt belongs to that ino is turned off on return.
695  * if we are sharing PCI slot with other inos, the caller needs
696  * to turn it back on.
697  */
698 void
699 ib_ino_rem_intr(pci_t *pci_p, ib_ino_pil_t *ipil_p, ih_t *ih_p)
700 {
701 	ib_ino_info_t *ino_p = ipil_p->ipil_ino_p;
702 	int i;
703 	ib_ino_t ino = ino_p->ino_ino;
704 	ih_t *ih_lst = ipil_p->ipil_ih_head;
705 	volatile uint64_t *state_reg =
706 	    IB_INO_INTR_STATE_REG(ino_p->ino_ib_p, ino);
707 	hrtime_t start_time;
708 
709 	ASSERT(MUTEX_HELD(&ino_p->ino_ib_p->ib_ino_lst_mutex));
710 	/* disable interrupt, this could disrupt devices sharing our slot */
711 	IB_INO_INTR_OFF(ino_p->ino_map_reg);
712 	*ino_p->ino_map_reg;
713 
714 	/* do NOT modify the link list until after the busy wait */
715 
716 	/*
717 	 * busy wait if there is interrupt being processed.
718 	 * either the pending state will be cleared by the interrupt wrapper
719 	 * or the interrupt will be marked as blocked indicating that it was
720 	 * jabbering.
721 	 */
722 	start_time = gethrtime();
723 	while ((ino_p->ino_unclaimed_intrs <= pci_unclaimed_intr_max) &&
724 	    IB_INO_INTR_PENDING(state_reg, ino) && !panicstr) {
725 		if (gethrtime() - start_time > pci_intrpend_timeout) {
726 			pbm_t *pbm_p = pci_p->pci_pbm_p;
727 			cmn_err(CE_WARN, "%s:%s: ib_ino_rem_intr %x timeout",
728 			    pbm_p->pbm_nameinst_str,
729 			    pbm_p->pbm_nameaddr_str, ino);
730 			break;
731 		}
732 	}
733 
734 	if (ipil_p->ipil_ih_size == 1) {
735 		if (ih_lst != ih_p)
736 			goto not_found;
737 		/* no need to set head/tail as ino_p will be freed */
738 		goto reset;
739 	}
740 
741 	/*
742 	 * if the interrupt was previously blocked (left in pending state)
743 	 * because of jabber we need to clear the pending state in case the
744 	 * jabber has gone away.
745 	 */
746 	if (ino_p->ino_unclaimed_intrs > pci_unclaimed_intr_max) {
747 		cmn_err(CE_WARN,
748 		    "%s%d: ib_ino_rem_intr: ino 0x%x has been unblocked",
749 		    ddi_driver_name(pci_p->pci_dip),
750 		    ddi_get_instance(pci_p->pci_dip),
751 		    ino_p->ino_ino);
752 		ino_p->ino_unclaimed_intrs = 0;
753 		IB_INO_INTR_CLEAR(ino_p->ino_clr_reg);
754 	}
755 
756 	/* search the link list for ih_p */
757 	for (i = 0;
758 	    (i < ipil_p->ipil_ih_size) && (ih_lst->ih_next != ih_p);
759 	    i++, ih_lst = ih_lst->ih_next)
760 		;
761 	if (ih_lst->ih_next != ih_p)
762 		goto not_found;
763 
764 	/* remove ih_p from the link list and maintain the head/tail */
765 	ih_lst->ih_next = ih_p->ih_next;
766 	if (ipil_p->ipil_ih_head == ih_p)
767 		ipil_p->ipil_ih_head = ih_p->ih_next;
768 	if (ipil_p->ipil_ih_tail == ih_p)
769 		ipil_p->ipil_ih_tail = ih_lst;
770 	ipil_p->ipil_ih_start = ipil_p->ipil_ih_head;
771 reset:
772 	if (ih_p->ih_config_handle)
773 		pci_config_teardown(&ih_p->ih_config_handle);
774 	if (ih_p->ih_ksp != NULL)
775 		kstat_delete(ih_p->ih_ksp);
776 	kmem_free(ih_p, sizeof (ih_t));
777 	ipil_p->ipil_ih_size--;
778 
779 	return;
780 not_found:
781 	DEBUG2(DBG_R_INTX, ino_p->ino_ib_p->ib_pci_p->pci_dip,
782 	    "ino_p=%x does not have ih_p=%x\n", ino_p, ih_p);
783 }
784 
785 ih_t *
786 ib_intr_locate_ih(ib_ino_pil_t *ipil_p, dev_info_t *rdip, uint32_t inum)
787 {
788 	ih_t *ih_p = ipil_p->ipil_ih_head;
789 	int i;
790 
791 	for (i = 0; i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next) {
792 		if (ih_p->ih_dip == rdip && ih_p->ih_inum == inum)
793 			return (ih_p);
794 	}
795 
796 	return ((ih_t *)NULL);
797 }
798 
799 ih_t *
800 ib_alloc_ih(dev_info_t *rdip, uint32_t inum,
801 	uint_t (*int_handler)(caddr_t int_handler_arg1,
802 	caddr_t int_handler_arg2),
803 	caddr_t int_handler_arg1,
804 	caddr_t int_handler_arg2)
805 {
806 	ih_t *ih_p;
807 
808 	ih_p = kmem_alloc(sizeof (ih_t), KM_SLEEP);
809 	ih_p->ih_dip = rdip;
810 	ih_p->ih_inum = inum;
811 	ih_p->ih_intr_state = PCI_INTR_STATE_DISABLE;
812 	ih_p->ih_handler = int_handler;
813 	ih_p->ih_handler_arg1 = int_handler_arg1;
814 	ih_p->ih_handler_arg2 = int_handler_arg2;
815 	ih_p->ih_config_handle = NULL;
816 	ih_p->ih_nsec = 0;
817 	ih_p->ih_ticks = 0;
818 	ih_p->ih_ksp = NULL;
819 
820 	return (ih_p);
821 }
822 
823 int
824 ib_update_intr_state(pci_t *pci_p, dev_info_t *rdip,
825 	ddi_intr_handle_impl_t *hdlp, uint_t new_intr_state)
826 {
827 	ib_t		*ib_p = pci_p->pci_ib_p;
828 	ib_ino_info_t	*ino_p;
829 	ib_ino_pil_t	*ipil_p;
830 	ib_mondo_t	mondo;
831 	ih_t		*ih_p;
832 	int		ret = DDI_FAILURE;
833 
834 	/*
835 	 * For PULSE interrupts, pci driver don't allocate
836 	 * ib_ino_info_t and ih_t data structures and also,
837 	 * not maintains any interrupt state information.
838 	 * So, just return success from here.
839 	 */
840 	if (hdlp->ih_vector & PCI_PULSE_INO) {
841 		DEBUG0(DBG_IB, ib_p->ib_pci_p->pci_dip,
842 		    "ib_update_intr_state: PULSE interrupt, return success\n");
843 
844 		return (DDI_SUCCESS);
845 	}
846 
847 	mutex_enter(&ib_p->ib_ino_lst_mutex);
848 
849 	if ((mondo = pci_xlate_intr(pci_p->pci_dip, rdip, pci_p->pci_ib_p,
850 	    IB_MONDO_TO_INO(hdlp->ih_vector))) == 0) {
851 		mutex_exit(&ib_p->ib_ino_lst_mutex);
852 		return (ret);
853 	}
854 
855 	ino_p = ib_locate_ino(ib_p, IB_MONDO_TO_INO(mondo));
856 	if (ino_p && (ipil_p = ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
857 		if (ih_p = ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum)) {
858 			ih_p->ih_intr_state = new_intr_state;
859 			ret = DDI_SUCCESS;
860 		}
861 	}
862 
863 	mutex_exit(&ib_p->ib_ino_lst_mutex);
864 	return (ret);
865 }
866 
867 /*
868  * Get interrupt CPU for a given ino.
869  * Return info only for inos which are already mapped to devices.
870  */
871 /*ARGSUSED*/
872 int
873 ib_get_intr_target(pci_t *pci_p, ib_ino_t ino, int *cpu_id_p)
874 {
875 	dev_info_t		*dip = pci_p->pci_dip;
876 	ib_t			*ib_p = pci_p->pci_ib_p;
877 	volatile uint64_t	*imregp;
878 	uint64_t		imregval;
879 
880 	DEBUG1(DBG_IB, dip, "ib_get_intr_target: ino %x\n", ino);
881 
882 	imregp = ib_intr_map_reg_addr(ib_p, ino);
883 	imregval = *imregp;
884 
885 	*cpu_id_p = ib_map_reg_get_cpu(imregval);
886 
887 	DEBUG1(DBG_IB, dip, "ib_get_intr_target: cpu_id %x\n", *cpu_id_p);
888 
889 	return (DDI_SUCCESS);
890 }
891 
892 /*
893  * Associate a new CPU with a given ino.
894  * Operate only on inos which are already mapped to devices.
895  */
896 int
897 ib_set_intr_target(pci_t *pci_p, ib_ino_t ino, int cpu_id)
898 {
899 	dev_info_t		*dip = pci_p->pci_dip;
900 	ib_t			*ib_p = pci_p->pci_ib_p;
901 	int			ret = DDI_SUCCESS;
902 	uint32_t		old_cpu_id;
903 	hrtime_t		start_time;
904 	uint64_t		imregval;
905 	uint64_t		new_imregval;
906 	volatile uint64_t	*imregp;
907 	volatile uint64_t	*idregp;
908 	extern const int	_ncpu;
909 	extern cpu_t		*cpu[];
910 
911 	DEBUG2(DBG_IB, dip, "ib_set_intr_target: ino %x cpu_id %x\n",
912 	    ino, cpu_id);
913 
914 	imregp = (uint64_t *)ib_intr_map_reg_addr(ib_p, ino);
915 	idregp = IB_INO_INTR_STATE_REG(ib_p, ino);
916 
917 	/* Save original mapreg value. */
918 	imregval = *imregp;
919 	DEBUG1(DBG_IB, dip, "ib_set_intr_target: orig mapreg value: 0x%llx\n",
920 	    imregval);
921 
922 	/* Operate only on inos which are already enabled. */
923 	if (!(imregval & COMMON_INTR_MAP_REG_VALID))
924 		return (DDI_FAILURE);
925 
926 	/* Is this request a noop? */
927 	if ((old_cpu_id = ib_map_reg_get_cpu(imregval)) == cpu_id)
928 		return (DDI_SUCCESS);
929 
930 	/* Clear the interrupt valid/enable bit for particular ino. */
931 	DEBUG0(DBG_IB, dip, "Clearing intr_enabled...\n");
932 	*imregp = imregval & ~COMMON_INTR_MAP_REG_VALID;
933 
934 	/* Wait until there are no more pending interrupts. */
935 	start_time = gethrtime();
936 
937 	DEBUG0(DBG_IB, dip, "About to check for pending interrupts...\n");
938 
939 	while (IB_INO_INTR_PENDING(idregp, ino)) {
940 		DEBUG0(DBG_IB, dip, "Waiting for pending ints to clear\n");
941 		if ((gethrtime() - start_time) < pci_intrpend_timeout) {
942 			continue;
943 		} else { /* Timed out waiting. */
944 			DEBUG0(DBG_IB, dip, "Timed out waiting \n");
945 			return (DDI_EPENDING);
946 		}
947 	}
948 
949 	new_imregval = *imregp;
950 
951 	DEBUG1(DBG_IB, dip,
952 	    "after disabling intr, mapreg value: 0x%llx\n", new_imregval);
953 
954 	/*
955 	 * Get lock, validate cpu and write new mapreg value.
956 	 */
957 	mutex_enter(&cpu_lock);
958 	if ((cpu_id < _ncpu) && (cpu[cpu_id] && cpu_is_online(cpu[cpu_id]))) {
959 		/* Prepare new mapreg value with intr enabled and new cpu_id. */
960 		new_imregval &=
961 		    COMMON_INTR_MAP_REG_IGN | COMMON_INTR_MAP_REG_INO;
962 		new_imregval = ib_get_map_reg(new_imregval, cpu_id);
963 
964 		DEBUG1(DBG_IB, dip, "Writing new mapreg value:0x%llx\n",
965 		    new_imregval);
966 
967 		*imregp = new_imregval;
968 
969 		ib_log_new_cpu(ib_p, old_cpu_id, cpu_id, ino);
970 	} else {	/* Invalid cpu.  Restore original register image. */
971 		DEBUG0(DBG_IB, dip,
972 		    "Invalid cpuid: writing orig mapreg value\n");
973 
974 		*imregp = imregval;
975 		ret = DDI_EINVAL;
976 	}
977 	mutex_exit(&cpu_lock);
978 
979 	return (ret);
980 }
981 
982 
983 /*
984  * Return the dips or number of dips associated with a given interrupt block.
985  * Size of dips array arg is passed in as dips_ret arg.
986  * Number of dips returned is returned in dips_ret arg.
987  * Array of dips gets returned in the dips argument.
988  * Function returns number of dips existing for the given interrupt block.
989  *
990  */
991 uint8_t
992 ib_get_ino_devs(
993 	ib_t *ib_p, uint32_t ino, uint8_t *devs_ret, pcitool_intr_dev_t *devs)
994 {
995 	ib_ino_info_t	*ino_p;
996 	ib_ino_pil_t	*ipil_p;
997 	ih_t		*ih_p;
998 	uint32_t	num_devs = 0;
999 	int		i, j;
1000 
1001 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1002 	ino_p = ib_locate_ino(ib_p, ino);
1003 	if (ino_p != NULL) {
1004 		for (j = 0, ipil_p = ino_p->ino_ipil_p; ipil_p;
1005 		    ipil_p = ipil_p->ipil_next_p) {
1006 			num_devs += ipil_p->ipil_ih_size;
1007 
1008 			for (i = 0, ih_p = ipil_p->ipil_ih_head;
1009 			    ((i < ipil_p->ipil_ih_size) && (i < *devs_ret));
1010 			    i++, j++, ih_p = ih_p->ih_next) {
1011 				(void) strlcpy(devs[i].driver_name,
1012 				    ddi_driver_name(ih_p->ih_dip),
1013 				    MAXMODCONFNAME);
1014 				(void) ddi_pathname(ih_p->ih_dip, devs[i].path);
1015 				devs[i].dev_inst =
1016 				    ddi_get_instance(ih_p->ih_dip);
1017 			}
1018 		}
1019 		*devs_ret = j;
1020 	}
1021 
1022 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1023 
1024 	return (num_devs);
1025 }
1026 
1027 void ib_log_new_cpu(ib_t *ib_p, uint32_t old_cpu_id, uint32_t new_cpu_id,
1028 	uint32_t ino)
1029 {
1030 	ib_ino_info_t	*ino_p;
1031 	ib_ino_pil_t	*ipil_p;
1032 	ih_t		*ih_p;
1033 	int		i;
1034 
1035 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1036 
1037 	/* Log in OS data structures the new CPU. */
1038 	ino_p = ib_locate_ino(ib_p, ino);
1039 	if (ino_p != NULL) {
1040 
1041 		/* Log in OS data structures the new CPU. */
1042 		ino_p->ino_cpuid = new_cpu_id;
1043 
1044 		for (ipil_p = ino_p->ino_ipil_p; ipil_p;
1045 		    ipil_p = ipil_p->ipil_next_p) {
1046 			for (i = 0, ih_p = ipil_p->ipil_ih_head;
1047 			    (i < ipil_p->ipil_ih_size);
1048 			    i++, ih_p = ih_p->ih_next) {
1049 				/*
1050 				 * Account for any residual time
1051 				 * to be logged for old cpu.
1052 				 */
1053 				ib_cpu_ticks_to_ih_nsec(ib_p,
1054 				    ipil_p->ipil_ih_head, old_cpu_id);
1055 			}
1056 		}
1057 	}
1058 
1059 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1060 }
1061