xref: /titanic_52/usr/src/uts/sun4u/io/pci/pci_intr.c (revision 193974072f41a843678abf5f61979c748687e66b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * PCI nexus interrupt handling:
30  *	PCI device interrupt handler wrapper
31  *	pil lookup routine
32  *	PCI device interrupt related initchild code
33  */
34 
35 #include <sys/types.h>
36 #include <sys/kmem.h>
37 #include <sys/async.h>
38 #include <sys/spl.h>
39 #include <sys/sunddi.h>
40 #include <sys/machsystm.h>	/* e_ddi_nodeid_to_dip() */
41 #include <sys/ddi_impldefs.h>
42 #include <sys/pci/pci_obj.h>
43 #include <sys/sdt.h>
44 #include <sys/clock.h>
45 
46 #ifdef _STARFIRE
47 #include <sys/starfire.h>
48 #endif /* _STARFIRE */
49 
50 /*
51  * interrupt jabber:
52  *
53  * When an interrupt line is jabbering, every time the state machine for the
54  * associated ino is idled, a new mondo will be sent and the ino will go into
55  * the pending state again. The mondo will cause a new call to
56  * pci_intr_wrapper() which normally idles the ino's state machine which would
57  * precipitate another trip round the loop.
58  * The loop can be broken by preventing the ino's state machine from being
59  * idled when an interrupt line is jabbering. See the comment at the
60  * beginning of pci_intr_wrapper() explaining how the 'interrupt jabber
61  * protection' code does this.
62  */
63 
64 /*LINTLIBRARY*/
65 
#ifdef NOT_DEFINED
/*
 * ino_to_pil: indexed by interrupt number (INO), yields the SPARC PIL at
 * which the handler for that INO executes.  The table describes onboard
 * devices only; plug-in cards use a different scheme.
 *
 * The table is only compiled when NOT_DEFINED is defined.
 */
uint_t ino_to_pil[] = {
	/* PCI bus A slots, int#A, B, C, D */
	0, 0, 0, 0,	/* 0x00 - 0x03: bus A slot 0 */
	0, 0, 0, 0,	/* 0x04 - 0x07: bus A slot 1 */
	0, 0, 0, 0,	/* 0x08 - 0x0B: unused */
	0, 0, 0, 0,	/* 0x0C - 0x0F: unused */

	/* PCI bus B slots, int#A, B, C, D */
	0, 0, 0, 0,	/* 0x10 - 0x13: bus B slot 0 */
	0, 0, 0, 0,	/* 0x14 - 0x17: bus B slot 1 */
	0, 0, 0, 0,	/* 0x18 - 0x1B: bus B slot 2 */
	4, 0, 0, 0,	/* 0x1C - 0x1F: bus B slot 3 */

	/* onboard devices, one INO each */
	4,		/* 0x20: SCSI */
	6,		/* 0x21: ethernet */
	3,		/* 0x22: parallel port */
	9,		/* 0x23: audio record */
	9,		/* 0x24: audio playback */
	14,		/* 0x25: power fail */
	4,		/* 0x26: 2nd SCSI */
	8,		/* 0x27: floppy */
	14,		/* 0x28: thermal warning */
	12,		/* 0x29: keyboard */
	12,		/* 0x2A: mouse */
	12,		/* 0x2B: serial */
	0,		/* 0x2C: timer/counter 0 */
	0,		/* 0x2D: timer/counter 1 */
	14,		/* 0x2E: uncorrectable ECC errors */
	14,		/* 0x2F: correctable ECC errors */
	14,		/* 0x30: PCI bus A error */
	14,		/* 0x31: PCI bus B error */
	14,		/* 0x32: power management wakeup */

	/* 0x33 - 0x40: no device named, all at PIL 14 */
	14,		/* 0x33 */
	14,		/* 0x34 */
	14,		/* 0x35 */
	14,		/* 0x36 */
	14,		/* 0x37 */
	14,		/* 0x38 */
	14,		/* 0x39 */
	14,		/* 0x3a */
	14,		/* 0x3b */
	14,		/* 0x3c */
	14,		/* 0x3d */
	14,		/* 0x3e */
	14,		/* 0x3f */
	14		/* 0x40 */
};
#endif /* NOT_DEFINED */
122 
123 
124 #define	PCI_SIMBA_VENID		0x108e	/* vendor id for simba */
125 #define	PCI_SIMBA_DEVID		0x5000	/* device id for simba */
126 
127 /*
128  * map_pcidev_cfg_reg - create mapping to pci device configuration registers
129  *			if we have a simba AND a pci to pci bridge along the
130  *			device path.
131  *			Called with corresponding mutexes held!!
132  *
133  * XXX	  XXX	XXX	The purpose of this routine is to overcome a hardware
134  *			defect in Sabre CPU and Simba bridge configuration
135  *			which does not drain DMA write data stalled in
136  *			PCI to PCI bridges (such as the DEC bridge) beyond
137  *			Simba. This routine will setup the data structures
138  *			to allow the pci_intr_wrapper to perform a manual
139  *			drain data operation before passing the control to
140  *			interrupt handlers of device drivers.
141  * return value:
142  * DDI_SUCCESS
143  * DDI_FAILURE		if unable to create mapping
144  */
145 static int
146 map_pcidev_cfg_reg(dev_info_t *dip, dev_info_t *rdip, ddi_acc_handle_t *hdl_p)
147 {
148 	dev_info_t *cdip;
149 	dev_info_t *pci_dip = NULL;
150 	pci_t *pci_p = get_pci_soft_state(ddi_get_instance(dip));
151 	int simba_found = 0, pci_bridge_found = 0;
152 
153 	for (cdip = rdip; cdip && cdip != dip; cdip = ddi_get_parent(cdip)) {
154 		ddi_acc_handle_t config_handle;
155 		uint32_t vendor_id = ddi_getprop(DDI_DEV_T_ANY, cdip,
156 		    DDI_PROP_DONTPASS, "vendor-id", 0xffff);
157 
158 		DEBUG4(DBG_A_INTX, pci_p->pci_dip,
159 		    "map dev cfg reg for %s%d: @%s%d\n",
160 		    ddi_driver_name(rdip), ddi_get_instance(rdip),
161 		    ddi_driver_name(cdip), ddi_get_instance(cdip));
162 
163 		if (ddi_prop_exists(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
164 		    "no-dma-interrupt-sync"))
165 			continue;
166 
167 		/* continue to search up-stream if not a PCI device */
168 		if (vendor_id == 0xffff)
169 			continue;
170 
171 		/* record the deepest pci device */
172 		if (!pci_dip)
173 			pci_dip = cdip;
174 
175 		/* look for simba */
176 		if (vendor_id == PCI_SIMBA_VENID) {
177 			uint32_t device_id = ddi_getprop(DDI_DEV_T_ANY,
178 			    cdip, DDI_PROP_DONTPASS, "device-id", -1);
179 			if (device_id == PCI_SIMBA_DEVID) {
180 				simba_found = 1;
181 				DEBUG0(DBG_A_INTX, pci_p->pci_dip,
182 				    "\tFound simba\n");
183 				continue; /* do not check bridge if simba */
184 			}
185 		}
186 
187 		/* look for pci to pci bridge */
188 		if (pci_config_setup(cdip, &config_handle) != DDI_SUCCESS) {
189 			cmn_err(CE_WARN,
190 			    "%s%d: can't get brdg cfg space for %s%d\n",
191 			    ddi_driver_name(dip), ddi_get_instance(dip),
192 			    ddi_driver_name(cdip), ddi_get_instance(cdip));
193 			return (DDI_FAILURE);
194 		}
195 		if (pci_config_get8(config_handle, PCI_CONF_BASCLASS)
196 		    == PCI_CLASS_BRIDGE) {
197 			DEBUG0(DBG_A_INTX, pci_p->pci_dip,
198 			    "\tFound PCI to xBus bridge\n");
199 			pci_bridge_found = 1;
200 		}
201 		pci_config_teardown(&config_handle);
202 	}
203 
204 	if (!pci_bridge_found)
205 		return (DDI_SUCCESS);
206 	if (!simba_found && (CHIP_TYPE(pci_p) < PCI_CHIP_SCHIZO))
207 		return (DDI_SUCCESS);
208 	if (pci_config_setup(pci_dip, hdl_p) != DDI_SUCCESS) {
209 		cmn_err(CE_WARN, "%s%d: can not get config space for %s%d\n",
210 		    ddi_driver_name(dip), ddi_get_instance(dip),
211 		    ddi_driver_name(cdip), ddi_get_instance(cdip));
212 		return (DDI_FAILURE);
213 	}
214 	return (DDI_SUCCESS);
215 }
216 
217 /*
218  * If the unclaimed interrupt count has reached the limit set by
219  * pci_unclaimed_intr_max within the time limit, then all interrupts
220  * on this ino is blocked by not idling the interrupt state machine.
221  */
222 static int
223 pci_spurintr(ib_ino_pil_t *ipil_p) {
224 	ib_ino_info_t	*ino_p = ipil_p->ipil_ino_p;
225 	ih_t		*ih_p = ipil_p->ipil_ih_start;
226 	pci_t		*pci_p = ino_p->ino_ib_p->ib_pci_p;
227 	char		*err_fmt_str;
228 	boolean_t	blocked = B_FALSE;
229 	int		i;
230 
231 	if (ino_p->ino_unclaimed_intrs > pci_unclaimed_intr_max)
232 		return (DDI_INTR_CLAIMED);
233 
234 	if (!ino_p->ino_unclaimed_intrs)
235 		ino_p->ino_spurintr_begin = ddi_get_lbolt();
236 
237 	ino_p->ino_unclaimed_intrs++;
238 
239 	if (ino_p->ino_unclaimed_intrs <= pci_unclaimed_intr_max)
240 		goto clear;
241 
242 	if (drv_hztousec(ddi_get_lbolt() - ino_p->ino_spurintr_begin)
243 	    > pci_spurintr_duration) {
244 		ino_p->ino_unclaimed_intrs = 0;
245 		goto clear;
246 	}
247 	err_fmt_str = "%s%d: ino 0x%x blocked";
248 	blocked = B_TRUE;
249 	goto warn;
250 clear:
251 	if (!pci_spurintr_msgs) { /* tomatillo errata #71 spurious mondo */
252 		/* clear the pending state */
253 		IB_INO_INTR_CLEAR(ino_p->ino_clr_reg);
254 		return (DDI_INTR_CLAIMED);
255 	}
256 
257 	err_fmt_str = "!%s%d: spurious interrupt from ino 0x%x";
258 warn:
259 	cmn_err(CE_WARN, err_fmt_str, NAMEINST(pci_p->pci_dip), ino_p->ino_ino);
260 	for (i = 0; i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next)
261 		cmn_err(CE_CONT, "!%s-%d#%x ", NAMEINST(ih_p->ih_dip),
262 		    ih_p->ih_inum);
263 	cmn_err(CE_CONT, "!\n");
264 	if (blocked == B_FALSE)  /* clear the pending state */
265 		IB_INO_INTR_CLEAR(ino_p->ino_clr_reg);
266 
267 	return (DDI_INTR_CLAIMED);
268 }
269 
270 /*
271  * pci_intr_wrapper
272  *
273  * This routine is used as wrapper around interrupt handlers installed by child
274  * device drivers.  This routine invokes the driver interrupt handlers and
275  * examines the return codes.
276  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
277  * least one handler claims the interrupt then the counter is halved and the
278  * interrupt state machine is idled. If no handler claims the interrupt then
279  * the counter is incremented by one and the state machine is idled.
280  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
281  * then the interrupt state machine is not idled thus preventing any further
282  * interrupts on that ino. The state machine will only be idled again if a
283  * handler is subsequently added or removed.
284  *
285  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
286  * DDI_INTR_UNCLAIMED otherwise.
287  */
288 
289 extern uint64_t intr_get_time(void);
290 
291 uint_t
292 pci_intr_wrapper(caddr_t arg)
293 {
294 	ib_ino_pil_t	*ipil_p = (ib_ino_pil_t *)arg;
295 	ib_ino_info_t	*ino_p = ipil_p->ipil_ino_p;
296 	uint_t		result = 0, r = DDI_INTR_UNCLAIMED;
297 	pci_t		*pci_p = ino_p->ino_ib_p->ib_pci_p;
298 	pbm_t		*pbm_p = pci_p->pci_pbm_p;
299 	ih_t		*ih_p = ipil_p->ipil_ih_start;
300 	int		i;
301 
302 	for (i = 0; i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next) {
303 		dev_info_t *dip = ih_p->ih_dip;
304 		uint_t (*handler)() = ih_p->ih_handler;
305 		caddr_t arg1 = ih_p->ih_handler_arg1;
306 		caddr_t arg2 = ih_p->ih_handler_arg2;
307 		ddi_acc_handle_t cfg_hdl = ih_p->ih_config_handle;
308 
309 		if (pci_intr_dma_sync && cfg_hdl && pbm_p->pbm_sync_reg_pa) {
310 			(void) pci_config_get16(cfg_hdl, PCI_CONF_VENID);
311 			pci_pbm_dma_sync(pbm_p, ino_p->ino_ino);
312 		}
313 
314 		if (ih_p->ih_intr_state == PCI_INTR_STATE_DISABLE) {
315 			DEBUG3(DBG_INTR, pci_p->pci_dip,
316 			    "pci_intr_wrapper: %s%d interrupt %d is disabled\n",
317 			    ddi_driver_name(dip), ddi_get_instance(dip),
318 			    ino_p->ino_ino);
319 
320 			continue;
321 		}
322 
323 		DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
324 		    void *, handler, caddr_t, arg1, caddr_t, arg2);
325 
326 		r = (*handler)(arg1, arg2);
327 
328 		/*
329 		 * Account for time used by this interrupt. Protect against
330 		 * conflicting writes to ih_ticks from ib_intr_dist_all() by
331 		 * using atomic ops.
332 		 */
333 
334 		if (ipil_p->ipil_pil <= LOCK_LEVEL)
335 			atomic_add_64(&ih_p->ih_ticks, intr_get_time());
336 
337 		DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
338 		    void *, handler, caddr_t, arg1, int, r);
339 
340 		result += r;
341 
342 		if (pci_check_all_handlers)
343 			continue;
344 		if (result)
345 			break;
346 	}
347 
348 	if (result)
349 		ino_p->ino_claimed |= (1 << ipil_p->ipil_pil);
350 
351 	/* Interrupt can only be cleared after all pil levels are handled */
352 	if (ipil_p->ipil_pil != ino_p->ino_lopil)
353 		return (DDI_INTR_CLAIMED);
354 
355 	if (!ino_p->ino_claimed)
356 		return (pci_spurintr(ipil_p));
357 
358 	ino_p->ino_unclaimed_intrs = 0;
359 	ino_p->ino_claimed = 0;
360 
361 	/* Clear the pending state */
362 	IB_INO_INTR_CLEAR(ino_p->ino_clr_reg);
363 
364 	return (DDI_INTR_CLAIMED);
365 }
366 
367 dev_info_t *
368 get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip)
369 {
370 	dev_info_t *cdip = rdip;
371 
372 	for (; ddi_get_parent(cdip) != dip; cdip = ddi_get_parent(cdip))
373 		;
374 
375 	return (cdip);
376 }
377 
378 /* default class to pil value mapping */
379 pci_class_val_t pci_default_pil [] = {
380 	{0x000000, 0xff0000, 0x1},	/* Class code for pre-2.0 devices */
381 	{0x010000, 0xff0000, 0x4},	/* Mass Storage Controller */
382 	{0x020000, 0xff0000, 0x6},	/* Network Controller */
383 	{0x030000, 0xff0000, 0x9},	/* Display Controller */
384 	{0x040000, 0xff0000, 0x8},	/* Multimedia Controller */
385 	{0x050000, 0xff0000, 0xb},	/* Memory Controller */
386 	{0x060000, 0xff0000, 0xb},	/* Bridge Controller */
387 	{0x0c0000, 0xffff00, 0x9},	/* Serial Bus, FireWire (IEEE 1394) */
388 	{0x0c0100, 0xffff00, 0x4},	/* Serial Bus, ACCESS.bus */
389 	{0x0c0200, 0xffff00, 0x4},	/* Serial Bus, SSA */
390 	{0x0c0300, 0xffff00, 0x9},	/* Serial Bus Universal Serial Bus */
391 	{0x0c0400, 0xffff00, 0x6},	/* Serial Bus, Fibre Channel */
392 	{0x0c0600, 0xffff00, 0x6}	/* Serial Bus, Infiniband */
393 };
394 
395 /*
396  * Default class to intr_weight value mapping (% of CPU).  A driver.conf
397  * entry on or above the pci node like
398  *
399  *	pci-class-intr-weights= 0x020000, 0xff0000, 30;
400  *
401  * can be used to augment or override entries in the default table below.
402  *
403  * NB: The values below give NICs preference on redistribution, and provide
404  * NICs some isolation from other interrupt sources. We need better interfaces
405  * that allow the NIC driver to identify a specific NIC instance as high
406  * bandwidth, and thus deserving of separation from other low bandwidth
407  * NICs additional isolation from other interrupt sources.
408  *
409  * NB: We treat Infiniband like a NIC.
410  */
411 pci_class_val_t pci_default_intr_weight [] = {
412 	{0x020000, 0xff0000, 35},	/* Network Controller */
413 	{0x010000, 0xff0000, 10},	/* Mass Storage Controller */
414 	{0x0c0400, 0xffff00, 10},	/* Serial Bus, Fibre Channel */
415 	{0x0c0600, 0xffff00, 50}	/* Serial Bus, Infiniband */
416 };
417 
418 static uint32_t
419 pci_match_class_val(uint32_t key, pci_class_val_t *rec_p, int nrec,
420     uint32_t default_val)
421 {
422 	int i;
423 
424 	for (i = 0; i < nrec; rec_p++, i++) {
425 		if ((rec_p->class_code & rec_p->class_mask) ==
426 		    (key & rec_p->class_mask))
427 			return (rec_p->class_val);
428 	}
429 
430 	return (default_val);
431 }
432 
433 /*
434  * Return the configuration value, based on class code and sub class code,
435  * from the specified property based or default pci_class_val_t table.
436  */
437 uint32_t
438 pci_class_to_val(dev_info_t *rdip, char *property_name, pci_class_val_t *rec_p,
439     int nrec, uint32_t default_val)
440 {
441 	int property_len;
442 	uint32_t class_code;
443 	pci_class_val_t *conf;
444 	uint32_t val = default_val;
445 
446 	/*
447 	 * Use the "class-code" property to get the base and sub class
448 	 * codes for the requesting device.
449 	 */
450 	class_code = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, rdip,
451 	    DDI_PROP_DONTPASS, "class-code", -1);
452 
453 	if (class_code == -1)
454 		return (val);
455 
456 	/* look up the val from the default table */
457 	val = pci_match_class_val(class_code, rec_p, nrec, val);
458 
459 
460 	/* see if there is a more specific property specified value */
461 	if (ddi_getlongprop(DDI_DEV_T_ANY, rdip, DDI_PROP_NOTPROM,
462 	    property_name, (caddr_t)&conf, &property_len))
463 			return (val);
464 
465 	if ((property_len % sizeof (pci_class_val_t)) == 0)
466 		val = pci_match_class_val(class_code, conf,
467 		    property_len / sizeof (pci_class_val_t), val);
468 	kmem_free(conf, property_len);
469 	return (val);
470 }
471 
472 /* pci_class_to_pil: return the pil for a given PCI device. */
473 uint32_t
474 pci_class_to_pil(dev_info_t *rdip)
475 {
476 	uint32_t pil;
477 
478 	/* Default pil is 1 */
479 	pil = pci_class_to_val(rdip,
480 	    "pci-class-priorities", pci_default_pil,
481 	    sizeof (pci_default_pil) / sizeof (pci_class_val_t), 1);
482 
483 	/* Range check the result */
484 	if (pil >= 0xf)
485 		pil = 1;
486 
487 	return (pil);
488 }
489 
490 /* pci_class_to_intr_weight: return the intr_weight for a given PCI device. */
491 int32_t
492 pci_class_to_intr_weight(dev_info_t *rdip)
493 {
494 	int32_t intr_weight;
495 
496 	/* default weight is 0% */
497 	intr_weight = pci_class_to_val(rdip,
498 	    "pci-class-intr-weights", pci_default_intr_weight,
499 	    sizeof (pci_default_intr_weight) / sizeof (pci_class_val_t), 0);
500 
501 	/* range check the result */
502 	if (intr_weight < 0)
503 		intr_weight = 0;
504 	if (intr_weight > 1000)
505 		intr_weight = 1000;
506 
507 	return (intr_weight);
508 }
509 
510 static struct {
511 	kstat_named_t pciintr_ks_name;
512 	kstat_named_t pciintr_ks_type;
513 	kstat_named_t pciintr_ks_cpu;
514 	kstat_named_t pciintr_ks_pil;
515 	kstat_named_t pciintr_ks_time;
516 	kstat_named_t pciintr_ks_ino;
517 	kstat_named_t pciintr_ks_cookie;
518 	kstat_named_t pciintr_ks_devpath;
519 	kstat_named_t pciintr_ks_buspath;
520 } pciintr_ks_template = {
521 	{ "name",	KSTAT_DATA_CHAR },
522 	{ "type",	KSTAT_DATA_CHAR },
523 	{ "cpu",	KSTAT_DATA_UINT64 },
524 	{ "pil",	KSTAT_DATA_UINT64 },
525 	{ "time",	KSTAT_DATA_UINT64 },
526 	{ "ino",	KSTAT_DATA_UINT64 },
527 	{ "cookie",	KSTAT_DATA_UINT64 },
528 	{ "devpath",	KSTAT_DATA_STRING },
529 	{ "buspath",	KSTAT_DATA_STRING },
530 };
531 static uint32_t pciintr_ks_instance;
532 static char ih_devpath[MAXPATHLEN];
533 static char ih_buspath[MAXPATHLEN];
534 
535 kmutex_t pciintr_ks_template_lock;
536 
537 int
538 pci_ks_update(kstat_t *ksp, int rw)
539 {
540 	ih_t		*ih_p = ksp->ks_private;
541 	int	maxlen = sizeof (pciintr_ks_template.pciintr_ks_name.value.c);
542 	ib_ino_pil_t	*ipil_p = ih_p->ih_ipil_p;
543 	ib_ino_info_t	*ino_p = ipil_p->ipil_ino_p;
544 	ib_t		*ib_p = ino_p->ino_ib_p;
545 	pci_t		*pci_p = ib_p->ib_pci_p;
546 	ib_ino_t	ino;
547 
548 	ino = ino_p->ino_ino;
549 
550 	(void) snprintf(pciintr_ks_template.pciintr_ks_name.value.c, maxlen,
551 	    "%s%d", ddi_driver_name(ih_p->ih_dip),
552 	    ddi_get_instance(ih_p->ih_dip));
553 
554 	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
555 	(void) ddi_pathname(pci_p->pci_dip, ih_buspath);
556 	kstat_named_setstr(&pciintr_ks_template.pciintr_ks_devpath, ih_devpath);
557 	kstat_named_setstr(&pciintr_ks_template.pciintr_ks_buspath, ih_buspath);
558 
559 	if (ih_p->ih_intr_state == PCI_INTR_STATE_ENABLE) {
560 		(void) strcpy(pciintr_ks_template.pciintr_ks_type.value.c,
561 		    "fixed");
562 		pciintr_ks_template.pciintr_ks_cpu.value.ui64 =
563 		    ino_p->ino_cpuid;
564 		pciintr_ks_template.pciintr_ks_pil.value.ui64 =
565 		    ipil_p->ipil_pil;
566 		pciintr_ks_template.pciintr_ks_time.value.ui64 = ih_p->ih_nsec +
567 		    (uint64_t)tick2ns((hrtime_t)ih_p->ih_ticks,
568 		    ino_p->ino_cpuid);
569 		pciintr_ks_template.pciintr_ks_ino.value.ui64 = ino;
570 		pciintr_ks_template.pciintr_ks_cookie.value.ui64 =
571 		    IB_INO_TO_MONDO(ib_p, ino);
572 	} else {
573 		(void) strcpy(pciintr_ks_template.pciintr_ks_type.value.c,
574 		    "disabled");
575 		pciintr_ks_template.pciintr_ks_cpu.value.ui64 = 0;
576 		pciintr_ks_template.pciintr_ks_pil.value.ui64 = 0;
577 		pciintr_ks_template.pciintr_ks_time.value.ui64 = 0;
578 		pciintr_ks_template.pciintr_ks_ino.value.ui64 = 0;
579 		pciintr_ks_template.pciintr_ks_cookie.value.ui64 = 0;
580 	}
581 
582 	return (0);
583 }
584 
585 int
586 pci_add_intr(dev_info_t *dip, dev_info_t *rdip, ddi_intr_handle_impl_t *hdlp)
587 {
588 	pci_t		*pci_p = get_pci_soft_state(ddi_get_instance(dip));
589 	ib_t		*ib_p = pci_p->pci_ib_p;
590 	cb_t		*cb_p = pci_p->pci_cb_p;
591 	ih_t		*ih_p;
592 	ib_ino_t	ino;
593 	ib_ino_info_t	*ino_p;	/* pulse interrupts have no ino */
594 	ib_ino_pil_t	*ipil_p, *ipil_list;
595 	ib_mondo_t	mondo;
596 	uint32_t	cpu_id;
597 	int		ret;
598 	int32_t		weight;
599 
600 	ino = IB_MONDO_TO_INO(hdlp->ih_vector);
601 
602 	DEBUG3(DBG_A_INTX, dip, "pci_add_intr: rdip=%s%d ino=%x\n",
603 	    ddi_driver_name(rdip), ddi_get_instance(rdip), ino);
604 
605 	if (ino > ib_p->ib_max_ino) {
606 		DEBUG1(DBG_A_INTX, dip, "ino %x is invalid\n", ino);
607 		return (DDI_INTR_NOTFOUND);
608 	}
609 
610 	if (hdlp->ih_vector & PCI_PULSE_INO) {
611 		volatile uint64_t *map_reg_addr;
612 		map_reg_addr = ib_intr_map_reg_addr(ib_p, ino);
613 
614 		mondo = pci_xlate_intr(dip, rdip, ib_p, ino);
615 		if (mondo == 0)
616 			goto fail1;
617 
618 		hdlp->ih_vector = CB_MONDO_TO_XMONDO(cb_p, mondo);
619 
620 		if (i_ddi_add_ivintr(hdlp) != DDI_SUCCESS)
621 			goto fail1;
622 
623 		/*
624 		 * Select cpu and program.
625 		 *
626 		 * Since there is no good way to always derive cpuid in
627 		 * pci_remove_intr for PCI_PULSE_INO (esp. for STARFIRE), we
628 		 * don't add (or remove) device weight for pulsed interrupt
629 		 * sources.
630 		 */
631 		mutex_enter(&ib_p->ib_intr_lock);
632 		cpu_id = intr_dist_cpuid();
633 		*map_reg_addr = ib_get_map_reg(mondo, cpu_id);
634 		mutex_exit(&ib_p->ib_intr_lock);
635 		*map_reg_addr;	/* flush previous write */
636 		goto done;
637 	}
638 
639 	if ((mondo = pci_xlate_intr(dip, rdip, pci_p->pci_ib_p, ino)) == 0)
640 		goto fail1;
641 
642 	ino = IB_MONDO_TO_INO(mondo);
643 
644 	mutex_enter(&ib_p->ib_ino_lst_mutex);
645 	ih_p = ib_alloc_ih(rdip, hdlp->ih_inum,
646 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
647 	if (map_pcidev_cfg_reg(dip, rdip, &ih_p->ih_config_handle))
648 		goto fail2;
649 
650 	ino_p = ib_locate_ino(ib_p, ino);
651 	ipil_list = ino_p ? ino_p->ino_ipil_p:NULL;
652 
653 	/* Sharing ino */
654 	if (ino_p && (ipil_p = ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
655 		if (ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum)) {
656 			DEBUG1(DBG_A_INTX, dip, "dup intr #%d\n",
657 			    hdlp->ih_inum);
658 			goto fail3;
659 		}
660 
661 		/* add weight to the cpu that we are already targeting */
662 		cpu_id = ino_p->ino_cpuid;
663 		weight = pci_class_to_intr_weight(rdip);
664 		intr_dist_cpuid_add_device_weight(cpu_id, rdip, weight);
665 
666 		ib_ino_add_intr(pci_p, ipil_p, ih_p);
667 		goto ino_done;
668 	}
669 
670 	if (hdlp->ih_pri == 0)
671 		hdlp->ih_pri = pci_class_to_pil(rdip);
672 
673 	ipil_p = ib_new_ino_pil(ib_p, ino, hdlp->ih_pri, ih_p);
674 	ino_p = ipil_p->ipil_ino_p;
675 
676 	hdlp->ih_vector = CB_MONDO_TO_XMONDO(cb_p, mondo);
677 
678 	/* Store this global mondo */
679 	ino_p->ino_mondo = hdlp->ih_vector;
680 
681 	DEBUG2(DBG_A_INTX, dip, "pci_add_intr:  pil=0x%x mondo=0x%x\n",
682 	    hdlp->ih_pri, hdlp->ih_vector);
683 
684 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
685 	    (ddi_intr_handler_t *)pci_intr_wrapper, (caddr_t)ipil_p, NULL);
686 
687 	ret = i_ddi_add_ivintr(hdlp);
688 
689 	/*
690 	 * Restore original interrupt handler
691 	 * and arguments in interrupt handle.
692 	 */
693 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
694 	    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
695 
696 	if (ret != DDI_SUCCESS)
697 		goto fail4;
698 
699 	/* Save the pil for this ino */
700 	ipil_p->ipil_pil = hdlp->ih_pri;
701 
702 	/* clear and enable interrupt */
703 	IB_INO_INTR_CLEAR(ino_p->ino_clr_reg);
704 
705 	/*
706 	 * Select cpu and compute weight, saving both for sharing and removal.
707 	 */
708 	if (ipil_list == NULL)
709 		ino_p->ino_cpuid = pci_intr_dist_cpuid(ib_p, ino_p);
710 
711 	cpu_id = ino_p->ino_cpuid;
712 	ino_p->ino_established = 1;
713 	weight = pci_class_to_intr_weight(rdip);
714 	intr_dist_cpuid_add_device_weight(cpu_id, rdip, weight);
715 
716 #ifdef _STARFIRE
717 	cpu_id = pc_translate_tgtid(cb_p->cb_ittrans_cookie, cpu_id,
718 	    IB_GET_MAPREG_INO(ino));
719 #endif /* _STARFIRE */
720 	if (!ipil_list) {
721 		*ino_p->ino_map_reg = ib_get_map_reg(mondo, cpu_id);
722 		*ino_p->ino_map_reg;
723 	}
724 ino_done:
725 	ih_p->ih_ipil_p = ipil_p;
726 	ih_p->ih_ksp = kstat_create("pci_intrs",
727 	    atomic_inc_32_nv(&pciintr_ks_instance), "config", "interrupts",
728 	    KSTAT_TYPE_NAMED,
729 	    sizeof (pciintr_ks_template) / sizeof (kstat_named_t),
730 	    KSTAT_FLAG_VIRTUAL);
731 	if (ih_p->ih_ksp != NULL) {
732 		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
733 		ih_p->ih_ksp->ks_lock = &pciintr_ks_template_lock;
734 		ih_p->ih_ksp->ks_data = &pciintr_ks_template;
735 		ih_p->ih_ksp->ks_private = ih_p;
736 		ih_p->ih_ksp->ks_update = pci_ks_update;
737 		kstat_install(ih_p->ih_ksp);
738 	}
739 	ib_ino_map_reg_share(ib_p, ino, ino_p);
740 	mutex_exit(&ib_p->ib_ino_lst_mutex);
741 done:
742 	DEBUG2(DBG_A_INTX, dip, "done! Interrupt 0x%x pil=%x\n",
743 	    hdlp->ih_vector, hdlp->ih_pri);
744 	return (DDI_SUCCESS);
745 fail4:
746 	ib_delete_ino_pil(ib_p, ipil_p);
747 fail3:
748 	if (ih_p->ih_config_handle)
749 		pci_config_teardown(&ih_p->ih_config_handle);
750 fail2:
751 	mutex_exit(&ib_p->ib_ino_lst_mutex);
752 	kmem_free(ih_p, sizeof (ih_t));
753 fail1:
754 	DEBUG2(DBG_A_INTX, dip, "Failed! Interrupt 0x%x pil=%x\n",
755 	    hdlp->ih_vector, hdlp->ih_pri);
756 	return (DDI_FAILURE);
757 }
758 
759 int
760 pci_remove_intr(dev_info_t *dip, dev_info_t *rdip, ddi_intr_handle_impl_t *hdlp)
761 {
762 	pci_t		*pci_p = get_pci_soft_state(ddi_get_instance(dip));
763 	ib_t		*ib_p = pci_p->pci_ib_p;
764 	cb_t		*cb_p = pci_p->pci_cb_p;
765 	ib_ino_t	ino;
766 	ib_mondo_t	mondo;
767 	ib_ino_info_t	*ino_p;	/* non-pulse only */
768 	ib_ino_pil_t	*ipil_p; /* non-pulse only */
769 	ih_t		*ih_p;	/* non-pulse only */
770 
771 	ino = IB_MONDO_TO_INO(hdlp->ih_vector);
772 
773 	DEBUG3(DBG_R_INTX, dip, "pci_rem_intr: rdip=%s%d ino=%x\n",
774 	    ddi_driver_name(rdip), ddi_get_instance(rdip), ino);
775 
776 	if (hdlp->ih_vector & PCI_PULSE_INO) { /* pulse interrupt */
777 		volatile uint64_t *map_reg_addr;
778 
779 		/*
780 		 * No weight was added by pci_add_intr for PCI_PULSE_INO
781 		 * because it is difficult to determine cpuid here.
782 		 */
783 		map_reg_addr = ib_intr_map_reg_addr(ib_p, ino);
784 		IB_INO_INTR_RESET(map_reg_addr);	/* disable intr */
785 		*map_reg_addr;
786 
787 		mondo = pci_xlate_intr(dip, rdip, ib_p, ino);
788 		if (mondo == 0) {
789 			DEBUG1(DBG_R_INTX, dip,
790 			    "can't get mondo for ino %x\n", ino);
791 			return (DDI_FAILURE);
792 		}
793 
794 		if (hdlp->ih_pri == 0)
795 			hdlp->ih_pri = pci_class_to_pil(rdip);
796 
797 		hdlp->ih_vector = CB_MONDO_TO_XMONDO(cb_p, mondo);
798 
799 		DEBUG2(DBG_R_INTX, dip, "pci_rem_intr: pil=0x%x mondo=0x%x\n",
800 		    hdlp->ih_pri, hdlp->ih_vector);
801 
802 		i_ddi_rem_ivintr(hdlp);
803 
804 		DEBUG2(DBG_R_INTX, dip, "pulse success mondo=%x reg=%p\n",
805 		    mondo, map_reg_addr);
806 		return (DDI_SUCCESS);
807 	}
808 
809 	/* Translate the interrupt property */
810 	mondo = pci_xlate_intr(dip, rdip, pci_p->pci_ib_p, ino);
811 	if (mondo == 0) {
812 		DEBUG1(DBG_R_INTX, dip, "can't get mondo for ino %x\n", ino);
813 		return (DDI_FAILURE);
814 	}
815 	ino = IB_MONDO_TO_INO(mondo);
816 
817 	mutex_enter(&ib_p->ib_ino_lst_mutex);
818 	ino_p = ib_locate_ino(ib_p, ino);
819 	if (!ino_p) {
820 		int r = cb_remove_xintr(pci_p, dip, rdip, ino, mondo);
821 		if (r != DDI_SUCCESS)
822 			cmn_err(CE_WARN, "%s%d-xintr: ino %x is invalid",
823 			    ddi_driver_name(dip), ddi_get_instance(dip), ino);
824 		mutex_exit(&ib_p->ib_ino_lst_mutex);
825 		return (r);
826 	}
827 
828 	ipil_p = ib_ino_locate_ipil(ino_p, hdlp->ih_pri);
829 	ih_p = ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum);
830 	ib_ino_rem_intr(pci_p, ipil_p, ih_p);
831 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
832 	if (ipil_p->ipil_ih_size == 0) {
833 		IB_INO_INTR_PEND(ib_clear_intr_reg_addr(ib_p, ino));
834 		hdlp->ih_vector = CB_MONDO_TO_XMONDO(cb_p, mondo);
835 
836 		i_ddi_rem_ivintr(hdlp);
837 		ib_delete_ino_pil(ib_p, ipil_p);
838 	}
839 
840 	/* re-enable interrupt only if mapping register still shared */
841 	if (ib_ino_map_reg_unshare(ib_p, ino, ino_p) || ino_p->ino_ipil_size) {
842 		IB_INO_INTR_ON(ino_p->ino_map_reg);
843 		*ino_p->ino_map_reg;
844 	}
845 	mutex_exit(&ib_p->ib_ino_lst_mutex);
846 
847 	if (ino_p->ino_ipil_size == 0)
848 		kmem_free(ino_p, sizeof (ib_ino_info_t));
849 
850 	DEBUG1(DBG_R_INTX, dip, "success! mondo=%x\n", mondo);
851 	return (DDI_SUCCESS);
852 }
853 
854 /*
855  * free the pci_inos array allocated during pci_intr_setup. the actual
856  * interrupts are torn down by their respective block destroy routines:
857  * cb_destroy, pbm_destroy, and ib_destroy.
858  */
859 void
860 pci_intr_teardown(pci_t *pci_p)
861 {
862 	kmem_free(pci_p->pci_inos, pci_p->pci_inos_len);
863 	pci_p->pci_inos = NULL;
864 	pci_p->pci_inos_len = 0;
865 }
866