xref: /titanic_44/usr/src/uts/sun4/io/px/px_intr.c (revision e429788e241121c1f81089f762558027000ea25f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * PX nexus interrupt handling:
30  *	PX device interrupt handler wrapper
31  *	PIL lookup routine
32  *	PX device interrupt related initchild code
33  */
34 
35 #include <sys/types.h>
36 #include <sys/kmem.h>
37 #include <sys/async.h>
38 #include <sys/spl.h>
39 #include <sys/sunddi.h>
40 #include <sys/fm/protocol.h>
41 #include <sys/fm/util.h>
42 #include <sys/machsystm.h>	/* e_ddi_nodeid_to_dip() */
43 #include <sys/ddi_impldefs.h>
44 #include <sys/sdt.h>
45 #include <sys/atomic.h>
46 #include "px_obj.h"
47 #include <sys/ontrap.h>
48 #include <sys/membar.h>
49 #include <sys/clock.h>
50 
/*
 * interrupt jabber:
 *
 * When an interrupt line is jabbering, every time the state machine for the
 * associated ino is idled, a new mondo will be sent and the ino will go into
 * the pending state again. The mondo will cause a new call to
 * px_intx_intr(), which normally idles the ino's state machine, which in
 * turn would precipitate another trip round the loop.
 *
 * The loop can be broken by preventing the ino's state machine from being
 * idled when an interrupt line is jabbering. See the comment at the
 * beginning of px_intx_intr() explaining how the 'interrupt jabber
 * protection' code does this.
 */
65 
66 /*LINTLIBRARY*/
67 
68 /*
69  * If the unclaimed interrupt count has reached the limit set by
70  * pci_unclaimed_intr_max within the time limit, then all interrupts
71  * on this ino is blocked by not idling the interrupt state machine.
72  */
73 static int
74 px_spurintr(px_ib_ino_info_t *ino_p)
75 {
76 	px_ih_t	*ih_p = ino_p->ino_ih_start;
77 	px_t	*px_p = ino_p->ino_ib_p->ib_px_p;
78 	char	*err_fmt_str;
79 	int	i;
80 
81 	if (ino_p->ino_unclaimed > px_unclaimed_intr_max)
82 		return (DDI_INTR_CLAIMED);
83 
84 	if (!ino_p->ino_unclaimed)
85 		ino_p->ino_spurintr_begin = ddi_get_lbolt();
86 
87 	ino_p->ino_unclaimed++;
88 
89 	if (ino_p->ino_unclaimed <= px_unclaimed_intr_max)
90 		goto clear;
91 
92 	if (drv_hztousec(ddi_get_lbolt() - ino_p->ino_spurintr_begin)
93 	    > px_spurintr_duration) {
94 		ino_p->ino_unclaimed = 0;
95 		goto clear;
96 	}
97 	err_fmt_str = "%s%d: ino 0x%x blocked";
98 	goto warn;
99 clear:
100 	/* Clear the pending state */
101 	if (px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
102 	    INTR_IDLE_STATE) != DDI_SUCCESS)
103 		return (DDI_INTR_UNCLAIMED);
104 
105 	err_fmt_str = "!%s%d: spurious interrupt from ino 0x%x";
106 warn:
107 	cmn_err(CE_WARN, err_fmt_str, NAMEINST(px_p->px_dip), ino_p->ino_ino);
108 	for (i = 0; i < ino_p->ino_ih_size; i++, ih_p = ih_p->ih_next)
109 		cmn_err(CE_CONT, "!%s-%d#%x ", NAMEINST(ih_p->ih_dip),
110 		    ih_p->ih_inum);
111 	cmn_err(CE_CONT, "!\n");
112 	return (DDI_INTR_CLAIMED);
113 }
114 
115 extern uint64_t intr_get_time(void);
116 
117 /*
118  * px_intx_intr (INTx or legacy interrupt handler)
119  *
120  * This routine is used as wrapper around interrupt handlers installed by child
121  * device drivers.  This routine invokes the driver interrupt handlers and
122  * examines the return codes.
123  *
124  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
125  * least one handler claims the interrupt then the counter is halved and the
126  * interrupt state machine is idled. If no handler claims the interrupt then
127  * the counter is incremented by one and the state machine is idled.
128  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
129  * then the interrupt state machine is not idled thus preventing any further
130  * interrupts on that ino. The state machine will only be idled again if a
131  * handler is subsequently added or removed.
132  *
133  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
134  * DDI_INTR_UNCLAIMED otherwise.
135  */
136 uint_t
137 px_intx_intr(caddr_t arg)
138 {
139 	px_ib_ino_info_t *ino_p = (px_ib_ino_info_t *)arg;
140 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
141 	px_ih_t		*ih_p = ino_p->ino_ih_start;
142 	uint_t		result = 0, r;
143 	int		i;
144 
145 	DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
146 	    "ino=%x sysino=%llx pil=%x ih_size=%x ih_lst=%x\n",
147 	    ino_p->ino_ino, ino_p->ino_sysino, ino_p->ino_pil,
148 	    ino_p->ino_ih_size, ino_p->ino_ih_head);
149 
150 	for (i = 0; i < ino_p->ino_ih_size; i++, ih_p = ih_p->ih_next) {
151 		dev_info_t *dip = ih_p->ih_dip;
152 		uint_t (*handler)() = ih_p->ih_handler;
153 		caddr_t arg1 = ih_p->ih_handler_arg1;
154 		caddr_t arg2 = ih_p->ih_handler_arg2;
155 
156 		if (ih_p->ih_intr_state == PX_INTR_STATE_DISABLE) {
157 			DBG(DBG_INTX_INTR, px_p->px_dip,
158 			    "px_intx_intr: %s%d interrupt %d is disabled\n",
159 			    ddi_driver_name(dip), ddi_get_instance(dip),
160 			    ino_p->ino_ino);
161 
162 			continue;
163 		}
164 
165 		DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
166 		    "ino=%x handler=%p arg1 =%p arg2 = %p\n",
167 		    ino_p->ino_ino, handler, arg1, arg2);
168 
169 		DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
170 		    void *, handler, caddr_t, arg1, caddr_t, arg2);
171 
172 		r = (*handler)(arg1, arg2);
173 
174 		/*
175 		 * Account for time used by this interrupt. Protect against
176 		 * conflicting writes to ih_ticks from ib_intr_dist_all() by
177 		 * using atomic ops.
178 		 */
179 
180 		if (ino_p->ino_pil <= LOCK_LEVEL)
181 			atomic_add_64(&ih_p->ih_ticks, intr_get_time());
182 
183 		DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
184 		    void *, handler, caddr_t, arg1, int, r);
185 
186 		result += r;
187 
188 		if (px_check_all_handlers)
189 			continue;
190 		if (result)
191 			break;
192 	}
193 
194 	if (!result && px_unclaimed_intr_block)
195 		return (px_spurintr(ino_p));
196 
197 	ino_p->ino_unclaimed = 0;
198 
199 	/* Clear the pending state */
200 	if (px_lib_intr_setstate(ino_p->ino_ib_p->ib_px_p->px_dip,
201 	    ino_p->ino_sysino, INTR_IDLE_STATE) != DDI_SUCCESS)
202 		return (DDI_INTR_UNCLAIMED);
203 
204 	return (DDI_INTR_CLAIMED);
205 }
206 
207 /*
208  * px_msiq_intr (MSI/X or PCIe MSG interrupt handler)
209  *
210  * This routine is used as wrapper around interrupt handlers installed by child
211  * device drivers.  This routine invokes the driver interrupt handlers and
212  * examines the return codes.
213  *
214  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
215  * least one handler claims the interrupt then the counter is halved and the
216  * interrupt state machine is idled. If no handler claims the interrupt then
217  * the counter is incremented by one and the state machine is idled.
218  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
219  * then the interrupt state machine is not idled thus preventing any further
220  * interrupts on that ino. The state machine will only be idled again if a
221  * handler is subsequently added or removed.
222  *
223  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
224  * DDI_INTR_UNCLAIMED otherwise.
225  */
226 uint_t
227 px_msiq_intr(caddr_t arg)
228 {
229 	px_ib_ino_info_t	*ino_p = (px_ib_ino_info_t *)arg;
230 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
231 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
232 	px_msiq_t	*msiq_p = ino_p->ino_msiq_p;
233 	dev_info_t	*dip = px_p->px_dip;
234 	msiq_rec_t	msiq_rec, *msiq_rec_p = &msiq_rec;
235 	msiqhead_t	curr_msiq_rec_cnt, new_msiq_rec_cnt;
236 	msgcode_t	msg_code;
237 	px_ih_t		*ih_p;
238 	int		i, ret;
239 
240 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: msiq_id =%x ino=%x pil=%x "
241 	    "ih_size=%x ih_lst=%x\n", msiq_p->msiq_id, ino_p->ino_ino,
242 	    ino_p->ino_pil, ino_p->ino_ih_size, ino_p->ino_ih_head);
243 
244 	/* Read current MSIQ head index */
245 	px_lib_msiq_gethead(dip, msiq_p->msiq_id, &curr_msiq_rec_cnt);
246 	msiq_p->msiq_curr = (uint64_t)((caddr_t)msiq_p->msiq_base +
247 	    curr_msiq_rec_cnt * sizeof (msiq_rec_t));
248 	new_msiq_rec_cnt = curr_msiq_rec_cnt;
249 
250 	/* Read next MSIQ record */
251 	px_lib_get_msiq_rec(dip, msiq_p, msiq_rec_p);
252 
253 	/*
254 	 * Process current MSIQ record as long as record type
255 	 * field is non-zero.
256 	 */
257 	while (msiq_rec_p->msiq_rec_type) {
258 		DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSIQ RECORD, "
259 		    "msiq_rec_type 0x%llx msiq_rec_rid 0x%llx\n",
260 		    msiq_rec_p->msiq_rec_type, msiq_rec_p->msiq_rec_rid);
261 
262 		/* Get the pointer next EQ record */
263 		msiq_p->msiq_curr = (uint64_t)
264 		    ((caddr_t)msiq_p->msiq_curr + sizeof (msiq_rec_t));
265 
266 		/* Check for overflow condition */
267 		if (msiq_p->msiq_curr >= (uint64_t)((caddr_t)msiq_p->msiq_base +
268 		    msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t)))
269 			msiq_p->msiq_curr = msiq_p->msiq_base;
270 
271 		/* Check MSIQ record type */
272 		switch (msiq_rec_p->msiq_rec_type) {
273 		case MSG_REC:
274 			msg_code = msiq_rec_p->msiq_rec_data.msg.msg_code;
275 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: PCIE MSG "
276 			    "record, msg type 0x%x\n", msg_code);
277 			break;
278 		case MSI32_REC:
279 		case MSI64_REC:
280 			msg_code = msiq_rec_p->msiq_rec_data.msi.msi_data;
281 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSI record, "
282 			    "msi 0x%x\n", msg_code);
283 
284 			/* Clear MSI state */
285 			px_lib_msi_setstate(dip, (msinum_t)msg_code,
286 			    PCI_MSI_STATE_IDLE);
287 			break;
288 		default:
289 			msg_code = 0;
290 			cmn_err(CE_WARN, "%s%d: px_msiq_intr: 0x%x MSIQ "
291 			    "record type is not supported",
292 			    ddi_driver_name(dip), ddi_get_instance(dip),
293 			    msiq_rec_p->msiq_rec_type);
294 			goto next_rec;
295 		}
296 
297 		/*
298 		 * Scan through px_ih_t linked list, searching for the
299 		 * right px_ih_t, matching MSIQ record data.
300 		 */
301 		for (i = 0, ih_p = ino_p->ino_ih_start;
302 		    ih_p && (i < ino_p->ino_ih_size) &&
303 		    ((ih_p->ih_msg_code != msg_code) ||
304 		    (ih_p->ih_rec_type != msiq_rec_p->msiq_rec_type));
305 		    ih_p = ih_p->ih_next, i++);
306 
307 		if ((ih_p->ih_msg_code == msg_code) &&
308 		    (ih_p->ih_rec_type == msiq_rec_p->msiq_rec_type)) {
309 			dev_info_t *dip = ih_p->ih_dip;
310 			uint_t (*handler)() = ih_p->ih_handler;
311 			caddr_t arg1 = ih_p->ih_handler_arg1;
312 			caddr_t arg2 = ih_p->ih_handler_arg2;
313 
314 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: ino=%x data=%x "
315 			    "handler=%p arg1 =%p arg2=%p\n", ino_p->ino_ino,
316 			    msg_code, handler, arg1, arg2);
317 
318 			DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
319 			    void *, handler, caddr_t, arg1, caddr_t, arg2);
320 
321 			/*
322 			 * Special case for PCIE Error Messages.
323 			 * The current frame work doesn't fit PCIE Err Msgs
324 			 * This should be fixed when PCIE MESSAGES as a whole
325 			 * is architected correctly.
326 			 */
327 			if ((msg_code == PCIE_MSG_CODE_ERR_COR) ||
328 			    (msg_code == PCIE_MSG_CODE_ERR_NONFATAL) ||
329 			    (msg_code == PCIE_MSG_CODE_ERR_FATAL)) {
330 				ret = px_err_fabric_intr(px_p, msg_code,
331 				    msiq_rec_p->msiq_rec_rid);
332 			} else
333 				ret = (*handler)(arg1, arg2);
334 
335 			/*
336 			 * Account for time used by this interrupt. Protect
337 			 * against conflicting writes to ih_ticks from
338 			 * ib_intr_dist_all() by using atomic ops.
339 			 */
340 
341 			if (ino_p->ino_pil <= LOCK_LEVEL)
342 				atomic_add_64(&ih_p->ih_ticks, intr_get_time());
343 
344 			DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
345 			    void *, handler, caddr_t, arg1, int, ret);
346 		} else {
347 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr:"
348 			    "Not found matching MSIQ record\n");
349 
350 			/* px_spurintr(ino_p); */
351 			ino_p->ino_unclaimed++;
352 		}
353 
354 next_rec:
355 		new_msiq_rec_cnt++;
356 
357 		/* Zero out msiq_rec_type field */
358 		msiq_rec_p->msiq_rec_type = 0;
359 
360 		/* Read next MSIQ record */
361 		px_lib_get_msiq_rec(dip, msiq_p, msiq_rec_p);
362 	}
363 
364 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: No of MSIQ recs processed %x\n",
365 	    (new_msiq_rec_cnt - curr_msiq_rec_cnt));
366 
367 	/*  Update MSIQ head index with no of MSIQ records processed */
368 	if (new_msiq_rec_cnt > curr_msiq_rec_cnt)  {
369 		if (new_msiq_rec_cnt >= msiq_state_p->msiq_rec_cnt)
370 			new_msiq_rec_cnt -= msiq_state_p->msiq_rec_cnt;
371 
372 		px_lib_msiq_sethead(dip, msiq_p->msiq_id, new_msiq_rec_cnt);
373 	}
374 
375 	/* Clear the pending state */
376 	if (px_lib_intr_setstate(dip, ino_p->ino_sysino,
377 	    INTR_IDLE_STATE) != DDI_SUCCESS)
378 		return (DDI_INTR_UNCLAIMED);
379 
380 	return (DDI_INTR_CLAIMED);
381 }
382 
383 dev_info_t *
384 px_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip)
385 {
386 	dev_info_t	*cdip = rdip;
387 
388 	for (; ddi_get_parent(cdip) != dip; cdip = ddi_get_parent(cdip))
389 		;
390 
391 	return (cdip);
392 }
393 
394 /* Default class to pil value mapping */
395 px_class_val_t px_default_pil [] = {
396 	{0x000000, 0xff0000, 0x1},	/* Class code for pre-2.0 devices */
397 	{0x010000, 0xff0000, 0x4},	/* Mass Storage Controller */
398 	{0x020000, 0xff0000, 0x6},	/* Network Controller */
399 	{0x030000, 0xff0000, 0x9},	/* Display Controller */
400 	{0x040000, 0xff0000, 0x9},	/* Multimedia Controller */
401 	{0x050000, 0xff0000, 0x9},	/* Memory Controller */
402 	{0x060000, 0xff0000, 0x9},	/* Bridge Controller */
403 	{0x0c0000, 0xffff00, 0x9},	/* Serial Bus, FireWire (IEEE 1394) */
404 	{0x0c0100, 0xffff00, 0x4},	/* Serial Bus, ACCESS.bus */
405 	{0x0c0200, 0xffff00, 0x4},	/* Serial Bus, SSA */
406 	{0x0c0300, 0xffff00, 0x9},	/* Serial Bus Universal Serial Bus */
407 	{0x0c0400, 0xffff00, 0x6},	/* Serial Bus, Fibre Channel */
408 	{0x0c0600, 0xffff00, 0x6}	/* Serial Bus, Infiniband */
409 };
410 
411 /*
412  * Default class to intr_weight value mapping (% of CPU).  A driver.conf
413  * entry on or above the pci node like
414  *
415  *	pci-class-intr-weights= 0x020000, 0xff0000, 30;
416  *
417  * can be used to augment or override entries in the default table below.
418  *
419  * NB: The values below give NICs preference on redistribution, and provide
420  * NICs some isolation from other interrupt sources. We need better interfaces
421  * that allow the NIC driver to identify a specific NIC instance as high
422  * bandwidth, and thus deserving of separation from other low bandwidth
423  * NICs additional isolation from other interrupt sources.
424  *
425  * NB: We treat Infiniband like a NIC.
426  */
427 px_class_val_t px_default_intr_weight [] = {
428 	{0x020000, 0xff0000, 35},	/* Network Controller */
429 	{0x010000, 0xff0000, 10},	/* Mass Storage Controller */
430 	{0x0c0400, 0xffff00, 10},	/* Serial Bus, Fibre Channel */
431 	{0x0c0600, 0xffff00, 50}	/* Serial Bus, Infiniband */
432 };
433 
434 static uint32_t
435 px_match_class_val(uint32_t key, px_class_val_t *rec_p, int nrec,
436     uint32_t default_val)
437 {
438 	int	i;
439 
440 	for (i = 0; i < nrec; rec_p++, i++) {
441 		if ((rec_p->class_code & rec_p->class_mask) ==
442 		    (key & rec_p->class_mask))
443 			return (rec_p->class_val);
444 	}
445 
446 	return (default_val);
447 }
448 
449 /*
450  * px_class_to_val
451  *
452  * Return the configuration value, based on class code and sub class code,
453  * from the specified property based or default px_class_val_t table.
454  */
455 uint32_t
456 px_class_to_val(dev_info_t *rdip, char *property_name, px_class_val_t *rec_p,
457     int nrec, uint32_t default_val)
458 {
459 	int property_len;
460 	uint32_t class_code;
461 	px_class_val_t *conf;
462 	uint32_t val = default_val;
463 
464 	/*
465 	 * Use the "class-code" property to get the base and sub class
466 	 * codes for the requesting device.
467 	 */
468 	class_code = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, rdip,
469 	    DDI_PROP_DONTPASS, "class-code", -1);
470 
471 	if (class_code == -1)
472 		return (val);
473 
474 	/* look up the val from the default table */
475 	val = px_match_class_val(class_code, rec_p, nrec, val);
476 
477 	/* see if there is a more specific property specified value */
478 	if (ddi_getlongprop(DDI_DEV_T_ANY, rdip, DDI_PROP_NOTPROM,
479 	    property_name, (caddr_t)&conf, &property_len))
480 		return (val);
481 
482 	if ((property_len % sizeof (px_class_val_t)) == 0)
483 		val = px_match_class_val(class_code, conf,
484 		    property_len / sizeof (px_class_val_t), val);
485 	kmem_free(conf, property_len);
486 	return (val);
487 }
488 
489 /* px_class_to_pil: return the pil for a given device. */
490 uint32_t
491 px_class_to_pil(dev_info_t *rdip)
492 {
493 	uint32_t pil;
494 
495 	/* default pil is 0 (uninitialized) */
496 	pil = px_class_to_val(rdip,
497 	    "pci-class-priorities", px_default_pil,
498 	    sizeof (px_default_pil) / sizeof (px_class_val_t), 0);
499 
500 	/* range check the result */
501 	if (pil >= 0xf)
502 		pil = 0;
503 
504 	return (pil);
505 }
506 
507 /* px_class_to_intr_weight: return the intr_weight for a given device. */
508 static int32_t
509 px_class_to_intr_weight(dev_info_t *rdip)
510 {
511 	int32_t intr_weight;
512 
513 	/* default weight is 0% */
514 	intr_weight = px_class_to_val(rdip,
515 	    "pci-class-intr-weights", px_default_intr_weight,
516 	    sizeof (px_default_intr_weight) / sizeof (px_class_val_t), 0);
517 
518 	/* range check the result */
519 	if (intr_weight < 0)
520 		intr_weight = 0;
521 	if (intr_weight > 1000)
522 		intr_weight = 1000;
523 
524 	return (intr_weight);
525 }
526 
527 /* ARGSUSED */
528 int
529 px_intx_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
530     ddi_intr_handle_impl_t *hdlp, void *result)
531 {
532 	px_t	*px_p = DIP_TO_STATE(dip);
533 	int	ret = DDI_SUCCESS;
534 
535 	DBG(DBG_INTROPS, dip, "px_intx_ops: dip=%x rdip=%x intr_op=%x "
536 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
537 
538 	switch (intr_op) {
539 	case DDI_INTROP_GETCAP:
540 		ret = pci_intx_get_cap(rdip, (int *)result);
541 		break;
542 	case DDI_INTROP_SETCAP:
543 		DBG(DBG_INTROPS, dip, "px_intx_ops: SetCap is not supported\n");
544 		ret = DDI_ENOTSUP;
545 		break;
546 	case DDI_INTROP_ALLOC:
547 		*(int *)result = hdlp->ih_scratch1;
548 		break;
549 	case DDI_INTROP_FREE:
550 		break;
551 	case DDI_INTROP_GETPRI:
552 		*(int *)result = hdlp->ih_pri ?
553 		    hdlp->ih_pri : px_class_to_pil(rdip);
554 		break;
555 	case DDI_INTROP_SETPRI:
556 		break;
557 	case DDI_INTROP_ADDISR:
558 		ret = px_add_intx_intr(dip, rdip, hdlp);
559 		break;
560 	case DDI_INTROP_REMISR:
561 		ret = px_rem_intx_intr(dip, rdip, hdlp);
562 		break;
563 	case DDI_INTROP_ENABLE:
564 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
565 		    hdlp->ih_vector, PX_INTR_STATE_ENABLE, 0, 0);
566 		break;
567 	case DDI_INTROP_DISABLE:
568 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
569 		    hdlp->ih_vector, PX_INTR_STATE_DISABLE, 0, 0);
570 		break;
571 	case DDI_INTROP_SETMASK:
572 		ret = pci_intx_set_mask(rdip);
573 		break;
574 	case DDI_INTROP_CLRMASK:
575 		ret = pci_intx_clr_mask(rdip);
576 		break;
577 	case DDI_INTROP_GETPENDING:
578 		ret = pci_intx_get_pending(rdip, (int *)result);
579 		break;
580 	case DDI_INTROP_NINTRS:
581 	case DDI_INTROP_NAVAIL:
582 		*(int *)result = i_ddi_get_nintrs(rdip);
583 		break;
584 	default:
585 		ret = DDI_ENOTSUP;
586 		break;
587 	}
588 
589 	return (ret);
590 }
591 
592 /* ARGSUSED */
593 int
594 px_msix_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
595     ddi_intr_handle_impl_t *hdlp, void *result)
596 {
597 	px_t			*px_p = DIP_TO_STATE(dip);
598 	px_msi_state_t		*msi_state_p = &px_p->px_ib_p->ib_msi_state;
599 	msiq_rec_type_t		msiq_rec_type;
600 	msi_type_t		msi_type;
601 	uint64_t		msi_addr;
602 	msinum_t		msi_num;
603 	msiqid_t		msiq_id;
604 	uint_t			nintrs;
605 	int			i, ret = DDI_SUCCESS;
606 
607 	DBG(DBG_INTROPS, dip, "px_msix_ops: dip=%x rdip=%x intr_op=%x "
608 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
609 
610 	/* Check for MSI64 support */
611 	if ((hdlp->ih_cap & DDI_INTR_FLAG_MSI64) && msi_state_p->msi_addr64) {
612 		msiq_rec_type = MSI64_REC;
613 		msi_type = MSI64_TYPE;
614 		msi_addr = msi_state_p->msi_addr64;
615 	} else {
616 		msiq_rec_type = MSI32_REC;
617 		msi_type = MSI32_TYPE;
618 		msi_addr = msi_state_p->msi_addr32;
619 	}
620 
621 	switch (intr_op) {
622 	case DDI_INTROP_GETCAP:
623 		ret = pci_msi_get_cap(rdip, hdlp->ih_type, (int *)result);
624 		break;
625 	case DDI_INTROP_SETCAP:
626 		DBG(DBG_INTROPS, dip, "px_msix_ops: SetCap is not supported\n");
627 		ret = DDI_ENOTSUP;
628 		break;
629 	case DDI_INTROP_ALLOC:
630 		/*
631 		 * We need to restrict this allocation in future
632 		 * based on Resource Management policies.
633 		 */
634 		if ((ret = px_msi_alloc(px_p, rdip, hdlp->ih_inum,
635 		    hdlp->ih_scratch1, (uintptr_t)hdlp->ih_scratch2, &msi_num,
636 		    (int *)result)) != DDI_SUCCESS) {
637 			DBG(DBG_INTROPS, dip, "px_msix_ops: allocation "
638 			    "failed, rdip 0x%p type 0x%d inum 0x%x "
639 			    "count 0x%x\n", rdip, hdlp->ih_type, hdlp->ih_inum,
640 			    hdlp->ih_scratch1);
641 
642 			return (ret);
643 		}
644 
645 		if ((hdlp->ih_type == DDI_INTR_TYPE_MSIX) &&
646 		    (i_ddi_get_msix(rdip) == NULL)) {
647 			ddi_intr_msix_t		*msix_p;
648 
649 			if (msix_p = pci_msix_init(rdip)) {
650 				i_ddi_set_msix(rdip, msix_p);
651 				break;
652 			}
653 
654 			DBG(DBG_INTROPS, dip, "px_msix_ops: MSI-X allocation "
655 			    "failed, rdip 0x%p inum 0x%x\n", rdip,
656 			    hdlp->ih_inum);
657 
658 			(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
659 			    hdlp->ih_scratch1);
660 
661 			return (DDI_FAILURE);
662 		}
663 
664 		break;
665 	case DDI_INTROP_FREE:
666 		(void) pci_msi_disable_mode(rdip, hdlp->ih_type, hdlp->ih_inum);
667 		(void) pci_msi_unconfigure(rdip, hdlp->ih_type, hdlp->ih_inum);
668 
669 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
670 			goto msi_free;
671 
672 		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
673 			break;
674 
675 		if (((i_ddi_intr_get_current_nintrs(hdlp->ih_dip) - 1) == 0) &&
676 		    (i_ddi_get_msix(rdip))) {
677 			pci_msix_fini(i_ddi_get_msix(rdip));
678 			i_ddi_set_msix(rdip, NULL);
679 		}
680 msi_free:
681 		(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
682 		    hdlp->ih_scratch1);
683 		break;
684 	case DDI_INTROP_GETPRI:
685 		*(int *)result = hdlp->ih_pri ?
686 		    hdlp->ih_pri : px_class_to_pil(rdip);
687 		break;
688 	case DDI_INTROP_SETPRI:
689 		break;
690 	case DDI_INTROP_ADDISR:
691 		if ((ret = px_msi_get_msinum(px_p, hdlp->ih_dip,
692 		    hdlp->ih_inum, &msi_num)) != DDI_SUCCESS)
693 			return (ret);
694 
695 		if ((ret = px_add_msiq_intr(dip, rdip, hdlp,
696 		    msiq_rec_type, msi_num, &msiq_id)) != DDI_SUCCESS) {
697 			DBG(DBG_INTROPS, dip, "px_msix_ops: Add MSI handler "
698 			    "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num);
699 			return (ret);
700 		}
701 
702 		DBG(DBG_INTROPS, dip, "px_msix_ops: msiq used 0x%x\n", msiq_id);
703 
704 		if ((ret = px_lib_msi_setmsiq(dip, msi_num,
705 		    msiq_id, msi_type)) != DDI_SUCCESS) {
706 			(void) px_rem_msiq_intr(dip, rdip,
707 			    hdlp, msiq_rec_type, msi_num, msiq_id);
708 			return (ret);
709 		}
710 
711 		if ((ret = px_lib_msi_setstate(dip, msi_num,
712 		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS) {
713 			(void) px_rem_msiq_intr(dip, rdip,
714 			    hdlp, msiq_rec_type, msi_num, msiq_id);
715 			return (ret);
716 		}
717 
718 		hdlp->ih_vector = msi_num;
719 		break;
720 	case DDI_INTROP_DUPVEC:
721 		DBG(DBG_INTROPS, dip, "px_msix_ops: dupisr - inum: %x, "
722 		    "new_vector: %x\n", hdlp->ih_inum, hdlp->ih_scratch1);
723 
724 		ret = pci_msix_dup(hdlp->ih_dip, hdlp->ih_inum,
725 		    hdlp->ih_scratch1);
726 		break;
727 	case DDI_INTROP_REMISR:
728 		msi_num = hdlp->ih_vector;
729 
730 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
731 		    &msiq_id)) != DDI_SUCCESS)
732 			return (ret);
733 
734 		if ((ret = px_lib_msi_setstate(dip, msi_num,
735 		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS)
736 			return (ret);
737 
738 		ret = px_rem_msiq_intr(dip, rdip,
739 		    hdlp, msiq_rec_type, msi_num, msiq_id);
740 
741 		hdlp->ih_vector = 0;
742 		break;
743 	case DDI_INTROP_ENABLE:
744 		msi_num = hdlp->ih_vector;
745 
746 		if ((ret = px_lib_msi_setvalid(dip, msi_num,
747 		    PCI_MSI_VALID)) != DDI_SUCCESS)
748 			return (ret);
749 
750 		if (pci_is_msi_enabled(rdip, hdlp->ih_type) != DDI_SUCCESS) {
751 			nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
752 
753 			if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
754 			    nintrs, hdlp->ih_inum, msi_addr,
755 			    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
756 				return (ret);
757 
758 			if ((ret = pci_msi_enable_mode(rdip, hdlp->ih_type,
759 			    hdlp->ih_inum)) != DDI_SUCCESS)
760 				return (ret);
761 		}
762 
763 		if ((ret = pci_msi_clr_mask(rdip, hdlp->ih_type,
764 		    hdlp->ih_inum)) != DDI_SUCCESS)
765 			return (ret);
766 
767 		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
768 			break;
769 
770 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
771 		    &msiq_id)) != DDI_SUCCESS)
772 			return (ret);
773 
774 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
775 		    px_msiqid_to_devino(px_p, msiq_id), PX_INTR_STATE_ENABLE,
776 		    msiq_rec_type, msi_num);
777 
778 		break;
779 	case DDI_INTROP_DISABLE:
780 		msi_num = hdlp->ih_vector;
781 
782 		if ((ret = pci_msi_set_mask(rdip, hdlp->ih_type,
783 		    hdlp->ih_inum)) != DDI_SUCCESS)
784 			return (ret);
785 
786 		if ((ret = px_lib_msi_setvalid(dip, msi_num,
787 		    PCI_MSI_INVALID)) != DDI_SUCCESS)
788 			return (ret);
789 
790 		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
791 			break;
792 
793 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
794 		    &msiq_id)) != DDI_SUCCESS)
795 			return (ret);
796 
797 		ret = px_ib_update_intr_state(px_p, rdip,
798 		    hdlp->ih_inum, px_msiqid_to_devino(px_p, msiq_id),
799 		    PX_INTR_STATE_DISABLE, msiq_rec_type, msi_num);
800 
801 		break;
802 	case DDI_INTROP_BLOCKENABLE:
803 		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
804 		msi_num = hdlp->ih_vector;
805 
806 		if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
807 		    nintrs, hdlp->ih_inum, msi_addr,
808 		    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
809 			return (ret);
810 
811 		for (i = 0; i < nintrs; i++, msi_num++) {
812 			if ((ret = px_lib_msi_setvalid(dip, msi_num,
813 			    PCI_MSI_VALID)) != DDI_SUCCESS)
814 				return (ret);
815 
816 			if ((ret = px_lib_msi_getmsiq(dip, msi_num,
817 			    &msiq_id)) != DDI_SUCCESS)
818 				return (ret);
819 
820 			if ((ret = px_ib_update_intr_state(px_p, rdip,
821 			    hdlp->ih_inum + i, px_msiqid_to_devino(px_p,
822 			    msiq_id), PX_INTR_STATE_ENABLE, msiq_rec_type,
823 			    msi_num)) != DDI_SUCCESS)
824 				return (ret);
825 		}
826 
827 		ret = pci_msi_enable_mode(rdip, hdlp->ih_type, hdlp->ih_inum);
828 		break;
829 	case DDI_INTROP_BLOCKDISABLE:
830 		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
831 		msi_num = hdlp->ih_vector;
832 
833 		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type,
834 		    hdlp->ih_inum)) != DDI_SUCCESS)
835 			return (ret);
836 
837 		for (i = 0; i < nintrs; i++, msi_num++) {
838 			if ((ret = px_lib_msi_setvalid(dip, msi_num,
839 			    PCI_MSI_INVALID)) != DDI_SUCCESS)
840 				return (ret);
841 
842 			if ((ret = px_lib_msi_getmsiq(dip, msi_num,
843 			    &msiq_id)) != DDI_SUCCESS)
844 				return (ret);
845 
846 			if ((ret = px_ib_update_intr_state(px_p, rdip,
847 			    hdlp->ih_inum + i, px_msiqid_to_devino(px_p,
848 			    msiq_id), PX_INTR_STATE_DISABLE, msiq_rec_type,
849 			    msi_num)) != DDI_SUCCESS)
850 				return (ret);
851 		}
852 
853 		break;
854 	case DDI_INTROP_SETMASK:
855 		ret = pci_msi_set_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
856 		break;
857 	case DDI_INTROP_CLRMASK:
858 		ret = pci_msi_clr_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
859 		break;
860 	case DDI_INTROP_GETPENDING:
861 		ret = pci_msi_get_pending(rdip, hdlp->ih_type,
862 		    hdlp->ih_inum, (int *)result);
863 		break;
864 	case DDI_INTROP_NINTRS:
865 		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
866 		break;
867 	case DDI_INTROP_NAVAIL:
868 		/* XXX - a new interface may be needed */
869 		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
870 		break;
871 	default:
872 		ret = DDI_ENOTSUP;
873 		break;
874 	}
875 
876 	return (ret);
877 }
878 
879 static struct {
880 	kstat_named_t pxintr_ks_name;
881 	kstat_named_t pxintr_ks_type;
882 	kstat_named_t pxintr_ks_cpu;
883 	kstat_named_t pxintr_ks_pil;
884 	kstat_named_t pxintr_ks_time;
885 	kstat_named_t pxintr_ks_ino;
886 	kstat_named_t pxintr_ks_cookie;
887 	kstat_named_t pxintr_ks_devpath;
888 	kstat_named_t pxintr_ks_buspath;
889 } pxintr_ks_template = {
890 	{ "name",	KSTAT_DATA_CHAR },
891 	{ "type",	KSTAT_DATA_CHAR },
892 	{ "cpu",	KSTAT_DATA_UINT64 },
893 	{ "pil",	KSTAT_DATA_UINT64 },
894 	{ "time",	KSTAT_DATA_UINT64 },
895 	{ "ino",	KSTAT_DATA_UINT64 },
896 	{ "cookie",	KSTAT_DATA_UINT64 },
897 	{ "devpath",	KSTAT_DATA_STRING },
898 	{ "buspath",	KSTAT_DATA_STRING },
899 };
900 
901 static uint32_t pxintr_ks_instance;
902 static char ih_devpath[MAXPATHLEN];
903 static char ih_buspath[MAXPATHLEN];
904 kmutex_t pxintr_ks_template_lock;
905 
906 int
907 px_ks_update(kstat_t *ksp, int rw)
908 {
909 	px_ih_t *ih_p = ksp->ks_private;
910 	int maxlen = sizeof (pxintr_ks_template.pxintr_ks_name.value.c);
911 	px_ib_t *ib_p = ih_p->ih_ino_p->ino_ib_p;
912 	px_t *px_p = ib_p->ib_px_p;
913 	devino_t ino;
914 	sysino_t sysino;
915 
916 	ino = ih_p->ih_ino_p->ino_ino;
917 	(void) px_lib_intr_devino_to_sysino(px_p->px_dip, ino, &sysino);
918 
919 	(void) snprintf(pxintr_ks_template.pxintr_ks_name.value.c, maxlen,
920 	    "%s%d", ddi_driver_name(ih_p->ih_dip),
921 	    ddi_get_instance(ih_p->ih_dip));
922 
923 	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
924 	(void) ddi_pathname(px_p->px_dip, ih_buspath);
925 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_devpath, ih_devpath);
926 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_buspath, ih_buspath);
927 
928 	if (ih_p->ih_intr_state == PX_INTR_STATE_ENABLE) {
929 
930 		(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
931 		    (ih_p->ih_rec_type == 0) ? "fixed" : "msi");
932 		pxintr_ks_template.pxintr_ks_cpu.value.ui64 =
933 		    ih_p->ih_ino_p->ino_cpuid;
934 		pxintr_ks_template.pxintr_ks_pil.value.ui64 =
935 		    ih_p->ih_ino_p->ino_pil;
936 		pxintr_ks_template.pxintr_ks_time.value.ui64 = ih_p->ih_nsec +
937 		    (uint64_t)tick2ns((hrtime_t)ih_p->ih_ticks,
938 			ih_p->ih_ino_p->ino_cpuid);
939 		pxintr_ks_template.pxintr_ks_ino.value.ui64 = ino;
940 		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = sysino;
941 	} else {
942 		(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
943 		    "disabled");
944 		pxintr_ks_template.pxintr_ks_cpu.value.ui64 = 0;
945 		pxintr_ks_template.pxintr_ks_pil.value.ui64 = 0;
946 		pxintr_ks_template.pxintr_ks_time.value.ui64 = 0;
947 		pxintr_ks_template.pxintr_ks_ino.value.ui64 = 0;
948 		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = 0;
949 	}
950 	return (0);
951 }
952 
953 void
954 px_create_intr_kstats(px_ih_t *ih_p)
955 {
956 	msiq_rec_type_t rec_type = ih_p->ih_rec_type;
957 
958 	ASSERT(ih_p->ih_ksp == NULL);
959 
960 	/*
961 	 * Create pci_intrs::: kstats for all ih types except messages,
962 	 * which represent unusual conditions and don't need to be tracked.
963 	 */
964 	if (rec_type == 0 || rec_type == MSI32_REC || rec_type == MSI64_REC) {
965 		ih_p->ih_ksp = kstat_create("pci_intrs",
966 		    atomic_inc_32_nv(&pxintr_ks_instance), "config",
967 		    "interrupts", KSTAT_TYPE_NAMED,
968 		    sizeof (pxintr_ks_template) / sizeof (kstat_named_t),
969 		    KSTAT_FLAG_VIRTUAL);
970 	}
971 	if (ih_p->ih_ksp != NULL) {
972 		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
973 		ih_p->ih_ksp->ks_lock = &pxintr_ks_template_lock;
974 		ih_p->ih_ksp->ks_data = &pxintr_ks_template;
975 		ih_p->ih_ksp->ks_private = ih_p;
976 		ih_p->ih_ksp->ks_update = px_ks_update;
977 	}
978 }
979 
980 /*
981  * px_add_intx_intr:
982  *
983  * This function is called to register INTx and legacy hardware
984  * interrupt pins interrupts.
985  */
986 int
987 px_add_intx_intr(dev_info_t *dip, dev_info_t *rdip,
988     ddi_intr_handle_impl_t *hdlp)
989 {
990 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
991 	px_ib_t		*ib_p = px_p->px_ib_p;
992 	devino_t	ino;
993 	px_ih_t		*ih_p;
994 	px_ib_ino_info_t *ino_p;
995 	int32_t		weight;
996 	int		ret = DDI_SUCCESS;
997 
998 	ino = hdlp->ih_vector;
999 
1000 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: rdip=%s%d ino=%x "
1001 	    "handler=%x arg1=%x arg2=%x\n", ddi_driver_name(rdip),
1002 	    ddi_get_instance(rdip), ino, hdlp->ih_cb_func,
1003 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
1004 
1005 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum,
1006 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, 0, 0);
1007 
1008 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1009 
1010 	if (ino_p = px_ib_locate_ino(ib_p, ino)) {	/* sharing ino */
1011 		uint32_t intr_index = hdlp->ih_inum;
1012 		if (px_ib_ino_locate_intr(ino_p, rdip, intr_index, 0, 0)) {
1013 			DBG(DBG_A_INTX, dip, "px_add_intx_intr: "
1014 			    "dup intr #%d\n", intr_index);
1015 
1016 			ret = DDI_FAILURE;
1017 			goto fail1;
1018 		}
1019 
1020 		/* Save mondo value in hdlp */
1021 		hdlp->ih_vector = ino_p->ino_sysino;
1022 
1023 		if ((ret = px_ib_ino_add_intr(px_p, ino_p, ih_p))
1024 		    != DDI_SUCCESS)
1025 			goto fail1;
1026 	} else {
1027 		ino_p = px_ib_new_ino(ib_p, ino, ih_p);
1028 
1029 		if (hdlp->ih_pri == 0)
1030 			hdlp->ih_pri = px_class_to_pil(rdip);
1031 
1032 		/* Save mondo value in hdlp */
1033 		hdlp->ih_vector = ino_p->ino_sysino;
1034 
1035 		DBG(DBG_A_INTX, dip, "px_add_intx_intr: pil=0x%x mondo=0x%x\n",
1036 		    hdlp->ih_pri, hdlp->ih_vector);
1037 
1038 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1039 		    (ddi_intr_handler_t *)px_intx_intr, (caddr_t)ino_p, NULL);
1040 
1041 		ret = i_ddi_add_ivintr(hdlp);
1042 
1043 		/*
1044 		 * Restore original interrupt handler
1045 		 * and arguments in interrupt handle.
1046 		 */
1047 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1048 		    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1049 
1050 		if (ret != DDI_SUCCESS)
1051 			goto fail2;
1052 
1053 		/* Save the pil for this ino */
1054 		ino_p->ino_pil = hdlp->ih_pri;
1055 
1056 		/* select cpu, saving it for sharing and removal */
1057 		ino_p->ino_cpuid = intr_dist_cpuid();
1058 
1059 		/* Enable interrupt */
1060 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
1061 	}
1062 
1063 	/* add weight to the cpu that we are already targeting */
1064 	weight = px_class_to_intr_weight(rdip);
1065 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1066 
1067 	ih_p->ih_ino_p = ino_p;
1068 	px_create_intr_kstats(ih_p);
1069 	if (ih_p->ih_ksp)
1070 		kstat_install(ih_p->ih_ksp);
1071 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1072 
1073 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: done! Interrupt 0x%x pil=%x\n",
1074 	    ino_p->ino_sysino, hdlp->ih_pri);
1075 
1076 	return (ret);
1077 fail2:
1078 	px_ib_delete_ino(ib_p, ino_p);
1079 fail1:
1080 	if (ih_p->ih_config_handle)
1081 		pci_config_teardown(&ih_p->ih_config_handle);
1082 
1083 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1084 	kmem_free(ih_p, sizeof (px_ih_t));
1085 
1086 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: Failed! Interrupt 0x%x "
1087 	    "pil=%x\n", ino_p->ino_sysino, hdlp->ih_pri);
1088 
1089 	return (ret);
1090 }
1091 
1092 /*
1093  * px_rem_intx_intr:
1094  *
1095  * This function is called to unregister INTx and legacy hardware
1096  * interrupt pins interrupts.
1097  */
1098 int
1099 px_rem_intx_intr(dev_info_t *dip, dev_info_t *rdip,
1100     ddi_intr_handle_impl_t *hdlp)
1101 {
1102 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1103 	px_ib_t		*ib_p = px_p->px_ib_p;
1104 	devino_t	ino;
1105 	cpuid_t		curr_cpu;
1106 	px_ib_ino_info_t	*ino_p;
1107 	px_ih_t		*ih_p;
1108 	int		ret = DDI_SUCCESS;
1109 
1110 	ino = hdlp->ih_vector;
1111 
1112 	DBG(DBG_R_INTX, dip, "px_rem_intx_intr: rdip=%s%d ino=%x\n",
1113 	    ddi_driver_name(rdip), ddi_get_instance(rdip), ino);
1114 
1115 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1116 
1117 	ino_p = px_ib_locate_ino(ib_p, ino);
1118 	ih_p = px_ib_ino_locate_intr(ino_p, rdip, hdlp->ih_inum, 0, 0);
1119 
1120 	/* Get the current cpu */
1121 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1122 	    &curr_cpu)) != DDI_SUCCESS)
1123 		goto fail;
1124 
1125 	if ((ret = px_ib_ino_rem_intr(px_p, ino_p, ih_p)) != DDI_SUCCESS)
1126 		goto fail;
1127 
1128 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1129 
1130 	if (ino_p->ino_ih_size == 0) {
1131 		if ((ret = px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
1132 		    INTR_DELIVERED_STATE)) != DDI_SUCCESS)
1133 			goto fail;
1134 
1135 		hdlp->ih_vector = ino_p->ino_sysino;
1136 		i_ddi_rem_ivintr(hdlp);
1137 
1138 		px_ib_delete_ino(ib_p, ino_p);
1139 		kmem_free(ino_p, sizeof (px_ib_ino_info_t));
1140 	} else {
1141 		/* Re-enable interrupt only if mapping regsiter still shared */
1142 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1143 	}
1144 
1145 fail:
1146 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1147 	return (ret);
1148 }
1149 
1150 /*
1151  * px_add_msiq_intr:
1152  *
1153  * This function is called to register MSI/Xs and PCIe message interrupts.
1154  */
1155 int
1156 px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1157     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1158     msgcode_t msg_code, msiqid_t *msiq_id_p)
1159 {
1160 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1161 	px_ib_t		*ib_p = px_p->px_ib_p;
1162 	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
1163 	devino_t	ino;
1164 	px_ih_t		*ih_p;
1165 	px_ib_ino_info_t	*ino_p;
1166 	int32_t		weight;
1167 	int		ret = DDI_SUCCESS;
1168 
1169 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=%x "
1170 	    "arg1=%x arg2=%x\n", ddi_driver_name(rdip), ddi_get_instance(rdip),
1171 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
1172 
1173 	if ((ret = px_msiq_alloc(px_p, rec_type, msiq_id_p)) != DDI_SUCCESS) {
1174 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1175 		    "msiq allocation failed\n");
1176 		return (ret);
1177 	}
1178 
1179 	ino = px_msiqid_to_devino(px_p, *msiq_id_p);
1180 
1181 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum, hdlp->ih_cb_func,
1182 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, rec_type, msg_code);
1183 
1184 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1185 
1186 	if (ino_p = px_ib_locate_ino(ib_p, ino)) {	/* sharing ino */
1187 		uint32_t intr_index = hdlp->ih_inum;
1188 		if (px_ib_ino_locate_intr(ino_p, rdip,
1189 		    intr_index, rec_type, msg_code)) {
1190 			DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1191 			    "dup intr #%d\n", intr_index);
1192 
1193 			ret = DDI_FAILURE;
1194 			goto fail1;
1195 		}
1196 
1197 		if ((ret = px_ib_ino_add_intr(px_p, ino_p, ih_p))
1198 		    != DDI_SUCCESS)
1199 			goto fail1;
1200 	} else {
1201 		ino_p = px_ib_new_ino(ib_p, ino, ih_p);
1202 
1203 		ino_p->ino_msiq_p = msiq_state_p->msiq_p +
1204 		    (*msiq_id_p - msiq_state_p->msiq_1st_msiq_id);
1205 
1206 		if (hdlp->ih_pri == 0)
1207 			hdlp->ih_pri = px_class_to_pil(rdip);
1208 
1209 		/* Save mondo value in hdlp */
1210 		hdlp->ih_vector = ino_p->ino_sysino;
1211 
1212 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: pil=0x%x mondo=0x%x\n",
1213 		    hdlp->ih_pri, hdlp->ih_vector);
1214 
1215 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1216 		    (ddi_intr_handler_t *)px_msiq_intr, (caddr_t)ino_p, NULL);
1217 
1218 		ret = i_ddi_add_ivintr(hdlp);
1219 
1220 		/*
1221 		 * Restore original interrupt handler
1222 		 * and arguments in interrupt handle.
1223 		 */
1224 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1225 		    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1226 
1227 		if (ret != DDI_SUCCESS)
1228 			goto fail2;
1229 
1230 		/* Save the pil for this ino */
1231 		ino_p->ino_pil = hdlp->ih_pri;
1232 
1233 		/* Enable MSIQ */
1234 		px_lib_msiq_setstate(dip, *msiq_id_p, PCI_MSIQ_STATE_IDLE);
1235 		px_lib_msiq_setvalid(dip, *msiq_id_p, PCI_MSIQ_VALID);
1236 
1237 		/* select cpu, saving it for sharing and removal */
1238 		ino_p->ino_cpuid = intr_dist_cpuid();
1239 
1240 		/* Enable interrupt */
1241 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino_p->ino_ino);
1242 	}
1243 
1244 	/* add weight to the cpu that we are already targeting */
1245 	weight = px_class_to_intr_weight(rdip);
1246 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1247 
1248 	ih_p->ih_ino_p = ino_p;
1249 	px_create_intr_kstats(ih_p);
1250 	if (ih_p->ih_ksp)
1251 		kstat_install(ih_p->ih_ksp);
1252 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1253 
1254 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: done! Interrupt 0x%x pil=%x\n",
1255 	    ino_p->ino_sysino, hdlp->ih_pri);
1256 
1257 	return (ret);
1258 fail2:
1259 	px_ib_delete_ino(ib_p, ino_p);
1260 fail1:
1261 	if (ih_p->ih_config_handle)
1262 		pci_config_teardown(&ih_p->ih_config_handle);
1263 
1264 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1265 	kmem_free(ih_p, sizeof (px_ih_t));
1266 
1267 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: Failed! Interrupt 0x%x pil=%x\n",
1268 	    ino_p->ino_sysino, hdlp->ih_pri);
1269 
1270 	return (ret);
1271 }
1272 
1273 /*
1274  * px_rem_msiq_intr:
1275  *
1276  * This function is called to unregister MSI/Xs and PCIe message interrupts.
1277  */
1278 int
1279 px_rem_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1280     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1281     msgcode_t msg_code, msiqid_t msiq_id)
1282 {
1283 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1284 	px_ib_t		*ib_p = px_p->px_ib_p;
1285 	devino_t	ino = px_msiqid_to_devino(px_p, msiq_id);
1286 	cpuid_t		curr_cpu;
1287 	px_ib_ino_info_t *ino_p;
1288 	px_ih_t		*ih_p;
1289 	int		ret = DDI_SUCCESS;
1290 
1291 	DBG(DBG_MSIQ, dip, "px_rem_msiq_intr: rdip=%s%d msiq_id=%x ino=%x\n",
1292 	    ddi_driver_name(rdip), ddi_get_instance(rdip), msiq_id, ino);
1293 
1294 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1295 
1296 	ino_p = px_ib_locate_ino(ib_p, ino);
1297 	ih_p = px_ib_ino_locate_intr(ino_p, rdip, hdlp->ih_inum,
1298 	    rec_type, msg_code);
1299 
1300 	/* Get the current cpu */
1301 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1302 	    &curr_cpu)) != DDI_SUCCESS)
1303 		goto fail;
1304 
1305 	if ((ret = px_ib_ino_rem_intr(px_p, ino_p, ih_p)) != DDI_SUCCESS)
1306 		goto fail;
1307 
1308 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1309 
1310 	if (ino_p->ino_ih_size == 0) {
1311 		if ((ret = px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
1312 		    INTR_DELIVERED_STATE)) != DDI_SUCCESS)
1313 			goto fail;
1314 
1315 		px_lib_msiq_setvalid(dip, px_devino_to_msiqid(px_p, ino),
1316 		    PCI_MSIQ_INVALID);
1317 
1318 		hdlp->ih_vector = ino_p->ino_sysino;
1319 		i_ddi_rem_ivintr(hdlp);
1320 
1321 		px_ib_delete_ino(ib_p, ino_p);
1322 
1323 		(void) px_msiq_free(px_p, msiq_id);
1324 		kmem_free(ino_p, sizeof (px_ib_ino_info_t));
1325 	} else {
1326 		/* Re-enable interrupt only if mapping regsiter still shared */
1327 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1328 	}
1329 
1330 fail:
1331 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1332 	return (ret);
1333 }
1334