xref: /titanic_41/usr/src/uts/sun4/io/px/px_intr.c (revision 6dfee4834394825da35b977ca71cdc965bc7b6a4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * PX nexus interrupt handling:
31  *	PX device interrupt handler wrapper
32  *	PIL lookup routine
33  *	PX device interrupt related initchild code
34  */
35 
36 #include <sys/types.h>
37 #include <sys/kmem.h>
38 #include <sys/async.h>
39 #include <sys/spl.h>
40 #include <sys/sunddi.h>
41 #include <sys/fm/protocol.h>
42 #include <sys/fm/util.h>
43 #include <sys/machsystm.h>	/* e_ddi_nodeid_to_dip() */
44 #include <sys/ddi_impldefs.h>
45 #include <sys/sdt.h>
46 #include <sys/atomic.h>
47 #include "px_obj.h"
48 #include <sys/ontrap.h>
49 #include <sys/membar.h>
50 #include <sys/clock.h>
51 
/*
 * interrupt jabber:
 *
 * When an interrupt line is jabbering, every time the state machine for the
 * associated ino is idled, a new mondo will be sent and the ino will go into
 * the pending state again. The mondo will cause a new call to
 * px_intx_intr(), which normally idles the ino's state machine, which in
 * turn would precipitate another trip round the loop.
 *
 * The loop can be broken by preventing the ino's state machine from being
 * idled when an interrupt line is jabbering. See the comment at the
 * beginning of px_intx_intr() explaining how the 'interrupt jabber
 * protection' code does this.
 */
66 
67 /*LINTLIBRARY*/
68 
69 /*
70  * If the unclaimed interrupt count has reached the limit set by
71  * pci_unclaimed_intr_max within the time limit, then all interrupts
72  * on this ino is blocked by not idling the interrupt state machine.
73  */
74 static int
75 px_spurintr(px_ib_ino_info_t *ino_p)
76 {
77 	px_ih_t	*ih_p = ino_p->ino_ih_start;
78 	px_t	*px_p = ino_p->ino_ib_p->ib_px_p;
79 	char	*err_fmt_str;
80 	int	i;
81 
82 	if (ino_p->ino_unclaimed > px_unclaimed_intr_max)
83 		return (DDI_INTR_CLAIMED);
84 
85 	if (!ino_p->ino_unclaimed)
86 		ino_p->ino_spurintr_begin = ddi_get_lbolt();
87 
88 	ino_p->ino_unclaimed++;
89 
90 	if (ino_p->ino_unclaimed <= px_unclaimed_intr_max)
91 		goto clear;
92 
93 	if (drv_hztousec(ddi_get_lbolt() - ino_p->ino_spurintr_begin)
94 	    > px_spurintr_duration) {
95 		ino_p->ino_unclaimed = 0;
96 		goto clear;
97 	}
98 	err_fmt_str = "%s%d: ino 0x%x blocked";
99 	goto warn;
100 clear:
101 	/* Clear the pending state */
102 	if (px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
103 	    INTR_IDLE_STATE) != DDI_SUCCESS)
104 		return (DDI_INTR_UNCLAIMED);
105 
106 	err_fmt_str = "!%s%d: spurious interrupt from ino 0x%x";
107 warn:
108 	cmn_err(CE_WARN, err_fmt_str, NAMEINST(px_p->px_dip), ino_p->ino_ino);
109 	for (i = 0; i < ino_p->ino_ih_size; i++, ih_p = ih_p->ih_next)
110 		cmn_err(CE_CONT, "!%s-%d#%x ", NAMEINST(ih_p->ih_dip),
111 		    ih_p->ih_inum);
112 	cmn_err(CE_CONT, "!\n");
113 	return (DDI_INTR_CLAIMED);
114 }
115 
116 extern uint64_t intr_get_time(void);
117 
118 /*
119  * px_intx_intr (INTx or legacy interrupt handler)
120  *
121  * This routine is used as wrapper around interrupt handlers installed by child
122  * device drivers.  This routine invokes the driver interrupt handlers and
123  * examines the return codes.
124  *
125  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
126  * least one handler claims the interrupt then the counter is halved and the
127  * interrupt state machine is idled. If no handler claims the interrupt then
128  * the counter is incremented by one and the state machine is idled.
129  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
130  * then the interrupt state machine is not idled thus preventing any further
131  * interrupts on that ino. The state machine will only be idled again if a
132  * handler is subsequently added or removed.
133  *
134  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
135  * DDI_INTR_UNCLAIMED otherwise.
136  */
137 uint_t
138 px_intx_intr(caddr_t arg)
139 {
140 	px_ib_ino_info_t *ino_p = (px_ib_ino_info_t *)arg;
141 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
142 	px_ih_t		*ih_p = ino_p->ino_ih_start;
143 	uint_t		result = 0, r;
144 	int		i;
145 
146 	DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
147 	    "ino=%x sysino=%llx pil=%x ih_size=%x ih_lst=%x\n",
148 	    ino_p->ino_ino, ino_p->ino_sysino, ino_p->ino_pil,
149 	    ino_p->ino_ih_size, ino_p->ino_ih_head);
150 
151 	for (i = 0; i < ino_p->ino_ih_size; i++, ih_p = ih_p->ih_next) {
152 		dev_info_t *dip = ih_p->ih_dip;
153 		uint_t (*handler)() = ih_p->ih_handler;
154 		caddr_t arg1 = ih_p->ih_handler_arg1;
155 		caddr_t arg2 = ih_p->ih_handler_arg2;
156 
157 		if (ih_p->ih_intr_state == PX_INTR_STATE_DISABLE) {
158 			DBG(DBG_INTX_INTR, px_p->px_dip,
159 			    "px_intx_intr: %s%d interrupt %d is disabled\n",
160 			    ddi_driver_name(dip), ddi_get_instance(dip),
161 			    ino_p->ino_ino);
162 
163 			continue;
164 		}
165 
166 		DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
167 		    "ino=%x handler=%p arg1 =%p arg2 = %p\n",
168 		    ino_p->ino_ino, handler, arg1, arg2);
169 
170 		DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
171 		    void *, handler, caddr_t, arg1, caddr_t, arg2);
172 
173 		r = (*handler)(arg1, arg2);
174 
175 		/*
176 		 * Account for time used by this interrupt. Protect against
177 		 * conflicting writes to ih_ticks from ib_intr_dist_all() by
178 		 * using atomic ops.
179 		 */
180 
181 		if (ino_p->ino_pil <= LOCK_LEVEL)
182 			atomic_add_64(&ih_p->ih_ticks, intr_get_time());
183 
184 		DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
185 		    void *, handler, caddr_t, arg1, int, r);
186 
187 		result += r;
188 
189 		if (px_check_all_handlers)
190 			continue;
191 		if (result)
192 			break;
193 	}
194 
195 	if (!result && px_unclaimed_intr_block)
196 		return (px_spurintr(ino_p));
197 
198 	ino_p->ino_unclaimed = 0;
199 
200 	/* Clear the pending state */
201 	if (px_lib_intr_setstate(ino_p->ino_ib_p->ib_px_p->px_dip,
202 	    ino_p->ino_sysino, INTR_IDLE_STATE) != DDI_SUCCESS)
203 		return (DDI_INTR_UNCLAIMED);
204 
205 	return (DDI_INTR_CLAIMED);
206 }
207 
208 /*
209  * px_msiq_intr (MSI/X or PCIe MSG interrupt handler)
210  *
211  * This routine is used as wrapper around interrupt handlers installed by child
212  * device drivers.  This routine invokes the driver interrupt handlers and
213  * examines the return codes.
214  *
215  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
216  * least one handler claims the interrupt then the counter is halved and the
217  * interrupt state machine is idled. If no handler claims the interrupt then
218  * the counter is incremented by one and the state machine is idled.
219  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
220  * then the interrupt state machine is not idled thus preventing any further
221  * interrupts on that ino. The state machine will only be idled again if a
222  * handler is subsequently added or removed.
223  *
224  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
225  * DDI_INTR_UNCLAIMED otherwise.
226  */
227 uint_t
228 px_msiq_intr(caddr_t arg)
229 {
230 	px_ib_ino_info_t	*ino_p = (px_ib_ino_info_t *)arg;
231 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
232 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
233 	px_msiq_t	*msiq_p = ino_p->ino_msiq_p;
234 	dev_info_t	*dip = px_p->px_dip;
235 	msiq_rec_t	msiq_rec, *msiq_rec_p = &msiq_rec;
236 	msiqhead_t	curr_msiq_rec_cnt, new_msiq_rec_cnt;
237 	msgcode_t	msg_code;
238 	px_ih_t		*ih_p;
239 	int		ret;
240 
241 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: msiq_id =%x ino=%x pil=%x "
242 	    "ih_size=%x ih_lst=%x\n", msiq_p->msiq_id, ino_p->ino_ino,
243 	    ino_p->ino_pil, ino_p->ino_ih_size, ino_p->ino_ih_head);
244 
245 	/* Read current MSIQ head index */
246 	px_lib_msiq_gethead(dip, msiq_p->msiq_id, &curr_msiq_rec_cnt);
247 	msiq_p->msiq_curr = (uint64_t)((caddr_t)msiq_p->msiq_base +
248 	    curr_msiq_rec_cnt * sizeof (msiq_rec_t));
249 	new_msiq_rec_cnt = curr_msiq_rec_cnt;
250 
251 	/* Read next MSIQ record */
252 	px_lib_get_msiq_rec(dip, msiq_p, msiq_rec_p);
253 
254 	/*
255 	 * Process current MSIQ record as long as record type
256 	 * field is non-zero.
257 	 */
258 	while (msiq_rec_p->msiq_rec_type) {
259 		DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSIQ RECORD, "
260 		    "msiq_rec_type 0x%llx msiq_rec_rid 0x%llx\n",
261 		    msiq_rec_p->msiq_rec_type, msiq_rec_p->msiq_rec_rid);
262 
263 		/* Get the pointer next EQ record */
264 		msiq_p->msiq_curr = (uint64_t)
265 		    ((caddr_t)msiq_p->msiq_curr + sizeof (msiq_rec_t));
266 
267 		/* Check for overflow condition */
268 		if (msiq_p->msiq_curr >= (uint64_t)((caddr_t)msiq_p->msiq_base +
269 		    msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t)))
270 			msiq_p->msiq_curr = msiq_p->msiq_base;
271 
272 		/* Check MSIQ record type */
273 		switch (msiq_rec_p->msiq_rec_type) {
274 		case MSG_REC:
275 			msg_code = msiq_rec_p->msiq_rec_data.msg.msg_code;
276 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: PCIE MSG "
277 			    "record, msg type 0x%x\n", msg_code);
278 			break;
279 		case MSI32_REC:
280 		case MSI64_REC:
281 			msg_code = msiq_rec_p->msiq_rec_data.msi.msi_data;
282 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSI record, "
283 			    "msi 0x%x\n", msg_code);
284 
285 			/* Clear MSI state */
286 			px_lib_msi_setstate(dip, (msinum_t)msg_code,
287 			    PCI_MSI_STATE_IDLE);
288 			break;
289 		default:
290 			msg_code = 0;
291 			cmn_err(CE_WARN, "%s%d: px_msiq_intr: 0x%x MSIQ "
292 			    "record type is not supported",
293 			    ddi_driver_name(dip), ddi_get_instance(dip),
294 			    msiq_rec_p->msiq_rec_type);
295 			goto next_rec;
296 		}
297 
298 		ih_p = ino_p->ino_ih_start;
299 
300 		/*
301 		 * Scan through px_ih_t linked list, searching for the
302 		 * right px_ih_t, matching MSIQ record data.
303 		 */
304 		while ((ih_p) && (ih_p->ih_msg_code != msg_code) &&
305 		    (ih_p->ih_rec_type != msiq_rec_p->msiq_rec_type))
306 			ih_p = ih_p->ih_next;
307 
308 		if ((ih_p->ih_msg_code == msg_code) &&
309 		    (ih_p->ih_rec_type == msiq_rec_p->msiq_rec_type)) {
310 			dev_info_t *dip = ih_p->ih_dip;
311 			uint_t (*handler)() = ih_p->ih_handler;
312 			caddr_t arg1 = ih_p->ih_handler_arg1;
313 			caddr_t arg2 = ih_p->ih_handler_arg2;
314 
315 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: ino=%x data=%x "
316 			    "handler=%p arg1 =%p arg2=%p\n", ino_p->ino_ino,
317 			    msg_code, handler, arg1, arg2);
318 
319 			DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
320 			    void *, handler, caddr_t, arg1, caddr_t, arg2);
321 
322 			/*
323 			 * Special case for PCIE Error Messages.
324 			 * The current frame work doesn't fit PCIE Err Msgs
325 			 * This should be fixed when PCIE MESSAGES as a whole
326 			 * is architected correctly.
327 			 */
328 			if ((msg_code == PCIE_MSG_CODE_ERR_COR) ||
329 			    (msg_code == PCIE_MSG_CODE_ERR_NONFATAL) ||
330 			    (msg_code == PCIE_MSG_CODE_ERR_FATAL)) {
331 				ret = px_err_fabric_intr(px_p, msg_code,
332 				    msiq_rec_p->msiq_rec_rid);
333 			} else
334 				ret = (*handler)(arg1, arg2);
335 
336 			/*
337 			 * Account for time used by this interrupt. Protect
338 			 * against conflicting writes to ih_ticks from
339 			 * ib_intr_dist_all() by using atomic ops.
340 			 */
341 
342 			if (ino_p->ino_pil <= LOCK_LEVEL)
343 				atomic_add_64(&ih_p->ih_ticks, intr_get_time());
344 
345 			DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
346 			    void *, handler, caddr_t, arg1, int, ret);
347 		} else {
348 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr:"
349 			    "Not found matching MSIQ record\n");
350 
351 			/* px_spurintr(ino_p); */
352 			ino_p->ino_unclaimed++;
353 		}
354 
355 next_rec:
356 		new_msiq_rec_cnt++;
357 
358 		/* Zero out msiq_rec_type field */
359 		msiq_rec_p->msiq_rec_type = 0;
360 
361 		/* Read next MSIQ record */
362 		px_lib_get_msiq_rec(dip, msiq_p, msiq_rec_p);
363 	}
364 
365 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: No of MSIQ recs processed %x\n",
366 	    (new_msiq_rec_cnt - curr_msiq_rec_cnt));
367 
368 	/*  Update MSIQ head index with no of MSIQ records processed */
369 	if (new_msiq_rec_cnt > curr_msiq_rec_cnt)  {
370 		if (new_msiq_rec_cnt >= msiq_state_p->msiq_rec_cnt)
371 			new_msiq_rec_cnt -= msiq_state_p->msiq_rec_cnt;
372 
373 		px_lib_msiq_sethead(dip, msiq_p->msiq_id, new_msiq_rec_cnt);
374 	}
375 
376 	/* Clear the pending state */
377 	if (px_lib_intr_setstate(dip, ino_p->ino_sysino,
378 	    INTR_IDLE_STATE) != DDI_SUCCESS)
379 		return (DDI_INTR_UNCLAIMED);
380 
381 	return (DDI_INTR_CLAIMED);
382 }
383 
384 dev_info_t *
385 px_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip)
386 {
387 	dev_info_t	*cdip = rdip;
388 
389 	for (; ddi_get_parent(cdip) != dip; cdip = ddi_get_parent(cdip))
390 		;
391 
392 	return (cdip);
393 }
394 
395 /* Default class to pil value mapping */
396 px_class_val_t px_default_pil [] = {
397 	{0x000000, 0xff0000, 0x1},	/* Class code for pre-2.0 devices */
398 	{0x010000, 0xff0000, 0x4},	/* Mass Storage Controller */
399 	{0x020000, 0xff0000, 0x6},	/* Network Controller */
400 	{0x030000, 0xff0000, 0x9},	/* Display Controller */
401 	{0x040000, 0xff0000, 0x9},	/* Multimedia Controller */
402 	{0x050000, 0xff0000, 0xb},	/* Memory Controller */
403 	{0x060000, 0xff0000, 0xb},	/* Bridge Controller */
404 	{0x0c0000, 0xffff00, 0x9},	/* Serial Bus, FireWire (IEEE 1394) */
405 	{0x0c0100, 0xffff00, 0x4},	/* Serial Bus, ACCESS.bus */
406 	{0x0c0200, 0xffff00, 0x4},	/* Serial Bus, SSA */
407 	{0x0c0300, 0xffff00, 0x9},	/* Serial Bus Universal Serial Bus */
408 	{0x0c0400, 0xffff00, 0x6},	/* Serial Bus, Fibre Channel */
409 	{0x0c0600, 0xffff00, 0x6}	/* Serial Bus, Infiniband */
410 };
411 
412 /*
413  * Default class to intr_weight value mapping (% of CPU).  A driver.conf
414  * entry on or above the pci node like
415  *
416  *	pci-class-intr-weights= 0x020000, 0xff0000, 30;
417  *
418  * can be used to augment or override entries in the default table below.
419  *
420  * NB: The values below give NICs preference on redistribution, and provide
421  * NICs some isolation from other interrupt sources. We need better interfaces
422  * that allow the NIC driver to identify a specific NIC instance as high
423  * bandwidth, and thus deserving of separation from other low bandwidth
424  * NICs additional isolation from other interrupt sources.
425  *
426  * NB: We treat Infiniband like a NIC.
427  */
428 px_class_val_t px_default_intr_weight [] = {
429 	{0x020000, 0xff0000, 35},	/* Network Controller */
430 	{0x010000, 0xff0000, 10},	/* Mass Storage Controller */
431 	{0x0c0400, 0xffff00, 10},	/* Serial Bus, Fibre Channel */
432 	{0x0c0600, 0xffff00, 50}	/* Serial Bus, Infiniband */
433 };
434 
435 static uint32_t
436 px_match_class_val(uint32_t key, px_class_val_t *rec_p, int nrec,
437     uint32_t default_val)
438 {
439 	int	i;
440 
441 	for (i = 0; i < nrec; rec_p++, i++) {
442 		if ((rec_p->class_code & rec_p->class_mask) ==
443 		    (key & rec_p->class_mask))
444 			return (rec_p->class_val);
445 	}
446 
447 	return (default_val);
448 }
449 
450 /*
451  * px_class_to_val
452  *
453  * Return the configuration value, based on class code and sub class code,
454  * from the specified property based or default px_class_val_t table.
455  */
456 uint32_t
457 px_class_to_val(dev_info_t *rdip, char *property_name, px_class_val_t *rec_p,
458     int nrec, uint32_t default_val)
459 {
460 	int property_len;
461 	uint32_t class_code;
462 	px_class_val_t *conf;
463 	uint32_t val = default_val;
464 
465 	/*
466 	 * Use the "class-code" property to get the base and sub class
467 	 * codes for the requesting device.
468 	 */
469 	class_code = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, rdip,
470 	    DDI_PROP_DONTPASS, "class-code", -1);
471 
472 	if (class_code == -1)
473 		return (val);
474 
475 	/* look up the val from the default table */
476 	val = px_match_class_val(class_code, rec_p, nrec, val);
477 
478 	/* see if there is a more specific property specified value */
479 	if (ddi_getlongprop(DDI_DEV_T_ANY, rdip, DDI_PROP_NOTPROM,
480 	    property_name, (caddr_t)&conf, &property_len))
481 		return (val);
482 
483 	if ((property_len % sizeof (px_class_val_t)) == 0)
484 		val = px_match_class_val(class_code, conf,
485 		    property_len / sizeof (px_class_val_t), val);
486 	kmem_free(conf, property_len);
487 	return (val);
488 }
489 
490 /* px_class_to_pil: return the pil for a given device. */
491 uint32_t
492 px_class_to_pil(dev_info_t *rdip)
493 {
494 	uint32_t pil;
495 
496 	/* default pil is 0 (uninitialized) */
497 	pil = px_class_to_val(rdip,
498 	    "pci-class-priorities", px_default_pil,
499 	    sizeof (px_default_pil) / sizeof (px_class_val_t), 0);
500 
501 	/* range check the result */
502 	if (pil >= 0xf)
503 		pil = 0;
504 
505 	return (pil);
506 }
507 
508 /* px_class_to_intr_weight: return the intr_weight for a given device. */
509 static int32_t
510 px_class_to_intr_weight(dev_info_t *rdip)
511 {
512 	int32_t intr_weight;
513 
514 	/* default weight is 0% */
515 	intr_weight = px_class_to_val(rdip,
516 	    "pci-class-intr-weights", px_default_intr_weight,
517 	    sizeof (px_default_intr_weight) / sizeof (px_class_val_t), 0);
518 
519 	/* range check the result */
520 	if (intr_weight < 0)
521 		intr_weight = 0;
522 	if (intr_weight > 1000)
523 		intr_weight = 1000;
524 
525 	return (intr_weight);
526 }
527 
528 /* ARGSUSED */
529 int
530 px_intx_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
531     ddi_intr_handle_impl_t *hdlp, void *result)
532 {
533 	px_t	*px_p = DIP_TO_STATE(dip);
534 	int	ret = DDI_SUCCESS;
535 
536 	DBG(DBG_INTROPS, dip, "px_intx_ops: dip=%x rdip=%x intr_op=%x "
537 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
538 
539 	switch (intr_op) {
540 	case DDI_INTROP_GETCAP:
541 		ret = pci_intx_get_cap(rdip, (int *)result);
542 		break;
543 	case DDI_INTROP_SETCAP:
544 		DBG(DBG_INTROPS, dip, "px_intx_ops: SetCap is not supported\n");
545 		ret = DDI_ENOTSUP;
546 		break;
547 	case DDI_INTROP_ALLOC:
548 		*(int *)result = hdlp->ih_scratch1;
549 		break;
550 	case DDI_INTROP_FREE:
551 		break;
552 	case DDI_INTROP_GETPRI:
553 		*(int *)result = hdlp->ih_pri ?
554 		    hdlp->ih_pri : px_class_to_pil(rdip);
555 		break;
556 	case DDI_INTROP_SETPRI:
557 		break;
558 	case DDI_INTROP_ADDISR:
559 		ret = px_add_intx_intr(dip, rdip, hdlp);
560 		break;
561 	case DDI_INTROP_REMISR:
562 		ret = px_rem_intx_intr(dip, rdip, hdlp);
563 		break;
564 	case DDI_INTROP_ENABLE:
565 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
566 		    hdlp->ih_vector, PX_INTR_STATE_ENABLE, 0, 0);
567 		break;
568 	case DDI_INTROP_DISABLE:
569 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
570 		    hdlp->ih_vector, PX_INTR_STATE_DISABLE, 0, 0);
571 		break;
572 	case DDI_INTROP_SETMASK:
573 		ret = pci_intx_set_mask(rdip);
574 		break;
575 	case DDI_INTROP_CLRMASK:
576 		ret = pci_intx_clr_mask(rdip);
577 		break;
578 	case DDI_INTROP_GETPENDING:
579 		ret = pci_intx_get_pending(rdip, (int *)result);
580 		break;
581 	case DDI_INTROP_NINTRS:
582 	case DDI_INTROP_NAVAIL:
583 		*(int *)result = i_ddi_get_nintrs(rdip);
584 		break;
585 	case DDI_INTROP_SUPPORTED_TYPES:
586 		*(int *)result = DDI_INTR_TYPE_FIXED;
587 		break;
588 	default:
589 		ret = DDI_ENOTSUP;
590 		break;
591 	}
592 
593 	return (ret);
594 }
595 
596 /* ARGSUSED */
597 int
598 px_msix_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
599     ddi_intr_handle_impl_t *hdlp, void *result)
600 {
601 	px_t			*px_p = DIP_TO_STATE(dip);
602 	px_msi_state_t		*msi_state_p = &px_p->px_ib_p->ib_msi_state;
603 	msinum_t		msi_num;
604 	msiqid_t		msiq_id;
605 	uint_t			nintrs;
606 	int			i, ret = DDI_SUCCESS;
607 
608 	DBG(DBG_INTROPS, dip, "px_msix_ops: dip=%x rdip=%x intr_op=%x "
609 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
610 
611 	switch (intr_op) {
612 	case DDI_INTROP_GETCAP:
613 		ret = pci_msi_get_cap(rdip, hdlp->ih_type, (int *)result);
614 		break;
615 	case DDI_INTROP_SETCAP:
616 		DBG(DBG_INTROPS, dip, "px_msix_ops: SetCap is not supported\n");
617 		ret = DDI_ENOTSUP;
618 		break;
619 	case DDI_INTROP_ALLOC:
620 		/*
621 		 * We need to restrict this allocation in future
622 		 * based on Resource Management policies.
623 		 */
624 		if ((ret = px_msi_alloc(px_p, rdip, hdlp->ih_inum,
625 		    hdlp->ih_scratch1, hdlp->ih_scratch2, &msi_num,
626 		    (int *)result)) != DDI_SUCCESS) {
627 			DBG(DBG_INTROPS, dip, "px_msix_ops: MSI allocation "
628 			    "failed, rdip 0x%p inum 0x%x count 0x%x\n",
629 			    rdip, hdlp->ih_inum, hdlp->ih_scratch1);
630 
631 			return (ret);
632 		}
633 
634 		break;
635 	case DDI_INTROP_FREE:
636 		(void) pci_msi_disable_mode(rdip, hdlp->ih_type, hdlp->ih_inum);
637 		(void) pci_msi_unconfigure(rdip, hdlp->ih_type, hdlp->ih_inum);
638 		(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
639 		    hdlp->ih_scratch1);
640 		break;
641 	case DDI_INTROP_GETPRI:
642 		*(int *)result = hdlp->ih_pri ?
643 		    hdlp->ih_pri : px_class_to_pil(rdip);
644 		break;
645 	case DDI_INTROP_SETPRI:
646 		break;
647 	case DDI_INTROP_ADDISR:
648 		if ((ret = px_msi_get_msinum(px_p, hdlp->ih_dip,
649 		    hdlp->ih_inum, &msi_num)) != DDI_SUCCESS)
650 			return (ret);
651 
652 		if ((ret = px_add_msiq_intr(dip, rdip, hdlp,
653 		    MSI32_REC, msi_num, &msiq_id)) != DDI_SUCCESS) {
654 			DBG(DBG_INTROPS, dip, "px_msix_ops: Add MSI handler "
655 			    "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num);
656 			return (ret);
657 		}
658 
659 		DBG(DBG_INTROPS, dip, "px_msix_ops: msiq used 0x%x\n", msiq_id);
660 
661 		if ((ret = px_lib_msi_setmsiq(dip, msi_num,
662 		    msiq_id, MSI32_TYPE)) != DDI_SUCCESS) {
663 			(void) px_rem_msiq_intr(dip, rdip,
664 			    hdlp, MSI32_REC, msi_num, msiq_id);
665 			return (ret);
666 		}
667 
668 		if ((ret = px_lib_msi_setstate(dip, msi_num,
669 		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS) {
670 			(void) px_rem_msiq_intr(dip, rdip,
671 			    hdlp, MSI32_REC, msi_num, msiq_id);
672 			return (ret);
673 		}
674 
675 		hdlp->ih_vector = msi_num;
676 		break;
677 	case DDI_INTROP_DUPVEC:
678 		DBG(DBG_INTROPS, dip, "px_msix_ops: DupIsr is not supported\n");
679 		ret = DDI_ENOTSUP;
680 		break;
681 	case DDI_INTROP_REMISR:
682 		msi_num = hdlp->ih_vector;
683 
684 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
685 		    &msiq_id)) != DDI_SUCCESS)
686 			return (ret);
687 
688 		if ((ret = px_lib_msi_setstate(dip, msi_num,
689 		    PCI_MSI_STATE_DELIVERED)) != DDI_SUCCESS)
690 			return (ret);
691 
692 		ret = px_rem_msiq_intr(dip, rdip,
693 		    hdlp, MSI32_REC, msi_num, msiq_id);
694 
695 		hdlp->ih_vector = 0;
696 		break;
697 	case DDI_INTROP_ENABLE:
698 		msi_num = hdlp->ih_vector;
699 
700 		if ((ret = px_lib_msi_setvalid(dip, msi_num,
701 		    PCI_MSI_VALID)) != DDI_SUCCESS)
702 			return (ret);
703 
704 		if (pci_is_msi_enabled(rdip, hdlp->ih_type) != DDI_SUCCESS) {
705 			nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
706 
707 			if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
708 			    nintrs, hdlp->ih_inum, msi_state_p->msi_addr32,
709 			    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
710 				return (ret);
711 
712 			if ((ret = pci_msi_enable_mode(rdip, hdlp->ih_type,
713 			    hdlp->ih_inum)) != DDI_SUCCESS)
714 				return (ret);
715 		}
716 
717 		if ((ret = pci_msi_clr_mask(rdip, hdlp->ih_type,
718 		    hdlp->ih_inum)) != DDI_SUCCESS)
719 			return (ret);
720 
721 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
722 		    &msiq_id)) != DDI_SUCCESS)
723 			return (ret);
724 
725 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
726 		    px_msiqid_to_devino(px_p, msiq_id), PX_INTR_STATE_ENABLE,
727 		    MSI32_REC, msi_num);
728 
729 		break;
730 	case DDI_INTROP_DISABLE:
731 		msi_num = hdlp->ih_vector;
732 
733 		if ((ret = pci_msi_set_mask(rdip, hdlp->ih_type,
734 		    hdlp->ih_inum)) != DDI_SUCCESS)
735 			return (ret);
736 
737 		if ((ret = px_lib_msi_setvalid(dip, msi_num,
738 		    PCI_MSI_INVALID)) != DDI_SUCCESS)
739 			return (ret);
740 
741 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
742 		    &msiq_id)) != DDI_SUCCESS)
743 			return (ret);
744 
745 		ret = px_ib_update_intr_state(px_p, rdip,
746 		    hdlp->ih_inum, px_msiqid_to_devino(px_p, msiq_id),
747 		    PX_INTR_STATE_DISABLE, MSI32_REC, msi_num);
748 
749 		break;
750 	case DDI_INTROP_BLOCKENABLE:
751 		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
752 		msi_num = hdlp->ih_vector;
753 
754 		if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
755 		    nintrs, hdlp->ih_inum, msi_state_p->msi_addr32,
756 		    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
757 			return (ret);
758 
759 		for (i = 0; i < nintrs; i++, msi_num++) {
760 			if ((ret = px_lib_msi_setvalid(dip, msi_num,
761 			    PCI_MSI_VALID)) != DDI_SUCCESS)
762 				return (ret);
763 
764 			if ((ret = px_lib_msi_getmsiq(dip, msi_num,
765 			    &msiq_id)) != DDI_SUCCESS)
766 				return (ret);
767 
768 			if ((ret = px_ib_update_intr_state(px_p, rdip,
769 			    hdlp->ih_inum + i, px_msiqid_to_devino(px_p,
770 			    msiq_id), PX_INTR_STATE_ENABLE, MSI32_REC, msi_num))
771 			    != DDI_SUCCESS)
772 				return (ret);
773 		}
774 
775 		ret = pci_msi_enable_mode(rdip, hdlp->ih_type, hdlp->ih_inum);
776 		break;
777 	case DDI_INTROP_BLOCKDISABLE:
778 		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
779 		msi_num = hdlp->ih_vector;
780 
781 		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type,
782 		    hdlp->ih_inum)) != DDI_SUCCESS)
783 			return (ret);
784 
785 		for (i = 0; i < nintrs; i++, msi_num++) {
786 			if ((ret = px_lib_msi_setvalid(dip, msi_num,
787 			    PCI_MSI_INVALID)) != DDI_SUCCESS)
788 				return (ret);
789 
790 			if ((ret = px_lib_msi_getmsiq(dip, msi_num,
791 			    &msiq_id)) != DDI_SUCCESS)
792 				return (ret);
793 
794 			if ((ret = px_ib_update_intr_state(px_p, rdip,
795 			    hdlp->ih_inum + i, px_msiqid_to_devino(px_p,
796 			    msiq_id), PX_INTR_STATE_DISABLE, MSI32_REC,
797 			    msi_num)) != DDI_SUCCESS)
798 				return (ret);
799 		}
800 
801 		break;
802 	case DDI_INTROP_SETMASK:
803 		ret = pci_msi_set_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
804 		break;
805 	case DDI_INTROP_CLRMASK:
806 		ret = pci_msi_clr_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
807 		break;
808 	case DDI_INTROP_GETPENDING:
809 		ret = pci_msi_get_pending(rdip, hdlp->ih_type,
810 		    hdlp->ih_inum, (int *)result);
811 		break;
812 	case DDI_INTROP_NINTRS:
813 		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
814 		break;
815 	case DDI_INTROP_NAVAIL:
816 		/* XXX - a new interface may be needed */
817 		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
818 		break;
819 	case DDI_INTROP_SUPPORTED_TYPES:
820 		ret = pci_msi_get_supported_type(rdip, (int *)result);
821 		break;
822 	default:
823 		ret = DDI_ENOTSUP;
824 		break;
825 	}
826 
827 	return (ret);
828 }
829 
830 static struct {
831 	kstat_named_t pxintr_ks_name;
832 	kstat_named_t pxintr_ks_type;
833 	kstat_named_t pxintr_ks_cpu;
834 	kstat_named_t pxintr_ks_pil;
835 	kstat_named_t pxintr_ks_time;
836 	kstat_named_t pxintr_ks_ino;
837 	kstat_named_t pxintr_ks_cookie;
838 	kstat_named_t pxintr_ks_devpath;
839 	kstat_named_t pxintr_ks_buspath;
840 } pxintr_ks_template = {
841 	{ "name",	KSTAT_DATA_CHAR },
842 	{ "type",	KSTAT_DATA_CHAR },
843 	{ "cpu",	KSTAT_DATA_UINT64 },
844 	{ "pil",	KSTAT_DATA_UINT64 },
845 	{ "time",	KSTAT_DATA_UINT64 },
846 	{ "ino",	KSTAT_DATA_UINT64 },
847 	{ "cookie",	KSTAT_DATA_UINT64 },
848 	{ "devpath",	KSTAT_DATA_STRING },
849 	{ "buspath",	KSTAT_DATA_STRING },
850 };
851 
852 static uint32_t pxintr_ks_instance;
853 kmutex_t pxintr_ks_template_lock;
854 
855 int
856 px_ks_update(kstat_t *ksp, int rw)
857 {
858 	px_ih_t *ih_p = ksp->ks_private;
859 	int maxlen = sizeof (pxintr_ks_template.pxintr_ks_name.value.c);
860 	px_ib_t *ib_p = ih_p->ih_ino_p->ino_ib_p;
861 	px_t *px_p = ib_p->ib_px_p;
862 	devino_t ino;
863 	sysino_t sysino;
864 	char ih_devpath[MAXPATHLEN];
865 	char ih_buspath[MAXPATHLEN];
866 
867 	ino = ih_p->ih_ino_p->ino_ino;
868 	(void) px_lib_intr_devino_to_sysino(px_p->px_dip, ino, &sysino);
869 
870 	(void) snprintf(pxintr_ks_template.pxintr_ks_name.value.c, maxlen,
871 	    "%s%d", ddi_driver_name(ih_p->ih_dip),
872 	    ddi_get_instance(ih_p->ih_dip));
873 
874 	(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
875 	    (ih_p->ih_rec_type == 0) ? "fixed" : "msi");
876 	pxintr_ks_template.pxintr_ks_cpu.value.ui64 = ih_p->ih_ino_p->ino_cpuid;
877 	pxintr_ks_template.pxintr_ks_pil.value.ui64 = ih_p->ih_ino_p->ino_pil;
878 	pxintr_ks_template.pxintr_ks_time.value.ui64 =
879 	    ih_p->ih_nsec + (uint64_t)
880 	    tick2ns((hrtime_t)ih_p->ih_ticks, ih_p->ih_ino_p->ino_cpuid);
881 	pxintr_ks_template.pxintr_ks_ino.value.ui64 = ino;
882 	pxintr_ks_template.pxintr_ks_cookie.value.ui64 = sysino;
883 
884 	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
885 	(void) ddi_pathname(px_p->px_dip, ih_buspath);
886 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_devpath, ih_devpath);
887 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_buspath, ih_buspath);
888 
889 	return (0);
890 }
891 
892 void
893 px_create_intr_kstats(px_ih_t *ih_p)
894 {
895 	msiq_rec_type_t rec_type = ih_p->ih_rec_type;
896 
897 	ASSERT(ih_p->ih_ksp == NULL);
898 
899 	/*
900 	 * Create pci_intrs::: kstats for all ih types except messages,
901 	 * which represent unusual conditions and don't need to be tracked.
902 	 */
903 	if (rec_type == 0 || rec_type == MSI32_REC || rec_type == MSI64_REC) {
904 		ih_p->ih_ksp = kstat_create("pci_intrs",
905 		    atomic_inc_32_nv(&pxintr_ks_instance), "config",
906 		    "interrupts", KSTAT_TYPE_NAMED,
907 		    sizeof (pxintr_ks_template) / sizeof (kstat_named_t),
908 		    KSTAT_FLAG_VIRTUAL);
909 	}
910 	if (ih_p->ih_ksp != NULL) {
911 		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
912 		ih_p->ih_ksp->ks_lock = &pxintr_ks_template_lock;
913 		ih_p->ih_ksp->ks_data = &pxintr_ks_template;
914 		ih_p->ih_ksp->ks_private = ih_p;
915 		ih_p->ih_ksp->ks_update = px_ks_update;
916 	}
917 }
918 
919 /*
920  * px_add_intx_intr:
921  *
922  * This function is called to register INTx and legacy hardware
923  * interrupt pins interrupts.
924  */
925 int
926 px_add_intx_intr(dev_info_t *dip, dev_info_t *rdip,
927     ddi_intr_handle_impl_t *hdlp)
928 {
929 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
930 	px_ib_t		*ib_p = px_p->px_ib_p;
931 	devino_t	ino;
932 	px_ih_t		*ih_p;
933 	px_ib_ino_info_t *ino_p;
934 	int32_t		weight;
935 	int		ret = DDI_SUCCESS;
936 
937 	ino = hdlp->ih_vector;
938 
939 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: rdip=%s%d ino=%x "
940 	    "handler=%x arg1=%x arg2=%x\n", ddi_driver_name(rdip),
941 	    ddi_get_instance(rdip), ino, hdlp->ih_cb_func,
942 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
943 
944 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum,
945 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, 0, 0);
946 
947 	mutex_enter(&ib_p->ib_ino_lst_mutex);
948 
949 	if (ino_p = px_ib_locate_ino(ib_p, ino)) {	/* sharing ino */
950 		uint32_t intr_index = hdlp->ih_inum;
951 		if (px_ib_ino_locate_intr(ino_p, rdip, intr_index, 0, 0)) {
952 			DBG(DBG_A_INTX, dip, "px_add_intx_intr: "
953 			    "dup intr #%d\n", intr_index);
954 
955 			ret = DDI_FAILURE;
956 			goto fail1;
957 		}
958 
959 		/* Save mondo value in hdlp */
960 		hdlp->ih_vector = ino_p->ino_sysino;
961 
962 		if ((ret = px_ib_ino_add_intr(px_p, ino_p, ih_p))
963 		    != DDI_SUCCESS)
964 			goto fail1;
965 	} else {
966 		ino_p = px_ib_new_ino(ib_p, ino, ih_p);
967 
968 		if (hdlp->ih_pri == 0)
969 			hdlp->ih_pri = px_class_to_pil(rdip);
970 
971 		/* Save mondo value in hdlp */
972 		hdlp->ih_vector = ino_p->ino_sysino;
973 
974 		DBG(DBG_A_INTX, dip, "px_add_intx_intr: pil=0x%x mondo=0x%x\n",
975 		    hdlp->ih_pri, hdlp->ih_vector);
976 
977 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
978 		    (ddi_intr_handler_t *)px_intx_intr, (caddr_t)ino_p, NULL);
979 
980 		ret = i_ddi_add_ivintr(hdlp);
981 
982 		/*
983 		 * Restore original interrupt handler
984 		 * and arguments in interrupt handle.
985 		 */
986 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
987 		    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
988 
989 		if (ret != DDI_SUCCESS)
990 			goto fail2;
991 
992 		/* Save the pil for this ino */
993 		ino_p->ino_pil = hdlp->ih_pri;
994 
995 		/* select cpu, saving it for sharing and removal */
996 		ino_p->ino_cpuid = intr_dist_cpuid();
997 
998 		/* Enable interrupt */
999 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
1000 	}
1001 
1002 	/* add weight to the cpu that we are already targeting */
1003 	weight = px_class_to_intr_weight(rdip);
1004 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1005 
1006 	ih_p->ih_ino_p = ino_p;
1007 	px_create_intr_kstats(ih_p);
1008 	if (ih_p->ih_ksp)
1009 		kstat_install(ih_p->ih_ksp);
1010 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1011 
1012 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: done! Interrupt 0x%x pil=%x\n",
1013 	    ino_p->ino_sysino, hdlp->ih_pri);
1014 
1015 	return (ret);
1016 fail2:
1017 	px_ib_delete_ino(ib_p, ino_p);
1018 fail1:
1019 	if (ih_p->ih_config_handle)
1020 		pci_config_teardown(&ih_p->ih_config_handle);
1021 
1022 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1023 	kmem_free(ih_p, sizeof (px_ih_t));
1024 
1025 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: Failed! Interrupt 0x%x "
1026 	    "pil=%x\n", ino_p->ino_sysino, hdlp->ih_pri);
1027 
1028 	return (ret);
1029 }
1030 
1031 /*
1032  * px_rem_intx_intr:
1033  *
1034  * This function is called to unregister INTx and legacy hardware
1035  * interrupt pins interrupts.
1036  */
1037 int
1038 px_rem_intx_intr(dev_info_t *dip, dev_info_t *rdip,
1039     ddi_intr_handle_impl_t *hdlp)
1040 {
1041 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1042 	px_ib_t		*ib_p = px_p->px_ib_p;
1043 	devino_t	ino;
1044 	cpuid_t		curr_cpu;
1045 	px_ib_ino_info_t	*ino_p;
1046 	px_ih_t		*ih_p;
1047 	int		ret = DDI_SUCCESS;
1048 
1049 	ino = hdlp->ih_vector;
1050 
1051 	DBG(DBG_R_INTX, dip, "px_rem_intx_intr: rdip=%s%d ino=%x\n",
1052 	    ddi_driver_name(rdip), ddi_get_instance(rdip), ino);
1053 
1054 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1055 
1056 	ino_p = px_ib_locate_ino(ib_p, ino);
1057 	ih_p = px_ib_ino_locate_intr(ino_p, rdip, hdlp->ih_inum, 0, 0);
1058 
1059 	/* Get the current cpu */
1060 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1061 	    &curr_cpu)) != DDI_SUCCESS)
1062 		goto fail;
1063 
1064 	if ((ret = px_ib_ino_rem_intr(px_p, ino_p, ih_p)) != DDI_SUCCESS)
1065 		goto fail;
1066 
1067 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1068 
1069 	if (ino_p->ino_ih_size == 0) {
1070 		if ((ret = px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
1071 		    INTR_DELIVERED_STATE)) != DDI_SUCCESS)
1072 			goto fail;
1073 
1074 		hdlp->ih_vector = ino_p->ino_sysino;
1075 		i_ddi_rem_ivintr(hdlp);
1076 
1077 		px_ib_delete_ino(ib_p, ino_p);
1078 		kmem_free(ino_p, sizeof (px_ib_ino_info_t));
1079 	} else {
1080 		/* Re-enable interrupt only if mapping regsiter still shared */
1081 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1082 	}
1083 
1084 fail:
1085 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1086 	return (ret);
1087 }
1088 
1089 /*
1090  * px_add_msiq_intr:
1091  *
1092  * This function is called to register MSI/Xs and PCIe message interrupts.
1093  */
1094 int
1095 px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1096     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1097     msgcode_t msg_code, msiqid_t *msiq_id_p)
1098 {
1099 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1100 	px_ib_t		*ib_p = px_p->px_ib_p;
1101 	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
1102 	devino_t	ino;
1103 	px_ih_t		*ih_p;
1104 	px_ib_ino_info_t	*ino_p;
1105 	int32_t		weight;
1106 	int		ret = DDI_SUCCESS;
1107 
1108 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=%x "
1109 	    "arg1=%x arg2=%x\n", ddi_driver_name(rdip), ddi_get_instance(rdip),
1110 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
1111 
1112 	if ((ret = px_msiq_alloc(px_p, rec_type, msiq_id_p)) != DDI_SUCCESS) {
1113 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1114 		    "msiq allocation failed\n");
1115 		return (ret);
1116 	}
1117 
1118 	ino = px_msiqid_to_devino(px_p, *msiq_id_p);
1119 
1120 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum, hdlp->ih_cb_func,
1121 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, rec_type, msg_code);
1122 
1123 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1124 
1125 	if (ino_p = px_ib_locate_ino(ib_p, ino)) {	/* sharing ino */
1126 		uint32_t intr_index = hdlp->ih_inum;
1127 		if (px_ib_ino_locate_intr(ino_p, rdip,
1128 		    intr_index, rec_type, msg_code)) {
1129 			DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1130 			    "dup intr #%d\n", intr_index);
1131 
1132 			ret = DDI_FAILURE;
1133 			goto fail1;
1134 		}
1135 
1136 		if ((ret = px_ib_ino_add_intr(px_p, ino_p, ih_p))
1137 		    != DDI_SUCCESS)
1138 			goto fail1;
1139 	} else {
1140 		ino_p = px_ib_new_ino(ib_p, ino, ih_p);
1141 
1142 		ino_p->ino_msiq_p = msiq_state_p->msiq_p +
1143 		    (*msiq_id_p - msiq_state_p->msiq_1st_msiq_id);
1144 
1145 		if (hdlp->ih_pri == 0)
1146 			hdlp->ih_pri = px_class_to_pil(rdip);
1147 
1148 		/* Save mondo value in hdlp */
1149 		hdlp->ih_vector = ino_p->ino_sysino;
1150 
1151 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: pil=0x%x mondo=0x%x\n",
1152 		    hdlp->ih_pri, hdlp->ih_vector);
1153 
1154 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1155 		    (ddi_intr_handler_t *)px_msiq_intr, (caddr_t)ino_p, NULL);
1156 
1157 		ret = i_ddi_add_ivintr(hdlp);
1158 
1159 		/*
1160 		 * Restore original interrupt handler
1161 		 * and arguments in interrupt handle.
1162 		 */
1163 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1164 		    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1165 
1166 		if (ret != DDI_SUCCESS)
1167 			goto fail2;
1168 
1169 		/* Save the pil for this ino */
1170 		ino_p->ino_pil = hdlp->ih_pri;
1171 
1172 		/* Enable MSIQ */
1173 		px_lib_msiq_setstate(dip, *msiq_id_p, PCI_MSIQ_STATE_IDLE);
1174 		px_lib_msiq_setvalid(dip, *msiq_id_p, PCI_MSIQ_VALID);
1175 
1176 		/* select cpu, saving it for sharing and removal */
1177 		ino_p->ino_cpuid = intr_dist_cpuid();
1178 
1179 		/* Enable interrupt */
1180 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino_p->ino_ino);
1181 	}
1182 
1183 	/* add weight to the cpu that we are already targeting */
1184 	weight = px_class_to_intr_weight(rdip);
1185 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1186 
1187 	ih_p->ih_ino_p = ino_p;
1188 	px_create_intr_kstats(ih_p);
1189 	if (ih_p->ih_ksp)
1190 		kstat_install(ih_p->ih_ksp);
1191 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1192 
1193 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: done! Interrupt 0x%x pil=%x\n",
1194 	    ino_p->ino_sysino, hdlp->ih_pri);
1195 
1196 	return (ret);
1197 fail2:
1198 	px_ib_delete_ino(ib_p, ino_p);
1199 fail1:
1200 	if (ih_p->ih_config_handle)
1201 		pci_config_teardown(&ih_p->ih_config_handle);
1202 
1203 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1204 	kmem_free(ih_p, sizeof (px_ih_t));
1205 
1206 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: Failed! Interrupt 0x%x pil=%x\n",
1207 	    ino_p->ino_sysino, hdlp->ih_pri);
1208 
1209 	return (ret);
1210 }
1211 
1212 /*
1213  * px_rem_msiq_intr:
1214  *
1215  * This function is called to unregister MSI/Xs and PCIe message interrupts.
1216  */
1217 int
1218 px_rem_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1219     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1220     msgcode_t msg_code, msiqid_t msiq_id)
1221 {
1222 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1223 	px_ib_t		*ib_p = px_p->px_ib_p;
1224 	devino_t	ino = px_msiqid_to_devino(px_p, msiq_id);
1225 	cpuid_t		curr_cpu;
1226 	px_ib_ino_info_t *ino_p;
1227 	px_ih_t		*ih_p;
1228 	int		ret = DDI_SUCCESS;
1229 
1230 	DBG(DBG_MSIQ, dip, "px_rem_msiq_intr: rdip=%s%d msiq_id=%x ino=%x\n",
1231 	    ddi_driver_name(rdip), ddi_get_instance(rdip), msiq_id, ino);
1232 
1233 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1234 
1235 	ino_p = px_ib_locate_ino(ib_p, ino);
1236 	ih_p = px_ib_ino_locate_intr(ino_p, rdip, hdlp->ih_inum,
1237 	    rec_type, msg_code);
1238 
1239 	/* Get the current cpu */
1240 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1241 	    &curr_cpu)) != DDI_SUCCESS)
1242 		goto fail;
1243 
1244 	if ((ret = px_ib_ino_rem_intr(px_p, ino_p, ih_p)) != DDI_SUCCESS)
1245 		goto fail;
1246 
1247 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1248 
1249 	if (ino_p->ino_ih_size == 0) {
1250 		if ((ret = px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
1251 		    INTR_DELIVERED_STATE)) != DDI_SUCCESS)
1252 			goto fail;
1253 
1254 		px_lib_msiq_setvalid(dip, px_devino_to_msiqid(px_p, ino),
1255 		    PCI_MSIQ_INVALID);
1256 
1257 		hdlp->ih_vector = ino_p->ino_sysino;
1258 		i_ddi_rem_ivintr(hdlp);
1259 
1260 		px_ib_delete_ino(ib_p, ino_p);
1261 
1262 		(void) px_msiq_free(px_p, msiq_id);
1263 		kmem_free(ino_p, sizeof (px_ib_ino_info_t));
1264 	} else {
1265 		/* Re-enable interrupt only if mapping regsiter still shared */
1266 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1267 	}
1268 
1269 fail:
1270 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1271 	return (ret);
1272 }
1273