xref: /illumos-gate/usr/src/uts/sun4/io/px/px_intr.c (revision d1a180b0452ce86577a43be3245d2eacdeec1a34)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * PX nexus interrupt handling:
31  *	PX device interrupt handler wrapper
32  *	PIL lookup routine
33  *	PX device interrupt related initchild code
34  */
35 
36 #include <sys/types.h>
37 #include <sys/kmem.h>
38 #include <sys/async.h>
39 #include <sys/spl.h>
40 #include <sys/sunddi.h>
41 #include <sys/fm/protocol.h>
42 #include <sys/fm/util.h>
43 #include <sys/machsystm.h>	/* e_ddi_nodeid_to_dip() */
44 #include <sys/ddi_impldefs.h>
45 #include <sys/sdt.h>
46 #include <sys/atomic.h>
47 #include "px_obj.h"
48 #include <sys/ontrap.h>
49 #include <sys/membar.h>
50 #include <sys/clock.h>
51 
52 /*
53  * interrupt jabber:
54  *
55  * When an interrupt line is jabbering, every time the state machine for the
56  * associated ino is idled, a new mondo will be sent and the ino will go into
57  * the pending state again. The mondo will cause a new call to
58  * px_intr_wrapper() which normally idles the ino's state machine which would
59  * precipitate another trip round the loop.
60  *
61  * The loop can be broken by preventing the ino's state machine from being
62  * idled when an interrupt line is jabbering. See the comment at the
63  * beginning of px_intr_wrapper() explaining how the 'interrupt jabber
64  * protection' code does this.
65  */
66 
67 /*LINTLIBRARY*/
68 
69 /*
70  * If the unclaimed interrupt count has reached the limit set by
71  * pci_unclaimed_intr_max within the time limit, then all interrupts
72  * on this ino is blocked by not idling the interrupt state machine.
73  */
74 static int
75 px_spurintr(px_ib_ino_info_t *ino_p)
76 {
77 	px_ih_t	*ih_p = ino_p->ino_ih_start;
78 	px_t	*px_p = ino_p->ino_ib_p->ib_px_p;
79 	char	*err_fmt_str;
80 	int	i;
81 
82 	if (ino_p->ino_unclaimed > px_unclaimed_intr_max)
83 		return (DDI_INTR_CLAIMED);
84 
85 	if (!ino_p->ino_unclaimed)
86 		ino_p->ino_spurintr_begin = ddi_get_lbolt();
87 
88 	ino_p->ino_unclaimed++;
89 
90 	if (ino_p->ino_unclaimed <= px_unclaimed_intr_max)
91 		goto clear;
92 
93 	if (drv_hztousec(ddi_get_lbolt() - ino_p->ino_spurintr_begin)
94 	    > px_spurintr_duration) {
95 		ino_p->ino_unclaimed = 0;
96 		goto clear;
97 	}
98 	err_fmt_str = "%s%d: ino 0x%x blocked";
99 	goto warn;
100 clear:
101 	/* Clear the pending state */
102 	if (px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
103 	    INTR_IDLE_STATE) != DDI_SUCCESS)
104 		return (DDI_INTR_UNCLAIMED);
105 
106 	err_fmt_str = "!%s%d: spurious interrupt from ino 0x%x";
107 warn:
108 	cmn_err(CE_WARN, err_fmt_str, NAMEINST(px_p->px_dip), ino_p->ino_ino);
109 	for (i = 0; i < ino_p->ino_ih_size; i++, ih_p = ih_p->ih_next)
110 		cmn_err(CE_CONT, "!%s-%d#%x ", NAMEINST(ih_p->ih_dip),
111 		    ih_p->ih_inum);
112 	cmn_err(CE_CONT, "!\n");
113 	return (DDI_INTR_CLAIMED);
114 }
115 
116 extern uint64_t intr_get_time(void);
117 
118 /*
119  * px_intx_intr (INTx or legacy interrupt handler)
120  *
121  * This routine is used as wrapper around interrupt handlers installed by child
122  * device drivers.  This routine invokes the driver interrupt handlers and
123  * examines the return codes.
124  *
125  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
126  * least one handler claims the interrupt then the counter is halved and the
127  * interrupt state machine is idled. If no handler claims the interrupt then
128  * the counter is incremented by one and the state machine is idled.
129  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
130  * then the interrupt state machine is not idled thus preventing any further
131  * interrupts on that ino. The state machine will only be idled again if a
132  * handler is subsequently added or removed.
133  *
134  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
135  * DDI_INTR_UNCLAIMED otherwise.
136  */
137 uint_t
138 px_intx_intr(caddr_t arg)
139 {
140 	px_ib_ino_info_t *ino_p = (px_ib_ino_info_t *)arg;
141 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
142 	px_ih_t		*ih_p = ino_p->ino_ih_start;
143 	uint_t		result = 0, r;
144 	int		i;
145 
146 	DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
147 	    "ino=%x sysino=%llx pil=%x ih_size=%x ih_lst=%x\n",
148 	    ino_p->ino_ino, ino_p->ino_sysino, ino_p->ino_pil,
149 	    ino_p->ino_ih_size, ino_p->ino_ih_head);
150 
151 	for (i = 0; i < ino_p->ino_ih_size; i++, ih_p = ih_p->ih_next) {
152 		dev_info_t *dip = ih_p->ih_dip;
153 		uint_t (*handler)() = ih_p->ih_handler;
154 		caddr_t arg1 = ih_p->ih_handler_arg1;
155 		caddr_t arg2 = ih_p->ih_handler_arg2;
156 
157 		if (ih_p->ih_intr_state == PX_INTR_STATE_DISABLE) {
158 			DBG(DBG_INTX_INTR, px_p->px_dip,
159 			    "px_intx_intr: %s%d interrupt %d is disabled\n",
160 			    ddi_driver_name(dip), ddi_get_instance(dip),
161 			    ino_p->ino_ino);
162 
163 			continue;
164 		}
165 
166 		DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
167 		    "ino=%x handler=%p arg1 =%p arg2 = %p\n",
168 		    ino_p->ino_ino, handler, arg1, arg2);
169 
170 		DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
171 		    void *, handler, caddr_t, arg1, caddr_t, arg2);
172 
173 		r = (*handler)(arg1, arg2);
174 
175 		/*
176 		 * Account for time used by this interrupt. Protect against
177 		 * conflicting writes to ih_ticks from ib_intr_dist_all() by
178 		 * using atomic ops.
179 		 */
180 
181 		if (ino_p->ino_pil <= LOCK_LEVEL)
182 			atomic_add_64(&ih_p->ih_ticks, intr_get_time());
183 
184 		DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
185 		    void *, handler, caddr_t, arg1, int, r);
186 
187 		result += r;
188 
189 		if (px_check_all_handlers)
190 			continue;
191 		if (result)
192 			break;
193 	}
194 
195 	if (!result && px_unclaimed_intr_block)
196 		return (px_spurintr(ino_p));
197 
198 	ino_p->ino_unclaimed = 0;
199 
200 	/* Clear the pending state */
201 	if (px_lib_intr_setstate(ino_p->ino_ib_p->ib_px_p->px_dip,
202 	    ino_p->ino_sysino, INTR_IDLE_STATE) != DDI_SUCCESS)
203 		return (DDI_INTR_UNCLAIMED);
204 
205 	return (DDI_INTR_CLAIMED);
206 }
207 
208 /*
209  * px_msiq_intr (MSI/X or PCIe MSG interrupt handler)
210  *
211  * This routine is used as wrapper around interrupt handlers installed by child
212  * device drivers.  This routine invokes the driver interrupt handlers and
213  * examines the return codes.
214  *
215  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
216  * least one handler claims the interrupt then the counter is halved and the
217  * interrupt state machine is idled. If no handler claims the interrupt then
218  * the counter is incremented by one and the state machine is idled.
219  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
220  * then the interrupt state machine is not idled thus preventing any further
221  * interrupts on that ino. The state machine will only be idled again if a
222  * handler is subsequently added or removed.
223  *
224  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
225  * DDI_INTR_UNCLAIMED otherwise.
226  */
227 uint_t
228 px_msiq_intr(caddr_t arg)
229 {
230 	px_ib_ino_info_t	*ino_p = (px_ib_ino_info_t *)arg;
231 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
232 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
233 	px_msiq_t	*msiq_p = ino_p->ino_msiq_p;
234 	dev_info_t	*dip = px_p->px_dip;
235 	msiq_rec_t	msiq_rec, *msiq_rec_p = &msiq_rec;
236 	msiqhead_t	curr_msiq_rec_cnt, new_msiq_rec_cnt;
237 	msgcode_t	msg_code;
238 	px_ih_t		*ih_p;
239 	int		ret;
240 
241 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: msiq_id =%x ino=%x pil=%x "
242 	    "ih_size=%x ih_lst=%x\n", msiq_p->msiq_id, ino_p->ino_ino,
243 	    ino_p->ino_pil, ino_p->ino_ih_size, ino_p->ino_ih_head);
244 
245 	/* Read current MSIQ head index */
246 	px_lib_msiq_gethead(dip, msiq_p->msiq_id, &curr_msiq_rec_cnt);
247 	msiq_p->msiq_curr = (uint64_t)((caddr_t)msiq_p->msiq_base +
248 	    curr_msiq_rec_cnt * sizeof (msiq_rec_t));
249 	new_msiq_rec_cnt = curr_msiq_rec_cnt;
250 
251 	/* Read next MSIQ record */
252 	px_lib_get_msiq_rec(dip, msiq_p, msiq_rec_p);
253 
254 	/*
255 	 * Process current MSIQ record as long as record type
256 	 * field is non-zero.
257 	 */
258 	while (msiq_rec_p->msiq_rec_type) {
259 		DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSIQ RECORD, "
260 		    "msiq_rec_type 0x%llx msiq_rec_rid 0x%llx\n",
261 		    msiq_rec_p->msiq_rec_type, msiq_rec_p->msiq_rec_rid);
262 
263 		/* Get the pointer next EQ record */
264 		msiq_p->msiq_curr = (uint64_t)
265 		    ((caddr_t)msiq_p->msiq_curr + sizeof (msiq_rec_t));
266 
267 		/* Check for overflow condition */
268 		if (msiq_p->msiq_curr >= (uint64_t)((caddr_t)msiq_p->msiq_base +
269 		    msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t)))
270 			msiq_p->msiq_curr = msiq_p->msiq_base;
271 
272 		/* Check MSIQ record type */
273 		switch (msiq_rec_p->msiq_rec_type) {
274 		case MSG_REC:
275 			msg_code = msiq_rec_p->msiq_rec_data.msg.msg_code;
276 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: PCIE MSG "
277 			    "record, msg type 0x%x\n", msg_code);
278 			break;
279 		case MSI32_REC:
280 		case MSI64_REC:
281 			msg_code = msiq_rec_p->msiq_rec_data.msi.msi_data;
282 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSI record, "
283 			    "msi 0x%x\n", msg_code);
284 
285 			/* Clear MSI state */
286 			px_lib_msi_setstate(dip, (msinum_t)msg_code,
287 			    PCI_MSI_STATE_IDLE);
288 			break;
289 		default:
290 			msg_code = 0;
291 			cmn_err(CE_WARN, "%s%d: px_msiq_intr: 0x%x MSIQ "
292 			    "record type is not supported",
293 			    ddi_driver_name(dip), ddi_get_instance(dip),
294 			    msiq_rec_p->msiq_rec_type);
295 			goto next_rec;
296 		}
297 
298 		ih_p = ino_p->ino_ih_start;
299 
300 		/*
301 		 * Scan through px_ih_t linked list, searching for the
302 		 * right px_ih_t, matching MSIQ record data.
303 		 */
304 		while ((ih_p) && (ih_p->ih_msg_code != msg_code) &&
305 		    (ih_p->ih_rec_type != msiq_rec_p->msiq_rec_type))
306 			ih_p = ih_p->ih_next;
307 
308 		if ((ih_p->ih_msg_code == msg_code) &&
309 		    (ih_p->ih_rec_type == msiq_rec_p->msiq_rec_type)) {
310 			dev_info_t *dip = ih_p->ih_dip;
311 			uint_t (*handler)() = ih_p->ih_handler;
312 			caddr_t arg1 = ih_p->ih_handler_arg1;
313 			caddr_t arg2 = ih_p->ih_handler_arg2;
314 
315 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: ino=%x data=%x "
316 			    "handler=%p arg1 =%p arg2=%p\n", ino_p->ino_ino,
317 			    msg_code, handler, arg1, arg2);
318 
319 			DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
320 			    void *, handler, caddr_t, arg1, caddr_t, arg2);
321 
322 			/*
323 			 * Special case for PCIE Error Messages.
324 			 * The current frame work doesn't fit PCIE Err Msgs
325 			 * This should be fixed when PCIE MESSAGES as a whole
326 			 * is architected correctly.
327 			 */
328 			if ((msg_code == PCIE_MSG_CODE_ERR_COR) ||
329 			    (msg_code == PCIE_MSG_CODE_ERR_NONFATAL) ||
330 			    (msg_code == PCIE_MSG_CODE_ERR_FATAL)) {
331 				ret = px_err_fabric_intr(px_p, msg_code,
332 				    msiq_rec_p->msiq_rec_rid);
333 			} else
334 				ret = (*handler)(arg1, arg2);
335 
336 			/*
337 			 * Account for time used by this interrupt. Protect
338 			 * against conflicting writes to ih_ticks from
339 			 * ib_intr_dist_all() by using atomic ops.
340 			 */
341 
342 			if (ino_p->ino_pil <= LOCK_LEVEL)
343 				atomic_add_64(&ih_p->ih_ticks, intr_get_time());
344 
345 			DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
346 			    void *, handler, caddr_t, arg1, int, ret);
347 		} else {
348 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr:"
349 			    "Not found matching MSIQ record\n");
350 
351 			/* px_spurintr(ino_p); */
352 			ino_p->ino_unclaimed++;
353 		}
354 
355 next_rec:
356 		new_msiq_rec_cnt++;
357 
358 		/* Zero out msiq_rec_type field */
359 		msiq_rec_p->msiq_rec_type = 0;
360 
361 		/* Read next MSIQ record */
362 		px_lib_get_msiq_rec(dip, msiq_p, msiq_rec_p);
363 	}
364 
365 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: No of MSIQ recs processed %x\n",
366 	    (new_msiq_rec_cnt - curr_msiq_rec_cnt));
367 
368 	/*  Update MSIQ head index with no of MSIQ records processed */
369 	if (new_msiq_rec_cnt > curr_msiq_rec_cnt)  {
370 		if (new_msiq_rec_cnt >= msiq_state_p->msiq_rec_cnt)
371 			new_msiq_rec_cnt -= msiq_state_p->msiq_rec_cnt;
372 
373 		px_lib_msiq_sethead(dip, msiq_p->msiq_id, new_msiq_rec_cnt);
374 	}
375 
376 	/* Clear the pending state */
377 	if (px_lib_intr_setstate(dip, ino_p->ino_sysino,
378 	    INTR_IDLE_STATE) != DDI_SUCCESS)
379 		return (DDI_INTR_UNCLAIMED);
380 
381 	return (DDI_INTR_CLAIMED);
382 }
383 
384 dev_info_t *
385 px_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip)
386 {
387 	dev_info_t	*cdip = rdip;
388 
389 	for (; ddi_get_parent(cdip) != dip; cdip = ddi_get_parent(cdip))
390 		;
391 
392 	return (cdip);
393 }
394 
/*
 * Default class to pil value mapping.
 *
 * Passed to px_class_to_val() by px_class_to_pil() as the built-in table.
 * Each row is presumably { class_code, class_mask, class_val } (the field
 * names px_match_class_val() reads); a device matches a row when its
 * "class-code" property equals class_code under class_mask.  Rows are
 * tried in order and the first match wins.
 */
px_class_val_t px_default_pil [] = {
	{0x000000, 0xff0000, 0x1},	/* Class code for pre-2.0 devices */
	{0x010000, 0xff0000, 0x4},	/* Mass Storage Controller */
	{0x020000, 0xff0000, 0x6},	/* Network Controller */
	{0x030000, 0xff0000, 0x9},	/* Display Controller */
	{0x040000, 0xff0000, 0x9},	/* Multimedia Controller */
	{0x050000, 0xff0000, 0xb},	/* Memory Controller */
	{0x060000, 0xff0000, 0xb},	/* Bridge Controller */
	{0x0c0000, 0xffff00, 0x9},	/* Serial Bus, FireWire (IEEE 1394) */
	{0x0c0100, 0xffff00, 0x4},	/* Serial Bus, ACCESS.bus */
	{0x0c0200, 0xffff00, 0x4},	/* Serial Bus, SSA */
	{0x0c0300, 0xffff00, 0x9},	/* Serial Bus Universal Serial Bus */
	{0x0c0400, 0xffff00, 0x6},	/* Serial Bus, Fibre Channel */
	{0x0c0600, 0xffff00, 0x6}	/* Serial Bus, Infiniband */
};
411 
/*
 * Default class to intr_weight value mapping (% of CPU).  A driver.conf
 * entry on or above the pci node like
 *
 *	pci-class-intr-weights= 0x020000, 0xff0000, 30;
 *
 * can be used to augment or override entries in the default table below.
 *
 * NB: The values below give NICs preference on redistribution, and provide
 * NICs some isolation from other interrupt sources. We need better
 * interfaces that allow the NIC driver to identify a specific NIC instance
 * as high bandwidth, and thus deserving of separation from other low
 * bandwidth NICs and of additional isolation from other interrupt sources.
 *
 * NB: We treat Infiniband like a NIC.
 *
 * Rows are tried in order by px_match_class_val(); the first match wins.
 */
px_class_val_t px_default_intr_weight [] = {
	{0x020000, 0xff0000, 35},	/* Network Controller */
	{0x010000, 0xff0000, 10},	/* Mass Storage Controller */
	{0x0c0400, 0xffff00, 10},	/* Serial Bus, Fibre Channel */
	{0x0c0600, 0xffff00, 50}	/* Serial Bus, Infiniband */
};
434 
435 static uint32_t
436 px_match_class_val(uint32_t key, px_class_val_t *rec_p, int nrec,
437     uint32_t default_val)
438 {
439 	int	i;
440 
441 	for (i = 0; i < nrec; rec_p++, i++) {
442 		if ((rec_p->class_code & rec_p->class_mask) ==
443 		    (key & rec_p->class_mask))
444 			return (rec_p->class_val);
445 	}
446 
447 	return (default_val);
448 }
449 
450 /*
451  * px_class_to_val
452  *
453  * Return the configuration value, based on class code and sub class code,
454  * from the specified property based or default px_class_val_t table.
455  */
456 uint32_t
457 px_class_to_val(dev_info_t *rdip, char *property_name, px_class_val_t *rec_p,
458     int nrec, uint32_t default_val)
459 {
460 	int property_len;
461 	uint32_t class_code;
462 	px_class_val_t *conf;
463 	uint32_t val = default_val;
464 
465 	/*
466 	 * Use the "class-code" property to get the base and sub class
467 	 * codes for the requesting device.
468 	 */
469 	class_code = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, rdip,
470 	    DDI_PROP_DONTPASS, "class-code", -1);
471 
472 	if (class_code == -1)
473 		return (val);
474 
475 	/* look up the val from the default table */
476 	val = px_match_class_val(class_code, rec_p, nrec, val);
477 
478 	/* see if there is a more specific property specified value */
479 	if (ddi_getlongprop(DDI_DEV_T_ANY, rdip, DDI_PROP_NOTPROM,
480 	    property_name, (caddr_t)&conf, &property_len))
481 		return (val);
482 
483 	if ((property_len % sizeof (px_class_val_t)) == 0)
484 		val = px_match_class_val(class_code, conf,
485 		    property_len / sizeof (px_class_val_t), val);
486 	kmem_free(conf, property_len);
487 	return (val);
488 }
489 
490 /* px_class_to_pil: return the pil for a given device. */
491 uint32_t
492 px_class_to_pil(dev_info_t *rdip)
493 {
494 	uint32_t pil;
495 
496 	/* default pil is 0 (uninitialized) */
497 	pil = px_class_to_val(rdip,
498 	    "pci-class-priorities", px_default_pil,
499 	    sizeof (px_default_pil) / sizeof (px_class_val_t), 0);
500 
501 	/* range check the result */
502 	if (pil >= 0xf)
503 		pil = 0;
504 
505 	return (pil);
506 }
507 
508 /* px_class_to_intr_weight: return the intr_weight for a given device. */
509 static int32_t
510 px_class_to_intr_weight(dev_info_t *rdip)
511 {
512 	int32_t intr_weight;
513 
514 	/* default weight is 0% */
515 	intr_weight = px_class_to_val(rdip,
516 	    "pci-class-intr-weights", px_default_intr_weight,
517 	    sizeof (px_default_intr_weight) / sizeof (px_class_val_t), 0);
518 
519 	/* range check the result */
520 	if (intr_weight < 0)
521 		intr_weight = 0;
522 	if (intr_weight > 1000)
523 		intr_weight = 1000;
524 
525 	return (intr_weight);
526 }
527 
528 /* ARGSUSED */
529 int
530 px_intx_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
531     ddi_intr_handle_impl_t *hdlp, void *result)
532 {
533 	px_t	*px_p = DIP_TO_STATE(dip);
534 	int	ret = DDI_SUCCESS;
535 
536 	DBG(DBG_INTROPS, dip, "px_intx_ops: dip=%x rdip=%x intr_op=%x "
537 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
538 
539 	switch (intr_op) {
540 	case DDI_INTROP_GETCAP:
541 		ret = pci_intx_get_cap(rdip, (int *)result);
542 		break;
543 	case DDI_INTROP_SETCAP:
544 		DBG(DBG_INTROPS, dip, "px_intx_ops: SetCap is not supported\n");
545 		ret = DDI_ENOTSUP;
546 		break;
547 	case DDI_INTROP_ALLOC:
548 		*(int *)result = hdlp->ih_scratch1;
549 		break;
550 	case DDI_INTROP_FREE:
551 		break;
552 	case DDI_INTROP_GETPRI:
553 		*(int *)result = hdlp->ih_pri ?
554 		    hdlp->ih_pri : px_class_to_pil(rdip);
555 		break;
556 	case DDI_INTROP_SETPRI:
557 		break;
558 	case DDI_INTROP_ADDISR:
559 		ret = px_add_intx_intr(dip, rdip, hdlp);
560 		break;
561 	case DDI_INTROP_REMISR:
562 		ret = px_rem_intx_intr(dip, rdip, hdlp);
563 		break;
564 	case DDI_INTROP_ENABLE:
565 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
566 		    hdlp->ih_vector, PX_INTR_STATE_ENABLE, 0, 0);
567 		break;
568 	case DDI_INTROP_DISABLE:
569 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
570 		    hdlp->ih_vector, PX_INTR_STATE_DISABLE, 0, 0);
571 		break;
572 	case DDI_INTROP_SETMASK:
573 		ret = pci_intx_set_mask(rdip);
574 		break;
575 	case DDI_INTROP_CLRMASK:
576 		ret = pci_intx_clr_mask(rdip);
577 		break;
578 	case DDI_INTROP_GETPENDING:
579 		ret = pci_intx_get_pending(rdip, (int *)result);
580 		break;
581 	case DDI_INTROP_NINTRS:
582 	case DDI_INTROP_NAVAIL:
583 		*(int *)result = i_ddi_get_nintrs(rdip);
584 		break;
585 	default:
586 		ret = DDI_ENOTSUP;
587 		break;
588 	}
589 
590 	return (ret);
591 }
592 
593 /* ARGSUSED */
594 int
595 px_msix_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
596     ddi_intr_handle_impl_t *hdlp, void *result)
597 {
598 	px_t			*px_p = DIP_TO_STATE(dip);
599 	px_msi_state_t		*msi_state_p = &px_p->px_ib_p->ib_msi_state;
600 	msiq_rec_type_t		msiq_rec_type;
601 	msi_type_t		msi_type;
602 	uint64_t		msi_addr;
603 	msinum_t		msi_num;
604 	msiqid_t		msiq_id;
605 	uint_t			nintrs;
606 	int			i, ret = DDI_SUCCESS;
607 
608 	DBG(DBG_INTROPS, dip, "px_msix_ops: dip=%x rdip=%x intr_op=%x "
609 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
610 
611 	/* Check for MSI64 support */
612 	if (hdlp->ih_cap & DDI_INTR_FLAG_MSI64) {
613 		msiq_rec_type = MSI64_REC;
614 		msi_type = MSI64_TYPE;
615 		msi_addr = msi_state_p->msi_addr64 ?
616 		    msi_state_p->msi_addr64:msi_state_p->msi_addr32;
617 	} else {
618 		msiq_rec_type = MSI32_REC;
619 		msi_type = MSI32_TYPE;
620 		msi_addr = msi_state_p->msi_addr32;
621 	}
622 
623 	switch (intr_op) {
624 	case DDI_INTROP_GETCAP:
625 		ret = pci_msi_get_cap(rdip, hdlp->ih_type, (int *)result);
626 		break;
627 	case DDI_INTROP_SETCAP:
628 		DBG(DBG_INTROPS, dip, "px_msix_ops: SetCap is not supported\n");
629 		ret = DDI_ENOTSUP;
630 		break;
631 	case DDI_INTROP_ALLOC:
632 		/*
633 		 * We need to restrict this allocation in future
634 		 * based on Resource Management policies.
635 		 */
636 		if ((ret = px_msi_alloc(px_p, rdip, hdlp->ih_inum,
637 		    hdlp->ih_scratch1, hdlp->ih_scratch2, &msi_num,
638 		    (int *)result)) != DDI_SUCCESS) {
639 			DBG(DBG_INTROPS, dip, "px_msix_ops: MSI allocation "
640 			    "failed, rdip 0x%p inum 0x%x count 0x%x\n",
641 			    rdip, hdlp->ih_inum, hdlp->ih_scratch1);
642 
643 			return (ret);
644 		}
645 
646 		break;
647 	case DDI_INTROP_FREE:
648 		(void) pci_msi_disable_mode(rdip, hdlp->ih_type, hdlp->ih_inum);
649 		(void) pci_msi_unconfigure(rdip, hdlp->ih_type, hdlp->ih_inum);
650 		(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
651 		    hdlp->ih_scratch1);
652 		break;
653 	case DDI_INTROP_GETPRI:
654 		*(int *)result = hdlp->ih_pri ?
655 		    hdlp->ih_pri : px_class_to_pil(rdip);
656 		break;
657 	case DDI_INTROP_SETPRI:
658 		break;
659 	case DDI_INTROP_ADDISR:
660 		if ((ret = px_msi_get_msinum(px_p, hdlp->ih_dip,
661 		    hdlp->ih_inum, &msi_num)) != DDI_SUCCESS)
662 			return (ret);
663 
664 		if ((ret = px_add_msiq_intr(dip, rdip, hdlp,
665 		    msiq_rec_type, msi_num, &msiq_id)) != DDI_SUCCESS) {
666 			DBG(DBG_INTROPS, dip, "px_msix_ops: Add MSI handler "
667 			    "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num);
668 			return (ret);
669 		}
670 
671 		DBG(DBG_INTROPS, dip, "px_msix_ops: msiq used 0x%x\n", msiq_id);
672 
673 		if ((ret = px_lib_msi_setmsiq(dip, msi_num,
674 		    msiq_id, msi_type)) != DDI_SUCCESS) {
675 			(void) px_rem_msiq_intr(dip, rdip,
676 			    hdlp, msiq_rec_type, msi_num, msiq_id);
677 			return (ret);
678 		}
679 
680 		if ((ret = px_lib_msi_setstate(dip, msi_num,
681 		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS) {
682 			(void) px_rem_msiq_intr(dip, rdip,
683 			    hdlp, msiq_rec_type, msi_num, msiq_id);
684 			return (ret);
685 		}
686 
687 		hdlp->ih_vector = msi_num;
688 		break;
689 	case DDI_INTROP_DUPVEC:
690 		DBG(DBG_INTROPS, dip, "px_msix_ops: DupIsr is not supported\n");
691 		ret = DDI_ENOTSUP;
692 		break;
693 	case DDI_INTROP_REMISR:
694 		msi_num = hdlp->ih_vector;
695 
696 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
697 		    &msiq_id)) != DDI_SUCCESS)
698 			return (ret);
699 
700 		if ((ret = px_lib_msi_setstate(dip, msi_num,
701 		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS)
702 			return (ret);
703 
704 		ret = px_rem_msiq_intr(dip, rdip,
705 		    hdlp, msiq_rec_type, msi_num, msiq_id);
706 
707 		hdlp->ih_vector = 0;
708 		break;
709 	case DDI_INTROP_ENABLE:
710 		msi_num = hdlp->ih_vector;
711 
712 		if ((ret = px_lib_msi_setvalid(dip, msi_num,
713 		    PCI_MSI_VALID)) != DDI_SUCCESS)
714 			return (ret);
715 
716 		if (pci_is_msi_enabled(rdip, hdlp->ih_type) != DDI_SUCCESS) {
717 			nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
718 
719 			if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
720 			    nintrs, hdlp->ih_inum, msi_addr,
721 			    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
722 				return (ret);
723 
724 			if ((ret = pci_msi_enable_mode(rdip, hdlp->ih_type,
725 			    hdlp->ih_inum)) != DDI_SUCCESS)
726 				return (ret);
727 		}
728 
729 		if ((ret = pci_msi_clr_mask(rdip, hdlp->ih_type,
730 		    hdlp->ih_inum)) != DDI_SUCCESS)
731 			return (ret);
732 
733 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
734 		    &msiq_id)) != DDI_SUCCESS)
735 			return (ret);
736 
737 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
738 		    px_msiqid_to_devino(px_p, msiq_id), PX_INTR_STATE_ENABLE,
739 		    msiq_rec_type, msi_num);
740 
741 		break;
742 	case DDI_INTROP_DISABLE:
743 		msi_num = hdlp->ih_vector;
744 
745 		if ((ret = pci_msi_set_mask(rdip, hdlp->ih_type,
746 		    hdlp->ih_inum)) != DDI_SUCCESS)
747 			return (ret);
748 
749 		if ((ret = px_lib_msi_setvalid(dip, msi_num,
750 		    PCI_MSI_INVALID)) != DDI_SUCCESS)
751 			return (ret);
752 
753 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
754 		    &msiq_id)) != DDI_SUCCESS)
755 			return (ret);
756 
757 		ret = px_ib_update_intr_state(px_p, rdip,
758 		    hdlp->ih_inum, px_msiqid_to_devino(px_p, msiq_id),
759 		    PX_INTR_STATE_DISABLE, msiq_rec_type, msi_num);
760 
761 		break;
762 	case DDI_INTROP_BLOCKENABLE:
763 		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
764 		msi_num = hdlp->ih_vector;
765 
766 		if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
767 		    nintrs, hdlp->ih_inum, msi_addr,
768 		    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
769 			return (ret);
770 
771 		for (i = 0; i < nintrs; i++, msi_num++) {
772 			if ((ret = px_lib_msi_setvalid(dip, msi_num,
773 			    PCI_MSI_VALID)) != DDI_SUCCESS)
774 				return (ret);
775 
776 			if ((ret = px_lib_msi_getmsiq(dip, msi_num,
777 			    &msiq_id)) != DDI_SUCCESS)
778 				return (ret);
779 
780 			if ((ret = px_ib_update_intr_state(px_p, rdip,
781 			    hdlp->ih_inum + i, px_msiqid_to_devino(px_p,
782 			    msiq_id), PX_INTR_STATE_ENABLE, msiq_rec_type,
783 			    msi_num)) != DDI_SUCCESS)
784 				return (ret);
785 		}
786 
787 		ret = pci_msi_enable_mode(rdip, hdlp->ih_type, hdlp->ih_inum);
788 		break;
789 	case DDI_INTROP_BLOCKDISABLE:
790 		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
791 		msi_num = hdlp->ih_vector;
792 
793 		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type,
794 		    hdlp->ih_inum)) != DDI_SUCCESS)
795 			return (ret);
796 
797 		for (i = 0; i < nintrs; i++, msi_num++) {
798 			if ((ret = px_lib_msi_setvalid(dip, msi_num,
799 			    PCI_MSI_INVALID)) != DDI_SUCCESS)
800 				return (ret);
801 
802 			if ((ret = px_lib_msi_getmsiq(dip, msi_num,
803 			    &msiq_id)) != DDI_SUCCESS)
804 				return (ret);
805 
806 			if ((ret = px_ib_update_intr_state(px_p, rdip,
807 			    hdlp->ih_inum + i, px_msiqid_to_devino(px_p,
808 			    msiq_id), PX_INTR_STATE_DISABLE, msiq_rec_type,
809 			    msi_num)) != DDI_SUCCESS)
810 				return (ret);
811 		}
812 
813 		break;
814 	case DDI_INTROP_SETMASK:
815 		ret = pci_msi_set_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
816 		break;
817 	case DDI_INTROP_CLRMASK:
818 		ret = pci_msi_clr_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
819 		break;
820 	case DDI_INTROP_GETPENDING:
821 		ret = pci_msi_get_pending(rdip, hdlp->ih_type,
822 		    hdlp->ih_inum, (int *)result);
823 		break;
824 	case DDI_INTROP_NINTRS:
825 		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
826 		break;
827 	case DDI_INTROP_NAVAIL:
828 		/* XXX - a new interface may be needed */
829 		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
830 		break;
831 	default:
832 		ret = DDI_ENOTSUP;
833 		break;
834 	}
835 
836 	return (ret);
837 }
838 
/*
 * Layout of the "pci_intrs" named kstat.  There is a single instance of
 * this structure: px_create_intr_kstats() points every interrupt kstat's
 * ks_data at it, and px_ks_update() refills it for whichever handler is
 * being read.
 */
static struct {
	kstat_named_t pxintr_ks_name;	/* driver name + instance */
	kstat_named_t pxintr_ks_type;	/* "fixed" or "msi" */
	kstat_named_t pxintr_ks_cpu;	/* ino_cpuid of the handler's ino */
	kstat_named_t pxintr_ks_pil;	/* ino_pil of the handler's ino */
	kstat_named_t pxintr_ks_time;	/* ih_nsec + ih_ticks converted to ns */
	kstat_named_t pxintr_ks_ino;	/* device ino */
	kstat_named_t pxintr_ks_cookie;	/* sysino for that ino */
	kstat_named_t pxintr_ks_devpath; /* path of the interrupting device */
	kstat_named_t pxintr_ks_buspath; /* path of the px nexus */
} pxintr_ks_template = {
	{ "name",	KSTAT_DATA_CHAR },
	{ "type",	KSTAT_DATA_CHAR },
	{ "cpu",	KSTAT_DATA_UINT64 },
	{ "pil",	KSTAT_DATA_UINT64 },
	{ "time",	KSTAT_DATA_UINT64 },
	{ "ino",	KSTAT_DATA_UINT64 },
	{ "cookie",	KSTAT_DATA_UINT64 },
	{ "devpath",	KSTAT_DATA_STRING },
	{ "buspath",	KSTAT_DATA_STRING },
};

/* Source of kstat instance numbers; bumped atomically for each new kstat */
static uint32_t pxintr_ks_instance;
/* Installed as ks_lock on every kstat that shares pxintr_ks_template */
kmutex_t pxintr_ks_template_lock;
863 
864 int
865 px_ks_update(kstat_t *ksp, int rw)
866 {
867 	px_ih_t *ih_p = ksp->ks_private;
868 	int maxlen = sizeof (pxintr_ks_template.pxintr_ks_name.value.c);
869 	px_ib_t *ib_p = ih_p->ih_ino_p->ino_ib_p;
870 	px_t *px_p = ib_p->ib_px_p;
871 	devino_t ino;
872 	sysino_t sysino;
873 	char ih_devpath[MAXPATHLEN];
874 	char ih_buspath[MAXPATHLEN];
875 
876 	ino = ih_p->ih_ino_p->ino_ino;
877 	(void) px_lib_intr_devino_to_sysino(px_p->px_dip, ino, &sysino);
878 
879 	(void) snprintf(pxintr_ks_template.pxintr_ks_name.value.c, maxlen,
880 	    "%s%d", ddi_driver_name(ih_p->ih_dip),
881 	    ddi_get_instance(ih_p->ih_dip));
882 
883 	(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
884 	    (ih_p->ih_rec_type == 0) ? "fixed" : "msi");
885 	pxintr_ks_template.pxintr_ks_cpu.value.ui64 = ih_p->ih_ino_p->ino_cpuid;
886 	pxintr_ks_template.pxintr_ks_pil.value.ui64 = ih_p->ih_ino_p->ino_pil;
887 	pxintr_ks_template.pxintr_ks_time.value.ui64 =
888 	    ih_p->ih_nsec + (uint64_t)
889 	    tick2ns((hrtime_t)ih_p->ih_ticks, ih_p->ih_ino_p->ino_cpuid);
890 	pxintr_ks_template.pxintr_ks_ino.value.ui64 = ino;
891 	pxintr_ks_template.pxintr_ks_cookie.value.ui64 = sysino;
892 
893 	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
894 	(void) ddi_pathname(px_p->px_dip, ih_buspath);
895 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_devpath, ih_devpath);
896 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_buspath, ih_buspath);
897 
898 	return (0);
899 }
900 
901 void
902 px_create_intr_kstats(px_ih_t *ih_p)
903 {
904 	msiq_rec_type_t rec_type = ih_p->ih_rec_type;
905 
906 	ASSERT(ih_p->ih_ksp == NULL);
907 
908 	/*
909 	 * Create pci_intrs::: kstats for all ih types except messages,
910 	 * which represent unusual conditions and don't need to be tracked.
911 	 */
912 	if (rec_type == 0 || rec_type == MSI32_REC || rec_type == MSI64_REC) {
913 		ih_p->ih_ksp = kstat_create("pci_intrs",
914 		    atomic_inc_32_nv(&pxintr_ks_instance), "config",
915 		    "interrupts", KSTAT_TYPE_NAMED,
916 		    sizeof (pxintr_ks_template) / sizeof (kstat_named_t),
917 		    KSTAT_FLAG_VIRTUAL);
918 	}
919 	if (ih_p->ih_ksp != NULL) {
920 		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
921 		ih_p->ih_ksp->ks_lock = &pxintr_ks_template_lock;
922 		ih_p->ih_ksp->ks_data = &pxintr_ks_template;
923 		ih_p->ih_ksp->ks_private = ih_p;
924 		ih_p->ih_ksp->ks_update = px_ks_update;
925 	}
926 }
927 
928 /*
929  * px_add_intx_intr:
930  *
931  * This function is called to register INTx and legacy hardware
932  * interrupt pins interrupts.
933  */
934 int
935 px_add_intx_intr(dev_info_t *dip, dev_info_t *rdip,
936     ddi_intr_handle_impl_t *hdlp)
937 {
938 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
939 	px_ib_t		*ib_p = px_p->px_ib_p;
940 	devino_t	ino;
941 	px_ih_t		*ih_p;
942 	px_ib_ino_info_t *ino_p;
943 	int32_t		weight;
944 	int		ret = DDI_SUCCESS;
945 
946 	ino = hdlp->ih_vector;
947 
948 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: rdip=%s%d ino=%x "
949 	    "handler=%x arg1=%x arg2=%x\n", ddi_driver_name(rdip),
950 	    ddi_get_instance(rdip), ino, hdlp->ih_cb_func,
951 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
952 
953 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum,
954 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, 0, 0);
955 
956 	mutex_enter(&ib_p->ib_ino_lst_mutex);
957 
958 	if (ino_p = px_ib_locate_ino(ib_p, ino)) {	/* sharing ino */
959 		uint32_t intr_index = hdlp->ih_inum;
960 		if (px_ib_ino_locate_intr(ino_p, rdip, intr_index, 0, 0)) {
961 			DBG(DBG_A_INTX, dip, "px_add_intx_intr: "
962 			    "dup intr #%d\n", intr_index);
963 
964 			ret = DDI_FAILURE;
965 			goto fail1;
966 		}
967 
968 		/* Save mondo value in hdlp */
969 		hdlp->ih_vector = ino_p->ino_sysino;
970 
971 		if ((ret = px_ib_ino_add_intr(px_p, ino_p, ih_p))
972 		    != DDI_SUCCESS)
973 			goto fail1;
974 	} else {
975 		ino_p = px_ib_new_ino(ib_p, ino, ih_p);
976 
977 		if (hdlp->ih_pri == 0)
978 			hdlp->ih_pri = px_class_to_pil(rdip);
979 
980 		/* Save mondo value in hdlp */
981 		hdlp->ih_vector = ino_p->ino_sysino;
982 
983 		DBG(DBG_A_INTX, dip, "px_add_intx_intr: pil=0x%x mondo=0x%x\n",
984 		    hdlp->ih_pri, hdlp->ih_vector);
985 
986 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
987 		    (ddi_intr_handler_t *)px_intx_intr, (caddr_t)ino_p, NULL);
988 
989 		ret = i_ddi_add_ivintr(hdlp);
990 
991 		/*
992 		 * Restore original interrupt handler
993 		 * and arguments in interrupt handle.
994 		 */
995 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
996 		    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
997 
998 		if (ret != DDI_SUCCESS)
999 			goto fail2;
1000 
1001 		/* Save the pil for this ino */
1002 		ino_p->ino_pil = hdlp->ih_pri;
1003 
1004 		/* select cpu, saving it for sharing and removal */
1005 		ino_p->ino_cpuid = intr_dist_cpuid();
1006 
1007 		/* Enable interrupt */
1008 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
1009 	}
1010 
1011 	/* add weight to the cpu that we are already targeting */
1012 	weight = px_class_to_intr_weight(rdip);
1013 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1014 
1015 	ih_p->ih_ino_p = ino_p;
1016 	px_create_intr_kstats(ih_p);
1017 	if (ih_p->ih_ksp)
1018 		kstat_install(ih_p->ih_ksp);
1019 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1020 
1021 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: done! Interrupt 0x%x pil=%x\n",
1022 	    ino_p->ino_sysino, hdlp->ih_pri);
1023 
1024 	return (ret);
1025 fail2:
1026 	px_ib_delete_ino(ib_p, ino_p);
1027 fail1:
1028 	if (ih_p->ih_config_handle)
1029 		pci_config_teardown(&ih_p->ih_config_handle);
1030 
1031 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1032 	kmem_free(ih_p, sizeof (px_ih_t));
1033 
1034 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: Failed! Interrupt 0x%x "
1035 	    "pil=%x\n", ino_p->ino_sysino, hdlp->ih_pri);
1036 
1037 	return (ret);
1038 }
1039 
1040 /*
1041  * px_rem_intx_intr:
1042  *
1043  * This function is called to unregister INTx and legacy hardware
1044  * interrupt pins interrupts.
1045  */
1046 int
1047 px_rem_intx_intr(dev_info_t *dip, dev_info_t *rdip,
1048     ddi_intr_handle_impl_t *hdlp)
1049 {
1050 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1051 	px_ib_t		*ib_p = px_p->px_ib_p;
1052 	devino_t	ino;
1053 	cpuid_t		curr_cpu;
1054 	px_ib_ino_info_t	*ino_p;
1055 	px_ih_t		*ih_p;
1056 	int		ret = DDI_SUCCESS;
1057 
1058 	ino = hdlp->ih_vector;
1059 
1060 	DBG(DBG_R_INTX, dip, "px_rem_intx_intr: rdip=%s%d ino=%x\n",
1061 	    ddi_driver_name(rdip), ddi_get_instance(rdip), ino);
1062 
1063 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1064 
1065 	ino_p = px_ib_locate_ino(ib_p, ino);
1066 	ih_p = px_ib_ino_locate_intr(ino_p, rdip, hdlp->ih_inum, 0, 0);
1067 
1068 	/* Get the current cpu */
1069 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1070 	    &curr_cpu)) != DDI_SUCCESS)
1071 		goto fail;
1072 
1073 	if ((ret = px_ib_ino_rem_intr(px_p, ino_p, ih_p)) != DDI_SUCCESS)
1074 		goto fail;
1075 
1076 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1077 
1078 	if (ino_p->ino_ih_size == 0) {
1079 		if ((ret = px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
1080 		    INTR_DELIVERED_STATE)) != DDI_SUCCESS)
1081 			goto fail;
1082 
1083 		hdlp->ih_vector = ino_p->ino_sysino;
1084 		i_ddi_rem_ivintr(hdlp);
1085 
1086 		px_ib_delete_ino(ib_p, ino_p);
1087 		kmem_free(ino_p, sizeof (px_ib_ino_info_t));
1088 	} else {
1089 		/* Re-enable interrupt only if mapping regsiter still shared */
1090 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1091 	}
1092 
1093 fail:
1094 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1095 	return (ret);
1096 }
1097 
1098 /*
1099  * px_add_msiq_intr:
1100  *
1101  * This function is called to register MSI/Xs and PCIe message interrupts.
1102  */
1103 int
1104 px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1105     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1106     msgcode_t msg_code, msiqid_t *msiq_id_p)
1107 {
1108 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1109 	px_ib_t		*ib_p = px_p->px_ib_p;
1110 	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
1111 	devino_t	ino;
1112 	px_ih_t		*ih_p;
1113 	px_ib_ino_info_t	*ino_p;
1114 	int32_t		weight;
1115 	int		ret = DDI_SUCCESS;
1116 
1117 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=%x "
1118 	    "arg1=%x arg2=%x\n", ddi_driver_name(rdip), ddi_get_instance(rdip),
1119 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
1120 
1121 	if ((ret = px_msiq_alloc(px_p, rec_type, msiq_id_p)) != DDI_SUCCESS) {
1122 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1123 		    "msiq allocation failed\n");
1124 		return (ret);
1125 	}
1126 
1127 	ino = px_msiqid_to_devino(px_p, *msiq_id_p);
1128 
1129 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum, hdlp->ih_cb_func,
1130 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, rec_type, msg_code);
1131 
1132 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1133 
1134 	if (ino_p = px_ib_locate_ino(ib_p, ino)) {	/* sharing ino */
1135 		uint32_t intr_index = hdlp->ih_inum;
1136 		if (px_ib_ino_locate_intr(ino_p, rdip,
1137 		    intr_index, rec_type, msg_code)) {
1138 			DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1139 			    "dup intr #%d\n", intr_index);
1140 
1141 			ret = DDI_FAILURE;
1142 			goto fail1;
1143 		}
1144 
1145 		if ((ret = px_ib_ino_add_intr(px_p, ino_p, ih_p))
1146 		    != DDI_SUCCESS)
1147 			goto fail1;
1148 	} else {
1149 		ino_p = px_ib_new_ino(ib_p, ino, ih_p);
1150 
1151 		ino_p->ino_msiq_p = msiq_state_p->msiq_p +
1152 		    (*msiq_id_p - msiq_state_p->msiq_1st_msiq_id);
1153 
1154 		if (hdlp->ih_pri == 0)
1155 			hdlp->ih_pri = px_class_to_pil(rdip);
1156 
1157 		/* Save mondo value in hdlp */
1158 		hdlp->ih_vector = ino_p->ino_sysino;
1159 
1160 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: pil=0x%x mondo=0x%x\n",
1161 		    hdlp->ih_pri, hdlp->ih_vector);
1162 
1163 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1164 		    (ddi_intr_handler_t *)px_msiq_intr, (caddr_t)ino_p, NULL);
1165 
1166 		ret = i_ddi_add_ivintr(hdlp);
1167 
1168 		/*
1169 		 * Restore original interrupt handler
1170 		 * and arguments in interrupt handle.
1171 		 */
1172 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1173 		    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1174 
1175 		if (ret != DDI_SUCCESS)
1176 			goto fail2;
1177 
1178 		/* Save the pil for this ino */
1179 		ino_p->ino_pil = hdlp->ih_pri;
1180 
1181 		/* Enable MSIQ */
1182 		px_lib_msiq_setstate(dip, *msiq_id_p, PCI_MSIQ_STATE_IDLE);
1183 		px_lib_msiq_setvalid(dip, *msiq_id_p, PCI_MSIQ_VALID);
1184 
1185 		/* select cpu, saving it for sharing and removal */
1186 		ino_p->ino_cpuid = intr_dist_cpuid();
1187 
1188 		/* Enable interrupt */
1189 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino_p->ino_ino);
1190 	}
1191 
1192 	/* add weight to the cpu that we are already targeting */
1193 	weight = px_class_to_intr_weight(rdip);
1194 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1195 
1196 	ih_p->ih_ino_p = ino_p;
1197 	px_create_intr_kstats(ih_p);
1198 	if (ih_p->ih_ksp)
1199 		kstat_install(ih_p->ih_ksp);
1200 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1201 
1202 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: done! Interrupt 0x%x pil=%x\n",
1203 	    ino_p->ino_sysino, hdlp->ih_pri);
1204 
1205 	return (ret);
1206 fail2:
1207 	px_ib_delete_ino(ib_p, ino_p);
1208 fail1:
1209 	if (ih_p->ih_config_handle)
1210 		pci_config_teardown(&ih_p->ih_config_handle);
1211 
1212 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1213 	kmem_free(ih_p, sizeof (px_ih_t));
1214 
1215 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: Failed! Interrupt 0x%x pil=%x\n",
1216 	    ino_p->ino_sysino, hdlp->ih_pri);
1217 
1218 	return (ret);
1219 }
1220 
1221 /*
1222  * px_rem_msiq_intr:
1223  *
1224  * This function is called to unregister MSI/Xs and PCIe message interrupts.
1225  */
1226 int
1227 px_rem_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1228     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1229     msgcode_t msg_code, msiqid_t msiq_id)
1230 {
1231 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1232 	px_ib_t		*ib_p = px_p->px_ib_p;
1233 	devino_t	ino = px_msiqid_to_devino(px_p, msiq_id);
1234 	cpuid_t		curr_cpu;
1235 	px_ib_ino_info_t *ino_p;
1236 	px_ih_t		*ih_p;
1237 	int		ret = DDI_SUCCESS;
1238 
1239 	DBG(DBG_MSIQ, dip, "px_rem_msiq_intr: rdip=%s%d msiq_id=%x ino=%x\n",
1240 	    ddi_driver_name(rdip), ddi_get_instance(rdip), msiq_id, ino);
1241 
1242 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1243 
1244 	ino_p = px_ib_locate_ino(ib_p, ino);
1245 	ih_p = px_ib_ino_locate_intr(ino_p, rdip, hdlp->ih_inum,
1246 	    rec_type, msg_code);
1247 
1248 	/* Get the current cpu */
1249 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1250 	    &curr_cpu)) != DDI_SUCCESS)
1251 		goto fail;
1252 
1253 	if ((ret = px_ib_ino_rem_intr(px_p, ino_p, ih_p)) != DDI_SUCCESS)
1254 		goto fail;
1255 
1256 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1257 
1258 	if (ino_p->ino_ih_size == 0) {
1259 		if ((ret = px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
1260 		    INTR_DELIVERED_STATE)) != DDI_SUCCESS)
1261 			goto fail;
1262 
1263 		px_lib_msiq_setvalid(dip, px_devino_to_msiqid(px_p, ino),
1264 		    PCI_MSIQ_INVALID);
1265 
1266 		hdlp->ih_vector = ino_p->ino_sysino;
1267 		i_ddi_rem_ivintr(hdlp);
1268 
1269 		px_ib_delete_ino(ib_p, ino_p);
1270 
1271 		(void) px_msiq_free(px_p, msiq_id);
1272 		kmem_free(ino_p, sizeof (px_ib_ino_info_t));
1273 	} else {
1274 		/* Re-enable interrupt only if mapping regsiter still shared */
1275 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1276 	}
1277 
1278 fail:
1279 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1280 	return (ret);
1281 }
1282