xref: /titanic_51/usr/src/uts/sun4/io/px/px_intr.c (revision 11a2bb386c90df26ed2d2d6086a56cb503465e33)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * PX nexus interrupt handling:
31  *	PX device interrupt handler wrapper
32  *	PIL lookup routine
33  *	PX device interrupt related initchild code
34  */
35 
36 #include <sys/types.h>
37 #include <sys/kmem.h>
38 #include <sys/async.h>
39 #include <sys/spl.h>
40 #include <sys/sunddi.h>
41 #include <sys/fm/protocol.h>
42 #include <sys/fm/util.h>
43 #include <sys/machsystm.h>	/* e_ddi_nodeid_to_dip() */
44 #include <sys/ddi_impldefs.h>
45 #include <sys/sdt.h>
46 #include <sys/atomic.h>
47 #include "px_obj.h"
48 #include <sys/ontrap.h>
49 #include <sys/membar.h>
50 #include <sys/clock.h>
51 
52 /*
53  * interrupt jabber:
54  *
55  * When an interrupt line is jabbering, every time the state machine for the
56  * associated ino is idled, a new mondo will be sent and the ino will go into
57  * the pending state again. The mondo will cause a new call to
58  * px_intr_wrapper() which normally idles the ino's state machine which would
59  * precipitate another trip round the loop.
60  *
61  * The loop can be broken by preventing the ino's state machine from being
62  * idled when an interrupt line is jabbering. See the comment at the
63  * beginning of px_intr_wrapper() explaining how the 'interrupt jabber
64  * protection' code does this.
65  */
66 
67 /*LINTLIBRARY*/
68 
69 /*
70  * If the unclaimed interrupt count has reached the limit set by
71  * pci_unclaimed_intr_max within the time limit, then all interrupts
72  * on this ino is blocked by not idling the interrupt state machine.
73  */
74 static int
75 px_spurintr(px_ib_ino_info_t *ino_p)
76 {
77 	px_ih_t	*ih_p = ino_p->ino_ih_start;
78 	px_t	*px_p = ino_p->ino_ib_p->ib_px_p;
79 	char	*err_fmt_str;
80 	int	i;
81 
82 	if (ino_p->ino_unclaimed > px_unclaimed_intr_max)
83 		return (DDI_INTR_CLAIMED);
84 
85 	if (!ino_p->ino_unclaimed)
86 		ino_p->ino_spurintr_begin = ddi_get_lbolt();
87 
88 	ino_p->ino_unclaimed++;
89 
90 	if (ino_p->ino_unclaimed <= px_unclaimed_intr_max)
91 		goto clear;
92 
93 	if (drv_hztousec(ddi_get_lbolt() - ino_p->ino_spurintr_begin)
94 	    > px_spurintr_duration) {
95 		ino_p->ino_unclaimed = 0;
96 		goto clear;
97 	}
98 	err_fmt_str = "%s%d: ino 0x%x blocked";
99 	goto warn;
100 clear:
101 	/* Clear the pending state */
102 	if (px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
103 	    INTR_IDLE_STATE) != DDI_SUCCESS)
104 		return (DDI_INTR_UNCLAIMED);
105 
106 	err_fmt_str = "!%s%d: spurious interrupt from ino 0x%x";
107 warn:
108 	cmn_err(CE_WARN, err_fmt_str, NAMEINST(px_p->px_dip), ino_p->ino_ino);
109 	for (i = 0; i < ino_p->ino_ih_size; i++, ih_p = ih_p->ih_next)
110 		cmn_err(CE_CONT, "!%s-%d#%x ", NAMEINST(ih_p->ih_dip),
111 		    ih_p->ih_inum);
112 	cmn_err(CE_CONT, "!\n");
113 	return (DDI_INTR_CLAIMED);
114 }
115 
/*
 * Defined outside this file. The interrupt wrappers in this file add its
 * return value to a handler's ih_ticks after the handler has run.
 */
extern uint64_t intr_get_time(void);
117 
118 /*
119  * px_intx_intr (legacy or intx interrupt handler)
120  *
121  * This routine is used as wrapper around interrupt handlers installed by child
122  * device drivers.  This routine invokes the driver interrupt handlers and
123  * examines the return codes.
124  *
125  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
126  * least one handler claims the interrupt then the counter is halved and the
127  * interrupt state machine is idled. If no handler claims the interrupt then
128  * the counter is incremented by one and the state machine is idled.
129  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
130  * then the interrupt state machine is not idled thus preventing any further
131  * interrupts on that ino. The state machine will only be idled again if a
132  * handler is subsequently added or removed.
133  *
134  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
135  * DDI_INTR_UNCLAIMED otherwise.
136  */
137 uint_t
138 px_intx_intr(caddr_t arg)
139 {
140 	px_ib_ino_info_t *ino_p = (px_ib_ino_info_t *)arg;
141 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
142 	px_ih_t		*ih_p = ino_p->ino_ih_start;
143 	uint_t		result = 0, r;
144 	int		i;
145 
146 	DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
147 	    "ino=%x sysino=%llx pil=%x ih_size=%x ih_lst=%x\n",
148 	    ino_p->ino_ino, ino_p->ino_sysino, ino_p->ino_pil,
149 	    ino_p->ino_ih_size, ino_p->ino_ih_head);
150 
151 	for (i = 0; i < ino_p->ino_ih_size; i++, ih_p = ih_p->ih_next) {
152 		dev_info_t *dip = ih_p->ih_dip;
153 		uint_t (*handler)() = ih_p->ih_handler;
154 		caddr_t arg1 = ih_p->ih_handler_arg1;
155 		caddr_t arg2 = ih_p->ih_handler_arg2;
156 
157 		if (ih_p->ih_intr_state == PX_INTR_STATE_DISABLE) {
158 			DBG(DBG_INTX_INTR, px_p->px_dip,
159 			    "px_intx_intr: %s%d interrupt %d is disabled\n",
160 			    ddi_driver_name(dip), ddi_get_instance(dip),
161 			    ino_p->ino_ino);
162 
163 			continue;
164 		}
165 
166 		DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
167 		    "ino=%x handler=%p arg1 =%p arg2 = %p\n",
168 		    ino_p->ino_ino, handler, arg1, arg2);
169 
170 		DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
171 		    void *, handler, caddr_t, arg1, caddr_t, arg2);
172 
173 		r = (*handler)(arg1, arg2);
174 
175 		/*
176 		 * Account for time used by this interrupt. Protect against
177 		 * conflicting writes to ih_ticks from ib_intr_dist_all() by
178 		 * using atomic ops.
179 		 */
180 
181 		if (ino_p->ino_pil <= LOCK_LEVEL)
182 			atomic_add_64(&ih_p->ih_ticks, intr_get_time());
183 
184 		DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
185 		    void *, handler, caddr_t, arg1, int, r);
186 
187 		result += r;
188 
189 		if (px_check_all_handlers)
190 			continue;
191 		if (result)
192 			break;
193 	}
194 
195 	if (!result && px_unclaimed_intr_block)
196 		return (px_spurintr(ino_p));
197 
198 	ino_p->ino_unclaimed = 0;
199 
200 	/* Clear the pending state */
201 	if (px_lib_intr_setstate(ino_p->ino_ib_p->ib_px_p->px_dip,
202 	    ino_p->ino_sysino, INTR_IDLE_STATE) != DDI_SUCCESS)
203 		return (DDI_INTR_UNCLAIMED);
204 
205 	return (DDI_INTR_CLAIMED);
206 }
207 
208 /*
209  * px_msiq_intr (MSI/MSIX/MSG interrupt handler)
210  *
211  * This routine is used as wrapper around interrupt handlers installed by child
212  * device drivers.  This routine invokes the driver interrupt handlers and
213  * examines the return codes.
214  *
215  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
216  * least one handler claims the interrupt then the counter is halved and the
217  * interrupt state machine is idled. If no handler claims the interrupt then
218  * the counter is incremented by one and the state machine is idled.
219  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
220  * then the interrupt state machine is not idled thus preventing any further
221  * interrupts on that ino. The state machine will only be idled again if a
222  * handler is subsequently added or removed.
223  *
224  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
225  * DDI_INTR_UNCLAIMED otherwise.
226  */
227 uint_t
228 px_msiq_intr(caddr_t arg)
229 {
230 	px_ib_ino_info_t	*ino_p = (px_ib_ino_info_t *)arg;
231 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
232 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
233 	px_msiq_t	*msiq_p = ino_p->ino_msiq_p;
234 	dev_info_t	*dip = px_p->px_dip;
235 	msiq_rec_t	msiq_rec, *msiq_rec_p = &msiq_rec;
236 	msiqhead_t	curr_msiq_rec_cnt, new_msiq_rec_cnt;
237 	msgcode_t	msg_code;
238 	px_ih_t		*ih_p;
239 	int		ret;
240 
241 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: msiq_id =%x ino=%x pil=%x "
242 	    "ih_size=%x ih_lst=%x\n", msiq_p->msiq_id, ino_p->ino_ino,
243 	    ino_p->ino_pil, ino_p->ino_ih_size, ino_p->ino_ih_head);
244 
245 	/* Read current MSIQ head index */
246 	px_lib_msiq_gethead(dip, msiq_p->msiq_id, &curr_msiq_rec_cnt);
247 	msiq_p->msiq_curr = (uint64_t)((caddr_t)msiq_p->msiq_base +
248 	    curr_msiq_rec_cnt * sizeof (msiq_rec_t));
249 	new_msiq_rec_cnt = curr_msiq_rec_cnt;
250 
251 	/* Read next MSIQ record */
252 	px_lib_get_msiq_rec(dip, msiq_p, msiq_rec_p);
253 
254 	/*
255 	 * Process current MSIQ record as long as record type
256 	 * field is non-zero.
257 	 */
258 	while (msiq_rec_p->msiq_rec_type) {
259 		DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSIQ RECORD, "
260 		    "msiq_rec_type 0x%llx msiq_rec_rid 0x%llx\n",
261 		    msiq_rec_p->msiq_rec_type, msiq_rec_p->msiq_rec_rid);
262 
263 		/* Get the pointer next EQ record */
264 		msiq_p->msiq_curr = (uint64_t)
265 		    ((caddr_t)msiq_p->msiq_curr + sizeof (msiq_rec_t));
266 
267 		/* Check for overflow condition */
268 		if (msiq_p->msiq_curr >= (uint64_t)((caddr_t)msiq_p->msiq_base +
269 		    msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t)))
270 			msiq_p->msiq_curr = msiq_p->msiq_base;
271 
272 		/* Check MSIQ record type */
273 		switch (msiq_rec_p->msiq_rec_type) {
274 		case MSG_REC:
275 			msg_code = msiq_rec_p->msiq_rec_data.msg.msg_code;
276 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: PCIE MSG "
277 			    "record, msg type 0x%x\n", msg_code);
278 			break;
279 		case MSI32_REC:
280 		case MSI64_REC:
281 			msg_code = msiq_rec_p->msiq_rec_data.msi.msi_data;
282 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSI record, "
283 			    "msi 0x%x\n", msg_code);
284 
285 			/* Clear MSI state */
286 			px_lib_msi_setstate(dip, (msinum_t)msg_code,
287 			    PCI_MSI_STATE_IDLE);
288 			break;
289 		default:
290 			msg_code = 0;
291 			cmn_err(CE_WARN, "%s%d: px_msiq_intr: 0x%x MSIQ "
292 			    "record type is not supported",
293 			    ddi_driver_name(dip), ddi_get_instance(dip),
294 			    msiq_rec_p->msiq_rec_type);
295 			goto next_rec;
296 		}
297 
298 		ih_p = ino_p->ino_ih_start;
299 
300 		/*
301 		 * Scan through px_ih_t linked list, searching for the
302 		 * right px_ih_t, matching MSIQ record data.
303 		 */
304 		while ((ih_p) && (ih_p->ih_msg_code != msg_code) &&
305 		    (ih_p->ih_rec_type != msiq_rec_p->msiq_rec_type))
306 			ih_p = ih_p->ih_next;
307 
308 		if ((ih_p->ih_msg_code == msg_code) &&
309 		    (ih_p->ih_rec_type == msiq_rec_p->msiq_rec_type)) {
310 			dev_info_t *dip = ih_p->ih_dip;
311 			uint_t (*handler)() = ih_p->ih_handler;
312 			caddr_t arg1 = ih_p->ih_handler_arg1;
313 			caddr_t arg2 = ih_p->ih_handler_arg2;
314 
315 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: ino=%x data=%x "
316 			    "handler=%p arg1 =%p arg2=%p\n", ino_p->ino_ino,
317 			    msg_code, handler, arg1, arg2);
318 
319 			DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
320 			    void *, handler, caddr_t, arg1, caddr_t, arg2);
321 
322 			/*
323 			 * Special case for PCIE Error Messages.
324 			 * The current frame work doesn't fit PCIE Err Msgs
325 			 * This should be fixed when PCIE MESSAGES as a whole
326 			 * is architected correctly.
327 			 */
328 			if ((msg_code == PCIE_MSG_CODE_ERR_COR) ||
329 			    (msg_code == PCIE_MSG_CODE_ERR_NONFATAL) ||
330 			    (msg_code == PCIE_MSG_CODE_ERR_FATAL)) {
331 				ret = px_err_fabric_intr(px_p, msg_code,
332 				    msiq_rec_p->msiq_rec_rid);
333 			} else
334 				ret = (*handler)(arg1, arg2);
335 
336 			/*
337 			 * Account for time used by this interrupt. Protect
338 			 * against conflicting writes to ih_ticks from
339 			 * ib_intr_dist_all() by using atomic ops.
340 			 */
341 
342 			if (ino_p->ino_pil <= LOCK_LEVEL)
343 				atomic_add_64(&ih_p->ih_ticks, intr_get_time());
344 
345 			DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
346 			    void *, handler, caddr_t, arg1, int, ret);
347 		} else {
348 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr:"
349 			    "Not found matching MSIQ record\n");
350 
351 			/* px_spurintr(ino_p); */
352 			ino_p->ino_unclaimed++;
353 		}
354 
355 next_rec:
356 		new_msiq_rec_cnt++;
357 
358 		/* Zero out msiq_rec_type field */
359 		msiq_rec_p->msiq_rec_type = 0;
360 
361 		/* Read next MSIQ record */
362 		px_lib_get_msiq_rec(dip, msiq_p, msiq_rec_p);
363 	}
364 
365 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: No of MSIQ recs processed %x\n",
366 	    (new_msiq_rec_cnt - curr_msiq_rec_cnt));
367 
368 	/*  Update MSIQ head index with no of MSIQ records processed */
369 	if (new_msiq_rec_cnt > curr_msiq_rec_cnt)  {
370 		if (new_msiq_rec_cnt >= msiq_state_p->msiq_rec_cnt)
371 			new_msiq_rec_cnt -= msiq_state_p->msiq_rec_cnt;
372 
373 		px_lib_msiq_sethead(dip, msiq_p->msiq_id, new_msiq_rec_cnt);
374 	}
375 
376 	/* Clear the pending state */
377 	if (px_lib_intr_setstate(dip, ino_p->ino_sysino,
378 	    INTR_IDLE_STATE) != DDI_SUCCESS)
379 		return (DDI_INTR_UNCLAIMED);
380 
381 	return (DDI_INTR_CLAIMED);
382 }
383 
384 dev_info_t *
385 px_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip)
386 {
387 	dev_info_t	*cdip = rdip;
388 
389 	for (; ddi_get_parent(cdip) != dip; cdip = ddi_get_parent(cdip))
390 		;
391 
392 	return (cdip);
393 }
394 
/*
 * Default class to pil value mapping.
 *
 * This table is handed to px_class_to_val() by px_class_to_pil(), which
 * treats a resulting pil of 0xf or above as 0. Lookup is done by
 * px_match_class_val(), which returns the value of the FIRST entry whose
 * masked class code equals the masked key, so entry order matters.
 *
 * Each entry presumably reads { class code, class mask, pil }; the
 * px_class_val_t layout is declared elsewhere - confirm against its
 * definition.
 */
px_class_val_t px_default_pil [] = {
	{0x000000, 0xff0000, 0x1},	/* Class code for pre-2.0 devices */
	{0x010000, 0xff0000, 0x4},	/* Mass Storage Controller */
	{0x020000, 0xff0000, 0x6},	/* Network Controller */
	{0x030000, 0xff0000, 0x9},	/* Display Controller */
	{0x040000, 0xff0000, 0x9},	/* Multimedia Controller */
	{0x050000, 0xff0000, 0xb},	/* Memory Controller */
	{0x060000, 0xff0000, 0xb},	/* Bridge Controller */
	{0x0c0000, 0xffff00, 0x9},	/* Serial Bus, FireWire (IEEE 1394) */
	{0x0c0100, 0xffff00, 0x4},	/* Serial Bus, ACCESS.bus */
	{0x0c0200, 0xffff00, 0x4},	/* Serial Bus, SSA */
	{0x0c0300, 0xffff00, 0x9},	/* Serial Bus Universal Serial Bus */
	{0x0c0400, 0xffff00, 0x6},	/* Serial Bus, Fibre Channel */
	{0x0c0600, 0xffff00, 0x6}	/* Serial Bus, Infiniband */
};
411 
/*
 * Default class to intr_weight value mapping (% of CPU).  A driver.conf
 * entry on or above the pci node like
 *
 *	pci-class-intr-weights= 0x020000, 0xff0000, 30;
 *
 * can be used to augment or override entries in the default table below.
 *
 * NB: The values below give NICs preference on redistribution, and provide
 * NICs some isolation from other interrupt sources. We need better
 * interfaces that allow the NIC driver to identify a specific NIC instance
 * as high bandwidth, and thus deserving of separation from other low
 * bandwidth NICs and of additional isolation from other interrupt sources.
 *
 * NB: We treat Infiniband like a NIC.
 *
 * This table is handed to px_class_to_val() by px_class_to_intr_weight(),
 * which limits the resulting weight to the range 0..1000. The first
 * matching entry wins.
 */
px_class_val_t px_default_intr_weight [] = {
	{0x020000, 0xff0000, 35},	/* Network Controller */
	{0x010000, 0xff0000, 10},	/* Mass Storage Controller */
	{0x0c0400, 0xffff00, 10},	/* Serial Bus, Fibre Channel */
	{0x0c0600, 0xffff00, 50}	/* Serial Bus, Infiniband */
};
434 
435 static uint32_t
436 px_match_class_val(uint32_t key, px_class_val_t *rec_p, int nrec,
437     uint32_t default_val)
438 {
439 	int	i;
440 
441 	for (i = 0; i < nrec; rec_p++, i++) {
442 		if ((rec_p->class_code & rec_p->class_mask) ==
443 		    (key & rec_p->class_mask))
444 			return (rec_p->class_val);
445 	}
446 
447 	return (default_val);
448 }
449 
450 /*
451  * px_class_to_val
452  *
453  * Return the configuration value, based on class code and sub class code,
454  * from the specified property based or default px_class_val_t table.
455  */
456 uint32_t
457 px_class_to_val(dev_info_t *rdip, char *property_name, px_class_val_t *rec_p,
458     int nrec, uint32_t default_val)
459 {
460 	int property_len;
461 	uint32_t class_code;
462 	px_class_val_t *conf;
463 	uint32_t val = default_val;
464 
465 	/*
466 	 * Use the "class-code" property to get the base and sub class
467 	 * codes for the requesting device.
468 	 */
469 	class_code = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, rdip,
470 	    DDI_PROP_DONTPASS, "class-code", -1);
471 
472 	if (class_code == -1)
473 		return (val);
474 
475 	/* look up the val from the default table */
476 	val = px_match_class_val(class_code, rec_p, nrec, val);
477 
478 	/* see if there is a more specific property specified value */
479 	if (ddi_getlongprop(DDI_DEV_T_ANY, rdip, DDI_PROP_NOTPROM,
480 	    property_name, (caddr_t)&conf, &property_len))
481 		return (val);
482 
483 	if ((property_len % sizeof (px_class_val_t)) == 0)
484 		val = px_match_class_val(class_code, conf,
485 		    property_len / sizeof (px_class_val_t), val);
486 	kmem_free(conf, property_len);
487 	return (val);
488 }
489 
490 /* px_class_to_pil: return the pil for a given device. */
491 uint32_t
492 px_class_to_pil(dev_info_t *rdip)
493 {
494 	uint32_t pil;
495 
496 	/* default pil is 0 (uninitialized) */
497 	pil = px_class_to_val(rdip,
498 	    "pci-class-priorities", px_default_pil,
499 	    sizeof (px_default_pil) / sizeof (px_class_val_t), 0);
500 
501 	/* range check the result */
502 	if (pil >= 0xf)
503 		pil = 0;
504 
505 	return (pil);
506 }
507 
508 /* px_class_to_intr_weight: return the intr_weight for a given device. */
509 static int32_t
510 px_class_to_intr_weight(dev_info_t *rdip)
511 {
512 	int32_t intr_weight;
513 
514 	/* default weight is 0% */
515 	intr_weight = px_class_to_val(rdip,
516 	    "pci-class-intr-weights", px_default_intr_weight,
517 	    sizeof (px_default_intr_weight) / sizeof (px_class_val_t), 0);
518 
519 	/* range check the result */
520 	if (intr_weight < 0)
521 		intr_weight = 0;
522 	if (intr_weight > 1000)
523 		intr_weight = 1000;
524 
525 	return (intr_weight);
526 }
527 
528 /* ARGSUSED */
529 int
530 px_intx_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
531     ddi_intr_handle_impl_t *hdlp, void *result)
532 {
533 	px_t		*px_p = DIP_TO_STATE(dip);
534 	ddi_ispec_t	*ip = (ddi_ispec_t *)hdlp->ih_private;
535 	int		ret = DDI_SUCCESS;
536 
537 	DBG(DBG_INTROPS, dip, "px_intx_ops: dip=%x rdip=%x intr_op=%x "
538 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
539 
540 	switch (intr_op) {
541 	case DDI_INTROP_GETCAP:
542 		ret = pci_intx_get_cap(rdip, (int *)result);
543 		break;
544 	case DDI_INTROP_SETCAP:
545 		DBG(DBG_INTROPS, dip, "px_intx_ops: SetCap is not supported\n");
546 		ret = DDI_ENOTSUP;
547 		break;
548 	case DDI_INTROP_ALLOC:
549 		*(int *)result = hdlp->ih_scratch1;
550 		break;
551 	case DDI_INTROP_FREE:
552 		break;
553 	case DDI_INTROP_GETPRI:
554 		*(int *)result = ip->is_pil ?
555 		    ip->is_pil : px_class_to_pil(rdip);
556 		break;
557 	case DDI_INTROP_SETPRI:
558 		ip->is_pil = (*(int *)result);
559 		break;
560 	case DDI_INTROP_ADDISR:
561 		hdlp->ih_vector = *ip->is_intr;
562 
563 		ret = px_add_intx_intr(dip, rdip, hdlp);
564 		break;
565 	case DDI_INTROP_REMISR:
566 		hdlp->ih_vector = *ip->is_intr;
567 
568 		ret = px_rem_intx_intr(dip, rdip, hdlp);
569 		break;
570 	case DDI_INTROP_ENABLE:
571 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
572 		    *ip->is_intr, PX_INTR_STATE_ENABLE);
573 		break;
574 	case DDI_INTROP_DISABLE:
575 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
576 		    *ip->is_intr, PX_INTR_STATE_DISABLE);
577 		break;
578 	case DDI_INTROP_SETMASK:
579 		ret = pci_intx_set_mask(rdip);
580 		break;
581 	case DDI_INTROP_CLRMASK:
582 		ret = pci_intx_clr_mask(rdip);
583 		break;
584 	case DDI_INTROP_GETPENDING:
585 		ret = pci_intx_get_pending(rdip, (int *)result);
586 		break;
587 	case DDI_INTROP_NINTRS:
588 	case DDI_INTROP_NAVAIL:
589 		*(int *)result = i_ddi_get_nintrs(rdip);
590 		break;
591 	case DDI_INTROP_SUPPORTED_TYPES:
592 		*(int *)result = DDI_INTR_TYPE_FIXED;
593 		break;
594 	default:
595 		ret = DDI_ENOTSUP;
596 		break;
597 	}
598 
599 	return (ret);
600 }
601 
/*
 * px_msix_ops
 *
 * Dispatch one interrupt operation (intr_op) for an MSI/MSI-X interrupt of
 * child rdip. Operations that produce a value store it through result,
 * which is treated as an int pointer. SetCap, DupVec and any operation not
 * listed return DDI_ENOTSUP.
 *
 * Most cases return as soon as a step fails, without undoing the steps
 * that already succeeded; only ADDISR removes the handler it added when a
 * later step fails.
 *
 * return value: DDI_SUCCESS, DDI_ENOTSUP, or the failure code of the first
 * helper that failed.
 */
/* ARGSUSED */
int
px_msix_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
    ddi_intr_handle_impl_t *hdlp, void *result)
{
	px_t			*px_p = DIP_TO_STATE(dip);
	px_msi_state_t		*msi_state_p = &px_p->px_ib_p->ib_msi_state;
	msinum_t		msi_num;
	msiqid_t		msiq_id;
	uint_t			nintrs;
	int			i, ret = DDI_SUCCESS;

	DBG(DBG_INTROPS, dip, "px_msix_ops: dip=%x rdip=%x intr_op=%x "
	    "handle=%p\n", dip, rdip, intr_op, hdlp);

	switch (intr_op) {
	case DDI_INTROP_GETCAP:
		ret = pci_msi_get_cap(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_SETCAP:
		DBG(DBG_INTROPS, dip, "px_msix_ops: SetCap is not supported\n");
		ret = DDI_ENOTSUP;
		break;
	case DDI_INTROP_ALLOC:
		/*
		 * We need to restrict this allocation in future
		 * based on Resource Management policies.
		 */
		if ((ret = px_msi_alloc(px_p, rdip, hdlp->ih_inum,
		    hdlp->ih_scratch1, hdlp->ih_scratch2, &msi_num,
		    (int *)result)) != DDI_SUCCESS) {
			DBG(DBG_INTROPS, dip, "px_msix_ops: MSI allocation "
			    "failed, rdip 0x%p inum 0x%x count 0x%x\n",
			    rdip, hdlp->ih_inum, hdlp->ih_scratch1);

			return (ret);
		}

		break;
	case DDI_INTROP_FREE:
		/* Best effort: the results of all three steps are ignored. */
		(void) pci_msi_disable_mode(rdip, hdlp->ih_type, hdlp->ih_inum);
		(void) pci_msi_unconfigure(rdip, hdlp->ih_type, hdlp->ih_inum);
		(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
		    hdlp->ih_scratch1);
		break;
	case DDI_INTROP_GETPRI:
		/* An explicitly set priority wins over the class default. */
		*(int *)result = hdlp->ih_pri ?
		    hdlp->ih_pri : px_class_to_pil(rdip);
		break;
	case DDI_INTROP_SETPRI:
		/* Nothing is recorded here; the operation just succeeds. */
		break;
	case DDI_INTROP_ADDISR:
		if ((ret = px_msi_get_msinum(px_p, hdlp->ih_dip,
		    hdlp->ih_inum, &msi_num)) != DDI_SUCCESS)
			return (ret);

		/*
		 * NOTE(review): the handler is always registered as
		 * MSI32_REC / MSI32_TYPE regardless of hdlp->ih_type -
		 * confirm this is intended for MSI64 and MSI-X.
		 */
		if ((ret = px_add_msiq_intr(dip, rdip, hdlp,
		    MSI32_REC, msi_num, &msiq_id)) != DDI_SUCCESS) {
			DBG(DBG_INTROPS, dip, "px_msix_ops: Add MSI handler "
			    "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num);
			return (ret);
		}

		DBG(DBG_INTROPS, dip, "px_msix_ops: msiq used 0x%x\n", msiq_id);

		/* Bind the MSI to the queue; undo the add on failure. */
		if ((ret = px_lib_msi_setmsiq(dip, msi_num,
		    msiq_id, MSI32_TYPE)) != DDI_SUCCESS) {
			(void) px_rem_msiq_intr(dip, rdip,
			    hdlp, MSI32_REC, msi_num, msiq_id);
			return (ret);
		}

		/* Start the MSI out idle; undo the add on failure. */
		if ((ret = px_lib_msi_setstate(dip, msi_num,
		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS) {
			(void) px_rem_msiq_intr(dip, rdip,
			    hdlp, MSI32_REC, msi_num, msiq_id);
			return (ret);
		}

		/* The later operations read the MSI number from ih_vector. */
		hdlp->ih_vector = msi_num;
		break;
	case DDI_INTROP_DUPVEC:
		DBG(DBG_INTROPS, dip, "px_msix_ops: DupIsr is not supported\n");
		ret = DDI_ENOTSUP;
		break;
	case DDI_INTROP_REMISR:
		msi_num = hdlp->ih_vector;

		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
		    &msiq_id)) != DDI_SUCCESS)
			return (ret);

		if ((ret = px_lib_msi_setstate(dip, msi_num,
		    PCI_MSI_STATE_DELIVERED)) != DDI_SUCCESS)
			return (ret);

		ret = px_rem_msiq_intr(dip, rdip,
		    hdlp, MSI32_REC, msi_num, msiq_id);

		/* ih_vector is cleared whether or not the removal worked. */
		hdlp->ih_vector = 0;
		break;
	case DDI_INTROP_ENABLE:
		msi_num = hdlp->ih_vector;

		if ((ret = px_lib_msi_setvalid(dip, msi_num,
		    PCI_MSI_VALID)) != DDI_SUCCESS)
			return (ret);

		/* Configure and switch on MSI mode only if not already on. */
		if (pci_is_msi_enabled(rdip, hdlp->ih_type) != DDI_SUCCESS) {
			nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);

			/*
			 * The mask below rounds msi_num down to a multiple
			 * of nintrs; presumably nintrs is always a power of
			 * two - TODO confirm.
			 */
			if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
			    nintrs, hdlp->ih_inum, msi_state_p->msi_addr32,
			    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
				return (ret);

			if ((ret = pci_msi_enable_mode(rdip, hdlp->ih_type,
			    hdlp->ih_inum)) != DDI_SUCCESS)
				return (ret);
		}

		ret = pci_msi_clr_mask(rdip, hdlp->ih_type, hdlp->ih_inum);

		break;
	case DDI_INTROP_DISABLE:
		msi_num = hdlp->ih_vector;

		/* Mask at the device first, then invalidate the MSI. */
		if ((ret = pci_msi_set_mask(rdip, hdlp->ih_type,
		    hdlp->ih_inum)) != DDI_SUCCESS)
			return (ret);

		ret = px_lib_msi_setvalid(dip, msi_num, PCI_MSI_INVALID);
		break;
	case DDI_INTROP_BLOCKENABLE:
		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
		msi_num = hdlp->ih_vector;

		if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
		    nintrs, hdlp->ih_inum, msi_state_p->msi_addr32,
		    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
			return (ret);

		/*
		 * Validate nintrs consecutive MSIs starting at ih_vector.
		 * A failure part way through returns with the earlier MSIs
		 * still marked valid.
		 */
		for (i = 0; i < nintrs; i++, msi_num++) {
			if ((ret = px_lib_msi_setvalid(dip, msi_num,
			    PCI_MSI_VALID)) != DDI_SUCCESS)
				return (ret);
		}

		ret = pci_msi_enable_mode(rdip, hdlp->ih_type, hdlp->ih_inum);
		break;
	case DDI_INTROP_BLOCKDISABLE:
		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
		msi_num = hdlp->ih_vector;

		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type,
		    hdlp->ih_inum)) != DDI_SUCCESS)
			return (ret);

		/* Invalidate nintrs consecutive MSIs starting at ih_vector. */
		for (i = 0; i < nintrs; i++, msi_num++) {
			if ((ret = px_lib_msi_setvalid(dip, msi_num,
			    PCI_MSI_INVALID)) != DDI_SUCCESS)
				return (ret);
		}

		break;
	case DDI_INTROP_SETMASK:
		ret = pci_msi_set_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
		break;
	case DDI_INTROP_CLRMASK:
		ret = pci_msi_clr_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
		break;
	case DDI_INTROP_GETPENDING:
		ret = pci_msi_get_pending(rdip, hdlp->ih_type,
		    hdlp->ih_inum, (int *)result);
		break;
	case DDI_INTROP_NINTRS:
		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_NAVAIL:
		/* XXX - a new interface may be needed */
		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_SUPPORTED_TYPES:
		ret = pci_msi_get_supported_type(rdip, (int *)result);
		break;
	default:
		ret = DDI_ENOTSUP;
		break;
	}

	return (ret);
}
794 
/*
 * Named-kstat data shared by every interrupt kstat this file creates:
 * px_create_intr_kstats() points each kstat's ks_data at this one
 * template, and px_ks_update() refills it with the values of the handler
 * being read.
 */
static struct {
	kstat_named_t pxintr_ks_name;		/* driver name + instance */
	kstat_named_t pxintr_ks_type;		/* "fixed" or "msi" */
	kstat_named_t pxintr_ks_cpu;		/* ino_cpuid of the ino */
	kstat_named_t pxintr_ks_pil;		/* ino_pil of the ino */
	kstat_named_t pxintr_ks_time;		/* ih_nsec + converted ih_ticks */
	kstat_named_t pxintr_ks_ino;		/* device ino */
	kstat_named_t pxintr_ks_cookie;		/* sysino for that ino */
	kstat_named_t pxintr_ks_devpath;	/* path of the handler's dip */
	kstat_named_t pxintr_ks_buspath;	/* path of the px nexus dip */
} pxintr_ks_template = {
	{ "name",	KSTAT_DATA_CHAR },
	{ "type",	KSTAT_DATA_CHAR },
	{ "cpu",	KSTAT_DATA_UINT64 },
	{ "pil",	KSTAT_DATA_UINT64 },
	{ "time",	KSTAT_DATA_UINT64 },
	{ "ino",	KSTAT_DATA_UINT64 },
	{ "cookie",	KSTAT_DATA_UINT64 },
	{ "devpath",	KSTAT_DATA_STRING },
	{ "buspath",	KSTAT_DATA_STRING },
};

/* Atomically incremented to number each kstat that is created. */
static uint32_t pxintr_ks_instance;
/*
 * Installed as ks_lock on every kstat that shares pxintr_ks_template.
 * It is not initialized in this part of the file - presumably done
 * elsewhere; TODO confirm.
 */
kmutex_t pxintr_ks_template_lock;
819 
820 int
821 px_ks_update(kstat_t *ksp, int rw)
822 {
823 	px_ih_t *ih_p = ksp->ks_private;
824 	int maxlen = sizeof (pxintr_ks_template.pxintr_ks_name.value.c);
825 	px_ib_t *ib_p = ih_p->ih_ino_p->ino_ib_p;
826 	px_t *px_p = ib_p->ib_px_p;
827 	devino_t ino;
828 	sysino_t sysino;
829 	char ih_devpath[MAXPATHLEN];
830 	char ih_buspath[MAXPATHLEN];
831 
832 	ino = ih_p->ih_ino_p->ino_ino;
833 	(void) px_lib_intr_devino_to_sysino(px_p->px_dip, ino, &sysino);
834 
835 	(void) snprintf(pxintr_ks_template.pxintr_ks_name.value.c, maxlen,
836 	    "%s%d", ddi_driver_name(ih_p->ih_dip),
837 	    ddi_get_instance(ih_p->ih_dip));
838 
839 	(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
840 	    (ih_p->ih_rec_type == 0) ? "fixed" : "msi");
841 	pxintr_ks_template.pxintr_ks_cpu.value.ui64 = ih_p->ih_ino_p->ino_cpuid;
842 	pxintr_ks_template.pxintr_ks_pil.value.ui64 = ih_p->ih_ino_p->ino_pil;
843 	pxintr_ks_template.pxintr_ks_time.value.ui64 =
844 	    ih_p->ih_nsec + (uint64_t)
845 	    tick2ns((hrtime_t)ih_p->ih_ticks, ih_p->ih_ino_p->ino_cpuid);
846 	pxintr_ks_template.pxintr_ks_ino.value.ui64 = ino;
847 	pxintr_ks_template.pxintr_ks_cookie.value.ui64 = sysino;
848 
849 	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
850 	(void) ddi_pathname(px_p->px_dip, ih_buspath);
851 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_devpath, ih_devpath);
852 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_buspath, ih_buspath);
853 
854 	return (0);
855 }
856 
857 void
858 px_create_intr_kstats(px_ih_t *ih_p)
859 {
860 	msiq_rec_type_t rec_type = ih_p->ih_rec_type;
861 
862 	ASSERT(ih_p->ih_ksp == NULL);
863 
864 	/*
865 	 * Create pci_intrs::: kstats for all ih types except messages,
866 	 * which represent unusual conditions and don't need to be tracked.
867 	 */
868 	if (rec_type == 0 || rec_type == MSI32_REC || rec_type == MSI64_REC) {
869 		ih_p->ih_ksp = kstat_create("pci_intrs",
870 		    atomic_inc_32_nv(&pxintr_ks_instance), "config",
871 		    "interrupts", KSTAT_TYPE_NAMED,
872 		    sizeof (pxintr_ks_template) / sizeof (kstat_named_t),
873 		    KSTAT_FLAG_VIRTUAL);
874 	}
875 	if (ih_p->ih_ksp != NULL) {
876 		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
877 		ih_p->ih_ksp->ks_lock = &pxintr_ks_template_lock;
878 		ih_p->ih_ksp->ks_data = &pxintr_ks_template;
879 		ih_p->ih_ksp->ks_private = ih_p;
880 		ih_p->ih_ksp->ks_update = px_ks_update;
881 	}
882 }
883 
/*
 * px_add_intx_intr
 *
 * Register the handler described by hdlp for child rdip on the ino passed
 * in hdlp->ih_vector.
 *
 * If the ino is already in use the new handler is appended to it, unless
 * rdip already has a handler for the same inum on that ino, which fails
 * with DDI_FAILURE. Otherwise a new ino is set up: px_intx_intr() is
 * registered as the system-level handler with the ino as its argument, the
 * pil and target cpu are recorded, and the interrupt is enabled.
 *
 * On success hdlp->ih_vector holds the ino's sysino, the cpu's weight is
 * increased for rdip, and the handler's kstat is created and installed.
 * hdlp->ih_vector may already have been overwritten with the sysino when a
 * failure is returned.
 *
 * return value: DDI_SUCCESS, or the failure code of the step that failed.
 */
int
px_add_intx_intr(dev_info_t *dip, dev_info_t *rdip,
    ddi_intr_handle_impl_t *hdlp)
{
	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
	px_ib_t		*ib_p = px_p->px_ib_p;
	devino_t	ino;
	px_ih_t		*ih_p;
	px_ib_ino_info_t *ino_p;
	int32_t		weight;
	int		ret = DDI_SUCCESS;

	ino = hdlp->ih_vector;

	DBG(DBG_A_INTX, dip, "px_add_intx_intr: rdip=%s%d ino=%x "
	    "handler=%x arg1=%x arg2=%x\n", ddi_driver_name(rdip),
	    ddi_get_instance(rdip), ino, hdlp->ih_cb_func,
	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);

	/* Allocated before the lock is taken; freed again on failure. */
	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum,
	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, 0, 0);

	mutex_enter(&ib_p->ib_ino_lst_mutex);

	if (ino_p = px_ib_locate_ino(ib_p, ino)) {	/* sharing ino */
		uint32_t intr_index = hdlp->ih_inum;
		if (px_ib_ino_locate_intr(ino_p, rdip, intr_index, 0, 0)) {
			DBG(DBG_A_INTX, dip, "px_add_intx_intr: "
			    "dup intr #%d\n", intr_index);

			ret = DDI_FAILURE;
			goto fail1;
		}

		/* Save mondo value in hdlp */
		hdlp->ih_vector = ino_p->ino_sysino;

		if ((ret = px_ib_ino_add_intr(px_p, ino_p, ih_p))
		    != DDI_SUCCESS)
			goto fail1;
	} else {
		ino_p = px_ib_new_ino(ib_p, ino, ih_p);

		/* No priority supplied: fall back to the class default. */
		if (hdlp->ih_pri == 0)
			hdlp->ih_pri = px_class_to_pil(rdip);

		/* Save mondo value in hdlp */
		hdlp->ih_vector = ino_p->ino_sysino;

		DBG(DBG_A_INTX, dip, "px_add_intx_intr: pil=0x%x mondo=0x%x\n",
		    hdlp->ih_pri, hdlp->ih_vector);

		/*
		 * Temporarily substitute the wrapper and the ino so that
		 * i_ddi_add_ivintr() registers px_intx_intr(ino_p) rather
		 * than the child's own handler.
		 */
		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
		    (ddi_intr_handler_t *)px_intx_intr, (caddr_t)ino_p, NULL);

		ret = i_ddi_add_ivintr(hdlp);

		/*
		 * Restore original interrupt handler
		 * and arguments in interrupt handle.
		 */
		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
		    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);

		if (ret != DDI_SUCCESS)
			goto fail2;

		/* Save the pil for this ino */
		ino_p->ino_pil = hdlp->ih_pri;

		/* select cpu, saving it for sharing and removal */
		ino_p->ino_cpuid = intr_dist_cpuid();

		/* Enable interrupt */
		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
	}

	/* add weight to the cpu that we are already targeting */
	weight = px_class_to_intr_weight(rdip);
	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);

	/* px_ks_update() reaches the ino through ih_ino_p, so set it first. */
	ih_p->ih_ino_p = ino_p;
	px_create_intr_kstats(ih_p);
	if (ih_p->ih_ksp)
		kstat_install(ih_p->ih_ksp);
	mutex_exit(&ib_p->ib_ino_lst_mutex);

	DBG(DBG_A_INTX, dip, "px_add_intx_intr: done! Interrupt 0x%x pil=%x\n",
	    ino_p->ino_sysino, hdlp->ih_pri);

	return (ret);
fail2:
	/*
	 * NOTE(review): ino_p is still dereferenced by the DBG below after
	 * this call, and nothing here frees it. px_ib_delete_ino() is not
	 * visible from this file - confirm whether it frees ino_p (making
	 * the DBG a use-after-free) or only unlinks it (leaving it leaked).
	 */
	px_ib_delete_ino(ib_p, ino_p);
fail1:
	if (ih_p->ih_config_handle)
		pci_config_teardown(&ih_p->ih_config_handle);

	mutex_exit(&ib_p->ib_ino_lst_mutex);
	kmem_free(ih_p, sizeof (px_ih_t));

	DBG(DBG_A_INTX, dip, "px_add_intx_intr: Failed! Interrupt 0x%x "
	    "pil=%x\n", ino_p->ino_sysino, hdlp->ih_pri);

	return (ret);
}
989 
990 int
991 px_rem_intx_intr(dev_info_t *dip, dev_info_t *rdip,
992     ddi_intr_handle_impl_t *hdlp)
993 {
994 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
995 	px_ib_t		*ib_p = px_p->px_ib_p;
996 	devino_t	ino;
997 	cpuid_t		curr_cpu;
998 	px_ib_ino_info_t	*ino_p;
999 	px_ih_t		*ih_p;
1000 	int		ret = DDI_SUCCESS;
1001 
1002 	ino = hdlp->ih_vector;
1003 
1004 	DBG(DBG_R_INTX, dip, "px_rem_intx_intr: rdip=%s%d ino=%x\n",
1005 	    ddi_driver_name(rdip), ddi_get_instance(rdip), ino);
1006 
1007 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1008 
1009 	ino_p = px_ib_locate_ino(ib_p, ino);
1010 	ih_p = px_ib_ino_locate_intr(ino_p, rdip, hdlp->ih_inum, 0, 0);
1011 
1012 	/* Get the current cpu */
1013 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1014 	    &curr_cpu)) != DDI_SUCCESS)
1015 		goto fail;
1016 
1017 	if ((ret = px_ib_ino_rem_intr(px_p, ino_p, ih_p)) != DDI_SUCCESS)
1018 		goto fail;
1019 
1020 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1021 
1022 	if (ino_p->ino_ih_size == 0) {
1023 		if ((ret = px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
1024 		    INTR_DELIVERED_STATE)) != DDI_SUCCESS)
1025 			goto fail;
1026 
1027 		hdlp->ih_vector = ino_p->ino_sysino;
1028 		i_ddi_rem_ivintr(hdlp);
1029 
1030 		px_ib_delete_ino(ib_p, ino_p);
1031 		kmem_free(ino_p, sizeof (px_ib_ino_info_t));
1032 	} else {
1033 		/* Re-enable interrupt only if mapping regsiter still shared */
1034 		if ((ret = px_lib_intr_settarget(px_p->px_dip,
1035 			    ino_p->ino_sysino, curr_cpu)) != DDI_SUCCESS)
1036 			goto fail;
1037 
1038 		ret = px_lib_intr_setvalid(px_p->px_dip, ino_p->ino_sysino,
1039 		    INTR_VALID);
1040 	}
1041 
1042 fail:
1043 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1044 	return (ret);
1045 }
1046 
1047 int
1048 px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1049     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1050     msgcode_t msg_code, msiqid_t *msiq_id_p)
1051 {
1052 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1053 	px_ib_t		*ib_p = px_p->px_ib_p;
1054 	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
1055 	devino_t	ino;
1056 	px_ih_t		*ih_p;
1057 	px_ib_ino_info_t	*ino_p;
1058 	int32_t		weight;
1059 	int		ret = DDI_SUCCESS;
1060 
1061 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=%x "
1062 	    "arg1=%x arg2=%x\n", ddi_driver_name(rdip), ddi_get_instance(rdip),
1063 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
1064 
1065 	if ((ret = px_msiq_alloc(px_p, rec_type, msiq_id_p)) != DDI_SUCCESS) {
1066 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1067 		    "msiq allocation failed\n");
1068 		return (ret);
1069 	}
1070 
1071 	ino = px_msiqid_to_devino(px_p, *msiq_id_p);
1072 
1073 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum, hdlp->ih_cb_func,
1074 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, rec_type, msg_code);
1075 
1076 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1077 
1078 	if (ino_p = px_ib_locate_ino(ib_p, ino)) {	/* sharing ino */
1079 		uint32_t intr_index = hdlp->ih_inum;
1080 		if (px_ib_ino_locate_intr(ino_p, rdip,
1081 		    intr_index, rec_type, msg_code)) {
1082 			DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1083 			    "dup intr #%d\n", intr_index);
1084 
1085 			ret = DDI_FAILURE;
1086 			goto fail1;
1087 		}
1088 
1089 		if ((ret = px_ib_ino_add_intr(px_p, ino_p, ih_p))
1090 		    != DDI_SUCCESS)
1091 			goto fail1;
1092 	} else {
1093 		ino_p = px_ib_new_ino(ib_p, ino, ih_p);
1094 
1095 		ino_p->ino_msiq_p = msiq_state_p->msiq_p +
1096 		    (*msiq_id_p - msiq_state_p->msiq_1st_msiq_id);
1097 
1098 		if (hdlp->ih_pri == 0)
1099 			hdlp->ih_pri = px_class_to_pil(rdip);
1100 
1101 		/* Save mondo value in hdlp */
1102 		hdlp->ih_vector = ino_p->ino_sysino;
1103 
1104 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: pil=0x%x mondo=0x%x\n",
1105 		    hdlp->ih_pri, hdlp->ih_vector);
1106 
1107 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1108 		    (ddi_intr_handler_t *)px_msiq_intr, (caddr_t)ino_p, NULL);
1109 
1110 		ret = i_ddi_add_ivintr(hdlp);
1111 
1112 		/*
1113 		 * Restore original interrupt handler
1114 		 * and arguments in interrupt handle.
1115 		 */
1116 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1117 		    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1118 
1119 		if (ret != DDI_SUCCESS)
1120 			goto fail2;
1121 
1122 		/* Save the pil for this ino */
1123 		ino_p->ino_pil = hdlp->ih_pri;
1124 
1125 		/* Enable MSIQ */
1126 		px_lib_msiq_setstate(dip, *msiq_id_p, PCI_MSIQ_STATE_IDLE);
1127 		px_lib_msiq_setvalid(dip, *msiq_id_p, PCI_MSIQ_VALID);
1128 
1129 		/* select cpu, saving it for sharing and removal */
1130 		ino_p->ino_cpuid = intr_dist_cpuid();
1131 
1132 		/* Enable interrupt */
1133 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino_p->ino_ino);
1134 	}
1135 
1136 	/* add weight to the cpu that we are already targeting */
1137 	weight = px_class_to_intr_weight(rdip);
1138 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1139 
1140 	ih_p->ih_ino_p = ino_p;
1141 	px_create_intr_kstats(ih_p);
1142 	if (ih_p->ih_ksp)
1143 		kstat_install(ih_p->ih_ksp);
1144 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1145 
1146 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: done! Interrupt 0x%x pil=%x\n",
1147 	    ino_p->ino_sysino, hdlp->ih_pri);
1148 
1149 	return (ret);
1150 fail2:
1151 	px_ib_delete_ino(ib_p, ino_p);
1152 fail1:
1153 	if (ih_p->ih_config_handle)
1154 		pci_config_teardown(&ih_p->ih_config_handle);
1155 
1156 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1157 	kmem_free(ih_p, sizeof (px_ih_t));
1158 
1159 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: Failed! Interrupt 0x%x pil=%x\n",
1160 	    ino_p->ino_sysino, hdlp->ih_pri);
1161 
1162 	return (ret);
1163 }
1164 
1165 int
1166 px_rem_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1167     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1168     msgcode_t msg_code, msiqid_t msiq_id)
1169 {
1170 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1171 	px_ib_t		*ib_p = px_p->px_ib_p;
1172 	devino_t	ino = px_msiqid_to_devino(px_p, msiq_id);
1173 	cpuid_t		curr_cpu;
1174 	px_ib_ino_info_t *ino_p;
1175 	px_ih_t		*ih_p;
1176 	int		ret = DDI_SUCCESS;
1177 
1178 	DBG(DBG_MSIQ, dip, "px_rem_msiq_intr: rdip=%s%d msiq_id=%x ino=%x\n",
1179 	    ddi_driver_name(rdip), ddi_get_instance(rdip), msiq_id, ino);
1180 
1181 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1182 
1183 	ino_p = px_ib_locate_ino(ib_p, ino);
1184 	ih_p = px_ib_ino_locate_intr(ino_p, rdip, hdlp->ih_inum,
1185 	    rec_type, msg_code);
1186 
1187 	/* Get the current cpu */
1188 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1189 	    &curr_cpu)) != DDI_SUCCESS)
1190 		goto fail;
1191 
1192 	if ((ret = px_ib_ino_rem_intr(px_p, ino_p, ih_p)) != DDI_SUCCESS)
1193 		goto fail;
1194 
1195 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1196 
1197 	if (ino_p->ino_ih_size == 0) {
1198 		if ((ret = px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
1199 		    INTR_DELIVERED_STATE)) != DDI_SUCCESS)
1200 			goto fail;
1201 
1202 		px_lib_msiq_setvalid(dip, px_devino_to_msiqid(px_p, ino),
1203 		    PCI_MSIQ_INVALID);
1204 
1205 		hdlp->ih_vector = ino_p->ino_sysino;
1206 		i_ddi_rem_ivintr(hdlp);
1207 
1208 		px_ib_delete_ino(ib_p, ino_p);
1209 
1210 		(void) px_msiq_free(px_p, msiq_id);
1211 		kmem_free(ino_p, sizeof (px_ib_ino_info_t));
1212 	} else {
1213 		/* Re-enable interrupt only if mapping regsiter still shared */
1214 		if ((ret = px_lib_intr_settarget(px_p->px_dip,
1215 		    ino_p->ino_sysino, curr_cpu)) != DDI_SUCCESS)
1216 			goto fail;
1217 
1218 		ret = px_lib_intr_setvalid(px_p->px_dip, ino_p->ino_sysino,
1219 		    INTR_VALID);
1220 	}
1221 
1222 fail:
1223 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1224 	return (ret);
1225 }
1226