xref: /titanic_52/usr/src/uts/sun4/io/px/px_intr.c (revision e4a2bec7d50838378a9a8d23992ab4a6fa8726ab)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * PX nexus interrupt handling:
31  *	PX device interrupt handler wrapper
32  *	PIL lookup routine
33  *	PX device interrupt related initchild code
34  */
35 
36 #include <sys/types.h>
37 #include <sys/kmem.h>
38 #include <sys/async.h>
39 #include <sys/spl.h>
40 #include <sys/sunddi.h>
41 #include <sys/fm/protocol.h>
42 #include <sys/fm/util.h>
43 #include <sys/machsystm.h>	/* e_ddi_nodeid_to_dip() */
44 #include <sys/ddi_impldefs.h>
45 #include <sys/sdt.h>
46 #include <sys/atomic.h>
47 #include "px_obj.h"
48 #include <sys/ontrap.h>
49 #include <sys/membar.h>
50 #include <sys/clock.h>
51 
/*
 * interrupt jabber:
 *
 * When an interrupt line is jabbering, every time the state machine for the
 * associated ino is idled, a new mondo will be sent and the ino will go into
 * the pending state again. The mondo will cause a new call to
 * px_intx_intr(), which normally idles the ino's state machine, which would
 * precipitate another trip round the loop.
 *
 * The loop can be broken by preventing the ino's state machine from being
 * idled when an interrupt line is jabbering. See the comment at the
 * beginning of px_intx_intr() explaining how the 'interrupt jabber
 * protection' code does this.
 */
66 
67 /*LINTLIBRARY*/
68 
69 /*
70  * If the unclaimed interrupt count has reached the limit set by
71  * pci_unclaimed_intr_max within the time limit, then all interrupts
72  * on this ino is blocked by not idling the interrupt state machine.
73  */
74 static int
75 px_spurintr(px_ib_ino_info_t *ino_p)
76 {
77 	px_ih_t	*ih_p = ino_p->ino_ih_start;
78 	px_t	*px_p = ino_p->ino_ib_p->ib_px_p;
79 	char	*err_fmt_str;
80 	int	i;
81 
82 	if (ino_p->ino_unclaimed > px_unclaimed_intr_max)
83 		return (DDI_INTR_CLAIMED);
84 
85 	if (!ino_p->ino_unclaimed)
86 		ino_p->ino_spurintr_begin = ddi_get_lbolt();
87 
88 	ino_p->ino_unclaimed++;
89 
90 	if (ino_p->ino_unclaimed <= px_unclaimed_intr_max)
91 		goto clear;
92 
93 	if (drv_hztousec(ddi_get_lbolt() - ino_p->ino_spurintr_begin)
94 	    > px_spurintr_duration) {
95 		ino_p->ino_unclaimed = 0;
96 		goto clear;
97 	}
98 	err_fmt_str = "%s%d: ino 0x%x blocked";
99 	goto warn;
100 clear:
101 	/* Clear the pending state */
102 	if (px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
103 	    INTR_IDLE_STATE) != DDI_SUCCESS)
104 		return (DDI_INTR_UNCLAIMED);
105 
106 	err_fmt_str = "!%s%d: spurious interrupt from ino 0x%x";
107 warn:
108 	cmn_err(CE_WARN, err_fmt_str, NAMEINST(px_p->px_dip), ino_p->ino_ino);
109 	for (i = 0; i < ino_p->ino_ih_size; i++, ih_p = ih_p->ih_next)
110 		cmn_err(CE_CONT, "!%s-%d#%x ", NAMEINST(ih_p->ih_dip),
111 		    ih_p->ih_inum);
112 	cmn_err(CE_CONT, "!\n");
113 	return (DDI_INTR_CLAIMED);
114 }
115 
116 extern uint64_t intr_get_time(void);
117 
/*
 * px_intx_intr (INTx or legacy interrupt handler)
 *
 * This routine is used as wrapper around interrupt handlers installed by child
 * device drivers.  This routine invokes the driver interrupt handlers and
 * examines the return codes.
 *
 * Handlers whose state is PX_INTR_STATE_DISABLE are skipped.  Unless
 * px_check_all_handlers is set, the walk stops at the first handler that
 * returns a non-zero (claimed) result.
 *
 * There is a count of unclaimed interrupts kept on a per-ino basis. If at
 * least one handler claims the interrupt (or px_unclaimed_intr_block is
 * clear) the counter is reset to zero and the interrupt state machine is
 * idled. If no handler claims the interrupt and px_unclaimed_intr_block is
 * set, px_spurintr() is called to bump the counter; it idles the state
 * machine unless the count has passed px_unclaimed_intr_max, in which case
 * the state machine is left un-idled, preventing further interrupts on
 * that ino.
 *
 * return value: DDI_INTR_UNCLAIMED if idling the state machine fails,
 * otherwise DDI_INTR_CLAIMED (or whatever px_spurintr() returns on the
 * unclaimed path).
 */
uint_t
px_intx_intr(caddr_t arg)
{
	px_ib_ino_info_t *ino_p = (px_ib_ino_info_t *)arg;
	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
	px_ih_t		*ih_p = ino_p->ino_ih_start;
	uint_t		result = 0, r;
	int		i;

	DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
	    "ino=%x sysino=%llx pil=%x ih_size=%x ih_lst=%x\n",
	    ino_p->ino_ino, ino_p->ino_sysino, ino_p->ino_pil,
	    ino_p->ino_ih_size, ino_p->ino_ih_head);

	/* Walk the ino_ih_size handlers sharing this ino, from ino_ih_start */
	for (i = 0; i < ino_p->ino_ih_size; i++, ih_p = ih_p->ih_next) {
		dev_info_t *dip = ih_p->ih_dip;
		uint_t (*handler)() = ih_p->ih_handler;
		caddr_t arg1 = ih_p->ih_handler_arg1;
		caddr_t arg2 = ih_p->ih_handler_arg2;

		if (ih_p->ih_intr_state == PX_INTR_STATE_DISABLE) {
			DBG(DBG_INTX_INTR, px_p->px_dip,
			    "px_intx_intr: %s%d interrupt %d is disabled\n",
			    ddi_driver_name(dip), ddi_get_instance(dip),
			    ino_p->ino_ino);

			continue;
		}

		DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
		    "ino=%x handler=%p arg1 =%p arg2 = %p\n",
		    ino_p->ino_ino, handler, arg1, arg2);

		DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
		    void *, handler, caddr_t, arg1, caddr_t, arg2);

		r = (*handler)(arg1, arg2);

		/*
		 * Account for time used by this interrupt. Protect against
		 * conflicting writes to ih_ticks from ib_intr_dist_all() by
		 * using atomic ops.
		 */

		if (ino_p->ino_pil <= LOCK_LEVEL)
			atomic_add_64(&ih_p->ih_ticks, intr_get_time());

		DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
		    void *, handler, caddr_t, arg1, int, r);

		/* Non-zero result means at least one handler claimed it */
		result += r;

		if (px_check_all_handlers)
			continue;
		if (result)
			break;
	}

	/* Nobody claimed it: let px_spurintr() decide whether to idle */
	if (!result && px_unclaimed_intr_block)
		return (px_spurintr(ino_p));

	ino_p->ino_unclaimed = 0;

	/* Clear the pending state */
	if (px_lib_intr_setstate(ino_p->ino_ib_p->ib_px_p->px_dip,
	    ino_p->ino_sysino, INTR_IDLE_STATE) != DDI_SUCCESS)
		return (DDI_INTR_UNCLAIMED);

	return (DDI_INTR_CLAIMED);
}
207 
208 /*
209  * px_msiq_intr (MSI/X or PCIe MSG interrupt handler)
210  *
211  * This routine is used as wrapper around interrupt handlers installed by child
212  * device drivers.  This routine invokes the driver interrupt handlers and
213  * examines the return codes.
214  *
215  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
216  * least one handler claims the interrupt then the counter is halved and the
217  * interrupt state machine is idled. If no handler claims the interrupt then
218  * the counter is incremented by one and the state machine is idled.
219  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
220  * then the interrupt state machine is not idled thus preventing any further
221  * interrupts on that ino. The state machine will only be idled again if a
222  * handler is subsequently added or removed.
223  *
224  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
225  * DDI_INTR_UNCLAIMED otherwise.
226  */
227 uint_t
228 px_msiq_intr(caddr_t arg)
229 {
230 	px_ib_ino_info_t	*ino_p = (px_ib_ino_info_t *)arg;
231 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
232 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
233 	px_msiq_t	*msiq_p = ino_p->ino_msiq_p;
234 	dev_info_t	*dip = px_p->px_dip;
235 	msiq_rec_t	msiq_rec, *msiq_rec_p = &msiq_rec;
236 	msiqhead_t	curr_msiq_rec_cnt, new_msiq_rec_cnt;
237 	msgcode_t	msg_code;
238 	px_ih_t		*ih_p;
239 	int		ret;
240 
241 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: msiq_id =%x ino=%x pil=%x "
242 	    "ih_size=%x ih_lst=%x\n", msiq_p->msiq_id, ino_p->ino_ino,
243 	    ino_p->ino_pil, ino_p->ino_ih_size, ino_p->ino_ih_head);
244 
245 	/* Read current MSIQ head index */
246 	px_lib_msiq_gethead(dip, msiq_p->msiq_id, &curr_msiq_rec_cnt);
247 	msiq_p->msiq_curr = (uint64_t)((caddr_t)msiq_p->msiq_base +
248 	    curr_msiq_rec_cnt * sizeof (msiq_rec_t));
249 	new_msiq_rec_cnt = curr_msiq_rec_cnt;
250 
251 	/* Read next MSIQ record */
252 	px_lib_get_msiq_rec(dip, msiq_p, msiq_rec_p);
253 
254 	/*
255 	 * Process current MSIQ record as long as record type
256 	 * field is non-zero.
257 	 */
258 	while (msiq_rec_p->msiq_rec_type) {
259 		DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSIQ RECORD, "
260 		    "msiq_rec_type 0x%llx msiq_rec_rid 0x%llx\n",
261 		    msiq_rec_p->msiq_rec_type, msiq_rec_p->msiq_rec_rid);
262 
263 		/* Get the pointer next EQ record */
264 		msiq_p->msiq_curr = (uint64_t)
265 		    ((caddr_t)msiq_p->msiq_curr + sizeof (msiq_rec_t));
266 
267 		/* Check for overflow condition */
268 		if (msiq_p->msiq_curr >= (uint64_t)((caddr_t)msiq_p->msiq_base +
269 		    msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t)))
270 			msiq_p->msiq_curr = msiq_p->msiq_base;
271 
272 		/* Check MSIQ record type */
273 		switch (msiq_rec_p->msiq_rec_type) {
274 		case MSG_REC:
275 			msg_code = msiq_rec_p->msiq_rec_data.msg.msg_code;
276 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: PCIE MSG "
277 			    "record, msg type 0x%x\n", msg_code);
278 			break;
279 		case MSI32_REC:
280 		case MSI64_REC:
281 			msg_code = msiq_rec_p->msiq_rec_data.msi.msi_data;
282 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSI record, "
283 			    "msi 0x%x\n", msg_code);
284 
285 			/* Clear MSI state */
286 			px_lib_msi_setstate(dip, (msinum_t)msg_code,
287 			    PCI_MSI_STATE_IDLE);
288 			break;
289 		default:
290 			msg_code = 0;
291 			cmn_err(CE_WARN, "%s%d: px_msiq_intr: 0x%x MSIQ "
292 			    "record type is not supported",
293 			    ddi_driver_name(dip), ddi_get_instance(dip),
294 			    msiq_rec_p->msiq_rec_type);
295 			goto next_rec;
296 		}
297 
298 		ih_p = ino_p->ino_ih_start;
299 
300 		/*
301 		 * Scan through px_ih_t linked list, searching for the
302 		 * right px_ih_t, matching MSIQ record data.
303 		 */
304 		while ((ih_p) && (ih_p->ih_msg_code != msg_code) &&
305 		    (ih_p->ih_rec_type != msiq_rec_p->msiq_rec_type))
306 			ih_p = ih_p->ih_next;
307 
308 		if ((ih_p->ih_msg_code == msg_code) &&
309 		    (ih_p->ih_rec_type == msiq_rec_p->msiq_rec_type)) {
310 			dev_info_t *dip = ih_p->ih_dip;
311 			uint_t (*handler)() = ih_p->ih_handler;
312 			caddr_t arg1 = ih_p->ih_handler_arg1;
313 			caddr_t arg2 = ih_p->ih_handler_arg2;
314 
315 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: ino=%x data=%x "
316 			    "handler=%p arg1 =%p arg2=%p\n", ino_p->ino_ino,
317 			    msg_code, handler, arg1, arg2);
318 
319 			DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
320 			    void *, handler, caddr_t, arg1, caddr_t, arg2);
321 
322 			/*
323 			 * Special case for PCIE Error Messages.
324 			 * The current frame work doesn't fit PCIE Err Msgs
325 			 * This should be fixed when PCIE MESSAGES as a whole
326 			 * is architected correctly.
327 			 */
328 			if ((msg_code == PCIE_MSG_CODE_ERR_COR) ||
329 			    (msg_code == PCIE_MSG_CODE_ERR_NONFATAL) ||
330 			    (msg_code == PCIE_MSG_CODE_ERR_FATAL)) {
331 				ret = px_err_fabric_intr(px_p, msg_code,
332 				    msiq_rec_p->msiq_rec_rid);
333 			} else
334 				ret = (*handler)(arg1, arg2);
335 
336 			/*
337 			 * Account for time used by this interrupt. Protect
338 			 * against conflicting writes to ih_ticks from
339 			 * ib_intr_dist_all() by using atomic ops.
340 			 */
341 
342 			if (ino_p->ino_pil <= LOCK_LEVEL)
343 				atomic_add_64(&ih_p->ih_ticks, intr_get_time());
344 
345 			DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
346 			    void *, handler, caddr_t, arg1, int, ret);
347 		} else {
348 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr:"
349 			    "Not found matching MSIQ record\n");
350 
351 			/* px_spurintr(ino_p); */
352 			ino_p->ino_unclaimed++;
353 		}
354 
355 next_rec:
356 		new_msiq_rec_cnt++;
357 
358 		/* Zero out msiq_rec_type field */
359 		msiq_rec_p->msiq_rec_type = 0;
360 
361 		/* Read next MSIQ record */
362 		px_lib_get_msiq_rec(dip, msiq_p, msiq_rec_p);
363 	}
364 
365 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: No of MSIQ recs processed %x\n",
366 	    (new_msiq_rec_cnt - curr_msiq_rec_cnt));
367 
368 	/*  Update MSIQ head index with no of MSIQ records processed */
369 	if (new_msiq_rec_cnt > curr_msiq_rec_cnt)  {
370 		if (new_msiq_rec_cnt >= msiq_state_p->msiq_rec_cnt)
371 			new_msiq_rec_cnt -= msiq_state_p->msiq_rec_cnt;
372 
373 		px_lib_msiq_sethead(dip, msiq_p->msiq_id, new_msiq_rec_cnt);
374 	}
375 
376 	/* Clear the pending state */
377 	if (px_lib_intr_setstate(dip, ino_p->ino_sysino,
378 	    INTR_IDLE_STATE) != DDI_SUCCESS)
379 		return (DDI_INTR_UNCLAIMED);
380 
381 	return (DDI_INTR_CLAIMED);
382 }
383 
384 dev_info_t *
385 px_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip)
386 {
387 	dev_info_t	*cdip = rdip;
388 
389 	for (; ddi_get_parent(cdip) != dip; cdip = ddi_get_parent(cdip))
390 		;
391 
392 	return (cdip);
393 }
394 
/*
 * Default class to pil value mapping.
 *
 * This table is passed to px_class_to_val() by px_class_to_pil().  It is
 * scanned in order by px_match_class_val(), which returns class_val from
 * the first entry for which
 *	(class_code & class_mask) == (device class-code & class_mask),
 * so entry order matters where masks overlap.  Each initializer is
 * presumably { class_code, class_mask, class_val } -- confirm against the
 * px_class_val_t definition.  px_class_to_pil() treats any resulting
 * value >= 0xf as invalid and returns 0 instead.
 */
px_class_val_t px_default_pil [] = {
	{0x000000, 0xff0000, 0x1},	/* Class code for pre-2.0 devices */
	{0x010000, 0xff0000, 0x4},	/* Mass Storage Controller */
	{0x020000, 0xff0000, 0x6},	/* Network Controller */
	{0x030000, 0xff0000, 0x9},	/* Display Controller */
	{0x040000, 0xff0000, 0x9},	/* Multimedia Controller */
	{0x050000, 0xff0000, 0xb},	/* Memory Controller */
	{0x060000, 0xff0000, 0xb},	/* Bridge Controller */
	{0x0c0000, 0xffff00, 0x9},	/* Serial Bus, FireWire (IEEE 1394) */
	{0x0c0100, 0xffff00, 0x4},	/* Serial Bus, ACCESS.bus */
	{0x0c0200, 0xffff00, 0x4},	/* Serial Bus, SSA */
	{0x0c0300, 0xffff00, 0x9},	/* Serial Bus Universal Serial Bus */
	{0x0c0400, 0xffff00, 0x6},	/* Serial Bus, Fibre Channel */
	{0x0c0600, 0xffff00, 0x6}	/* Serial Bus, Infiniband */
};
411 
/*
 * Default class to intr_weight value mapping (% of CPU).  A driver.conf
 * entry on or above the pci node like
 *
 *	pci-class-intr-weights= 0x020000, 0xff0000, 30;
 *
 * can be used to augment or override entries in the default table below.
 *
 * NB: The values below give NICs preference on redistribution, and provide
 * NICs some isolation from other interrupt sources. We need better interfaces
 * that allow the NIC driver to identify a specific NIC instance as high
 * bandwidth, and thus deserving of separation from other low bandwidth
 * NICs and of additional isolation from other interrupt sources.
 *
 * NB: We treat Infiniband like a NIC.
 *
 * This table is passed to px_class_to_val() by px_class_to_intr_weight(),
 * which clamps the resulting weight to the range 0..1000.  Entries are
 * matched in order by px_match_class_val(); the first match wins.
 */
px_class_val_t px_default_intr_weight [] = {
	{0x020000, 0xff0000, 35},	/* Network Controller */
	{0x010000, 0xff0000, 10},	/* Mass Storage Controller */
	{0x0c0400, 0xffff00, 10},	/* Serial Bus, Fibre Channel */
	{0x0c0600, 0xffff00, 50}	/* Serial Bus, Infiniband */
};
434 
435 static uint32_t
436 px_match_class_val(uint32_t key, px_class_val_t *rec_p, int nrec,
437     uint32_t default_val)
438 {
439 	int	i;
440 
441 	for (i = 0; i < nrec; rec_p++, i++) {
442 		if ((rec_p->class_code & rec_p->class_mask) ==
443 		    (key & rec_p->class_mask))
444 			return (rec_p->class_val);
445 	}
446 
447 	return (default_val);
448 }
449 
450 /*
451  * px_class_to_val
452  *
453  * Return the configuration value, based on class code and sub class code,
454  * from the specified property based or default px_class_val_t table.
455  */
456 uint32_t
457 px_class_to_val(dev_info_t *rdip, char *property_name, px_class_val_t *rec_p,
458     int nrec, uint32_t default_val)
459 {
460 	int property_len;
461 	uint32_t class_code;
462 	px_class_val_t *conf;
463 	uint32_t val = default_val;
464 
465 	/*
466 	 * Use the "class-code" property to get the base and sub class
467 	 * codes for the requesting device.
468 	 */
469 	class_code = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, rdip,
470 	    DDI_PROP_DONTPASS, "class-code", -1);
471 
472 	if (class_code == -1)
473 		return (val);
474 
475 	/* look up the val from the default table */
476 	val = px_match_class_val(class_code, rec_p, nrec, val);
477 
478 	/* see if there is a more specific property specified value */
479 	if (ddi_getlongprop(DDI_DEV_T_ANY, rdip, DDI_PROP_NOTPROM,
480 	    property_name, (caddr_t)&conf, &property_len))
481 		return (val);
482 
483 	if ((property_len % sizeof (px_class_val_t)) == 0)
484 		val = px_match_class_val(class_code, conf,
485 		    property_len / sizeof (px_class_val_t), val);
486 	kmem_free(conf, property_len);
487 	return (val);
488 }
489 
490 /* px_class_to_pil: return the pil for a given device. */
491 uint32_t
492 px_class_to_pil(dev_info_t *rdip)
493 {
494 	uint32_t pil;
495 
496 	/* default pil is 0 (uninitialized) */
497 	pil = px_class_to_val(rdip,
498 	    "pci-class-priorities", px_default_pil,
499 	    sizeof (px_default_pil) / sizeof (px_class_val_t), 0);
500 
501 	/* range check the result */
502 	if (pil >= 0xf)
503 		pil = 0;
504 
505 	return (pil);
506 }
507 
508 /* px_class_to_intr_weight: return the intr_weight for a given device. */
509 static int32_t
510 px_class_to_intr_weight(dev_info_t *rdip)
511 {
512 	int32_t intr_weight;
513 
514 	/* default weight is 0% */
515 	intr_weight = px_class_to_val(rdip,
516 	    "pci-class-intr-weights", px_default_intr_weight,
517 	    sizeof (px_default_intr_weight) / sizeof (px_class_val_t), 0);
518 
519 	/* range check the result */
520 	if (intr_weight < 0)
521 		intr_weight = 0;
522 	if (intr_weight > 1000)
523 		intr_weight = 1000;
524 
525 	return (intr_weight);
526 }
527 
528 /* ARGSUSED */
529 int
530 px_intx_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
531     ddi_intr_handle_impl_t *hdlp, void *result)
532 {
533 	px_t	*px_p = DIP_TO_STATE(dip);
534 	int	ret = DDI_SUCCESS;
535 
536 	DBG(DBG_INTROPS, dip, "px_intx_ops: dip=%x rdip=%x intr_op=%x "
537 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
538 
539 	switch (intr_op) {
540 	case DDI_INTROP_GETCAP:
541 		ret = pci_intx_get_cap(rdip, (int *)result);
542 		break;
543 	case DDI_INTROP_SETCAP:
544 		DBG(DBG_INTROPS, dip, "px_intx_ops: SetCap is not supported\n");
545 		ret = DDI_ENOTSUP;
546 		break;
547 	case DDI_INTROP_ALLOC:
548 		*(int *)result = hdlp->ih_scratch1;
549 		break;
550 	case DDI_INTROP_FREE:
551 		break;
552 	case DDI_INTROP_GETPRI:
553 		*(int *)result = hdlp->ih_pri ?
554 		    hdlp->ih_pri : px_class_to_pil(rdip);
555 		break;
556 	case DDI_INTROP_SETPRI:
557 		break;
558 	case DDI_INTROP_ADDISR:
559 		ret = px_add_intx_intr(dip, rdip, hdlp);
560 		break;
561 	case DDI_INTROP_REMISR:
562 		ret = px_rem_intx_intr(dip, rdip, hdlp);
563 		break;
564 	case DDI_INTROP_ENABLE:
565 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
566 		    hdlp->ih_vector, PX_INTR_STATE_ENABLE);
567 		break;
568 	case DDI_INTROP_DISABLE:
569 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
570 		    hdlp->ih_vector, PX_INTR_STATE_DISABLE);
571 		break;
572 	case DDI_INTROP_SETMASK:
573 		ret = pci_intx_set_mask(rdip);
574 		break;
575 	case DDI_INTROP_CLRMASK:
576 		ret = pci_intx_clr_mask(rdip);
577 		break;
578 	case DDI_INTROP_GETPENDING:
579 		ret = pci_intx_get_pending(rdip, (int *)result);
580 		break;
581 	case DDI_INTROP_NINTRS:
582 	case DDI_INTROP_NAVAIL:
583 		*(int *)result = i_ddi_get_nintrs(rdip);
584 		break;
585 	case DDI_INTROP_SUPPORTED_TYPES:
586 		*(int *)result = DDI_INTR_TYPE_FIXED;
587 		break;
588 	default:
589 		ret = DDI_ENOTSUP;
590 		break;
591 	}
592 
593 	return (ret);
594 }
595 
/*
 * px_msix_ops: interrupt-operation dispatcher for MSI interrupts.
 *
 *	dip	the px nexus
 *	rdip	the requesting device
 *	intr_op	the DDI_INTROP_* operation to perform
 *	hdlp	the interrupt handle the operation applies to; ih_vector
 *		holds the MSI number between ADDISR and REMISR
 *	result	out parameter, treated as an int * by the operations that
 *		return a value
 *
 * Returns DDI_SUCCESS, the status of the first failing step of the
 * operation, or DDI_ENOTSUP for unsupported operations.
 */
/* ARGSUSED */
int
px_msix_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
    ddi_intr_handle_impl_t *hdlp, void *result)
{
	px_t			*px_p = DIP_TO_STATE(dip);
	px_msi_state_t		*msi_state_p = &px_p->px_ib_p->ib_msi_state;
	msinum_t		msi_num;
	msiqid_t		msiq_id;
	uint_t			nintrs;
	int			i, ret = DDI_SUCCESS;

	DBG(DBG_INTROPS, dip, "px_msix_ops: dip=%x rdip=%x intr_op=%x "
	    "handle=%p\n", dip, rdip, intr_op, hdlp);

	switch (intr_op) {
	case DDI_INTROP_GETCAP:
		ret = pci_msi_get_cap(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_SETCAP:
		DBG(DBG_INTROPS, dip, "px_msix_ops: SetCap is not supported\n");
		ret = DDI_ENOTSUP;
		break;
	case DDI_INTROP_ALLOC:
		/*
		 * We need to restrict this allocation in future
		 * based on Resource Management policies.
		 */
		if ((ret = px_msi_alloc(px_p, rdip, hdlp->ih_inum,
		    hdlp->ih_scratch1, hdlp->ih_scratch2, &msi_num,
		    (int *)result)) != DDI_SUCCESS) {
			DBG(DBG_INTROPS, dip, "px_msix_ops: MSI allocation "
			    "failed, rdip 0x%p inum 0x%x count 0x%x\n",
			    rdip, hdlp->ih_inum, hdlp->ih_scratch1);

			return (ret);
		}

		break;
	case DDI_INTROP_FREE:
		/* Best effort: failures of the individual steps are ignored */
		(void) pci_msi_disable_mode(rdip, hdlp->ih_type, hdlp->ih_inum);
		(void) pci_msi_unconfigure(rdip, hdlp->ih_type, hdlp->ih_inum);
		(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
		    hdlp->ih_scratch1);
		break;
	case DDI_INTROP_GETPRI:
		/* Fall back to the class-based pil when none is set */
		*(int *)result = hdlp->ih_pri ?
		    hdlp->ih_pri : px_class_to_pil(rdip);
		break;
	case DDI_INTROP_SETPRI:
		break;
	case DDI_INTROP_ADDISR:
		/* Find the MSI number behind this handle's inum */
		if ((ret = px_msi_get_msinum(px_p, hdlp->ih_dip,
		    hdlp->ih_inum, &msi_num)) != DDI_SUCCESS)
			return (ret);

		/* Register the handler; msiq_id is set by the callee */
		if ((ret = px_add_msiq_intr(dip, rdip, hdlp,
		    MSI32_REC, msi_num, &msiq_id)) != DDI_SUCCESS) {
			DBG(DBG_INTROPS, dip, "px_msix_ops: Add MSI handler "
			    "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num);
			return (ret);
		}

		DBG(DBG_INTROPS, dip, "px_msix_ops: msiq used 0x%x\n", msiq_id);

		/* Bind the MSI to that queue; undo the handler on failure */
		if ((ret = px_lib_msi_setmsiq(dip, msi_num,
		    msiq_id, MSI32_TYPE)) != DDI_SUCCESS) {
			(void) px_rem_msiq_intr(dip, rdip,
			    hdlp, MSI32_REC, msi_num, msiq_id);
			return (ret);
		}

		/* Start with the MSI idle; undo the handler on failure */
		if ((ret = px_lib_msi_setstate(dip, msi_num,
		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS) {
			(void) px_rem_msiq_intr(dip, rdip,
			    hdlp, MSI32_REC, msi_num, msiq_id);
			return (ret);
		}

		/* Remember the MSI number for the later operations */
		hdlp->ih_vector = msi_num;
		break;
	case DDI_INTROP_DUPVEC:
		DBG(DBG_INTROPS, dip, "px_msix_ops: DupIsr is not supported\n");
		ret = DDI_ENOTSUP;
		break;
	case DDI_INTROP_REMISR:
		msi_num = hdlp->ih_vector;

		/* Look up which queue the MSI is bound to */
		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
		    &msiq_id)) != DDI_SUCCESS)
			return (ret);

		if ((ret = px_lib_msi_setstate(dip, msi_num,
		    PCI_MSI_STATE_DELIVERED)) != DDI_SUCCESS)
			return (ret);

		ret = px_rem_msiq_intr(dip, rdip,
		    hdlp, MSI32_REC, msi_num, msiq_id);

		/* The vector is cleared whether or not the removal succeeded */
		hdlp->ih_vector = 0;
		break;
	case DDI_INTROP_ENABLE:
		msi_num = hdlp->ih_vector;

		if ((ret = px_lib_msi_setvalid(dip, msi_num,
		    PCI_MSI_VALID)) != DDI_SUCCESS)
			return (ret);

		/* Configure and switch on MSI mode if not already enabled */
		if (pci_is_msi_enabled(rdip, hdlp->ih_type) != DDI_SUCCESS) {
			nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);

			/*
			 * The MSI number passed is msi_num with its low bits
			 * cleared (rounded down to a multiple of nintrs,
			 * assuming nintrs is a power of two).
			 */
			if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
			    nintrs, hdlp->ih_inum, msi_state_p->msi_addr32,
			    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
				return (ret);

			if ((ret = pci_msi_enable_mode(rdip, hdlp->ih_type,
			    hdlp->ih_inum)) != DDI_SUCCESS)
				return (ret);
		}

		ret = pci_msi_clr_mask(rdip, hdlp->ih_type, hdlp->ih_inum);

		break;
	case DDI_INTROP_DISABLE:
		msi_num = hdlp->ih_vector;

		/* Mask the MSI first, then mark it invalid */
		if ((ret = pci_msi_set_mask(rdip, hdlp->ih_type,
		    hdlp->ih_inum)) != DDI_SUCCESS)
			return (ret);

		ret = px_lib_msi_setvalid(dip, msi_num, PCI_MSI_INVALID);
		break;
	case DDI_INTROP_BLOCKENABLE:
		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
		msi_num = hdlp->ih_vector;

		if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
		    nintrs, hdlp->ih_inum, msi_state_p->msi_addr32,
		    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
			return (ret);

		/* Mark nintrs consecutive MSIs, starting at msi_num, valid */
		for (i = 0; i < nintrs; i++, msi_num++) {
			if ((ret = px_lib_msi_setvalid(dip, msi_num,
			    PCI_MSI_VALID)) != DDI_SUCCESS)
				return (ret);
		}

		ret = pci_msi_enable_mode(rdip, hdlp->ih_type, hdlp->ih_inum);
		break;
	case DDI_INTROP_BLOCKDISABLE:
		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
		msi_num = hdlp->ih_vector;

		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type,
		    hdlp->ih_inum)) != DDI_SUCCESS)
			return (ret);

		/* Mark nintrs consecutive MSIs, starting at msi_num, invalid */
		for (i = 0; i < nintrs; i++, msi_num++) {
			if ((ret = px_lib_msi_setvalid(dip, msi_num,
			    PCI_MSI_INVALID)) != DDI_SUCCESS)
				return (ret);
		}

		break;
	case DDI_INTROP_SETMASK:
		ret = pci_msi_set_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
		break;
	case DDI_INTROP_CLRMASK:
		ret = pci_msi_clr_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
		break;
	case DDI_INTROP_GETPENDING:
		ret = pci_msi_get_pending(rdip, hdlp->ih_type,
		    hdlp->ih_inum, (int *)result);
		break;
	case DDI_INTROP_NINTRS:
		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_NAVAIL:
		/* XXX - a new interface may be needed */
		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_SUPPORTED_TYPES:
		ret = pci_msi_get_supported_type(rdip, (int *)result);
		break;
	default:
		ret = DDI_ENOTSUP;
		break;
	}

	return (ret);
}
788 
/*
 * Layout of the named kstat exported for each interrupt handler.
 *
 * There is a single instance of this structure.  px_create_intr_kstats()
 * installs it as the ks_data of every "pci_intrs" kstat it creates, and
 * px_ks_update() refills it from the px_ih_t of the kstat being read.
 */
static struct {
	kstat_named_t pxintr_ks_name;
	kstat_named_t pxintr_ks_type;
	kstat_named_t pxintr_ks_cpu;
	kstat_named_t pxintr_ks_pil;
	kstat_named_t pxintr_ks_time;
	kstat_named_t pxintr_ks_ino;
	kstat_named_t pxintr_ks_cookie;
	kstat_named_t pxintr_ks_devpath;
	kstat_named_t pxintr_ks_buspath;
} pxintr_ks_template = {
	{ "name",	KSTAT_DATA_CHAR },
	{ "type",	KSTAT_DATA_CHAR },
	{ "cpu",	KSTAT_DATA_UINT64 },
	{ "pil",	KSTAT_DATA_UINT64 },
	{ "time",	KSTAT_DATA_UINT64 },
	{ "ino",	KSTAT_DATA_UINT64 },
	{ "cookie",	KSTAT_DATA_UINT64 },
	{ "devpath",	KSTAT_DATA_STRING },
	{ "buspath",	KSTAT_DATA_STRING },
};

/* Atomically incremented to give each pci_intrs kstat its instance number */
static uint32_t pxintr_ks_instance;
/* Installed as ks_lock of every pci_intrs kstat sharing the template above */
kmutex_t pxintr_ks_template_lock;
813 
814 int
815 px_ks_update(kstat_t *ksp, int rw)
816 {
817 	px_ih_t *ih_p = ksp->ks_private;
818 	int maxlen = sizeof (pxintr_ks_template.pxintr_ks_name.value.c);
819 	px_ib_t *ib_p = ih_p->ih_ino_p->ino_ib_p;
820 	px_t *px_p = ib_p->ib_px_p;
821 	devino_t ino;
822 	sysino_t sysino;
823 	char ih_devpath[MAXPATHLEN];
824 	char ih_buspath[MAXPATHLEN];
825 
826 	ino = ih_p->ih_ino_p->ino_ino;
827 	(void) px_lib_intr_devino_to_sysino(px_p->px_dip, ino, &sysino);
828 
829 	(void) snprintf(pxintr_ks_template.pxintr_ks_name.value.c, maxlen,
830 	    "%s%d", ddi_driver_name(ih_p->ih_dip),
831 	    ddi_get_instance(ih_p->ih_dip));
832 
833 	(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
834 	    (ih_p->ih_rec_type == 0) ? "fixed" : "msi");
835 	pxintr_ks_template.pxintr_ks_cpu.value.ui64 = ih_p->ih_ino_p->ino_cpuid;
836 	pxintr_ks_template.pxintr_ks_pil.value.ui64 = ih_p->ih_ino_p->ino_pil;
837 	pxintr_ks_template.pxintr_ks_time.value.ui64 =
838 	    ih_p->ih_nsec + (uint64_t)
839 	    tick2ns((hrtime_t)ih_p->ih_ticks, ih_p->ih_ino_p->ino_cpuid);
840 	pxintr_ks_template.pxintr_ks_ino.value.ui64 = ino;
841 	pxintr_ks_template.pxintr_ks_cookie.value.ui64 = sysino;
842 
843 	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
844 	(void) ddi_pathname(px_p->px_dip, ih_buspath);
845 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_devpath, ih_devpath);
846 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_buspath, ih_buspath);
847 
848 	return (0);
849 }
850 
851 void
852 px_create_intr_kstats(px_ih_t *ih_p)
853 {
854 	msiq_rec_type_t rec_type = ih_p->ih_rec_type;
855 
856 	ASSERT(ih_p->ih_ksp == NULL);
857 
858 	/*
859 	 * Create pci_intrs::: kstats for all ih types except messages,
860 	 * which represent unusual conditions and don't need to be tracked.
861 	 */
862 	if (rec_type == 0 || rec_type == MSI32_REC || rec_type == MSI64_REC) {
863 		ih_p->ih_ksp = kstat_create("pci_intrs",
864 		    atomic_inc_32_nv(&pxintr_ks_instance), "config",
865 		    "interrupts", KSTAT_TYPE_NAMED,
866 		    sizeof (pxintr_ks_template) / sizeof (kstat_named_t),
867 		    KSTAT_FLAG_VIRTUAL);
868 	}
869 	if (ih_p->ih_ksp != NULL) {
870 		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
871 		ih_p->ih_ksp->ks_lock = &pxintr_ks_template_lock;
872 		ih_p->ih_ksp->ks_data = &pxintr_ks_template;
873 		ih_p->ih_ksp->ks_private = ih_p;
874 		ih_p->ih_ksp->ks_update = px_ks_update;
875 	}
876 }
877 
/*
 * px_add_intx_intr:
 *
 * This function is called to register INTx and legacy hardware
 * interrupt pins interrupts.
 *
 *	dip	the px nexus
 *	rdip	the device registering the handler
 *	hdlp	interrupt handle; on entry ih_vector holds the ino, on
 *		return it holds the ino's sysino (mondo).  ih_pri is filled
 *		in from the device class if it was 0 and a new ino is set up.
 *
 * If the ino is already in use the new handler is added to its list
 * (failing if the same rdip/inum is already registered).  Otherwise a new
 * ino is created, px_intx_intr() is installed as its interrupt vector
 * handler, a target cpu is chosen and the interrupt is enabled.  In both
 * cases the device's interrupt weight is added to the target cpu and a
 * kstat is created for the handler.
 *
 * Returns DDI_SUCCESS, or the failure status of the step that failed; on
 * failure the px_ih_t allocated here is freed.
 */
int
px_add_intx_intr(dev_info_t *dip, dev_info_t *rdip,
    ddi_intr_handle_impl_t *hdlp)
{
	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
	px_ib_t		*ib_p = px_p->px_ib_p;
	devino_t	ino;
	px_ih_t		*ih_p;
	px_ib_ino_info_t *ino_p;
	int32_t		weight;
	int		ret = DDI_SUCCESS;

	ino = hdlp->ih_vector;

	DBG(DBG_A_INTX, dip, "px_add_intx_intr: rdip=%s%d ino=%x "
	    "handler=%x arg1=%x arg2=%x\n", ddi_driver_name(rdip),
	    ddi_get_instance(rdip), ino, hdlp->ih_cb_func,
	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);

	/* Allocated before taking the lock; freed on the failure paths */
	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum,
	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, 0, 0);

	mutex_enter(&ib_p->ib_ino_lst_mutex);

	if (ino_p = px_ib_locate_ino(ib_p, ino)) {	/* sharing ino */
		uint32_t intr_index = hdlp->ih_inum;
		if (px_ib_ino_locate_intr(ino_p, rdip, intr_index, 0, 0)) {
			DBG(DBG_A_INTX, dip, "px_add_intx_intr: "
			    "dup intr #%d\n", intr_index);

			ret = DDI_FAILURE;
			goto fail1;
		}

		/* Save mondo value in hdlp */
		hdlp->ih_vector = ino_p->ino_sysino;

		if ((ret = px_ib_ino_add_intr(px_p, ino_p, ih_p))
		    != DDI_SUCCESS)
			goto fail1;
	} else {
		ino_p = px_ib_new_ino(ib_p, ino, ih_p);

		if (hdlp->ih_pri == 0)
			hdlp->ih_pri = px_class_to_pil(rdip);

		/* Save mondo value in hdlp */
		hdlp->ih_vector = ino_p->ino_sysino;

		DBG(DBG_A_INTX, dip, "px_add_intx_intr: pil=0x%x mondo=0x%x\n",
		    hdlp->ih_pri, hdlp->ih_vector);

		/*
		 * Temporarily point the handle at the nexus wrapper so
		 * that i_ddi_add_ivintr() registers px_intx_intr(), with
		 * the ino as its argument, rather than the child's handler.
		 */
		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
		    (ddi_intr_handler_t *)px_intx_intr, (caddr_t)ino_p, NULL);

		ret = i_ddi_add_ivintr(hdlp);

		/*
		 * Restore original interrupt handler
		 * and arguments in interrupt handle.
		 */
		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
		    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);

		if (ret != DDI_SUCCESS)
			goto fail2;

		/* Save the pil for this ino */
		ino_p->ino_pil = hdlp->ih_pri;

		/* select cpu, saving it for sharing and removal */
		ino_p->ino_cpuid = intr_dist_cpuid();

		/* Enable interrupt */
		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
	}

	/* add weight to the cpu that we are already targeting */
	weight = px_class_to_intr_weight(rdip);
	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);

	/* The kstat update routine reaches the ino through ih_ino_p */
	ih_p->ih_ino_p = ino_p;
	px_create_intr_kstats(ih_p);
	if (ih_p->ih_ksp)
		kstat_install(ih_p->ih_ksp);
	mutex_exit(&ib_p->ib_ino_lst_mutex);

	DBG(DBG_A_INTX, dip, "px_add_intx_intr: done! Interrupt 0x%x pil=%x\n",
	    ino_p->ino_sysino, hdlp->ih_pri);

	return (ret);
fail2:
	/* Only reached for an ino created above */
	px_ib_delete_ino(ib_p, ino_p);
fail1:
	if (ih_p->ih_config_handle)
		pci_config_teardown(&ih_p->ih_config_handle);

	mutex_exit(&ib_p->ib_ino_lst_mutex);
	kmem_free(ih_p, sizeof (px_ih_t));

	/*
	 * NOTE(review): on the fail2 path ino_p is dereferenced here after
	 * px_ib_delete_ino(); confirm that routine does not free ino_p (and,
	 * if it does not, that the ino is released somewhere).
	 */
	DBG(DBG_A_INTX, dip, "px_add_intx_intr: Failed! Interrupt 0x%x "
	    "pil=%x\n", ino_p->ino_sysino, hdlp->ih_pri);

	return (ret);
}
989 
990 /*
991  * px_rem_intx_intr:
992  *
993  * This function is called to unregister INTx and legacy hardware
994  * interrupt pins interrupts.
995  */
996 int
997 px_rem_intx_intr(dev_info_t *dip, dev_info_t *rdip,
998     ddi_intr_handle_impl_t *hdlp)
999 {
1000 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1001 	px_ib_t		*ib_p = px_p->px_ib_p;
1002 	devino_t	ino;
1003 	cpuid_t		curr_cpu;
1004 	px_ib_ino_info_t	*ino_p;
1005 	px_ih_t		*ih_p;
1006 	int		ret = DDI_SUCCESS;
1007 
1008 	ino = hdlp->ih_vector;
1009 
1010 	DBG(DBG_R_INTX, dip, "px_rem_intx_intr: rdip=%s%d ino=%x\n",
1011 	    ddi_driver_name(rdip), ddi_get_instance(rdip), ino);
1012 
1013 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1014 
1015 	ino_p = px_ib_locate_ino(ib_p, ino);
1016 	ih_p = px_ib_ino_locate_intr(ino_p, rdip, hdlp->ih_inum, 0, 0);
1017 
1018 	/* Get the current cpu */
1019 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1020 	    &curr_cpu)) != DDI_SUCCESS)
1021 		goto fail;
1022 
1023 	if ((ret = px_ib_ino_rem_intr(px_p, ino_p, ih_p)) != DDI_SUCCESS)
1024 		goto fail;
1025 
1026 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1027 
1028 	if (ino_p->ino_ih_size == 0) {
1029 		if ((ret = px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
1030 		    INTR_DELIVERED_STATE)) != DDI_SUCCESS)
1031 			goto fail;
1032 
1033 		hdlp->ih_vector = ino_p->ino_sysino;
1034 		i_ddi_rem_ivintr(hdlp);
1035 
1036 		px_ib_delete_ino(ib_p, ino_p);
1037 		kmem_free(ino_p, sizeof (px_ib_ino_info_t));
1038 	} else {
1039 		/* Re-enable interrupt only if mapping regsiter still shared */
1040 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1041 	}
1042 
1043 fail:
1044 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1045 	return (ret);
1046 }
1047 
1048 /*
1049  * px_add_msiq_intr:
1050  *
1051  * This function is called to register MSI/Xs and PCIe message interrupts.
1052  */
1053 int
1054 px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1055     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1056     msgcode_t msg_code, msiqid_t *msiq_id_p)
1057 {
1058 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1059 	px_ib_t		*ib_p = px_p->px_ib_p;
1060 	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
1061 	devino_t	ino;
1062 	px_ih_t		*ih_p;
1063 	px_ib_ino_info_t	*ino_p;
1064 	int32_t		weight;
1065 	int		ret = DDI_SUCCESS;
1066 
1067 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=%x "
1068 	    "arg1=%x arg2=%x\n", ddi_driver_name(rdip), ddi_get_instance(rdip),
1069 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
1070 
1071 	if ((ret = px_msiq_alloc(px_p, rec_type, msiq_id_p)) != DDI_SUCCESS) {
1072 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1073 		    "msiq allocation failed\n");
1074 		return (ret);
1075 	}
1076 
1077 	ino = px_msiqid_to_devino(px_p, *msiq_id_p);
1078 
1079 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum, hdlp->ih_cb_func,
1080 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, rec_type, msg_code);
1081 
1082 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1083 
1084 	if (ino_p = px_ib_locate_ino(ib_p, ino)) {	/* sharing ino */
1085 		uint32_t intr_index = hdlp->ih_inum;
1086 		if (px_ib_ino_locate_intr(ino_p, rdip,
1087 		    intr_index, rec_type, msg_code)) {
1088 			DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1089 			    "dup intr #%d\n", intr_index);
1090 
1091 			ret = DDI_FAILURE;
1092 			goto fail1;
1093 		}
1094 
1095 		if ((ret = px_ib_ino_add_intr(px_p, ino_p, ih_p))
1096 		    != DDI_SUCCESS)
1097 			goto fail1;
1098 	} else {
1099 		ino_p = px_ib_new_ino(ib_p, ino, ih_p);
1100 
1101 		ino_p->ino_msiq_p = msiq_state_p->msiq_p +
1102 		    (*msiq_id_p - msiq_state_p->msiq_1st_msiq_id);
1103 
1104 		if (hdlp->ih_pri == 0)
1105 			hdlp->ih_pri = px_class_to_pil(rdip);
1106 
1107 		/* Save mondo value in hdlp */
1108 		hdlp->ih_vector = ino_p->ino_sysino;
1109 
1110 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: pil=0x%x mondo=0x%x\n",
1111 		    hdlp->ih_pri, hdlp->ih_vector);
1112 
1113 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1114 		    (ddi_intr_handler_t *)px_msiq_intr, (caddr_t)ino_p, NULL);
1115 
1116 		ret = i_ddi_add_ivintr(hdlp);
1117 
1118 		/*
1119 		 * Restore original interrupt handler
1120 		 * and arguments in interrupt handle.
1121 		 */
1122 		DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1123 		    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1124 
1125 		if (ret != DDI_SUCCESS)
1126 			goto fail2;
1127 
1128 		/* Save the pil for this ino */
1129 		ino_p->ino_pil = hdlp->ih_pri;
1130 
1131 		/* Enable MSIQ */
1132 		px_lib_msiq_setstate(dip, *msiq_id_p, PCI_MSIQ_STATE_IDLE);
1133 		px_lib_msiq_setvalid(dip, *msiq_id_p, PCI_MSIQ_VALID);
1134 
1135 		/* select cpu, saving it for sharing and removal */
1136 		ino_p->ino_cpuid = intr_dist_cpuid();
1137 
1138 		/* Enable interrupt */
1139 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino_p->ino_ino);
1140 	}
1141 
1142 	/* add weight to the cpu that we are already targeting */
1143 	weight = px_class_to_intr_weight(rdip);
1144 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1145 
1146 	ih_p->ih_ino_p = ino_p;
1147 	px_create_intr_kstats(ih_p);
1148 	if (ih_p->ih_ksp)
1149 		kstat_install(ih_p->ih_ksp);
1150 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1151 
1152 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: done! Interrupt 0x%x pil=%x\n",
1153 	    ino_p->ino_sysino, hdlp->ih_pri);
1154 
1155 	return (ret);
1156 fail2:
1157 	px_ib_delete_ino(ib_p, ino_p);
1158 fail1:
1159 	if (ih_p->ih_config_handle)
1160 		pci_config_teardown(&ih_p->ih_config_handle);
1161 
1162 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1163 	kmem_free(ih_p, sizeof (px_ih_t));
1164 
1165 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: Failed! Interrupt 0x%x pil=%x\n",
1166 	    ino_p->ino_sysino, hdlp->ih_pri);
1167 
1168 	return (ret);
1169 }
1170 
1171 /*
1172  * px_rem_msiq_intr:
1173  *
1174  * This function is called to unregister MSI/Xs and PCIe message interrupts.
1175  */
1176 int
1177 px_rem_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1178     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1179     msgcode_t msg_code, msiqid_t msiq_id)
1180 {
1181 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1182 	px_ib_t		*ib_p = px_p->px_ib_p;
1183 	devino_t	ino = px_msiqid_to_devino(px_p, msiq_id);
1184 	cpuid_t		curr_cpu;
1185 	px_ib_ino_info_t *ino_p;
1186 	px_ih_t		*ih_p;
1187 	int		ret = DDI_SUCCESS;
1188 
1189 	DBG(DBG_MSIQ, dip, "px_rem_msiq_intr: rdip=%s%d msiq_id=%x ino=%x\n",
1190 	    ddi_driver_name(rdip), ddi_get_instance(rdip), msiq_id, ino);
1191 
1192 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1193 
1194 	ino_p = px_ib_locate_ino(ib_p, ino);
1195 	ih_p = px_ib_ino_locate_intr(ino_p, rdip, hdlp->ih_inum,
1196 	    rec_type, msg_code);
1197 
1198 	/* Get the current cpu */
1199 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1200 	    &curr_cpu)) != DDI_SUCCESS)
1201 		goto fail;
1202 
1203 	if ((ret = px_ib_ino_rem_intr(px_p, ino_p, ih_p)) != DDI_SUCCESS)
1204 		goto fail;
1205 
1206 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1207 
1208 	if (ino_p->ino_ih_size == 0) {
1209 		if ((ret = px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
1210 		    INTR_DELIVERED_STATE)) != DDI_SUCCESS)
1211 			goto fail;
1212 
1213 		px_lib_msiq_setvalid(dip, px_devino_to_msiqid(px_p, ino),
1214 		    PCI_MSIQ_INVALID);
1215 
1216 		hdlp->ih_vector = ino_p->ino_sysino;
1217 		i_ddi_rem_ivintr(hdlp);
1218 
1219 		px_ib_delete_ino(ib_p, ino_p);
1220 
1221 		(void) px_msiq_free(px_p, msiq_id);
1222 		kmem_free(ino_p, sizeof (px_ib_ino_info_t));
1223 	} else {
1224 		/* Re-enable interrupt only if mapping regsiter still shared */
1225 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1226 	}
1227 
1228 fail:
1229 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1230 	return (ret);
1231 }
1232