xref: /titanic_51/usr/src/uts/sun4/io/px/px_intr.c (revision 4a8d0ea71c9a4e51c6a916a083ced6b499eb207f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * PX nexus interrupt handling:
28  *	PX device interrupt handler wrapper
29  *	PIL lookup routine
30  *	PX device interrupt related initchild code
31  */
32 
33 #include <sys/types.h>
34 #include <sys/kmem.h>
35 #include <sys/async.h>
36 #include <sys/spl.h>
37 #include <sys/sunddi.h>
38 #include <sys/fm/protocol.h>
39 #include <sys/fm/util.h>
40 #include <sys/machsystm.h>	/* e_ddi_nodeid_to_dip() */
41 #include <sys/ddi_impldefs.h>
42 #include <sys/sdt.h>
43 #include <sys/atomic.h>
44 #include "px_obj.h"
45 #include <sys/ontrap.h>
46 #include <sys/membar.h>
47 #include <sys/clock.h>
48 
49 /*
50  * interrupt jabber:
51  *
52  * When an interrupt line is jabbering, every time the state machine for the
53  * associated ino is idled, a new mondo will be sent and the ino will go into
54  * the pending state again. The mondo will cause a new call to
55  * px_intr_wrapper() which normally idles the ino's state machine which would
56  * precipitate another trip round the loop.
57  *
58  * The loop can be broken by preventing the ino's state machine from being
59  * idled when an interrupt line is jabbering. See the comment at the
60  * beginning of px_intr_wrapper() explaining how the 'interrupt jabber
61  * protection' code does this.
62  */
63 
64 /*LINTLIBRARY*/
65 
66 /*
67  * If the unclaimed interrupt count has reached the limit set by
68  * pci_unclaimed_intr_max within the time limit, then all interrupts
69  * on this ino is blocked by not idling the interrupt state machine.
70  */
71 static int
72 px_spurintr(px_ino_pil_t *ipil_p)
73 {
74 	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
75 	px_ih_t		*ih_p = ipil_p->ipil_ih_start;
76 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
77 	char		*err_fmt_str;
78 	boolean_t	blocked = B_FALSE;
79 	int		i;
80 
81 	if (ino_p->ino_unclaimed_intrs > px_unclaimed_intr_max)
82 		return (DDI_INTR_CLAIMED);
83 
84 	if (!ino_p->ino_unclaimed_intrs)
85 		ino_p->ino_spurintr_begin = ddi_get_lbolt();
86 
87 	ino_p->ino_unclaimed_intrs++;
88 
89 	if (ino_p->ino_unclaimed_intrs <= px_unclaimed_intr_max)
90 		goto clear;
91 
92 	if (drv_hztousec(ddi_get_lbolt() - ino_p->ino_spurintr_begin)
93 	    > px_spurintr_duration) {
94 		ino_p->ino_unclaimed_intrs = 0;
95 		goto clear;
96 	}
97 	err_fmt_str = "%s%d: ino 0x%x blocked";
98 	blocked = B_TRUE;
99 	goto warn;
100 clear:
101 	err_fmt_str = "!%s%d: spurious interrupt from ino 0x%x";
102 warn:
103 	cmn_err(CE_WARN, err_fmt_str, NAMEINST(px_p->px_dip), ino_p->ino_ino);
104 	for (i = 0; i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next)
105 		cmn_err(CE_CONT, "!%s-%d#%x ", NAMEINST(ih_p->ih_dip),
106 		    ih_p->ih_inum);
107 	cmn_err(CE_CONT, "!\n");
108 
109 	/* Clear the pending state */
110 	if (blocked == B_FALSE) {
111 		if (px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
112 		    INTR_IDLE_STATE) != DDI_SUCCESS)
113 			return (DDI_INTR_UNCLAIMED);
114 	}
115 
116 	return (DDI_INTR_CLAIMED);
117 }
118 
/*
 * Defined outside this file.  The interrupt wrappers below add its return
 * value to a handler's ih_ticks after each handler invocation.
 */
extern uint64_t intr_get_time(void);
120 
121 /*
122  * px_intx_intr (INTx or legacy interrupt handler)
123  *
124  * This routine is used as wrapper around interrupt handlers installed by child
125  * device drivers.  This routine invokes the driver interrupt handlers and
126  * examines the return codes.
127  *
128  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
129  * least one handler claims the interrupt then the counter is halved and the
130  * interrupt state machine is idled. If no handler claims the interrupt then
131  * the counter is incremented by one and the state machine is idled.
132  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
133  * then the interrupt state machine is not idled thus preventing any further
134  * interrupts on that ino. The state machine will only be idled again if a
135  * handler is subsequently added or removed.
136  *
137  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
138  * DDI_INTR_UNCLAIMED otherwise.
139  */
140 uint_t
141 px_intx_intr(caddr_t arg)
142 {
143 	px_ino_pil_t	*ipil_p = (px_ino_pil_t *)arg;
144 	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
145 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
146 	px_ih_t		*ih_p = ipil_p->ipil_ih_start;
147 	ushort_t	pil = ipil_p->ipil_pil;
148 	uint_t		result = 0, r = DDI_INTR_UNCLAIMED;
149 	int		i;
150 
151 	DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
152 	    "ino=%x sysino=%llx pil=%x ih_size=%x ih_lst=%x\n",
153 	    ino_p->ino_ino, ino_p->ino_sysino, ipil_p->ipil_pil,
154 	    ipil_p->ipil_ih_size, ipil_p->ipil_ih_head);
155 
156 	for (i = 0; i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next) {
157 		dev_info_t *dip = ih_p->ih_dip;
158 		uint_t (*handler)() = ih_p->ih_handler;
159 		caddr_t arg1 = ih_p->ih_handler_arg1;
160 		caddr_t arg2 = ih_p->ih_handler_arg2;
161 
162 		if (ih_p->ih_intr_state == PX_INTR_STATE_DISABLE) {
163 			DBG(DBG_INTX_INTR, px_p->px_dip,
164 			    "px_intx_intr: %s%d interrupt %d is disabled\n",
165 			    ddi_driver_name(dip), ddi_get_instance(dip),
166 			    ino_p->ino_ino);
167 
168 			continue;
169 		}
170 
171 		DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
172 		    "ino=%x handler=%p arg1 =%p arg2 = %p\n",
173 		    ino_p->ino_ino, handler, arg1, arg2);
174 
175 		DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
176 		    void *, handler, caddr_t, arg1, caddr_t, arg2);
177 
178 		r = (*handler)(arg1, arg2);
179 
180 		/*
181 		 * Account for time used by this interrupt. Protect against
182 		 * conflicting writes to ih_ticks from ib_intr_dist_all() by
183 		 * using atomic ops.
184 		 */
185 
186 		if (pil <= LOCK_LEVEL)
187 			atomic_add_64(&ih_p->ih_ticks, intr_get_time());
188 
189 		DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
190 		    void *, handler, caddr_t, arg1, int, r);
191 
192 		result += r;
193 
194 		if (px_check_all_handlers)
195 			continue;
196 		if (result)
197 			break;
198 	}
199 
200 	if (result)
201 		ino_p->ino_claimed |= (1 << pil);
202 
203 	/* Interrupt can only be cleared after all pil levels are handled */
204 	if (pil != ino_p->ino_lopil)
205 		return (DDI_INTR_CLAIMED);
206 
207 	if (!ino_p->ino_claimed) {
208 		if (px_unclaimed_intr_block)
209 			return (px_spurintr(ipil_p));
210 	}
211 
212 	ino_p->ino_unclaimed_intrs = 0;
213 	ino_p->ino_claimed = 0;
214 
215 	/* Clear the pending state */
216 	if (px_lib_intr_setstate(px_p->px_dip,
217 	    ino_p->ino_sysino, INTR_IDLE_STATE) != DDI_SUCCESS)
218 		return (DDI_INTR_UNCLAIMED);
219 
220 	return (DDI_INTR_CLAIMED);
221 }
222 
223 /*
224  * px_msiq_intr (MSI/X or PCIe MSG interrupt handler)
225  *
226  * This routine is used as wrapper around interrupt handlers installed by child
227  * device drivers.  This routine invokes the driver interrupt handlers and
228  * examines the return codes.
229  *
230  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
231  * least one handler claims the interrupt then the counter is halved and the
232  * interrupt state machine is idled. If no handler claims the interrupt then
233  * the counter is incremented by one and the state machine is idled.
234  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
235  * then the interrupt state machine is not idled thus preventing any further
236  * interrupts on that ino. The state machine will only be idled again if a
237  * handler is subsequently added or removed.
238  *
239  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
240  * DDI_INTR_UNCLAIMED otherwise.
241  */
242 uint_t
243 px_msiq_intr(caddr_t arg)
244 {
245 	px_ino_pil_t	*ipil_p = (px_ino_pil_t *)arg;
246 	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
247 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
248 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
249 	px_msiq_t	*msiq_p = ino_p->ino_msiq_p;
250 	dev_info_t	*dip = px_p->px_dip;
251 	ushort_t	pil = ipil_p->ipil_pil;
252 	msiq_rec_t	msiq_rec, *msiq_rec_p = &msiq_rec;
253 	msiqhead_t	*curr_head_p;
254 	msiqtail_t	curr_tail_index;
255 	msgcode_t	msg_code;
256 	px_ih_t		*ih_p;
257 	uint_t		ret = DDI_INTR_UNCLAIMED;
258 	int		i, j;
259 
260 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: msiq_id =%x ino=%x pil=%x "
261 	    "ih_size=%x ih_lst=%x\n", msiq_p->msiq_id, ino_p->ino_ino,
262 	    ipil_p->ipil_pil, ipil_p->ipil_ih_size, ipil_p->ipil_ih_head);
263 
264 	/*
265 	 * The px_msiq_intr() handles multiple interrupt priorities and it
266 	 * will set msiq->msiq_rec2process to the number of MSIQ records to
267 	 * process while handling the highest priority interrupt. Subsequent
268 	 * lower priority interrupts will just process any unprocessed MSIQ
269 	 * records or will just return immediately.
270 	 */
271 	if (msiq_p->msiq_recs2process == 0) {
272 		/* Read current MSIQ tail index */
273 		px_lib_msiq_gettail(dip, msiq_p->msiq_id, &curr_tail_index);
274 		msiq_p->msiq_new_head_index = msiq_p->msiq_curr_head_index;
275 
276 		if (curr_tail_index < msiq_p->msiq_curr_head_index)
277 			curr_tail_index += msiq_state_p->msiq_rec_cnt;
278 
279 		msiq_p->msiq_recs2process = curr_tail_index -
280 		    msiq_p->msiq_curr_head_index;
281 	}
282 
283 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: curr_head %x new_head %x "
284 	    "rec2process %x\n", msiq_p->msiq_curr_head_index,
285 	    msiq_p->msiq_new_head_index, msiq_p->msiq_recs2process);
286 
287 	/* If all MSIQ records are already processed, just return immediately */
288 	if ((msiq_p->msiq_new_head_index - msiq_p->msiq_curr_head_index)
289 	    == msiq_p->msiq_recs2process)
290 		goto intr_done;
291 
292 	curr_head_p = (msiqhead_t *)((caddr_t)msiq_p->msiq_base_p +
293 	    (msiq_p->msiq_curr_head_index * sizeof (msiq_rec_t)));
294 
295 	/*
296 	 * Calculate the number of recs to process by taking the difference
297 	 * between the head and tail pointers. For all records we always
298 	 * verify that we have a valid record type before we do any processing.
299 	 * If triggered, we should always have at least one valid record.
300 	 */
301 	for (i = 0; i < msiq_p->msiq_recs2process; i++) {
302 		msiq_rec_type_t rec_type;
303 
304 		/* Read next MSIQ record */
305 		px_lib_get_msiq_rec(dip, curr_head_p, msiq_rec_p);
306 
307 		rec_type = msiq_rec_p->msiq_rec_type;
308 
309 		DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSIQ RECORD, "
310 		    "msiq_rec_type 0x%llx msiq_rec_rid 0x%llx\n",
311 		    rec_type, msiq_rec_p->msiq_rec_rid);
312 
313 		if (!rec_type)
314 			goto next_rec;
315 
316 		/* Check MSIQ record type */
317 		switch (rec_type) {
318 		case MSG_REC:
319 			msg_code = msiq_rec_p->msiq_rec_data.msg.msg_code;
320 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: PCIE MSG "
321 			    "record, msg type 0x%x\n", msg_code);
322 			break;
323 		case MSI32_REC:
324 		case MSI64_REC:
325 			msg_code = msiq_rec_p->msiq_rec_data.msi.msi_data;
326 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSI record, "
327 			    "msi 0x%x\n", msg_code);
328 
329 			/* Clear MSI state */
330 			px_lib_msi_setstate(dip, (msinum_t)msg_code,
331 			    PCI_MSI_STATE_IDLE);
332 			break;
333 		default:
334 			msg_code = 0;
335 			cmn_err(CE_WARN, "%s%d: px_msiq_intr: 0x%x MSIQ "
336 			    "record type is not supported",
337 			    ddi_driver_name(dip), ddi_get_instance(dip),
338 			    rec_type);
339 
340 			goto next_rec;
341 		}
342 
343 		/*
344 		 * Scan through px_ih_t linked list, searching for the
345 		 * right px_ih_t, matching MSIQ record data.
346 		 */
347 		for (j = 0, ih_p = ipil_p->ipil_ih_start;
348 		    ih_p && (j < ipil_p->ipil_ih_size) &&
349 		    ((ih_p->ih_msg_code != msg_code) ||
350 		    (ih_p->ih_rec_type != rec_type));
351 		    ih_p = ih_p->ih_next, j++)
352 			;
353 
354 		if ((ih_p->ih_msg_code == msg_code) &&
355 		    (ih_p->ih_rec_type == rec_type)) {
356 			dev_info_t *dip = ih_p->ih_dip;
357 			uint_t (*handler)() = ih_p->ih_handler;
358 			caddr_t arg1 = ih_p->ih_handler_arg1;
359 			caddr_t arg2 = ih_p->ih_handler_arg2;
360 
361 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: ino=%x data=%x "
362 			    "handler=%p arg1 =%p arg2=%p\n", ino_p->ino_ino,
363 			    msg_code, handler, arg1, arg2);
364 
365 			DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
366 			    void *, handler, caddr_t, arg1, caddr_t, arg2);
367 
368 			/*
369 			 * Special case for PCIE Error Messages.
370 			 * The current frame work doesn't fit PCIE Err Msgs
371 			 * This should be fixed when PCIE MESSAGES as a whole
372 			 * is architected correctly.
373 			 */
374 			if ((rec_type == MSG_REC) &&
375 			    ((msg_code == PCIE_MSG_CODE_ERR_COR) ||
376 			    (msg_code == PCIE_MSG_CODE_ERR_NONFATAL) ||
377 			    (msg_code == PCIE_MSG_CODE_ERR_FATAL))) {
378 				ret = px_err_fabric_intr(px_p, msg_code,
379 				    msiq_rec_p->msiq_rec_rid);
380 			} else
381 				ret = (*handler)(arg1, arg2);
382 
383 			/*
384 			 * Account for time used by this interrupt. Protect
385 			 * against conflicting writes to ih_ticks from
386 			 * ib_intr_dist_all() by using atomic ops.
387 			 */
388 
389 			if (pil <= LOCK_LEVEL)
390 				atomic_add_64(&ih_p->ih_ticks, intr_get_time());
391 
392 			DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
393 			    void *, handler, caddr_t, arg1, int, ret);
394 
395 			msiq_p->msiq_new_head_index++;
396 			px_lib_clr_msiq_rec(dip, curr_head_p);
397 		} else {
398 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr:"
399 			    "No matching MSIQ record found\n");
400 		}
401 next_rec:
402 		/* Get the pointer next EQ record */
403 		curr_head_p = (msiqhead_t *)
404 		    ((caddr_t)curr_head_p + sizeof (msiq_rec_t));
405 
406 		/* Check for overflow condition */
407 		if (curr_head_p >= (msiqhead_t *)((caddr_t)msiq_p->msiq_base_p
408 		    + (msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t))))
409 			curr_head_p = (msiqhead_t *)msiq_p->msiq_base_p;
410 	}
411 
412 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: No of MSIQ recs processed %x\n",
413 	    (msiq_p->msiq_new_head_index - msiq_p->msiq_curr_head_index));
414 
415 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: curr_head %x new_head %x "
416 	    "rec2process %x\n", msiq_p->msiq_curr_head_index,
417 	    msiq_p->msiq_new_head_index, msiq_p->msiq_recs2process);
418 
419 	/* ino_claimed used just for debugging purpose */
420 	if (ret)
421 		ino_p->ino_claimed |= (1 << pil);
422 
423 intr_done:
424 	/* Interrupt can only be cleared after all pil levels are handled */
425 	if (pil != ino_p->ino_lopil)
426 		return (DDI_INTR_CLAIMED);
427 
428 	if (msiq_p->msiq_new_head_index <= msiq_p->msiq_curr_head_index)  {
429 		if (px_unclaimed_intr_block)
430 			return (px_spurintr(ipil_p));
431 	}
432 
433 	/*  Update MSIQ head index with no of MSIQ records processed */
434 	if (msiq_p->msiq_new_head_index >= msiq_state_p->msiq_rec_cnt)
435 		msiq_p->msiq_new_head_index -= msiq_state_p->msiq_rec_cnt;
436 
437 	msiq_p->msiq_curr_head_index = msiq_p->msiq_new_head_index;
438 	px_lib_msiq_sethead(dip, msiq_p->msiq_id, msiq_p->msiq_new_head_index);
439 
440 	msiq_p->msiq_new_head_index = 0;
441 	msiq_p->msiq_recs2process = 0;
442 	ino_p->ino_claimed = 0;
443 
444 	/* Clear the pending state */
445 	if (px_lib_intr_setstate(dip, ino_p->ino_sysino,
446 	    INTR_IDLE_STATE) != DDI_SUCCESS)
447 		return (DDI_INTR_UNCLAIMED);
448 
449 	return (DDI_INTR_CLAIMED);
450 }
451 
452 dev_info_t *
453 px_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip)
454 {
455 	dev_info_t	*cdip = rdip;
456 
457 	for (; ddi_get_parent(cdip) != dip; cdip = ddi_get_parent(cdip))
458 		;
459 
460 	return (cdip);
461 }
462 
463 /* ARGSUSED */
464 int
465 px_intx_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
466     ddi_intr_handle_impl_t *hdlp, void *result)
467 {
468 	px_t	*px_p = DIP_TO_STATE(dip);
469 	int	ret = DDI_SUCCESS;
470 
471 	DBG(DBG_INTROPS, dip, "px_intx_ops: dip=%x rdip=%x intr_op=%x "
472 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
473 
474 	switch (intr_op) {
475 	case DDI_INTROP_GETCAP:
476 		ret = pci_intx_get_cap(rdip, (int *)result);
477 		break;
478 	case DDI_INTROP_SETCAP:
479 		DBG(DBG_INTROPS, dip, "px_intx_ops: SetCap is not supported\n");
480 		ret = DDI_ENOTSUP;
481 		break;
482 	case DDI_INTROP_ALLOC:
483 		*(int *)result = hdlp->ih_scratch1;
484 		break;
485 	case DDI_INTROP_FREE:
486 		break;
487 	case DDI_INTROP_GETPRI:
488 		*(int *)result = hdlp->ih_pri ?
489 		    hdlp->ih_pri : pci_class_to_pil(rdip);
490 		break;
491 	case DDI_INTROP_SETPRI:
492 		break;
493 	case DDI_INTROP_ADDISR:
494 		ret = px_add_intx_intr(dip, rdip, hdlp);
495 		break;
496 	case DDI_INTROP_REMISR:
497 		ret = px_rem_intx_intr(dip, rdip, hdlp);
498 		break;
499 	case DDI_INTROP_ENABLE:
500 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
501 		    hdlp->ih_vector, hdlp->ih_pri, PX_INTR_STATE_ENABLE, 0, 0);
502 		break;
503 	case DDI_INTROP_DISABLE:
504 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
505 		    hdlp->ih_vector, hdlp->ih_pri, PX_INTR_STATE_DISABLE, 0, 0);
506 		break;
507 	case DDI_INTROP_SETMASK:
508 		ret = pci_intx_set_mask(rdip);
509 		break;
510 	case DDI_INTROP_CLRMASK:
511 		ret = pci_intx_clr_mask(rdip);
512 		break;
513 	case DDI_INTROP_GETPENDING:
514 		ret = pci_intx_get_pending(rdip, (int *)result);
515 		break;
516 	case DDI_INTROP_NINTRS:
517 	case DDI_INTROP_NAVAIL:
518 		*(int *)result = i_ddi_get_intx_nintrs(rdip);
519 		break;
520 	default:
521 		ret = DDI_ENOTSUP;
522 		break;
523 	}
524 
525 	return (ret);
526 }
527 
/*
 * px_msix_ops
 *
 * Dispatch one interrupt operation (intr_op) for an MSI or MSI-X interrupt
 * of the requesting device rdip.  hdlp carries the interrupt type, inum,
 * vector, priority and scratch values used by the individual operations;
 * operations that produce a value store it through result.
 *
 * return value: DDI_SUCCESS, DDI_FAILURE, DDI_ENOTSUP, or the failing status
 * of whichever helper call stopped the operation.  Most helper failures
 * return immediately without undoing steps that already completed; the
 * exceptions are the explicit cleanup calls in the ALLOC and ADDISR cases.
 */
/* ARGSUSED */
int
px_msix_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
    ddi_intr_handle_impl_t *hdlp, void *result)
{
	px_t			*px_p = DIP_TO_STATE(dip);
	px_msi_state_t		*msi_state_p = &px_p->px_ib_p->ib_msi_state;
	msiq_rec_type_t		msiq_rec_type;
	msi_type_t		msi_type;
	uint64_t		msi_addr;
	msinum_t		msi_num;
	msiqid_t		msiq_id;
	uint_t			nintrs;
	int			i, ret = DDI_SUCCESS;

	DBG(DBG_INTROPS, dip, "px_msix_ops: dip=%x rdip=%x intr_op=%x "
	    "handle=%p\n", dip, rdip, intr_op, hdlp);

	/*
	 * Check for MSI64 support: the 64-bit record type, MSI type and
	 * address are used only when the handle advertises
	 * DDI_INTR_FLAG_MSI64 and a 64-bit MSI address is set; otherwise
	 * the 32-bit variants are used.
	 */
	if ((hdlp->ih_cap & DDI_INTR_FLAG_MSI64) && msi_state_p->msi_addr64) {
		msiq_rec_type = MSI64_REC;
		msi_type = MSI64_TYPE;
		msi_addr = msi_state_p->msi_addr64;
	} else {
		msiq_rec_type = MSI32_REC;
		msi_type = MSI32_TYPE;
		msi_addr = msi_state_p->msi_addr32;
	}

	switch (intr_op) {
	case DDI_INTROP_GETCAP:
		ret = pci_msi_get_cap(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_SETCAP:
		DBG(DBG_INTROPS, dip, "px_msix_ops: SetCap is not supported\n");
		ret = DDI_ENOTSUP;
		break;
	case DDI_INTROP_ALLOC:
		/*
		 * We need to restrict this allocation in future
		 * based on Resource Management policies.
		 */
		if ((ret = px_msi_alloc(px_p, rdip, hdlp->ih_type,
		    hdlp->ih_inum, hdlp->ih_scratch1,
		    (uintptr_t)hdlp->ih_scratch2,
		    (int *)result)) != DDI_SUCCESS) {
			DBG(DBG_INTROPS, dip, "px_msix_ops: allocation "
			    "failed, rdip 0x%p type 0x%d inum 0x%x "
			    "count 0x%x\n", rdip, hdlp->ih_type, hdlp->ih_inum,
			    hdlp->ih_scratch1);

			return (ret);
		}

		/*
		 * For MSI-X, set up the per-device MSI-X state if rdip does
		 * not have any yet.  If that setup fails, the MSIs allocated
		 * just above are released again before failing.
		 */
		if ((hdlp->ih_type == DDI_INTR_TYPE_MSIX) &&
		    (i_ddi_get_msix(rdip) == NULL)) {
			ddi_intr_msix_t		*msix_p;

			if (msix_p = pci_msix_init(rdip)) {
				i_ddi_set_msix(rdip, msix_p);
				break;
			}

			DBG(DBG_INTROPS, dip, "px_msix_ops: MSI-X allocation "
			    "failed, rdip 0x%p inum 0x%x\n", rdip,
			    hdlp->ih_inum);

			(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
			    hdlp->ih_scratch1);

			return (DDI_FAILURE);
		}

		break;
	case DDI_INTROP_FREE:
		(void) pci_msi_unconfigure(rdip, hdlp->ih_type, hdlp->ih_inum);

		/* Plain MSI has no per-device MSI-X state to tear down. */
		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
			goto msi_free;

		/* A duplicated MSI-X vector frees nothing further here. */
		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
			break;

		/*
		 * Tear down the per-device MSI-X state when the current
		 * interrupt count minus one is zero.
		 */
		if (((i_ddi_intr_get_current_nintrs(hdlp->ih_dip) - 1) == 0) &&
		    (i_ddi_get_msix(rdip))) {
			pci_msix_fini(i_ddi_get_msix(rdip));
			i_ddi_set_msix(rdip, NULL);
		}
msi_free:
		(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
		    hdlp->ih_scratch1);
		break;
	case DDI_INTROP_GETPRI:
		/* Fall back to the class-derived pil when none is set. */
		*(int *)result = hdlp->ih_pri ?
		    hdlp->ih_pri : pci_class_to_pil(rdip);
		break;
	case DDI_INTROP_SETPRI:
		break;
	case DDI_INTROP_ADDISR:
		/*
		 * Look up the MSI number for this inum, register the
		 * handler on an MSIQ, bind the MSI to that queue and idle
		 * the MSI.  If either of the last two steps fails, the
		 * handler registered here is removed again.
		 */
		if ((ret = px_msi_get_msinum(px_p, hdlp->ih_dip,
		    hdlp->ih_inum, &msi_num)) != DDI_SUCCESS)
			return (ret);

		if ((ret = px_add_msiq_intr(dip, rdip, hdlp,
		    msiq_rec_type, msi_num, &msiq_id)) != DDI_SUCCESS) {
			DBG(DBG_INTROPS, dip, "px_msix_ops: Add MSI handler "
			    "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num);
			return (ret);
		}

		DBG(DBG_INTROPS, dip, "px_msix_ops: msiq used 0x%x\n", msiq_id);

		if ((ret = px_lib_msi_setmsiq(dip, msi_num,
		    msiq_id, msi_type)) != DDI_SUCCESS) {
			(void) px_rem_msiq_intr(dip, rdip,
			    hdlp, msiq_rec_type, msi_num, msiq_id);
			return (ret);
		}

		if ((ret = px_lib_msi_setstate(dip, msi_num,
		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS) {
			(void) px_rem_msiq_intr(dip, rdip,
			    hdlp, msiq_rec_type, msi_num, msiq_id);
			return (ret);
		}

		/* The MSI number is kept in the handle for later ops. */
		hdlp->ih_vector = msi_num;
		break;
	case DDI_INTROP_DUPVEC:
		DBG(DBG_INTROPS, dip, "px_msix_ops: dupisr - inum: %x, "
		    "new_vector: %x\n", hdlp->ih_inum, hdlp->ih_scratch1);

		ret = pci_msix_dup(hdlp->ih_dip, hdlp->ih_inum,
		    hdlp->ih_scratch1);
		break;
	case DDI_INTROP_REMISR:
		/* ih_vector holds the MSI number stored by ADDISR. */
		msi_num = hdlp->ih_vector;

		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
		    &msiq_id)) != DDI_SUCCESS)
			return (ret);

		if ((ret = px_lib_msi_setstate(dip, msi_num,
		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS)
			return (ret);

		ret = px_rem_msiq_intr(dip, rdip,
		    hdlp, msiq_rec_type, msi_num, msiq_id);

		/* Cleared regardless of the removal status. */
		hdlp->ih_vector = 0;
		break;
	case DDI_INTROP_ENABLE:
		msi_num = hdlp->ih_vector;

		if ((ret = px_lib_msi_setvalid(dip, msi_num,
		    PCI_MSI_VALID)) != DDI_SUCCESS)
			return (ret);

		/*
		 * Configure the device and turn on MSI/MSI-X mode when the
		 * mode is not yet enabled, and always for MSI-X.  MSI-X is
		 * configured with msi_num itself; MSI is configured with
		 * msi_num & ~(nintrs - 1).
		 * NOTE(review): the mask presumably assumes nintrs is a
		 * power of two -- confirm.
		 */
		if ((pci_is_msi_enabled(rdip, hdlp->ih_type) != DDI_SUCCESS) ||
		    (hdlp->ih_type == DDI_INTR_TYPE_MSIX)) {
			nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);

			if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
			    nintrs, hdlp->ih_inum, msi_addr,
			    hdlp->ih_type == DDI_INTR_TYPE_MSIX ?
			    msi_num : msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
				return (ret);

			if ((ret = pci_msi_enable_mode(rdip, hdlp->ih_type))
			    != DDI_SUCCESS)
				return (ret);
		}

		if ((ret = pci_msi_clr_mask(rdip, hdlp->ih_type,
		    hdlp->ih_inum)) != DDI_SUCCESS)
			return (ret);

		/* Duplicated MSI-X vectors skip the handler state update. */
		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
			break;

		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
		    &msiq_id)) != DDI_SUCCESS)
			return (ret);

		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
		    px_msiqid_to_devino(px_p, msiq_id), hdlp->ih_pri,
		    PX_INTR_STATE_ENABLE, msiq_rec_type, msi_num);

		break;
	case DDI_INTROP_DISABLE:
		msi_num = hdlp->ih_vector;

		/*
		 * Disable the mode in the device, mask the vector, then
		 * mark the MSI invalid -- in that order.
		 */
		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type,
		    hdlp->ih_cap & DDI_INTR_FLAG_BLOCK)) != DDI_SUCCESS)
			return (ret);

		if ((ret = pci_msi_set_mask(rdip, hdlp->ih_type,
		    hdlp->ih_inum)) != DDI_SUCCESS)
			return (ret);

		if ((ret = px_lib_msi_setvalid(dip, msi_num,
		    PCI_MSI_INVALID)) != DDI_SUCCESS)
			return (ret);

		/* Duplicated MSI-X vectors skip the handler state update. */
		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
			break;

		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
		    &msiq_id)) != DDI_SUCCESS)
			return (ret);

		ret = px_ib_update_intr_state(px_p, rdip,
		    hdlp->ih_inum, px_msiqid_to_devino(px_p, msiq_id),
		    hdlp->ih_pri, PX_INTR_STATE_DISABLE, msiq_rec_type,
		    msi_num);

		break;
	case DDI_INTROP_BLOCKENABLE:
		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
		msi_num = hdlp->ih_vector;

		if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
		    nintrs, hdlp->ih_inum, msi_addr,
		    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
			return (ret);

		/*
		 * Validate and enable nintrs consecutive MSIs starting at
		 * ih_vector, pairing each with inum ih_inum + i.  A failure
		 * returns at once; MSIs already enabled stay enabled.
		 */
		for (i = 0; i < nintrs; i++, msi_num++) {
			if ((ret = px_lib_msi_setvalid(dip, msi_num,
			    PCI_MSI_VALID)) != DDI_SUCCESS)
				return (ret);

			if ((ret = px_lib_msi_getmsiq(dip, msi_num,
			    &msiq_id)) != DDI_SUCCESS)
				return (ret);

			if ((ret = px_ib_update_intr_state(px_p, rdip,
			    hdlp->ih_inum + i, px_msiqid_to_devino(px_p,
			    msiq_id), hdlp->ih_pri, PX_INTR_STATE_ENABLE,
			    msiq_rec_type, msi_num)) != DDI_SUCCESS)
				return (ret);
		}

		/* The device mode is switched on only after every MSI is set. */
		ret = pci_msi_enable_mode(rdip, hdlp->ih_type);
		break;
	case DDI_INTROP_BLOCKDISABLE:
		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
		msi_num = hdlp->ih_vector;

		/* The device mode is switched off before any MSI is touched. */
		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type,
		    hdlp->ih_cap & DDI_INTR_FLAG_BLOCK)) != DDI_SUCCESS)
			return (ret);

		/*
		 * Invalidate and disable nintrs consecutive MSIs starting at
		 * ih_vector.  A failure returns at once.
		 */
		for (i = 0; i < nintrs; i++, msi_num++) {
			if ((ret = px_lib_msi_setvalid(dip, msi_num,
			    PCI_MSI_INVALID)) != DDI_SUCCESS)
				return (ret);

			if ((ret = px_lib_msi_getmsiq(dip, msi_num,
			    &msiq_id)) != DDI_SUCCESS)
				return (ret);

			if ((ret = px_ib_update_intr_state(px_p, rdip,
			    hdlp->ih_inum + i, px_msiqid_to_devino(px_p,
			    msiq_id), hdlp->ih_pri, PX_INTR_STATE_DISABLE,
			    msiq_rec_type, msi_num)) != DDI_SUCCESS)
				return (ret);
		}

		break;
	case DDI_INTROP_SETMASK:
		ret = pci_msi_set_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
		break;
	case DDI_INTROP_CLRMASK:
		ret = pci_msi_clr_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
		break;
	case DDI_INTROP_GETPENDING:
		ret = pci_msi_get_pending(rdip, hdlp->ih_type,
		    hdlp->ih_inum, (int *)result);
		break;
	case DDI_INTROP_NINTRS:
		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_NAVAIL:
		/* XXX - a new interface may be needed */
		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
		break;
	case DDI_INTROP_GETPOOL:
		/* No pool: store NULL in result and report not supported. */
		if (msi_state_p->msi_pool_p == NULL) {
			*(ddi_irm_pool_t **)result = NULL;
			return (DDI_ENOTSUP);
		}
		*(ddi_irm_pool_t **)result = msi_state_p->msi_pool_p;
		ret = DDI_SUCCESS;
		break;
	default:
		ret = DDI_ENOTSUP;
		break;
	}

	return (ret);
}
829 
/*
 * Named-kstat layout for the interrupt kstats.  There is one instance of
 * this template; every kstat created by px_create_intr_kstats() points its
 * ks_data at it, and px_ks_update() refills it for the handler being read.
 */
static struct {
	kstat_named_t pxintr_ks_name;
	kstat_named_t pxintr_ks_type;
	kstat_named_t pxintr_ks_cpu;
	kstat_named_t pxintr_ks_pil;
	kstat_named_t pxintr_ks_time;
	kstat_named_t pxintr_ks_ino;
	kstat_named_t pxintr_ks_cookie;
	kstat_named_t pxintr_ks_devpath;
	kstat_named_t pxintr_ks_buspath;
} pxintr_ks_template = {
	{ "name",	KSTAT_DATA_CHAR },
	{ "type",	KSTAT_DATA_CHAR },
	{ "cpu",	KSTAT_DATA_UINT64 },
	{ "pil",	KSTAT_DATA_UINT64 },
	{ "time",	KSTAT_DATA_UINT64 },
	{ "ino",	KSTAT_DATA_UINT64 },
	{ "cookie",	KSTAT_DATA_UINT64 },
	{ "devpath",	KSTAT_DATA_STRING },
	{ "buspath",	KSTAT_DATA_STRING },
};

/* Source of the instance number given to each kstat that is created. */
static uint32_t pxintr_ks_instance;
/* Backing storage for the "devpath" and "buspath" string entries. */
static char ih_devpath[MAXPATHLEN];
static char ih_buspath[MAXPATHLEN];
/* Installed as ks_lock on every kstat that uses the template above. */
kmutex_t pxintr_ks_template_lock;
856 
857 int
858 px_ks_update(kstat_t *ksp, int rw)
859 {
860 	px_ih_t *ih_p = ksp->ks_private;
861 	int maxlen = sizeof (pxintr_ks_template.pxintr_ks_name.value.c);
862 	px_ino_pil_t *ipil_p = ih_p->ih_ipil_p;
863 	px_ino_t *ino_p = ipil_p->ipil_ino_p;
864 	px_t *px_p = ino_p->ino_ib_p->ib_px_p;
865 	devino_t ino;
866 	sysino_t sysino;
867 
868 	ino = ino_p->ino_ino;
869 	if (px_lib_intr_devino_to_sysino(px_p->px_dip, ino, &sysino) !=
870 	    DDI_SUCCESS) {
871 		cmn_err(CE_WARN, "px_ks_update: px_lib_intr_devino_to_sysino "
872 		    "failed");
873 	}
874 
875 	(void) snprintf(pxintr_ks_template.pxintr_ks_name.value.c, maxlen,
876 	    "%s%d", ddi_driver_name(ih_p->ih_dip),
877 	    ddi_get_instance(ih_p->ih_dip));
878 
879 	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
880 	(void) ddi_pathname(px_p->px_dip, ih_buspath);
881 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_devpath, ih_devpath);
882 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_buspath, ih_buspath);
883 
884 	if (ih_p->ih_intr_state == PX_INTR_STATE_ENABLE) {
885 
886 		switch (i_ddi_intr_get_current_type(ih_p->ih_dip)) {
887 		case DDI_INTR_TYPE_MSI:
888 			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
889 			    "msi");
890 			break;
891 		case DDI_INTR_TYPE_MSIX:
892 			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
893 			    "msix");
894 			break;
895 		default:
896 			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
897 			    "fixed");
898 			break;
899 		}
900 
901 		pxintr_ks_template.pxintr_ks_cpu.value.ui64 = ino_p->ino_cpuid;
902 		pxintr_ks_template.pxintr_ks_pil.value.ui64 = ipil_p->ipil_pil;
903 		pxintr_ks_template.pxintr_ks_time.value.ui64 = ih_p->ih_nsec +
904 		    (uint64_t)tick2ns((hrtime_t)ih_p->ih_ticks,
905 		    ino_p->ino_cpuid);
906 		pxintr_ks_template.pxintr_ks_ino.value.ui64 = ino;
907 		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = sysino;
908 	} else {
909 		(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
910 		    "disabled");
911 		pxintr_ks_template.pxintr_ks_cpu.value.ui64 = 0;
912 		pxintr_ks_template.pxintr_ks_pil.value.ui64 = 0;
913 		pxintr_ks_template.pxintr_ks_time.value.ui64 = 0;
914 		pxintr_ks_template.pxintr_ks_ino.value.ui64 = 0;
915 		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = 0;
916 	}
917 	return (0);
918 }
919 
920 void
921 px_create_intr_kstats(px_ih_t *ih_p)
922 {
923 	msiq_rec_type_t rec_type = ih_p->ih_rec_type;
924 
925 	ASSERT(ih_p->ih_ksp == NULL);
926 
927 	/*
928 	 * Create pci_intrs::: kstats for all ih types except messages,
929 	 * which represent unusual conditions and don't need to be tracked.
930 	 */
931 	if (rec_type == 0 || rec_type == MSI32_REC || rec_type == MSI64_REC) {
932 		ih_p->ih_ksp = kstat_create("pci_intrs",
933 		    atomic_inc_32_nv(&pxintr_ks_instance), "config",
934 		    "interrupts", KSTAT_TYPE_NAMED,
935 		    sizeof (pxintr_ks_template) / sizeof (kstat_named_t),
936 		    KSTAT_FLAG_VIRTUAL);
937 	}
938 	if (ih_p->ih_ksp != NULL) {
939 		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
940 		ih_p->ih_ksp->ks_lock = &pxintr_ks_template_lock;
941 		ih_p->ih_ksp->ks_data = &pxintr_ks_template;
942 		ih_p->ih_ksp->ks_private = ih_p;
943 		ih_p->ih_ksp->ks_update = px_ks_update;
944 	}
945 }
946 
947 /*
948  * px_add_intx_intr:
949  *
950  * This function is called to register INTx and legacy hardware
951  * interrupt pins interrupts.
952  */
953 int
954 px_add_intx_intr(dev_info_t *dip, dev_info_t *rdip,
955     ddi_intr_handle_impl_t *hdlp)
956 {
957 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
958 	px_ib_t		*ib_p = px_p->px_ib_p;
959 	devino_t	ino;
960 	px_ih_t		*ih_p;
961 	px_ino_t	*ino_p;
962 	px_ino_pil_t	*ipil_p, *ipil_list;
963 	int32_t		weight;
964 	int		ret = DDI_SUCCESS;
965 
966 	ino = hdlp->ih_vector;
967 
968 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: rdip=%s%d ino=%x "
969 	    "handler=%x arg1=%x arg2=%x\n", ddi_driver_name(rdip),
970 	    ddi_get_instance(rdip), ino, hdlp->ih_cb_func,
971 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
972 
973 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum,
974 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, 0, 0);
975 
976 	mutex_enter(&ib_p->ib_ino_lst_mutex);
977 
978 	ino_p = px_ib_locate_ino(ib_p, ino);
979 	ipil_list = ino_p ? ino_p->ino_ipil_p : NULL;
980 
981 	/* Sharing ino */
982 	if (ino_p && (ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
983 		if (px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, 0, 0)) {
984 			DBG(DBG_A_INTX, dip, "px_add_intx_intr: "
985 			    "dup intr #%d\n", hdlp->ih_inum);
986 
987 			ret = DDI_FAILURE;
988 			goto fail1;
989 		}
990 
991 		/* Save mondo value in hdlp */
992 		hdlp->ih_vector = ino_p->ino_sysino;
993 
994 		if ((ret = px_ib_ino_add_intr(px_p, ipil_p,
995 		    ih_p)) != DDI_SUCCESS)
996 			goto fail1;
997 
998 		goto ino_done;
999 	}
1000 
1001 	if (hdlp->ih_pri == 0)
1002 		hdlp->ih_pri = pci_class_to_pil(rdip);
1003 
1004 	ipil_p = px_ib_new_ino_pil(ib_p, ino, hdlp->ih_pri, ih_p);
1005 	ino_p = ipil_p->ipil_ino_p;
1006 
1007 	/* Save mondo value in hdlp */
1008 	hdlp->ih_vector = ino_p->ino_sysino;
1009 
1010 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: pil=0x%x mondo=0x%x\n",
1011 	    hdlp->ih_pri, hdlp->ih_vector);
1012 
1013 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1014 	    (ddi_intr_handler_t *)px_intx_intr, (caddr_t)ipil_p, NULL);
1015 
1016 	ret = i_ddi_add_ivintr(hdlp);
1017 
1018 	/*
1019 	 * Restore original interrupt handler
1020 	 * and arguments in interrupt handle.
1021 	 */
1022 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1023 	    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1024 
1025 	if (ret != DDI_SUCCESS)
1026 		goto fail2;
1027 
1028 	/* Save the pil for this ino */
1029 	ipil_p->ipil_pil = hdlp->ih_pri;
1030 
1031 	/* Select cpu, saving it for sharing and removal */
1032 	if (ipil_list == NULL) {
1033 		ino_p->ino_cpuid = intr_dist_cpuid();
1034 
1035 		/* Enable interrupt */
1036 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
1037 	}
1038 
1039 ino_done:
1040 	/* Add weight to the cpu that we are already targeting */
1041 	weight = pci_class_to_intr_weight(rdip);
1042 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1043 
1044 	ih_p->ih_ipil_p = ipil_p;
1045 	px_create_intr_kstats(ih_p);
1046 	if (ih_p->ih_ksp)
1047 		kstat_install(ih_p->ih_ksp);
1048 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1049 
1050 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: done! Interrupt 0x%x pil=%x\n",
1051 	    ino_p->ino_sysino, hdlp->ih_pri);
1052 
1053 	return (ret);
1054 fail2:
1055 	px_ib_delete_ino_pil(ib_p, ipil_p);
1056 fail1:
1057 	if (ih_p->ih_config_handle)
1058 		pci_config_teardown(&ih_p->ih_config_handle);
1059 
1060 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1061 	kmem_free(ih_p, sizeof (px_ih_t));
1062 
1063 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: Failed! Interrupt 0x%x "
1064 	    "pil=%x\n", ino_p->ino_sysino, hdlp->ih_pri);
1065 
1066 	return (ret);
1067 }
1068 
1069 /*
1070  * px_rem_intx_intr:
1071  *
1072  * This function is called to unregister INTx and legacy hardware
1073  * interrupt pins interrupts.
1074  */
1075 int
1076 px_rem_intx_intr(dev_info_t *dip, dev_info_t *rdip,
1077     ddi_intr_handle_impl_t *hdlp)
1078 {
1079 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1080 	px_ib_t		*ib_p = px_p->px_ib_p;
1081 	devino_t	ino;
1082 	cpuid_t		curr_cpu;
1083 	px_ino_t	*ino_p;
1084 	px_ino_pil_t	*ipil_p;
1085 	px_ih_t		*ih_p;
1086 	int		ret = DDI_SUCCESS;
1087 
1088 	ino = hdlp->ih_vector;
1089 
1090 	DBG(DBG_R_INTX, dip, "px_rem_intx_intr: rdip=%s%d ino=%x\n",
1091 	    ddi_driver_name(rdip), ddi_get_instance(rdip), ino);
1092 
1093 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1094 
1095 	ino_p = px_ib_locate_ino(ib_p, ino);
1096 	ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri);
1097 	ih_p = px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, 0, 0);
1098 
1099 	/* Get the current cpu */
1100 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1101 	    &curr_cpu)) != DDI_SUCCESS)
1102 		goto fail;
1103 
1104 	if ((ret = px_ib_ino_rem_intr(px_p, ipil_p, ih_p)) != DDI_SUCCESS)
1105 		goto fail;
1106 
1107 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1108 
1109 	if (ipil_p->ipil_ih_size == 0) {
1110 		hdlp->ih_vector = ino_p->ino_sysino;
1111 		i_ddi_rem_ivintr(hdlp);
1112 
1113 		px_ib_delete_ino_pil(ib_p, ipil_p);
1114 	}
1115 
1116 	if (ino_p->ino_ipil_size == 0) {
1117 		kmem_free(ino_p, sizeof (px_ino_t));
1118 	} else {
1119 		/* Re-enable interrupt only if mapping register still shared */
1120 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1121 	}
1122 
1123 fail:
1124 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1125 	return (ret);
1126 }
1127 
1128 /*
1129  * px_add_msiq_intr:
1130  *
1131  * This function is called to register MSI/Xs and PCIe message interrupts.
1132  */
1133 int
1134 px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1135     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1136     msgcode_t msg_code, msiqid_t *msiq_id_p)
1137 {
1138 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1139 	px_ib_t		*ib_p = px_p->px_ib_p;
1140 	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
1141 	devino_t	ino;
1142 	px_ih_t		*ih_p;
1143 	px_ino_t	*ino_p;
1144 	px_ino_pil_t	*ipil_p, *ipil_list;
1145 	int32_t		weight;
1146 	int		ret = DDI_SUCCESS;
1147 
1148 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=%x "
1149 	    "arg1=%x arg2=%x\n", ddi_driver_name(rdip), ddi_get_instance(rdip),
1150 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
1151 
1152 	if ((ret = px_msiq_alloc(px_p, rec_type, msiq_id_p)) != DDI_SUCCESS) {
1153 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1154 		    "msiq allocation failed\n");
1155 		return (ret);
1156 	}
1157 
1158 	ino = px_msiqid_to_devino(px_p, *msiq_id_p);
1159 
1160 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum, hdlp->ih_cb_func,
1161 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, rec_type, msg_code);
1162 
1163 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1164 
1165 	ino_p = px_ib_locate_ino(ib_p, ino);
1166 	ipil_list = ino_p ? ino_p->ino_ipil_p : NULL;
1167 
1168 	/* Sharing ino */
1169 	if (ino_p && (ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
1170 		if (px_ib_intr_locate_ih(ipil_p, rdip,
1171 		    hdlp->ih_inum, rec_type, msg_code)) {
1172 			DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1173 			    "dup intr #%d\n", hdlp->ih_inum);
1174 
1175 			ret = DDI_FAILURE;
1176 			goto fail1;
1177 		}
1178 
1179 		/* Save mondo value in hdlp */
1180 		hdlp->ih_vector = ino_p->ino_sysino;
1181 
1182 		if ((ret = px_ib_ino_add_intr(px_p, ipil_p,
1183 		    ih_p)) != DDI_SUCCESS)
1184 			goto fail1;
1185 
1186 		goto ino_done;
1187 	}
1188 
1189 	if (hdlp->ih_pri == 0)
1190 		hdlp->ih_pri = pci_class_to_pil(rdip);
1191 
1192 	ipil_p = px_ib_new_ino_pil(ib_p, ino, hdlp->ih_pri, ih_p);
1193 	ino_p = ipil_p->ipil_ino_p;
1194 
1195 	ino_p->ino_msiq_p = msiq_state_p->msiq_p +
1196 	    (*msiq_id_p - msiq_state_p->msiq_1st_msiq_id);
1197 
1198 	/* Save mondo value in hdlp */
1199 	hdlp->ih_vector = ino_p->ino_sysino;
1200 
1201 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: pil=0x%x mondo=0x%x\n",
1202 	    hdlp->ih_pri, hdlp->ih_vector);
1203 
1204 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1205 	    (ddi_intr_handler_t *)px_msiq_intr, (caddr_t)ipil_p, NULL);
1206 
1207 	ret = i_ddi_add_ivintr(hdlp);
1208 
1209 	/*
1210 	 * Restore original interrupt handler
1211 	 * and arguments in interrupt handle.
1212 	 */
1213 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1214 	    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1215 
1216 	if (ret != DDI_SUCCESS)
1217 		goto fail2;
1218 
1219 	/* Save the pil for this ino */
1220 	ipil_p->ipil_pil = hdlp->ih_pri;
1221 
1222 	/* Select cpu, saving it for sharing and removal */
1223 	if (ipil_list == NULL) {
1224 		ino_p->ino_cpuid = intr_dist_cpuid();
1225 
1226 		/* Enable MSIQ */
1227 		px_lib_msiq_setstate(dip, *msiq_id_p, PCI_MSIQ_STATE_IDLE);
1228 		px_lib_msiq_setvalid(dip, *msiq_id_p, PCI_MSIQ_VALID);
1229 
1230 		/* Enable interrupt */
1231 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
1232 	}
1233 
1234 ino_done:
1235 	/* Add weight to the cpu that we are already targeting */
1236 	weight = pci_class_to_intr_weight(rdip);
1237 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1238 
1239 	ih_p->ih_ipil_p = ipil_p;
1240 	px_create_intr_kstats(ih_p);
1241 	if (ih_p->ih_ksp)
1242 		kstat_install(ih_p->ih_ksp);
1243 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1244 
1245 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: done! Interrupt 0x%x pil=%x\n",
1246 	    ino_p->ino_sysino, hdlp->ih_pri);
1247 
1248 	return (ret);
1249 fail2:
1250 	px_ib_delete_ino_pil(ib_p, ipil_p);
1251 fail1:
1252 	if (ih_p->ih_config_handle)
1253 		pci_config_teardown(&ih_p->ih_config_handle);
1254 
1255 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1256 	kmem_free(ih_p, sizeof (px_ih_t));
1257 
1258 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: Failed! Interrupt 0x%x pil=%x\n",
1259 	    ino_p->ino_sysino, hdlp->ih_pri);
1260 
1261 	return (ret);
1262 }
1263 
1264 /*
1265  * px_rem_msiq_intr:
1266  *
1267  * This function is called to unregister MSI/Xs and PCIe message interrupts.
1268  */
1269 int
1270 px_rem_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1271     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1272     msgcode_t msg_code, msiqid_t msiq_id)
1273 {
1274 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1275 	px_ib_t		*ib_p = px_p->px_ib_p;
1276 	devino_t	ino = px_msiqid_to_devino(px_p, msiq_id);
1277 	cpuid_t		curr_cpu;
1278 	px_ino_t	*ino_p;
1279 	px_ino_pil_t	*ipil_p;
1280 	px_ih_t		*ih_p;
1281 	int		ret = DDI_SUCCESS;
1282 
1283 	DBG(DBG_MSIQ, dip, "px_rem_msiq_intr: rdip=%s%d msiq_id=%x ino=%x\n",
1284 	    ddi_driver_name(rdip), ddi_get_instance(rdip), msiq_id, ino);
1285 
1286 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1287 
1288 	ino_p = px_ib_locate_ino(ib_p, ino);
1289 	ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri);
1290 	ih_p = px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, rec_type,
1291 	    msg_code);
1292 
1293 	/* Get the current cpu */
1294 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1295 	    &curr_cpu)) != DDI_SUCCESS)
1296 		goto fail;
1297 
1298 	if ((ret = px_ib_ino_rem_intr(px_p, ipil_p, ih_p)) != DDI_SUCCESS)
1299 		goto fail;
1300 
1301 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1302 
1303 	if (ipil_p->ipil_ih_size == 0) {
1304 		hdlp->ih_vector = ino_p->ino_sysino;
1305 		i_ddi_rem_ivintr(hdlp);
1306 
1307 		px_ib_delete_ino_pil(ib_p, ipil_p);
1308 
1309 		if (ino_p->ino_ipil_size == 0)
1310 			px_lib_msiq_setvalid(dip,
1311 			    px_devino_to_msiqid(px_p, ino), PCI_MSIQ_INVALID);
1312 
1313 		(void) px_msiq_free(px_p, msiq_id);
1314 	}
1315 
1316 	if (ino_p->ino_ipil_size == 0) {
1317 		kmem_free(ino_p, sizeof (px_ino_t));
1318 	} else {
1319 		/* Re-enable interrupt only if mapping register still shared */
1320 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1321 	}
1322 
1323 fail:
1324 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1325 	return (ret);
1326 }
1327