xref: /titanic_51/usr/src/uts/sun4/io/px/px_intr.c (revision bde3d612a7c090234c60e6e4578821237a5db135)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * PX nexus interrupt handling:
27  *	PX device interrupt handler wrapper
28  *	PIL lookup routine
29  *	PX device interrupt related initchild code
30  */
31 
32 #include <sys/types.h>
33 #include <sys/kmem.h>
34 #include <sys/async.h>
35 #include <sys/spl.h>
36 #include <sys/sunddi.h>
37 #include <sys/fm/protocol.h>
38 #include <sys/fm/util.h>
39 #include <sys/machsystm.h>	/* e_ddi_nodeid_to_dip() */
40 #include <sys/ddi_impldefs.h>
41 #include <sys/sdt.h>
42 #include <sys/atomic.h>
43 #include "px_obj.h"
44 #include <sys/ontrap.h>
45 #include <sys/membar.h>
46 #include <sys/clock.h>
47 
/*
 * interrupt jabber:
 *
 * When an interrupt line is jabbering, every time the state machine for the
 * associated ino is idled, a new mondo will be sent and the ino will go into
 * the pending state again. The mondo will cause a new call to
 * px_intx_intr(), which normally idles the ino's state machine, which in
 * turn would precipitate another trip round the loop.
 *
 * The loop can be broken by preventing the ino's state machine from being
 * idled when an interrupt line is jabbering. See the comment at the
 * beginning of px_intx_intr() explaining how the 'interrupt jabber
 * protection' code does this.
 */
62 
63 /*LINTLIBRARY*/
64 
65 /*
66  * If the unclaimed interrupt count has reached the limit set by
67  * pci_unclaimed_intr_max within the time limit, then all interrupts
68  * on this ino is blocked by not idling the interrupt state machine.
69  */
70 static int
71 px_spurintr(px_ino_pil_t *ipil_p)
72 {
73 	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
74 	px_ih_t		*ih_p;
75 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
76 	char		*err_fmt_str;
77 	boolean_t	blocked = B_FALSE;
78 	int		i;
79 
80 	if (ino_p->ino_unclaimed_intrs > px_unclaimed_intr_max)
81 		return (DDI_INTR_CLAIMED);
82 
83 	if (!ino_p->ino_unclaimed_intrs)
84 		ino_p->ino_spurintr_begin = ddi_get_lbolt();
85 
86 	ino_p->ino_unclaimed_intrs++;
87 
88 	if (ino_p->ino_unclaimed_intrs <= px_unclaimed_intr_max)
89 		goto clear;
90 
91 	if (drv_hztousec(ddi_get_lbolt() - ino_p->ino_spurintr_begin)
92 	    > px_spurintr_duration) {
93 		ino_p->ino_unclaimed_intrs = 0;
94 		goto clear;
95 	}
96 	err_fmt_str = "%s%d: ino 0x%x blocked";
97 	blocked = B_TRUE;
98 	goto warn;
99 clear:
100 	err_fmt_str = "!%s%d: spurious interrupt from ino 0x%x";
101 warn:
102 	cmn_err(CE_WARN, err_fmt_str, NAMEINST(px_p->px_dip), ino_p->ino_ino);
103 	for (ipil_p = ino_p->ino_ipil_p; ipil_p;
104 	    ipil_p = ipil_p->ipil_next_p) {
105 		for (i = 0, ih_p = ipil_p->ipil_ih_start;
106 		    i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next)
107 			cmn_err(CE_CONT, "!%s-%d#%x ", NAMEINST(ih_p->ih_dip),
108 			    ih_p->ih_inum);
109 	}
110 	cmn_err(CE_CONT, "!\n");
111 
112 	/* Clear the pending state */
113 	if (blocked == B_FALSE) {
114 		if (px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
115 		    INTR_IDLE_STATE) != DDI_SUCCESS)
116 			return (DDI_INTR_UNCLAIMED);
117 	}
118 
119 	return (DDI_INTR_CLAIMED);
120 }
121 
/*
 * Declared locally.  The interrupt wrappers below call this after a handler
 * has run and add the returned value to that handler's ih_ticks.
 */
extern uint64_t intr_get_time(void);
123 
124 /*
125  * px_intx_intr (INTx or legacy interrupt handler)
126  *
127  * This routine is used as wrapper around interrupt handlers installed by child
128  * device drivers.  This routine invokes the driver interrupt handlers and
129  * examines the return codes.
130  *
131  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
132  * least one handler claims the interrupt then the counter is halved and the
133  * interrupt state machine is idled. If no handler claims the interrupt then
134  * the counter is incremented by one and the state machine is idled.
135  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
136  * then the interrupt state machine is not idled thus preventing any further
137  * interrupts on that ino. The state machine will only be idled again if a
138  * handler is subsequently added or removed.
139  *
140  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
141  * DDI_INTR_UNCLAIMED otherwise.
142  */
143 uint_t
144 px_intx_intr(caddr_t arg)
145 {
146 	px_ino_pil_t	*ipil_p = (px_ino_pil_t *)arg;
147 	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
148 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
149 	px_ih_t		*ih_p = ipil_p->ipil_ih_start;
150 	ushort_t	pil = ipil_p->ipil_pil;
151 	uint_t		result = 0, r = DDI_INTR_UNCLAIMED;
152 	int		i;
153 
154 	DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
155 	    "ino=%x sysino=%llx pil=%x ih_size=%x ih_lst=%x\n",
156 	    ino_p->ino_ino, ino_p->ino_sysino, ipil_p->ipil_pil,
157 	    ipil_p->ipil_ih_size, ipil_p->ipil_ih_head);
158 
159 	for (i = 0; i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next) {
160 		dev_info_t *dip = ih_p->ih_dip;
161 		uint_t (*handler)() = ih_p->ih_handler;
162 		caddr_t arg1 = ih_p->ih_handler_arg1;
163 		caddr_t arg2 = ih_p->ih_handler_arg2;
164 
165 		if (ih_p->ih_intr_state == PX_INTR_STATE_DISABLE) {
166 			DBG(DBG_INTX_INTR, px_p->px_dip,
167 			    "px_intx_intr: %s%d interrupt %d is disabled\n",
168 			    ddi_driver_name(dip), ddi_get_instance(dip),
169 			    ino_p->ino_ino);
170 
171 			continue;
172 		}
173 
174 		DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
175 		    "ino=%x handler=%p arg1 =%p arg2 = %p\n",
176 		    ino_p->ino_ino, handler, arg1, arg2);
177 
178 		DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
179 		    void *, handler, caddr_t, arg1, caddr_t, arg2);
180 
181 		r = (*handler)(arg1, arg2);
182 
183 		/*
184 		 * Account for time used by this interrupt. Protect against
185 		 * conflicting writes to ih_ticks from ib_intr_dist_all() by
186 		 * using atomic ops.
187 		 */
188 
189 		if (pil <= LOCK_LEVEL)
190 			atomic_add_64(&ih_p->ih_ticks, intr_get_time());
191 
192 		DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
193 		    void *, handler, caddr_t, arg1, int, r);
194 
195 		result += r;
196 
197 		if (px_check_all_handlers)
198 			continue;
199 		if (result)
200 			break;
201 	}
202 
203 	if (result)
204 		ino_p->ino_claimed |= (1 << pil);
205 
206 	/* Interrupt can only be cleared after all pil levels are handled */
207 	if (pil != ino_p->ino_lopil)
208 		return (DDI_INTR_CLAIMED);
209 
210 	if (!ino_p->ino_claimed) {
211 		if (px_unclaimed_intr_block)
212 			return (px_spurintr(ipil_p));
213 	}
214 
215 	ino_p->ino_unclaimed_intrs = 0;
216 	ino_p->ino_claimed = 0;
217 
218 	/* Clear the pending state */
219 	if (px_lib_intr_setstate(px_p->px_dip,
220 	    ino_p->ino_sysino, INTR_IDLE_STATE) != DDI_SUCCESS)
221 		return (DDI_INTR_UNCLAIMED);
222 
223 	return (DDI_INTR_CLAIMED);
224 }
225 
226 /*
227  * px_msiq_intr (MSI/X or PCIe MSG interrupt handler)
228  *
229  * This routine is used as wrapper around interrupt handlers installed by child
230  * device drivers.  This routine invokes the driver interrupt handlers and
231  * examines the return codes.
232  *
233  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
234  * least one handler claims the interrupt then the counter is halved and the
235  * interrupt state machine is idled. If no handler claims the interrupt then
236  * the counter is incremented by one and the state machine is idled.
237  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
238  * then the interrupt state machine is not idled thus preventing any further
239  * interrupts on that ino. The state machine will only be idled again if a
240  * handler is subsequently added or removed.
241  *
242  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
243  * DDI_INTR_UNCLAIMED otherwise.
244  */
245 uint_t
246 px_msiq_intr(caddr_t arg)
247 {
248 	px_ino_pil_t	*ipil_p = (px_ino_pil_t *)arg;
249 	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
250 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
251 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
252 	px_msiq_t	*msiq_p = ino_p->ino_msiq_p;
253 	dev_info_t	*dip = px_p->px_dip;
254 	ushort_t	pil = ipil_p->ipil_pil;
255 	msiq_rec_t	msiq_rec, *msiq_rec_p = &msiq_rec;
256 	msiqhead_t	*curr_head_p;
257 	msiqtail_t	curr_tail_index;
258 	msgcode_t	msg_code;
259 	px_ih_t		*ih_p;
260 	uint_t		ret = DDI_INTR_UNCLAIMED;
261 	int		i, j;
262 
263 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: msiq_id =%x ino=%x pil=%x "
264 	    "ih_size=%x ih_lst=%x\n", msiq_p->msiq_id, ino_p->ino_ino,
265 	    ipil_p->ipil_pil, ipil_p->ipil_ih_size, ipil_p->ipil_ih_head);
266 
267 	/*
268 	 * The px_msiq_intr() handles multiple interrupt priorities and it
269 	 * will set msiq->msiq_rec2process to the number of MSIQ records to
270 	 * process while handling the highest priority interrupt. Subsequent
271 	 * lower priority interrupts will just process any unprocessed MSIQ
272 	 * records or will just return immediately.
273 	 */
274 	if (msiq_p->msiq_recs2process == 0) {
275 		ASSERT(ino_p->ino_ipil_cntr == 0);
276 		ino_p->ino_ipil_cntr = ino_p->ino_ipil_size;
277 
278 		/* Read current MSIQ tail index */
279 		px_lib_msiq_gettail(dip, msiq_p->msiq_id, &curr_tail_index);
280 		msiq_p->msiq_new_head_index = msiq_p->msiq_curr_head_index;
281 
282 		if (curr_tail_index < msiq_p->msiq_curr_head_index)
283 			curr_tail_index += msiq_state_p->msiq_rec_cnt;
284 
285 		msiq_p->msiq_recs2process = curr_tail_index -
286 		    msiq_p->msiq_curr_head_index;
287 	}
288 
289 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: curr_head %x new_head %x "
290 	    "rec2process %x\n", msiq_p->msiq_curr_head_index,
291 	    msiq_p->msiq_new_head_index, msiq_p->msiq_recs2process);
292 
293 	/* If all MSIQ records are already processed, just return immediately */
294 	if ((msiq_p->msiq_new_head_index - msiq_p->msiq_curr_head_index)
295 	    == msiq_p->msiq_recs2process)
296 		goto intr_done;
297 
298 	curr_head_p = (msiqhead_t *)((caddr_t)msiq_p->msiq_base_p +
299 	    (msiq_p->msiq_curr_head_index * sizeof (msiq_rec_t)));
300 
301 	/*
302 	 * Calculate the number of recs to process by taking the difference
303 	 * between the head and tail pointers. For all records we always
304 	 * verify that we have a valid record type before we do any processing.
305 	 * If triggered, we should always have at least one valid record.
306 	 */
307 	for (i = 0; i < msiq_p->msiq_recs2process; i++) {
308 		msiq_rec_type_t rec_type;
309 
310 		/* Read next MSIQ record */
311 		px_lib_get_msiq_rec(dip, curr_head_p, msiq_rec_p);
312 
313 		rec_type = msiq_rec_p->msiq_rec_type;
314 
315 		DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSIQ RECORD, "
316 		    "msiq_rec_type 0x%llx msiq_rec_rid 0x%llx\n",
317 		    rec_type, msiq_rec_p->msiq_rec_rid);
318 
319 		if (!rec_type)
320 			goto next_rec;
321 
322 		/* Check MSIQ record type */
323 		switch (rec_type) {
324 		case MSG_REC:
325 			msg_code = msiq_rec_p->msiq_rec_data.msg.msg_code;
326 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: PCIE MSG "
327 			    "record, msg type 0x%x\n", msg_code);
328 			break;
329 		case MSI32_REC:
330 		case MSI64_REC:
331 			msg_code = msiq_rec_p->msiq_rec_data.msi.msi_data;
332 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSI record, "
333 			    "msi 0x%x\n", msg_code);
334 			break;
335 		default:
336 			msg_code = 0;
337 			cmn_err(CE_WARN, "%s%d: px_msiq_intr: 0x%x MSIQ "
338 			    "record type is not supported",
339 			    ddi_driver_name(dip), ddi_get_instance(dip),
340 			    rec_type);
341 
342 			goto next_rec;
343 		}
344 
345 		/*
346 		 * Scan through px_ih_t linked list, searching for the
347 		 * right px_ih_t, matching MSIQ record data.
348 		 */
349 		for (j = 0, ih_p = ipil_p->ipil_ih_start;
350 		    ih_p && (j < ipil_p->ipil_ih_size) &&
351 		    ((ih_p->ih_msg_code != msg_code) ||
352 		    (ih_p->ih_rec_type != rec_type));
353 		    ih_p = ih_p->ih_next, j++)
354 			;
355 
356 		if ((ih_p->ih_msg_code == msg_code) &&
357 		    (ih_p->ih_rec_type == rec_type)) {
358 			dev_info_t *ih_dip = ih_p->ih_dip;
359 			uint_t (*handler)() = ih_p->ih_handler;
360 			caddr_t arg1 = ih_p->ih_handler_arg1;
361 			caddr_t arg2 = ih_p->ih_handler_arg2;
362 
363 			DBG(DBG_MSIQ_INTR, ih_dip, "px_msiq_intr: ino=%x "
364 			    "data=%x handler=%p arg1 =%p arg2=%p\n",
365 			    ino_p->ino_ino, msg_code, handler, arg1, arg2);
366 
367 			DTRACE_PROBE4(interrupt__start, dev_info_t, ih_dip,
368 			    void *, handler, caddr_t, arg1, caddr_t, arg2);
369 
370 			ih_p->ih_intr_flags = PX_INTR_PENDING;
371 
372 			/*
373 			 * Special case for PCIE Error Messages.
374 			 * The current frame work doesn't fit PCIE Err Msgs
375 			 * This should be fixed when PCIE MESSAGES as a whole
376 			 * is architected correctly.
377 			 */
378 			if ((rec_type == MSG_REC) &&
379 			    ((msg_code == PCIE_MSG_CODE_ERR_COR) ||
380 			    (msg_code == PCIE_MSG_CODE_ERR_NONFATAL) ||
381 			    (msg_code == PCIE_MSG_CODE_ERR_FATAL))) {
382 				ret = px_err_fabric_intr(px_p, msg_code,
383 				    msiq_rec_p->msiq_rec_rid);
384 			} else {
385 				/* Clear MSI state */
386 				px_lib_msi_setstate(dip, (msinum_t)msg_code,
387 				    PCI_MSI_STATE_IDLE);
388 
389 				ret = (*handler)(arg1, arg2);
390 			}
391 
392 			/*
393 			 * Account for time used by this interrupt. Protect
394 			 * against conflicting writes to ih_ticks from
395 			 * ib_intr_dist_all() by using atomic ops.
396 			 */
397 
398 			if (pil <= LOCK_LEVEL)
399 				atomic_add_64(&ih_p->ih_ticks, intr_get_time());
400 
401 			DTRACE_PROBE4(interrupt__complete, dev_info_t, ih_dip,
402 			    void *, handler, caddr_t, arg1, int, ret);
403 
404 			/* clear handler status flags */
405 			ih_p->ih_intr_flags = PX_INTR_IDLE;
406 
407 			msiq_p->msiq_new_head_index++;
408 			px_lib_clr_msiq_rec(ih_dip, curr_head_p);
409 		} else {
410 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: "
411 			    "No matching MSIQ record found\n");
412 		}
413 next_rec:
414 		/* Get the pointer next EQ record */
415 		curr_head_p = (msiqhead_t *)
416 		    ((caddr_t)curr_head_p + sizeof (msiq_rec_t));
417 
418 		/* Check for overflow condition */
419 		if (curr_head_p >= (msiqhead_t *)((caddr_t)msiq_p->msiq_base_p
420 		    + (msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t))))
421 			curr_head_p = (msiqhead_t *)msiq_p->msiq_base_p;
422 	}
423 
424 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: No of MSIQ recs processed %x\n",
425 	    (msiq_p->msiq_new_head_index - msiq_p->msiq_curr_head_index));
426 
427 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: curr_head %x new_head %x "
428 	    "rec2process %x\n", msiq_p->msiq_curr_head_index,
429 	    msiq_p->msiq_new_head_index, msiq_p->msiq_recs2process);
430 
431 	/* ino_claimed used just for debugging purpose */
432 	if (ret)
433 		ino_p->ino_claimed |= (1 << pil);
434 
435 intr_done:
436 	/* Interrupt can only be cleared after all pil levels are handled */
437 	if (--ino_p->ino_ipil_cntr != 0)
438 		return (DDI_INTR_CLAIMED);
439 
440 	if (msiq_p->msiq_new_head_index <= msiq_p->msiq_curr_head_index)  {
441 		if (px_unclaimed_intr_block)
442 			return (px_spurintr(ipil_p));
443 	}
444 
445 	/*  Update MSIQ head index with no of MSIQ records processed */
446 	if (msiq_p->msiq_new_head_index >= msiq_state_p->msiq_rec_cnt)
447 		msiq_p->msiq_new_head_index -= msiq_state_p->msiq_rec_cnt;
448 
449 	msiq_p->msiq_curr_head_index = msiq_p->msiq_new_head_index;
450 	px_lib_msiq_sethead(dip, msiq_p->msiq_id, msiq_p->msiq_new_head_index);
451 
452 	msiq_p->msiq_new_head_index = 0;
453 	msiq_p->msiq_recs2process = 0;
454 	ino_p->ino_claimed = 0;
455 
456 	/* Clear the pending state */
457 	if (px_lib_intr_setstate(dip, ino_p->ino_sysino,
458 	    INTR_IDLE_STATE) != DDI_SUCCESS)
459 		return (DDI_INTR_UNCLAIMED);
460 
461 	return (DDI_INTR_CLAIMED);
462 }
463 
464 dev_info_t *
465 px_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip)
466 {
467 	dev_info_t	*cdip = rdip;
468 
469 	for (; ddi_get_parent(cdip) != dip; cdip = ddi_get_parent(cdip))
470 		;
471 
472 	return (cdip);
473 }
474 
475 /* ARGSUSED */
476 int
477 px_intx_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
478     ddi_intr_handle_impl_t *hdlp, void *result)
479 {
480 	px_t	*px_p = DIP_TO_STATE(dip);
481 	int	ret = DDI_SUCCESS;
482 
483 	DBG(DBG_INTROPS, dip, "px_intx_ops: dip=%x rdip=%x intr_op=%x "
484 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
485 
486 	switch (intr_op) {
487 	case DDI_INTROP_GETCAP:
488 		ret = pci_intx_get_cap(rdip, (int *)result);
489 		break;
490 	case DDI_INTROP_SETCAP:
491 		DBG(DBG_INTROPS, dip, "px_intx_ops: SetCap is not supported\n");
492 		ret = DDI_ENOTSUP;
493 		break;
494 	case DDI_INTROP_ALLOC:
495 		*(int *)result = hdlp->ih_scratch1;
496 		break;
497 	case DDI_INTROP_FREE:
498 		break;
499 	case DDI_INTROP_GETPRI:
500 		*(int *)result = hdlp->ih_pri ?
501 		    hdlp->ih_pri : pci_class_to_pil(rdip);
502 		break;
503 	case DDI_INTROP_SETPRI:
504 		break;
505 	case DDI_INTROP_ADDISR:
506 		ret = px_add_intx_intr(dip, rdip, hdlp);
507 		break;
508 	case DDI_INTROP_REMISR:
509 		ret = px_rem_intx_intr(dip, rdip, hdlp);
510 		break;
511 	case DDI_INTROP_GETTARGET:
512 		ret = px_ib_get_intr_target(px_p, hdlp->ih_vector,
513 		    (cpuid_t *)result);
514 		break;
515 	case DDI_INTROP_SETTARGET:
516 		ret = DDI_ENOTSUP;
517 		break;
518 	case DDI_INTROP_ENABLE:
519 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
520 		    hdlp->ih_vector, hdlp->ih_pri, PX_INTR_STATE_ENABLE, 0, 0);
521 		break;
522 	case DDI_INTROP_DISABLE:
523 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
524 		    hdlp->ih_vector, hdlp->ih_pri, PX_INTR_STATE_DISABLE, 0, 0);
525 		break;
526 	case DDI_INTROP_SETMASK:
527 		ret = pci_intx_set_mask(rdip);
528 		break;
529 	case DDI_INTROP_CLRMASK:
530 		ret = pci_intx_clr_mask(rdip);
531 		break;
532 	case DDI_INTROP_GETPENDING:
533 		ret = pci_intx_get_pending(rdip, (int *)result);
534 		break;
535 	case DDI_INTROP_NINTRS:
536 	case DDI_INTROP_NAVAIL:
537 		*(int *)result = i_ddi_get_intx_nintrs(rdip);
538 		break;
539 	default:
540 		ret = DDI_ENOTSUP;
541 		break;
542 	}
543 
544 	return (ret);
545 }
546 
547 /* ARGSUSED */
548 int
549 px_msix_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
550     ddi_intr_handle_impl_t *hdlp, void *result)
551 {
552 	px_t			*px_p = DIP_TO_STATE(dip);
553 	px_msi_state_t		*msi_state_p = &px_p->px_ib_p->ib_msi_state;
554 	msiq_rec_type_t		msiq_rec_type;
555 	msi_type_t		msi_type;
556 	uint64_t		msi_addr;
557 	msinum_t		msi_num;
558 	msiqid_t		msiq_id;
559 	uint_t			nintrs;
560 	int			ret = DDI_SUCCESS;
561 
562 	DBG(DBG_INTROPS, dip, "px_msix_ops: dip=%x rdip=%x intr_op=%x "
563 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
564 
565 	/* Check for MSI64 support */
566 	if ((hdlp->ih_cap & DDI_INTR_FLAG_MSI64) && msi_state_p->msi_addr64) {
567 		msiq_rec_type = MSI64_REC;
568 		msi_type = MSI64_TYPE;
569 		msi_addr = msi_state_p->msi_addr64;
570 	} else {
571 		msiq_rec_type = MSI32_REC;
572 		msi_type = MSI32_TYPE;
573 		msi_addr = msi_state_p->msi_addr32;
574 	}
575 
576 	(void) px_msi_get_msinum(px_p, hdlp->ih_dip,
577 	    (hdlp->ih_flags & DDI_INTR_MSIX_DUP) ? hdlp->ih_main->ih_inum :
578 	    hdlp->ih_inum, &msi_num);
579 
580 	switch (intr_op) {
581 	case DDI_INTROP_GETCAP:
582 		ret = pci_msi_get_cap(rdip, hdlp->ih_type, (int *)result);
583 		break;
584 	case DDI_INTROP_SETCAP:
585 		DBG(DBG_INTROPS, dip, "px_msix_ops: SetCap is not supported\n");
586 		ret = DDI_ENOTSUP;
587 		break;
588 	case DDI_INTROP_ALLOC:
589 		/*
590 		 * We need to restrict this allocation in future
591 		 * based on Resource Management policies.
592 		 */
593 		if ((ret = px_msi_alloc(px_p, rdip, hdlp->ih_type,
594 		    hdlp->ih_inum, hdlp->ih_scratch1,
595 		    (uintptr_t)hdlp->ih_scratch2,
596 		    (int *)result)) != DDI_SUCCESS) {
597 			DBG(DBG_INTROPS, dip, "px_msix_ops: allocation "
598 			    "failed, rdip 0x%p type 0x%d inum 0x%x "
599 			    "count 0x%x\n", rdip, hdlp->ih_type, hdlp->ih_inum,
600 			    hdlp->ih_scratch1);
601 
602 			return (ret);
603 		}
604 
605 		if ((hdlp->ih_type == DDI_INTR_TYPE_MSIX) &&
606 		    (i_ddi_get_msix(rdip) == NULL)) {
607 			ddi_intr_msix_t		*msix_p;
608 
609 			if (msix_p = pci_msix_init(rdip)) {
610 				i_ddi_set_msix(rdip, msix_p);
611 				break;
612 			}
613 
614 			DBG(DBG_INTROPS, dip, "px_msix_ops: MSI-X allocation "
615 			    "failed, rdip 0x%p inum 0x%x\n", rdip,
616 			    hdlp->ih_inum);
617 
618 			(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
619 			    hdlp->ih_scratch1);
620 
621 			return (DDI_FAILURE);
622 		}
623 
624 		break;
625 	case DDI_INTROP_FREE:
626 		(void) pci_msi_unconfigure(rdip, hdlp->ih_type, hdlp->ih_inum);
627 
628 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
629 			goto msi_free;
630 
631 		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
632 			break;
633 
634 		if (((i_ddi_intr_get_current_nintrs(hdlp->ih_dip) - 1) == 0) &&
635 		    (i_ddi_get_msix(rdip))) {
636 			pci_msix_fini(i_ddi_get_msix(rdip));
637 			i_ddi_set_msix(rdip, NULL);
638 		}
639 msi_free:
640 		(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
641 		    hdlp->ih_scratch1);
642 		break;
643 	case DDI_INTROP_GETPRI:
644 		*(int *)result = hdlp->ih_pri ?
645 		    hdlp->ih_pri : pci_class_to_pil(rdip);
646 		break;
647 	case DDI_INTROP_SETPRI:
648 		break;
649 	case DDI_INTROP_ADDISR:
650 		if ((ret = px_add_msiq_intr(dip, rdip, hdlp,
651 		    msiq_rec_type, msi_num, -1, &msiq_id)) != DDI_SUCCESS) {
652 			DBG(DBG_INTROPS, dip, "px_msix_ops: Add MSI handler "
653 			    "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num);
654 			return (ret);
655 		}
656 
657 		DBG(DBG_INTROPS, dip, "px_msix_ops: msiq used 0x%x\n", msiq_id);
658 
659 		if ((ret = px_lib_msi_setmsiq(dip, msi_num,
660 		    msiq_id, msi_type)) != DDI_SUCCESS) {
661 			(void) px_rem_msiq_intr(dip, rdip,
662 			    hdlp, msiq_rec_type, msi_num, msiq_id);
663 			return (ret);
664 		}
665 
666 		if ((ret = px_lib_msi_setstate(dip, msi_num,
667 		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS) {
668 			(void) px_rem_msiq_intr(dip, rdip,
669 			    hdlp, msiq_rec_type, msi_num, msiq_id);
670 			return (ret);
671 		}
672 
673 		if ((ret = px_lib_msi_setvalid(dip, msi_num,
674 		    PCI_MSI_VALID)) != DDI_SUCCESS)
675 			return (ret);
676 
677 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
678 		    px_msiqid_to_devino(px_p, msiq_id), hdlp->ih_pri,
679 		    PX_INTR_STATE_ENABLE, msiq_rec_type, msi_num);
680 
681 		break;
682 	case DDI_INTROP_DUPVEC:
683 		DBG(DBG_INTROPS, dip, "px_msix_ops: dupisr - inum: %x, "
684 		    "new_vector: %x\n", hdlp->ih_inum, hdlp->ih_scratch1);
685 
686 		ret = pci_msix_dup(hdlp->ih_dip, hdlp->ih_inum,
687 		    hdlp->ih_scratch1);
688 		break;
689 	case DDI_INTROP_REMISR:
690 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
691 		    &msiq_id)) != DDI_SUCCESS)
692 			return (ret);
693 
694 		if ((ret = px_ib_update_intr_state(px_p, rdip,
695 		    hdlp->ih_inum, px_msiqid_to_devino(px_p, msiq_id),
696 		    hdlp->ih_pri, PX_INTR_STATE_DISABLE, msiq_rec_type,
697 		    msi_num)) != DDI_SUCCESS)
698 			return (ret);
699 
700 		if ((ret = px_lib_msi_setvalid(dip, msi_num,
701 		    PCI_MSI_INVALID)) != DDI_SUCCESS)
702 			return (ret);
703 
704 		if ((ret = px_lib_msi_setstate(dip, msi_num,
705 		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS)
706 			return (ret);
707 
708 		ret = px_rem_msiq_intr(dip, rdip,
709 		    hdlp, msiq_rec_type, msi_num, msiq_id);
710 
711 		break;
712 	case DDI_INTROP_GETTARGET:
713 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
714 		    &msiq_id)) != DDI_SUCCESS)
715 			return (ret);
716 
717 		ret = px_ib_get_intr_target(px_p,
718 		    px_msiqid_to_devino(px_p, msiq_id), (cpuid_t *)result);
719 		break;
720 	case DDI_INTROP_SETTARGET:
721 		ret = px_ib_set_msix_target(px_p, hdlp, msi_num,
722 		    *(cpuid_t *)result);
723 		break;
724 	case DDI_INTROP_ENABLE:
725 		/*
726 		 * For MSI, just clear the mask bit and return if curr_nenables
727 		 * is > 1. For MSI-X, program MSI address and data for every
728 		 * MSI-X vector including dup vectors irrespective of current
729 		 * curr_nenables value.
730 		 */
731 		if ((pci_is_msi_enabled(rdip, hdlp->ih_type) != DDI_SUCCESS) ||
732 		    (hdlp->ih_type == DDI_INTR_TYPE_MSIX)) {
733 			nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
734 
735 			if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
736 			    nintrs, hdlp->ih_inum, msi_addr,
737 			    hdlp->ih_type == DDI_INTR_TYPE_MSIX ?
738 			    msi_num : msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
739 				return (ret);
740 
741 			if (i_ddi_intr_get_current_nenables(rdip) < 1) {
742 				if ((ret = pci_msi_enable_mode(rdip,
743 				    hdlp->ih_type)) != DDI_SUCCESS)
744 					return (ret);
745 			}
746 		}
747 
748 		if ((ret = pci_msi_clr_mask(rdip, hdlp->ih_type,
749 		    hdlp->ih_inum)) != DDI_SUCCESS)
750 			return (ret);
751 
752 		break;
753 	case DDI_INTROP_DISABLE:
754 		if ((ret = pci_msi_set_mask(rdip, hdlp->ih_type,
755 		    hdlp->ih_inum)) != DDI_SUCCESS)
756 			return (ret);
757 
758 		/*
759 		 * curr_nenables will be greater than 1 if rdip is using
760 		 * MSI-X and also, if it is using DUP interface. If this
761 		 * curr_enables is > 1, return after setting the mask bit.
762 		 */
763 		if (i_ddi_intr_get_current_nenables(rdip) > 1)
764 			return (DDI_SUCCESS);
765 
766 		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type))
767 		    != DDI_SUCCESS)
768 			return (ret);
769 
770 		break;
771 	case DDI_INTROP_BLOCKENABLE:
772 		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
773 
774 		if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
775 		    nintrs, hdlp->ih_inum, msi_addr,
776 		    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
777 			return (ret);
778 
779 		ret = pci_msi_enable_mode(rdip, hdlp->ih_type);
780 		break;
781 	case DDI_INTROP_BLOCKDISABLE:
782 		ret = pci_msi_disable_mode(rdip, hdlp->ih_type);
783 		break;
784 	case DDI_INTROP_SETMASK:
785 		ret = pci_msi_set_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
786 		break;
787 	case DDI_INTROP_CLRMASK:
788 		ret = pci_msi_clr_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
789 		break;
790 	case DDI_INTROP_GETPENDING:
791 		ret = pci_msi_get_pending(rdip, hdlp->ih_type,
792 		    hdlp->ih_inum, (int *)result);
793 		break;
794 	case DDI_INTROP_NINTRS:
795 		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
796 		break;
797 	case DDI_INTROP_NAVAIL:
798 		/* XXX - a new interface may be needed */
799 		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
800 		break;
801 	case DDI_INTROP_GETPOOL:
802 		if (msi_state_p->msi_pool_p == NULL) {
803 			*(ddi_irm_pool_t **)result = NULL;
804 			return (DDI_ENOTSUP);
805 		}
806 		*(ddi_irm_pool_t **)result = msi_state_p->msi_pool_p;
807 		ret = DDI_SUCCESS;
808 		break;
809 	default:
810 		ret = DDI_ENOTSUP;
811 		break;
812 	}
813 
814 	return (ret);
815 }
816 
/*
 * Named-kstat data block for the "pci_intrs" kstats.  There is a single
 * instance: px_create_intr_kstats() points the ks_data of every kstat it
 * creates at this one template, and px_ks_update() rewrites the values for
 * whichever handler is being queried.
 */
static struct {
	kstat_named_t pxintr_ks_name;		/* driver name + instance */
	kstat_named_t pxintr_ks_type;		/* msi/msix/fixed/disabled */
	kstat_named_t pxintr_ks_cpu;		/* ino's cpu id */
	kstat_named_t pxintr_ks_pil;		/* interrupt priority level */
	kstat_named_t pxintr_ks_time;		/* accumulated handler time */
	kstat_named_t pxintr_ks_ino;		/* device ino */
	kstat_named_t pxintr_ks_cookie;		/* sysino */
	kstat_named_t pxintr_ks_devpath;	/* path of handler's device */
	kstat_named_t pxintr_ks_buspath;	/* path of the px nexus */
} pxintr_ks_template = {
	{ "name",	KSTAT_DATA_CHAR },
	{ "type",	KSTAT_DATA_CHAR },
	{ "cpu",	KSTAT_DATA_UINT64 },
	{ "pil",	KSTAT_DATA_UINT64 },
	{ "time",	KSTAT_DATA_UINT64 },
	{ "ino",	KSTAT_DATA_UINT64 },
	{ "cookie",	KSTAT_DATA_UINT64 },
	{ "devpath",	KSTAT_DATA_STRING },
	{ "buspath",	KSTAT_DATA_STRING },
};

/* Source of the instance number handed to kstat_create() */
static uint32_t pxintr_ks_instance;
/* Scratch buffers px_ks_update() fills with the device and bus pathnames */
static char ih_devpath[MAXPATHLEN];
static char ih_buspath[MAXPATHLEN];
/* Installed as ks_lock on every kstat that shares the template above */
kmutex_t pxintr_ks_template_lock;
843 
844 int
845 px_ks_update(kstat_t *ksp, int rw)
846 {
847 	px_ih_t *ih_p = ksp->ks_private;
848 	int maxlen = sizeof (pxintr_ks_template.pxintr_ks_name.value.c);
849 	px_ino_pil_t *ipil_p = ih_p->ih_ipil_p;
850 	px_ino_t *ino_p = ipil_p->ipil_ino_p;
851 	px_t *px_p = ino_p->ino_ib_p->ib_px_p;
852 	devino_t ino;
853 	sysino_t sysino;
854 
855 	ino = ino_p->ino_ino;
856 	if (px_lib_intr_devino_to_sysino(px_p->px_dip, ino, &sysino) !=
857 	    DDI_SUCCESS) {
858 		cmn_err(CE_WARN, "px_ks_update: px_lib_intr_devino_to_sysino "
859 		    "failed");
860 	}
861 
862 	(void) snprintf(pxintr_ks_template.pxintr_ks_name.value.c, maxlen,
863 	    "%s%d", ddi_driver_name(ih_p->ih_dip),
864 	    ddi_get_instance(ih_p->ih_dip));
865 
866 	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
867 	(void) ddi_pathname(px_p->px_dip, ih_buspath);
868 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_devpath, ih_devpath);
869 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_buspath, ih_buspath);
870 
871 	if (ih_p->ih_intr_state == PX_INTR_STATE_ENABLE) {
872 
873 		switch (i_ddi_intr_get_current_type(ih_p->ih_dip)) {
874 		case DDI_INTR_TYPE_MSI:
875 			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
876 			    "msi");
877 			break;
878 		case DDI_INTR_TYPE_MSIX:
879 			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
880 			    "msix");
881 			break;
882 		default:
883 			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
884 			    "fixed");
885 			break;
886 		}
887 
888 		pxintr_ks_template.pxintr_ks_cpu.value.ui64 = ino_p->ino_cpuid;
889 		pxintr_ks_template.pxintr_ks_pil.value.ui64 = ipil_p->ipil_pil;
890 		pxintr_ks_template.pxintr_ks_time.value.ui64 = ih_p->ih_nsec +
891 		    (uint64_t)tick2ns((hrtime_t)ih_p->ih_ticks,
892 		    ino_p->ino_cpuid);
893 		pxintr_ks_template.pxintr_ks_ino.value.ui64 = ino;
894 		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = sysino;
895 	} else {
896 		(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
897 		    "disabled");
898 		pxintr_ks_template.pxintr_ks_cpu.value.ui64 = 0;
899 		pxintr_ks_template.pxintr_ks_pil.value.ui64 = 0;
900 		pxintr_ks_template.pxintr_ks_time.value.ui64 = 0;
901 		pxintr_ks_template.pxintr_ks_ino.value.ui64 = 0;
902 		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = 0;
903 	}
904 	return (0);
905 }
906 
907 void
908 px_create_intr_kstats(px_ih_t *ih_p)
909 {
910 	msiq_rec_type_t rec_type = ih_p->ih_rec_type;
911 
912 	ASSERT(ih_p->ih_ksp == NULL);
913 
914 	/*
915 	 * Create pci_intrs::: kstats for all ih types except messages,
916 	 * which represent unusual conditions and don't need to be tracked.
917 	 */
918 	if (rec_type == 0 || rec_type == MSI32_REC || rec_type == MSI64_REC) {
919 		ih_p->ih_ksp = kstat_create("pci_intrs",
920 		    atomic_inc_32_nv(&pxintr_ks_instance), "config",
921 		    "interrupts", KSTAT_TYPE_NAMED,
922 		    sizeof (pxintr_ks_template) / sizeof (kstat_named_t),
923 		    KSTAT_FLAG_VIRTUAL);
924 	}
925 	if (ih_p->ih_ksp != NULL) {
926 		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
927 		ih_p->ih_ksp->ks_lock = &pxintr_ks_template_lock;
928 		ih_p->ih_ksp->ks_data = &pxintr_ks_template;
929 		ih_p->ih_ksp->ks_private = ih_p;
930 		ih_p->ih_ksp->ks_update = px_ks_update;
931 	}
932 }
933 
934 /*
935  * px_add_intx_intr:
936  *
937  * This function is called to register INTx and legacy hardware
938  * interrupt pins interrupts.
939  */
940 int
941 px_add_intx_intr(dev_info_t *dip, dev_info_t *rdip,
942     ddi_intr_handle_impl_t *hdlp)
943 {
944 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
945 	px_ib_t		*ib_p = px_p->px_ib_p;
946 	devino_t	ino;
947 	px_ih_t		*ih_p;
948 	px_ino_t	*ino_p;
949 	px_ino_pil_t	*ipil_p, *ipil_list;
950 	int32_t		weight;
951 	int		ret = DDI_SUCCESS;
952 	cpuid_t		curr_cpu;
953 
954 	ino = hdlp->ih_vector;
955 
956 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: rdip=%s%d ino=%x "
957 	    "handler=%x arg1=%x arg2=%x\n", ddi_driver_name(rdip),
958 	    ddi_get_instance(rdip), ino, hdlp->ih_cb_func,
959 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
960 
961 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum,
962 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, 0, 0);
963 
964 	mutex_enter(&ib_p->ib_ino_lst_mutex);
965 
966 	ino_p = px_ib_locate_ino(ib_p, ino);
967 	ipil_list = ino_p ? ino_p->ino_ipil_p : NULL;
968 
969 	if (hdlp->ih_pri == 0)
970 		hdlp->ih_pri = pci_class_to_pil(rdip);
971 
972 	/* Sharing the INO using a PIL that already exists */
973 	if (ino_p && (ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
974 		if (px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, 0, 0)) {
975 			DBG(DBG_A_INTX, dip, "px_add_intx_intr: "
976 			    "dup intr #%d\n", hdlp->ih_inum);
977 
978 			ret = DDI_FAILURE;
979 			goto fail1;
980 		}
981 
982 		/* Save mondo value in hdlp */
983 		hdlp->ih_vector = ino_p->ino_sysino;
984 
985 		if ((ret = px_ib_ino_add_intr(px_p, ipil_p,
986 		    ih_p)) != DDI_SUCCESS)
987 			goto fail1;
988 
989 		goto ino_done;
990 	}
991 
992 	/* Sharing the INO using a new PIL */
993 	if (ipil_list != NULL) {
994 		/*
995 		 * disable INO to avoid lopil race condition with
996 		 * px_intx_intr
997 		 */
998 
999 		if ((ret = px_lib_intr_gettarget(dip, ino_p->ino_sysino,
1000 		    &curr_cpu)) != DDI_SUCCESS) {
1001 			DBG(DBG_IB, dip,
1002 			    "px_add_intx_intr px_intr_gettarget() failed\n");
1003 
1004 			goto fail1;
1005 		}
1006 
1007 		/* Wait on pending interrupt */
1008 		if ((ret = px_ib_intr_pend(dip, ino_p->ino_sysino)) !=
1009 		    DDI_SUCCESS) {
1010 			cmn_err(CE_WARN, "%s%d: px_add_intx_intr: "
1011 			    "pending sysino 0x%lx(ino 0x%x) timeout",
1012 			    ddi_driver_name(dip), ddi_get_instance(dip),
1013 			    ino_p->ino_sysino, ino_p->ino_ino);
1014 			goto fail1;
1015 		}
1016 	}
1017 
1018 	ipil_p = px_ib_new_ino_pil(ib_p, ino, hdlp->ih_pri, ih_p);
1019 	ino_p = ipil_p->ipil_ino_p;
1020 
1021 	/* Save mondo value in hdlp */
1022 	hdlp->ih_vector = ino_p->ino_sysino;
1023 
1024 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: pil=0x%x mondo=0x%x\n",
1025 	    hdlp->ih_pri, hdlp->ih_vector);
1026 
1027 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1028 	    (ddi_intr_handler_t *)px_intx_intr, (caddr_t)ipil_p, NULL);
1029 
1030 	ret = i_ddi_add_ivintr(hdlp);
1031 
1032 	/*
1033 	 * Restore original interrupt handler
1034 	 * and arguments in interrupt handle.
1035 	 */
1036 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1037 	    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1038 
1039 	if (ret != DDI_SUCCESS)
1040 		goto fail2;
1041 
1042 	/* Save the pil for this ino */
1043 	ipil_p->ipil_pil = hdlp->ih_pri;
1044 
1045 	/* Select cpu, saving it for sharing and removal */
1046 	if (ipil_list == NULL) {
1047 		if (ino_p->ino_cpuid == -1)
1048 			ino_p->ino_cpuid = intr_dist_cpuid();
1049 
1050 		/* Enable interrupt */
1051 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
1052 	} else {
1053 		/* Re-enable interrupt */
1054 		PX_INTR_ENABLE(dip, ino_p->ino_sysino, curr_cpu);
1055 	}
1056 
1057 ino_done:
1058 	hdlp->ih_target = ino_p->ino_cpuid;
1059 
1060 	/* Add weight to the cpu that we are already targeting */
1061 	weight = pci_class_to_intr_weight(rdip);
1062 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1063 
1064 	ih_p->ih_ipil_p = ipil_p;
1065 	px_create_intr_kstats(ih_p);
1066 	if (ih_p->ih_ksp)
1067 		kstat_install(ih_p->ih_ksp);
1068 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1069 
1070 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: done! Interrupt 0x%x pil=%x\n",
1071 	    ino_p->ino_sysino, hdlp->ih_pri);
1072 
1073 	return (ret);
1074 fail2:
1075 	px_ib_delete_ino_pil(ib_p, ipil_p);
1076 fail1:
1077 	if (ih_p->ih_config_handle)
1078 		pci_config_teardown(&ih_p->ih_config_handle);
1079 
1080 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1081 	kmem_free(ih_p, sizeof (px_ih_t));
1082 
1083 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: Failed! Interrupt 0x%x "
1084 	    "pil=%x\n", ino_p->ino_sysino, hdlp->ih_pri);
1085 
1086 	return (ret);
1087 }
1088 
1089 /*
1090  * px_rem_intx_intr:
1091  *
1092  * This function is called to unregister INTx and legacy hardware
1093  * interrupt pins interrupts.
1094  */
1095 int
1096 px_rem_intx_intr(dev_info_t *dip, dev_info_t *rdip,
1097     ddi_intr_handle_impl_t *hdlp)
1098 {
1099 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1100 	px_ib_t		*ib_p = px_p->px_ib_p;
1101 	devino_t	ino;
1102 	cpuid_t		curr_cpu;
1103 	px_ino_t	*ino_p;
1104 	px_ino_pil_t	*ipil_p;
1105 	px_ih_t		*ih_p;
1106 	int		ret = DDI_SUCCESS;
1107 
1108 	ino = hdlp->ih_vector;
1109 
1110 	DBG(DBG_R_INTX, dip, "px_rem_intx_intr: rdip=%s%d ino=%x\n",
1111 	    ddi_driver_name(rdip), ddi_get_instance(rdip), ino);
1112 
1113 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1114 
1115 	ino_p = px_ib_locate_ino(ib_p, ino);
1116 	ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri);
1117 	ih_p = px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, 0, 0);
1118 
1119 	/* Get the current cpu */
1120 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1121 	    &curr_cpu)) != DDI_SUCCESS)
1122 		goto fail;
1123 
1124 	if ((ret = px_ib_ino_rem_intr(px_p, ipil_p, ih_p)) != DDI_SUCCESS)
1125 		goto fail;
1126 
1127 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1128 
1129 	if (ipil_p->ipil_ih_size == 0) {
1130 		hdlp->ih_vector = ino_p->ino_sysino;
1131 		i_ddi_rem_ivintr(hdlp);
1132 
1133 		px_ib_delete_ino_pil(ib_p, ipil_p);
1134 	}
1135 
1136 	if (ino_p->ino_ipil_size == 0) {
1137 		kmem_free(ino_p, sizeof (px_ino_t));
1138 	} else {
1139 		/* Re-enable interrupt only if mapping register still shared */
1140 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1141 	}
1142 
1143 fail:
1144 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1145 	return (ret);
1146 }
1147 
1148 /*
1149  * px_add_msiq_intr:
1150  *
1151  * This function is called to register MSI/Xs and PCIe message interrupts.
1152  */
1153 int
1154 px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1155     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1156     msgcode_t msg_code, cpuid_t cpu_id, msiqid_t *msiq_id_p)
1157 {
1158 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1159 	px_ib_t		*ib_p = px_p->px_ib_p;
1160 	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
1161 	devino_t	ino;
1162 	px_ih_t		*ih_p;
1163 	px_ino_t	*ino_p;
1164 	px_ino_pil_t	*ipil_p, *ipil_list;
1165 	int32_t		weight;
1166 	int		ret = DDI_SUCCESS;
1167 
1168 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=0x%x "
1169 	    "arg1=0x%x arg2=0x%x cpu=0x%x\n", ddi_driver_name(rdip),
1170 	    ddi_get_instance(rdip), hdlp->ih_cb_func, hdlp->ih_cb_arg1,
1171 	    hdlp->ih_cb_arg2, cpu_id);
1172 
1173 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum, hdlp->ih_cb_func,
1174 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, rec_type, msg_code);
1175 
1176 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1177 
1178 	ret = (cpu_id == -1) ? px_msiq_alloc(px_p, rec_type, msg_code,
1179 	    msiq_id_p) : px_msiq_alloc_based_on_cpuid(px_p, rec_type,
1180 	    cpu_id, msiq_id_p);
1181 
1182 	if (ret != DDI_SUCCESS) {
1183 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1184 		    "msiq allocation failed\n");
1185 		goto fail;
1186 	}
1187 
1188 	ino = px_msiqid_to_devino(px_p, *msiq_id_p);
1189 
1190 	ino_p = px_ib_locate_ino(ib_p, ino);
1191 	ipil_list = ino_p ? ino_p->ino_ipil_p : NULL;
1192 
1193 	if (hdlp->ih_pri == 0)
1194 		hdlp->ih_pri = pci_class_to_pil(rdip);
1195 
1196 	/* Sharing ino */
1197 	if (ino_p && (ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
1198 		if (px_ib_intr_locate_ih(ipil_p, rdip,
1199 		    hdlp->ih_inum, rec_type, msg_code)) {
1200 			DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1201 			    "dup intr #%d\n", hdlp->ih_inum);
1202 
1203 			ret = DDI_FAILURE;
1204 			goto fail1;
1205 		}
1206 
1207 		/* Save mondo value in hdlp */
1208 		hdlp->ih_vector = ino_p->ino_sysino;
1209 
1210 		if ((ret = px_ib_ino_add_intr(px_p, ipil_p,
1211 		    ih_p)) != DDI_SUCCESS)
1212 			goto fail1;
1213 
1214 		goto ino_done;
1215 	}
1216 
1217 	ipil_p = px_ib_new_ino_pil(ib_p, ino, hdlp->ih_pri, ih_p);
1218 	ino_p = ipil_p->ipil_ino_p;
1219 
1220 	ino_p->ino_msiq_p = msiq_state_p->msiq_p +
1221 	    (*msiq_id_p - msiq_state_p->msiq_1st_msiq_id);
1222 
1223 	/* Save mondo value in hdlp */
1224 	hdlp->ih_vector = ino_p->ino_sysino;
1225 
1226 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: pil=0x%x mondo=0x%x\n",
1227 	    hdlp->ih_pri, hdlp->ih_vector);
1228 
1229 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1230 	    (ddi_intr_handler_t *)px_msiq_intr, (caddr_t)ipil_p, NULL);
1231 
1232 	ret = i_ddi_add_ivintr(hdlp);
1233 
1234 	/*
1235 	 * Restore original interrupt handler
1236 	 * and arguments in interrupt handle.
1237 	 */
1238 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1239 	    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1240 
1241 	if (ret != DDI_SUCCESS)
1242 		goto fail2;
1243 
1244 	/* Save the pil for this ino */
1245 	ipil_p->ipil_pil = hdlp->ih_pri;
1246 
1247 	/* Select cpu, saving it for sharing and removal */
1248 	if (ipil_list == NULL) {
1249 		/* Enable MSIQ */
1250 		px_lib_msiq_setstate(dip, *msiq_id_p, PCI_MSIQ_STATE_IDLE);
1251 		px_lib_msiq_setvalid(dip, *msiq_id_p, PCI_MSIQ_VALID);
1252 
1253 		if (ino_p->ino_cpuid == -1)
1254 			ino_p->ino_cpuid = intr_dist_cpuid();
1255 
1256 		/* Enable interrupt */
1257 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
1258 	}
1259 
1260 ino_done:
1261 	hdlp->ih_target = ino_p->ino_cpuid;
1262 
1263 	/* Add weight to the cpu that we are already targeting */
1264 	weight = pci_class_to_intr_weight(rdip);
1265 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1266 
1267 	ih_p->ih_ipil_p = ipil_p;
1268 	px_create_intr_kstats(ih_p);
1269 	if (ih_p->ih_ksp)
1270 		kstat_install(ih_p->ih_ksp);
1271 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1272 
1273 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: done! Interrupt 0x%x pil=%x\n",
1274 	    ino_p->ino_sysino, hdlp->ih_pri);
1275 
1276 	return (ret);
1277 fail2:
1278 	px_ib_delete_ino_pil(ib_p, ipil_p);
1279 fail1:
1280 	(void) px_msiq_free(px_p, *msiq_id_p);
1281 fail:
1282 	if (ih_p->ih_config_handle)
1283 		pci_config_teardown(&ih_p->ih_config_handle);
1284 
1285 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1286 	kmem_free(ih_p, sizeof (px_ih_t));
1287 
1288 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: Failed! Interrupt 0x%x pil=%x\n",
1289 	    ino_p->ino_sysino, hdlp->ih_pri);
1290 
1291 	return (ret);
1292 }
1293 
1294 /*
1295  * px_rem_msiq_intr:
1296  *
1297  * This function is called to unregister MSI/Xs and PCIe message interrupts.
1298  */
1299 int
1300 px_rem_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1301     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1302     msgcode_t msg_code, msiqid_t msiq_id)
1303 {
1304 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1305 	px_ib_t		*ib_p = px_p->px_ib_p;
1306 	devino_t	ino = px_msiqid_to_devino(px_p, msiq_id);
1307 	cpuid_t		curr_cpu;
1308 	px_ino_t	*ino_p;
1309 	px_ino_pil_t	*ipil_p;
1310 	px_ih_t		*ih_p;
1311 	int		ret = DDI_SUCCESS;
1312 
1313 	DBG(DBG_MSIQ, dip, "px_rem_msiq_intr: rdip=%s%d msiq_id=%x ino=%x\n",
1314 	    ddi_driver_name(rdip), ddi_get_instance(rdip), msiq_id, ino);
1315 
1316 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1317 
1318 	ino_p = px_ib_locate_ino(ib_p, ino);
1319 	ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri);
1320 	ih_p = px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, rec_type,
1321 	    msg_code);
1322 
1323 	/* Get the current cpu */
1324 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1325 	    &curr_cpu)) != DDI_SUCCESS)
1326 		goto fail;
1327 
1328 	if ((ret = px_ib_ino_rem_intr(px_p, ipil_p, ih_p)) != DDI_SUCCESS)
1329 		goto fail;
1330 
1331 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1332 
1333 	if (ipil_p->ipil_ih_size == 0) {
1334 		hdlp->ih_vector = ino_p->ino_sysino;
1335 		i_ddi_rem_ivintr(hdlp);
1336 
1337 		px_ib_delete_ino_pil(ib_p, ipil_p);
1338 
1339 		if (ino_p->ino_ipil_size == 0)
1340 			px_lib_msiq_setvalid(dip,
1341 			    px_devino_to_msiqid(px_p, ino), PCI_MSIQ_INVALID);
1342 	}
1343 
1344 	(void) px_msiq_free(px_p, msiq_id);
1345 
1346 	if (ino_p->ino_ipil_size) {
1347 		/* Re-enable interrupt only if mapping register still shared */
1348 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1349 	}
1350 
1351 fail:
1352 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1353 	return (ret);
1354 }
1355