xref: /titanic_50/usr/src/uts/sun4/io/px/px_intr.c (revision 5bdc182b8368dd35a75d7d1f73639ea9920d03d8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * PX nexus interrupt handling:
28  *	PX device interrupt handler wrapper
29  *	PIL lookup routine
30  *	PX device interrupt related initchild code
31  */
32 
33 #include <sys/types.h>
34 #include <sys/kmem.h>
35 #include <sys/async.h>
36 #include <sys/spl.h>
37 #include <sys/sunddi.h>
38 #include <sys/fm/protocol.h>
39 #include <sys/fm/util.h>
40 #include <sys/machsystm.h>	/* e_ddi_nodeid_to_dip() */
41 #include <sys/ddi_impldefs.h>
42 #include <sys/sdt.h>
43 #include <sys/atomic.h>
44 #include "px_obj.h"
45 #include <sys/ontrap.h>
46 #include <sys/membar.h>
47 #include <sys/clock.h>
48 
49 /*
50  * interrupt jabber:
51  *
52  * When an interrupt line is jabbering, every time the state machine for the
53  * associated ino is idled, a new mondo will be sent and the ino will go into
54  * the pending state again. The mondo will cause a new call to
55  * px_intr_wrapper() which normally idles the ino's state machine which would
56  * precipitate another trip round the loop.
57  *
58  * The loop can be broken by preventing the ino's state machine from being
59  * idled when an interrupt line is jabbering. See the comment at the
60  * beginning of px_intr_wrapper() explaining how the 'interrupt jabber
61  * protection' code does this.
62  */
63 
64 /*LINTLIBRARY*/
65 
66 /*
67  * If the unclaimed interrupt count has reached the limit set by
68  * pci_unclaimed_intr_max within the time limit, then all interrupts
69  * on this ino is blocked by not idling the interrupt state machine.
70  */
71 static int
72 px_spurintr(px_ino_pil_t *ipil_p)
73 {
74 	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
75 	px_ih_t		*ih_p;
76 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
77 	char		*err_fmt_str;
78 	boolean_t	blocked = B_FALSE;
79 	int		i;
80 
81 	if (ino_p->ino_unclaimed_intrs > px_unclaimed_intr_max)
82 		return (DDI_INTR_CLAIMED);
83 
84 	if (!ino_p->ino_unclaimed_intrs)
85 		ino_p->ino_spurintr_begin = ddi_get_lbolt();
86 
87 	ino_p->ino_unclaimed_intrs++;
88 
89 	if (ino_p->ino_unclaimed_intrs <= px_unclaimed_intr_max)
90 		goto clear;
91 
92 	if (drv_hztousec(ddi_get_lbolt() - ino_p->ino_spurintr_begin)
93 	    > px_spurintr_duration) {
94 		ino_p->ino_unclaimed_intrs = 0;
95 		goto clear;
96 	}
97 	err_fmt_str = "%s%d: ino 0x%x blocked";
98 	blocked = B_TRUE;
99 	goto warn;
100 clear:
101 	err_fmt_str = "!%s%d: spurious interrupt from ino 0x%x";
102 warn:
103 	cmn_err(CE_WARN, err_fmt_str, NAMEINST(px_p->px_dip), ino_p->ino_ino);
104 	for (ipil_p = ino_p->ino_ipil_p; ipil_p;
105 	    ipil_p = ipil_p->ipil_next_p) {
106 		for (i = 0, ih_p = ipil_p->ipil_ih_start;
107 		    i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next)
108 			cmn_err(CE_CONT, "!%s-%d#%x ", NAMEINST(ih_p->ih_dip),
109 			    ih_p->ih_inum);
110 	}
111 	cmn_err(CE_CONT, "!\n");
112 
113 	/* Clear the pending state */
114 	if (blocked == B_FALSE) {
115 		if (px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
116 		    INTR_IDLE_STATE) != DDI_SUCCESS)
117 			return (DDI_INTR_UNCLAIMED);
118 	}
119 
120 	return (DDI_INTR_CLAIMED);
121 }
122 
123 extern uint64_t intr_get_time(void);
124 
125 /*
126  * px_intx_intr (INTx or legacy interrupt handler)
127  *
128  * This routine is used as wrapper around interrupt handlers installed by child
129  * device drivers.  This routine invokes the driver interrupt handlers and
130  * examines the return codes.
131  *
132  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
133  * least one handler claims the interrupt then the counter is halved and the
134  * interrupt state machine is idled. If no handler claims the interrupt then
135  * the counter is incremented by one and the state machine is idled.
136  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
137  * then the interrupt state machine is not idled thus preventing any further
138  * interrupts on that ino. The state machine will only be idled again if a
139  * handler is subsequently added or removed.
140  *
141  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
142  * DDI_INTR_UNCLAIMED otherwise.
143  */
144 uint_t
145 px_intx_intr(caddr_t arg)
146 {
147 	px_ino_pil_t	*ipil_p = (px_ino_pil_t *)arg;
148 	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
149 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
150 	px_ih_t		*ih_p = ipil_p->ipil_ih_start;
151 	ushort_t	pil = ipil_p->ipil_pil;
152 	uint_t		result = 0, r = DDI_INTR_UNCLAIMED;
153 	int		i;
154 
155 	DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
156 	    "ino=%x sysino=%llx pil=%x ih_size=%x ih_lst=%x\n",
157 	    ino_p->ino_ino, ino_p->ino_sysino, ipil_p->ipil_pil,
158 	    ipil_p->ipil_ih_size, ipil_p->ipil_ih_head);
159 
160 	for (i = 0; i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next) {
161 		dev_info_t *dip = ih_p->ih_dip;
162 		uint_t (*handler)() = ih_p->ih_handler;
163 		caddr_t arg1 = ih_p->ih_handler_arg1;
164 		caddr_t arg2 = ih_p->ih_handler_arg2;
165 
166 		if (ih_p->ih_intr_state == PX_INTR_STATE_DISABLE) {
167 			DBG(DBG_INTX_INTR, px_p->px_dip,
168 			    "px_intx_intr: %s%d interrupt %d is disabled\n",
169 			    ddi_driver_name(dip), ddi_get_instance(dip),
170 			    ino_p->ino_ino);
171 
172 			continue;
173 		}
174 
175 		DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
176 		    "ino=%x handler=%p arg1 =%p arg2 = %p\n",
177 		    ino_p->ino_ino, handler, arg1, arg2);
178 
179 		DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
180 		    void *, handler, caddr_t, arg1, caddr_t, arg2);
181 
182 		r = (*handler)(arg1, arg2);
183 
184 		/*
185 		 * Account for time used by this interrupt. Protect against
186 		 * conflicting writes to ih_ticks from ib_intr_dist_all() by
187 		 * using atomic ops.
188 		 */
189 
190 		if (pil <= LOCK_LEVEL)
191 			atomic_add_64(&ih_p->ih_ticks, intr_get_time());
192 
193 		DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
194 		    void *, handler, caddr_t, arg1, int, r);
195 
196 		result += r;
197 
198 		if (px_check_all_handlers)
199 			continue;
200 		if (result)
201 			break;
202 	}
203 
204 	if (result)
205 		ino_p->ino_claimed |= (1 << pil);
206 
207 	/* Interrupt can only be cleared after all pil levels are handled */
208 	if (pil != ino_p->ino_lopil)
209 		return (DDI_INTR_CLAIMED);
210 
211 	if (!ino_p->ino_claimed) {
212 		if (px_unclaimed_intr_block)
213 			return (px_spurintr(ipil_p));
214 	}
215 
216 	ino_p->ino_unclaimed_intrs = 0;
217 	ino_p->ino_claimed = 0;
218 
219 	/* Clear the pending state */
220 	if (px_lib_intr_setstate(px_p->px_dip,
221 	    ino_p->ino_sysino, INTR_IDLE_STATE) != DDI_SUCCESS)
222 		return (DDI_INTR_UNCLAIMED);
223 
224 	return (DDI_INTR_CLAIMED);
225 }
226 
227 /*
228  * px_msiq_intr (MSI/X or PCIe MSG interrupt handler)
229  *
230  * This routine is used as wrapper around interrupt handlers installed by child
231  * device drivers.  This routine invokes the driver interrupt handlers and
232  * examines the return codes.
233  *
234  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
235  * least one handler claims the interrupt then the counter is halved and the
236  * interrupt state machine is idled. If no handler claims the interrupt then
237  * the counter is incremented by one and the state machine is idled.
238  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
239  * then the interrupt state machine is not idled thus preventing any further
240  * interrupts on that ino. The state machine will only be idled again if a
241  * handler is subsequently added or removed.
242  *
243  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
244  * DDI_INTR_UNCLAIMED otherwise.
245  */
246 uint_t
247 px_msiq_intr(caddr_t arg)
248 {
249 	px_ino_pil_t	*ipil_p = (px_ino_pil_t *)arg;
250 	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
251 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
252 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
253 	px_msiq_t	*msiq_p = ino_p->ino_msiq_p;
254 	dev_info_t	*dip = px_p->px_dip;
255 	ushort_t	pil = ipil_p->ipil_pil;
256 	msiq_rec_t	msiq_rec, *msiq_rec_p = &msiq_rec;
257 	msiqhead_t	*curr_head_p;
258 	msiqtail_t	curr_tail_index;
259 	msgcode_t	msg_code;
260 	px_ih_t		*ih_p;
261 	uint_t		ret = DDI_INTR_UNCLAIMED;
262 	int		i, j;
263 
264 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: msiq_id =%x ino=%x pil=%x "
265 	    "ih_size=%x ih_lst=%x\n", msiq_p->msiq_id, ino_p->ino_ino,
266 	    ipil_p->ipil_pil, ipil_p->ipil_ih_size, ipil_p->ipil_ih_head);
267 
268 	/*
269 	 * The px_msiq_intr() handles multiple interrupt priorities and it
270 	 * will set msiq->msiq_rec2process to the number of MSIQ records to
271 	 * process while handling the highest priority interrupt. Subsequent
272 	 * lower priority interrupts will just process any unprocessed MSIQ
273 	 * records or will just return immediately.
274 	 */
275 	if (msiq_p->msiq_recs2process == 0) {
276 		ASSERT(ino_p->ino_ipil_cntr == 0);
277 		ino_p->ino_ipil_cntr = ino_p->ino_ipil_size;
278 
279 		/* Read current MSIQ tail index */
280 		px_lib_msiq_gettail(dip, msiq_p->msiq_id, &curr_tail_index);
281 		msiq_p->msiq_new_head_index = msiq_p->msiq_curr_head_index;
282 
283 		if (curr_tail_index < msiq_p->msiq_curr_head_index)
284 			curr_tail_index += msiq_state_p->msiq_rec_cnt;
285 
286 		msiq_p->msiq_recs2process = curr_tail_index -
287 		    msiq_p->msiq_curr_head_index;
288 	}
289 
290 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: curr_head %x new_head %x "
291 	    "rec2process %x\n", msiq_p->msiq_curr_head_index,
292 	    msiq_p->msiq_new_head_index, msiq_p->msiq_recs2process);
293 
294 	/* If all MSIQ records are already processed, just return immediately */
295 	if ((msiq_p->msiq_new_head_index - msiq_p->msiq_curr_head_index)
296 	    == msiq_p->msiq_recs2process)
297 		goto intr_done;
298 
299 	curr_head_p = (msiqhead_t *)((caddr_t)msiq_p->msiq_base_p +
300 	    (msiq_p->msiq_curr_head_index * sizeof (msiq_rec_t)));
301 
302 	/*
303 	 * Calculate the number of recs to process by taking the difference
304 	 * between the head and tail pointers. For all records we always
305 	 * verify that we have a valid record type before we do any processing.
306 	 * If triggered, we should always have at least one valid record.
307 	 */
308 	for (i = 0; i < msiq_p->msiq_recs2process; i++) {
309 		msiq_rec_type_t rec_type;
310 
311 		/* Read next MSIQ record */
312 		px_lib_get_msiq_rec(dip, curr_head_p, msiq_rec_p);
313 
314 		rec_type = msiq_rec_p->msiq_rec_type;
315 
316 		DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSIQ RECORD, "
317 		    "msiq_rec_type 0x%llx msiq_rec_rid 0x%llx\n",
318 		    rec_type, msiq_rec_p->msiq_rec_rid);
319 
320 		if (!rec_type)
321 			goto next_rec;
322 
323 		/* Check MSIQ record type */
324 		switch (rec_type) {
325 		case MSG_REC:
326 			msg_code = msiq_rec_p->msiq_rec_data.msg.msg_code;
327 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: PCIE MSG "
328 			    "record, msg type 0x%x\n", msg_code);
329 			break;
330 		case MSI32_REC:
331 		case MSI64_REC:
332 			msg_code = msiq_rec_p->msiq_rec_data.msi.msi_data;
333 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSI record, "
334 			    "msi 0x%x\n", msg_code);
335 			break;
336 		default:
337 			msg_code = 0;
338 			cmn_err(CE_WARN, "%s%d: px_msiq_intr: 0x%x MSIQ "
339 			    "record type is not supported",
340 			    ddi_driver_name(dip), ddi_get_instance(dip),
341 			    rec_type);
342 
343 			goto next_rec;
344 		}
345 
346 		/*
347 		 * Scan through px_ih_t linked list, searching for the
348 		 * right px_ih_t, matching MSIQ record data.
349 		 */
350 		for (j = 0, ih_p = ipil_p->ipil_ih_start;
351 		    ih_p && (j < ipil_p->ipil_ih_size) &&
352 		    ((ih_p->ih_msg_code != msg_code) ||
353 		    (ih_p->ih_rec_type != rec_type));
354 		    ih_p = ih_p->ih_next, j++)
355 			;
356 
357 		if ((ih_p->ih_msg_code == msg_code) &&
358 		    (ih_p->ih_rec_type == rec_type)) {
359 			dev_info_t *ih_dip = ih_p->ih_dip;
360 			uint_t (*handler)() = ih_p->ih_handler;
361 			caddr_t arg1 = ih_p->ih_handler_arg1;
362 			caddr_t arg2 = ih_p->ih_handler_arg2;
363 
364 			DBG(DBG_MSIQ_INTR, ih_dip, "px_msiq_intr: ino=%x "
365 			    "data=%x handler=%p arg1 =%p arg2=%p\n",
366 			    ino_p->ino_ino, msg_code, handler, arg1, arg2);
367 
368 			DTRACE_PROBE4(interrupt__start, dev_info_t, ih_dip,
369 			    void *, handler, caddr_t, arg1, caddr_t, arg2);
370 
371 			ih_p->ih_intr_flags = PX_INTR_PENDING;
372 
373 			/*
374 			 * Special case for PCIE Error Messages.
375 			 * The current frame work doesn't fit PCIE Err Msgs
376 			 * This should be fixed when PCIE MESSAGES as a whole
377 			 * is architected correctly.
378 			 */
379 			if ((rec_type == MSG_REC) &&
380 			    ((msg_code == PCIE_MSG_CODE_ERR_COR) ||
381 			    (msg_code == PCIE_MSG_CODE_ERR_NONFATAL) ||
382 			    (msg_code == PCIE_MSG_CODE_ERR_FATAL))) {
383 				ret = px_err_fabric_intr(px_p, msg_code,
384 				    msiq_rec_p->msiq_rec_rid);
385 			} else {
386 				/* Clear MSI state */
387 				px_lib_msi_setstate(dip, (msinum_t)msg_code,
388 				    PCI_MSI_STATE_IDLE);
389 
390 				ret = (*handler)(arg1, arg2);
391 			}
392 
393 			/*
394 			 * Account for time used by this interrupt. Protect
395 			 * against conflicting writes to ih_ticks from
396 			 * ib_intr_dist_all() by using atomic ops.
397 			 */
398 
399 			if (pil <= LOCK_LEVEL)
400 				atomic_add_64(&ih_p->ih_ticks, intr_get_time());
401 
402 			DTRACE_PROBE4(interrupt__complete, dev_info_t, ih_dip,
403 			    void *, handler, caddr_t, arg1, int, ret);
404 
405 			/* clear handler status flags */
406 			ih_p->ih_intr_flags = PX_INTR_IDLE;
407 
408 			msiq_p->msiq_new_head_index++;
409 			px_lib_clr_msiq_rec(ih_dip, curr_head_p);
410 		} else {
411 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: "
412 			    "No matching MSIQ record found\n");
413 		}
414 next_rec:
415 		/* Get the pointer next EQ record */
416 		curr_head_p = (msiqhead_t *)
417 		    ((caddr_t)curr_head_p + sizeof (msiq_rec_t));
418 
419 		/* Check for overflow condition */
420 		if (curr_head_p >= (msiqhead_t *)((caddr_t)msiq_p->msiq_base_p
421 		    + (msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t))))
422 			curr_head_p = (msiqhead_t *)msiq_p->msiq_base_p;
423 	}
424 
425 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: No of MSIQ recs processed %x\n",
426 	    (msiq_p->msiq_new_head_index - msiq_p->msiq_curr_head_index));
427 
428 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: curr_head %x new_head %x "
429 	    "rec2process %x\n", msiq_p->msiq_curr_head_index,
430 	    msiq_p->msiq_new_head_index, msiq_p->msiq_recs2process);
431 
432 	/* ino_claimed used just for debugging purpose */
433 	if (ret)
434 		ino_p->ino_claimed |= (1 << pil);
435 
436 intr_done:
437 	/* Interrupt can only be cleared after all pil levels are handled */
438 	if (--ino_p->ino_ipil_cntr != 0)
439 		return (DDI_INTR_CLAIMED);
440 
441 	if (msiq_p->msiq_new_head_index <= msiq_p->msiq_curr_head_index)  {
442 		if (px_unclaimed_intr_block)
443 			return (px_spurintr(ipil_p));
444 	}
445 
446 	/*  Update MSIQ head index with no of MSIQ records processed */
447 	if (msiq_p->msiq_new_head_index >= msiq_state_p->msiq_rec_cnt)
448 		msiq_p->msiq_new_head_index -= msiq_state_p->msiq_rec_cnt;
449 
450 	msiq_p->msiq_curr_head_index = msiq_p->msiq_new_head_index;
451 	px_lib_msiq_sethead(dip, msiq_p->msiq_id, msiq_p->msiq_new_head_index);
452 
453 	msiq_p->msiq_new_head_index = 0;
454 	msiq_p->msiq_recs2process = 0;
455 	ino_p->ino_claimed = 0;
456 
457 	/* Clear the pending state */
458 	if (px_lib_intr_setstate(dip, ino_p->ino_sysino,
459 	    INTR_IDLE_STATE) != DDI_SUCCESS)
460 		return (DDI_INTR_UNCLAIMED);
461 
462 	return (DDI_INTR_CLAIMED);
463 }
464 
465 dev_info_t *
466 px_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip)
467 {
468 	dev_info_t	*cdip = rdip;
469 
470 	for (; ddi_get_parent(cdip) != dip; cdip = ddi_get_parent(cdip))
471 		;
472 
473 	return (cdip);
474 }
475 
476 /* ARGSUSED */
477 int
478 px_intx_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
479     ddi_intr_handle_impl_t *hdlp, void *result)
480 {
481 	px_t	*px_p = DIP_TO_STATE(dip);
482 	int	ret = DDI_SUCCESS;
483 
484 	DBG(DBG_INTROPS, dip, "px_intx_ops: dip=%x rdip=%x intr_op=%x "
485 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
486 
487 	switch (intr_op) {
488 	case DDI_INTROP_GETCAP:
489 		ret = pci_intx_get_cap(rdip, (int *)result);
490 		break;
491 	case DDI_INTROP_SETCAP:
492 		DBG(DBG_INTROPS, dip, "px_intx_ops: SetCap is not supported\n");
493 		ret = DDI_ENOTSUP;
494 		break;
495 	case DDI_INTROP_ALLOC:
496 		*(int *)result = hdlp->ih_scratch1;
497 		break;
498 	case DDI_INTROP_FREE:
499 		break;
500 	case DDI_INTROP_GETPRI:
501 		*(int *)result = hdlp->ih_pri ?
502 		    hdlp->ih_pri : pci_class_to_pil(rdip);
503 		break;
504 	case DDI_INTROP_SETPRI:
505 		break;
506 	case DDI_INTROP_ADDISR:
507 		ret = px_add_intx_intr(dip, rdip, hdlp);
508 		break;
509 	case DDI_INTROP_REMISR:
510 		ret = px_rem_intx_intr(dip, rdip, hdlp);
511 		break;
512 	case DDI_INTROP_GETTARGET:
513 		ret = px_ib_get_intr_target(px_p, hdlp->ih_vector,
514 		    (cpuid_t *)result);
515 		break;
516 	case DDI_INTROP_SETTARGET:
517 		ret = DDI_ENOTSUP;
518 		break;
519 	case DDI_INTROP_ENABLE:
520 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
521 		    hdlp->ih_vector, hdlp->ih_pri, PX_INTR_STATE_ENABLE, 0, 0);
522 		break;
523 	case DDI_INTROP_DISABLE:
524 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
525 		    hdlp->ih_vector, hdlp->ih_pri, PX_INTR_STATE_DISABLE, 0, 0);
526 		break;
527 	case DDI_INTROP_SETMASK:
528 		ret = pci_intx_set_mask(rdip);
529 		break;
530 	case DDI_INTROP_CLRMASK:
531 		ret = pci_intx_clr_mask(rdip);
532 		break;
533 	case DDI_INTROP_GETPENDING:
534 		ret = pci_intx_get_pending(rdip, (int *)result);
535 		break;
536 	case DDI_INTROP_NINTRS:
537 	case DDI_INTROP_NAVAIL:
538 		*(int *)result = i_ddi_get_intx_nintrs(rdip);
539 		break;
540 	default:
541 		ret = DDI_ENOTSUP;
542 		break;
543 	}
544 
545 	return (ret);
546 }
547 
548 /* ARGSUSED */
549 int
550 px_msix_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
551     ddi_intr_handle_impl_t *hdlp, void *result)
552 {
553 	px_t			*px_p = DIP_TO_STATE(dip);
554 	px_msi_state_t		*msi_state_p = &px_p->px_ib_p->ib_msi_state;
555 	msiq_rec_type_t		msiq_rec_type;
556 	msi_type_t		msi_type;
557 	uint64_t		msi_addr;
558 	msinum_t		msi_num;
559 	msiqid_t		msiq_id;
560 	uint_t			nintrs;
561 	int			ret = DDI_SUCCESS;
562 
563 	DBG(DBG_INTROPS, dip, "px_msix_ops: dip=%x rdip=%x intr_op=%x "
564 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
565 
566 	/* Check for MSI64 support */
567 	if ((hdlp->ih_cap & DDI_INTR_FLAG_MSI64) && msi_state_p->msi_addr64) {
568 		msiq_rec_type = MSI64_REC;
569 		msi_type = MSI64_TYPE;
570 		msi_addr = msi_state_p->msi_addr64;
571 	} else {
572 		msiq_rec_type = MSI32_REC;
573 		msi_type = MSI32_TYPE;
574 		msi_addr = msi_state_p->msi_addr32;
575 	}
576 
577 	(void) px_msi_get_msinum(px_p, hdlp->ih_dip,
578 	    (hdlp->ih_flags & DDI_INTR_MSIX_DUP) ? hdlp->ih_main->ih_inum :
579 	    hdlp->ih_inum, &msi_num);
580 
581 	switch (intr_op) {
582 	case DDI_INTROP_GETCAP:
583 		ret = pci_msi_get_cap(rdip, hdlp->ih_type, (int *)result);
584 		if (ret == DDI_SUCCESS)
585 			*(int *)result |= DDI_INTR_FLAG_RETARGETABLE;
586 		break;
587 	case DDI_INTROP_SETCAP:
588 		DBG(DBG_INTROPS, dip, "px_msix_ops: SetCap is not supported\n");
589 		ret = DDI_ENOTSUP;
590 		break;
591 	case DDI_INTROP_ALLOC:
592 		/*
593 		 * We need to restrict this allocation in future
594 		 * based on Resource Management policies.
595 		 */
596 		if ((ret = px_msi_alloc(px_p, rdip, hdlp->ih_type,
597 		    hdlp->ih_inum, hdlp->ih_scratch1,
598 		    (uintptr_t)hdlp->ih_scratch2,
599 		    (int *)result)) != DDI_SUCCESS) {
600 			DBG(DBG_INTROPS, dip, "px_msix_ops: allocation "
601 			    "failed, rdip 0x%p type 0x%d inum 0x%x "
602 			    "count 0x%x\n", rdip, hdlp->ih_type, hdlp->ih_inum,
603 			    hdlp->ih_scratch1);
604 
605 			return (ret);
606 		}
607 
608 		if ((hdlp->ih_type == DDI_INTR_TYPE_MSIX) &&
609 		    (i_ddi_get_msix(rdip) == NULL)) {
610 			ddi_intr_msix_t		*msix_p;
611 
612 			if (msix_p = pci_msix_init(rdip)) {
613 				i_ddi_set_msix(rdip, msix_p);
614 				break;
615 			}
616 
617 			DBG(DBG_INTROPS, dip, "px_msix_ops: MSI-X allocation "
618 			    "failed, rdip 0x%p inum 0x%x\n", rdip,
619 			    hdlp->ih_inum);
620 
621 			(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
622 			    hdlp->ih_scratch1);
623 
624 			return (DDI_FAILURE);
625 		}
626 
627 		break;
628 	case DDI_INTROP_FREE:
629 		(void) pci_msi_unconfigure(rdip, hdlp->ih_type, hdlp->ih_inum);
630 
631 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
632 			goto msi_free;
633 
634 		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
635 			break;
636 
637 		if (((i_ddi_intr_get_current_nintrs(hdlp->ih_dip) - 1) == 0) &&
638 		    (i_ddi_get_msix(rdip))) {
639 			pci_msix_fini(i_ddi_get_msix(rdip));
640 			i_ddi_set_msix(rdip, NULL);
641 		}
642 msi_free:
643 		(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
644 		    hdlp->ih_scratch1);
645 		break;
646 	case DDI_INTROP_GETPRI:
647 		*(int *)result = hdlp->ih_pri ?
648 		    hdlp->ih_pri : pci_class_to_pil(rdip);
649 		break;
650 	case DDI_INTROP_SETPRI:
651 		break;
652 	case DDI_INTROP_ADDISR:
653 		if ((ret = px_add_msiq_intr(dip, rdip, hdlp,
654 		    msiq_rec_type, msi_num, -1, &msiq_id)) != DDI_SUCCESS) {
655 			DBG(DBG_INTROPS, dip, "px_msix_ops: Add MSI handler "
656 			    "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num);
657 			return (ret);
658 		}
659 
660 		DBG(DBG_INTROPS, dip, "px_msix_ops: msiq used 0x%x\n", msiq_id);
661 
662 		if ((ret = px_lib_msi_setmsiq(dip, msi_num,
663 		    msiq_id, msi_type)) != DDI_SUCCESS) {
664 			(void) px_rem_msiq_intr(dip, rdip,
665 			    hdlp, msiq_rec_type, msi_num, msiq_id);
666 			return (ret);
667 		}
668 
669 		if ((ret = px_lib_msi_setstate(dip, msi_num,
670 		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS) {
671 			(void) px_rem_msiq_intr(dip, rdip,
672 			    hdlp, msiq_rec_type, msi_num, msiq_id);
673 			return (ret);
674 		}
675 
676 		if ((ret = px_lib_msi_setvalid(dip, msi_num,
677 		    PCI_MSI_VALID)) != DDI_SUCCESS)
678 			return (ret);
679 
680 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
681 		    px_msiqid_to_devino(px_p, msiq_id), hdlp->ih_pri,
682 		    PX_INTR_STATE_ENABLE, msiq_rec_type, msi_num);
683 
684 		break;
685 	case DDI_INTROP_DUPVEC:
686 		DBG(DBG_INTROPS, dip, "px_msix_ops: dupisr - inum: %x, "
687 		    "new_vector: %x\n", hdlp->ih_inum, hdlp->ih_scratch1);
688 
689 		ret = pci_msix_dup(hdlp->ih_dip, hdlp->ih_inum,
690 		    hdlp->ih_scratch1);
691 		break;
692 	case DDI_INTROP_REMISR:
693 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
694 		    &msiq_id)) != DDI_SUCCESS)
695 			return (ret);
696 
697 		if ((ret = px_ib_update_intr_state(px_p, rdip,
698 		    hdlp->ih_inum, px_msiqid_to_devino(px_p, msiq_id),
699 		    hdlp->ih_pri, PX_INTR_STATE_DISABLE, msiq_rec_type,
700 		    msi_num)) != DDI_SUCCESS)
701 			return (ret);
702 
703 		if ((ret = px_lib_msi_setvalid(dip, msi_num,
704 		    PCI_MSI_INVALID)) != DDI_SUCCESS)
705 			return (ret);
706 
707 		if ((ret = px_lib_msi_setstate(dip, msi_num,
708 		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS)
709 			return (ret);
710 
711 		ret = px_rem_msiq_intr(dip, rdip,
712 		    hdlp, msiq_rec_type, msi_num, msiq_id);
713 
714 		break;
715 	case DDI_INTROP_GETTARGET:
716 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
717 		    &msiq_id)) != DDI_SUCCESS)
718 			return (ret);
719 
720 		ret = px_ib_get_intr_target(px_p,
721 		    px_msiqid_to_devino(px_p, msiq_id), (cpuid_t *)result);
722 		break;
723 	case DDI_INTROP_SETTARGET:
724 		ret = px_ib_set_msix_target(px_p, hdlp, msi_num,
725 		    *(cpuid_t *)result);
726 		break;
727 	case DDI_INTROP_ENABLE:
728 		/*
729 		 * For MSI, just clear the mask bit and return if curr_nenables
730 		 * is > 1. For MSI-X, program MSI address and data for every
731 		 * MSI-X vector including dup vectors irrespective of current
732 		 * curr_nenables value.
733 		 */
734 		if ((pci_is_msi_enabled(rdip, hdlp->ih_type) != DDI_SUCCESS) ||
735 		    (hdlp->ih_type == DDI_INTR_TYPE_MSIX)) {
736 			nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
737 
738 			if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
739 			    nintrs, hdlp->ih_inum, msi_addr,
740 			    hdlp->ih_type == DDI_INTR_TYPE_MSIX ?
741 			    msi_num : msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
742 				return (ret);
743 
744 			if (i_ddi_intr_get_current_nenables(rdip) < 1) {
745 				if ((ret = pci_msi_enable_mode(rdip,
746 				    hdlp->ih_type)) != DDI_SUCCESS)
747 					return (ret);
748 			}
749 		}
750 
751 		if ((ret = pci_msi_clr_mask(rdip, hdlp->ih_type,
752 		    hdlp->ih_inum)) != DDI_SUCCESS)
753 			return (ret);
754 
755 		break;
756 	case DDI_INTROP_DISABLE:
757 		if ((ret = pci_msi_set_mask(rdip, hdlp->ih_type,
758 		    hdlp->ih_inum)) != DDI_SUCCESS)
759 			return (ret);
760 
761 		/*
762 		 * curr_nenables will be greater than 1 if rdip is using
763 		 * MSI-X and also, if it is using DUP interface. If this
764 		 * curr_enables is > 1, return after setting the mask bit.
765 		 */
766 		if (i_ddi_intr_get_current_nenables(rdip) > 1)
767 			return (DDI_SUCCESS);
768 
769 		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type))
770 		    != DDI_SUCCESS)
771 			return (ret);
772 
773 		break;
774 	case DDI_INTROP_BLOCKENABLE:
775 		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
776 
777 		if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
778 		    nintrs, hdlp->ih_inum, msi_addr,
779 		    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
780 			return (ret);
781 
782 		ret = pci_msi_enable_mode(rdip, hdlp->ih_type);
783 		break;
784 	case DDI_INTROP_BLOCKDISABLE:
785 		ret = pci_msi_disable_mode(rdip, hdlp->ih_type);
786 		break;
787 	case DDI_INTROP_SETMASK:
788 		ret = pci_msi_set_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
789 		break;
790 	case DDI_INTROP_CLRMASK:
791 		ret = pci_msi_clr_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
792 		break;
793 	case DDI_INTROP_GETPENDING:
794 		ret = pci_msi_get_pending(rdip, hdlp->ih_type,
795 		    hdlp->ih_inum, (int *)result);
796 		break;
797 	case DDI_INTROP_NINTRS:
798 		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
799 		break;
800 	case DDI_INTROP_NAVAIL:
801 		/* XXX - a new interface may be needed */
802 		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
803 		break;
804 	case DDI_INTROP_GETPOOL:
805 		if (msi_state_p->msi_pool_p == NULL) {
806 			*(ddi_irm_pool_t **)result = NULL;
807 			return (DDI_ENOTSUP);
808 		}
809 		*(ddi_irm_pool_t **)result = msi_state_p->msi_pool_p;
810 		ret = DDI_SUCCESS;
811 		break;
812 	default:
813 		ret = DDI_ENOTSUP;
814 		break;
815 	}
816 
817 	return (ret);
818 }
819 
820 static struct {
821 	kstat_named_t pxintr_ks_name;
822 	kstat_named_t pxintr_ks_type;
823 	kstat_named_t pxintr_ks_cpu;
824 	kstat_named_t pxintr_ks_pil;
825 	kstat_named_t pxintr_ks_time;
826 	kstat_named_t pxintr_ks_ino;
827 	kstat_named_t pxintr_ks_cookie;
828 	kstat_named_t pxintr_ks_devpath;
829 	kstat_named_t pxintr_ks_buspath;
830 } pxintr_ks_template = {
831 	{ "name",	KSTAT_DATA_CHAR },
832 	{ "type",	KSTAT_DATA_CHAR },
833 	{ "cpu",	KSTAT_DATA_UINT64 },
834 	{ "pil",	KSTAT_DATA_UINT64 },
835 	{ "time",	KSTAT_DATA_UINT64 },
836 	{ "ino",	KSTAT_DATA_UINT64 },
837 	{ "cookie",	KSTAT_DATA_UINT64 },
838 	{ "devpath",	KSTAT_DATA_STRING },
839 	{ "buspath",	KSTAT_DATA_STRING },
840 };
841 
842 static uint32_t pxintr_ks_instance;
843 static char ih_devpath[MAXPATHLEN];
844 static char ih_buspath[MAXPATHLEN];
845 kmutex_t pxintr_ks_template_lock;
846 
847 int
848 px_ks_update(kstat_t *ksp, int rw)
849 {
850 	px_ih_t *ih_p = ksp->ks_private;
851 	int maxlen = sizeof (pxintr_ks_template.pxintr_ks_name.value.c);
852 	px_ino_pil_t *ipil_p = ih_p->ih_ipil_p;
853 	px_ino_t *ino_p = ipil_p->ipil_ino_p;
854 	px_t *px_p = ino_p->ino_ib_p->ib_px_p;
855 	devino_t ino;
856 	sysino_t sysino;
857 
858 	ino = ino_p->ino_ino;
859 	if (px_lib_intr_devino_to_sysino(px_p->px_dip, ino, &sysino) !=
860 	    DDI_SUCCESS) {
861 		cmn_err(CE_WARN, "px_ks_update: px_lib_intr_devino_to_sysino "
862 		    "failed");
863 	}
864 
865 	(void) snprintf(pxintr_ks_template.pxintr_ks_name.value.c, maxlen,
866 	    "%s%d", ddi_driver_name(ih_p->ih_dip),
867 	    ddi_get_instance(ih_p->ih_dip));
868 
869 	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
870 	(void) ddi_pathname(px_p->px_dip, ih_buspath);
871 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_devpath, ih_devpath);
872 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_buspath, ih_buspath);
873 
874 	if (ih_p->ih_intr_state == PX_INTR_STATE_ENABLE) {
875 
876 		switch (i_ddi_intr_get_current_type(ih_p->ih_dip)) {
877 		case DDI_INTR_TYPE_MSI:
878 			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
879 			    "msi");
880 			break;
881 		case DDI_INTR_TYPE_MSIX:
882 			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
883 			    "msix");
884 			break;
885 		default:
886 			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
887 			    "fixed");
888 			break;
889 		}
890 
891 		pxintr_ks_template.pxintr_ks_cpu.value.ui64 = ino_p->ino_cpuid;
892 		pxintr_ks_template.pxintr_ks_pil.value.ui64 = ipil_p->ipil_pil;
893 		pxintr_ks_template.pxintr_ks_time.value.ui64 = ih_p->ih_nsec +
894 		    (uint64_t)tick2ns((hrtime_t)ih_p->ih_ticks,
895 		    ino_p->ino_cpuid);
896 		pxintr_ks_template.pxintr_ks_ino.value.ui64 = ino;
897 		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = sysino;
898 	} else {
899 		(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
900 		    "disabled");
901 		pxintr_ks_template.pxintr_ks_cpu.value.ui64 = 0;
902 		pxintr_ks_template.pxintr_ks_pil.value.ui64 = 0;
903 		pxintr_ks_template.pxintr_ks_time.value.ui64 = 0;
904 		pxintr_ks_template.pxintr_ks_ino.value.ui64 = 0;
905 		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = 0;
906 	}
907 	return (0);
908 }
909 
910 void
911 px_create_intr_kstats(px_ih_t *ih_p)
912 {
913 	msiq_rec_type_t rec_type = ih_p->ih_rec_type;
914 
915 	ASSERT(ih_p->ih_ksp == NULL);
916 
917 	/*
918 	 * Create pci_intrs::: kstats for all ih types except messages,
919 	 * which represent unusual conditions and don't need to be tracked.
920 	 */
921 	if (rec_type == 0 || rec_type == MSI32_REC || rec_type == MSI64_REC) {
922 		ih_p->ih_ksp = kstat_create("pci_intrs",
923 		    atomic_inc_32_nv(&pxintr_ks_instance), "config",
924 		    "interrupts", KSTAT_TYPE_NAMED,
925 		    sizeof (pxintr_ks_template) / sizeof (kstat_named_t),
926 		    KSTAT_FLAG_VIRTUAL);
927 	}
928 	if (ih_p->ih_ksp != NULL) {
929 		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
930 		ih_p->ih_ksp->ks_lock = &pxintr_ks_template_lock;
931 		ih_p->ih_ksp->ks_data = &pxintr_ks_template;
932 		ih_p->ih_ksp->ks_private = ih_p;
933 		ih_p->ih_ksp->ks_update = px_ks_update;
934 	}
935 }
936 
937 /*
938  * px_add_intx_intr:
939  *
940  * This function is called to register INTx and legacy hardware
941  * interrupt pins interrupts.
942  */
943 int
944 px_add_intx_intr(dev_info_t *dip, dev_info_t *rdip,
945     ddi_intr_handle_impl_t *hdlp)
946 {
947 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
948 	px_ib_t		*ib_p = px_p->px_ib_p;
949 	devino_t	ino;
950 	px_ih_t		*ih_p;
951 	px_ino_t	*ino_p;
952 	px_ino_pil_t	*ipil_p, *ipil_list;
953 	int32_t		weight;
954 	int		ret = DDI_SUCCESS;
955 
956 	ino = hdlp->ih_vector;
957 
958 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: rdip=%s%d ino=%x "
959 	    "handler=%x arg1=%x arg2=%x\n", ddi_driver_name(rdip),
960 	    ddi_get_instance(rdip), ino, hdlp->ih_cb_func,
961 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
962 
963 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum,
964 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, 0, 0);
965 
966 	mutex_enter(&ib_p->ib_ino_lst_mutex);
967 
968 	ino_p = px_ib_locate_ino(ib_p, ino);
969 	ipil_list = ino_p ? ino_p->ino_ipil_p : NULL;
970 
971 	/* Sharing ino */
972 	if (ino_p && (ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
973 		if (px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, 0, 0)) {
974 			DBG(DBG_A_INTX, dip, "px_add_intx_intr: "
975 			    "dup intr #%d\n", hdlp->ih_inum);
976 
977 			ret = DDI_FAILURE;
978 			goto fail1;
979 		}
980 
981 		/* Save mondo value in hdlp */
982 		hdlp->ih_vector = ino_p->ino_sysino;
983 
984 		if ((ret = px_ib_ino_add_intr(px_p, ipil_p,
985 		    ih_p)) != DDI_SUCCESS)
986 			goto fail1;
987 
988 		goto ino_done;
989 	}
990 
991 	if (hdlp->ih_pri == 0)
992 		hdlp->ih_pri = pci_class_to_pil(rdip);
993 
994 	ipil_p = px_ib_new_ino_pil(ib_p, ino, hdlp->ih_pri, ih_p);
995 	ino_p = ipil_p->ipil_ino_p;
996 
997 	/* Save mondo value in hdlp */
998 	hdlp->ih_vector = ino_p->ino_sysino;
999 
1000 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: pil=0x%x mondo=0x%x\n",
1001 	    hdlp->ih_pri, hdlp->ih_vector);
1002 
1003 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1004 	    (ddi_intr_handler_t *)px_intx_intr, (caddr_t)ipil_p, NULL);
1005 
1006 	ret = i_ddi_add_ivintr(hdlp);
1007 
1008 	/*
1009 	 * Restore original interrupt handler
1010 	 * and arguments in interrupt handle.
1011 	 */
1012 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1013 	    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1014 
1015 	if (ret != DDI_SUCCESS)
1016 		goto fail2;
1017 
1018 	/* Save the pil for this ino */
1019 	ipil_p->ipil_pil = hdlp->ih_pri;
1020 
1021 	/* Select cpu, saving it for sharing and removal */
1022 	if (ipil_list == NULL) {
1023 		if (ino_p->ino_cpuid == -1)
1024 			ino_p->ino_cpuid = intr_dist_cpuid();
1025 
1026 		/* Enable interrupt */
1027 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
1028 	}
1029 
1030 ino_done:
1031 	hdlp->ih_target = ino_p->ino_cpuid;
1032 
1033 	/* Add weight to the cpu that we are already targeting */
1034 	weight = pci_class_to_intr_weight(rdip);
1035 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1036 
1037 	ih_p->ih_ipil_p = ipil_p;
1038 	px_create_intr_kstats(ih_p);
1039 	if (ih_p->ih_ksp)
1040 		kstat_install(ih_p->ih_ksp);
1041 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1042 
1043 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: done! Interrupt 0x%x pil=%x\n",
1044 	    ino_p->ino_sysino, hdlp->ih_pri);
1045 
1046 	return (ret);
1047 fail2:
1048 	px_ib_delete_ino_pil(ib_p, ipil_p);
1049 fail1:
1050 	if (ih_p->ih_config_handle)
1051 		pci_config_teardown(&ih_p->ih_config_handle);
1052 
1053 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1054 	kmem_free(ih_p, sizeof (px_ih_t));
1055 
1056 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: Failed! Interrupt 0x%x "
1057 	    "pil=%x\n", ino_p->ino_sysino, hdlp->ih_pri);
1058 
1059 	return (ret);
1060 }
1061 
1062 /*
1063  * px_rem_intx_intr:
1064  *
1065  * This function is called to unregister INTx and legacy hardware
1066  * interrupt pins interrupts.
1067  */
1068 int
1069 px_rem_intx_intr(dev_info_t *dip, dev_info_t *rdip,
1070     ddi_intr_handle_impl_t *hdlp)
1071 {
1072 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1073 	px_ib_t		*ib_p = px_p->px_ib_p;
1074 	devino_t	ino;
1075 	cpuid_t		curr_cpu;
1076 	px_ino_t	*ino_p;
1077 	px_ino_pil_t	*ipil_p;
1078 	px_ih_t		*ih_p;
1079 	int		ret = DDI_SUCCESS;
1080 
1081 	ino = hdlp->ih_vector;
1082 
1083 	DBG(DBG_R_INTX, dip, "px_rem_intx_intr: rdip=%s%d ino=%x\n",
1084 	    ddi_driver_name(rdip), ddi_get_instance(rdip), ino);
1085 
1086 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1087 
1088 	ino_p = px_ib_locate_ino(ib_p, ino);
1089 	ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri);
1090 	ih_p = px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, 0, 0);
1091 
1092 	/* Get the current cpu */
1093 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1094 	    &curr_cpu)) != DDI_SUCCESS)
1095 		goto fail;
1096 
1097 	if ((ret = px_ib_ino_rem_intr(px_p, ipil_p, ih_p)) != DDI_SUCCESS)
1098 		goto fail;
1099 
1100 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1101 
1102 	if (ipil_p->ipil_ih_size == 0) {
1103 		hdlp->ih_vector = ino_p->ino_sysino;
1104 		i_ddi_rem_ivintr(hdlp);
1105 
1106 		px_ib_delete_ino_pil(ib_p, ipil_p);
1107 	}
1108 
1109 	if (ino_p->ino_ipil_size == 0) {
1110 		kmem_free(ino_p, sizeof (px_ino_t));
1111 	} else {
1112 		/* Re-enable interrupt only if mapping register still shared */
1113 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1114 	}
1115 
1116 fail:
1117 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1118 	return (ret);
1119 }
1120 
1121 /*
1122  * px_add_msiq_intr:
1123  *
1124  * This function is called to register MSI/Xs and PCIe message interrupts.
1125  */
1126 int
1127 px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1128     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1129     msgcode_t msg_code, cpuid_t cpu_id, msiqid_t *msiq_id_p)
1130 {
1131 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1132 	px_ib_t		*ib_p = px_p->px_ib_p;
1133 	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
1134 	devino_t	ino;
1135 	px_ih_t		*ih_p;
1136 	px_ino_t	*ino_p;
1137 	px_ino_pil_t	*ipil_p, *ipil_list;
1138 	int32_t		weight;
1139 	int		ret = DDI_SUCCESS;
1140 
1141 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=0x%x "
1142 	    "arg1=0x%x arg2=0x%x cpu=0x%x\n", ddi_driver_name(rdip),
1143 	    ddi_get_instance(rdip), hdlp->ih_cb_func, hdlp->ih_cb_arg1,
1144 	    hdlp->ih_cb_arg2, cpu_id);
1145 
1146 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum, hdlp->ih_cb_func,
1147 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, rec_type, msg_code);
1148 
1149 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1150 
1151 	ret = (cpu_id == -1) ? px_msiq_alloc(px_p, rec_type, msg_code,
1152 	    msiq_id_p) : px_msiq_alloc_based_on_cpuid(px_p, rec_type,
1153 	    cpu_id, msiq_id_p);
1154 
1155 	if (ret != DDI_SUCCESS) {
1156 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1157 		    "msiq allocation failed\n");
1158 		goto fail;
1159 	}
1160 
1161 	ino = px_msiqid_to_devino(px_p, *msiq_id_p);
1162 
1163 	ino_p = px_ib_locate_ino(ib_p, ino);
1164 	ipil_list = ino_p ? ino_p->ino_ipil_p : NULL;
1165 
1166 	/* Sharing ino */
1167 	if (ino_p && (ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
1168 		if (px_ib_intr_locate_ih(ipil_p, rdip,
1169 		    hdlp->ih_inum, rec_type, msg_code)) {
1170 			DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1171 			    "dup intr #%d\n", hdlp->ih_inum);
1172 
1173 			ret = DDI_FAILURE;
1174 			goto fail1;
1175 		}
1176 
1177 		/* Save mondo value in hdlp */
1178 		hdlp->ih_vector = ino_p->ino_sysino;
1179 
1180 		if ((ret = px_ib_ino_add_intr(px_p, ipil_p,
1181 		    ih_p)) != DDI_SUCCESS)
1182 			goto fail1;
1183 
1184 		goto ino_done;
1185 	}
1186 
1187 	if (hdlp->ih_pri == 0)
1188 		hdlp->ih_pri = pci_class_to_pil(rdip);
1189 
1190 	ipil_p = px_ib_new_ino_pil(ib_p, ino, hdlp->ih_pri, ih_p);
1191 	ino_p = ipil_p->ipil_ino_p;
1192 
1193 	ino_p->ino_msiq_p = msiq_state_p->msiq_p +
1194 	    (*msiq_id_p - msiq_state_p->msiq_1st_msiq_id);
1195 
1196 	/* Save mondo value in hdlp */
1197 	hdlp->ih_vector = ino_p->ino_sysino;
1198 
1199 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: pil=0x%x mondo=0x%x\n",
1200 	    hdlp->ih_pri, hdlp->ih_vector);
1201 
1202 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1203 	    (ddi_intr_handler_t *)px_msiq_intr, (caddr_t)ipil_p, NULL);
1204 
1205 	ret = i_ddi_add_ivintr(hdlp);
1206 
1207 	/*
1208 	 * Restore original interrupt handler
1209 	 * and arguments in interrupt handle.
1210 	 */
1211 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1212 	    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1213 
1214 	if (ret != DDI_SUCCESS)
1215 		goto fail2;
1216 
1217 	/* Save the pil for this ino */
1218 	ipil_p->ipil_pil = hdlp->ih_pri;
1219 
1220 	/* Select cpu, saving it for sharing and removal */
1221 	if (ipil_list == NULL) {
1222 		/* Enable MSIQ */
1223 		px_lib_msiq_setstate(dip, *msiq_id_p, PCI_MSIQ_STATE_IDLE);
1224 		px_lib_msiq_setvalid(dip, *msiq_id_p, PCI_MSIQ_VALID);
1225 
1226 		if (ino_p->ino_cpuid == -1)
1227 			ino_p->ino_cpuid = intr_dist_cpuid();
1228 
1229 		/* Enable interrupt */
1230 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
1231 	}
1232 
1233 ino_done:
1234 	hdlp->ih_target = ino_p->ino_cpuid;
1235 
1236 	/* Add weight to the cpu that we are already targeting */
1237 	weight = pci_class_to_intr_weight(rdip);
1238 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1239 
1240 	ih_p->ih_ipil_p = ipil_p;
1241 	px_create_intr_kstats(ih_p);
1242 	if (ih_p->ih_ksp)
1243 		kstat_install(ih_p->ih_ksp);
1244 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1245 
1246 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: done! Interrupt 0x%x pil=%x\n",
1247 	    ino_p->ino_sysino, hdlp->ih_pri);
1248 
1249 	return (ret);
1250 fail2:
1251 	px_ib_delete_ino_pil(ib_p, ipil_p);
1252 fail1:
1253 	(void) px_msiq_free(px_p, *msiq_id_p);
1254 fail:
1255 	if (ih_p->ih_config_handle)
1256 		pci_config_teardown(&ih_p->ih_config_handle);
1257 
1258 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1259 	kmem_free(ih_p, sizeof (px_ih_t));
1260 
1261 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: Failed! Interrupt 0x%x pil=%x\n",
1262 	    ino_p->ino_sysino, hdlp->ih_pri);
1263 
1264 	return (ret);
1265 }
1266 
1267 /*
1268  * px_rem_msiq_intr:
1269  *
1270  * This function is called to unregister MSI/Xs and PCIe message interrupts.
1271  */
1272 int
1273 px_rem_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1274     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1275     msgcode_t msg_code, msiqid_t msiq_id)
1276 {
1277 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1278 	px_ib_t		*ib_p = px_p->px_ib_p;
1279 	devino_t	ino = px_msiqid_to_devino(px_p, msiq_id);
1280 	cpuid_t		curr_cpu;
1281 	px_ino_t	*ino_p;
1282 	px_ino_pil_t	*ipil_p;
1283 	px_ih_t		*ih_p;
1284 	int		ret = DDI_SUCCESS;
1285 
1286 	DBG(DBG_MSIQ, dip, "px_rem_msiq_intr: rdip=%s%d msiq_id=%x ino=%x\n",
1287 	    ddi_driver_name(rdip), ddi_get_instance(rdip), msiq_id, ino);
1288 
1289 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1290 
1291 	ino_p = px_ib_locate_ino(ib_p, ino);
1292 	ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri);
1293 	ih_p = px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, rec_type,
1294 	    msg_code);
1295 
1296 	/* Get the current cpu */
1297 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1298 	    &curr_cpu)) != DDI_SUCCESS)
1299 		goto fail;
1300 
1301 	if ((ret = px_ib_ino_rem_intr(px_p, ipil_p, ih_p)) != DDI_SUCCESS)
1302 		goto fail;
1303 
1304 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1305 
1306 	if (ipil_p->ipil_ih_size == 0) {
1307 		hdlp->ih_vector = ino_p->ino_sysino;
1308 		i_ddi_rem_ivintr(hdlp);
1309 
1310 		px_ib_delete_ino_pil(ib_p, ipil_p);
1311 
1312 		if (ino_p->ino_ipil_size == 0)
1313 			px_lib_msiq_setvalid(dip,
1314 			    px_devino_to_msiqid(px_p, ino), PCI_MSIQ_INVALID);
1315 	}
1316 
1317 	(void) px_msiq_free(px_p, msiq_id);
1318 
1319 	if (ino_p->ino_ipil_size) {
1320 		/* Re-enable interrupt only if mapping register still shared */
1321 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1322 	}
1323 
1324 fail:
1325 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1326 	return (ret);
1327 }
1328