xref: /titanic_50/usr/src/uts/sun4/io/px/px_intr.c (revision 98892a3058bed9457ab6a22659dcccecfaf05d2a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * PX nexus interrupt handling:
30  *	PX device interrupt handler wrapper
31  *	PIL lookup routine
32  *	PX device interrupt related initchild code
33  */
34 
35 #include <sys/types.h>
36 #include <sys/kmem.h>
37 #include <sys/async.h>
38 #include <sys/spl.h>
39 #include <sys/sunddi.h>
40 #include <sys/fm/protocol.h>
41 #include <sys/fm/util.h>
42 #include <sys/machsystm.h>	/* e_ddi_nodeid_to_dip() */
43 #include <sys/ddi_impldefs.h>
44 #include <sys/sdt.h>
45 #include <sys/atomic.h>
46 #include "px_obj.h"
47 #include <sys/ontrap.h>
48 #include <sys/membar.h>
49 #include <sys/clock.h>
50 
51 /*
52  * interrupt jabber:
53  *
54  * When an interrupt line is jabbering, every time the state machine for the
55  * associated ino is idled, a new mondo will be sent and the ino will go into
56  * the pending state again. The mondo will cause a new call to
57  * px_intr_wrapper() which normally idles the ino's state machine which would
58  * precipitate another trip round the loop.
59  *
60  * The loop can be broken by preventing the ino's state machine from being
61  * idled when an interrupt line is jabbering. See the comment at the
62  * beginning of px_intr_wrapper() explaining how the 'interrupt jabber
63  * protection' code does this.
64  */
65 
66 /*LINTLIBRARY*/
67 
68 /*
69  * If the unclaimed interrupt count has reached the limit set by
70  * pci_unclaimed_intr_max within the time limit, then all interrupts
71  * on this ino is blocked by not idling the interrupt state machine.
72  */
73 static int
74 px_spurintr(px_ino_pil_t *ipil_p)
75 {
76 	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
77 	px_ih_t		*ih_p = ipil_p->ipil_ih_start;
78 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
79 	char		*err_fmt_str;
80 	boolean_t	blocked = B_FALSE;
81 	int		i;
82 
83 	if (ino_p->ino_unclaimed_intrs > px_unclaimed_intr_max)
84 		return (DDI_INTR_CLAIMED);
85 
86 	if (!ino_p->ino_unclaimed_intrs)
87 		ino_p->ino_spurintr_begin = ddi_get_lbolt();
88 
89 	ino_p->ino_unclaimed_intrs++;
90 
91 	if (ino_p->ino_unclaimed_intrs <= px_unclaimed_intr_max)
92 		goto clear;
93 
94 	if (drv_hztousec(ddi_get_lbolt() - ino_p->ino_spurintr_begin)
95 	    > px_spurintr_duration) {
96 		ino_p->ino_unclaimed_intrs = 0;
97 		goto clear;
98 	}
99 	err_fmt_str = "%s%d: ino 0x%x blocked";
100 	blocked = B_TRUE;
101 	goto warn;
102 clear:
103 	err_fmt_str = "!%s%d: spurious interrupt from ino 0x%x";
104 warn:
105 	cmn_err(CE_WARN, err_fmt_str, NAMEINST(px_p->px_dip), ino_p->ino_ino);
106 	for (i = 0; i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next)
107 		cmn_err(CE_CONT, "!%s-%d#%x ", NAMEINST(ih_p->ih_dip),
108 		    ih_p->ih_inum);
109 	cmn_err(CE_CONT, "!\n");
110 
111 	/* Clear the pending state */
112 	if (blocked == B_FALSE) {
113 		if (px_lib_intr_setstate(px_p->px_dip, ino_p->ino_sysino,
114 		    INTR_IDLE_STATE) != DDI_SUCCESS)
115 			return (DDI_INTR_UNCLAIMED);
116 	}
117 
118 	return (DDI_INTR_CLAIMED);
119 }
120 
121 extern uint64_t intr_get_time(void);
122 
123 /*
124  * px_intx_intr (INTx or legacy interrupt handler)
125  *
126  * This routine is used as wrapper around interrupt handlers installed by child
127  * device drivers.  This routine invokes the driver interrupt handlers and
128  * examines the return codes.
129  *
130  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
131  * least one handler claims the interrupt then the counter is halved and the
132  * interrupt state machine is idled. If no handler claims the interrupt then
133  * the counter is incremented by one and the state machine is idled.
134  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
135  * then the interrupt state machine is not idled thus preventing any further
136  * interrupts on that ino. The state machine will only be idled again if a
137  * handler is subsequently added or removed.
138  *
139  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
140  * DDI_INTR_UNCLAIMED otherwise.
141  */
142 uint_t
143 px_intx_intr(caddr_t arg)
144 {
145 	px_ino_pil_t	*ipil_p = (px_ino_pil_t *)arg;
146 	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
147 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
148 	px_ih_t		*ih_p = ipil_p->ipil_ih_start;
149 	ushort_t	pil = ipil_p->ipil_pil;
150 	uint_t		result = 0, r = DDI_INTR_UNCLAIMED;
151 	int		i;
152 
153 	DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
154 	    "ino=%x sysino=%llx pil=%x ih_size=%x ih_lst=%x\n",
155 	    ino_p->ino_ino, ino_p->ino_sysino, ipil_p->ipil_pil,
156 	    ipil_p->ipil_ih_size, ipil_p->ipil_ih_head);
157 
158 	for (i = 0; i < ipil_p->ipil_ih_size; i++, ih_p = ih_p->ih_next) {
159 		dev_info_t *dip = ih_p->ih_dip;
160 		uint_t (*handler)() = ih_p->ih_handler;
161 		caddr_t arg1 = ih_p->ih_handler_arg1;
162 		caddr_t arg2 = ih_p->ih_handler_arg2;
163 
164 		if (ih_p->ih_intr_state == PX_INTR_STATE_DISABLE) {
165 			DBG(DBG_INTX_INTR, px_p->px_dip,
166 			    "px_intx_intr: %s%d interrupt %d is disabled\n",
167 			    ddi_driver_name(dip), ddi_get_instance(dip),
168 			    ino_p->ino_ino);
169 
170 			continue;
171 		}
172 
173 		DBG(DBG_INTX_INTR, px_p->px_dip, "px_intx_intr:"
174 		    "ino=%x handler=%p arg1 =%p arg2 = %p\n",
175 		    ino_p->ino_ino, handler, arg1, arg2);
176 
177 		DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
178 		    void *, handler, caddr_t, arg1, caddr_t, arg2);
179 
180 		r = (*handler)(arg1, arg2);
181 
182 		/*
183 		 * Account for time used by this interrupt. Protect against
184 		 * conflicting writes to ih_ticks from ib_intr_dist_all() by
185 		 * using atomic ops.
186 		 */
187 
188 		if (pil <= LOCK_LEVEL)
189 			atomic_add_64(&ih_p->ih_ticks, intr_get_time());
190 
191 		DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
192 		    void *, handler, caddr_t, arg1, int, r);
193 
194 		result += r;
195 
196 		if (px_check_all_handlers)
197 			continue;
198 		if (result)
199 			break;
200 	}
201 
202 	if (result)
203 		ino_p->ino_claimed |= (1 << pil);
204 
205 	/* Interrupt can only be cleared after all pil levels are handled */
206 	if (pil != ino_p->ino_lopil)
207 		return (DDI_INTR_CLAIMED);
208 
209 	if (!ino_p->ino_claimed) {
210 		if (px_unclaimed_intr_block)
211 			return (px_spurintr(ipil_p));
212 	}
213 
214 	ino_p->ino_unclaimed_intrs = 0;
215 	ino_p->ino_claimed = 0;
216 
217 	/* Clear the pending state */
218 	if (px_lib_intr_setstate(px_p->px_dip,
219 	    ino_p->ino_sysino, INTR_IDLE_STATE) != DDI_SUCCESS)
220 		return (DDI_INTR_UNCLAIMED);
221 
222 	return (DDI_INTR_CLAIMED);
223 }
224 
225 /*
226  * px_msiq_intr (MSI/X or PCIe MSG interrupt handler)
227  *
228  * This routine is used as wrapper around interrupt handlers installed by child
229  * device drivers.  This routine invokes the driver interrupt handlers and
230  * examines the return codes.
231  *
232  * There is a count of unclaimed interrupts kept on a per-ino basis. If at
233  * least one handler claims the interrupt then the counter is halved and the
234  * interrupt state machine is idled. If no handler claims the interrupt then
235  * the counter is incremented by one and the state machine is idled.
236  * If the count ever reaches the limit value set by pci_unclaimed_intr_max
237  * then the interrupt state machine is not idled thus preventing any further
238  * interrupts on that ino. The state machine will only be idled again if a
239  * handler is subsequently added or removed.
240  *
241  * return value: DDI_INTR_CLAIMED if any handlers claimed the interrupt,
242  * DDI_INTR_UNCLAIMED otherwise.
243  */
244 uint_t
245 px_msiq_intr(caddr_t arg)
246 {
247 	px_ino_pil_t	*ipil_p = (px_ino_pil_t *)arg;
248 	px_ino_t	*ino_p = ipil_p->ipil_ino_p;
249 	px_t		*px_p = ino_p->ino_ib_p->ib_px_p;
250 	px_msiq_state_t	*msiq_state_p = &px_p->px_ib_p->ib_msiq_state;
251 	px_msiq_t	*msiq_p = ino_p->ino_msiq_p;
252 	dev_info_t	*dip = px_p->px_dip;
253 	ushort_t	pil = ipil_p->ipil_pil;
254 	msiq_rec_t	msiq_rec, *msiq_rec_p = &msiq_rec;
255 	msiqhead_t	*curr_head_p;
256 	msiqtail_t	curr_tail_index;
257 	msgcode_t	msg_code;
258 	px_ih_t		*ih_p;
259 	uint_t		ret = DDI_INTR_UNCLAIMED;
260 	int		i, j;
261 
262 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: msiq_id =%x ino=%x pil=%x "
263 	    "ih_size=%x ih_lst=%x\n", msiq_p->msiq_id, ino_p->ino_ino,
264 	    ipil_p->ipil_pil, ipil_p->ipil_ih_size, ipil_p->ipil_ih_head);
265 
266 	/*
267 	 * The px_msiq_intr() handles multiple interrupt priorities and it
268 	 * will set msiq->msiq_rec2process to the number of MSIQ records to
269 	 * process while handling the highest priority interrupt. Subsequent
270 	 * lower priority interrupts will just process any unprocessed MSIQ
271 	 * records or will just return immediately.
272 	 */
273 	if (msiq_p->msiq_recs2process == 0) {
274 		/* Read current MSIQ tail index */
275 		px_lib_msiq_gettail(dip, msiq_p->msiq_id, &curr_tail_index);
276 		msiq_p->msiq_new_head_index = msiq_p->msiq_curr_head_index;
277 
278 		if (curr_tail_index < msiq_p->msiq_curr_head_index)
279 			curr_tail_index += msiq_state_p->msiq_rec_cnt;
280 
281 		msiq_p->msiq_recs2process = curr_tail_index -
282 		    msiq_p->msiq_curr_head_index;
283 	}
284 
285 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: curr_head %x new_head %x "
286 	    "rec2process %x\n", msiq_p->msiq_curr_head_index,
287 	    msiq_p->msiq_new_head_index, msiq_p->msiq_recs2process);
288 
289 	/* If all MSIQ records are already processed, just return immediately */
290 	if ((msiq_p->msiq_new_head_index - msiq_p->msiq_curr_head_index)
291 	    == msiq_p->msiq_recs2process)
292 		goto intr_done;
293 
294 	curr_head_p = (msiqhead_t *)((caddr_t)msiq_p->msiq_base_p +
295 	    (msiq_p->msiq_curr_head_index * sizeof (msiq_rec_t)));
296 
297 	/*
298 	 * Calculate the number of recs to process by taking the difference
299 	 * between the head and tail pointers. For all records we always
300 	 * verify that we have a valid record type before we do any processing.
301 	 * If triggered, we should always have at least one valid record.
302 	 */
303 	for (i = 0; i < msiq_p->msiq_recs2process; i++) {
304 		/* Read next MSIQ record */
305 		px_lib_get_msiq_rec(dip, curr_head_p, msiq_rec_p);
306 
307 		DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSIQ RECORD, "
308 		    "msiq_rec_type 0x%llx msiq_rec_rid 0x%llx\n",
309 		    msiq_rec_p->msiq_rec_type, msiq_rec_p->msiq_rec_rid);
310 
311 		if (!msiq_rec_p->msiq_rec_type)
312 			goto next_rec;
313 
314 		/* Check MSIQ record type */
315 		switch (msiq_rec_p->msiq_rec_type) {
316 		case MSG_REC:
317 			msg_code = msiq_rec_p->msiq_rec_data.msg.msg_code;
318 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: PCIE MSG "
319 			    "record, msg type 0x%x\n", msg_code);
320 			break;
321 		case MSI32_REC:
322 		case MSI64_REC:
323 			msg_code = msiq_rec_p->msiq_rec_data.msi.msi_data;
324 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: MSI record, "
325 			    "msi 0x%x\n", msg_code);
326 
327 			/* Clear MSI state */
328 			px_lib_msi_setstate(dip, (msinum_t)msg_code,
329 			    PCI_MSI_STATE_IDLE);
330 			break;
331 		default:
332 			msg_code = 0;
333 			cmn_err(CE_WARN, "%s%d: px_msiq_intr: 0x%x MSIQ "
334 			    "record type is not supported",
335 			    ddi_driver_name(dip), ddi_get_instance(dip),
336 			    msiq_rec_p->msiq_rec_type);
337 
338 			goto next_rec;
339 		}
340 
341 		/*
342 		 * Scan through px_ih_t linked list, searching for the
343 		 * right px_ih_t, matching MSIQ record data.
344 		 */
345 		for (j = 0, ih_p = ipil_p->ipil_ih_start;
346 		    ih_p && (j < ipil_p->ipil_ih_size) &&
347 		    ((ih_p->ih_msg_code != msg_code) ||
348 		    (ih_p->ih_rec_type != msiq_rec_p->msiq_rec_type));
349 		    ih_p = ih_p->ih_next, j++)
350 			;
351 
352 		if ((ih_p->ih_msg_code == msg_code) &&
353 		    (ih_p->ih_rec_type == msiq_rec_p->msiq_rec_type)) {
354 			dev_info_t *dip = ih_p->ih_dip;
355 			uint_t (*handler)() = ih_p->ih_handler;
356 			caddr_t arg1 = ih_p->ih_handler_arg1;
357 			caddr_t arg2 = ih_p->ih_handler_arg2;
358 
359 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: ino=%x data=%x "
360 			    "handler=%p arg1 =%p arg2=%p\n", ino_p->ino_ino,
361 			    msg_code, handler, arg1, arg2);
362 
363 			DTRACE_PROBE4(interrupt__start, dev_info_t, dip,
364 			    void *, handler, caddr_t, arg1, caddr_t, arg2);
365 
366 			/*
367 			 * Special case for PCIE Error Messages.
368 			 * The current frame work doesn't fit PCIE Err Msgs
369 			 * This should be fixed when PCIE MESSAGES as a whole
370 			 * is architected correctly.
371 			 */
372 			if ((msg_code == PCIE_MSG_CODE_ERR_COR) ||
373 			    (msg_code == PCIE_MSG_CODE_ERR_NONFATAL) ||
374 			    (msg_code == PCIE_MSG_CODE_ERR_FATAL)) {
375 				ret = px_err_fabric_intr(px_p, msg_code,
376 				    msiq_rec_p->msiq_rec_rid);
377 			} else
378 				ret = (*handler)(arg1, arg2);
379 
380 			/*
381 			 * Account for time used by this interrupt. Protect
382 			 * against conflicting writes to ih_ticks from
383 			 * ib_intr_dist_all() by using atomic ops.
384 			 */
385 
386 			if (pil <= LOCK_LEVEL)
387 				atomic_add_64(&ih_p->ih_ticks, intr_get_time());
388 
389 			DTRACE_PROBE4(interrupt__complete, dev_info_t, dip,
390 			    void *, handler, caddr_t, arg1, int, ret);
391 
392 			msiq_p->msiq_new_head_index++;
393 			px_lib_clr_msiq_rec(dip, curr_head_p);
394 		} else {
395 			DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr:"
396 			    "No matching MSIQ record found\n");
397 		}
398 next_rec:
399 		/* Get the pointer next EQ record */
400 		curr_head_p = (msiqhead_t *)
401 		    ((caddr_t)curr_head_p + sizeof (msiq_rec_t));
402 
403 		/* Check for overflow condition */
404 		if (curr_head_p >= (msiqhead_t *)((caddr_t)msiq_p->msiq_base_p
405 		    + (msiq_state_p->msiq_rec_cnt * sizeof (msiq_rec_t))))
406 			curr_head_p = (msiqhead_t *)msiq_p->msiq_base_p;
407 	}
408 
409 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: No of MSIQ recs processed %x\n",
410 	    (msiq_p->msiq_new_head_index - msiq_p->msiq_curr_head_index));
411 
412 	DBG(DBG_MSIQ_INTR, dip, "px_msiq_intr: curr_head %x new_head %x "
413 	    "rec2process %x\n", msiq_p->msiq_curr_head_index,
414 	    msiq_p->msiq_new_head_index, msiq_p->msiq_recs2process);
415 
416 	/* ino_claimed used just for debugging purpose */
417 	if (ret)
418 		ino_p->ino_claimed |= (1 << pil);
419 
420 intr_done:
421 	/* Interrupt can only be cleared after all pil levels are handled */
422 	if (pil != ino_p->ino_lopil)
423 		return (DDI_INTR_CLAIMED);
424 
425 	if (msiq_p->msiq_new_head_index <= msiq_p->msiq_curr_head_index)  {
426 		if (px_unclaimed_intr_block)
427 			return (px_spurintr(ipil_p));
428 	}
429 
430 	/*  Update MSIQ head index with no of MSIQ records processed */
431 	if (msiq_p->msiq_new_head_index >= msiq_state_p->msiq_rec_cnt)
432 		msiq_p->msiq_new_head_index -= msiq_state_p->msiq_rec_cnt;
433 
434 	msiq_p->msiq_curr_head_index = msiq_p->msiq_new_head_index;
435 	px_lib_msiq_sethead(dip, msiq_p->msiq_id, msiq_p->msiq_new_head_index);
436 
437 	msiq_p->msiq_new_head_index = 0;
438 	msiq_p->msiq_recs2process = 0;
439 	ino_p->ino_claimed = 0;
440 
441 	/* Clear the pending state */
442 	if (px_lib_intr_setstate(dip, ino_p->ino_sysino,
443 	    INTR_IDLE_STATE) != DDI_SUCCESS)
444 		return (DDI_INTR_UNCLAIMED);
445 
446 	return (DDI_INTR_CLAIMED);
447 }
448 
449 dev_info_t *
450 px_get_my_childs_dip(dev_info_t *dip, dev_info_t *rdip)
451 {
452 	dev_info_t	*cdip = rdip;
453 
454 	for (; ddi_get_parent(cdip) != dip; cdip = ddi_get_parent(cdip))
455 		;
456 
457 	return (cdip);
458 }
459 
460 /*
 * Default class to pil value mapping, passed by px_class_to_pil() to
 * px_class_to_val().  px_match_class_val() scans the entries in order and
 * uses the first one whose class code equals the device's class code under
 * that entry's mask.
 * NOTE(review): the initializers read as {class code, class mask, pil} --
 * confirm the field order against the px_class_val_t declaration.
 */
461 px_class_val_t px_default_pil [] = {
462 	{0x000000, 0xff0000, 0x1},	/* Class code for pre-2.0 devices */
463 	{0x010000, 0xff0000, 0x4},	/* Mass Storage Controller */
464 	{0x020000, 0xff0000, 0x6},	/* Network Controller */
465 	{0x030000, 0xff0000, 0x9},	/* Display Controller */
466 	{0x040000, 0xff0000, 0x9},	/* Multimedia Controller */
467 	{0x050000, 0xff0000, 0x9},	/* Memory Controller */
468 	{0x060000, 0xff0000, 0x9},	/* Bridge Controller */
469 	{0x0c0000, 0xffff00, 0x9},	/* Serial Bus, FireWire (IEEE 1394) */
470 	{0x0c0100, 0xffff00, 0x4},	/* Serial Bus, ACCESS.bus */
471 	{0x0c0200, 0xffff00, 0x4},	/* Serial Bus, SSA */
472 	{0x0c0300, 0xffff00, 0x9},	/* Serial Bus Universal Serial Bus */
473 	{0x0c0400, 0xffff00, 0x6},	/* Serial Bus, Fibre Channel */
474 	{0x0c0600, 0xffff00, 0x6}	/* Serial Bus, Infiniband */
475 };
476 
477 /*
478  * Default class to intr_weight value mapping (% of CPU).  A driver.conf
479  * entry on or above the pci node like
480  *
481  *	pci-class-intr-weights= 0x020000, 0xff0000, 30;
482  *
483  * can be used to augment or override entries in the default table below.
484  *
485  * NB: The values below give NICs preference on redistribution, and provide
486  * NICs some isolation from other interrupt sources. We need better interfaces
487  * that allow the NIC driver to identify a specific NIC instance as high
488  * bandwidth, and thus deserving of separation from other low bandwidth
489  * NICs and of additional isolation from other interrupt sources.
490  *
491  * NB: We treat Infiniband like a NIC.
 *
 * This table is passed by px_class_to_intr_weight() to px_class_to_val();
 * a device matching no entry gets a weight of 0.
492  */
493 px_class_val_t px_default_intr_weight [] = {
494 	{0x020000, 0xff0000, 35},	/* Network Controller */
495 	{0x010000, 0xff0000, 10},	/* Mass Storage Controller */
496 	{0x0c0400, 0xffff00, 10},	/* Serial Bus, Fibre Channel */
497 	{0x0c0600, 0xffff00, 50}	/* Serial Bus, Infiniband */
498 };
499 
500 static uint32_t
501 px_match_class_val(uint32_t key, px_class_val_t *rec_p, int nrec,
502     uint32_t default_val)
503 {
504 	int	i;
505 
506 	for (i = 0; i < nrec; rec_p++, i++) {
507 		if ((rec_p->class_code & rec_p->class_mask) ==
508 		    (key & rec_p->class_mask))
509 			return (rec_p->class_val);
510 	}
511 
512 	return (default_val);
513 }
514 
515 /*
516  * px_class_to_val
517  *
518  * Return the configuration value, based on class code and sub class code,
519  * from the specified property based or default px_class_val_t table.
520  */
521 uint32_t
522 px_class_to_val(dev_info_t *rdip, char *property_name, px_class_val_t *rec_p,
523     int nrec, uint32_t default_val)
524 {
525 	int property_len;
526 	uint32_t class_code;
527 	px_class_val_t *conf;
528 	uint32_t val = default_val;
529 
530 	/*
531 	 * Use the "class-code" property to get the base and sub class
532 	 * codes for the requesting device.
533 	 */
534 	class_code = (uint32_t)ddi_prop_get_int(DDI_DEV_T_ANY, rdip,
535 	    DDI_PROP_DONTPASS, "class-code", -1);
536 
537 	if (class_code == -1)
538 		return (val);
539 
540 	/* look up the val from the default table */
541 	val = px_match_class_val(class_code, rec_p, nrec, val);
542 
543 	/* see if there is a more specific property specified value */
544 	if (ddi_getlongprop(DDI_DEV_T_ANY, rdip, DDI_PROP_NOTPROM,
545 	    property_name, (caddr_t)&conf, &property_len))
546 		return (val);
547 
548 	if ((property_len % sizeof (px_class_val_t)) == 0)
549 		val = px_match_class_val(class_code, conf,
550 		    property_len / sizeof (px_class_val_t), val);
551 	kmem_free(conf, property_len);
552 	return (val);
553 }
554 
555 /* px_class_to_pil: return the pil for a given device. */
556 uint32_t
557 px_class_to_pil(dev_info_t *rdip)
558 {
559 	uint32_t pil;
560 
561 	/* Default pil is 1 */
562 	pil = px_class_to_val(rdip,
563 	    "pci-class-priorities", px_default_pil,
564 	    sizeof (px_default_pil) / sizeof (px_class_val_t), 1);
565 
566 	/* Range check the result */
567 	if (pil >= 0xf)
568 		pil = 1;
569 
570 	return (pil);
571 }
572 
573 /* px_class_to_intr_weight: return the intr_weight for a given device. */
574 static int32_t
575 px_class_to_intr_weight(dev_info_t *rdip)
576 {
577 	int32_t intr_weight;
578 
579 	/* default weight is 0% */
580 	intr_weight = px_class_to_val(rdip,
581 	    "pci-class-intr-weights", px_default_intr_weight,
582 	    sizeof (px_default_intr_weight) / sizeof (px_class_val_t), 0);
583 
584 	/* range check the result */
585 	if (intr_weight < 0)
586 		intr_weight = 0;
587 	if (intr_weight > 1000)
588 		intr_weight = 1000;
589 
590 	return (intr_weight);
591 }
592 
593 /* ARGSUSED */
594 int
595 px_intx_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
596     ddi_intr_handle_impl_t *hdlp, void *result)
597 {
598 	px_t	*px_p = DIP_TO_STATE(dip);
599 	int	ret = DDI_SUCCESS;
600 
601 	DBG(DBG_INTROPS, dip, "px_intx_ops: dip=%x rdip=%x intr_op=%x "
602 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
603 
604 	switch (intr_op) {
605 	case DDI_INTROP_GETCAP:
606 		ret = pci_intx_get_cap(rdip, (int *)result);
607 		break;
608 	case DDI_INTROP_SETCAP:
609 		DBG(DBG_INTROPS, dip, "px_intx_ops: SetCap is not supported\n");
610 		ret = DDI_ENOTSUP;
611 		break;
612 	case DDI_INTROP_ALLOC:
613 		*(int *)result = hdlp->ih_scratch1;
614 		break;
615 	case DDI_INTROP_FREE:
616 		break;
617 	case DDI_INTROP_GETPRI:
618 		*(int *)result = hdlp->ih_pri ?
619 		    hdlp->ih_pri : px_class_to_pil(rdip);
620 		break;
621 	case DDI_INTROP_SETPRI:
622 		break;
623 	case DDI_INTROP_ADDISR:
624 		ret = px_add_intx_intr(dip, rdip, hdlp);
625 		break;
626 	case DDI_INTROP_REMISR:
627 		ret = px_rem_intx_intr(dip, rdip, hdlp);
628 		break;
629 	case DDI_INTROP_ENABLE:
630 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
631 		    hdlp->ih_vector, hdlp->ih_pri, PX_INTR_STATE_ENABLE, 0, 0);
632 		break;
633 	case DDI_INTROP_DISABLE:
634 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
635 		    hdlp->ih_vector, hdlp->ih_pri, PX_INTR_STATE_DISABLE, 0, 0);
636 		break;
637 	case DDI_INTROP_SETMASK:
638 		ret = pci_intx_set_mask(rdip);
639 		break;
640 	case DDI_INTROP_CLRMASK:
641 		ret = pci_intx_clr_mask(rdip);
642 		break;
643 	case DDI_INTROP_GETPENDING:
644 		ret = pci_intx_get_pending(rdip, (int *)result);
645 		break;
646 	case DDI_INTROP_NINTRS:
647 	case DDI_INTROP_NAVAIL:
648 		*(int *)result = i_ddi_get_intx_nintrs(rdip);
649 		break;
650 	default:
651 		ret = DDI_ENOTSUP;
652 		break;
653 	}
654 
655 	return (ret);
656 }
657 
658 /* ARGSUSED */
659 int
660 px_msix_ops(dev_info_t *dip, dev_info_t *rdip, ddi_intr_op_t intr_op,
661     ddi_intr_handle_impl_t *hdlp, void *result)
662 {
663 	px_t			*px_p = DIP_TO_STATE(dip);
664 	px_msi_state_t		*msi_state_p = &px_p->px_ib_p->ib_msi_state;
665 	msiq_rec_type_t		msiq_rec_type;
666 	msi_type_t		msi_type;
667 	uint64_t		msi_addr;
668 	msinum_t		msi_num;
669 	msiqid_t		msiq_id;
670 	uint_t			nintrs;
671 	int			i, ret = DDI_SUCCESS;
672 
673 	DBG(DBG_INTROPS, dip, "px_msix_ops: dip=%x rdip=%x intr_op=%x "
674 	    "handle=%p\n", dip, rdip, intr_op, hdlp);
675 
676 	/* Check for MSI64 support */
677 	if ((hdlp->ih_cap & DDI_INTR_FLAG_MSI64) && msi_state_p->msi_addr64) {
678 		msiq_rec_type = MSI64_REC;
679 		msi_type = MSI64_TYPE;
680 		msi_addr = msi_state_p->msi_addr64;
681 	} else {
682 		msiq_rec_type = MSI32_REC;
683 		msi_type = MSI32_TYPE;
684 		msi_addr = msi_state_p->msi_addr32;
685 	}
686 
687 	switch (intr_op) {
688 	case DDI_INTROP_GETCAP:
689 		ret = pci_msi_get_cap(rdip, hdlp->ih_type, (int *)result);
690 		break;
691 	case DDI_INTROP_SETCAP:
692 		DBG(DBG_INTROPS, dip, "px_msix_ops: SetCap is not supported\n");
693 		ret = DDI_ENOTSUP;
694 		break;
695 	case DDI_INTROP_ALLOC:
696 		/*
697 		 * We need to restrict this allocation in future
698 		 * based on Resource Management policies.
699 		 */
700 		if ((ret = px_msi_alloc(px_p, rdip, hdlp->ih_inum,
701 		    hdlp->ih_scratch1, (uintptr_t)hdlp->ih_scratch2, &msi_num,
702 		    (int *)result)) != DDI_SUCCESS) {
703 			DBG(DBG_INTROPS, dip, "px_msix_ops: allocation "
704 			    "failed, rdip 0x%p type 0x%d inum 0x%x "
705 			    "count 0x%x\n", rdip, hdlp->ih_type, hdlp->ih_inum,
706 			    hdlp->ih_scratch1);
707 
708 			return (ret);
709 		}
710 
711 		if ((hdlp->ih_type == DDI_INTR_TYPE_MSIX) &&
712 		    (i_ddi_get_msix(rdip) == NULL)) {
713 			ddi_intr_msix_t		*msix_p;
714 
715 			if (msix_p = pci_msix_init(rdip)) {
716 				i_ddi_set_msix(rdip, msix_p);
717 				break;
718 			}
719 
720 			DBG(DBG_INTROPS, dip, "px_msix_ops: MSI-X allocation "
721 			    "failed, rdip 0x%p inum 0x%x\n", rdip,
722 			    hdlp->ih_inum);
723 
724 			(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
725 			    hdlp->ih_scratch1);
726 
727 			return (DDI_FAILURE);
728 		}
729 
730 		break;
731 	case DDI_INTROP_FREE:
732 		(void) pci_msi_disable_mode(rdip, hdlp->ih_type, NULL);
733 		(void) pci_msi_unconfigure(rdip, hdlp->ih_type, hdlp->ih_inum);
734 
735 		if (hdlp->ih_type == DDI_INTR_TYPE_MSI)
736 			goto msi_free;
737 
738 		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
739 			break;
740 
741 		if (((i_ddi_intr_get_current_nintrs(hdlp->ih_dip) - 1) == 0) &&
742 		    (i_ddi_get_msix(rdip))) {
743 			pci_msix_fini(i_ddi_get_msix(rdip));
744 			i_ddi_set_msix(rdip, NULL);
745 		}
746 msi_free:
747 		(void) px_msi_free(px_p, rdip, hdlp->ih_inum,
748 		    hdlp->ih_scratch1);
749 		break;
750 	case DDI_INTROP_GETPRI:
751 		*(int *)result = hdlp->ih_pri ?
752 		    hdlp->ih_pri : px_class_to_pil(rdip);
753 		break;
754 	case DDI_INTROP_SETPRI:
755 		break;
756 	case DDI_INTROP_ADDISR:
757 		if ((ret = px_msi_get_msinum(px_p, hdlp->ih_dip,
758 		    hdlp->ih_inum, &msi_num)) != DDI_SUCCESS)
759 			return (ret);
760 
761 		if ((ret = px_add_msiq_intr(dip, rdip, hdlp,
762 		    msiq_rec_type, msi_num, &msiq_id)) != DDI_SUCCESS) {
763 			DBG(DBG_INTROPS, dip, "px_msix_ops: Add MSI handler "
764 			    "failed, rdip 0x%p msi 0x%x\n", rdip, msi_num);
765 			return (ret);
766 		}
767 
768 		DBG(DBG_INTROPS, dip, "px_msix_ops: msiq used 0x%x\n", msiq_id);
769 
770 		if ((ret = px_lib_msi_setmsiq(dip, msi_num,
771 		    msiq_id, msi_type)) != DDI_SUCCESS) {
772 			(void) px_rem_msiq_intr(dip, rdip,
773 			    hdlp, msiq_rec_type, msi_num, msiq_id);
774 			return (ret);
775 		}
776 
777 		if ((ret = px_lib_msi_setstate(dip, msi_num,
778 		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS) {
779 			(void) px_rem_msiq_intr(dip, rdip,
780 			    hdlp, msiq_rec_type, msi_num, msiq_id);
781 			return (ret);
782 		}
783 
784 		hdlp->ih_vector = msi_num;
785 		break;
786 	case DDI_INTROP_DUPVEC:
787 		DBG(DBG_INTROPS, dip, "px_msix_ops: dupisr - inum: %x, "
788 		    "new_vector: %x\n", hdlp->ih_inum, hdlp->ih_scratch1);
789 
790 		ret = pci_msix_dup(hdlp->ih_dip, hdlp->ih_inum,
791 		    hdlp->ih_scratch1);
792 		break;
793 	case DDI_INTROP_REMISR:
794 		msi_num = hdlp->ih_vector;
795 
796 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
797 		    &msiq_id)) != DDI_SUCCESS)
798 			return (ret);
799 
800 		if ((ret = px_lib_msi_setstate(dip, msi_num,
801 		    PCI_MSI_STATE_IDLE)) != DDI_SUCCESS)
802 			return (ret);
803 
804 		ret = px_rem_msiq_intr(dip, rdip,
805 		    hdlp, msiq_rec_type, msi_num, msiq_id);
806 
807 		hdlp->ih_vector = 0;
808 		break;
809 	case DDI_INTROP_ENABLE:
810 		msi_num = hdlp->ih_vector;
811 
812 		if ((ret = px_lib_msi_setvalid(dip, msi_num,
813 		    PCI_MSI_VALID)) != DDI_SUCCESS)
814 			return (ret);
815 
816 		if ((pci_is_msi_enabled(rdip, hdlp->ih_type) != DDI_SUCCESS) ||
817 		    (hdlp->ih_type == DDI_INTR_TYPE_MSIX)) {
818 			nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
819 
820 			if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
821 			    nintrs, hdlp->ih_inum, msi_addr,
822 			    hdlp->ih_type == DDI_INTR_TYPE_MSIX ?
823 			    msi_num : msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
824 				return (ret);
825 
826 			if ((ret = pci_msi_enable_mode(rdip, hdlp->ih_type))
827 			    != DDI_SUCCESS)
828 				return (ret);
829 		}
830 
831 		if ((ret = pci_msi_clr_mask(rdip, hdlp->ih_type,
832 		    hdlp->ih_inum)) != DDI_SUCCESS)
833 			return (ret);
834 
835 		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
836 			break;
837 
838 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
839 		    &msiq_id)) != DDI_SUCCESS)
840 			return (ret);
841 
842 		ret = px_ib_update_intr_state(px_p, rdip, hdlp->ih_inum,
843 		    px_msiqid_to_devino(px_p, msiq_id), hdlp->ih_pri,
844 		    PX_INTR_STATE_ENABLE, msiq_rec_type, msi_num);
845 
846 		break;
847 	case DDI_INTROP_DISABLE:
848 		msi_num = hdlp->ih_vector;
849 
850 		if ((ret = pci_msi_set_mask(rdip, hdlp->ih_type,
851 		    hdlp->ih_inum)) != DDI_SUCCESS)
852 			return (ret);
853 
854 		if ((ret = px_lib_msi_setvalid(dip, msi_num,
855 		    PCI_MSI_INVALID)) != DDI_SUCCESS)
856 			return (ret);
857 
858 		if (hdlp->ih_flags & DDI_INTR_MSIX_DUP)
859 			break;
860 
861 		if ((ret = px_lib_msi_getmsiq(dip, msi_num,
862 		    &msiq_id)) != DDI_SUCCESS)
863 			return (ret);
864 
865 		ret = px_ib_update_intr_state(px_p, rdip,
866 		    hdlp->ih_inum, px_msiqid_to_devino(px_p, msiq_id),
867 		    hdlp->ih_pri, PX_INTR_STATE_DISABLE, msiq_rec_type,
868 		    msi_num);
869 
870 		break;
871 	case DDI_INTROP_BLOCKENABLE:
872 		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
873 		msi_num = hdlp->ih_vector;
874 
875 		if ((ret = pci_msi_configure(rdip, hdlp->ih_type,
876 		    nintrs, hdlp->ih_inum, msi_addr,
877 		    msi_num & ~(nintrs - 1))) != DDI_SUCCESS)
878 			return (ret);
879 
880 		for (i = 0; i < nintrs; i++, msi_num++) {
881 			if ((ret = px_lib_msi_setvalid(dip, msi_num,
882 			    PCI_MSI_VALID)) != DDI_SUCCESS)
883 				return (ret);
884 
885 			if ((ret = px_lib_msi_getmsiq(dip, msi_num,
886 			    &msiq_id)) != DDI_SUCCESS)
887 				return (ret);
888 
889 			if ((ret = px_ib_update_intr_state(px_p, rdip,
890 			    hdlp->ih_inum + i, px_msiqid_to_devino(px_p,
891 			    msiq_id), hdlp->ih_pri, PX_INTR_STATE_ENABLE,
892 			    msiq_rec_type, msi_num)) != DDI_SUCCESS)
893 				return (ret);
894 		}
895 
896 		ret = pci_msi_enable_mode(rdip, hdlp->ih_type);
897 		break;
898 	case DDI_INTROP_BLOCKDISABLE:
899 		nintrs = i_ddi_intr_get_current_nintrs(hdlp->ih_dip);
900 		msi_num = hdlp->ih_vector;
901 
902 		if ((ret = pci_msi_disable_mode(rdip, hdlp->ih_type,
903 		    hdlp->ih_cap & DDI_INTR_FLAG_BLOCK)) != DDI_SUCCESS)
904 			return (ret);
905 
906 		for (i = 0; i < nintrs; i++, msi_num++) {
907 			if ((ret = px_lib_msi_setvalid(dip, msi_num,
908 			    PCI_MSI_INVALID)) != DDI_SUCCESS)
909 				return (ret);
910 
911 			if ((ret = px_lib_msi_getmsiq(dip, msi_num,
912 			    &msiq_id)) != DDI_SUCCESS)
913 				return (ret);
914 
915 			if ((ret = px_ib_update_intr_state(px_p, rdip,
916 			    hdlp->ih_inum + i, px_msiqid_to_devino(px_p,
917 			    msiq_id), hdlp->ih_pri, PX_INTR_STATE_DISABLE,
918 			    msiq_rec_type, msi_num)) != DDI_SUCCESS)
919 				return (ret);
920 		}
921 
922 		break;
923 	case DDI_INTROP_SETMASK:
924 		ret = pci_msi_set_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
925 		break;
926 	case DDI_INTROP_CLRMASK:
927 		ret = pci_msi_clr_mask(rdip, hdlp->ih_type, hdlp->ih_inum);
928 		break;
929 	case DDI_INTROP_GETPENDING:
930 		ret = pci_msi_get_pending(rdip, hdlp->ih_type,
931 		    hdlp->ih_inum, (int *)result);
932 		break;
933 	case DDI_INTROP_NINTRS:
934 		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
935 		break;
936 	case DDI_INTROP_NAVAIL:
937 		/* XXX - a new interface may be needed */
938 		ret = pci_msi_get_nintrs(rdip, hdlp->ih_type, (int *)result);
939 		break;
940 	default:
941 		ret = DDI_ENOTSUP;
942 		break;
943 	}
944 
945 	return (ret);
946 }
947 
948 static struct {
949 	kstat_named_t pxintr_ks_name;
950 	kstat_named_t pxintr_ks_type;
951 	kstat_named_t pxintr_ks_cpu;
952 	kstat_named_t pxintr_ks_pil;
953 	kstat_named_t pxintr_ks_time;
954 	kstat_named_t pxintr_ks_ino;
955 	kstat_named_t pxintr_ks_cookie;
956 	kstat_named_t pxintr_ks_devpath;
957 	kstat_named_t pxintr_ks_buspath;
958 } pxintr_ks_template = {
959 	{ "name",	KSTAT_DATA_CHAR },
960 	{ "type",	KSTAT_DATA_CHAR },
961 	{ "cpu",	KSTAT_DATA_UINT64 },
962 	{ "pil",	KSTAT_DATA_UINT64 },
963 	{ "time",	KSTAT_DATA_UINT64 },
964 	{ "ino",	KSTAT_DATA_UINT64 },
965 	{ "cookie",	KSTAT_DATA_UINT64 },
966 	{ "devpath",	KSTAT_DATA_STRING },
967 	{ "buspath",	KSTAT_DATA_STRING },
968 };
969 
970 static uint32_t pxintr_ks_instance;
971 static char ih_devpath[MAXPATHLEN];
972 static char ih_buspath[MAXPATHLEN];
973 kmutex_t pxintr_ks_template_lock;
974 
975 int
976 px_ks_update(kstat_t *ksp, int rw)
977 {
978 	px_ih_t *ih_p = ksp->ks_private;
979 	int maxlen = sizeof (pxintr_ks_template.pxintr_ks_name.value.c);
980 	px_ino_pil_t *ipil_p = ih_p->ih_ipil_p;
981 	px_ino_t *ino_p = ipil_p->ipil_ino_p;
982 	px_t *px_p = ino_p->ino_ib_p->ib_px_p;
983 	devino_t ino;
984 	sysino_t sysino;
985 
986 	ino = ino_p->ino_ino;
987 	(void) px_lib_intr_devino_to_sysino(px_p->px_dip, ino, &sysino);
988 
989 	(void) snprintf(pxintr_ks_template.pxintr_ks_name.value.c, maxlen,
990 	    "%s%d", ddi_driver_name(ih_p->ih_dip),
991 	    ddi_get_instance(ih_p->ih_dip));
992 
993 	(void) ddi_pathname(ih_p->ih_dip, ih_devpath);
994 	(void) ddi_pathname(px_p->px_dip, ih_buspath);
995 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_devpath, ih_devpath);
996 	kstat_named_setstr(&pxintr_ks_template.pxintr_ks_buspath, ih_buspath);
997 
998 	if (ih_p->ih_intr_state == PX_INTR_STATE_ENABLE) {
999 
1000 		switch (i_ddi_intr_get_current_type(ih_p->ih_dip)) {
1001 		case DDI_INTR_TYPE_MSI:
1002 			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
1003 			    "msi");
1004 			break;
1005 		case DDI_INTR_TYPE_MSIX:
1006 			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
1007 			    "msix");
1008 			break;
1009 		default:
1010 			(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
1011 			    "fixed");
1012 			break;
1013 		}
1014 
1015 		pxintr_ks_template.pxintr_ks_cpu.value.ui64 = ino_p->ino_cpuid;
1016 		pxintr_ks_template.pxintr_ks_pil.value.ui64 = ipil_p->ipil_pil;
1017 		pxintr_ks_template.pxintr_ks_time.value.ui64 = ih_p->ih_nsec +
1018 		    (uint64_t)tick2ns((hrtime_t)ih_p->ih_ticks,
1019 		    ino_p->ino_cpuid);
1020 		pxintr_ks_template.pxintr_ks_ino.value.ui64 = ino;
1021 		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = sysino;
1022 	} else {
1023 		(void) strcpy(pxintr_ks_template.pxintr_ks_type.value.c,
1024 		    "disabled");
1025 		pxintr_ks_template.pxintr_ks_cpu.value.ui64 = 0;
1026 		pxintr_ks_template.pxintr_ks_pil.value.ui64 = 0;
1027 		pxintr_ks_template.pxintr_ks_time.value.ui64 = 0;
1028 		pxintr_ks_template.pxintr_ks_ino.value.ui64 = 0;
1029 		pxintr_ks_template.pxintr_ks_cookie.value.ui64 = 0;
1030 	}
1031 	return (0);
1032 }
1033 
1034 void
1035 px_create_intr_kstats(px_ih_t *ih_p)
1036 {
1037 	msiq_rec_type_t rec_type = ih_p->ih_rec_type;
1038 
1039 	ASSERT(ih_p->ih_ksp == NULL);
1040 
1041 	/*
1042 	 * Create pci_intrs::: kstats for all ih types except messages,
1043 	 * which represent unusual conditions and don't need to be tracked.
1044 	 */
1045 	if (rec_type == 0 || rec_type == MSI32_REC || rec_type == MSI64_REC) {
1046 		ih_p->ih_ksp = kstat_create("pci_intrs",
1047 		    atomic_inc_32_nv(&pxintr_ks_instance), "config",
1048 		    "interrupts", KSTAT_TYPE_NAMED,
1049 		    sizeof (pxintr_ks_template) / sizeof (kstat_named_t),
1050 		    KSTAT_FLAG_VIRTUAL);
1051 	}
1052 	if (ih_p->ih_ksp != NULL) {
1053 		ih_p->ih_ksp->ks_data_size += MAXPATHLEN * 2;
1054 		ih_p->ih_ksp->ks_lock = &pxintr_ks_template_lock;
1055 		ih_p->ih_ksp->ks_data = &pxintr_ks_template;
1056 		ih_p->ih_ksp->ks_private = ih_p;
1057 		ih_p->ih_ksp->ks_update = px_ks_update;
1058 	}
1059 }
1060 
1061 /*
1062  * px_add_intx_intr:
1063  *
1064  * This function is called to register INTx and legacy hardware
1065  * interrupt pins interrupts.
1066  */
1067 int
1068 px_add_intx_intr(dev_info_t *dip, dev_info_t *rdip,
1069     ddi_intr_handle_impl_t *hdlp)
1070 {
1071 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1072 	px_ib_t		*ib_p = px_p->px_ib_p;
1073 	devino_t	ino;
1074 	px_ih_t		*ih_p;
1075 	px_ino_t	*ino_p;
1076 	px_ino_pil_t	*ipil_p, *ipil_list;
1077 	int32_t		weight;
1078 	int		ret = DDI_SUCCESS;
1079 
1080 	ino = hdlp->ih_vector;
1081 
1082 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: rdip=%s%d ino=%x "
1083 	    "handler=%x arg1=%x arg2=%x\n", ddi_driver_name(rdip),
1084 	    ddi_get_instance(rdip), ino, hdlp->ih_cb_func,
1085 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
1086 
1087 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum,
1088 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, 0, 0);
1089 
1090 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1091 
1092 	ino_p = px_ib_locate_ino(ib_p, ino);
1093 	ipil_list = ino_p ? ino_p->ino_ipil_p : NULL;
1094 
1095 	/* Sharing ino */
1096 	if (ino_p && (ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
1097 		if (px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, 0, 0)) {
1098 			DBG(DBG_A_INTX, dip, "px_add_intx_intr: "
1099 			    "dup intr #%d\n", hdlp->ih_inum);
1100 
1101 			ret = DDI_FAILURE;
1102 			goto fail1;
1103 		}
1104 
1105 		/* Save mondo value in hdlp */
1106 		hdlp->ih_vector = ino_p->ino_sysino;
1107 
1108 		if ((ret = px_ib_ino_add_intr(px_p, ipil_p,
1109 		    ih_p)) != DDI_SUCCESS)
1110 			goto fail1;
1111 
1112 		goto ino_done;
1113 	}
1114 
1115 	if (hdlp->ih_pri == 0)
1116 		hdlp->ih_pri = px_class_to_pil(rdip);
1117 
1118 	ipil_p = px_ib_new_ino_pil(ib_p, ino, hdlp->ih_pri, ih_p);
1119 	ino_p = ipil_p->ipil_ino_p;
1120 
1121 	/* Save mondo value in hdlp */
1122 	hdlp->ih_vector = ino_p->ino_sysino;
1123 
1124 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: pil=0x%x mondo=0x%x\n",
1125 	    hdlp->ih_pri, hdlp->ih_vector);
1126 
1127 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1128 	    (ddi_intr_handler_t *)px_intx_intr, (caddr_t)ipil_p, NULL);
1129 
1130 	ret = i_ddi_add_ivintr(hdlp);
1131 
1132 	/*
1133 	 * Restore original interrupt handler
1134 	 * and arguments in interrupt handle.
1135 	 */
1136 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1137 	    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1138 
1139 	if (ret != DDI_SUCCESS)
1140 		goto fail2;
1141 
1142 	/* Save the pil for this ino */
1143 	ipil_p->ipil_pil = hdlp->ih_pri;
1144 
1145 	/* Select cpu, saving it for sharing and removal */
1146 	if (ipil_list == NULL) {
1147 		ino_p->ino_cpuid = intr_dist_cpuid();
1148 
1149 		/* Enable interrupt */
1150 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
1151 	}
1152 
1153 ino_done:
1154 	/* Add weight to the cpu that we are already targeting */
1155 	weight = px_class_to_intr_weight(rdip);
1156 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1157 
1158 	ih_p->ih_ipil_p = ipil_p;
1159 	px_create_intr_kstats(ih_p);
1160 	if (ih_p->ih_ksp)
1161 		kstat_install(ih_p->ih_ksp);
1162 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1163 
1164 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: done! Interrupt 0x%x pil=%x\n",
1165 	    ino_p->ino_sysino, hdlp->ih_pri);
1166 
1167 	return (ret);
1168 fail2:
1169 	px_ib_delete_ino_pil(ib_p, ipil_p);
1170 fail1:
1171 	if (ih_p->ih_config_handle)
1172 		pci_config_teardown(&ih_p->ih_config_handle);
1173 
1174 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1175 	kmem_free(ih_p, sizeof (px_ih_t));
1176 
1177 	DBG(DBG_A_INTX, dip, "px_add_intx_intr: Failed! Interrupt 0x%x "
1178 	    "pil=%x\n", ino_p->ino_sysino, hdlp->ih_pri);
1179 
1180 	return (ret);
1181 }
1182 
1183 /*
1184  * px_rem_intx_intr:
1185  *
1186  * This function is called to unregister INTx and legacy hardware
1187  * interrupt pins interrupts.
1188  */
1189 int
1190 px_rem_intx_intr(dev_info_t *dip, dev_info_t *rdip,
1191     ddi_intr_handle_impl_t *hdlp)
1192 {
1193 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1194 	px_ib_t		*ib_p = px_p->px_ib_p;
1195 	devino_t	ino;
1196 	cpuid_t		curr_cpu;
1197 	px_ino_t	*ino_p;
1198 	px_ino_pil_t	*ipil_p;
1199 	px_ih_t		*ih_p;
1200 	int		ret = DDI_SUCCESS;
1201 
1202 	ino = hdlp->ih_vector;
1203 
1204 	DBG(DBG_R_INTX, dip, "px_rem_intx_intr: rdip=%s%d ino=%x\n",
1205 	    ddi_driver_name(rdip), ddi_get_instance(rdip), ino);
1206 
1207 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1208 
1209 	ino_p = px_ib_locate_ino(ib_p, ino);
1210 	ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri);
1211 	ih_p = px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, 0, 0);
1212 
1213 	/* Get the current cpu */
1214 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1215 	    &curr_cpu)) != DDI_SUCCESS)
1216 		goto fail;
1217 
1218 	if ((ret = px_ib_ino_rem_intr(px_p, ipil_p, ih_p)) != DDI_SUCCESS)
1219 		goto fail;
1220 
1221 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1222 
1223 	if (ipil_p->ipil_ih_size == 0) {
1224 		hdlp->ih_vector = ino_p->ino_sysino;
1225 		i_ddi_rem_ivintr(hdlp);
1226 
1227 		px_ib_delete_ino_pil(ib_p, ipil_p);
1228 	}
1229 
1230 	if (ino_p->ino_ipil_size == 0) {
1231 		kmem_free(ino_p, sizeof (px_ino_t));
1232 	} else {
1233 		/* Re-enable interrupt only if mapping register still shared */
1234 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1235 	}
1236 
1237 fail:
1238 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1239 	return (ret);
1240 }
1241 
1242 /*
1243  * px_add_msiq_intr:
1244  *
1245  * This function is called to register MSI/Xs and PCIe message interrupts.
1246  */
1247 int
1248 px_add_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1249     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1250     msgcode_t msg_code, msiqid_t *msiq_id_p)
1251 {
1252 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1253 	px_ib_t		*ib_p = px_p->px_ib_p;
1254 	px_msiq_state_t	*msiq_state_p = &ib_p->ib_msiq_state;
1255 	devino_t	ino;
1256 	px_ih_t		*ih_p;
1257 	px_ino_t	*ino_p;
1258 	px_ino_pil_t	*ipil_p, *ipil_list;
1259 	int32_t		weight;
1260 	int		ret = DDI_SUCCESS;
1261 
1262 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: rdip=%s%d handler=%x "
1263 	    "arg1=%x arg2=%x\n", ddi_driver_name(rdip), ddi_get_instance(rdip),
1264 	    hdlp->ih_cb_func, hdlp->ih_cb_arg1, hdlp->ih_cb_arg2);
1265 
1266 	if ((ret = px_msiq_alloc(px_p, rec_type, msiq_id_p)) != DDI_SUCCESS) {
1267 		DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1268 		    "msiq allocation failed\n");
1269 		return (ret);
1270 	}
1271 
1272 	ino = px_msiqid_to_devino(px_p, *msiq_id_p);
1273 
1274 	ih_p = px_ib_alloc_ih(rdip, hdlp->ih_inum, hdlp->ih_cb_func,
1275 	    hdlp->ih_cb_arg1, hdlp->ih_cb_arg2, rec_type, msg_code);
1276 
1277 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1278 
1279 	ino_p = px_ib_locate_ino(ib_p, ino);
1280 	ipil_list = ino_p ? ino_p->ino_ipil_p : NULL;
1281 
1282 	/* Sharing ino */
1283 	if (ino_p && (ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri))) {
1284 		if (px_ib_intr_locate_ih(ipil_p, rdip,
1285 		    hdlp->ih_inum, rec_type, msg_code)) {
1286 			DBG(DBG_MSIQ, dip, "px_add_msiq_intr: "
1287 			    "dup intr #%d\n", hdlp->ih_inum);
1288 
1289 			ret = DDI_FAILURE;
1290 			goto fail1;
1291 		}
1292 
1293 		/* Save mondo value in hdlp */
1294 		hdlp->ih_vector = ino_p->ino_sysino;
1295 
1296 		if ((ret = px_ib_ino_add_intr(px_p, ipil_p,
1297 		    ih_p)) != DDI_SUCCESS)
1298 			goto fail1;
1299 
1300 		goto ino_done;
1301 	}
1302 
1303 	if (hdlp->ih_pri == 0)
1304 		hdlp->ih_pri = px_class_to_pil(rdip);
1305 
1306 	ipil_p = px_ib_new_ino_pil(ib_p, ino, hdlp->ih_pri, ih_p);
1307 	ino_p = ipil_p->ipil_ino_p;
1308 
1309 	ino_p->ino_msiq_p = msiq_state_p->msiq_p +
1310 	    (*msiq_id_p - msiq_state_p->msiq_1st_msiq_id);
1311 
1312 	/* Save mondo value in hdlp */
1313 	hdlp->ih_vector = ino_p->ino_sysino;
1314 
1315 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: pil=0x%x mondo=0x%x\n",
1316 	    hdlp->ih_pri, hdlp->ih_vector);
1317 
1318 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp,
1319 	    (ddi_intr_handler_t *)px_msiq_intr, (caddr_t)ipil_p, NULL);
1320 
1321 	ret = i_ddi_add_ivintr(hdlp);
1322 
1323 	/*
1324 	 * Restore original interrupt handler
1325 	 * and arguments in interrupt handle.
1326 	 */
1327 	DDI_INTR_ASSIGN_HDLR_N_ARGS(hdlp, ih_p->ih_handler,
1328 	    ih_p->ih_handler_arg1, ih_p->ih_handler_arg2);
1329 
1330 	if (ret != DDI_SUCCESS)
1331 		goto fail2;
1332 
1333 	/* Save the pil for this ino */
1334 	ipil_p->ipil_pil = hdlp->ih_pri;
1335 
1336 	/* Select cpu, saving it for sharing and removal */
1337 	if (ipil_list == NULL) {
1338 		ino_p->ino_cpuid = intr_dist_cpuid();
1339 
1340 		/* Enable MSIQ */
1341 		px_lib_msiq_setstate(dip, *msiq_id_p, PCI_MSIQ_STATE_IDLE);
1342 		px_lib_msiq_setvalid(dip, *msiq_id_p, PCI_MSIQ_VALID);
1343 
1344 		/* Enable interrupt */
1345 		px_ib_intr_enable(px_p, ino_p->ino_cpuid, ino);
1346 	}
1347 
1348 ino_done:
1349 	/* Add weight to the cpu that we are already targeting */
1350 	weight = px_class_to_intr_weight(rdip);
1351 	intr_dist_cpuid_add_device_weight(ino_p->ino_cpuid, rdip, weight);
1352 
1353 	ih_p->ih_ipil_p = ipil_p;
1354 	px_create_intr_kstats(ih_p);
1355 	if (ih_p->ih_ksp)
1356 		kstat_install(ih_p->ih_ksp);
1357 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1358 
1359 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: done! Interrupt 0x%x pil=%x\n",
1360 	    ino_p->ino_sysino, hdlp->ih_pri);
1361 
1362 	return (ret);
1363 fail2:
1364 	px_ib_delete_ino_pil(ib_p, ipil_p);
1365 fail1:
1366 	if (ih_p->ih_config_handle)
1367 		pci_config_teardown(&ih_p->ih_config_handle);
1368 
1369 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1370 	kmem_free(ih_p, sizeof (px_ih_t));
1371 
1372 	DBG(DBG_MSIQ, dip, "px_add_msiq_intr: Failed! Interrupt 0x%x pil=%x\n",
1373 	    ino_p->ino_sysino, hdlp->ih_pri);
1374 
1375 	return (ret);
1376 }
1377 
1378 /*
1379  * px_rem_msiq_intr:
1380  *
1381  * This function is called to unregister MSI/Xs and PCIe message interrupts.
1382  */
1383 int
1384 px_rem_msiq_intr(dev_info_t *dip, dev_info_t *rdip,
1385     ddi_intr_handle_impl_t *hdlp, msiq_rec_type_t rec_type,
1386     msgcode_t msg_code, msiqid_t msiq_id)
1387 {
1388 	px_t		*px_p = INST_TO_STATE(ddi_get_instance(dip));
1389 	px_ib_t		*ib_p = px_p->px_ib_p;
1390 	devino_t	ino = px_msiqid_to_devino(px_p, msiq_id);
1391 	cpuid_t		curr_cpu;
1392 	px_ino_t	*ino_p;
1393 	px_ino_pil_t	*ipil_p;
1394 	px_ih_t		*ih_p;
1395 	int		ret = DDI_SUCCESS;
1396 
1397 	DBG(DBG_MSIQ, dip, "px_rem_msiq_intr: rdip=%s%d msiq_id=%x ino=%x\n",
1398 	    ddi_driver_name(rdip), ddi_get_instance(rdip), msiq_id, ino);
1399 
1400 	mutex_enter(&ib_p->ib_ino_lst_mutex);
1401 
1402 	ino_p = px_ib_locate_ino(ib_p, ino);
1403 	ipil_p = px_ib_ino_locate_ipil(ino_p, hdlp->ih_pri);
1404 	ih_p = px_ib_intr_locate_ih(ipil_p, rdip, hdlp->ih_inum, rec_type,
1405 	    msg_code);
1406 
1407 	/* Get the current cpu */
1408 	if ((ret = px_lib_intr_gettarget(px_p->px_dip, ino_p->ino_sysino,
1409 	    &curr_cpu)) != DDI_SUCCESS)
1410 		goto fail;
1411 
1412 	if ((ret = px_ib_ino_rem_intr(px_p, ipil_p, ih_p)) != DDI_SUCCESS)
1413 		goto fail;
1414 
1415 	intr_dist_cpuid_rem_device_weight(ino_p->ino_cpuid, rdip);
1416 
1417 	if (ipil_p->ipil_ih_size == 0) {
1418 		hdlp->ih_vector = ino_p->ino_sysino;
1419 		i_ddi_rem_ivintr(hdlp);
1420 
1421 		px_ib_delete_ino_pil(ib_p, ipil_p);
1422 
1423 		if (ino_p->ino_ipil_size == 0)
1424 			px_lib_msiq_setvalid(dip,
1425 			    px_devino_to_msiqid(px_p, ino), PCI_MSIQ_INVALID);
1426 
1427 		(void) px_msiq_free(px_p, msiq_id);
1428 	}
1429 
1430 	if (ino_p->ino_ipil_size == 0) {
1431 		kmem_free(ino_p, sizeof (px_ino_t));
1432 	} else {
1433 		/* Re-enable interrupt only if mapping register still shared */
1434 		PX_INTR_ENABLE(px_p->px_dip, ino_p->ino_sysino, curr_cpu);
1435 	}
1436 
1437 fail:
1438 	mutex_exit(&ib_p->ib_ino_lst_mutex);
1439 	return (ret);
1440 }
1441