xref: /titanic_50/usr/src/uts/sun4/os/intr.c (revision a288e5a9793fdffe5e842d7e61ab45263e75eaca)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5100b72f4Sandrei  * Common Development and Distribution License (the "License").
6100b72f4Sandrei  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
2207247649SMadhavan Venkataraman  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
25*a288e5a9SJoshua M. Clulow /*
26*a288e5a9SJoshua M. Clulow  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
27*a288e5a9SJoshua M. Clulow  */
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
307c478bd9Sstevel@tonic-gate #include <sys/stack.h>
317c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
327c478bd9Sstevel@tonic-gate #include <sys/ivintr.h>
337c478bd9Sstevel@tonic-gate #include <sys/intreg.h>
347c478bd9Sstevel@tonic-gate #include <sys/membar.h>
357c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
367c478bd9Sstevel@tonic-gate #include <sys/intr.h>
37dd4eeefdSeota #include <sys/sunddi.h>
387c478bd9Sstevel@tonic-gate #include <sys/sunndi.h>
397c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
407c478bd9Sstevel@tonic-gate #include <sys/privregs.h>
417c478bd9Sstevel@tonic-gate #include <sys/systm.h>
427c478bd9Sstevel@tonic-gate #include <sys/archsystm.h>
437c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>
447c478bd9Sstevel@tonic-gate #include <sys/x_call.h>
457c478bd9Sstevel@tonic-gate #include <vm/seg_kp.h>
467c478bd9Sstevel@tonic-gate #include <sys/debug.h>
477c478bd9Sstevel@tonic-gate #include <sys/cyclic.h>
48a1af7ba0Scwb #include <sys/kdi_impl.h>
49*a288e5a9SJoshua M. Clulow #include <sys/ddi_periodic.h>
507c478bd9Sstevel@tonic-gate 
517c478bd9Sstevel@tonic-gate #include <sys/cpu_sgnblk_defs.h>
527c478bd9Sstevel@tonic-gate 
537c478bd9Sstevel@tonic-gate /* Global locks which protect the interrupt distribution lists */
547c478bd9Sstevel@tonic-gate static kmutex_t intr_dist_lock;
557c478bd9Sstevel@tonic-gate static kmutex_t intr_dist_cpu_lock;
567c478bd9Sstevel@tonic-gate 
577c478bd9Sstevel@tonic-gate /* Head of the interrupt distribution lists */
587c478bd9Sstevel@tonic-gate static struct intr_dist *intr_dist_head = NULL;
597c478bd9Sstevel@tonic-gate static struct intr_dist *intr_dist_whead = NULL;
607c478bd9Sstevel@tonic-gate 
61dd4eeefdSeota static uint64_t siron_inum[DDI_IPL_10]; /* software interrupt numbers */
623aedfe0bSmishra uint64_t *siron_cpu_inum = NULL;
633aedfe0bSmishra uint64_t siron_poke_cpu_inum;
643aedfe0bSmishra static int siron_cpu_setup(cpu_setup_t, int, void *);
653aedfe0bSmishra extern uint_t softlevel1();
663aedfe0bSmishra 
67dd4eeefdSeota static uint64_t siron1_inum; /* backward compatibility */
68b0fc0e77Sgovinda uint64_t poke_cpu_inum;
69b0fc0e77Sgovinda uint_t poke_cpu_intr(caddr_t arg1, caddr_t arg2);
703aedfe0bSmishra uint_t siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2);
71b0fc0e77Sgovinda 
72f8047eabSsudheer /*
73492887eeSChristopher Baumbauer - Sun Microsystems - San Diego United States  * Variable to enable/disable printing a message when an invalid vecintr
74492887eeSChristopher Baumbauer - Sun Microsystems - San Diego United States  * is received.
75492887eeSChristopher Baumbauer - Sun Microsystems - San Diego United States  */
76492887eeSChristopher Baumbauer - Sun Microsystems - San Diego United States uint_t ignore_invalid_vecintr = 0;
77492887eeSChristopher Baumbauer - Sun Microsystems - San Diego United States 
78492887eeSChristopher Baumbauer - Sun Microsystems - San Diego United States /*
79f8047eabSsudheer  * Note:-
80f8047eabSsudheer  * siron_pending was originally created to prevent a resource over consumption
81f8047eabSsudheer  * bug in setsoftint(exhaustion of interrupt pool free list).
82f8047eabSsudheer  * It's original intention is obsolete with the use of iv_pending in
83f8047eabSsudheer  * setsoftint. However, siron_pending stayed around, acting as a second
84f8047eabSsudheer  * gatekeeper preventing soft interrupts from being queued. In this capacity,
85f8047eabSsudheer  * it can lead to hangs on MP systems, where due to global visibility issues
86f8047eabSsudheer  * it can end up set while iv_pending is reset, preventing soft interrupts from
87f8047eabSsudheer  * ever being processed. In addition to its gatekeeper role, init_intr also
88f8047eabSsudheer  * uses it to flag the situation where siron() was called before siron_inum has
89f8047eabSsudheer  * been defined.
90f8047eabSsudheer  *
91f8047eabSsudheer  * siron() does not need an extra gatekeeper; any cpu that wishes should be
92f8047eabSsudheer  * allowed to queue a soft interrupt. It is softint()'s job to ensure
93f8047eabSsudheer  * correct handling of the queues. Therefore, siron_pending has been
94f8047eabSsudheer  * stripped of its gatekeeper task, retaining only its intr_init job, where
95f8047eabSsudheer  * it indicates that there is a pending need to call siron().
96f8047eabSsudheer  */
97dd4eeefdSeota static int siron_pending[DDI_IPL_10]; /* software interrupt pending flags */
98dd4eeefdSeota static int siron1_pending; /* backward compatibility */
997c478bd9Sstevel@tonic-gate 
1007c478bd9Sstevel@tonic-gate int intr_policy = INTR_WEIGHTED_DIST;	/* interrupt distribution policy */
1017c478bd9Sstevel@tonic-gate int intr_dist_debug = 0;
1027c478bd9Sstevel@tonic-gate int32_t intr_dist_weight_max = 1;
1037c478bd9Sstevel@tonic-gate int32_t intr_dist_weight_maxmax = 1000;
1047c478bd9Sstevel@tonic-gate int intr_dist_weight_maxfactor = 2;
/*
 * INTR_DEBUG((level, fmt, ...)) - call cmn_err() with the parenthesized
 * argument list, but only when intr_dist_debug is non-zero.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: an "else" that follows a use of the macro attaches to
 * the caller's "if" instead of the "if" hidden inside the macro.
 */
#define	INTR_DEBUG(args) \
	do { \
		if (intr_dist_debug) \
			cmn_err args; \
	} while (0)
1067c478bd9Sstevel@tonic-gate 
1077c478bd9Sstevel@tonic-gate /*
108b0fc0e77Sgovinda  * intr_init() - Interrupt initialization
109b0fc0e77Sgovinda  *	Initialize the system's interrupt vector table.
1107c478bd9Sstevel@tonic-gate  */
1117c478bd9Sstevel@tonic-gate void
intr_init(cpu_t * cp)1127c478bd9Sstevel@tonic-gate intr_init(cpu_t *cp)
1137c478bd9Sstevel@tonic-gate {
114dd4eeefdSeota 	int i;
115b0fc0e77Sgovinda 	extern uint_t softlevel1();
116b0fc0e77Sgovinda 
1177c478bd9Sstevel@tonic-gate 	init_ivintr();
118b0fc0e77Sgovinda 	REGISTER_BBUS_INTR();
119b0fc0e77Sgovinda 
1203aedfe0bSmishra 	/*
121dd4eeefdSeota 	 * Register these software interrupts for ddi timer.
122dd4eeefdSeota 	 * Software interrupts up to the level 10 are supported.
123dd4eeefdSeota 	 */
124dd4eeefdSeota 	for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
125*a288e5a9SJoshua M. Clulow 		siron_inum[i - 1] = add_softintr(i,
126*a288e5a9SJoshua M. Clulow 		    (softintrfunc)ddi_periodic_softintr,
127dd4eeefdSeota 		    (caddr_t)(uintptr_t)(i), SOFTINT_ST);
128dd4eeefdSeota 	}
129dd4eeefdSeota 
130dd4eeefdSeota 	siron1_inum = add_softintr(PIL_1, softlevel1, 0, SOFTINT_ST);
131b0fc0e77Sgovinda 	poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0, SOFTINT_MT);
1323aedfe0bSmishra 	siron_poke_cpu_inum = add_softintr(PIL_13,
1333aedfe0bSmishra 	    siron_poke_cpu_intr, 0, SOFTINT_MT);
134b0fc0e77Sgovinda 	cp->cpu_m.poke_cpu_outstanding = B_FALSE;
1357c478bd9Sstevel@tonic-gate 
1367c478bd9Sstevel@tonic-gate 	mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL);
1377c478bd9Sstevel@tonic-gate 	mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL);
1387c478bd9Sstevel@tonic-gate 
1397c478bd9Sstevel@tonic-gate 	/*
1407c478bd9Sstevel@tonic-gate 	 * A soft interrupt may have been requested prior to the initialization
1417c478bd9Sstevel@tonic-gate 	 * of soft interrupts.  Soft interrupts can't be dispatched until after
142b0fc0e77Sgovinda 	 * init_intr(), so we have to wait until now before we can dispatch the
143b0fc0e77Sgovinda 	 * pending soft interrupt (if any).
1447c478bd9Sstevel@tonic-gate 	 */
145dd4eeefdSeota 	for (i = DDI_IPL_1; i <= DDI_IPL_10; i++) {
146dd4eeefdSeota 		if (siron_pending[i-1]) {
147dd4eeefdSeota 			siron_pending[i-1] = 0;
148dd4eeefdSeota 			sir_on(i);
149dd4eeefdSeota 		}
150dd4eeefdSeota 	}
151dd4eeefdSeota 	if (siron1_pending) {
152dd4eeefdSeota 		siron1_pending = 0;
153f8047eabSsudheer 		siron();
154f8047eabSsudheer 	}
1557c478bd9Sstevel@tonic-gate }
1567c478bd9Sstevel@tonic-gate 
1577c478bd9Sstevel@tonic-gate /*
1587c478bd9Sstevel@tonic-gate  * poke_cpu_intr - fall through when poke_cpu calls
1597c478bd9Sstevel@tonic-gate  */
1607c478bd9Sstevel@tonic-gate /* ARGSUSED */
1617c478bd9Sstevel@tonic-gate uint_t
poke_cpu_intr(caddr_t arg1,caddr_t arg2)1627c478bd9Sstevel@tonic-gate poke_cpu_intr(caddr_t arg1, caddr_t arg2)
1637c478bd9Sstevel@tonic-gate {
1647c478bd9Sstevel@tonic-gate 	CPU->cpu_m.poke_cpu_outstanding = B_FALSE;
1657c478bd9Sstevel@tonic-gate 	membar_stld_stst();
1667c478bd9Sstevel@tonic-gate 	return (1);
1677c478bd9Sstevel@tonic-gate }
1687c478bd9Sstevel@tonic-gate 
1697c478bd9Sstevel@tonic-gate /*
170dd4eeefdSeota  * Trigger software interrupts dedicated to ddi timer.
171dd4eeefdSeota  */
172dd4eeefdSeota void
sir_on(int level)173dd4eeefdSeota sir_on(int level)
174dd4eeefdSeota {
175dd4eeefdSeota 	ASSERT(level >= DDI_IPL_1 && level <= DDI_IPL_10);
176dd4eeefdSeota 	if (siron_inum[level-1])
177dd4eeefdSeota 		setsoftint(siron_inum[level-1]);
178dd4eeefdSeota 	else
179dd4eeefdSeota 		siron_pending[level-1] = 1;
180dd4eeefdSeota }
181dd4eeefdSeota 
182dd4eeefdSeota /*
183a1af7ba0Scwb  * kmdb uses siron (and thus setsoftint) while the world is stopped in order to
184a1af7ba0Scwb  * inform its driver component that there's work to be done.  We need to keep
185a1af7ba0Scwb  * DTrace from instrumenting kmdb's siron and setsoftint.  We duplicate siron,
186a1af7ba0Scwb  * giving kmdb's version a kdi_ prefix to keep DTrace at bay.  The
187a1af7ba0Scwb  * implementation of setsoftint is complicated enough that we don't want to
188a1af7ba0Scwb  * duplicate it, but at the same time we don't want to preclude tracing either.
189a1af7ba0Scwb  * The meat of setsoftint() therefore goes into kdi_setsoftint, with
190a1af7ba0Scwb  * setsoftint() implemented as a wrapper.  This allows tracing, while still
191a1af7ba0Scwb  * providing a way for kmdb to sneak in unmolested.
1927c478bd9Sstevel@tonic-gate  */
1937c478bd9Sstevel@tonic-gate void
kdi_siron(void)194a1af7ba0Scwb kdi_siron(void)
195a1af7ba0Scwb {
196dd4eeefdSeota 	if (siron1_inum != 0)
197dd4eeefdSeota 		kdi_setsoftint(siron1_inum);
198a1af7ba0Scwb 	else
199dd4eeefdSeota 		siron1_pending = 1;
200a1af7ba0Scwb }
201a1af7ba0Scwb 
202a1af7ba0Scwb void
setsoftint(uint64_t inum)203a1af7ba0Scwb setsoftint(uint64_t inum)
204a1af7ba0Scwb {
205a1af7ba0Scwb 	kdi_setsoftint(inum);
206a1af7ba0Scwb }
207a1af7ba0Scwb 
2083aedfe0bSmishra /*
2093aedfe0bSmishra  * Generates softlevel1 interrupt on current CPU if it
2103aedfe0bSmishra  * is not pending already.
2113aedfe0bSmishra  */
212a1af7ba0Scwb void
siron(void)2137c478bd9Sstevel@tonic-gate siron(void)
2147c478bd9Sstevel@tonic-gate {
2153aedfe0bSmishra 	uint64_t inum;
2163aedfe0bSmishra 
217dd4eeefdSeota 	if (siron1_inum != 0) {
21812ceefb6Smishra 		/*
21912ceefb6Smishra 		 * Once siron_cpu_inum has been allocated, we can
22012ceefb6Smishra 		 * use per-CPU siron inum.
22112ceefb6Smishra 		 */
22212ceefb6Smishra 		if (siron_cpu_inum && siron_cpu_inum[CPU->cpu_id] != 0)
2233aedfe0bSmishra 			inum = siron_cpu_inum[CPU->cpu_id];
224f8047eabSsudheer 		else
225dd4eeefdSeota 			inum = siron1_inum;
2263aedfe0bSmishra 
2273aedfe0bSmishra 		setsoftint(inum);
2283aedfe0bSmishra 	} else
229dd4eeefdSeota 		siron1_pending = 1;
2307c478bd9Sstevel@tonic-gate }
2317c478bd9Sstevel@tonic-gate 
23212ceefb6Smishra 
23312ceefb6Smishra static void
siron_init(void)23412ceefb6Smishra siron_init(void)
23512ceefb6Smishra {
23612ceefb6Smishra 	/*
23712ceefb6Smishra 	 * We just allocate memory for per-cpu siron right now. Rest of
23812ceefb6Smishra 	 * the work is done when CPU is configured.
23912ceefb6Smishra 	 */
24012ceefb6Smishra 	siron_cpu_inum = kmem_zalloc(sizeof (uint64_t) * NCPU, KM_SLEEP);
24112ceefb6Smishra }
24212ceefb6Smishra 
2437c478bd9Sstevel@tonic-gate /*
2443aedfe0bSmishra  * This routine creates per-CPU siron inum for CPUs which are
2453aedfe0bSmishra  * configured during boot.
2463aedfe0bSmishra  */
2473aedfe0bSmishra void
siron_mp_init()2483aedfe0bSmishra siron_mp_init()
2493aedfe0bSmishra {
2503aedfe0bSmishra 	cpu_t *c;
2513aedfe0bSmishra 
25212ceefb6Smishra 	/*
25312ceefb6Smishra 	 * Get the memory for per-CPU siron inums
25412ceefb6Smishra 	 */
25512ceefb6Smishra 	siron_init();
25612ceefb6Smishra 
2573aedfe0bSmishra 	mutex_enter(&cpu_lock);
2583aedfe0bSmishra 	c = cpu_list;
2593aedfe0bSmishra 	do {
2603aedfe0bSmishra 		(void) siron_cpu_setup(CPU_CONFIG, c->cpu_id, NULL);
2613aedfe0bSmishra 	} while ((c = c->cpu_next) != cpu_list);
2623aedfe0bSmishra 
2633aedfe0bSmishra 	register_cpu_setup_func(siron_cpu_setup, NULL);
2643aedfe0bSmishra 	mutex_exit(&cpu_lock);
2653aedfe0bSmishra }
2663aedfe0bSmishra 
2673aedfe0bSmishra /*
2683aedfe0bSmishra  * siron_poke_cpu_intr - cross-call handler.
2693aedfe0bSmishra  */
2703aedfe0bSmishra /* ARGSUSED */
2713aedfe0bSmishra uint_t
siron_poke_cpu_intr(caddr_t arg1,caddr_t arg2)2723aedfe0bSmishra siron_poke_cpu_intr(caddr_t arg1, caddr_t arg2)
2733aedfe0bSmishra {
2743aedfe0bSmishra 	/* generate level1 softint */
2753aedfe0bSmishra 	siron();
2763aedfe0bSmishra 	return (1);
2773aedfe0bSmishra }
2783aedfe0bSmishra 
2793aedfe0bSmishra /*
2803aedfe0bSmishra  * This routine generates a cross-call on target CPU(s).
2813aedfe0bSmishra  */
2823aedfe0bSmishra void
siron_poke_cpu(cpuset_t poke)2833aedfe0bSmishra siron_poke_cpu(cpuset_t poke)
2843aedfe0bSmishra {
2853aedfe0bSmishra 	int cpuid = CPU->cpu_id;
2863aedfe0bSmishra 
2873aedfe0bSmishra 	if (CPU_IN_SET(poke, cpuid)) {
2883aedfe0bSmishra 		siron();
2893aedfe0bSmishra 		CPUSET_DEL(poke, cpuid);
2903aedfe0bSmishra 		if (CPUSET_ISNULL(poke))
2913aedfe0bSmishra 			return;
2923aedfe0bSmishra 	}
2933aedfe0bSmishra 
2943aedfe0bSmishra 	xt_some(poke, setsoftint_tl1, siron_poke_cpu_inum, 0);
2953aedfe0bSmishra }
2963aedfe0bSmishra 
2973aedfe0bSmishra /*
2983aedfe0bSmishra  * This callback function allows us to create per-CPU siron inum.
2993aedfe0bSmishra  */
3003aedfe0bSmishra /* ARGSUSED */
3013aedfe0bSmishra static int
siron_cpu_setup(cpu_setup_t what,int id,void * arg)3023aedfe0bSmishra siron_cpu_setup(cpu_setup_t what, int id, void *arg)
3033aedfe0bSmishra {
3043aedfe0bSmishra 	cpu_t *cp = cpu[id];
3053aedfe0bSmishra 
3063aedfe0bSmishra 	ASSERT(MUTEX_HELD(&cpu_lock));
3073aedfe0bSmishra 	ASSERT(cp != NULL);
3083aedfe0bSmishra 
3093aedfe0bSmishra 	switch (what) {
3103aedfe0bSmishra 	case CPU_CONFIG:
3113aedfe0bSmishra 		siron_cpu_inum[cp->cpu_id] = add_softintr(PIL_1,
3123aedfe0bSmishra 		    (softintrfunc)softlevel1, 0, SOFTINT_ST);
3133aedfe0bSmishra 		break;
3143aedfe0bSmishra 	case CPU_UNCONFIG:
3153aedfe0bSmishra 		(void) rem_softintr(siron_cpu_inum[cp->cpu_id]);
3163aedfe0bSmishra 		siron_cpu_inum[cp->cpu_id] = 0;
3173aedfe0bSmishra 		break;
3183aedfe0bSmishra 	default:
3193aedfe0bSmishra 		break;
3203aedfe0bSmishra 	}
3213aedfe0bSmishra 
3223aedfe0bSmishra 	return (0);
3233aedfe0bSmishra }
3243aedfe0bSmishra 
3253aedfe0bSmishra /*
3267c478bd9Sstevel@tonic-gate  * no_ivintr()
327b0fc0e77Sgovinda  * 	called by setvecint_tl1() through sys_trap()
3287c478bd9Sstevel@tonic-gate  *	vector interrupt received but not valid or not
329b0fc0e77Sgovinda  *	registered in intr_vec_table
3307c478bd9Sstevel@tonic-gate  *	considered as a spurious mondo interrupt
3317c478bd9Sstevel@tonic-gate  */
3327c478bd9Sstevel@tonic-gate /* ARGSUSED */
3337c478bd9Sstevel@tonic-gate void
no_ivintr(struct regs * rp,int inum,int pil)3347c478bd9Sstevel@tonic-gate no_ivintr(struct regs *rp, int inum, int pil)
3357c478bd9Sstevel@tonic-gate {
336492887eeSChristopher Baumbauer - Sun Microsystems - San Diego United States 	if (!ignore_invalid_vecintr)
3377c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x",
3387c478bd9Sstevel@tonic-gate 		    inum, pil);
3397c478bd9Sstevel@tonic-gate 
3407c478bd9Sstevel@tonic-gate #ifdef DEBUG_VEC_INTR
3417c478bd9Sstevel@tonic-gate 	prom_enter_mon();
3427c478bd9Sstevel@tonic-gate #endif /* DEBUG_VEC_INTR */
3437c478bd9Sstevel@tonic-gate }
3447c478bd9Sstevel@tonic-gate 
3457c478bd9Sstevel@tonic-gate void
intr_dequeue_req(uint_t pil,uint64_t inum)346b0fc0e77Sgovinda intr_dequeue_req(uint_t pil, uint64_t inum)
3477c478bd9Sstevel@tonic-gate {
348b0fc0e77Sgovinda 	intr_vec_t	*iv, *next, *prev;
3497c478bd9Sstevel@tonic-gate 	struct machcpu	*mcpu;
3507c478bd9Sstevel@tonic-gate 	uint32_t	clr;
351b0fc0e77Sgovinda 	processorid_t	cpu_id;
3527c478bd9Sstevel@tonic-gate 	extern uint_t	getpstate(void);
3537c478bd9Sstevel@tonic-gate 
3547c478bd9Sstevel@tonic-gate 	ASSERT((getpstate() & PSTATE_IE) == 0);
3557c478bd9Sstevel@tonic-gate 
3567c478bd9Sstevel@tonic-gate 	mcpu = &CPU->cpu_m;
357b0fc0e77Sgovinda 	cpu_id = CPU->cpu_id;
358b0fc0e77Sgovinda 
359b0fc0e77Sgovinda 	iv = (intr_vec_t *)inum;
360b0fc0e77Sgovinda 	prev = NULL;
361b0fc0e77Sgovinda 	next = mcpu->intr_head[pil];
3627c478bd9Sstevel@tonic-gate 
3637c478bd9Sstevel@tonic-gate 	/* Find a matching entry in the list */
364b0fc0e77Sgovinda 	while (next != NULL) {
365b0fc0e77Sgovinda 		if (next == iv)
3667c478bd9Sstevel@tonic-gate 			break;
367b0fc0e77Sgovinda 		prev = next;
368b0fc0e77Sgovinda 		next = IV_GET_PIL_NEXT(next, cpu_id);
3697c478bd9Sstevel@tonic-gate 	}
370b0fc0e77Sgovinda 
371b0fc0e77Sgovinda 	if (next != NULL) {
372b0fc0e77Sgovinda 		intr_vec_t	*next_iv = IV_GET_PIL_NEXT(next, cpu_id);
373b0fc0e77Sgovinda 
374b0fc0e77Sgovinda 		/* Remove entry from list */
3757c478bd9Sstevel@tonic-gate 		if (prev != NULL)
376b0fc0e77Sgovinda 			IV_SET_PIL_NEXT(prev, cpu_id, next_iv); /* non-head */
3777c478bd9Sstevel@tonic-gate 		else
378b0fc0e77Sgovinda 			mcpu->intr_head[pil] = next_iv; /* head */
3797c478bd9Sstevel@tonic-gate 
380b0fc0e77Sgovinda 		if (next_iv == NULL)
3817c478bd9Sstevel@tonic-gate 			mcpu->intr_tail[pil] = prev; /* tail */
3827c478bd9Sstevel@tonic-gate 	}
3837c478bd9Sstevel@tonic-gate 
384b0fc0e77Sgovinda 	/* Clear pending interrupts at this level if the list is empty */
3857c478bd9Sstevel@tonic-gate 	if (mcpu->intr_head[pil] == NULL) {
3867c478bd9Sstevel@tonic-gate 		clr = 1 << pil;
3877c478bd9Sstevel@tonic-gate 		if (pil == PIL_14)
3887c478bd9Sstevel@tonic-gate 			clr |= (TICK_INT_MASK | STICK_INT_MASK);
3897c478bd9Sstevel@tonic-gate 		wr_clr_softint(clr);
3907c478bd9Sstevel@tonic-gate 	}
3917c478bd9Sstevel@tonic-gate }
3927c478bd9Sstevel@tonic-gate 
3937c478bd9Sstevel@tonic-gate 
3947c478bd9Sstevel@tonic-gate /*
3957c478bd9Sstevel@tonic-gate  * Send a directed interrupt of specified interrupt number id to a cpu.
3967c478bd9Sstevel@tonic-gate  */
3977c478bd9Sstevel@tonic-gate void
send_dirint(int cpuix,int intr_id)3987c478bd9Sstevel@tonic-gate send_dirint(
3997c478bd9Sstevel@tonic-gate 	int cpuix,		/* cpu to be interrupted */
4007c478bd9Sstevel@tonic-gate 	int intr_id)		/* interrupt number id */
4017c478bd9Sstevel@tonic-gate {
4027c478bd9Sstevel@tonic-gate 	xt_one(cpuix, setsoftint_tl1, intr_id, 0);
4037c478bd9Sstevel@tonic-gate }
4047c478bd9Sstevel@tonic-gate 
4057c478bd9Sstevel@tonic-gate /*
4067c478bd9Sstevel@tonic-gate  * Take the specified CPU out of participation in interrupts.
4077c478bd9Sstevel@tonic-gate  *	Called by p_online(2) when a processor is being taken off-line.
4087c478bd9Sstevel@tonic-gate  *	This allows interrupt threads being handled on the processor to
4097c478bd9Sstevel@tonic-gate  *	complete before the processor is idled.
4107c478bd9Sstevel@tonic-gate  */
4117c478bd9Sstevel@tonic-gate int
cpu_disable_intr(struct cpu * cp)4127c478bd9Sstevel@tonic-gate cpu_disable_intr(struct cpu *cp)
4137c478bd9Sstevel@tonic-gate {
4147c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
4157c478bd9Sstevel@tonic-gate 
4167c478bd9Sstevel@tonic-gate 	/*
4177c478bd9Sstevel@tonic-gate 	 * Turn off the CPU_ENABLE flag before calling the redistribution
4187c478bd9Sstevel@tonic-gate 	 * function, since it checks for this in the cpu flags.
4197c478bd9Sstevel@tonic-gate 	 */
4207c478bd9Sstevel@tonic-gate 	cp->cpu_flags &= ~CPU_ENABLE;
4217c478bd9Sstevel@tonic-gate 
4227c478bd9Sstevel@tonic-gate 	intr_redist_all_cpus();
4237c478bd9Sstevel@tonic-gate 
4247c478bd9Sstevel@tonic-gate 	return (0);
4257c478bd9Sstevel@tonic-gate }
4267c478bd9Sstevel@tonic-gate 
4277c478bd9Sstevel@tonic-gate /*
4287c478bd9Sstevel@tonic-gate  * Allow the specified CPU to participate in interrupts.
4297c478bd9Sstevel@tonic-gate  *	Called by p_online(2) if a processor could not be taken off-line
4307c478bd9Sstevel@tonic-gate  *	because of bound threads, in order to resume processing interrupts.
4317c478bd9Sstevel@tonic-gate  *	Also called after starting a processor.
4327c478bd9Sstevel@tonic-gate  */
4337c478bd9Sstevel@tonic-gate void
cpu_enable_intr(struct cpu * cp)4347c478bd9Sstevel@tonic-gate cpu_enable_intr(struct cpu *cp)
4357c478bd9Sstevel@tonic-gate {
4367c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
4377c478bd9Sstevel@tonic-gate 
4387c478bd9Sstevel@tonic-gate 	cp->cpu_flags |= CPU_ENABLE;
4397c478bd9Sstevel@tonic-gate 
4407c478bd9Sstevel@tonic-gate 	intr_redist_all_cpus();
4417c478bd9Sstevel@tonic-gate }
4427c478bd9Sstevel@tonic-gate 
4437c478bd9Sstevel@tonic-gate /*
4447c478bd9Sstevel@tonic-gate  * Add function to callback list for intr_redist_all_cpus.  We keep two lists,
4457c478bd9Sstevel@tonic-gate  * one for weighted callbacks and one for normal callbacks. Weighted callbacks
4467c478bd9Sstevel@tonic-gate  * are issued to redirect interrupts of a specified weight, from heavy to
4477c478bd9Sstevel@tonic-gate  * light.  This allows all the interrupts of a given weight to be redistributed
4487c478bd9Sstevel@tonic-gate  * for all weighted nexus drivers prior to those of less weight.
4497c478bd9Sstevel@tonic-gate  */
4507c478bd9Sstevel@tonic-gate static void
intr_dist_add_list(struct intr_dist ** phead,void (* func)(void *),void * arg)4517c478bd9Sstevel@tonic-gate intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg)
4527c478bd9Sstevel@tonic-gate {
4537c478bd9Sstevel@tonic-gate 	struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP);
4547c478bd9Sstevel@tonic-gate 	struct intr_dist *iptr;
4557c478bd9Sstevel@tonic-gate 	struct intr_dist **pptr;
4567c478bd9Sstevel@tonic-gate 
4577c478bd9Sstevel@tonic-gate 	ASSERT(func);
4587c478bd9Sstevel@tonic-gate 	new->func = func;
4597c478bd9Sstevel@tonic-gate 	new->arg = arg;
4607c478bd9Sstevel@tonic-gate 	new->next = NULL;
4617c478bd9Sstevel@tonic-gate 
4627c478bd9Sstevel@tonic-gate 	/* Add to tail so that redistribution occurs in original order. */
4637c478bd9Sstevel@tonic-gate 	mutex_enter(&intr_dist_lock);
4647c478bd9Sstevel@tonic-gate 	for (iptr = *phead, pptr = phead; iptr != NULL;
4657c478bd9Sstevel@tonic-gate 	    pptr = &iptr->next, iptr = iptr->next) {
4667c478bd9Sstevel@tonic-gate 		/* check for problems as we locate the tail */
4677c478bd9Sstevel@tonic-gate 		if ((iptr->func == func) && (iptr->arg == arg)) {
4687c478bd9Sstevel@tonic-gate 			cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate");
4697c478bd9Sstevel@tonic-gate 			/*NOTREACHED*/
4707c478bd9Sstevel@tonic-gate 		}
4717c478bd9Sstevel@tonic-gate 	}
4727c478bd9Sstevel@tonic-gate 	*pptr = new;
4737c478bd9Sstevel@tonic-gate 
4747c478bd9Sstevel@tonic-gate 	mutex_exit(&intr_dist_lock);
4757c478bd9Sstevel@tonic-gate }
4767c478bd9Sstevel@tonic-gate 
4777c478bd9Sstevel@tonic-gate void
intr_dist_add(void (* func)(void *),void * arg)4787c478bd9Sstevel@tonic-gate intr_dist_add(void (*func)(void *), void *arg)
4797c478bd9Sstevel@tonic-gate {
4807c478bd9Sstevel@tonic-gate 	intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg);
4817c478bd9Sstevel@tonic-gate }
4827c478bd9Sstevel@tonic-gate 
4837c478bd9Sstevel@tonic-gate void
intr_dist_add_weighted(void (* func)(void *,int32_t,int32_t),void * arg)4847c478bd9Sstevel@tonic-gate intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
4857c478bd9Sstevel@tonic-gate {
4867c478bd9Sstevel@tonic-gate 	intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg);
4877c478bd9Sstevel@tonic-gate }
4887c478bd9Sstevel@tonic-gate 
4897c478bd9Sstevel@tonic-gate /*
4907c478bd9Sstevel@tonic-gate  * Search for the interrupt distribution structure with the specified
4917c478bd9Sstevel@tonic-gate  * mondo vec reg in the interrupt distribution list. If a match is found,
4927c478bd9Sstevel@tonic-gate  * then delete the entry from the list. The caller is responsible for
4937c478bd9Sstevel@tonic-gate  * modifying the mondo vector registers.
4947c478bd9Sstevel@tonic-gate  */
4957c478bd9Sstevel@tonic-gate static void
intr_dist_rem_list(struct intr_dist ** headp,void (* func)(void *),void * arg)4967c478bd9Sstevel@tonic-gate intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg)
4977c478bd9Sstevel@tonic-gate {
4987c478bd9Sstevel@tonic-gate 	struct intr_dist *iptr;
4997c478bd9Sstevel@tonic-gate 	struct intr_dist **vect;
5007c478bd9Sstevel@tonic-gate 
5017c478bd9Sstevel@tonic-gate 	mutex_enter(&intr_dist_lock);
5027c478bd9Sstevel@tonic-gate 	for (iptr = *headp, vect = headp;
5037c478bd9Sstevel@tonic-gate 	    iptr != NULL; vect = &iptr->next, iptr = iptr->next) {
5047c478bd9Sstevel@tonic-gate 		if ((iptr->func == func) && (iptr->arg == arg)) {
5057c478bd9Sstevel@tonic-gate 			*vect = iptr->next;
5067c478bd9Sstevel@tonic-gate 			kmem_free(iptr, sizeof (struct intr_dist));
5077c478bd9Sstevel@tonic-gate 			mutex_exit(&intr_dist_lock);
5087c478bd9Sstevel@tonic-gate 			return;
5097c478bd9Sstevel@tonic-gate 		}
5107c478bd9Sstevel@tonic-gate 	}
5117c478bd9Sstevel@tonic-gate 
5127c478bd9Sstevel@tonic-gate 	if (!panicstr)
5137c478bd9Sstevel@tonic-gate 		cmn_err(CE_PANIC, "intr_dist_rem_list: not found");
5147c478bd9Sstevel@tonic-gate 	mutex_exit(&intr_dist_lock);
5157c478bd9Sstevel@tonic-gate }
5167c478bd9Sstevel@tonic-gate 
5177c478bd9Sstevel@tonic-gate void
intr_dist_rem(void (* func)(void *),void * arg)5187c478bd9Sstevel@tonic-gate intr_dist_rem(void (*func)(void *), void *arg)
5197c478bd9Sstevel@tonic-gate {
5207c478bd9Sstevel@tonic-gate 	intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg);
5217c478bd9Sstevel@tonic-gate }
5227c478bd9Sstevel@tonic-gate 
5237c478bd9Sstevel@tonic-gate void
intr_dist_rem_weighted(void (* func)(void *,int32_t,int32_t),void * arg)5247c478bd9Sstevel@tonic-gate intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
5257c478bd9Sstevel@tonic-gate {
5267c478bd9Sstevel@tonic-gate 	intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg);
5277c478bd9Sstevel@tonic-gate }
5287c478bd9Sstevel@tonic-gate 
5297c478bd9Sstevel@tonic-gate /*
5307c478bd9Sstevel@tonic-gate  * Initiate interrupt redistribution.  Redistribution improves the isolation
5317c478bd9Sstevel@tonic-gate  * associated with interrupt weights by ordering operations from heavy weight
5327c478bd9Sstevel@tonic-gate  * to light weight.  When a CPUs orientation changes relative to interrupts,
5337c478bd9Sstevel@tonic-gate  * there is *always* a redistribution to accommodate this change (call to
5347c478bd9Sstevel@tonic-gate  * intr_redist_all_cpus()).  As devices (not CPUs) attach/detach it is possible
5357c478bd9Sstevel@tonic-gate  * that a redistribution could improve the quality of an initialization. For
5367c478bd9Sstevel@tonic-gate  * example, if you are not using a NIC it may not be attached with s10 (devfs).
5377c478bd9Sstevel@tonic-gate  * If you then configure the NIC (ifconfig), this may cause the NIC to attach
5387c478bd9Sstevel@tonic-gate  * and plumb interrupts.  The CPU assignment for the NIC's interrupts is
5397c478bd9Sstevel@tonic-gate  * occurring late, so optimal "isolation" relative to weight is not occurring.
5407c478bd9Sstevel@tonic-gate  * The same applies to detach, although in this case doing the redistribution
5417c478bd9Sstevel@tonic-gate  * might improve "spread" for medium weight devices since the "isolation" of
5427c478bd9Sstevel@tonic-gate  * a higher weight device may no longer be present.
5437c478bd9Sstevel@tonic-gate  *
5447c478bd9Sstevel@tonic-gate  * NB: We should provide a utility to trigger redistribution (ala "intradm -r").
5457c478bd9Sstevel@tonic-gate  *
5467c478bd9Sstevel@tonic-gate  * NB: There is risk associated with automatically triggering execution of the
5477c478bd9Sstevel@tonic-gate  * redistribution code at arbitrary times. The risk comes from the fact that
5487c478bd9Sstevel@tonic-gate  * there is a lot of low-level hardware interaction associated with a
5497c478bd9Sstevel@tonic-gate  * redistribution.  At some point we may want this code to perform automatic
5507c478bd9Sstevel@tonic-gate  * redistribution (redistribution thread; trigger timeout when add/remove
5517c478bd9Sstevel@tonic-gate  * weight delta is large enough, and call cv_signal from timeout - causing
5527c478bd9Sstevel@tonic-gate  * thead to call i_ddi_intr_redist_all_cpus()) but this is considered too
5537c478bd9Sstevel@tonic-gate  * risky at this time.
5547c478bd9Sstevel@tonic-gate  */
5557c478bd9Sstevel@tonic-gate void
i_ddi_intr_redist_all_cpus()5567c478bd9Sstevel@tonic-gate i_ddi_intr_redist_all_cpus()
5577c478bd9Sstevel@tonic-gate {
5587c478bd9Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
5597c478bd9Sstevel@tonic-gate 	INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n"));
5607c478bd9Sstevel@tonic-gate 	intr_redist_all_cpus();
5617c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
5627c478bd9Sstevel@tonic-gate }
5637c478bd9Sstevel@tonic-gate 
5647c478bd9Sstevel@tonic-gate /*
5657c478bd9Sstevel@tonic-gate  * Redistribute all interrupts
5667c478bd9Sstevel@tonic-gate  *
5677c478bd9Sstevel@tonic-gate  * This function redistributes all interrupting devices, running the
5687c478bd9Sstevel@tonic-gate  * parent callback functions for each node.
5697c478bd9Sstevel@tonic-gate  */
5707c478bd9Sstevel@tonic-gate void
intr_redist_all_cpus(void)5717c478bd9Sstevel@tonic-gate intr_redist_all_cpus(void)
5727c478bd9Sstevel@tonic-gate {
5737c478bd9Sstevel@tonic-gate 	struct cpu *cp;
5747c478bd9Sstevel@tonic-gate 	struct intr_dist *iptr;
5757c478bd9Sstevel@tonic-gate 	int32_t weight, max_weight;
5767c478bd9Sstevel@tonic-gate 
5777c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
5787c478bd9Sstevel@tonic-gate 	mutex_enter(&intr_dist_lock);
5797c478bd9Sstevel@tonic-gate 
5807c478bd9Sstevel@tonic-gate 	/*
5817c478bd9Sstevel@tonic-gate 	 * zero cpu_intr_weight on all cpus - it is safe to traverse
5827c478bd9Sstevel@tonic-gate 	 * cpu_list since we hold cpu_lock.
5837c478bd9Sstevel@tonic-gate 	 */
5847c478bd9Sstevel@tonic-gate 	cp = cpu_list;
5857c478bd9Sstevel@tonic-gate 	do {
5867c478bd9Sstevel@tonic-gate 		cp->cpu_intr_weight = 0;
5877c478bd9Sstevel@tonic-gate 	} while ((cp = cp->cpu_next) != cpu_list);
5887c478bd9Sstevel@tonic-gate 
5897c478bd9Sstevel@tonic-gate 	/*
5907c478bd9Sstevel@tonic-gate 	 * Assume that this redistribution may encounter a device weight
5917c478bd9Sstevel@tonic-gate 	 * via driver.conf tuning of "ddi-intr-weight" that is at most
5927c478bd9Sstevel@tonic-gate 	 * intr_dist_weight_maxfactor times larger.
5937c478bd9Sstevel@tonic-gate 	 */
5947c478bd9Sstevel@tonic-gate 	max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor;
5957c478bd9Sstevel@tonic-gate 	if (max_weight > intr_dist_weight_maxmax)
5967c478bd9Sstevel@tonic-gate 		max_weight = intr_dist_weight_maxmax;
5977c478bd9Sstevel@tonic-gate 	intr_dist_weight_max = 1;
5987c478bd9Sstevel@tonic-gate 
5997c478bd9Sstevel@tonic-gate 	INTR_DEBUG((CE_CONT, "intr_dist: "
6007c478bd9Sstevel@tonic-gate 	    "intr_redist_all_cpus: %d-0\n", max_weight));
6017c478bd9Sstevel@tonic-gate 
6027c478bd9Sstevel@tonic-gate 	/*
6037c478bd9Sstevel@tonic-gate 	 * Redistribute weighted, from heavy to light.  The callback that
6047c478bd9Sstevel@tonic-gate 	 * specifies a weight equal to weight_max should redirect all
6057c478bd9Sstevel@tonic-gate 	 * interrupts of weight weight_max or greater [weight_max, inf.).
6067c478bd9Sstevel@tonic-gate 	 * Interrupts of lesser weight should be processed on the call with
6077c478bd9Sstevel@tonic-gate 	 * the matching weight. This allows all the heaver weight interrupts
6087c478bd9Sstevel@tonic-gate 	 * on all weighted busses (multiple pci busses) to be redirected prior
6097c478bd9Sstevel@tonic-gate 	 * to any lesser weight interrupts.
6107c478bd9Sstevel@tonic-gate 	 */
6117c478bd9Sstevel@tonic-gate 	for (weight = max_weight; weight >= 0; weight--)
6127c478bd9Sstevel@tonic-gate 		for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next)
6137c478bd9Sstevel@tonic-gate 			((void (*)(void *, int32_t, int32_t))iptr->func)
6147c478bd9Sstevel@tonic-gate 			    (iptr->arg, max_weight, weight);
6157c478bd9Sstevel@tonic-gate 
6167c478bd9Sstevel@tonic-gate 	/* redistribute normal (non-weighted) interrupts */
6177c478bd9Sstevel@tonic-gate 	for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next)
6187c478bd9Sstevel@tonic-gate 		((void (*)(void *))iptr->func)(iptr->arg);
6197c478bd9Sstevel@tonic-gate 	mutex_exit(&intr_dist_lock);
6207c478bd9Sstevel@tonic-gate }
6217c478bd9Sstevel@tonic-gate 
6227c478bd9Sstevel@tonic-gate void
intr_redist_all_cpus_shutdown(void)6237c478bd9Sstevel@tonic-gate intr_redist_all_cpus_shutdown(void)
6247c478bd9Sstevel@tonic-gate {
6257c478bd9Sstevel@tonic-gate 	intr_policy = INTR_CURRENT_CPU;
6267c478bd9Sstevel@tonic-gate 	intr_redist_all_cpus();
6277c478bd9Sstevel@tonic-gate }
6287c478bd9Sstevel@tonic-gate 
/*
 * Determine what CPU to target, based on interrupt policy.
 *
 * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and
 *	advance through interrupt enabled cpus (round-robin).
 *
 * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest
 *	cpu_intr_weight, round robin when all equal.
 *
 *	Weighted interrupt distribution provides two things: "spread" of weight
 *	(associated with algorithm itself) and "isolation" (associated with a
 *	particular device weight). A redistribution is what provides optimal
 *	"isolation" of heavy weight interrupts, optimal "spread" of weight
 *	(relative to what came before) is always occurring.
 *
 *	An interrupt weight is a subjective number that represents the
 *	percentage of a CPU required to service a device's interrupts: the
 *	default weight is 0% (however the algorithm still maintains
 *	round-robin), a network interface controller (NIC) may have a large
 *	weight (35%). Interrupt weight only has meaning relative to the
 *	interrupt weight of other devices: a CPU can be weighted more than
 *	100%, and a single device might consume more than 100% of a CPU.
 *
 *	A coarse interrupt weight can be defined by the parent nexus driver
 *	based on bus specific information, like pci class codes. A nexus
 *	driver that supports device interrupt weighting for its children
 *	should call intr_dist_cpuid_add/rem_device_weight(), which adds
 *	and removes the weight of a device from the CPU that an interrupt
 *	is directed at.  The quality of initialization improves when the
 *	device interrupt weights more accurately reflect actual run-time
 *	weights, and as the assignments are ordered from heavy to light.
 *
 *	The implementation also supports interrupt weight being specified in
 *	driver.conf files via the property "ddi-intr-weight", which takes
 *	precedence over the nexus supplied weight.  This support is added to
 *	permit possible tweaking in the product in response to customer
 *	problems. This is not a formal or committed interface.
 *
 *	While a weighted approach chooses the CPU providing the best spread
 *	given past weights, less than optimal isolation can result in cases
 *	where heavy weight devices show up last. The nexus driver's interrupt
 *	redistribution logic should use intr_dist_add/rem_weighted so that
 *	interrupts can be redistributed heavy first for optimal isolation.
 */
6737c478bd9Sstevel@tonic-gate uint32_t
intr_dist_cpuid(void)6747c478bd9Sstevel@tonic-gate intr_dist_cpuid(void)
6757c478bd9Sstevel@tonic-gate {
6767c478bd9Sstevel@tonic-gate 	static struct cpu	*curr_cpu;
6777c478bd9Sstevel@tonic-gate 	struct cpu		*start_cpu;
6787c478bd9Sstevel@tonic-gate 	struct cpu		*new_cpu;
6797c478bd9Sstevel@tonic-gate 	struct cpu		*cp;
6807c478bd9Sstevel@tonic-gate 	int			cpuid = -1;
6817c478bd9Sstevel@tonic-gate 
6827c478bd9Sstevel@tonic-gate 	/* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */
6837c478bd9Sstevel@tonic-gate 	mutex_enter(&intr_dist_cpu_lock);
6847c478bd9Sstevel@tonic-gate 
6857c478bd9Sstevel@tonic-gate 	switch (intr_policy) {
6867c478bd9Sstevel@tonic-gate 	case INTR_CURRENT_CPU:
6877c478bd9Sstevel@tonic-gate 		cpuid = CPU->cpu_id;
6887c478bd9Sstevel@tonic-gate 		break;
6897c478bd9Sstevel@tonic-gate 
6907c478bd9Sstevel@tonic-gate 	case INTR_BOOT_CPU:
6917c478bd9Sstevel@tonic-gate 		panic("INTR_BOOT_CPU no longer supported.");
6927c478bd9Sstevel@tonic-gate 		/*NOTREACHED*/
6937c478bd9Sstevel@tonic-gate 
6947c478bd9Sstevel@tonic-gate 	case INTR_FLAT_DIST:
6957c478bd9Sstevel@tonic-gate 	case INTR_WEIGHTED_DIST:
6967c478bd9Sstevel@tonic-gate 	default:
6977c478bd9Sstevel@tonic-gate 		/*
6987c478bd9Sstevel@tonic-gate 		 * Ensure that curr_cpu is valid - cpu_next will be NULL if
6997c478bd9Sstevel@tonic-gate 		 * the cpu has been deleted (cpu structs are never freed).
7007c478bd9Sstevel@tonic-gate 		 */
7017c478bd9Sstevel@tonic-gate 		if (curr_cpu == NULL || curr_cpu->cpu_next == NULL)
7027c478bd9Sstevel@tonic-gate 			curr_cpu = CPU;
7037c478bd9Sstevel@tonic-gate 
7047c478bd9Sstevel@tonic-gate 		/*
7057c478bd9Sstevel@tonic-gate 		 * Advance to online CPU after curr_cpu (round-robin). For
7067c478bd9Sstevel@tonic-gate 		 * INTR_WEIGHTED_DIST we choose the cpu with the lightest
7077c478bd9Sstevel@tonic-gate 		 * weight.  For a nexus that does not support weight the
7087c478bd9Sstevel@tonic-gate 		 * default weight of zero is used. We degrade to round-robin
7097c478bd9Sstevel@tonic-gate 		 * behavior among equal weightes.  The default weight is zero
7107c478bd9Sstevel@tonic-gate 		 * and round-robin behavior continues.
7117c478bd9Sstevel@tonic-gate 		 *
7127c478bd9Sstevel@tonic-gate 		 * Disable preemption while traversing cpu_next_onln to
7137c478bd9Sstevel@tonic-gate 		 * ensure the list does not change.  This works because
7147c478bd9Sstevel@tonic-gate 		 * modifiers of this list and other lists in a struct cpu
7157c478bd9Sstevel@tonic-gate 		 * call pause_cpus() before making changes.
7167c478bd9Sstevel@tonic-gate 		 */
7177c478bd9Sstevel@tonic-gate 		kpreempt_disable();
7187c478bd9Sstevel@tonic-gate 		cp = start_cpu = curr_cpu->cpu_next_onln;
7197c478bd9Sstevel@tonic-gate 		new_cpu = NULL;
7207c478bd9Sstevel@tonic-gate 		do {
7217c478bd9Sstevel@tonic-gate 			/* Skip CPUs with interrupts disabled */
7227c478bd9Sstevel@tonic-gate 			if ((cp->cpu_flags & CPU_ENABLE) == 0)
7237c478bd9Sstevel@tonic-gate 				continue;
7247c478bd9Sstevel@tonic-gate 
7257c478bd9Sstevel@tonic-gate 			if (intr_policy == INTR_FLAT_DIST) {
7267c478bd9Sstevel@tonic-gate 				/* select CPU */
7277c478bd9Sstevel@tonic-gate 				new_cpu = cp;
7287c478bd9Sstevel@tonic-gate 				break;
7297c478bd9Sstevel@tonic-gate 			} else if ((new_cpu == NULL) ||
7307c478bd9Sstevel@tonic-gate 			    (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) {
7317c478bd9Sstevel@tonic-gate 				/* Choose if lighter weight */
7327c478bd9Sstevel@tonic-gate 				new_cpu = cp;
7337c478bd9Sstevel@tonic-gate 			}
7347c478bd9Sstevel@tonic-gate 		} while ((cp = cp->cpu_next_onln) != start_cpu);
7357c478bd9Sstevel@tonic-gate 		ASSERT(new_cpu);
7367c478bd9Sstevel@tonic-gate 		cpuid = new_cpu->cpu_id;
7377c478bd9Sstevel@tonic-gate 
7387c478bd9Sstevel@tonic-gate 		INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: "
7397c478bd9Sstevel@tonic-gate 		    "targeted\n", cpuid, new_cpu->cpu_intr_weight));
7407c478bd9Sstevel@tonic-gate 
7417c478bd9Sstevel@tonic-gate 		/* update static pointer for next round-robin */
7427c478bd9Sstevel@tonic-gate 		curr_cpu = new_cpu;
7437c478bd9Sstevel@tonic-gate 		kpreempt_enable();
7447c478bd9Sstevel@tonic-gate 		break;
7457c478bd9Sstevel@tonic-gate 	}
7467c478bd9Sstevel@tonic-gate 	mutex_exit(&intr_dist_cpu_lock);
7477c478bd9Sstevel@tonic-gate 	return (cpuid);
7487c478bd9Sstevel@tonic-gate }
7497c478bd9Sstevel@tonic-gate 
/*
 * Add or remove the weight of a device from a CPU's interrupt weight.
 *
 * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for
 * their children to improve the overall quality of interrupt initialization.
 *
 * If a nexus shares the CPU returned by a single intr_dist_cpuid() call
 * among multiple devices (sharing ino) then the nexus should call
 * intr_dist_cpuid_add/rem_device_weight for each device separately. Devices
 * that share must specify the same cpuid.
 *
 * If a nexus driver is unable to determine the cpu at remove_intr time
 * for some of its interrupts, then it should not call add_device_weight -
 * intr_dist_cpuid will still provide round-robin.
 *
 * An established device weight (from dev_info node) takes precedence over
 * the weight passed in.  If a device weight is not already established
 * then the passed in nexus weight is established.
 */
7697c478bd9Sstevel@tonic-gate void
intr_dist_cpuid_add_device_weight(uint32_t cpuid,dev_info_t * dip,int32_t nweight)7707c478bd9Sstevel@tonic-gate intr_dist_cpuid_add_device_weight(uint32_t cpuid,
7717c478bd9Sstevel@tonic-gate     dev_info_t *dip, int32_t nweight)
7727c478bd9Sstevel@tonic-gate {
7737c478bd9Sstevel@tonic-gate 	int32_t		eweight;
7747c478bd9Sstevel@tonic-gate 
7757c478bd9Sstevel@tonic-gate 	/*
7767c478bd9Sstevel@tonic-gate 	 * For non-weighted policy everything has weight of zero (and we get
7777c478bd9Sstevel@tonic-gate 	 * round-robin distribution from intr_dist_cpuid).
7787c478bd9Sstevel@tonic-gate 	 * NB: intr_policy is limited to this file. A weighted nexus driver is
7797c478bd9Sstevel@tonic-gate 	 * calls this rouitne even if intr_policy has been patched to
7807c478bd9Sstevel@tonic-gate 	 * INTR_FLAG_DIST.
7817c478bd9Sstevel@tonic-gate 	 */
7827c478bd9Sstevel@tonic-gate 	ASSERT(dip);
7837c478bd9Sstevel@tonic-gate 	if (intr_policy != INTR_WEIGHTED_DIST)
7847c478bd9Sstevel@tonic-gate 		return;
7857c478bd9Sstevel@tonic-gate 
7867c478bd9Sstevel@tonic-gate 	eweight = i_ddi_get_intr_weight(dip);
7877c478bd9Sstevel@tonic-gate 	INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for "
7887c478bd9Sstevel@tonic-gate 	    "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight,
7897c478bd9Sstevel@tonic-gate 	    nweight, eweight, ddi_driver_name(ddi_get_parent(dip)),
7907c478bd9Sstevel@tonic-gate 	    ddi_get_instance(ddi_get_parent(dip)),
7917c478bd9Sstevel@tonic-gate 	    ddi_driver_name(dip), ddi_get_instance(dip)));
7927c478bd9Sstevel@tonic-gate 
7937c478bd9Sstevel@tonic-gate 	/* if no establish weight, establish nexus weight */
7947c478bd9Sstevel@tonic-gate 	if (eweight < 0) {
7957c478bd9Sstevel@tonic-gate 		if (nweight > 0)
7967c478bd9Sstevel@tonic-gate 			(void) i_ddi_set_intr_weight(dip, nweight);
7977c478bd9Sstevel@tonic-gate 		else
7987c478bd9Sstevel@tonic-gate 			nweight = 0;
7997c478bd9Sstevel@tonic-gate 	} else
8007c478bd9Sstevel@tonic-gate 		nweight = eweight;	/* use established weight */
8017c478bd9Sstevel@tonic-gate 
8027c478bd9Sstevel@tonic-gate 	/* Establish exclusion for cpu_intr_weight manipulation */
8037c478bd9Sstevel@tonic-gate 	mutex_enter(&intr_dist_cpu_lock);
8047c478bd9Sstevel@tonic-gate 	cpu[cpuid]->cpu_intr_weight += nweight;
8057c478bd9Sstevel@tonic-gate 
8067c478bd9Sstevel@tonic-gate 	/* update intr_dist_weight_max */
8077c478bd9Sstevel@tonic-gate 	if (nweight > intr_dist_weight_max)
8087c478bd9Sstevel@tonic-gate 		intr_dist_weight_max = nweight;
8097c478bd9Sstevel@tonic-gate 	mutex_exit(&intr_dist_cpu_lock);
8107c478bd9Sstevel@tonic-gate }
8117c478bd9Sstevel@tonic-gate 
8127c478bd9Sstevel@tonic-gate void
intr_dist_cpuid_rem_device_weight(uint32_t cpuid,dev_info_t * dip)8137c478bd9Sstevel@tonic-gate intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip)
8147c478bd9Sstevel@tonic-gate {
8157c478bd9Sstevel@tonic-gate 	struct cpu	*cp;
8167c478bd9Sstevel@tonic-gate 	int32_t		weight;
8177c478bd9Sstevel@tonic-gate 
8187c478bd9Sstevel@tonic-gate 	ASSERT(dip);
8197c478bd9Sstevel@tonic-gate 	if (intr_policy != INTR_WEIGHTED_DIST)
8207c478bd9Sstevel@tonic-gate 		return;
8217c478bd9Sstevel@tonic-gate 
8227c478bd9Sstevel@tonic-gate 	/* remove weight of device from cpu */
8237c478bd9Sstevel@tonic-gate 	weight = i_ddi_get_intr_weight(dip);
8247c478bd9Sstevel@tonic-gate 	if (weight < 0)
8257c478bd9Sstevel@tonic-gate 		weight = 0;
8267c478bd9Sstevel@tonic-gate 	INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d    for "
8277c478bd9Sstevel@tonic-gate 	    "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight,
8287c478bd9Sstevel@tonic-gate 	    ddi_driver_name(ddi_get_parent(dip)),
8297c478bd9Sstevel@tonic-gate 	    ddi_get_instance(ddi_get_parent(dip)),
8307c478bd9Sstevel@tonic-gate 	    ddi_driver_name(dip), ddi_get_instance(dip)));
8317c478bd9Sstevel@tonic-gate 
8327c478bd9Sstevel@tonic-gate 	/* Establish exclusion for cpu_intr_weight manipulation */
8337c478bd9Sstevel@tonic-gate 	mutex_enter(&intr_dist_cpu_lock);
8347c478bd9Sstevel@tonic-gate 	cp = cpu[cpuid];
8357c478bd9Sstevel@tonic-gate 	cp->cpu_intr_weight -= weight;
8367c478bd9Sstevel@tonic-gate 	if (cp->cpu_intr_weight < 0)
8377c478bd9Sstevel@tonic-gate 		cp->cpu_intr_weight = 0;	/* sanity */
8387c478bd9Sstevel@tonic-gate 	mutex_exit(&intr_dist_cpu_lock);
8397c478bd9Sstevel@tonic-gate }
8402850d85bSmv143129 
8412850d85bSmv143129 ulong_t
create_softint(uint_t pil,uint_t (* func)(caddr_t,caddr_t),caddr_t arg1)8422850d85bSmv143129 create_softint(uint_t pil, uint_t (*func)(caddr_t, caddr_t), caddr_t arg1)
8432850d85bSmv143129 {
8442850d85bSmv143129 	uint64_t inum;
8452850d85bSmv143129 
84607247649SMadhavan Venkataraman 	inum = add_softintr(pil, func, arg1, SOFTINT_MT);
8472850d85bSmv143129 	return ((ulong_t)inum);
8482850d85bSmv143129 }
8492850d85bSmv143129 
8502850d85bSmv143129 void
invoke_softint(processorid_t cpuid,ulong_t hdl)8512850d85bSmv143129 invoke_softint(processorid_t cpuid, ulong_t hdl)
8522850d85bSmv143129 {
8532850d85bSmv143129 	uint64_t inum = hdl;
8542850d85bSmv143129 
8552850d85bSmv143129 	if (cpuid == CPU->cpu_id)
8562850d85bSmv143129 		setsoftint(inum);
8572850d85bSmv143129 	else
8582850d85bSmv143129 		xt_one(cpuid, setsoftint_tl1, inum, 0);
8592850d85bSmv143129 }
8602850d85bSmv143129 
8612850d85bSmv143129 void
remove_softint(ulong_t hdl)8622850d85bSmv143129 remove_softint(ulong_t hdl)
8632850d85bSmv143129 {
8642850d85bSmv143129 	uint64_t inum = hdl;
8652850d85bSmv143129 
8662850d85bSmv143129 	(void) rem_softintr(inum);
8672850d85bSmv143129 }
8682850d85bSmv143129 
8692850d85bSmv143129 void
sync_softint(cpuset_t set)8702850d85bSmv143129 sync_softint(cpuset_t set)
8712850d85bSmv143129 {
8722850d85bSmv143129 	xt_sync(set);
8732850d85bSmv143129 }
874