/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M% %I% %E% SMI"

#include <sys/sysmacros.h>
#include <sys/stack.h>
#include <sys/cpuvar.h>
#include <sys/ivintr.h>
#include <sys/intreg.h>
#include <sys/membar.h>
#include <sys/kmem.h>
#include <sys/intr.h>
#include <sys/sunndi.h>
#include <sys/cmn_err.h>
#include <sys/privregs.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/x_call.h>
#include <vm/seg_kp.h>
#include <sys/debug.h>
#include <sys/cyclic.h>

#include <sys/cpu_sgnblk_defs.h>

kmutex_t soft_iv_lock;	/* protect software interrupt vector table */
/* Global locks which protect the interrupt distribution lists */
static kmutex_t intr_dist_lock;
static kmutex_t intr_dist_cpu_lock;

/* Head of the interrupt distribution lists */
static struct intr_dist *intr_dist_head = NULL;
static struct intr_dist *intr_dist_whead = NULL;

uint_t swinum_base;
uint_t maxswinum;
uint_t siron_inum;
uint_t poke_cpu_inum;
int siron_pending;

int intr_policy = INTR_WEIGHTED_DIST;	/* interrupt distribution policy */
int intr_dist_debug = 0;
int32_t intr_dist_weight_max = 1;
int32_t intr_dist_weight_maxmax = 1000;
int intr_dist_weight_maxfactor = 2;
#define	INTR_DEBUG(args) if (intr_dist_debug) cmn_err args

static void sw_ivintr_init(cpu_t *);

/*
 * intr_init() - interrupt initialization
 *	Initialize the system's software interrupt vector table and the
 *	CPU's interrupt free list.
 */
void
intr_init(cpu_t *cp)
{
	init_ivintr();
	sw_ivintr_init(cp);
	init_intr_pool(cp);

	mutex_init(&intr_dist_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&intr_dist_cpu_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * A soft interrupt may have been requested prior to the
	 * initialization of soft interrupts.  Soft interrupts can't be
	 * dispatched until after init_intr_pool(), so we have to wait
	 * until now before we can dispatch the pending soft interrupt
	 * (if any).
	 */
	if (siron_pending)
		setsoftint(siron_inum);
}

/*
 * poke_cpu_intr - fall through when poke_cpu calls
 */
/* ARGSUSED */
uint_t
poke_cpu_intr(caddr_t arg1, caddr_t arg2)
{
	CPU->cpu_m.poke_cpu_outstanding = B_FALSE;
	membar_stld_stst();
	return (1);
}

/*
 * sw_ivintr_init() - software interrupt vector initialization
 *	called after CPU is active
 *	the software interrupt vector table is part of the intr_vector[]
 */
static void
sw_ivintr_init(cpu_t *cp)
{
	extern uint_t softlevel1();

	mutex_init(&soft_iv_lock, NULL, MUTEX_DEFAULT, NULL);

	swinum_base = SOFTIVNUM;

	/*
	 * the maximum software interrupt == MAX_SOFT_INO
	 */
	maxswinum = swinum_base + MAX_SOFT_INO;

	REGISTER_BBUS_INTR();

	siron_inum = add_softintr(PIL_1, softlevel1, 0);
	poke_cpu_inum = add_softintr(PIL_13, poke_cpu_intr, 0);
	cp->cpu_m.poke_cpu_outstanding = B_FALSE;
}

cpuset_t intr_add_pools_inuse;
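/*
 * The add_softintr()/setsoftint() pair used in sw_ivintr_init() above is
 * the general pattern for software interrupts.  A minimal sketch of a
 * hypothetical client (the "mydrv" names are illustrative only, not part
 * of this file):
 *
 *	static uint_t mydrv_inum;
 *
 *	static uint_t
 *	mydrv_softintr(caddr_t arg1, caddr_t arg2)
 *	{
 *		... drain work queued for this low-PIL handler ...
 *		return (1);			-- claim the interrupt
 *	}
 *
 *	mydrv_inum = add_softintr(PIL_1, mydrv_softintr, 0);
 *	...
 *	setsoftint(mydrv_inum);			-- trigger it later
 */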
/*
 * cleanup_intr_pool()
 *	Free up the extra intr request pool for this cpu.
 */
void
cleanup_intr_pool(cpu_t *cp)
{
	extern struct intr_req *intr_add_head;
	int poolno;
	struct intr_req *pool;

	poolno = cp->cpu_m.intr_pool_added;
	if (poolno >= 0) {
		cp->cpu_m.intr_pool_added = -1;
		pool = (poolno * INTR_PENDING_MAX * intr_add_pools) +
		    intr_add_head;	/* not byte arithmetic */
		bzero(pool, INTR_PENDING_MAX * intr_add_pools *
		    sizeof (struct intr_req));

		CPUSET_DEL(intr_add_pools_inuse, poolno);
	}
}
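/*
 * Layout sketch of the per-CPU intr_req lists that cleanup_intr_pool()
 * above and init_intr_pool() below manage (a reading of this file, not a
 * documented interface):
 *
 *	cpu_m.intr_head[0], intr_tail[0]	free list of intr_req entries
 *	cpu_m.intr_head[pil], intr_tail[pil]	FIFO of pending vector
 *						interrupts at that pil
 *
 * init_intr_pool() threads the static per-CPU pool (plus any extra pool
 * carved out of intr_add_head) onto the free list; intr_dequeue_req()
 * below returns entries to it.
 */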
/*
 * init_intr_pool()
 *	initialize the intr request pool for the cpu
 *	should be called for each cpu
 */
void
init_intr_pool(cpu_t *cp)
{
	extern struct intr_req *intr_add_head;
#ifdef	DEBUG
	extern struct intr_req *intr_add_tail;
#endif	/* DEBUG */
	int i, pool;

	cp->cpu_m.intr_pool_added = -1;

	for (i = 0; i < INTR_PENDING_MAX-1; i++) {
		cp->cpu_m.intr_pool[i].intr_next =
		    &cp->cpu_m.intr_pool[i+1];
	}
	cp->cpu_m.intr_pool[INTR_PENDING_MAX-1].intr_next = NULL;

	cp->cpu_m.intr_head[0] = &cp->cpu_m.intr_pool[0];
	cp->cpu_m.intr_tail[0] = &cp->cpu_m.intr_pool[INTR_PENDING_MAX-1];

	if (intr_add_pools != 0) {

		/*
		 * If additional interrupt pools have been allocated,
		 * initialize those too and add them to the free list.
		 */

		struct intr_req *trace;

		for (pool = 0; pool < max_ncpus; pool++) {
			if (!(CPU_IN_SET(intr_add_pools_inuse, pool)))
				break;
		}
		if (pool >= max_ncpus) {
			/*
			 * XXX - intr pools are alloc'd, just not as
			 * much as we would like.
			 */
			cmn_err(CE_WARN, "Failed to alloc all requested intr "
			    "pools for cpu%d", cp->cpu_id);
			return;
		}
		CPUSET_ADD(intr_add_pools_inuse, pool);
		cp->cpu_m.intr_pool_added = pool;

		trace = (pool * INTR_PENDING_MAX * intr_add_pools) +
		    intr_add_head;	/* not byte arithmetic */

		cp->cpu_m.intr_pool[INTR_PENDING_MAX-1].intr_next = trace;

		for (i = 1; i < intr_add_pools * INTR_PENDING_MAX;
		    i++, trace++)
			trace->intr_next = trace + 1;
		trace->intr_next = NULL;

		ASSERT(trace >= intr_add_head && trace <= intr_add_tail);

		cp->cpu_m.intr_tail[0] = trace;
	}
}


/*
 * siron - primitive for sun/os/softint.c
 */
void
siron(void)
{
	if (!siron_pending) {
		siron_pending = 1;
		if (siron_inum != 0)
			setsoftint(siron_inum);
	}
}
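/*
 * Illustrative use of siron() by the softint.c consumer named above
 * (a sketch; my_func/my_arg are hypothetical):
 *
 *	softcall(my_func, my_arg);	-- queue deferred low-PIL work
 *	siron();			-- schedule softlevel1() to drain it
 *
 * If this runs before intr_init() completes, siron_pending records the
 * request and intr_init() issues the setsoftint() later.
 */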
/*
 * no_ivintr()
 *	called by vec_interrupt() through sys_trap()
 *	vector interrupt received but not valid or not
 *	registered in intr_vector[]
 *	considered as a spurious mondo interrupt
 */
/* ARGSUSED */
void
no_ivintr(struct regs *rp, int inum, int pil)
{
	cmn_err(CE_WARN, "invalid vector intr: number 0x%x, pil 0x%x",
	    inum, pil);

#ifdef DEBUG_VEC_INTR
	prom_enter_mon();
#endif /* DEBUG_VEC_INTR */
}

/*
 * no_intr_pool()
 *	called by vec_interrupt() through sys_trap()
 *	vector interrupt received but no intr_req entries
 */
/* ARGSUSED */
void
no_intr_pool(struct regs *rp, int inum, int pil)
{
#ifdef DEBUG_VEC_INTR
	cmn_err(CE_WARN, "intr_req pool empty: num 0x%x, pil 0x%x",
	    inum, pil);
	prom_enter_mon();
#else
	cmn_err(CE_PANIC, "intr_req pool empty: num 0x%x, pil 0x%x",
	    inum, pil);
#endif /* DEBUG_VEC_INTR */
}

void
intr_dequeue_req(uint_t pil, uint32_t inum)
{
	struct intr_req *ir, *prev;
	struct machcpu *mcpu;
	uint32_t clr;
	extern uint_t getpstate(void);

	ASSERT((getpstate() & PSTATE_IE) == 0);

	mcpu = &CPU->cpu_m;

	/* Find a matching entry in the list */
	prev = NULL;
	ir = mcpu->intr_head[pil];
	while (ir != NULL) {
		if (ir->intr_number == inum)
			break;
		prev = ir;
		ir = ir->intr_next;
	}
	if (ir != NULL) {
		/*
		 * Remove entry from list
		 */
		if (prev != NULL)
			prev->intr_next = ir->intr_next;	/* non-head */
		else
			mcpu->intr_head[pil] = ir->intr_next;	/* head */

		if (ir->intr_next == NULL)
			mcpu->intr_tail[pil] = prev;	/* tail */

		/*
		 * Place on free list
		 */
		ir->intr_next = mcpu->intr_head[0];
		mcpu->intr_head[0] = ir;
	}

	/*
	 * clear pending interrupts at this level if the list is empty
	 */
	if (mcpu->intr_head[pil] == NULL) {
		clr = 1 << pil;
		if (pil == PIL_14)
			clr |= (TICK_INT_MASK | STICK_INT_MASK);
		wr_clr_softint(clr);
	}
}


/*
 * Send a directed interrupt of specified interrupt number id to a cpu.
 */
void
send_dirint(
	int cpuix,		/* cpu to be interrupted */
	int intr_id)		/* interrupt number id */
{
	xt_one(cpuix, setsoftint_tl1, intr_id, 0);
}
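/*
 * Example use of send_dirint() (a sketch): directing the PIL_13 soft
 * interrupt registered in sw_ivintr_init() at another CPU to poke it:
 *
 *	send_dirint(cp->cpu_id, poke_cpu_inum);
 *
 * The target CPU then runs poke_cpu_intr() at PIL 13 and clears
 * poke_cpu_outstanding.
 */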
void
init_intr_threads(struct cpu *cp)
{
	int i;

	for (i = 0; i < NINTR_THREADS; i++)
		thread_create_intr(cp);

	cp->cpu_intr_stack = (caddr_t)segkp_get(segkp, INTR_STACK_SIZE,
	    KPD_HASREDZONE | KPD_NO_ANON | KPD_LOCKED) +
	    INTR_STACK_SIZE - SA(MINFRAME);
}

/*
 * Take the specified CPU out of participation in interrupts.
 *	Called by p_online(2) when a processor is being taken off-line.
 *	This allows interrupt threads being handled on the processor to
 *	complete before the processor is idled.
 */
int
cpu_disable_intr(struct cpu *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	/*
	 * Turn off the CPU_ENABLE flag before calling the redistribution
	 * function, since it checks for this in the cpu flags.
	 */
	cp->cpu_flags &= ~CPU_ENABLE;

	intr_redist_all_cpus();

	return (0);
}

/*
 * Allow the specified CPU to participate in interrupts.
 *	Called by p_online(2) if a processor could not be taken off-line
 *	because of bound threads, in order to resume processing interrupts.
 *	Also called after starting a processor.
 */
void
cpu_enable_intr(struct cpu *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	cp->cpu_flags |= CPU_ENABLE;

	intr_redist_all_cpus();
}
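/*
 * For reference, the two routines above are reached from userland through
 * p_online(2), e.g. via psradm(1M): "psradm -i <cpu>" ends up in
 * cpu_disable_intr() and "psradm -n <cpu>" in cpu_enable_intr().
 */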
/*
 * Add function to callback list for intr_redist_all_cpus.  We keep two lists,
 * one for weighted callbacks and one for normal callbacks.  Weighted callbacks
 * are issued to redirect interrupts of a specified weight, from heavy to
 * light.  This allows all the interrupts of a given weight to be redistributed
 * for all weighted nexus drivers prior to those of less weight.
 */
static void
intr_dist_add_list(struct intr_dist **phead, void (*func)(void *), void *arg)
{
	struct intr_dist *new = kmem_alloc(sizeof (*new), KM_SLEEP);
	struct intr_dist *iptr;
	struct intr_dist **pptr;

	ASSERT(func);
	new->func = func;
	new->arg = arg;
	new->next = NULL;

	/* Add to tail so that redistribution occurs in original order. */
	mutex_enter(&intr_dist_lock);
	for (iptr = *phead, pptr = phead; iptr != NULL;
	    pptr = &iptr->next, iptr = iptr->next) {
		/* check for problems as we locate the tail */
		if ((iptr->func == func) && (iptr->arg == arg)) {
			cmn_err(CE_PANIC, "intr_dist_add_list(): duplicate");
			/*NOTREACHED*/
		}
	}
	*pptr = new;

	mutex_exit(&intr_dist_lock);
}

void
intr_dist_add(void (*func)(void *), void *arg)
{
	intr_dist_add_list(&intr_dist_head, (void (*)(void *))func, arg);
}

void
intr_dist_add_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
{
	intr_dist_add_list(&intr_dist_whead, (void (*)(void *))func, arg);
}
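/*
 * Sketch of a weighted callback as a nexus driver might register it (the
 * "xnex" names are hypothetical; the weight_max/weight contract is the one
 * described at intr_redist_all_cpus() below):
 *
 *	static void
 *	xnex_intr_dist(void *arg, int32_t weight_max, int32_t weight)
 *	{
 *		... for each interrupt this nexus manages ...
 *			if ((weight == weight_max && iweight >= weight_max) ||
 *			    iweight == weight)
 *				... retarget it at intr_dist_cpuid() ...
 *	}
 *
 *	intr_dist_add_weighted(xnex_intr_dist, xnex_state);
 */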
/*
 * Search for the interrupt distribution structure with the specified
 * mondo vec reg in the interrupt distribution list.  If a match is found,
 * then delete the entry from the list.  The caller is responsible for
 * modifying the mondo vector registers.
 */
static void
intr_dist_rem_list(struct intr_dist **headp, void (*func)(void *), void *arg)
{
	struct intr_dist *iptr;
	struct intr_dist **vect;

	mutex_enter(&intr_dist_lock);
	for (iptr = *headp, vect = headp;
	    iptr != NULL; vect = &iptr->next, iptr = iptr->next) {
		if ((iptr->func == func) && (iptr->arg == arg)) {
			*vect = iptr->next;
			kmem_free(iptr, sizeof (struct intr_dist));
			mutex_exit(&intr_dist_lock);
			return;
		}
	}

	if (!panicstr)
		cmn_err(CE_PANIC, "intr_dist_rem_list: not found");
	mutex_exit(&intr_dist_lock);
}

void
intr_dist_rem(void (*func)(void *), void *arg)
{
	intr_dist_rem_list(&intr_dist_head, (void (*)(void *))func, arg);
}

void
intr_dist_rem_weighted(void (*func)(void *, int32_t, int32_t), void *arg)
{
	intr_dist_rem_list(&intr_dist_whead, (void (*)(void *))func, arg);
}

/*
 * Initiate interrupt redistribution.  Redistribution improves the isolation
 * associated with interrupt weights by ordering operations from heavy weight
 * to light weight.  When a CPU's orientation changes relative to interrupts,
 * there is *always* a redistribution to accommodate this change (call to
 * intr_redist_all_cpus()).  As devices (not CPUs) attach/detach it is possible
 * that a redistribution could improve the quality of an initialization.  For
 * example, if you are not using a NIC it may not be attached with s10 (devfs).
 * If you then configure the NIC (ifconfig), this may cause the NIC to attach
 * and plumb interrupts.  The CPU assignment for the NIC's interrupts is
 * occurring late, so optimal "isolation" relative to weight is not occurring.
 * The same applies to detach, although in this case doing the redistribution
 * might improve "spread" for medium weight devices since the "isolation" of
 * a higher weight device may no longer be present.
 *
 * NB: We should provide a utility to trigger redistribution (ala "intradm -r").
 *
 * NB: There is risk associated with automatically triggering execution of the
 * redistribution code at arbitrary times.  The risk comes from the fact that
 * there is a lot of low-level hardware interaction associated with a
 * redistribution.  At some point we may want this code to perform automatic
 * redistribution (redistribution thread; trigger timeout when add/remove
 * weight delta is large enough, and call cv_signal from timeout - causing
 * thread to call i_ddi_intr_redist_all_cpus()) but this is considered too
 * risky at this time.
 */
void
i_ddi_intr_redist_all_cpus()
{
	mutex_enter(&cpu_lock);
	INTR_DEBUG((CE_CONT, "intr_dist: i_ddi_intr_redist_all_cpus\n"));
	intr_redist_all_cpus();
	mutex_exit(&cpu_lock);
}

/*
 * Redistribute all interrupts
 *
 * This function redistributes all interrupting devices, running the
 * parent callback functions for each node.
 */
void
intr_redist_all_cpus(void)
{
	struct cpu *cp;
	struct intr_dist *iptr;
	int32_t weight, max_weight;

	ASSERT(MUTEX_HELD(&cpu_lock));
	mutex_enter(&intr_dist_lock);

	/*
	 * zero cpu_intr_weight on all cpus - it is safe to traverse
	 * cpu_list since we hold cpu_lock.
	 */
	cp = cpu_list;
	do {
		cp->cpu_intr_weight = 0;
	} while ((cp = cp->cpu_next) != cpu_list);

	/*
	 * Assume that this redistribution may encounter a device weight
	 * via driver.conf tuning of "ddi-intr-weight" that is at most
	 * intr_dist_weight_maxfactor times larger.
	 */
	max_weight = intr_dist_weight_max * intr_dist_weight_maxfactor;
	if (max_weight > intr_dist_weight_maxmax)
		max_weight = intr_dist_weight_maxmax;
	intr_dist_weight_max = 1;

	INTR_DEBUG((CE_CONT, "intr_dist: "
	    "intr_redist_all_cpus: %d-0\n", max_weight));

	/*
	 * Redistribute weighted, from heavy to light.  The callback that
	 * specifies a weight equal to weight_max should redirect all
	 * interrupts of weight weight_max or greater [weight_max, inf.).
	 * Interrupts of lesser weight should be processed on the call with
	 * the matching weight.  This allows all the heavier weight interrupts
	 * on all weighted busses (multiple pci busses) to be redirected prior
	 * to any lesser weight interrupts.
	 */
	for (weight = max_weight; weight >= 0; weight--)
		for (iptr = intr_dist_whead; iptr != NULL; iptr = iptr->next)
			((void (*)(void *, int32_t, int32_t))iptr->func)
			    (iptr->arg, max_weight, weight);

	/* redistribute normal (non-weighted) interrupts */
	for (iptr = intr_dist_head; iptr != NULL; iptr = iptr->next)
		((void (*)(void *))iptr->func)(iptr->arg);
	mutex_exit(&intr_dist_lock);
}

void
intr_redist_all_cpus_shutdown(void)
{
	intr_policy = INTR_CURRENT_CPU;
	intr_redist_all_cpus();
}
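/*
 * Worked example of the heavy-to-light pass ordering in
 * intr_redist_all_cpus() above (illustrative numbers): with
 * max_weight == 70 and registered device weights {70, 35, 0}, the
 * weight == 70 pass redirects everything in [70, inf.), the 69..36 passes
 * match nothing, the 35 pass redirects the 35-weight devices, and the
 * final 0 pass places the unweighted remainder - so heavier devices land
 * on CPUs before lighter ones across all weighted busses.
 */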
/*
 * Determine what CPU to target, based on interrupt policy.
 *
 * INTR_FLAT_DIST: hold a current CPU pointer in a static variable and
 *	advance through interrupt enabled cpus (round-robin).
 *
 * INTR_WEIGHTED_DIST: search for an enabled CPU with the lowest
 *	cpu_intr_weight, round robin when all equal.
 *
 *	Weighted interrupt distribution provides two things: "spread" of
 *	weight (associated with the algorithm itself) and "isolation"
 *	(associated with a particular device weight).  A redistribution is
 *	what provides optimal "isolation" of heavy weight interrupts;
 *	optimal "spread" of weight (relative to what came before) is always
 *	occurring.
 *
 *	An interrupt weight is a subjective number that represents the
 *	percentage of a CPU required to service a device's interrupts: the
 *	default weight is 0% (however the algorithm still maintains
 *	round-robin), a network interface controller (NIC) may have a large
 *	weight (35%).  Interrupt weight only has meaning relative to the
 *	interrupt weight of other devices: a CPU can be weighted more than
 *	100%, and a single device might consume more than 100% of a CPU.
 *
 *	A coarse interrupt weight can be defined by the parent nexus driver
 *	based on bus specific information, like pci class codes.  A nexus
 *	driver that supports device interrupt weighting for its children
 *	should call intr_dist_cpuid_add/rem_device_weight(), which adds
 *	and removes the weight of a device from the CPU that an interrupt
 *	is directed at.  The quality of initialization improves when the
 *	device interrupt weights more accurately reflect actual run-time
 *	weights, and as the assignments are ordered from heavy to light.
 *
 *	The implementation also supports interrupt weight being specified in
 *	driver.conf files via the property "ddi-intr-weight", which takes
 *	precedence over the nexus supplied weight.  This support is added to
 *	permit possible tweaking in the product in response to customer
 *	problems.  This is not a formal or committed interface.
 *
 *	While a weighted approach chooses the CPU providing the best spread
 *	given past weights, less than optimal isolation can result in cases
 *	where heavy weight devices show up last.  The nexus driver's interrupt
 *	redistribution logic should use intr_dist_add/rem_weighted so that
 *	interrupts can be redistributed heavy first for optimal isolation.
 */
uint32_t
intr_dist_cpuid(void)
{
	static struct cpu *curr_cpu;
	struct cpu *start_cpu;
	struct cpu *new_cpu;
	struct cpu *cp;
	int cpuid = -1;

	/* Establish exclusion for curr_cpu and cpu_intr_weight manipulation */
	mutex_enter(&intr_dist_cpu_lock);

	switch (intr_policy) {
	case INTR_CURRENT_CPU:
		cpuid = CPU->cpu_id;
		break;

	case INTR_BOOT_CPU:
		panic("INTR_BOOT_CPU no longer supported.");
		/*NOTREACHED*/

	case INTR_FLAT_DIST:
	case INTR_WEIGHTED_DIST:
	default:
		/*
		 * Ensure that curr_cpu is valid - cpu_next will be NULL if
		 * the cpu has been deleted (cpu structs are never freed).
		 */
		if (curr_cpu == NULL || curr_cpu->cpu_next == NULL)
			curr_cpu = CPU;

		/*
		 * Advance to online CPU after curr_cpu (round-robin).  For
		 * INTR_WEIGHTED_DIST we choose the cpu with the lightest
		 * weight.  For a nexus that does not support weight the
		 * default weight of zero is used, so round-robin behavior
		 * continues; we degrade to round-robin among equal weights.
		 *
		 * Disable preemption while traversing cpu_next_onln to
		 * ensure the list does not change.  This works because
		 * modifiers of this list and other lists in a struct cpu
		 * call pause_cpus() before making changes.
		 */
		kpreempt_disable();
		cp = start_cpu = curr_cpu->cpu_next_onln;
		new_cpu = NULL;
		do {
			/* Skip CPUs with interrupts disabled */
			if ((cp->cpu_flags & CPU_ENABLE) == 0)
				continue;

			if (intr_policy == INTR_FLAT_DIST) {
				/* select CPU */
				new_cpu = cp;
				break;
			} else if ((new_cpu == NULL) ||
			    (cp->cpu_intr_weight < new_cpu->cpu_intr_weight)) {
				/* Choose if lighter weight */
				new_cpu = cp;
			}
		} while ((cp = cp->cpu_next_onln) != start_cpu);
		ASSERT(new_cpu);
		cpuid = new_cpu->cpu_id;

		INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: "
		    "targeted\n", cpuid, new_cpu->cpu_intr_weight));

		/* update static pointer for next round-robin */
		curr_cpu = new_cpu;
		kpreempt_enable();
		break;
	}
	mutex_exit(&intr_dist_cpu_lock);
	return (cpuid);
}
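/*
 * Illustrative driver.conf(4) override using the "ddi-intr-weight"
 * property described above (a hypothetical mydrv.conf; the value is a
 * unitless relative weight):
 *
 *	ddi-intr-weight=35;
 */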
/*
 * Add or remove the weight of a device from a CPU's interrupt weight.
 *
 * We expect nexus drivers to call intr_dist_cpuid_add/rem_device_weight for
 * their children to improve the overall quality of interrupt initialization.
 *
 * If a nexus shares the CPU returned by a single intr_dist_cpuid() call
 * among multiple devices (sharing ino) then the nexus should call
 * intr_dist_cpuid_add/rem_device_weight for each device separately.  Devices
 * that share must specify the same cpuid.
 *
 * If a nexus driver is unable to determine the cpu at remove_intr time
 * for some of its interrupts, then it should not call add_device_weight -
 * intr_dist_cpuid will still provide round-robin.
 *
 * An established device weight (from dev_info node) takes precedence over
 * the weight passed in.  If a device weight is not already established
 * then the passed in nexus weight is established.
 */
void
intr_dist_cpuid_add_device_weight(uint32_t cpuid,
    dev_info_t *dip, int32_t nweight)
{
	int32_t eweight;

	/*
	 * For non-weighted policy everything has weight of zero (and we get
	 * round-robin distribution from intr_dist_cpuid).
	 * NB: intr_policy is limited to this file.  A weighted nexus driver
	 * calls this routine even if intr_policy has been patched to
	 * INTR_FLAT_DIST.
	 */
	ASSERT(dip);
	if (intr_policy != INTR_WEIGHTED_DIST)
		return;

	eweight = i_ddi_get_intr_weight(dip);
	INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: +%2d/%2d for "
	    "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight,
	    nweight, eweight, ddi_driver_name(ddi_get_parent(dip)),
	    ddi_get_instance(ddi_get_parent(dip)),
	    ddi_driver_name(dip), ddi_get_instance(dip)));

	/* if no established weight, establish nexus weight */
	if (eweight < 0) {
		if (nweight > 0)
			(void) i_ddi_set_intr_weight(dip, nweight);
		else
			nweight = 0;
	} else
		nweight = eweight;	/* use established weight */

	/* Establish exclusion for cpu_intr_weight manipulation */
	mutex_enter(&intr_dist_cpu_lock);
	cpu[cpuid]->cpu_intr_weight += nweight;

	/* update intr_dist_weight_max */
	if (nweight > intr_dist_weight_max)
		intr_dist_weight_max = nweight;
	mutex_exit(&intr_dist_cpu_lock);
}

void
intr_dist_cpuid_rem_device_weight(uint32_t cpuid, dev_info_t *dip)
{
	struct cpu *cp;
	int32_t weight;

	ASSERT(dip);
	if (intr_policy != INTR_WEIGHTED_DIST)
		return;

	/* remove weight of device from cpu */
	weight = i_ddi_get_intr_weight(dip);
	if (weight < 0)
		weight = 0;
	INTR_DEBUG((CE_CONT, "intr_dist: cpu %2d weight %3d: -%2d for "
	    "%s#%d/%s#%d\n", cpuid, cpu[cpuid]->cpu_intr_weight, weight,
	    ddi_driver_name(ddi_get_parent(dip)),
	    ddi_get_instance(ddi_get_parent(dip)),
	    ddi_driver_name(dip), ddi_get_instance(dip)));

	/* Establish exclusion for cpu_intr_weight manipulation */
	mutex_enter(&intr_dist_cpu_lock);
	cp = cpu[cpuid];
	cp->cpu_intr_weight -= weight;
	if (cp->cpu_intr_weight < 0)
		cp->cpu_intr_weight = 0;	/* sanity */
	mutex_exit(&intr_dist_cpu_lock);
}
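/*
 * Sketch of the expected nexus pairing for the two routines above
 * (hypothetical code, error handling omitted):
 *
 *	add_intr path:
 *		cpuid = intr_dist_cpuid();
 *		intr_dist_cpuid_add_device_weight(cpuid, child_dip, weight);
 *		... program the mondo target register with cpuid ...
 *
 *	rem_intr path:
 *		intr_dist_cpuid_rem_device_weight(cpuid, child_dip);
 */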