/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


#pragma ident	"%Z%%M%	%I%	%E% SMI"	/* from SVr4.0 1.30 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/sysinfo.h>
#include <sys/var.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/inline.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/bitmap.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/vtrace.h>
#include <sys/tnf.h>
#include <sys/cpupart.h>
#include <sys/lgrp.h>
#include <sys/chip.h>
#include <sys/schedctl.h>
#include <sys/atomic.h>
#include <sys/dtrace.h>
#include <sys/sdt.h>

#include <vm/as.h>

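/*
 * Ways in which a thread can be bound: to a specific CPU, to a CPU
 * partition, or by virtue of being an interrupt thread.
 */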
#define	BOUND_CPU	0x1
#define	BOUND_PARTITION	0x2
#define	BOUND_INTR	0x4

/* Dispatch queue allocation structure and functions */
struct disp_queue_info {
	disp_t	*dp;
	dispq_t	*olddispq;
	dispq_t	*newdispq;
	ulong_t	*olddqactmap;
	ulong_t	*newdqactmap;
	int	oldnglobpris;
};
static void	disp_dq_alloc(struct disp_queue_info *dptr, int numpris,
    disp_t *dp);
static void	disp_dq_assign(struct disp_queue_info *dptr, int numpris);
static void	disp_dq_free(struct disp_queue_info *dptr);

/* platform-specific routine to call when processor is idle */
static void	generic_idle_cpu();
void		(*idle_cpu)() = generic_idle_cpu;

/* routines invoked when a CPU enters/exits the idle loop */
static void	idle_enter();
static void	idle_exit();

/* platform-specific routine to call when thread is enqueued */
static void	generic_enq_thread(cpu_t *, int);
void		(*disp_enq_thread)(cpu_t *, int) = generic_enq_thread;

pri_t	kpreemptpri;		/* priority where kernel preemption applies */
pri_t	upreemptpri = 0;	/* priority where normal preemption applies */
pri_t	intr_pri;		/* interrupt thread priority base level */

#define	KPQPRI	-1	/* priority where cpu affinity is dropped for kp queue */
pri_t	kpqpri = KPQPRI;	/* can be set in /etc/system */
disp_t	cpu0_disp;		/* boot CPU's dispatch queue */
disp_lock_t	swapped_lock;	/* lock swapped threads and swap queue */
int	nswapped;		/* total number of swapped threads */
void	disp_swapped_enq(kthread_t *tp);
static void	disp_swapped_setrun(kthread_t *tp);
static void	cpu_resched(cpu_t *cp, pri_t tpri);

/*
 * If this is set, only interrupt threads will cause kernel preemptions.
 * This is done by changing the value of kpreemptpri.  kpreemptpri
 * will either be the max sysclass pri + 1 or the min interrupt pri.
 */
int	only_intr_kpreempt;

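/*
 * Architecture-specific hooks invoked when a CPU enters or leaves the
 * idle loop; see idle_enter() and idle_exit() below.
 */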
extern void	set_idle_cpu(int cpun);
extern void	unset_idle_cpu(int cpun);
static void	setkpdq(kthread_t *tp, int borf);
#define	SETKP_BACK	0
#define	SETKP_FRONT	1
/*
 * Parameter that determines how recently a thread must have run
 * on the CPU to be considered loosely-bound to that CPU to reduce
 * cold cache effects.  The interval is measured in clock ticks.
 *
 * The platform may define a per physical processor adjustment of
 * this parameter.  For efficiency, the effective rechoose interval
 * (rechoose_interval + per chip adjustment) is maintained in the
 * cpu structures.  See cpu_choose().
 */
int	rechoose_interval = RECHOOSE_INTERVAL;

static cpu_t	*cpu_choose(kthread_t *, pri_t);

id_t	defaultcid;	/* system "default" class; see dispadmin(1M) */

disp_lock_t	transition_lock;	/* lock on transitioning threads */
disp_lock_t	stop_lock;		/* lock on stopped threads */
disp_lock_t	shuttle_lock;		/* lock on shuttle objects */

static void	cpu_dispqalloc(int numpris);

static kthread_t	*disp_getwork(cpu_t *to);
static kthread_t	*disp_getbest(disp_t *from);
static kthread_t	*disp_ratify(kthread_t *tp, disp_t *kpq);

void	swtch_to(kthread_t *);

/*
 * dispatcher and scheduler initialization
 */

/*
 * disp_setup - Common code to calculate and allocate dispatcher
 *		variables and structures based on the maximum priority.
 */
static void
disp_setup(pri_t maxglobpri, pri_t oldnglobpris)
{
	pri_t	newnglobpris;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/* priorities used by the classes, plus LOCK_LEVEL interrupt levels */
	newnglobpris = maxglobpri + 1 + LOCK_LEVEL;

	if (newnglobpris > oldnglobpris) {
		/*
		 * Allocate new kp queues for each CPU partition.
		 */
		cpupart_kpqalloc(newnglobpris);

		/*
		 * Allocate new dispatch queues for each CPU.
		 */
		cpu_dispqalloc(newnglobpris);

		/*
		 * compute new interrupt thread base priority
		 */
		intr_pri = maxglobpri;
		if (only_intr_kpreempt) {
			kpreemptpri = intr_pri + 1;
			if (kpqpri == KPQPRI)
				kpqpri = kpreemptpri;
		}
		v.v_nglobpris = newnglobpris;
	}
}

/*
 * dispinit - Called to initialize all loaded classes and the
 *	      dispatcher framework.
 */
void
dispinit(void)
{
	id_t	cid;
	pri_t	maxglobpri;
	pri_t	cl_maxglobpri;

	maxglobpri = -1;

	/*
	 * Initialize transition lock, which will always be held.
	 */
	DISP_LOCK_INIT(&transition_lock);
	disp_lock_enter_high(&transition_lock);
	DISP_LOCK_INIT(&stop_lock);
	DISP_LOCK_INIT(&shuttle_lock);

	mutex_enter(&cpu_lock);
	CPU->cpu_disp->disp_maxrunpri = -1;
	CPU->cpu_disp->disp_max_unbound_pri = -1;
	/*
	 * Initialize the default CPU partition.
	 */
	cpupart_initialize_default();
	/*
	 * Call the class specific initialization functions for
	 * all pre-installed schedulers.
	 *
	 * We pass the size of a class specific parameter
	 * buffer to each of the initialization functions
	 * to try to catch problems with backward compatibility
	 * of class modules.
	 *
	 * For example, a new class module running on an old system
	 * which didn't provide sufficiently large parameter buffers
	 * would be bad news.  Class initialization modules can check for
	 * this and take action if they detect a problem.
	 */

	for (cid = 0; cid < nclass; cid++) {
		sclass_t	*sc;

		sc = &sclass[cid];
		if (SCHED_INSTALLED(sc)) {
			cl_maxglobpri = sc->cl_init(cid, PC_CLPARMSZ,
			    &sc->cl_funcs);
			if (cl_maxglobpri > maxglobpri)
				maxglobpri = cl_maxglobpri;
		}
	}
	kpreemptpri = (pri_t)v.v_maxsyspri + 1;
	if (kpqpri == KPQPRI)
		kpqpri = kpreemptpri;

	ASSERT(maxglobpri >= 0);
	disp_setup(maxglobpri, 0);

	mutex_exit(&cpu_lock);

	/*
	 * Get the default class ID; this may later be modified via
	 * dispadmin(1M).  This will load the class (normally TS) and that will
	 * call disp_add(), which is why we had to drop cpu_lock first.
	 */
	if (getcid(defaultclass, &defaultcid) != 0) {
		cmn_err(CE_PANIC, "Couldn't load default scheduling class '%s'",
		    defaultclass);
	}
}

/*
 * disp_add - Called with class pointer to initialize the dispatcher
 *	      for a newly loaded class.
 */
void
disp_add(sclass_t *clp)
{
	pri_t	maxglobpri;
	pri_t	cl_maxglobpri;

	mutex_enter(&cpu_lock);
	/*
	 * Initialize the scheduler class.
	 */
	maxglobpri = (pri_t)(v.v_nglobpris - LOCK_LEVEL - 1);
	cl_maxglobpri = clp->cl_init(clp - sclass, PC_CLPARMSZ, &clp->cl_funcs);
	if (cl_maxglobpri > maxglobpri)
		maxglobpri = cl_maxglobpri;

	/*
	 * Save old queue information.  Since we're initializing a newly
	 * loaded scheduling class, the size of the dispq may have changed.
	 * We need to handle that here.
	 */
	disp_setup(maxglobpri, v.v_nglobpris);

	mutex_exit(&cpu_lock);
}


/*
 * For each CPU, allocate new dispatch queues
 * with the stated number of priorities.
 */
static void
cpu_dispqalloc(int numpris)
{
	cpu_t	*cpup;
	struct disp_queue_info	*disp_mem;
	int i, num;

	ASSERT(MUTEX_HELD(&cpu_lock));

	disp_mem = kmem_zalloc(NCPU *
	    sizeof (struct disp_queue_info), KM_SLEEP);

	/*
	 * This routine must allocate all of the memory before stopping
	 * the CPUs because it must not sleep in kmem_alloc while the
	 * CPUs are stopped.  Locks they hold will not be released until
	 * they are restarted.
	 */
	i = 0;
	cpup = cpu_list;
	do {
		disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp);
		i++;
		cpup = cpup->cpu_next;
	} while (cpup != cpu_list);
	num = i;

	pause_cpus(NULL);
	for (i = 0; i < num; i++)
		disp_dq_assign(&disp_mem[i], numpris);
	start_cpus();

	/*
	 * All of the memory must be freed after starting the CPUs because
	 * we cannot risk sleeping in kmem_free while the CPUs are stopped.
	 */
	for (i = 0; i < num; i++)
		disp_dq_free(&disp_mem[i]);

	kmem_free(disp_mem, NCPU * sizeof (struct disp_queue_info));
}

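/*
 * Allocate (but do not yet install) a dispatch queue and active-queue
 * bitmap sized for "numpris" priorities; disp_dq_assign() installs them.
 */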
static void
disp_dq_alloc(struct disp_queue_info *dptr, int numpris, disp_t *dp)
{
	dptr->newdispq = kmem_zalloc(numpris * sizeof (dispq_t), KM_SLEEP);
	dptr->newdqactmap = kmem_zalloc(((numpris / BT_NBIPUL) + 1) *
	    sizeof (long), KM_SLEEP);
	dptr->dp = dp;
}

static void
disp_dq_assign(struct disp_queue_info *dptr, int numpris)
{
	disp_t	*dp;

	dp = dptr->dp;
	dptr->olddispq = dp->disp_q;
	dptr->olddqactmap = dp->disp_qactmap;
	dptr->oldnglobpris = dp->disp_npri;

	ASSERT(dptr->oldnglobpris < numpris);

	if (dptr->olddispq != NULL) {
		/*
		 * Use kcopy because bcopy is platform-specific
		 * and could block while we might have paused the cpus.
		 */
		(void) kcopy(dptr->olddispq, dptr->newdispq,
		    dptr->oldnglobpris * sizeof (dispq_t));
		(void) kcopy(dptr->olddqactmap, dptr->newdqactmap,
		    ((dptr->oldnglobpris / BT_NBIPUL) + 1) *
		    sizeof (long));
	}
	dp->disp_q = dptr->newdispq;
	dp->disp_qactmap = dptr->newdqactmap;
	dp->disp_q_limit = &dptr->newdispq[numpris];
	dp->disp_npri = numpris;
}

static void
disp_dq_free(struct disp_queue_info *dptr)
{
	if (dptr->olddispq != NULL)
		kmem_free(dptr->olddispq,
		    dptr->oldnglobpris * sizeof (dispq_t));
	if (dptr->olddqactmap != NULL)
		kmem_free(dptr->olddqactmap,
		    ((dptr->oldnglobpris / BT_NBIPUL) + 1) * sizeof (long));
}

/*
 * For a newly created CPU, initialize the dispatch queue.
 * This is called before the CPU is known through cpu[] or on any lists.
 */
void
disp_cpu_init(cpu_t *cp)
{
	disp_t	*dp;
	dispq_t	*newdispq;
	ulong_t	*newdqactmap;

	ASSERT(MUTEX_HELD(&cpu_lock));	/* protect dispatcher queue sizes */

	if (cp == cpu0_disp.disp_cpu)
		dp = &cpu0_disp;
	else
		dp = kmem_alloc(sizeof (disp_t), KM_SLEEP);
	bzero(dp, sizeof (disp_t));
	cp->cpu_disp = dp;
	dp->disp_cpu = cp;
	dp->disp_maxrunpri = -1;
	dp->disp_max_unbound_pri = -1;
	DISP_LOCK_INIT(&cp->cpu_thread_lock);
	/*
	 * Allocate memory for the dispatcher queue headers
	 * and the active queue bitmap.
	 */
	newdispq = kmem_zalloc(v.v_nglobpris * sizeof (dispq_t), KM_SLEEP);
	newdqactmap = kmem_zalloc(((v.v_nglobpris / BT_NBIPUL) + 1) *
	    sizeof (long), KM_SLEEP);
	dp->disp_q = newdispq;
	dp->disp_qactmap = newdqactmap;
	dp->disp_q_limit = &newdispq[v.v_nglobpris];
	dp->disp_npri = v.v_nglobpris;
}

void
disp_cpu_fini(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	disp_kp_free(cp->cpu_disp);
	if (cp->cpu_disp != &cpu0_disp)
		kmem_free(cp->cpu_disp, sizeof (disp_t));
}

/*
 * Allocate new, larger kpreempt dispatch queue to replace the old one.
 */
void
disp_kp_alloc(disp_t *dq, pri_t npri)
{
	struct disp_queue_info	mem_info;

	if (npri > dq->disp_npri) {
		/*
		 * Allocate memory for the new array.
		 */
		disp_dq_alloc(&mem_info, npri, dq);

		/*
		 * We need to copy the old structures to the new
		 * and free the old.
		 */
		disp_dq_assign(&mem_info, npri);
		disp_dq_free(&mem_info);
	}
}

/*
 * Free dispatch queue.
 * Used for the kpreempt queues for a removed CPU partition and
 * for the per-CPU queues of deleted CPUs.
 */
void
disp_kp_free(disp_t *dq)
{
	struct disp_queue_info	mem_info;

	mem_info.olddispq = dq->disp_q;
	mem_info.olddqactmap = dq->disp_qactmap;
	mem_info.oldnglobpris = dq->disp_npri;
	disp_dq_free(&mem_info);
}

/*
 * End dispatcher and scheduler initialization.
 */

/*
 * See if there's anything to do other than remain idle.
 * Return non-zero if there is.
 *
 * This function must be called with high spl, or with
 * kernel preemption disabled to prevent the partition's
 * active cpu list from changing while being traversed.
 */
int
disp_anywork(void)
{
	cpu_t	*cp = CPU;
	cpu_t	*ocp;

	if (cp->cpu_disp->disp_nrunnable != 0)
		return (1);

	if (!(cp->cpu_flags & CPU_OFFLINE)) {
		if (CP_MAXRUNPRI(cp->cpu_part) >= 0)
			return (1);

		/*
		 * Work can be taken from another CPU if:
		 *	- There is unbound work on the run queue
		 *	- That work isn't a thread undergoing a
		 *	  context switch on an otherwise empty queue.
		 *	- The CPU isn't running the idle loop.
		 */
		for (ocp = cp->cpu_next_part; ocp != cp;
		    ocp = ocp->cpu_next_part) {
			ASSERT(CPU_ACTIVE(ocp));

			if (ocp->cpu_disp->disp_max_unbound_pri != -1 &&
			    !((ocp->cpu_disp_flags & CPU_DISP_DONTSTEAL) &&
			    ocp->cpu_disp->disp_nrunnable == 1) &&
			    ocp->cpu_dispatch_pri != -1)
				return (1);
		}
	}
	return (0);
}

/*
 * Called when CPU enters the idle loop
 */
static void
idle_enter()
{
	cpu_t		*cp = CPU;

	new_cpu_mstate(cp, CMS_IDLE);
	CPU_STATS_ADDQ(cp, sys, idlethread, 1);
	set_idle_cpu(cp->cpu_id);	/* arch-dependent hook */
}

/*
 * Called when CPU exits the idle loop
 */
static void
idle_exit()
{
	cpu_t		*cp = CPU;

	new_cpu_mstate(cp, CMS_SYSTEM);
	unset_idle_cpu(cp->cpu_id);	/* arch-dependent hook */
}

/*
 * Idle loop.
 */
void
idle()
{
	struct cpu	*cp = CPU;		/* pointer to this CPU */
	kthread_t	*t;			/* taken thread */

	idle_enter();

	/*
	 * Uniprocessor version of idle loop.
	 * Do this until notified that we're on an actual multiprocessor.
	 */
	while (ncpus == 1) {
		if (cp->cpu_disp->disp_nrunnable == 0) {
			(*idle_cpu)();
			continue;
		}
		idle_exit();
		swtch();

		idle_enter(); /* returned from swtch */
	}

	/*
	 * Multiprocessor idle loop.
	 */
	for (;;) {
		/*
		 * If CPU is completely quiesced by p_online(2), just wait
		 * here with minimal bus traffic until put online.
		 */
		while (cp->cpu_flags & CPU_QUIESCED)
			(*idle_cpu)();

		if (cp->cpu_disp->disp_nrunnable != 0) {
			idle_exit();
			swtch();
		} else {
			if (cp->cpu_flags & CPU_OFFLINE)
				continue;
			if ((t = disp_getwork(cp)) == NULL) {
				if (cp->cpu_chosen_level != -1) {
					disp_t *dp = cp->cpu_disp;
					disp_t *kpq;

					disp_lock_enter(&dp->disp_lock);
					/*
					 * Set kpq under lock to prevent
					 * migration between partitions.
					 */
					kpq = &cp->cpu_part->cp_kp_queue;
					if (kpq->disp_maxrunpri == -1)
						cp->cpu_chosen_level = -1;
					disp_lock_exit(&dp->disp_lock);
				}
				(*idle_cpu)();
				continue;
			}
			idle_exit();
			restore_mstate(t);
			swtch_to(t);
		}
		idle_enter(); /* returned from swtch/swtch_to */
	}
}


/*
 * Preempt the currently running thread in favor of the highest
 * priority thread.  The class of the current thread controls
 * where it goes on the dispatcher queues.  If panicking, turn
 * preemption off.
 */
void
preempt()
{
	kthread_t 	*t = curthread;
	klwp_t 		*lwp = ttolwp(curthread);

	if (panicstr)
		return;

	TRACE_0(TR_FAC_DISP, TR_PREEMPT_START, "preempt_start");

	thread_lock(t);

	if (t->t_state != TS_ONPROC || t->t_disp_queue != CPU->cpu_disp) {
		/*
		 * This thread has already been chosen to be run on
		 * another CPU.  Clear kprunrun on this CPU since we're
		 * already headed for swtch().
		 */
		CPU->cpu_kprunrun = 0;
		thread_unlock_nopreempt(t);
		TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end");
	} else {
		if (lwp != NULL)
			lwp->lwp_ru.nivcsw++;
		CPU_STATS_ADDQ(CPU, sys, inv_swtch, 1);
		THREAD_TRANSITION(t);
		CL_PREEMPT(t);
		DTRACE_SCHED(preempt);
		thread_unlock_nopreempt(t);

		TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end");

		swtch();		/* clears CPU->cpu_runrun via disp() */
	}
}

extern kthread_t *thread_unpin();

/*
 * disp() - find the highest priority thread for this processor to run, and
 * set it in TS_ONPROC state so that resume() can be called to run it.
 */
static kthread_t *
disp()
{
	cpu_t		*cpup;
	disp_t		*dp;
	kthread_t	*tp;
	dispq_t		*dq;
	int		maxrunword;
	pri_t		pri;
	disp_t		*kpq;

	TRACE_0(TR_FAC_DISP, TR_DISP_START, "disp_start");

	cpup = CPU;
	/*
	 * Find the highest priority loaded, runnable thread.
	 */
	dp = cpup->cpu_disp;

reschedule:
	/*
	 * If there is more important work on the global queue with a better
	 * priority than the maximum on this CPU, take it now.
	 */
	kpq = &cpup->cpu_part->cp_kp_queue;
	while ((pri = kpq->disp_maxrunpri) >= 0 &&
	    pri >= dp->disp_maxrunpri &&
	    (cpup->cpu_flags & CPU_OFFLINE) == 0 &&
	    (tp = disp_getbest(kpq)) != NULL) {
		if (disp_ratify(tp, kpq) != NULL) {
			TRACE_1(TR_FAC_DISP, TR_DISP_END,
			    "disp_end:tid %p", tp);
			restore_mstate(tp);
			return (tp);
		}
	}

	disp_lock_enter(&dp->disp_lock);
	pri = dp->disp_maxrunpri;

	/*
	 * If there is nothing to run, look at what's runnable on other queues.
	 * Choose the idle thread if the CPU is quiesced.
	 * Note that CPUs that have the CPU_OFFLINE flag set can still run
	 * interrupt threads, which will be the only threads on the CPU's own
	 * queue, but cannot run threads from other queues.
	 */
	if (pri == -1) {
		if (!(cpup->cpu_flags & CPU_OFFLINE)) {
			disp_lock_exit(&dp->disp_lock);
			if ((tp = disp_getwork(cpup)) == NULL) {
				tp = cpup->cpu_idle_thread;
				(void) splhigh();
				THREAD_ONPROC(tp, cpup);
				cpup->cpu_dispthread = tp;
				cpup->cpu_dispatch_pri = -1;
				cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
				cpup->cpu_chosen_level = -1;
			}
		} else {
			disp_lock_exit_high(&dp->disp_lock);
			tp = cpup->cpu_idle_thread;
			THREAD_ONPROC(tp, cpup);
			cpup->cpu_dispthread = tp;
			cpup->cpu_dispatch_pri = -1;
			cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
			cpup->cpu_chosen_level = -1;
		}
		TRACE_1(TR_FAC_DISP, TR_DISP_END,
		    "disp_end:tid %p", tp);
		restore_mstate(tp);
		return (tp);
	}

	dq = &dp->disp_q[pri];
	tp = dq->dq_first;

	ASSERT(tp != NULL);
	ASSERT(tp->t_schedflag & TS_LOAD);	/* thread must be swapped in */

	DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp);

	/*
	 * Found it, so remove it from the queue.
	 */
	dp->disp_nrunnable--;
	dq->dq_sruncnt--;
	if ((dq->dq_first = tp->t_link) == NULL) {
		ulong_t	*dqactmap = dp->disp_qactmap;

		ASSERT(dq->dq_sruncnt == 0);
		dq->dq_last = NULL;

		/*
		 * The queue is empty, so the corresponding bit needs to be
		 * turned off in dqactmap.  If nrunnable != 0, we just took
		 * the last runnable thread off the highest priority queue,
		 * so recompute disp_maxrunpri.
		 */
		maxrunword = pri >> BT_ULSHIFT;
		dqactmap[maxrunword] &= ~BT_BIW(pri);

		if (dp->disp_nrunnable == 0) {
			dp->disp_max_unbound_pri = -1;
			dp->disp_maxrunpri = -1;
		} else {
			int ipri;

			ipri = bt_gethighbit(dqactmap, maxrunword);
			dp->disp_maxrunpri = ipri;
			if (ipri < dp->disp_max_unbound_pri)
				dp->disp_max_unbound_pri = ipri;
		}
	} else {
		tp->t_link = NULL;
	}

	/*
	 * Set TS_DONT_SWAP flag to prevent another processor from swapping
	 * out this thread before we have a chance to run it.
	 * While running, it is protected against swapping by t_lock.
	 */
	tp->t_schedflag |= TS_DONT_SWAP;
	cpup->cpu_dispthread = tp;		/* protected by spl only */
	cpup->cpu_dispatch_pri = pri;
	ASSERT(pri == DISP_PRIO(tp));
	thread_onproc(tp, cpup);  		/* set t_state to TS_ONPROC */
	disp_lock_exit_high(&dp->disp_lock);	/* drop run queue lock */

	ASSERT(tp != NULL);
	TRACE_1(TR_FAC_DISP, TR_DISP_END,
	    "disp_end:tid %p", tp);

	if (disp_ratify(tp, kpq) == NULL)
		goto reschedule;

	restore_mstate(tp);
	return (tp);
}

/*
 * swtch()
 *	Find best runnable thread and run it.
 *	Called with the current thread already switched to a new state
 *	(on a sleep queue, on a run queue, or stopped), and not zombied.
 *	May be called at any spl level less than or equal to LOCK_LEVEL.
 *	Always drops spl to the base level (spl0()).
 */
void
swtch()
{
	kthread_t	*t = curthread;
	kthread_t	*next;
	cpu_t		*cp;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	if (t->t_flag & T_INTR_THREAD)
		cpu_intr_swtch_enter(t);

	if (t->t_intr != NULL) {
		/*
		 * We are an interrupt thread.  Set up and return
		 * the interrupted thread to be resumed.
		 */
		(void) splhigh();	/* block other scheduler action */
		cp = CPU;		/* now protected against migration */
		ASSERT(CPU_ON_INTR(cp) == 0);	/* not called with PIL > 10 */
		CPU_STATS_ADDQ(cp, sys, pswitch, 1);
		CPU_STATS_ADDQ(cp, sys, intrblk, 1);
		next = thread_unpin();
		TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");
		resume_from_intr(next);
	} else {
#ifdef	DEBUG
		if (t->t_state == TS_ONPROC &&
		    t->t_disp_queue->disp_cpu == CPU &&
		    t->t_preempt == 0) {
			thread_lock(t);
			ASSERT(t->t_state != TS_ONPROC ||
			    t->t_disp_queue->disp_cpu != CPU ||
			    t->t_preempt != 0);	/* cannot migrate */
			thread_unlock_nopreempt(t);
		}
#endif	/* DEBUG */
		cp = CPU;
		next = disp();		/* returns with spl high */
		ASSERT(CPU_ON_INTR(cp) == 0);	/* not called with PIL > 10 */

		/* OK to steal anything left on run queue */
		cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL;

		if (next != t) {
			if (t == cp->cpu_idle_thread) {
				CHIP_NRUNNING(cp->cpu_chip, 1);
			} else if (next == cp->cpu_idle_thread) {
				CHIP_NRUNNING(cp->cpu_chip, -1);
			}

			CPU_STATS_ADDQ(cp, sys, pswitch, 1);
			cp->cpu_last_swtch = t->t_disp_time = lbolt;
			TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

			if (dtrace_vtime_active)
				dtrace_vtime_switch(next);

			resume(next);
			/*
			 * The TR_RESUME_END and TR_SWTCH_END trace points
			 * appear at the end of resume(), because we may not
			 * return here
			 */
		} else {
			if (t->t_flag & T_INTR_THREAD)
				cpu_intr_swtch_exit(t);

			DTRACE_SCHED(remain__cpu);
			TRACE_0(TR_FAC_DISP, TR_SWTCH_END, "swtch_end");
			(void) spl0();
		}
	}
}

/*
 * swtch_from_zombie()
 *	Special case of swtch(), which allows checks for TS_ZOMB to be
 *	eliminated from normal resume.
 *	Find best runnable thread and run it.
 *	Called with the current thread zombied.
 *	Zombies cannot migrate, so CPU references are safe.
 */
void
swtch_from_zombie()
{
	kthread_t	*next;
	cpu_t		*cpu = CPU;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	ASSERT(curthread->t_state == TS_ZOMB);

	next = disp();			/* returns with spl high */
	ASSERT(CPU_ON_INTR(CPU) == 0);	/* not called with PIL > 10 */
	CPU_STATS_ADDQ(CPU, sys, pswitch, 1);
	ASSERT(next != curthread);
	TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

	if (next == cpu->cpu_idle_thread)
		CHIP_NRUNNING(cpu->cpu_chip, -1);

	if (dtrace_vtime_active)
		dtrace_vtime_switch(next);

	resume_from_zombie(next);
	/*
	 * The TR_RESUME_END and TR_SWTCH_END trace points
	 * appear at the end of resume(), because we certainly will not
	 * return here
	 */
}

#if defined(DEBUG) && (defined(DISP_DEBUG) || defined(lint))
static int
thread_on_queue(kthread_t *tp)
{
	cpu_t	*cp;
	cpu_t	*self;
	disp_t	*dp;

	self = CPU;
	cp = self->cpu_next_onln;
	dp = cp->cpu_disp;
	for (;;) {
		dispq_t		*dq;
		dispq_t		*eq;

		disp_lock_enter_high(&dp->disp_lock);
		for (dq = dp->disp_q, eq = dp->disp_q_limit; dq < eq; ++dq) {
			kthread_t	*rp;

			ASSERT(dq->dq_last == NULL ||
			    dq->dq_last->t_link == NULL);
			for (rp = dq->dq_first; rp; rp = rp->t_link)
				if (tp == rp) {
					disp_lock_exit_high(&dp->disp_lock);
					return (1);
				}
		}
		disp_lock_exit_high(&dp->disp_lock);
		if (cp == NULL)
			break;
		if (cp == self) {
			dp = &cp->cpu_part->cp_kp_queue;
			cp = NULL;
		} else {
			cp = cp->cpu_next_onln;
			dp = cp->cpu_disp;
		}
	}
	return (0);
}	/* end of thread_on_queue */
#else

#define	thread_on_queue(tp)	0	/* ASSERT must be !thread_on_queue */

#endif  /* DEBUG */

/*
 * Like swtch(), but switch to a specified thread taken from another CPU.
 * Called with spl high.
 */
void
swtch_to(kthread_t *next)
{
	cpu_t	*cp = CPU;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	/*
	 * Update context switch statistics.
	 */
	CPU_STATS_ADDQ(cp, sys, pswitch, 1);

	TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

	if (curthread == cp->cpu_idle_thread)
		CHIP_NRUNNING(cp->cpu_chip, 1);

	/* OK to steal anything left on run queue */
	cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL;

	/* record last execution time */
	cp->cpu_last_swtch = curthread->t_disp_time = lbolt;

	if (dtrace_vtime_active)
		dtrace_vtime_switch(next);

	resume(next);
	/*
	 * The TR_RESUME_END and TR_SWTCH_END trace points
	 * appear at the end of resume(), because we may not
	 * return here
	 */
}


#define	CPU_IDLING(pri)	((pri) == -1)

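/*
 * cpu_resched() - If the priority "tpri" warrants it, mark the CPU for
 * rescheduling: set cpu_runrun for a user-level preemption and
 * cpu_kprunrun for a kernel preemption, and poke the CPU if it is not
 * the current one.
 */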
static void
cpu_resched(cpu_t *cp, pri_t tpri)
{
	int	call_poke_cpu = 0;
	pri_t   cpupri = cp->cpu_dispatch_pri;

	if (!CPU_IDLING(cpupri) && (cpupri < tpri)) {
		TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED,
		    "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri);
		if (tpri >= upreemptpri && cp->cpu_runrun == 0) {
			cp->cpu_runrun = 1;
			aston(cp->cpu_dispthread);
			if (tpri < kpreemptpri && cp != CPU)
				call_poke_cpu = 1;
		}
		if (tpri >= kpreemptpri && cp->cpu_kprunrun == 0) {
			cp->cpu_kprunrun = 1;
			if (cp != CPU)
				call_poke_cpu = 1;
		}
	}

	/*
	 * Propagate cpu_runrun and cpu_kprunrun to global visibility.
	 */
	membar_enter();

	if (call_poke_cpu)
		poke_cpu(cp->cpu_id);
}

/*
 * Routine used by setbackdq() to balance load across the physical
 * processors.  Returns a CPU of a less heavily loaded chip in the
 * lgroup if balancing is necessary, or the "hint" CPU if it's not.
 *
 * - tp is the thread being enqueued
 * - cp is a hint CPU (chosen by cpu_choose()).
 * - curchip (if not NULL) is the chip on which the current thread
 *   is running.
 *
 * The thread lock for "tp" must be held while calling this routine.
 */
static cpu_t *
chip_balance(kthread_t *tp, cpu_t *cp, chip_t *curchip)
{
	int	chp_nrun, ochp_nrun;
	chip_t	*chp, *nchp;

	chp = cp->cpu_chip;
	chp_nrun = chp->chip_nrunning;

	if (chp == curchip)
		chp_nrun--;	/* Ignore curthread */

	/*
	 * If this chip isn't at all idle, then let
	 * run queue balancing do the work.
	 */
	if (chp_nrun == chp->chip_ncpu)
		return (cp);

	nchp = chp->chip_balance;
	do {
		if (nchp == chp ||
		    !CHIP_IN_CPUPART(nchp, tp->t_cpupart))
			continue;

		ochp_nrun = nchp->chip_nrunning;

		/*
		 * If the other chip is running fewer threads,
		 * or if it's running the same number of threads but
		 * has more online logical CPUs, then choose to balance.
1085*7c478bd9Sstevel@tonic-gate */ 1086*7c478bd9Sstevel@tonic-gate if (chp_nrun > ochp_nrun || 1087*7c478bd9Sstevel@tonic-gate (chp_nrun == ochp_nrun && 1088*7c478bd9Sstevel@tonic-gate nchp->chip_ncpu > chp->chip_ncpu)) { 1089*7c478bd9Sstevel@tonic-gate cp = nchp->chip_cpus; 1090*7c478bd9Sstevel@tonic-gate nchp->chip_cpus = cp->cpu_next_chip; 1091*7c478bd9Sstevel@tonic-gate 1092*7c478bd9Sstevel@tonic-gate /* 1093*7c478bd9Sstevel@tonic-gate * Find a CPU on the chip in the correct 1094*7c478bd9Sstevel@tonic-gate * partition. We know at least one exists 1095*7c478bd9Sstevel@tonic-gate * because of the CHIP_IN_CPUPART() check above. 1096*7c478bd9Sstevel@tonic-gate */ 1097*7c478bd9Sstevel@tonic-gate while (cp->cpu_part != tp->t_cpupart) 1098*7c478bd9Sstevel@tonic-gate cp = cp->cpu_next_chip; 1099*7c478bd9Sstevel@tonic-gate } 1100*7c478bd9Sstevel@tonic-gate chp->chip_balance = nchp->chip_next_lgrp; 1101*7c478bd9Sstevel@tonic-gate break; 1102*7c478bd9Sstevel@tonic-gate } while ((nchp = nchp->chip_next_lgrp) != chp->chip_balance); 1103*7c478bd9Sstevel@tonic-gate 1104*7c478bd9Sstevel@tonic-gate ASSERT(CHIP_IN_CPUPART(cp->cpu_chip, tp->t_cpupart)); 1105*7c478bd9Sstevel@tonic-gate return (cp); 1106*7c478bd9Sstevel@tonic-gate } 1107*7c478bd9Sstevel@tonic-gate 1108*7c478bd9Sstevel@tonic-gate /* 1109*7c478bd9Sstevel@tonic-gate * setbackdq() keeps runqs balanced such that the difference in length 1110*7c478bd9Sstevel@tonic-gate * between the chosen runq and the next one is no more than RUNQ_MAX_DIFF. 1111*7c478bd9Sstevel@tonic-gate * For threads with priorities below RUNQ_MATCH_PRI levels, the runq's lengths 1112*7c478bd9Sstevel@tonic-gate * must match. When per-thread TS_RUNQMATCH flag is set, setbackdq() will 1113*7c478bd9Sstevel@tonic-gate * try to keep runqs perfectly balanced regardless of the thread priority. 1114*7c478bd9Sstevel@tonic-gate */ 1115*7c478bd9Sstevel@tonic-gate #define RUNQ_MATCH_PRI 16 /* pri below which queue lengths must match */ 1116*7c478bd9Sstevel@tonic-gate #define RUNQ_MAX_DIFF 2 /* maximum runq length difference */ 1117*7c478bd9Sstevel@tonic-gate #define RUNQ_LEN(cp, pri) ((cp)->cpu_disp->disp_q[pri].dq_sruncnt) 1118*7c478bd9Sstevel@tonic-gate 1119*7c478bd9Sstevel@tonic-gate /* 1120*7c478bd9Sstevel@tonic-gate * Put the specified thread on the back of the dispatcher 1121*7c478bd9Sstevel@tonic-gate * queue corresponding to its current priority. 1122*7c478bd9Sstevel@tonic-gate * 1123*7c478bd9Sstevel@tonic-gate * Called with the thread in transition, onproc or stopped state 1124*7c478bd9Sstevel@tonic-gate * and locked (transition implies locked) and at high spl. 1125*7c478bd9Sstevel@tonic-gate * Returns with the thread in TS_RUN state and still locked. 
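 *
 * For an unbound thread the target CPU is suggested by cpu_choose() and may
 * then be adjusted by chip_balance() and by the run-queue length balancing
 * below; bound threads always go to their (weak) binding CPU.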
1126*7c478bd9Sstevel@tonic-gate */ 1127*7c478bd9Sstevel@tonic-gate void 1128*7c478bd9Sstevel@tonic-gate setbackdq(kthread_t *tp) 1129*7c478bd9Sstevel@tonic-gate { 1130*7c478bd9Sstevel@tonic-gate dispq_t *dq; 1131*7c478bd9Sstevel@tonic-gate disp_t *dp; 1132*7c478bd9Sstevel@tonic-gate chip_t *curchip = NULL; 1133*7c478bd9Sstevel@tonic-gate cpu_t *cp; 1134*7c478bd9Sstevel@tonic-gate pri_t tpri; 1135*7c478bd9Sstevel@tonic-gate int bound; 1136*7c478bd9Sstevel@tonic-gate 1137*7c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 1138*7c478bd9Sstevel@tonic-gate ASSERT((tp->t_schedflag & TS_ALLSTART) == 0); 1139*7c478bd9Sstevel@tonic-gate 1140*7c478bd9Sstevel@tonic-gate if (tp->t_waitrq == 0) { 1141*7c478bd9Sstevel@tonic-gate hrtime_t curtime; 1142*7c478bd9Sstevel@tonic-gate 1143*7c478bd9Sstevel@tonic-gate curtime = gethrtime_unscaled(); 1144*7c478bd9Sstevel@tonic-gate (void) cpu_update_pct(tp, curtime); 1145*7c478bd9Sstevel@tonic-gate tp->t_waitrq = curtime; 1146*7c478bd9Sstevel@tonic-gate } else { 1147*7c478bd9Sstevel@tonic-gate (void) cpu_update_pct(tp, gethrtime_unscaled()); 1148*7c478bd9Sstevel@tonic-gate } 1149*7c478bd9Sstevel@tonic-gate 1150*7c478bd9Sstevel@tonic-gate ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */ 1151*7c478bd9Sstevel@tonic-gate 1152*7c478bd9Sstevel@tonic-gate /* 1153*7c478bd9Sstevel@tonic-gate * If thread is "swapped" or on the swap queue don't 1154*7c478bd9Sstevel@tonic-gate * queue it, but wake sched. 1155*7c478bd9Sstevel@tonic-gate */ 1156*7c478bd9Sstevel@tonic-gate if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) { 1157*7c478bd9Sstevel@tonic-gate disp_swapped_setrun(tp); 1158*7c478bd9Sstevel@tonic-gate return; 1159*7c478bd9Sstevel@tonic-gate } 1160*7c478bd9Sstevel@tonic-gate 1161*7c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp); 1162*7c478bd9Sstevel@tonic-gate if (tp == curthread) { 1163*7c478bd9Sstevel@tonic-gate curchip = CPU->cpu_chip; 1164*7c478bd9Sstevel@tonic-gate } 1165*7c478bd9Sstevel@tonic-gate 1166*7c478bd9Sstevel@tonic-gate if (ncpus == 1) 1167*7c478bd9Sstevel@tonic-gate cp = tp->t_cpu; 1168*7c478bd9Sstevel@tonic-gate else if (!tp->t_bound_cpu && !tp->t_weakbound_cpu) { 1169*7c478bd9Sstevel@tonic-gate if (tpri >= kpqpri) { 1170*7c478bd9Sstevel@tonic-gate setkpdq(tp, SETKP_BACK); 1171*7c478bd9Sstevel@tonic-gate return; 1172*7c478bd9Sstevel@tonic-gate } 1173*7c478bd9Sstevel@tonic-gate /* 1174*7c478bd9Sstevel@tonic-gate * Let cpu_choose suggest a CPU. 1175*7c478bd9Sstevel@tonic-gate */ 1176*7c478bd9Sstevel@tonic-gate cp = cpu_choose(tp, tpri); 1177*7c478bd9Sstevel@tonic-gate 1178*7c478bd9Sstevel@tonic-gate if (tp->t_cpupart == cp->cpu_part) { 1179*7c478bd9Sstevel@tonic-gate int qlen; 1180*7c478bd9Sstevel@tonic-gate 1181*7c478bd9Sstevel@tonic-gate /* 1182*7c478bd9Sstevel@tonic-gate * Select another CPU if we need 1183*7c478bd9Sstevel@tonic-gate * to do some load balancing across the 1184*7c478bd9Sstevel@tonic-gate * physical processors. 
1185*7c478bd9Sstevel@tonic-gate */ 1186*7c478bd9Sstevel@tonic-gate if (CHIP_SHOULD_BALANCE(cp->cpu_chip)) 1187*7c478bd9Sstevel@tonic-gate cp = chip_balance(tp, cp, curchip); 1188*7c478bd9Sstevel@tonic-gate 1189*7c478bd9Sstevel@tonic-gate /* 1190*7c478bd9Sstevel@tonic-gate * Balance across the run queues 1191*7c478bd9Sstevel@tonic-gate */ 1192*7c478bd9Sstevel@tonic-gate qlen = RUNQ_LEN(cp, tpri); 1193*7c478bd9Sstevel@tonic-gate if (tpri >= RUNQ_MATCH_PRI && 1194*7c478bd9Sstevel@tonic-gate !(tp->t_schedflag & TS_RUNQMATCH)) 1195*7c478bd9Sstevel@tonic-gate qlen -= RUNQ_MAX_DIFF; 1196*7c478bd9Sstevel@tonic-gate if (qlen > 0) { 1197*7c478bd9Sstevel@tonic-gate cpu_t *np; 1198*7c478bd9Sstevel@tonic-gate 1199*7c478bd9Sstevel@tonic-gate if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) 1200*7c478bd9Sstevel@tonic-gate np = cp->cpu_next_part; 1201*7c478bd9Sstevel@tonic-gate else { 1202*7c478bd9Sstevel@tonic-gate if ((np = cp->cpu_next_lpl) == cp) 1203*7c478bd9Sstevel@tonic-gate np = cp->cpu_next_part; 1204*7c478bd9Sstevel@tonic-gate } 1205*7c478bd9Sstevel@tonic-gate if (RUNQ_LEN(np, tpri) < qlen) 1206*7c478bd9Sstevel@tonic-gate cp = np; 1207*7c478bd9Sstevel@tonic-gate } 1208*7c478bd9Sstevel@tonic-gate } else { 1209*7c478bd9Sstevel@tonic-gate /* 1210*7c478bd9Sstevel@tonic-gate * Migrate to a cpu in the new partition. 1211*7c478bd9Sstevel@tonic-gate */ 1212*7c478bd9Sstevel@tonic-gate cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, 1213*7c478bd9Sstevel@tonic-gate tp->t_lpl, tp->t_pri, NULL); 1214*7c478bd9Sstevel@tonic-gate } 1215*7c478bd9Sstevel@tonic-gate bound = 0; 1216*7c478bd9Sstevel@tonic-gate ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1217*7c478bd9Sstevel@tonic-gate } else { 1218*7c478bd9Sstevel@tonic-gate /* 1219*7c478bd9Sstevel@tonic-gate * It is possible that t_weakbound_cpu != t_bound_cpu (for 1220*7c478bd9Sstevel@tonic-gate * a short time until weak binding that existed when the 1221*7c478bd9Sstevel@tonic-gate * strong binding was established has dropped) so we must 1222*7c478bd9Sstevel@tonic-gate * favour weak binding over strong. 1223*7c478bd9Sstevel@tonic-gate */ 1224*7c478bd9Sstevel@tonic-gate cp = tp->t_weakbound_cpu ? 
1225*7c478bd9Sstevel@tonic-gate tp->t_weakbound_cpu : tp->t_bound_cpu; 1226*7c478bd9Sstevel@tonic-gate bound = 1; 1227*7c478bd9Sstevel@tonic-gate } 1228*7c478bd9Sstevel@tonic-gate dp = cp->cpu_disp; 1229*7c478bd9Sstevel@tonic-gate disp_lock_enter_high(&dp->disp_lock); 1230*7c478bd9Sstevel@tonic-gate 1231*7c478bd9Sstevel@tonic-gate DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 0); 1232*7c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p", 1233*7c478bd9Sstevel@tonic-gate tpri, cp, tp); 1234*7c478bd9Sstevel@tonic-gate 1235*7c478bd9Sstevel@tonic-gate #ifndef NPROBE 1236*7c478bd9Sstevel@tonic-gate /* Kernel probe */ 1237*7c478bd9Sstevel@tonic-gate if (tnf_tracing_active) 1238*7c478bd9Sstevel@tonic-gate tnf_thread_queue(tp, cp, tpri); 1239*7c478bd9Sstevel@tonic-gate #endif /* NPROBE */ 1240*7c478bd9Sstevel@tonic-gate 1241*7c478bd9Sstevel@tonic-gate ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1242*7c478bd9Sstevel@tonic-gate 1243*7c478bd9Sstevel@tonic-gate THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */ 1244*7c478bd9Sstevel@tonic-gate tp->t_disp_queue = dp; 1245*7c478bd9Sstevel@tonic-gate tp->t_link = NULL; 1246*7c478bd9Sstevel@tonic-gate 1247*7c478bd9Sstevel@tonic-gate dq = &dp->disp_q[tpri]; 1248*7c478bd9Sstevel@tonic-gate dp->disp_nrunnable++; 1249*7c478bd9Sstevel@tonic-gate membar_enter(); 1250*7c478bd9Sstevel@tonic-gate 1251*7c478bd9Sstevel@tonic-gate if (dq->dq_sruncnt++ != 0) { 1252*7c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first != NULL); 1253*7c478bd9Sstevel@tonic-gate dq->dq_last->t_link = tp; 1254*7c478bd9Sstevel@tonic-gate dq->dq_last = tp; 1255*7c478bd9Sstevel@tonic-gate } else { 1256*7c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first == NULL); 1257*7c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL); 1258*7c478bd9Sstevel@tonic-gate dq->dq_first = dq->dq_last = tp; 1259*7c478bd9Sstevel@tonic-gate BT_SET(dp->disp_qactmap, tpri); 1260*7c478bd9Sstevel@tonic-gate if (tpri > dp->disp_maxrunpri) { 1261*7c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = tpri; 1262*7c478bd9Sstevel@tonic-gate membar_enter(); 1263*7c478bd9Sstevel@tonic-gate cpu_resched(cp, tpri); 1264*7c478bd9Sstevel@tonic-gate } 1265*7c478bd9Sstevel@tonic-gate } 1266*7c478bd9Sstevel@tonic-gate 1267*7c478bd9Sstevel@tonic-gate if (!bound && tpri > dp->disp_max_unbound_pri) { 1268*7c478bd9Sstevel@tonic-gate if (tp == curthread && dp->disp_max_unbound_pri == -1 && 1269*7c478bd9Sstevel@tonic-gate cp == CPU) { 1270*7c478bd9Sstevel@tonic-gate /* 1271*7c478bd9Sstevel@tonic-gate * If there are no other unbound threads on the 1272*7c478bd9Sstevel@tonic-gate * run queue, don't allow other CPUs to steal 1273*7c478bd9Sstevel@tonic-gate * this thread while we are in the middle of a 1274*7c478bd9Sstevel@tonic-gate * context switch. We may just switch to it 1275*7c478bd9Sstevel@tonic-gate * again right away. CPU_DISP_DONTSTEAL is cleared 1276*7c478bd9Sstevel@tonic-gate * in swtch and swtch_to. 1277*7c478bd9Sstevel@tonic-gate */ 1278*7c478bd9Sstevel@tonic-gate cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 1279*7c478bd9Sstevel@tonic-gate } 1280*7c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = tpri; 1281*7c478bd9Sstevel@tonic-gate } 1282*7c478bd9Sstevel@tonic-gate (*disp_enq_thread)(cp, bound); 1283*7c478bd9Sstevel@tonic-gate } 1284*7c478bd9Sstevel@tonic-gate 1285*7c478bd9Sstevel@tonic-gate /* 1286*7c478bd9Sstevel@tonic-gate * Put the specified thread on the front of the dispatcher 1287*7c478bd9Sstevel@tonic-gate * queue corresponding to its current priority. 
1288*7c478bd9Sstevel@tonic-gate * 1289*7c478bd9Sstevel@tonic-gate * Called with the thread in transition, onproc or stopped state 1290*7c478bd9Sstevel@tonic-gate * and locked (transition implies locked) and at high spl. 1291*7c478bd9Sstevel@tonic-gate * Returns with the thread in TS_RUN state and still locked. 1292*7c478bd9Sstevel@tonic-gate */ 1293*7c478bd9Sstevel@tonic-gate void 1294*7c478bd9Sstevel@tonic-gate setfrontdq(kthread_t *tp) 1295*7c478bd9Sstevel@tonic-gate { 1296*7c478bd9Sstevel@tonic-gate disp_t *dp; 1297*7c478bd9Sstevel@tonic-gate dispq_t *dq; 1298*7c478bd9Sstevel@tonic-gate cpu_t *cp; 1299*7c478bd9Sstevel@tonic-gate pri_t tpri; 1300*7c478bd9Sstevel@tonic-gate int bound; 1301*7c478bd9Sstevel@tonic-gate 1302*7c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 1303*7c478bd9Sstevel@tonic-gate ASSERT((tp->t_schedflag & TS_ALLSTART) == 0); 1304*7c478bd9Sstevel@tonic-gate 1305*7c478bd9Sstevel@tonic-gate if (tp->t_waitrq == 0) { 1306*7c478bd9Sstevel@tonic-gate hrtime_t curtime; 1307*7c478bd9Sstevel@tonic-gate 1308*7c478bd9Sstevel@tonic-gate curtime = gethrtime_unscaled(); 1309*7c478bd9Sstevel@tonic-gate (void) cpu_update_pct(tp, curtime); 1310*7c478bd9Sstevel@tonic-gate tp->t_waitrq = curtime; 1311*7c478bd9Sstevel@tonic-gate } else { 1312*7c478bd9Sstevel@tonic-gate (void) cpu_update_pct(tp, gethrtime_unscaled()); 1313*7c478bd9Sstevel@tonic-gate } 1314*7c478bd9Sstevel@tonic-gate 1315*7c478bd9Sstevel@tonic-gate ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */ 1316*7c478bd9Sstevel@tonic-gate 1317*7c478bd9Sstevel@tonic-gate /* 1318*7c478bd9Sstevel@tonic-gate * If thread is "swapped" or on the swap queue don't 1319*7c478bd9Sstevel@tonic-gate * queue it, but wake sched. 1320*7c478bd9Sstevel@tonic-gate */ 1321*7c478bd9Sstevel@tonic-gate if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) { 1322*7c478bd9Sstevel@tonic-gate disp_swapped_setrun(tp); 1323*7c478bd9Sstevel@tonic-gate return; 1324*7c478bd9Sstevel@tonic-gate } 1325*7c478bd9Sstevel@tonic-gate 1326*7c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp); 1327*7c478bd9Sstevel@tonic-gate if (ncpus == 1) 1328*7c478bd9Sstevel@tonic-gate cp = tp->t_cpu; 1329*7c478bd9Sstevel@tonic-gate else if (!tp->t_bound_cpu && !tp->t_weakbound_cpu) { 1330*7c478bd9Sstevel@tonic-gate if (tpri >= kpqpri) { 1331*7c478bd9Sstevel@tonic-gate setkpdq(tp, SETKP_FRONT); 1332*7c478bd9Sstevel@tonic-gate return; 1333*7c478bd9Sstevel@tonic-gate } 1334*7c478bd9Sstevel@tonic-gate cp = tp->t_cpu; 1335*7c478bd9Sstevel@tonic-gate if (tp->t_cpupart == cp->cpu_part) { 1336*7c478bd9Sstevel@tonic-gate /* 1337*7c478bd9Sstevel@tonic-gate * If we are of higher or equal priority than 1338*7c478bd9Sstevel@tonic-gate * the highest priority runnable thread of 1339*7c478bd9Sstevel@tonic-gate * the current CPU, just pick this CPU. Otherwise 1340*7c478bd9Sstevel@tonic-gate * Let cpu_choose() select the CPU. If this cpu 1341*7c478bd9Sstevel@tonic-gate * is the target of an offline request then do not 1342*7c478bd9Sstevel@tonic-gate * pick it - a thread_nomigrate() on the in motion 1343*7c478bd9Sstevel@tonic-gate * cpu relies on this when it forces a preempt. 1344*7c478bd9Sstevel@tonic-gate */ 1345*7c478bd9Sstevel@tonic-gate if (tpri < cp->cpu_disp->disp_maxrunpri || 1346*7c478bd9Sstevel@tonic-gate cp == cpu_inmotion) 1347*7c478bd9Sstevel@tonic-gate cp = cpu_choose(tp, tpri); 1348*7c478bd9Sstevel@tonic-gate } else { 1349*7c478bd9Sstevel@tonic-gate /* 1350*7c478bd9Sstevel@tonic-gate * Migrate to a cpu in the new partition. 
1351*7c478bd9Sstevel@tonic-gate */ 1352*7c478bd9Sstevel@tonic-gate cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, 1353*7c478bd9Sstevel@tonic-gate tp->t_lpl, tp->t_pri, NULL); 1354*7c478bd9Sstevel@tonic-gate } 1355*7c478bd9Sstevel@tonic-gate bound = 0; 1356*7c478bd9Sstevel@tonic-gate ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1357*7c478bd9Sstevel@tonic-gate } else { 1358*7c478bd9Sstevel@tonic-gate /* 1359*7c478bd9Sstevel@tonic-gate * It is possible that t_weakbound_cpu != t_bound_cpu (for 1360*7c478bd9Sstevel@tonic-gate * a short time until weak binding that existed when the 1361*7c478bd9Sstevel@tonic-gate * strong binding was established has dropped) so we must 1362*7c478bd9Sstevel@tonic-gate * favour weak binding over strong. 1363*7c478bd9Sstevel@tonic-gate */ 1364*7c478bd9Sstevel@tonic-gate cp = tp->t_weakbound_cpu ? 1365*7c478bd9Sstevel@tonic-gate tp->t_weakbound_cpu : tp->t_bound_cpu; 1366*7c478bd9Sstevel@tonic-gate bound = 1; 1367*7c478bd9Sstevel@tonic-gate } 1368*7c478bd9Sstevel@tonic-gate dp = cp->cpu_disp; 1369*7c478bd9Sstevel@tonic-gate disp_lock_enter_high(&dp->disp_lock); 1370*7c478bd9Sstevel@tonic-gate 1371*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); 1372*7c478bd9Sstevel@tonic-gate DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 1); 1373*7c478bd9Sstevel@tonic-gate 1374*7c478bd9Sstevel@tonic-gate #ifndef NPROBE 1375*7c478bd9Sstevel@tonic-gate /* Kernel probe */ 1376*7c478bd9Sstevel@tonic-gate if (tnf_tracing_active) 1377*7c478bd9Sstevel@tonic-gate tnf_thread_queue(tp, cp, tpri); 1378*7c478bd9Sstevel@tonic-gate #endif /* NPROBE */ 1379*7c478bd9Sstevel@tonic-gate 1380*7c478bd9Sstevel@tonic-gate ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1381*7c478bd9Sstevel@tonic-gate 1382*7c478bd9Sstevel@tonic-gate THREAD_RUN(tp, &dp->disp_lock); /* set TS_RUN state and lock */ 1383*7c478bd9Sstevel@tonic-gate tp->t_disp_queue = dp; 1384*7c478bd9Sstevel@tonic-gate 1385*7c478bd9Sstevel@tonic-gate dq = &dp->disp_q[tpri]; 1386*7c478bd9Sstevel@tonic-gate dp->disp_nrunnable++; 1387*7c478bd9Sstevel@tonic-gate membar_enter(); 1388*7c478bd9Sstevel@tonic-gate 1389*7c478bd9Sstevel@tonic-gate if (dq->dq_sruncnt++ != 0) { 1390*7c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last != NULL); 1391*7c478bd9Sstevel@tonic-gate tp->t_link = dq->dq_first; 1392*7c478bd9Sstevel@tonic-gate dq->dq_first = tp; 1393*7c478bd9Sstevel@tonic-gate } else { 1394*7c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL); 1395*7c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first == NULL); 1396*7c478bd9Sstevel@tonic-gate tp->t_link = NULL; 1397*7c478bd9Sstevel@tonic-gate dq->dq_first = dq->dq_last = tp; 1398*7c478bd9Sstevel@tonic-gate BT_SET(dp->disp_qactmap, tpri); 1399*7c478bd9Sstevel@tonic-gate if (tpri > dp->disp_maxrunpri) { 1400*7c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = tpri; 1401*7c478bd9Sstevel@tonic-gate membar_enter(); 1402*7c478bd9Sstevel@tonic-gate cpu_resched(cp, tpri); 1403*7c478bd9Sstevel@tonic-gate } 1404*7c478bd9Sstevel@tonic-gate } 1405*7c478bd9Sstevel@tonic-gate 1406*7c478bd9Sstevel@tonic-gate if (!bound && tpri > dp->disp_max_unbound_pri) { 1407*7c478bd9Sstevel@tonic-gate if (tp == curthread && dp->disp_max_unbound_pri == -1 && 1408*7c478bd9Sstevel@tonic-gate cp == CPU) { 1409*7c478bd9Sstevel@tonic-gate /* 1410*7c478bd9Sstevel@tonic-gate * If there are no other unbound threads on the 1411*7c478bd9Sstevel@tonic-gate * run queue, don't allow other CPUs to steal 1412*7c478bd9Sstevel@tonic-gate * this thread while we are in the middle of a 
1413*7c478bd9Sstevel@tonic-gate * context switch. We may just switch to it 1414*7c478bd9Sstevel@tonic-gate * again right away. CPU_DISP_DONTSTEAL is cleared 1415*7c478bd9Sstevel@tonic-gate * in swtch and swtch_to. 1416*7c478bd9Sstevel@tonic-gate */ 1417*7c478bd9Sstevel@tonic-gate cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 1418*7c478bd9Sstevel@tonic-gate } 1419*7c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = tpri; 1420*7c478bd9Sstevel@tonic-gate } 1421*7c478bd9Sstevel@tonic-gate (*disp_enq_thread)(cp, bound); 1422*7c478bd9Sstevel@tonic-gate } 1423*7c478bd9Sstevel@tonic-gate 1424*7c478bd9Sstevel@tonic-gate /* 1425*7c478bd9Sstevel@tonic-gate * Put a high-priority unbound thread on the kp queue 1426*7c478bd9Sstevel@tonic-gate */ 1427*7c478bd9Sstevel@tonic-gate static void 1428*7c478bd9Sstevel@tonic-gate setkpdq(kthread_t *tp, int borf) 1429*7c478bd9Sstevel@tonic-gate { 1430*7c478bd9Sstevel@tonic-gate dispq_t *dq; 1431*7c478bd9Sstevel@tonic-gate disp_t *dp; 1432*7c478bd9Sstevel@tonic-gate cpu_t *cp; 1433*7c478bd9Sstevel@tonic-gate pri_t tpri; 1434*7c478bd9Sstevel@tonic-gate 1435*7c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp); 1436*7c478bd9Sstevel@tonic-gate 1437*7c478bd9Sstevel@tonic-gate dp = &tp->t_cpupart->cp_kp_queue; 1438*7c478bd9Sstevel@tonic-gate disp_lock_enter_high(&dp->disp_lock); 1439*7c478bd9Sstevel@tonic-gate 1440*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); 1441*7c478bd9Sstevel@tonic-gate 1442*7c478bd9Sstevel@tonic-gate ASSERT(tpri >= 0 && tpri < dp->disp_npri); 1443*7c478bd9Sstevel@tonic-gate DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, borf); 1444*7c478bd9Sstevel@tonic-gate THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */ 1445*7c478bd9Sstevel@tonic-gate tp->t_disp_queue = dp; 1446*7c478bd9Sstevel@tonic-gate dp->disp_nrunnable++; 1447*7c478bd9Sstevel@tonic-gate dq = &dp->disp_q[tpri]; 1448*7c478bd9Sstevel@tonic-gate 1449*7c478bd9Sstevel@tonic-gate if (dq->dq_sruncnt++ != 0) { 1450*7c478bd9Sstevel@tonic-gate if (borf == SETKP_BACK) { 1451*7c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first != NULL); 1452*7c478bd9Sstevel@tonic-gate tp->t_link = NULL; 1453*7c478bd9Sstevel@tonic-gate dq->dq_last->t_link = tp; 1454*7c478bd9Sstevel@tonic-gate dq->dq_last = tp; 1455*7c478bd9Sstevel@tonic-gate } else { 1456*7c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last != NULL); 1457*7c478bd9Sstevel@tonic-gate tp->t_link = dq->dq_first; 1458*7c478bd9Sstevel@tonic-gate dq->dq_first = tp; 1459*7c478bd9Sstevel@tonic-gate } 1460*7c478bd9Sstevel@tonic-gate } else { 1461*7c478bd9Sstevel@tonic-gate if (borf == SETKP_BACK) { 1462*7c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first == NULL); 1463*7c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL); 1464*7c478bd9Sstevel@tonic-gate dq->dq_first = dq->dq_last = tp; 1465*7c478bd9Sstevel@tonic-gate } else { 1466*7c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL); 1467*7c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first == NULL); 1468*7c478bd9Sstevel@tonic-gate tp->t_link = NULL; 1469*7c478bd9Sstevel@tonic-gate dq->dq_first = dq->dq_last = tp; 1470*7c478bd9Sstevel@tonic-gate } 1471*7c478bd9Sstevel@tonic-gate BT_SET(dp->disp_qactmap, tpri); 1472*7c478bd9Sstevel@tonic-gate if (tpri > dp->disp_max_unbound_pri) 1473*7c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = tpri; 1474*7c478bd9Sstevel@tonic-gate if (tpri > dp->disp_maxrunpri) { 1475*7c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = tpri; 1476*7c478bd9Sstevel@tonic-gate membar_enter(); 1477*7c478bd9Sstevel@tonic-gate } 
1478*7c478bd9Sstevel@tonic-gate } 1479*7c478bd9Sstevel@tonic-gate 1480*7c478bd9Sstevel@tonic-gate cp = tp->t_cpu; 1481*7c478bd9Sstevel@tonic-gate if (tp->t_cpupart != cp->cpu_part) { 1482*7c478bd9Sstevel@tonic-gate /* migrate to a cpu in the new partition */ 1483*7c478bd9Sstevel@tonic-gate cp = tp->t_cpupart->cp_cpulist; 1484*7c478bd9Sstevel@tonic-gate } 1485*7c478bd9Sstevel@tonic-gate cp = disp_lowpri_cpu(cp, tp->t_lpl, tp->t_pri, NULL); 1486*7c478bd9Sstevel@tonic-gate disp_lock_enter_high(&cp->cpu_disp->disp_lock); 1487*7c478bd9Sstevel@tonic-gate ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 1488*7c478bd9Sstevel@tonic-gate 1489*7c478bd9Sstevel@tonic-gate #ifndef NPROBE 1490*7c478bd9Sstevel@tonic-gate /* Kernel probe */ 1491*7c478bd9Sstevel@tonic-gate if (tnf_tracing_active) 1492*7c478bd9Sstevel@tonic-gate tnf_thread_queue(tp, cp, tpri); 1493*7c478bd9Sstevel@tonic-gate #endif /* NPROBE */ 1494*7c478bd9Sstevel@tonic-gate 1495*7c478bd9Sstevel@tonic-gate if (cp->cpu_chosen_level < tpri) 1496*7c478bd9Sstevel@tonic-gate cp->cpu_chosen_level = tpri; 1497*7c478bd9Sstevel@tonic-gate cpu_resched(cp, tpri); 1498*7c478bd9Sstevel@tonic-gate disp_lock_exit_high(&cp->cpu_disp->disp_lock); 1499*7c478bd9Sstevel@tonic-gate (*disp_enq_thread)(cp, 0); 1500*7c478bd9Sstevel@tonic-gate } 1501*7c478bd9Sstevel@tonic-gate 1502*7c478bd9Sstevel@tonic-gate /* 1503*7c478bd9Sstevel@tonic-gate * Remove a thread from the dispatcher queue if it is on it. 1504*7c478bd9Sstevel@tonic-gate * It is not an error if it is not found but we return whether 1505*7c478bd9Sstevel@tonic-gate * or not it was found in case the caller wants to check. 1506*7c478bd9Sstevel@tonic-gate */ 1507*7c478bd9Sstevel@tonic-gate int 1508*7c478bd9Sstevel@tonic-gate dispdeq(kthread_t *tp) 1509*7c478bd9Sstevel@tonic-gate { 1510*7c478bd9Sstevel@tonic-gate disp_t *dp; 1511*7c478bd9Sstevel@tonic-gate dispq_t *dq; 1512*7c478bd9Sstevel@tonic-gate kthread_t *rp; 1513*7c478bd9Sstevel@tonic-gate kthread_t *trp; 1514*7c478bd9Sstevel@tonic-gate kthread_t **ptp; 1515*7c478bd9Sstevel@tonic-gate int tpri; 1516*7c478bd9Sstevel@tonic-gate 1517*7c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 1518*7c478bd9Sstevel@tonic-gate 1519*7c478bd9Sstevel@tonic-gate if (tp->t_state != TS_RUN) 1520*7c478bd9Sstevel@tonic-gate return (0); 1521*7c478bd9Sstevel@tonic-gate 1522*7c478bd9Sstevel@tonic-gate /* 1523*7c478bd9Sstevel@tonic-gate * The thread is "swapped" or is on the swap queue and 1524*7c478bd9Sstevel@tonic-gate * hence no longer on the run queue, so return true. 1525*7c478bd9Sstevel@tonic-gate */ 1526*7c478bd9Sstevel@tonic-gate if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) 1527*7c478bd9Sstevel@tonic-gate return (1); 1528*7c478bd9Sstevel@tonic-gate 1529*7c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp); 1530*7c478bd9Sstevel@tonic-gate dp = tp->t_disp_queue; 1531*7c478bd9Sstevel@tonic-gate ASSERT(tpri < dp->disp_npri); 1532*7c478bd9Sstevel@tonic-gate dq = &dp->disp_q[tpri]; 1533*7c478bd9Sstevel@tonic-gate ptp = &dq->dq_first; 1534*7c478bd9Sstevel@tonic-gate rp = *ptp; 1535*7c478bd9Sstevel@tonic-gate trp = NULL; 1536*7c478bd9Sstevel@tonic-gate 1537*7c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL); 1538*7c478bd9Sstevel@tonic-gate 1539*7c478bd9Sstevel@tonic-gate /* 1540*7c478bd9Sstevel@tonic-gate * Search for thread in queue. 1541*7c478bd9Sstevel@tonic-gate * Double links would simplify this at the expense of disp/setrun. 
1542*7c478bd9Sstevel@tonic-gate */ 1543*7c478bd9Sstevel@tonic-gate while (rp != tp && rp != NULL) { 1544*7c478bd9Sstevel@tonic-gate trp = rp; 1545*7c478bd9Sstevel@tonic-gate ptp = &trp->t_link; 1546*7c478bd9Sstevel@tonic-gate rp = trp->t_link; 1547*7c478bd9Sstevel@tonic-gate } 1548*7c478bd9Sstevel@tonic-gate 1549*7c478bd9Sstevel@tonic-gate if (rp == NULL) { 1550*7c478bd9Sstevel@tonic-gate panic("dispdeq: thread not on queue"); 1551*7c478bd9Sstevel@tonic-gate } 1552*7c478bd9Sstevel@tonic-gate 1553*7c478bd9Sstevel@tonic-gate DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp); 1554*7c478bd9Sstevel@tonic-gate 1555*7c478bd9Sstevel@tonic-gate /* 1556*7c478bd9Sstevel@tonic-gate * Found it so remove it from queue. 1557*7c478bd9Sstevel@tonic-gate */ 1558*7c478bd9Sstevel@tonic-gate if ((*ptp = rp->t_link) == NULL) 1559*7c478bd9Sstevel@tonic-gate dq->dq_last = trp; 1560*7c478bd9Sstevel@tonic-gate 1561*7c478bd9Sstevel@tonic-gate dp->disp_nrunnable--; 1562*7c478bd9Sstevel@tonic-gate if (--dq->dq_sruncnt == 0) { 1563*7c478bd9Sstevel@tonic-gate dp->disp_qactmap[tpri >> BT_ULSHIFT] &= ~BT_BIW(tpri); 1564*7c478bd9Sstevel@tonic-gate if (dp->disp_nrunnable == 0) { 1565*7c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = -1; 1566*7c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = -1; 1567*7c478bd9Sstevel@tonic-gate } else if (tpri == dp->disp_maxrunpri) { 1568*7c478bd9Sstevel@tonic-gate int ipri; 1569*7c478bd9Sstevel@tonic-gate 1570*7c478bd9Sstevel@tonic-gate ipri = bt_gethighbit(dp->disp_qactmap, 1571*7c478bd9Sstevel@tonic-gate dp->disp_maxrunpri >> BT_ULSHIFT); 1572*7c478bd9Sstevel@tonic-gate if (ipri < dp->disp_max_unbound_pri) 1573*7c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = ipri; 1574*7c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = ipri; 1575*7c478bd9Sstevel@tonic-gate } 1576*7c478bd9Sstevel@tonic-gate } 1577*7c478bd9Sstevel@tonic-gate tp->t_link = NULL; 1578*7c478bd9Sstevel@tonic-gate THREAD_TRANSITION(tp); /* put in intermediate state */ 1579*7c478bd9Sstevel@tonic-gate return (1); 1580*7c478bd9Sstevel@tonic-gate } 1581*7c478bd9Sstevel@tonic-gate 1582*7c478bd9Sstevel@tonic-gate 1583*7c478bd9Sstevel@tonic-gate /* 1584*7c478bd9Sstevel@tonic-gate * dq_sruninc and dq_srundec are public functions for 1585*7c478bd9Sstevel@tonic-gate * incrementing/decrementing the sruncnts when a thread on 1586*7c478bd9Sstevel@tonic-gate * a dispatcher queue is made schedulable/unschedulable by 1587*7c478bd9Sstevel@tonic-gate * resetting the TS_LOAD flag. 1588*7c478bd9Sstevel@tonic-gate * 1589*7c478bd9Sstevel@tonic-gate * The caller MUST have the thread lock and therefore the dispatcher 1590*7c478bd9Sstevel@tonic-gate * queue lock so that the operation which changes 1591*7c478bd9Sstevel@tonic-gate * the flag, the operation that checks the status of the thread to 1592*7c478bd9Sstevel@tonic-gate * determine if it's on a disp queue AND the call to this function 1593*7c478bd9Sstevel@tonic-gate * are one atomic operation with respect to interrupts. 1594*7c478bd9Sstevel@tonic-gate */ 1595*7c478bd9Sstevel@tonic-gate 1596*7c478bd9Sstevel@tonic-gate /* 1597*7c478bd9Sstevel@tonic-gate * Called by sched AFTER TS_LOAD flag is set on a swapped, runnable thread. 
1598*7c478bd9Sstevel@tonic-gate */ 1599*7c478bd9Sstevel@tonic-gate void 1600*7c478bd9Sstevel@tonic-gate dq_sruninc(kthread_t *t) 1601*7c478bd9Sstevel@tonic-gate { 1602*7c478bd9Sstevel@tonic-gate ASSERT(t->t_state == TS_RUN); 1603*7c478bd9Sstevel@tonic-gate ASSERT(t->t_schedflag & TS_LOAD); 1604*7c478bd9Sstevel@tonic-gate 1605*7c478bd9Sstevel@tonic-gate THREAD_TRANSITION(t); 1606*7c478bd9Sstevel@tonic-gate setfrontdq(t); 1607*7c478bd9Sstevel@tonic-gate } 1608*7c478bd9Sstevel@tonic-gate 1609*7c478bd9Sstevel@tonic-gate /* 1610*7c478bd9Sstevel@tonic-gate * See comment on calling conventions above. 1611*7c478bd9Sstevel@tonic-gate * Called by sched BEFORE TS_LOAD flag is cleared on a runnable thread. 1612*7c478bd9Sstevel@tonic-gate */ 1613*7c478bd9Sstevel@tonic-gate void 1614*7c478bd9Sstevel@tonic-gate dq_srundec(kthread_t *t) 1615*7c478bd9Sstevel@tonic-gate { 1616*7c478bd9Sstevel@tonic-gate ASSERT(t->t_schedflag & TS_LOAD); 1617*7c478bd9Sstevel@tonic-gate 1618*7c478bd9Sstevel@tonic-gate (void) dispdeq(t); 1619*7c478bd9Sstevel@tonic-gate disp_swapped_enq(t); 1620*7c478bd9Sstevel@tonic-gate } 1621*7c478bd9Sstevel@tonic-gate 1622*7c478bd9Sstevel@tonic-gate /* 1623*7c478bd9Sstevel@tonic-gate * Change the dispatcher lock of thread to the "swapped_lock" 1624*7c478bd9Sstevel@tonic-gate * and return with thread lock still held. 1625*7c478bd9Sstevel@tonic-gate * 1626*7c478bd9Sstevel@tonic-gate * Called with thread_lock held, in transition state, and at high spl. 1627*7c478bd9Sstevel@tonic-gate */ 1628*7c478bd9Sstevel@tonic-gate void 1629*7c478bd9Sstevel@tonic-gate disp_swapped_enq(kthread_t *tp) 1630*7c478bd9Sstevel@tonic-gate { 1631*7c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 1632*7c478bd9Sstevel@tonic-gate ASSERT(tp->t_schedflag & TS_LOAD); 1633*7c478bd9Sstevel@tonic-gate 1634*7c478bd9Sstevel@tonic-gate switch (tp->t_state) { 1635*7c478bd9Sstevel@tonic-gate case TS_RUN: 1636*7c478bd9Sstevel@tonic-gate disp_lock_enter_high(&swapped_lock); 1637*7c478bd9Sstevel@tonic-gate THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */ 1638*7c478bd9Sstevel@tonic-gate break; 1639*7c478bd9Sstevel@tonic-gate case TS_ONPROC: 1640*7c478bd9Sstevel@tonic-gate disp_lock_enter_high(&swapped_lock); 1641*7c478bd9Sstevel@tonic-gate THREAD_TRANSITION(tp); 1642*7c478bd9Sstevel@tonic-gate wake_sched_sec = 1; /* tell clock to wake sched */ 1643*7c478bd9Sstevel@tonic-gate THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */ 1644*7c478bd9Sstevel@tonic-gate break; 1645*7c478bd9Sstevel@tonic-gate default: 1646*7c478bd9Sstevel@tonic-gate panic("disp_swapped: tp: %p bad t_state", (void *)tp); 1647*7c478bd9Sstevel@tonic-gate } 1648*7c478bd9Sstevel@tonic-gate } 1649*7c478bd9Sstevel@tonic-gate 1650*7c478bd9Sstevel@tonic-gate /* 1651*7c478bd9Sstevel@tonic-gate * This routine is called by setbackdq/setfrontdq if the thread is 1652*7c478bd9Sstevel@tonic-gate * not loaded or loaded and on the swap queue. 1653*7c478bd9Sstevel@tonic-gate * 1654*7c478bd9Sstevel@tonic-gate * Thread state TS_SLEEP implies that a swapped thread 1655*7c478bd9Sstevel@tonic-gate * has been woken up and needs to be swapped in by the swapper. 1656*7c478bd9Sstevel@tonic-gate * 1657*7c478bd9Sstevel@tonic-gate * Thread state TS_RUN, it implies that the priority of a swapped 1658*7c478bd9Sstevel@tonic-gate * thread is being increased by scheduling class (e.g. ts_update). 
1659*7c478bd9Sstevel@tonic-gate */ 1660*7c478bd9Sstevel@tonic-gate static void 1661*7c478bd9Sstevel@tonic-gate disp_swapped_setrun(kthread_t *tp) 1662*7c478bd9Sstevel@tonic-gate { 1663*7c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 1664*7c478bd9Sstevel@tonic-gate ASSERT((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD); 1665*7c478bd9Sstevel@tonic-gate 1666*7c478bd9Sstevel@tonic-gate switch (tp->t_state) { 1667*7c478bd9Sstevel@tonic-gate case TS_SLEEP: 1668*7c478bd9Sstevel@tonic-gate disp_lock_enter_high(&swapped_lock); 1669*7c478bd9Sstevel@tonic-gate /* 1670*7c478bd9Sstevel@tonic-gate * Wakeup sched immediately (i.e., next tick) if the 1671*7c478bd9Sstevel@tonic-gate * thread priority is above maxclsyspri. 1672*7c478bd9Sstevel@tonic-gate */ 1673*7c478bd9Sstevel@tonic-gate if (DISP_PRIO(tp) > maxclsyspri) 1674*7c478bd9Sstevel@tonic-gate wake_sched = 1; 1675*7c478bd9Sstevel@tonic-gate else 1676*7c478bd9Sstevel@tonic-gate wake_sched_sec = 1; 1677*7c478bd9Sstevel@tonic-gate THREAD_RUN(tp, &swapped_lock); /* set TS_RUN state and lock */ 1678*7c478bd9Sstevel@tonic-gate break; 1679*7c478bd9Sstevel@tonic-gate case TS_RUN: /* called from ts_update */ 1680*7c478bd9Sstevel@tonic-gate break; 1681*7c478bd9Sstevel@tonic-gate default: 1682*7c478bd9Sstevel@tonic-gate panic("disp_swapped_setrun: tp: %p bad t_state", tp); 1683*7c478bd9Sstevel@tonic-gate } 1684*7c478bd9Sstevel@tonic-gate } 1685*7c478bd9Sstevel@tonic-gate 1686*7c478bd9Sstevel@tonic-gate 1687*7c478bd9Sstevel@tonic-gate /* 1688*7c478bd9Sstevel@tonic-gate * Make a thread give up its processor. Find the processor on 1689*7c478bd9Sstevel@tonic-gate * which this thread is executing, and have that processor 1690*7c478bd9Sstevel@tonic-gate * preempt. 1691*7c478bd9Sstevel@tonic-gate */ 1692*7c478bd9Sstevel@tonic-gate void 1693*7c478bd9Sstevel@tonic-gate cpu_surrender(kthread_t *tp) 1694*7c478bd9Sstevel@tonic-gate { 1695*7c478bd9Sstevel@tonic-gate cpu_t *cpup; 1696*7c478bd9Sstevel@tonic-gate int max_pri; 1697*7c478bd9Sstevel@tonic-gate int max_run_pri; 1698*7c478bd9Sstevel@tonic-gate klwp_t *lwp; 1699*7c478bd9Sstevel@tonic-gate 1700*7c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 1701*7c478bd9Sstevel@tonic-gate 1702*7c478bd9Sstevel@tonic-gate if (tp->t_state != TS_ONPROC) 1703*7c478bd9Sstevel@tonic-gate return; 1704*7c478bd9Sstevel@tonic-gate cpup = tp->t_disp_queue->disp_cpu; /* CPU thread dispatched to */ 1705*7c478bd9Sstevel@tonic-gate max_pri = cpup->cpu_disp->disp_maxrunpri; /* best pri of that CPU */ 1706*7c478bd9Sstevel@tonic-gate max_run_pri = CP_MAXRUNPRI(cpup->cpu_part); 1707*7c478bd9Sstevel@tonic-gate if (max_pri < max_run_pri) 1708*7c478bd9Sstevel@tonic-gate max_pri = max_run_pri; 1709*7c478bd9Sstevel@tonic-gate 1710*7c478bd9Sstevel@tonic-gate cpup->cpu_runrun = 1; 1711*7c478bd9Sstevel@tonic-gate if (max_pri >= kpreemptpri && cpup->cpu_kprunrun == 0) { 1712*7c478bd9Sstevel@tonic-gate cpup->cpu_kprunrun = 1; 1713*7c478bd9Sstevel@tonic-gate } 1714*7c478bd9Sstevel@tonic-gate 1715*7c478bd9Sstevel@tonic-gate /* 1716*7c478bd9Sstevel@tonic-gate * Propagate cpu_runrun, and cpu_kprunrun to global visibility. 
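 * (The membar_enter() below makes these stores visible before poke_cpu()
 * is called for the target CPU.)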
1717*7c478bd9Sstevel@tonic-gate */ 1718*7c478bd9Sstevel@tonic-gate membar_enter(); 1719*7c478bd9Sstevel@tonic-gate 1720*7c478bd9Sstevel@tonic-gate DTRACE_SCHED1(surrender, kthread_t *, tp); 1721*7c478bd9Sstevel@tonic-gate 1722*7c478bd9Sstevel@tonic-gate /* 1723*7c478bd9Sstevel@tonic-gate * Make the target thread take an excursion through trap() 1724*7c478bd9Sstevel@tonic-gate * to do preempt() (unless we're already in trap or post_syscall, 1725*7c478bd9Sstevel@tonic-gate * calling cpu_surrender via CL_TRAPRET). 1726*7c478bd9Sstevel@tonic-gate */ 1727*7c478bd9Sstevel@tonic-gate if (tp != curthread || (lwp = tp->t_lwp) == NULL || 1728*7c478bd9Sstevel@tonic-gate lwp->lwp_state != LWP_USER) { 1729*7c478bd9Sstevel@tonic-gate aston(tp); 1730*7c478bd9Sstevel@tonic-gate if (cpup != CPU) 1731*7c478bd9Sstevel@tonic-gate poke_cpu(cpup->cpu_id); 1732*7c478bd9Sstevel@tonic-gate } 1733*7c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_DISP, TR_CPU_SURRENDER, 1734*7c478bd9Sstevel@tonic-gate "cpu_surrender:tid %p cpu %p", tp, cpup); 1735*7c478bd9Sstevel@tonic-gate } 1736*7c478bd9Sstevel@tonic-gate 1737*7c478bd9Sstevel@tonic-gate 1738*7c478bd9Sstevel@tonic-gate /* 1739*7c478bd9Sstevel@tonic-gate * Commit to and ratify a scheduling decision 1740*7c478bd9Sstevel@tonic-gate */ 1741*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 1742*7c478bd9Sstevel@tonic-gate static kthread_t * 1743*7c478bd9Sstevel@tonic-gate disp_ratify(kthread_t *tp, disp_t *kpq) 1744*7c478bd9Sstevel@tonic-gate { 1745*7c478bd9Sstevel@tonic-gate pri_t tpri, maxpri; 1746*7c478bd9Sstevel@tonic-gate pri_t maxkpri; 1747*7c478bd9Sstevel@tonic-gate cpu_t *cpup; 1748*7c478bd9Sstevel@tonic-gate 1749*7c478bd9Sstevel@tonic-gate ASSERT(tp != NULL); 1750*7c478bd9Sstevel@tonic-gate /* 1751*7c478bd9Sstevel@tonic-gate * Commit to, then ratify scheduling decision 1752*7c478bd9Sstevel@tonic-gate */ 1753*7c478bd9Sstevel@tonic-gate cpup = CPU; 1754*7c478bd9Sstevel@tonic-gate if (cpup->cpu_runrun != 0) 1755*7c478bd9Sstevel@tonic-gate cpup->cpu_runrun = 0; 1756*7c478bd9Sstevel@tonic-gate if (cpup->cpu_kprunrun != 0) 1757*7c478bd9Sstevel@tonic-gate cpup->cpu_kprunrun = 0; 1758*7c478bd9Sstevel@tonic-gate if (cpup->cpu_chosen_level != -1) 1759*7c478bd9Sstevel@tonic-gate cpup->cpu_chosen_level = -1; 1760*7c478bd9Sstevel@tonic-gate membar_enter(); 1761*7c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp); 1762*7c478bd9Sstevel@tonic-gate maxpri = cpup->cpu_disp->disp_maxrunpri; 1763*7c478bd9Sstevel@tonic-gate maxkpri = kpq->disp_maxrunpri; 1764*7c478bd9Sstevel@tonic-gate if (maxpri < maxkpri) 1765*7c478bd9Sstevel@tonic-gate maxpri = maxkpri; 1766*7c478bd9Sstevel@tonic-gate if (tpri < maxpri) { 1767*7c478bd9Sstevel@tonic-gate /* 1768*7c478bd9Sstevel@tonic-gate * should have done better 1769*7c478bd9Sstevel@tonic-gate * put this one back and indicate to try again 1770*7c478bd9Sstevel@tonic-gate */ 1771*7c478bd9Sstevel@tonic-gate cpup->cpu_dispthread = curthread; /* fixup dispthread */ 1772*7c478bd9Sstevel@tonic-gate cpup->cpu_dispatch_pri = DISP_PRIO(curthread); 1773*7c478bd9Sstevel@tonic-gate thread_lock_high(tp); 1774*7c478bd9Sstevel@tonic-gate THREAD_TRANSITION(tp); 1775*7c478bd9Sstevel@tonic-gate setfrontdq(tp); 1776*7c478bd9Sstevel@tonic-gate thread_unlock_nopreempt(tp); 1777*7c478bd9Sstevel@tonic-gate 1778*7c478bd9Sstevel@tonic-gate tp = NULL; 1779*7c478bd9Sstevel@tonic-gate } 1780*7c478bd9Sstevel@tonic-gate return (tp); 1781*7c478bd9Sstevel@tonic-gate } 1782*7c478bd9Sstevel@tonic-gate 1783*7c478bd9Sstevel@tonic-gate /* 1784*7c478bd9Sstevel@tonic-gate * See if there is any work on 
the dispatcher queue for other CPUs. 1785*7c478bd9Sstevel@tonic-gate * If there is, dequeue the best thread and return. 1786*7c478bd9Sstevel@tonic-gate */ 1787*7c478bd9Sstevel@tonic-gate static kthread_t * 1788*7c478bd9Sstevel@tonic-gate disp_getwork(cpu_t *cp) 1789*7c478bd9Sstevel@tonic-gate { 1790*7c478bd9Sstevel@tonic-gate cpu_t *ocp; /* other CPU */ 1791*7c478bd9Sstevel@tonic-gate cpu_t *ocp_start; 1792*7c478bd9Sstevel@tonic-gate cpu_t *tcp; /* target local CPU */ 1793*7c478bd9Sstevel@tonic-gate kthread_t *tp; 1794*7c478bd9Sstevel@tonic-gate pri_t maxpri; 1795*7c478bd9Sstevel@tonic-gate int s; 1796*7c478bd9Sstevel@tonic-gate disp_t *kpq; /* kp queue for this partition */ 1797*7c478bd9Sstevel@tonic-gate lpl_t *lpl, *lpl_leaf; 1798*7c478bd9Sstevel@tonic-gate int hint, leafidx; 1799*7c478bd9Sstevel@tonic-gate 1800*7c478bd9Sstevel@tonic-gate maxpri = -1; 1801*7c478bd9Sstevel@tonic-gate tcp = NULL; 1802*7c478bd9Sstevel@tonic-gate 1803*7c478bd9Sstevel@tonic-gate kpq = &cp->cpu_part->cp_kp_queue; 1804*7c478bd9Sstevel@tonic-gate while (kpq->disp_maxrunpri >= 0) { 1805*7c478bd9Sstevel@tonic-gate /* 1806*7c478bd9Sstevel@tonic-gate * Try to take a thread from the kp_queue. 1807*7c478bd9Sstevel@tonic-gate */ 1808*7c478bd9Sstevel@tonic-gate tp = (disp_getbest(kpq)); 1809*7c478bd9Sstevel@tonic-gate if (tp) 1810*7c478bd9Sstevel@tonic-gate return (disp_ratify(tp, kpq)); 1811*7c478bd9Sstevel@tonic-gate } 1812*7c478bd9Sstevel@tonic-gate 1813*7c478bd9Sstevel@tonic-gate s = splhigh(); /* protect the cpu_active list */ 1814*7c478bd9Sstevel@tonic-gate 1815*7c478bd9Sstevel@tonic-gate /* 1816*7c478bd9Sstevel@tonic-gate * Try to find something to do on another CPU's run queue. 1817*7c478bd9Sstevel@tonic-gate * Loop through all other CPUs looking for the one with the highest 1818*7c478bd9Sstevel@tonic-gate * priority unbound thread. 1819*7c478bd9Sstevel@tonic-gate * 1820*7c478bd9Sstevel@tonic-gate * On NUMA machines, the partition's CPUs are consulted in order of 1821*7c478bd9Sstevel@tonic-gate * distance from the current CPU. This way, the first available 1822*7c478bd9Sstevel@tonic-gate * work found is also the closest, and will suffer the least 1823*7c478bd9Sstevel@tonic-gate * from being migrated. 1824*7c478bd9Sstevel@tonic-gate */ 1825*7c478bd9Sstevel@tonic-gate lpl = lpl_leaf = cp->cpu_lpl; 1826*7c478bd9Sstevel@tonic-gate hint = leafidx = 0; 1827*7c478bd9Sstevel@tonic-gate 1828*7c478bd9Sstevel@tonic-gate /* 1829*7c478bd9Sstevel@tonic-gate * This loop traverses the lpl hierarchy. 
Higher level lpls represent 1830*7c478bd9Sstevel@tonic-gate * broader levels of locality 1831*7c478bd9Sstevel@tonic-gate */ 1832*7c478bd9Sstevel@tonic-gate do { 1833*7c478bd9Sstevel@tonic-gate /* This loop iterates over the lpl's leaves */ 1834*7c478bd9Sstevel@tonic-gate do { 1835*7c478bd9Sstevel@tonic-gate if (lpl_leaf != cp->cpu_lpl) 1836*7c478bd9Sstevel@tonic-gate ocp = lpl_leaf->lpl_cpus; 1837*7c478bd9Sstevel@tonic-gate else 1838*7c478bd9Sstevel@tonic-gate ocp = cp->cpu_next_lpl; 1839*7c478bd9Sstevel@tonic-gate 1840*7c478bd9Sstevel@tonic-gate /* This loop iterates over the CPUs in the leaf */ 1841*7c478bd9Sstevel@tonic-gate ocp_start = ocp; 1842*7c478bd9Sstevel@tonic-gate do { 1843*7c478bd9Sstevel@tonic-gate pri_t pri; 1844*7c478bd9Sstevel@tonic-gate 1845*7c478bd9Sstevel@tonic-gate ASSERT(CPU_ACTIVE(ocp)); 1846*7c478bd9Sstevel@tonic-gate 1847*7c478bd9Sstevel@tonic-gate /* 1848*7c478bd9Sstevel@tonic-gate * End our stroll around the partition if: 1849*7c478bd9Sstevel@tonic-gate * 1850*7c478bd9Sstevel@tonic-gate * - Something became runnable on the local 1851*7c478bd9Sstevel@tonic-gate * queue 1852*7c478bd9Sstevel@tonic-gate * 1853*7c478bd9Sstevel@tonic-gate * - We're at the broadest level of locality and 1854*7c478bd9Sstevel@tonic-gate * we happen across another idle CPU. At the 1855*7c478bd9Sstevel@tonic-gate * highest level of locality, all CPUs will 1856*7c478bd9Sstevel@tonic-gate * walk the partition's CPUs in the same 1857*7c478bd9Sstevel@tonic-gate * order, so we can end our stroll taking 1858*7c478bd9Sstevel@tonic-gate * comfort in knowing the other idle CPU is 1859*7c478bd9Sstevel@tonic-gate * already covering the next portion of the 1860*7c478bd9Sstevel@tonic-gate * list. 1861*7c478bd9Sstevel@tonic-gate */ 1862*7c478bd9Sstevel@tonic-gate if (cp->cpu_disp->disp_nrunnable != 0) 1863*7c478bd9Sstevel@tonic-gate break; 1864*7c478bd9Sstevel@tonic-gate if (ocp->cpu_dispatch_pri == -1) { 1865*7c478bd9Sstevel@tonic-gate if (ocp->cpu_disp_flags & 1866*7c478bd9Sstevel@tonic-gate CPU_DISP_HALTED) 1867*7c478bd9Sstevel@tonic-gate continue; 1868*7c478bd9Sstevel@tonic-gate else if (lpl->lpl_parent == NULL) 1869*7c478bd9Sstevel@tonic-gate break; 1870*7c478bd9Sstevel@tonic-gate } 1871*7c478bd9Sstevel@tonic-gate 1872*7c478bd9Sstevel@tonic-gate /* 1873*7c478bd9Sstevel@tonic-gate * If there's only one thread and the CPU 1874*7c478bd9Sstevel@tonic-gate * is in the middle of a context switch, 1875*7c478bd9Sstevel@tonic-gate * or it's currently running the idle thread, 1876*7c478bd9Sstevel@tonic-gate * don't steal it. 
1877*7c478bd9Sstevel@tonic-gate */ 1878*7c478bd9Sstevel@tonic-gate if ((ocp->cpu_disp_flags & 1879*7c478bd9Sstevel@tonic-gate CPU_DISP_DONTSTEAL) && 1880*7c478bd9Sstevel@tonic-gate ocp->cpu_disp->disp_nrunnable == 1) 1881*7c478bd9Sstevel@tonic-gate continue; 1882*7c478bd9Sstevel@tonic-gate 1883*7c478bd9Sstevel@tonic-gate pri = ocp->cpu_disp->disp_max_unbound_pri; 1884*7c478bd9Sstevel@tonic-gate if (pri > maxpri) { 1885*7c478bd9Sstevel@tonic-gate maxpri = pri; 1886*7c478bd9Sstevel@tonic-gate tcp = ocp; 1887*7c478bd9Sstevel@tonic-gate } 1888*7c478bd9Sstevel@tonic-gate } while ((ocp = ocp->cpu_next_lpl) != ocp_start); 1889*7c478bd9Sstevel@tonic-gate 1890*7c478bd9Sstevel@tonic-gate if ((lpl_leaf = lpl->lpl_rset[++leafidx]) == NULL) { 1891*7c478bd9Sstevel@tonic-gate leafidx = 0; 1892*7c478bd9Sstevel@tonic-gate lpl_leaf = lpl->lpl_rset[leafidx]; 1893*7c478bd9Sstevel@tonic-gate } 1894*7c478bd9Sstevel@tonic-gate } while (leafidx != hint); 1895*7c478bd9Sstevel@tonic-gate 1896*7c478bd9Sstevel@tonic-gate hint = leafidx = lpl->lpl_hint; 1897*7c478bd9Sstevel@tonic-gate if ((lpl = lpl->lpl_parent) != NULL) 1898*7c478bd9Sstevel@tonic-gate lpl_leaf = lpl->lpl_rset[hint]; 1899*7c478bd9Sstevel@tonic-gate } while (!tcp && lpl); 1900*7c478bd9Sstevel@tonic-gate 1901*7c478bd9Sstevel@tonic-gate splx(s); 1902*7c478bd9Sstevel@tonic-gate 1903*7c478bd9Sstevel@tonic-gate /* 1904*7c478bd9Sstevel@tonic-gate * If another queue looks good, and there is still nothing on 1905*7c478bd9Sstevel@tonic-gate * the local queue, try to transfer one or more threads 1906*7c478bd9Sstevel@tonic-gate * from it to our queue. 1907*7c478bd9Sstevel@tonic-gate */ 1908*7c478bd9Sstevel@tonic-gate if (tcp && cp->cpu_disp->disp_nrunnable == 0) { 1909*7c478bd9Sstevel@tonic-gate tp = (disp_getbest(tcp->cpu_disp)); 1910*7c478bd9Sstevel@tonic-gate if (tp) 1911*7c478bd9Sstevel@tonic-gate return (disp_ratify(tp, kpq)); 1912*7c478bd9Sstevel@tonic-gate } 1913*7c478bd9Sstevel@tonic-gate return (NULL); 1914*7c478bd9Sstevel@tonic-gate } 1915*7c478bd9Sstevel@tonic-gate 1916*7c478bd9Sstevel@tonic-gate 1917*7c478bd9Sstevel@tonic-gate /* 1918*7c478bd9Sstevel@tonic-gate * disp_fix_unbound_pri() 1919*7c478bd9Sstevel@tonic-gate * Determines the maximum priority of unbound threads on the queue. 1920*7c478bd9Sstevel@tonic-gate * The priority is kept for the queue, but is only increased, never 1921*7c478bd9Sstevel@tonic-gate * reduced unless some CPU is looking for something on that queue. 1922*7c478bd9Sstevel@tonic-gate * 1923*7c478bd9Sstevel@tonic-gate * The priority argument is the known upper limit. 1924*7c478bd9Sstevel@tonic-gate * 1925*7c478bd9Sstevel@tonic-gate * Perhaps this should be kept accurately, but that probably means 1926*7c478bd9Sstevel@tonic-gate * separate bitmaps for bound and unbound threads. Since only idled 1927*7c478bd9Sstevel@tonic-gate * CPUs will have to do this recalculation, it seems better this way. 
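 *
 * (The loop below scans the queue-active bitmap downward from the supplied
 * priority until it finds a queue holding an unbound thread, or until the
 * bitmap is exhausted, and records the result in disp_max_unbound_pri.)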
 */
static void
disp_fix_unbound_pri(disp_t *dp, pri_t pri)
{
        kthread_t       *tp;
        dispq_t         *dq;
        ulong_t         *dqactmap = dp->disp_qactmap;
        ulong_t         mapword;
        int             wx;

        ASSERT(DISP_LOCK_HELD(&dp->disp_lock));

        ASSERT(pri >= 0);                       /* checked by caller */

        /*
         * Start the search at the next lowest priority below the supplied
         * priority. This depends on the bitmap implementation.
         */
        do {
                wx = pri >> BT_ULSHIFT;         /* index of word in map */

                /*
                 * Form mask for all lower priorities in the word.
                 */
                mapword = dqactmap[wx] & (BT_BIW(pri) - 1);

                /*
                 * Get next lower active priority.
                 */
                if (mapword != 0) {
                        pri = (wx << BT_ULSHIFT) + highbit(mapword) - 1;
                } else if (wx > 0) {
                        pri = bt_gethighbit(dqactmap, wx - 1); /* sign extend */
                        if (pri < 0)
                                break;
                } else {
                        pri = -1;
                        break;
                }

                /*
                 * Search the queue for unbound, runnable threads.
                 */
                dq = &dp->disp_q[pri];
                tp = dq->dq_first;

                while (tp && (tp->t_bound_cpu || tp->t_weakbound_cpu)) {
                        tp = tp->t_link;
                }

                /*
                 * If a thread was found, set the priority and return.
                 */
        } while (tp == NULL);

        /*
         * pri holds the maximum unbound thread priority or -1.
         */
        if (dp->disp_max_unbound_pri != pri)
                dp->disp_max_unbound_pri = pri;
}

/*
 * disp_adjust_unbound_pri() - thread is becoming unbound, so we should
 * check if the CPU to which it was previously bound should have
 * its disp_max_unbound_pri increased.
 */
void
disp_adjust_unbound_pri(kthread_t *tp)
{
        disp_t *dp;
        pri_t tpri;

        ASSERT(THREAD_LOCK_HELD(tp));

        /*
         * Don't do anything if the thread is not bound, or
         * currently not runnable or swapped out.
         */
        if (tp->t_bound_cpu == NULL ||
            tp->t_state != TS_RUN ||
            tp->t_schedflag & TS_ON_SWAPQ)
                return;

        tpri = DISP_PRIO(tp);
        dp = tp->t_bound_cpu->cpu_disp;
        ASSERT(tpri >= 0 && tpri < dp->disp_npri);
        if (tpri > dp->disp_max_unbound_pri)
                dp->disp_max_unbound_pri = tpri;
}

/*
 * disp_getbest() - de-queue the highest priority unbound runnable thread.
 *      returns with the thread unlocked and onproc
 *      but at splhigh (like disp()).
 *      returns NULL if nothing found.
 *
 *      Passed a pointer to a dispatch queue not associated with this CPU.
 */
static kthread_t *
disp_getbest(disp_t *dp)
{
        kthread_t       *tp;
        dispq_t         *dq;
        pri_t           pri;
        cpu_t           *cp;

        disp_lock_enter(&dp->disp_lock);

        /*
         * If there is nothing to run, or the CPU is in the middle of a
         * context switch of the only thread, return NULL.
         */
        pri = dp->disp_max_unbound_pri;
        if (pri == -1 ||
            (dp->disp_cpu != NULL &&
            (dp->disp_cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL) &&
            dp->disp_cpu->cpu_disp->disp_nrunnable == 1)) {
                disp_lock_exit_nopreempt(&dp->disp_lock);
                return (NULL);
        }

        dq = &dp->disp_q[pri];
        tp = dq->dq_first;

        /*
         * Skip over bound threads.
         * Bound threads can be here even though disp_max_unbound_pri
         * indicated this level. Besides, it is not always accurate because it
         * isn't reduced until another CPU looks for work.
2058*7c478bd9Sstevel@tonic-gate * Note that tp could be NULL right away due to this. 2059*7c478bd9Sstevel@tonic-gate */ 2060*7c478bd9Sstevel@tonic-gate while (tp && (tp->t_bound_cpu || tp->t_weakbound_cpu)) { 2061*7c478bd9Sstevel@tonic-gate tp = tp->t_link; 2062*7c478bd9Sstevel@tonic-gate } 2063*7c478bd9Sstevel@tonic-gate 2064*7c478bd9Sstevel@tonic-gate /* 2065*7c478bd9Sstevel@tonic-gate * If there were no unbound threads on this queue, find the queue 2066*7c478bd9Sstevel@tonic-gate * where they are and then return NULL so that other CPUs will be 2067*7c478bd9Sstevel@tonic-gate * considered. 2068*7c478bd9Sstevel@tonic-gate */ 2069*7c478bd9Sstevel@tonic-gate if (tp == NULL) { 2070*7c478bd9Sstevel@tonic-gate disp_fix_unbound_pri(dp, pri); 2071*7c478bd9Sstevel@tonic-gate disp_lock_exit_nopreempt(&dp->disp_lock); 2072*7c478bd9Sstevel@tonic-gate return (NULL); 2073*7c478bd9Sstevel@tonic-gate } 2074*7c478bd9Sstevel@tonic-gate 2075*7c478bd9Sstevel@tonic-gate /* 2076*7c478bd9Sstevel@tonic-gate * Found a runnable, unbound thread, so remove it from queue. 2077*7c478bd9Sstevel@tonic-gate * dispdeq() requires that we have the thread locked, and we do, 2078*7c478bd9Sstevel@tonic-gate * by virtue of holding the dispatch queue lock. dispdeq() will 2079*7c478bd9Sstevel@tonic-gate * put the thread in transition state, thereby dropping the dispq 2080*7c478bd9Sstevel@tonic-gate * lock. 2081*7c478bd9Sstevel@tonic-gate */ 2082*7c478bd9Sstevel@tonic-gate #ifdef DEBUG 2083*7c478bd9Sstevel@tonic-gate { 2084*7c478bd9Sstevel@tonic-gate int thread_was_on_queue; 2085*7c478bd9Sstevel@tonic-gate 2086*7c478bd9Sstevel@tonic-gate thread_was_on_queue = dispdeq(tp); /* drops disp_lock */ 2087*7c478bd9Sstevel@tonic-gate ASSERT(thread_was_on_queue); 2088*7c478bd9Sstevel@tonic-gate } 2089*7c478bd9Sstevel@tonic-gate #else /* DEBUG */ 2090*7c478bd9Sstevel@tonic-gate (void) dispdeq(tp); /* drops disp_lock */ 2091*7c478bd9Sstevel@tonic-gate #endif /* DEBUG */ 2092*7c478bd9Sstevel@tonic-gate 2093*7c478bd9Sstevel@tonic-gate tp->t_schedflag |= TS_DONT_SWAP; 2094*7c478bd9Sstevel@tonic-gate 2095*7c478bd9Sstevel@tonic-gate /* 2096*7c478bd9Sstevel@tonic-gate * Setup thread to run on the current CPU. 2097*7c478bd9Sstevel@tonic-gate */ 2098*7c478bd9Sstevel@tonic-gate cp = CPU; 2099*7c478bd9Sstevel@tonic-gate 2100*7c478bd9Sstevel@tonic-gate tp->t_disp_queue = cp->cpu_disp; 2101*7c478bd9Sstevel@tonic-gate 2102*7c478bd9Sstevel@tonic-gate cp->cpu_dispthread = tp; /* protected by spl only */ 2103*7c478bd9Sstevel@tonic-gate cp->cpu_dispatch_pri = pri; 2104*7c478bd9Sstevel@tonic-gate ASSERT(pri == DISP_PRIO(tp)); 2105*7c478bd9Sstevel@tonic-gate 2106*7c478bd9Sstevel@tonic-gate thread_onproc(tp, cp); /* set t_state to TS_ONPROC */ 2107*7c478bd9Sstevel@tonic-gate 2108*7c478bd9Sstevel@tonic-gate /* 2109*7c478bd9Sstevel@tonic-gate * Return with spl high so that swtch() won't need to raise it. 2110*7c478bd9Sstevel@tonic-gate * The disp_lock was dropped by dispdeq(). 2111*7c478bd9Sstevel@tonic-gate */ 2112*7c478bd9Sstevel@tonic-gate 2113*7c478bd9Sstevel@tonic-gate return (tp); 2114*7c478bd9Sstevel@tonic-gate } 2115*7c478bd9Sstevel@tonic-gate 2116*7c478bd9Sstevel@tonic-gate /* 2117*7c478bd9Sstevel@tonic-gate * disp_bound_common() - common routine for higher level functions 2118*7c478bd9Sstevel@tonic-gate * that check for bound threads under certain conditions. 
2119*7c478bd9Sstevel@tonic-gate * If 'threadlistsafe' is set then there is no need to acquire 2120*7c478bd9Sstevel@tonic-gate * pidlock to stop the thread list from changing (eg, if 2121*7c478bd9Sstevel@tonic-gate * disp_bound_* is called with cpus paused). 2122*7c478bd9Sstevel@tonic-gate */ 2123*7c478bd9Sstevel@tonic-gate static int 2124*7c478bd9Sstevel@tonic-gate disp_bound_common(cpu_t *cp, int threadlistsafe, int flag) 2125*7c478bd9Sstevel@tonic-gate { 2126*7c478bd9Sstevel@tonic-gate int found = 0; 2127*7c478bd9Sstevel@tonic-gate kthread_t *tp; 2128*7c478bd9Sstevel@tonic-gate 2129*7c478bd9Sstevel@tonic-gate ASSERT(flag); 2130*7c478bd9Sstevel@tonic-gate 2131*7c478bd9Sstevel@tonic-gate if (!threadlistsafe) 2132*7c478bd9Sstevel@tonic-gate mutex_enter(&pidlock); 2133*7c478bd9Sstevel@tonic-gate tp = curthread; /* faster than allthreads */ 2134*7c478bd9Sstevel@tonic-gate do { 2135*7c478bd9Sstevel@tonic-gate if (tp->t_state != TS_FREE) { 2136*7c478bd9Sstevel@tonic-gate /* 2137*7c478bd9Sstevel@tonic-gate * If an interrupt thread is busy, but the 2138*7c478bd9Sstevel@tonic-gate * caller doesn't care (i.e. BOUND_INTR is off), 2139*7c478bd9Sstevel@tonic-gate * then just ignore it and continue through. 2140*7c478bd9Sstevel@tonic-gate */ 2141*7c478bd9Sstevel@tonic-gate if ((tp->t_flag & T_INTR_THREAD) && 2142*7c478bd9Sstevel@tonic-gate !(flag & BOUND_INTR)) 2143*7c478bd9Sstevel@tonic-gate continue; 2144*7c478bd9Sstevel@tonic-gate 2145*7c478bd9Sstevel@tonic-gate /* 2146*7c478bd9Sstevel@tonic-gate * Skip the idle thread for the CPU 2147*7c478bd9Sstevel@tonic-gate * we're about to set offline. 2148*7c478bd9Sstevel@tonic-gate */ 2149*7c478bd9Sstevel@tonic-gate if (tp == cp->cpu_idle_thread) 2150*7c478bd9Sstevel@tonic-gate continue; 2151*7c478bd9Sstevel@tonic-gate 2152*7c478bd9Sstevel@tonic-gate /* 2153*7c478bd9Sstevel@tonic-gate * Skip the pause thread for the CPU 2154*7c478bd9Sstevel@tonic-gate * we're about to set offline. 2155*7c478bd9Sstevel@tonic-gate */ 2156*7c478bd9Sstevel@tonic-gate if (tp == cp->cpu_pause_thread) 2157*7c478bd9Sstevel@tonic-gate continue; 2158*7c478bd9Sstevel@tonic-gate 2159*7c478bd9Sstevel@tonic-gate if ((flag & BOUND_CPU) && 2160*7c478bd9Sstevel@tonic-gate (tp->t_bound_cpu == cp || 2161*7c478bd9Sstevel@tonic-gate tp->t_bind_cpu == cp->cpu_id || 2162*7c478bd9Sstevel@tonic-gate tp->t_weakbound_cpu == cp)) { 2163*7c478bd9Sstevel@tonic-gate found = 1; 2164*7c478bd9Sstevel@tonic-gate break; 2165*7c478bd9Sstevel@tonic-gate } 2166*7c478bd9Sstevel@tonic-gate 2167*7c478bd9Sstevel@tonic-gate if ((flag & BOUND_PARTITION) && 2168*7c478bd9Sstevel@tonic-gate (tp->t_cpupart == cp->cpu_part)) { 2169*7c478bd9Sstevel@tonic-gate found = 1; 2170*7c478bd9Sstevel@tonic-gate break; 2171*7c478bd9Sstevel@tonic-gate } 2172*7c478bd9Sstevel@tonic-gate } 2173*7c478bd9Sstevel@tonic-gate } while ((tp = tp->t_next) != curthread && found == 0); 2174*7c478bd9Sstevel@tonic-gate if (!threadlistsafe) 2175*7c478bd9Sstevel@tonic-gate mutex_exit(&pidlock); 2176*7c478bd9Sstevel@tonic-gate return (found); 2177*7c478bd9Sstevel@tonic-gate } 2178*7c478bd9Sstevel@tonic-gate 2179*7c478bd9Sstevel@tonic-gate /* 2180*7c478bd9Sstevel@tonic-gate * disp_bound_threads - return nonzero if threads are bound to the processor. 2181*7c478bd9Sstevel@tonic-gate * Called infrequently. Keep this simple. 2182*7c478bd9Sstevel@tonic-gate * Includes threads that are asleep or stopped but not onproc. 
 */
int
disp_bound_threads(cpu_t *cp, int threadlistsafe)
{
	return (disp_bound_common(cp, threadlistsafe, BOUND_CPU));
}

/*
 * disp_bound_anythreads - return nonzero if _any_ threads are bound
 *	to the given processor, including interrupt threads.
 */
int
disp_bound_anythreads(cpu_t *cp, int threadlistsafe)
{
	return (disp_bound_common(cp, threadlistsafe, BOUND_CPU | BOUND_INTR));
}

/*
 * disp_bound_partition - return nonzero if threads are bound to the same
 *	partition as the processor.
 *	Called infrequently.  Keep this simple.
 *	Includes threads that are asleep or stopped but not onproc.
 */
int
disp_bound_partition(cpu_t *cp, int threadlistsafe)
{
	return (disp_bound_common(cp, threadlistsafe, BOUND_PARTITION));
}
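
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * a caller such as the CPU offline or partition-move path might consult
 * the predicates above roughly as shown below.  The guard macro and the
 * helper name are hypothetical; the real checks live in cpu_offline() and
 * cpupart_move_cpu() and are more involved.
 */
#ifdef	DISP_BOUND_EXAMPLE
static void
example_report_bindings(cpu_t *cp)
{
	/*
	 * threadlistsafe == 0: disp_bound_common() takes pidlock itself
	 * to keep the thread list stable while it scans.
	 */
	if (disp_bound_threads(cp, 0))
		cmn_err(CE_NOTE, "non-interrupt threads are bound to cpu %d",
		    cp->cpu_id);
	if (disp_bound_anythreads(cp, 0))
		cmn_err(CE_NOTE, "threads (including interrupt threads) are "
		    "bound to cpu %d", cp->cpu_id);
	if (disp_bound_partition(cp, 0))
		cmn_err(CE_NOTE, "threads are bound to cpu %d's partition",
		    cp->cpu_id);
}
#endif	/* DISP_BOUND_EXAMPLE */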

/*
 * disp_cpu_inactive - make a CPU inactive by moving all of its unbound
 *	threads to other CPUs.
 */
void
disp_cpu_inactive(cpu_t *cp)
{
	kthread_t	*tp;
	disp_t		*dp = cp->cpu_disp;
	dispq_t		*dq;
	pri_t		pri;
	int		wasonq;

	disp_lock_enter(&dp->disp_lock);
	while ((pri = dp->disp_max_unbound_pri) != -1) {
		dq = &dp->disp_q[pri];
		tp = dq->dq_first;

		/*
		 * Skip over bound threads.
		 */
		while (tp != NULL && tp->t_bound_cpu != NULL) {
			tp = tp->t_link;
		}

		if (tp == NULL) {
			/* disp_max_unbound_pri must be inaccurate, so fix it */
			disp_fix_unbound_pri(dp, pri);
			continue;
		}

		wasonq = dispdeq(tp);		/* drops disp_lock */
		ASSERT(wasonq);
		ASSERT(tp->t_weakbound_cpu == NULL);

		setbackdq(tp);
		/*
		 * Called from cpu_offline:
		 *
		 * cp has already been removed from the list of active cpus
		 * and tp->t_cpu has been changed so there is no risk of
		 * tp ending up back on cp.
		 *
		 * Called from cpupart_move_cpu:
		 *
		 * The cpu has moved to a new cpupart.  Any threads that
		 * were on its dispatch queues before the move remain
		 * in the old partition and can't run in the new partition.
		 */
		ASSERT(tp->t_cpu != cp);
		thread_unlock(tp);

		disp_lock_enter(&dp->disp_lock);
	}
	disp_lock_exit(&dp->disp_lock);
}

/*
 * disp_lowpri_cpu - find CPU running the lowest priority thread.
 *	The hint passed in is used as a starting point so we don't favor
 *	CPU 0 or any other CPU.  The caller should pass in the most recently
 *	used CPU for the thread.
 *
 *	The lgroup and priority are used to determine the best CPU to run on
 *	in a NUMA machine.  The lgroup specifies which CPUs are closest while
 *	the thread priority will indicate whether the thread will actually run
 *	there.  To pick the best CPU, the CPUs inside and outside of the given
 *	lgroup which are running the lowest priority threads are found.  The
 *	remote CPU is chosen only if the thread will not run locally on a CPU
 *	within the lgroup, but will run on the remote CPU.  If the thread
 *	cannot immediately run on any CPU, the best local CPU will be chosen.
 *
 *	The lpl specified also identifies the cpu partition from which
 *	disp_lowpri_cpu should select a CPU.
 *
 *	curcpu is used to indicate that disp_lowpri_cpu is being called on
 *	behalf of the current thread (curthread is looking for a new cpu).
 *	In this case, cpu_dispatch_pri for this thread's cpu should be
 *	ignored.
 *
 *	If a cpu is the target of an offline request then try to avoid it.
 *
 *	This function must be called at either high SPL, or with preemption
 *	disabled, so that the "hint" CPU cannot be removed from the online
 *	CPU list while we are traversing it.
 */
cpu_t *
disp_lowpri_cpu(cpu_t *hint, lpl_t *lpl, pri_t tpri, cpu_t *curcpu)
{
	cpu_t	*bestcpu;
	cpu_t	*besthomecpu;
	cpu_t	*cp, *cpstart;

	pri_t	bestpri;
	pri_t	cpupri;

	klgrpset_t	done;
	klgrpset_t	cur_set;

	lpl_t		*lpl_iter, *lpl_leaf;
	int		i;

	/*
	 * Scan for a CPU currently running the lowest priority thread.
	 * Cannot get cpu_lock here because it is adaptive.
	 * We do not require lock on CPU list.
	 */
	ASSERT(hint != NULL);
	ASSERT(lpl != NULL);
	ASSERT(lpl->lpl_ncpu > 0);

	/*
	 * First examine local CPUs.  Note that it's possible the hint CPU
	 * passed in is remote to the specified home lgroup.  If our priority
	 * isn't high enough to run immediately at home, then examine CPUs
	 * remote to our home lgroup.
	 * We would like to give preference to CPUs closest to "home".
	 * If we can't find a CPU where we'll run at a given level
	 * of locality, we expand our search to include the next level.
	 */
	bestcpu = besthomecpu = NULL;
	klgrpset_clear(done);
	/* start with lpl we were passed */

	lpl_iter = lpl;

	do {

		bestpri = SHRT_MAX;
		klgrpset_clear(cur_set);

		for (i = 0; i < lpl_iter->lpl_nrset; i++) {
			lpl_leaf = lpl_iter->lpl_rset[i];
			if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid))
				continue;

			klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid);

			if (hint->cpu_lpl == lpl_leaf)
				cp = cpstart = hint;
			else
				cp = cpstart = lpl_leaf->lpl_cpus;

			do {

				if (cp == curcpu)
					cpupri = -1;
				else if (cp == cpu_inmotion)
					cpupri = SHRT_MAX;
				else
					cpupri = cp->cpu_dispatch_pri;

				if (cp->cpu_disp->disp_maxrunpri > cpupri)
					cpupri = cp->cpu_disp->disp_maxrunpri;
				if (cp->cpu_chosen_level > cpupri)
					cpupri = cp->cpu_chosen_level;
				if (cpupri < bestpri) {
					if (CPU_IDLING(cpupri)) {
						ASSERT((cp->cpu_flags &
						    CPU_QUIESCED) == 0);
						return (cp);
					}
					bestcpu = cp;
					bestpri = cpupri;
				}
			} while ((cp = cp->cpu_next_lpl) != cpstart);
		}

		if (bestcpu && (tpri > bestpri)) {
			ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0);
			return (bestcpu);
		}
		if (besthomecpu == NULL)
			besthomecpu = bestcpu;
		/*
		 * Add the lgrps we just considered to the "done" set
		 */
		klgrpset_or(done, cur_set);

	} while ((lpl_iter = lpl_iter->lpl_parent) != NULL);

	/*
	 * The specified priority isn't high enough to run immediately
	 * anywhere, so just return the best CPU from the home lgroup.
	 */
	ASSERT((besthomecpu->cpu_flags & CPU_QUIESCED) == 0);
	return (besthomecpu);
}
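
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * inside the search loop above, the effective "cost" of a candidate CPU is
 * the highest of the priority it is currently dispatching, the best
 * priority queued on it, and any priority already promised to it via
 * cpu_chosen_level.  The hypothetical helper below restates that rule in
 * isolation; it omits the special cases for curcpu and cpu_inmotion that
 * disp_lowpri_cpu() applies first.
 */
#ifdef	DISP_LOWPRI_EXAMPLE
static pri_t
example_cpu_cost(cpu_t *cp)
{
	pri_t	cost = cp->cpu_dispatch_pri;

	if (cp->cpu_disp->disp_maxrunpri > cost)
		cost = cp->cpu_disp->disp_maxrunpri;
	if (cp->cpu_chosen_level > cost)
		cost = cp->cpu_chosen_level;
	return (cost);		/* a candidate wins only if tpri > cost */
}
#endif	/* DISP_LOWPRI_EXAMPLE */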

/*
 * This routine provides the generic idle cpu function for all processors.
 * If a processor has some specific code to execute when idle (say, to stop
 * the pipeline and save power) then that routine should be defined in the
 * processor-specific code (module_xx.c) and the global variable idle_cpu
 * set to that function.
 */
static void
generic_idle_cpu(void)
{
}

/*ARGSUSED*/
static void
generic_enq_thread(cpu_t *cpu, int bound)
{
}

/*
 * Select a CPU for this thread to run on.  Choose t->t_cpu unless:
 *	- t->t_cpu is not in this thread's assigned lgrp
 *	- the time since the thread last came off t->t_cpu exceeds the
 *	  rechoose time for this cpu (ignore this if t is curthread in
 *	  which case it's on CPU and t->t_disp_time is inaccurate)
 *	- t->t_cpu is presently the target of an offline or partition move
 *	  request
 */
static cpu_t *
cpu_choose(kthread_t *t, pri_t tpri)
{
	ASSERT(tpri < kpqpri);

	if ((((lbolt - t->t_disp_time) > t->t_cpu->cpu_rechoose) &&
	    t != curthread) || t->t_cpu == cpu_inmotion) {
		return (disp_lowpri_cpu(t->t_cpu, t->t_lpl, tpri, NULL));
	}

	/*
	 * Take a trip through disp_lowpri_cpu() if the thread was
	 * running outside its home lgroup
	 */
	if (!klgrpset_ismember(t->t_lpl->lpl_lgrp->lgrp_set[LGRP_RSRC_CPU],
	    t->t_cpu->cpu_lpl->lpl_lgrpid)) {
		return (disp_lowpri_cpu(t->t_cpu, t->t_lpl, tpri,
		    (t == curthread) ? t->t_cpu : NULL));
	}
	return (t->t_cpu);
}
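
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * the enqueue paths (setbackdq()/setfrontdq()) pick a target CPU roughly
 * along these lines for threads whose priority is below kpqpri: honor a
 * hard or weak binding if one exists, otherwise let cpu_choose() decide.
 * The guard macro and helper name are hypothetical, and the real enqueue
 * logic handles additional cases (kpreempt queue, partitions, curthread).
 */
#ifdef	DISP_CHOOSE_EXAMPLE
static cpu_t *
example_pick_cpu(kthread_t *tp, pri_t tpri)
{
	ASSERT(tpri < kpqpri);			/* cpu_choose() requires this */

	if (tp->t_bound_cpu != NULL)
		return (tp->t_bound_cpu);	/* hard binding wins */
	if (tp->t_weakbound_cpu != NULL)
		return (tp->t_weakbound_cpu);	/* then weak binding */
	return (cpu_choose(tp, tpri));
}
#endif	/* DISP_CHOOSE_EXAMPLE */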