17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate * CDDL HEADER START
37c478bd9Sstevel@tonic-gate *
47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
5ab761399Sesaxe * Common Development and Distribution License (the "License").
6ab761399Sesaxe * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate *
87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate * and limitations under the License.
127c478bd9Sstevel@tonic-gate *
137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate *
197c478bd9Sstevel@tonic-gate * CDDL HEADER END
207c478bd9Sstevel@tonic-gate */
217c478bd9Sstevel@tonic-gate /*
220e751525SEric Saxe * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
237c478bd9Sstevel@tonic-gate * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate */
257c478bd9Sstevel@tonic-gate
267c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
277c478bd9Sstevel@tonic-gate /* All Rights Reserved */
287c478bd9Sstevel@tonic-gate
297c478bd9Sstevel@tonic-gate
307c478bd9Sstevel@tonic-gate #include <sys/types.h>
317c478bd9Sstevel@tonic-gate #include <sys/param.h>
327c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
337c478bd9Sstevel@tonic-gate #include <sys/signal.h>
347c478bd9Sstevel@tonic-gate #include <sys/user.h>
357c478bd9Sstevel@tonic-gate #include <sys/systm.h>
367c478bd9Sstevel@tonic-gate #include <sys/sysinfo.h>
377c478bd9Sstevel@tonic-gate #include <sys/var.h>
387c478bd9Sstevel@tonic-gate #include <sys/errno.h>
397c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
407c478bd9Sstevel@tonic-gate #include <sys/debug.h>
417c478bd9Sstevel@tonic-gate #include <sys/inline.h>
427c478bd9Sstevel@tonic-gate #include <sys/disp.h>
437c478bd9Sstevel@tonic-gate #include <sys/class.h>
447c478bd9Sstevel@tonic-gate #include <sys/bitmap.h>
457c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
467c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
477c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
487c478bd9Sstevel@tonic-gate #include <sys/tnf.h>
497c478bd9Sstevel@tonic-gate #include <sys/cpupart.h>
507c478bd9Sstevel@tonic-gate #include <sys/lgrp.h>
51fb2f18f8Sesaxe #include <sys/pg.h>
52fb2f18f8Sesaxe #include <sys/cmt.h>
53fb2f18f8Sesaxe #include <sys/bitset.h>
547c478bd9Sstevel@tonic-gate #include <sys/schedctl.h>
557c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
567c478bd9Sstevel@tonic-gate #include <sys/dtrace.h>
577c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
58057452c6Sjj209869 #include <sys/archsystm.h>
597c478bd9Sstevel@tonic-gate
607c478bd9Sstevel@tonic-gate #include <vm/as.h>
617c478bd9Sstevel@tonic-gate
627c478bd9Sstevel@tonic-gate #define BOUND_CPU 0x1
637c478bd9Sstevel@tonic-gate #define BOUND_PARTITION 0x2
647c478bd9Sstevel@tonic-gate #define BOUND_INTR 0x4
657c478bd9Sstevel@tonic-gate
667c478bd9Sstevel@tonic-gate /* Dispatch queue allocation structure and functions */
677c478bd9Sstevel@tonic-gate struct disp_queue_info {
687c478bd9Sstevel@tonic-gate disp_t *dp;
697c478bd9Sstevel@tonic-gate dispq_t *olddispq;
707c478bd9Sstevel@tonic-gate dispq_t *newdispq;
717c478bd9Sstevel@tonic-gate ulong_t *olddqactmap;
727c478bd9Sstevel@tonic-gate ulong_t *newdqactmap;
737c478bd9Sstevel@tonic-gate int oldnglobpris;
747c478bd9Sstevel@tonic-gate };
757c478bd9Sstevel@tonic-gate static void disp_dq_alloc(struct disp_queue_info *dptr, int numpris,
767c478bd9Sstevel@tonic-gate disp_t *dp);
777c478bd9Sstevel@tonic-gate static void disp_dq_assign(struct disp_queue_info *dptr, int numpris);
787c478bd9Sstevel@tonic-gate static void disp_dq_free(struct disp_queue_info *dptr);
797c478bd9Sstevel@tonic-gate
807c478bd9Sstevel@tonic-gate /* platform-specific routine to call when processor is idle */
817c478bd9Sstevel@tonic-gate static void generic_idle_cpu();
827c478bd9Sstevel@tonic-gate void (*idle_cpu)() = generic_idle_cpu;
837c478bd9Sstevel@tonic-gate
847c478bd9Sstevel@tonic-gate /* routines invoked when a CPU enters/exits the idle loop */
857c478bd9Sstevel@tonic-gate static void idle_enter();
867c478bd9Sstevel@tonic-gate static void idle_exit();
877c478bd9Sstevel@tonic-gate
887c478bd9Sstevel@tonic-gate /* platform-specific routine to call when thread is enqueued */
897c478bd9Sstevel@tonic-gate static void generic_enq_thread(cpu_t *, int);
907c478bd9Sstevel@tonic-gate void (*disp_enq_thread)(cpu_t *, int) = generic_enq_thread;
917c478bd9Sstevel@tonic-gate
927c478bd9Sstevel@tonic-gate pri_t kpreemptpri; /* priority where kernel preemption applies */
937c478bd9Sstevel@tonic-gate pri_t upreemptpri = 0; /* priority where normal preemption applies */
947c478bd9Sstevel@tonic-gate pri_t intr_pri; /* interrupt thread priority base level */
957c478bd9Sstevel@tonic-gate
96685679f7Sakolb #define KPQPRI -1 /* pri where cpu affinity is dropped for kpq */
977c478bd9Sstevel@tonic-gate pri_t kpqpri = KPQPRI; /* can be set in /etc/system */
987c478bd9Sstevel@tonic-gate disp_t cpu0_disp; /* boot CPU's dispatch queue */
997c478bd9Sstevel@tonic-gate disp_lock_t swapped_lock; /* lock swapped threads and swap queue */
1007c478bd9Sstevel@tonic-gate int nswapped; /* total number of swapped threads */
1017c478bd9Sstevel@tonic-gate void disp_swapped_enq(kthread_t *tp);
1027c478bd9Sstevel@tonic-gate static void disp_swapped_setrun(kthread_t *tp);
1037c478bd9Sstevel@tonic-gate static void cpu_resched(cpu_t *cp, pri_t tpri);
1047c478bd9Sstevel@tonic-gate
1057c478bd9Sstevel@tonic-gate /*
1067c478bd9Sstevel@tonic-gate * If this is set, only interrupt threads will cause kernel preemptions.
1077c478bd9Sstevel@tonic-gate * This is done by changing the value of kpreemptpri. kpreemptpri
108*9a718033SBryan Cantrill * will either be the max sysclass pri or the min interrupt pri.
1097c478bd9Sstevel@tonic-gate */
1107c478bd9Sstevel@tonic-gate int only_intr_kpreempt;
1117c478bd9Sstevel@tonic-gate
1127c478bd9Sstevel@tonic-gate extern void set_idle_cpu(int cpun);
1137c478bd9Sstevel@tonic-gate extern void unset_idle_cpu(int cpun);
1147c478bd9Sstevel@tonic-gate static void setkpdq(kthread_t *tp, int borf);
1157c478bd9Sstevel@tonic-gate #define SETKP_BACK 0
1167c478bd9Sstevel@tonic-gate #define SETKP_FRONT 1
1177c478bd9Sstevel@tonic-gate /*
1187c478bd9Sstevel@tonic-gate * Parameter that determines how recently a thread must have run
1197c478bd9Sstevel@tonic-gate * on the CPU to be considered loosely-bound to that CPU to reduce
1207c478bd9Sstevel@tonic-gate * cold cache effects. The interval is in hertz.
1217c478bd9Sstevel@tonic-gate */
122fb2f18f8Sesaxe #define RECHOOSE_INTERVAL 3
1237c478bd9Sstevel@tonic-gate int rechoose_interval = RECHOOSE_INTERVAL;
1247c478bd9Sstevel@tonic-gate
125685679f7Sakolb /*
126685679f7Sakolb * Parameter that determines how long (in nanoseconds) a thread must
127685679f7Sakolb * be sitting on a run queue before it can be stolen by another CPU
128685679f7Sakolb * to reduce migrations. The interval is in nanoseconds.
129685679f7Sakolb *
13081588590Sbholler * The nosteal_nsec should be set by platform code cmp_set_nosteal_interval()
13181588590Sbholler * to an appropriate value. nosteal_nsec is set to NOSTEAL_UNINITIALIZED
 * here, indicating that it is uninitialized.
13381588590Sbholler * Setting nosteal_nsec to 0 effectively disables the nosteal 'protection'.
13481588590Sbholler *
135685679f7Sakolb */
13681588590Sbholler #define NOSTEAL_UNINITIALIZED (-1)
13781588590Sbholler hrtime_t nosteal_nsec = NOSTEAL_UNINITIALIZED;
13881588590Sbholler extern void cmp_set_nosteal_interval(void);
139685679f7Sakolb
1407c478bd9Sstevel@tonic-gate id_t defaultcid; /* system "default" class; see dispadmin(1M) */
1417c478bd9Sstevel@tonic-gate
1427c478bd9Sstevel@tonic-gate disp_lock_t transition_lock; /* lock on transitioning threads */
1437c478bd9Sstevel@tonic-gate disp_lock_t stop_lock; /* lock on stopped threads */
1447c478bd9Sstevel@tonic-gate
1457c478bd9Sstevel@tonic-gate static void cpu_dispqalloc(int numpris);
1467c478bd9Sstevel@tonic-gate
147685679f7Sakolb /*
148685679f7Sakolb * This gets returned by disp_getwork/disp_getbest if we couldn't steal
149685679f7Sakolb * a thread because it was sitting on its run queue for a very short
150685679f7Sakolb * period of time.
151685679f7Sakolb */
152685679f7Sakolb #define T_DONTSTEAL (kthread_t *)(-1) /* returned by disp_getwork/getbest */
153685679f7Sakolb
1547c478bd9Sstevel@tonic-gate static kthread_t *disp_getwork(cpu_t *to);
1557c478bd9Sstevel@tonic-gate static kthread_t *disp_getbest(disp_t *from);
1567c478bd9Sstevel@tonic-gate static kthread_t *disp_ratify(kthread_t *tp, disp_t *kpq);
1577c478bd9Sstevel@tonic-gate
1587c478bd9Sstevel@tonic-gate void swtch_to(kthread_t *);
1597c478bd9Sstevel@tonic-gate
1607c478bd9Sstevel@tonic-gate /*
1617c478bd9Sstevel@tonic-gate * dispatcher and scheduler initialization
1627c478bd9Sstevel@tonic-gate */
1637c478bd9Sstevel@tonic-gate
1647c478bd9Sstevel@tonic-gate /*
1657c478bd9Sstevel@tonic-gate * disp_setup - Common code to calculate and allocate dispatcher
1667c478bd9Sstevel@tonic-gate * variables and structures based on the maximum priority.
1677c478bd9Sstevel@tonic-gate */
1687c478bd9Sstevel@tonic-gate static void
disp_setup(pri_t maxglobpri,pri_t oldnglobpris)1697c478bd9Sstevel@tonic-gate disp_setup(pri_t maxglobpri, pri_t oldnglobpris)
1707c478bd9Sstevel@tonic-gate {
1717c478bd9Sstevel@tonic-gate pri_t newnglobpris;
1727c478bd9Sstevel@tonic-gate
1737c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock));
1747c478bd9Sstevel@tonic-gate
1757c478bd9Sstevel@tonic-gate newnglobpris = maxglobpri + 1 + LOCK_LEVEL;
1767c478bd9Sstevel@tonic-gate
1777c478bd9Sstevel@tonic-gate if (newnglobpris > oldnglobpris) {
1787c478bd9Sstevel@tonic-gate /*
1797c478bd9Sstevel@tonic-gate * Allocate new kp queues for each CPU partition.
1807c478bd9Sstevel@tonic-gate */
1817c478bd9Sstevel@tonic-gate cpupart_kpqalloc(newnglobpris);
1827c478bd9Sstevel@tonic-gate
1837c478bd9Sstevel@tonic-gate /*
1847c478bd9Sstevel@tonic-gate * Allocate new dispatch queues for each CPU.
1857c478bd9Sstevel@tonic-gate */
1867c478bd9Sstevel@tonic-gate cpu_dispqalloc(newnglobpris);
1877c478bd9Sstevel@tonic-gate
1887c478bd9Sstevel@tonic-gate /*
1897c478bd9Sstevel@tonic-gate * compute new interrupt thread base priority
1907c478bd9Sstevel@tonic-gate */
1917c478bd9Sstevel@tonic-gate intr_pri = maxglobpri;
1927c478bd9Sstevel@tonic-gate if (only_intr_kpreempt) {
1937c478bd9Sstevel@tonic-gate kpreemptpri = intr_pri + 1;
1947c478bd9Sstevel@tonic-gate if (kpqpri == KPQPRI)
1957c478bd9Sstevel@tonic-gate kpqpri = kpreemptpri;
1967c478bd9Sstevel@tonic-gate }
1977c478bd9Sstevel@tonic-gate v.v_nglobpris = newnglobpris;
1987c478bd9Sstevel@tonic-gate }
1997c478bd9Sstevel@tonic-gate }
2007c478bd9Sstevel@tonic-gate
2017c478bd9Sstevel@tonic-gate /*
2027c478bd9Sstevel@tonic-gate * dispinit - Called to initialize all loaded classes and the
2037c478bd9Sstevel@tonic-gate * dispatcher framework.
2047c478bd9Sstevel@tonic-gate */
/*
 * dispinit - Called to initialize all loaded classes and the
 *	dispatcher framework.
 *
 * Runs once at boot: initializes the dispatcher locks, the default CPU
 * partition, every pre-installed scheduling class, and the kernel
 * preemption priority, then sizes the dispatch queues via disp_setup().
 * Panics if the default scheduling class cannot be loaded.
 */
void
dispinit(void)
{
	id_t	cid;
	pri_t	maxglobpri;
	pri_t	cl_maxglobpri;

	/* -1 means no class has reported a maximum global priority yet. */
	maxglobpri = -1;

	/*
	 * Initialize transition lock, which will always be set.
	 */
	DISP_LOCK_INIT(&transition_lock);
	disp_lock_enter_high(&transition_lock);
	DISP_LOCK_INIT(&stop_lock);

	mutex_enter(&cpu_lock);
	/* Boot CPU's queue starts empty: no runnable or unbound threads. */
	CPU->cpu_disp->disp_maxrunpri = -1;
	CPU->cpu_disp->disp_max_unbound_pri = -1;

	/*
	 * Initialize the default CPU partition.
	 */
	cpupart_initialize_default();
	/*
	 * Call the class specific initialization functions for
	 * all pre-installed schedulers.
	 *
	 * We pass the size of a class specific parameter
	 * buffer to each of the initialization functions
	 * to try to catch problems with backward compatibility
	 * of class modules.
	 *
	 * For example a new class module running on an old system
	 * which didn't provide sufficiently large parameter buffers
	 * would be bad news. Class initialization modules can check for
	 * this and take action if they detect a problem.
	 */

	for (cid = 0; cid < nclass; cid++) {
		sclass_t *sc;

		sc = &sclass[cid];
		if (SCHED_INSTALLED(sc)) {
			cl_maxglobpri = sc->cl_init(cid, PC_CLPARMSZ,
			    &sc->cl_funcs);
			/* Track the highest priority used by any class. */
			if (cl_maxglobpri > maxglobpri)
				maxglobpri = cl_maxglobpri;
		}
	}

	/*
	 * Historically, kpreemptpri was set to v_maxsyspri + 1 -- which is
	 * to say, maxclsyspri + 1.  However, over time, the system has used
	 * more and more asynchronous kernel threads, with an increasing number
	 * of these doing work on direct behalf of higher-level software (e.g.,
	 * network processing).  This has led to potential priority inversions:
	 * threads doing low-priority lengthy kernel work can effectively
	 * delay kernel-level processing of higher-priority data.  To minimize
	 * such inversions, we set kpreemptpri to be v_maxsyspri; anything in
	 * the kernel that runs at maxclsyspri will therefore induce kernel
	 * preemption, and this priority should be used if/when an asynchronous
	 * thread (or, as is often the case, task queue) is performing a task
	 * on behalf of higher-level software (or any task that is otherwise
	 * latency-sensitive).
	 */
	kpreemptpri = (pri_t)v.v_maxsyspri;
	if (kpqpri == KPQPRI)
		kpqpri = kpreemptpri;

	ASSERT(maxglobpri >= 0);
	/* First sizing of the dispatch queues: old priority count is 0. */
	disp_setup(maxglobpri, 0);

	mutex_exit(&cpu_lock);

	/*
	 * Platform specific sticky scheduler setup.
	 */
	if (nosteal_nsec == NOSTEAL_UNINITIALIZED)
		cmp_set_nosteal_interval();

	/*
	 * Get the default class ID; this may be later modified via
	 * dispadmin(1M).  This will load the class (normally TS) and that will
	 * call disp_add(), which is why we had to drop cpu_lock first.
	 */
	if (getcid(defaultclass, &defaultcid) != 0) {
		cmn_err(CE_PANIC, "Couldn't load default scheduling class '%s'",
		    defaultclass);
	}
}
2967c478bd9Sstevel@tonic-gate
2977c478bd9Sstevel@tonic-gate /*
2987c478bd9Sstevel@tonic-gate * disp_add - Called with class pointer to initialize the dispatcher
2997c478bd9Sstevel@tonic-gate * for a newly loaded class.
3007c478bd9Sstevel@tonic-gate */
3017c478bd9Sstevel@tonic-gate void
disp_add(sclass_t * clp)3027c478bd9Sstevel@tonic-gate disp_add(sclass_t *clp)
3037c478bd9Sstevel@tonic-gate {
3047c478bd9Sstevel@tonic-gate pri_t maxglobpri;
3057c478bd9Sstevel@tonic-gate pri_t cl_maxglobpri;
3067c478bd9Sstevel@tonic-gate
3077c478bd9Sstevel@tonic-gate mutex_enter(&cpu_lock);
3087c478bd9Sstevel@tonic-gate /*
3097c478bd9Sstevel@tonic-gate * Initialize the scheduler class.
3107c478bd9Sstevel@tonic-gate */
3117c478bd9Sstevel@tonic-gate maxglobpri = (pri_t)(v.v_nglobpris - LOCK_LEVEL - 1);
3127c478bd9Sstevel@tonic-gate cl_maxglobpri = clp->cl_init(clp - sclass, PC_CLPARMSZ, &clp->cl_funcs);
3137c478bd9Sstevel@tonic-gate if (cl_maxglobpri > maxglobpri)
3147c478bd9Sstevel@tonic-gate maxglobpri = cl_maxglobpri;
3157c478bd9Sstevel@tonic-gate
3167c478bd9Sstevel@tonic-gate /*
3177c478bd9Sstevel@tonic-gate * Save old queue information. Since we're initializing a
3187c478bd9Sstevel@tonic-gate * new scheduling class which has just been loaded, then
3197c478bd9Sstevel@tonic-gate * the size of the dispq may have changed. We need to handle
3207c478bd9Sstevel@tonic-gate * that here.
3217c478bd9Sstevel@tonic-gate */
3227c478bd9Sstevel@tonic-gate disp_setup(maxglobpri, v.v_nglobpris);
3237c478bd9Sstevel@tonic-gate
3247c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock);
3257c478bd9Sstevel@tonic-gate }
3267c478bd9Sstevel@tonic-gate
3277c478bd9Sstevel@tonic-gate
3287c478bd9Sstevel@tonic-gate /*
3297c478bd9Sstevel@tonic-gate * For each CPU, allocate new dispatch queues
3307c478bd9Sstevel@tonic-gate * with the stated number of priorities.
3317c478bd9Sstevel@tonic-gate */
/*
 * For each CPU, allocate new dispatch queues
 * with the stated number of priorities.
 *
 * All memory is allocated up front, the CPUs are paused while the new
 * queues are swapped in, and the old memory is freed only after the
 * CPUs are restarted -- kmem must never be entered while CPUs are
 * stopped, since it may sleep.
 */
static void
cpu_dispqalloc(int numpris)
{
	cpu_t	*cpup;
	struct disp_queue_info	*disp_mem;
	int i, num;

	ASSERT(MUTEX_HELD(&cpu_lock));

	disp_mem = kmem_zalloc(NCPU *
	    sizeof (struct disp_queue_info), KM_SLEEP);

	/*
	 * This routine must allocate all of the memory before stopping
	 * the cpus because it must not sleep in kmem_alloc while the
	 * CPUs are stopped.  Locks they hold will not be freed until they
	 * are restarted.
	 */
	i = 0;
	cpup = cpu_list;
	do {
		disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp);
		i++;
		cpup = cpup->cpu_next;
	} while (cpup != cpu_list);
	num = i;

	/* Swap in the new queues atomically with respect to the CPUs. */
	pause_cpus(NULL, NULL);
	for (i = 0; i < num; i++)
		disp_dq_assign(&disp_mem[i], numpris);
	start_cpus();

	/*
	 * I must free all of the memory after starting the cpus because
	 * I can not risk sleeping in kmem_free while the cpus are stopped.
	 */
	for (i = 0; i < num; i++)
		disp_dq_free(&disp_mem[i]);

	kmem_free(disp_mem, NCPU * sizeof (struct disp_queue_info));
}
3737c478bd9Sstevel@tonic-gate
3747c478bd9Sstevel@tonic-gate static void
disp_dq_alloc(struct disp_queue_info * dptr,int numpris,disp_t * dp)3757c478bd9Sstevel@tonic-gate disp_dq_alloc(struct disp_queue_info *dptr, int numpris, disp_t *dp)
3767c478bd9Sstevel@tonic-gate {
3777c478bd9Sstevel@tonic-gate dptr->newdispq = kmem_zalloc(numpris * sizeof (dispq_t), KM_SLEEP);
3787c478bd9Sstevel@tonic-gate dptr->newdqactmap = kmem_zalloc(((numpris / BT_NBIPUL) + 1) *
3797c478bd9Sstevel@tonic-gate sizeof (long), KM_SLEEP);
3807c478bd9Sstevel@tonic-gate dptr->dp = dp;
3817c478bd9Sstevel@tonic-gate }
3827c478bd9Sstevel@tonic-gate
3837c478bd9Sstevel@tonic-gate static void
disp_dq_assign(struct disp_queue_info * dptr,int numpris)3847c478bd9Sstevel@tonic-gate disp_dq_assign(struct disp_queue_info *dptr, int numpris)
3857c478bd9Sstevel@tonic-gate {
3867c478bd9Sstevel@tonic-gate disp_t *dp;
3877c478bd9Sstevel@tonic-gate
3887c478bd9Sstevel@tonic-gate dp = dptr->dp;
3897c478bd9Sstevel@tonic-gate dptr->olddispq = dp->disp_q;
3907c478bd9Sstevel@tonic-gate dptr->olddqactmap = dp->disp_qactmap;
3917c478bd9Sstevel@tonic-gate dptr->oldnglobpris = dp->disp_npri;
3927c478bd9Sstevel@tonic-gate
3937c478bd9Sstevel@tonic-gate ASSERT(dptr->oldnglobpris < numpris);
3947c478bd9Sstevel@tonic-gate
3957c478bd9Sstevel@tonic-gate if (dptr->olddispq != NULL) {
3967c478bd9Sstevel@tonic-gate /*
3977c478bd9Sstevel@tonic-gate * Use kcopy because bcopy is platform-specific
3987c478bd9Sstevel@tonic-gate * and could block while we might have paused the cpus.
3997c478bd9Sstevel@tonic-gate */
4007c478bd9Sstevel@tonic-gate (void) kcopy(dptr->olddispq, dptr->newdispq,
4017c478bd9Sstevel@tonic-gate dptr->oldnglobpris * sizeof (dispq_t));
4027c478bd9Sstevel@tonic-gate (void) kcopy(dptr->olddqactmap, dptr->newdqactmap,
4037c478bd9Sstevel@tonic-gate ((dptr->oldnglobpris / BT_NBIPUL) + 1) *
4047c478bd9Sstevel@tonic-gate sizeof (long));
4057c478bd9Sstevel@tonic-gate }
4067c478bd9Sstevel@tonic-gate dp->disp_q = dptr->newdispq;
4077c478bd9Sstevel@tonic-gate dp->disp_qactmap = dptr->newdqactmap;
4087c478bd9Sstevel@tonic-gate dp->disp_q_limit = &dptr->newdispq[numpris];
4097c478bd9Sstevel@tonic-gate dp->disp_npri = numpris;
4107c478bd9Sstevel@tonic-gate }
4117c478bd9Sstevel@tonic-gate
4127c478bd9Sstevel@tonic-gate static void
disp_dq_free(struct disp_queue_info * dptr)4137c478bd9Sstevel@tonic-gate disp_dq_free(struct disp_queue_info *dptr)
4147c478bd9Sstevel@tonic-gate {
4157c478bd9Sstevel@tonic-gate if (dptr->olddispq != NULL)
4167c478bd9Sstevel@tonic-gate kmem_free(dptr->olddispq,
4177c478bd9Sstevel@tonic-gate dptr->oldnglobpris * sizeof (dispq_t));
4187c478bd9Sstevel@tonic-gate if (dptr->olddqactmap != NULL)
4197c478bd9Sstevel@tonic-gate kmem_free(dptr->olddqactmap,
4207c478bd9Sstevel@tonic-gate ((dptr->oldnglobpris / BT_NBIPUL) + 1) * sizeof (long));
4217c478bd9Sstevel@tonic-gate }
4227c478bd9Sstevel@tonic-gate
4237c478bd9Sstevel@tonic-gate /*
4247c478bd9Sstevel@tonic-gate * For a newly created CPU, initialize the dispatch queue.
4257c478bd9Sstevel@tonic-gate * This is called before the CPU is known through cpu[] or on any lists.
4267c478bd9Sstevel@tonic-gate */
4277c478bd9Sstevel@tonic-gate void
disp_cpu_init(cpu_t * cp)4287c478bd9Sstevel@tonic-gate disp_cpu_init(cpu_t *cp)
4297c478bd9Sstevel@tonic-gate {
4307c478bd9Sstevel@tonic-gate disp_t *dp;
4317c478bd9Sstevel@tonic-gate dispq_t *newdispq;
4327c478bd9Sstevel@tonic-gate ulong_t *newdqactmap;
4337c478bd9Sstevel@tonic-gate
4347c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock)); /* protect dispatcher queue sizes */
4357c478bd9Sstevel@tonic-gate
4367c478bd9Sstevel@tonic-gate if (cp == cpu0_disp.disp_cpu)
4377c478bd9Sstevel@tonic-gate dp = &cpu0_disp;
4387c478bd9Sstevel@tonic-gate else
4397c478bd9Sstevel@tonic-gate dp = kmem_alloc(sizeof (disp_t), KM_SLEEP);
4407c478bd9Sstevel@tonic-gate bzero(dp, sizeof (disp_t));
4417c478bd9Sstevel@tonic-gate cp->cpu_disp = dp;
4427c478bd9Sstevel@tonic-gate dp->disp_cpu = cp;
4437c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = -1;
4447c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = -1;
4457c478bd9Sstevel@tonic-gate DISP_LOCK_INIT(&cp->cpu_thread_lock);
4467c478bd9Sstevel@tonic-gate /*
4477c478bd9Sstevel@tonic-gate * Allocate memory for the dispatcher queue headers
4487c478bd9Sstevel@tonic-gate * and the active queue bitmap.
4497c478bd9Sstevel@tonic-gate */
4507c478bd9Sstevel@tonic-gate newdispq = kmem_zalloc(v.v_nglobpris * sizeof (dispq_t), KM_SLEEP);
4517c478bd9Sstevel@tonic-gate newdqactmap = kmem_zalloc(((v.v_nglobpris / BT_NBIPUL) + 1) *
4527c478bd9Sstevel@tonic-gate sizeof (long), KM_SLEEP);
4537c478bd9Sstevel@tonic-gate dp->disp_q = newdispq;
4547c478bd9Sstevel@tonic-gate dp->disp_qactmap = newdqactmap;
4557c478bd9Sstevel@tonic-gate dp->disp_q_limit = &newdispq[v.v_nglobpris];
4567c478bd9Sstevel@tonic-gate dp->disp_npri = v.v_nglobpris;
4577c478bd9Sstevel@tonic-gate }
4587c478bd9Sstevel@tonic-gate
4597c478bd9Sstevel@tonic-gate void
disp_cpu_fini(cpu_t * cp)4607c478bd9Sstevel@tonic-gate disp_cpu_fini(cpu_t *cp)
4617c478bd9Sstevel@tonic-gate {
4627c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock));
4637c478bd9Sstevel@tonic-gate
4647c478bd9Sstevel@tonic-gate disp_kp_free(cp->cpu_disp);
4657c478bd9Sstevel@tonic-gate if (cp->cpu_disp != &cpu0_disp)
4667c478bd9Sstevel@tonic-gate kmem_free(cp->cpu_disp, sizeof (disp_t));
4677c478bd9Sstevel@tonic-gate }
4687c478bd9Sstevel@tonic-gate
4697c478bd9Sstevel@tonic-gate /*
4707c478bd9Sstevel@tonic-gate * Allocate new, larger kpreempt dispatch queue to replace the old one.
4717c478bd9Sstevel@tonic-gate */
4727c478bd9Sstevel@tonic-gate void
disp_kp_alloc(disp_t * dq,pri_t npri)4737c478bd9Sstevel@tonic-gate disp_kp_alloc(disp_t *dq, pri_t npri)
4747c478bd9Sstevel@tonic-gate {
4757c478bd9Sstevel@tonic-gate struct disp_queue_info mem_info;
4767c478bd9Sstevel@tonic-gate
4777c478bd9Sstevel@tonic-gate if (npri > dq->disp_npri) {
4787c478bd9Sstevel@tonic-gate /*
4797c478bd9Sstevel@tonic-gate * Allocate memory for the new array.
4807c478bd9Sstevel@tonic-gate */
4817c478bd9Sstevel@tonic-gate disp_dq_alloc(&mem_info, npri, dq);
4827c478bd9Sstevel@tonic-gate
4837c478bd9Sstevel@tonic-gate /*
4847c478bd9Sstevel@tonic-gate * We need to copy the old structures to the new
4857c478bd9Sstevel@tonic-gate * and free the old.
4867c478bd9Sstevel@tonic-gate */
4877c478bd9Sstevel@tonic-gate disp_dq_assign(&mem_info, npri);
4887c478bd9Sstevel@tonic-gate disp_dq_free(&mem_info);
4897c478bd9Sstevel@tonic-gate }
4907c478bd9Sstevel@tonic-gate }
4917c478bd9Sstevel@tonic-gate
4927c478bd9Sstevel@tonic-gate /*
4937c478bd9Sstevel@tonic-gate * Free dispatch queue.
4947c478bd9Sstevel@tonic-gate * Used for the kpreempt queues for a removed CPU partition and
4957c478bd9Sstevel@tonic-gate * for the per-CPU queues of deleted CPUs.
4967c478bd9Sstevel@tonic-gate */
4977c478bd9Sstevel@tonic-gate void
disp_kp_free(disp_t * dq)4987c478bd9Sstevel@tonic-gate disp_kp_free(disp_t *dq)
4997c478bd9Sstevel@tonic-gate {
5007c478bd9Sstevel@tonic-gate struct disp_queue_info mem_info;
5017c478bd9Sstevel@tonic-gate
5027c478bd9Sstevel@tonic-gate mem_info.olddispq = dq->disp_q;
5037c478bd9Sstevel@tonic-gate mem_info.olddqactmap = dq->disp_qactmap;
5047c478bd9Sstevel@tonic-gate mem_info.oldnglobpris = dq->disp_npri;
5057c478bd9Sstevel@tonic-gate disp_dq_free(&mem_info);
5067c478bd9Sstevel@tonic-gate }
5077c478bd9Sstevel@tonic-gate
5087c478bd9Sstevel@tonic-gate /*
5097c478bd9Sstevel@tonic-gate * End dispatcher and scheduler initialization.
5107c478bd9Sstevel@tonic-gate */
5117c478bd9Sstevel@tonic-gate
5127c478bd9Sstevel@tonic-gate /*
5137c478bd9Sstevel@tonic-gate * See if there's anything to do other than remain idle.
5147c478bd9Sstevel@tonic-gate * Return non-zero if there is.
5157c478bd9Sstevel@tonic-gate *
5167c478bd9Sstevel@tonic-gate * This function must be called with high spl, or with
5177c478bd9Sstevel@tonic-gate * kernel preemption disabled to prevent the partition's
5187c478bd9Sstevel@tonic-gate * active cpu list from changing while being traversed.
5197c478bd9Sstevel@tonic-gate *
5206890d023SEric Saxe * This is essentially a simpler version of disp_getwork()
5216890d023SEric Saxe * to be called by CPUs preparing to "halt".
5227c478bd9Sstevel@tonic-gate */
/*
 * See if there's anything to do other than remain idle.
 * Return non-zero if there is.
 *
 * This function must be called with high spl, or with
 * kernel preemption disabled to prevent the partition's
 * active cpu list from changing while being traversed.
 *
 * This is essentially a simpler version of disp_getwork()
 * to be called by CPUs preparing to "halt".
 *
 * The walk over cpu_next_part is lockless; the caller's constraints
 * above are what make it safe.
 */
int
disp_anywork(void)
{
	cpu_t		*cp = CPU;
	cpu_t		*ocp;
	/* volatile: re-read on each loop iteration, not cached in a register */
	volatile int	*local_nrunnable = &cp->cpu_disp->disp_nrunnable;

	if (!(cp->cpu_flags & CPU_OFFLINE)) {
		/* Anything runnable anywhere in this CPU's partition? */
		if (CP_MAXRUNPRI(cp->cpu_part) >= 0)
			return (1);

		for (ocp = cp->cpu_next_part; ocp != cp;
		    ocp = ocp->cpu_next_part) {
			ASSERT(CPU_ACTIVE(ocp));

			/*
			 * Something has appeared on the local run queue.
			 */
			if (*local_nrunnable > 0)
				return (1);
			/*
			 * If we encounter another idle CPU that will
			 * soon be trolling around through disp_anywork()
			 * terminate our walk here and let this other CPU
			 * patrol the next part of the list.
			 */
			if (ocp->cpu_dispatch_pri == -1 &&
			    (ocp->cpu_disp_flags & CPU_DISP_HALTED) == 0)
				return (0);
			/*
			 * Work can be taken from another CPU if:
			 *	- There is unbound work on the run queue
			 *	- That work isn't a thread undergoing a
			 *	- context switch on an otherwise empty queue.
			 *	- The CPU isn't running the idle loop.
			 */
			if (ocp->cpu_disp->disp_max_unbound_pri != -1 &&
			    !((ocp->cpu_disp_flags & CPU_DISP_DONTSTEAL) &&
			    ocp->cpu_disp->disp_nrunnable == 1) &&
			    ocp->cpu_dispatch_pri != -1)
				return (1);
		}
	}
	return (0);
}
5687c478bd9Sstevel@tonic-gate
5697c478bd9Sstevel@tonic-gate /*
5707c478bd9Sstevel@tonic-gate * Called when CPU enters the idle loop
5717c478bd9Sstevel@tonic-gate */
/*
 * Called when CPU enters the idle loop.
 * Switches the CPU's microstate accounting to idle, bumps the
 * idle-thread statistic, and notifies platform code via the
 * set_idle_cpu() hook.
 */
static void
idle_enter()
{
	cpu_t		*cp = CPU;

	new_cpu_mstate(CMS_IDLE, gethrtime_unscaled());
	CPU_STATS_ADDQ(cp, sys, idlethread, 1);
	set_idle_cpu(cp->cpu_id);	/* arch-dependent hook */
}
5817c478bd9Sstevel@tonic-gate
5827c478bd9Sstevel@tonic-gate /*
5837c478bd9Sstevel@tonic-gate * Called when CPU exits the idle loop
5847c478bd9Sstevel@tonic-gate */
5857c478bd9Sstevel@tonic-gate static void
idle_exit()5867c478bd9Sstevel@tonic-gate idle_exit()
5877c478bd9Sstevel@tonic-gate {
5887c478bd9Sstevel@tonic-gate cpu_t *cp = CPU;
5897c478bd9Sstevel@tonic-gate
590eda89462Sesolom new_cpu_mstate(CMS_SYSTEM, gethrtime_unscaled());
5917c478bd9Sstevel@tonic-gate unset_idle_cpu(cp->cpu_id); /* arch-dependent hook */
5927c478bd9Sstevel@tonic-gate }
5937c478bd9Sstevel@tonic-gate
5947c478bd9Sstevel@tonic-gate /*
5957c478bd9Sstevel@tonic-gate * Idle loop.
5967c478bd9Sstevel@tonic-gate */
void
idle()
{
	struct cpu	*cp = CPU;	/* pointer to this CPU */
	kthread_t	*t;		/* taken thread */

	idle_enter();

	/*
	 * Uniprocessor version of idle loop.
	 * Do this until notified that we're on an actual multiprocessor.
	 */
	while (ncpus == 1) {
		if (cp->cpu_disp->disp_nrunnable == 0) {
			(*idle_cpu)();	/* arch-specific idle routine */
			continue;
		}
		idle_exit();
		swtch();

		idle_enter(); /* returned from swtch */
	}

	/*
	 * Multiprocessor idle loop.
	 */
	for (;;) {
		/*
		 * If CPU is completely quiesced by p_online(2), just wait
		 * here with minimal bus traffic until put online.
		 */
		while (cp->cpu_flags & CPU_QUIESCED)
			(*idle_cpu)();

		if (cp->cpu_disp->disp_nrunnable != 0) {
			/* Local work exists: go dispatch it. */
			idle_exit();
			swtch();
		} else {
			/*
			 * Offline CPUs only run what lands on their own
			 * queue; they do not go looking for work elsewhere.
			 */
			if (cp->cpu_flags & CPU_OFFLINE)
				continue;
			if ((t = disp_getwork(cp)) == NULL) {
				if (cp->cpu_chosen_level != -1) {
					disp_t *dp = cp->cpu_disp;
					disp_t *kpq;

					disp_lock_enter(&dp->disp_lock);
					/*
					 * Set kpq under lock to prevent
					 * migration between partitions.
					 */
					kpq = &cp->cpu_part->cp_kp_queue;
					if (kpq->disp_maxrunpri == -1)
						cp->cpu_chosen_level = -1;
					disp_lock_exit(&dp->disp_lock);
				}
				(*idle_cpu)();
				continue;
			}
			/*
			 * If there was a thread but we couldn't steal
			 * it, then keep trying.
			 */
			if (t == T_DONTSTEAL)
				continue;
			idle_exit();
			swtch_to(t);
		}
		idle_enter(); /* returned from swtch/swtch_to */
	}
}
6677c478bd9Sstevel@tonic-gate
6687c478bd9Sstevel@tonic-gate
6697c478bd9Sstevel@tonic-gate /*
6707c478bd9Sstevel@tonic-gate * Preempt the currently running thread in favor of the highest
6717c478bd9Sstevel@tonic-gate * priority thread. The class of the current thread controls
6727c478bd9Sstevel@tonic-gate * where it goes on the dispatcher queues. If panicking, turn
6737c478bd9Sstevel@tonic-gate * preemption off.
6747c478bd9Sstevel@tonic-gate */
void
preempt()
{
	kthread_t	*t = curthread;
	klwp_t		*lwp = ttolwp(curthread);

	/* If panicking, preemption is turned off (see block comment above). */
	if (panicstr)
		return;

	TRACE_0(TR_FAC_DISP, TR_PREEMPT_START, "preempt_start");

	thread_lock(t);

	if (t->t_state != TS_ONPROC || t->t_disp_queue != CPU->cpu_disp) {
		/*
		 * this thread has already been chosen to be run on
		 * another CPU. Clear kprunrun on this CPU since we're
		 * already headed for swtch().
		 */
		CPU->cpu_kprunrun = 0;
		thread_unlock_nopreempt(t);
		TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end");
	} else {
		/* Charge an involuntary context switch to the lwp, if any. */
		if (lwp != NULL)
			lwp->lwp_ru.nivcsw++;
		CPU_STATS_ADDQ(CPU, sys, inv_swtch, 1);
		THREAD_TRANSITION(t);
		CL_PREEMPT(t);	/* scheduling class decides where t goes */
		DTRACE_SCHED(preempt);
		thread_unlock_nopreempt(t);

		TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end");

		swtch();		/* clears CPU->cpu_runrun via disp() */
	}
}
7117c478bd9Sstevel@tonic-gate
7127c478bd9Sstevel@tonic-gate extern kthread_t *thread_unpin();
7137c478bd9Sstevel@tonic-gate
7147c478bd9Sstevel@tonic-gate /*
7157c478bd9Sstevel@tonic-gate * disp() - find the highest priority thread for this processor to run, and
7167c478bd9Sstevel@tonic-gate * set it in TS_ONPROC state so that resume() can be called to run it.
7177c478bd9Sstevel@tonic-gate */
static kthread_t *
disp()
{
	cpu_t		*cpup;
	disp_t		*dp;
	kthread_t	*tp;
	dispq_t		*dq;
	int		maxrunword;
	pri_t		pri;
	disp_t		*kpq;

	TRACE_0(TR_FAC_DISP, TR_DISP_START, "disp_start");

	cpup = CPU;
	/*
	 * Find the highest priority loaded, runnable thread.
	 */
	dp = cpup->cpu_disp;

reschedule:
	/*
	 * If there is more important work on the global queue with a better
	 * priority than the maximum on this CPU, take it now.
	 */
	kpq = &cpup->cpu_part->cp_kp_queue;
	while ((pri = kpq->disp_maxrunpri) >= 0 &&
	    pri >= dp->disp_maxrunpri &&
	    (cpup->cpu_flags & CPU_OFFLINE) == 0 &&
	    (tp = disp_getbest(kpq)) != NULL) {
		if (disp_ratify(tp, kpq) != NULL) {
			TRACE_1(TR_FAC_DISP, TR_DISP_END,
			    "disp_end:tid %p", tp);
			return (tp);
		}
	}

	disp_lock_enter(&dp->disp_lock);
	pri = dp->disp_maxrunpri;

	/*
	 * If there is nothing to run, look at what's runnable on other queues.
	 * Choose the idle thread if the CPU is quiesced.
	 * Note that CPUs that have the CPU_OFFLINE flag set can still run
	 * interrupt threads, which will be the only threads on the CPU's own
	 * queue, but cannot run threads from other queues.
	 */
	if (pri == -1) {
		if (!(cpup->cpu_flags & CPU_OFFLINE)) {
			disp_lock_exit(&dp->disp_lock);
			if ((tp = disp_getwork(cpup)) == NULL ||
			    tp == T_DONTSTEAL) {
				/* Nothing stealable either: run idle. */
				tp = cpup->cpu_idle_thread;
				(void) splhigh();
				THREAD_ONPROC(tp, cpup);
				cpup->cpu_dispthread = tp;
				cpup->cpu_dispatch_pri = -1;
				cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
				cpup->cpu_chosen_level = -1;
			}
		} else {
			/* Quiesced/offline with empty queue: must idle. */
			disp_lock_exit_high(&dp->disp_lock);
			tp = cpup->cpu_idle_thread;
			THREAD_ONPROC(tp, cpup);
			cpup->cpu_dispthread = tp;
			cpup->cpu_dispatch_pri = -1;
			cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
			cpup->cpu_chosen_level = -1;
		}
		TRACE_1(TR_FAC_DISP, TR_DISP_END,
		    "disp_end:tid %p", tp);
		return (tp);
	}

	dq = &dp->disp_q[pri];
	tp = dq->dq_first;

	ASSERT(tp != NULL);
	ASSERT(tp->t_schedflag & TS_LOAD);	/* thread must be swapped in */

	DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp);

	/*
	 * Found it so remove it from queue.
	 */
	dp->disp_nrunnable--;
	dq->dq_sruncnt--;
	if ((dq->dq_first = tp->t_link) == NULL) {
		ulong_t	*dqactmap = dp->disp_qactmap;

		ASSERT(dq->dq_sruncnt == 0);
		dq->dq_last = NULL;

		/*
		 * The queue is empty, so the corresponding bit needs to be
		 * turned off in dqactmap.  If nrunnable != 0 we just took
		 * the last runnable thread off the
		 * highest queue, so recompute disp_maxrunpri.
		 */
		maxrunword = pri >> BT_ULSHIFT;
		dqactmap[maxrunword] &= ~BT_BIW(pri);

		if (dp->disp_nrunnable == 0) {
			dp->disp_max_unbound_pri = -1;
			dp->disp_maxrunpri = -1;
		} else {
			int ipri;

			ipri = bt_gethighbit(dqactmap, maxrunword);
			dp->disp_maxrunpri = ipri;
			if (ipri < dp->disp_max_unbound_pri)
				dp->disp_max_unbound_pri = ipri;
		}
	} else {
		tp->t_link = NULL;
	}

	/*
	 * Set TS_DONT_SWAP flag to prevent another processor from swapping
	 * out this thread before we have a chance to run it.
	 * While running, it is protected against swapping by t_lock.
	 */
	tp->t_schedflag |= TS_DONT_SWAP;
	cpup->cpu_dispthread = tp;		/* protected by spl only */
	cpup->cpu_dispatch_pri = pri;
	ASSERT(pri == DISP_PRIO(tp));
	thread_onproc(tp, cpup);		/* set t_state to TS_ONPROC */
	disp_lock_exit_high(&dp->disp_lock);	/* drop run queue lock */

	ASSERT(tp != NULL);
	TRACE_1(TR_FAC_DISP, TR_DISP_END,
	    "disp_end:tid %p", tp);

	/* If a better thread appeared meanwhile, start over. */
	if (disp_ratify(tp, kpq) == NULL)
		goto reschedule;

	return (tp);
}
8557c478bd9Sstevel@tonic-gate
8567c478bd9Sstevel@tonic-gate /*
8577c478bd9Sstevel@tonic-gate * swtch()
8587c478bd9Sstevel@tonic-gate * Find best runnable thread and run it.
8597c478bd9Sstevel@tonic-gate * Called with the current thread already switched to a new state,
8607c478bd9Sstevel@tonic-gate * on a sleep queue, run queue, stopped, and not zombied.
8617c478bd9Sstevel@tonic-gate * May be called at any spl level less than or equal to LOCK_LEVEL.
8627c478bd9Sstevel@tonic-gate * Always drops spl to the base level (spl0()).
8637c478bd9Sstevel@tonic-gate */
void
swtch()
{
	kthread_t	*t = curthread;
	kthread_t	*next;
	cpu_t		*cp;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	if (t->t_flag & T_INTR_THREAD)
		cpu_intr_swtch_enter(t);

	if (t->t_intr != NULL) {
		/*
		 * We are an interrupt thread.  Setup and return
		 * the interrupted thread to be resumed.
		 */
		(void) splhigh();	/* block other scheduler action */
		cp = CPU;		/* now protected against migration */
		ASSERT(CPU_ON_INTR(cp) == 0);	/* not called with PIL > 10 */
		CPU_STATS_ADDQ(cp, sys, pswitch, 1);
		CPU_STATS_ADDQ(cp, sys, intrblk, 1);
		next = thread_unpin();
		TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");
		resume_from_intr(next);
	} else {
#ifdef	DEBUG
		/*
		 * Sanity check: an unpreempted on-proc thread should not
		 * have migrated between the lockless check and thread_lock.
		 */
		if (t->t_state == TS_ONPROC &&
		    t->t_disp_queue->disp_cpu == CPU &&
		    t->t_preempt == 0) {
			thread_lock(t);
			ASSERT(t->t_state != TS_ONPROC ||
			    t->t_disp_queue->disp_cpu != CPU ||
			    t->t_preempt != 0);	/* cannot migrate */
			thread_unlock_nopreempt(t);
		}
#endif	/* DEBUG */
		cp = CPU;
		next = disp();		/* returns with spl high */
		ASSERT(CPU_ON_INTR(cp) == 0);	/* not called with PIL > 10 */

		/* OK to steal anything left on run queue */
		cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL;

		if (next != t) {
			hrtime_t now;

			now = gethrtime_unscaled();
			pg_ev_thread_swtch(cp, now, t, next);

			/*
			 * If t was previously in the TS_ONPROC state,
			 * setfrontdq and setbackdq won't have set its t_waitrq.
			 * Since we now finally know that we're switching away
			 * from this thread, set its t_waitrq if it is on a run
			 * queue.
			 */
			if ((t->t_state == TS_RUN) && (t->t_waitrq == 0)) {
				t->t_waitrq = now;
			}

			/*
			 * restore mstate of thread that we are switching to
			 */
			restore_mstate(next);

			CPU_STATS_ADDQ(cp, sys, pswitch, 1);
			cp->cpu_last_swtch = t->t_disp_time = ddi_get_lbolt();
			TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

			if (dtrace_vtime_active)
				dtrace_vtime_switch(next);

			resume(next);
			/*
			 * The TR_RESUME_END and TR_SWTCH_END trace points
			 * appear at the end of resume(), because we may not
			 * return here
			 */
		} else {
			/* We selected ourselves: stay on this CPU. */
			if (t->t_flag & T_INTR_THREAD)
				cpu_intr_swtch_exit(t);
			/*
			 * Threads that enqueue themselves on a run queue defer
			 * setting t_waitrq. It is then either set in swtch()
			 * when the CPU is actually yielded, or not at all if it
			 * is remaining on the CPU.
			 * There is however a window between where the thread
			 * placed itself on a run queue, and where it selects
			 * itself in disp(), where a third party (eg. clock()
			 * doing tick processing) may have re-enqueued this
			 * thread, setting t_waitrq in the process. We detect
			 * this race by noticing that despite switching to
			 * ourself, our t_waitrq has been set, and should be
			 * cleared.
			 */
			if (t->t_waitrq != 0)
				t->t_waitrq = 0;

			pg_ev_thread_remain(cp, t);

			DTRACE_SCHED(remain__cpu);
			TRACE_0(TR_FAC_DISP, TR_SWTCH_END, "swtch_end");
			(void) spl0();
		}
	}
}
9717c478bd9Sstevel@tonic-gate
9727c478bd9Sstevel@tonic-gate /*
9737c478bd9Sstevel@tonic-gate * swtch_from_zombie()
9747c478bd9Sstevel@tonic-gate * Special case of swtch(), which allows checks for TS_ZOMB to be
9757c478bd9Sstevel@tonic-gate * eliminated from normal resume.
9767c478bd9Sstevel@tonic-gate * Find best runnable thread and run it.
9777c478bd9Sstevel@tonic-gate * Called with the current thread zombied.
9787c478bd9Sstevel@tonic-gate * Zombies cannot migrate, so CPU references are safe.
9797c478bd9Sstevel@tonic-gate */
void
swtch_from_zombie()
{
	kthread_t	*next;
	cpu_t		*cpu = CPU;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	ASSERT(curthread->t_state == TS_ZOMB);

	next = disp();			/* returns with spl high */
	ASSERT(CPU_ON_INTR(CPU) == 0);	/* not called with PIL > 10 */
	CPU_STATS_ADDQ(CPU, sys, pswitch, 1);
	ASSERT(next != curthread);
	TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

	pg_ev_thread_swtch(cpu, gethrtime_unscaled(), curthread, next);

	/* restore next thread to previously running microstate */
	restore_mstate(next);

	if (dtrace_vtime_active)
		dtrace_vtime_switch(next);

	resume_from_zombie(next);
	/*
	 * The TR_RESUME_END and TR_SWTCH_END trace points
	 * appear at the end of resume(), because we certainly will not
	 * return here
	 */
}
10107c478bd9Sstevel@tonic-gate
10117c478bd9Sstevel@tonic-gate #if defined(DEBUG) && (defined(DISP_DEBUG) || defined(lint))
10127c478bd9Sstevel@tonic-gate
1013057452c6Sjj209869 /*
1014057452c6Sjj209869 * search_disp_queues()
1015057452c6Sjj209869 * Search the given dispatch queues for thread tp.
1016057452c6Sjj209869 * Return 1 if tp is found, otherwise return 0.
1017057452c6Sjj209869 */
1018057452c6Sjj209869 static int
search_disp_queues(disp_t * dp,kthread_t * tp)1019057452c6Sjj209869 search_disp_queues(disp_t *dp, kthread_t *tp)
1020057452c6Sjj209869 {
10217c478bd9Sstevel@tonic-gate dispq_t *dq;
10227c478bd9Sstevel@tonic-gate dispq_t *eq;
10237c478bd9Sstevel@tonic-gate
10247c478bd9Sstevel@tonic-gate disp_lock_enter_high(&dp->disp_lock);
1025057452c6Sjj209869
10267c478bd9Sstevel@tonic-gate for (dq = dp->disp_q, eq = dp->disp_q_limit; dq < eq; ++dq) {
10277c478bd9Sstevel@tonic-gate kthread_t *rp;
10287c478bd9Sstevel@tonic-gate
1029057452c6Sjj209869 ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL);
1030057452c6Sjj209869
10317c478bd9Sstevel@tonic-gate for (rp = dq->dq_first; rp; rp = rp->t_link)
10327c478bd9Sstevel@tonic-gate if (tp == rp) {
10337c478bd9Sstevel@tonic-gate disp_lock_exit_high(&dp->disp_lock);
10347c478bd9Sstevel@tonic-gate return (1);
10357c478bd9Sstevel@tonic-gate }
10367c478bd9Sstevel@tonic-gate }
10377c478bd9Sstevel@tonic-gate disp_lock_exit_high(&dp->disp_lock);
1038057452c6Sjj209869
10397c478bd9Sstevel@tonic-gate return (0);
1040057452c6Sjj209869 }
1041057452c6Sjj209869
1042057452c6Sjj209869 /*
1043057452c6Sjj209869 * thread_on_queue()
1044057452c6Sjj209869 * Search all per-CPU dispatch queues and all partition-wide kpreempt
1045057452c6Sjj209869 * queues for thread tp. Return 1 if tp is found, otherwise return 0.
1046057452c6Sjj209869 */
1047057452c6Sjj209869 static int
thread_on_queue(kthread_t * tp)1048057452c6Sjj209869 thread_on_queue(kthread_t *tp)
1049057452c6Sjj209869 {
1050057452c6Sjj209869 cpu_t *cp;
1051057452c6Sjj209869 struct cpupart *part;
1052057452c6Sjj209869
1053057452c6Sjj209869 ASSERT(getpil() >= DISP_LEVEL);
1054057452c6Sjj209869
1055057452c6Sjj209869 /*
1056057452c6Sjj209869 * Search the per-CPU dispatch queues for tp.
1057057452c6Sjj209869 */
1058057452c6Sjj209869 cp = CPU;
1059057452c6Sjj209869 do {
1060057452c6Sjj209869 if (search_disp_queues(cp->cpu_disp, tp))
1061057452c6Sjj209869 return (1);
1062057452c6Sjj209869 } while ((cp = cp->cpu_next_onln) != CPU);
1063057452c6Sjj209869
1064057452c6Sjj209869 /*
1065057452c6Sjj209869 * Search the partition-wide kpreempt queues for tp.
1066057452c6Sjj209869 */
1067057452c6Sjj209869 part = CPU->cpu_part;
1068057452c6Sjj209869 do {
1069057452c6Sjj209869 if (search_disp_queues(&part->cp_kp_queue, tp))
1070057452c6Sjj209869 return (1);
1071057452c6Sjj209869 } while ((part = part->cp_next) != CPU->cpu_part);
1072057452c6Sjj209869
1073057452c6Sjj209869 return (0);
1074057452c6Sjj209869 }
1075057452c6Sjj209869
10767c478bd9Sstevel@tonic-gate #else
10777c478bd9Sstevel@tonic-gate
10787c478bd9Sstevel@tonic-gate #define thread_on_queue(tp) 0 /* ASSERT must be !thread_on_queue */
10797c478bd9Sstevel@tonic-gate
10807c478bd9Sstevel@tonic-gate #endif /* DEBUG */
10817c478bd9Sstevel@tonic-gate
10827c478bd9Sstevel@tonic-gate /*
10837c478bd9Sstevel@tonic-gate * like swtch(), but switch to a specified thread taken from another CPU.
10847c478bd9Sstevel@tonic-gate * called with spl high..
10857c478bd9Sstevel@tonic-gate */
void
swtch_to(kthread_t *next)
{
	cpu_t		*cp = CPU;
	hrtime_t	now;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	/*
	 * Update context switch statistics.
	 */
	CPU_STATS_ADDQ(cp, sys, pswitch, 1);

	TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

	now = gethrtime_unscaled();
	pg_ev_thread_swtch(cp, now, curthread, next);

	/* OK to steal anything left on run queue */
	cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL;

	/* record last execution time */
	cp->cpu_last_swtch = curthread->t_disp_time = ddi_get_lbolt();

	/*
	 * If t was previously in the TS_ONPROC state, setfrontdq and setbackdq
	 * won't have set its t_waitrq.  Since we now finally know that we're
	 * switching away from this thread, set its t_waitrq if it is on a run
	 * queue.
	 */
	if ((curthread->t_state == TS_RUN) && (curthread->t_waitrq == 0)) {
		curthread->t_waitrq = now;
	}

	/* restore next thread to previously running microstate */
	restore_mstate(next);

	if (dtrace_vtime_active)
		dtrace_vtime_switch(next);

	resume(next);
	/*
	 * The TR_RESUME_END and TR_SWTCH_END trace points
	 * appear at the end of resume(), because we may not
	 * return here
	 */
}
11337c478bd9Sstevel@tonic-gate
11347c478bd9Sstevel@tonic-gate #define CPU_IDLING(pri) ((pri) == -1)
11357c478bd9Sstevel@tonic-gate
11367c478bd9Sstevel@tonic-gate static void
cpu_resched(cpu_t * cp,pri_t tpri)11377c478bd9Sstevel@tonic-gate cpu_resched(cpu_t *cp, pri_t tpri)
11387c478bd9Sstevel@tonic-gate {
11397c478bd9Sstevel@tonic-gate int call_poke_cpu = 0;
11407c478bd9Sstevel@tonic-gate pri_t cpupri = cp->cpu_dispatch_pri;
11417c478bd9Sstevel@tonic-gate
11427c478bd9Sstevel@tonic-gate if (!CPU_IDLING(cpupri) && (cpupri < tpri)) {
11437c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED,
11447c478bd9Sstevel@tonic-gate "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri);
11457c478bd9Sstevel@tonic-gate if (tpri >= upreemptpri && cp->cpu_runrun == 0) {
11467c478bd9Sstevel@tonic-gate cp->cpu_runrun = 1;
11477c478bd9Sstevel@tonic-gate aston(cp->cpu_dispthread);
11487c478bd9Sstevel@tonic-gate if (tpri < kpreemptpri && cp != CPU)
11497c478bd9Sstevel@tonic-gate call_poke_cpu = 1;
11507c478bd9Sstevel@tonic-gate }
11517c478bd9Sstevel@tonic-gate if (tpri >= kpreemptpri && cp->cpu_kprunrun == 0) {
11527c478bd9Sstevel@tonic-gate cp->cpu_kprunrun = 1;
11537c478bd9Sstevel@tonic-gate if (cp != CPU)
11547c478bd9Sstevel@tonic-gate call_poke_cpu = 1;
11557c478bd9Sstevel@tonic-gate }
11567c478bd9Sstevel@tonic-gate }
11577c478bd9Sstevel@tonic-gate
11587c478bd9Sstevel@tonic-gate /*
11597c478bd9Sstevel@tonic-gate * Propagate cpu_runrun, and cpu_kprunrun to global visibility.
11607c478bd9Sstevel@tonic-gate */
11617c478bd9Sstevel@tonic-gate membar_enter();
11627c478bd9Sstevel@tonic-gate
11637c478bd9Sstevel@tonic-gate if (call_poke_cpu)
11647c478bd9Sstevel@tonic-gate poke_cpu(cp->cpu_id);
11657c478bd9Sstevel@tonic-gate }
11667c478bd9Sstevel@tonic-gate
11677c478bd9Sstevel@tonic-gate /*
11687c478bd9Sstevel@tonic-gate * setbackdq() keeps runqs balanced such that the difference in length
11697c478bd9Sstevel@tonic-gate * between the chosen runq and the next one is no more than RUNQ_MAX_DIFF.
11707c478bd9Sstevel@tonic-gate * For threads with priorities below RUNQ_MATCH_PRI levels, the runq's lengths
11717c478bd9Sstevel@tonic-gate * must match. When per-thread TS_RUNQMATCH flag is set, setbackdq() will
11727c478bd9Sstevel@tonic-gate * try to keep runqs perfectly balanced regardless of the thread priority.
11737c478bd9Sstevel@tonic-gate */
11747c478bd9Sstevel@tonic-gate #define RUNQ_MATCH_PRI 16 /* pri below which queue lengths must match */
11757c478bd9Sstevel@tonic-gate #define RUNQ_MAX_DIFF 2 /* maximum runq length difference */
11767c478bd9Sstevel@tonic-gate #define RUNQ_LEN(cp, pri) ((cp)->cpu_disp->disp_q[pri].dq_sruncnt)
11777c478bd9Sstevel@tonic-gate
11787c478bd9Sstevel@tonic-gate /*
11796890d023SEric Saxe * Macro that evaluates to true if it is likely that the thread has cache
11806890d023SEric Saxe * warmth. This is based on the amount of time that has elapsed since the
11816890d023SEric Saxe * thread last ran. If that amount of time is less than "rechoose_interval"
11826890d023SEric Saxe * ticks, then we decide that the thread has enough cache warmth to warrant
11836890d023SEric Saxe * some affinity for t->t_cpu.
11846890d023SEric Saxe */
11856890d023SEric Saxe #define THREAD_HAS_CACHE_WARMTH(thread) \
11866890d023SEric Saxe ((thread == curthread) || \
1187d3d50737SRafael Vanoni ((ddi_get_lbolt() - thread->t_disp_time) <= rechoose_interval))
11886890d023SEric Saxe /*
11897c478bd9Sstevel@tonic-gate * Put the specified thread on the back of the dispatcher
11907c478bd9Sstevel@tonic-gate * queue corresponding to its current priority.
11917c478bd9Sstevel@tonic-gate *
11927c478bd9Sstevel@tonic-gate * Called with the thread in transition, onproc or stopped state
11937c478bd9Sstevel@tonic-gate * and locked (transition implies locked) and at high spl.
11947c478bd9Sstevel@tonic-gate * Returns with the thread in TS_RUN state and still locked.
11957c478bd9Sstevel@tonic-gate */
11967c478bd9Sstevel@tonic-gate void
setbackdq(kthread_t * tp)11977c478bd9Sstevel@tonic-gate setbackdq(kthread_t *tp)
11987c478bd9Sstevel@tonic-gate {
11997c478bd9Sstevel@tonic-gate dispq_t *dq;
12007c478bd9Sstevel@tonic-gate disp_t *dp;
12017c478bd9Sstevel@tonic-gate cpu_t *cp;
12027c478bd9Sstevel@tonic-gate pri_t tpri;
12037c478bd9Sstevel@tonic-gate int bound;
12046890d023SEric Saxe boolean_t self;
12057c478bd9Sstevel@tonic-gate
12067c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp));
12077c478bd9Sstevel@tonic-gate ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);
12087c478bd9Sstevel@tonic-gate ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */
12097c478bd9Sstevel@tonic-gate
12107c478bd9Sstevel@tonic-gate /*
12117c478bd9Sstevel@tonic-gate * If thread is "swapped" or on the swap queue don't
12127c478bd9Sstevel@tonic-gate * queue it, but wake sched.
12137c478bd9Sstevel@tonic-gate */
12147c478bd9Sstevel@tonic-gate if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
12157c478bd9Sstevel@tonic-gate disp_swapped_setrun(tp);
12167c478bd9Sstevel@tonic-gate return;
12177c478bd9Sstevel@tonic-gate }
12187c478bd9Sstevel@tonic-gate
/* "self" records whether we are requeueing the current thread. */
12196890d023SEric Saxe self = (tp == curthread);
12206890d023SEric Saxe
1221abd41583Sgd209917 if (tp->t_bound_cpu || tp->t_weakbound_cpu)
1222abd41583Sgd209917 bound = 1;
1223abd41583Sgd209917 else
1224abd41583Sgd209917 bound = 0;
1225abd41583Sgd209917
12267c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp);
12277c478bd9Sstevel@tonic-gate if (ncpus == 1)
12287c478bd9Sstevel@tonic-gate cp = tp->t_cpu;
1229abd41583Sgd209917 else if (!bound) {
12307c478bd9Sstevel@tonic-gate if (tpri >= kpqpri) {
12317c478bd9Sstevel@tonic-gate setkpdq(tp, SETKP_BACK);
12327c478bd9Sstevel@tonic-gate return;
12337c478bd9Sstevel@tonic-gate }
12346890d023SEric Saxe
12357c478bd9Sstevel@tonic-gate /*
12366890d023SEric Saxe * We'll generally let this thread continue to run where
12376890d023SEric Saxe * it last ran...but will consider migration if:
12386890d023SEric Saxe * - The thread probably doesn't have much cache warmth.
12396890d023SEric Saxe * - The CPU where it last ran is the target of an offline
12406890d023SEric Saxe * request.
12416890d023SEric Saxe * - The thread last ran outside its home lgroup.
12427c478bd9Sstevel@tonic-gate */
12436890d023SEric Saxe if ((!THREAD_HAS_CACHE_WARMTH(tp)) ||
12446890d023SEric Saxe (tp->t_cpu == cpu_inmotion)) {
12456890d023SEric Saxe cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri, NULL);
12466890d023SEric Saxe } else if (!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, tp->t_cpu)) {
12476890d023SEric Saxe cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
12486890d023SEric Saxe self ? tp->t_cpu : NULL);
12496890d023SEric Saxe } else {
12506890d023SEric Saxe cp = tp->t_cpu;
12516890d023SEric Saxe }
12527c478bd9Sstevel@tonic-gate
12537c478bd9Sstevel@tonic-gate if (tp->t_cpupart == cp->cpu_part) {
12547c478bd9Sstevel@tonic-gate int qlen;
12557c478bd9Sstevel@tonic-gate
12567c478bd9Sstevel@tonic-gate /*
1257fb2f18f8Sesaxe * Perform any CMT load balancing
12587c478bd9Sstevel@tonic-gate */
1259fb2f18f8Sesaxe cp = cmt_balance(tp, cp);
12607c478bd9Sstevel@tonic-gate
12617c478bd9Sstevel@tonic-gate /*
12627c478bd9Sstevel@tonic-gate * Balance across the run queues
 * (see RUNQ_MATCH_PRI/RUNQ_MAX_DIFF above: low-priority
 * threads require exactly matching queue lengths unless
 * TS_RUNQMATCH forces strict balancing).
12637c478bd9Sstevel@tonic-gate */
12647c478bd9Sstevel@tonic-gate qlen = RUNQ_LEN(cp, tpri);
12657c478bd9Sstevel@tonic-gate if (tpri >= RUNQ_MATCH_PRI &&
12667c478bd9Sstevel@tonic-gate !(tp->t_schedflag & TS_RUNQMATCH))
12677c478bd9Sstevel@tonic-gate qlen -= RUNQ_MAX_DIFF;
12687c478bd9Sstevel@tonic-gate if (qlen > 0) {
1269685679f7Sakolb cpu_t *newcp;
12707c478bd9Sstevel@tonic-gate
/* Candidate for migration: next CPU in the lpl, or next in partition. */
1271685679f7Sakolb if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID) {
1272685679f7Sakolb newcp = cp->cpu_next_part;
1273685679f7Sakolb } else if ((newcp = cp->cpu_next_lpl) == cp) {
1274685679f7Sakolb newcp = cp->cpu_next_part;
12757c478bd9Sstevel@tonic-gate }
1276685679f7Sakolb
1277685679f7Sakolb if (RUNQ_LEN(newcp, tpri) < qlen) {
1278685679f7Sakolb DTRACE_PROBE3(runq__balance,
1279685679f7Sakolb kthread_t *, tp,
1280685679f7Sakolb cpu_t *, cp, cpu_t *, newcp);
1281685679f7Sakolb cp = newcp;
1282685679f7Sakolb }
12837c478bd9Sstevel@tonic-gate }
12847c478bd9Sstevel@tonic-gate } else {
12857c478bd9Sstevel@tonic-gate /*
12867c478bd9Sstevel@tonic-gate * Migrate to a cpu in the new partition.
12877c478bd9Sstevel@tonic-gate */
12887c478bd9Sstevel@tonic-gate cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
12897c478bd9Sstevel@tonic-gate tp->t_lpl, tp->t_pri, NULL);
12907c478bd9Sstevel@tonic-gate }
12917c478bd9Sstevel@tonic-gate ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
12927c478bd9Sstevel@tonic-gate } else {
12937c478bd9Sstevel@tonic-gate /*
12947c478bd9Sstevel@tonic-gate * It is possible that t_weakbound_cpu != t_bound_cpu (for
12957c478bd9Sstevel@tonic-gate * a short time until weak binding that existed when the
12967c478bd9Sstevel@tonic-gate * strong binding was established has dropped) so we must
12977c478bd9Sstevel@tonic-gate * favour weak binding over strong.
12987c478bd9Sstevel@tonic-gate */
12997c478bd9Sstevel@tonic-gate cp = tp->t_weakbound_cpu ?
13007c478bd9Sstevel@tonic-gate tp->t_weakbound_cpu : tp->t_bound_cpu;
13017c478bd9Sstevel@tonic-gate }
1302f2bd4627Sjohansen /*
1303f2bd4627Sjohansen * A thread that is ONPROC may be temporarily placed on the run queue
1304f2bd4627Sjohansen * but then chosen to run again by disp. If the thread we're placing on
1305f2bd4627Sjohansen * the queue is in TS_ONPROC state, don't set its t_waitrq until a
1306f2bd4627Sjohansen * replacement process is actually scheduled in swtch(). In this
1307f2bd4627Sjohansen * situation, curthread is the only thread that could be in the ONPROC
1308f2bd4627Sjohansen * state.
1309f2bd4627Sjohansen */
13106890d023SEric Saxe if ((!self) && (tp->t_waitrq == 0)) {
1311f2bd4627Sjohansen hrtime_t curtime;
1312f2bd4627Sjohansen
1313f2bd4627Sjohansen curtime = gethrtime_unscaled();
1314f2bd4627Sjohansen (void) cpu_update_pct(tp, curtime);
1315f2bd4627Sjohansen tp->t_waitrq = curtime;
1316f2bd4627Sjohansen } else {
/* Still update CPU-usage percentage even when t_waitrq is left alone. */
1317f2bd4627Sjohansen (void) cpu_update_pct(tp, gethrtime_unscaled());
1318f2bd4627Sjohansen }
1319f2bd4627Sjohansen
13207c478bd9Sstevel@tonic-gate dp = cp->cpu_disp;
13217c478bd9Sstevel@tonic-gate disp_lock_enter_high(&dp->disp_lock);
13227c478bd9Sstevel@tonic-gate
13237c478bd9Sstevel@tonic-gate DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 0);
13247c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p",
13257c478bd9Sstevel@tonic-gate tpri, cp, tp);
13267c478bd9Sstevel@tonic-gate
13277c478bd9Sstevel@tonic-gate #ifndef NPROBE
13287c478bd9Sstevel@tonic-gate /* Kernel probe */
13297c478bd9Sstevel@tonic-gate if (tnf_tracing_active)
13307c478bd9Sstevel@tonic-gate tnf_thread_queue(tp, cp, tpri);
13317c478bd9Sstevel@tonic-gate #endif /* NPROBE */
13327c478bd9Sstevel@tonic-gate
13337c478bd9Sstevel@tonic-gate ASSERT(tpri >= 0 && tpri < dp->disp_npri);
13347c478bd9Sstevel@tonic-gate
13357c478bd9Sstevel@tonic-gate THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */
13367c478bd9Sstevel@tonic-gate tp->t_disp_queue = dp;
13377c478bd9Sstevel@tonic-gate tp->t_link = NULL;
13387c478bd9Sstevel@tonic-gate
13397c478bd9Sstevel@tonic-gate dq = &dp->disp_q[tpri];
13407c478bd9Sstevel@tonic-gate dp->disp_nrunnable++;
13417c478bd9Sstevel@tonic-gate if (!bound)
1342685679f7Sakolb dp->disp_steal = 0;
/* Order the count updates above before the queue-link stores below. */
13437c478bd9Sstevel@tonic-gate membar_enter();
13447c478bd9Sstevel@tonic-gate
/* Append to the tail of the per-priority queue (back-of-queue insert). */
13457c478bd9Sstevel@tonic-gate if (dq->dq_sruncnt++ != 0) {
13467c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first != NULL);
13477c478bd9Sstevel@tonic-gate dq->dq_last->t_link = tp;
13487c478bd9Sstevel@tonic-gate dq->dq_last = tp;
13497c478bd9Sstevel@tonic-gate } else {
13507c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first == NULL);
13517c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL);
13527c478bd9Sstevel@tonic-gate dq->dq_first = dq->dq_last = tp;
13537c478bd9Sstevel@tonic-gate BT_SET(dp->disp_qactmap, tpri);
13547c478bd9Sstevel@tonic-gate if (tpri > dp->disp_maxrunpri) {
13557c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = tpri;
13567c478bd9Sstevel@tonic-gate membar_enter();
13577c478bd9Sstevel@tonic-gate cpu_resched(cp, tpri);
13587c478bd9Sstevel@tonic-gate }
13597c478bd9Sstevel@tonic-gate }
13607c478bd9Sstevel@tonic-gate
13617c478bd9Sstevel@tonic-gate if (!bound && tpri > dp->disp_max_unbound_pri) {
13626890d023SEric Saxe if (self && dp->disp_max_unbound_pri == -1 && cp == CPU) {
13637c478bd9Sstevel@tonic-gate /*
13647c478bd9Sstevel@tonic-gate * If there are no other unbound threads on the
13657c478bd9Sstevel@tonic-gate * run queue, don't allow other CPUs to steal
13667c478bd9Sstevel@tonic-gate * this thread while we are in the middle of a
13677c478bd9Sstevel@tonic-gate * context switch. We may just switch to it
13687c478bd9Sstevel@tonic-gate * again right away. CPU_DISP_DONTSTEAL is cleared
13697c478bd9Sstevel@tonic-gate * in swtch and swtch_to.
13707c478bd9Sstevel@tonic-gate */
13717c478bd9Sstevel@tonic-gate cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL;
13727c478bd9Sstevel@tonic-gate }
13737c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = tpri;
13747c478bd9Sstevel@tonic-gate }
13757c478bd9Sstevel@tonic-gate (*disp_enq_thread)(cp, bound);
13767c478bd9Sstevel@tonic-gate }
13777c478bd9Sstevel@tonic-gate
13787c478bd9Sstevel@tonic-gate /*
13797c478bd9Sstevel@tonic-gate * Put the specified thread on the front of the dispatcher
13807c478bd9Sstevel@tonic-gate * queue corresponding to its current priority.
13817c478bd9Sstevel@tonic-gate *
13827c478bd9Sstevel@tonic-gate * Called with the thread in transition, onproc or stopped state
13837c478bd9Sstevel@tonic-gate * and locked (transition implies locked) and at high spl.
13847c478bd9Sstevel@tonic-gate * Returns with the thread in TS_RUN state and still locked.
 *
 * Unlike setbackdq(), this path performs no CMT or run-queue-length
 * balancing; the thread stays on its last CPU unless migration is needed.
13857c478bd9Sstevel@tonic-gate */
13867c478bd9Sstevel@tonic-gate void
setfrontdq(kthread_t * tp)13877c478bd9Sstevel@tonic-gate setfrontdq(kthread_t *tp)
13887c478bd9Sstevel@tonic-gate {
13897c478bd9Sstevel@tonic-gate disp_t *dp;
13907c478bd9Sstevel@tonic-gate dispq_t *dq;
13917c478bd9Sstevel@tonic-gate cpu_t *cp;
13927c478bd9Sstevel@tonic-gate pri_t tpri;
13937c478bd9Sstevel@tonic-gate int bound;
13947c478bd9Sstevel@tonic-gate
13957c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp));
13967c478bd9Sstevel@tonic-gate ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);
13977c478bd9Sstevel@tonic-gate ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */
13987c478bd9Sstevel@tonic-gate
13997c478bd9Sstevel@tonic-gate /*
14007c478bd9Sstevel@tonic-gate * If thread is "swapped" or on the swap queue don't
14017c478bd9Sstevel@tonic-gate * queue it, but wake sched.
14027c478bd9Sstevel@tonic-gate */
14037c478bd9Sstevel@tonic-gate if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
14047c478bd9Sstevel@tonic-gate disp_swapped_setrun(tp);
14057c478bd9Sstevel@tonic-gate return;
14067c478bd9Sstevel@tonic-gate }
14077c478bd9Sstevel@tonic-gate
1408abd41583Sgd209917 if (tp->t_bound_cpu || tp->t_weakbound_cpu)
1409abd41583Sgd209917 bound = 1;
1410abd41583Sgd209917 else
1411abd41583Sgd209917 bound = 0;
1412abd41583Sgd209917
14137c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp);
14147c478bd9Sstevel@tonic-gate if (ncpus == 1)
14157c478bd9Sstevel@tonic-gate cp = tp->t_cpu;
1416abd41583Sgd209917 else if (!bound) {
14177c478bd9Sstevel@tonic-gate if (tpri >= kpqpri) {
14187c478bd9Sstevel@tonic-gate setkpdq(tp, SETKP_FRONT);
14197c478bd9Sstevel@tonic-gate return;
14207c478bd9Sstevel@tonic-gate }
14217c478bd9Sstevel@tonic-gate cp = tp->t_cpu;
14227c478bd9Sstevel@tonic-gate if (tp->t_cpupart == cp->cpu_part) {
14237c478bd9Sstevel@tonic-gate /*
14246890d023SEric Saxe * We'll generally let this thread continue to run
14256890d023SEric Saxe * where it last ran, but will consider migration if:
14266890d023SEric Saxe * - The thread last ran outside its home lgroup.
14276890d023SEric Saxe * - The CPU where it last ran is the target of an
14286890d023SEric Saxe * offline request (a thread_nomigrate() on the in
14296890d023SEric Saxe * motion CPU relies on this when forcing a preempt).
14306890d023SEric Saxe * - The thread isn't the highest priority thread where
14316890d023SEric Saxe * it last ran, and it is considered not likely to
14326890d023SEric Saxe * have significant cache warmth.
14337c478bd9Sstevel@tonic-gate */
14346890d023SEric Saxe if ((!LGRP_CONTAINS_CPU(tp->t_lpl->lpl_lgrp, cp)) ||
14356890d023SEric Saxe (cp == cpu_inmotion)) {
14366890d023SEric Saxe cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
14376890d023SEric Saxe (tp == curthread) ? cp : NULL);
14386890d023SEric Saxe } else if ((tpri < cp->cpu_disp->disp_maxrunpri) &&
14396890d023SEric Saxe (!THREAD_HAS_CACHE_WARMTH(tp))) {
14406890d023SEric Saxe cp = disp_lowpri_cpu(tp->t_cpu, tp->t_lpl, tpri,
14416890d023SEric Saxe NULL);
14426890d023SEric Saxe }
14437c478bd9Sstevel@tonic-gate } else {
14447c478bd9Sstevel@tonic-gate /*
14457c478bd9Sstevel@tonic-gate * Migrate to a cpu in the new partition.
14467c478bd9Sstevel@tonic-gate */
14477c478bd9Sstevel@tonic-gate cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
14487c478bd9Sstevel@tonic-gate tp->t_lpl, tp->t_pri, NULL);
14497c478bd9Sstevel@tonic-gate }
14507c478bd9Sstevel@tonic-gate ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
14517c478bd9Sstevel@tonic-gate } else {
14527c478bd9Sstevel@tonic-gate /*
14537c478bd9Sstevel@tonic-gate * It is possible that t_weakbound_cpu != t_bound_cpu (for
14547c478bd9Sstevel@tonic-gate * a short time until weak binding that existed when the
14557c478bd9Sstevel@tonic-gate * strong binding was established has dropped) so we must
14567c478bd9Sstevel@tonic-gate * favour weak binding over strong.
14577c478bd9Sstevel@tonic-gate */
14587c478bd9Sstevel@tonic-gate cp = tp->t_weakbound_cpu ?
14597c478bd9Sstevel@tonic-gate tp->t_weakbound_cpu : tp->t_bound_cpu;
14607c478bd9Sstevel@tonic-gate }
1461f2bd4627Sjohansen
1462f2bd4627Sjohansen /*
1463f2bd4627Sjohansen * A thread that is ONPROC may be temporarily placed on the run queue
1464f2bd4627Sjohansen * but then chosen to run again by disp. If the thread we're placing on
1465f2bd4627Sjohansen * the queue is in TS_ONPROC state, don't set its t_waitrq until a
1466f2bd4627Sjohansen * replacement process is actually scheduled in swtch(). In this
1467f2bd4627Sjohansen * situation, curthread is the only thread that could be in the ONPROC
1468f2bd4627Sjohansen * state.
1469f2bd4627Sjohansen */
1470f2bd4627Sjohansen if ((tp != curthread) && (tp->t_waitrq == 0)) {
1471f2bd4627Sjohansen hrtime_t curtime;
1472f2bd4627Sjohansen
1473f2bd4627Sjohansen curtime = gethrtime_unscaled();
1474f2bd4627Sjohansen (void) cpu_update_pct(tp, curtime);
1475f2bd4627Sjohansen tp->t_waitrq = curtime;
1476f2bd4627Sjohansen } else {
/* Still update CPU-usage percentage even when t_waitrq is left alone. */
1477f2bd4627Sjohansen (void) cpu_update_pct(tp, gethrtime_unscaled());
1478f2bd4627Sjohansen }
1479f2bd4627Sjohansen
14807c478bd9Sstevel@tonic-gate dp = cp->cpu_disp;
14817c478bd9Sstevel@tonic-gate disp_lock_enter_high(&dp->disp_lock);
14827c478bd9Sstevel@tonic-gate
14837c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp);
14847c478bd9Sstevel@tonic-gate DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 1);
14857c478bd9Sstevel@tonic-gate
14867c478bd9Sstevel@tonic-gate #ifndef NPROBE
14877c478bd9Sstevel@tonic-gate /* Kernel probe */
14887c478bd9Sstevel@tonic-gate if (tnf_tracing_active)
14897c478bd9Sstevel@tonic-gate tnf_thread_queue(tp, cp, tpri);
14907c478bd9Sstevel@tonic-gate #endif /* NPROBE */
14917c478bd9Sstevel@tonic-gate
14927c478bd9Sstevel@tonic-gate ASSERT(tpri >= 0 && tpri < dp->disp_npri);
14937c478bd9Sstevel@tonic-gate
14947c478bd9Sstevel@tonic-gate THREAD_RUN(tp, &dp->disp_lock); /* set TS_RUN state and lock */
14957c478bd9Sstevel@tonic-gate tp->t_disp_queue = dp;
14967c478bd9Sstevel@tonic-gate
14977c478bd9Sstevel@tonic-gate dq = &dp->disp_q[tpri];
14987c478bd9Sstevel@tonic-gate dp->disp_nrunnable++;
14997c478bd9Sstevel@tonic-gate if (!bound)
1500685679f7Sakolb dp->disp_steal = 0;
/* Order the count updates above before the queue-link stores below. */
15017c478bd9Sstevel@tonic-gate membar_enter();
15027c478bd9Sstevel@tonic-gate
/* Prepend to the head of the per-priority queue (front-of-queue insert). */
15037c478bd9Sstevel@tonic-gate if (dq->dq_sruncnt++ != 0) {
15047c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last != NULL);
15057c478bd9Sstevel@tonic-gate tp->t_link = dq->dq_first;
15067c478bd9Sstevel@tonic-gate dq->dq_first = tp;
15077c478bd9Sstevel@tonic-gate } else {
15087c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL);
15097c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first == NULL);
15107c478bd9Sstevel@tonic-gate tp->t_link = NULL;
15117c478bd9Sstevel@tonic-gate dq->dq_first = dq->dq_last = tp;
15127c478bd9Sstevel@tonic-gate BT_SET(dp->disp_qactmap, tpri);
15137c478bd9Sstevel@tonic-gate if (tpri > dp->disp_maxrunpri) {
15147c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = tpri;
15157c478bd9Sstevel@tonic-gate membar_enter();
15167c478bd9Sstevel@tonic-gate cpu_resched(cp, tpri);
15177c478bd9Sstevel@tonic-gate }
15187c478bd9Sstevel@tonic-gate }
15197c478bd9Sstevel@tonic-gate
15207c478bd9Sstevel@tonic-gate if (!bound && tpri > dp->disp_max_unbound_pri) {
15217c478bd9Sstevel@tonic-gate if (tp == curthread && dp->disp_max_unbound_pri == -1 &&
15227c478bd9Sstevel@tonic-gate cp == CPU) {
15237c478bd9Sstevel@tonic-gate /*
15247c478bd9Sstevel@tonic-gate * If there are no other unbound threads on the
15257c478bd9Sstevel@tonic-gate * run queue, don't allow other CPUs to steal
15267c478bd9Sstevel@tonic-gate * this thread while we are in the middle of a
15277c478bd9Sstevel@tonic-gate * context switch. We may just switch to it
15287c478bd9Sstevel@tonic-gate * again right away. CPU_DISP_DONTSTEAL is cleared
15297c478bd9Sstevel@tonic-gate * in swtch and swtch_to.
15307c478bd9Sstevel@tonic-gate */
15317c478bd9Sstevel@tonic-gate cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL;
15327c478bd9Sstevel@tonic-gate }
15337c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = tpri;
15347c478bd9Sstevel@tonic-gate }
15357c478bd9Sstevel@tonic-gate (*disp_enq_thread)(cp, bound);
15367c478bd9Sstevel@tonic-gate }
15377c478bd9Sstevel@tonic-gate
15387c478bd9Sstevel@tonic-gate /*
15397c478bd9Sstevel@tonic-gate * Put a high-priority unbound thread on the kp queue
 * (the per-cpupart kernel-preemption queue, cp_kp_queue).
 *
 * "borf" is SETKP_BACK or SETKP_FRONT, selecting which end of the
 * per-priority queue the thread is inserted at.
15407c478bd9Sstevel@tonic-gate */
15417c478bd9Sstevel@tonic-gate static void
setkpdq(kthread_t * tp,int borf)15427c478bd9Sstevel@tonic-gate setkpdq(kthread_t *tp, int borf)
15437c478bd9Sstevel@tonic-gate {
15447c478bd9Sstevel@tonic-gate dispq_t *dq;
15457c478bd9Sstevel@tonic-gate disp_t *dp;
15467c478bd9Sstevel@tonic-gate cpu_t *cp;
15477c478bd9Sstevel@tonic-gate pri_t tpri;
15487c478bd9Sstevel@tonic-gate
15497c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp);
15507c478bd9Sstevel@tonic-gate
15517c478bd9Sstevel@tonic-gate dp = &tp->t_cpupart->cp_kp_queue;
15527c478bd9Sstevel@tonic-gate disp_lock_enter_high(&dp->disp_lock);
15537c478bd9Sstevel@tonic-gate
/* NOTE(review): trace label says "frontq" even when borf == SETKP_BACK. */
15547c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp);
15557c478bd9Sstevel@tonic-gate
15567c478bd9Sstevel@tonic-gate ASSERT(tpri >= 0 && tpri < dp->disp_npri);
15577c478bd9Sstevel@tonic-gate DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, borf);
15587c478bd9Sstevel@tonic-gate THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */
15597c478bd9Sstevel@tonic-gate tp->t_disp_queue = dp;
15607c478bd9Sstevel@tonic-gate dp->disp_nrunnable++;
15617c478bd9Sstevel@tonic-gate dq = &dp->disp_q[tpri];
15627c478bd9Sstevel@tonic-gate
/* Link into the per-priority queue at the end selected by borf. */
15637c478bd9Sstevel@tonic-gate if (dq->dq_sruncnt++ != 0) {
15647c478bd9Sstevel@tonic-gate if (borf == SETKP_BACK) {
15657c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first != NULL);
15667c478bd9Sstevel@tonic-gate tp->t_link = NULL;
15677c478bd9Sstevel@tonic-gate dq->dq_last->t_link = tp;
15687c478bd9Sstevel@tonic-gate dq->dq_last = tp;
15697c478bd9Sstevel@tonic-gate } else {
15707c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last != NULL);
15717c478bd9Sstevel@tonic-gate tp->t_link = dq->dq_first;
15727c478bd9Sstevel@tonic-gate dq->dq_first = tp;
15737c478bd9Sstevel@tonic-gate }
15747c478bd9Sstevel@tonic-gate } else {
15757c478bd9Sstevel@tonic-gate if (borf == SETKP_BACK) {
15767c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first == NULL);
15777c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL);
15787c478bd9Sstevel@tonic-gate dq->dq_first = dq->dq_last = tp;
15797c478bd9Sstevel@tonic-gate } else {
15807c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL);
15817c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first == NULL);
15827c478bd9Sstevel@tonic-gate tp->t_link = NULL;
15837c478bd9Sstevel@tonic-gate dq->dq_first = dq->dq_last = tp;
15847c478bd9Sstevel@tonic-gate }
15857c478bd9Sstevel@tonic-gate BT_SET(dp->disp_qactmap, tpri);
15867c478bd9Sstevel@tonic-gate if (tpri > dp->disp_max_unbound_pri)
15877c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = tpri;
15887c478bd9Sstevel@tonic-gate if (tpri > dp->disp_maxrunpri) {
15897c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = tpri;
15907c478bd9Sstevel@tonic-gate membar_enter();
15917c478bd9Sstevel@tonic-gate }
15927c478bd9Sstevel@tonic-gate }
15937c478bd9Sstevel@tonic-gate
15947c478bd9Sstevel@tonic-gate cp = tp->t_cpu;
15957c478bd9Sstevel@tonic-gate if (tp->t_cpupart != cp->cpu_part) {
15967c478bd9Sstevel@tonic-gate /* migrate to a cpu in the new partition */
15977c478bd9Sstevel@tonic-gate cp = tp->t_cpupart->cp_cpulist;
15987c478bd9Sstevel@tonic-gate }
/* Pick a low-priority CPU (per disp_lowpri_cpu) and nudge it to resched. */
15997c478bd9Sstevel@tonic-gate cp = disp_lowpri_cpu(cp, tp->t_lpl, tp->t_pri, NULL);
16007c478bd9Sstevel@tonic-gate disp_lock_enter_high(&cp->cpu_disp->disp_lock);
16017c478bd9Sstevel@tonic-gate ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
16027c478bd9Sstevel@tonic-gate
16037c478bd9Sstevel@tonic-gate #ifndef NPROBE
16047c478bd9Sstevel@tonic-gate /* Kernel probe */
16057c478bd9Sstevel@tonic-gate if (tnf_tracing_active)
16067c478bd9Sstevel@tonic-gate tnf_thread_queue(tp, cp, tpri);
16077c478bd9Sstevel@tonic-gate #endif /* NPROBE */
16087c478bd9Sstevel@tonic-gate
16097c478bd9Sstevel@tonic-gate if (cp->cpu_chosen_level < tpri)
16107c478bd9Sstevel@tonic-gate cp->cpu_chosen_level = tpri;
16117c478bd9Sstevel@tonic-gate cpu_resched(cp, tpri);
16127c478bd9Sstevel@tonic-gate disp_lock_exit_high(&cp->cpu_disp->disp_lock);
16137c478bd9Sstevel@tonic-gate (*disp_enq_thread)(cp, 0);
16147c478bd9Sstevel@tonic-gate }
16157c478bd9Sstevel@tonic-gate
16167c478bd9Sstevel@tonic-gate /*
16177c478bd9Sstevel@tonic-gate * Remove a thread from the dispatcher queue if it is on it.
16187c478bd9Sstevel@tonic-gate * It is not an error if it is not found but we return whether
16197c478bd9Sstevel@tonic-gate * or not it was found in case the caller wants to check.
 *
 * Returns 0 if the thread is not in TS_RUN state, 1 otherwise.
 * Panics if the thread is TS_RUN and loaded but not actually on its
 * dispatch queue. On success the thread is left in transition state.
16207c478bd9Sstevel@tonic-gate */
16217c478bd9Sstevel@tonic-gate int
dispdeq(kthread_t * tp)16227c478bd9Sstevel@tonic-gate dispdeq(kthread_t *tp)
16237c478bd9Sstevel@tonic-gate {
16247c478bd9Sstevel@tonic-gate disp_t *dp;
16257c478bd9Sstevel@tonic-gate dispq_t *dq;
16267c478bd9Sstevel@tonic-gate kthread_t *rp;
16277c478bd9Sstevel@tonic-gate kthread_t *trp;
16287c478bd9Sstevel@tonic-gate kthread_t **ptp;
16297c478bd9Sstevel@tonic-gate int tpri;
16307c478bd9Sstevel@tonic-gate
16317c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp));
16327c478bd9Sstevel@tonic-gate
16337c478bd9Sstevel@tonic-gate if (tp->t_state != TS_RUN)
16347c478bd9Sstevel@tonic-gate return (0);
16357c478bd9Sstevel@tonic-gate
16367c478bd9Sstevel@tonic-gate /*
16377c478bd9Sstevel@tonic-gate * The thread is "swapped" or is on the swap queue and
16387c478bd9Sstevel@tonic-gate * hence no longer on the run queue, so return true.
16397c478bd9Sstevel@tonic-gate */
16407c478bd9Sstevel@tonic-gate if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD)
16417c478bd9Sstevel@tonic-gate return (1);
16427c478bd9Sstevel@tonic-gate
16437c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp);
16447c478bd9Sstevel@tonic-gate dp = tp->t_disp_queue;
16457c478bd9Sstevel@tonic-gate ASSERT(tpri < dp->disp_npri);
16467c478bd9Sstevel@tonic-gate dq = &dp->disp_q[tpri];
16477c478bd9Sstevel@tonic-gate ptp = &dq->dq_first;
16487c478bd9Sstevel@tonic-gate rp = *ptp;
16497c478bd9Sstevel@tonic-gate trp = NULL;
16507c478bd9Sstevel@tonic-gate
16517c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL);
16527c478bd9Sstevel@tonic-gate
16537c478bd9Sstevel@tonic-gate /*
16547c478bd9Sstevel@tonic-gate * Search for thread in queue.
16557c478bd9Sstevel@tonic-gate * Double links would simplify this at the expense of disp/setrun.
 * trp trails rp by one node; ptp points at the link to patch on removal.
16567c478bd9Sstevel@tonic-gate */
16577c478bd9Sstevel@tonic-gate while (rp != tp && rp != NULL) {
16587c478bd9Sstevel@tonic-gate trp = rp;
16597c478bd9Sstevel@tonic-gate ptp = &trp->t_link;
16607c478bd9Sstevel@tonic-gate rp = trp->t_link;
16617c478bd9Sstevel@tonic-gate }
16627c478bd9Sstevel@tonic-gate
16637c478bd9Sstevel@tonic-gate if (rp == NULL) {
16647c478bd9Sstevel@tonic-gate panic("dispdeq: thread not on queue");
16657c478bd9Sstevel@tonic-gate }
16667c478bd9Sstevel@tonic-gate
16677c478bd9Sstevel@tonic-gate DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp);
16687c478bd9Sstevel@tonic-gate
16697c478bd9Sstevel@tonic-gate /*
16707c478bd9Sstevel@tonic-gate * Found it so remove it from queue.
16717c478bd9Sstevel@tonic-gate */
16727c478bd9Sstevel@tonic-gate if ((*ptp = rp->t_link) == NULL)
16737c478bd9Sstevel@tonic-gate dq->dq_last = trp;
16747c478bd9Sstevel@tonic-gate
16757c478bd9Sstevel@tonic-gate dp->disp_nrunnable--;
/* Removed the last schedulable thread at this priority: clear the bit. */
16767c478bd9Sstevel@tonic-gate if (--dq->dq_sruncnt == 0) {
16777c478bd9Sstevel@tonic-gate dp->disp_qactmap[tpri >> BT_ULSHIFT] &= ~BT_BIW(tpri);
16787c478bd9Sstevel@tonic-gate if (dp->disp_nrunnable == 0) {
16797c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = -1;
16807c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = -1;
16817c478bd9Sstevel@tonic-gate } else if (tpri == dp->disp_maxrunpri) {
16827c478bd9Sstevel@tonic-gate int ipri;
16837c478bd9Sstevel@tonic-gate
/* Recompute the highest active priority from the active-queue bitmap. */
16847c478bd9Sstevel@tonic-gate ipri = bt_gethighbit(dp->disp_qactmap,
16857c478bd9Sstevel@tonic-gate dp->disp_maxrunpri >> BT_ULSHIFT);
16867c478bd9Sstevel@tonic-gate if (ipri < dp->disp_max_unbound_pri)
16877c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = ipri;
16887c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = ipri;
16897c478bd9Sstevel@tonic-gate }
16907c478bd9Sstevel@tonic-gate }
16917c478bd9Sstevel@tonic-gate tp->t_link = NULL;
16927c478bd9Sstevel@tonic-gate THREAD_TRANSITION(tp); /* put in intermediate state */
16937c478bd9Sstevel@tonic-gate return (1);
16947c478bd9Sstevel@tonic-gate }
16957c478bd9Sstevel@tonic-gate
16967c478bd9Sstevel@tonic-gate
16977c478bd9Sstevel@tonic-gate /*
16987c478bd9Sstevel@tonic-gate * dq_sruninc and dq_srundec are public functions for
16997c478bd9Sstevel@tonic-gate * incrementing/decrementing the sruncnts when a thread on
17007c478bd9Sstevel@tonic-gate * a dispatcher queue is made schedulable/unschedulable by
17017c478bd9Sstevel@tonic-gate * resetting the TS_LOAD flag.
17027c478bd9Sstevel@tonic-gate *
17037c478bd9Sstevel@tonic-gate * The caller MUST have the thread lock and therefore the dispatcher
17047c478bd9Sstevel@tonic-gate * queue lock so that the operation which changes
17057c478bd9Sstevel@tonic-gate * the flag, the operation that checks the status of the thread to
17067c478bd9Sstevel@tonic-gate * determine if it's on a disp queue AND the call to this function
17077c478bd9Sstevel@tonic-gate * are one atomic operation with respect to interrupts.
17087c478bd9Sstevel@tonic-gate */
17097c478bd9Sstevel@tonic-gate
17107c478bd9Sstevel@tonic-gate /*
17117c478bd9Sstevel@tonic-gate * Called by sched AFTER TS_LOAD flag is set on a swapped, runnable thread.
17127c478bd9Sstevel@tonic-gate */
17137c478bd9Sstevel@tonic-gate void
dq_sruninc(kthread_t * t)17147c478bd9Sstevel@tonic-gate dq_sruninc(kthread_t *t)
17157c478bd9Sstevel@tonic-gate {
17167c478bd9Sstevel@tonic-gate ASSERT(t->t_state == TS_RUN);
17177c478bd9Sstevel@tonic-gate ASSERT(t->t_schedflag & TS_LOAD);
17187c478bd9Sstevel@tonic-gate
/* Move through the transition state, then requeue on a dispatch queue. */
17197c478bd9Sstevel@tonic-gate THREAD_TRANSITION(t);
17207c478bd9Sstevel@tonic-gate setfrontdq(t);
17217c478bd9Sstevel@tonic-gate }
17227c478bd9Sstevel@tonic-gate
17237c478bd9Sstevel@tonic-gate /*
17247c478bd9Sstevel@tonic-gate * See comment on calling conventions above.
17257c478bd9Sstevel@tonic-gate * Called by sched BEFORE TS_LOAD flag is cleared on a runnable thread.
17267c478bd9Sstevel@tonic-gate */
17277c478bd9Sstevel@tonic-gate void
dq_srundec(kthread_t * t)17287c478bd9Sstevel@tonic-gate dq_srundec(kthread_t *t)
17297c478bd9Sstevel@tonic-gate {
17307c478bd9Sstevel@tonic-gate ASSERT(t->t_schedflag & TS_LOAD);
17317c478bd9Sstevel@tonic-gate
/* Pull the thread off its run queue, then account for it as swapped. */
17327c478bd9Sstevel@tonic-gate (void) dispdeq(t);
17337c478bd9Sstevel@tonic-gate disp_swapped_enq(t);
17347c478bd9Sstevel@tonic-gate }
17357c478bd9Sstevel@tonic-gate
17367c478bd9Sstevel@tonic-gate /*
17377c478bd9Sstevel@tonic-gate * Change the dispatcher lock of thread to the "swapped_lock"
17387c478bd9Sstevel@tonic-gate * and return with thread lock still held.
17397c478bd9Sstevel@tonic-gate *
17407c478bd9Sstevel@tonic-gate * Called with thread_lock held, in transition state, and at high spl.
 * Panics on any t_state other than TS_RUN or TS_ONPROC.
17417c478bd9Sstevel@tonic-gate */
17427c478bd9Sstevel@tonic-gate void
disp_swapped_enq(kthread_t * tp)17437c478bd9Sstevel@tonic-gate disp_swapped_enq(kthread_t *tp)
17447c478bd9Sstevel@tonic-gate {
17457c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp));
17467c478bd9Sstevel@tonic-gate ASSERT(tp->t_schedflag & TS_LOAD);
17477c478bd9Sstevel@tonic-gate
17487c478bd9Sstevel@tonic-gate switch (tp->t_state) {
17497c478bd9Sstevel@tonic-gate case TS_RUN:
17507c478bd9Sstevel@tonic-gate disp_lock_enter_high(&swapped_lock);
17517c478bd9Sstevel@tonic-gate THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */
17527c478bd9Sstevel@tonic-gate break;
17537c478bd9Sstevel@tonic-gate case TS_ONPROC:
17547c478bd9Sstevel@tonic-gate disp_lock_enter_high(&swapped_lock);
/* An ONPROC thread must pass through the transition state first. */
17557c478bd9Sstevel@tonic-gate THREAD_TRANSITION(tp);
17567c478bd9Sstevel@tonic-gate wake_sched_sec = 1; /* tell clock to wake sched */
17577c478bd9Sstevel@tonic-gate THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */
17587c478bd9Sstevel@tonic-gate break;
17597c478bd9Sstevel@tonic-gate default:
17607c478bd9Sstevel@tonic-gate panic("disp_swapped: tp: %p bad t_state", (void *)tp);
17617c478bd9Sstevel@tonic-gate }
17627c478bd9Sstevel@tonic-gate }
17637c478bd9Sstevel@tonic-gate
17647c478bd9Sstevel@tonic-gate /*
17657c478bd9Sstevel@tonic-gate * This routine is called by setbackdq/setfrontdq if the thread is
17667c478bd9Sstevel@tonic-gate * not loaded or loaded and on the swap queue.
17677c478bd9Sstevel@tonic-gate *
17687c478bd9Sstevel@tonic-gate * Thread state TS_SLEEP implies that a swapped thread
17697c478bd9Sstevel@tonic-gate * has been woken up and needs to be swapped in by the swapper.
17707c478bd9Sstevel@tonic-gate *
17717c478bd9Sstevel@tonic-gate * Thread state TS_RUN, it implies that the priority of a swapped
17727c478bd9Sstevel@tonic-gate * thread is being increased by scheduling class (e.g. ts_update).
 * Panics on any other t_state.
17737c478bd9Sstevel@tonic-gate */
17747c478bd9Sstevel@tonic-gate static void
disp_swapped_setrun(kthread_t * tp)17757c478bd9Sstevel@tonic-gate disp_swapped_setrun(kthread_t *tp)
17767c478bd9Sstevel@tonic-gate {
17777c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp));
17787c478bd9Sstevel@tonic-gate ASSERT((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD);
17797c478bd9Sstevel@tonic-gate
17807c478bd9Sstevel@tonic-gate switch (tp->t_state) {
17817c478bd9Sstevel@tonic-gate case TS_SLEEP:
17827c478bd9Sstevel@tonic-gate disp_lock_enter_high(&swapped_lock);
17837c478bd9Sstevel@tonic-gate /*
17847c478bd9Sstevel@tonic-gate * Wakeup sched immediately (i.e., next tick) if the
17857c478bd9Sstevel@tonic-gate * thread priority is above maxclsyspri.
17867c478bd9Sstevel@tonic-gate */
17877c478bd9Sstevel@tonic-gate if (DISP_PRIO(tp) > maxclsyspri)
17887c478bd9Sstevel@tonic-gate wake_sched = 1;
17897c478bd9Sstevel@tonic-gate else
17907c478bd9Sstevel@tonic-gate wake_sched_sec = 1;
17917c478bd9Sstevel@tonic-gate THREAD_RUN(tp, &swapped_lock); /* set TS_RUN state and lock */
17927c478bd9Sstevel@tonic-gate break;
17937c478bd9Sstevel@tonic-gate case TS_RUN: /* called from ts_update */
/* Nothing to do: the thread stays swapped; only its priority changed. */
17947c478bd9Sstevel@tonic-gate break;
17957c478bd9Sstevel@tonic-gate default:
17968793b36bSNick Todd panic("disp_swapped_setrun: tp: %p bad t_state", (void *)tp);
17977c478bd9Sstevel@tonic-gate }
17987c478bd9Sstevel@tonic-gate }
17997c478bd9Sstevel@tonic-gate
/*
 * Make a thread give up its processor.  Find the processor on
 * which this thread is executing, and have that processor
 * preempt.
 *
 * We allow System Duty Cycle (SDC) threads to be preempted even if
 * they are running at kernel priorities.  To implement this, we always
 * set cpu_kprunrun; this ensures preempt() will be called.  Since SDC
 * calls cpu_surrender() very often, we only preempt if there is anyone
 * competing with us.
 */
void
cpu_surrender(kthread_t *tp)
{
	cpu_t	*cpup;
	int	max_pri;
	int	max_run_pri;
	klwp_t	*lwp;

	ASSERT(THREAD_LOCK_HELD(tp));

	/* only a thread currently on a CPU can be asked to surrender it */
	if (tp->t_state != TS_ONPROC)
		return;
	cpup = tp->t_disp_queue->disp_cpu;	/* CPU thread dispatched to */
	max_pri = cpup->cpu_disp->disp_maxrunpri; /* best pri of that CPU */
	max_run_pri = CP_MAXRUNPRI(cpup->cpu_part);
	if (max_pri < max_run_pri)
		max_pri = max_run_pri;

	if (tp->t_cid == sysdccid) {
		uint_t t_pri = DISP_PRIO(tp);
		if (t_pri > max_pri)
			return;		/* we are not competing w/ anyone */
		cpup->cpu_runrun = cpup->cpu_kprunrun = 1;
	} else {
		cpup->cpu_runrun = 1;
		if (max_pri >= kpreemptpri && cpup->cpu_kprunrun == 0) {
			cpup->cpu_kprunrun = 1;
		}
	}

	/*
	 * Propagate cpu_runrun, and cpu_kprunrun to global visibility
	 * before poking the target CPU below.
	 */
	membar_enter();

	DTRACE_SCHED1(surrender, kthread_t *, tp);

	/*
	 * Make the target thread take an excursion through trap()
	 * to do preempt() (unless we're already in trap or post_syscall,
	 * calling cpu_surrender via CL_TRAPRET).
	 */
	if (tp != curthread || (lwp = tp->t_lwp) == NULL ||
	    lwp->lwp_state != LWP_USER) {
		aston(tp);
		/* cross-call only if the thread is on some other CPU */
		if (cpup != CPU)
			poke_cpu(cpup->cpu_id);
	}
	TRACE_2(TR_FAC_DISP, TR_CPU_SURRENDER,
	    "cpu_surrender:tid %p cpu %p", tp, cpup);
}
18627c478bd9Sstevel@tonic-gate
/*
 * Commit to and ratify a scheduling decision.
 *
 * Clears this CPU's preemption flags, then re-checks both the local
 * dispatch queue and the partition's kernel-preemption queue (kpq).
 * If anything of higher priority than tp showed up in the meantime,
 * tp is put back on the front of its queue and NULL is returned so
 * the caller will pick again; otherwise tp itself is returned.
 */
/*ARGSUSED*/
static kthread_t *
disp_ratify(kthread_t *tp, disp_t *kpq)
{
	pri_t	tpri, maxpri;
	pri_t	maxkpri;
	cpu_t	*cpup;

	ASSERT(tp != NULL);
	/*
	 * Commit to, then ratify scheduling decision.  The runrun flags
	 * must be cleared BEFORE the maxrunpri values are re-read (the
	 * membar_enter() enforces that ordering), so that a concurrent
	 * cpu_resched() either sees the flags still set or we see its
	 * newly enqueued work.
	 */
	cpup = CPU;
	if (cpup->cpu_runrun != 0)
		cpup->cpu_runrun = 0;
	if (cpup->cpu_kprunrun != 0)
		cpup->cpu_kprunrun = 0;
	if (cpup->cpu_chosen_level != -1)
		cpup->cpu_chosen_level = -1;
	membar_enter();
	tpri = DISP_PRIO(tp);
	maxpri = cpup->cpu_disp->disp_maxrunpri;
	maxkpri = kpq->disp_maxrunpri;
	if (maxpri < maxkpri)
		maxpri = maxkpri;
	if (tpri < maxpri) {
		/*
		 * should have done better
		 * put this one back and indicate to try again
		 */
		cpup->cpu_dispthread = curthread;	/* fixup dispthread */
		cpup->cpu_dispatch_pri = DISP_PRIO(curthread);
		thread_lock_high(tp);
		THREAD_TRANSITION(tp);
		setfrontdq(tp);
		thread_unlock_nopreempt(tp);

		tp = NULL;
	}
	return (tp);
}
19077c478bd9Sstevel@tonic-gate
/*
 * See if there is any work on the dispatcher queue for other CPUs.
 * If there is, dequeue the best thread and return.
 *
 * Returns the stolen thread, NULL if nothing was found, or
 * T_DONTSTEAL if candidates exist but are not yet stealable
 * (so the caller should retry later rather than halt).
 */
static kthread_t *
disp_getwork(cpu_t *cp)
{
	cpu_t		*ocp;		/* other CPU */
	cpu_t		*ocp_start;
	cpu_t		*tcp;		/* target local CPU */
	kthread_t	*tp;
	kthread_t	*retval = NULL;
	pri_t		maxpri;
	disp_t		*kpq;		/* kp queue for this partition */
	lpl_t		*lpl, *lpl_leaf;
	int		leafidx, startidx;
	hrtime_t	stealtime;
	lgrp_id_t	local_id;

	maxpri = -1;
	tcp = NULL;

	/*
	 * The partition-wide kernel preemption queue takes precedence
	 * over per-CPU queues.
	 */
	kpq = &cp->cpu_part->cp_kp_queue;
	while (kpq->disp_maxrunpri >= 0) {
		/*
		 * Try to take a thread from the kp_queue.
		 */
		tp = (disp_getbest(kpq));
		if (tp)
			return (disp_ratify(tp, kpq));
	}

	kpreempt_disable();		/* protect the cpu_active list */

	/*
	 * Try to find something to do on another CPU's run queue.
	 * Loop through all other CPUs looking for the one with the highest
	 * priority unbound thread.
	 *
	 * On NUMA machines, the partition's CPUs are consulted in order of
	 * distance from the current CPU. This way, the first available
	 * work found is also the closest, and will suffer the least
	 * from being migrated.
	 */
	lpl = lpl_leaf = cp->cpu_lpl;
	local_id = lpl_leaf->lpl_lgrpid;
	leafidx = startidx = 0;

	/*
	 * This loop traverses the lpl hierarchy. Higher level lpls represent
	 * broader levels of locality
	 */
	do {
		/* This loop iterates over the lpl's leaves */
		do {
			if (lpl_leaf != cp->cpu_lpl)
				ocp = lpl_leaf->lpl_cpus;
			else
				ocp = cp->cpu_next_lpl;

			/* This loop iterates over the CPUs in the leaf */
			ocp_start = ocp;
			do {
				pri_t pri;

				ASSERT(CPU_ACTIVE(ocp));

				/*
				 * End our stroll around this lpl if:
				 *
				 * - Something became runnable on the local
				 *   queue...which also ends our stroll around
				 *   the partition.
				 *
				 * - We happen across another idle CPU.
				 *   Since it is patrolling the next portion
				 *   of the lpl's list (assuming it's not
				 *   halted, or busy servicing an interrupt),
				 *   move to the next higher level of locality.
				 */
				if (cp->cpu_disp->disp_nrunnable != 0) {
					kpreempt_enable();
					return (NULL);
				}
				if (ocp->cpu_dispatch_pri == -1) {
					if (ocp->cpu_disp_flags &
					    CPU_DISP_HALTED ||
					    ocp->cpu_intr_actv != 0)
						continue;
					else
						goto next_level;
				}

				/*
				 * If there's only one thread and the CPU
				 * is in the middle of a context switch,
				 * or it's currently running the idle thread,
				 * don't steal it.
				 */
				if ((ocp->cpu_disp_flags &
				    CPU_DISP_DONTSTEAL) &&
				    ocp->cpu_disp->disp_nrunnable == 1)
					continue;

				pri = ocp->cpu_disp->disp_max_unbound_pri;
				if (pri > maxpri) {
					/*
					 * Don't steal threads that we attempted
					 * to steal recently until they're ready
					 * to be stolen again.
					 */
					stealtime = ocp->cpu_disp->disp_steal;
					if (stealtime == 0 ||
					    stealtime - gethrtime() <= 0) {
						maxpri = pri;
						tcp = ocp;
					} else {
						/*
						 * Don't update tcp, just set
						 * the retval to T_DONTSTEAL, so
						 * that if no acceptable CPUs
						 * are found the return value
						 * will be T_DONTSTEAL rather
						 * than NULL.
						 */
						retval = T_DONTSTEAL;
					}
				}
			} while ((ocp = ocp->cpu_next_lpl) != ocp_start);

			/*
			 * Iterate to the next leaf lpl in the resource set
			 * at this level of locality. If we hit the end of
			 * the set, wrap back around to the beginning.
			 *
			 * Note: This iteration is NULL terminated for a reason;
			 * see lpl_topo_bootstrap() in lgrp.c for details.
			 */
			if ((lpl_leaf = lpl->lpl_rset[++leafidx]) == NULL) {
				leafidx = 0;
				lpl_leaf = lpl->lpl_rset[leafidx];
			}
		} while (leafidx != startidx);

next_level:
		/*
		 * Expand the search to include farther away CPUs (next
		 * locality level). The closer CPUs that have already been
		 * checked will be checked again. In doing so, idle CPUs
		 * will tend to be more aggressive about stealing from CPUs
		 * that are closer (since the closer CPUs will be considered
		 * more often).
		 * Begin at this level with the CPUs local leaf lpl.
		 */
		if ((lpl = lpl->lpl_parent) != NULL) {
			leafidx = startidx = lpl->lpl_id2rset[local_id];
			lpl_leaf = lpl->lpl_rset[leafidx];
		}
	} while (!tcp && lpl);

	kpreempt_enable();

	/*
	 * If another queue looks good, and there is still nothing on
	 * the local queue, try to transfer one or more threads
	 * from it to our queue.
	 */
	if (tcp && cp->cpu_disp->disp_nrunnable == 0) {
		tp = disp_getbest(tcp->cpu_disp);
		if (tp == NULL || tp == T_DONTSTEAL)
			return (tp);
		return (disp_ratify(tp, kpq));
	}
	return (retval);
}
20837c478bd9Sstevel@tonic-gate
20847c478bd9Sstevel@tonic-gate
20857c478bd9Sstevel@tonic-gate /*
20867c478bd9Sstevel@tonic-gate * disp_fix_unbound_pri()
20877c478bd9Sstevel@tonic-gate * Determines the maximum priority of unbound threads on the queue.
20887c478bd9Sstevel@tonic-gate * The priority is kept for the queue, but is only increased, never
20897c478bd9Sstevel@tonic-gate * reduced unless some CPU is looking for something on that queue.
20907c478bd9Sstevel@tonic-gate *
20917c478bd9Sstevel@tonic-gate * The priority argument is the known upper limit.
20927c478bd9Sstevel@tonic-gate *
20937c478bd9Sstevel@tonic-gate * Perhaps this should be kept accurately, but that probably means
20947c478bd9Sstevel@tonic-gate * separate bitmaps for bound and unbound threads. Since only idled
20957c478bd9Sstevel@tonic-gate * CPUs will have to do this recalculation, it seems better this way.
20967c478bd9Sstevel@tonic-gate */
20977c478bd9Sstevel@tonic-gate static void
disp_fix_unbound_pri(disp_t * dp,pri_t pri)20987c478bd9Sstevel@tonic-gate disp_fix_unbound_pri(disp_t *dp, pri_t pri)
20997c478bd9Sstevel@tonic-gate {
21007c478bd9Sstevel@tonic-gate kthread_t *tp;
21017c478bd9Sstevel@tonic-gate dispq_t *dq;
21027c478bd9Sstevel@tonic-gate ulong_t *dqactmap = dp->disp_qactmap;
21037c478bd9Sstevel@tonic-gate ulong_t mapword;
21047c478bd9Sstevel@tonic-gate int wx;
21057c478bd9Sstevel@tonic-gate
21067c478bd9Sstevel@tonic-gate ASSERT(DISP_LOCK_HELD(&dp->disp_lock));
21077c478bd9Sstevel@tonic-gate
21087c478bd9Sstevel@tonic-gate ASSERT(pri >= 0); /* checked by caller */
21097c478bd9Sstevel@tonic-gate
21107c478bd9Sstevel@tonic-gate /*
21117c478bd9Sstevel@tonic-gate * Start the search at the next lowest priority below the supplied
21127c478bd9Sstevel@tonic-gate * priority. This depends on the bitmap implementation.
21137c478bd9Sstevel@tonic-gate */
21147c478bd9Sstevel@tonic-gate do {
21157c478bd9Sstevel@tonic-gate wx = pri >> BT_ULSHIFT; /* index of word in map */
21167c478bd9Sstevel@tonic-gate
21177c478bd9Sstevel@tonic-gate /*
21187c478bd9Sstevel@tonic-gate * Form mask for all lower priorities in the word.
21197c478bd9Sstevel@tonic-gate */
21207c478bd9Sstevel@tonic-gate mapword = dqactmap[wx] & (BT_BIW(pri) - 1);
21217c478bd9Sstevel@tonic-gate
21227c478bd9Sstevel@tonic-gate /*
21237c478bd9Sstevel@tonic-gate * Get next lower active priority.
21247c478bd9Sstevel@tonic-gate */
21257c478bd9Sstevel@tonic-gate if (mapword != 0) {
21267c478bd9Sstevel@tonic-gate pri = (wx << BT_ULSHIFT) + highbit(mapword) - 1;
21277c478bd9Sstevel@tonic-gate } else if (wx > 0) {
21287c478bd9Sstevel@tonic-gate pri = bt_gethighbit(dqactmap, wx - 1); /* sign extend */
21297c478bd9Sstevel@tonic-gate if (pri < 0)
21307c478bd9Sstevel@tonic-gate break;
21317c478bd9Sstevel@tonic-gate } else {
21327c478bd9Sstevel@tonic-gate pri = -1;
21337c478bd9Sstevel@tonic-gate break;
21347c478bd9Sstevel@tonic-gate }
21357c478bd9Sstevel@tonic-gate
21367c478bd9Sstevel@tonic-gate /*
21377c478bd9Sstevel@tonic-gate * Search the queue for unbound, runnable threads.
21387c478bd9Sstevel@tonic-gate */
21397c478bd9Sstevel@tonic-gate dq = &dp->disp_q[pri];
21407c478bd9Sstevel@tonic-gate tp = dq->dq_first;
21417c478bd9Sstevel@tonic-gate
21427c478bd9Sstevel@tonic-gate while (tp && (tp->t_bound_cpu || tp->t_weakbound_cpu)) {
21437c478bd9Sstevel@tonic-gate tp = tp->t_link;
21447c478bd9Sstevel@tonic-gate }
21457c478bd9Sstevel@tonic-gate
21467c478bd9Sstevel@tonic-gate /*
21477c478bd9Sstevel@tonic-gate * If a thread was found, set the priority and return.
21487c478bd9Sstevel@tonic-gate */
21497c478bd9Sstevel@tonic-gate } while (tp == NULL);
21507c478bd9Sstevel@tonic-gate
21517c478bd9Sstevel@tonic-gate /*
21527c478bd9Sstevel@tonic-gate * pri holds the maximum unbound thread priority or -1.
21537c478bd9Sstevel@tonic-gate */
21547c478bd9Sstevel@tonic-gate if (dp->disp_max_unbound_pri != pri)
21557c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = pri;
21567c478bd9Sstevel@tonic-gate }
21577c478bd9Sstevel@tonic-gate
21587c478bd9Sstevel@tonic-gate /*
21597c478bd9Sstevel@tonic-gate * disp_adjust_unbound_pri() - thread is becoming unbound, so we should
21607c478bd9Sstevel@tonic-gate * check if the CPU to which is was previously bound should have
21617c478bd9Sstevel@tonic-gate * its disp_max_unbound_pri increased.
21627c478bd9Sstevel@tonic-gate */
21637c478bd9Sstevel@tonic-gate void
disp_adjust_unbound_pri(kthread_t * tp)21647c478bd9Sstevel@tonic-gate disp_adjust_unbound_pri(kthread_t *tp)
21657c478bd9Sstevel@tonic-gate {
21667c478bd9Sstevel@tonic-gate disp_t *dp;
21677c478bd9Sstevel@tonic-gate pri_t tpri;
21687c478bd9Sstevel@tonic-gate
21697c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp));
21707c478bd9Sstevel@tonic-gate
21717c478bd9Sstevel@tonic-gate /*
21727c478bd9Sstevel@tonic-gate * Don't do anything if the thread is not bound, or
21737c478bd9Sstevel@tonic-gate * currently not runnable or swapped out.
21747c478bd9Sstevel@tonic-gate */
21757c478bd9Sstevel@tonic-gate if (tp->t_bound_cpu == NULL ||
21767c478bd9Sstevel@tonic-gate tp->t_state != TS_RUN ||
21777c478bd9Sstevel@tonic-gate tp->t_schedflag & TS_ON_SWAPQ)
21787c478bd9Sstevel@tonic-gate return;
21797c478bd9Sstevel@tonic-gate
21807c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp);
21817c478bd9Sstevel@tonic-gate dp = tp->t_bound_cpu->cpu_disp;
21827c478bd9Sstevel@tonic-gate ASSERT(tpri >= 0 && tpri < dp->disp_npri);
21837c478bd9Sstevel@tonic-gate if (tpri > dp->disp_max_unbound_pri)
21847c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = tpri;
21857c478bd9Sstevel@tonic-gate }
21867c478bd9Sstevel@tonic-gate
/*
 * disp_getbest()
 *   De-queue the highest priority unbound runnable thread.
 *   Returns with the thread unlocked and onproc but at splhigh (like disp()).
 *   Returns NULL if nothing found.
 *   Returns T_DONTSTEAL if the thread was not stealable,
 *   so that the caller will try again later.
 *
 *   Passed a pointer to a dispatch queue not associated with this CPU
 *   (either another CPU's queue or the partition's kp queue, in which
 *   case disp_cpu is NULL).
 */
static kthread_t *
disp_getbest(disp_t *dp)
{
	kthread_t	*tp;
	dispq_t		*dq;
	pri_t		pri;
	cpu_t		*cp, *tcp;
	boolean_t	allbound;

	disp_lock_enter(&dp->disp_lock);

	/*
	 * If there is nothing to run, or the CPU is in the middle of a
	 * context switch of the only thread, return NULL.
	 */
	tcp = dp->disp_cpu;	/* CPU that owns dp; NULL for the kp queue */
	cp = CPU;		/* the stealing (current) CPU */
	pri = dp->disp_max_unbound_pri;
	if (pri == -1 ||
	    (tcp != NULL && (tcp->cpu_disp_flags & CPU_DISP_DONTSTEAL) &&
	    tcp->cpu_disp->disp_nrunnable == 1)) {
		disp_lock_exit_nopreempt(&dp->disp_lock);
		return (NULL);
	}

	dq = &dp->disp_q[pri];

	/*
	 * Assume that all threads are bound on this queue, and change it
	 * later when we find out that it is not the case.
	 */
	allbound = B_TRUE;
	for (tp = dq->dq_first; tp != NULL; tp = tp->t_link) {
		hrtime_t now, nosteal, rqtime;

		/*
		 * Skip over bound threads which could be here even
		 * though disp_max_unbound_pri indicated this level.
		 */
		if (tp->t_bound_cpu || tp->t_weakbound_cpu)
			continue;

		/*
		 * We've got some unbound threads on this queue, so turn
		 * the allbound flag off now.
		 */
		allbound = B_FALSE;

		/*
		 * The thread is a candidate for stealing from its run queue. We
		 * don't want to steal threads that became runnable just a
		 * moment ago. This improves CPU affinity for threads that get
		 * preempted for short periods of time and go back on the run
		 * queue.
		 *
		 * We want to let it stay on its run queue if it was only placed
		 * there recently and it was running on the same CPU before that
		 * to preserve its cache investment. For the thread to remain on
		 * its run queue, ALL of the following conditions must be
		 * satisfied:
		 *
		 * - the disp queue should not be the kernel preemption queue
		 * - delayed idle stealing should not be disabled
		 * - nosteal_nsec should be non-zero
		 * - it should run with user priority
		 * - it should be on the run queue of the CPU where it was
		 *   running before being placed on the run queue
		 * - it should be the only thread on the run queue (to prevent
		 *   extra scheduling latency for other threads)
		 * - it should sit on the run queue for less than per-chip
		 *   nosteal interval or global nosteal interval
		 * - in case of CPUs with shared cache it should sit in a run
		 *   queue of a CPU from a different chip
		 *
		 * The checks are arranged so that the ones that are faster are
		 * placed earlier.
		 */
		if (tcp == NULL ||
		    pri >= minclsyspri ||
		    tp->t_cpu != tcp)
			break;

		/*
		 * Steal immediately if, due to CMT processor architecture,
		 * migration between cp and tcp would incur no performance
		 * penalty.
		 */
		if (pg_cmt_can_migrate(cp, tcp))
			break;

		nosteal = nosteal_nsec;
		if (nosteal == 0)
			break;

		/*
		 * Calculate time spent sitting on run queue
		 */
		now = gethrtime_unscaled();
		rqtime = now - tp->t_waitrq;
		scalehrtime(&rqtime);

		/*
		 * Steal immediately if the time spent on this run queue is more
		 * than allowed nosteal delay.
		 *
		 * Negative rqtime check is needed here to avoid infinite
		 * stealing delays caused by unlikely but not impossible
		 * drifts between CPU times on different CPUs.
		 */
		if (rqtime > nosteal || rqtime < 0)
			break;

		DTRACE_PROBE4(nosteal, kthread_t *, tp,
		    cpu_t *, tcp, cpu_t *, cp, hrtime_t, rqtime);
		scalehrtime(&now);
		/*
		 * Calculate when this thread becomes stealable
		 */
		now += (nosteal - rqtime);

		/*
		 * Calculate time when some thread becomes stealable
		 */
		if (now < dp->disp_steal)
			dp->disp_steal = now;
	}

	/*
	 * If there were no unbound threads on this queue, find the queue
	 * where they are and then return later. The value of
	 * disp_max_unbound_pri is not always accurate because it isn't
	 * reduced until another idle CPU looks for work.
	 */
	if (allbound)
		disp_fix_unbound_pri(dp, pri);

	/*
	 * If we reached the end of the queue and found no unbound threads
	 * then return NULL so that other CPUs will be considered.  If there
	 * are unbound threads but they cannot yet be stolen, then
	 * return T_DONTSTEAL and try again later.
	 */
	if (tp == NULL) {
		disp_lock_exit_nopreempt(&dp->disp_lock);
		return (allbound ? NULL : T_DONTSTEAL);
	}

	/*
	 * Found a runnable, unbound thread, so remove it from queue.
	 * dispdeq() requires that we have the thread locked, and we do,
	 * by virtue of holding the dispatch queue lock.  dispdeq() will
	 * put the thread in transition state, thereby dropping the dispq
	 * lock.
	 */

#ifdef DEBUG
	{
		int	thread_was_on_queue;

		thread_was_on_queue = dispdeq(tp);	/* drops disp_lock */
		ASSERT(thread_was_on_queue);
	}

#else /* DEBUG */
	(void) dispdeq(tp);			/* drops disp_lock */
#endif /* DEBUG */

	/*
	 * Reset the disp_queue steal time - we do not know what the
	 * smallest value across the queue is now.
	 */
	dp->disp_steal = 0;

	/* keep the stolen thread from being swapped out mid-handoff */
	tp->t_schedflag |= TS_DONT_SWAP;

	/*
	 * Setup thread to run on the current CPU.
	 */
	tp->t_disp_queue = cp->cpu_disp;

	cp->cpu_dispthread = tp;		/* protected by spl only */
	cp->cpu_dispatch_pri = pri;

	/*
	 * There can be a memory synchronization race between disp_getbest()
	 * and disp_ratify() vs cpu_resched() where cpu_resched() is trying
	 * to preempt the current thread to run the enqueued thread while
	 * disp_getbest() and disp_ratify() are changing the current thread
	 * to the stolen thread. This may lead to a situation where
	 * cpu_resched() tries to preempt the wrong thread and the
	 * stolen thread continues to run on the CPU which has been tagged
	 * for preemption.
	 * Later the clock thread gets enqueued but doesn't get to run on the
	 * CPU causing the system to hang.
	 *
	 * To avoid this, grabbing and dropping the disp_lock (which does
	 * a memory barrier) is needed to synchronize the execution of
	 * cpu_resched() with disp_getbest() and disp_ratify() and
	 * synchronize the memory read and written by cpu_resched(),
	 * disp_getbest(), and disp_ratify() with each other.
	 * (see CR#6482861 for more details).
	 */
	disp_lock_enter_high(&cp->cpu_disp->disp_lock);
	disp_lock_exit_high(&cp->cpu_disp->disp_lock);

	ASSERT(pri == DISP_PRIO(tp));

	DTRACE_PROBE3(steal, kthread_t *, tp, cpu_t *, tcp, cpu_t *, cp);

	thread_onproc(tp, cp);			/* set t_state to TS_ONPROC */

	/*
	 * Return with spl high so that swtch() won't need to raise it.
	 * The disp_lock was dropped by dispdeq().
	 */

	return (tp);
}
24177c478bd9Sstevel@tonic-gate
24187c478bd9Sstevel@tonic-gate /*
24197c478bd9Sstevel@tonic-gate * disp_bound_common() - common routine for higher level functions
24207c478bd9Sstevel@tonic-gate * that check for bound threads under certain conditions.
24217c478bd9Sstevel@tonic-gate * If 'threadlistsafe' is set then there is no need to acquire
24227c478bd9Sstevel@tonic-gate * pidlock to stop the thread list from changing (eg, if
24237c478bd9Sstevel@tonic-gate * disp_bound_* is called with cpus paused).
24247c478bd9Sstevel@tonic-gate */
24257c478bd9Sstevel@tonic-gate static int
disp_bound_common(cpu_t * cp,int threadlistsafe,int flag)24267c478bd9Sstevel@tonic-gate disp_bound_common(cpu_t *cp, int threadlistsafe, int flag)
24277c478bd9Sstevel@tonic-gate {
24287c478bd9Sstevel@tonic-gate int found = 0;
24297c478bd9Sstevel@tonic-gate kthread_t *tp;
24307c478bd9Sstevel@tonic-gate
24317c478bd9Sstevel@tonic-gate ASSERT(flag);
24327c478bd9Sstevel@tonic-gate
24337c478bd9Sstevel@tonic-gate if (!threadlistsafe)
24347c478bd9Sstevel@tonic-gate mutex_enter(&pidlock);
24357c478bd9Sstevel@tonic-gate tp = curthread; /* faster than allthreads */
24367c478bd9Sstevel@tonic-gate do {
24377c478bd9Sstevel@tonic-gate if (tp->t_state != TS_FREE) {
24387c478bd9Sstevel@tonic-gate /*
24397c478bd9Sstevel@tonic-gate * If an interrupt thread is busy, but the
24407c478bd9Sstevel@tonic-gate * caller doesn't care (i.e. BOUND_INTR is off),
24417c478bd9Sstevel@tonic-gate * then just ignore it and continue through.
24427c478bd9Sstevel@tonic-gate */
24437c478bd9Sstevel@tonic-gate if ((tp->t_flag & T_INTR_THREAD) &&
24447c478bd9Sstevel@tonic-gate !(flag & BOUND_INTR))
24457c478bd9Sstevel@tonic-gate continue;
24467c478bd9Sstevel@tonic-gate
24477c478bd9Sstevel@tonic-gate /*
24487c478bd9Sstevel@tonic-gate * Skip the idle thread for the CPU
24497c478bd9Sstevel@tonic-gate * we're about to set offline.
24507c478bd9Sstevel@tonic-gate */
24517c478bd9Sstevel@tonic-gate if (tp == cp->cpu_idle_thread)
24527c478bd9Sstevel@tonic-gate continue;
24537c478bd9Sstevel@tonic-gate
24547c478bd9Sstevel@tonic-gate /*
24557c478bd9Sstevel@tonic-gate * Skip the pause thread for the CPU
24567c478bd9Sstevel@tonic-gate * we're about to set offline.
24577c478bd9Sstevel@tonic-gate */
24587c478bd9Sstevel@tonic-gate if (tp == cp->cpu_pause_thread)
24597c478bd9Sstevel@tonic-gate continue;
24607c478bd9Sstevel@tonic-gate
24617c478bd9Sstevel@tonic-gate if ((flag & BOUND_CPU) &&
24627c478bd9Sstevel@tonic-gate (tp->t_bound_cpu == cp ||
24637c478bd9Sstevel@tonic-gate tp->t_bind_cpu == cp->cpu_id ||
24647c478bd9Sstevel@tonic-gate tp->t_weakbound_cpu == cp)) {
24657c478bd9Sstevel@tonic-gate found = 1;
24667c478bd9Sstevel@tonic-gate break;
24677c478bd9Sstevel@tonic-gate }
24687c478bd9Sstevel@tonic-gate
24697c478bd9Sstevel@tonic-gate if ((flag & BOUND_PARTITION) &&
24707c478bd9Sstevel@tonic-gate (tp->t_cpupart == cp->cpu_part)) {
24717c478bd9Sstevel@tonic-gate found = 1;
24727c478bd9Sstevel@tonic-gate break;
24737c478bd9Sstevel@tonic-gate }
24747c478bd9Sstevel@tonic-gate }
24757c478bd9Sstevel@tonic-gate } while ((tp = tp->t_next) != curthread && found == 0);
24767c478bd9Sstevel@tonic-gate if (!threadlistsafe)
24777c478bd9Sstevel@tonic-gate mutex_exit(&pidlock);
24787c478bd9Sstevel@tonic-gate return (found);
24797c478bd9Sstevel@tonic-gate }
24807c478bd9Sstevel@tonic-gate
24817c478bd9Sstevel@tonic-gate /*
24827c478bd9Sstevel@tonic-gate * disp_bound_threads - return nonzero if threads are bound to the processor.
24837c478bd9Sstevel@tonic-gate * Called infrequently. Keep this simple.
24847c478bd9Sstevel@tonic-gate * Includes threads that are asleep or stopped but not onproc.
24857c478bd9Sstevel@tonic-gate */
24867c478bd9Sstevel@tonic-gate int
disp_bound_threads(cpu_t * cp,int threadlistsafe)24877c478bd9Sstevel@tonic-gate disp_bound_threads(cpu_t *cp, int threadlistsafe)
24887c478bd9Sstevel@tonic-gate {
24897c478bd9Sstevel@tonic-gate return (disp_bound_common(cp, threadlistsafe, BOUND_CPU));
24907c478bd9Sstevel@tonic-gate }
24917c478bd9Sstevel@tonic-gate
24927c478bd9Sstevel@tonic-gate /*
24937c478bd9Sstevel@tonic-gate * disp_bound_anythreads - return nonzero if _any_ threads are bound
24947c478bd9Sstevel@tonic-gate * to the given processor, including interrupt threads.
24957c478bd9Sstevel@tonic-gate */
24967c478bd9Sstevel@tonic-gate int
disp_bound_anythreads(cpu_t * cp,int threadlistsafe)24977c478bd9Sstevel@tonic-gate disp_bound_anythreads(cpu_t *cp, int threadlistsafe)
24987c478bd9Sstevel@tonic-gate {
24997c478bd9Sstevel@tonic-gate return (disp_bound_common(cp, threadlistsafe, BOUND_CPU | BOUND_INTR));
25007c478bd9Sstevel@tonic-gate }
25017c478bd9Sstevel@tonic-gate
25027c478bd9Sstevel@tonic-gate /*
25037c478bd9Sstevel@tonic-gate * disp_bound_partition - return nonzero if threads are bound to the same
25047c478bd9Sstevel@tonic-gate * partition as the processor.
25057c478bd9Sstevel@tonic-gate * Called infrequently. Keep this simple.
25067c478bd9Sstevel@tonic-gate * Includes threads that are asleep or stopped but not onproc.
25077c478bd9Sstevel@tonic-gate */
25087c478bd9Sstevel@tonic-gate int
disp_bound_partition(cpu_t * cp,int threadlistsafe)25097c478bd9Sstevel@tonic-gate disp_bound_partition(cpu_t *cp, int threadlistsafe)
25107c478bd9Sstevel@tonic-gate {
25117c478bd9Sstevel@tonic-gate return (disp_bound_common(cp, threadlistsafe, BOUND_PARTITION));
25127c478bd9Sstevel@tonic-gate }
25137c478bd9Sstevel@tonic-gate
25147c478bd9Sstevel@tonic-gate /*
25157c478bd9Sstevel@tonic-gate * disp_cpu_inactive - make a CPU inactive by moving all of its unbound
25167c478bd9Sstevel@tonic-gate * threads to other CPUs.
25177c478bd9Sstevel@tonic-gate */
25187c478bd9Sstevel@tonic-gate void
disp_cpu_inactive(cpu_t * cp)25197c478bd9Sstevel@tonic-gate disp_cpu_inactive(cpu_t *cp)
25207c478bd9Sstevel@tonic-gate {
25217c478bd9Sstevel@tonic-gate kthread_t *tp;
25227c478bd9Sstevel@tonic-gate disp_t *dp = cp->cpu_disp;
25237c478bd9Sstevel@tonic-gate dispq_t *dq;
25247c478bd9Sstevel@tonic-gate pri_t pri;
25257c478bd9Sstevel@tonic-gate int wasonq;
25267c478bd9Sstevel@tonic-gate
25277c478bd9Sstevel@tonic-gate disp_lock_enter(&dp->disp_lock);
25287c478bd9Sstevel@tonic-gate while ((pri = dp->disp_max_unbound_pri) != -1) {
25297c478bd9Sstevel@tonic-gate dq = &dp->disp_q[pri];
25307c478bd9Sstevel@tonic-gate tp = dq->dq_first;
25317c478bd9Sstevel@tonic-gate
25327c478bd9Sstevel@tonic-gate /*
25337c478bd9Sstevel@tonic-gate * Skip over bound threads.
25347c478bd9Sstevel@tonic-gate */
25357c478bd9Sstevel@tonic-gate while (tp != NULL && tp->t_bound_cpu != NULL) {
25367c478bd9Sstevel@tonic-gate tp = tp->t_link;
25377c478bd9Sstevel@tonic-gate }
25387c478bd9Sstevel@tonic-gate
25397c478bd9Sstevel@tonic-gate if (tp == NULL) {
25407c478bd9Sstevel@tonic-gate /* disp_max_unbound_pri must be inaccurate, so fix it */
25417c478bd9Sstevel@tonic-gate disp_fix_unbound_pri(dp, pri);
25427c478bd9Sstevel@tonic-gate continue;
25437c478bd9Sstevel@tonic-gate }
25447c478bd9Sstevel@tonic-gate
25457c478bd9Sstevel@tonic-gate wasonq = dispdeq(tp); /* drops disp_lock */
25467c478bd9Sstevel@tonic-gate ASSERT(wasonq);
25477c478bd9Sstevel@tonic-gate ASSERT(tp->t_weakbound_cpu == NULL);
25487c478bd9Sstevel@tonic-gate
25497c478bd9Sstevel@tonic-gate setbackdq(tp);
25507c478bd9Sstevel@tonic-gate /*
25517c478bd9Sstevel@tonic-gate * Called from cpu_offline:
25527c478bd9Sstevel@tonic-gate *
25537c478bd9Sstevel@tonic-gate * cp has already been removed from the list of active cpus
25547c478bd9Sstevel@tonic-gate * and tp->t_cpu has been changed so there is no risk of
25557c478bd9Sstevel@tonic-gate * tp ending up back on cp.
25567c478bd9Sstevel@tonic-gate *
25577c478bd9Sstevel@tonic-gate * Called from cpupart_move_cpu:
25587c478bd9Sstevel@tonic-gate *
25597c478bd9Sstevel@tonic-gate * The cpu has moved to a new cpupart. Any threads that
25607c478bd9Sstevel@tonic-gate * were on it's dispatch queues before the move remain
25617c478bd9Sstevel@tonic-gate * in the old partition and can't run in the new partition.
25627c478bd9Sstevel@tonic-gate */
25637c478bd9Sstevel@tonic-gate ASSERT(tp->t_cpu != cp);
25647c478bd9Sstevel@tonic-gate thread_unlock(tp);
25657c478bd9Sstevel@tonic-gate
25667c478bd9Sstevel@tonic-gate disp_lock_enter(&dp->disp_lock);
25677c478bd9Sstevel@tonic-gate }
25687c478bd9Sstevel@tonic-gate disp_lock_exit(&dp->disp_lock);
25697c478bd9Sstevel@tonic-gate }
25707c478bd9Sstevel@tonic-gate
25717c478bd9Sstevel@tonic-gate /*
25727c478bd9Sstevel@tonic-gate * disp_lowpri_cpu - find CPU running the lowest priority thread.
25737c478bd9Sstevel@tonic-gate * The hint passed in is used as a starting point so we don't favor
25747c478bd9Sstevel@tonic-gate * CPU 0 or any other CPU. The caller should pass in the most recently
25757c478bd9Sstevel@tonic-gate * used CPU for the thread.
25767c478bd9Sstevel@tonic-gate *
25777c478bd9Sstevel@tonic-gate * The lgroup and priority are used to determine the best CPU to run on
25787c478bd9Sstevel@tonic-gate * in a NUMA machine. The lgroup specifies which CPUs are closest while
25797c478bd9Sstevel@tonic-gate * the thread priority will indicate whether the thread will actually run
25807c478bd9Sstevel@tonic-gate * there. To pick the best CPU, the CPUs inside and outside of the given
25817c478bd9Sstevel@tonic-gate * lgroup which are running the lowest priority threads are found. The
25827c478bd9Sstevel@tonic-gate * remote CPU is chosen only if the thread will not run locally on a CPU
25837c478bd9Sstevel@tonic-gate * within the lgroup, but will run on the remote CPU. If the thread
25847c478bd9Sstevel@tonic-gate * cannot immediately run on any CPU, the best local CPU will be chosen.
25857c478bd9Sstevel@tonic-gate *
25867c478bd9Sstevel@tonic-gate * The lpl specified also identifies the cpu partition from which
25877c478bd9Sstevel@tonic-gate * disp_lowpri_cpu should select a CPU.
25887c478bd9Sstevel@tonic-gate *
25897c478bd9Sstevel@tonic-gate * curcpu is used to indicate that disp_lowpri_cpu is being called on
25907c478bd9Sstevel@tonic-gate * behalf of the current thread. (curthread is looking for a new cpu)
25917c478bd9Sstevel@tonic-gate * In this case, cpu_dispatch_pri for this thread's cpu should be
25927c478bd9Sstevel@tonic-gate * ignored.
25937c478bd9Sstevel@tonic-gate *
25947c478bd9Sstevel@tonic-gate * If a cpu is the target of an offline request then try to avoid it.
25957c478bd9Sstevel@tonic-gate *
25967c478bd9Sstevel@tonic-gate * This function must be called at either high SPL, or with preemption
25977c478bd9Sstevel@tonic-gate * disabled, so that the "hint" CPU cannot be removed from the online
25987c478bd9Sstevel@tonic-gate * CPU list while we are traversing it.
25997c478bd9Sstevel@tonic-gate */
cpu_t *
disp_lowpri_cpu(cpu_t *hint, lpl_t *lpl, pri_t tpri, cpu_t *curcpu)
{
	cpu_t	*bestcpu;	/* lowest-pri CPU seen in the current rset */
	cpu_t	*besthomecpu;	/* best CPU found in the home lgroup pass */
	cpu_t	*cp, *cpstart;

	pri_t	bestpri;	/* lowest effective dispatch pri seen so far */
	pri_t	cpupri;		/* effective dispatch pri of CPU under test */

	klgrpset_t	done;	/* lgrps already examined in earlier passes */
	klgrpset_t	cur_set; /* lgrps examined during this pass */

	lpl_t		*lpl_iter, *lpl_leaf;
	int		i;

	/*
	 * Scan for a CPU currently running the lowest priority thread.
	 * Cannot get cpu_lock here because it is adaptive.
	 * We do not require lock on CPU list.
	 */
	ASSERT(hint != NULL);
	ASSERT(lpl != NULL);
	ASSERT(lpl->lpl_ncpu > 0);

	/*
	 * First examine local CPUs. Note that it's possible the hint CPU
	 * passed in in remote to the specified home lgroup. If our priority
	 * isn't sufficient enough such that we can run immediately at home,
	 * then examine CPUs remote to our home lgroup.
	 * We would like to give preference to CPUs closest to "home".
	 * If we can't find a CPU where we'll run at a given level
	 * of locality, we expand our search to include the next level.
	 */
	bestcpu = besthomecpu = NULL;
	klgrpset_clear(done);
	/* start with lpl we were passed */

	lpl_iter = lpl;

	/*
	 * Each iteration widens the search by one level of lgroup
	 * hierarchy (lpl -> lpl_parent), skipping leaves already
	 * covered by the "done" set.
	 */
	do {

		bestpri = SHRT_MAX;
		klgrpset_clear(cur_set);

		for (i = 0; i < lpl_iter->lpl_nrset; i++) {
			lpl_leaf = lpl_iter->lpl_rset[i];
			if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid))
				continue;

			klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid);

			/*
			 * Start the per-leaf scan at the hint CPU when it
			 * lives in this leaf, so we don't favor CPU 0.
			 */
			if (hint->cpu_lpl == lpl_leaf)
				cp = cpstart = hint;
			else
				cp = cpstart = lpl_leaf->lpl_cpus;

			do {
				/*
				 * Compute cp's effective dispatch priority:
				 * the max of what it's running, what's on
				 * its queue, and what has been chosen for
				 * it.  curcpu's own running pri is ignored
				 * (the caller is curthread looking for a new
				 * cpu); a cpu being offlined is penalized
				 * with SHRT_MAX so it's avoided if possible.
				 */
				if (cp == curcpu)
					cpupri = -1;
				else if (cp == cpu_inmotion)
					cpupri = SHRT_MAX;
				else
					cpupri = cp->cpu_dispatch_pri;
				if (cp->cpu_disp->disp_maxrunpri > cpupri)
					cpupri = cp->cpu_disp->disp_maxrunpri;
				if (cp->cpu_chosen_level > cpupri)
					cpupri = cp->cpu_chosen_level;
				if (cpupri < bestpri) {
					/*
					 * An idling CPU can't be beaten;
					 * return it immediately.
					 */
					if (CPU_IDLING(cpupri)) {
						ASSERT((cp->cpu_flags &
						    CPU_QUIESCED) == 0);
						return (cp);
					}
					bestcpu = cp;
					bestpri = cpupri;
				}
			} while ((cp = cp->cpu_next_lpl) != cpstart);
		}

		/*
		 * If the thread's priority beats the best CPU found at
		 * this level of locality, it will actually run there.
		 */
		if (bestcpu && (tpri > bestpri)) {
			ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0);
			return (bestcpu);
		}
		/* remember the best CPU from the first (home) pass */
		if (besthomecpu == NULL)
			besthomecpu = bestcpu;
		/*
		 * Add the lgrps we just considered to the "done" set
		 */
		klgrpset_or(done, cur_set);

	} while ((lpl_iter = lpl_iter->lpl_parent) != NULL);

	/*
	 * The specified priority isn't high enough to run immediately
	 * anywhere, so just return the best CPU from the home lgroup.
	 */
	ASSERT((besthomecpu->cpu_flags & CPU_QUIESCED) == 0);
	return (besthomecpu);
}
27007c478bd9Sstevel@tonic-gate
/*
 * This routine provides the generic idle cpu function for all processors.
 * If a processor has some specific code to execute when idle (say, to stop
 * the pipeline and save power) then that routine should be defined in the
 * processors specific code (module_xx.c) and the global variable idle_cpu
 * set to that function.
 */
static void
generic_idle_cpu(void)
{
	/* Intentionally empty: default is to do nothing while idle. */
}
27127c478bd9Sstevel@tonic-gate
/*
 * Default no-op hook invoked when a thread is enqueued; platform-specific
 * code may install its own version.  Both arguments are ignored here.
 */
/*ARGSUSED*/
static void
generic_enq_thread(cpu_t *cpu, int bound)
{
	/* Intentionally empty: no default enqueue-notification action. */
}
2718