/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/


#pragma ident	"%Z%%M%	%I%	%E% SMI"	/* from SVr4.0 1.30 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/systm.h>
#include <sys/sysinfo.h>
#include <sys/var.h>
#include <sys/errno.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/inline.h>
#include <sys/disp.h>
#include <sys/class.h>
#include <sys/bitmap.h>
#include <sys/kmem.h>
#include <sys/cpuvar.h>
#include <sys/vtrace.h>
#include <sys/tnf.h>
#include <sys/cpupart.h>
#include <sys/lgrp.h>
#include <sys/chip.h>
#include <sys/schedctl.h>
#include <sys/atomic.h>
#include <sys/dtrace.h>
#include <sys/sdt.h>

#include <vm/as.h>

#define	BOUND_CPU	0x1
#define	BOUND_PARTITION	0x2
#define	BOUND_INTR	0x4

/* Dispatch queue allocation structure and functions */
struct disp_queue_info {
	disp_t	*dp;
	dispq_t *olddispq;
	dispq_t *newdispq;
	ulong_t	*olddqactmap;
	ulong_t	*newdqactmap;
	int	oldnglobpris;
};
static void	disp_dq_alloc(struct disp_queue_info *dptr, int numpris,
    disp_t *dp);
static void	disp_dq_assign(struct disp_queue_info *dptr, int numpris);
static void	disp_dq_free(struct disp_queue_info *dptr);
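
/*
 * For illustration, cpu_dispqalloc() below sequences these three
 * phases so that nothing can sleep while the CPUs are paused:
 *
 *	disp_dq_alloc(&mi, npri, dp);	-- may sleep; CPUs still running
 *	pause_cpus(NULL);
 *	disp_dq_assign(&mi, npri);	-- swaps new queues in; no allocation
 *	start_cpus();
 *	disp_dq_free(&mi);		-- may sleep; CPUs running again
 */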

/* platform-specific routine to call when processor is idle */
static void	generic_idle_cpu();
void		(*idle_cpu)() = generic_idle_cpu;

/* routines invoked when a CPU enters/exits the idle loop */
static void	idle_enter();
static void	idle_exit();

/* platform-specific routine to call when thread is enqueued */
static void	generic_enq_thread(cpu_t *, int);
void		(*disp_enq_thread)(cpu_t *, int) = generic_enq_thread;

pri_t	kpreemptpri;	/* priority where kernel preemption applies */
pri_t	upreemptpri = 0; /* priority where normal preemption applies */
pri_t	intr_pri;	/* interrupt thread priority base level */

#define	KPQPRI	-1 /* priority where cpu affinity is dropped for kp queue */
pri_t	kpqpri = KPQPRI; /* can be set in /etc/system */
disp_t	cpu0_disp;	/* boot CPU's dispatch queue */
disp_lock_t	swapped_lock;	/* lock swapped threads and swap queue */
int	nswapped;	/* total number of swapped threads */
void	disp_swapped_enq(kthread_t *tp);
static void	disp_swapped_setrun(kthread_t *tp);
static void	cpu_resched(cpu_t *cp, pri_t tpri);

/*
 * If this is set, only interrupt threads will cause kernel preemptions.
 * This is done by changing the value of kpreemptpri.  kpreemptpri
 * will either be the max sysclass pri + 1 or the min interrupt pri.
 */
int	only_intr_kpreempt;
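
/*
 * For illustration, with the conventional global priority layout
 * (TS/IA at 0-59, SYS at 60-99, RT at 100-159), v.v_maxsyspri is 99
 * and dispinit() sets kpreemptpri to 100, so any RT thread triggers
 * kernel preemption.  With only_intr_kpreempt set, disp_setup()
 * raises kpreemptpri to intr_pri + 1 instead, so that only interrupt
 * threads exceed it.
 */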

extern void set_idle_cpu(int cpun);
extern void unset_idle_cpu(int cpun);
static void setkpdq(kthread_t *tp, int borf);
#define	SETKP_BACK	0
#define	SETKP_FRONT	1
/*
 * Parameter that determines how recently a thread must have run
 * on the CPU to be considered loosely-bound to that CPU to reduce
 * cold cache effects.  The interval is in units of clock ticks (hz).
 *
 * The platform may define a per physical processor adjustment of
 * this parameter. For efficiency, the effective rechoose interval
 * (rechoose_interval + per chip adjustment) is maintained in the
 * cpu structures.  See cpu_choose().
 */
int	rechoose_interval = RECHOOSE_INTERVAL;
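
/*
 * Sketch of the staleness test this parameter feeds (field name per
 * this revision; see cpu_choose()): a thread t is still considered
 * warm on its last CPU roughly when
 *
 *	lbolt - t->t_disp_time <= t->t_cpu->cpu_rechoose
 *
 * where cpu_rechoose is the per-CPU effective interval described in
 * the comment above.
 */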

static cpu_t	*cpu_choose(kthread_t *, pri_t);

id_t	defaultcid;	/* system "default" class; see dispadmin(1M) */

disp_lock_t	transition_lock;	/* lock on transitioning threads */
disp_lock_t	stop_lock;		/* lock on stopped threads */
disp_lock_t	shuttle_lock;		/* lock on shuttle objects */

static void		cpu_dispqalloc(int numpris);

static kthread_t	*disp_getwork(cpu_t *to);
static kthread_t	*disp_getbest(disp_t *from);
static kthread_t	*disp_ratify(kthread_t *tp, disp_t *kpq);

void	swtch_to(kthread_t *);

/*
 * dispatcher and scheduler initialization
 */

/*
 * disp_setup - Common code to calculate and allocate dispatcher
 *		variables and structures based on the maximum priority.
 */
static void
disp_setup(pri_t maxglobpri, pri_t oldnglobpris)
{
	pri_t	newnglobpris;

	ASSERT(MUTEX_HELD(&cpu_lock));

	newnglobpris = maxglobpri + 1 + LOCK_LEVEL;

	if (newnglobpris > oldnglobpris) {
		/*
		 * Allocate new kp queues for each CPU partition.
		 */
		cpupart_kpqalloc(newnglobpris);

		/*
		 * Allocate new dispatch queues for each CPU.
		 */
		cpu_dispqalloc(newnglobpris);

		/*
		 * compute new interrupt thread base priority
		 */
		intr_pri = maxglobpri;
		if (only_intr_kpreempt) {
			kpreemptpri = intr_pri + 1;
			if (kpqpri == KPQPRI)
				kpqpri = kpreemptpri;
		}
		v.v_nglobpris = newnglobpris;
	}
}
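
/*
 * Worked example (using the conventional layout sketched above, for
 * illustration only): with RT topping out at global priority 159, the
 * classes report maxglobpri == 159, so newnglobpris == 159 + 1 +
 * LOCK_LEVEL (10) == 170, reserving globals 160-169 for interrupt
 * threads above intr_pri.
 */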

/*
 * dispinit - Called to initialize all loaded classes and the
 *	      dispatcher framework.
 */
void
dispinit(void)
{
	id_t	cid;
	pri_t	maxglobpri;
	pri_t	cl_maxglobpri;

	maxglobpri = -1;

	/*
	 * Initialize transition lock, which will always be set.
	 */
	DISP_LOCK_INIT(&transition_lock);
	disp_lock_enter_high(&transition_lock);
	DISP_LOCK_INIT(&stop_lock);
	DISP_LOCK_INIT(&shuttle_lock);

	mutex_enter(&cpu_lock);
	CPU->cpu_disp->disp_maxrunpri = -1;
	CPU->cpu_disp->disp_max_unbound_pri = -1;
	/*
	 * Initialize the default CPU partition.
	 */
	cpupart_initialize_default();
	/*
	 * Call the class specific initialization functions for
	 * all pre-installed schedulers.
	 *
	 * We pass the size of a class specific parameter
	 * buffer to each of the initialization functions
	 * to try to catch problems with backward compatibility
	 * of class modules.
	 *
	 * For example a new class module running on an old system
	 * which didn't provide sufficiently large parameter buffers
	 * would be bad news. Class initialization modules can check for
	 * this and take action if they detect a problem.
	 */

	for (cid = 0; cid < nclass; cid++) {
		sclass_t	*sc;

		sc = &sclass[cid];
		if (SCHED_INSTALLED(sc)) {
			cl_maxglobpri = sc->cl_init(cid, PC_CLPARMSZ,
			    &sc->cl_funcs);
			if (cl_maxglobpri > maxglobpri)
				maxglobpri = cl_maxglobpri;
		}
	}
	kpreemptpri = (pri_t)v.v_maxsyspri + 1;
	if (kpqpri == KPQPRI)
		kpqpri = kpreemptpri;

	ASSERT(maxglobpri >= 0);
	disp_setup(maxglobpri, 0);

	mutex_exit(&cpu_lock);

	/*
	 * Get the default class ID; this may be later modified via
	 * dispadmin(1M).  This will load the class (normally TS) and that will
	 * call disp_add(), which is why we had to drop cpu_lock first.
	 */
	if (getcid(defaultclass, &defaultcid) != 0) {
		cmn_err(CE_PANIC, "Couldn't load default scheduling class '%s'",
		    defaultclass);
	}
}

/*
 * disp_add - Called with class pointer to initialize the dispatcher
 *	      for a newly loaded class.
 */
void
disp_add(sclass_t *clp)
{
	pri_t	maxglobpri;
	pri_t	cl_maxglobpri;

	mutex_enter(&cpu_lock);
	/*
	 * Initialize the scheduler class.
	 */
	maxglobpri = (pri_t)(v.v_nglobpris - LOCK_LEVEL - 1);
	cl_maxglobpri = clp->cl_init(clp - sclass, PC_CLPARMSZ, &clp->cl_funcs);
	if (cl_maxglobpri > maxglobpri)
		maxglobpri = cl_maxglobpri;

	/*
	 * Save old queue information.  Since we're initializing a
	 * newly loaded scheduling class, the size of the dispq may
	 * have changed.  We need to handle that here.
	 */
	disp_setup(maxglobpri, v.v_nglobpris);

	mutex_exit(&cpu_lock);
}


/*
 * For each CPU, allocate new dispatch queues
 * with the stated number of priorities.
 */
static void
cpu_dispqalloc(int numpris)
{
	cpu_t	*cpup;
	struct disp_queue_info	*disp_mem;
	int i, num;

	ASSERT(MUTEX_HELD(&cpu_lock));

	disp_mem = kmem_zalloc(NCPU *
	    sizeof (struct disp_queue_info), KM_SLEEP);

	/*
	 * This routine must allocate all of the memory before stopping
	 * the cpus because it must not sleep in kmem_alloc while the
	 * CPUs are stopped.  Locks they hold will not be freed until they
	 * are restarted.
	 */
	i = 0;
	cpup = cpu_list;
	do {
		disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp);
		i++;
		cpup = cpup->cpu_next;
	} while (cpup != cpu_list);
	num = i;

	pause_cpus(NULL);
	for (i = 0; i < num; i++)
		disp_dq_assign(&disp_mem[i], numpris);
	start_cpus();

	/*
	 * All of the memory must be freed after starting the cpus because
	 * we cannot risk sleeping in kmem_free while the cpus are stopped.
	 */
	for (i = 0; i < num; i++)
		disp_dq_free(&disp_mem[i]);

	kmem_free(disp_mem, NCPU * sizeof (struct disp_queue_info));
}

static void
disp_dq_alloc(struct disp_queue_info *dptr, int numpris, disp_t *dp)
{
	dptr->newdispq = kmem_zalloc(numpris * sizeof (dispq_t), KM_SLEEP);
	dptr->newdqactmap = kmem_zalloc(((numpris / BT_NBIPUL) + 1) *
	    sizeof (long), KM_SLEEP);
	dptr->dp = dp;
}

static void
disp_dq_assign(struct disp_queue_info *dptr, int numpris)
{
	disp_t	*dp;

	dp = dptr->dp;
	dptr->olddispq = dp->disp_q;
	dptr->olddqactmap = dp->disp_qactmap;
	dptr->oldnglobpris = dp->disp_npri;

	ASSERT(dptr->oldnglobpris < numpris);

	if (dptr->olddispq != NULL) {
		/*
		 * Use kcopy because bcopy is platform-specific
		 * and could block while we might have paused the cpus.
		 */
		(void) kcopy(dptr->olddispq, dptr->newdispq,
		    dptr->oldnglobpris * sizeof (dispq_t));
		(void) kcopy(dptr->olddqactmap, dptr->newdqactmap,
		    ((dptr->oldnglobpris / BT_NBIPUL) + 1) *
		    sizeof (long));
	}
	dp->disp_q = dptr->newdispq;
	dp->disp_qactmap = dptr->newdqactmap;
	dp->disp_q_limit = &dptr->newdispq[numpris];
	dp->disp_npri = numpris;
}

static void
disp_dq_free(struct disp_queue_info *dptr)
{
	if (dptr->olddispq != NULL)
		kmem_free(dptr->olddispq,
		    dptr->oldnglobpris * sizeof (dispq_t));
	if (dptr->olddqactmap != NULL)
		kmem_free(dptr->olddqactmap,
		    ((dptr->oldnglobpris / BT_NBIPUL) + 1) * sizeof (long));
}

/*
 * For a newly created CPU, initialize the dispatch queue.
 * This is called before the CPU is known through cpu[] or on any lists.
 */
void
disp_cpu_init(cpu_t *cp)
{
	disp_t	*dp;
	dispq_t	*newdispq;
	ulong_t	*newdqactmap;

	ASSERT(MUTEX_HELD(&cpu_lock));	/* protect dispatcher queue sizes */

	if (cp == cpu0_disp.disp_cpu)
		dp = &cpu0_disp;
	else
		dp = kmem_alloc(sizeof (disp_t), KM_SLEEP);
	bzero(dp, sizeof (disp_t));
	cp->cpu_disp = dp;
	dp->disp_cpu = cp;
	dp->disp_maxrunpri = -1;
	dp->disp_max_unbound_pri = -1;
	DISP_LOCK_INIT(&cp->cpu_thread_lock);
	/*
	 * Allocate memory for the dispatcher queue headers
	 * and the active queue bitmap.
	 */
	newdispq = kmem_zalloc(v.v_nglobpris * sizeof (dispq_t), KM_SLEEP);
	newdqactmap = kmem_zalloc(((v.v_nglobpris / BT_NBIPUL) + 1) *
	    sizeof (long), KM_SLEEP);
	dp->disp_q = newdispq;
	dp->disp_qactmap = newdqactmap;
	dp->disp_q_limit = &newdispq[v.v_nglobpris];
	dp->disp_npri = v.v_nglobpris;
}

void
disp_cpu_fini(cpu_t *cp)
{
	ASSERT(MUTEX_HELD(&cpu_lock));

	disp_kp_free(cp->cpu_disp);
	if (cp->cpu_disp != &cpu0_disp)
		kmem_free(cp->cpu_disp, sizeof (disp_t));
}

/*
 * Allocate new, larger kpreempt dispatch queue to replace the old one.
 */
void
disp_kp_alloc(disp_t *dq, pri_t npri)
{
	struct disp_queue_info	mem_info;

	if (npri > dq->disp_npri) {
		/*
		 * Allocate memory for the new array.
		 */
		disp_dq_alloc(&mem_info, npri, dq);

		/*
		 * We need to copy the old structures to the new
		 * and free the old.
		 */
		disp_dq_assign(&mem_info, npri);
		disp_dq_free(&mem_info);
	}
}

/*
 * Free dispatch queue.
 * Used for the kpreempt queues for a removed CPU partition and
 * for the per-CPU queues of deleted CPUs.
 */
void
disp_kp_free(disp_t *dq)
{
	struct disp_queue_info	mem_info;

	mem_info.olddispq = dq->disp_q;
	mem_info.olddqactmap = dq->disp_qactmap;
	mem_info.oldnglobpris = dq->disp_npri;
	disp_dq_free(&mem_info);
}

/*
 * End dispatcher and scheduler initialization.
 */

/*
 * See if there's anything to do other than remain idle.
 * Return non-zero if there is.
 *
 * This function must be called with high spl, or with
 * kernel preemption disabled to prevent the partition's
 * active cpu list from changing while being traversed.
 */
int
disp_anywork(void)
{
	cpu_t	*cp = CPU;
	cpu_t	*ocp;

	if (cp->cpu_disp->disp_nrunnable != 0)
		return (1);

	if (!(cp->cpu_flags & CPU_OFFLINE)) {
		if (CP_MAXRUNPRI(cp->cpu_part) >= 0)
			return (1);
		/*
		 * Work can be taken from another CPU if:
		 *	- There is unbound work on the run queue
		 *	- That work isn't a thread undergoing a
		 *	  context switch on an otherwise empty queue
		 *	- The CPU isn't running the idle loop.
		 */
		for (ocp = cp->cpu_next_part; ocp != cp;
		    ocp = ocp->cpu_next_part) {
			ASSERT(CPU_ACTIVE(ocp));

			if (ocp->cpu_disp->disp_max_unbound_pri != -1 &&
			    !((ocp->cpu_disp_flags & CPU_DISP_DONTSTEAL) &&
			    ocp->cpu_disp->disp_nrunnable == 1) &&
			    ocp->cpu_dispatch_pri != -1)
				return (1);
		}
	}
	return (0);
}

/*
 * Called when CPU enters the idle loop
 */
static void
idle_enter()
{
	cpu_t		*cp = CPU;

	new_cpu_mstate(cp, CMS_IDLE);
	CPU_STATS_ADDQ(cp, sys, idlethread, 1);
	set_idle_cpu(cp->cpu_id);	/* arch-dependent hook */
}

/*
 * Called when CPU exits the idle loop
 */
static void
idle_exit()
{
	cpu_t		*cp = CPU;

	new_cpu_mstate(cp, CMS_SYSTEM);
	unset_idle_cpu(cp->cpu_id);	/* arch-dependent hook */
}

/*
 * Idle loop.
 */
void
idle()
{
	struct cpu	*cp = CPU;		/* pointer to this CPU */
	kthread_t	*t;			/* taken thread */

	idle_enter();

	/*
	 * Uniprocessor version of idle loop.
	 * Do this until notified that we're on an actual multiprocessor.
	 */
	while (ncpus == 1) {
		if (cp->cpu_disp->disp_nrunnable == 0) {
			(*idle_cpu)();
			continue;
		}
		idle_exit();
		swtch();

		idle_enter(); /* returned from swtch */
	}

	/*
	 * Multiprocessor idle loop.
	 */
	for (;;) {
		/*
		 * If CPU is completely quiesced by p_online(2), just wait
		 * here with minimal bus traffic until put online.
		 */
		while (cp->cpu_flags & CPU_QUIESCED)
			(*idle_cpu)();

		if (cp->cpu_disp->disp_nrunnable != 0) {
			idle_exit();
			swtch();
		} else {
			if (cp->cpu_flags & CPU_OFFLINE)
				continue;
			if ((t = disp_getwork(cp)) == NULL) {
				if (cp->cpu_chosen_level != -1) {
					disp_t *dp = cp->cpu_disp;
					disp_t *kpq;

					disp_lock_enter(&dp->disp_lock);
					/*
					 * Set kpq under lock to prevent
					 * migration between partitions.
					 */
					kpq = &cp->cpu_part->cp_kp_queue;
					if (kpq->disp_maxrunpri == -1)
						cp->cpu_chosen_level = -1;
					disp_lock_exit(&dp->disp_lock);
				}
				(*idle_cpu)();
				continue;
			}
			idle_exit();
			restore_mstate(t);
			swtch_to(t);
		}
		idle_enter(); /* returned from swtch/swtch_to */
	}
}


/*
 * Preempt the currently running thread in favor of the highest
 * priority thread.  The class of the current thread controls
 * where it goes on the dispatcher queues. If panicking, turn
 * preemption off.
 */
void
preempt()
{
	kthread_t	*t = curthread;
	klwp_t		*lwp = ttolwp(curthread);

	if (panicstr)
		return;

	TRACE_0(TR_FAC_DISP, TR_PREEMPT_START, "preempt_start");

	thread_lock(t);

	if (t->t_state != TS_ONPROC || t->t_disp_queue != CPU->cpu_disp) {
		/*
		 * this thread has already been chosen to be run on
		 * another CPU. Clear kprunrun on this CPU since we're
		 * already headed for swtch().
		 */
		CPU->cpu_kprunrun = 0;
		thread_unlock_nopreempt(t);
		TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end");
	} else {
		if (lwp != NULL)
			lwp->lwp_ru.nivcsw++;
		CPU_STATS_ADDQ(CPU, sys, inv_swtch, 1);
		THREAD_TRANSITION(t);
		CL_PREEMPT(t);
		DTRACE_SCHED(preempt);
		thread_unlock_nopreempt(t);

		TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end");

		swtch();		/* clears CPU->cpu_runrun via disp() */
	}
}

extern kthread_t *thread_unpin();

/*
 * disp() - find the highest priority thread for this processor to run, and
 * set it in TS_ONPROC state so that resume() can be called to run it.
 */
static kthread_t *
disp()
{
	cpu_t		*cpup;
	disp_t		*dp;
	kthread_t	*tp;
	dispq_t		*dq;
	int		maxrunword;
	pri_t		pri;
	disp_t		*kpq;

	TRACE_0(TR_FAC_DISP, TR_DISP_START, "disp_start");

	cpup = CPU;
	/*
	 * Find the highest priority loaded, runnable thread.
	 */
	dp = cpup->cpu_disp;

reschedule:
	/*
	 * If there is more important work on the global queue with a better
	 * priority than the maximum on this CPU, take it now.
	 */
	kpq = &cpup->cpu_part->cp_kp_queue;
	while ((pri = kpq->disp_maxrunpri) >= 0 &&
	    pri >= dp->disp_maxrunpri &&
	    (cpup->cpu_flags & CPU_OFFLINE) == 0 &&
	    (tp = disp_getbest(kpq)) != NULL) {
		if (disp_ratify(tp, kpq) != NULL) {
			TRACE_1(TR_FAC_DISP, TR_DISP_END,
			    "disp_end:tid %p", tp);
			restore_mstate(tp);
			return (tp);
		}
	}

	disp_lock_enter(&dp->disp_lock);
	pri = dp->disp_maxrunpri;

	/*
	 * If there is nothing to run, look at what's runnable on other queues.
	 * Choose the idle thread if the CPU is quiesced.
	 * Note that CPUs that have the CPU_OFFLINE flag set can still run
	 * interrupt threads, which will be the only threads on the CPU's own
	 * queue, but cannot run threads from other queues.
	 */
	if (pri == -1) {
		if (!(cpup->cpu_flags & CPU_OFFLINE)) {
			disp_lock_exit(&dp->disp_lock);
			if ((tp = disp_getwork(cpup)) == NULL) {
				tp = cpup->cpu_idle_thread;
				(void) splhigh();
				THREAD_ONPROC(tp, cpup);
				cpup->cpu_dispthread = tp;
				cpup->cpu_dispatch_pri = -1;
				cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
				cpup->cpu_chosen_level = -1;
			}
		} else {
			disp_lock_exit_high(&dp->disp_lock);
			tp = cpup->cpu_idle_thread;
			THREAD_ONPROC(tp, cpup);
			cpup->cpu_dispthread = tp;
			cpup->cpu_dispatch_pri = -1;
			cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
			cpup->cpu_chosen_level = -1;
		}
		TRACE_1(TR_FAC_DISP, TR_DISP_END,
		    "disp_end:tid %p", tp);
		restore_mstate(tp);
		return (tp);
	}

	dq = &dp->disp_q[pri];
	tp = dq->dq_first;

	ASSERT(tp != NULL);
	ASSERT(tp->t_schedflag & TS_LOAD);	/* thread must be swapped in */

	DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp);

	/*
	 * Found it so remove it from queue.
	 */
	dp->disp_nrunnable--;
	dq->dq_sruncnt--;
	if ((dq->dq_first = tp->t_link) == NULL) {
		ulong_t	*dqactmap = dp->disp_qactmap;

		ASSERT(dq->dq_sruncnt == 0);
		dq->dq_last = NULL;

		/*
		 * The queue is empty, so the corresponding bit needs to be
		 * turned off in dqactmap.  If nrunnable != 0, we just took
		 * the last runnable thread off the highest queue, so
		 * recompute disp_maxrunpri.
		 */
		maxrunword = pri >> BT_ULSHIFT;
		dqactmap[maxrunword] &= ~BT_BIW(pri);

		if (dp->disp_nrunnable == 0) {
			dp->disp_max_unbound_pri = -1;
			dp->disp_maxrunpri = -1;
		} else {
			int ipri;

			ipri = bt_gethighbit(dqactmap, maxrunword);
			dp->disp_maxrunpri = ipri;
			if (ipri < dp->disp_max_unbound_pri)
				dp->disp_max_unbound_pri = ipri;
		}
	} else {
		tp->t_link = NULL;
	}

	/*
	 * Set TS_DONT_SWAP flag to prevent another processor from swapping
	 * out this thread before we have a chance to run it.
	 * While running, it is protected against swapping by t_lock.
	 */
	tp->t_schedflag |= TS_DONT_SWAP;
	cpup->cpu_dispthread = tp;		/* protected by spl only */
	cpup->cpu_dispatch_pri = pri;
	ASSERT(pri == DISP_PRIO(tp));
	thread_onproc(tp, cpup);		/* set t_state to TS_ONPROC */
	disp_lock_exit_high(&dp->disp_lock);	/* drop run queue lock */

	ASSERT(tp != NULL);
	TRACE_1(TR_FAC_DISP, TR_DISP_END,
	    "disp_end:tid %p", tp);

	if (disp_ratify(tp, kpq) == NULL)
		goto reschedule;

	restore_mstate(tp);
	return (tp);
}
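
/*
 * Note on the reschedule loop above: after the local queue's lock is
 * dropped, disp_ratify() re-checks for a better candidate that may
 * have appeared on the kp queue or the local queue in the meantime.
 * If one did, the chosen thread is put back on a run queue and
 * disp_ratify() returns NULL, so disp() simply starts over.
 */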

/*
 * swtch()
 *	Find best runnable thread and run it.
 *	Called with the current thread already switched to a new state,
 *	on a sleep queue, run queue, stopped, and not zombied.
 *	May be called at any spl level less than or equal to LOCK_LEVEL.
 *	Always drops spl to the base level (spl0()).
 */
void
swtch()
{
	kthread_t	*t = curthread;
	kthread_t	*next;
	cpu_t		*cp;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	if (t->t_flag & T_INTR_THREAD)
		cpu_intr_swtch_enter(t);

	if (t->t_intr != NULL) {
		/*
		 * We are an interrupt thread.  Setup and return
		 * the interrupted thread to be resumed.
		 */
		(void) splhigh();	/* block other scheduler action */
		cp = CPU;		/* now protected against migration */
		ASSERT(CPU_ON_INTR(cp) == 0);	/* not called with PIL > 10 */
		CPU_STATS_ADDQ(cp, sys, pswitch, 1);
		CPU_STATS_ADDQ(cp, sys, intrblk, 1);
		next = thread_unpin();
		TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");
		resume_from_intr(next);
	} else {
#ifdef	DEBUG
		if (t->t_state == TS_ONPROC &&
		    t->t_disp_queue->disp_cpu == CPU &&
		    t->t_preempt == 0) {
			thread_lock(t);
			ASSERT(t->t_state != TS_ONPROC ||
			    t->t_disp_queue->disp_cpu != CPU ||
			    t->t_preempt != 0);	/* cannot migrate */
			thread_unlock_nopreempt(t);
		}
#endif	/* DEBUG */
		cp = CPU;
		next = disp();		/* returns with spl high */
		ASSERT(CPU_ON_INTR(cp) == 0);	/* not called with PIL > 10 */

		/* OK to steal anything left on run queue */
		cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL;

		if (next != t) {
			if (t == cp->cpu_idle_thread) {
				CHIP_NRUNNING(cp->cpu_chip, 1);
			} else if (next == cp->cpu_idle_thread) {
				CHIP_NRUNNING(cp->cpu_chip, -1);
			}

			CPU_STATS_ADDQ(cp, sys, pswitch, 1);
			cp->cpu_last_swtch = t->t_disp_time = lbolt;
			TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

			if (dtrace_vtime_active)
				dtrace_vtime_switch(next);

			resume(next);
			/*
			 * The TR_RESUME_END and TR_SWTCH_END trace points
			 * appear at the end of resume(), because we may not
			 * return here
			 */
		} else {
			if (t->t_flag & T_INTR_THREAD)
				cpu_intr_swtch_exit(t);

			DTRACE_SCHED(remain__cpu);
			TRACE_0(TR_FAC_DISP, TR_SWTCH_END, "swtch_end");
			(void) spl0();
		}
	}
}

/*
 * swtch_from_zombie()
 *	Special case of swtch(), which allows checks for TS_ZOMB to be
 *	eliminated from normal resume.
 *	Find best runnable thread and run it.
 *	Called with the current thread zombied.
 *	Zombies cannot migrate, so CPU references are safe.
 */
void
swtch_from_zombie()
{
	kthread_t	*next;
	cpu_t		*cpu = CPU;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	ASSERT(curthread->t_state == TS_ZOMB);

	next = disp();			/* returns with spl high */
	ASSERT(CPU_ON_INTR(CPU) == 0);	/* not called with PIL > 10 */
	CPU_STATS_ADDQ(CPU, sys, pswitch, 1);
	ASSERT(next != curthread);
	TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

	if (next == cpu->cpu_idle_thread)
		CHIP_NRUNNING(cpu->cpu_chip, -1);

	if (dtrace_vtime_active)
		dtrace_vtime_switch(next);

	resume_from_zombie(next);
	/*
	 * The TR_RESUME_END and TR_SWTCH_END trace points
	 * appear at the end of resume(), because we certainly will not
	 * return here
	 */
}

#if defined(DEBUG) && (defined(DISP_DEBUG) || defined(lint))
static int
thread_on_queue(kthread_t *tp)
{
	cpu_t	*cp;
	cpu_t	*self;
	disp_t	*dp;

	self = CPU;
	cp = self->cpu_next_onln;
	dp = cp->cpu_disp;
	for (;;) {
		dispq_t		*dq;
		dispq_t		*eq;

		disp_lock_enter_high(&dp->disp_lock);
		for (dq = dp->disp_q, eq = dp->disp_q_limit; dq < eq; ++dq) {
			kthread_t	*rp;

			ASSERT(dq->dq_last == NULL ||
			    dq->dq_last->t_link == NULL);
			for (rp = dq->dq_first; rp; rp = rp->t_link)
				if (tp == rp) {
					disp_lock_exit_high(&dp->disp_lock);
					return (1);
				}
		}
		disp_lock_exit_high(&dp->disp_lock);
		if (cp == NULL)
			break;
		if (cp == self) {
			/*
			 * Last pass: check the partition-wide kp queue.
			 * Take the queue pointer before clearing cp,
			 * which terminates the loop above.
			 */
			dp = &self->cpu_part->cp_kp_queue;
			cp = NULL;
		} else {
			cp = cp->cpu_next_onln;
			dp = cp->cpu_disp;
		}
	}
	return (0);
}	/* end of thread_on_queue */
#else

#define	thread_on_queue(tp)	0	/* ASSERT must be !thread_on_queue */

#endif  /* DEBUG */

/*
 * like swtch(), but switch to a specified thread taken from another CPU.
 *	called with spl high..
 */
void
swtch_to(kthread_t *next)
{
	cpu_t			*cp = CPU;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	/*
	 * Update context switch statistics.
	 */
	CPU_STATS_ADDQ(cp, sys, pswitch, 1);

	TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

	if (curthread == cp->cpu_idle_thread)
		CHIP_NRUNNING(cp->cpu_chip, 1);

	/* OK to steal anything left on run queue */
	cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL;

	/* record last execution time */
	cp->cpu_last_swtch = curthread->t_disp_time = lbolt;

	if (dtrace_vtime_active)
		dtrace_vtime_switch(next);

	resume(next);
	/*
	 * The TR_RESUME_END and TR_SWTCH_END trace points
	 * appear at the end of resume(), because we may not
	 * return here
	 */
}



#define	CPU_IDLING(pri)	((pri) == -1)
1010*7c478bd9Sstevel@tonic-gate 
1011*7c478bd9Sstevel@tonic-gate static void
1012*7c478bd9Sstevel@tonic-gate cpu_resched(cpu_t *cp, pri_t tpri)
1013*7c478bd9Sstevel@tonic-gate {
1014*7c478bd9Sstevel@tonic-gate 	int	call_poke_cpu = 0;
1015*7c478bd9Sstevel@tonic-gate 	pri_t   cpupri = cp->cpu_dispatch_pri;
1016*7c478bd9Sstevel@tonic-gate 
1017*7c478bd9Sstevel@tonic-gate 	if (!CPU_IDLING(cpupri) && (cpupri < tpri)) {
1018*7c478bd9Sstevel@tonic-gate 		TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED,
1019*7c478bd9Sstevel@tonic-gate 		    "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri);
1020*7c478bd9Sstevel@tonic-gate 		if (tpri >= upreemptpri && cp->cpu_runrun == 0) {
1021*7c478bd9Sstevel@tonic-gate 			cp->cpu_runrun = 1;
1022*7c478bd9Sstevel@tonic-gate 			aston(cp->cpu_dispthread);
1023*7c478bd9Sstevel@tonic-gate 			if (tpri < kpreemptpri && cp != CPU)
1024*7c478bd9Sstevel@tonic-gate 				call_poke_cpu = 1;
1025*7c478bd9Sstevel@tonic-gate 		}
1026*7c478bd9Sstevel@tonic-gate 		if (tpri >= kpreemptpri && cp->cpu_kprunrun == 0) {
1027*7c478bd9Sstevel@tonic-gate 			cp->cpu_kprunrun = 1;
1028*7c478bd9Sstevel@tonic-gate 			if (cp != CPU)
1029*7c478bd9Sstevel@tonic-gate 				call_poke_cpu = 1;
1030*7c478bd9Sstevel@tonic-gate 		}
1031*7c478bd9Sstevel@tonic-gate 	}
1032*7c478bd9Sstevel@tonic-gate 
1033*7c478bd9Sstevel@tonic-gate 	/*
1034*7c478bd9Sstevel@tonic-gate 	 * Propagate cpu_runrun, and cpu_kprunrun to global visibility.
1035*7c478bd9Sstevel@tonic-gate 	 */
1036*7c478bd9Sstevel@tonic-gate 	membar_enter();
1037*7c478bd9Sstevel@tonic-gate 
1038*7c478bd9Sstevel@tonic-gate 	if (call_poke_cpu)
1039*7c478bd9Sstevel@tonic-gate 		poke_cpu(cp->cpu_id);
1040*7c478bd9Sstevel@tonic-gate }
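
/*
 * Illustrative sketch, not part of the original source: the decision
 * cpu_resched() makes, condensed into a side-effect-free predicate
 * (ignoring the dedup of already-set flags that the real routine
 * performs).  The DISP_EXAMPLES guard and the function name are
 * hypothetical, for exposition only.
 */
#ifdef DISP_EXAMPLES
static int
disp_example_needs_poke(cpu_t *cp, pri_t tpri)
{
	pri_t	cpupri = cp->cpu_dispatch_pri;

	/* An idling CPU (pri == -1) needs no preempt flags at all. */
	if (CPU_IDLING(cpupri) || cpupri >= tpri)
		return (0);

	/* Below the user preemption threshold nothing is flagged. */
	if (tpri < upreemptpri)
		return (0);

	/* Only a remote CPU must be poked; the local one sees the flags. */
	return (cp != CPU);
}
#endif	/* DISP_EXAMPLES */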

/*
 * Routine used by setbackdq() to balance load across the physical
 * processors. Returns a CPU of a less heavily loaded chip in the lgroup
 * if balancing is necessary, or the "hint" CPU if it's not.
 *
 * - tp is the thread being enqueued
 * - cp is a hint CPU (chosen by cpu_choose()).
 * - curchip (if not NULL) is the chip on which the current thread
 *   is running.
 *
 * The thread lock for "tp" must be held while calling this routine.
 */
static cpu_t *
chip_balance(kthread_t *tp, cpu_t *cp, chip_t *curchip)
{
	int	chp_nrun, ochp_nrun;
	chip_t	*chp, *nchp;

	chp = cp->cpu_chip;
	chp_nrun = chp->chip_nrunning;

	if (chp == curchip)
		chp_nrun--;	/* Ignore curthread */

	/*
	 * If this chip isn't at all idle, then let
	 * run queue balancing do the work.
	 */
	if (chp_nrun == chp->chip_ncpu)
		return (cp);

	nchp = chp->chip_balance;
	do {
		if (nchp == chp ||
		    !CHIP_IN_CPUPART(nchp, tp->t_cpupart))
			continue;

		ochp_nrun = nchp->chip_nrunning;

		/*
		 * If the other chip is running fewer threads,
		 * or if it's running the same number of threads but
		 * has more online logical CPUs, then choose to balance.
		 */
		if (chp_nrun > ochp_nrun ||
		    (chp_nrun == ochp_nrun &&
		    nchp->chip_ncpu > chp->chip_ncpu)) {
			cp = nchp->chip_cpus;
			nchp->chip_cpus = cp->cpu_next_chip;

			/*
			 * Find a CPU on the chip in the correct
			 * partition. We know at least one exists
			 * because of the CHIP_IN_CPUPART() check above.
			 */
			while (cp->cpu_part != tp->t_cpupart)
				cp = cp->cpu_next_chip;
		}
		chp->chip_balance = nchp->chip_next_lgrp;
		break;
	} while ((nchp = nchp->chip_next_lgrp) != chp->chip_balance);

	ASSERT(CHIP_IN_CPUPART(cp->cpu_chip, tp->t_cpupart));
	return (cp);
}
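
/*
 * Illustrative sketch, not part of the original source: the "balance
 * rotor" chip_balance() keeps per chip.  chip_balance points into the
 * lgroup's circular chip list; each decision resumes the scan where
 * the previous one stopped, so successive enqueues spread across
 * chips rather than piling onto the first eligible one.  The guard
 * and helper name are hypothetical.
 */
#ifdef DISP_EXAMPLES
static chip_t *
disp_example_balance_rotor(chip_t *chp)
{
	chip_t	*nchp = chp->chip_balance;

	/* Advance the rotor one position around the lgroup's chip ring. */
	chp->chip_balance = nchp->chip_next_lgrp;
	return (nchp);
}
#endif	/* DISP_EXAMPLES */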

/*
 * setbackdq() keeps runqs balanced such that the difference in length
 * between the chosen runq and the next one is no more than RUNQ_MAX_DIFF.
 * For threads with priorities below RUNQ_MATCH_PRI, the runq lengths
 * must match exactly.  When the per-thread TS_RUNQMATCH flag is set,
 * setbackdq() tries to keep the runqs perfectly balanced regardless of
 * the thread's priority.
 */
#define	RUNQ_MATCH_PRI	16	/* pri below which queue lengths must match */
#define	RUNQ_MAX_DIFF	2	/* maximum runq length difference */
#define	RUNQ_LEN(cp, pri)	((cp)->cpu_disp->disp_q[pri].dq_sruncnt)
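
/*
 * Illustrative sketch, not part of the original source: the queue
 * length comparison setbackdq() builds from the macros above, pulled
 * out as a hypothetical helper.  Given the hint CPU cp and a neighbor
 * np, it returns whichever CPU's runq should receive the thread.
 */
#ifdef DISP_EXAMPLES
static cpu_t *
disp_example_pick_runq(kthread_t *tp, cpu_t *cp, cpu_t *np, pri_t tpri)
{
	int	qlen = RUNQ_LEN(cp, tpri);

	/*
	 * High-priority threads tolerate a difference of RUNQ_MAX_DIFF
	 * before migrating; low-priority and TS_RUNQMATCH threads insist
	 * on matched queue lengths.
	 */
	if (tpri >= RUNQ_MATCH_PRI && !(tp->t_schedflag & TS_RUNQMATCH))
		qlen -= RUNQ_MAX_DIFF;
	if (qlen > 0 && RUNQ_LEN(np, tpri) < qlen)
		return (np);
	return (cp);
}
#endif	/* DISP_EXAMPLES */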

/*
 * Put the specified thread on the back of the dispatcher
 * queue corresponding to its current priority.
 *
 * Called with the thread in transition, onproc or stopped state
 * and locked (transition implies locked) and at high spl.
 * Returns with the thread in TS_RUN state and still locked.
 */
void
setbackdq(kthread_t *tp)
{
	dispq_t		*dq;
	disp_t		*dp;
	chip_t		*curchip = NULL;
	cpu_t		*cp;
	pri_t		tpri;
	int		bound;

	ASSERT(THREAD_LOCK_HELD(tp));
	ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);

	if (tp->t_waitrq == 0) {
		hrtime_t curtime;

		curtime = gethrtime_unscaled();
		(void) cpu_update_pct(tp, curtime);
		tp->t_waitrq = curtime;
	} else {
		(void) cpu_update_pct(tp, gethrtime_unscaled());
	}

	ASSERT(!thread_on_queue(tp));	/* make sure tp isn't on a runq */

	/*
	 * If thread is "swapped" or on the swap queue don't
	 * queue it, but wake sched.
	 */
	if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
		disp_swapped_setrun(tp);
		return;
	}

	tpri = DISP_PRIO(tp);
	if (tp == curthread) {
		curchip = CPU->cpu_chip;
	}

	if (ncpus == 1)
		cp = tp->t_cpu;
	else if (!tp->t_bound_cpu && !tp->t_weakbound_cpu) {
		if (tpri >= kpqpri) {
			setkpdq(tp, SETKP_BACK);
			return;
		}
		/*
		 * Let cpu_choose suggest a CPU.
		 */
		cp = cpu_choose(tp, tpri);

		if (tp->t_cpupart == cp->cpu_part) {
			int	qlen;

			/*
			 * Select another CPU if we need
			 * to do some load balancing across the
			 * physical processors.
			 */
			if (CHIP_SHOULD_BALANCE(cp->cpu_chip))
				cp = chip_balance(tp, cp, curchip);

			/*
			 * Balance across the run queues
			 */
			qlen = RUNQ_LEN(cp, tpri);
			if (tpri >= RUNQ_MATCH_PRI &&
			    !(tp->t_schedflag & TS_RUNQMATCH))
				qlen -= RUNQ_MAX_DIFF;
			if (qlen > 0) {
				cpu_t	*np;

				if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID)
					np = cp->cpu_next_part;
				else {
					if ((np = cp->cpu_next_lpl) == cp)
						np = cp->cpu_next_part;
				}
				if (RUNQ_LEN(np, tpri) < qlen)
					cp = np;
			}
		} else {
			/*
			 * Migrate to a cpu in the new partition.
			 */
			cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
			    tp->t_lpl, tp->t_pri, NULL);
		}
		bound = 0;
		ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
	} else {
		/*
		 * It is possible that t_weakbound_cpu != t_bound_cpu (for
		 * a short time until weak binding that existed when the
		 * strong binding was established has dropped) so we must
		 * favour weak binding over strong.
		 */
		cp = tp->t_weakbound_cpu ?
		    tp->t_weakbound_cpu : tp->t_bound_cpu;
		bound = 1;
	}
	dp = cp->cpu_disp;
	disp_lock_enter_high(&dp->disp_lock);

	DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 0);
	TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p",
	    tpri, cp, tp);

#ifndef NPROBE
	/* Kernel probe */
	if (tnf_tracing_active)
		tnf_thread_queue(tp, cp, tpri);
#endif /* NPROBE */

	ASSERT(tpri >= 0 && tpri < dp->disp_npri);

	THREAD_RUN(tp, &dp->disp_lock);		/* set t_state to TS_RUN */
	tp->t_disp_queue = dp;
	tp->t_link = NULL;

	dq = &dp->disp_q[tpri];
	dp->disp_nrunnable++;
	membar_enter();

	if (dq->dq_sruncnt++ != 0) {
		ASSERT(dq->dq_first != NULL);
		dq->dq_last->t_link = tp;
		dq->dq_last = tp;
	} else {
		ASSERT(dq->dq_first == NULL);
		ASSERT(dq->dq_last == NULL);
		dq->dq_first = dq->dq_last = tp;
		BT_SET(dp->disp_qactmap, tpri);
		if (tpri > dp->disp_maxrunpri) {
			dp->disp_maxrunpri = tpri;
			membar_enter();
			cpu_resched(cp, tpri);
		}
	}

	if (!bound && tpri > dp->disp_max_unbound_pri) {
		if (tp == curthread && dp->disp_max_unbound_pri == -1 &&
		    cp == CPU) {
			/*
			 * If there are no other unbound threads on the
			 * run queue, don't allow other CPUs to steal
			 * this thread while we are in the middle of a
			 * context switch. We may just switch to it
			 * again right away. CPU_DISP_DONTSTEAL is cleared
			 * in swtch and swtch_to.
			 */
			cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL;
		}
		dp->disp_max_unbound_pri = tpri;
	}
	(*disp_enq_thread)(cp, bound);
}
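
/*
 * Illustrative sketch, not part of the original source: the calling
 * convention setbackdq() expects, shown as a hypothetical setrun-style
 * wrapper.  The caller holds the thread lock, moves the thread through
 * the transition state, and setbackdq() returns with it in TS_RUN
 * (compare the requeue path in disp_ratify() below).
 */
#ifdef DISP_EXAMPLES
static void
disp_example_setrun(kthread_t *tp)
{
	thread_lock(tp);	/* also acquires the dispatcher lock */
	THREAD_TRANSITION(tp);	/* intermediate, still-locked state */
	setbackdq(tp);		/* returns with tp in TS_RUN, locked */
	thread_unlock(tp);
}
#endif	/* DISP_EXAMPLES */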

/*
 * Put the specified thread on the front of the dispatcher
 * queue corresponding to its current priority.
 *
 * Called with the thread in transition, onproc or stopped state
 * and locked (transition implies locked) and at high spl.
 * Returns with the thread in TS_RUN state and still locked.
 */
void
setfrontdq(kthread_t *tp)
{
	disp_t		*dp;
	dispq_t		*dq;
	cpu_t		*cp;
	pri_t		tpri;
	int		bound;

	ASSERT(THREAD_LOCK_HELD(tp));
	ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);

	if (tp->t_waitrq == 0) {
		hrtime_t curtime;

		curtime = gethrtime_unscaled();
		(void) cpu_update_pct(tp, curtime);
		tp->t_waitrq = curtime;
	} else {
		(void) cpu_update_pct(tp, gethrtime_unscaled());
	}

	ASSERT(!thread_on_queue(tp));	/* make sure tp isn't on a runq */

	/*
	 * If thread is "swapped" or on the swap queue don't
	 * queue it, but wake sched.
	 */
	if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
		disp_swapped_setrun(tp);
		return;
	}

	tpri = DISP_PRIO(tp);
	if (ncpus == 1)
		cp = tp->t_cpu;
	else if (!tp->t_bound_cpu && !tp->t_weakbound_cpu) {
		if (tpri >= kpqpri) {
			setkpdq(tp, SETKP_FRONT);
			return;
		}
		cp = tp->t_cpu;
		if (tp->t_cpupart == cp->cpu_part) {
			/*
			 * If we are of higher or equal priority than
			 * the highest priority runnable thread of
			 * the current CPU, just pick this CPU.  Otherwise,
			 * let cpu_choose() select the CPU.  If this CPU
			 * is the target of an offline request, then do not
			 * pick it; a thread_nomigrate() on the in-motion
			 * CPU relies on this when it forces a preempt.
			 */
			if (tpri < cp->cpu_disp->disp_maxrunpri ||
			    cp == cpu_inmotion)
				cp = cpu_choose(tp, tpri);
		} else {
			/*
			 * Migrate to a cpu in the new partition.
			 */
			cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
			    tp->t_lpl, tp->t_pri, NULL);
		}
		bound = 0;
		ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
	} else {
		/*
		 * It is possible that t_weakbound_cpu != t_bound_cpu (for
		 * a short time until weak binding that existed when the
		 * strong binding was established has dropped) so we must
		 * favour weak binding over strong.
		 */
		cp = tp->t_weakbound_cpu ?
		    tp->t_weakbound_cpu : tp->t_bound_cpu;
		bound = 1;
	}
	dp = cp->cpu_disp;
	disp_lock_enter_high(&dp->disp_lock);

	TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp);
	DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 1);

#ifndef NPROBE
	/* Kernel probe */
	if (tnf_tracing_active)
		tnf_thread_queue(tp, cp, tpri);
#endif /* NPROBE */

	ASSERT(tpri >= 0 && tpri < dp->disp_npri);

	THREAD_RUN(tp, &dp->disp_lock);		/* set TS_RUN state and lock */
	tp->t_disp_queue = dp;

	dq = &dp->disp_q[tpri];
	dp->disp_nrunnable++;
	membar_enter();

	if (dq->dq_sruncnt++ != 0) {
		ASSERT(dq->dq_last != NULL);
		tp->t_link = dq->dq_first;
		dq->dq_first = tp;
	} else {
		ASSERT(dq->dq_last == NULL);
		ASSERT(dq->dq_first == NULL);
		tp->t_link = NULL;
		dq->dq_first = dq->dq_last = tp;
		BT_SET(dp->disp_qactmap, tpri);
		if (tpri > dp->disp_maxrunpri) {
			dp->disp_maxrunpri = tpri;
			membar_enter();
			cpu_resched(cp, tpri);
		}
	}

	if (!bound && tpri > dp->disp_max_unbound_pri) {
		if (tp == curthread && dp->disp_max_unbound_pri == -1 &&
		    cp == CPU) {
			/*
			 * If there are no other unbound threads on the
			 * run queue, don't allow other CPUs to steal
			 * this thread while we are in the middle of a
			 * context switch. We may just switch to it
			 * again right away. CPU_DISP_DONTSTEAL is cleared
			 * in swtch and swtch_to.
			 */
			cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL;
		}
		dp->disp_max_unbound_pri = tpri;
	}
	(*disp_enq_thread)(cp, bound);
}

/*
 * Put a high-priority unbound thread on the kp queue
 */
static void
setkpdq(kthread_t *tp, int borf)
{
	dispq_t	*dq;
	disp_t	*dp;
	cpu_t	*cp;
	pri_t	tpri;

	tpri = DISP_PRIO(tp);

	dp = &tp->t_cpupart->cp_kp_queue;
	disp_lock_enter_high(&dp->disp_lock);

	TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp);

	ASSERT(tpri >= 0 && tpri < dp->disp_npri);
	DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, borf);
	THREAD_RUN(tp, &dp->disp_lock);		/* set t_state to TS_RUN */
	tp->t_disp_queue = dp;
	dp->disp_nrunnable++;
	dq = &dp->disp_q[tpri];

	if (dq->dq_sruncnt++ != 0) {
		if (borf == SETKP_BACK) {
			ASSERT(dq->dq_first != NULL);
			tp->t_link = NULL;
			dq->dq_last->t_link = tp;
			dq->dq_last = tp;
		} else {
			ASSERT(dq->dq_last != NULL);
			tp->t_link = dq->dq_first;
			dq->dq_first = tp;
		}
	} else {
		if (borf == SETKP_BACK) {
			ASSERT(dq->dq_first == NULL);
			ASSERT(dq->dq_last == NULL);
			dq->dq_first = dq->dq_last = tp;
		} else {
			ASSERT(dq->dq_last == NULL);
			ASSERT(dq->dq_first == NULL);
			tp->t_link = NULL;
			dq->dq_first = dq->dq_last = tp;
		}
		BT_SET(dp->disp_qactmap, tpri);
		if (tpri > dp->disp_max_unbound_pri)
			dp->disp_max_unbound_pri = tpri;
		if (tpri > dp->disp_maxrunpri) {
			dp->disp_maxrunpri = tpri;
			membar_enter();
		}
	}

	cp = tp->t_cpu;
	if (tp->t_cpupart != cp->cpu_part) {
		/* migrate to a cpu in the new partition */
		cp = tp->t_cpupart->cp_cpulist;
	}
	cp = disp_lowpri_cpu(cp, tp->t_lpl, tp->t_pri, NULL);
	disp_lock_enter_high(&cp->cpu_disp->disp_lock);
	ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);

#ifndef NPROBE
	/* Kernel probe */
	if (tnf_tracing_active)
		tnf_thread_queue(tp, cp, tpri);
#endif /* NPROBE */

	if (cp->cpu_chosen_level < tpri)
		cp->cpu_chosen_level = tpri;
	cpu_resched(cp, tpri);
	disp_lock_exit_high(&cp->cpu_disp->disp_lock);
	(*disp_enq_thread)(cp, 0);
}
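
/*
 * Illustrative sketch, not part of the original source: the routing
 * test setbackdq()/setfrontdq() apply before handing a thread to
 * setkpdq().  Only unbound threads at or above kpqpri go to the
 * partition-wide kernel preemption queue.  Guard and name are
 * hypothetical.
 */
#ifdef DISP_EXAMPLES
static int
disp_example_use_kp_queue(kthread_t *tp, pri_t tpri)
{
	/* Bound (or weakly bound) threads always use a CPU's own queue. */
	if (tp->t_bound_cpu != NULL || tp->t_weakbound_cpu != NULL)
		return (0);
	return (ncpus > 1 && tpri >= kpqpri);
}
#endif	/* DISP_EXAMPLES */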

/*
 * Remove a thread from the dispatcher queue if it is on it.
 * It is not an error if it is not found, but we return whether
 * or not it was found so the caller can check.
 */
int
dispdeq(kthread_t *tp)
{
	disp_t		*dp;
	dispq_t		*dq;
	kthread_t	*rp;
	kthread_t	*trp;
	kthread_t	**ptp;
	int		tpri;

	ASSERT(THREAD_LOCK_HELD(tp));

	if (tp->t_state != TS_RUN)
		return (0);

	/*
	 * The thread is "swapped" or is on the swap queue and
	 * hence no longer on the run queue, so return true.
	 */
	if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD)
		return (1);

	tpri = DISP_PRIO(tp);
	dp = tp->t_disp_queue;
	ASSERT(tpri < dp->disp_npri);
	dq = &dp->disp_q[tpri];
	ptp = &dq->dq_first;
	rp = *ptp;
	trp = NULL;

	ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL);

	/*
	 * Search for thread in queue.
	 * Double links would simplify this at the expense of disp/setrun.
	 */
	while (rp != tp && rp != NULL) {
		trp = rp;
		ptp = &trp->t_link;
		rp = trp->t_link;
	}

	if (rp == NULL) {
		panic("dispdeq: thread not on queue");
	}

	DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp);

	/*
	 * Found it so remove it from queue.
	 */
	if ((*ptp = rp->t_link) == NULL)
		dq->dq_last = trp;

	dp->disp_nrunnable--;
	if (--dq->dq_sruncnt == 0) {
		dp->disp_qactmap[tpri >> BT_ULSHIFT] &= ~BT_BIW(tpri);
		if (dp->disp_nrunnable == 0) {
			dp->disp_max_unbound_pri = -1;
			dp->disp_maxrunpri = -1;
		} else if (tpri == dp->disp_maxrunpri) {
			int ipri;

			ipri = bt_gethighbit(dp->disp_qactmap,
			    dp->disp_maxrunpri >> BT_ULSHIFT);
			if (ipri < dp->disp_max_unbound_pri)
				dp->disp_max_unbound_pri = ipri;
			dp->disp_maxrunpri = ipri;
		}
	}
	tp->t_link = NULL;
	THREAD_TRANSITION(tp);		/* put in intermediate state */
	return (1);
}
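
/*
 * Illustrative sketch, not part of the original source: the classic
 * dequeue/requeue pattern built on dispdeq(), as a hypothetical
 * helper.  A scheduling class changing a runnable thread's priority
 * removes the thread (dispdeq() leaves it in transition), updates
 * t_pri, and re-enqueues it; the real classes go through their own
 * priority-change entry points.
 */
#ifdef DISP_EXAMPLES
static void
disp_example_change_pri(kthread_t *tp, pri_t newpri)
{
	ASSERT(THREAD_LOCK_HELD(tp));

	if (dispdeq(tp)) {	/* was queued; now in transition */
		tp->t_pri = newpri;
		setbackdq(tp);	/* back to TS_RUN on the right queue */
	} else {
		tp->t_pri = newpri;	/* not runnable; just update */
	}
}
#endif	/* DISP_EXAMPLES */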


/*
 * dq_sruninc and dq_srundec are public functions for
 * incrementing/decrementing the sruncnts when a thread on
 * a dispatcher queue is made schedulable/unschedulable by
 * resetting the TS_LOAD flag.
 *
 * The caller MUST have the thread lock and therefore the dispatcher
 * queue lock, so that the operation that changes the flag, the check
 * of whether the thread is on a dispatch queue, and the call to this
 * function are atomic with respect to interrupts.
 */

/*
 * Called by sched AFTER TS_LOAD flag is set on a swapped, runnable thread.
 */
void
dq_sruninc(kthread_t *t)
{
	ASSERT(t->t_state == TS_RUN);
	ASSERT(t->t_schedflag & TS_LOAD);

	THREAD_TRANSITION(t);
	setfrontdq(t);
}

/*
 * See comment on calling conventions above.
 * Called by sched BEFORE TS_LOAD flag is cleared on a runnable thread.
 */
void
dq_srundec(kthread_t *t)
{
	ASSERT(t->t_schedflag & TS_LOAD);

	(void) dispdeq(t);
	disp_swapped_enq(t);
}

/*
 * Change the dispatcher lock of thread to the "swapped_lock"
 * and return with thread lock still held.
 *
 * Called with thread_lock held, in transition state, and at high spl.
 */
void
disp_swapped_enq(kthread_t *tp)
{
	ASSERT(THREAD_LOCK_HELD(tp));
	ASSERT(tp->t_schedflag & TS_LOAD);

	switch (tp->t_state) {
	case TS_RUN:
		disp_lock_enter_high(&swapped_lock);
		THREAD_SWAP(tp, &swapped_lock);	/* set TS_RUN state and lock */
		break;
	case TS_ONPROC:
		disp_lock_enter_high(&swapped_lock);
		THREAD_TRANSITION(tp);
		wake_sched_sec = 1;		/* tell clock to wake sched */
		THREAD_SWAP(tp, &swapped_lock);	/* set TS_RUN state and lock */
		break;
	default:
		panic("disp_swapped: tp: %p bad t_state", (void *)tp);
	}
}

/*
 * This routine is called by setbackdq/setfrontdq if the thread is
 * not loaded or loaded and on the swap queue.
 *
 * Thread state TS_SLEEP implies that a swapped thread
 * has been woken up and needs to be swapped in by the swapper.
 *
 * Thread state TS_RUN implies that the priority of a swapped
 * thread is being increased by its scheduling class (e.g. ts_update).
 */
static void
disp_swapped_setrun(kthread_t *tp)
{
	ASSERT(THREAD_LOCK_HELD(tp));
	ASSERT((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD);

	switch (tp->t_state) {
	case TS_SLEEP:
		disp_lock_enter_high(&swapped_lock);
		/*
		 * Wake sched immediately (i.e., next tick) if the
		 * thread priority is above maxclsyspri.
		 */
		if (DISP_PRIO(tp) > maxclsyspri)
			wake_sched = 1;
		else
			wake_sched_sec = 1;
		THREAD_RUN(tp, &swapped_lock); /* set TS_RUN state and lock */
		break;
	case TS_RUN:				/* called from ts_update */
		break;
	default:
		panic("disp_swapped_setrun: tp: %p bad t_state", (void *)tp);
	}
}
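
/*
 * Illustrative sketch, not part of the original source: the swapper
 * wakeup urgency rule used above, isolated into a hypothetical helper.
 * Threads above maxclsyspri get sched woken on the very next clock
 * tick; everything else waits for the once-a-second wakeup.
 */
#ifdef DISP_EXAMPLES
static void
disp_example_request_swapin(kthread_t *tp)
{
	if (DISP_PRIO(tp) > maxclsyspri)
		wake_sched = 1;		/* wake sched at the next tick */
	else
		wake_sched_sec = 1;	/* wake sched within a second */
}
#endif	/* DISP_EXAMPLES */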


/*
 *	Make a thread give up its processor.  Find the processor on
 *	which this thread is executing, and have that processor
 *	preempt.
 */
void
cpu_surrender(kthread_t *tp)
{
	cpu_t	*cpup;
	int	max_pri;
	int	max_run_pri;
	klwp_t	*lwp;

	ASSERT(THREAD_LOCK_HELD(tp));

	if (tp->t_state != TS_ONPROC)
		return;
	cpup = tp->t_disp_queue->disp_cpu;	/* CPU thread dispatched to */
	max_pri = cpup->cpu_disp->disp_maxrunpri; /* best pri of that CPU */
	max_run_pri = CP_MAXRUNPRI(cpup->cpu_part);
	if (max_pri < max_run_pri)
		max_pri = max_run_pri;

	cpup->cpu_runrun = 1;
	if (max_pri >= kpreemptpri && cpup->cpu_kprunrun == 0) {
		cpup->cpu_kprunrun = 1;
	}

	/*
	 * Propagate cpu_runrun and cpu_kprunrun to global visibility.
	 */
	membar_enter();

	DTRACE_SCHED1(surrender, kthread_t *, tp);

	/*
	 * Make the target thread take an excursion through trap()
	 * to do preempt() (unless we're already in trap or post_syscall,
	 * calling cpu_surrender via CL_TRAPRET).
	 */
	if (tp != curthread || (lwp = tp->t_lwp) == NULL ||
	    lwp->lwp_state != LWP_USER) {
		aston(tp);
		if (cpup != CPU)
			poke_cpu(cpup->cpu_id);
	}
	TRACE_2(TR_FAC_DISP, TR_CPU_SURRENDER,
	    "cpu_surrender:tid %p cpu %p", tp, cpup);
}
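
/*
 * Illustrative sketch, not part of the original source: the shape of a
 * scheduling-class caller of cpu_surrender(), e.g. a tick handler that
 * decides an on-CPU thread has used up its quantum.  The direct t_pri
 * store is a simplification; real classes update priority through
 * their own paths.  Guard and name are hypothetical.
 */
#ifdef DISP_EXAMPLES
static void
disp_example_quantum_expired(kthread_t *tp, pri_t newpri)
{
	ASSERT(THREAD_LOCK_HELD(tp));

	tp->t_pri = newpri;	/* demote before forcing the preempt */
	cpu_surrender(tp);	/* flag the CPU; trap() runs preempt() */
}
#endif	/* DISP_EXAMPLES */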


/*
 * Commit to and ratify a scheduling decision.
 */
/*ARGSUSED*/
static kthread_t *
disp_ratify(kthread_t *tp, disp_t *kpq)
{
	pri_t	tpri, maxpri;
	pri_t	maxkpri;
	cpu_t	*cpup;

	ASSERT(tp != NULL);
	/*
	 * Commit to, then ratify scheduling decision.
	 */
	cpup = CPU;
	if (cpup->cpu_runrun != 0)
		cpup->cpu_runrun = 0;
	if (cpup->cpu_kprunrun != 0)
		cpup->cpu_kprunrun = 0;
	if (cpup->cpu_chosen_level != -1)
		cpup->cpu_chosen_level = -1;
	membar_enter();
	tpri = DISP_PRIO(tp);
	maxpri = cpup->cpu_disp->disp_maxrunpri;
	maxkpri = kpq->disp_maxrunpri;
	if (maxpri < maxkpri)
		maxpri = maxkpri;
	if (tpri < maxpri) {
		/*
		 * We should have done better.  Put this thread back
		 * and indicate that the caller should try again.
		 */
		cpup->cpu_dispthread = curthread;	/* fixup dispthread */
		cpup->cpu_dispatch_pri = DISP_PRIO(curthread);
		thread_lock_high(tp);
		THREAD_TRANSITION(tp);
		setfrontdq(tp);
		thread_unlock_nopreempt(tp);

		tp = NULL;
	}
	return (tp);
}
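
/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * retry wrapper showing how disp_getbest() and disp_ratify() compose.
 * If ratification fails (a better thread appeared while we committed),
 * disp_ratify() has already requeued the candidate and returned NULL,
 * so the search simply starts over.  disp_getwork() itself makes one
 * attempt and lets its caller retry.
 */
#ifdef DISP_EXAMPLES
static kthread_t *
disp_example_steal_loop(disp_t *kpq)
{
	kthread_t	*tp;

	while (kpq->disp_maxrunpri >= 0) {
		if ((tp = disp_getbest(kpq)) == NULL)
			break;			/* nothing stealable */
		if ((tp = disp_ratify(tp, kpq)) != NULL)
			return (tp);		/* decision ratified */
		/* Candidate was put back by disp_ratify(); try again. */
	}
	return (NULL);
}
#endif	/* DISP_EXAMPLES */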

/*
 * See if there is any work on the dispatcher queue for other CPUs.
 * If there is, dequeue the best thread and return.
 */
static kthread_t *
disp_getwork(cpu_t *cp)
{
	cpu_t		*ocp;		/* other CPU */
	cpu_t		*ocp_start;
	cpu_t		*tcp;		/* target local CPU */
	kthread_t	*tp;
	pri_t		maxpri;
	int		s;
	disp_t		*kpq;		/* kp queue for this partition */
	lpl_t		*lpl, *lpl_leaf;
	int		hint, leafidx;

	maxpri = -1;
	tcp = NULL;

	kpq = &cp->cpu_part->cp_kp_queue;
	while (kpq->disp_maxrunpri >= 0) {
		/*
		 * Try to take a thread from the kp_queue.
		 */
		tp = disp_getbest(kpq);
		if (tp)
			return (disp_ratify(tp, kpq));
	}

	s = splhigh();		/* protect the cpu_active list */

	/*
	 * Try to find something to do on another CPU's run queue.
	 * Loop through all other CPUs looking for the one with the highest
	 * priority unbound thread.
	 *
	 * On NUMA machines, the partition's CPUs are consulted in order of
	 * distance from the current CPU. This way, the first available
	 * work found is also the closest, and will suffer the least
	 * from being migrated.
	 */
	lpl = lpl_leaf = cp->cpu_lpl;
	hint = leafidx = 0;

	/*
	 * This loop traverses the lpl hierarchy. Higher level lpls represent
	 * broader levels of locality.
	 */
	do {
		/* This loop iterates over the lpl's leaves */
		do {
			if (lpl_leaf != cp->cpu_lpl)
				ocp = lpl_leaf->lpl_cpus;
			else
				ocp = cp->cpu_next_lpl;

			/* This loop iterates over the CPUs in the leaf */
			ocp_start = ocp;
			do {
				pri_t pri;

				ASSERT(CPU_ACTIVE(ocp));

				/*
				 * End our stroll around the partition if:
				 *
				 * - Something became runnable on the local
				 *   queue
				 *
				 * - We're at the broadest level of locality and
				 *   we happen across another idle CPU. At the
				 *   highest level of locality, all CPUs will
				 *   walk the partition's CPUs in the same
				 *   order, so we can end our stroll taking
				 *   comfort in knowing the other idle CPU is
				 *   already covering the next portion of the
				 *   list.
				 */
				if (cp->cpu_disp->disp_nrunnable != 0)
					break;
				if (ocp->cpu_dispatch_pri == -1) {
					if (ocp->cpu_disp_flags &
					    CPU_DISP_HALTED)
						continue;
					else if (lpl->lpl_parent == NULL)
						break;
				}

				/*
				 * If there's only one thread and the CPU
				 * is in the middle of a context switch,
				 * or it's currently running the idle thread,
				 * don't steal it.
				 */
				if ((ocp->cpu_disp_flags &
				    CPU_DISP_DONTSTEAL) &&
				    ocp->cpu_disp->disp_nrunnable == 1)
					continue;

				pri = ocp->cpu_disp->disp_max_unbound_pri;
				if (pri > maxpri) {
					maxpri = pri;
					tcp = ocp;
				}
			} while ((ocp = ocp->cpu_next_lpl) != ocp_start);

			if ((lpl_leaf = lpl->lpl_rset[++leafidx]) == NULL) {
				leafidx = 0;
				lpl_leaf = lpl->lpl_rset[leafidx];
			}
		} while (leafidx != hint);

		hint = leafidx = lpl->lpl_hint;
		if ((lpl = lpl->lpl_parent) != NULL)
			lpl_leaf = lpl->lpl_rset[hint];
	} while (!tcp && lpl);

	splx(s);

	/*
	 * If another queue looks good, and there is still nothing on
	 * the local queue, try to transfer one or more threads
	 * from it to our queue.
	 */
	if (tcp && cp->cpu_disp->disp_nrunnable == 0) {
		tp = disp_getbest(tcp->cpu_disp);
		if (tp)
			return (disp_ratify(tp, kpq));
	}
	return (NULL);
}
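
/*
 * Illustrative sketch, not part of the original source: a condensed,
 * hypothetical view of how a dispatcher with an empty local queue
 * falls back on disp_getwork().  The real disp() does considerably
 * more bookkeeping before resorting to the idle thread.
 */
#ifdef DISP_EXAMPLES
static kthread_t *
disp_example_next_thread(cpu_t *cp)
{
	kthread_t	*tp;

	/* Steal the closest, highest-priority unbound thread, if any. */
	if ((tp = disp_getwork(cp)) != NULL)
		return (tp);	/* already TS_ONPROC, at splhigh */

	/* No unbound work anywhere in the partition: go idle. */
	return (cp->cpu_idle_thread);
}
#endif	/* DISP_EXAMPLES */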


/*
 * disp_fix_unbound_pri()
 *	Determines the maximum priority of unbound threads on the queue.
 *	The priority is kept for the queue, but is only increased, never
 *	reduced unless some CPU is looking for something on that queue.
 *
 *	The priority argument is the known upper limit.
 *
 *	Perhaps this should be kept accurately, but that probably means
 *	separate bitmaps for bound and unbound threads.  Since only idled
 *	CPUs will have to do this recalculation, it seems better this way.
 */
static void
disp_fix_unbound_pri(disp_t *dp, pri_t pri)
{
	kthread_t	*tp;
	dispq_t		*dq;
	ulong_t		*dqactmap = dp->disp_qactmap;
	ulong_t		mapword;
	int		wx;

	ASSERT(DISP_LOCK_HELD(&dp->disp_lock));

	ASSERT(pri >= 0);			/* checked by caller */

	/*
	 * Start the search at the next lowest priority below the supplied
	 * priority.  This depends on the bitmap implementation.
	 */
	do {
		wx = pri >> BT_ULSHIFT;		/* index of word in map */

		/*
		 * Form mask for all lower priorities in the word.
		 */
		mapword = dqactmap[wx] & (BT_BIW(pri) - 1);

		/*
		 * Get next lower active priority.
		 */
		if (mapword != 0) {
			pri = (wx << BT_ULSHIFT) + highbit(mapword) - 1;
		} else if (wx > 0) {
			pri = bt_gethighbit(dqactmap, wx - 1); /* sign extend */
			if (pri < 0)
				break;
		} else {
			pri = -1;
			break;
		}

		/*
		 * Search the queue for unbound, runnable threads.
		 */
		dq = &dp->disp_q[pri];
		tp = dq->dq_first;

		while (tp && (tp->t_bound_cpu || tp->t_weakbound_cpu)) {
			tp = tp->t_link;
		}

		/*
		 * If a thread was found, set the priority and return.
		 */
	} while (tp == NULL);

	/*
	 * pri holds the maximum unbound thread priority or -1.
	 */
	if (dp->disp_max_unbound_pri != pri)
		dp->disp_max_unbound_pri = pri;
}
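
/*
 * Illustrative sketch, not part of the original source: the single
 * bitmap step disp_fix_unbound_pri() takes to find the next lower
 * active priority, as a hypothetical helper.  The mask keeps only
 * bits strictly below "pri" in its word; highbit() picks the highest
 * survivor, and bt_gethighbit() continues through lower words.
 */
#ifdef DISP_EXAMPLES
static pri_t
disp_example_next_lower_pri(ulong_t *dqactmap, pri_t pri)
{
	int	wx = pri >> BT_ULSHIFT;		/* word holding "pri" */
	ulong_t	mapword = dqactmap[wx] & (BT_BIW(pri) - 1);

	if (mapword != 0)
		return ((wx << BT_ULSHIFT) + highbit(mapword) - 1);
	if (wx > 0)
		return (bt_gethighbit(dqactmap, wx - 1));	/* may be -1 */
	return (-1);
}
#endif	/* DISP_EXAMPLES */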

/*
 * disp_adjust_unbound_pri() - thread is becoming unbound, so we should
 *	check if the CPU to which it was previously bound should have
 *	its disp_max_unbound_pri increased.
 */
void
disp_adjust_unbound_pri(kthread_t *tp)
{
	disp_t *dp;
	pri_t tpri;

	ASSERT(THREAD_LOCK_HELD(tp));

	/*
	 * Don't do anything if the thread is not bound, or
	 * currently not runnable or swapped out.
	 */
	if (tp->t_bound_cpu == NULL ||
	    tp->t_state != TS_RUN ||
	    tp->t_schedflag & TS_ON_SWAPQ)
		return;

	tpri = DISP_PRIO(tp);
	dp = tp->t_bound_cpu->cpu_disp;
	ASSERT(tpri >= 0 && tpri < dp->disp_npri);
	if (tpri > dp->disp_max_unbound_pri)
		dp->disp_max_unbound_pri = tpri;
}
2018*7c478bd9Sstevel@tonic-gate 
2019*7c478bd9Sstevel@tonic-gate /*
2020*7c478bd9Sstevel@tonic-gate  * disp_getbest() - de-queue the highest priority unbound runnable thread.
2021*7c478bd9Sstevel@tonic-gate  *	returns with the thread unlocked and onproc
2022*7c478bd9Sstevel@tonic-gate  *	but at splhigh (like disp()).
2023*7c478bd9Sstevel@tonic-gate  *	returns NULL if nothing found.
2024*7c478bd9Sstevel@tonic-gate  *
2025*7c478bd9Sstevel@tonic-gate  *	Passed a pointer to a dispatch queue not associated with this CPU.
2026*7c478bd9Sstevel@tonic-gate  */
2027*7c478bd9Sstevel@tonic-gate static kthread_t *
2028*7c478bd9Sstevel@tonic-gate disp_getbest(disp_t *dp)
2029*7c478bd9Sstevel@tonic-gate {
2030*7c478bd9Sstevel@tonic-gate 	kthread_t	*tp;
2031*7c478bd9Sstevel@tonic-gate 	dispq_t		*dq;
2032*7c478bd9Sstevel@tonic-gate 	pri_t		pri;
2033*7c478bd9Sstevel@tonic-gate 	cpu_t		*cp;
2034*7c478bd9Sstevel@tonic-gate 
2035*7c478bd9Sstevel@tonic-gate 	disp_lock_enter(&dp->disp_lock);
2036*7c478bd9Sstevel@tonic-gate 
2037*7c478bd9Sstevel@tonic-gate 	/*
2038*7c478bd9Sstevel@tonic-gate 	 * If there is nothing to run, or the CPU is in the middle of a
2039*7c478bd9Sstevel@tonic-gate 	 * context switch of the only thread, return NULL.
2040*7c478bd9Sstevel@tonic-gate 	 */
2041*7c478bd9Sstevel@tonic-gate 	pri = dp->disp_max_unbound_pri;
2042*7c478bd9Sstevel@tonic-gate 	if (pri == -1 ||
2043*7c478bd9Sstevel@tonic-gate 	    (dp->disp_cpu != NULL &&
2044*7c478bd9Sstevel@tonic-gate 	    (dp->disp_cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL) &&
2045*7c478bd9Sstevel@tonic-gate 	    dp->disp_cpu->cpu_disp->disp_nrunnable == 1)) {
2046*7c478bd9Sstevel@tonic-gate 		disp_lock_exit_nopreempt(&dp->disp_lock);
2047*7c478bd9Sstevel@tonic-gate 		return (NULL);
2048*7c478bd9Sstevel@tonic-gate 	}
2049*7c478bd9Sstevel@tonic-gate 
2050*7c478bd9Sstevel@tonic-gate 	dq = &dp->disp_q[pri];
2051*7c478bd9Sstevel@tonic-gate 	tp = dq->dq_first;
2052*7c478bd9Sstevel@tonic-gate 
2053*7c478bd9Sstevel@tonic-gate 	/*
2054*7c478bd9Sstevel@tonic-gate 	 * Skip over bound threads.
2055*7c478bd9Sstevel@tonic-gate 	 * Bound threads can be here even though disp_max_unbound_pri
2056*7c478bd9Sstevel@tonic-gate 	 * indicated this level.  Besides, it is not always accurate because it
2057*7c478bd9Sstevel@tonic-gate 	 * isn't reduced until another CPU looks for work.
2058*7c478bd9Sstevel@tonic-gate 	 * Note that tp could be NULL right away due to this.
2059*7c478bd9Sstevel@tonic-gate 	 */
2060*7c478bd9Sstevel@tonic-gate 	while (tp && (tp->t_bound_cpu || tp->t_weakbound_cpu)) {
2061*7c478bd9Sstevel@tonic-gate 		tp = tp->t_link;
2062*7c478bd9Sstevel@tonic-gate 	}
2063*7c478bd9Sstevel@tonic-gate 
2064*7c478bd9Sstevel@tonic-gate 	/*
2065*7c478bd9Sstevel@tonic-gate 	 * If there were no unbound threads on this queue, find the queue
2066*7c478bd9Sstevel@tonic-gate 	 * where they are and then return NULL so that other CPUs will be
2067*7c478bd9Sstevel@tonic-gate 	 * considered.
2068*7c478bd9Sstevel@tonic-gate 	 */
2069*7c478bd9Sstevel@tonic-gate 	if (tp == NULL) {
2070*7c478bd9Sstevel@tonic-gate 		disp_fix_unbound_pri(dp, pri);
2071*7c478bd9Sstevel@tonic-gate 		disp_lock_exit_nopreempt(&dp->disp_lock);
2072*7c478bd9Sstevel@tonic-gate 		return (NULL);
2073*7c478bd9Sstevel@tonic-gate 	}
2074*7c478bd9Sstevel@tonic-gate 
2075*7c478bd9Sstevel@tonic-gate 	/*
2076*7c478bd9Sstevel@tonic-gate 	 * Found a runnable, unbound thread, so remove it from queue.
2077*7c478bd9Sstevel@tonic-gate 	 * dispdeq() requires that we have the thread locked, and we do,
2078*7c478bd9Sstevel@tonic-gate 	 * by virtue of holding the dispatch queue lock.  dispdeq() will
2079*7c478bd9Sstevel@tonic-gate 	 * put the thread in transition state, thereby dropping the dispq
2080*7c478bd9Sstevel@tonic-gate 	 * lock.
2081*7c478bd9Sstevel@tonic-gate 	 */
2082*7c478bd9Sstevel@tonic-gate #ifdef DEBUG
2083*7c478bd9Sstevel@tonic-gate 	{
2084*7c478bd9Sstevel@tonic-gate 		int	thread_was_on_queue;
2085*7c478bd9Sstevel@tonic-gate 
2086*7c478bd9Sstevel@tonic-gate 		thread_was_on_queue = dispdeq(tp);	/* drops disp_lock */
2087*7c478bd9Sstevel@tonic-gate 		ASSERT(thread_was_on_queue);
2088*7c478bd9Sstevel@tonic-gate 	}
2089*7c478bd9Sstevel@tonic-gate #else /* DEBUG */
2090*7c478bd9Sstevel@tonic-gate 	(void) dispdeq(tp);			/* drops disp_lock */
2091*7c478bd9Sstevel@tonic-gate #endif /* DEBUG */
2092*7c478bd9Sstevel@tonic-gate 
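	/*
	 * Keep the stolen thread from being swapped out while it is in
	 * transit to this CPU.
	 */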
2093*7c478bd9Sstevel@tonic-gate 	tp->t_schedflag |= TS_DONT_SWAP;
2094*7c478bd9Sstevel@tonic-gate 
2095*7c478bd9Sstevel@tonic-gate 	/*
2096*7c478bd9Sstevel@tonic-gate 	 * Setup thread to run on the current CPU.
2097*7c478bd9Sstevel@tonic-gate 	 * Set up the thread to run on the current CPU.
2098*7c478bd9Sstevel@tonic-gate 	cp = CPU;
2099*7c478bd9Sstevel@tonic-gate 
2100*7c478bd9Sstevel@tonic-gate 	tp->t_disp_queue = cp->cpu_disp;
2101*7c478bd9Sstevel@tonic-gate 
2102*7c478bd9Sstevel@tonic-gate 	cp->cpu_dispthread = tp;		/* protected by spl only */
2103*7c478bd9Sstevel@tonic-gate 	cp->cpu_dispatch_pri = pri;
2104*7c478bd9Sstevel@tonic-gate 	ASSERT(pri == DISP_PRIO(tp));
2105*7c478bd9Sstevel@tonic-gate 
2106*7c478bd9Sstevel@tonic-gate 	thread_onproc(tp, cp);			/* set t_state to TS_ONPROC */
2107*7c478bd9Sstevel@tonic-gate 
2108*7c478bd9Sstevel@tonic-gate 	/*
2109*7c478bd9Sstevel@tonic-gate 	 * Return with spl high so that swtch() won't need to raise it.
2110*7c478bd9Sstevel@tonic-gate 	 * The disp_lock was dropped by dispdeq().
2111*7c478bd9Sstevel@tonic-gate 	 */
2112*7c478bd9Sstevel@tonic-gate 
2113*7c478bd9Sstevel@tonic-gate 	return (tp);
2114*7c478bd9Sstevel@tonic-gate }
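
/*
 * Simplified caller sketch (illustrative; the real scan in
 * disp_getwork() also orders candidates by locality): a CPU looking
 * for work probes peer queues, treats NULL as "try the next CPU", and
 * hands any non-NULL thread, already TS_ONPROC at splhigh, straight
 * to swtch():
 *
 *	for each cpu ocp in this partition, ocp != CPU:
 *		if ((tp = disp_getbest(ocp->cpu_disp)) != NULL)
 *			return (tp);
 *	return (NULL);
 */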
2115*7c478bd9Sstevel@tonic-gate 
2116*7c478bd9Sstevel@tonic-gate /*
2117*7c478bd9Sstevel@tonic-gate  * disp_bound_common() - common routine for higher level functions
2118*7c478bd9Sstevel@tonic-gate  *	that check for bound threads under certain conditions.
2119*7c478bd9Sstevel@tonic-gate  *	If 'threadlistsafe' is set then there is no need to acquire
2120*7c478bd9Sstevel@tonic-gate  *	pidlock to stop the thread list from changing (eg, if
2121*7c478bd9Sstevel@tonic-gate  *	pidlock to stop the thread list from changing (e.g., if
2122*7c478bd9Sstevel@tonic-gate  */
2123*7c478bd9Sstevel@tonic-gate static int
2124*7c478bd9Sstevel@tonic-gate disp_bound_common(cpu_t *cp, int threadlistsafe, int flag)
2125*7c478bd9Sstevel@tonic-gate {
2126*7c478bd9Sstevel@tonic-gate 	int		found = 0;
2127*7c478bd9Sstevel@tonic-gate 	kthread_t	*tp;
2128*7c478bd9Sstevel@tonic-gate 
2129*7c478bd9Sstevel@tonic-gate 	ASSERT(flag);
2130*7c478bd9Sstevel@tonic-gate 
2131*7c478bd9Sstevel@tonic-gate 	if (!threadlistsafe)
2132*7c478bd9Sstevel@tonic-gate 		mutex_enter(&pidlock);
2133*7c478bd9Sstevel@tonic-gate 	tp = curthread;		/* faster than allthreads */
2134*7c478bd9Sstevel@tonic-gate 	do {
2135*7c478bd9Sstevel@tonic-gate 		if (tp->t_state != TS_FREE) {
2136*7c478bd9Sstevel@tonic-gate 			/*
2137*7c478bd9Sstevel@tonic-gate 			 * If an interrupt thread is busy, but the
2138*7c478bd9Sstevel@tonic-gate 			 * caller doesn't care (i.e. BOUND_INTR is off),
2139*7c478bd9Sstevel@tonic-gate 			 * then just ignore it and continue through.
2140*7c478bd9Sstevel@tonic-gate 			 */
2141*7c478bd9Sstevel@tonic-gate 			if ((tp->t_flag & T_INTR_THREAD) &&
2142*7c478bd9Sstevel@tonic-gate 			    !(flag & BOUND_INTR))
2143*7c478bd9Sstevel@tonic-gate 				continue;
2144*7c478bd9Sstevel@tonic-gate 
2145*7c478bd9Sstevel@tonic-gate 			/*
2146*7c478bd9Sstevel@tonic-gate 			 * Skip the idle thread for the CPU
2147*7c478bd9Sstevel@tonic-gate 			 * we're about to set offline.
2148*7c478bd9Sstevel@tonic-gate 			 */
2149*7c478bd9Sstevel@tonic-gate 			if (tp == cp->cpu_idle_thread)
2150*7c478bd9Sstevel@tonic-gate 				continue;
2151*7c478bd9Sstevel@tonic-gate 
2152*7c478bd9Sstevel@tonic-gate 			/*
2153*7c478bd9Sstevel@tonic-gate 			 * Skip the pause thread for the CPU
2154*7c478bd9Sstevel@tonic-gate 			 * we're about to set offline.
2155*7c478bd9Sstevel@tonic-gate 			 */
2156*7c478bd9Sstevel@tonic-gate 			if (tp == cp->cpu_pause_thread)
2157*7c478bd9Sstevel@tonic-gate 				continue;
2158*7c478bd9Sstevel@tonic-gate 
2159*7c478bd9Sstevel@tonic-gate 			if ((flag & BOUND_CPU) &&
2160*7c478bd9Sstevel@tonic-gate 			    (tp->t_bound_cpu == cp ||
2161*7c478bd9Sstevel@tonic-gate 			    tp->t_bind_cpu == cp->cpu_id ||
2162*7c478bd9Sstevel@tonic-gate 			    tp->t_weakbound_cpu == cp)) {
2163*7c478bd9Sstevel@tonic-gate 				found = 1;
2164*7c478bd9Sstevel@tonic-gate 				break;
2165*7c478bd9Sstevel@tonic-gate 			}
2166*7c478bd9Sstevel@tonic-gate 
2167*7c478bd9Sstevel@tonic-gate 			if ((flag & BOUND_PARTITION) &&
2168*7c478bd9Sstevel@tonic-gate 			    (tp->t_cpupart == cp->cpu_part)) {
2169*7c478bd9Sstevel@tonic-gate 				found = 1;
2170*7c478bd9Sstevel@tonic-gate 				break;
2171*7c478bd9Sstevel@tonic-gate 			}
2172*7c478bd9Sstevel@tonic-gate 		}
2173*7c478bd9Sstevel@tonic-gate 	} while ((tp = tp->t_next) != curthread && found == 0);
2174*7c478bd9Sstevel@tonic-gate 	if (!threadlistsafe)
2175*7c478bd9Sstevel@tonic-gate 		mutex_exit(&pidlock);
2176*7c478bd9Sstevel@tonic-gate 	return (found);
2177*7c478bd9Sstevel@tonic-gate }
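
/*
 * Hypothetical usage sketch: an offline request that has not paused
 * the other CPUs must pass threadlistsafe == 0 so that the walk above
 * runs under pidlock:
 *
 *	if (disp_bound_threads(cp, 0))
 *		return (EBUSY);		cp still has bound threads
 */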
2178*7c478bd9Sstevel@tonic-gate 
2179*7c478bd9Sstevel@tonic-gate /*
2180*7c478bd9Sstevel@tonic-gate  * disp_bound_threads - return nonzero if threads are bound to the processor.
2181*7c478bd9Sstevel@tonic-gate  *	Called infrequently.  Keep this simple.
2182*7c478bd9Sstevel@tonic-gate  *	Includes threads that are asleep or stopped but not onproc.
2183*7c478bd9Sstevel@tonic-gate  */
2184*7c478bd9Sstevel@tonic-gate int
2185*7c478bd9Sstevel@tonic-gate disp_bound_threads(cpu_t *cp, int threadlistsafe)
2186*7c478bd9Sstevel@tonic-gate {
2187*7c478bd9Sstevel@tonic-gate 	return (disp_bound_common(cp, threadlistsafe, BOUND_CPU));
2188*7c478bd9Sstevel@tonic-gate }
2189*7c478bd9Sstevel@tonic-gate 
2190*7c478bd9Sstevel@tonic-gate /*
2191*7c478bd9Sstevel@tonic-gate  * disp_bound_anythreads - return nonzero if _any_ threads are bound
2192*7c478bd9Sstevel@tonic-gate  * to the given processor, including interrupt threads.
2193*7c478bd9Sstevel@tonic-gate  */
2194*7c478bd9Sstevel@tonic-gate int
2195*7c478bd9Sstevel@tonic-gate disp_bound_anythreads(cpu_t *cp, int threadlistsafe)
2196*7c478bd9Sstevel@tonic-gate {
2197*7c478bd9Sstevel@tonic-gate 	return (disp_bound_common(cp, threadlistsafe, BOUND_CPU | BOUND_INTR));
2198*7c478bd9Sstevel@tonic-gate }
2199*7c478bd9Sstevel@tonic-gate 
2200*7c478bd9Sstevel@tonic-gate /*
2201*7c478bd9Sstevel@tonic-gate  * disp_bound_partition - return nonzero if threads are bound to the same
2202*7c478bd9Sstevel@tonic-gate  * partition as the processor.
2203*7c478bd9Sstevel@tonic-gate  *	Called infrequently.  Keep this simple.
2204*7c478bd9Sstevel@tonic-gate  *	Includes threads that are asleep or stopped but not onproc.
2205*7c478bd9Sstevel@tonic-gate  */
2206*7c478bd9Sstevel@tonic-gate int
2207*7c478bd9Sstevel@tonic-gate disp_bound_partition(cpu_t *cp, int threadlistsafe)
2208*7c478bd9Sstevel@tonic-gate {
2209*7c478bd9Sstevel@tonic-gate 	return (disp_bound_common(cp, threadlistsafe, BOUND_PARTITION));
2210*7c478bd9Sstevel@tonic-gate }
2211*7c478bd9Sstevel@tonic-gate 
2212*7c478bd9Sstevel@tonic-gate /*
2213*7c478bd9Sstevel@tonic-gate  * disp_cpu_inactive - make a CPU inactive by moving all of its unbound
2214*7c478bd9Sstevel@tonic-gate  * threads to other CPUs.
2215*7c478bd9Sstevel@tonic-gate  */
2216*7c478bd9Sstevel@tonic-gate void
2217*7c478bd9Sstevel@tonic-gate disp_cpu_inactive(cpu_t *cp)
2218*7c478bd9Sstevel@tonic-gate {
2219*7c478bd9Sstevel@tonic-gate 	kthread_t	*tp;
2220*7c478bd9Sstevel@tonic-gate 	disp_t		*dp = cp->cpu_disp;
2221*7c478bd9Sstevel@tonic-gate 	dispq_t		*dq;
2222*7c478bd9Sstevel@tonic-gate 	pri_t		pri;
2223*7c478bd9Sstevel@tonic-gate 	int		wasonq;
2224*7c478bd9Sstevel@tonic-gate 
2225*7c478bd9Sstevel@tonic-gate 	disp_lock_enter(&dp->disp_lock);
2226*7c478bd9Sstevel@tonic-gate 	while ((pri = dp->disp_max_unbound_pri) != -1) {
2227*7c478bd9Sstevel@tonic-gate 		dq = &dp->disp_q[pri];
2228*7c478bd9Sstevel@tonic-gate 		tp = dq->dq_first;
2229*7c478bd9Sstevel@tonic-gate 
2230*7c478bd9Sstevel@tonic-gate 		/*
2231*7c478bd9Sstevel@tonic-gate 		 * Skip over bound threads.
2232*7c478bd9Sstevel@tonic-gate 		 */
2233*7c478bd9Sstevel@tonic-gate 		while (tp != NULL && tp->t_bound_cpu != NULL) {
2234*7c478bd9Sstevel@tonic-gate 			tp = tp->t_link;
2235*7c478bd9Sstevel@tonic-gate 		}
2236*7c478bd9Sstevel@tonic-gate 
2237*7c478bd9Sstevel@tonic-gate 		if (tp == NULL) {
2238*7c478bd9Sstevel@tonic-gate 			/* disp_max_unbound_pri must be inaccurate, so fix it */
2239*7c478bd9Sstevel@tonic-gate 			disp_fix_unbound_pri(dp, pri);
2240*7c478bd9Sstevel@tonic-gate 			continue;
2241*7c478bd9Sstevel@tonic-gate 		}
2242*7c478bd9Sstevel@tonic-gate 
2243*7c478bd9Sstevel@tonic-gate 		wasonq = dispdeq(tp);		/* drops disp_lock */
2244*7c478bd9Sstevel@tonic-gate 		ASSERT(wasonq);
2245*7c478bd9Sstevel@tonic-gate 		ASSERT(tp->t_weakbound_cpu == NULL);
2246*7c478bd9Sstevel@tonic-gate 
2247*7c478bd9Sstevel@tonic-gate 		setbackdq(tp);
2248*7c478bd9Sstevel@tonic-gate 		/*
2249*7c478bd9Sstevel@tonic-gate 		 * Called from cpu_offline:
2250*7c478bd9Sstevel@tonic-gate 		 *
2251*7c478bd9Sstevel@tonic-gate 		 * cp has already been removed from the list of active cpus
2252*7c478bd9Sstevel@tonic-gate 		 * and tp->t_cpu has been changed so there is no risk of
2253*7c478bd9Sstevel@tonic-gate 		 * tp ending up back on cp.
2254*7c478bd9Sstevel@tonic-gate 		 *
2255*7c478bd9Sstevel@tonic-gate 		 * Called from cpupart_move_cpu:
2256*7c478bd9Sstevel@tonic-gate 		 *
2257*7c478bd9Sstevel@tonic-gate 		 * The cpu has moved to a new cpupart.  Any threads that
2258*7c478bd9Sstevel@tonic-gate 		 * were on its dispatch queues before the move remain
2259*7c478bd9Sstevel@tonic-gate 		 * in the old partition and can't run in the new partition.
2260*7c478bd9Sstevel@tonic-gate 		 */
2261*7c478bd9Sstevel@tonic-gate 		ASSERT(tp->t_cpu != cp);
2262*7c478bd9Sstevel@tonic-gate 		thread_unlock(tp);
2263*7c478bd9Sstevel@tonic-gate 
2264*7c478bd9Sstevel@tonic-gate 		disp_lock_enter(&dp->disp_lock);
2265*7c478bd9Sstevel@tonic-gate 	}
2266*7c478bd9Sstevel@tonic-gate 	disp_lock_exit(&dp->disp_lock);
2267*7c478bd9Sstevel@tonic-gate }
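
/*
 * Note on termination: assuming no new unbound work can arrive on cp's
 * queue (cp has already been removed from the active list, per the
 * comment above), every pass of the loop in disp_cpu_inactive() either
 * migrates one unbound thread via dispdeq()/setbackdq() or lowers a
 * stale disp_max_unbound_pri via disp_fix_unbound_pri(), so the hint
 * falls to -1 and the drain completes.
 */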
2268*7c478bd9Sstevel@tonic-gate 
2269*7c478bd9Sstevel@tonic-gate /*
2270*7c478bd9Sstevel@tonic-gate  * disp_lowpri_cpu - find CPU running the lowest priority thread.
2271*7c478bd9Sstevel@tonic-gate  *	The hint passed in is used as a starting point so we don't favor
2272*7c478bd9Sstevel@tonic-gate  *	CPU 0 or any other CPU.  The caller should pass in the most recently
2273*7c478bd9Sstevel@tonic-gate  *	used CPU for the thread.
2274*7c478bd9Sstevel@tonic-gate  *
2275*7c478bd9Sstevel@tonic-gate  *	The lgroup and priority are used to determine the best CPU to run on
2276*7c478bd9Sstevel@tonic-gate  *	in a NUMA machine.  The lgroup specifies which CPUs are closest while
2277*7c478bd9Sstevel@tonic-gate  *	the thread priority will indicate whether the thread will actually run
2278*7c478bd9Sstevel@tonic-gate  *	there.  To pick the best CPU, the CPUs inside and outside of the given
2279*7c478bd9Sstevel@tonic-gate  *	lgroup which are running the lowest priority threads are found.  The
2280*7c478bd9Sstevel@tonic-gate  *	remote CPU is chosen only if the thread will not run locally on a CPU
2281*7c478bd9Sstevel@tonic-gate  *	within the lgroup, but will run on the remote CPU. If the thread
2282*7c478bd9Sstevel@tonic-gate  *	cannot immediately run on any CPU, the best local CPU will be chosen.
2283*7c478bd9Sstevel@tonic-gate  *
2284*7c478bd9Sstevel@tonic-gate  *	The lpl specified also identifies the cpu partition from which
2285*7c478bd9Sstevel@tonic-gate  *	disp_lowpri_cpu should select a CPU.
2286*7c478bd9Sstevel@tonic-gate  *
2287*7c478bd9Sstevel@tonic-gate  *	curcpu is used to indicate that disp_lowpri_cpu is being called on
2288*7c478bd9Sstevel@tonic-gate  *	behalf of the current thread (curthread is looking for a new CPU);
2289*7c478bd9Sstevel@tonic-gate  *	in this case, cpu_dispatch_pri for this thread's CPU should be
2290*7c478bd9Sstevel@tonic-gate  *	ignored.
2291*7c478bd9Sstevel@tonic-gate  *
2292*7c478bd9Sstevel@tonic-gate  *	If a CPU is the target of an offline request, then try to avoid it.
2293*7c478bd9Sstevel@tonic-gate  *
2294*7c478bd9Sstevel@tonic-gate  *	This function must be called at either high SPL, or with preemption
2295*7c478bd9Sstevel@tonic-gate  *	disabled, so that the "hint" CPU cannot be removed from the online
2296*7c478bd9Sstevel@tonic-gate  *	CPU list while we are traversing it.
2297*7c478bd9Sstevel@tonic-gate  */
2298*7c478bd9Sstevel@tonic-gate cpu_t *
2299*7c478bd9Sstevel@tonic-gate disp_lowpri_cpu(cpu_t *hint, lpl_t *lpl, pri_t tpri, cpu_t *curcpu)
2300*7c478bd9Sstevel@tonic-gate {
2301*7c478bd9Sstevel@tonic-gate 	cpu_t	*bestcpu;
2302*7c478bd9Sstevel@tonic-gate 	cpu_t	*besthomecpu;
2303*7c478bd9Sstevel@tonic-gate 	cpu_t   *cp, *cpstart;
2304*7c478bd9Sstevel@tonic-gate 
2305*7c478bd9Sstevel@tonic-gate 	pri_t   bestpri;
2306*7c478bd9Sstevel@tonic-gate 	pri_t   cpupri;
2307*7c478bd9Sstevel@tonic-gate 
2308*7c478bd9Sstevel@tonic-gate 	klgrpset_t	done;
2309*7c478bd9Sstevel@tonic-gate 	klgrpset_t	cur_set;
2310*7c478bd9Sstevel@tonic-gate 
2311*7c478bd9Sstevel@tonic-gate 	lpl_t		*lpl_iter, *lpl_leaf;
2312*7c478bd9Sstevel@tonic-gate 	int		i;
2313*7c478bd9Sstevel@tonic-gate 
2314*7c478bd9Sstevel@tonic-gate 	/*
2315*7c478bd9Sstevel@tonic-gate 	 * Scan for a CPU currently running the lowest priority thread.
2316*7c478bd9Sstevel@tonic-gate 	 * Cannot get cpu_lock here because it is adaptive.
2317*7c478bd9Sstevel@tonic-gate 	 * We do not require lock on CPU list.
2318*7c478bd9Sstevel@tonic-gate 	 */
2319*7c478bd9Sstevel@tonic-gate 	ASSERT(hint != NULL);
2320*7c478bd9Sstevel@tonic-gate 	ASSERT(lpl != NULL);
2321*7c478bd9Sstevel@tonic-gate 	ASSERT(lpl->lpl_ncpu > 0);
2322*7c478bd9Sstevel@tonic-gate 
2323*7c478bd9Sstevel@tonic-gate 	/*
2324*7c478bd9Sstevel@tonic-gate 	 * First examine local CPUs. Note that it's possible the hint CPU
2325*7c478bd9Sstevel@tonic-gate 	 * passed in is remote to the specified home lgroup.  If our priority
2326*7c478bd9Sstevel@tonic-gate 	 * isn't high enough for us to run immediately at home, we
2327*7c478bd9Sstevel@tonic-gate 	 * then examine CPUs remote to our home lgroup.
2328*7c478bd9Sstevel@tonic-gate 	 * We would like to give preference to CPUs closest to "home".
2329*7c478bd9Sstevel@tonic-gate 	 * If we can't find a CPU where we'll run at a given level
2330*7c478bd9Sstevel@tonic-gate 	 * of locality, we expand our search to include the next level.
2331*7c478bd9Sstevel@tonic-gate 	 */
2332*7c478bd9Sstevel@tonic-gate 	bestcpu = besthomecpu = NULL;
2333*7c478bd9Sstevel@tonic-gate 	klgrpset_clear(done);
2334*7c478bd9Sstevel@tonic-gate 	/* start with lpl we were passed */
2335*7c478bd9Sstevel@tonic-gate 
2336*7c478bd9Sstevel@tonic-gate 	lpl_iter = lpl;
2337*7c478bd9Sstevel@tonic-gate 
2338*7c478bd9Sstevel@tonic-gate 	do {
2339*7c478bd9Sstevel@tonic-gate 
2340*7c478bd9Sstevel@tonic-gate 		bestpri = SHRT_MAX;
2341*7c478bd9Sstevel@tonic-gate 		klgrpset_clear(cur_set);
2342*7c478bd9Sstevel@tonic-gate 
2343*7c478bd9Sstevel@tonic-gate 		for (i = 0; i < lpl_iter->lpl_nrset; i++) {
2344*7c478bd9Sstevel@tonic-gate 			lpl_leaf = lpl_iter->lpl_rset[i];
2345*7c478bd9Sstevel@tonic-gate 			if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid))
2346*7c478bd9Sstevel@tonic-gate 				continue;
2347*7c478bd9Sstevel@tonic-gate 
2348*7c478bd9Sstevel@tonic-gate 			klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid);
2349*7c478bd9Sstevel@tonic-gate 
2350*7c478bd9Sstevel@tonic-gate 			if (hint->cpu_lpl == lpl_leaf)
2351*7c478bd9Sstevel@tonic-gate 				cp = cpstart = hint;
2352*7c478bd9Sstevel@tonic-gate 			else
2353*7c478bd9Sstevel@tonic-gate 				cp = cpstart = lpl_leaf->lpl_cpus;
2354*7c478bd9Sstevel@tonic-gate 
2355*7c478bd9Sstevel@tonic-gate 			do {
2356*7c478bd9Sstevel@tonic-gate 
2357*7c478bd9Sstevel@tonic-gate 				if (cp == curcpu)
2358*7c478bd9Sstevel@tonic-gate 					cpupri = -1;
2359*7c478bd9Sstevel@tonic-gate 				else if (cp == cpu_inmotion)
2360*7c478bd9Sstevel@tonic-gate 					cpupri = SHRT_MAX;
2361*7c478bd9Sstevel@tonic-gate 				else
2362*7c478bd9Sstevel@tonic-gate 					cpupri = cp->cpu_dispatch_pri;
2363*7c478bd9Sstevel@tonic-gate 
2364*7c478bd9Sstevel@tonic-gate 				if (cp->cpu_disp->disp_maxrunpri > cpupri)
2365*7c478bd9Sstevel@tonic-gate 					cpupri = cp->cpu_disp->disp_maxrunpri;
2366*7c478bd9Sstevel@tonic-gate 				if (cp->cpu_chosen_level > cpupri)
2367*7c478bd9Sstevel@tonic-gate 					cpupri = cp->cpu_chosen_level;
2368*7c478bd9Sstevel@tonic-gate 				if (cpupri < bestpri) {
2369*7c478bd9Sstevel@tonic-gate 					if (CPU_IDLING(cpupri)) {
2370*7c478bd9Sstevel@tonic-gate 						ASSERT((cp->cpu_flags &
2371*7c478bd9Sstevel@tonic-gate 						    CPU_QUIESCED) == 0);
2372*7c478bd9Sstevel@tonic-gate 						return (cp);
2373*7c478bd9Sstevel@tonic-gate 					}
2374*7c478bd9Sstevel@tonic-gate 					bestcpu = cp;
2375*7c478bd9Sstevel@tonic-gate 					bestpri = cpupri;
2376*7c478bd9Sstevel@tonic-gate 				}
2377*7c478bd9Sstevel@tonic-gate 			} while ((cp = cp->cpu_next_lpl) != cpstart);
2378*7c478bd9Sstevel@tonic-gate 		}
2379*7c478bd9Sstevel@tonic-gate 
2380*7c478bd9Sstevel@tonic-gate 		if (bestcpu && (tpri > bestpri)) {
2381*7c478bd9Sstevel@tonic-gate 			ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0);
2382*7c478bd9Sstevel@tonic-gate 			return (bestcpu);
2383*7c478bd9Sstevel@tonic-gate 		}
2384*7c478bd9Sstevel@tonic-gate 		if (besthomecpu == NULL)
2385*7c478bd9Sstevel@tonic-gate 			besthomecpu = bestcpu;
2386*7c478bd9Sstevel@tonic-gate 		/*
2387*7c478bd9Sstevel@tonic-gate 		 * Add the lgrps we just considered to the "done" set
2388*7c478bd9Sstevel@tonic-gate 		 */
2389*7c478bd9Sstevel@tonic-gate 		klgrpset_or(done, cur_set);
2390*7c478bd9Sstevel@tonic-gate 
2391*7c478bd9Sstevel@tonic-gate 	} while ((lpl_iter = lpl_iter->lpl_parent) != NULL);
2392*7c478bd9Sstevel@tonic-gate 
2393*7c478bd9Sstevel@tonic-gate 	/*
2394*7c478bd9Sstevel@tonic-gate 	 * The specified priority isn't high enough to run immediately
2395*7c478bd9Sstevel@tonic-gate 	 * anywhere, so just return the best CPU from the home lgroup.
2396*7c478bd9Sstevel@tonic-gate 	 */
2397*7c478bd9Sstevel@tonic-gate 	ASSERT((besthomecpu->cpu_flags & CPU_QUIESCED) == 0);
2398*7c478bd9Sstevel@tonic-gate 	return (besthomecpu);
2399*7c478bd9Sstevel@tonic-gate }
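
/*
 * Worked example (illustrative numbers): consider tpri == 60 and two
 * CPUs in the home lgroup.  cpu0 runs a thread at cpu_dispatch_pri 59
 * with disp_maxrunpri 20 and cpu_chosen_level -1, so its effective
 * priority is 59.  cpu1 runs at 30, but another thread has already
 * chosen it at cpu_chosen_level 65, so its effective priority is 65.
 * cpu0 becomes bestcpu (59 < 65), and since tpri (60) > bestpri (59)
 * the thread can preempt there; cpu0 is returned without ever
 * expanding the search to remote lgroups.
 */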
2400*7c478bd9Sstevel@tonic-gate 
2401*7c478bd9Sstevel@tonic-gate /*
2402*7c478bd9Sstevel@tonic-gate  * This routine provides the generic idle cpu function for all processors.
2403*7c478bd9Sstevel@tonic-gate  * If a processor has some specific code to execute when idle (say, to stop
2404*7c478bd9Sstevel@tonic-gate  * the pipeline and save power) then that routine should be defined in the
2405*7c478bd9Sstevel@tonic-gate  * processors specific code (module_xx.c) and the global variable idle_cpu
2406*7c478bd9Sstevel@tonic-gate  * processor-specific code (module_xx.c) and the global variable idle_cpu
2407*7c478bd9Sstevel@tonic-gate  */
2408*7c478bd9Sstevel@tonic-gate static void
2409*7c478bd9Sstevel@tonic-gate generic_idle_cpu(void)
2410*7c478bd9Sstevel@tonic-gate {
2411*7c478bd9Sstevel@tonic-gate }
2412*7c478bd9Sstevel@tonic-gate 
2413*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
2414*7c478bd9Sstevel@tonic-gate static void
2415*7c478bd9Sstevel@tonic-gate generic_enq_thread(cpu_t *cpu, int bound)
2416*7c478bd9Sstevel@tonic-gate {
2417*7c478bd9Sstevel@tonic-gate }
2418*7c478bd9Sstevel@tonic-gate 
2419*7c478bd9Sstevel@tonic-gate /*
2420*7c478bd9Sstevel@tonic-gate  * Select a CPU for this thread to run on.  Choose t->t_cpu unless:
2421*7c478bd9Sstevel@tonic-gate  *	- t->t_cpu is not in this thread's assigned lgrp
2422*7c478bd9Sstevel@tonic-gate  *	- the time since the thread last came off t->t_cpu exceeds the
2423*7c478bd9Sstevel@tonic-gate  *	  rechoose time for this cpu (ignore this if t is curthread in
2424*7c478bd9Sstevel@tonic-gate  *	  which case it's on CPU and t->t_disp_time is inaccurate)
2425*7c478bd9Sstevel@tonic-gate  *	- t->t_cpu is presently the target of an offline or partition move
2426*7c478bd9Sstevel@tonic-gate  *	  request
2427*7c478bd9Sstevel@tonic-gate  */
2428*7c478bd9Sstevel@tonic-gate static cpu_t *
2429*7c478bd9Sstevel@tonic-gate cpu_choose(kthread_t *t, pri_t tpri)
2430*7c478bd9Sstevel@tonic-gate {
2431*7c478bd9Sstevel@tonic-gate 	ASSERT(tpri < kpqpri);
2432*7c478bd9Sstevel@tonic-gate 
2433*7c478bd9Sstevel@tonic-gate 	if ((((lbolt - t->t_disp_time) > t->t_cpu->cpu_rechoose) &&
2434*7c478bd9Sstevel@tonic-gate 	    t != curthread) || t->t_cpu == cpu_inmotion) {
2435*7c478bd9Sstevel@tonic-gate 		return (disp_lowpri_cpu(t->t_cpu, t->t_lpl, tpri, NULL));
2436*7c478bd9Sstevel@tonic-gate 	}
2437*7c478bd9Sstevel@tonic-gate 
2438*7c478bd9Sstevel@tonic-gate 	/*
2439*7c478bd9Sstevel@tonic-gate 	 * Take a trip through disp_lowpri_cpu() if the thread was
2440*7c478bd9Sstevel@tonic-gate 	 * running outside it's home lgroup
2441*7c478bd9Sstevel@tonic-gate 	 * running outside its home lgroup.
2442*7c478bd9Sstevel@tonic-gate 	if (!klgrpset_ismember(t->t_lpl->lpl_lgrp->lgrp_set[LGRP_RSRC_CPU],
2443*7c478bd9Sstevel@tonic-gate 	    t->t_cpu->cpu_lpl->lpl_lgrpid)) {
2444*7c478bd9Sstevel@tonic-gate 		return (disp_lowpri_cpu(t->t_cpu, t->t_lpl, tpri,
2445*7c478bd9Sstevel@tonic-gate 		    (t == curthread) ? t->t_cpu : NULL));
2446*7c478bd9Sstevel@tonic-gate 	}
2447*7c478bd9Sstevel@tonic-gate 	return (t->t_cpu);
2448*7c478bd9Sstevel@tonic-gate }
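
/*
 * Illustrative sketch of the rechoose test above (assumes the stock
 * rechoose_interval of 3 ticks): a thread that last left its CPU at
 * lbolt 100 and is enqueued again at lbolt 102 keeps t->t_cpu, since
 * 102 - 100 does not exceed cpu_rechoose; one enqueued at lbolt 110
 * has likely gone cache-cold, so it is routed through
 * disp_lowpri_cpu() to find the least loaded eligible CPU.
 */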
2449