xref: /titanic_51/usr/src/uts/common/os/waitq.c (revision c97ad5cdc75eb73e3cc38542ca3ba783574b0a7a)
1*c97ad5cdSakolb /*
2*c97ad5cdSakolb  * CDDL HEADER START
3*c97ad5cdSakolb  *
4*c97ad5cdSakolb  * The contents of this file are subject to the terms of the
5*c97ad5cdSakolb  * Common Development and Distribution License (the "License").
6*c97ad5cdSakolb  * You may not use this file except in compliance with the License.
7*c97ad5cdSakolb  *
8*c97ad5cdSakolb  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*c97ad5cdSakolb  * or http://www.opensolaris.org/os/licensing.
10*c97ad5cdSakolb  * See the License for the specific language governing permissions
11*c97ad5cdSakolb  * and limitations under the License.
12*c97ad5cdSakolb  *
13*c97ad5cdSakolb  * When distributing Covered Code, include this CDDL HEADER in each
14*c97ad5cdSakolb  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*c97ad5cdSakolb  * If applicable, add the following below this CDDL HEADER, with the
16*c97ad5cdSakolb  * fields enclosed by brackets "[]" replaced with your own identifying
17*c97ad5cdSakolb  * information: Portions Copyright [yyyy] [name of copyright owner]
18*c97ad5cdSakolb  *
19*c97ad5cdSakolb  * CDDL HEADER END
20*c97ad5cdSakolb  */
21*c97ad5cdSakolb /*
22*c97ad5cdSakolb  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23*c97ad5cdSakolb  * Use is subject to license terms.
24*c97ad5cdSakolb  */
25*c97ad5cdSakolb 
26*c97ad5cdSakolb #pragma ident	"%Z%%M%	%I%	%E% SMI"
27*c97ad5cdSakolb 
28*c97ad5cdSakolb #include <sys/param.h>
29*c97ad5cdSakolb #include <sys/systm.h>
30*c97ad5cdSakolb #include <sys/thread.h>
31*c97ad5cdSakolb #include <sys/class.h>
32*c97ad5cdSakolb #include <sys/debug.h>
33*c97ad5cdSakolb #include <sys/cpuvar.h>
34*c97ad5cdSakolb #include <sys/waitq.h>
35*c97ad5cdSakolb #include <sys/cmn_err.h>
36*c97ad5cdSakolb #include <sys/time.h>
37*c97ad5cdSakolb #include <sys/dtrace.h>
38*c97ad5cdSakolb #include <sys/sdt.h>
39*c97ad5cdSakolb #include <sys/zone.h>
40*c97ad5cdSakolb 
41*c97ad5cdSakolb /*
42*c97ad5cdSakolb  * Wait queue implementation.
43*c97ad5cdSakolb  */
44*c97ad5cdSakolb 
45*c97ad5cdSakolb void
46*c97ad5cdSakolb waitq_init(waitq_t *wq)
47*c97ad5cdSakolb {
48*c97ad5cdSakolb 	DISP_LOCK_INIT(&wq->wq_lock);
49*c97ad5cdSakolb 	wq->wq_first = NULL;
50*c97ad5cdSakolb 	wq->wq_count = 0;
51*c97ad5cdSakolb 	wq->wq_blocked = B_TRUE;
52*c97ad5cdSakolb }
53*c97ad5cdSakolb 
54*c97ad5cdSakolb void
55*c97ad5cdSakolb waitq_fini(waitq_t *wq)
56*c97ad5cdSakolb {
57*c97ad5cdSakolb 	ASSERT(wq->wq_count == 0);
58*c97ad5cdSakolb 	ASSERT(wq->wq_first == NULL);
59*c97ad5cdSakolb 	ASSERT(wq->wq_blocked == B_TRUE);
60*c97ad5cdSakolb 	ASSERT(!DISP_LOCK_HELD(&wq->wq_lock));
61*c97ad5cdSakolb 
62*c97ad5cdSakolb 	DISP_LOCK_DESTROY(&wq->wq_lock);
63*c97ad5cdSakolb }
64*c97ad5cdSakolb 
65*c97ad5cdSakolb /*
66*c97ad5cdSakolb  * Operations on waitq_t structures.
67*c97ad5cdSakolb  *
68*c97ad5cdSakolb  * A wait queue is a singly linked NULL-terminated list with doubly
69*c97ad5cdSakolb  * linked circular sublists.  The singly linked list is in descending
70*c97ad5cdSakolb  * priority order and FIFO for threads of the same priority.  It links
71*c97ad5cdSakolb  * through the t_link field of the thread structure.  The doubly linked
72*c97ad5cdSakolb  * sublists link threads of the same priority.  They use the t_priforw
73*c97ad5cdSakolb  * and t_priback fields of the thread structure.
74*c97ad5cdSakolb  *
75*c97ad5cdSakolb  * Graphically (with priorities in parens):
76*c97ad5cdSakolb  *
77*c97ad5cdSakolb  *         ________________           _______                   _______
78*c97ad5cdSakolb  *        /                \         /       \                 /       \
79*c97ad5cdSakolb  *        |                |         |       |                 |       |
80*c97ad5cdSakolb  *        v                v         v       v                 v       v
81*c97ad5cdSakolb  *     t1(60)-->t2(60)-->t3(60)-->t4(50)-->t5(50)-->t6(30)-->t7(0)-->t8(0)
82*c97ad5cdSakolb  *        ^      ^  ^      ^         ^       ^       ^  ^      ^       ^
83*c97ad5cdSakolb  *        |      |  |      |         |       |       |  |      |       |
84*c97ad5cdSakolb  *        \______/  \______/         \_______/       \__/      \_______/
85*c97ad5cdSakolb  *
86*c97ad5cdSakolb  * There are three interesting operations on a waitq list: inserting
87*c97ad5cdSakolb  * a thread into the proper position according to priority; removing a
88*c97ad5cdSakolb  * thread given a pointer to it; and walking the list, possibly
89*c97ad5cdSakolb  * removing threads along the way.  This design allows all three
90*c97ad5cdSakolb  * operations to be performed efficiently and easily.
91*c97ad5cdSakolb  *
92*c97ad5cdSakolb  * To insert a thread, traverse the list looking for the sublist of
93*c97ad5cdSakolb  * the same priority as the thread (or one of a lower priority,
94*c97ad5cdSakolb  * meaning there are no other threads in the list of the same
95*c97ad5cdSakolb  * priority).  This can be done without touching all threads in the
96*c97ad5cdSakolb  * list by following the links between the first threads in each
97*c97ad5cdSakolb  * sublist.  Given a thread t that is the head of a sublist (the first
98*c97ad5cdSakolb  * thread of that priority found when following the t_link pointers),
99*c97ad5cdSakolb  * t->t_priback->t_link points to the head of the next sublist.  It's
100*c97ad5cdSakolb  * important to do this since a waitq may contain thousands of
101*c97ad5cdSakolb  * threads.
102*c97ad5cdSakolb  *
103*c97ad5cdSakolb  * Removing a thread from the list is also efficient.  First, the
104*c97ad5cdSakolb  * t_waitq field contains a pointer to the waitq on which a thread
105*c97ad5cdSakolb  * is waiting (or NULL if it's not on a waitq).  This is used to
106*c97ad5cdSakolb  * determine if the given thread is on the given waitq without
107*c97ad5cdSakolb  * searching the list.  Assuming it is, if it's not the head of a
108*c97ad5cdSakolb  * sublist, just remove it from the sublist and use the t_priback
109*c97ad5cdSakolb  * pointer to find the thread that points to it with t_link.  If it is
110*c97ad5cdSakolb  * the head of a sublist, search for it by walking the sublist heads,
111*c97ad5cdSakolb  * similar to searching for a given priority level when inserting a
112*c97ad5cdSakolb  * thread.
113*c97ad5cdSakolb  *
114*c97ad5cdSakolb  * To walk the list, simply follow the t_link pointers.  Removing
115*c97ad5cdSakolb  * threads along the way can be done easily if the code maintains a
116*c97ad5cdSakolb  * pointer to the t_link field that pointed to the thread being
117*c97ad5cdSakolb  * removed.
118*c97ad5cdSakolb  */
119*c97ad5cdSakolb 
120*c97ad5cdSakolb static void
121*c97ad5cdSakolb waitq_link(waitq_t *wq, kthread_t *t)
122*c97ad5cdSakolb {
123*c97ad5cdSakolb 	kthread_t *next_tp;
124*c97ad5cdSakolb 	kthread_t *last_tp;
125*c97ad5cdSakolb 	kthread_t **tpp;
126*c97ad5cdSakolb 	pri_t tpri, next_pri, last_pri = -1;
127*c97ad5cdSakolb 
128*c97ad5cdSakolb 	ASSERT(DISP_LOCK_HELD(&wq->wq_lock));
129*c97ad5cdSakolb 
130*c97ad5cdSakolb 	tpri = DISP_PRIO(t);
131*c97ad5cdSakolb 	tpp = &wq->wq_first;
132*c97ad5cdSakolb 	while ((next_tp = *tpp) != NULL) {
133*c97ad5cdSakolb 		next_pri = DISP_PRIO(next_tp);
134*c97ad5cdSakolb 		if (tpri > next_pri)
135*c97ad5cdSakolb 			break;
136*c97ad5cdSakolb 		last_tp = next_tp->t_priback;
137*c97ad5cdSakolb 		last_pri = next_pri;
138*c97ad5cdSakolb 		tpp = &last_tp->t_link;
139*c97ad5cdSakolb 	}
140*c97ad5cdSakolb 	*tpp = t;
141*c97ad5cdSakolb 	t->t_link = next_tp;
142*c97ad5cdSakolb 	if (last_pri == tpri) {
143*c97ad5cdSakolb 		/* last_tp points to the last thread of this priority */
144*c97ad5cdSakolb 		t->t_priback = last_tp;
145*c97ad5cdSakolb 		t->t_priforw = last_tp->t_priforw;
146*c97ad5cdSakolb 		last_tp->t_priforw->t_priback = t;
147*c97ad5cdSakolb 		last_tp->t_priforw = t;
148*c97ad5cdSakolb 	} else {
149*c97ad5cdSakolb 		t->t_priback = t->t_priforw = t;
150*c97ad5cdSakolb 	}
151*c97ad5cdSakolb 	wq->wq_count++;
152*c97ad5cdSakolb 	t->t_waitq = wq;
153*c97ad5cdSakolb }
154*c97ad5cdSakolb 
155*c97ad5cdSakolb static void
156*c97ad5cdSakolb waitq_unlink(waitq_t *wq, kthread_t *t)
157*c97ad5cdSakolb {
158*c97ad5cdSakolb 	kthread_t *nt;
159*c97ad5cdSakolb 	kthread_t **ptl;
160*c97ad5cdSakolb 
161*c97ad5cdSakolb 	ASSERT(THREAD_LOCK_HELD(t));
162*c97ad5cdSakolb 	ASSERT(DISP_LOCK_HELD(&wq->wq_lock));
163*c97ad5cdSakolb 	ASSERT(t->t_waitq == wq);
164*c97ad5cdSakolb 
165*c97ad5cdSakolb 	ptl = &t->t_priback->t_link;
166*c97ad5cdSakolb 	/*
167*c97ad5cdSakolb 	 * Is it the head of a priority sublist?  If so, need to walk
168*c97ad5cdSakolb 	 * the priorities to find the t_link pointer that points to it.
169*c97ad5cdSakolb 	 */
170*c97ad5cdSakolb 	if (*ptl != t) {
171*c97ad5cdSakolb 		/*
172*c97ad5cdSakolb 		 * Find the right priority level.
173*c97ad5cdSakolb 		 */
174*c97ad5cdSakolb 		ptl = &t->t_waitq->wq_first;
175*c97ad5cdSakolb 		while ((nt = *ptl) != t)
176*c97ad5cdSakolb 			ptl = &nt->t_priback->t_link;
177*c97ad5cdSakolb 	}
178*c97ad5cdSakolb 	/*
179*c97ad5cdSakolb 	 * Remove thread from the t_link list.
180*c97ad5cdSakolb 	 */
181*c97ad5cdSakolb 	*ptl = t->t_link;
182*c97ad5cdSakolb 
183*c97ad5cdSakolb 	/*
184*c97ad5cdSakolb 	 * Take it off the priority sublist if there's more than one
185*c97ad5cdSakolb 	 * thread there.
186*c97ad5cdSakolb 	 */
187*c97ad5cdSakolb 	if (t->t_priforw != t) {
188*c97ad5cdSakolb 		t->t_priback->t_priforw = t->t_priforw;
189*c97ad5cdSakolb 		t->t_priforw->t_priback = t->t_priback;
190*c97ad5cdSakolb 	}
191*c97ad5cdSakolb 	t->t_link = NULL;
192*c97ad5cdSakolb 
193*c97ad5cdSakolb 	wq->wq_count--;
194*c97ad5cdSakolb 	t->t_waitq = NULL;
195*c97ad5cdSakolb 	t->t_priforw = NULL;
196*c97ad5cdSakolb 	t->t_priback = NULL;
197*c97ad5cdSakolb }
198*c97ad5cdSakolb 
199*c97ad5cdSakolb /*
200*c97ad5cdSakolb  * Put specified thread to specified wait queue without dropping thread's lock.
201*c97ad5cdSakolb  * Returns 1 if thread was successfully placed on project's wait queue, or
202*c97ad5cdSakolb  * 0 if wait queue is blocked.
203*c97ad5cdSakolb  */
204*c97ad5cdSakolb int
205*c97ad5cdSakolb waitq_enqueue(waitq_t *wq, kthread_t *t)
206*c97ad5cdSakolb {
207*c97ad5cdSakolb 	ASSERT(THREAD_LOCK_HELD(t));
208*c97ad5cdSakolb 	ASSERT(t->t_sleepq == NULL);
209*c97ad5cdSakolb 	ASSERT(t->t_waitq == NULL);
210*c97ad5cdSakolb 	ASSERT(t->t_link == NULL);
211*c97ad5cdSakolb 
212*c97ad5cdSakolb 	disp_lock_enter_high(&wq->wq_lock);
213*c97ad5cdSakolb 
214*c97ad5cdSakolb 	/*
215*c97ad5cdSakolb 	 * Can't enqueue anything on a blocked wait queue
216*c97ad5cdSakolb 	 */
217*c97ad5cdSakolb 	if (wq->wq_blocked) {
218*c97ad5cdSakolb 		disp_lock_exit_high(&wq->wq_lock);
219*c97ad5cdSakolb 		return (0);
220*c97ad5cdSakolb 	}
221*c97ad5cdSakolb 
222*c97ad5cdSakolb 	/*
223*c97ad5cdSakolb 	 * Mark the time when thread is placed on wait queue. The microstate
224*c97ad5cdSakolb 	 * accounting code uses this timestamp to determine wait times.
225*c97ad5cdSakolb 	 */
226*c97ad5cdSakolb 	t->t_waitrq = gethrtime_unscaled();
227*c97ad5cdSakolb 
228*c97ad5cdSakolb 	/*
229*c97ad5cdSakolb 	 * Mark thread as not swappable.  If necessary, it will get
230*c97ad5cdSakolb 	 * swapped out when it returns to the userland.
231*c97ad5cdSakolb 	 */
232*c97ad5cdSakolb 	t->t_schedflag |= TS_DONT_SWAP;
233*c97ad5cdSakolb 	DTRACE_SCHED1(cpucaps__sleep, kthread_t *, t);
234*c97ad5cdSakolb 	waitq_link(wq, t);
235*c97ad5cdSakolb 
236*c97ad5cdSakolb 	THREAD_WAIT(t, &wq->wq_lock);
237*c97ad5cdSakolb 	return (1);
238*c97ad5cdSakolb }
239*c97ad5cdSakolb 
240*c97ad5cdSakolb /*
241*c97ad5cdSakolb  * Change thread's priority while on the wait queue.
242*c97ad5cdSakolb  * Dequeue and equeue it again so that it gets placed in the right place.
243*c97ad5cdSakolb  */
244*c97ad5cdSakolb void
245*c97ad5cdSakolb waitq_change_pri(kthread_t *t, pri_t new_pri)
246*c97ad5cdSakolb {
247*c97ad5cdSakolb 	waitq_t *wq = t->t_waitq;
248*c97ad5cdSakolb 
249*c97ad5cdSakolb 	ASSERT(THREAD_LOCK_HELD(t));
250*c97ad5cdSakolb 	ASSERT(ISWAITING(t));
251*c97ad5cdSakolb 	ASSERT(wq != NULL);
252*c97ad5cdSakolb 
253*c97ad5cdSakolb 	waitq_unlink(wq, t);
254*c97ad5cdSakolb 	t->t_pri = new_pri;
255*c97ad5cdSakolb 	waitq_link(wq, t);
256*c97ad5cdSakolb }
257*c97ad5cdSakolb 
258*c97ad5cdSakolb static void
259*c97ad5cdSakolb waitq_dequeue(waitq_t *wq, kthread_t *t)
260*c97ad5cdSakolb {
261*c97ad5cdSakolb 	ASSERT(THREAD_LOCK_HELD(t));
262*c97ad5cdSakolb 	ASSERT(t->t_waitq == wq);
263*c97ad5cdSakolb 	ASSERT(ISWAITING(t));
264*c97ad5cdSakolb 
265*c97ad5cdSakolb 	waitq_unlink(wq, t);
266*c97ad5cdSakolb 	DTRACE_SCHED1(cpucaps__wakeup, kthread_t *, t);
267*c97ad5cdSakolb 
268*c97ad5cdSakolb 	/*
269*c97ad5cdSakolb 	 * Change thread to transition state without dropping
270*c97ad5cdSakolb 	 * the wait queue lock.
271*c97ad5cdSakolb 	 */
272*c97ad5cdSakolb 	THREAD_TRANSITION_NOLOCK(t);
273*c97ad5cdSakolb }
274*c97ad5cdSakolb 
275*c97ad5cdSakolb /*
276*c97ad5cdSakolb  * Return True iff there are any threads on the specified wait queue.
277*c97ad5cdSakolb  * The check is done **without holding any locks**.
278*c97ad5cdSakolb  */
279*c97ad5cdSakolb boolean_t
280*c97ad5cdSakolb waitq_isempty(waitq_t *wq)
281*c97ad5cdSakolb {
282*c97ad5cdSakolb 	return (wq->wq_count == 0);
283*c97ad5cdSakolb }
284*c97ad5cdSakolb 
285*c97ad5cdSakolb /*
286*c97ad5cdSakolb  * Take thread off its wait queue and make it runnable.
287*c97ad5cdSakolb  * Returns with thread lock held.
288*c97ad5cdSakolb  */
289*c97ad5cdSakolb void
290*c97ad5cdSakolb waitq_setrun(kthread_t *t)
291*c97ad5cdSakolb {
292*c97ad5cdSakolb 	waitq_t *wq = t->t_waitq;
293*c97ad5cdSakolb 
294*c97ad5cdSakolb 	ASSERT(THREAD_LOCK_HELD(t));
295*c97ad5cdSakolb 
296*c97ad5cdSakolb 	ASSERT(ISWAITING(t));
297*c97ad5cdSakolb 	if (wq == NULL)
298*c97ad5cdSakolb 		panic("waitq_setrun: thread %p is not on waitq", t);
299*c97ad5cdSakolb 	waitq_dequeue(wq, t);
300*c97ad5cdSakolb 
301*c97ad5cdSakolb 	disp_lock_exit_high(&wq->wq_lock);
302*c97ad5cdSakolb 	CL_SETRUN(t);
303*c97ad5cdSakolb }
304*c97ad5cdSakolb 
305*c97ad5cdSakolb /*
306*c97ad5cdSakolb  * Take the first thread off the wait queue and return pointer to it.
307*c97ad5cdSakolb  */
308*c97ad5cdSakolb static kthread_t *
309*c97ad5cdSakolb waitq_takeone(waitq_t *wq)
310*c97ad5cdSakolb {
311*c97ad5cdSakolb 	kthread_t *t;
312*c97ad5cdSakolb 
313*c97ad5cdSakolb 	disp_lock_enter(&wq->wq_lock);
314*c97ad5cdSakolb 	if ((t = wq->wq_first) != NULL)
315*c97ad5cdSakolb 		waitq_dequeue(wq, wq->wq_first);
316*c97ad5cdSakolb 	disp_lock_exit(&wq->wq_lock);
317*c97ad5cdSakolb 	return (t);
318*c97ad5cdSakolb }
319*c97ad5cdSakolb 
320*c97ad5cdSakolb /*
321*c97ad5cdSakolb  * Take the first thread off the wait queue and make it runnable.
322*c97ad5cdSakolb  * Return the pointer to the thread or NULL if waitq is empty
323*c97ad5cdSakolb  */
324*c97ad5cdSakolb static kthread_t *
325*c97ad5cdSakolb waitq_runfirst(waitq_t *wq)
326*c97ad5cdSakolb {
327*c97ad5cdSakolb 	kthread_t *t;
328*c97ad5cdSakolb 
329*c97ad5cdSakolb 	t = waitq_takeone(wq);
330*c97ad5cdSakolb 	if (t != NULL) {
331*c97ad5cdSakolb 		CL_SETRUN(t);
332*c97ad5cdSakolb 		thread_unlock(t);	/* drops dispq lock */
333*c97ad5cdSakolb 	}
334*c97ad5cdSakolb 	return (t);
335*c97ad5cdSakolb }
336*c97ad5cdSakolb 
337*c97ad5cdSakolb /*
338*c97ad5cdSakolb  * Take the first thread off the wait queue and make it runnable.
339*c97ad5cdSakolb  */
340*c97ad5cdSakolb void
341*c97ad5cdSakolb waitq_runone(waitq_t *wq)
342*c97ad5cdSakolb {
343*c97ad5cdSakolb 	(void) waitq_runfirst(wq);
344*c97ad5cdSakolb }
345*c97ad5cdSakolb 
346*c97ad5cdSakolb /*
347*c97ad5cdSakolb  * Take all threads off the wait queue and make them runnable.
348*c97ad5cdSakolb  */
349*c97ad5cdSakolb static void
350*c97ad5cdSakolb waitq_runall(waitq_t *wq)
351*c97ad5cdSakolb {
352*c97ad5cdSakolb 	while (waitq_runfirst(wq) != NULL)
353*c97ad5cdSakolb 		;
354*c97ad5cdSakolb }
355*c97ad5cdSakolb 
356*c97ad5cdSakolb /*
357*c97ad5cdSakolb  * Prevent any new threads from entering wait queue and make all threads
358*c97ad5cdSakolb  * currently on the wait queue runnable. After waitq_block() completion, no
359*c97ad5cdSakolb  * threads should ever appear on the wait queue untill it is unblocked.
360*c97ad5cdSakolb  */
361*c97ad5cdSakolb void
362*c97ad5cdSakolb waitq_block(waitq_t *wq)
363*c97ad5cdSakolb {
364*c97ad5cdSakolb 	ASSERT(!wq->wq_blocked);
365*c97ad5cdSakolb 	disp_lock_enter(&wq->wq_lock);
366*c97ad5cdSakolb 	wq->wq_blocked = B_TRUE;
367*c97ad5cdSakolb 	disp_lock_exit(&wq->wq_lock);
368*c97ad5cdSakolb 	waitq_runall(wq);
369*c97ad5cdSakolb 	ASSERT(waitq_isempty(wq));
370*c97ad5cdSakolb }
371*c97ad5cdSakolb 
372*c97ad5cdSakolb /*
373*c97ad5cdSakolb  * Allow threads to be placed on the wait queue.
374*c97ad5cdSakolb  */
375*c97ad5cdSakolb void
376*c97ad5cdSakolb waitq_unblock(waitq_t *wq)
377*c97ad5cdSakolb {
378*c97ad5cdSakolb 	disp_lock_enter(&wq->wq_lock);
379*c97ad5cdSakolb 
380*c97ad5cdSakolb 	ASSERT(waitq_isempty(wq));
381*c97ad5cdSakolb 	ASSERT(wq->wq_blocked);
382*c97ad5cdSakolb 
383*c97ad5cdSakolb 	wq->wq_blocked = B_FALSE;
384*c97ad5cdSakolb 
385*c97ad5cdSakolb 	disp_lock_exit(&wq->wq_lock);
386*c97ad5cdSakolb }
387