xref: /illumos-gate/usr/src/uts/common/os/waitq.c (revision 46a7ef8aa64ee52c612c9b1b02ca19542a7a2e89)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/thread.h>
31 #include <sys/class.h>
32 #include <sys/debug.h>
33 #include <sys/cpuvar.h>
34 #include <sys/waitq.h>
35 #include <sys/cmn_err.h>
36 #include <sys/time.h>
37 #include <sys/dtrace.h>
38 #include <sys/sdt.h>
39 #include <sys/zone.h>
40 
41 /*
42  * Wait queue implementation.
43  */
44 
45 void
46 waitq_init(waitq_t *wq)
47 {
48 	DISP_LOCK_INIT(&wq->wq_lock);
49 	wq->wq_first = NULL;
50 	wq->wq_count = 0;
51 	wq->wq_blocked = B_TRUE;
52 }
53 
54 void
55 waitq_fini(waitq_t *wq)
56 {
57 	ASSERT(wq->wq_count == 0);
58 	ASSERT(wq->wq_first == NULL);
59 	ASSERT(wq->wq_blocked == B_TRUE);
60 	ASSERT(!DISP_LOCK_HELD(&wq->wq_lock));
61 
62 	DISP_LOCK_DESTROY(&wq->wq_lock);
63 }
64 
65 /*
66  * Operations on waitq_t structures.
67  *
68  * A wait queue is a singly linked NULL-terminated list with doubly
69  * linked circular sublists.  The singly linked list is in descending
70  * priority order and FIFO for threads of the same priority.  It links
71  * through the t_link field of the thread structure.  The doubly linked
72  * sublists link threads of the same priority.  They use the t_priforw
73  * and t_priback fields of the thread structure.
74  *
75  * Graphically (with priorities in parens):
76  *
77  *         ________________           _______                   _______
78  *        /                \         /       \                 /       \
79  *        |                |         |       |                 |       |
80  *        v                v         v       v                 v       v
81  *     t1(60)-->t2(60)-->t3(60)-->t4(50)-->t5(50)-->t6(30)-->t7(0)-->t8(0)
82  *        ^      ^  ^      ^         ^       ^       ^  ^      ^       ^
83  *        |      |  |      |         |       |       |  |      |       |
84  *        \______/  \______/         \_______/       \__/      \_______/
85  *
86  * There are three interesting operations on a waitq list: inserting
87  * a thread into the proper position according to priority; removing a
88  * thread given a pointer to it; and walking the list, possibly
89  * removing threads along the way.  This design allows all three
90  * operations to be performed efficiently and easily.
91  *
92  * To insert a thread, traverse the list looking for the sublist of
93  * the same priority as the thread (or one of a lower priority,
94  * meaning there are no other threads in the list of the same
95  * priority).  This can be done without touching all threads in the
96  * list by following the links between the first threads in each
97  * sublist.  Given a thread t that is the head of a sublist (the first
98  * thread of that priority found when following the t_link pointers),
99  * t->t_priback->t_link points to the head of the next sublist.  It's
100  * important to do this since a waitq may contain thousands of
101  * threads.
102  *
103  * Removing a thread from the list is also efficient.  First, the
104  * t_waitq field contains a pointer to the waitq on which a thread
105  * is waiting (or NULL if it's not on a waitq).  This is used to
106  * determine if the given thread is on the given waitq without
107  * searching the list.  Assuming it is, if it's not the head of a
108  * sublist, just remove it from the sublist and use the t_priback
109  * pointer to find the thread that points to it with t_link.  If it is
110  * the head of a sublist, search for it by walking the sublist heads,
111  * similar to searching for a given priority level when inserting a
112  * thread.
113  *
114  * To walk the list, simply follow the t_link pointers.  Removing
115  * threads along the way can be done easily if the code maintains a
116  * pointer to the t_link field that pointed to the thread being
117  * removed.
118  */
119 
120 static void
121 waitq_link(waitq_t *wq, kthread_t *t)
122 {
123 	kthread_t *next_tp;
124 	kthread_t *last_tp;
125 	kthread_t **tpp;
126 	pri_t tpri, next_pri, last_pri = -1;
127 
128 	ASSERT(DISP_LOCK_HELD(&wq->wq_lock));
129 
130 	tpri = DISP_PRIO(t);
131 	tpp = &wq->wq_first;
132 	while ((next_tp = *tpp) != NULL) {
133 		next_pri = DISP_PRIO(next_tp);
134 		if (tpri > next_pri)
135 			break;
136 		last_tp = next_tp->t_priback;
137 		last_pri = next_pri;
138 		tpp = &last_tp->t_link;
139 	}
140 	*tpp = t;
141 	t->t_link = next_tp;
142 	if (last_pri == tpri) {
143 		/* last_tp points to the last thread of this priority */
144 		t->t_priback = last_tp;
145 		t->t_priforw = last_tp->t_priforw;
146 		last_tp->t_priforw->t_priback = t;
147 		last_tp->t_priforw = t;
148 	} else {
149 		t->t_priback = t->t_priforw = t;
150 	}
151 	wq->wq_count++;
152 	t->t_waitq = wq;
153 }
154 
155 static void
156 waitq_unlink(waitq_t *wq, kthread_t *t)
157 {
158 	kthread_t *nt;
159 	kthread_t **ptl;
160 
161 	ASSERT(THREAD_LOCK_HELD(t));
162 	ASSERT(DISP_LOCK_HELD(&wq->wq_lock));
163 	ASSERT(t->t_waitq == wq);
164 
165 	ptl = &t->t_priback->t_link;
166 	/*
167 	 * Is it the head of a priority sublist?  If so, need to walk
168 	 * the priorities to find the t_link pointer that points to it.
169 	 */
170 	if (*ptl != t) {
171 		/*
172 		 * Find the right priority level.
173 		 */
174 		ptl = &t->t_waitq->wq_first;
175 		while ((nt = *ptl) != t)
176 			ptl = &nt->t_priback->t_link;
177 	}
178 	/*
179 	 * Remove thread from the t_link list.
180 	 */
181 	*ptl = t->t_link;
182 
183 	/*
184 	 * Take it off the priority sublist if there's more than one
185 	 * thread there.
186 	 */
187 	if (t->t_priforw != t) {
188 		t->t_priback->t_priforw = t->t_priforw;
189 		t->t_priforw->t_priback = t->t_priback;
190 	}
191 	t->t_link = NULL;
192 
193 	wq->wq_count--;
194 	t->t_waitq = NULL;
195 	t->t_priforw = NULL;
196 	t->t_priback = NULL;
197 }
198 
199 /*
200  * Put specified thread to specified wait queue without dropping thread's lock.
201  * Returns 1 if thread was successfully placed on project's wait queue, or
202  * 0 if wait queue is blocked.
203  */
204 int
205 waitq_enqueue(waitq_t *wq, kthread_t *t)
206 {
207 	ASSERT(THREAD_LOCK_HELD(t));
208 	ASSERT(t->t_sleepq == NULL);
209 	ASSERT(t->t_waitq == NULL);
210 	ASSERT(t->t_link == NULL);
211 
212 	disp_lock_enter_high(&wq->wq_lock);
213 
214 	/*
215 	 * Can't enqueue anything on a blocked wait queue
216 	 */
217 	if (wq->wq_blocked) {
218 		disp_lock_exit_high(&wq->wq_lock);
219 		return (0);
220 	}
221 
222 	/*
223 	 * Mark the time when thread is placed on wait queue. The microstate
224 	 * accounting code uses this timestamp to determine wait times.
225 	 */
226 	t->t_waitrq = gethrtime_unscaled();
227 
228 	/*
229 	 * Mark thread as not swappable.  If necessary, it will get
230 	 * swapped out when it returns to the userland.
231 	 */
232 	t->t_schedflag |= TS_DONT_SWAP;
233 	DTRACE_SCHED1(cpucaps__sleep, kthread_t *, t);
234 	waitq_link(wq, t);
235 
236 	THREAD_WAIT(t, &wq->wq_lock);
237 	return (1);
238 }
239 
240 /*
241  * Change thread's priority while on the wait queue.
242  * Dequeue and equeue it again so that it gets placed in the right place.
243  */
244 void
245 waitq_change_pri(kthread_t *t, pri_t new_pri)
246 {
247 	waitq_t *wq = t->t_waitq;
248 
249 	ASSERT(THREAD_LOCK_HELD(t));
250 	ASSERT(ISWAITING(t));
251 	ASSERT(wq != NULL);
252 
253 	waitq_unlink(wq, t);
254 	t->t_pri = new_pri;
255 	waitq_link(wq, t);
256 }
257 
258 static void
259 waitq_dequeue(waitq_t *wq, kthread_t *t)
260 {
261 	ASSERT(THREAD_LOCK_HELD(t));
262 	ASSERT(t->t_waitq == wq);
263 	ASSERT(ISWAITING(t));
264 
265 	waitq_unlink(wq, t);
266 	DTRACE_SCHED1(cpucaps__wakeup, kthread_t *, t);
267 
268 	/*
269 	 * Change thread to transition state and drop the wait queue lock. The
270 	 * thread will remain locked since its t_lockp points to the
271 	 * transition_lock.
272 	 */
273 	THREAD_TRANSITION(t);
274 }
275 
276 /*
277  * Return True iff there are any threads on the specified wait queue.
278  * The check is done **without holding any locks**.
279  */
280 boolean_t
281 waitq_isempty(waitq_t *wq)
282 {
283 	return (wq->wq_count == 0);
284 }
285 
286 /*
287  * Take thread off its wait queue and make it runnable.
288  * Returns with thread lock held.
289  */
290 void
291 waitq_setrun(kthread_t *t)
292 {
293 	waitq_t *wq = t->t_waitq;
294 
295 	ASSERT(THREAD_LOCK_HELD(t));
296 
297 	ASSERT(ISWAITING(t));
298 	if (wq == NULL)
299 		panic("waitq_setrun: thread %p is not on waitq", t);
300 	waitq_dequeue(wq, t);
301 	CL_SETRUN(t);
302 }
303 
304 /*
305  * Take the first thread off the wait queue and return pointer to it.
306  */
307 static kthread_t *
308 waitq_takeone(waitq_t *wq)
309 {
310 	kthread_t *t;
311 
312 	disp_lock_enter(&wq->wq_lock);
313 	/*
314 	 * waitq_dequeue drops wait queue lock but leaves the CPU at high PIL.
315 	 */
316 	if ((t = wq->wq_first) != NULL)
317 		waitq_dequeue(wq, wq->wq_first);
318 	else
319 		disp_lock_exit(&wq->wq_lock);
320 	return (t);
321 }
322 
323 /*
324  * Take the first thread off the wait queue and make it runnable.
325  * Return the pointer to the thread or NULL if waitq is empty
326  */
327 static kthread_t *
328 waitq_runfirst(waitq_t *wq)
329 {
330 	kthread_t *t;
331 
332 	t = waitq_takeone(wq);
333 	if (t != NULL) {
334 		/*
335 		 * t should have transition lock held.
336 		 * CL_SETRUN() will replace it with dispq lock and keep it held.
337 		 * thread_unlock() will drop dispq lock and restore PIL.
338 		 */
339 		ASSERT(THREAD_LOCK_HELD(t));
340 		CL_SETRUN(t);
341 		thread_unlock(t);
342 	}
343 	return (t);
344 }
345 
346 /*
347  * Take the first thread off the wait queue and make it runnable.
348  */
349 void
350 waitq_runone(waitq_t *wq)
351 {
352 	(void) waitq_runfirst(wq);
353 }
354 
355 /*
356  * Take all threads off the wait queue and make them runnable.
357  */
358 static void
359 waitq_runall(waitq_t *wq)
360 {
361 	while (waitq_runfirst(wq) != NULL)
362 		;
363 }
364 
365 /*
366  * Prevent any new threads from entering wait queue and make all threads
367  * currently on the wait queue runnable. After waitq_block() completion, no
368  * threads should ever appear on the wait queue untill it is unblocked.
369  */
370 void
371 waitq_block(waitq_t *wq)
372 {
373 	ASSERT(!wq->wq_blocked);
374 	disp_lock_enter(&wq->wq_lock);
375 	wq->wq_blocked = B_TRUE;
376 	disp_lock_exit(&wq->wq_lock);
377 	waitq_runall(wq);
378 	ASSERT(waitq_isempty(wq));
379 }
380 
381 /*
382  * Allow threads to be placed on the wait queue.
383  */
384 void
385 waitq_unblock(waitq_t *wq)
386 {
387 	disp_lock_enter(&wq->wq_lock);
388 
389 	ASSERT(waitq_isempty(wq));
390 	ASSERT(wq->wq_blocked);
391 
392 	wq->wq_blocked = B_FALSE;
393 
394 	disp_lock_exit(&wq->wq_lock);
395 }
396