xref: /illumos-gate/usr/src/uts/common/os/waitq.c (revision b3403853e80914bd0aade9b5b605da4878078173)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/param.h>
27 #include <sys/systm.h>
28 #include <sys/thread.h>
29 #include <sys/class.h>
30 #include <sys/debug.h>
31 #include <sys/cpuvar.h>
32 #include <sys/waitq.h>
33 #include <sys/cmn_err.h>
34 #include <sys/time.h>
35 #include <sys/dtrace.h>
36 #include <sys/sdt.h>
37 #include <sys/zone.h>
38 
39 /*
40  * Wait queue implementation.
41  */
42 
43 void
44 waitq_init(waitq_t *wq)
45 {
46 	DISP_LOCK_INIT(&wq->wq_lock);
47 	wq->wq_first = NULL;
48 	wq->wq_count = 0;
49 	wq->wq_blocked = B_TRUE;
50 }
51 
52 void
53 waitq_fini(waitq_t *wq)
54 {
55 	ASSERT(wq->wq_count == 0);
56 	ASSERT(wq->wq_first == NULL);
57 	ASSERT(wq->wq_blocked == B_TRUE);
58 	ASSERT(!DISP_LOCK_HELD(&wq->wq_lock));
59 
60 	DISP_LOCK_DESTROY(&wq->wq_lock);
61 }
62 
63 /*
64  * Operations on waitq_t structures.
65  *
66  * A wait queue is a singly linked NULL-terminated list with doubly
67  * linked circular sublists.  The singly linked list is in descending
68  * priority order and FIFO for threads of the same priority.  It links
69  * through the t_link field of the thread structure.  The doubly linked
70  * sublists link threads of the same priority.  They use the t_priforw
71  * and t_priback fields of the thread structure.
72  *
73  * Graphically (with priorities in parens):
74  *
75  *         ________________           _______                   _______
76  *        /                \         /       \                 /       \
77  *        |                |         |       |                 |       |
78  *        v                v         v       v                 v       v
79  *     t1(60)-->t2(60)-->t3(60)-->t4(50)-->t5(50)-->t6(30)-->t7(0)-->t8(0)
80  *        ^      ^  ^      ^         ^       ^       ^  ^      ^       ^
81  *        |      |  |      |         |       |       |  |      |       |
82  *        \______/  \______/         \_______/       \__/      \_______/
83  *
84  * There are three interesting operations on a waitq list: inserting
85  * a thread into the proper position according to priority; removing a
86  * thread given a pointer to it; and walking the list, possibly
87  * removing threads along the way.  This design allows all three
88  * operations to be performed efficiently and easily.
89  *
90  * To insert a thread, traverse the list looking for the sublist of
91  * the same priority as the thread (or one of a lower priority,
92  * meaning there are no other threads in the list of the same
93  * priority).  This can be done without touching all threads in the
94  * list by following the links between the first threads in each
95  * sublist.  Given a thread t that is the head of a sublist (the first
96  * thread of that priority found when following the t_link pointers),
97  * t->t_priback->t_link points to the head of the next sublist.  It's
98  * important to do this since a waitq may contain thousands of
99  * threads.
100  *
101  * Removing a thread from the list is also efficient.  First, the
102  * t_waitq field contains a pointer to the waitq on which a thread
103  * is waiting (or NULL if it's not on a waitq).  This is used to
104  * determine if the given thread is on the given waitq without
105  * searching the list.  Assuming it is, if it's not the head of a
106  * sublist, just remove it from the sublist and use the t_priback
107  * pointer to find the thread that points to it with t_link.  If it is
108  * the head of a sublist, search for it by walking the sublist heads,
109  * similar to searching for a given priority level when inserting a
110  * thread.
111  *
112  * To walk the list, simply follow the t_link pointers.  Removing
113  * threads along the way can be done easily if the code maintains a
114  * pointer to the t_link field that pointed to the thread being
115  * removed.
116  */
117 
118 static void
119 waitq_link(waitq_t *wq, kthread_t *t)
120 {
121 	kthread_t *next_tp;
122 	kthread_t *last_tp = NULL;
123 	kthread_t **tpp;
124 	pri_t tpri, next_pri, last_pri = -1;
125 
126 	ASSERT(DISP_LOCK_HELD(&wq->wq_lock));
127 
128 	tpri = DISP_PRIO(t);
129 	tpp = &wq->wq_first;
130 	while ((next_tp = *tpp) != NULL) {
131 		next_pri = DISP_PRIO(next_tp);
132 		if (tpri > next_pri)
133 			break;
134 		last_tp = next_tp->t_priback;
135 		last_pri = next_pri;
136 		tpp = &last_tp->t_link;
137 	}
138 	*tpp = t;
139 	t->t_link = next_tp;
140 	if (last_tp != NULL && last_pri == tpri) {
141 		/* last_tp points to the last thread of this priority */
142 		t->t_priback = last_tp;
143 		t->t_priforw = last_tp->t_priforw;
144 		last_tp->t_priforw->t_priback = t;
145 		last_tp->t_priforw = t;
146 	} else {
147 		t->t_priback = t->t_priforw = t;
148 	}
149 	wq->wq_count++;
150 	t->t_waitq = wq;
151 }
152 
153 static void
154 waitq_unlink(waitq_t *wq, kthread_t *t)
155 {
156 	kthread_t *nt;
157 	kthread_t **ptl;
158 
159 	ASSERT(THREAD_LOCK_HELD(t));
160 	ASSERT(DISP_LOCK_HELD(&wq->wq_lock));
161 	ASSERT(t->t_waitq == wq);
162 
163 	ptl = &t->t_priback->t_link;
164 	/*
165 	 * Is it the head of a priority sublist?  If so, need to walk
166 	 * the priorities to find the t_link pointer that points to it.
167 	 */
168 	if (*ptl != t) {
169 		/*
170 		 * Find the right priority level.
171 		 */
172 		ptl = &t->t_waitq->wq_first;
173 		while ((nt = *ptl) != t)
174 			ptl = &nt->t_priback->t_link;
175 	}
176 	/*
177 	 * Remove thread from the t_link list.
178 	 */
179 	*ptl = t->t_link;
180 
181 	/*
182 	 * Take it off the priority sublist if there's more than one
183 	 * thread there.
184 	 */
185 	if (t->t_priforw != t) {
186 		t->t_priback->t_priforw = t->t_priforw;
187 		t->t_priforw->t_priback = t->t_priback;
188 	}
189 	t->t_link = NULL;
190 
191 	wq->wq_count--;
192 	t->t_waitq = NULL;
193 	t->t_priforw = NULL;
194 	t->t_priback = NULL;
195 }
196 
197 /*
198  * Put specified thread to specified wait queue without dropping thread's lock.
199  * Returns 1 if thread was successfully placed on project's wait queue, or
200  * 0 if wait queue is blocked.
201  */
202 int
203 waitq_enqueue(waitq_t *wq, kthread_t *t)
204 {
205 	ASSERT(THREAD_LOCK_HELD(t));
206 	ASSERT(t->t_sleepq == NULL);
207 	ASSERT(t->t_waitq == NULL);
208 	ASSERT(t->t_link == NULL);
209 
210 	disp_lock_enter_high(&wq->wq_lock);
211 
212 	/*
213 	 * Can't enqueue anything on a blocked wait queue
214 	 */
215 	if (wq->wq_blocked) {
216 		disp_lock_exit_high(&wq->wq_lock);
217 		return (0);
218 	}
219 
220 	/*
221 	 * Mark the time when thread is placed on wait queue. The microstate
222 	 * accounting code uses this timestamp to determine wait times.
223 	 */
224 	t->t_waitrq = gethrtime_unscaled();
225 
226 	/*
227 	 * Mark thread as not swappable.  If necessary, it will get
228 	 * swapped out when it returns to the userland.
229 	 */
230 	t->t_schedflag |= TS_DONT_SWAP;
231 	DTRACE_SCHED1(cpucaps__sleep, kthread_t *, t);
232 	waitq_link(wq, t);
233 
234 	THREAD_WAIT(t, &wq->wq_lock);
235 	return (1);
236 }
237 
238 /*
239  * Change thread's priority while on the wait queue.
240  * Dequeue and equeue it again so that it gets placed in the right place.
241  */
242 void
243 waitq_change_pri(kthread_t *t, pri_t new_pri)
244 {
245 	waitq_t *wq = t->t_waitq;
246 
247 	ASSERT(THREAD_LOCK_HELD(t));
248 	ASSERT(ISWAITING(t));
249 	ASSERT(wq != NULL);
250 
251 	waitq_unlink(wq, t);
252 	t->t_pri = new_pri;
253 	waitq_link(wq, t);
254 }
255 
256 static void
257 waitq_dequeue(waitq_t *wq, kthread_t *t)
258 {
259 	ASSERT(THREAD_LOCK_HELD(t));
260 	ASSERT(t->t_waitq == wq);
261 	ASSERT(ISWAITING(t));
262 
263 	waitq_unlink(wq, t);
264 	DTRACE_SCHED1(cpucaps__wakeup, kthread_t *, t);
265 
266 	/*
267 	 * Change thread to transition state and drop the wait queue lock. The
268 	 * thread will remain locked since its t_lockp points to the
269 	 * transition_lock.
270 	 */
271 	THREAD_TRANSITION(t);
272 }
273 
274 /*
275  * Return True iff there are any threads on the specified wait queue.
276  * The check is done **without holding any locks**.
277  */
278 boolean_t
279 waitq_isempty(waitq_t *wq)
280 {
281 	return (wq->wq_count == 0);
282 }
283 
284 /*
285  * Take thread off its wait queue and make it runnable.
286  * Returns with thread lock held.
287  */
288 void
289 waitq_setrun(kthread_t *t)
290 {
291 	waitq_t *wq = t->t_waitq;
292 
293 	ASSERT(THREAD_LOCK_HELD(t));
294 
295 	ASSERT(ISWAITING(t));
296 	if (wq == NULL)
297 		panic("waitq_setrun: thread %p is not on waitq", (void *)t);
298 	waitq_dequeue(wq, t);
299 	CL_SETRUN(t);
300 }
301 
302 /*
303  * Take the first thread off the wait queue and return pointer to it.
304  */
305 static kthread_t *
306 waitq_takeone(waitq_t *wq)
307 {
308 	kthread_t *t;
309 
310 	disp_lock_enter(&wq->wq_lock);
311 	/*
312 	 * waitq_dequeue drops wait queue lock but leaves the CPU at high PIL.
313 	 */
314 	if ((t = wq->wq_first) != NULL)
315 		waitq_dequeue(wq, wq->wq_first);
316 	else
317 		disp_lock_exit(&wq->wq_lock);
318 	return (t);
319 }
320 
321 /*
322  * Take the first thread off the wait queue and make it runnable.
323  * Return the pointer to the thread or NULL if waitq is empty
324  */
325 static kthread_t *
326 waitq_runfirst(waitq_t *wq)
327 {
328 	kthread_t *t;
329 
330 	t = waitq_takeone(wq);
331 	if (t != NULL) {
332 		/*
333 		 * t should have transition lock held.
334 		 * CL_SETRUN() will replace it with dispq lock and keep it held.
335 		 * thread_unlock() will drop dispq lock and restore PIL.
336 		 */
337 		ASSERT(THREAD_LOCK_HELD(t));
338 		CL_SETRUN(t);
339 		thread_unlock(t);
340 	}
341 	return (t);
342 }
343 
344 /*
345  * Take the first thread off the wait queue and make it runnable.
346  */
347 void
348 waitq_runone(waitq_t *wq)
349 {
350 	(void) waitq_runfirst(wq);
351 }
352 
353 /*
354  * Take all threads off the wait queue and make them runnable.
355  */
356 static void
357 waitq_runall(waitq_t *wq)
358 {
359 	while (waitq_runfirst(wq) != NULL)
360 		;
361 }
362 
363 /*
364  * Prevent any new threads from entering wait queue and make all threads
365  * currently on the wait queue runnable. After waitq_block() completion, no
366  * threads should ever appear on the wait queue untill it is unblocked.
367  */
368 void
369 waitq_block(waitq_t *wq)
370 {
371 	ASSERT(!wq->wq_blocked);
372 	disp_lock_enter(&wq->wq_lock);
373 	wq->wq_blocked = B_TRUE;
374 	disp_lock_exit(&wq->wq_lock);
375 	waitq_runall(wq);
376 	ASSERT(waitq_isempty(wq));
377 }
378 
379 /*
380  * Allow threads to be placed on the wait queue.
381  */
382 void
383 waitq_unblock(waitq_t *wq)
384 {
385 	disp_lock_enter(&wq->wq_lock);
386 
387 	ASSERT(waitq_isempty(wq));
388 	ASSERT(wq->wq_blocked);
389 
390 	wq->wq_blocked = B_FALSE;
391 
392 	disp_lock_exit(&wq->wq_lock);
393 }
394