1*c97ad5cdSakolb /* 2*c97ad5cdSakolb * CDDL HEADER START 3*c97ad5cdSakolb * 4*c97ad5cdSakolb * The contents of this file are subject to the terms of the 5*c97ad5cdSakolb * Common Development and Distribution License (the "License"). 6*c97ad5cdSakolb * You may not use this file except in compliance with the License. 7*c97ad5cdSakolb * 8*c97ad5cdSakolb * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*c97ad5cdSakolb * or http://www.opensolaris.org/os/licensing. 10*c97ad5cdSakolb * See the License for the specific language governing permissions 11*c97ad5cdSakolb * and limitations under the License. 12*c97ad5cdSakolb * 13*c97ad5cdSakolb * When distributing Covered Code, include this CDDL HEADER in each 14*c97ad5cdSakolb * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*c97ad5cdSakolb * If applicable, add the following below this CDDL HEADER, with the 16*c97ad5cdSakolb * fields enclosed by brackets "[]" replaced with your own identifying 17*c97ad5cdSakolb * information: Portions Copyright [yyyy] [name of copyright owner] 18*c97ad5cdSakolb * 19*c97ad5cdSakolb * CDDL HEADER END 20*c97ad5cdSakolb */ 21*c97ad5cdSakolb /* 22*c97ad5cdSakolb * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23*c97ad5cdSakolb * Use is subject to license terms. 24*c97ad5cdSakolb */ 25*c97ad5cdSakolb 26*c97ad5cdSakolb #pragma ident "%Z%%M% %I% %E% SMI" 27*c97ad5cdSakolb 28*c97ad5cdSakolb #include <sys/param.h> 29*c97ad5cdSakolb #include <sys/systm.h> 30*c97ad5cdSakolb #include <sys/thread.h> 31*c97ad5cdSakolb #include <sys/class.h> 32*c97ad5cdSakolb #include <sys/debug.h> 33*c97ad5cdSakolb #include <sys/cpuvar.h> 34*c97ad5cdSakolb #include <sys/waitq.h> 35*c97ad5cdSakolb #include <sys/cmn_err.h> 36*c97ad5cdSakolb #include <sys/time.h> 37*c97ad5cdSakolb #include <sys/dtrace.h> 38*c97ad5cdSakolb #include <sys/sdt.h> 39*c97ad5cdSakolb #include <sys/zone.h> 40*c97ad5cdSakolb 41*c97ad5cdSakolb /* 42*c97ad5cdSakolb * Wait queue implementation. 43*c97ad5cdSakolb */ 44*c97ad5cdSakolb 45*c97ad5cdSakolb void 46*c97ad5cdSakolb waitq_init(waitq_t *wq) 47*c97ad5cdSakolb { 48*c97ad5cdSakolb DISP_LOCK_INIT(&wq->wq_lock); 49*c97ad5cdSakolb wq->wq_first = NULL; 50*c97ad5cdSakolb wq->wq_count = 0; 51*c97ad5cdSakolb wq->wq_blocked = B_TRUE; 52*c97ad5cdSakolb } 53*c97ad5cdSakolb 54*c97ad5cdSakolb void 55*c97ad5cdSakolb waitq_fini(waitq_t *wq) 56*c97ad5cdSakolb { 57*c97ad5cdSakolb ASSERT(wq->wq_count == 0); 58*c97ad5cdSakolb ASSERT(wq->wq_first == NULL); 59*c97ad5cdSakolb ASSERT(wq->wq_blocked == B_TRUE); 60*c97ad5cdSakolb ASSERT(!DISP_LOCK_HELD(&wq->wq_lock)); 61*c97ad5cdSakolb 62*c97ad5cdSakolb DISP_LOCK_DESTROY(&wq->wq_lock); 63*c97ad5cdSakolb } 64*c97ad5cdSakolb 65*c97ad5cdSakolb /* 66*c97ad5cdSakolb * Operations on waitq_t structures. 67*c97ad5cdSakolb * 68*c97ad5cdSakolb * A wait queue is a singly linked NULL-terminated list with doubly 69*c97ad5cdSakolb * linked circular sublists. The singly linked list is in descending 70*c97ad5cdSakolb * priority order and FIFO for threads of the same priority. It links 71*c97ad5cdSakolb * through the t_link field of the thread structure. The doubly linked 72*c97ad5cdSakolb * sublists link threads of the same priority. They use the t_priforw 73*c97ad5cdSakolb * and t_priback fields of the thread structure. 74*c97ad5cdSakolb * 75*c97ad5cdSakolb * Graphically (with priorities in parens): 76*c97ad5cdSakolb * 77*c97ad5cdSakolb * ________________ _______ _______ 78*c97ad5cdSakolb * / \ / \ / \ 79*c97ad5cdSakolb * | | | | | | 80*c97ad5cdSakolb * v v v v v v 81*c97ad5cdSakolb * t1(60)-->t2(60)-->t3(60)-->t4(50)-->t5(50)-->t6(30)-->t7(0)-->t8(0) 82*c97ad5cdSakolb * ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ 83*c97ad5cdSakolb * | | | | | | | | | | 84*c97ad5cdSakolb * \______/ \______/ \_______/ \__/ \_______/ 85*c97ad5cdSakolb * 86*c97ad5cdSakolb * There are three interesting operations on a waitq list: inserting 87*c97ad5cdSakolb * a thread into the proper position according to priority; removing a 88*c97ad5cdSakolb * thread given a pointer to it; and walking the list, possibly 89*c97ad5cdSakolb * removing threads along the way. This design allows all three 90*c97ad5cdSakolb * operations to be performed efficiently and easily. 91*c97ad5cdSakolb * 92*c97ad5cdSakolb * To insert a thread, traverse the list looking for the sublist of 93*c97ad5cdSakolb * the same priority as the thread (or one of a lower priority, 94*c97ad5cdSakolb * meaning there are no other threads in the list of the same 95*c97ad5cdSakolb * priority). This can be done without touching all threads in the 96*c97ad5cdSakolb * list by following the links between the first threads in each 97*c97ad5cdSakolb * sublist. Given a thread t that is the head of a sublist (the first 98*c97ad5cdSakolb * thread of that priority found when following the t_link pointers), 99*c97ad5cdSakolb * t->t_priback->t_link points to the head of the next sublist. It's 100*c97ad5cdSakolb * important to do this since a waitq may contain thousands of 101*c97ad5cdSakolb * threads. 102*c97ad5cdSakolb * 103*c97ad5cdSakolb * Removing a thread from the list is also efficient. First, the 104*c97ad5cdSakolb * t_waitq field contains a pointer to the waitq on which a thread 105*c97ad5cdSakolb * is waiting (or NULL if it's not on a waitq). This is used to 106*c97ad5cdSakolb * determine if the given thread is on the given waitq without 107*c97ad5cdSakolb * searching the list. Assuming it is, if it's not the head of a 108*c97ad5cdSakolb * sublist, just remove it from the sublist and use the t_priback 109*c97ad5cdSakolb * pointer to find the thread that points to it with t_link. If it is 110*c97ad5cdSakolb * the head of a sublist, search for it by walking the sublist heads, 111*c97ad5cdSakolb * similar to searching for a given priority level when inserting a 112*c97ad5cdSakolb * thread. 113*c97ad5cdSakolb * 114*c97ad5cdSakolb * To walk the list, simply follow the t_link pointers. Removing 115*c97ad5cdSakolb * threads along the way can be done easily if the code maintains a 116*c97ad5cdSakolb * pointer to the t_link field that pointed to the thread being 117*c97ad5cdSakolb * removed. 118*c97ad5cdSakolb */ 119*c97ad5cdSakolb 120*c97ad5cdSakolb static void 121*c97ad5cdSakolb waitq_link(waitq_t *wq, kthread_t *t) 122*c97ad5cdSakolb { 123*c97ad5cdSakolb kthread_t *next_tp; 124*c97ad5cdSakolb kthread_t *last_tp; 125*c97ad5cdSakolb kthread_t **tpp; 126*c97ad5cdSakolb pri_t tpri, next_pri, last_pri = -1; 127*c97ad5cdSakolb 128*c97ad5cdSakolb ASSERT(DISP_LOCK_HELD(&wq->wq_lock)); 129*c97ad5cdSakolb 130*c97ad5cdSakolb tpri = DISP_PRIO(t); 131*c97ad5cdSakolb tpp = &wq->wq_first; 132*c97ad5cdSakolb while ((next_tp = *tpp) != NULL) { 133*c97ad5cdSakolb next_pri = DISP_PRIO(next_tp); 134*c97ad5cdSakolb if (tpri > next_pri) 135*c97ad5cdSakolb break; 136*c97ad5cdSakolb last_tp = next_tp->t_priback; 137*c97ad5cdSakolb last_pri = next_pri; 138*c97ad5cdSakolb tpp = &last_tp->t_link; 139*c97ad5cdSakolb } 140*c97ad5cdSakolb *tpp = t; 141*c97ad5cdSakolb t->t_link = next_tp; 142*c97ad5cdSakolb if (last_pri == tpri) { 143*c97ad5cdSakolb /* last_tp points to the last thread of this priority */ 144*c97ad5cdSakolb t->t_priback = last_tp; 145*c97ad5cdSakolb t->t_priforw = last_tp->t_priforw; 146*c97ad5cdSakolb last_tp->t_priforw->t_priback = t; 147*c97ad5cdSakolb last_tp->t_priforw = t; 148*c97ad5cdSakolb } else { 149*c97ad5cdSakolb t->t_priback = t->t_priforw = t; 150*c97ad5cdSakolb } 151*c97ad5cdSakolb wq->wq_count++; 152*c97ad5cdSakolb t->t_waitq = wq; 153*c97ad5cdSakolb } 154*c97ad5cdSakolb 155*c97ad5cdSakolb static void 156*c97ad5cdSakolb waitq_unlink(waitq_t *wq, kthread_t *t) 157*c97ad5cdSakolb { 158*c97ad5cdSakolb kthread_t *nt; 159*c97ad5cdSakolb kthread_t **ptl; 160*c97ad5cdSakolb 161*c97ad5cdSakolb ASSERT(THREAD_LOCK_HELD(t)); 162*c97ad5cdSakolb ASSERT(DISP_LOCK_HELD(&wq->wq_lock)); 163*c97ad5cdSakolb ASSERT(t->t_waitq == wq); 164*c97ad5cdSakolb 165*c97ad5cdSakolb ptl = &t->t_priback->t_link; 166*c97ad5cdSakolb /* 167*c97ad5cdSakolb * Is it the head of a priority sublist? If so, need to walk 168*c97ad5cdSakolb * the priorities to find the t_link pointer that points to it. 169*c97ad5cdSakolb */ 170*c97ad5cdSakolb if (*ptl != t) { 171*c97ad5cdSakolb /* 172*c97ad5cdSakolb * Find the right priority level. 173*c97ad5cdSakolb */ 174*c97ad5cdSakolb ptl = &t->t_waitq->wq_first; 175*c97ad5cdSakolb while ((nt = *ptl) != t) 176*c97ad5cdSakolb ptl = &nt->t_priback->t_link; 177*c97ad5cdSakolb } 178*c97ad5cdSakolb /* 179*c97ad5cdSakolb * Remove thread from the t_link list. 180*c97ad5cdSakolb */ 181*c97ad5cdSakolb *ptl = t->t_link; 182*c97ad5cdSakolb 183*c97ad5cdSakolb /* 184*c97ad5cdSakolb * Take it off the priority sublist if there's more than one 185*c97ad5cdSakolb * thread there. 186*c97ad5cdSakolb */ 187*c97ad5cdSakolb if (t->t_priforw != t) { 188*c97ad5cdSakolb t->t_priback->t_priforw = t->t_priforw; 189*c97ad5cdSakolb t->t_priforw->t_priback = t->t_priback; 190*c97ad5cdSakolb } 191*c97ad5cdSakolb t->t_link = NULL; 192*c97ad5cdSakolb 193*c97ad5cdSakolb wq->wq_count--; 194*c97ad5cdSakolb t->t_waitq = NULL; 195*c97ad5cdSakolb t->t_priforw = NULL; 196*c97ad5cdSakolb t->t_priback = NULL; 197*c97ad5cdSakolb } 198*c97ad5cdSakolb 199*c97ad5cdSakolb /* 200*c97ad5cdSakolb * Put specified thread to specified wait queue without dropping thread's lock. 201*c97ad5cdSakolb * Returns 1 if thread was successfully placed on project's wait queue, or 202*c97ad5cdSakolb * 0 if wait queue is blocked. 203*c97ad5cdSakolb */ 204*c97ad5cdSakolb int 205*c97ad5cdSakolb waitq_enqueue(waitq_t *wq, kthread_t *t) 206*c97ad5cdSakolb { 207*c97ad5cdSakolb ASSERT(THREAD_LOCK_HELD(t)); 208*c97ad5cdSakolb ASSERT(t->t_sleepq == NULL); 209*c97ad5cdSakolb ASSERT(t->t_waitq == NULL); 210*c97ad5cdSakolb ASSERT(t->t_link == NULL); 211*c97ad5cdSakolb 212*c97ad5cdSakolb disp_lock_enter_high(&wq->wq_lock); 213*c97ad5cdSakolb 214*c97ad5cdSakolb /* 215*c97ad5cdSakolb * Can't enqueue anything on a blocked wait queue 216*c97ad5cdSakolb */ 217*c97ad5cdSakolb if (wq->wq_blocked) { 218*c97ad5cdSakolb disp_lock_exit_high(&wq->wq_lock); 219*c97ad5cdSakolb return (0); 220*c97ad5cdSakolb } 221*c97ad5cdSakolb 222*c97ad5cdSakolb /* 223*c97ad5cdSakolb * Mark the time when thread is placed on wait queue. The microstate 224*c97ad5cdSakolb * accounting code uses this timestamp to determine wait times. 225*c97ad5cdSakolb */ 226*c97ad5cdSakolb t->t_waitrq = gethrtime_unscaled(); 227*c97ad5cdSakolb 228*c97ad5cdSakolb /* 229*c97ad5cdSakolb * Mark thread as not swappable. If necessary, it will get 230*c97ad5cdSakolb * swapped out when it returns to the userland. 231*c97ad5cdSakolb */ 232*c97ad5cdSakolb t->t_schedflag |= TS_DONT_SWAP; 233*c97ad5cdSakolb DTRACE_SCHED1(cpucaps__sleep, kthread_t *, t); 234*c97ad5cdSakolb waitq_link(wq, t); 235*c97ad5cdSakolb 236*c97ad5cdSakolb THREAD_WAIT(t, &wq->wq_lock); 237*c97ad5cdSakolb return (1); 238*c97ad5cdSakolb } 239*c97ad5cdSakolb 240*c97ad5cdSakolb /* 241*c97ad5cdSakolb * Change thread's priority while on the wait queue. 242*c97ad5cdSakolb * Dequeue and equeue it again so that it gets placed in the right place. 243*c97ad5cdSakolb */ 244*c97ad5cdSakolb void 245*c97ad5cdSakolb waitq_change_pri(kthread_t *t, pri_t new_pri) 246*c97ad5cdSakolb { 247*c97ad5cdSakolb waitq_t *wq = t->t_waitq; 248*c97ad5cdSakolb 249*c97ad5cdSakolb ASSERT(THREAD_LOCK_HELD(t)); 250*c97ad5cdSakolb ASSERT(ISWAITING(t)); 251*c97ad5cdSakolb ASSERT(wq != NULL); 252*c97ad5cdSakolb 253*c97ad5cdSakolb waitq_unlink(wq, t); 254*c97ad5cdSakolb t->t_pri = new_pri; 255*c97ad5cdSakolb waitq_link(wq, t); 256*c97ad5cdSakolb } 257*c97ad5cdSakolb 258*c97ad5cdSakolb static void 259*c97ad5cdSakolb waitq_dequeue(waitq_t *wq, kthread_t *t) 260*c97ad5cdSakolb { 261*c97ad5cdSakolb ASSERT(THREAD_LOCK_HELD(t)); 262*c97ad5cdSakolb ASSERT(t->t_waitq == wq); 263*c97ad5cdSakolb ASSERT(ISWAITING(t)); 264*c97ad5cdSakolb 265*c97ad5cdSakolb waitq_unlink(wq, t); 266*c97ad5cdSakolb DTRACE_SCHED1(cpucaps__wakeup, kthread_t *, t); 267*c97ad5cdSakolb 268*c97ad5cdSakolb /* 269*c97ad5cdSakolb * Change thread to transition state without dropping 270*c97ad5cdSakolb * the wait queue lock. 271*c97ad5cdSakolb */ 272*c97ad5cdSakolb THREAD_TRANSITION_NOLOCK(t); 273*c97ad5cdSakolb } 274*c97ad5cdSakolb 275*c97ad5cdSakolb /* 276*c97ad5cdSakolb * Return True iff there are any threads on the specified wait queue. 277*c97ad5cdSakolb * The check is done **without holding any locks**. 278*c97ad5cdSakolb */ 279*c97ad5cdSakolb boolean_t 280*c97ad5cdSakolb waitq_isempty(waitq_t *wq) 281*c97ad5cdSakolb { 282*c97ad5cdSakolb return (wq->wq_count == 0); 283*c97ad5cdSakolb } 284*c97ad5cdSakolb 285*c97ad5cdSakolb /* 286*c97ad5cdSakolb * Take thread off its wait queue and make it runnable. 287*c97ad5cdSakolb * Returns with thread lock held. 288*c97ad5cdSakolb */ 289*c97ad5cdSakolb void 290*c97ad5cdSakolb waitq_setrun(kthread_t *t) 291*c97ad5cdSakolb { 292*c97ad5cdSakolb waitq_t *wq = t->t_waitq; 293*c97ad5cdSakolb 294*c97ad5cdSakolb ASSERT(THREAD_LOCK_HELD(t)); 295*c97ad5cdSakolb 296*c97ad5cdSakolb ASSERT(ISWAITING(t)); 297*c97ad5cdSakolb if (wq == NULL) 298*c97ad5cdSakolb panic("waitq_setrun: thread %p is not on waitq", t); 299*c97ad5cdSakolb waitq_dequeue(wq, t); 300*c97ad5cdSakolb 301*c97ad5cdSakolb disp_lock_exit_high(&wq->wq_lock); 302*c97ad5cdSakolb CL_SETRUN(t); 303*c97ad5cdSakolb } 304*c97ad5cdSakolb 305*c97ad5cdSakolb /* 306*c97ad5cdSakolb * Take the first thread off the wait queue and return pointer to it. 307*c97ad5cdSakolb */ 308*c97ad5cdSakolb static kthread_t * 309*c97ad5cdSakolb waitq_takeone(waitq_t *wq) 310*c97ad5cdSakolb { 311*c97ad5cdSakolb kthread_t *t; 312*c97ad5cdSakolb 313*c97ad5cdSakolb disp_lock_enter(&wq->wq_lock); 314*c97ad5cdSakolb if ((t = wq->wq_first) != NULL) 315*c97ad5cdSakolb waitq_dequeue(wq, wq->wq_first); 316*c97ad5cdSakolb disp_lock_exit(&wq->wq_lock); 317*c97ad5cdSakolb return (t); 318*c97ad5cdSakolb } 319*c97ad5cdSakolb 320*c97ad5cdSakolb /* 321*c97ad5cdSakolb * Take the first thread off the wait queue and make it runnable. 322*c97ad5cdSakolb * Return the pointer to the thread or NULL if waitq is empty 323*c97ad5cdSakolb */ 324*c97ad5cdSakolb static kthread_t * 325*c97ad5cdSakolb waitq_runfirst(waitq_t *wq) 326*c97ad5cdSakolb { 327*c97ad5cdSakolb kthread_t *t; 328*c97ad5cdSakolb 329*c97ad5cdSakolb t = waitq_takeone(wq); 330*c97ad5cdSakolb if (t != NULL) { 331*c97ad5cdSakolb CL_SETRUN(t); 332*c97ad5cdSakolb thread_unlock(t); /* drops dispq lock */ 333*c97ad5cdSakolb } 334*c97ad5cdSakolb return (t); 335*c97ad5cdSakolb } 336*c97ad5cdSakolb 337*c97ad5cdSakolb /* 338*c97ad5cdSakolb * Take the first thread off the wait queue and make it runnable. 339*c97ad5cdSakolb */ 340*c97ad5cdSakolb void 341*c97ad5cdSakolb waitq_runone(waitq_t *wq) 342*c97ad5cdSakolb { 343*c97ad5cdSakolb (void) waitq_runfirst(wq); 344*c97ad5cdSakolb } 345*c97ad5cdSakolb 346*c97ad5cdSakolb /* 347*c97ad5cdSakolb * Take all threads off the wait queue and make them runnable. 348*c97ad5cdSakolb */ 349*c97ad5cdSakolb static void 350*c97ad5cdSakolb waitq_runall(waitq_t *wq) 351*c97ad5cdSakolb { 352*c97ad5cdSakolb while (waitq_runfirst(wq) != NULL) 353*c97ad5cdSakolb ; 354*c97ad5cdSakolb } 355*c97ad5cdSakolb 356*c97ad5cdSakolb /* 357*c97ad5cdSakolb * Prevent any new threads from entering wait queue and make all threads 358*c97ad5cdSakolb * currently on the wait queue runnable. After waitq_block() completion, no 359*c97ad5cdSakolb * threads should ever appear on the wait queue untill it is unblocked. 360*c97ad5cdSakolb */ 361*c97ad5cdSakolb void 362*c97ad5cdSakolb waitq_block(waitq_t *wq) 363*c97ad5cdSakolb { 364*c97ad5cdSakolb ASSERT(!wq->wq_blocked); 365*c97ad5cdSakolb disp_lock_enter(&wq->wq_lock); 366*c97ad5cdSakolb wq->wq_blocked = B_TRUE; 367*c97ad5cdSakolb disp_lock_exit(&wq->wq_lock); 368*c97ad5cdSakolb waitq_runall(wq); 369*c97ad5cdSakolb ASSERT(waitq_isempty(wq)); 370*c97ad5cdSakolb } 371*c97ad5cdSakolb 372*c97ad5cdSakolb /* 373*c97ad5cdSakolb * Allow threads to be placed on the wait queue. 374*c97ad5cdSakolb */ 375*c97ad5cdSakolb void 376*c97ad5cdSakolb waitq_unblock(waitq_t *wq) 377*c97ad5cdSakolb { 378*c97ad5cdSakolb disp_lock_enter(&wq->wq_lock); 379*c97ad5cdSakolb 380*c97ad5cdSakolb ASSERT(waitq_isempty(wq)); 381*c97ad5cdSakolb ASSERT(wq->wq_blocked); 382*c97ad5cdSakolb 383*c97ad5cdSakolb wq->wq_blocked = B_FALSE; 384*c97ad5cdSakolb 385*c97ad5cdSakolb disp_lock_exit(&wq->wq_lock); 386*c97ad5cdSakolb } 387