/*-
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>

/* Uncomment this to enable logging of critical_enter/exit. */
#if 0
#define KTR_CRITICAL KTR_SCHED
#else
#define KTR_CRITICAL 0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
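
/*
 * Illustrative note on the invariant asserted above (the values are
 * machine-dependent; RQ_NQS = 64 with 32-bit status words is assumed
 * here): the RQ_NQS run queues are tracked by RQB_LEN words of RQB_BPW
 * status bits each, so with two 32-bit words,
 *
 *      RQB_WORD(pri) = pri / 32        (which word holds the bit)
 *      RQB_BIT(pri)  = 1 << (pri % 32) (the bit within that word)
 *
 * and priority level 36 maps to word 1, bit 4.  The CTASSERT guarantees
 * that the bitmap covers exactly one bit per queue.
 */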

/*
 * kern.sched.preemption allows user space to determine whether preemption
 * support is compiled in.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");

#ifdef SCHED_STATS
long switch_preempt;
long switch_owepreempt;
long switch_turnstile;
long switch_sleepq;
long switch_sleepqtimo;
long switch_relinquish;
long switch_needresched;
static SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0,
    "switch stats");
SYSCTL_LONG(_kern_sched_stats, OID_AUTO, preempt, CTLFLAG_RD,
    &switch_preempt, 0, "");
SYSCTL_LONG(_kern_sched_stats, OID_AUTO, owepreempt, CTLFLAG_RD,
    &switch_owepreempt, 0, "");
SYSCTL_LONG(_kern_sched_stats, OID_AUTO, turnstile, CTLFLAG_RD,
    &switch_turnstile, 0, "");
SYSCTL_LONG(_kern_sched_stats, OID_AUTO, sleepq, CTLFLAG_RD,
    &switch_sleepq, 0, "");
SYSCTL_LONG(_kern_sched_stats, OID_AUTO, sleepqtimo, CTLFLAG_RD,
    &switch_sleepqtimo, 0, "");
SYSCTL_LONG(_kern_sched_stats, OID_AUTO, relinquish, CTLFLAG_RD,
    &switch_relinquish, 0, "");
SYSCTL_LONG(_kern_sched_stats, OID_AUTO, needresched, CTLFLAG_RD,
    &switch_needresched, 0, "");

/*
 * Writing a non-zero value to kern.sched.stats.reset clears all of the
 * counters above.
 */
static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
        int error;
        int val;

        val = 0;
        error = sysctl_handle_int(oidp, &val, 0, req);
        if (error != 0 || req->newptr == NULL)
                return (error);
        if (val == 0)
                return (0);
        switch_preempt = 0;
        switch_owepreempt = 0;
        switch_turnstile = 0;
        switch_sleepq = 0;
        switch_sleepqtimo = 0;
        switch_relinquish = 0;
        switch_needresched = 0;

        return (0);
}

SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_WR, NULL,
    0, sysctl_stats_reset, "I", "Reset scheduler statistics");
#endif

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.    *
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */
struct thread *
choosethread(void)
{
        struct thread *td;

retry:
        td = sched_choose();

        /*
         * If we are in panic, only allow system threads,
         * plus the one we are running in, to be run.
         */
        if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
            (td->td_flags & TDF_INPANIC) == 0)) {
                /* note that it is no longer on the run queue */
                TD_SET_CAN_RUN(td);
                goto retry;
        }

        TD_SET_RUNNING(td);
        return (td);
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 */
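
/*
 * A minimal usage sketch (illustrative only, not compiled): callers
 * bracket code that must not be preempted on the current CPU, for
 * example access to per-CPU state:
 *
 *      critical_enter();
 *      ... touch per-CPU data; no preemption can occur here ...
 *      critical_exit();
 *
 * Sections nest via td_critnest, so entering a critical section while
 * already inside one is safe.  A preemption requested while
 * td_critnest > 0 is deferred by setting td_owepreempt and is carried
 * out by the outermost critical_exit(), as seen below.
 */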

void
critical_enter(void)
{
        struct thread *td;

        td = curthread;
        td->td_critnest++;
        CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

void
critical_exit(void)
{
        struct thread *td;

        td = curthread;
        KASSERT(td->td_critnest != 0,
            ("critical_exit: td_critnest == 0"));

        if (td->td_critnest == 1) {
                td->td_critnest = 0;
                if (td->td_owepreempt) {
                        td->td_critnest = 1;
                        thread_lock(td);
                        td->td_critnest--;
                        SCHED_STAT_INC(switch_owepreempt);
                        mi_switch(SW_INVOL|SW_PREEMPT, NULL);
                        thread_unlock(td);
                }
        } else
                td->td_critnest--;

        CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests                             *
 ************************************************************************/
/*
 * Initialize a run structure.
 */
void
runq_init(struct runq *rq)
{
        int i;

        bzero(rq, sizeof *rq);
        for (i = 0; i < RQ_NQS; i++)
                TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
 */
static __inline void
runq_clrbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
}

/*
 * Find the index of the first non-empty run queue.  This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
 */
static __inline int
runq_findbit(struct runq *rq)
{
        struct rqbits *rqb;
        int pri;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
                        CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
                            rqb->rqb_bits[i], i, pri);
                        return (pri);
                }

        return (-1);
}

/*
 * As above, but start the scan at priority level 'pri', wrapping around
 * to level 0 at most once.
 */
static __inline int
runq_findbit_from(struct runq *rq, u_char pri)
{
        struct rqbits *rqb;
        rqb_word_t mask;
        int i;

        /*
         * Set the mask for the first word so we ignore priorities
         * before 'pri'.
         */
        mask = (rqb_word_t)-1 << (pri & (RQB_BPW - 1));
        rqb = &rq->rq_status;
again:
        for (i = RQB_WORD(pri); i < RQB_LEN; mask = -1, i++) {
                mask = rqb->rqb_bits[i] & mask;
                if (mask == 0)
                        continue;
                pri = RQB_FFS(mask) + (i << RQB_L2BPW);
                CTR3(KTR_RUNQ, "runq_findbit_from: bits=%#x i=%d pri=%d",
                    mask, i, pri);
                return (pri);
        }
        if (pri == 0)
                return (-1);
        /*
         * Wrap back around to the beginning of the list just once so we
         * scan the whole thing.
         */
        pri = 0;
        goto again;
}
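
/*
 * Worked example for the scans above (assuming 32-bit status words):
 * with rqb_bits[0] == 0 and rqb_bits[1] == 0x10, runq_findbit() skips
 * word 0, computes RQB_FFS(0x10) = 4, adds the word offset
 * 1 << RQB_L2BPW = 32, and returns priority level 36.
 * runq_findbit_from() performs the same scan but first masks off bits
 * below the starting priority, wrapping around to level 0 at most once
 * so that every queue is still examined.
 */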

/*
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
 */
static __inline void
runq_setbit(struct runq *rq, int pri)
{
        struct rqbits *rqb;

        rqb = &rq->rq_status;
        CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
            rqb->rqb_bits[RQB_WORD(pri)],
            rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
            RQB_BIT(pri), RQB_WORD(pri));
        rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
}

/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.
 */
void
runq_add(struct runq *rq, struct thread *td, int flags)
{
        struct rqhead *rqh;
        int pri;

        pri = td->td_priority / RQ_PPQ;
        td->td_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_add: td=%p pri=%d %d rqh=%p",
            td, td->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, td, td_runq);
        } else {
                TAILQ_INSERT_TAIL(rqh, td, td_runq);
        }
}

/*
 * As above, but add the thread to the queue for the given queue index
 * rather than deriving the index from the thread's priority.
 */
void
runq_add_pri(struct runq *rq, struct thread *td, u_char pri, int flags)
{
        struct rqhead *rqh;

        KASSERT(pri < RQ_NQS, ("runq_add_pri: %d out of range", pri));
        td->td_rqindex = pri;
        runq_setbit(rq, pri);
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_add_pri: td=%p pri=%d idx=%d rqh=%p",
            td, td->td_priority, pri, rqh);
        if (flags & SRQ_PREEMPTED) {
                TAILQ_INSERT_HEAD(rqh, td, td_runq);
        } else {
                TAILQ_INSERT_TAIL(rqh, td, td_runq);
        }
}
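
/*
 * Illustrative mapping, assuming the stock RQ_PPQ of 4: runq_add()
 * collapses the 0..255 thread priority range into RQ_NQS (64) queues,
 * so a thread with td_priority 120 lands on queue 120 / 4 = 30.
 * Preempted threads (SRQ_PREEMPTED) are inserted at the head of their
 * queue so they resume ahead of peers at the same level; all others
 * are appended at the tail, giving round-robin order within a queue.
 */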

/*
 * Return true if there are runnable threads of any priority on the run
 * queue, false otherwise.  Has no side effects and does not modify the
 * run queue structure.
 */
int
runq_check(struct runq *rq)
{
        struct rqbits *rqb;
        int i;

        rqb = &rq->rq_status;
        for (i = 0; i < RQB_LEN; i++)
                if (rqb->rqb_bits[i]) {
                        CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
                            rqb->rqb_bits[i], i);
                        return (1);
                }
        CTR0(KTR_RUNQ, "runq_check: empty");

        return (0);
}

/*
 * Find the highest priority thread on the run queue, preferring one that
 * last ran on this CPU among the first 'fuzz' entries of its queue.
 */
struct thread *
runq_choose_fuzz(struct runq *rq, int fuzz)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
                /* fuzz == 1 is normal; 0 or less is ignored. */
                if (fuzz > 1) {
                        /*
                         * In the first couple of entries, check if
                         * there is one for our CPU as a preference.
                         */
                        int count = fuzz;
                        int cpu = PCPU_GET(cpuid);
                        struct thread *td2;
                        td2 = td = TAILQ_FIRST(rqh);

                        while (count-- && td2) {
                                if (td2->td_lastcpu == cpu) {
                                        td = td2;
                                        break;
                                }
                                td2 = TAILQ_NEXT(td2, td_runq);
                        }
                } else
                        td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL,
                    ("runq_choose_fuzz: no proc on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose_fuzz: pri=%d thread=%p rqh=%p", pri, td, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose_fuzz: idleproc pri=%d", pri);

        return (NULL);
}

/*
 * Find the highest priority thread on the run queue.
 */
struct thread *
runq_choose(struct runq *rq)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        while ((pri = runq_findbit(rq)) != -1) {
                rqh = &rq->rq_queues[pri];
                td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL, ("runq_choose: no thread on busy queue"));
                CTR3(KTR_RUNQ,
                    "runq_choose: pri=%d thread=%p rqh=%p", pri, td, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose: idlethread pri=%d", pri);

        return (NULL);
}

/*
 * As above, but begin the search at queue index 'idx', wrapping around
 * once if necessary.
 */
struct thread *
runq_choose_from(struct runq *rq, u_char idx)
{
        struct rqhead *rqh;
        struct thread *td;
        int pri;

        if ((pri = runq_findbit_from(rq, idx)) != -1) {
                rqh = &rq->rq_queues[pri];
                td = TAILQ_FIRST(rqh);
                KASSERT(td != NULL,
                    ("runq_choose_from: no thread on busy queue"));
                CTR4(KTR_RUNQ,
                    "runq_choose_from: pri=%d thread=%p idx=%d rqh=%p",
                    pri, td, td->td_rqindex, rqh);
                return (td);
        }
        CTR1(KTR_RUNQ, "runq_choose_from: idlethread pri=%d", pri);

        return (NULL);
}

/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * The caller must set the thread's state afterwards.
 */
void
runq_remove(struct runq *rq, struct thread *td)
{

        runq_remove_idx(rq, td, NULL);
}

void
runq_remove_idx(struct runq *rq, struct thread *td, u_char *idx)
{
        struct rqhead *rqh;
        u_char pri;

        KASSERT(td->td_flags & TDF_INMEM,
            ("runq_remove_idx: thread swapped out"));
        pri = td->td_rqindex;
        KASSERT(pri < RQ_NQS, ("runq_remove_idx: Invalid index %d\n", pri));
        rqh = &rq->rq_queues[pri];
        CTR4(KTR_RUNQ, "runq_remove_idx: td=%p, pri=%d %d rqh=%p",
            td, td->td_priority, pri, rqh);
        TAILQ_REMOVE(rqh, td, td_runq);
        if (TAILQ_EMPTY(rqh)) {
                CTR0(KTR_RUNQ, "runq_remove_idx: empty");
                runq_clrbit(rq, pri);
                if (idx != NULL && *idx == pri)
                        *idx = (pri + 1) % RQ_NQS;
        }
}
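
/*
 * Putting the pieces together, a minimal sketch of how a scheduler
 * (e.g. sched_4bsd) drives this interface; everything other than the
 * runq_* calls is illustrative:
 *
 *      struct runq rq;
 *
 *      runq_init(&rq);
 *      runq_add(&rq, td, 0);           (make td runnable)
 *      ...
 *      td = runq_choose(&rq);          (pick the best candidate)
 *      if (td != NULL)
 *              runq_remove(&rq, td);   (dequeue it; the caller then
 *                                       sets its state, cf. choosethread)
 */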