/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * All rights reserved.
 * Copyright (c) 2024 The FreeBSD Foundation
 *
 * Portions of this software were developed by Olivier Certner
 * <olce.freebsd@certner.fr> at Kumacom SARL under sponsorship from the FreeBSD
 * Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_sched.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/runq.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>
/* Set the "#if" below to 1 to enable logging of critical_enter/exit. */
#if 0
#define	KTR_CRITICAL	KTR_SCHED
#else
#define	KTR_CRITICAL	0
#endif

#ifdef FULL_PREEMPTION
#ifndef PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
#endif
#endif

/*
 * kern.sched.preemption allows user space to determine whether preemption
 * support is compiled in.  It is not currently a boot or runtime flag that
 * can be changed.
 */
#ifdef PREEMPTION
static int kern_sched_preemption = 1;
#else
static int kern_sched_preemption = 0;
#endif
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
    &kern_sched_preemption, 0, "Kernel preemption enabled");
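
/*
 * Illustrative sketch: user space can read the flag above either with
 * sysctl(8) ("sysctl kern.sched.preemption") or programmatically via
 * sysctlbyname(3), e.g.:
 *
 *	int val;
 *	size_t len = sizeof(val);
 *	if (sysctlbyname("kern.sched.preemption", &val, &len, NULL, 0) == 0)
 *		printf("preemption compiled in: %d\n", val);
 */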

/*
 * Support for scheduler stats exported via kern.sched.stats.  All stats may
 * be reset with kern.sched.stats.reset = 1.  Stats may be defined elsewhere
 * with SCHED_STAT_DEFINE().
 */
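/*
 * For illustration, with SCHED_STATS compiled in, all counters below can be
 * cleared from user space with:
 *
 *	# sysctl kern.sched.stats.reset=1
 */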
#ifdef SCHED_STATS
SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "switch stats");

/* Switch reasons from mi_switch(9). */
DPCPU_DEFINE(long, sched_switch_stats[SWT_COUNT]);
SCHED_STAT_DEFINE_VAR(owepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_OWEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(turnstile,
    &DPCPU_NAME(sched_switch_stats[SWT_TURNSTILE]), "");
SCHED_STAT_DEFINE_VAR(sleepq,
    &DPCPU_NAME(sched_switch_stats[SWT_SLEEPQ]), "");
SCHED_STAT_DEFINE_VAR(relinquish,
    &DPCPU_NAME(sched_switch_stats[SWT_RELINQUISH]), "");
SCHED_STAT_DEFINE_VAR(needresched,
    &DPCPU_NAME(sched_switch_stats[SWT_NEEDRESCHED]), "");
SCHED_STAT_DEFINE_VAR(idle,
    &DPCPU_NAME(sched_switch_stats[SWT_IDLE]), "");
SCHED_STAT_DEFINE_VAR(iwait,
    &DPCPU_NAME(sched_switch_stats[SWT_IWAIT]), "");
SCHED_STAT_DEFINE_VAR(suspend,
    &DPCPU_NAME(sched_switch_stats[SWT_SUSPEND]), "");
SCHED_STAT_DEFINE_VAR(remotepreempt,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEPREEMPT]), "");
SCHED_STAT_DEFINE_VAR(remotewakeidle,
    &DPCPU_NAME(sched_switch_stats[SWT_REMOTEWAKEIDLE]), "");
SCHED_STAT_DEFINE_VAR(bind,
    &DPCPU_NAME(sched_switch_stats[SWT_BIND]), "");

static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
	struct sysctl_oid *p;
	uintptr_t counter;
	int error;
	int val;
	int i;

	val = 0;
	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error != 0 || req->newptr == NULL)
		return (error);
	if (val == 0)
		return (0);
	/*
	 * Traverse the list of children of _kern_sched_stats and reset each
	 * to 0.  Skip the reset entry.
	 */
	RB_FOREACH(p, sysctl_oid_list, oidp->oid_parent) {
		if (p == oidp || p->oid_arg1 == NULL)
			continue;
		counter = (uintptr_t)p->oid_arg1;
		CPU_FOREACH(i) {
			*(long *)(dpcpu_off[i] + counter) = 0;
		}
	}
	return (0);
}

SYSCTL_PROC(_kern_sched_stats, OID_AUTO, reset,
    CTLTYPE_INT | CTLFLAG_WR | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_stats_reset, "I",
    "Reset scheduler statistics");
#endif

/************************************************************************
 * Functions that manipulate runnability from a thread perspective.	*
 ************************************************************************/
/*
 * Select the thread that will be run next.
 */

static __noinline struct thread *
choosethread_panic(struct thread *td)
{

	/*
	 * If we are panicking, only allow system threads, plus the one we
	 * are running in, to be run.
	 */
retry:
	if (((td->td_proc->p_flag & P_SYSTEM) == 0 &&
	    (td->td_flags & TDF_INPANIC) == 0)) {
		/* Note that it is no longer on the run queue. */
		TD_SET_CAN_RUN(td);
		td = sched_choose();
		goto retry;
	}

	TD_SET_RUNNING(td);
	return (td);
}

struct thread *
choosethread(void)
{
	struct thread *td;

	td = sched_choose();

	if (KERNEL_PANICKED())
		return (choosethread_panic(td));

	TD_SET_RUNNING(td);
	return (td);
}

/*
 * Kernel thread preemption implementation.  Critical sections mark
 * regions of code in which preemptions are not allowed.
 *
 * It might seem a good idea to inline critical_enter(), but to prevent
 * instruction reordering by the compiler a __compiler_membar() would have to
 * be used (as in sched_pin()).  The penalty imposed by the membar could then
 * produce slower code than the function call itself, in most cases.
 */
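
/*
 * Typical usage pattern (an illustrative sketch): bracket accesses to per-CPU
 * data so that the current thread cannot be preempted, and thus migrated to
 * another CPU, in between:
 *
 *	critical_enter();
 *	... operate on data obtained via, e.g., PCPU_GET()/PCPU_PTR() ...
 *	critical_exit();
 */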
void
critical_enter_KBI(void)
{
#ifdef KTR
	struct thread *td = curthread;
#endif
	critical_enter();
	CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

void __noinline
critical_exit_preempt(void)
{
	struct thread *td;
	int flags;

	/*
	 * If td_critnest is 0, it is possible that we are going to get
	 * preempted again before reaching the code below.  This happens
	 * rarely and is harmless.  However, this means td_owepreempt may
	 * now be unset.
	 */
	td = curthread;
	if (td->td_critnest != 0)
		return;
	if (kdb_active)
		return;

	/*
	 * Microoptimization: since we have committed to switching, disable
	 * preemption in interrupt handlers while spinning for the thread
	 * lock.
	 */
	td->td_critnest = 1;
	thread_lock(td);
	td->td_critnest--;
	flags = SW_INVOL | SW_PREEMPT;
	if (TD_IS_IDLETHREAD(td))
		flags |= SWT_IDLE;
	else
		flags |= SWT_OWEPREEMPT;
	mi_switch(flags);
}

void
critical_exit_KBI(void)
{
#ifdef KTR
	struct thread *td = curthread;
#endif
	critical_exit();
	CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
	    (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
}

/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests				*
 ************************************************************************/
_Static_assert(RQ_NQS <= 256,
    "'td_rqindex' must be turned into a bigger unsigned type");
/* A macro instead of a function so that __func__ reports the caller's name. */
#define	CHECK_IDX(idx) ({						\
	__typeof(idx) _idx __unused = (idx);				\
	KASSERT(0 <= _idx && _idx < RQ_NQS,				\
	    ("%s: %s out of range: %d", __func__, __STRING(idx), _idx)); \
})

/* Internals of the status words' individual bit manipulators. */
typedef uintptr_t	runq_sw_op(int idx, int sw_idx, rqsw_t sw_bit,
			    rqsw_t *swp);
static inline uintptr_t	runq_sw_apply(struct runq *rq, int idx,
			    runq_sw_op *op);

static inline uintptr_t	runq_sw_set_not_empty_op(int idx, int sw_idx,
			    rqsw_t sw_bit, rqsw_t *swp);
static inline uintptr_t	runq_sw_set_empty_op(int idx, int sw_idx,
			    rqsw_t sw_bit, rqsw_t *swp);
static inline uintptr_t	runq_sw_is_empty_op(int idx, int sw_idx,
			    rqsw_t sw_bit, rqsw_t *swp);

/* Status words' individual bit manipulators. */
static inline void	runq_sw_set_not_empty(struct runq *rq, int idx);
static inline void	runq_sw_set_empty(struct runq *rq, int idx);
static inline bool	runq_sw_is_empty(struct runq *rq, int idx);

/*
 * Initialize a run queue structure.
 */
void
runq_init(struct runq *rq)
{
	int i;

	bzero(rq, sizeof(*rq));
	for (i = 0; i < RQ_NQS; i++)
		TAILQ_INIT(&rq->rq_queues[i]);
}

/*
 * Helper to implement functions operating on a particular status word bit.
 *
 * The operator is passed the initial 'idx', the corresponding status word
 * index into 'rq_status' in 'sw_idx', a status word with only that bit set in
 * 'sw_bit', and a pointer to the corresponding status word in 'swp'.
 */
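/*
 * For illustration (assuming 32-bit status words, i.e., RQSW_BPW == 32):
 * idx == 40 would yield sw_idx == 1 (40 / 32) and a 'sw_bit' with only bit 8
 * (40 % 32) set.
 */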
static inline uintptr_t
runq_sw_apply(struct runq *rq, int idx, runq_sw_op *op)
{
	rqsw_t *swp;
	rqsw_t sw_bit;
	int sw_idx;

	CHECK_IDX(idx);

	sw_idx = RQSW_IDX(idx);
	sw_bit = RQSW_BIT(idx);
	swp = &rq->rq_status.rq_sw[sw_idx];

	return (op(idx, sw_idx, sw_bit, swp));
}

static inline uintptr_t
runq_sw_set_not_empty_op(int idx, int sw_idx, rqsw_t sw_bit, rqsw_t *swp)
{
	rqsw_t old_sw __unused = *swp;

	*swp |= sw_bit;
	CTR4(KTR_RUNQ,
	    "runq_sw_set_not_empty: idx=%d sw_idx=%d "
	    "bits=" RQSW_PRI "->" RQSW_PRI,
	    idx, sw_idx, old_sw, *swp);
	return (0);
}

/*
 * Modify the status words to indicate that the queue at index 'idx' is not
 * empty.
 *
 * Sets the status bit corresponding to that queue.
 */
static inline void
runq_sw_set_not_empty(struct runq *rq, int idx)
{

	(void)runq_sw_apply(rq, idx, &runq_sw_set_not_empty_op);
}

static inline uintptr_t
runq_sw_set_empty_op(int idx, int sw_idx, rqsw_t sw_bit, rqsw_t *swp)
{
	rqsw_t old_sw __unused = *swp;

	*swp &= ~sw_bit;
	CTR4(KTR_RUNQ,
	    "runq_sw_set_empty: idx=%d sw_idx=%d "
	    "bits=" RQSW_PRI "->" RQSW_PRI,
	    idx, sw_idx, old_sw, *swp);
	return (0);
}

/*
 * Modify the status words to indicate that the queue at index 'idx' is empty.
 *
 * Clears the status bit corresponding to that queue.
 */
static inline void
runq_sw_set_empty(struct runq *rq, int idx)
{

	(void)runq_sw_apply(rq, idx, &runq_sw_set_empty_op);
}

static inline uintptr_t
runq_sw_is_empty_op(int idx, int sw_idx, rqsw_t sw_bit, rqsw_t *swp)
{
	return ((*swp & sw_bit) == 0);
}

/*
 * Returns whether the status words indicate that the queue at index 'idx' is
 * empty.
 */
static inline bool
runq_sw_is_empty(struct runq *rq, int idx)
{
	return (runq_sw_apply(rq, idx, &runq_sw_is_empty_op));
}

/*
 * Returns whether the queue at index 'idx' is empty.
 */
bool
runq_is_queue_empty(struct runq *rq, int idx)
{

	return (runq_sw_is_empty(rq, idx));
}

/*
 * Add the thread to the queue specified by its priority, and set the
 * corresponding status bit.
 */
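/*
 * For illustration (assuming the default of RQ_PPQ == 4 priority levels per
 * queue): a thread at priority 100 is inserted into the queue at index
 * 100 / 4 == 25.
 */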
void
runq_add(struct runq *rq, struct thread *td, int flags)
{

	runq_add_idx(rq, td, RQ_PRI_TO_QUEUE_IDX(td->td_priority), flags);
}

void
runq_add_idx(struct runq *rq, struct thread *td, int idx, int flags)
{
	struct rq_queue *rqq;

	/*
	 * runq_sw_*() functions assert that 'idx' is non-negative and below
	 * 'RQ_NQS', and a static assert earlier in this file ensures that
	 * 'RQ_NQS' is no more than 256.
	 */
	td->td_rqindex = idx;
	runq_sw_set_not_empty(rq, idx);
	rqq = &rq->rq_queues[idx];
	CTR4(KTR_RUNQ, "runq_add_idx: td=%p pri=%d idx=%d rqq=%p",
	    td, td->td_priority, idx, rqq);
	if (flags & SRQ_PREEMPTED)
		TAILQ_INSERT_HEAD(rqq, td, td_runq);
	else
		TAILQ_INSERT_TAIL(rqq, td, td_runq);
}

/*
 * Remove the thread from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 *
 * Returns whether the corresponding queue is empty after removal.
 */
bool
runq_remove(struct runq *rq, struct thread *td)
{
	struct rq_queue *rqq;
	int idx;

	KASSERT(td->td_flags & TDF_INMEM, ("runq_remove: Thread swapped out"));
	idx = td->td_rqindex;
	CHECK_IDX(idx);
	rqq = &rq->rq_queues[idx];
	CTR4(KTR_RUNQ, "runq_remove: td=%p pri=%d idx=%d rqq=%p",
	    td, td->td_priority, idx, rqq);
	TAILQ_REMOVE(rqq, td, td_runq);
	if (TAILQ_EMPTY(rqq)) {
		runq_sw_set_empty(rq, idx);
		CTR1(KTR_RUNQ, "runq_remove: queue at idx=%d now empty", idx);
		return (true);
	}
	return (false);
}

static inline int
runq_findq_status_word(struct runq *const rq, const int w_idx,
    const rqsw_t w, runq_pred_t *const pred, void *const pred_data)
{
	struct rq_queue *q;
	rqsw_t tw = w;
	int idx, b_idx;

	while (tw != 0) {
		b_idx = RQSW_BSF(tw);
		idx = RQSW_TO_QUEUE_IDX(w_idx, b_idx);
		q = &rq->rq_queues[idx];
		KASSERT(!TAILQ_EMPTY(q),
		    ("runq_findq(): No thread on non-empty queue with idx=%d",
		    idx));
		if (pred(idx, q, pred_data))
			return (idx);
		tw &= ~RQSW_BIT(idx);
	}

	return (-1);
}

/*
 * Find the index of the first (i.e., lowest-indexed) non-empty queue in the
 * passed range that passes pred().
 *
 * Considered queues are those with index 'lvl_min' up to 'lvl_max' (bounds
 * included).  If no queue matches, returns -1.
 *
 * This is done by scanning the status words (a set bit indicates a non-empty
 * queue) and calling pred() with the corresponding queue indices.  pred()
 * must return whether the corresponding queue is accepted.  It is passed
 * private data through 'pred_data', which can be used both for extra input
 * and output.
 */
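/*
 * Minimal illustrative predicate (a sketch; the name is hypothetical, compare
 * the in-tree runq_first_thread_pred() below):
 *
 *	static bool
 *	accept_first_pred(const int idx, struct rq_queue *const q,
 *	    void *const data)
 *	{
 *		*(int *)data = idx;
 *		return (true);
 *	}
 *
 * runq_findq(rq, 0, RQ_NQS - 1, accept_first_pred, &idx) would then return
 * the index of the lowest non-empty queue, or -1 if all queues are empty.
 */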
int
runq_findq(struct runq *const rq, const int lvl_min, const int lvl_max,
    runq_pred_t *const pred, void *const pred_data)
{
	const rqsw_t (*const rqsw)[RQSW_NB] = &rq->rq_status.rq_sw;
	rqsw_t w;
	int i, last, idx;

	CHECK_IDX(lvl_min);
	CHECK_IDX(lvl_max);
	KASSERT(lvl_min <= lvl_max,
	    ("lvl_min: %d > lvl_max: %d!", lvl_min, lvl_max));

	i = RQSW_IDX(lvl_min);
	last = RQSW_IDX(lvl_max);
	/* Clear bits for runqueues below 'lvl_min'. */
	w = (*rqsw)[i] & ~(RQSW_BIT(lvl_min) - 1);
	if (i == last)
		goto last_mask;
	idx = runq_findq_status_word(rq, i, w, pred, pred_data);
	if (idx != -1)
		goto return_idx;

	for (++i; i < last; ++i) {
		w = (*rqsw)[i];
		idx = runq_findq_status_word(rq, i, w, pred, pred_data);
		if (idx != -1)
			goto return_idx;
	}

	MPASS(i == last);
	w = (*rqsw)[i];
last_mask:
	/* Clear bits for runqueues above 'lvl_max'. */
	w &= (RQSW_BIT(lvl_max) - 1) | RQSW_BIT(lvl_max);
	idx = runq_findq_status_word(rq, i, w, pred, pred_data);
	if (idx != -1)
		goto return_idx;
	return (-1);
return_idx:
	CTR4(KTR_RUNQ,
	    "runq_findq: bits=" RQSW_PRI "->" RQSW_PRI " i=%d idx=%d",
	    (*rqsw)[i], w, i, idx);
	return (idx);
}

static bool
runq_first_thread_pred(const int idx, struct rq_queue *const q, void *const data)
{
	struct thread **const tdp = data;
	struct thread *const td = TAILQ_FIRST(q);

	*tdp = td;
	return (true);
}

/*
 * Inline this function for the benefit of this file's internal uses, but make
 * sure it has an external definition as it is exported.
 */
extern inline struct thread *
runq_first_thread_range(struct runq *const rq, const int lvl_min,
    const int lvl_max)
{
	struct thread *td = NULL;

	(void)runq_findq(rq, lvl_min, lvl_max, runq_first_thread_pred, &td);
	return (td);
}

static inline struct thread *
runq_first_thread(struct runq *const rq)
{

	return (runq_first_thread_range(rq, 0, RQ_NQS - 1));
}

/*
 * Return true if there is some thread of any priority on the run queue, false
 * otherwise.  Has no side effects.  Supports racy lookups (required by 4BSD).
 */
bool
runq_not_empty(struct runq *rq)
{
	const rqsw_t (*const rqsw)[RQSW_NB] = &rq->rq_status.rq_sw;
	int sw_idx;

	for (sw_idx = 0; sw_idx < RQSW_NB; ++sw_idx) {
		const rqsw_t w = (*rqsw)[sw_idx];

		if (w != 0) {
			CTR3(KTR_RUNQ, "runq_not_empty: not empty; "
			    "rq=%p, sw_idx=%d, bits=" RQSW_PRI,
			    rq, sw_idx, w);
			return (true);
		}
	}

	CTR1(KTR_RUNQ, "runq_not_empty: empty; rq=%p", rq);
	return (false);
}

/*
 * Find the highest-priority thread on the run queue.
 */
struct thread *
runq_choose(struct runq *rq)
{
	struct thread *td;

	td = runq_first_thread(rq);
	if (td != NULL) {
		CTR2(KTR_RUNQ, "runq_choose: idx=%d td=%p", td->td_rqindex, td);
		return (td);
	}

	CTR0(KTR_RUNQ, "runq_choose: idlethread");
	return (NULL);
}

struct runq_fuzz_pred_data {
	int fuzz;
	struct thread *td;
};

static bool
runq_fuzz_pred(const int idx, struct rq_queue *const q, void *const data)
{
	struct runq_fuzz_pred_data *const d = data;
	const int fuzz = d->fuzz;
	struct thread *td;

	td = TAILQ_FIRST(q);

	if (fuzz > 1) {
		/*
		 * In the first 'fuzz' entries, look for one that last ran
		 * on our CPU, as a preference.
		 */
		struct thread *td2 = td;
		int count = fuzz;
		int cpu = PCPU_GET(cpuid);

		while (count-- != 0 && td2 != NULL) {
			if (td2->td_lastcpu == cpu) {
				td = td2;
				break;
			}
			td2 = TAILQ_NEXT(td2, td_runq);
		}
	}

	d->td = td;
	return (true);
}

/*
 * Find the highest-priority thread on the run queue, preferring, among the
 * first 'fuzz' threads of the relevant queue, one that last ran on the
 * current CPU.
 */
struct thread *
runq_choose_fuzz(struct runq *rq, int fuzz)
{
	struct runq_fuzz_pred_data data = {
		.fuzz = fuzz,
		.td = NULL
	};
	int idx;

	idx = runq_findq(rq, 0, RQ_NQS - 1, runq_fuzz_pred, &data);
	if (idx != -1) {
		MPASS(data.td != NULL);
		CTR2(KTR_RUNQ, "runq_choose_fuzz: idx=%d td=%p", idx, data.td);
		return (data.td);
	}

	MPASS(data.td == NULL);
	CTR0(KTR_RUNQ, "runq_choose_fuzz: idlethread");
	return (NULL);
}