// SPDX-License-Identifier: GPL-2.0
/*
 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
 * policies)
 */
#include "sched.h"

#include "pelt.h"

int sched_rr_timeslice = RR_TIMESLICE;
int sysctl_sched_rr_timeslice = (MSEC_PER_SEC / HZ) * RR_TIMESLICE;
/* More than 4 hours if BW_SHIFT equals 20. */
static const u64 max_rt_runtime = MAX_BW;

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun);

struct rt_bandwidth def_rt_bandwidth;

static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer)
{
	struct rt_bandwidth *rt_b =
		container_of(timer, struct rt_bandwidth, rt_period_timer);
	int idle = 0;
	int overrun;

	raw_spin_lock(&rt_b->rt_runtime_lock);
	for (;;) {
		overrun = hrtimer_forward_now(timer, rt_b->rt_period);
		if (!overrun)
			break;

		raw_spin_unlock(&rt_b->rt_runtime_lock);
		idle = do_sched_rt_period_timer(rt_b, overrun);
		raw_spin_lock(&rt_b->rt_runtime_lock);
	}
	if (idle)
		rt_b->rt_period_active = 0;
	raw_spin_unlock(&rt_b->rt_runtime_lock);

	return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
}

void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
{
	rt_b->rt_period = ns_to_ktime(period);
	rt_b->rt_runtime = runtime;

	raw_spin_lock_init(&rt_b->rt_runtime_lock);

	hrtimer_init(&rt_b->rt_period_timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_REL_HARD);
	rt_b->rt_period_timer.function = sched_rt_period_timer;
}

static void start_rt_bandwidth(struct rt_bandwidth *rt_b)
{
	if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)
		return;

	raw_spin_lock(&rt_b->rt_runtime_lock);
	if (!rt_b->rt_period_active) {
		rt_b->rt_period_active = 1;
		/*
		 * SCHED_DEADLINE updates the bandwidth, as a runaway
		 * RT task with a DL task could hog a CPU. But DL does
		 * not reset the period. If a deadline task was running
		 * without an RT task running, it can cause RT tasks to
		 * throttle when they start up. Kick the timer right away
		 * to update the period.
		 */
		hrtimer_forward_now(&rt_b->rt_period_timer, ns_to_ktime(0));
		hrtimer_start_expires(&rt_b->rt_period_timer,
				      HRTIMER_MODE_ABS_PINNED_HARD);
	}
	raw_spin_unlock(&rt_b->rt_runtime_lock);
}
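/*
 * Illustrative note (added for exposition, not from the original source):
 * with the usual defaults of sched_rt_period_us = 1000000 and
 * sched_rt_runtime_us = 950000, an RT task (or group) may consume up to
 * 950 ms of CPU time in each 1 s period; once rt_time exceeds rt_runtime
 * the rt_rq is throttled until the period timer above replenishes it.
 */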
void init_rt_rq(struct rt_rq *rt_rq)
{
	struct rt_prio_array *array;
	int i;

	array = &rt_rq->active;
	for (i = 0; i < MAX_RT_PRIO; i++) {
		INIT_LIST_HEAD(array->queue + i);
		__clear_bit(i, array->bitmap);
	}
	/* delimiter for bitsearch: */
	__set_bit(MAX_RT_PRIO, array->bitmap);

#if defined CONFIG_SMP
	rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
	rt_rq->highest_prio.next = MAX_RT_PRIO-1;
	rt_rq->rt_nr_migratory = 0;
	rt_rq->overloaded = 0;
	plist_head_init(&rt_rq->pushable_tasks);
#endif /* CONFIG_SMP */
	/* We start in dequeued state, because no RT tasks are queued */
	rt_rq->rt_queued = 0;

	rt_rq->rt_time = 0;
	rt_rq->rt_throttled = 0;
	rt_rq->rt_runtime = 0;
	raw_spin_lock_init(&rt_rq->rt_runtime_lock);
}

#ifdef CONFIG_RT_GROUP_SCHED
static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
{
	hrtimer_cancel(&rt_b->rt_period_timer);
}

#define rt_entity_is_task(rt_se) (!(rt_se)->my_q)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_SCHED_DEBUG
	WARN_ON_ONCE(!rt_entity_is_task(rt_se));
#endif
	return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	return rt_rq->rq;
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	return rt_se->rt_rq;
}

static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = rt_se->rt_rq;

	return rt_rq->rq;
}

void free_rt_sched_group(struct task_group *tg)
{
	int i;

	if (tg->rt_se)
		destroy_rt_bandwidth(&tg->rt_bandwidth);

	for_each_possible_cpu(i) {
		if (tg->rt_rq)
			kfree(tg->rt_rq[i]);
		if (tg->rt_se)
			kfree(tg->rt_se[i]);
	}

	kfree(tg->rt_rq);
	kfree(tg->rt_se);
}

void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
		struct sched_rt_entity *rt_se, int cpu,
		struct sched_rt_entity *parent)
{
	struct rq *rq = cpu_rq(cpu);

	rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
	rt_rq->rt_nr_boosted = 0;
	rt_rq->rq = rq;
	rt_rq->tg = tg;

	tg->rt_rq[cpu] = rt_rq;
	tg->rt_se[cpu] = rt_se;

	if (!rt_se)
		return;

	if (!parent)
		rt_se->rt_rq = &rq->rt;
	else
		rt_se->rt_rq = parent->my_q;

	rt_se->my_q = rt_rq;
	rt_se->parent = parent;
	INIT_LIST_HEAD(&rt_se->run_list);
}

int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
	struct rt_rq *rt_rq;
	struct sched_rt_entity *rt_se;
	int i;

	tg->rt_rq = kcalloc(nr_cpu_ids, sizeof(rt_rq), GFP_KERNEL);
	if (!tg->rt_rq)
		goto err;
	tg->rt_se = kcalloc(nr_cpu_ids, sizeof(rt_se), GFP_KERNEL);
	if (!tg->rt_se)
		goto err;

	init_rt_bandwidth(&tg->rt_bandwidth,
			ktime_to_ns(def_rt_bandwidth.rt_period), 0);

	for_each_possible_cpu(i) {
		rt_rq = kzalloc_node(sizeof(struct rt_rq),
				     GFP_KERNEL, cpu_to_node(i));
		if (!rt_rq)
			goto err;

		rt_se = kzalloc_node(sizeof(struct sched_rt_entity),
				     GFP_KERNEL, cpu_to_node(i));
		if (!rt_se)
			goto err_free_rq;

		init_rt_rq(rt_rq);
		rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
		init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]);
	}

	return 1;

err_free_rq:
	kfree(rt_rq);
err:
	return 0;
}
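/*
 * Illustrative usage sketch (added for exposition; assumes the cgroup v1
 * cpu controller and a hypothetical group "mygrp"): a group's RT bandwidth
 * is configured through the cpu.rt_period_us and cpu.rt_runtime_us files,
 * e.g.
 *
 *	echo 1000000 > /sys/fs/cgroup/cpu/mygrp/cpu.rt_period_us
 *	echo 300000  > /sys/fs/cgroup/cpu/mygrp/cpu.rt_runtime_us
 *
 * which would allow RT tasks in that group 300 ms of runtime per 1 s
 * period. Note that the per-cpu rt_rq allocated above inherits rt_runtime
 * from tg->rt_bandwidth, which init_rt_bandwidth() set to 0, so a new
 * group must be given runtime before its RT tasks can run.
 */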
#else /* CONFIG_RT_GROUP_SCHED */

#define rt_entity_is_task(rt_se) (1)

static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se)
{
	return container_of(rt_se, struct task_struct, rt);
}

static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq)
{
	return container_of(rt_rq, struct rq, rt);
}

static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se)
{
	struct task_struct *p = rt_task_of(rt_se);

	return task_rq(p);
}

static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	return &rq->rt;
}

void free_rt_sched_group(struct task_group *tg) { }

int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent)
{
	return 1;
}
#endif /* CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_SMP

static void pull_rt_task(struct rq *this_rq);

static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
	/* Try to pull RT tasks here if we lower this rq's prio */
	return rq->online && rq->rt.highest_prio.curr > prev->prio;
}

static inline int rt_overloaded(struct rq *rq)
{
	return atomic_read(&rq->rd->rto_count);
}

static inline void rt_set_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	cpumask_set_cpu(rq->cpu, rq->rd->rto_mask);
	/*
	 * Make sure the mask is visible before we set
	 * the overload count. That is checked to determine
	 * if we should look at the mask. It would be a shame
	 * if we looked at the mask, but the mask was not
	 * updated yet.
	 *
	 * Matched by the barrier in pull_rt_task().
	 */
	smp_wmb();
	atomic_inc(&rq->rd->rto_count);
}

static inline void rt_clear_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	/* the order here really doesn't matter */
	atomic_dec(&rq->rd->rto_count);
	cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask);
}
static void update_rt_migration(struct rt_rq *rt_rq)
{
	if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) {
		if (!rt_rq->overloaded) {
			rt_set_overload(rq_of_rt_rq(rt_rq));
			rt_rq->overloaded = 1;
		}
	} else if (rt_rq->overloaded) {
		rt_clear_overload(rq_of_rt_rq(rt_rq));
		rt_rq->overloaded = 0;
	}
}

static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	struct task_struct *p;

	if (!rt_entity_is_task(rt_se))
		return;

	p = rt_task_of(rt_se);
	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

	rt_rq->rt_nr_total++;
	if (p->nr_cpus_allowed > 1)
		rt_rq->rt_nr_migratory++;

	update_rt_migration(rt_rq);
}

static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	struct task_struct *p;

	if (!rt_entity_is_task(rt_se))
		return;

	p = rt_task_of(rt_se);
	rt_rq = &rq_of_rt_rq(rt_rq)->rt;

	rt_rq->rt_nr_total--;
	if (p->nr_cpus_allowed > 1)
		rt_rq->rt_nr_migratory--;

	update_rt_migration(rt_rq);
}

static inline int has_pushable_tasks(struct rq *rq)
{
	return !plist_head_empty(&rq->rt.pushable_tasks);
}

static DEFINE_PER_CPU(struct callback_head, rt_push_head);
static DEFINE_PER_CPU(struct callback_head, rt_pull_head);

static void push_rt_tasks(struct rq *);
static void pull_rt_task(struct rq *);

static inline void rt_queue_push_tasks(struct rq *rq)
{
	if (!has_pushable_tasks(rq))
		return;

	queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks);
}

static inline void rt_queue_pull_task(struct rq *rq)
{
	queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task);
}

static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
	plist_node_init(&p->pushable_tasks, p->prio);
	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);

	/* Update the highest prio pushable task */
	if (p->prio < rq->rt.highest_prio.next)
		rq->rt.highest_prio.next = p->prio;
}

static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);

	/* Update the new highest prio pushable task */
	if (has_pushable_tasks(rq)) {
		p = plist_first_entry(&rq->rt.pushable_tasks,
				      struct task_struct, pushable_tasks);
		rq->rt.highest_prio.next = p->prio;
	} else {
		rq->rt.highest_prio.next = MAX_RT_PRIO-1;
	}
}

#else

static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
{
}

static inline
void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline
void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
}

static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev)
{
	return false;
}

static inline void pull_rt_task(struct rq *this_rq)
{
}

static inline void rt_queue_push_tasks(struct rq *rq)
{
}
#endif /* CONFIG_SMP */

static void enqueue_top_rt_rq(struct rt_rq *rt_rq);
static void dequeue_top_rt_rq(struct rt_rq *rt_rq);

static inline int on_rt_rq(struct sched_rt_entity *rt_se)
{
	return rt_se->on_rq;
}

#ifdef CONFIG_UCLAMP_TASK
/*
 * Verify the fitness of task @p to run on @cpu taking into account the uclamp
 * settings.
 *
 * This check is only important for heterogeneous systems where the uclamp_min
 * value is higher than the capacity of a @cpu. For non-heterogeneous systems
 * this function will always return true.
 *
 * The function will return true if the capacity of the @cpu is >= the
 * uclamp_min and false otherwise.
 *
 * Note that uclamp_min will be clamped to uclamp_max if uclamp_min
 * > uclamp_max.
 */
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
	unsigned int min_cap;
	unsigned int max_cap;
	unsigned int cpu_cap;

	/* Only heterogeneous systems can benefit from this check */
	if (!static_branch_unlikely(&sched_asym_cpucapacity))
		return true;

	min_cap = uclamp_eff_value(p, UCLAMP_MIN);
	max_cap = uclamp_eff_value(p, UCLAMP_MAX);

	cpu_cap = capacity_orig_of(cpu);

	return cpu_cap >= min(min_cap, max_cap);
}
#else
static inline bool rt_task_fits_capacity(struct task_struct *p, int cpu)
{
	return true;
}
#endif
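/*
 * Worked example (added for exposition; the capacity values are
 * illustrative): on a big.LITTLE system a little CPU might report
 * capacity_orig_of(cpu) == 446 while a big CPU reports 1024. An RT task
 * with an effective uclamp_min of 512 (and uclamp_max >= 512) does not fit
 * the little CPU (446 < 512) but does fit the big one, so
 * rt_task_fits_capacity() steers wakeup placement towards the big CPUs.
 */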
#ifdef CONFIG_RT_GROUP_SCHED

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
	if (!rt_rq->tg)
		return RUNTIME_INF;

	return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
}

typedef struct task_group *rt_rq_iter_t;

static inline struct task_group *next_task_group(struct task_group *tg)
{
	do {
		tg = list_entry_rcu(tg->list.next,
			typeof(struct task_group), list);
	} while (&tg->list != &task_groups && task_group_is_autogroup(tg));

	if (&tg->list == &task_groups)
		tg = NULL;

	return tg;
}

#define for_each_rt_rq(rt_rq, iter, rq)					\
	for (iter = container_of(&task_groups, typeof(*iter), list);	\
		(iter = next_task_group(iter)) &&			\
		(rt_rq = iter->rt_rq[cpu_of(rq)]);)

#define for_each_sched_rt_entity(rt_se) \
	for (; rt_se; rt_se = rt_se->parent)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
	return rt_se->my_q;
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);
static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags);

static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
	struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
	struct rq *rq = rq_of_rt_rq(rt_rq);
	struct sched_rt_entity *rt_se;

	int cpu = cpu_of(rq);

	rt_se = rt_rq->tg->rt_se[cpu];

	if (rt_rq->rt_nr_running) {
		if (!rt_se)
			enqueue_top_rt_rq(rt_rq);
		else if (!on_rt_rq(rt_se))
			enqueue_rt_entity(rt_se, 0);

		if (rt_rq->highest_prio.curr < curr->prio)
			resched_curr(rq);
	}
}

static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
	struct sched_rt_entity *rt_se;
	int cpu = cpu_of(rq_of_rt_rq(rt_rq));

	rt_se = rt_rq->tg->rt_se[cpu];

	if (!rt_se) {
		dequeue_top_rt_rq(rt_rq);
		/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
		cpufreq_update_util(rq_of_rt_rq(rt_rq), 0);
	}
	else if (on_rt_rq(rt_se))
		dequeue_rt_entity(rt_se, 0);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
	return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted;
}
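/*
 * Note (added for exposition): rt_rq_throttled() deliberately ignores the
 * throttle while rt_nr_boosted is non-zero. For example, if a task in a
 * throttled group holds an rt_mutex that a higher-priority task is blocked
 * on, the PI-boosted owner must keep running to release the lock, so the
 * group is treated as unthrottled until the boost goes away.
 */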
static int rt_se_boosted(struct sched_rt_entity *rt_se)
{
	struct rt_rq *rt_rq = group_rt_rq(rt_se);
	struct task_struct *p;

	if (rt_rq)
		return !!rt_rq->rt_nr_boosted;

	p = rt_task_of(rt_se);
	return p->prio != p->normal_prio;
}

#ifdef CONFIG_SMP
static inline const struct cpumask *sched_rt_period_mask(void)
{
	return this_rq()->rd->span;
}
#else
static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_online_mask;
}
#endif

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
	return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
	return &rt_rq->tg->rt_bandwidth;
}

#else /* !CONFIG_RT_GROUP_SCHED */

static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
{
	return rt_rq->rt_runtime;
}

static inline u64 sched_rt_period(struct rt_rq *rt_rq)
{
	return ktime_to_ns(def_rt_bandwidth.rt_period);
}

typedef struct rt_rq *rt_rq_iter_t;

#define for_each_rt_rq(rt_rq, iter, rq) \
	for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)

#define for_each_sched_rt_entity(rt_se) \
	for (; rt_se; rt_se = NULL)

static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
{
	return NULL;
}

static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	if (!rt_rq->rt_nr_running)
		return;

	enqueue_top_rt_rq(rt_rq);
	resched_curr(rq);
}

static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
{
	dequeue_top_rt_rq(rt_rq);
}

static inline int rt_rq_throttled(struct rt_rq *rt_rq)
{
	return rt_rq->rt_throttled;
}

static inline const struct cpumask *sched_rt_period_mask(void)
{
	return cpu_online_mask;
}

static inline
struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
{
	return &cpu_rq(cpu)->rt;
}

static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
{
	return &def_rt_bandwidth;
}

#endif /* CONFIG_RT_GROUP_SCHED */

bool sched_rt_bandwidth_account(struct rt_rq *rt_rq)
{
	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

	return (hrtimer_active(&rt_b->rt_period_timer) ||
		rt_rq->rt_time < rt_b->rt_runtime);
}
#ifdef CONFIG_SMP
/*
 * We ran out of runtime, see if we can borrow some from our neighbours.
 */
static void do_balance_runtime(struct rt_rq *rt_rq)
{
	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
	struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd;
	int i, weight;
	u64 rt_period;

	weight = cpumask_weight(rd->span);

	raw_spin_lock(&rt_b->rt_runtime_lock);
	rt_period = ktime_to_ns(rt_b->rt_period);
	for_each_cpu(i, rd->span) {
		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
		s64 diff;

		if (iter == rt_rq)
			continue;

		raw_spin_lock(&iter->rt_runtime_lock);
		/*
		 * Either all rqs have inf runtime and there's nothing to steal
		 * or __disable_runtime() below sets a specific rq to inf to
		 * indicate it's been disabled and disallow stealing.
		 */
		if (iter->rt_runtime == RUNTIME_INF)
			goto next;

		/*
		 * From runqueues with spare time, take 1/n part of their
		 * spare time, but no more than our period.
		 */
		diff = iter->rt_runtime - iter->rt_time;
		if (diff > 0) {
			diff = div_u64((u64)diff, weight);
			if (rt_rq->rt_runtime + diff > rt_period)
				diff = rt_period - rt_rq->rt_runtime;
			iter->rt_runtime -= diff;
			rt_rq->rt_runtime += diff;
			if (rt_rq->rt_runtime == rt_period) {
				raw_spin_unlock(&iter->rt_runtime_lock);
				break;
			}
		}
next:
		raw_spin_unlock(&iter->rt_runtime_lock);
	}
	raw_spin_unlock(&rt_b->rt_runtime_lock);
}
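/*
 * Worked example (added for exposition; the numbers are illustrative):
 * assume a 1000 ms period and three CPUs in the root domain (weight == 3).
 * This rt_rq has exhausted its 333 ms runtime, while a neighbour with
 * 333 ms runtime has used only 33 ms. Its spare time is 300 ms, so we may
 * take 300 / 3 = 100 ms from it, leaving the borrower with 433 ms for the
 * remainder of the period (and never more than the full period).
 */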
/*
 * Ensure this RQ takes back all the runtime it lent to its neighbours.
 */
static void __disable_runtime(struct rq *rq)
{
	struct root_domain *rd = rq->rd;
	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	if (unlikely(!scheduler_running))
		return;

	for_each_rt_rq(rt_rq, iter, rq) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
		s64 want;
		int i;

		raw_spin_lock(&rt_b->rt_runtime_lock);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		/*
		 * Either we're all inf and nobody needs to borrow, or we're
		 * already disabled and thus have nothing to do, or we have
		 * exactly the right amount of runtime to take out.
		 */
		if (rt_rq->rt_runtime == RUNTIME_INF ||
				rt_rq->rt_runtime == rt_b->rt_runtime)
			goto balanced;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);

		/*
		 * Calculate the difference between what we started out with
		 * and what we currently have, that's the amount of runtime
		 * we lent and now have to reclaim.
		 */
		want = rt_b->rt_runtime - rt_rq->rt_runtime;

		/*
		 * Greedy reclaim, take back as much as we can.
		 */
		for_each_cpu(i, rd->span) {
			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
			s64 diff;

			/*
			 * Can't reclaim from ourselves or disabled runqueues.
			 */
			if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF)
				continue;

			raw_spin_lock(&iter->rt_runtime_lock);
			if (want > 0) {
				diff = min_t(s64, iter->rt_runtime, want);
				iter->rt_runtime -= diff;
				want -= diff;
			} else {
				iter->rt_runtime -= want;
				want -= want;
			}
			raw_spin_unlock(&iter->rt_runtime_lock);

			if (!want)
				break;
		}

		raw_spin_lock(&rt_rq->rt_runtime_lock);
		/*
		 * We cannot be left wanting - that would mean some runtime
		 * leaked out of the system.
		 */
		BUG_ON(want);
balanced:
		/*
		 * Disable all the borrow logic by pretending we have inf
		 * runtime - in which case borrowing doesn't make sense.
		 */
		rt_rq->rt_runtime = RUNTIME_INF;
		rt_rq->rt_throttled = 0;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		raw_spin_unlock(&rt_b->rt_runtime_lock);

		/* Make rt_rq available for pick_next_task() */
		sched_rt_rq_enqueue(rt_rq);
	}
}

static void __enable_runtime(struct rq *rq)
{
	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	if (unlikely(!scheduler_running))
		return;

	/*
	 * Reset each runqueue's bandwidth settings
	 */
	for_each_rt_rq(rt_rq, iter, rq) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

		raw_spin_lock(&rt_b->rt_runtime_lock);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		rt_rq->rt_runtime = rt_b->rt_runtime;
		rt_rq->rt_time = 0;
		rt_rq->rt_throttled = 0;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		raw_spin_unlock(&rt_b->rt_runtime_lock);
	}
}

static void balance_runtime(struct rt_rq *rt_rq)
{
	if (!sched_feat(RT_RUNTIME_SHARE))
		return;

	if (rt_rq->rt_time > rt_rq->rt_runtime) {
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		do_balance_runtime(rt_rq);
		raw_spin_lock(&rt_rq->rt_runtime_lock);
	}
}
#else /* !CONFIG_SMP */
static inline void balance_runtime(struct rt_rq *rt_rq) {}
#endif /* CONFIG_SMP */

static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
{
	int i, idle = 1, throttled = 0;
	const struct cpumask *span;

	span = sched_rt_period_mask();
#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * FIXME: isolated CPUs should really leave the root task group,
	 * whether they are isolcpus or were isolated via cpusets, lest
	 * the timer run on a CPU which does not service all runqueues,
	 * potentially leaving other CPUs indefinitely throttled.  If
	 * isolation is really required, the user will turn the throttle
	 * off to kill the perturbations it causes anyway.  Meanwhile,
	 * this maintains functionality for boot and/or troubleshooting.
	 */
	if (rt_b == &root_task_group.rt_bandwidth)
		span = cpu_online_mask;
#endif
	for_each_cpu(i, span) {
		int enqueue = 0;
		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
		struct rq *rq = rq_of_rt_rq(rt_rq);
		int skip;

		/*
		 * When span == cpu_online_mask, taking each rq->lock
		 * can be time-consuming. Try to avoid it when possible.
		 */
		raw_spin_lock(&rt_rq->rt_runtime_lock);
		if (!sched_feat(RT_RUNTIME_SHARE) && rt_rq->rt_runtime != RUNTIME_INF)
			rt_rq->rt_runtime = rt_b->rt_runtime;
		skip = !rt_rq->rt_time && !rt_rq->rt_nr_running;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
		if (skip)
			continue;

		raw_spin_rq_lock(rq);
		update_rq_clock(rq);

		if (rt_rq->rt_time) {
			u64 runtime;

			raw_spin_lock(&rt_rq->rt_runtime_lock);
			if (rt_rq->rt_throttled)
				balance_runtime(rt_rq);
			runtime = rt_rq->rt_runtime;
			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
				rt_rq->rt_throttled = 0;
				enqueue = 1;

				/*
				 * When we're idle and a woken (rt) task is
				 * throttled, check_preempt_curr() will set
				 * skip_update and the time between the wakeup
				 * and this unthrottle will get accounted as
				 * 'runtime'.
				 */
				if (rt_rq->rt_nr_running && rq->curr == rq->idle)
					rq_clock_cancel_skipupdate(rq);
			}
			if (rt_rq->rt_time || rt_rq->rt_nr_running)
				idle = 0;
			raw_spin_unlock(&rt_rq->rt_runtime_lock);
		} else if (rt_rq->rt_nr_running) {
			idle = 0;
			if (!rt_rq_throttled(rt_rq))
				enqueue = 1;
		}
		if (rt_rq->rt_throttled)
			throttled = 1;

		if (enqueue)
			sched_rt_rq_enqueue(rt_rq);
		raw_spin_rq_unlock(rq);
	}

	if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF))
		return 1;

	return idle;
}
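/*
 * Worked example (added for exposition): with runtime = 950 ms and a 1 s
 * period, a CPU hog accrues rt_time >= 950 ms and gets throttled. At the
 * next period timer expiry (overrun == 1) the code above subtracts
 * overrun * runtime, i.e. 950 ms, from rt_time; rt_time drops back below
 * runtime, the rt_rq is unthrottled and re-enqueued, and the task may run
 * for up to another 950 ms in the new period.
 */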
static inline int rt_se_prio(struct sched_rt_entity *rt_se)
{
#ifdef CONFIG_RT_GROUP_SCHED
	struct rt_rq *rt_rq = group_rt_rq(rt_se);

	if (rt_rq)
		return rt_rq->highest_prio.curr;
#endif

	return rt_task_of(rt_se)->prio;
}

static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
{
	u64 runtime = sched_rt_runtime(rt_rq);

	if (rt_rq->rt_throttled)
		return rt_rq_throttled(rt_rq);

	if (runtime >= sched_rt_period(rt_rq))
		return 0;

	balance_runtime(rt_rq);
	runtime = sched_rt_runtime(rt_rq);
	if (runtime == RUNTIME_INF)
		return 0;

	if (rt_rq->rt_time > runtime) {
		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);

		/*
		 * Don't actually throttle groups that have no runtime assigned
		 * but accrue some time due to boosting.
		 */
		if (likely(rt_b->rt_runtime)) {
			rt_rq->rt_throttled = 1;
			printk_deferred_once("sched: RT throttling activated\n");
		} else {
			/*
			 * In case we did anyway, make it go away,
			 * replenishment is a joke, since it will replenish us
			 * with exactly 0 ns.
			 */
			rt_rq->rt_time = 0;
		}

		if (rt_rq_throttled(rt_rq)) {
			sched_rt_rq_dequeue(rt_rq);
			return 1;
		}
	}

	return 0;
}
/*
 * Update the current task's runtime statistics. Skip current tasks that
 * are not in our scheduling class.
 */
static void update_curr_rt(struct rq *rq)
{
	struct task_struct *curr = rq->curr;
	struct sched_rt_entity *rt_se = &curr->rt;
	u64 delta_exec;
	u64 now;

	if (curr->sched_class != &rt_sched_class)
		return;

	now = rq_clock_task(rq);
	delta_exec = now - curr->se.exec_start;
	if (unlikely((s64)delta_exec <= 0))
		return;

	schedstat_set(curr->se.statistics.exec_max,
		      max(curr->se.statistics.exec_max, delta_exec));

	curr->se.sum_exec_runtime += delta_exec;
	account_group_exec_runtime(curr, delta_exec);

	curr->se.exec_start = now;
	cgroup_account_cputime(curr, delta_exec);

	if (!rt_bandwidth_enabled())
		return;

	for_each_sched_rt_entity(rt_se) {
		struct rt_rq *rt_rq = rt_rq_of_se(rt_se);

		if (sched_rt_runtime(rt_rq) != RUNTIME_INF) {
			raw_spin_lock(&rt_rq->rt_runtime_lock);
			rt_rq->rt_time += delta_exec;
			if (sched_rt_runtime_exceeded(rt_rq))
				resched_curr(rq);
			raw_spin_unlock(&rt_rq->rt_runtime_lock);
		}
	}
}

static void
dequeue_top_rt_rq(struct rt_rq *rt_rq)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	BUG_ON(&rq->rt != rt_rq);

	if (!rt_rq->rt_queued)
		return;

	BUG_ON(!rq->nr_running);

	sub_nr_running(rq, rt_rq->rt_nr_running);
	rt_rq->rt_queued = 0;

}

static void
enqueue_top_rt_rq(struct rt_rq *rt_rq)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

	BUG_ON(&rq->rt != rt_rq);

	if (rt_rq->rt_queued)
		return;

	if (rt_rq_throttled(rt_rq))
		return;

	if (rt_rq->rt_nr_running) {
		add_nr_running(rq, rt_rq->rt_nr_running);
		rt_rq->rt_queued = 1;
	}

	/* Kick cpufreq (see the comment in kernel/sched/sched.h). */
	cpufreq_update_util(rq, 0);
}

#if defined CONFIG_SMP

static void
inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * Change rq's cpupri only if rt_rq is the top queue.
	 */
	if (&rq->rt != rt_rq)
		return;
#endif
	if (rq->online && prio < prev_prio)
		cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
}

static void
dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
{
	struct rq *rq = rq_of_rt_rq(rt_rq);

#ifdef CONFIG_RT_GROUP_SCHED
	/*
	 * Change rq's cpupri only if rt_rq is the top queue.
	 */
	if (&rq->rt != rt_rq)
		return;
#endif
	if (rq->online && rt_rq->highest_prio.curr != prev_prio)
		cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
}
#else /* CONFIG_SMP */

static inline
void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}
static inline
void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {}

#endif /* CONFIG_SMP */

#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
static void
inc_rt_prio(struct rt_rq *rt_rq, int prio)
{
	int prev_prio = rt_rq->highest_prio.curr;

	if (prio < prev_prio)
		rt_rq->highest_prio.curr = prio;

	inc_rt_prio_smp(rt_rq, prio, prev_prio);
}

static void
dec_rt_prio(struct rt_rq *rt_rq, int prio)
{
	int prev_prio = rt_rq->highest_prio.curr;

	if (rt_rq->rt_nr_running) {

		WARN_ON(prio < prev_prio);

		/*
		 * This may have been our highest task, and therefore
		 * we may have some recomputation to do
		 */
		if (prio == prev_prio) {
			struct rt_prio_array *array = &rt_rq->active;

			rt_rq->highest_prio.curr =
				sched_find_first_bit(array->bitmap);
		}

	} else {
		rt_rq->highest_prio.curr = MAX_RT_PRIO-1;
	}

	dec_rt_prio_smp(rt_rq, prio, prev_prio);
}

#else

static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {}
static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {}

#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */

#ifdef CONFIG_RT_GROUP_SCHED

static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (rt_se_boosted(rt_se))
		rt_rq->rt_nr_boosted++;

	if (rt_rq->tg)
		start_rt_bandwidth(&rt_rq->tg->rt_bandwidth);
}

static void
dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	if (rt_se_boosted(rt_se))
		rt_rq->rt_nr_boosted--;

	WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted);
}

#else /* CONFIG_RT_GROUP_SCHED */

static void
inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	start_rt_bandwidth(&def_rt_bandwidth);
}

static inline
void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {}

#endif /* CONFIG_RT_GROUP_SCHED */

static inline
unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se)
{
	struct rt_rq *group_rq = group_rt_rq(rt_se);

	if (group_rq)
		return group_rq->rt_nr_running;
	else
		return 1;
}

static inline
unsigned int rt_se_rr_nr_running(struct sched_rt_entity *rt_se)
{
	struct rt_rq *group_rq = group_rt_rq(rt_se);
	struct task_struct *tsk;

	if (group_rq)
		return group_rq->rr_nr_running;

	tsk = rt_task_of(rt_se);

	return (tsk->policy == SCHED_RR) ? 1 : 0;
}
static inline
void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	int prio = rt_se_prio(rt_se);

	WARN_ON(!rt_prio(prio));
	rt_rq->rt_nr_running += rt_se_nr_running(rt_se);
	rt_rq->rr_nr_running += rt_se_rr_nr_running(rt_se);

	inc_rt_prio(rt_rq, prio);
	inc_rt_migration(rt_se, rt_rq);
	inc_rt_group(rt_se, rt_rq);
}

static inline
void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
{
	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
	WARN_ON(!rt_rq->rt_nr_running);
	rt_rq->rt_nr_running -= rt_se_nr_running(rt_se);
	rt_rq->rr_nr_running -= rt_se_rr_nr_running(rt_se);

	dec_rt_prio(rt_rq, rt_se_prio(rt_se));
	dec_rt_migration(rt_se, rt_rq);
	dec_rt_group(rt_se, rt_rq);
}

/*
 * Change rt_se->run_list location unless SAVE && !MOVE
 *
 * assumes ENQUEUE/DEQUEUE flags match
 */
static inline bool move_entity(unsigned int flags)
{
	if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) == DEQUEUE_SAVE)
		return false;

	return true;
}

static void __delist_rt_entity(struct sched_rt_entity *rt_se, struct rt_prio_array *array)
{
	list_del_init(&rt_se->run_list);

	if (list_empty(array->queue + rt_se_prio(rt_se)))
		__clear_bit(rt_se_prio(rt_se), array->bitmap);

	rt_se->on_list = 0;
}

static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;
	struct rt_rq *group_rq = group_rt_rq(rt_se);
	struct list_head *queue = array->queue + rt_se_prio(rt_se);

	/*
	 * Don't enqueue the group if it's throttled, or when empty.
	 * The latter is a consequence of the former when a child group
	 * gets throttled and the current group doesn't have any other
	 * active members.
	 */
	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) {
		if (rt_se->on_list)
			__delist_rt_entity(rt_se, array);
		return;
	}

	if (move_entity(flags)) {
		WARN_ON_ONCE(rt_se->on_list);
		if (flags & ENQUEUE_HEAD)
			list_add(&rt_se->run_list, queue);
		else
			list_add_tail(&rt_se->run_list, queue);

		__set_bit(rt_se_prio(rt_se), array->bitmap);
		rt_se->on_list = 1;
	}
	rt_se->on_rq = 1;

	inc_rt_tasks(rt_se, rt_rq);
}

static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
	struct rt_prio_array *array = &rt_rq->active;

	if (move_entity(flags)) {
		WARN_ON_ONCE(!rt_se->on_list);
		__delist_rt_entity(rt_se, array);
	}
	rt_se->on_rq = 0;

	dec_rt_tasks(rt_se, rt_rq);
}
/*
 * Because the prio of an upper entry depends on the lower
 * entries, we must remove entries top - down.
 */
static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct sched_rt_entity *back = NULL;

	for_each_sched_rt_entity(rt_se) {
		rt_se->back = back;
		back = rt_se;
	}

	dequeue_top_rt_rq(rt_rq_of_se(back));

	for (rt_se = back; rt_se; rt_se = rt_se->back) {
		if (on_rt_rq(rt_se))
			__dequeue_rt_entity(rt_se, flags);
	}
}

static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	dequeue_rt_stack(rt_se, flags);
	for_each_sched_rt_entity(rt_se)
		__enqueue_rt_entity(rt_se, flags);
	enqueue_top_rt_rq(&rq->rt);
}

static void dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags)
{
	struct rq *rq = rq_of_rt_se(rt_se);

	dequeue_rt_stack(rt_se, flags);

	for_each_sched_rt_entity(rt_se) {
		struct rt_rq *rt_rq = group_rt_rq(rt_se);

		if (rt_rq && rt_rq->rt_nr_running)
			__enqueue_rt_entity(rt_se, flags);
	}
	enqueue_top_rt_rq(&rq->rt);
}

/*
 * Adding/removing a task to/from a priority array:
 */
static void
enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_rt_entity *rt_se = &p->rt;

	if (flags & ENQUEUE_WAKEUP)
		rt_se->timeout = 0;

	enqueue_rt_entity(rt_se, flags);

	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
}

static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
{
	struct sched_rt_entity *rt_se = &p->rt;

	update_curr_rt(rq);
	dequeue_rt_entity(rt_se, flags);

	dequeue_pushable_task(rq, p);
}

/*
 * Put the task at the head or the tail of the run list without the overhead
 * of dequeue followed by enqueue.
 */
static void
requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
{
	if (on_rt_rq(rt_se)) {
		struct rt_prio_array *array = &rt_rq->active;
		struct list_head *queue = array->queue + rt_se_prio(rt_se);

		if (head)
			list_move(&rt_se->run_list, queue);
		else
			list_move_tail(&rt_se->run_list, queue);
	}
}

static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
{
	struct sched_rt_entity *rt_se = &p->rt;
	struct rt_rq *rt_rq;

	for_each_sched_rt_entity(rt_se) {
		rt_rq = rt_rq_of_se(rt_se);
		requeue_rt_entity(rt_rq, rt_se, head);
	}
}

static void yield_task_rt(struct rq *rq)
{
	requeue_task_rt(rq, rq->curr, 0);
}
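/*
 * Example (added for exposition): two SCHED_RR tasks share the same
 * priority. When one calls sched_yield(), yield_task_rt() requeues it at
 * the tail of its priority queue (head == 0), so the other task at that
 * priority is picked next; a lone task at its priority simply keeps
 * running.
 */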
#ifdef CONFIG_SMP
static int find_lowest_rq(struct task_struct *task);

static int
select_task_rq_rt(struct task_struct *p, int cpu, int flags)
{
	struct task_struct *curr;
	struct rq *rq;
	bool test;

	/* For anything but wake ups, just return the task_cpu */
	if (!(flags & (WF_TTWU | WF_FORK)))
		goto out;

	rq = cpu_rq(cpu);

	rcu_read_lock();
	curr = READ_ONCE(rq->curr); /* unlocked access */

	/*
	 * If the current task on @p's runqueue is an RT task, then
	 * try to see if we can wake this RT task up on another
	 * runqueue. Otherwise simply start this RT task
	 * on its current runqueue.
	 *
	 * We want to avoid overloading runqueues. If the woken
	 * task is a higher priority, then it will stay on this CPU
	 * and the lower prio task should be moved to another CPU.
	 * Even though this will probably make the lower prio task
	 * lose its cache, we do not want to bounce a higher task
	 * around just because it gave up its CPU, perhaps for a
	 * lock?
	 *
	 * For equal prio tasks, we just let the scheduler sort it out.
	 *
	 * Otherwise, just let it ride on the affined RQ and the
	 * post-schedule router will push the preempted task away.
	 *
	 * This test is optimistic; if we get it wrong the load-balancer
	 * will have to sort it out.
	 *
	 * We take into account the capacity of the CPU to ensure it fits the
	 * requirement of the task - which is only important on heterogeneous
	 * systems like big.LITTLE.
	 */
	test = curr &&
	       unlikely(rt_task(curr)) &&
	       (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio);

	if (test || !rt_task_fits_capacity(p, cpu)) {
		int target = find_lowest_rq(p);

		/*
		 * Bail out if we were forcing a migration to find a better
		 * fitting CPU but our search failed.
		 */
		if (!test && target != -1 && !rt_task_fits_capacity(p, target))
			goto out_unlock;

		/*
		 * Don't bother moving it if the destination CPU is
		 * not running a lower priority task.
		 */
		if (target != -1 &&
		    p->prio < cpu_rq(target)->rt.highest_prio.curr)
			cpu = target;
	}

out_unlock:
	rcu_read_unlock();

out:
	return cpu;
}
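/*
 * Worked example (added for exposition): a prio-20 RT task wakes on a CPU
 * whose current task is a prio-10 RT task (numerically lower == higher
 * priority), so curr->prio <= p->prio and "test" is true. find_lowest_rq()
 * then looks for a CPU running only lower-priority work; if one exists
 * (its highest_prio.curr is numerically greater than 20), the wakee is
 * placed there instead of waiting behind the higher-priority task.
 */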
static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
{
	/*
	 * Current can't be migrated, useless to reschedule,
	 * let's hope p can move out.
	 */
	if (rq->curr->nr_cpus_allowed == 1 ||
	    !cpupri_find(&rq->rd->cpupri, rq->curr, NULL))
		return;

	/*
	 * p is migratable, so let's not schedule it and
	 * see if it is pushed or pulled somewhere else.
	 */
	if (p->nr_cpus_allowed != 1 &&
	    cpupri_find(&rq->rd->cpupri, p, NULL))
		return;

	/*
	 * There appear to be other CPUs that can accept
	 * the current task but none can run 'p', so let's reschedule
	 * to try and push the current task away:
	 */
	requeue_task_rt(rq, p, 1);
	resched_curr(rq);
}

static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
{
	if (!on_rt_rq(&p->rt) && need_pull_rt_task(rq, p)) {
		/*
		 * This is OK, because current is on_cpu, which avoids it being
		 * picked for load-balance and preemption/IRQs are still
		 * disabled avoiding further scheduler activity on it and we've
		 * not yet started the picking loop.
		 */
		rq_unpin_lock(rq, rf);
		pull_rt_task(rq);
		rq_repin_lock(rq, rf);
	}

	return sched_stop_runnable(rq) || sched_dl_runnable(rq) || sched_rt_runnable(rq);
}
#endif /* CONFIG_SMP */

/*
 * Preempt the current task with a newly woken task if needed:
 */
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
{
	if (p->prio < rq->curr->prio) {
		resched_curr(rq);
		return;
	}

#ifdef CONFIG_SMP
	/*
	 * If:
	 *
	 * - the newly woken task is of equal priority to the current task
	 * - the newly woken task is non-migratable while current is migratable
	 * - current will be preempted on the next reschedule
	 *
	 * we should check to see if current can readily move to a different
	 * cpu.  If so, we will reschedule to allow the push logic to try
	 * to move current somewhere else, making room for our non-migratable
	 * task.
	 */
	if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr))
		check_preempt_equal_prio(rq, p);
#endif
}

static inline void set_next_task_rt(struct rq *rq, struct task_struct *p, bool first)
{
	p->se.exec_start = rq_clock_task(rq);

	/* The running task is never eligible for pushing */
	dequeue_pushable_task(rq, p);

	if (!first)
		return;

	/*
	 * If prev task was rt, put_prev_task() has already updated the
	 * utilization. We only care of the case where we start to schedule a
	 * rt task
	 */
	if (rq->curr->sched_class != &rt_sched_class)
		update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);

	rt_queue_push_tasks(rq);
}

static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
						   struct rt_rq *rt_rq)
{
	struct rt_prio_array *array = &rt_rq->active;
	struct sched_rt_entity *next = NULL;
	struct list_head *queue;
	int idx;

	idx = sched_find_first_bit(array->bitmap);
	BUG_ON(idx >= MAX_RT_PRIO);

	queue = array->queue + idx;
	next = list_entry(queue->next, struct sched_rt_entity, run_list);

	return next;
}

static struct task_struct *_pick_next_task_rt(struct rq *rq)
{
	struct sched_rt_entity *rt_se;
	struct rt_rq *rt_rq  = &rq->rt;

	do {
		rt_se = pick_next_rt_entity(rq, rt_rq);
		BUG_ON(!rt_se);
		rt_rq = group_rt_rq(rt_se);
	} while (rt_rq);

	return rt_task_of(rt_se);
}

static struct task_struct *pick_task_rt(struct rq *rq)
{
	struct task_struct *p;

	if (!sched_rt_runnable(rq))
		return NULL;

	p = _pick_next_task_rt(rq);

	return p;
}

static struct task_struct *pick_next_task_rt(struct rq *rq)
{
	struct task_struct *p = pick_task_rt(rq);

	if (p)
		set_next_task_rt(rq, p, true);

	return p;
}

static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
{
	update_curr_rt(rq);

	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);

	/*
	 * The previous task needs to be made eligible for pushing
	 * if it is still active
	 */
	if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
		enqueue_pushable_task(rq, p);
}

#ifdef CONFIG_SMP

/* Only try algorithms three times */
#define RT_MAX_TRIES 3

static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
{
	if (!task_running(rq, p) &&
	    cpumask_test_cpu(cpu, &p->cpus_mask))
		return 1;

	return 0;
}
/*
 * Return the highest-priority pushable task on @rq that is suitable to be
 * executed on @cpu, or NULL if there is none.
 */
static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu)
{
	struct plist_head *head = &rq->rt.pushable_tasks;
	struct task_struct *p;

	if (!has_pushable_tasks(rq))
		return NULL;

	plist_for_each_entry(p, head, pushable_tasks) {
		if (pick_rt_task(rq, p, cpu))
			return p;
	}

	return NULL;
}

static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask);

static int find_lowest_rq(struct task_struct *task)
{
	struct sched_domain *sd;
	struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask);
	int this_cpu = smp_processor_id();
	int cpu      = task_cpu(task);
	int ret;

	/* Make sure the mask is initialized first */
	if (unlikely(!lowest_mask))
		return -1;

	if (task->nr_cpus_allowed == 1)
		return -1; /* No other targets possible */

	/*
	 * If we're on asym system ensure we consider the different capacities
	 * of the CPUs when searching for the lowest_mask.
	 */
	if (static_branch_unlikely(&sched_asym_cpucapacity)) {

		ret = cpupri_find_fitness(&task_rq(task)->rd->cpupri,
					  task, lowest_mask,
					  rt_task_fits_capacity);
	} else {

		ret = cpupri_find(&task_rq(task)->rd->cpupri,
				  task, lowest_mask);
	}

	if (!ret)
		return -1; /* No targets found */

	/*
	 * At this point we have built a mask of CPUs representing the
	 * lowest priority tasks in the system.  Now we want to elect
	 * the best one based on our affinity and topology.
	 *
	 * We prioritize the last CPU that the task executed on since
	 * it is most likely cache-hot in that location.
	 */
	if (cpumask_test_cpu(cpu, lowest_mask))
		return cpu;

	/*
	 * Otherwise, we consult the sched_domains span maps to figure
	 * out which CPU is logically closest to our hot cache data.
	 */
	if (!cpumask_test_cpu(this_cpu, lowest_mask))
		this_cpu = -1; /* Skip this_cpu opt if not among lowest */

	rcu_read_lock();
	for_each_domain(cpu, sd) {
		if (sd->flags & SD_WAKE_AFFINE) {
			int best_cpu;

			/*
			 * "this_cpu" is cheaper to preempt than a
			 * remote processor.
			 */
			if (this_cpu != -1 &&
			    cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
				rcu_read_unlock();
				return this_cpu;
			}

			best_cpu = cpumask_any_and_distribute(lowest_mask,
							      sched_domain_span(sd));
			if (best_cpu < nr_cpu_ids) {
				rcu_read_unlock();
				return best_cpu;
			}
		}
	}
	rcu_read_unlock();

	/*
	 * And finally, if there were no matches within the domains
	 * just give the caller *something* to work with from the compatible
	 * locations.
	 */
	if (this_cpu != -1)
		return this_cpu;

	cpu = cpumask_any_distribute(lowest_mask);
	if (cpu < nr_cpu_ids)
		return cpu;

	return -1;
}
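/*
 * Selection-order recap (added for exposition): given the lowest_mask that
 * cpupri computed above, find_lowest_rq() prefers (1) the task's previous
 * CPU if it is in the mask (cache hot), then (2) this_cpu if it shares an
 * SD_WAKE_AFFINE domain with the previous CPU, then (3) any mask CPU inside
 * such a domain, then (4) this_cpu if it is in the mask at all, and finally
 * (5) any CPU in the mask; -1 means no candidate was found.
 */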
/* Will lock the rq it finds */
static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
{
	struct rq *lowest_rq = NULL;
	int tries;
	int cpu;

	for (tries = 0; tries < RT_MAX_TRIES; tries++) {
		cpu = find_lowest_rq(task);

		if ((cpu == -1) || (cpu == rq->cpu))
			break;

		lowest_rq = cpu_rq(cpu);

		if (lowest_rq->rt.highest_prio.curr <= task->prio) {
			/*
			 * Target rq has tasks of equal or higher priority,
			 * retrying does not release any lock and is unlikely
			 * to yield a different result.
			 */
			lowest_rq = NULL;
			break;
		}

		/* if the prio of this runqueue changed, try again */
		if (double_lock_balance(rq, lowest_rq)) {
			/*
			 * We had to unlock the run queue. In the
			 * meantime, the task could have
			 * migrated already or had its affinity changed.
			 * Also make sure that it wasn't scheduled on its rq.
			 */
			if (unlikely(task_rq(task) != rq ||
				     !cpumask_test_cpu(lowest_rq->cpu, &task->cpus_mask) ||
				     task_running(rq, task) ||
				     !rt_task(task) ||
				     !task_on_rq_queued(task))) {

				double_unlock_balance(rq, lowest_rq);
				lowest_rq = NULL;
				break;
			}
		}

		/* If this rq is still suitable use it. */
		if (lowest_rq->rt.highest_prio.curr > task->prio)
			break;

		/* try again */
		double_unlock_balance(rq, lowest_rq);
		lowest_rq = NULL;
	}

	return lowest_rq;
}
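/*
 * Note (added for exposition): double_lock_balance() may have to drop
 * rq->lock in order to take both locks in a consistent order. While the
 * lock is dropped the task can migrate, change affinity, start running,
 * leave the RT class or be dequeued, which is why all of those conditions
 * are re-checked above before the push is allowed to proceed.
 */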
static struct task_struct *pick_next_pushable_task(struct rq *rq)
{
	struct task_struct *p;

	if (!has_pushable_tasks(rq))
		return NULL;

	p = plist_first_entry(&rq->rt.pushable_tasks,
			      struct task_struct, pushable_tasks);

	BUG_ON(rq->cpu != task_cpu(p));
	BUG_ON(task_current(rq, p));
	BUG_ON(p->nr_cpus_allowed <= 1);

	BUG_ON(!task_on_rq_queued(p));
	BUG_ON(!rt_task(p));

	return p;
}

/*
 * If the current CPU has more than one RT task, see if the non
 * running task can migrate over to a CPU that is running a task
 * of lesser priority.
 */
static int push_rt_task(struct rq *rq, bool pull)
{
	struct task_struct *next_task;
	struct rq *lowest_rq;
	int ret = 0;

	if (!rq->rt.overloaded)
		return 0;

	next_task = pick_next_pushable_task(rq);
	if (!next_task)
		return 0;

retry:
	if (is_migration_disabled(next_task)) {
		struct task_struct *push_task = NULL;
		int cpu;

		if (!pull || rq->push_busy)
			return 0;

		cpu = find_lowest_rq(rq->curr);
		if (cpu == -1 || cpu == rq->cpu)
			return 0;

		/*
		 * Given we found a CPU with lower priority than @next_task,
		 * therefore it should be running. However we cannot migrate it
		 * to this other CPU, instead attempt to push the current
		 * running task on this CPU away.
		 */
		push_task = get_push_task(rq);
		if (push_task) {
			raw_spin_rq_unlock(rq);
			stop_one_cpu_nowait(rq->cpu, push_cpu_stop,
					    push_task, &rq->push_work);
			raw_spin_rq_lock(rq);
		}

		return 0;
	}

	if (WARN_ON(next_task == rq->curr))
		return 0;

	/*
	 * It's possible that the next_task slipped in with a
	 * higher priority than current. If that's the case
	 * just reschedule current.
	 */
	if (unlikely(next_task->prio < rq->curr->prio)) {
		resched_curr(rq);
		return 0;
	}

	/* We might release rq lock */
	get_task_struct(next_task);

	/* find_lock_lowest_rq locks the rq if found */
	lowest_rq = find_lock_lowest_rq(next_task, rq);
	if (!lowest_rq) {
		struct task_struct *task;
		/*
		 * find_lock_lowest_rq releases rq->lock
		 * so it is possible that next_task has migrated.
		 *
		 * We need to make sure that the task is still on the same
		 * run-queue and is also still the next task eligible for
		 * pushing.
		 */
		task = pick_next_pushable_task(rq);
		if (task == next_task) {
			/*
			 * The task hasn't migrated, and is still the next
			 * eligible task, but we failed to find a run-queue
			 * to push it to.  Do not retry in this case, since
			 * other CPUs will pull from us when ready.
			 */
			goto out;
		}

		if (!task)
			/* No more tasks, just exit */
			goto out;

		/*
		 * Something has shifted, try again.
		 */
		put_task_struct(next_task);
		next_task = task;
		goto retry;
	}

	deactivate_task(rq, next_task, 0);
	set_task_cpu(next_task, lowest_rq->cpu);
	activate_task(lowest_rq, next_task, 0);
	resched_curr(lowest_rq);
	ret = 1;

	double_unlock_balance(rq, lowest_rq);
out:
	put_task_struct(next_task);

	return ret;
}

static void push_rt_tasks(struct rq *rq)
{
	/* push_rt_task will return true if it moved an RT */
	while (push_rt_task(rq, false))
		;
}

#ifdef HAVE_RT_PUSH_IPI

/*
 * When a high priority task schedules out from a CPU and a lower priority
 * task is scheduled in, a check is made to see if there's any RT tasks
 * on other CPUs that are waiting to run because a higher priority RT task
 * is currently running on its CPU. In this case, the CPU with multiple RT
 * tasks queued on it (overloaded) needs to be notified that a CPU has opened
 * up that may be able to run one of its non-running queued RT tasks.
 *
 * All CPUs with overloaded RT tasks need to be notified as there is currently
 * no way to know which of these CPUs have the highest priority task waiting
 * to run. Instead of trying to take a spinlock on each of these CPUs,
 * which has shown to cause large latency when done on machines with many
 * CPUs, an IPI is sent to the CPUs to have them push off their overloaded
 * RT tasks waiting to run.
 *
 * Just sending an IPI to each of the CPUs is also an issue, as on large
 * count CPU machines, this can cause an IPI storm on a CPU, especially
 * if it's the only CPU with multiple RT tasks queued, and a large number
 * of CPUs scheduling a lower priority task at the same time.
 *
 * Each root domain has its own irq work function that can iterate over
 * all CPUs with RT overloaded tasks. Since all CPUs with overloaded RT
 * tasks must be checked whether one or many CPUs are lowering their
 * priority, there's a single irq work iterator that will try to
 * push off RT tasks that are waiting to run.
 *
 * When a CPU schedules a lower priority task, it will kick off the
 * irq work iterator that will jump to each CPU with overloaded RT tasks.
 * As it only takes the first CPU that schedules a lower priority task
 * to start the process, the rto_start variable is incremented and if
 * the atomic result is one, then that CPU will try to take the rto_lock.
 * This prevents high contention on the lock as the process handles all
 * CPUs scheduling lower priority tasks.
 *
 * All CPUs that are scheduling a lower priority task will increment the
 * rt_loop_next variable. This will make sure that the irq work iterator
 * checks all RT overloaded CPUs whenever a CPU schedules a new lower
 * priority task, even if the iterator is in the middle of a scan. Incrementing
 * the rt_loop_next will cause the iterator to perform another scan.
 *
 */
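/*
 * Example flow (added for exposition): CPU0 and CPU2 are RT-overloaded
 * when CPU5 schedules a lower-priority task. CPU5 increments rto_loop_next,
 * wins rto_loop_start and queues rto_push_work on CPU0. CPU0 pushes what it
 * can and passes the irq work on to CPU2; once rto_next_cpu() runs out of
 * CPUs and rto_loop matches rto_loop_next, the chain stops and the root
 * domain reference is dropped.
 */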
static int rto_next_cpu(struct root_domain *rd)
{
	int next;
	int cpu;

	/*
	 * When starting the IPI RT pushing, the rto_cpu is set to -1,
	 * rt_next_cpu() will simply return the first CPU found in
	 * the rto_mask.
	 *
	 * If rto_next_cpu() is called with rto_cpu is a valid CPU, it
	 * will return the next CPU found in the rto_mask.
	 *
	 * If there are no more CPUs left in the rto_mask, then a check is made
	 * against rto_loop and rto_loop_next.  rto_loop is only updated with
	 * the rto_lock held, but any CPU may increment the rto_loop_next
	 * without any locking.
	 */
	for (;;) {

		/* When rto_cpu is -1 this acts like cpumask_first() */
		cpu = cpumask_next(rd->rto_cpu, rd->rto_mask);

		rd->rto_cpu = cpu;

		if (cpu < nr_cpu_ids)
			return cpu;

		rd->rto_cpu = -1;

		/*
		 * ACQUIRE ensures we see the @rto_mask changes
		 * made prior to the @next value observed.
		 *
		 * Matches WMB in rt_set_overload().
		 */
		next = atomic_read_acquire(&rd->rto_loop_next);

		if (rd->rto_loop == next)
			break;

		rd->rto_loop = next;
	}

	return -1;
}

static inline bool rto_start_trylock(atomic_t *v)
{
	return !atomic_cmpxchg_acquire(v, 0, 1);
}

static inline void rto_start_unlock(atomic_t *v)
{
	atomic_set_release(v, 0);
}

static void tell_cpu_to_push(struct rq *rq)
{
	int cpu = -1;

	/* Keep the loop going if the IPI is currently active */
	atomic_inc(&rq->rd->rto_loop_next);

	/* Only one CPU can initiate a loop at a time */
	if (!rto_start_trylock(&rq->rd->rto_loop_start))
		return;

	raw_spin_lock(&rq->rd->rto_lock);

	/*
	 * The rto_cpu is updated under the lock, if it has a valid CPU
	 * then the IPI is still running and will continue due to the
	 * update to loop_next, and nothing needs to be done here.
	 * Otherwise it is finishing up and an ipi needs to be sent.
	 */
	if (rq->rd->rto_cpu < 0)
		cpu = rto_next_cpu(rq->rd);

	raw_spin_unlock(&rq->rd->rto_lock);

	rto_start_unlock(&rq->rd->rto_loop_start);

	if (cpu >= 0) {
		/* Make sure the rd does not get freed while pushing */
		sched_get_rd(rq->rd);
		irq_work_queue_on(&rq->rd->rto_push_work, cpu);
	}
}

static void pull_rt_task(struct rq *this_rq)
{
	int this_cpu = this_rq->cpu, cpu;
	bool resched = false;
	struct task_struct *p, *push_task;
	struct rq *src_rq;
	int rt_overload_count = rt_overloaded(this_rq);

	if (likely(!rt_overload_count))
		return;

	/*
	 * Match the barrier from rt_set_overload(); this guarantees that if we
	 * see overloaded we must also see the rto_mask bit.
	 */
	smp_rmb();

	/* If we are the only overloaded CPU do nothing */
	if (rt_overload_count == 1 &&
	    cpumask_test_cpu(this_rq->cpu, this_rq->rd->rto_mask))
		return;

#ifdef HAVE_RT_PUSH_IPI
	if (sched_feat(RT_PUSH_IPI)) {
		tell_cpu_to_push(this_rq);
		return;
	}
#endif

	for_each_cpu(cpu, this_rq->rd->rto_mask) {
		if (this_cpu == cpu)
			continue;

		src_rq = cpu_rq(cpu);

		/*
		 * Don't bother taking the src_rq->lock if the next highest
		 * task is known to be lower-priority than our current task.
		 * This may look racy, but if this value is about to go
		 * logically higher, the src_rq will push this task away.
		 * And if it's going logically lower, we do not care.
		 */
		if (src_rq->rt.highest_prio.next >=
		    this_rq->rt.highest_prio.curr)
			continue;

		/*
		 * We can potentially drop this_rq's lock in
		 * double_lock_balance, and another CPU could
		 * alter this_rq.
		 */
		push_task = NULL;
		double_lock_balance(this_rq, src_rq);

		/*
		 * We can only pull a task that is pushable on its
		 * own rq, and no others.
		 */
		p = pick_highest_pushable_task(src_rq, this_cpu);

		/*
		 * Do we have an RT task that preempts
		 * the to-be-scheduled task?
		 */
		if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
			WARN_ON(p == src_rq->curr);
			WARN_ON(!task_on_rq_queued(p));

			/*
			 * There's a chance that p is higher in priority
			 * than what's currently running on its CPU.
			 * This is just that p is waking up and hasn't
			 * had a chance to schedule yet. We only pull
			 * p if it is lower in priority than the
			 * current task on its run queue.
			 */
			if (p->prio < src_rq->curr->prio)
				goto skip;

			if (is_migration_disabled(p)) {
				push_task = get_push_task(src_rq);
			} else {
				deactivate_task(src_rq, p, 0);
				set_task_cpu(p, this_cpu);
				activate_task(this_rq, p, 0);
				resched = true;
			}
			/*
			 * We continue with the search, just in
			 * case there's an even higher prio task
			 * in another runqueue. (low likelihood
			 * but possible)
			 */
		}
skip:
		double_unlock_balance(this_rq, src_rq);

		if (push_task) {
			raw_spin_rq_unlock(this_rq);
			stop_one_cpu_nowait(src_rq->cpu, push_cpu_stop,
					    push_task, &src_rq->push_work);
			raw_spin_rq_lock(this_rq);
		}
	}

	if (resched)
		resched_curr(this_rq);
}
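
/*
 * Illustrative sketch (compiled out): in the comparisons above a
 * numerically *lower* ->prio means a *higher* scheduling priority.  The
 * pull decision therefore needs two checks: the candidate must beat the
 * best task this CPU has queued, but must not beat the task currently
 * running on its own CPU (if it does, it is about to run there anyway and
 * should be left alone).  demo_should_pull() is a stand-alone restatement
 * of that rule, not a kernel helper.
 */
#if 0
#include <stdbool.h>

/* Lower value == higher priority, as with task_struct::prio. */
static bool demo_should_pull(int candidate_prio,
			     int our_highest_queued_prio,
			     int src_curr_prio)
{
	if (candidate_prio >= our_highest_queued_prio)
		return false;	/* nothing to gain from pulling */
	if (candidate_prio < src_curr_prio)
		return false;	/* it will preempt its own CPU shortly */
	return true;
}
#endif /* illustrative sketch */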

/*
 * If we are not running and we are not going to reschedule soon, we should
 * try to push tasks away now.
 */
static void task_woken_rt(struct rq *rq, struct task_struct *p)
{
	bool need_to_push = !task_running(rq, p) &&
			    !test_tsk_need_resched(rq->curr) &&
			    p->nr_cpus_allowed > 1 &&
			    (dl_task(rq->curr) || rt_task(rq->curr)) &&
			    (rq->curr->nr_cpus_allowed < 2 ||
			     rq->curr->prio <= p->prio);

	if (need_to_push)
		push_rt_tasks(rq);
}

/* Assumes rq->lock is held */
static void rq_online_rt(struct rq *rq)
{
	if (rq->rt.overloaded)
		rt_set_overload(rq);

	__enable_runtime(rq);

	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr);
}

/* Assumes rq->lock is held */
static void rq_offline_rt(struct rq *rq)
{
	if (rq->rt.overloaded)
		rt_clear_overload(rq);

	__disable_runtime(rq);

	cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
}

/*
 * When switching away from the RT queue, we bring ourselves to a position
 * where we might want to pull RT tasks from other runqueues.
 */
static void switched_from_rt(struct rq *rq, struct task_struct *p)
{
	/*
	 * If there are other RT tasks then we will reschedule
	 * and the scheduling of the other RT tasks will handle
	 * the balancing. But if we are the last RT task
	 * we may need to handle the pulling of RT tasks
	 * now.
	 */
	if (!task_on_rq_queued(p) || rq->rt.rt_nr_running)
		return;

	rt_queue_pull_task(rq);
}

void __init init_sched_rt_class(void)
{
	unsigned int i;

	for_each_possible_cpu(i) {
		zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i),
					GFP_KERNEL, cpu_to_node(i));
	}
}
#endif /* CONFIG_SMP */

/*
 * When switching a task to RT, we may overload the runqueue
 * with RT tasks. In this case we try to push them off to
 * other runqueues.
 */
static void switched_to_rt(struct rq *rq, struct task_struct *p)
{
	/*
	 * If we are running, update the avg_rt tracking, as the running time
	 * will from now on be accounted into the latter.
	 */
	if (task_current(rq, p)) {
		update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 0);
		return;
	}

	/*
	 * If we are not running we may need to preempt the current
	 * running task. If that current running task is also an RT task
	 * then see if we can move to another run queue.
	 */
	if (task_on_rq_queued(p)) {
#ifdef CONFIG_SMP
		if (p->nr_cpus_allowed > 1 && rq->rt.overloaded)
			rt_queue_push_tasks(rq);
#endif /* CONFIG_SMP */
		if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
			resched_curr(rq);
	}
}
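
/*
 * Illustrative sketch (compiled out): the p->prio comparisons above follow
 * the kernel-internal convention that a numerically lower ->prio means a
 * higher priority.  For SCHED_FIFO/SCHED_RR the user-visible sched_priority
 * (1..99, higher is stronger) is mapped onto that internal scale -- in
 * mainline this mapping is MAX_RT_PRIO-1 - sched_priority, so
 * sched_priority 99 becomes prio 0.  The helper below only restates that
 * mapping for reference; the DEMO_* names are assumptions of the sketch,
 * not kernel API.
 */
#if 0
#define DEMO_MAX_RT_PRIO	100

/* User-space restatement of the RT sched_priority -> prio mapping. */
static int demo_rt_user_to_internal_prio(int sched_priority)
{
	return DEMO_MAX_RT_PRIO - 1 - sched_priority;	/* 99 -> 0, 1 -> 98 */
}
#endif /* illustrative sketch */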

/*
 * Priority of the task has changed. This may cause
 * us to initiate a push or pull.
 */
static void
prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
{
	if (!task_on_rq_queued(p))
		return;

	if (task_current(rq, p)) {
#ifdef CONFIG_SMP
		/*
		 * If our priority decreases while running, we
		 * may need to pull tasks to this runqueue.
		 */
		if (oldprio < p->prio)
			rt_queue_pull_task(rq);

		/*
		 * If there's a higher priority task waiting to run
		 * then reschedule.
		 */
		if (p->prio > rq->rt.highest_prio.curr)
			resched_curr(rq);
#else
		/* For UP simply resched on drop of prio */
		if (oldprio < p->prio)
			resched_curr(rq);
#endif /* CONFIG_SMP */
	} else {
		/*
		 * This task is not running, but if it is
		 * higher in priority than the current running task
		 * then reschedule.
		 */
		if (p->prio < rq->curr->prio)
			resched_curr(rq);
	}
}

#ifdef CONFIG_POSIX_TIMERS
static void watchdog(struct rq *rq, struct task_struct *p)
{
	unsigned long soft, hard;

	/* The rlimit may change after it was read; this is fixed up on the next tick */
	soft = task_rlimit(p, RLIMIT_RTTIME);
	hard = task_rlimit_max(p, RLIMIT_RTTIME);

	if (soft != RLIM_INFINITY) {
		unsigned long next;

		if (p->rt.watchdog_stamp != jiffies) {
			p->rt.timeout++;
			p->rt.watchdog_stamp = jiffies;
		}

		next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ);
		if (p->rt.timeout > next) {
			posix_cputimers_rt_watchdog(&p->posix_cputimers,
						    p->se.sum_exec_runtime);
		}
	}
}
#else
static inline void watchdog(struct rq *rq, struct task_struct *p) { }
#endif
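
/*
 * Illustrative sketch (compiled out): RLIMIT_RTTIME is expressed in
 * microseconds while p->rt.timeout above counts scheduler ticks, so the
 * limit is converted with DIV_ROUND_UP(limit_us, USEC_PER_SEC/HZ).  The
 * stand-alone helper below redoes that arithmetic for an assumed HZ value;
 * the DEMO_* names are not kernel API.
 */
#if 0
#define DEMO_HZ			250UL		/* assumed CONFIG_HZ */
#define DEMO_USEC_PER_SEC	1000000UL

/* Round-up conversion of a microsecond limit into scheduler ticks. */
static unsigned long demo_rttime_limit_to_ticks(unsigned long limit_us)
{
	unsigned long usec_per_tick = DEMO_USEC_PER_SEC / DEMO_HZ;

	return (limit_us + usec_per_tick - 1) / usec_per_tick;
}

/* e.g. a 1000000us (1s) limit at HZ=250 (4000us per tick) maps to 250 ticks. */
#endif /* illustrative sketch */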

/*
 * scheduler tick hitting a task of our scheduling class.
 *
 * NOTE: This function can be called remotely by the tick offload that
 * goes along full dynticks. Therefore no local assumption can be made
 * and everything must be accessed through the @rq and @curr passed in
 * parameters.
 */
static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
{
	struct sched_rt_entity *rt_se = &p->rt;

	update_curr_rt(rq);
	update_rt_rq_load_avg(rq_clock_pelt(rq), rq, 1);

	watchdog(rq, p);

	/*
	 * RR tasks need a special form of timeslice management.
	 * FIFO tasks have no timeslices.
	 */
	if (p->policy != SCHED_RR)
		return;

	if (--p->rt.time_slice)
		return;

	p->rt.time_slice = sched_rr_timeslice;

	/*
	 * Requeue to the end of the queue if we (and all of our ancestors)
	 * are not the only element on the queue.
	 */
	for_each_sched_rt_entity(rt_se) {
		if (rt_se->run_list.prev != rt_se->run_list.next) {
			requeue_task_rt(rq, p, 0);
			resched_curr(rq);
			return;
		}
	}
}

static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
{
	/*
	 * Time slice is 0 for SCHED_FIFO tasks
	 */
	if (task->policy == SCHED_RR)
		return sched_rr_timeslice;
	else
		return 0;
}

DEFINE_SCHED_CLASS(rt) = {

	.enqueue_task		= enqueue_task_rt,
	.dequeue_task		= dequeue_task_rt,
	.yield_task		= yield_task_rt,

	.check_preempt_curr	= check_preempt_curr_rt,

	.pick_next_task		= pick_next_task_rt,
	.put_prev_task		= put_prev_task_rt,
	.set_next_task		= set_next_task_rt,

#ifdef CONFIG_SMP
	.balance		= balance_rt,
	.pick_task		= pick_task_rt,
	.select_task_rq		= select_task_rq_rt,
	.set_cpus_allowed	= set_cpus_allowed_common,
	.rq_online		= rq_online_rt,
	.rq_offline		= rq_offline_rt,
	.task_woken		= task_woken_rt,
	.switched_from		= switched_from_rt,
	.find_lock_rq		= find_lock_lowest_rq,
#endif

	.task_tick		= task_tick_rt,

	.get_rr_interval	= get_rr_interval_rt,

	.prio_changed		= prio_changed_rt,
	.switched_to		= switched_to_rt,

	.update_curr		= update_curr_rt,

#ifdef CONFIG_UCLAMP_TASK
	.uclamp_enabled		= 1,
#endif
};

#ifdef CONFIG_RT_GROUP_SCHED
/*
 * Ensure that the real time constraints are schedulable.
 */
static DEFINE_MUTEX(rt_constraints_mutex);

static inline int tg_has_rt_tasks(struct task_group *tg)
{
	struct task_struct *task;
	struct css_task_iter it;
	int ret = 0;

	/*
	 * Autogroups do not have RT tasks; see autogroup_create().
	 */
	if (task_group_is_autogroup(tg))
		return 0;

	css_task_iter_start(&tg->css, 0, &it);
	while (!ret && (task = css_task_iter_next(&it)))
		ret |= rt_task(task);
	css_task_iter_end(&it);

	return ret;
}

struct rt_schedulable_data {
	struct task_group *tg;
	u64 rt_period;
	u64 rt_runtime;
};

static int tg_rt_schedulable(struct task_group *tg, void *data)
{
	struct rt_schedulable_data *d = data;
	struct task_group *child;
	unsigned long total, sum = 0;
	u64 period, runtime;

	period = ktime_to_ns(tg->rt_bandwidth.rt_period);
	runtime = tg->rt_bandwidth.rt_runtime;

	if (tg == d->tg) {
		period = d->rt_period;
		runtime = d->rt_runtime;
	}

	/*
	 * Cannot have more runtime than the period.
	 */
	if (runtime > period && runtime != RUNTIME_INF)
		return -EINVAL;

	/*
	 * Ensure we don't starve existing RT tasks if runtime turns zero.
	 */
	if (rt_bandwidth_enabled() && !runtime &&
	    tg->rt_bandwidth.rt_runtime && tg_has_rt_tasks(tg))
		return -EBUSY;

	total = to_ratio(period, runtime);

	/*
	 * Nobody can have more than the global setting allows.
	 */
	if (total > to_ratio(global_rt_period(), global_rt_runtime()))
		return -EINVAL;

	/*
	 * The sum of our children's runtime should not exceed our own.
	 */
	list_for_each_entry_rcu(child, &tg->children, siblings) {
		period = ktime_to_ns(child->rt_bandwidth.rt_period);
		runtime = child->rt_bandwidth.rt_runtime;

		if (child == d->tg) {
			period = d->rt_period;
			runtime = d->rt_runtime;
		}

		sum += to_ratio(period, runtime);
	}

	if (sum > total)
		return -EINVAL;

	return 0;
}
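
/*
 * Illustrative sketch (compiled out): tg_rt_schedulable() compares
 * fixed-point utilizations.  to_ratio() is defined elsewhere in the
 * scheduler; it is understood to return runtime/period scaled by
 * 1 << BW_SHIFT (2^20 in mainline), so the checks above boil down to
 * "each group's ratio must fit under the global ratio, and the children's
 * ratios must sum to no more than the parent's".  The demo_* helpers below
 * restate that arithmetic outside the kernel; they are assumptions for
 * illustration, not the in-tree implementation.
 */
#if 0
#include <stdint.h>

#define DEMO_BW_SHIFT	20

/* runtime/period as a fixed-point ratio; caller bounds runtime_ns so the
 * shift cannot overflow (cf. max_rt_runtime above). */
static uint64_t demo_to_ratio(uint64_t period_ns, uint64_t runtime_ns)
{
	if (!period_ns)
		return 0;
	return (runtime_ns << DEMO_BW_SHIFT) / period_ns;
}

/* Children may not claim more bandwidth than their parent. */
static int demo_children_fit(uint64_t parent_ratio,
			     const uint64_t *child_ratios, int nr_children)
{
	uint64_t sum = 0;
	int i;

	for (i = 0; i < nr_children; i++)
		sum += child_ratios[i];

	return sum <= parent_ratio;
}
#endif /* illustrative sketch */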

static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
{
	int ret;

	struct rt_schedulable_data data = {
		.tg = tg,
		.rt_period = period,
		.rt_runtime = runtime,
	};

	rcu_read_lock();
	ret = walk_tg_tree(tg_rt_schedulable, tg_nop, &data);
	rcu_read_unlock();

	return ret;
}

static int tg_set_rt_bandwidth(struct task_group *tg,
			       u64 rt_period, u64 rt_runtime)
{
	int i, err = 0;

	/*
	 * Disallowing the root group RT runtime is BAD; it would disallow the
	 * kernel creating (and/or operating) RT threads.
	 */
	if (tg == &root_task_group && rt_runtime == 0)
		return -EINVAL;

	/* No period doesn't make any sense. */
	if (rt_period == 0)
		return -EINVAL;

	/*
	 * Bound the quota to guard against overflow during the bandwidth shift.
	 */
	if (rt_runtime != RUNTIME_INF && rt_runtime > max_rt_runtime)
		return -EINVAL;

	mutex_lock(&rt_constraints_mutex);
	err = __rt_schedulable(tg, rt_period, rt_runtime);
	if (err)
		goto unlock;

	raw_spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
	tg->rt_bandwidth.rt_runtime = rt_runtime;

	for_each_possible_cpu(i) {
		struct rt_rq *rt_rq = tg->rt_rq[i];

		raw_spin_lock(&rt_rq->rt_runtime_lock);
		rt_rq->rt_runtime = rt_runtime;
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
	}
	raw_spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
unlock:
	mutex_unlock(&rt_constraints_mutex);

	return err;
}

int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
{
	u64 rt_runtime, rt_period;

	rt_period = ktime_to_ns(tg->rt_bandwidth.rt_period);
	rt_runtime = (u64)rt_runtime_us * NSEC_PER_USEC;
	if (rt_runtime_us < 0)
		rt_runtime = RUNTIME_INF;
	else if ((u64)rt_runtime_us > U64_MAX / NSEC_PER_USEC)
		return -EINVAL;

	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
}

long sched_group_rt_runtime(struct task_group *tg)
{
	u64 rt_runtime_us;

	if (tg->rt_bandwidth.rt_runtime == RUNTIME_INF)
		return -1;

	rt_runtime_us = tg->rt_bandwidth.rt_runtime;
	do_div(rt_runtime_us, NSEC_PER_USEC);
	return rt_runtime_us;
}

int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us)
{
	u64 rt_runtime, rt_period;

	if (rt_period_us > U64_MAX / NSEC_PER_USEC)
		return -EINVAL;

	rt_period = rt_period_us * NSEC_PER_USEC;
	rt_runtime = tg->rt_bandwidth.rt_runtime;

	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
}

long sched_group_rt_period(struct task_group *tg)
{
	u64 rt_period_us;

	rt_period_us = ktime_to_ns(tg->rt_bandwidth.rt_period);
	do_div(rt_period_us, NSEC_PER_USEC);
	return rt_period_us;
}
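
/*
 * Illustrative sketch (compiled out): the cgroup interface above takes
 * microseconds while the bandwidth code keeps nanoseconds.  A negative
 * runtime means "unlimited" (RUNTIME_INF) and a value whose ns conversion
 * would overflow a u64 is rejected.  The stand-alone helper below restates
 * that conversion; the demo_* names and the UINT64_MAX-based check are
 * assumptions of the sketch, not kernel API.
 */
#if 0
#include <stdint.h>

#define DEMO_NSEC_PER_USEC	1000ULL
#define DEMO_RUNTIME_INF	((uint64_t)~0ULL)

/* Returns 0 on success, -1 if the value cannot be represented. */
static int demo_runtime_us_to_ns(long runtime_us, uint64_t *runtime_ns)
{
	if (runtime_us < 0) {
		*runtime_ns = DEMO_RUNTIME_INF;	/* unlimited */
		return 0;
	}
	if ((uint64_t)runtime_us > UINT64_MAX / DEMO_NSEC_PER_USEC)
		return -1;			/* would overflow */

	*runtime_ns = (uint64_t)runtime_us * DEMO_NSEC_PER_USEC;
	return 0;
}
#endif /* illustrative sketch */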

static int sched_rt_global_constraints(void)
{
	int ret = 0;

	mutex_lock(&rt_constraints_mutex);
	ret = __rt_schedulable(NULL, 0, 0);
	mutex_unlock(&rt_constraints_mutex);

	return ret;
}

int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk)
{
	/* Don't accept realtime tasks when there is no way for them to run */
	if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0)
		return 0;

	return 1;
}

#else /* !CONFIG_RT_GROUP_SCHED */
static int sched_rt_global_constraints(void)
{
	unsigned long flags;
	int i;

	raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
	for_each_possible_cpu(i) {
		struct rt_rq *rt_rq = &cpu_rq(i)->rt;

		raw_spin_lock(&rt_rq->rt_runtime_lock);
		rt_rq->rt_runtime = global_rt_runtime();
		raw_spin_unlock(&rt_rq->rt_runtime_lock);
	}
	raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);

	return 0;
}
#endif /* CONFIG_RT_GROUP_SCHED */

static int sched_rt_global_validate(void)
{
	if (sysctl_sched_rt_period <= 0)
		return -EINVAL;

	if ((sysctl_sched_rt_runtime != RUNTIME_INF) &&
	    ((sysctl_sched_rt_runtime > sysctl_sched_rt_period) ||
	     ((u64)sysctl_sched_rt_runtime *
			NSEC_PER_USEC > max_rt_runtime)))
		return -EINVAL;

	return 0;
}

static void sched_rt_do_global(void)
{
	def_rt_bandwidth.rt_runtime = global_rt_runtime();
	def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
}

int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
		     size_t *lenp, loff_t *ppos)
{
	int old_period, old_runtime;
	static DEFINE_MUTEX(mutex);
	int ret;

	mutex_lock(&mutex);
	old_period = sysctl_sched_rt_period;
	old_runtime = sysctl_sched_rt_runtime;

	ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (!ret && write) {
		ret = sched_rt_global_validate();
		if (ret)
			goto undo;

		ret = sched_dl_global_validate();
		if (ret)
			goto undo;

		ret = sched_rt_global_constraints();
		if (ret)
			goto undo;

		sched_rt_do_global();
		sched_dl_do_global();
	}
	if (0) {
undo:
		sysctl_sched_rt_period = old_period;
		sysctl_sched_rt_runtime = old_runtime;
	}
	mutex_unlock(&mutex);

	return ret;
}

int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
		     size_t *lenp, loff_t *ppos)
{
	int ret;
	static DEFINE_MUTEX(mutex);

	mutex_lock(&mutex);
	ret = proc_dointvec(table, write, buffer, lenp, ppos);
	/*
	 * Make sure that internally we keep jiffies.
	 * Also, writing zero resets the timeslice to default:
	 */
	if (!ret && write) {
		sched_rr_timeslice =
			sysctl_sched_rr_timeslice <= 0 ? RR_TIMESLICE :
			msecs_to_jiffies(sysctl_sched_rr_timeslice);
	}
	mutex_unlock(&mutex);

	return ret;
}

#ifdef CONFIG_SCHED_DEBUG
void print_rt_stats(struct seq_file *m, int cpu)
{
	rt_rq_iter_t iter;
	struct rt_rq *rt_rq;

	rcu_read_lock();
	for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
		print_rt_rq(m, cpu, rt_rq);
	rcu_read_unlock();
}
#endif /* CONFIG_SCHED_DEBUG */
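
/*
 * Illustrative sketch (compiled out): sysctl_sched_rr_timeslice is read and
 * written in milliseconds, while sched_rr_timeslice used by
 * sched_rr_handler() above is kept in jiffies; a value <= 0 restores the
 * RR_TIMESLICE default.  The helper below redoes that conversion in user
 * space for an assumed HZ; the DEMO_* names are not kernel API and the
 * rounding only approximates msecs_to_jiffies().
 */
#if 0
#define DEMO_HZ			250			/* assumed CONFIG_HZ */
#define DEMO_RR_TIMESLICE	(100 * DEMO_HZ / 1000)	/* assumed 100ms default, in ticks */

/* Mirror of the sysctl handler's ms -> jiffies policy. */
static int demo_rr_timeslice_from_sysctl(int msecs)
{
	if (msecs <= 0)
		return DEMO_RR_TIMESLICE;		/* reset to default */
	return (msecs * DEMO_HZ + 999) / 1000;		/* round up, like msecs_to_jiffies() */
}
#endif /* illustrative sketch */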