1 /* 2 * Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR 3 * policies) 4 */ 5 6 #include "sched.h" 7 8 #include <linux/slab.h> 9 #include <linux/irq_work.h> 10 11 int sched_rr_timeslice = RR_TIMESLICE; 12 13 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun); 14 15 struct rt_bandwidth def_rt_bandwidth; 16 17 static enum hrtimer_restart sched_rt_period_timer(struct hrtimer *timer) 18 { 19 struct rt_bandwidth *rt_b = 20 container_of(timer, struct rt_bandwidth, rt_period_timer); 21 int idle = 0; 22 int overrun; 23 24 raw_spin_lock(&rt_b->rt_runtime_lock); 25 for (;;) { 26 overrun = hrtimer_forward_now(timer, rt_b->rt_period); 27 if (!overrun) 28 break; 29 30 raw_spin_unlock(&rt_b->rt_runtime_lock); 31 idle = do_sched_rt_period_timer(rt_b, overrun); 32 raw_spin_lock(&rt_b->rt_runtime_lock); 33 } 34 if (idle) 35 rt_b->rt_period_active = 0; 36 raw_spin_unlock(&rt_b->rt_runtime_lock); 37 38 return idle ? HRTIMER_NORESTART : HRTIMER_RESTART; 39 } 40 41 void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) 42 { 43 rt_b->rt_period = ns_to_ktime(period); 44 rt_b->rt_runtime = runtime; 45 46 raw_spin_lock_init(&rt_b->rt_runtime_lock); 47 48 hrtimer_init(&rt_b->rt_period_timer, 49 CLOCK_MONOTONIC, HRTIMER_MODE_REL); 50 rt_b->rt_period_timer.function = sched_rt_period_timer; 51 } 52 53 static void start_rt_bandwidth(struct rt_bandwidth *rt_b) 54 { 55 if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) 56 return; 57 58 raw_spin_lock(&rt_b->rt_runtime_lock); 59 if (!rt_b->rt_period_active) { 60 rt_b->rt_period_active = 1; 61 hrtimer_forward_now(&rt_b->rt_period_timer, rt_b->rt_period); 62 hrtimer_start_expires(&rt_b->rt_period_timer, HRTIMER_MODE_ABS_PINNED); 63 } 64 raw_spin_unlock(&rt_b->rt_runtime_lock); 65 } 66 67 #ifdef CONFIG_SMP 68 static void push_irq_work_func(struct irq_work *work); 69 #endif 70 71 void init_rt_rq(struct rt_rq *rt_rq) 72 { 73 struct rt_prio_array *array; 74 int i; 75 76 array = &rt_rq->active; 77 for (i = 0; i < MAX_RT_PRIO; i++) { 78 INIT_LIST_HEAD(array->queue + i); 79 __clear_bit(i, array->bitmap); 80 } 81 /* delimiter for bitsearch: */ 82 __set_bit(MAX_RT_PRIO, array->bitmap); 83 84 #if defined CONFIG_SMP 85 rt_rq->highest_prio.curr = MAX_RT_PRIO; 86 rt_rq->highest_prio.next = MAX_RT_PRIO; 87 rt_rq->rt_nr_migratory = 0; 88 rt_rq->overloaded = 0; 89 plist_head_init(&rt_rq->pushable_tasks); 90 91 #ifdef HAVE_RT_PUSH_IPI 92 rt_rq->push_flags = 0; 93 rt_rq->push_cpu = nr_cpu_ids; 94 raw_spin_lock_init(&rt_rq->push_lock); 95 init_irq_work(&rt_rq->push_work, push_irq_work_func); 96 #endif 97 #endif /* CONFIG_SMP */ 98 /* We start in dequeued state, because no RT tasks are queued */ 99 rt_rq->rt_queued = 0; 100 101 rt_rq->rt_time = 0; 102 rt_rq->rt_throttled = 0; 103 rt_rq->rt_runtime = 0; 104 raw_spin_lock_init(&rt_rq->rt_runtime_lock); 105 } 106 107 #ifdef CONFIG_RT_GROUP_SCHED 108 static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b) 109 { 110 hrtimer_cancel(&rt_b->rt_period_timer); 111 } 112 113 #define rt_entity_is_task(rt_se) (!(rt_se)->my_q) 114 115 static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se) 116 { 117 #ifdef CONFIG_SCHED_DEBUG 118 WARN_ON_ONCE(!rt_entity_is_task(rt_se)); 119 #endif 120 return container_of(rt_se, struct task_struct, rt); 121 } 122 123 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) 124 { 125 return rt_rq->rq; 126 } 127 128 static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) 129 { 130 return rt_se->rt_rq; 131 } 132 133
static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se) 134 { 135 struct rt_rq *rt_rq = rt_se->rt_rq; 136 137 return rt_rq->rq; 138 } 139 140 void free_rt_sched_group(struct task_group *tg) 141 { 142 int i; 143 144 if (tg->rt_se) 145 destroy_rt_bandwidth(&tg->rt_bandwidth); 146 147 for_each_possible_cpu(i) { 148 if (tg->rt_rq) 149 kfree(tg->rt_rq[i]); 150 if (tg->rt_se) 151 kfree(tg->rt_se[i]); 152 } 153 154 kfree(tg->rt_rq); 155 kfree(tg->rt_se); 156 } 157 158 void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq, 159 struct sched_rt_entity *rt_se, int cpu, 160 struct sched_rt_entity *parent) 161 { 162 struct rq *rq = cpu_rq(cpu); 163 164 rt_rq->highest_prio.curr = MAX_RT_PRIO; 165 rt_rq->rt_nr_boosted = 0; 166 rt_rq->rq = rq; 167 rt_rq->tg = tg; 168 169 tg->rt_rq[cpu] = rt_rq; 170 tg->rt_se[cpu] = rt_se; 171 172 if (!rt_se) 173 return; 174 175 if (!parent) 176 rt_se->rt_rq = &rq->rt; 177 else 178 rt_se->rt_rq = parent->my_q; 179 180 rt_se->my_q = rt_rq; 181 rt_se->parent = parent; 182 INIT_LIST_HEAD(&rt_se->run_list); 183 } 184 185 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) 186 { 187 struct rt_rq *rt_rq; 188 struct sched_rt_entity *rt_se; 189 int i; 190 191 tg->rt_rq = kzalloc(sizeof(rt_rq) * nr_cpu_ids, GFP_KERNEL); 192 if (!tg->rt_rq) 193 goto err; 194 tg->rt_se = kzalloc(sizeof(rt_se) * nr_cpu_ids, GFP_KERNEL); 195 if (!tg->rt_se) 196 goto err; 197 198 init_rt_bandwidth(&tg->rt_bandwidth, 199 ktime_to_ns(def_rt_bandwidth.rt_period), 0); 200 201 for_each_possible_cpu(i) { 202 rt_rq = kzalloc_node(sizeof(struct rt_rq), 203 GFP_KERNEL, cpu_to_node(i)); 204 if (!rt_rq) 205 goto err; 206 207 rt_se = kzalloc_node(sizeof(struct sched_rt_entity), 208 GFP_KERNEL, cpu_to_node(i)); 209 if (!rt_se) 210 goto err_free_rq; 211 212 init_rt_rq(rt_rq); 213 rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime; 214 init_tg_rt_entry(tg, rt_rq, rt_se, i, parent->rt_se[i]); 215 } 216 217 return 1; 218 219 err_free_rq: 220 kfree(rt_rq); 221 err: 222 return 0; 223 } 224 225 #else /* CONFIG_RT_GROUP_SCHED */ 226 227 #define rt_entity_is_task(rt_se) (1) 228 229 static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se) 230 { 231 return container_of(rt_se, struct task_struct, rt); 232 } 233 234 static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) 235 { 236 return container_of(rt_rq, struct rq, rt); 237 } 238 239 static inline struct rq *rq_of_rt_se(struct sched_rt_entity *rt_se) 240 { 241 struct task_struct *p = rt_task_of(rt_se); 242 243 return task_rq(p); 244 } 245 246 static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) 247 { 248 struct rq *rq = rq_of_rt_se(rt_se); 249 250 return &rq->rt; 251 } 252 253 void free_rt_sched_group(struct task_group *tg) { } 254 255 int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent) 256 { 257 return 1; 258 } 259 #endif /* CONFIG_RT_GROUP_SCHED */ 260 261 #ifdef CONFIG_SMP 262 263 static void pull_rt_task(struct rq *this_rq); 264 265 static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev) 266 { 267 /* Try to pull RT tasks here if we lower this rq's prio */ 268 return rq->rt.highest_prio.curr > prev->prio; 269 } 270 271 static inline int rt_overloaded(struct rq *rq) 272 { 273 return atomic_read(&rq->rd->rto_count); 274 } 275 276 static inline void rt_set_overload(struct rq *rq) 277 { 278 if (!rq->online) 279 return; 280 281 cpumask_set_cpu(rq->cpu, rq->rd->rto_mask); 282 /* 283 * Make sure the mask is visible before we set 284 * the overload count. 
That is checked to determine 285 * if we should look at the mask. It would be a shame 286 * if we looked at the mask, but the mask was not 287 * updated yet. 288 * 289 * Matched by the barrier in pull_rt_task(). 290 */ 291 smp_wmb(); 292 atomic_inc(&rq->rd->rto_count); 293 } 294 295 static inline void rt_clear_overload(struct rq *rq) 296 { 297 if (!rq->online) 298 return; 299 300 /* the order here really doesn't matter */ 301 atomic_dec(&rq->rd->rto_count); 302 cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask); 303 } 304 305 static void update_rt_migration(struct rt_rq *rt_rq) 306 { 307 if (rt_rq->rt_nr_migratory && rt_rq->rt_nr_total > 1) { 308 if (!rt_rq->overloaded) { 309 rt_set_overload(rq_of_rt_rq(rt_rq)); 310 rt_rq->overloaded = 1; 311 } 312 } else if (rt_rq->overloaded) { 313 rt_clear_overload(rq_of_rt_rq(rt_rq)); 314 rt_rq->overloaded = 0; 315 } 316 } 317 318 static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 319 { 320 struct task_struct *p; 321 322 if (!rt_entity_is_task(rt_se)) 323 return; 324 325 p = rt_task_of(rt_se); 326 rt_rq = &rq_of_rt_rq(rt_rq)->rt; 327 328 rt_rq->rt_nr_total++; 329 if (p->nr_cpus_allowed > 1) 330 rt_rq->rt_nr_migratory++; 331 332 update_rt_migration(rt_rq); 333 } 334 335 static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 336 { 337 struct task_struct *p; 338 339 if (!rt_entity_is_task(rt_se)) 340 return; 341 342 p = rt_task_of(rt_se); 343 rt_rq = &rq_of_rt_rq(rt_rq)->rt; 344 345 rt_rq->rt_nr_total--; 346 if (p->nr_cpus_allowed > 1) 347 rt_rq->rt_nr_migratory--; 348 349 update_rt_migration(rt_rq); 350 } 351 352 static inline int has_pushable_tasks(struct rq *rq) 353 { 354 return !plist_head_empty(&rq->rt.pushable_tasks); 355 } 356 357 static DEFINE_PER_CPU(struct callback_head, rt_push_head); 358 static DEFINE_PER_CPU(struct callback_head, rt_pull_head); 359 360 static void push_rt_tasks(struct rq *); 361 static void pull_rt_task(struct rq *); 362 363 static inline void queue_push_tasks(struct rq *rq) 364 { 365 if (!has_pushable_tasks(rq)) 366 return; 367 368 queue_balance_callback(rq, &per_cpu(rt_push_head, rq->cpu), push_rt_tasks); 369 } 370 371 static inline void queue_pull_task(struct rq *rq) 372 { 373 queue_balance_callback(rq, &per_cpu(rt_pull_head, rq->cpu), pull_rt_task); 374 } 375 376 static void enqueue_pushable_task(struct rq *rq, struct task_struct *p) 377 { 378 plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); 379 plist_node_init(&p->pushable_tasks, p->prio); 380 plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks); 381 382 /* Update the highest prio pushable task */ 383 if (p->prio < rq->rt.highest_prio.next) 384 rq->rt.highest_prio.next = p->prio; 385 } 386 387 static void dequeue_pushable_task(struct rq *rq, struct task_struct *p) 388 { 389 plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); 390 391 /* Update the new highest prio pushable task */ 392 if (has_pushable_tasks(rq)) { 393 p = plist_first_entry(&rq->rt.pushable_tasks, 394 struct task_struct, pushable_tasks); 395 rq->rt.highest_prio.next = p->prio; 396 } else 397 rq->rt.highest_prio.next = MAX_RT_PRIO; 398 } 399 400 #else 401 402 static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p) 403 { 404 } 405 406 static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p) 407 { 408 } 409 410 static inline 411 void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 412 { 413 } 414 415 static inline 416 void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq 
*rt_rq) 417 { 418 } 419 420 static inline bool need_pull_rt_task(struct rq *rq, struct task_struct *prev) 421 { 422 return false; 423 } 424 425 static inline void pull_rt_task(struct rq *this_rq) 426 { 427 } 428 429 static inline void queue_push_tasks(struct rq *rq) 430 { 431 } 432 #endif /* CONFIG_SMP */ 433 434 static void enqueue_top_rt_rq(struct rt_rq *rt_rq); 435 static void dequeue_top_rt_rq(struct rt_rq *rt_rq); 436 437 static inline int on_rt_rq(struct sched_rt_entity *rt_se) 438 { 439 return !list_empty(&rt_se->run_list); 440 } 441 442 #ifdef CONFIG_RT_GROUP_SCHED 443 444 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) 445 { 446 if (!rt_rq->tg) 447 return RUNTIME_INF; 448 449 return rt_rq->rt_runtime; 450 } 451 452 static inline u64 sched_rt_period(struct rt_rq *rt_rq) 453 { 454 return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period); 455 } 456 457 typedef struct task_group *rt_rq_iter_t; 458 459 static inline struct task_group *next_task_group(struct task_group *tg) 460 { 461 do { 462 tg = list_entry_rcu(tg->list.next, 463 typeof(struct task_group), list); 464 } while (&tg->list != &task_groups && task_group_is_autogroup(tg)); 465 466 if (&tg->list == &task_groups) 467 tg = NULL; 468 469 return tg; 470 } 471 472 #define for_each_rt_rq(rt_rq, iter, rq) \ 473 for (iter = container_of(&task_groups, typeof(*iter), list); \ 474 (iter = next_task_group(iter)) && \ 475 (rt_rq = iter->rt_rq[cpu_of(rq)]);) 476 477 #define for_each_sched_rt_entity(rt_se) \ 478 for (; rt_se; rt_se = rt_se->parent) 479 480 static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) 481 { 482 return rt_se->my_q; 483 } 484 485 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head); 486 static void dequeue_rt_entity(struct sched_rt_entity *rt_se); 487 488 static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) 489 { 490 struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; 491 struct rq *rq = rq_of_rt_rq(rt_rq); 492 struct sched_rt_entity *rt_se; 493 494 int cpu = cpu_of(rq); 495 496 rt_se = rt_rq->tg->rt_se[cpu]; 497 498 if (rt_rq->rt_nr_running) { 499 if (!rt_se) 500 enqueue_top_rt_rq(rt_rq); 501 else if (!on_rt_rq(rt_se)) 502 enqueue_rt_entity(rt_se, false); 503 504 if (rt_rq->highest_prio.curr < curr->prio) 505 resched_curr(rq); 506 } 507 } 508 509 static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 510 { 511 struct sched_rt_entity *rt_se; 512 int cpu = cpu_of(rq_of_rt_rq(rt_rq)); 513 514 rt_se = rt_rq->tg->rt_se[cpu]; 515 516 if (!rt_se) 517 dequeue_top_rt_rq(rt_rq); 518 else if (on_rt_rq(rt_se)) 519 dequeue_rt_entity(rt_se); 520 } 521 522 static inline int rt_rq_throttled(struct rt_rq *rt_rq) 523 { 524 return rt_rq->rt_throttled && !rt_rq->rt_nr_boosted; 525 } 526 527 static int rt_se_boosted(struct sched_rt_entity *rt_se) 528 { 529 struct rt_rq *rt_rq = group_rt_rq(rt_se); 530 struct task_struct *p; 531 532 if (rt_rq) 533 return !!rt_rq->rt_nr_boosted; 534 535 p = rt_task_of(rt_se); 536 return p->prio != p->normal_prio; 537 } 538 539 #ifdef CONFIG_SMP 540 static inline const struct cpumask *sched_rt_period_mask(void) 541 { 542 return this_rq()->rd->span; 543 } 544 #else 545 static inline const struct cpumask *sched_rt_period_mask(void) 546 { 547 return cpu_online_mask; 548 } 549 #endif 550 551 static inline 552 struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) 553 { 554 return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu]; 555 } 556 557 static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) 558 { 559 return 
&rt_rq->tg->rt_bandwidth; 560 } 561 562 #else /* !CONFIG_RT_GROUP_SCHED */ 563 564 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) 565 { 566 return rt_rq->rt_runtime; 567 } 568 569 static inline u64 sched_rt_period(struct rt_rq *rt_rq) 570 { 571 return ktime_to_ns(def_rt_bandwidth.rt_period); 572 } 573 574 typedef struct rt_rq *rt_rq_iter_t; 575 576 #define for_each_rt_rq(rt_rq, iter, rq) \ 577 for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL) 578 579 #define for_each_sched_rt_entity(rt_se) \ 580 for (; rt_se; rt_se = NULL) 581 582 static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) 583 { 584 return NULL; 585 } 586 587 static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) 588 { 589 struct rq *rq = rq_of_rt_rq(rt_rq); 590 591 if (!rt_rq->rt_nr_running) 592 return; 593 594 enqueue_top_rt_rq(rt_rq); 595 resched_curr(rq); 596 } 597 598 static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) 599 { 600 dequeue_top_rt_rq(rt_rq); 601 } 602 603 static inline int rt_rq_throttled(struct rt_rq *rt_rq) 604 { 605 return rt_rq->rt_throttled; 606 } 607 608 static inline const struct cpumask *sched_rt_period_mask(void) 609 { 610 return cpu_online_mask; 611 } 612 613 static inline 614 struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) 615 { 616 return &cpu_rq(cpu)->rt; 617 } 618 619 static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) 620 { 621 return &def_rt_bandwidth; 622 } 623 624 #endif /* CONFIG_RT_GROUP_SCHED */ 625 626 bool sched_rt_bandwidth_account(struct rt_rq *rt_rq) 627 { 628 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 629 630 return (hrtimer_active(&rt_b->rt_period_timer) || 631 rt_rq->rt_time < rt_b->rt_runtime); 632 } 633 634 #ifdef CONFIG_SMP 635 /* 636 * We ran out of runtime, see if we can borrow some from our neighbours. 637 */ 638 static void do_balance_runtime(struct rt_rq *rt_rq) 639 { 640 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 641 struct root_domain *rd = rq_of_rt_rq(rt_rq)->rd; 642 int i, weight; 643 u64 rt_period; 644 645 weight = cpumask_weight(rd->span); 646 647 raw_spin_lock(&rt_b->rt_runtime_lock); 648 rt_period = ktime_to_ns(rt_b->rt_period); 649 for_each_cpu(i, rd->span) { 650 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 651 s64 diff; 652 653 if (iter == rt_rq) 654 continue; 655 656 raw_spin_lock(&iter->rt_runtime_lock); 657 /* 658 * Either all rqs have inf runtime and there's nothing to steal 659 * or __disable_runtime() below sets a specific rq to inf to 660 * indicate it's been disabled and disallow stealing. 661 */ 662 if (iter->rt_runtime == RUNTIME_INF) 663 goto next; 664 665 /* 666 * From runqueues with spare time, take 1/n part of their 667 * spare time, but no more than our period. 668 */ 669 diff = iter->rt_runtime - iter->rt_time; 670 if (diff > 0) { 671 diff = div_u64((u64)diff, weight); 672 if (rt_rq->rt_runtime + diff > rt_period) 673 diff = rt_period - rt_rq->rt_runtime; 674 iter->rt_runtime -= diff; 675 rt_rq->rt_runtime += diff; 676 if (rt_rq->rt_runtime == rt_period) { 677 raw_spin_unlock(&iter->rt_runtime_lock); 678 break; 679 } 680 } 681 next: 682 raw_spin_unlock(&iter->rt_runtime_lock); 683 } 684 raw_spin_unlock(&rt_b->rt_runtime_lock); 685 } 686 687 /* 688 * Ensure this RQ takes back all the runtime it lent to its neighbours. 
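 * The shortfall (want = rt_b->rt_runtime - rt_rq->rt_runtime) is reclaimed greedily from the other runqueues in the root domain, taking min(iter->rt_runtime, want) from each one until nothing is owed.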
689 */ 690 static void __disable_runtime(struct rq *rq) 691 { 692 struct root_domain *rd = rq->rd; 693 rt_rq_iter_t iter; 694 struct rt_rq *rt_rq; 695 696 if (unlikely(!scheduler_running)) 697 return; 698 699 for_each_rt_rq(rt_rq, iter, rq) { 700 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 701 s64 want; 702 int i; 703 704 raw_spin_lock(&rt_b->rt_runtime_lock); 705 raw_spin_lock(&rt_rq->rt_runtime_lock); 706 /* 707 * Either we're all inf and nobody needs to borrow, or we're 708 * already disabled and thus have nothing to do, or we have 709 * exactly the right amount of runtime to take out. 710 */ 711 if (rt_rq->rt_runtime == RUNTIME_INF || 712 rt_rq->rt_runtime == rt_b->rt_runtime) 713 goto balanced; 714 raw_spin_unlock(&rt_rq->rt_runtime_lock); 715 716 /* 717 * Calculate the difference between what we started out with 718 * and what we currently have; that's the amount of runtime 719 * we lent out and now have to reclaim. 720 */ 721 want = rt_b->rt_runtime - rt_rq->rt_runtime; 722 723 /* 724 * Greedy reclaim, take back as much as we can. 725 */ 726 for_each_cpu(i, rd->span) { 727 struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); 728 s64 diff; 729 730 /* 731 * Can't reclaim from ourselves or disabled runqueues. 732 */ 733 if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF) 734 continue; 735 736 raw_spin_lock(&iter->rt_runtime_lock); 737 if (want > 0) { 738 diff = min_t(s64, iter->rt_runtime, want); 739 iter->rt_runtime -= diff; 740 want -= diff; 741 } else { 742 iter->rt_runtime -= want; 743 want -= want; 744 } 745 raw_spin_unlock(&iter->rt_runtime_lock); 746 747 if (!want) 748 break; 749 } 750 751 raw_spin_lock(&rt_rq->rt_runtime_lock); 752 /* 753 * We cannot be left wanting - that would mean some runtime 754 * leaked out of the system. 755 */ 756 BUG_ON(want); 757 balanced: 758 /* 759 * Disable all the borrow logic by pretending we have inf 760 * runtime - in which case borrowing doesn't make sense. 
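 * Clearing rt_throttled and re-enqueueing the rt_rq below keeps its tasks runnable once the borrow logic is switched off.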
761 */ 762 rt_rq->rt_runtime = RUNTIME_INF; 763 rt_rq->rt_throttled = 0; 764 raw_spin_unlock(&rt_rq->rt_runtime_lock); 765 raw_spin_unlock(&rt_b->rt_runtime_lock); 766 767 /* Make rt_rq available for pick_next_task() */ 768 sched_rt_rq_enqueue(rt_rq); 769 } 770 } 771 772 static void __enable_runtime(struct rq *rq) 773 { 774 rt_rq_iter_t iter; 775 struct rt_rq *rt_rq; 776 777 if (unlikely(!scheduler_running)) 778 return; 779 780 /* 781 * Reset each runqueue's bandwidth settings 782 */ 783 for_each_rt_rq(rt_rq, iter, rq) { 784 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 785 786 raw_spin_lock(&rt_b->rt_runtime_lock); 787 raw_spin_lock(&rt_rq->rt_runtime_lock); 788 rt_rq->rt_runtime = rt_b->rt_runtime; 789 rt_rq->rt_time = 0; 790 rt_rq->rt_throttled = 0; 791 raw_spin_unlock(&rt_rq->rt_runtime_lock); 792 raw_spin_unlock(&rt_b->rt_runtime_lock); 793 } 794 } 795 796 static void balance_runtime(struct rt_rq *rt_rq) 797 { 798 if (!sched_feat(RT_RUNTIME_SHARE)) 799 return; 800 801 if (rt_rq->rt_time > rt_rq->rt_runtime) { 802 raw_spin_unlock(&rt_rq->rt_runtime_lock); 803 do_balance_runtime(rt_rq); 804 raw_spin_lock(&rt_rq->rt_runtime_lock); 805 } 806 } 807 #else /* !CONFIG_SMP */ 808 static inline void balance_runtime(struct rt_rq *rt_rq) {} 809 #endif /* CONFIG_SMP */ 810 811 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) 812 { 813 int i, idle = 1, throttled = 0; 814 const struct cpumask *span; 815 816 span = sched_rt_period_mask(); 817 #ifdef CONFIG_RT_GROUP_SCHED 818 /* 819 * FIXME: isolated CPUs should really leave the root task group, 820 * whether they are isolcpus or were isolated via cpusets, lest 821 * the timer run on a CPU which does not service all runqueues, 822 * potentially leaving other CPUs indefinitely throttled. If 823 * isolation is really required, the user will turn the throttle 824 * off to kill the perturbations it causes anyway. Meanwhile, 825 * this maintains functionality for boot and/or troubleshooting. 826 */ 827 if (rt_b == &root_task_group.rt_bandwidth) 828 span = cpu_online_mask; 829 #endif 830 for_each_cpu(i, span) { 831 int enqueue = 0; 832 struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); 833 struct rq *rq = rq_of_rt_rq(rt_rq); 834 835 raw_spin_lock(&rq->lock); 836 if (rt_rq->rt_time) { 837 u64 runtime; 838 839 raw_spin_lock(&rt_rq->rt_runtime_lock); 840 if (rt_rq->rt_throttled) 841 balance_runtime(rt_rq); 842 runtime = rt_rq->rt_runtime; 843 rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); 844 if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { 845 rt_rq->rt_throttled = 0; 846 enqueue = 1; 847 848 /* 849 * When we're idle and a woken (rt) task is 850 * throttled check_preempt_curr() will set 851 * skip_update and the time between the wakeup 852 * and this unthrottle will get accounted as 853 * 'runtime'. 
854 */ 855 if (rt_rq->rt_nr_running && rq->curr == rq->idle) 856 rq_clock_skip_update(rq, false); 857 } 858 if (rt_rq->rt_time || rt_rq->rt_nr_running) 859 idle = 0; 860 raw_spin_unlock(&rt_rq->rt_runtime_lock); 861 } else if (rt_rq->rt_nr_running) { 862 idle = 0; 863 if (!rt_rq_throttled(rt_rq)) 864 enqueue = 1; 865 } 866 if (rt_rq->rt_throttled) 867 throttled = 1; 868 869 if (enqueue) 870 sched_rt_rq_enqueue(rt_rq); 871 raw_spin_unlock(&rq->lock); 872 } 873 874 if (!throttled && (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF)) 875 return 1; 876 877 return idle; 878 } 879 880 static inline int rt_se_prio(struct sched_rt_entity *rt_se) 881 { 882 #ifdef CONFIG_RT_GROUP_SCHED 883 struct rt_rq *rt_rq = group_rt_rq(rt_se); 884 885 if (rt_rq) 886 return rt_rq->highest_prio.curr; 887 #endif 888 889 return rt_task_of(rt_se)->prio; 890 } 891 892 static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) 893 { 894 u64 runtime = sched_rt_runtime(rt_rq); 895 896 if (rt_rq->rt_throttled) 897 return rt_rq_throttled(rt_rq); 898 899 if (runtime >= sched_rt_period(rt_rq)) 900 return 0; 901 902 balance_runtime(rt_rq); 903 runtime = sched_rt_runtime(rt_rq); 904 if (runtime == RUNTIME_INF) 905 return 0; 906 907 if (rt_rq->rt_time > runtime) { 908 struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); 909 910 /* 911 * Don't actually throttle groups that have no runtime assigned 912 * but accrue some time due to boosting. 913 */ 914 if (likely(rt_b->rt_runtime)) { 915 rt_rq->rt_throttled = 1; 916 printk_deferred_once("sched: RT throttling activated\n"); 917 } else { 918 /* 919 * In case we did anyway, make it go away, 920 * replenishment is a joke, since it will replenish us 921 * with exactly 0 ns. 922 */ 923 rt_rq->rt_time = 0; 924 } 925 926 if (rt_rq_throttled(rt_rq)) { 927 sched_rt_rq_dequeue(rt_rq); 928 return 1; 929 } 930 } 931 932 return 0; 933 } 934 935 /* 936 * Update the current task's runtime statistics. Skip current tasks that 937 * are not in our scheduling class. 
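 * When RT bandwidth is enabled, the consumed time is also charged to each rt_rq in the entity's hierarchy; if one of them exceeds its runtime, current is rescheduled so the throttle can take effect.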
938 */ 939 static void update_curr_rt(struct rq *rq) 940 { 941 struct task_struct *curr = rq->curr; 942 struct sched_rt_entity *rt_se = &curr->rt; 943 u64 delta_exec; 944 945 if (curr->sched_class != &rt_sched_class) 946 return; 947 948 delta_exec = rq_clock_task(rq) - curr->se.exec_start; 949 if (unlikely((s64)delta_exec <= 0)) 950 return; 951 952 schedstat_set(curr->se.statistics.exec_max, 953 max(curr->se.statistics.exec_max, delta_exec)); 954 955 curr->se.sum_exec_runtime += delta_exec; 956 account_group_exec_runtime(curr, delta_exec); 957 958 curr->se.exec_start = rq_clock_task(rq); 959 cpuacct_charge(curr, delta_exec); 960 961 sched_rt_avg_update(rq, delta_exec); 962 963 if (!rt_bandwidth_enabled()) 964 return; 965 966 for_each_sched_rt_entity(rt_se) { 967 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 968 969 if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { 970 raw_spin_lock(&rt_rq->rt_runtime_lock); 971 rt_rq->rt_time += delta_exec; 972 if (sched_rt_runtime_exceeded(rt_rq)) 973 resched_curr(rq); 974 raw_spin_unlock(&rt_rq->rt_runtime_lock); 975 } 976 } 977 } 978 979 static void 980 dequeue_top_rt_rq(struct rt_rq *rt_rq) 981 { 982 struct rq *rq = rq_of_rt_rq(rt_rq); 983 984 BUG_ON(&rq->rt != rt_rq); 985 986 if (!rt_rq->rt_queued) 987 return; 988 989 BUG_ON(!rq->nr_running); 990 991 sub_nr_running(rq, rt_rq->rt_nr_running); 992 rt_rq->rt_queued = 0; 993 } 994 995 static void 996 enqueue_top_rt_rq(struct rt_rq *rt_rq) 997 { 998 struct rq *rq = rq_of_rt_rq(rt_rq); 999 1000 BUG_ON(&rq->rt != rt_rq); 1001 1002 if (rt_rq->rt_queued) 1003 return; 1004 if (rt_rq_throttled(rt_rq) || !rt_rq->rt_nr_running) 1005 return; 1006 1007 add_nr_running(rq, rt_rq->rt_nr_running); 1008 rt_rq->rt_queued = 1; 1009 } 1010 1011 #if defined CONFIG_SMP 1012 1013 static void 1014 inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) 1015 { 1016 struct rq *rq = rq_of_rt_rq(rt_rq); 1017 1018 #ifdef CONFIG_RT_GROUP_SCHED 1019 /* 1020 * Change rq's cpupri only if rt_rq is the top queue. 1021 */ 1022 if (&rq->rt != rt_rq) 1023 return; 1024 #endif 1025 if (rq->online && prio < prev_prio) 1026 cpupri_set(&rq->rd->cpupri, rq->cpu, prio); 1027 } 1028 1029 static void 1030 dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) 1031 { 1032 struct rq *rq = rq_of_rt_rq(rt_rq); 1033 1034 #ifdef CONFIG_RT_GROUP_SCHED 1035 /* 1036 * Change rq's cpupri only if rt_rq is the top queue. 
1037 */ 1038 if (&rq->rt != rt_rq) 1039 return; 1040 #endif 1041 if (rq->online && rt_rq->highest_prio.curr != prev_prio) 1042 cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr); 1043 } 1044 1045 #else /* CONFIG_SMP */ 1046 1047 static inline 1048 void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {} 1049 static inline 1050 void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {} 1051 1052 #endif /* CONFIG_SMP */ 1053 1054 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED 1055 static void 1056 inc_rt_prio(struct rt_rq *rt_rq, int prio) 1057 { 1058 int prev_prio = rt_rq->highest_prio.curr; 1059 1060 if (prio < prev_prio) 1061 rt_rq->highest_prio.curr = prio; 1062 1063 inc_rt_prio_smp(rt_rq, prio, prev_prio); 1064 } 1065 1066 static void 1067 dec_rt_prio(struct rt_rq *rt_rq, int prio) 1068 { 1069 int prev_prio = rt_rq->highest_prio.curr; 1070 1071 if (rt_rq->rt_nr_running) { 1072 1073 WARN_ON(prio < prev_prio); 1074 1075 /* 1076 * This may have been our highest task, and therefore 1077 * we may have some recomputation to do 1078 */ 1079 if (prio == prev_prio) { 1080 struct rt_prio_array *array = &rt_rq->active; 1081 1082 rt_rq->highest_prio.curr = 1083 sched_find_first_bit(array->bitmap); 1084 } 1085 1086 } else 1087 rt_rq->highest_prio.curr = MAX_RT_PRIO; 1088 1089 dec_rt_prio_smp(rt_rq, prio, prev_prio); 1090 } 1091 1092 #else 1093 1094 static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {} 1095 static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {} 1096 1097 #endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */ 1098 1099 #ifdef CONFIG_RT_GROUP_SCHED 1100 1101 static void 1102 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 1103 { 1104 if (rt_se_boosted(rt_se)) 1105 rt_rq->rt_nr_boosted++; 1106 1107 if (rt_rq->tg) 1108 start_rt_bandwidth(&rt_rq->tg->rt_bandwidth); 1109 } 1110 1111 static void 1112 dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 1113 { 1114 if (rt_se_boosted(rt_se)) 1115 rt_rq->rt_nr_boosted--; 1116 1117 WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted); 1118 } 1119 1120 #else /* CONFIG_RT_GROUP_SCHED */ 1121 1122 static void 1123 inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 1124 { 1125 start_rt_bandwidth(&def_rt_bandwidth); 1126 } 1127 1128 static inline 1129 void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {} 1130 1131 #endif /* CONFIG_RT_GROUP_SCHED */ 1132 1133 static inline 1134 unsigned int rt_se_nr_running(struct sched_rt_entity *rt_se) 1135 { 1136 struct rt_rq *group_rq = group_rt_rq(rt_se); 1137 1138 if (group_rq) 1139 return group_rq->rt_nr_running; 1140 else 1141 return 1; 1142 } 1143 1144 static inline 1145 void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 1146 { 1147 int prio = rt_se_prio(rt_se); 1148 1149 WARN_ON(!rt_prio(prio)); 1150 rt_rq->rt_nr_running += rt_se_nr_running(rt_se); 1151 1152 inc_rt_prio(rt_rq, prio); 1153 inc_rt_migration(rt_se, rt_rq); 1154 inc_rt_group(rt_se, rt_rq); 1155 } 1156 1157 static inline 1158 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) 1159 { 1160 WARN_ON(!rt_prio(rt_se_prio(rt_se))); 1161 WARN_ON(!rt_rq->rt_nr_running); 1162 rt_rq->rt_nr_running -= rt_se_nr_running(rt_se); 1163 1164 dec_rt_prio(rt_rq, rt_se_prio(rt_se)); 1165 dec_rt_migration(rt_se, rt_rq); 1166 dec_rt_group(rt_se, rt_rq); 1167 } 1168 1169 static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) 1170 { 1171 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 1172 struct 
rt_prio_array *array = &rt_rq->active; 1173 struct rt_rq *group_rq = group_rt_rq(rt_se); 1174 struct list_head *queue = array->queue + rt_se_prio(rt_se); 1175 1176 /* 1177 * Don't enqueue the group if it's throttled, or when empty. 1178 * The latter is a consequence of the former when a child group 1179 * gets throttled and the current group doesn't have any other 1180 * active members. 1181 */ 1182 if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) 1183 return; 1184 1185 if (head) 1186 list_add(&rt_se->run_list, queue); 1187 else 1188 list_add_tail(&rt_se->run_list, queue); 1189 __set_bit(rt_se_prio(rt_se), array->bitmap); 1190 1191 inc_rt_tasks(rt_se, rt_rq); 1192 } 1193 1194 static void __dequeue_rt_entity(struct sched_rt_entity *rt_se) 1195 { 1196 struct rt_rq *rt_rq = rt_rq_of_se(rt_se); 1197 struct rt_prio_array *array = &rt_rq->active; 1198 1199 list_del_init(&rt_se->run_list); 1200 if (list_empty(array->queue + rt_se_prio(rt_se))) 1201 __clear_bit(rt_se_prio(rt_se), array->bitmap); 1202 1203 dec_rt_tasks(rt_se, rt_rq); 1204 } 1205 1206 /* 1207 * Because the prio of an upper entry depends on the lower 1208 * entries, we must remove entries top-down. 1209 */ 1210 static void dequeue_rt_stack(struct sched_rt_entity *rt_se) 1211 { 1212 struct sched_rt_entity *back = NULL; 1213 1214 for_each_sched_rt_entity(rt_se) { 1215 rt_se->back = back; 1216 back = rt_se; 1217 } 1218 1219 dequeue_top_rt_rq(rt_rq_of_se(back)); 1220 1221 for (rt_se = back; rt_se; rt_se = rt_se->back) { 1222 if (on_rt_rq(rt_se)) 1223 __dequeue_rt_entity(rt_se); 1224 } 1225 } 1226 1227 static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head) 1228 { 1229 struct rq *rq = rq_of_rt_se(rt_se); 1230 1231 dequeue_rt_stack(rt_se); 1232 for_each_sched_rt_entity(rt_se) 1233 __enqueue_rt_entity(rt_se, head); 1234 enqueue_top_rt_rq(&rq->rt); 1235 } 1236 1237 static void dequeue_rt_entity(struct sched_rt_entity *rt_se) 1238 { 1239 struct rq *rq = rq_of_rt_se(rt_se); 1240 1241 dequeue_rt_stack(rt_se); 1242 1243 for_each_sched_rt_entity(rt_se) { 1244 struct rt_rq *rt_rq = group_rt_rq(rt_se); 1245 1246 if (rt_rq && rt_rq->rt_nr_running) 1247 __enqueue_rt_entity(rt_se, false); 1248 } 1249 enqueue_top_rt_rq(&rq->rt); 1250 } 1251 1252 /* 1253 * Adding/removing a task to/from a priority array: 1254 */ 1255 static void 1256 enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) 1257 { 1258 struct sched_rt_entity *rt_se = &p->rt; 1259 1260 if (flags & ENQUEUE_WAKEUP) 1261 rt_se->timeout = 0; 1262 1263 enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD); 1264 1265 if (!task_current(rq, p) && p->nr_cpus_allowed > 1) 1266 enqueue_pushable_task(rq, p); 1267 } 1268 1269 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags) 1270 { 1271 struct sched_rt_entity *rt_se = &p->rt; 1272 1273 update_curr_rt(rq); 1274 dequeue_rt_entity(rt_se); 1275 1276 dequeue_pushable_task(rq, p); 1277 } 1278 1279 /* 1280 * Put task to the head or the end of the run list without the overhead of 1281 * dequeue followed by enqueue. 
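 * Only the position within the priority list changes; the prio bitmap and the rt_nr_running accounting stay untouched.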
1282 */ 1283 static void 1284 requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head) 1285 { 1286 if (on_rt_rq(rt_se)) { 1287 struct rt_prio_array *array = &rt_rq->active; 1288 struct list_head *queue = array->queue + rt_se_prio(rt_se); 1289 1290 if (head) 1291 list_move(&rt_se->run_list, queue); 1292 else 1293 list_move_tail(&rt_se->run_list, queue); 1294 } 1295 } 1296 1297 static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head) 1298 { 1299 struct sched_rt_entity *rt_se = &p->rt; 1300 struct rt_rq *rt_rq; 1301 1302 for_each_sched_rt_entity(rt_se) { 1303 rt_rq = rt_rq_of_se(rt_se); 1304 requeue_rt_entity(rt_rq, rt_se, head); 1305 } 1306 } 1307 1308 static void yield_task_rt(struct rq *rq) 1309 { 1310 requeue_task_rt(rq, rq->curr, 0); 1311 } 1312 1313 #ifdef CONFIG_SMP 1314 static int find_lowest_rq(struct task_struct *task); 1315 1316 static int 1317 select_task_rq_rt(struct task_struct *p, int cpu, int sd_flag, int flags) 1318 { 1319 struct task_struct *curr; 1320 struct rq *rq; 1321 1322 /* For anything but wake ups, just return the task_cpu */ 1323 if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK) 1324 goto out; 1325 1326 rq = cpu_rq(cpu); 1327 1328 rcu_read_lock(); 1329 curr = READ_ONCE(rq->curr); /* unlocked access */ 1330 1331 /* 1332 * If the current task on @p's runqueue is an RT task, then 1333 * try to see if we can wake this RT task up on another 1334 * runqueue. Otherwise simply start this RT task 1335 * on its current runqueue. 1336 * 1337 * We want to avoid overloading runqueues. If the woken 1338 * task is a higher priority, then it will stay on this CPU 1339 * and the lower prio task should be moved to another CPU. 1340 * Even though this will probably make the lower prio task 1341 * lose its cache, we do not want to bounce a higher task 1342 * around just because it gave up its CPU, perhaps for a 1343 * lock? 1344 * 1345 * For equal prio tasks, we just let the scheduler sort it out. 1346 * 1347 * Otherwise, just let it ride on the affined RQ and the 1348 * post-schedule router will push the preempted task away 1349 * 1350 * This test is optimistic, if we get it wrong the load-balancer 1351 * will have to sort it out. 1352 */ 1353 if (curr && unlikely(rt_task(curr)) && 1354 (curr->nr_cpus_allowed < 2 || 1355 curr->prio <= p->prio)) { 1356 int target = find_lowest_rq(p); 1357 1358 /* 1359 * Don't bother moving it if the destination CPU is 1360 * not running a lower priority task. 1361 */ 1362 if (target != -1 && 1363 p->prio < cpu_rq(target)->rt.highest_prio.curr) 1364 cpu = target; 1365 } 1366 rcu_read_unlock(); 1367 1368 out: 1369 return cpu; 1370 } 1371 1372 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) 1373 { 1374 /* 1375 * Current can't be migrated, useless to reschedule, 1376 * let's hope p can move out. 1377 */ 1378 if (rq->curr->nr_cpus_allowed == 1 || 1379 !cpupri_find(&rq->rd->cpupri, rq->curr, NULL)) 1380 return; 1381 1382 /* 1383 * p is migratable, so let's not schedule it and 1384 * see if it is pushed or pulled somewhere else. 
1385 */ 1386 if (p->nr_cpus_allowed != 1 1387 && cpupri_find(&rq->rd->cpupri, p, NULL)) 1388 return; 1389 1390 /* 1391 * There appears to be other cpus that can accept 1392 * current and none to run 'p', so lets reschedule 1393 * to try and push current away: 1394 */ 1395 requeue_task_rt(rq, p, 1); 1396 resched_curr(rq); 1397 } 1398 1399 #endif /* CONFIG_SMP */ 1400 1401 /* 1402 * Preempt the current task with a newly woken task if needed: 1403 */ 1404 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags) 1405 { 1406 if (p->prio < rq->curr->prio) { 1407 resched_curr(rq); 1408 return; 1409 } 1410 1411 #ifdef CONFIG_SMP 1412 /* 1413 * If: 1414 * 1415 * - the newly woken task is of equal priority to the current task 1416 * - the newly woken task is non-migratable while current is migratable 1417 * - current will be preempted on the next reschedule 1418 * 1419 * we should check to see if current can readily move to a different 1420 * cpu. If so, we will reschedule to allow the push logic to try 1421 * to move current somewhere else, making room for our non-migratable 1422 * task. 1423 */ 1424 if (p->prio == rq->curr->prio && !test_tsk_need_resched(rq->curr)) 1425 check_preempt_equal_prio(rq, p); 1426 #endif 1427 } 1428 1429 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, 1430 struct rt_rq *rt_rq) 1431 { 1432 struct rt_prio_array *array = &rt_rq->active; 1433 struct sched_rt_entity *next = NULL; 1434 struct list_head *queue; 1435 int idx; 1436 1437 idx = sched_find_first_bit(array->bitmap); 1438 BUG_ON(idx >= MAX_RT_PRIO); 1439 1440 queue = array->queue + idx; 1441 next = list_entry(queue->next, struct sched_rt_entity, run_list); 1442 1443 return next; 1444 } 1445 1446 static struct task_struct *_pick_next_task_rt(struct rq *rq) 1447 { 1448 struct sched_rt_entity *rt_se; 1449 struct task_struct *p; 1450 struct rt_rq *rt_rq = &rq->rt; 1451 1452 do { 1453 rt_se = pick_next_rt_entity(rq, rt_rq); 1454 BUG_ON(!rt_se); 1455 rt_rq = group_rt_rq(rt_se); 1456 } while (rt_rq); 1457 1458 p = rt_task_of(rt_se); 1459 p->se.exec_start = rq_clock_task(rq); 1460 1461 return p; 1462 } 1463 1464 static struct task_struct * 1465 pick_next_task_rt(struct rq *rq, struct task_struct *prev) 1466 { 1467 struct task_struct *p; 1468 struct rt_rq *rt_rq = &rq->rt; 1469 1470 if (need_pull_rt_task(rq, prev)) { 1471 /* 1472 * This is OK, because current is on_cpu, which avoids it being 1473 * picked for load-balance and preemption/IRQs are still 1474 * disabled avoiding further scheduler activity on it and we're 1475 * being very careful to re-start the picking loop. 1476 */ 1477 lockdep_unpin_lock(&rq->lock); 1478 pull_rt_task(rq); 1479 lockdep_pin_lock(&rq->lock); 1480 /* 1481 * pull_rt_task() can drop (and re-acquire) rq->lock; this 1482 * means a dl or stop task can slip in, in which case we need 1483 * to re-start task selection. 1484 */ 1485 if (unlikely((rq->stop && task_on_rq_queued(rq->stop)) || 1486 rq->dl.dl_nr_running)) 1487 return RETRY_TASK; 1488 } 1489 1490 /* 1491 * We may dequeue prev's rt_rq in put_prev_task(). 1492 * So, we update time before rt_nr_running check. 
1493 */ 1494 if (prev->sched_class == &rt_sched_class) 1495 update_curr_rt(rq); 1496 1497 if (!rt_rq->rt_queued) 1498 return NULL; 1499 1500 put_prev_task(rq, prev); 1501 1502 p = _pick_next_task_rt(rq); 1503 1504 /* The running task is never eligible for pushing */ 1505 dequeue_pushable_task(rq, p); 1506 1507 queue_push_tasks(rq); 1508 1509 return p; 1510 } 1511 1512 static void put_prev_task_rt(struct rq *rq, struct task_struct *p) 1513 { 1514 update_curr_rt(rq); 1515 1516 /* 1517 * The previous task needs to be made eligible for pushing 1518 * if it is still active 1519 */ 1520 if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) 1521 enqueue_pushable_task(rq, p); 1522 } 1523 1524 #ifdef CONFIG_SMP 1525 1526 /* Only try algorithms three times */ 1527 #define RT_MAX_TRIES 3 1528 1529 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) 1530 { 1531 if (!task_running(rq, p) && 1532 cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) 1533 return 1; 1534 return 0; 1535 } 1536 1537 /* 1538 * Return the highest pushable rq's task, which is suitable to be executed 1539 * on the cpu, NULL otherwise 1540 */ 1541 static struct task_struct *pick_highest_pushable_task(struct rq *rq, int cpu) 1542 { 1543 struct plist_head *head = &rq->rt.pushable_tasks; 1544 struct task_struct *p; 1545 1546 if (!has_pushable_tasks(rq)) 1547 return NULL; 1548 1549 plist_for_each_entry(p, head, pushable_tasks) { 1550 if (pick_rt_task(rq, p, cpu)) 1551 return p; 1552 } 1553 1554 return NULL; 1555 } 1556 1557 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); 1558 1559 static int find_lowest_rq(struct task_struct *task) 1560 { 1561 struct sched_domain *sd; 1562 struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask); 1563 int this_cpu = smp_processor_id(); 1564 int cpu = task_cpu(task); 1565 1566 /* Make sure the mask is initialized first */ 1567 if (unlikely(!lowest_mask)) 1568 return -1; 1569 1570 if (task->nr_cpus_allowed == 1) 1571 return -1; /* No other targets possible */ 1572 1573 if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) 1574 return -1; /* No targets found */ 1575 1576 /* 1577 * At this point we have built a mask of cpus representing the 1578 * lowest priority tasks in the system. Now we want to elect 1579 * the best one based on our affinity and topology. 1580 * 1581 * We prioritize the last cpu that the task executed on since 1582 * it is most likely cache-hot in that location. 1583 */ 1584 if (cpumask_test_cpu(cpu, lowest_mask)) 1585 return cpu; 1586 1587 /* 1588 * Otherwise, we consult the sched_domains span maps to figure 1589 * out which cpu is logically closest to our hot cache data. 1590 */ 1591 if (!cpumask_test_cpu(this_cpu, lowest_mask)) 1592 this_cpu = -1; /* Skip this_cpu opt if not among lowest */ 1593 1594 rcu_read_lock(); 1595 for_each_domain(cpu, sd) { 1596 if (sd->flags & SD_WAKE_AFFINE) { 1597 int best_cpu; 1598 1599 /* 1600 * "this_cpu" is cheaper to preempt than a 1601 * remote processor. 1602 */ 1603 if (this_cpu != -1 && 1604 cpumask_test_cpu(this_cpu, sched_domain_span(sd))) { 1605 rcu_read_unlock(); 1606 return this_cpu; 1607 } 1608 1609 best_cpu = cpumask_first_and(lowest_mask, 1610 sched_domain_span(sd)); 1611 if (best_cpu < nr_cpu_ids) { 1612 rcu_read_unlock(); 1613 return best_cpu; 1614 } 1615 } 1616 } 1617 rcu_read_unlock(); 1618 1619 /* 1620 * And finally, if there were no matches within the domains 1621 * just give the caller *something* to work with from the compatible 1622 * locations. 
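 * Prefer this_cpu if it is still in the lowest mask, otherwise any CPU from the mask, and -1 only if the mask turned out empty.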
1623 */ 1624 if (this_cpu != -1) 1625 return this_cpu; 1626 1627 cpu = cpumask_any(lowest_mask); 1628 if (cpu < nr_cpu_ids) 1629 return cpu; 1630 return -1; 1631 } 1632 1633 /* Will lock the rq it finds */ 1634 static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) 1635 { 1636 struct rq *lowest_rq = NULL; 1637 int tries; 1638 int cpu; 1639 1640 for (tries = 0; tries < RT_MAX_TRIES; tries++) { 1641 cpu = find_lowest_rq(task); 1642 1643 if ((cpu == -1) || (cpu == rq->cpu)) 1644 break; 1645 1646 lowest_rq = cpu_rq(cpu); 1647 1648 if (lowest_rq->rt.highest_prio.curr <= task->prio) { 1649 /* 1650 * Target rq has tasks of equal or higher priority, 1651 * retrying does not release any lock and is unlikely 1652 * to yield a different result. 1653 */ 1654 lowest_rq = NULL; 1655 break; 1656 } 1657 1658 /* if the prio of this runqueue changed, try again */ 1659 if (double_lock_balance(rq, lowest_rq)) { 1660 /* 1661 * We had to unlock the run queue. In 1662 * the mean time, task could have 1663 * migrated already or had its affinity changed. 1664 * Also make sure that it wasn't scheduled on its rq. 1665 */ 1666 if (unlikely(task_rq(task) != rq || 1667 !cpumask_test_cpu(lowest_rq->cpu, 1668 tsk_cpus_allowed(task)) || 1669 task_running(rq, task) || 1670 !task_on_rq_queued(task))) { 1671 1672 double_unlock_balance(rq, lowest_rq); 1673 lowest_rq = NULL; 1674 break; 1675 } 1676 } 1677 1678 /* If this rq is still suitable use it. */ 1679 if (lowest_rq->rt.highest_prio.curr > task->prio) 1680 break; 1681 1682 /* try again */ 1683 double_unlock_balance(rq, lowest_rq); 1684 lowest_rq = NULL; 1685 } 1686 1687 return lowest_rq; 1688 } 1689 1690 static struct task_struct *pick_next_pushable_task(struct rq *rq) 1691 { 1692 struct task_struct *p; 1693 1694 if (!has_pushable_tasks(rq)) 1695 return NULL; 1696 1697 p = plist_first_entry(&rq->rt.pushable_tasks, 1698 struct task_struct, pushable_tasks); 1699 1700 BUG_ON(rq->cpu != task_cpu(p)); 1701 BUG_ON(task_current(rq, p)); 1702 BUG_ON(p->nr_cpus_allowed <= 1); 1703 1704 BUG_ON(!task_on_rq_queued(p)); 1705 BUG_ON(!rt_task(p)); 1706 1707 return p; 1708 } 1709 1710 /* 1711 * If the current CPU has more than one RT task, see if the non 1712 * running task can migrate over to a CPU that is running a task 1713 * of lesser priority. 1714 */ 1715 static int push_rt_task(struct rq *rq) 1716 { 1717 struct task_struct *next_task; 1718 struct rq *lowest_rq; 1719 int ret = 0; 1720 1721 if (!rq->rt.overloaded) 1722 return 0; 1723 1724 next_task = pick_next_pushable_task(rq); 1725 if (!next_task) 1726 return 0; 1727 1728 retry: 1729 if (unlikely(next_task == rq->curr)) { 1730 WARN_ON(1); 1731 return 0; 1732 } 1733 1734 /* 1735 * It's possible that the next_task slipped in of 1736 * higher priority than current. If that's the case 1737 * just reschedule current. 1738 */ 1739 if (unlikely(next_task->prio < rq->curr->prio)) { 1740 resched_curr(rq); 1741 return 0; 1742 } 1743 1744 /* We might release rq lock */ 1745 get_task_struct(next_task); 1746 1747 /* find_lock_lowest_rq locks the rq if found */ 1748 lowest_rq = find_lock_lowest_rq(next_task, rq); 1749 if (!lowest_rq) { 1750 struct task_struct *task; 1751 /* 1752 * find_lock_lowest_rq releases rq->lock 1753 * so it is possible that next_task has migrated. 1754 * 1755 * We need to make sure that the task is still on the same 1756 * run-queue and is also still the next task eligible for 1757 * pushing. 
1758 */ 1759 task = pick_next_pushable_task(rq); 1760 if (task_cpu(next_task) == rq->cpu && task == next_task) { 1761 /* 1762 * The task hasn't migrated, and is still the next 1763 * eligible task, but we failed to find a run-queue 1764 * to push it to. Do not retry in this case, since 1765 * other cpus will pull from us when ready. 1766 */ 1767 goto out; 1768 } 1769 1770 if (!task) 1771 /* No more tasks, just exit */ 1772 goto out; 1773 1774 /* 1775 * Something has shifted, try again. 1776 */ 1777 put_task_struct(next_task); 1778 next_task = task; 1779 goto retry; 1780 } 1781 1782 deactivate_task(rq, next_task, 0); 1783 set_task_cpu(next_task, lowest_rq->cpu); 1784 activate_task(lowest_rq, next_task, 0); 1785 ret = 1; 1786 1787 resched_curr(lowest_rq); 1788 1789 double_unlock_balance(rq, lowest_rq); 1790 1791 out: 1792 put_task_struct(next_task); 1793 1794 return ret; 1795 } 1796 1797 static void push_rt_tasks(struct rq *rq) 1798 { 1799 /* push_rt_task will return true if it moved an RT */ 1800 while (push_rt_task(rq)) 1801 ; 1802 } 1803 1804 #ifdef HAVE_RT_PUSH_IPI 1805 /* 1806 * The search for the next cpu always starts at rq->cpu and ends 1807 * when we reach rq->cpu again. It will never return rq->cpu. 1808 * This returns the next cpu to check, or nr_cpu_ids if the loop 1809 * is complete. 1810 * 1811 * rq->rt.push_cpu holds the last cpu returned by this function, 1812 * or if this is the first instance, it must hold rq->cpu. 1813 */ 1814 static int rto_next_cpu(struct rq *rq) 1815 { 1816 int prev_cpu = rq->rt.push_cpu; 1817 int cpu; 1818 1819 cpu = cpumask_next(prev_cpu, rq->rd->rto_mask); 1820 1821 /* 1822 * If the previous cpu is less than the rq's CPU, then it already 1823 * passed the end of the mask, and has started from the beginning. 1824 * We end if the next CPU is greater or equal to rq's CPU. 1825 */ 1826 if (prev_cpu < rq->cpu) { 1827 if (cpu >= rq->cpu) 1828 return nr_cpu_ids; 1829 1830 } else if (cpu >= nr_cpu_ids) { 1831 /* 1832 * We passed the end of the mask, start at the beginning. 1833 * If the result is greater or equal to the rq's CPU, then 1834 * the loop is finished. 1835 */ 1836 cpu = cpumask_first(rq->rd->rto_mask); 1837 if (cpu >= rq->cpu) 1838 return nr_cpu_ids; 1839 } 1840 rq->rt.push_cpu = cpu; 1841 1842 /* Return cpu to let the caller know if the loop is finished or not */ 1843 return cpu; 1844 } 1845 1846 static int find_next_push_cpu(struct rq *rq) 1847 { 1848 struct rq *next_rq; 1849 int cpu; 1850 1851 while (1) { 1852 cpu = rto_next_cpu(rq); 1853 if (cpu >= nr_cpu_ids) 1854 break; 1855 next_rq = cpu_rq(cpu); 1856 1857 /* Make sure the next rq can push to this rq */ 1858 if (next_rq->rt.highest_prio.next < rq->rt.highest_prio.curr) 1859 break; 1860 } 1861 1862 return cpu; 1863 } 1864 1865 #define RT_PUSH_IPI_EXECUTING 1 1866 #define RT_PUSH_IPI_RESTART 2 1867 1868 static void tell_cpu_to_push(struct rq *rq) 1869 { 1870 int cpu; 1871 1872 if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) { 1873 raw_spin_lock(&rq->rt.push_lock); 1874 /* Make sure it's still executing */ 1875 if (rq->rt.push_flags & RT_PUSH_IPI_EXECUTING) { 1876 /* 1877 * Tell the IPI to restart the loop as things have 1878 * changed since it started. 
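 * The IPI handler (try_to_push_tasks()) notices RT_PUSH_IPI_RESTART, resets push_cpu back to this rq's CPU and resumes the scan from there.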
1879 */ 1880 rq->rt.push_flags |= RT_PUSH_IPI_RESTART; 1881 raw_spin_unlock(&rq->rt.push_lock); 1882 return; 1883 } 1884 raw_spin_unlock(&rq->rt.push_lock); 1885 } 1886 1887 /* When here, there's no IPI going around */ 1888 1889 rq->rt.push_cpu = rq->cpu; 1890 cpu = find_next_push_cpu(rq); 1891 if (cpu >= nr_cpu_ids) 1892 return; 1893 1894 rq->rt.push_flags = RT_PUSH_IPI_EXECUTING; 1895 1896 irq_work_queue_on(&rq->rt.push_work, cpu); 1897 } 1898 1899 /* Called from hardirq context */ 1900 static void try_to_push_tasks(void *arg) 1901 { 1902 struct rt_rq *rt_rq = arg; 1903 struct rq *rq, *src_rq; 1904 int this_cpu; 1905 int cpu; 1906 1907 this_cpu = rt_rq->push_cpu; 1908 1909 /* Paranoid check */ 1910 BUG_ON(this_cpu != smp_processor_id()); 1911 1912 rq = cpu_rq(this_cpu); 1913 src_rq = rq_of_rt_rq(rt_rq); 1914 1915 again: 1916 if (has_pushable_tasks(rq)) { 1917 raw_spin_lock(&rq->lock); 1918 push_rt_task(rq); 1919 raw_spin_unlock(&rq->lock); 1920 } 1921 1922 /* Pass the IPI to the next rt overloaded queue */ 1923 raw_spin_lock(&rt_rq->push_lock); 1924 /* 1925 * If the source queue changed since the IPI went out, 1926 * we need to restart the search from that CPU again. 1927 */ 1928 if (rt_rq->push_flags & RT_PUSH_IPI_RESTART) { 1929 rt_rq->push_flags &= ~RT_PUSH_IPI_RESTART; 1930 rt_rq->push_cpu = src_rq->cpu; 1931 } 1932 1933 cpu = find_next_push_cpu(src_rq); 1934 1935 if (cpu >= nr_cpu_ids) 1936 rt_rq->push_flags &= ~RT_PUSH_IPI_EXECUTING; 1937 raw_spin_unlock(&rt_rq->push_lock); 1938 1939 if (cpu >= nr_cpu_ids) 1940 return; 1941 1942 /* 1943 * It is possible that a restart caused this CPU to be 1944 * chosen again. Don't bother with an IPI, just see if we 1945 * have more to push. 1946 */ 1947 if (unlikely(cpu == rq->cpu)) 1948 goto again; 1949 1950 /* Try the next RT overloaded CPU */ 1951 irq_work_queue_on(&rt_rq->push_work, cpu); 1952 } 1953 1954 static void push_irq_work_func(struct irq_work *work) 1955 { 1956 struct rt_rq *rt_rq = container_of(work, struct rt_rq, push_work); 1957 1958 try_to_push_tasks(rt_rq); 1959 } 1960 #endif /* HAVE_RT_PUSH_IPI */ 1961 1962 static void pull_rt_task(struct rq *this_rq) 1963 { 1964 int this_cpu = this_rq->cpu, cpu; 1965 bool resched = false; 1966 struct task_struct *p; 1967 struct rq *src_rq; 1968 1969 if (likely(!rt_overloaded(this_rq))) 1970 return; 1971 1972 /* 1973 * Match the barrier from rt_set_overloaded; this guarantees that if we 1974 * see overloaded we must also see the rto_mask bit. 1975 */ 1976 smp_rmb(); 1977 1978 #ifdef HAVE_RT_PUSH_IPI 1979 if (sched_feat(RT_PUSH_IPI)) { 1980 tell_cpu_to_push(this_rq); 1981 return; 1982 } 1983 #endif 1984 1985 for_each_cpu(cpu, this_rq->rd->rto_mask) { 1986 if (this_cpu == cpu) 1987 continue; 1988 1989 src_rq = cpu_rq(cpu); 1990 1991 /* 1992 * Don't bother taking the src_rq->lock if the next highest 1993 * task is known to be lower-priority than our current task. 1994 * This may look racy, but if this value is about to go 1995 * logically higher, the src_rq will push this task away. 1996 * And if its going logically lower, we do not care 1997 */ 1998 if (src_rq->rt.highest_prio.next >= 1999 this_rq->rt.highest_prio.curr) 2000 continue; 2001 2002 /* 2003 * We can potentially drop this_rq's lock in 2004 * double_lock_balance, and another CPU could 2005 * alter this_rq 2006 */ 2007 double_lock_balance(this_rq, src_rq); 2008 2009 /* 2010 * We can pull only a task, which is pushable 2011 * on its rq, and no others. 
2012 */ 2013 p = pick_highest_pushable_task(src_rq, this_cpu); 2014 2015 /* 2016 * Do we have an RT task that preempts 2017 * the to-be-scheduled task? 2018 */ 2019 if (p && (p->prio < this_rq->rt.highest_prio.curr)) { 2020 WARN_ON(p == src_rq->curr); 2021 WARN_ON(!task_on_rq_queued(p)); 2022 2023 /* 2024 * There's a chance that p is higher in priority 2025 * than what's currently running on its cpu. 2026 * This is just that p is waking up and hasn't 2027 * had a chance to schedule. We only pull 2028 * p if it is lower in priority than the 2029 * current task on the run queue. 2030 */ 2031 if (p->prio < src_rq->curr->prio) 2032 goto skip; 2033 2034 resched = true; 2035 2036 deactivate_task(src_rq, p, 0); 2037 set_task_cpu(p, this_cpu); 2038 activate_task(this_rq, p, 0); 2039 /* 2040 * We continue with the search, just in 2041 * case there's an even higher prio task 2042 * in another runqueue. (low likelihood 2043 * but possible) 2044 */ 2045 } 2046 skip: 2047 double_unlock_balance(this_rq, src_rq); 2048 } 2049 2050 if (resched) 2051 resched_curr(this_rq); 2052 } 2053 2054 /* 2055 * If we are not running and we are not going to reschedule soon, we should 2056 * try to push tasks away now. 2057 */ 2058 static void task_woken_rt(struct rq *rq, struct task_struct *p) 2059 { 2060 if (!task_running(rq, p) && 2061 !test_tsk_need_resched(rq->curr) && 2062 p->nr_cpus_allowed > 1 && 2063 (dl_task(rq->curr) || rt_task(rq->curr)) && 2064 (rq->curr->nr_cpus_allowed < 2 || 2065 rq->curr->prio <= p->prio)) 2066 push_rt_tasks(rq); 2067 } 2068 2069 /* Assumes rq->lock is held */ 2070 static void rq_online_rt(struct rq *rq) 2071 { 2072 if (rq->rt.overloaded) 2073 rt_set_overload(rq); 2074 2075 __enable_runtime(rq); 2076 2077 cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr); 2078 } 2079 2080 /* Assumes rq->lock is held */ 2081 static void rq_offline_rt(struct rq *rq) 2082 { 2083 if (rq->rt.overloaded) 2084 rt_clear_overload(rq); 2085 2086 __disable_runtime(rq); 2087 2088 cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID); 2089 } 2090 2091 /* 2092 * When switching from the rt queue, we bring ourselves to a position 2093 * where we might want to pull RT tasks from other runqueues. 2094 */ 2095 static void switched_from_rt(struct rq *rq, struct task_struct *p) 2096 { 2097 /* 2098 * If there are other RT tasks then we will reschedule 2099 * and the scheduling of the other RT tasks will handle 2100 * the balancing. But if we are the last RT task 2101 * we may need to handle the pulling of RT tasks 2102 * now. 2103 */ 2104 if (!task_on_rq_queued(p) || rq->rt.rt_nr_running) 2105 return; 2106 2107 queue_pull_task(rq); 2108 } 2109 2110 void __init init_sched_rt_class(void) 2111 { 2112 unsigned int i; 2113 2114 for_each_possible_cpu(i) { 2115 zalloc_cpumask_var_node(&per_cpu(local_cpu_mask, i), 2116 GFP_KERNEL, cpu_to_node(i)); 2117 } 2118 } 2119 #endif /* CONFIG_SMP */ 2120 2121 /* 2122 * When switching a task to RT, we may overload the runqueue 2123 * with RT tasks. In this case we try to push them off to 2124 * other runqueues. 2125 */ 2126 static void switched_to_rt(struct rq *rq, struct task_struct *p) 2127 { 2128 /* 2129 * If we are already running, then there's nothing 2130 * that needs to be done. But if we are not running 2131 * we may need to preempt the current running task. 2132 * If that current running task is also an RT task 2133 * then see if we can move to another run queue. 
2134 */ 2135 if (task_on_rq_queued(p) && rq->curr != p) { 2136 #ifdef CONFIG_SMP 2137 if (p->nr_cpus_allowed > 1 && rq->rt.overloaded) 2138 queue_push_tasks(rq); 2139 #else 2140 if (p->prio < rq->curr->prio) 2141 resched_curr(rq); 2142 #endif /* CONFIG_SMP */ 2143 } 2144 } 2145 2146 /* 2147 * Priority of the task has changed. This may cause 2148 * us to initiate a push or pull. 2149 */ 2150 static void 2151 prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) 2152 { 2153 if (!task_on_rq_queued(p)) 2154 return; 2155 2156 if (rq->curr == p) { 2157 #ifdef CONFIG_SMP 2158 /* 2159 * If our priority decreases while running, we 2160 * may need to pull tasks to this runqueue. 2161 */ 2162 if (oldprio < p->prio) 2163 queue_pull_task(rq); 2164 2165 /* 2166 * If there's a higher priority task waiting to run 2167 * then reschedule. 2168 */ 2169 if (p->prio > rq->rt.highest_prio.curr) 2170 resched_curr(rq); 2171 #else 2172 /* For UP simply resched on drop of prio */ 2173 if (oldprio < p->prio) 2174 resched_curr(rq); 2175 #endif /* CONFIG_SMP */ 2176 } else { 2177 /* 2178 * This task is not running, but if it is 2179 * greater than the current running task 2180 * then reschedule. 2181 */ 2182 if (p->prio < rq->curr->prio) 2183 resched_curr(rq); 2184 } 2185 } 2186 2187 static void watchdog(struct rq *rq, struct task_struct *p) 2188 { 2189 unsigned long soft, hard; 2190 2191 /* max may change after cur was read, this will be fixed next tick */ 2192 soft = task_rlimit(p, RLIMIT_RTTIME); 2193 hard = task_rlimit_max(p, RLIMIT_RTTIME); 2194 2195 if (soft != RLIM_INFINITY) { 2196 unsigned long next; 2197 2198 if (p->rt.watchdog_stamp != jiffies) { 2199 p->rt.timeout++; 2200 p->rt.watchdog_stamp = jiffies; 2201 } 2202 2203 next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); 2204 if (p->rt.timeout > next) 2205 p->cputime_expires.sched_exp = p->se.sum_exec_runtime; 2206 } 2207 } 2208 2209 static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) 2210 { 2211 struct sched_rt_entity *rt_se = &p->rt; 2212 2213 update_curr_rt(rq); 2214 2215 watchdog(rq, p); 2216 2217 /* 2218 * RR tasks need a special form of timeslice management. 2219 * FIFO tasks have no timeslices. 
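 * When an RR task's slice runs out it gets a fresh sched_rr_timeslice and is requeued to the tail of its priority list, unless it and all of its ancestor entities are the only element on their queues.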
2220 */ 2221 if (p->policy != SCHED_RR) 2222 return; 2223 2224 if (--p->rt.time_slice) 2225 return; 2226 2227 p->rt.time_slice = sched_rr_timeslice; 2228 2229 /* 2230 * Requeue to the end of queue if we (and all of our ancestors) are not 2231 * the only element on the queue 2232 */ 2233 for_each_sched_rt_entity(rt_se) { 2234 if (rt_se->run_list.prev != rt_se->run_list.next) { 2235 requeue_task_rt(rq, p, 0); 2236 resched_curr(rq); 2237 return; 2238 } 2239 } 2240 } 2241 2242 static void set_curr_task_rt(struct rq *rq) 2243 { 2244 struct task_struct *p = rq->curr; 2245 2246 p->se.exec_start = rq_clock_task(rq); 2247 2248 /* The running task is never eligible for pushing */ 2249 dequeue_pushable_task(rq, p); 2250 } 2251 2252 static unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task) 2253 { 2254 /* 2255 * Time slice is 0 for SCHED_FIFO tasks 2256 */ 2257 if (task->policy == SCHED_RR) 2258 return sched_rr_timeslice; 2259 else 2260 return 0; 2261 } 2262 2263 const struct sched_class rt_sched_class = { 2264 .next = &fair_sched_class, 2265 .enqueue_task = enqueue_task_rt, 2266 .dequeue_task = dequeue_task_rt, 2267 .yield_task = yield_task_rt, 2268 2269 .check_preempt_curr = check_preempt_curr_rt, 2270 2271 .pick_next_task = pick_next_task_rt, 2272 .put_prev_task = put_prev_task_rt, 2273 2274 #ifdef CONFIG_SMP 2275 .select_task_rq = select_task_rq_rt, 2276 2277 .set_cpus_allowed = set_cpus_allowed_common, 2278 .rq_online = rq_online_rt, 2279 .rq_offline = rq_offline_rt, 2280 .task_woken = task_woken_rt, 2281 .switched_from = switched_from_rt, 2282 #endif 2283 2284 .set_curr_task = set_curr_task_rt, 2285 .task_tick = task_tick_rt, 2286 2287 .get_rr_interval = get_rr_interval_rt, 2288 2289 .prio_changed = prio_changed_rt, 2290 .switched_to = switched_to_rt, 2291 2292 .update_curr = update_curr_rt, 2293 }; 2294 2295 #ifdef CONFIG_SCHED_DEBUG 2296 extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); 2297 2298 void print_rt_stats(struct seq_file *m, int cpu) 2299 { 2300 rt_rq_iter_t iter; 2301 struct rt_rq *rt_rq; 2302 2303 rcu_read_lock(); 2304 for_each_rt_rq(rt_rq, iter, cpu_rq(cpu)) 2305 print_rt_rq(m, cpu, rt_rq); 2306 rcu_read_unlock(); 2307 } 2308 #endif /* CONFIG_SCHED_DEBUG */ 2309