/*
 * Deadline Scheduling Class (SCHED_DEADLINE)
 *
 * Earliest Deadline First (EDF) + Constant Bandwidth Server (CBS).
 *
 * Tasks that periodically execute their instances for less than their
 * runtime won't miss any of their deadlines.
 * Tasks that are not periodic or sporadic or that try to execute more
 * than their reserved bandwidth will be slowed down (and may potentially
 * miss some of their deadlines), and won't affect any other task.
 *
 * Copyright (C) 2012 Dario Faggioli <raistlin@linux.it>,
 *                    Juri Lelli <juri.lelli@gmail.com>,
 *                    Michael Trimarchi <michael@amarulasolutions.com>,
 *                    Fabio Checconi <fchecconi@gmail.com>
 */
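
/*
 * Illustrative sketch (not part of this file): a userspace task would
 * typically request this policy via the sched_setattr() syscall, declaring
 * runtime, (relative) deadline and period in nanoseconds.  glibc does not
 * wrap the syscall, so a raw syscall() is shown; this assumes the uapi
 * definitions of struct sched_attr and SCHED_DEADLINE are available.
 *
 *      struct sched_attr attr = {
 *              .size           = sizeof(attr),
 *              .sched_policy   = SCHED_DEADLINE,
 *              .sched_runtime  =  10 * 1000 * 1000,    (10ms budget)
 *              .sched_deadline =  30 * 1000 * 1000,    (30ms rel. deadline)
 *              .sched_period   = 100 * 1000 * 1000,    (100ms period)
 *      };
 *
 *      if (syscall(SYS_sched_setattr, 0, &attr, 0))
 *              perror("sched_setattr");
 *
 * Admission control accepts the task only if the requested bandwidth
 * (runtime / period) fits in the available capacity.
 */
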
#include "sched.h"

#include <linux/slab.h>

struct dl_bandwidth def_dl_bandwidth;

static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
{
        return container_of(dl_se, struct task_struct, dl);
}

static inline struct rq *rq_of_dl_rq(struct dl_rq *dl_rq)
{
        return container_of(dl_rq, struct rq, dl);
}

static inline struct dl_rq *dl_rq_of_se(struct sched_dl_entity *dl_se)
{
        struct task_struct *p = dl_task_of(dl_se);
        struct rq *rq = task_rq(p);

        return &rq->dl;
}

static inline int on_dl_rq(struct sched_dl_entity *dl_se)
{
        return !RB_EMPTY_NODE(&dl_se->rb_node);
}

static inline int is_leftmost(struct task_struct *p, struct dl_rq *dl_rq)
{
        struct sched_dl_entity *dl_se = &p->dl;

        return dl_rq->rb_leftmost == &dl_se->rb_node;
}

void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime)
{
        raw_spin_lock_init(&dl_b->dl_runtime_lock);
        dl_b->dl_period = period;
        dl_b->dl_runtime = runtime;
}

void init_dl_bw(struct dl_bw *dl_b)
{
        raw_spin_lock_init(&dl_b->lock);
        raw_spin_lock(&def_dl_bandwidth.dl_runtime_lock);
        if (global_rt_runtime() == RUNTIME_INF)
                dl_b->bw = -1;
        else
                dl_b->bw = to_ratio(global_rt_period(), global_rt_runtime());
        raw_spin_unlock(&def_dl_bandwidth.dl_runtime_lock);
        dl_b->total_bw = 0;
}

void init_dl_rq(struct dl_rq *dl_rq)
{
        dl_rq->rb_root = RB_ROOT;

#ifdef CONFIG_SMP
        /* zero means no -deadline tasks */
        dl_rq->earliest_dl.curr = dl_rq->earliest_dl.next = 0;

        dl_rq->dl_nr_migratory = 0;
        dl_rq->overloaded = 0;
        dl_rq->pushable_dl_tasks_root = RB_ROOT;
#else
        init_dl_bw(&dl_rq->dl_bw);
#endif
}

#ifdef CONFIG_SMP

static inline int dl_overloaded(struct rq *rq)
{
        return atomic_read(&rq->rd->dlo_count);
}

static inline void dl_set_overload(struct rq *rq)
{
        if (!rq->online)
                return;

        cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
        /*
         * Must be visible before the overload count is
         * set (as in sched_rt.c).
         *
         * Matched by the barrier in pull_dl_task().
         */
        smp_wmb();
        atomic_inc(&rq->rd->dlo_count);
}

static inline void dl_clear_overload(struct rq *rq)
{
        if (!rq->online)
                return;

        atomic_dec(&rq->rd->dlo_count);
        cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
}

static void update_dl_migration(struct dl_rq *dl_rq)
{
        if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {
                if (!dl_rq->overloaded) {
                        dl_set_overload(rq_of_dl_rq(dl_rq));
                        dl_rq->overloaded = 1;
                }
        } else if (dl_rq->overloaded) {
                dl_clear_overload(rq_of_dl_rq(dl_rq));
                dl_rq->overloaded = 0;
        }
}

static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
        struct task_struct *p = dl_task_of(dl_se);

        if (p->nr_cpus_allowed > 1)
                dl_rq->dl_nr_migratory++;

        update_dl_migration(dl_rq);
}

static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
        struct task_struct *p = dl_task_of(dl_se);

        if (p->nr_cpus_allowed > 1)
                dl_rq->dl_nr_migratory--;

        update_dl_migration(dl_rq);
}

/*
 * The list of pushable -deadline tasks is not a plist, as in
 * sched_rt.c; it is an rb-tree with tasks ordered by deadline.
 */
static void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
{
        struct dl_rq *dl_rq = &rq->dl;
        struct rb_node **link = &dl_rq->pushable_dl_tasks_root.rb_node;
        struct rb_node *parent = NULL;
        struct task_struct *entry;
        int leftmost = 1;

        BUG_ON(!RB_EMPTY_NODE(&p->pushable_dl_tasks));

        while (*link) {
                parent = *link;
                entry = rb_entry(parent, struct task_struct,
                                 pushable_dl_tasks);
                if (dl_entity_preempt(&p->dl, &entry->dl))
                        link = &parent->rb_left;
                else {
                        link = &parent->rb_right;
                        leftmost = 0;
                }
        }

        if (leftmost) {
                dl_rq->pushable_dl_tasks_leftmost = &p->pushable_dl_tasks;
                dl_rq->earliest_dl.next = p->dl.deadline;
        }

        rb_link_node(&p->pushable_dl_tasks, parent, link);
        rb_insert_color(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
}

static void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
{
        struct dl_rq *dl_rq = &rq->dl;

        if (RB_EMPTY_NODE(&p->pushable_dl_tasks))
                return;

        if (dl_rq->pushable_dl_tasks_leftmost == &p->pushable_dl_tasks) {
                struct rb_node *next_node;

                next_node = rb_next(&p->pushable_dl_tasks);
                dl_rq->pushable_dl_tasks_leftmost = next_node;
                if (next_node) {
                        dl_rq->earliest_dl.next = rb_entry(next_node,
                                struct task_struct, pushable_dl_tasks)->dl.deadline;
                }
        }

        rb_erase(&p->pushable_dl_tasks, &dl_rq->pushable_dl_tasks_root);
        RB_CLEAR_NODE(&p->pushable_dl_tasks);
}

static inline int has_pushable_dl_tasks(struct rq *rq)
{
        return !RB_EMPTY_ROOT(&rq->dl.pushable_dl_tasks_root);
}

static int push_dl_task(struct rq *rq);

static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
{
        return dl_task(prev);
}

static DEFINE_PER_CPU(struct callback_head, dl_push_head);
static DEFINE_PER_CPU(struct callback_head, dl_pull_head);

static void push_dl_tasks(struct rq *);
static void pull_dl_task(struct rq *);

static inline void queue_push_tasks(struct rq *rq)
{
        if (!has_pushable_dl_tasks(rq))
                return;

        queue_balance_callback(rq, &per_cpu(dl_push_head, rq->cpu), push_dl_tasks);
}

static inline void
queue_pull_task(struct rq *rq)
{
        queue_balance_callback(rq, &per_cpu(dl_pull_head, rq->cpu), pull_dl_task);
}

static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq);

static struct rq *dl_task_offline_migration(struct rq *rq, struct task_struct *p)
{
        struct rq *later_rq = NULL;

        later_rq = find_lock_later_rq(p, rq);
        if (!later_rq) {
                int cpu;

                /*
                 * If we cannot preempt any rq, fall back to pick any
                 * online cpu.
                 */
                cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
                if (cpu >= nr_cpu_ids) {
                        /*
                         * Failed to find any suitable cpu.
                         * The task will never come back!
                         */
                        BUG_ON(dl_bandwidth_enabled());

                        /*
                         * If admission control is disabled we
                         * try a little harder to let the task
                         * run.
                         */
                        cpu = cpumask_any(cpu_active_mask);
                }
                later_rq = cpu_rq(cpu);
                double_lock_balance(rq, later_rq);
        }

        set_task_cpu(p, later_rq->cpu);
        double_unlock_balance(later_rq, rq);

        return later_rq;
}

#else

static inline
void enqueue_pushable_dl_task(struct rq *rq, struct task_struct *p)
{
}

static inline
void dequeue_pushable_dl_task(struct rq *rq, struct task_struct *p)
{
}

static inline
void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
}

static inline
void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
}

static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
{
        return false;
}

static inline void pull_dl_task(struct rq *rq)
{
}

static inline void queue_push_tasks(struct rq *rq)
{
}

static inline void queue_pull_task(struct rq *rq)
{
}
#endif /* CONFIG_SMP */

static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags);
static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags);
static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
                                  int flags);

/*
 * We are being explicitly informed that a new instance is starting,
 * and this means that:
 *  - the absolute deadline of the entity has to be placed at
 *    current time + relative deadline;
 *  - the runtime of the entity has to be set to the maximum value.
 *
 * The ability to specify such an event is useful whenever a -deadline
 * entity wants to (try to!) synchronize its behaviour with the scheduler's,
 * and to (try to!) reconcile itself with its own scheduling
 * parameters.
 */
static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
{
        struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
        struct rq *rq = rq_of_dl_rq(dl_rq);

        WARN_ON(dl_se->dl_boosted);
        WARN_ON(dl_time_before(rq_clock(rq), dl_se->deadline));

        /*
         * We are racing with the deadline timer. So, do nothing because
         * the deadline timer handler will take care of properly recharging
         * the runtime and postponing the deadline.
         */
        if (dl_se->dl_throttled)
                return;

        /*
         * We use the regular wall clock time to set deadlines in the
         * future; in fact, we must consider execution overheads (time
         * spent on hardirq context, etc.).
         */
        dl_se->deadline = rq_clock(rq) + dl_se->dl_deadline;
        dl_se->runtime = dl_se->dl_runtime;
}

/*
 * Pure Earliest Deadline First (EDF) scheduling does not deal with the
 * possibility of an entity lasting more than what it declared, and thus
 * exhausting its runtime.
 *
 * Here we are interested in making runtime overrun possible, but we do
 * not want an entity which is misbehaving to affect the scheduling of all
 * other entities.
 * Therefore, a budgeting strategy called Constant Bandwidth Server (CBS)
 * is used, in order to confine each entity within its own bandwidth.
 *
 * This function deals exactly with that, and ensures that when the runtime
 * of an entity is replenished, its deadline is also postponed. That ensures
 * the overrunning entity can't interfere with other entities in the system
 * and can't make them miss their deadlines. Reasons why this kind of overrun
 * could happen are, typically, an entity voluntarily trying to exceed its
 * runtime, or having underestimated it during sched_setattr().
 */
static void replenish_dl_entity(struct sched_dl_entity *dl_se,
                                struct sched_dl_entity *pi_se)
{
        struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
        struct rq *rq = rq_of_dl_rq(dl_rq);

        BUG_ON(pi_se->dl_runtime <= 0);

        /*
         * This could be the case for a !-dl task that is boosted.
         * Just go with full inherited parameters.
         */
        if (dl_se->dl_deadline == 0) {
                dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
                dl_se->runtime = pi_se->dl_runtime;
        }

        if (dl_se->dl_yielded && dl_se->runtime > 0)
                dl_se->runtime = 0;

        /*
         * We keep moving the deadline away until we get some
         * available runtime for the entity. This ensures correct
         * handling of situations where the runtime overrun is
         * arbitrarily large.
         */
        while (dl_se->runtime <= 0) {
                dl_se->deadline += pi_se->dl_period;
                dl_se->runtime += pi_se->dl_runtime;
        }

        /*
         * At this point, the deadline really should be "in
         * the future" with respect to rq->clock. If it's
         * not, we are, for some reason, lagging too much!
         * Anyway, after having warned userspace about that,
         * we still try to keep things running by
         * resetting the deadline and the budget of the
         * entity.
         */
        if (dl_time_before(dl_se->deadline, rq_clock(rq))) {
                printk_deferred_once("sched: DL replenish lagged too much\n");
                dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
                dl_se->runtime = pi_se->dl_runtime;
        }

        if (dl_se->dl_yielded)
                dl_se->dl_yielded = 0;
        if (dl_se->dl_throttled)
                dl_se->dl_throttled = 0;
}

/*
 * Here we check if --at time t-- an entity (which is probably being
 * [re]activated or, in general, enqueued) can use its remaining runtime
 * and its current deadline _without_ exceeding the bandwidth it is
 * assigned (function returns true if it can't). We are in fact applying
 * one of the CBS rules: when a task wakes up, if the residual runtime
 * over residual deadline fits within the allocated bandwidth, then we
 * can keep the current (absolute) deadline and residual budget without
 * disrupting the schedulability of the system. Otherwise, we should
 * refill the runtime and set the deadline a period in the future,
 * because keeping the current (absolute) deadline of the task would
 * result in breaking guarantees promised to other tasks (refer to
 * Documentation/scheduler/sched-deadline.txt for more information).
 *
 * This function returns true if:
 *
 *   runtime / (deadline - t) > dl_runtime / dl_deadline ,
 *
 * IOW we can't recycle current parameters.
 *
 * Notice that the bandwidth check is done against the deadline. For
 * tasks with deadline equal to period this is the same as using
 * dl_period instead of dl_deadline in the equation above.
 */
static bool dl_entity_overflow(struct sched_dl_entity *dl_se,
                               struct sched_dl_entity *pi_se, u64 t)
{
        u64 left, right;

        /*
         * left and right are the two sides of the equation above,
         * after a bit of shuffling to use multiplications instead
         * of divisions.
         *
         * Note that none of the time values involved in the two
         * multiplications are absolute: dl_deadline and dl_runtime
         * are the relative deadline and the maximum runtime of each
         * instance, runtime is the runtime left for the last instance
         * and (deadline - t), since t is rq->clock, is the time left
         * to the (absolute) deadline. Even if overflowing the u64 type
         * is very unlikely to occur in both cases, here we scale down
         * as we want to avoid that risk at all. Scaling down by 10
         * means that we reduce granularity to 1us. We are fine with it,
         * since this is only a true/false check and, anyway, thinking
         * of anything below microseconds resolution is actually fiction
         * (but still we want to give the user that illusion >;).
         */
        left = (pi_se->dl_deadline >> DL_SCALE) * (dl_se->runtime >> DL_SCALE);
        right = ((dl_se->deadline - t) >> DL_SCALE) *
                (pi_se->dl_runtime >> DL_SCALE);

        return dl_time_before(right, left);
}
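
/*
 * Worked example (illustrative numbers only): with dl_runtime = 10ms and
 * dl_deadline = 100ms, suppose the entity wakes up with 5ms of residual
 * runtime and 30ms left to its absolute deadline.  Then:
 *
 *      left  = dl_deadline * runtime        = 100ms *  5ms
 *      right = (deadline - t) * dl_runtime  =  30ms * 10ms
 *
 * left > right, i.e. 5/30 > 10/100, so dl_entity_overflow() returns true
 * and update_dl_entity() below refills the runtime and moves the deadline,
 * instead of letting the task run 5ms within the next 30ms (a higher
 * bandwidth than it was admitted for).
 */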

/*
 * When a -deadline entity is queued back on the runqueue, its runtime and
 * deadline might need updating.
 *
 * The policy here is that we update the deadline of the entity only if:
 *  - the current deadline is in the past,
 *  - using the remaining runtime with the current deadline would make
 *    the entity exceed its bandwidth.
 */
static void update_dl_entity(struct sched_dl_entity *dl_se,
                             struct sched_dl_entity *pi_se)
{
        struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
        struct rq *rq = rq_of_dl_rq(dl_rq);

        if (dl_time_before(dl_se->deadline, rq_clock(rq)) ||
            dl_entity_overflow(dl_se, pi_se, rq_clock(rq))) {
                dl_se->deadline = rq_clock(rq) + pi_se->dl_deadline;
                dl_se->runtime = pi_se->dl_runtime;
        }
}

static inline u64 dl_next_period(struct sched_dl_entity *dl_se)
{
        return dl_se->deadline - dl_se->dl_deadline + dl_se->dl_period;
}
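
/*
 * Example (illustrative): with dl_deadline = 5ms, dl_period = 10ms and an
 * absolute deadline d, the current period began at d - 5ms, so the next
 * period (the replenishment instant used by start_dl_timer() below) is
 * d - 5ms + 10ms.  For implicit-deadline tasks (dl_deadline == dl_period)
 * this is simply the current absolute deadline d.
 */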

/*
 * If the entity depleted all its runtime, and if we want it to sleep
 * while waiting for some new execution time to become available, we
 * set the bandwidth replenishment timer to the replenishment instant
 * and try to activate it.
 *
 * Notice that it is important for the caller to know if the timer
 * actually started or not (i.e., the replenishment instant is in
 * the future or in the past).
 */
static int start_dl_timer(struct task_struct *p)
{
        struct sched_dl_entity *dl_se = &p->dl;
        struct hrtimer *timer = &dl_se->dl_timer;
        struct rq *rq = task_rq(p);
        ktime_t now, act;
        s64 delta;

        lockdep_assert_held(&rq->lock);

        /*
         * We want the timer to fire at the deadline, but considering
         * that it is actually coming from rq->clock and not from
         * hrtimer's time base reading.
         */
        act = ns_to_ktime(dl_next_period(dl_se));
        now = hrtimer_cb_get_time(timer);
        delta = ktime_to_ns(now) - rq_clock(rq);
        act = ktime_add_ns(act, delta);

        /*
         * If the expiry time already passed, e.g., because the value
         * chosen as the deadline is too small, don't even try to
         * start the timer in the past!
         */
        if (ktime_us_delta(act, now) < 0)
                return 0;

        /*
         * !enqueued will guarantee another callback; even if one is already in
         * progress. This ensures a balanced {get,put}_task_struct().
         *
         * The race against __run_timer() clearing the enqueued state is
         * harmless because we're holding task_rq()->lock, therefore the timer
         * expiring after we've done the check will wait on its task_rq_lock()
         * and observe our state.
         */
        if (!hrtimer_is_queued(timer)) {
                get_task_struct(p);
                hrtimer_start(timer, act, HRTIMER_MODE_ABS);
        }

        return 1;
}

/*
 * This is the bandwidth enforcement timer callback. If here, we know
 * a task is not on its dl_rq, since the fact that the timer was running
 * means the task is throttled and needs a runtime replenishment.
 *
 * However, what we actually do depends on whether the task is still active
 * (it is on its rq) or has been removed from there by a call to
 * dequeue_task_dl(). In the former case we must issue the runtime
 * replenishment and add the task back to the dl_rq; in the latter, we just
 * do nothing but clear dl_throttled, so that runtime and deadline
 * updating (and the queueing back to dl_rq) will be done by the
 * next call to enqueue_task_dl().
 */
static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
{
        struct sched_dl_entity *dl_se = container_of(timer,
                                                     struct sched_dl_entity,
                                                     dl_timer);
        struct task_struct *p = dl_task_of(dl_se);
        struct rq_flags rf;
        struct rq *rq;

        rq = task_rq_lock(p, &rf);

        /*
         * The task might have changed its scheduling policy to something
         * different than SCHED_DEADLINE (through switched_from_dl()).
         */
        if (!dl_task(p)) {
                __dl_clear_params(p);
                goto unlock;
        }

        /*
         * The task might have been boosted by someone else and might be in the
         * boosting/deboosting path; it's not throttled.
         */
        if (dl_se->dl_boosted)
                goto unlock;

        /*
         * Spurious timer due to start_dl_timer() race; or we already received
         * a replenishment from rt_mutex_setprio().
         */
        if (!dl_se->dl_throttled)
                goto unlock;

        sched_clock_tick();
        update_rq_clock(rq);

        /*
         * If the throttle happened during sched-out; like:
         *
         *   schedule()
         *     deactivate_task()
         *       dequeue_task_dl()
         *         update_curr_dl()
         *           start_dl_timer()
         *         __dequeue_task_dl()
         *     prev->on_rq = 0;
         *
         * We can be both throttled and !queued. Replenish the counter
         * but do not enqueue -- wait for our wakeup to do that.
         */
        if (!task_on_rq_queued(p)) {
                replenish_dl_entity(dl_se, dl_se);
                goto unlock;
        }

#ifdef CONFIG_SMP
        if (unlikely(!rq->online)) {
                /*
                 * If the runqueue is no longer available, migrate the
                 * task elsewhere. This necessarily changes rq.
                 */
                lockdep_unpin_lock(&rq->lock, rf.cookie);
                rq = dl_task_offline_migration(rq, p);
                rf.cookie = lockdep_pin_lock(&rq->lock);
                update_rq_clock(rq);

                /*
                 * Now that the task has been migrated to the new RQ and we
                 * have that locked, proceed as normal and enqueue the task
                 * there.
                 */
        }
#endif

        enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
        if (dl_task(rq->curr))
                check_preempt_curr_dl(rq, p, 0);
        else
                resched_curr(rq);

#ifdef CONFIG_SMP
        /*
         * Queueing this task back might have overloaded rq, check if we need
         * to kick someone away.
         */
        if (has_pushable_dl_tasks(rq)) {
                /*
                 * Nothing relies on rq->lock after this, so it's safe to drop
                 * rq->lock.
                 */
                rq_unpin_lock(rq, &rf);
                push_dl_task(rq);
                rq_repin_lock(rq, &rf);
        }
#endif

unlock:
        task_rq_unlock(rq, p, &rf);

        /*
         * This can free the task_struct, including this hrtimer, do not touch
         * anything related to that after this.
         */
        put_task_struct(p);

        return HRTIMER_NORESTART;
}

void init_dl_task_timer(struct sched_dl_entity *dl_se)
{
        struct hrtimer *timer = &dl_se->dl_timer;

        hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        timer->function = dl_task_timer;
}

/*
 * During the activation, CBS checks if it can reuse the current task's
 * runtime and period. If the deadline of the task is in the past, CBS
 * cannot use the runtime, and so it replenishes the task. This rule
 * works fine for implicit deadline tasks (deadline == period), and the
 * CBS was designed for implicit deadline tasks. However, a task with
 * constrained deadline (deadline < period) might be awakened after the
 * deadline, but before the next period. In this case, replenishing the
 * task would allow it to run for runtime / deadline. As in this case
 * deadline < period, CBS enables a task to run for more than the
 * runtime / period. In a very loaded system, this can cause a domino
 * effect, making other tasks miss their deadlines.
 *
 * To avoid this problem, in the activation of a constrained deadline
 * task after the deadline but before the next period, throttle the
 * task and set the replenishing timer to the beginning of the next period,
 * unless it is boosted.
 */
static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se)
{
        struct task_struct *p = dl_task_of(dl_se);
        struct rq *rq = rq_of_dl_rq(dl_rq_of_se(dl_se));

        if (dl_time_before(dl_se->deadline, rq_clock(rq)) &&
            dl_time_before(rq_clock(rq), dl_next_period(dl_se))) {
                if (unlikely(dl_se->dl_boosted || !start_dl_timer(p)))
                        return;
                dl_se->dl_throttled = 1;
        }
}
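
/*
 * Illustrative timeline for the rule above (constrained task,
 * dl_deadline < dl_period):
 *
 *      |----------- period i -----------|----------- period i+1 ----
 *      ^ period start    ^ deadline     ^ next period
 *                              ^ wakeup here: throttle until period i+1,
 *                                where the timer replenishes the task
 *
 * A wakeup before the deadline instead goes through the normal CBS check
 * in update_dl_entity()/dl_entity_overflow().
 */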

static
int dl_runtime_exceeded(struct sched_dl_entity *dl_se)
{
        return (dl_se->runtime <= 0);
}

extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq);

/*
 * Update the current task's runtime statistics (provided it is still
 * a -deadline task and has not been removed from the dl_rq).
 */
static void update_curr_dl(struct rq *rq)
{
        struct task_struct *curr = rq->curr;
        struct sched_dl_entity *dl_se = &curr->dl;
        u64 delta_exec;

        if (!dl_task(curr) || !on_dl_rq(dl_se))
                return;

        /*
         * Consumed budget is computed considering the time as
         * observed by schedulable tasks (excluding time spent
         * in hardirq context, etc.). Deadlines are instead
         * computed using hard walltime. This seems to be the more
         * natural solution, but the full ramifications of this
         * approach need further study.
         */
        delta_exec = rq_clock_task(rq) - curr->se.exec_start;
        if (unlikely((s64)delta_exec <= 0)) {
                if (unlikely(dl_se->dl_yielded))
                        goto throttle;
                return;
        }

        /* kick cpufreq (see the comment in kernel/sched/sched.h). */
        cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL);

        schedstat_set(curr->se.statistics.exec_max,
                      max(curr->se.statistics.exec_max, delta_exec));

        curr->se.sum_exec_runtime += delta_exec;
        account_group_exec_runtime(curr, delta_exec);

        curr->se.exec_start = rq_clock_task(rq);
        cpuacct_charge(curr, delta_exec);

        sched_rt_avg_update(rq, delta_exec);

        dl_se->runtime -= delta_exec;

throttle:
        if (dl_runtime_exceeded(dl_se) || dl_se->dl_yielded) {
                dl_se->dl_throttled = 1;
                __dequeue_task_dl(rq, curr, 0);
                if (unlikely(dl_se->dl_boosted || !start_dl_timer(curr)))
                        enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);

                if (!is_leftmost(curr, &rq->dl))
                        resched_curr(rq);
        }

        /*
         * Because -- for now -- we share the rt bandwidth, we need to
         * account our runtime there too, otherwise actual rt tasks
         * would be able to exceed the shared quota.
         *
         * Account to the root rt group for now.
         *
         * The solution we're working towards is having the RT groups scheduled
         * using deadline servers -- however there's a few nasties to figure
         * out before that can happen.
         */
        if (rt_bandwidth_enabled()) {
                struct rt_rq *rt_rq = &rq->rt;

                raw_spin_lock(&rt_rq->rt_runtime_lock);
                /*
                 * We'll let actual RT tasks worry about the overflow here, we
                 * have our own CBS to keep us inline; only account when RT
                 * bandwidth is relevant.
                 */
                if (sched_rt_bandwidth_account(rt_rq))
                        rt_rq->rt_time += delta_exec;
                raw_spin_unlock(&rt_rq->rt_runtime_lock);
        }
}
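
/*
 * Example of the bookkeeping above (illustrative numbers): a task with
 * dl_runtime = 10ms that has executed 12ms since its last replenishment
 * ends up with runtime = -2ms, is throttled and dequeued, and its dl_timer
 * is armed at the next period.  When the timer fires, replenish_dl_entity()
 * adds dl_runtime (pushing the deadline one period forward) until runtime
 * is positive again, here leaving 8ms for the new period.
 */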

#ifdef CONFIG_SMP

static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
{
        struct rq *rq = rq_of_dl_rq(dl_rq);

        if (dl_rq->earliest_dl.curr == 0 ||
            dl_time_before(deadline, dl_rq->earliest_dl.curr)) {
                dl_rq->earliest_dl.curr = deadline;
                cpudl_set(&rq->rd->cpudl, rq->cpu, deadline);
        }
}

static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
{
        struct rq *rq = rq_of_dl_rq(dl_rq);

        /*
         * Since we may have removed our earliest (and/or next earliest)
         * task we must recompute them.
         */
        if (!dl_rq->dl_nr_running) {
                dl_rq->earliest_dl.curr = 0;
                dl_rq->earliest_dl.next = 0;
                cpudl_clear(&rq->rd->cpudl, rq->cpu);
        } else {
                struct rb_node *leftmost = dl_rq->rb_leftmost;
                struct sched_dl_entity *entry;

                entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
                dl_rq->earliest_dl.curr = entry->deadline;
                cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline);
        }
}

#else

static inline void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}
static inline void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline) {}

#endif /* CONFIG_SMP */

static inline
void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
        int prio = dl_task_of(dl_se)->prio;
        u64 deadline = dl_se->deadline;

        WARN_ON(!dl_prio(prio));
        dl_rq->dl_nr_running++;
        add_nr_running(rq_of_dl_rq(dl_rq), 1);

        inc_dl_deadline(dl_rq, deadline);
        inc_dl_migration(dl_se, dl_rq);
}

static inline
void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)
{
        int prio = dl_task_of(dl_se)->prio;

        WARN_ON(!dl_prio(prio));
        WARN_ON(!dl_rq->dl_nr_running);
        dl_rq->dl_nr_running--;
        sub_nr_running(rq_of_dl_rq(dl_rq), 1);

        dec_dl_deadline(dl_rq, dl_se->deadline);
        dec_dl_migration(dl_se, dl_rq);
}

static void __enqueue_dl_entity(struct sched_dl_entity *dl_se)
{
        struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
        struct rb_node **link = &dl_rq->rb_root.rb_node;
        struct rb_node *parent = NULL;
        struct sched_dl_entity *entry;
        int leftmost = 1;

        BUG_ON(!RB_EMPTY_NODE(&dl_se->rb_node));

        while (*link) {
                parent = *link;
                entry = rb_entry(parent, struct sched_dl_entity, rb_node);
                if (dl_time_before(dl_se->deadline, entry->deadline))
                        link = &parent->rb_left;
                else {
                        link = &parent->rb_right;
                        leftmost = 0;
                }
        }

        if (leftmost)
                dl_rq->rb_leftmost = &dl_se->rb_node;

        rb_link_node(&dl_se->rb_node, parent, link);
        rb_insert_color(&dl_se->rb_node, &dl_rq->rb_root);

        inc_dl_tasks(dl_se, dl_rq);
}

static void __dequeue_dl_entity(struct sched_dl_entity *dl_se)
{
        struct dl_rq *dl_rq = dl_rq_of_se(dl_se);

        if (RB_EMPTY_NODE(&dl_se->rb_node))
                return;

        if (dl_rq->rb_leftmost == &dl_se->rb_node) {
                struct rb_node *next_node;

                next_node = rb_next(&dl_se->rb_node);
                dl_rq->rb_leftmost = next_node;
        }

        rb_erase(&dl_se->rb_node, &dl_rq->rb_root);
        RB_CLEAR_NODE(&dl_se->rb_node);

        dec_dl_tasks(dl_se, dl_rq);
}

static void
enqueue_dl_entity(struct sched_dl_entity *dl_se,
                  struct sched_dl_entity *pi_se, int flags)
{
        BUG_ON(on_dl_rq(dl_se));

        /*
         * If this is a wakeup or a new instance, the scheduling
         * parameters of the task might need updating. Otherwise,
         * we want a replenishment of its runtime.
         */
        if (flags & ENQUEUE_WAKEUP)
                update_dl_entity(dl_se, pi_se);
        else if (flags & ENQUEUE_REPLENISH)
                replenish_dl_entity(dl_se, pi_se);

        __enqueue_dl_entity(dl_se);
}

static void dequeue_dl_entity(struct sched_dl_entity *dl_se)
{
        __dequeue_dl_entity(dl_se);
}

static inline bool dl_is_constrained(struct sched_dl_entity *dl_se)
{
        return dl_se->dl_deadline < dl_se->dl_period;
}

static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
        struct task_struct *pi_task = rt_mutex_get_top_task(p);
        struct sched_dl_entity *pi_se = &p->dl;

        /*
         * Use the scheduling parameters of the top pi-waiter
         * task if we have one and its (absolute) deadline is
         * smaller than ours... otherwise we keep our runtime and
         * deadline.
         */
        if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) {
                pi_se = &pi_task->dl;
        } else if (!dl_prio(p->normal_prio)) {
                /*
                 * Special case in which we have a !SCHED_DEADLINE task
                 * that is going to be deboosted, but exceeds its
                 * runtime while doing so. No point in replenishing
                 * it, as it's going to return to its original
                 * scheduling class after this.
                 */
                BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
                return;
        }

        /*
         * Check if a constrained deadline task was activated
         * after the deadline but before the next period.
         * If that is the case, the task will be throttled and
         * the replenishment timer will be set to the next period.
         */
        if (!p->dl.dl_throttled && dl_is_constrained(&p->dl))
                dl_check_constrained_dl(&p->dl);

        /*
         * If p is throttled, we do nothing. In fact, if it exhausted
         * its budget it needs a replenishment and, since it now is on
         * its rq, the bandwidth timer callback (which clearly has not
         * run yet) will take care of this.
         */
        if (p->dl.dl_throttled && !(flags & ENQUEUE_REPLENISH))
                return;

        enqueue_dl_entity(&p->dl, pi_se, flags);

        if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
                enqueue_pushable_dl_task(rq, p);
}

static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
        dequeue_dl_entity(&p->dl);
        dequeue_pushable_dl_task(rq, p);
}

static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)
{
        update_curr_dl(rq);
        __dequeue_task_dl(rq, p, flags);
}

/*
 * Yield task semantic for -deadline tasks is:
 *
 *   get off the CPU until our next instance, with
 *   a new runtime. This is of little use now, since we
 *   don't have a bandwidth reclaiming mechanism. Anyway,
 *   bandwidth reclaiming is planned for the future, and
 *   yield_task_dl will indicate that some spare budget
 *   is available for other task instances to use.
 */
static void yield_task_dl(struct rq *rq)
{
        /*
         * We make the task go to sleep until its current deadline by
         * forcing its runtime to zero. This way, update_curr_dl() stops
         * it and the bandwidth timer will wake it up and will give it
         * new scheduling parameters (thanks to dl_yielded=1).
         */
        rq->curr->dl.dl_yielded = 1;

        update_rq_clock(rq);
        update_curr_dl(rq);
        /*
         * Tell update_rq_clock() that we've just updated,
         * so we don't do microscopic update in schedule()
         * and double the fastpath cost.
         */
        rq_clock_skip_update(rq, true);
}
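
/*
 * Illustrative userspace pattern (a sketch, not part of this file): a
 * periodic -deadline job typically does its work and then yields the
 * remainder of its budget until the next period:
 *
 *      for (;;) {
 *              do_periodic_work();
 *              sched_yield();
 *      }
 *
 * With dl_yielded set above, the task is stopped by update_curr_dl() and
 * is woken up again by the bandwidth timer with fresh parameters.
 */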

#ifdef CONFIG_SMP

static int find_later_rq(struct task_struct *task);

static int
select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
{
        struct task_struct *curr;
        struct rq *rq;

        if (sd_flag != SD_BALANCE_WAKE)
                goto out;

        rq = cpu_rq(cpu);

        rcu_read_lock();
        curr = READ_ONCE(rq->curr); /* unlocked access */

        /*
         * If we are dealing with a -deadline task, we must
         * decide where to wake it up.
         * If it has a later deadline and the current task
         * on this rq can't move (provided the waking task
         * can!) we prefer to send it somewhere else. On the
         * other hand, if it has a shorter deadline, we
         * try to make it stay here, it might be important.
         */
        if (unlikely(dl_task(curr)) &&
            (curr->nr_cpus_allowed < 2 ||
             !dl_entity_preempt(&p->dl, &curr->dl)) &&
            (p->nr_cpus_allowed > 1)) {
                int target = find_later_rq(p);

                if (target != -1 &&
                    (dl_time_before(p->dl.deadline,
                                    cpu_rq(target)->dl.earliest_dl.curr) ||
                     (cpu_rq(target)->dl.dl_nr_running == 0)))
                        cpu = target;
        }
        rcu_read_unlock();

out:
        return cpu;
}

static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
{
        /*
         * Current can't be migrated, useless to reschedule,
         * let's hope p can move out.
         */
        if (rq->curr->nr_cpus_allowed == 1 ||
            cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
                return;

        /*
         * p is migratable, so let's not schedule it and
         * see if it is pushed or pulled somewhere else.
         */
        if (p->nr_cpus_allowed != 1 &&
            cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
                return;

        resched_curr(rq);
}

#endif /* CONFIG_SMP */

/*
 * Only called when both the current and waking task are -deadline
 * tasks.
 */
static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
                                  int flags)
{
        if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
                resched_curr(rq);
                return;
        }

#ifdef CONFIG_SMP
        /*
         * In the unlikely case current and p have the same deadline
         * let us try to decide what's the best thing to do...
         */
        if ((p->dl.deadline == rq->curr->dl.deadline) &&
            !test_tsk_need_resched(rq->curr))
                check_preempt_equal_dl(rq, p);
#endif /* CONFIG_SMP */
}

#ifdef CONFIG_SCHED_HRTICK
static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
{
        hrtick_start(rq, p->dl.runtime);
}
#else /* !CONFIG_SCHED_HRTICK */
static void start_hrtick_dl(struct rq *rq, struct task_struct *p)
{
}
#endif

static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq,
                                                   struct dl_rq *dl_rq)
{
        struct rb_node *left = dl_rq->rb_leftmost;

        if (!left)
                return NULL;

        return rb_entry(left, struct sched_dl_entity, rb_node);
}

struct task_struct *
pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
{
        struct sched_dl_entity *dl_se;
        struct task_struct *p;
        struct dl_rq *dl_rq;

        dl_rq = &rq->dl;

        if (need_pull_dl_task(rq, prev)) {
                /*
                 * This is OK, because current is on_cpu, which avoids it being
                 * picked for load-balance and preemption/IRQs are still
                 * disabled avoiding further scheduler activity on it and we're
                 * being very careful to re-start the picking loop.
                 */
                rq_unpin_lock(rq, rf);
                pull_dl_task(rq);
                rq_repin_lock(rq, rf);
                /*
                 * pull_dl_task() can drop (and re-acquire) rq->lock; this
                 * means a stop task can slip in, in which case we need to
                 * re-start task selection.
                 */
                if (rq->stop && task_on_rq_queued(rq->stop))
                        return RETRY_TASK;
        }

        /*
         * When prev is DL, we may throttle it in put_prev_task().
         * So, we update time before we check for dl_nr_running.
         */
        if (prev->sched_class == &dl_sched_class)
                update_curr_dl(rq);

        if (unlikely(!dl_rq->dl_nr_running))
                return NULL;

        put_prev_task(rq, prev);

        dl_se = pick_next_dl_entity(rq, dl_rq);
        BUG_ON(!dl_se);

        p = dl_task_of(dl_se);
        p->se.exec_start = rq_clock_task(rq);

        /* Running task will never be pushed. */
        dequeue_pushable_dl_task(rq, p);

        if (hrtick_enabled(rq))
                start_hrtick_dl(rq, p);

        queue_push_tasks(rq);

        return p;
}

static void put_prev_task_dl(struct rq *rq, struct task_struct *p)
{
        update_curr_dl(rq);

        if (on_dl_rq(&p->dl) && p->nr_cpus_allowed > 1)
                enqueue_pushable_dl_task(rq, p);
}

static void task_tick_dl(struct rq *rq, struct task_struct *p, int queued)
{
        update_curr_dl(rq);

        /*
         * Even when we have runtime, update_curr_dl() might have resulted in us
         * not being the leftmost task anymore. In that case NEED_RESCHED will
         * be set and schedule() will start a new hrtick for the next task.
         */
        if (hrtick_enabled(rq) && queued && p->dl.runtime > 0 &&
            is_leftmost(p, &rq->dl))
                start_hrtick_dl(rq, p);
}

static void task_fork_dl(struct task_struct *p)
{
        /*
         * SCHED_DEADLINE tasks cannot fork and this is achieved through
         * sched_fork().
         */
}

static void task_dead_dl(struct task_struct *p)
{
        struct dl_bw *dl_b = dl_bw_of(task_cpu(p));

        /*
         * Since we are TASK_DEAD we won't slip out of the domain!
         */
        raw_spin_lock_irq(&dl_b->lock);
        /* XXX we should retain the bw until 0-lag */
        dl_b->total_bw -= p->dl.dl_bw;
        raw_spin_unlock_irq(&dl_b->lock);
}

static void set_curr_task_dl(struct rq *rq)
{
        struct task_struct *p = rq->curr;

        p->se.exec_start = rq_clock_task(rq);

        /* You can't push away the running task */
        dequeue_pushable_dl_task(rq, p);
}

#ifdef CONFIG_SMP

/* Only try algorithms three times */
#define DL_MAX_TRIES 3

static int pick_dl_task(struct rq *rq, struct task_struct *p, int cpu)
{
        if (!task_running(rq, p) &&
            cpumask_test_cpu(cpu, &p->cpus_allowed))
                return 1;
        return 0;
}

/*
 * Return the earliest pushable rq's task, which is suitable to be executed
 * on the CPU, NULL otherwise:
 */
static struct task_struct *pick_earliest_pushable_dl_task(struct rq *rq, int cpu)
{
        struct rb_node *next_node = rq->dl.pushable_dl_tasks_leftmost;
        struct task_struct *p = NULL;

        if (!has_pushable_dl_tasks(rq))
                return NULL;

next_node:
        if (next_node) {
                p = rb_entry(next_node, struct task_struct, pushable_dl_tasks);

                if (pick_dl_task(rq, p, cpu))
                        return p;

                next_node = rb_next(next_node);
                goto next_node;
        }

        return NULL;
}

static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);

static int find_later_rq(struct task_struct *task)
{
        struct sched_domain *sd;
        struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
        int this_cpu = smp_processor_id();
        int best_cpu, cpu = task_cpu(task);

        /* Make sure the mask is initialized first */
        if (unlikely(!later_mask))
                return -1;

        if (task->nr_cpus_allowed == 1)
                return -1;

        /*
         * We have to consider system topology and task affinity
         * first, then we can look for a suitable cpu.
         */
        best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
                              task, later_mask);
        if (best_cpu == -1)
                return -1;

        /*
         * If we are here, some target has been found,
         * the most suitable of which is cached in best_cpu.
         * This is, among the runqueues where the current tasks
         * have later deadlines than the task's one, the rq
         * with the latest possible one.
         *
         * Now we check how well this matches with task's
         * affinity and system topology.
         *
         * The last cpu where the task ran is our first
         * guess, since it is most likely cache-hot there.
         */
        if (cpumask_test_cpu(cpu, later_mask))
                return cpu;
        /*
         * Check if this_cpu is to be skipped (i.e., it is
         * not in the mask) or not.
         */
        if (!cpumask_test_cpu(this_cpu, later_mask))
                this_cpu = -1;

        rcu_read_lock();
        for_each_domain(cpu, sd) {
                if (sd->flags & SD_WAKE_AFFINE) {

                        /*
                         * If possible, preempting this_cpu is
                         * cheaper than migrating.
                         */
                        if (this_cpu != -1 &&
                            cpumask_test_cpu(this_cpu, sched_domain_span(sd))) {
                                rcu_read_unlock();
                                return this_cpu;
                        }

                        /*
                         * Last chance: if best_cpu is valid and is
                         * in the mask, that becomes our choice.
                         */
                        if (best_cpu < nr_cpu_ids &&
                            cpumask_test_cpu(best_cpu, sched_domain_span(sd))) {
                                rcu_read_unlock();
                                return best_cpu;
                        }
                }
        }
        rcu_read_unlock();

        /*
         * At this point, all our guesses failed, we just return
         * 'something', and let the caller sort things out.
         */
        if (this_cpu != -1)
                return this_cpu;

        cpu = cpumask_any(later_mask);
        if (cpu < nr_cpu_ids)
                return cpu;

        return -1;
}

/* Locks the rq it finds */
static struct rq *find_lock_later_rq(struct task_struct *task, struct rq *rq)
{
        struct rq *later_rq = NULL;
        int tries;
        int cpu;

        for (tries = 0; tries < DL_MAX_TRIES; tries++) {
                cpu = find_later_rq(task);

                if ((cpu == -1) || (cpu == rq->cpu))
                        break;

                later_rq = cpu_rq(cpu);

                if (later_rq->dl.dl_nr_running &&
                    !dl_time_before(task->dl.deadline,
                                    later_rq->dl.earliest_dl.curr)) {
                        /*
                         * Target rq has tasks of equal or earlier deadline,
                         * retrying does not release any lock and is unlikely
                         * to yield a different result.
                         */
                        later_rq = NULL;
                        break;
                }

                /* Retry if something changed. */
                if (double_lock_balance(rq, later_rq)) {
                        if (unlikely(task_rq(task) != rq ||
                                     !cpumask_test_cpu(later_rq->cpu, &task->cpus_allowed) ||
                                     task_running(rq, task) ||
                                     !dl_task(task) ||
                                     !task_on_rq_queued(task))) {
                                double_unlock_balance(rq, later_rq);
                                later_rq = NULL;
                                break;
                        }
                }

                /*
                 * If the rq we found has no -deadline task, or
                 * its earliest one has a later deadline than our
                 * task, the rq is a good one.
                 */
                if (!later_rq->dl.dl_nr_running ||
                    dl_time_before(task->dl.deadline,
                                   later_rq->dl.earliest_dl.curr))
                        break;

                /* Otherwise we try again. */
                double_unlock_balance(rq, later_rq);
                later_rq = NULL;
        }

        return later_rq;
}

static struct task_struct *pick_next_pushable_dl_task(struct rq *rq)
{
        struct task_struct *p;

        if (!has_pushable_dl_tasks(rq))
                return NULL;

        p = rb_entry(rq->dl.pushable_dl_tasks_leftmost,
                     struct task_struct, pushable_dl_tasks);

        BUG_ON(rq->cpu != task_cpu(p));
        BUG_ON(task_current(rq, p));
        BUG_ON(p->nr_cpus_allowed <= 1);

        BUG_ON(!task_on_rq_queued(p));
        BUG_ON(!dl_task(p));

        return p;
}

/*
 * See if the non-running -deadline tasks on this rq
 * can be sent to some other CPU where they can preempt
 * and start executing.
 */
static int push_dl_task(struct rq *rq)
{
        struct task_struct *next_task;
        struct rq *later_rq;
        int ret = 0;

        if (!rq->dl.overloaded)
                return 0;

        next_task = pick_next_pushable_dl_task(rq);
        if (!next_task)
                return 0;

retry:
        if (unlikely(next_task == rq->curr)) {
                WARN_ON(1);
                return 0;
        }

        /*
         * If next_task preempts rq->curr, and rq->curr
         * can move away, it makes sense to just reschedule
         * without going further in pushing next_task.
         */
        if (dl_task(rq->curr) &&
            dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
            rq->curr->nr_cpus_allowed > 1) {
                resched_curr(rq);
                return 0;
        }

        /* We might release rq lock */
        get_task_struct(next_task);

        /* Will lock the rq it'll find */
        later_rq = find_lock_later_rq(next_task, rq);
        if (!later_rq) {
                struct task_struct *task;

                /*
                 * We must check all this again, since
                 * find_lock_later_rq releases rq->lock and it is
                 * then possible that next_task has migrated.
                 */
                task = pick_next_pushable_dl_task(rq);
                if (task_cpu(next_task) == rq->cpu && task == next_task) {
                        /*
                         * The task is still there. We don't try
                         * again, some other cpu will pull it when ready.
                         */
                        goto out;
                }

                if (!task)
                        /* No more tasks */
                        goto out;

                put_task_struct(next_task);
                next_task = task;
                goto retry;
        }

        deactivate_task(rq, next_task, 0);
        set_task_cpu(next_task, later_rq->cpu);
        activate_task(later_rq, next_task, 0);
        ret = 1;

        resched_curr(later_rq);

        double_unlock_balance(rq, later_rq);

out:
        put_task_struct(next_task);

        return ret;
}

static void push_dl_tasks(struct rq *rq)
{
        /* push_dl_task() will return true if it moved a -deadline task */
        while (push_dl_task(rq))
                ;
}

static void pull_dl_task(struct rq *this_rq)
{
        int this_cpu = this_rq->cpu, cpu;
        struct task_struct *p;
        bool resched = false;
        struct rq *src_rq;
        u64 dmin = LONG_MAX;

        if (likely(!dl_overloaded(this_rq)))
                return;

        /*
         * Match the barrier from dl_set_overload(); this guarantees that if we
         * see overloaded we must also see the dlo_mask bit.
         */
        smp_rmb();

        for_each_cpu(cpu, this_rq->rd->dlo_mask) {
                if (this_cpu == cpu)
                        continue;

                src_rq = cpu_rq(cpu);

                /*
                 * It looks racy, and it is! However, as in sched_rt.c,
                 * we are fine with this.
                 */
                if (this_rq->dl.dl_nr_running &&
                    dl_time_before(this_rq->dl.earliest_dl.curr,
                                   src_rq->dl.earliest_dl.next))
                        continue;

                /* Might drop this_rq->lock */
                double_lock_balance(this_rq, src_rq);

                /*
                 * If there are no more pullable tasks on the
                 * rq, we're done with it.
                 */
                if (src_rq->dl.dl_nr_running <= 1)
                        goto skip;

                p = pick_earliest_pushable_dl_task(src_rq, this_cpu);

                /*
                 * We found a task to be pulled if:
                 *  - it preempts our current (if there's one),
                 *  - it will preempt the last one we pulled (if any).
                 */
                if (p && dl_time_before(p->dl.deadline, dmin) &&
                    (!this_rq->dl.dl_nr_running ||
                     dl_time_before(p->dl.deadline,
                                    this_rq->dl.earliest_dl.curr))) {
                        WARN_ON(p == src_rq->curr);
                        WARN_ON(!task_on_rq_queued(p));

                        /*
                         * We skip p if it has an earlier deadline than the
                         * current task of its runqueue: in that case p is
                         * about to be scheduled there anyway, so pulling it
                         * would not help.
                         */
                        if (dl_time_before(p->dl.deadline,
                                           src_rq->curr->dl.deadline))
                                goto skip;

                        resched = true;

                        deactivate_task(src_rq, p, 0);
                        set_task_cpu(p, this_cpu);
                        activate_task(this_rq, p, 0);
                        dmin = p->dl.deadline;

                        /* Is there any other task even earlier? */
                }
skip:
                double_unlock_balance(this_rq, src_rq);
        }

        if (resched)
                resched_curr(this_rq);
}

/*
 * Since the task is not running and a reschedule is not going to happen
 * anytime soon on its runqueue, we try pushing it away now.
 */
static void task_woken_dl(struct rq *rq, struct task_struct *p)
{
        if (!task_running(rq, p) &&
            !test_tsk_need_resched(rq->curr) &&
            p->nr_cpus_allowed > 1 &&
            dl_task(rq->curr) &&
            (rq->curr->nr_cpus_allowed < 2 ||
             !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
                push_dl_tasks(rq);
        }
}

static void set_cpus_allowed_dl(struct task_struct *p,
                                const struct cpumask *new_mask)
{
        struct root_domain *src_rd;
        struct rq *rq;

        BUG_ON(!dl_task(p));

        rq = task_rq(p);
        src_rd = rq->rd;
        /*
         * Migrating a SCHED_DEADLINE task between exclusive
         * cpusets (different root_domains) entails a bandwidth
         * update. We already made space for us in the destination
         * domain (see cpuset_can_attach()).
         */
        if (!cpumask_intersects(src_rd->span, new_mask)) {
                struct dl_bw *src_dl_b;

                src_dl_b = dl_bw_of(cpu_of(rq));
                /*
                 * We now free resources of the root_domain we are migrating
                 * off. In the worst case, sched_setattr() may temporarily fail
                 * until we complete the update.
                 */
                raw_spin_lock(&src_dl_b->lock);
                __dl_clear(src_dl_b, p->dl.dl_bw);
                raw_spin_unlock(&src_dl_b->lock);
        }

        set_cpus_allowed_common(p, new_mask);
}

/* Assumes rq->lock is held */
static void rq_online_dl(struct rq *rq)
{
        if (rq->dl.overloaded)
                dl_set_overload(rq);

        cpudl_set_freecpu(&rq->rd->cpudl, rq->cpu);
        if (rq->dl.dl_nr_running > 0)
                cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr);
}

/* Assumes rq->lock is held */
static void rq_offline_dl(struct rq *rq)
{
        if (rq->dl.overloaded)
                dl_clear_overload(rq);

        cpudl_clear(&rq->rd->cpudl, rq->cpu);
        cpudl_clear_freecpu(&rq->rd->cpudl, rq->cpu);
}

void __init init_sched_dl_class(void)
{
        unsigned int i;

        for_each_possible_cpu(i)
                zalloc_cpumask_var_node(&per_cpu(local_cpu_mask_dl, i),
                                        GFP_KERNEL, cpu_to_node(i));
}

#endif /* CONFIG_SMP */

static void switched_from_dl(struct rq *rq, struct task_struct *p)
{
        /*
         * Start the deadline timer; if we switch back to dl before this we'll
         * continue consuming our current CBS slice. If we stay outside of
         * SCHED_DEADLINE until the deadline passes, the timer will reset the
         * task.
         */
        if (!start_dl_timer(p))
                __dl_clear_params(p);

        /*
         * Since this might be the only -deadline task on the rq,
         * this is the right place to try to pull some other one
         * from an overloaded cpu, if any.
         */
        if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
                return;

        queue_pull_task(rq);
}

/*
 * When switching to -deadline, we may overload the rq, then
 * we try to push someone off, if possible.
 */
static void switched_to_dl(struct rq *rq, struct task_struct *p)
{

        /* If p is not queued we will update its parameters at next wakeup. */
        if (!task_on_rq_queued(p))
                return;

        /*
         * If p is boosted we already updated its params in
         * rt_mutex_setprio()->enqueue_task(..., ENQUEUE_REPLENISH),
         * p's deadline being now already after rq_clock(rq).
         */
        if (dl_time_before(p->dl.deadline, rq_clock(rq)))
                setup_new_dl_entity(&p->dl);

        if (rq->curr != p) {
#ifdef CONFIG_SMP
                if (p->nr_cpus_allowed > 1 && rq->dl.overloaded)
                        queue_push_tasks(rq);
#endif
                if (dl_task(rq->curr))
                        check_preempt_curr_dl(rq, p, 0);
                else
                        resched_curr(rq);
        }
}

/*
 * If the scheduling parameters of a -deadline task changed,
 * a push or pull operation might be needed.
 */
static void prio_changed_dl(struct rq *rq, struct task_struct *p,
                            int oldprio)
{
        if (task_on_rq_queued(p) || rq->curr == p) {
#ifdef CONFIG_SMP
                /*
                 * This might be too much, but unfortunately
                 * we don't have the old deadline value, and
                 * we can't argue if the task is increasing
                 * or lowering its prio, so...
                 */
                if (!rq->dl.overloaded)
                        queue_pull_task(rq);

                /*
                 * If we now have an earlier deadline task than p,
                 * then reschedule, provided p is still on this
                 * runqueue.
                 */
                if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline))
                        resched_curr(rq);
#else
                /*
                 * Again, we don't know if p has an earlier
                 * or later deadline, so let's blindly set a
                 * (maybe not needed) rescheduling point.
                 */
                resched_curr(rq);
#endif /* CONFIG_SMP */
        }
}

const struct sched_class dl_sched_class = {
        .next                   = &rt_sched_class,
        .enqueue_task           = enqueue_task_dl,
        .dequeue_task           = dequeue_task_dl,
        .yield_task             = yield_task_dl,

        .check_preempt_curr     = check_preempt_curr_dl,

        .pick_next_task         = pick_next_task_dl,
        .put_prev_task          = put_prev_task_dl,

#ifdef CONFIG_SMP
        .select_task_rq         = select_task_rq_dl,
        .set_cpus_allowed       = set_cpus_allowed_dl,
        .rq_online              = rq_online_dl,
        .rq_offline             = rq_offline_dl,
        .task_woken             = task_woken_dl,
#endif

        .set_curr_task          = set_curr_task_dl,
        .task_tick              = task_tick_dl,
        .task_fork              = task_fork_dl,
        .task_dead              = task_dead_dl,

        .prio_changed           = prio_changed_dl,
        .switched_from          = switched_from_dl,
        .switched_to            = switched_to_dl,

        .update_curr            = update_curr_dl,
};

#ifdef CONFIG_SCHED_DEBUG
extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);

void print_dl_stats(struct seq_file *m, int cpu)
{
        print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
}
#endif /* CONFIG_SCHED_DEBUG */