#include <linux/export.h>
#include <linux/sched.h>
#include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h>
#include <linux/static_key.h>
#include <linux/context_tracking.h>
#include "sched.h"
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#endif


#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on corresponding CPU
 * with interrupts disabled. So, writes are safe.
 * They are read and saved off into struct rq in update_rq_clock().
 * This may result in other CPU reading this CPU's irq time and can
 * race with irq/vtime_account on this CPU. We would either get old
 * or new value with a side effect of accounting a slice of irq time to wrong
 * task when irq is in progress while we read rq->clock. That is a worthy
 * compromise in place of having locks on each irq in account_system_time.
 */
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);

static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 0;
}

/*
 * Called before incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void irqtime_account_irq(struct task_struct *curr)
{
	struct irqtime *irqtime = this_cpu_ptr(&cpu_irqtime);
	s64 delta;
	int cpu;

	if (!sched_clock_irqtime)
		return;

	cpu = smp_processor_id();
	delta = sched_clock_cpu(cpu) - irqtime->irq_start_time;
	irqtime->irq_start_time += delta;

	u64_stats_update_begin(&irqtime->sync);
	/*
	 * We do not account for softirq time from ksoftirqd here.
	 * We want to continue accounting softirq time to ksoftirqd thread
	 * in that case, so as not to confuse the scheduler with a special task
	 * that does not consume any time, but still wants to run.
	 */
	if (hardirq_count())
		irqtime->hardirq_time += delta;
	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
		irqtime->softirq_time += delta;

	u64_stats_update_end(&irqtime->sync);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);

static cputime_t irqtime_account_update(u64 irqtime, int idx, cputime_t maxtime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	cputime_t irq_cputime;

	irq_cputime = nsecs_to_cputime64(irqtime) - cpustat[idx];
	irq_cputime = min(irq_cputime, maxtime);
	cpustat[idx] += irq_cputime;

	return irq_cputime;
}

static cputime_t irqtime_account_hi_update(cputime_t maxtime)
{
	return irqtime_account_update(__this_cpu_read(cpu_irqtime.hardirq_time),
				      CPUTIME_IRQ, maxtime);
}

static cputime_t irqtime_account_si_update(cputime_t maxtime)
{
	return irqtime_account_update(__this_cpu_read(cpu_irqtime.softirq_time),
				      CPUTIME_SOFTIRQ, maxtime);
}

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime	(0)

static cputime_t irqtime_account_hi_update(cputime_t dummy)
{
	return 0;
}

static cputime_t irqtime_account_si_update(cputime_t dummy)
{
	return 0;
}

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */
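
/*
 * Note on readers (an illustrative sketch, not part of this file): the
 * per-cpu hardirq/softirq counters above are published under &irqtime->sync,
 * so a cross-CPU reader is expected to use the u64_stats retry loop, roughly:
 *
 *	struct irqtime *it = &per_cpu(cpu_irqtime, cpu);
 *	unsigned int seq;
 *	u64 total;
 *
 *	do {
 *		seq = u64_stats_fetch_begin(&it->sync);
 *		total = it->hardirq_time + it->softirq_time;
 *	} while (u64_stats_fetch_retry(&it->sync, seq));
 *
 * The actual in-tree reader lives outside this file (see irq_time_read()
 * in sched.h); the snippet above only illustrates the pairing with the
 * u64_stats_update_begin()/end() writer in irqtime_account_irq().
 */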

static inline void task_group_account_field(struct task_struct *p, int index,
					    u64 tmp)
{
	/*
	 * Since all updates are sure to touch the root cgroup, we
	 * go ahead and touch it first. If the root cgroup
	 * is the only cgroup, then nothing else should be necessary.
	 */
	__this_cpu_add(kernel_cpustat.cpustat[index], tmp);

	cpuacct_account_field(p, index, tmp);
}

/*
 * Account user cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in user space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_user_time(struct task_struct *p, cputime_t cputime,
		       cputime_t cputime_scaled)
{
	int index;

	/* Add user time to process. */
	p->utime += cputime;
	p->utimescaled += cputime_scaled;
	account_group_user_time(p, cputime);

	index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

	/* Add user time to cpustat. */
	task_group_account_field(p, index, (__force u64) cputime);

	/* Account for user time used */
	acct_account_cputime(p);
}

/*
 * Account guest cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in virtual machine since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
static void account_guest_time(struct task_struct *p, cputime_t cputime,
			       cputime_t cputime_scaled)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	/* Add guest time to process. */
	p->utime += cputime;
	p->utimescaled += cputime_scaled;
	account_group_user_time(p, cputime);
	p->gtime += cputime;

	/* Add guest time to cpustat. */
	if (task_nice(p) > 0) {
		cpustat[CPUTIME_NICE] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
	} else {
		cpustat[CPUTIME_USER] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST] += (__force u64) cputime;
	}
}

/*
 * Account system cpu time to a process and desired cpustat field
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 * @index: index of the cpustat field that has to be updated
 */
static inline
void __account_system_time(struct task_struct *p, cputime_t cputime,
			   cputime_t cputime_scaled, int index)
{
	/* Add system time to process. */
	p->stime += cputime;
	p->stimescaled += cputime_scaled;
	account_group_system_time(p, cputime);

	/* Add system time to cpustat. */
	task_group_account_field(p, index, (__force u64) cputime);

	/* Account for system time used */
	acct_account_cputime(p);
}

/*
 * Account system cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_system_time(struct task_struct *p, int hardirq_offset,
			 cputime_t cputime, cputime_t cputime_scaled)
{
	int index;

	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
		account_guest_time(p, cputime, cputime_scaled);
		return;
	}

	if (hardirq_count() - hardirq_offset)
		index = CPUTIME_IRQ;
	else if (in_serving_softirq())
		index = CPUTIME_SOFTIRQ;
	else
		index = CPUTIME_SYSTEM;

	__account_system_time(p, cputime, cputime_scaled, index);
}

/*
 * Account for involuntary wait time.
 * @cputime: the cpu time spent in involuntary wait
 */
void account_steal_time(cputime_t cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	cpustat[CPUTIME_STEAL] += (__force u64) cputime;
}

/*
 * Account for idle time.
 * @cputime: the cpu time spent in idle wait
 */
void account_idle_time(cputime_t cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	struct rq *rq = this_rq();

	if (atomic_read(&rq->nr_iowait) > 0)
		cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
	else
		cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}

/*
 * When a guest is interrupted for a longer amount of time, missed clock
 * ticks are not redelivered later. Due to that, this function may on
 * occasion account more time than the calling functions think elapsed.
 */
static __always_inline cputime_t steal_account_process_time(cputime_t maxtime)
{
#ifdef CONFIG_PARAVIRT
	if (static_key_false(&paravirt_steal_enabled)) {
		cputime_t steal_cputime;
		u64 steal;

		steal = paravirt_steal_clock(smp_processor_id());
		steal -= this_rq()->prev_steal_time;

		steal_cputime = min(nsecs_to_cputime(steal), maxtime);
		account_steal_time(steal_cputime);
		this_rq()->prev_steal_time += cputime_to_nsecs(steal_cputime);

		return steal_cputime;
	}
#endif
	return 0;
}

/*
 * Account how much elapsed time was spent in steal, irq, or softirq time.
 */
static inline cputime_t account_other_time(cputime_t max)
{
	cputime_t accounted;

	/* Shall be converted to a lockdep-enabled lightweight check */
	WARN_ON_ONCE(!irqs_disabled());

	accounted = steal_account_process_time(max);

	if (accounted < max)
		accounted += irqtime_account_hi_update(max - accounted);

	if (accounted < max)
		accounted += irqtime_account_si_update(max - accounted);

	return accounted;
}
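
/*
 * Illustration (hypothetical numbers): with max == 10 ticks of budget, if
 * steal_account_process_time() consumes 4 and the pending hardirq delta is
 * worth 3, irqtime_account_hi_update() is offered 10 - 4 = 6 and accounts 3,
 * leaving at most 10 - 4 - 3 = 3 for the softirq update. The sum returned
 * therefore never exceeds @max, which is what lets callers subtract it from
 * an elapsed-time budget safely.
 */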

#ifdef CONFIG_64BIT
static inline u64 read_sum_exec_runtime(struct task_struct *t)
{
	return t->se.sum_exec_runtime;
}
#else
static u64 read_sum_exec_runtime(struct task_struct *t)
{
	u64 ns;
	struct rq_flags rf;
	struct rq *rq;

	rq = task_rq_lock(t, &rf);
	ns = t->se.sum_exec_runtime;
	task_rq_unlock(rq, t, &rf);

	return ns;
}
#endif

/*
 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 * tasks (sum on group iteration) belonging to @tsk's group.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
	struct signal_struct *sig = tsk->signal;
	cputime_t utime, stime;
	struct task_struct *t;
	unsigned int seq, nextseq;
	unsigned long flags;

	/*
	 * Update current task runtime to account pending time since last
	 * scheduler action or thread_group_cputime() call. This thread group
	 * might have other running tasks on different CPUs, but updating
	 * their runtime can affect syscall performance, so we skip accounting
	 * those pending times and rely only on values updated on tick or
	 * other scheduler action.
	 */
	if (same_thread_group(current, tsk))
		(void) task_sched_runtime(current);

	rcu_read_lock();
	/* Attempt a lockless read on the first round. */
	nextseq = 0;
	do {
		seq = nextseq;
		flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
		times->utime = sig->utime;
		times->stime = sig->stime;
		times->sum_exec_runtime = sig->sum_sched_runtime;

		for_each_thread(tsk, t) {
			task_cputime(t, &utime, &stime);
			times->utime += utime;
			times->stime += stime;
			times->sum_exec_runtime += read_sum_exec_runtime(t);
		}
		/* If lockless access failed, take the lock. */
		nextseq = 1;
	} while (need_seqretry(&sig->stats_lock, seq));
	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
	rcu_read_unlock();
}
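
/*
 * Typical use (illustrative only; 'tsk' stands for any task in the group of
 * interest):
 *
 *	struct task_cputime tc;
 *
 *	thread_group_cputime(tsk, &tc);
 *
 * tc.utime/tc.stime then hold the raw, tick-based sums for the whole thread
 * group (exited threads via sig->[us]time plus each live thread), and
 * tc.sum_exec_runtime the scheduler's precise runtime sum. Callers that want
 * the adjusted, monotonic values use thread_group_cputime_adjusted() below
 * instead.
 */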

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the cpu time gets accounted to
 * @user_tick: is the tick from userspace
 * @rq: the pointer to rq
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * Check for hardirq is done both for system and user time as there is
 * no timer going off while we are on hardirq and hence we may never get an
 * opportunity to update it solely in system time.
 * p->stime and friends are only updated on system time and not on irq
 * softirq time, as those no longer count in the task's exec_runtime.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
					 struct rq *rq, int ticks)
{
	u64 cputime = (__force u64) cputime_one_jiffy * ticks;
	cputime_t scaled, other;

	/*
	 * When returning from idle, many ticks can get accounted at
	 * once, including some ticks of steal, irq, and softirq time.
	 * Subtract those ticks from the amount of time accounted to
	 * idle, or potentially user or system time. Due to rounding,
	 * other time can exceed ticks occasionally.
	 */
	other = account_other_time(ULONG_MAX);
	if (other >= cputime)
		return;
	cputime -= other;
	scaled = cputime_to_scaled(cputime);

	if (this_cpu_ksoftirqd() == p) {
		/*
		 * ksoftirqd time does not get accounted in cpu_softirq_time.
		 * So, we have to handle it separately here.
		 * Also, p->stime needs to be updated for ksoftirqd.
		 */
		__account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ);
	} else if (user_tick) {
		account_user_time(p, cputime, scaled);
	} else if (p == rq->idle) {
		account_idle_time(cputime);
	} else if (p->flags & PF_VCPU) { /* System time or guest time */
		account_guest_time(p, cputime, scaled);
	} else {
		__account_system_time(p, cputime, scaled, CPUTIME_SYSTEM);
	}
}

static void irqtime_account_idle_ticks(int ticks)
{
	struct rq *rq = this_rq();

	irqtime_account_process_tick(current, 0, rq, ticks);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static inline void irqtime_account_idle_ticks(int ticks) {}
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
						struct rq *rq, int nr_ticks) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING

#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_common_task_switch(struct task_struct *prev)
{
	if (is_idle_task(prev))
		vtime_account_idle(prev);
	else
		vtime_account_system(prev);

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	vtime_account_user(prev);
#endif
	arch_vtime_task_switch(prev);
}
#endif

#endif /* CONFIG_VIRT_CPU_ACCOUNTING */


#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle(). Archs that
 * have other meaning of the idle time (s390 only includes the
 * time spent by the CPU when it's in low power mode) must override
 * vtime_account().
 */
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account_irq_enter(struct task_struct *tsk)
{
	if (!in_interrupt() && is_idle_task(tsk))
		vtime_account_idle(tsk);
	else
		vtime_account_system(tsk);
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */

void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	*ut = p->utime;
	*st = p->stime;
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);

	*ut = cputime.utime;
	*st = cputime.stime;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
 * Account a single tick of cpu time.
 * @p: the process that the cpu time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
	cputime_t cputime, scaled, steal;
	struct rq *rq = this_rq();

	if (vtime_accounting_cpu_enabled())
		return;

	if (sched_clock_irqtime) {
		irqtime_account_process_tick(p, user_tick, rq, 1);
		return;
	}

	cputime = cputime_one_jiffy;
	steal = steal_account_process_time(ULONG_MAX);

	if (steal >= cputime)
		return;

	cputime -= steal;
	scaled = cputime_to_scaled(cputime);

	if (user_tick)
		account_user_time(p, cputime, scaled);
	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
		account_system_time(p, HARDIRQ_OFFSET, cputime, scaled);
	else
		account_idle_time(cputime);
}

/*
 * Account multiple ticks of idle time.
 * @ticks: number of stolen ticks
 */
void account_idle_ticks(unsigned long ticks)
{
	cputime_t cputime, steal;

	if (sched_clock_irqtime) {
		irqtime_account_idle_ticks(ticks);
		return;
	}

	cputime = jiffies_to_cputime(ticks);
	steal = steal_account_process_time(ULONG_MAX);

	if (steal >= cputime)
		return;

	cputime -= steal;
	account_idle_time(cputime);
}

/*
 * Perform (stime * rtime) / total, but avoid multiplication overflow by
 * losing precision when the numbers are big.
 */
static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
{
	u64 scaled;

	for (;;) {
		/* Make sure "rtime" is the bigger of stime/rtime */
		if (stime > rtime)
			swap(rtime, stime);

		/* Make sure 'total' fits in 32 bits */
		if (total >> 32)
			goto drop_precision;

		/* Does rtime (and thus stime) fit in 32 bits? */
		if (!(rtime >> 32))
			break;

		/* Can we just balance rtime/stime rather than dropping bits? */
		if (stime >> 31)
			goto drop_precision;

		/* We can grow stime and shrink rtime and try to make them both fit */
		stime <<= 1;
		rtime >>= 1;
		continue;

drop_precision:
		/* We drop from rtime, it has more bits than stime */
		rtime >>= 1;
		total >>= 1;
	}

	/*
	 * Make sure gcc understands that this is a 32x32->64 multiply,
	 * followed by a 64/32->64 divide.
	 */
	scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
	return (__force cputime_t) scaled;
}
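
/*
 * Worked example (hypothetical numbers): with tick-based stime = 2 and
 * utime = 6 (so total = 8) and a precise rtime = 12, all values fit in
 * 32 bits, so the loop exits immediately and
 *
 *	scale_stime(2, 12, 8) = 2 * 12 / 8 = 3
 *
 * i.e. the 1:3 stime:utime ratio observed from ticks is re-applied to the
 * scheduler's rtime. Only when the intermediate product would overflow does
 * the loop shift bits out of rtime/total (or rebalance stime/rtime),
 * trading precision for a safe 32x32->64 multiply.
 */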

/*
 * Adjust tick based cputime random precision against scheduler runtime
 * accounting.
 *
 * Tick based cputime accounting depends on random scheduling timeslices of a
 * task to be interrupted or not by the timer. Depending on these
 * circumstances, the number of these interrupts may be over- or
 * under-estimated, matching the real user and system cputime with a variable
 * precision.
 *
 * Fix this by scaling these tick based values against the total runtime
 * accounted by the CFS scheduler.
 *
 * This code provides the following guarantees:
 *
 *   stime + utime == rtime
 *   stime_i+1 >= stime_i, utime_i+1 >= utime_i
 *
 * Assuming that rtime_i+1 >= rtime_i.
 */
static void cputime_adjust(struct task_cputime *curr,
			   struct prev_cputime *prev,
			   cputime_t *ut, cputime_t *st)
{
	cputime_t rtime, stime, utime;
	unsigned long flags;

	/* Serialize concurrent callers such that we can honour our guarantees */
	raw_spin_lock_irqsave(&prev->lock, flags);
	rtime = nsecs_to_cputime(curr->sum_exec_runtime);

	/*
	 * This is possible under two circumstances:
	 *  - rtime isn't monotonic after all (a bug);
	 *  - we got reordered by the lock.
	 *
	 * In both cases this acts as a filter such that the rest of the code
	 * can assume it is monotonic regardless of anything else.
	 */
	if (prev->stime + prev->utime >= rtime)
		goto out;

	stime = curr->stime;
	utime = curr->utime;

	/*
	 * If either stime or both stime and utime are 0, assume all runtime is
	 * userspace. Once a task gets some ticks, the monotonicity code at
	 * 'update' will ensure things converge to the observed ratio.
	 */
	if (stime == 0) {
		utime = rtime;
		goto update;
	}

	if (utime == 0) {
		stime = rtime;
		goto update;
	}

	stime = scale_stime((__force u64)stime, (__force u64)rtime,
			    (__force u64)(stime + utime));

update:
	/*
	 * Make sure stime doesn't go backwards; this preserves monotonicity
	 * for utime because rtime is monotonic.
	 *
	 *  utime_i+1 = rtime_i+1 - stime_i
	 *            = rtime_i+1 - (rtime_i - utime_i)
	 *            = (rtime_i+1 - rtime_i) + utime_i
	 *            >= utime_i
	 */
	if (stime < prev->stime)
		stime = prev->stime;
	utime = rtime - stime;

	/*
	 * Make sure utime doesn't go backwards; this still preserves
	 * monotonicity for stime, analogous argument to above.
	 */
	if (utime < prev->utime) {
		utime = prev->utime;
		stime = rtime - utime;
	}

	prev->stime = stime;
	prev->utime = utime;
out:
	*ut = prev->utime;
	*st = prev->stime;
	raw_spin_unlock_irqrestore(&prev->lock, flags);
}
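
/*
 * Example of the clamping above (hypothetical numbers, in cputime units):
 * with prev = {.stime = 2, .utime = 2}, raw ticks curr->stime = 1,
 * curr->utime = 5 and rtime = 8, scale_stime(1, 8, 6) yields 1, which is
 * below prev->stime and therefore gets clamped up to 2, giving
 * utime = 8 - 2 = 6. Both fields stay monotonic and their sum still equals
 * rtime, exactly the guarantees documented above.
 */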

void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime = {
		.sum_exec_runtime = p->se.sum_exec_runtime,
	};

	task_cputime(p, &cputime.utime, &cputime.stime);
	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}
EXPORT_SYMBOL_GPL(task_cputime_adjusted);

void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);
	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static cputime_t vtime_delta(struct task_struct *tsk)
{
	unsigned long now = READ_ONCE(jiffies);

	if (time_before(now, (unsigned long)tsk->vtime_snap))
		return 0;

	return jiffies_to_cputime(now - tsk->vtime_snap);
}

static cputime_t get_vtime_delta(struct task_struct *tsk)
{
	unsigned long now = READ_ONCE(jiffies);
	cputime_t delta, other;

	/*
	 * Unlike tick based timing, vtime based timing never has lost
	 * ticks, and no need for steal time accounting to make up for
	 * lost ticks. Vtime accounts a rounded version of actual
	 * elapsed time. Limit account_other_time to prevent rounding
	 * errors from causing elapsed vtime to go negative.
	 */
	delta = jiffies_to_cputime(now - tsk->vtime_snap);
	other = account_other_time(delta);
	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE);
	tsk->vtime_snap = now;

	return delta - other;
}

static void __vtime_account_system(struct task_struct *tsk)
{
	cputime_t delta_cpu = get_vtime_delta(tsk);

	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
}

void vtime_account_system(struct task_struct *tsk)
{
	if (!vtime_delta(tsk))
		return;

	write_seqcount_begin(&tsk->vtime_seqcount);
	__vtime_account_system(tsk);
	write_seqcount_end(&tsk->vtime_seqcount);
}

void vtime_account_user(struct task_struct *tsk)
{
	cputime_t delta_cpu;

	write_seqcount_begin(&tsk->vtime_seqcount);
	tsk->vtime_snap_whence = VTIME_SYS;
	if (vtime_delta(tsk)) {
		delta_cpu = get_vtime_delta(tsk);
		account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
	}
	write_seqcount_end(&tsk->vtime_seqcount);
}

void vtime_user_enter(struct task_struct *tsk)
{
	write_seqcount_begin(&tsk->vtime_seqcount);
	if (vtime_delta(tsk))
		__vtime_account_system(tsk);
	tsk->vtime_snap_whence = VTIME_USER;
	write_seqcount_end(&tsk->vtime_seqcount);
}

void vtime_guest_enter(struct task_struct *tsk)
{
	/*
	 * The flags must be updated under the lock with
	 * the vtime_snap flush and update.
	 * That enforces a right ordering and update sequence
	 * synchronization against the reader (task_gtime())
	 * that can thus safely catch up with a tickless delta.
	 */
	write_seqcount_begin(&tsk->vtime_seqcount);
	if (vtime_delta(tsk))
		__vtime_account_system(tsk);
	current->flags |= PF_VCPU;
	write_seqcount_end(&tsk->vtime_seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);

void vtime_guest_exit(struct task_struct *tsk)
{
	write_seqcount_begin(&tsk->vtime_seqcount);
	__vtime_account_system(tsk);
	current->flags &= ~PF_VCPU;
	write_seqcount_end(&tsk->vtime_seqcount);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);

void vtime_account_idle(struct task_struct *tsk)
{
	cputime_t delta_cpu = get_vtime_delta(tsk);

	account_idle_time(delta_cpu);
}

void arch_vtime_task_switch(struct task_struct *prev)
{
	write_seqcount_begin(&prev->vtime_seqcount);
	prev->vtime_snap_whence = VTIME_INACTIVE;
	write_seqcount_end(&prev->vtime_seqcount);

	write_seqcount_begin(&current->vtime_seqcount);
	current->vtime_snap_whence = VTIME_SYS;
	current->vtime_snap = jiffies;
	write_seqcount_end(&current->vtime_seqcount);
}

void vtime_init_idle(struct task_struct *t, int cpu)
{
	unsigned long flags;

	local_irq_save(flags);
	write_seqcount_begin(&t->vtime_seqcount);
	t->vtime_snap_whence = VTIME_SYS;
	t->vtime_snap = jiffies;
	write_seqcount_end(&t->vtime_seqcount);
	local_irq_restore(flags);
}

cputime_t task_gtime(struct task_struct *t)
{
	unsigned int seq;
	cputime_t gtime;

	if (!vtime_accounting_enabled())
		return t->gtime;

	do {
		seq = read_seqcount_begin(&t->vtime_seqcount);

		gtime = t->gtime;
		if (t->vtime_snap_whence == VTIME_SYS && t->flags & PF_VCPU)
			gtime += vtime_delta(t);

	} while (read_seqcount_retry(&t->vtime_seqcount, seq));

	return gtime;
}

/*
 * Fetch cputime raw values from fields of task_struct and
 * add up the pending nohz execution time since the last
 * cputime snapshot.
 */
static void
fetch_task_cputime(struct task_struct *t,
		   cputime_t *u_dst, cputime_t *s_dst,
		   cputime_t *u_src, cputime_t *s_src,
		   cputime_t *udelta, cputime_t *sdelta)
{
	unsigned int seq;
	unsigned long long delta;

	do {
		*udelta = 0;
		*sdelta = 0;

		seq = read_seqcount_begin(&t->vtime_seqcount);

		if (u_dst)
			*u_dst = *u_src;
		if (s_dst)
			*s_dst = *s_src;

		/* Task is sleeping, nothing to add */
		if (t->vtime_snap_whence == VTIME_INACTIVE ||
		    is_idle_task(t))
			continue;

		delta = vtime_delta(t);

		/*
		 * Task runs either in user or kernel space, add pending nohz time to
		 * the right place.
		 */
		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
			*udelta = delta;
		} else {
			if (t->vtime_snap_whence == VTIME_SYS)
				*sdelta = delta;
		}
	} while (read_seqcount_retry(&t->vtime_seqcount, seq));
}


void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
{
	cputime_t udelta, sdelta;

	if (!vtime_accounting_enabled()) {
		if (utime)
			*utime = t->utime;
		if (stime)
			*stime = t->stime;
		return;
	}

	fetch_task_cputime(t, utime, stime, &t->utime,
			   &t->stime, &udelta, &sdelta);
	if (utime)
		*utime += udelta;
	if (stime)
		*stime += sdelta;
}

void task_cputime_scaled(struct task_struct *t,
			 cputime_t *utimescaled, cputime_t *stimescaled)
{
	cputime_t udelta, sdelta;

	if (!vtime_accounting_enabled()) {
		if (utimescaled)
			*utimescaled = t->utimescaled;
		if (stimescaled)
			*stimescaled = t->stimescaled;
		return;
	}

	fetch_task_cputime(t, utimescaled, stimescaled,
			   &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
	if (utimescaled)
		*utimescaled += cputime_to_scaled(udelta);
	if (stimescaled)
		*stimescaled += cputime_to_scaled(sdelta);
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */