1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * kernel/sched/debug.c 4 * 5 * Print the CFS rbtree and other debugging details 6 * 7 * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar 8 */ 9 #include <linux/debugfs.h> 10 #include <linux/nmi.h> 11 #include "sched.h" 12 13 /* 14 * This allows printing both to /sys/kernel/debug/sched/debug and 15 * to the console 16 */ 17 #define SEQ_printf(m, x...) \ 18 do { \ 19 if (m) \ 20 seq_printf(m, x); \ 21 else \ 22 pr_cont(x); \ 23 } while (0) 24 25 /* 26 * Ease the printing of nsec fields: 27 */ 28 static long long nsec_high(unsigned long long nsec) 29 { 30 if ((long long)nsec < 0) { 31 nsec = -nsec; 32 do_div(nsec, 1000000); 33 return -nsec; 34 } 35 do_div(nsec, 1000000); 36 37 return nsec; 38 } 39 40 static unsigned long nsec_low(unsigned long long nsec) 41 { 42 if ((long long)nsec < 0) 43 nsec = -nsec; 44 45 return do_div(nsec, 1000000); 46 } 47 48 #define SPLIT_NS(x) nsec_high(x), nsec_low(x) 49 50 #define SCHED_FEAT(name, enabled) \ 51 #name , 52 53 static const char * const sched_feat_names[] = { 54 #include "features.h" 55 }; 56 57 #undef SCHED_FEAT 58 59 static int sched_feat_show(struct seq_file *m, void *v) 60 { 61 int i; 62 63 for (i = 0; i < __SCHED_FEAT_NR; i++) { 64 if (!(sysctl_sched_features & (1UL << i))) 65 seq_puts(m, "NO_"); 66 seq_printf(m, "%s ", sched_feat_names[i]); 67 } 68 seq_puts(m, "\n"); 69 70 return 0; 71 } 72 73 #ifdef CONFIG_JUMP_LABEL 74 75 #define jump_label_key__true STATIC_KEY_INIT_TRUE 76 #define jump_label_key__false STATIC_KEY_INIT_FALSE 77 78 #define SCHED_FEAT(name, enabled) \ 79 jump_label_key__##enabled , 80 81 struct static_key sched_feat_keys[__SCHED_FEAT_NR] = { 82 #include "features.h" 83 }; 84 85 #undef SCHED_FEAT 86 87 static void sched_feat_disable(int i) 88 { 89 static_key_disable_cpuslocked(&sched_feat_keys[i]); 90 } 91 92 static void sched_feat_enable(int i) 93 { 94 static_key_enable_cpuslocked(&sched_feat_keys[i]); 95 } 96 #else /* !CONFIG_JUMP_LABEL: */ 97 static void sched_feat_disable(int i) { }; 98 static void sched_feat_enable(int i) { }; 99 #endif /* !CONFIG_JUMP_LABEL */ 100 101 static int sched_feat_set(char *cmp) 102 { 103 int i; 104 int neg = 0; 105 106 if (strncmp(cmp, "NO_", 3) == 0) { 107 neg = 1; 108 cmp += 3; 109 } 110 111 i = match_string(sched_feat_names, __SCHED_FEAT_NR, cmp); 112 if (i < 0) 113 return i; 114 115 if (neg) { 116 sysctl_sched_features &= ~(1UL << i); 117 sched_feat_disable(i); 118 } else { 119 sysctl_sched_features |= (1UL << i); 120 sched_feat_enable(i); 121 } 122 123 return 0; 124 } 125 126 static ssize_t 127 sched_feat_write(struct file *filp, const char __user *ubuf, 128 size_t cnt, loff_t *ppos) 129 { 130 char buf[64]; 131 char *cmp; 132 int ret; 133 struct inode *inode; 134 135 if (cnt > 63) 136 cnt = 63; 137 138 if (copy_from_user(&buf, ubuf, cnt)) 139 return -EFAULT; 140 141 buf[cnt] = 0; 142 cmp = strstrip(buf); 143 144 /* Ensure the static_key remains in a consistent state */ 145 inode = file_inode(filp); 146 cpus_read_lock(); 147 inode_lock(inode); 148 ret = sched_feat_set(cmp); 149 inode_unlock(inode); 150 cpus_read_unlock(); 151 if (ret < 0) 152 return ret; 153 154 *ppos += cnt; 155 156 return cnt; 157 } 158 159 static int sched_feat_open(struct inode *inode, struct file *filp) 160 { 161 return single_open(filp, sched_feat_show, NULL); 162 } 163 164 static const struct file_operations sched_feat_fops = { 165 .open = sched_feat_open, 166 .write = sched_feat_write, 167 .read = seq_read, 168 .llseek = seq_lseek, 169 .release = single_release, 170 }; 171 172 static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf, 173 size_t cnt, loff_t *ppos) 174 { 175 unsigned int scaling; 176 int ret; 177 178 ret = kstrtouint_from_user(ubuf, cnt, 10, &scaling); 179 if (ret) 180 return ret; 181 182 if (scaling >= SCHED_TUNABLESCALING_END) 183 return -EINVAL; 184 185 sysctl_sched_tunable_scaling = scaling; 186 if (sched_update_scaling()) 187 return -EINVAL; 188 189 *ppos += cnt; 190 return cnt; 191 } 192 193 static int sched_scaling_show(struct seq_file *m, void *v) 194 { 195 seq_printf(m, "%d\n", sysctl_sched_tunable_scaling); 196 return 0; 197 } 198 199 static int sched_scaling_open(struct inode *inode, struct file *filp) 200 { 201 return single_open(filp, sched_scaling_show, NULL); 202 } 203 204 static const struct file_operations sched_scaling_fops = { 205 .open = sched_scaling_open, 206 .write = sched_scaling_write, 207 .read = seq_read, 208 .llseek = seq_lseek, 209 .release = single_release, 210 }; 211 212 #ifdef CONFIG_PREEMPT_DYNAMIC 213 214 static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf, 215 size_t cnt, loff_t *ppos) 216 { 217 char buf[16]; 218 int mode; 219 220 if (cnt > 15) 221 cnt = 15; 222 223 if (copy_from_user(&buf, ubuf, cnt)) 224 return -EFAULT; 225 226 buf[cnt] = 0; 227 mode = sched_dynamic_mode(strstrip(buf)); 228 if (mode < 0) 229 return mode; 230 231 sched_dynamic_update(mode); 232 233 *ppos += cnt; 234 235 return cnt; 236 } 237 238 static int sched_dynamic_show(struct seq_file *m, void *v) 239 { 240 int i = (IS_ENABLED(CONFIG_PREEMPT_RT) || IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY)) * 2; 241 int j; 242 243 /* Count entries in NULL terminated preempt_modes */ 244 for (j = 0; preempt_modes[j]; j++) 245 ; 246 j -= !IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY); 247 248 for (; i < j; i++) { 249 if (preempt_dynamic_mode == i) 250 seq_puts(m, "("); 251 seq_puts(m, preempt_modes[i]); 252 if (preempt_dynamic_mode == i) 253 seq_puts(m, ")"); 254 255 seq_puts(m, " "); 256 } 257 258 seq_puts(m, "\n"); 259 return 0; 260 } 261 262 static int sched_dynamic_open(struct inode *inode, struct file *filp) 263 { 264 return single_open(filp, sched_dynamic_show, NULL); 265 } 266 267 static const struct file_operations sched_dynamic_fops = { 268 .open = sched_dynamic_open, 269 .write = sched_dynamic_write, 270 .read = seq_read, 271 .llseek = seq_lseek, 272 .release = single_release, 273 }; 274 275 #endif /* CONFIG_PREEMPT_DYNAMIC */ 276 277 __read_mostly bool sched_debug_verbose; 278 279 static struct dentry *sd_dentry; 280 281 282 static ssize_t sched_verbose_write(struct file *filp, const char __user *ubuf, 283 size_t cnt, loff_t *ppos) 284 { 285 ssize_t result; 286 bool orig; 287 288 cpus_read_lock(); 289 sched_domains_mutex_lock(); 290 291 orig = sched_debug_verbose; 292 result = debugfs_write_file_bool(filp, ubuf, cnt, ppos); 293 294 if (sched_debug_verbose && !orig) 295 update_sched_domain_debugfs(); 296 else if (!sched_debug_verbose && orig) { 297 debugfs_remove(sd_dentry); 298 sd_dentry = NULL; 299 } 300 301 sched_domains_mutex_unlock(); 302 cpus_read_unlock(); 303 304 return result; 305 } 306 307 static const struct file_operations sched_verbose_fops = { 308 .read = debugfs_read_file_bool, 309 .write = sched_verbose_write, 310 .open = simple_open, 311 .llseek = default_llseek, 312 }; 313 314 static const struct seq_operations sched_debug_sops; 315 316 static int sched_debug_open(struct inode *inode, struct file *filp) 317 { 318 return seq_open(filp, &sched_debug_sops); 319 } 320 321 static const struct file_operations sched_debug_fops = { 322 .open = sched_debug_open, 323 .read = seq_read, 324 .llseek = seq_lseek, 325 .release = seq_release, 326 }; 327 328 enum dl_param { 329 DL_RUNTIME = 0, 330 DL_PERIOD, 331 }; 332 333 static unsigned long fair_server_period_max = (1UL << 22) * NSEC_PER_USEC; /* ~4 seconds */ 334 static unsigned long fair_server_period_min = (100) * NSEC_PER_USEC; /* 100 us */ 335 336 static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubuf, 337 size_t cnt, loff_t *ppos, enum dl_param param) 338 { 339 long cpu = (long) ((struct seq_file *) filp->private_data)->private; 340 struct rq *rq = cpu_rq(cpu); 341 u64 runtime, period; 342 size_t err; 343 int retval; 344 u64 value; 345 346 err = kstrtoull_from_user(ubuf, cnt, 10, &value); 347 if (err) 348 return err; 349 350 scoped_guard (rq_lock_irqsave, rq) { 351 runtime = rq->fair_server.dl_runtime; 352 period = rq->fair_server.dl_period; 353 354 switch (param) { 355 case DL_RUNTIME: 356 if (runtime == value) 357 break; 358 runtime = value; 359 break; 360 case DL_PERIOD: 361 if (value == period) 362 break; 363 period = value; 364 break; 365 } 366 367 if (runtime > period || 368 period > fair_server_period_max || 369 period < fair_server_period_min) { 370 return -EINVAL; 371 } 372 373 update_rq_clock(rq); 374 dl_server_stop(&rq->fair_server); 375 376 retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0); 377 if (retval) 378 cnt = retval; 379 380 if (!runtime) 381 printk_deferred("Fair server disabled in CPU %d, system may crash due to starvation.\n", 382 cpu_of(rq)); 383 384 if (rq->cfs.h_nr_queued) 385 dl_server_start(&rq->fair_server); 386 } 387 388 *ppos += cnt; 389 return cnt; 390 } 391 392 static size_t sched_fair_server_show(struct seq_file *m, void *v, enum dl_param param) 393 { 394 unsigned long cpu = (unsigned long) m->private; 395 struct rq *rq = cpu_rq(cpu); 396 u64 value; 397 398 switch (param) { 399 case DL_RUNTIME: 400 value = rq->fair_server.dl_runtime; 401 break; 402 case DL_PERIOD: 403 value = rq->fair_server.dl_period; 404 break; 405 } 406 407 seq_printf(m, "%llu\n", value); 408 return 0; 409 410 } 411 412 static ssize_t 413 sched_fair_server_runtime_write(struct file *filp, const char __user *ubuf, 414 size_t cnt, loff_t *ppos) 415 { 416 return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_RUNTIME); 417 } 418 419 static int sched_fair_server_runtime_show(struct seq_file *m, void *v) 420 { 421 return sched_fair_server_show(m, v, DL_RUNTIME); 422 } 423 424 static int sched_fair_server_runtime_open(struct inode *inode, struct file *filp) 425 { 426 return single_open(filp, sched_fair_server_runtime_show, inode->i_private); 427 } 428 429 static const struct file_operations fair_server_runtime_fops = { 430 .open = sched_fair_server_runtime_open, 431 .write = sched_fair_server_runtime_write, 432 .read = seq_read, 433 .llseek = seq_lseek, 434 .release = single_release, 435 }; 436 437 static ssize_t 438 sched_fair_server_period_write(struct file *filp, const char __user *ubuf, 439 size_t cnt, loff_t *ppos) 440 { 441 return sched_fair_server_write(filp, ubuf, cnt, ppos, DL_PERIOD); 442 } 443 444 static int sched_fair_server_period_show(struct seq_file *m, void *v) 445 { 446 return sched_fair_server_show(m, v, DL_PERIOD); 447 } 448 449 static int sched_fair_server_period_open(struct inode *inode, struct file *filp) 450 { 451 return single_open(filp, sched_fair_server_period_show, inode->i_private); 452 } 453 454 static const struct file_operations fair_server_period_fops = { 455 .open = sched_fair_server_period_open, 456 .write = sched_fair_server_period_write, 457 .read = seq_read, 458 .llseek = seq_lseek, 459 .release = single_release, 460 }; 461 462 static struct dentry *debugfs_sched; 463 464 static void debugfs_fair_server_init(void) 465 { 466 struct dentry *d_fair; 467 unsigned long cpu; 468 469 d_fair = debugfs_create_dir("fair_server", debugfs_sched); 470 if (!d_fair) 471 return; 472 473 for_each_possible_cpu(cpu) { 474 struct dentry *d_cpu; 475 char buf[32]; 476 477 snprintf(buf, sizeof(buf), "cpu%lu", cpu); 478 d_cpu = debugfs_create_dir(buf, d_fair); 479 480 debugfs_create_file("runtime", 0644, d_cpu, (void *) cpu, &fair_server_runtime_fops); 481 debugfs_create_file("period", 0644, d_cpu, (void *) cpu, &fair_server_period_fops); 482 } 483 } 484 485 static __init int sched_init_debug(void) 486 { 487 struct dentry __maybe_unused *numa; 488 489 debugfs_sched = debugfs_create_dir("sched", NULL); 490 491 debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops); 492 debugfs_create_file_unsafe("verbose", 0644, debugfs_sched, &sched_debug_verbose, &sched_verbose_fops); 493 #ifdef CONFIG_PREEMPT_DYNAMIC 494 debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops); 495 #endif 496 497 debugfs_create_u32("base_slice_ns", 0644, debugfs_sched, &sysctl_sched_base_slice); 498 499 debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms); 500 debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once); 501 502 debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops); 503 debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost); 504 debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate); 505 506 sched_domains_mutex_lock(); 507 update_sched_domain_debugfs(); 508 sched_domains_mutex_unlock(); 509 510 #ifdef CONFIG_NUMA_BALANCING 511 numa = debugfs_create_dir("numa_balancing", debugfs_sched); 512 513 debugfs_create_u32("scan_delay_ms", 0644, numa, &sysctl_numa_balancing_scan_delay); 514 debugfs_create_u32("scan_period_min_ms", 0644, numa, &sysctl_numa_balancing_scan_period_min); 515 debugfs_create_u32("scan_period_max_ms", 0644, numa, &sysctl_numa_balancing_scan_period_max); 516 debugfs_create_u32("scan_size_mb", 0644, numa, &sysctl_numa_balancing_scan_size); 517 debugfs_create_u32("hot_threshold_ms", 0644, numa, &sysctl_numa_balancing_hot_threshold); 518 #endif /* CONFIG_NUMA_BALANCING */ 519 520 debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops); 521 522 debugfs_fair_server_init(); 523 524 return 0; 525 } 526 late_initcall(sched_init_debug); 527 528 static cpumask_var_t sd_sysctl_cpus; 529 530 static int sd_flags_show(struct seq_file *m, void *v) 531 { 532 unsigned long flags = *(unsigned int *)m->private; 533 int idx; 534 535 for_each_set_bit(idx, &flags, __SD_FLAG_CNT) { 536 seq_puts(m, sd_flag_debug[idx].name); 537 seq_puts(m, " "); 538 } 539 seq_puts(m, "\n"); 540 541 return 0; 542 } 543 544 static int sd_flags_open(struct inode *inode, struct file *file) 545 { 546 return single_open(file, sd_flags_show, inode->i_private); 547 } 548 549 static const struct file_operations sd_flags_fops = { 550 .open = sd_flags_open, 551 .read = seq_read, 552 .llseek = seq_lseek, 553 .release = single_release, 554 }; 555 556 static void register_sd(struct sched_domain *sd, struct dentry *parent) 557 { 558 #define SDM(type, mode, member) \ 559 debugfs_create_##type(#member, mode, parent, &sd->member) 560 561 SDM(ulong, 0644, min_interval); 562 SDM(ulong, 0644, max_interval); 563 SDM(u64, 0644, max_newidle_lb_cost); 564 SDM(u32, 0644, busy_factor); 565 SDM(u32, 0644, imbalance_pct); 566 SDM(u32, 0644, cache_nice_tries); 567 SDM(str, 0444, name); 568 569 #undef SDM 570 571 debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops); 572 debugfs_create_file("groups_flags", 0444, parent, &sd->groups->flags, &sd_flags_fops); 573 debugfs_create_u32("level", 0444, parent, (u32 *)&sd->level); 574 575 if (sd->flags & SD_ASYM_PACKING) 576 debugfs_create_u32("group_asym_prefer_cpu", 0444, parent, 577 (u32 *)&sd->groups->asym_prefer_cpu); 578 } 579 580 void update_sched_domain_debugfs(void) 581 { 582 int cpu, i; 583 584 /* 585 * This can unfortunately be invoked before sched_debug_init() creates 586 * the debug directory. Don't touch sd_sysctl_cpus until then. 587 */ 588 if (!debugfs_sched) 589 return; 590 591 if (!sched_debug_verbose) 592 return; 593 594 if (!cpumask_available(sd_sysctl_cpus)) { 595 if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL)) 596 return; 597 cpumask_copy(sd_sysctl_cpus, cpu_possible_mask); 598 } 599 600 if (!sd_dentry) { 601 sd_dentry = debugfs_create_dir("domains", debugfs_sched); 602 603 /* rebuild sd_sysctl_cpus if empty since it gets cleared below */ 604 if (cpumask_empty(sd_sysctl_cpus)) 605 cpumask_copy(sd_sysctl_cpus, cpu_online_mask); 606 } 607 608 for_each_cpu(cpu, sd_sysctl_cpus) { 609 struct sched_domain *sd; 610 struct dentry *d_cpu; 611 char buf[32]; 612 613 snprintf(buf, sizeof(buf), "cpu%d", cpu); 614 debugfs_lookup_and_remove(buf, sd_dentry); 615 d_cpu = debugfs_create_dir(buf, sd_dentry); 616 617 i = 0; 618 for_each_domain(cpu, sd) { 619 struct dentry *d_sd; 620 621 snprintf(buf, sizeof(buf), "domain%d", i); 622 d_sd = debugfs_create_dir(buf, d_cpu); 623 624 register_sd(sd, d_sd); 625 i++; 626 } 627 628 __cpumask_clear_cpu(cpu, sd_sysctl_cpus); 629 } 630 } 631 632 void dirty_sched_domain_sysctl(int cpu) 633 { 634 if (cpumask_available(sd_sysctl_cpus)) 635 __cpumask_set_cpu(cpu, sd_sysctl_cpus); 636 } 637 638 #ifdef CONFIG_FAIR_GROUP_SCHED 639 static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg) 640 { 641 struct sched_entity *se = tg->se[cpu]; 642 643 #define P(F) SEQ_printf(m, " .%-30s: %lld\n", #F, (long long)F) 644 #define P_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld\n", \ 645 #F, (long long)schedstat_val(stats->F)) 646 #define PN(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F)) 647 #define PN_SCHEDSTAT(F) SEQ_printf(m, " .%-30s: %lld.%06ld\n", \ 648 #F, SPLIT_NS((long long)schedstat_val(stats->F))) 649 650 if (!se) 651 return; 652 653 PN(se->exec_start); 654 PN(se->vruntime); 655 PN(se->sum_exec_runtime); 656 657 if (schedstat_enabled()) { 658 struct sched_statistics *stats; 659 stats = __schedstats_from_se(se); 660 661 PN_SCHEDSTAT(wait_start); 662 PN_SCHEDSTAT(sleep_start); 663 PN_SCHEDSTAT(block_start); 664 PN_SCHEDSTAT(sleep_max); 665 PN_SCHEDSTAT(block_max); 666 PN_SCHEDSTAT(exec_max); 667 PN_SCHEDSTAT(slice_max); 668 PN_SCHEDSTAT(wait_max); 669 PN_SCHEDSTAT(wait_sum); 670 P_SCHEDSTAT(wait_count); 671 } 672 673 P(se->load.weight); 674 P(se->avg.load_avg); 675 P(se->avg.util_avg); 676 P(se->avg.runnable_avg); 677 678 #undef PN_SCHEDSTAT 679 #undef PN 680 #undef P_SCHEDSTAT 681 #undef P 682 } 683 #endif /* CONFIG_FAIR_GROUP_SCHED */ 684 685 #ifdef CONFIG_CGROUP_SCHED 686 static DEFINE_SPINLOCK(sched_debug_lock); 687 static char group_path[PATH_MAX]; 688 689 static void task_group_path(struct task_group *tg, char *path, int plen) 690 { 691 if (autogroup_path(tg, path, plen)) 692 return; 693 694 cgroup_path(tg->css.cgroup, path, plen); 695 } 696 697 /* 698 * Only 1 SEQ_printf_task_group_path() caller can use the full length 699 * group_path[] for cgroup path. Other simultaneous callers will have 700 * to use a shorter stack buffer. A "..." suffix is appended at the end 701 * of the stack buffer so that it will show up in case the output length 702 * matches the given buffer size to indicate possible path name truncation. 703 */ 704 #define SEQ_printf_task_group_path(m, tg, fmt...) \ 705 { \ 706 if (spin_trylock(&sched_debug_lock)) { \ 707 task_group_path(tg, group_path, sizeof(group_path)); \ 708 SEQ_printf(m, fmt, group_path); \ 709 spin_unlock(&sched_debug_lock); \ 710 } else { \ 711 char buf[128]; \ 712 char *bufend = buf + sizeof(buf) - 3; \ 713 task_group_path(tg, buf, bufend - buf); \ 714 strcpy(bufend - 1, "..."); \ 715 SEQ_printf(m, fmt, buf); \ 716 } \ 717 } 718 #endif 719 720 static void 721 print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) 722 { 723 if (task_current(rq, p)) 724 SEQ_printf(m, ">R"); 725 else 726 SEQ_printf(m, " %c", task_state_to_char(p)); 727 728 SEQ_printf(m, " %15s %5d %9Ld.%06ld %c %9Ld.%06ld %c %9Ld.%06ld %9Ld.%06ld %9Ld %5d ", 729 p->comm, task_pid_nr(p), 730 SPLIT_NS(p->se.vruntime), 731 entity_eligible(cfs_rq_of(&p->se), &p->se) ? 'E' : 'N', 732 SPLIT_NS(p->se.deadline), 733 p->se.custom_slice ? 'S' : ' ', 734 SPLIT_NS(p->se.slice), 735 SPLIT_NS(p->se.sum_exec_runtime), 736 (long long)(p->nvcsw + p->nivcsw), 737 p->prio); 738 739 SEQ_printf(m, "%9lld.%06ld %9lld.%06ld %9lld.%06ld", 740 SPLIT_NS(schedstat_val_or_zero(p->stats.wait_sum)), 741 SPLIT_NS(schedstat_val_or_zero(p->stats.sum_sleep_runtime)), 742 SPLIT_NS(schedstat_val_or_zero(p->stats.sum_block_runtime))); 743 744 #ifdef CONFIG_NUMA_BALANCING 745 SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p)); 746 #endif 747 #ifdef CONFIG_CGROUP_SCHED 748 SEQ_printf_task_group_path(m, task_group(p), " %s") 749 #endif 750 751 SEQ_printf(m, "\n"); 752 } 753 754 static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) 755 { 756 struct task_struct *g, *p; 757 758 SEQ_printf(m, "\n"); 759 SEQ_printf(m, "runnable tasks:\n"); 760 SEQ_printf(m, " S task PID vruntime eligible " 761 "deadline slice sum-exec switches " 762 "prio wait-time sum-sleep sum-block" 763 #ifdef CONFIG_NUMA_BALANCING 764 " node group-id" 765 #endif 766 #ifdef CONFIG_CGROUP_SCHED 767 " group-path" 768 #endif 769 "\n"); 770 SEQ_printf(m, "-------------------------------------------------------" 771 "------------------------------------------------------" 772 "------------------------------------------------------" 773 #ifdef CONFIG_NUMA_BALANCING 774 "--------------" 775 #endif 776 #ifdef CONFIG_CGROUP_SCHED 777 "--------------" 778 #endif 779 "\n"); 780 781 rcu_read_lock(); 782 for_each_process_thread(g, p) { 783 if (task_cpu(p) != rq_cpu) 784 continue; 785 786 print_task(m, rq, p); 787 } 788 rcu_read_unlock(); 789 } 790 791 void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) 792 { 793 s64 left_vruntime = -1, zero_vruntime, right_vruntime = -1, left_deadline = -1, spread; 794 struct sched_entity *last, *first, *root; 795 struct rq *rq = cpu_rq(cpu); 796 unsigned long flags; 797 798 #ifdef CONFIG_FAIR_GROUP_SCHED 799 SEQ_printf(m, "\n"); 800 SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu); 801 #else 802 SEQ_printf(m, "\n"); 803 SEQ_printf(m, "cfs_rq[%d]:\n", cpu); 804 #endif 805 806 raw_spin_rq_lock_irqsave(rq, flags); 807 root = __pick_root_entity(cfs_rq); 808 if (root) 809 left_vruntime = root->min_vruntime; 810 first = __pick_first_entity(cfs_rq); 811 if (first) 812 left_deadline = first->deadline; 813 last = __pick_last_entity(cfs_rq); 814 if (last) 815 right_vruntime = last->vruntime; 816 zero_vruntime = cfs_rq->zero_vruntime; 817 raw_spin_rq_unlock_irqrestore(rq, flags); 818 819 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "left_deadline", 820 SPLIT_NS(left_deadline)); 821 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "left_vruntime", 822 SPLIT_NS(left_vruntime)); 823 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "zero_vruntime", 824 SPLIT_NS(zero_vruntime)); 825 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "avg_vruntime", 826 SPLIT_NS(avg_vruntime(cfs_rq))); 827 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "right_vruntime", 828 SPLIT_NS(right_vruntime)); 829 spread = right_vruntime - left_vruntime; 830 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "spread", SPLIT_NS(spread)); 831 SEQ_printf(m, " .%-30s: %d\n", "nr_queued", cfs_rq->nr_queued); 832 SEQ_printf(m, " .%-30s: %d\n", "h_nr_runnable", cfs_rq->h_nr_runnable); 833 SEQ_printf(m, " .%-30s: %d\n", "h_nr_queued", cfs_rq->h_nr_queued); 834 SEQ_printf(m, " .%-30s: %d\n", "h_nr_idle", cfs_rq->h_nr_idle); 835 SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); 836 SEQ_printf(m, " .%-30s: %lu\n", "load_avg", 837 cfs_rq->avg.load_avg); 838 SEQ_printf(m, " .%-30s: %lu\n", "runnable_avg", 839 cfs_rq->avg.runnable_avg); 840 SEQ_printf(m, " .%-30s: %lu\n", "util_avg", 841 cfs_rq->avg.util_avg); 842 SEQ_printf(m, " .%-30s: %u\n", "util_est", 843 cfs_rq->avg.util_est); 844 SEQ_printf(m, " .%-30s: %ld\n", "removed.load_avg", 845 cfs_rq->removed.load_avg); 846 SEQ_printf(m, " .%-30s: %ld\n", "removed.util_avg", 847 cfs_rq->removed.util_avg); 848 SEQ_printf(m, " .%-30s: %ld\n", "removed.runnable_avg", 849 cfs_rq->removed.runnable_avg); 850 #ifdef CONFIG_FAIR_GROUP_SCHED 851 SEQ_printf(m, " .%-30s: %lu\n", "tg_load_avg_contrib", 852 cfs_rq->tg_load_avg_contrib); 853 SEQ_printf(m, " .%-30s: %ld\n", "tg_load_avg", 854 atomic_long_read(&cfs_rq->tg->load_avg)); 855 #endif /* CONFIG_FAIR_GROUP_SCHED */ 856 #ifdef CONFIG_CFS_BANDWIDTH 857 SEQ_printf(m, " .%-30s: %d\n", "throttled", 858 cfs_rq->throttled); 859 SEQ_printf(m, " .%-30s: %d\n", "throttle_count", 860 cfs_rq->throttle_count); 861 #endif 862 863 #ifdef CONFIG_FAIR_GROUP_SCHED 864 print_cfs_group_stats(m, cpu, cfs_rq->tg); 865 #endif 866 } 867 868 void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) 869 { 870 #ifdef CONFIG_RT_GROUP_SCHED 871 SEQ_printf(m, "\n"); 872 SEQ_printf_task_group_path(m, rt_rq->tg, "rt_rq[%d]:%s\n", cpu); 873 #else 874 SEQ_printf(m, "\n"); 875 SEQ_printf(m, "rt_rq[%d]:\n", cpu); 876 #endif 877 878 #define P(x) \ 879 SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) 880 #define PU(x) \ 881 SEQ_printf(m, " .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x)) 882 #define PN(x) \ 883 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x)) 884 885 PU(rt_nr_running); 886 887 #ifdef CONFIG_RT_GROUP_SCHED 888 P(rt_throttled); 889 PN(rt_time); 890 PN(rt_runtime); 891 #endif 892 893 #undef PN 894 #undef PU 895 #undef P 896 } 897 898 void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq) 899 { 900 struct dl_bw *dl_bw; 901 902 SEQ_printf(m, "\n"); 903 SEQ_printf(m, "dl_rq[%d]:\n", cpu); 904 905 #define PU(x) \ 906 SEQ_printf(m, " .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x)) 907 908 PU(dl_nr_running); 909 dl_bw = &cpu_rq(cpu)->rd->dl_bw; 910 SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw); 911 SEQ_printf(m, " .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw); 912 913 #undef PU 914 } 915 916 static void print_cpu(struct seq_file *m, int cpu) 917 { 918 struct rq *rq = cpu_rq(cpu); 919 920 #ifdef CONFIG_X86 921 { 922 unsigned int freq = cpu_khz ? : 1; 923 924 SEQ_printf(m, "cpu#%d, %u.%03u MHz\n", 925 cpu, freq / 1000, (freq % 1000)); 926 } 927 #else /* !CONFIG_X86: */ 928 SEQ_printf(m, "cpu#%d\n", cpu); 929 #endif /* !CONFIG_X86 */ 930 931 #define P(x) \ 932 do { \ 933 if (sizeof(rq->x) == 4) \ 934 SEQ_printf(m, " .%-30s: %d\n", #x, (int)(rq->x)); \ 935 else \ 936 SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rq->x));\ 937 } while (0) 938 939 #define PN(x) \ 940 SEQ_printf(m, " .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x)) 941 942 P(nr_running); 943 P(nr_switches); 944 P(nr_uninterruptible); 945 PN(next_balance); 946 SEQ_printf(m, " .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr))); 947 PN(clock); 948 PN(clock_task); 949 #undef P 950 #undef PN 951 952 #define P64(n) SEQ_printf(m, " .%-30s: %Ld\n", #n, rq->n); 953 P64(avg_idle); 954 P64(max_idle_balance_cost); 955 #undef P64 956 957 #define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, schedstat_val(rq->n)); 958 if (schedstat_enabled()) { 959 P(yld_count); 960 P(sched_count); 961 P(sched_goidle); 962 P(ttwu_count); 963 P(ttwu_local); 964 } 965 #undef P 966 967 print_cfs_stats(m, cpu); 968 print_rt_stats(m, cpu); 969 print_dl_stats(m, cpu); 970 971 print_rq(m, rq, cpu); 972 SEQ_printf(m, "\n"); 973 } 974 975 static const char *sched_tunable_scaling_names[] = { 976 "none", 977 "logarithmic", 978 "linear" 979 }; 980 981 static void sched_debug_header(struct seq_file *m) 982 { 983 u64 ktime, sched_clk, cpu_clk; 984 unsigned long flags; 985 986 local_irq_save(flags); 987 ktime = ktime_to_ns(ktime_get()); 988 sched_clk = sched_clock(); 989 cpu_clk = local_clock(); 990 local_irq_restore(flags); 991 992 SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n", 993 init_utsname()->release, 994 (int)strcspn(init_utsname()->version, " "), 995 init_utsname()->version); 996 997 #define P(x) \ 998 SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x)) 999 #define PN(x) \ 1000 SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) 1001 PN(ktime); 1002 PN(sched_clk); 1003 PN(cpu_clk); 1004 P(jiffies); 1005 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK 1006 P(sched_clock_stable()); 1007 #endif 1008 #undef PN 1009 #undef P 1010 1011 SEQ_printf(m, "\n"); 1012 SEQ_printf(m, "sysctl_sched\n"); 1013 1014 #define P(x) \ 1015 SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x)) 1016 #define PN(x) \ 1017 SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) 1018 PN(sysctl_sched_base_slice); 1019 P(sysctl_sched_features); 1020 #undef PN 1021 #undef P 1022 1023 SEQ_printf(m, " .%-40s: %d (%s)\n", 1024 "sysctl_sched_tunable_scaling", 1025 sysctl_sched_tunable_scaling, 1026 sched_tunable_scaling_names[sysctl_sched_tunable_scaling]); 1027 SEQ_printf(m, "\n"); 1028 } 1029 1030 static int sched_debug_show(struct seq_file *m, void *v) 1031 { 1032 int cpu = (unsigned long)(v - 2); 1033 1034 if (cpu != -1) 1035 print_cpu(m, cpu); 1036 else 1037 sched_debug_header(m); 1038 1039 return 0; 1040 } 1041 1042 void sysrq_sched_debug_show(void) 1043 { 1044 int cpu; 1045 1046 sched_debug_header(NULL); 1047 for_each_online_cpu(cpu) { 1048 /* 1049 * Need to reset softlockup watchdogs on all CPUs, because 1050 * another CPU might be blocked waiting for us to process 1051 * an IPI or stop_machine. 1052 */ 1053 touch_nmi_watchdog(); 1054 touch_all_softlockup_watchdogs(); 1055 print_cpu(NULL, cpu); 1056 } 1057 } 1058 1059 /* 1060 * This iterator needs some explanation. 1061 * It returns 1 for the header position. 1062 * This means 2 is CPU 0. 1063 * In a hotplugged system some CPUs, including CPU 0, may be missing so we have 1064 * to use cpumask_* to iterate over the CPUs. 1065 */ 1066 static void *sched_debug_start(struct seq_file *file, loff_t *offset) 1067 { 1068 unsigned long n = *offset; 1069 1070 if (n == 0) 1071 return (void *) 1; 1072 1073 n--; 1074 1075 if (n > 0) 1076 n = cpumask_next(n - 1, cpu_online_mask); 1077 else 1078 n = cpumask_first(cpu_online_mask); 1079 1080 *offset = n + 1; 1081 1082 if (n < nr_cpu_ids) 1083 return (void *)(unsigned long)(n + 2); 1084 1085 return NULL; 1086 } 1087 1088 static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset) 1089 { 1090 (*offset)++; 1091 return sched_debug_start(file, offset); 1092 } 1093 1094 static void sched_debug_stop(struct seq_file *file, void *data) 1095 { 1096 } 1097 1098 static const struct seq_operations sched_debug_sops = { 1099 .start = sched_debug_start, 1100 .next = sched_debug_next, 1101 .stop = sched_debug_stop, 1102 .show = sched_debug_show, 1103 }; 1104 1105 #define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F)) 1106 #define __P(F) __PS(#F, F) 1107 #define P(F) __PS(#F, p->F) 1108 #define PM(F, M) __PS(#F, p->F & (M)) 1109 #define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F))) 1110 #define __PN(F) __PSN(#F, F) 1111 #define PN(F) __PSN(#F, p->F) 1112 1113 1114 #ifdef CONFIG_NUMA_BALANCING 1115 void print_numa_stats(struct seq_file *m, int node, unsigned long tsf, 1116 unsigned long tpf, unsigned long gsf, unsigned long gpf) 1117 { 1118 SEQ_printf(m, "numa_faults node=%d ", node); 1119 SEQ_printf(m, "task_private=%lu task_shared=%lu ", tpf, tsf); 1120 SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gpf, gsf); 1121 } 1122 #endif 1123 1124 1125 static void sched_show_numa(struct task_struct *p, struct seq_file *m) 1126 { 1127 #ifdef CONFIG_NUMA_BALANCING 1128 if (p->mm) 1129 P(mm->numa_scan_seq); 1130 1131 P(numa_pages_migrated); 1132 P(numa_preferred_nid); 1133 P(total_numa_faults); 1134 SEQ_printf(m, "current_node=%d, numa_group_id=%d\n", 1135 task_node(p), task_numa_group_id(p)); 1136 show_numa_stats(p, m); 1137 #endif /* CONFIG_NUMA_BALANCING */ 1138 } 1139 1140 void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, 1141 struct seq_file *m) 1142 { 1143 unsigned long nr_switches; 1144 1145 SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns), 1146 get_nr_threads(p)); 1147 SEQ_printf(m, 1148 "---------------------------------------------------------" 1149 "----------\n"); 1150 1151 #define P_SCHEDSTAT(F) __PS(#F, schedstat_val(p->stats.F)) 1152 #define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->stats.F)) 1153 1154 PN(se.exec_start); 1155 PN(se.vruntime); 1156 PN(se.sum_exec_runtime); 1157 1158 nr_switches = p->nvcsw + p->nivcsw; 1159 1160 P(se.nr_migrations); 1161 1162 if (schedstat_enabled()) { 1163 u64 avg_atom, avg_per_cpu; 1164 1165 PN_SCHEDSTAT(sum_sleep_runtime); 1166 PN_SCHEDSTAT(sum_block_runtime); 1167 PN_SCHEDSTAT(wait_start); 1168 PN_SCHEDSTAT(sleep_start); 1169 PN_SCHEDSTAT(block_start); 1170 PN_SCHEDSTAT(sleep_max); 1171 PN_SCHEDSTAT(block_max); 1172 PN_SCHEDSTAT(exec_max); 1173 PN_SCHEDSTAT(slice_max); 1174 PN_SCHEDSTAT(wait_max); 1175 PN_SCHEDSTAT(wait_sum); 1176 P_SCHEDSTAT(wait_count); 1177 PN_SCHEDSTAT(iowait_sum); 1178 P_SCHEDSTAT(iowait_count); 1179 P_SCHEDSTAT(nr_migrations_cold); 1180 P_SCHEDSTAT(nr_failed_migrations_affine); 1181 P_SCHEDSTAT(nr_failed_migrations_running); 1182 P_SCHEDSTAT(nr_failed_migrations_hot); 1183 P_SCHEDSTAT(nr_forced_migrations); 1184 P_SCHEDSTAT(nr_wakeups); 1185 P_SCHEDSTAT(nr_wakeups_sync); 1186 P_SCHEDSTAT(nr_wakeups_migrate); 1187 P_SCHEDSTAT(nr_wakeups_local); 1188 P_SCHEDSTAT(nr_wakeups_remote); 1189 P_SCHEDSTAT(nr_wakeups_affine); 1190 P_SCHEDSTAT(nr_wakeups_affine_attempts); 1191 P_SCHEDSTAT(nr_wakeups_passive); 1192 P_SCHEDSTAT(nr_wakeups_idle); 1193 1194 avg_atom = p->se.sum_exec_runtime; 1195 if (nr_switches) 1196 avg_atom = div64_ul(avg_atom, nr_switches); 1197 else 1198 avg_atom = -1LL; 1199 1200 avg_per_cpu = p->se.sum_exec_runtime; 1201 if (p->se.nr_migrations) { 1202 avg_per_cpu = div64_u64(avg_per_cpu, 1203 p->se.nr_migrations); 1204 } else { 1205 avg_per_cpu = -1LL; 1206 } 1207 1208 __PN(avg_atom); 1209 __PN(avg_per_cpu); 1210 1211 #ifdef CONFIG_SCHED_CORE 1212 PN_SCHEDSTAT(core_forceidle_sum); 1213 #endif 1214 } 1215 1216 __P(nr_switches); 1217 __PS("nr_voluntary_switches", p->nvcsw); 1218 __PS("nr_involuntary_switches", p->nivcsw); 1219 1220 P(se.load.weight); 1221 P(se.avg.load_sum); 1222 P(se.avg.runnable_sum); 1223 P(se.avg.util_sum); 1224 P(se.avg.load_avg); 1225 P(se.avg.runnable_avg); 1226 P(se.avg.util_avg); 1227 P(se.avg.last_update_time); 1228 PM(se.avg.util_est, ~UTIL_AVG_UNCHANGED); 1229 #ifdef CONFIG_UCLAMP_TASK 1230 __PS("uclamp.min", p->uclamp_req[UCLAMP_MIN].value); 1231 __PS("uclamp.max", p->uclamp_req[UCLAMP_MAX].value); 1232 __PS("effective uclamp.min", uclamp_eff_value(p, UCLAMP_MIN)); 1233 __PS("effective uclamp.max", uclamp_eff_value(p, UCLAMP_MAX)); 1234 #endif /* CONFIG_UCLAMP_TASK */ 1235 P(policy); 1236 P(prio); 1237 if (task_has_dl_policy(p)) { 1238 P(dl.runtime); 1239 P(dl.deadline); 1240 } else if (fair_policy(p->policy)) { 1241 P(se.slice); 1242 } 1243 #ifdef CONFIG_SCHED_CLASS_EXT 1244 __PS("ext.enabled", task_on_scx(p)); 1245 #endif 1246 #undef PN_SCHEDSTAT 1247 #undef P_SCHEDSTAT 1248 1249 { 1250 unsigned int this_cpu = raw_smp_processor_id(); 1251 u64 t0, t1; 1252 1253 t0 = cpu_clock(this_cpu); 1254 t1 = cpu_clock(this_cpu); 1255 __PS("clock-delta", t1-t0); 1256 } 1257 1258 sched_show_numa(p, m); 1259 } 1260 1261 void proc_sched_set_task(struct task_struct *p) 1262 { 1263 #ifdef CONFIG_SCHEDSTATS 1264 memset(&p->stats, 0, sizeof(p->stats)); 1265 #endif 1266 } 1267 1268 void resched_latency_warn(int cpu, u64 latency) 1269 { 1270 static DEFINE_RATELIMIT_STATE(latency_check_ratelimit, 60 * 60 * HZ, 1); 1271 1272 if (likely(!__ratelimit(&latency_check_ratelimit))) 1273 return; 1274 1275 pr_err("sched: CPU %d need_resched set for > %llu ns (%d ticks) without schedule\n", 1276 cpu, latency, cpu_rq(cpu)->ticks_without_resched); 1277 dump_stack(); 1278 } 1279