// SPDX-License-Identifier: GPL-2.0-only
/*
 * Detect Hung Task
 *
 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
 *
 */

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/export.h>
#include <linux/panic_notifier.h>
#include <linux/sysctl.h>
#include <linux/suspend.h>
#include <linux/utsname.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/sched/sysctl.h>
#include <linux/hung_task.h>

#include <trace/events/sched.h>

/*
 * The number of tasks checked:
 */
static int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;

/*
 * Total number of tasks detected as hung since boot:
 */
static unsigned long __read_mostly sysctl_hung_task_detect_count;

/*
 * Limit number of tasks checked in a batch.
 *
 * This value controls the preemptibility of khungtaskd since preemption
 * is disabled during the critical section. It also controls the size of
 * the RCU grace period. So it needs to be upper-bound.
 */
#define HUNG_TASK_LOCK_BREAK (HZ / 10)

/*
 * Zero means infinite timeout - no checking done:
 */
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
EXPORT_SYMBOL_GPL(sysctl_hung_task_timeout_secs);

/*
 * Zero (default value) means use sysctl_hung_task_timeout_secs:
 */
static unsigned long __read_mostly sysctl_hung_task_check_interval_secs;

/* Number of warnings still allowed to be printed; -1 means unlimited. */
static int __read_mostly sysctl_hung_task_warnings = 10;

static int __read_mostly did_panic;
static bool hung_task_show_lock;
static bool hung_task_call_panic;
static bool hung_task_show_all_bt;

static struct task_struct *watchdog_task;

#ifdef CONFIG_SMP
/*
 * Should we dump all CPUs backtraces in a hung task event?
 * Defaults to 0, can be changed via sysctl.
 */
static unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
#else
#define sysctl_hung_task_all_cpu_backtrace 0
#endif /* CONFIG_SMP */

/*
 * Should we panic (and reboot, if panic_timeout= is set) when a
 * hung task is detected:
 */
static unsigned int __read_mostly sysctl_hung_task_panic =
	IS_ENABLED(CONFIG_BOOTPARAM_HUNG_TASK_PANIC);

/*
 * Panic notifier: once the system has panicked, remember it so the
 * detector stops reporting additional hung tasks (see did_panic check
 * in check_hung_uninterruptible_tasks()).
 */
static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
	did_panic = 1;

	return NOTIFY_DONE;
}

static struct notifier_block panic_block = {
	.notifier_call = hung_task_panic,
};


#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
/*
 * Report the task that likely blocks @task, based on the lock address
 * recorded in task->blocker. Must be called under rcu_read_lock(), as
 * it walks the process list to validate the owner pointer.
 */
static void debug_show_blocker(struct task_struct *task)
{
	struct task_struct *g, *t;
	unsigned long owner, blocker, blocker_type;

	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held");

	blocker = READ_ONCE(task->blocker);
	if (!blocker)
		return;

	blocker_type = hung_task_get_blocker_type(blocker);

	switch (blocker_type) {
	case BLOCKER_TYPE_MUTEX:
		owner = mutex_get_owner(
			(struct mutex *)hung_task_blocker_to_lock(blocker));
		break;
	case BLOCKER_TYPE_SEM:
		owner = sem_last_holder(
			(struct semaphore *)hung_task_blocker_to_lock(blocker));
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}


	if (unlikely(!owner)) {
		switch (blocker_type) {
		case BLOCKER_TYPE_MUTEX:
			pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n",
			       task->comm, task->pid);
			break;
		case BLOCKER_TYPE_SEM:
			pr_err("INFO: task %s:%d is blocked on a semaphore, but the last holder is not found.\n",
			       task->comm, task->pid);
			break;
		}
		return;
	}

	/* Ensure the owner information is correct. */
	for_each_process_thread(g, t) {
		if ((unsigned long)t != owner)
			continue;

		switch (blocker_type) {
		case BLOCKER_TYPE_MUTEX:
			pr_err("INFO: task %s:%d is blocked on a mutex likely owned by task %s:%d.\n",
			       task->comm, task->pid, t->comm, t->pid);
			break;
		case BLOCKER_TYPE_SEM:
			pr_err("INFO: task %s:%d blocked on a semaphore likely last held by task %s:%d\n",
			       task->comm, task->pid, t->comm, t->pid);
			break;
		}
		sched_show_task(t);
		return;
	}
}
#else
static inline void debug_show_blocker(struct task_struct *task)
{
}
#endif

/*
 * Check one task: if it has not been context-switched for longer than
 * @timeout seconds while in uninterruptible sleep, report it (and
 * optionally arm a panic, per sysctl_hung_task_panic).
 */
static void check_hung_task(struct task_struct *t, unsigned long timeout)
{
	unsigned long switch_count = t->nvcsw + t->nivcsw;

	/*
	 * Ensure the task is not frozen.
	 * Also, skip vfork and any other user process that freezer should skip.
	 */
	if (unlikely(READ_ONCE(t->__state) & TASK_FROZEN))
		return;

	/*
	 * When a freshly created task is scheduled once, changes its state to
	 * TASK_UNINTERRUPTIBLE without having ever been switched out once, it
	 * mustn't be checked.
	 */
	if (unlikely(!switch_count))
		return;

	if (switch_count != t->last_switch_count) {
		/* Task ran since the last scan: restart its timeout window. */
		t->last_switch_count = switch_count;
		t->last_switch_time = jiffies;
		return;
	}
	if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
		return;

	/*
	 * This counter tracks the total number of tasks detected as hung
	 * since boot.
	 */
	sysctl_hung_task_detect_count++;

	trace_sched_process_hang(t);

	if (sysctl_hung_task_panic) {
		console_verbose();
		hung_task_show_lock = true;
		hung_task_call_panic = true;
	}

	/*
	 * Ok, the task did not get scheduled for more than 2 minutes,
	 * complain:
	 */
	if (sysctl_hung_task_warnings || hung_task_call_panic) {
		if (sysctl_hung_task_warnings > 0)
			sysctl_hung_task_warnings--;
		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
		       t->comm, t->pid, (jiffies - t->last_switch_time) / HZ);
		pr_err("      %s %s %.*s\n",
		       print_tainted(), init_utsname()->release,
		       (int)strcspn(init_utsname()->version, " "),
		       init_utsname()->version);
		if (t->flags & PF_POSTCOREDUMP)
			pr_err("      Blocked by coredump.\n");
		pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
		       " disables this message.\n");
		sched_show_task(t);
		debug_show_blocker(t);
		hung_task_show_lock = true;

		if (sysctl_hung_task_all_cpu_backtrace)
			hung_task_show_all_bt = true;
		if (!sysctl_hung_task_warnings)
			pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
	}

	touch_nmi_watchdog();
}

/*
 * To avoid extending the RCU grace period for an unbounded amount of time,
 * periodically exit the critical section and enter a new one.
 *
 * For preemptible RCU it is sufficient to call rcu_read_unlock in order
 * to exit the grace period. For classic RCU, a reschedule is required.
 *
 * Returns false if either task died while the lock was dropped, in which
 * case the caller must abort the scan (the list cursors are stale).
 */
static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
{
	bool can_cont;

	/* Pin both tasks so the pointers stay valid across the unlock. */
	get_task_struct(g);
	get_task_struct(t);
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();
	can_cont = pid_alive(g) && pid_alive(t);
	put_task_struct(t);
	put_task_struct(g);

	return can_cont;
}

/*
 * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
 * a really long time (120 seconds). If that happens, print out
 * a warning.
 */
static void check_hung_uninterruptible_tasks(unsigned long timeout)
{
	int max_count = sysctl_hung_task_check_count;
	unsigned long last_break = jiffies;
	struct task_struct *g, *t;

	/*
	 * If the system crashed already then all bets are off,
	 * do not report extra hung tasks:
	 */
	if (test_taint(TAINT_DIE) || did_panic)
		return;

	hung_task_show_lock = false;
	rcu_read_lock();
	for_each_process_thread(g, t) {
		unsigned int state;

		if (!max_count--)
			goto unlock;
		if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) {
			if (!rcu_lock_break(g, t))
				goto unlock;
			last_break = jiffies;
		}
		/*
		 * skip the TASK_KILLABLE tasks -- these can be killed
		 * skip the TASK_IDLE tasks -- those are genuinely idle
		 */
		state = READ_ONCE(t->__state);
		if ((state & TASK_UNINTERRUPTIBLE) &&
		    !(state & TASK_WAKEKILL) &&
		    !(state & TASK_NOLOAD))
			check_hung_task(t, timeout);
	}
 unlock:
	rcu_read_unlock();
	if (hung_task_show_lock)
		debug_show_all_locks();

	if (hung_task_show_all_bt) {
		hung_task_show_all_bt = false;
		trigger_all_cpu_backtrace();
	}

	if (hung_task_call_panic)
		panic("hung_task: blocked tasks");
}

/*
 * Jiffies remaining until the next check is due, given when the last
 * check ran and the timeout in seconds; <= 0 means a check is overdue.
 */
static long hung_timeout_jiffies(unsigned long last_checked,
				 unsigned long timeout)
{
	/* timeout of 0 will disable the watchdog */
	return timeout ? last_checked - jiffies + timeout * HZ :
		MAX_SCHEDULE_TIMEOUT;
}

#ifdef CONFIG_SYSCTL
/*
 * Process updating of timeout sysctl
 */
static int proc_dohung_task_timeout_secs(const struct ctl_table *table, int write,
				  void *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);

	if (ret || !write)
		goto out;

	/* Kick khungtaskd so the new timeout takes effect immediately. */
	wake_up_process(watchdog_task);

 out:
	return ret;
}

/*
 * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
 * and hung_task_check_interval_secs
 */
static const unsigned long hung_task_timeout_max = (LONG_MAX / HZ);
static const struct ctl_table hung_task_sysctls[] = {
#ifdef CONFIG_SMP
	{
		.procname	= "hung_task_all_cpu_backtrace",
		.data		= &sysctl_hung_task_all_cpu_backtrace,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#endif /* CONFIG_SMP */
	{
		.procname	= "hung_task_panic",
		.data		= &sysctl_hung_task_panic,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "hung_task_check_count",
		.data		= &sysctl_hung_task_check_count,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{
		.procname	= "hung_task_timeout_secs",
		.data		= &sysctl_hung_task_timeout_secs,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_dohung_task_timeout_secs,
		.extra2		= (void *)&hung_task_timeout_max,
	},
	{
		.procname	= "hung_task_check_interval_secs",
		.data		= &sysctl_hung_task_check_interval_secs,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_dohung_task_timeout_secs,
		.extra2		= (void *)&hung_task_timeout_max,
	},
	{
		.procname	= "hung_task_warnings",
		.data		= &sysctl_hung_task_warnings,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_NEG_ONE,
	},
	{
		.procname	= "hung_task_detect_count",
		.data		= &sysctl_hung_task_detect_count,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0444,
		.proc_handler	= proc_doulongvec_minmax,
	},
};

static void __init hung_task_sysctl_init(void)
{
	register_sysctl_init("kernel", hung_task_sysctls);
}
#else
#define hung_task_sysctl_init() do { } while (0)
#endif /* CONFIG_SYSCTL */


static atomic_t reset_hung_task = ATOMIC_INIT(0);

/*
 * Ask khungtaskd to skip its next scan (e.g. after an event that makes
 * current last_switch_time bookkeeping meaningless). The flag is consumed
 * with atomic_xchg() in watchdog().
 */
void reset_hung_task_detector(void)
{
	atomic_set(&reset_hung_task, 1);
}
EXPORT_SYMBOL_GPL(reset_hung_task_detector);

static bool hung_detector_suspended;

/*
 * PM notifier: suppress hung-task scans across suspend/hibernate, since
 * tasks legitimately stay in D state for long stretches there.
 */
static int hungtask_pm_notify(struct notifier_block *self,
			      unsigned long action, void *hcpu)
{
	switch (action) {
	case PM_SUSPEND_PREPARE:
	case PM_HIBERNATION_PREPARE:
	case PM_RESTORE_PREPARE:
		hung_detector_suspended = true;
		break;
	case PM_POST_SUSPEND:
	case PM_POST_HIBERNATION:
	case PM_POST_RESTORE:
		hung_detector_suspended = false;
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

/*
 * kthread which checks for tasks stuck in D state
 */
static int watchdog(void *dummy)
{
	unsigned long hung_last_checked = jiffies;

	set_user_nice(current, 0);

	for ( ; ; ) {
		unsigned long timeout = sysctl_hung_task_timeout_secs;
		unsigned long interval = sysctl_hung_task_check_interval_secs;
		long t;

		/* interval == 0 (default) means: check every timeout secs. */
		if (interval == 0)
			interval = timeout;
		interval = min_t(unsigned long, interval, timeout);
		t = hung_timeout_jiffies(hung_last_checked, interval);
		if (t <= 0) {
			if (!atomic_xchg(&reset_hung_task, 0) &&
			    !hung_detector_suspended)
				check_hung_uninterruptible_tasks(timeout);
			hung_last_checked = jiffies;
			continue;
		}
		schedule_timeout_interruptible(t);
	}

	return 0;
}

static int __init hung_task_init(void)
{
	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);

	/* Disable hung task detector on suspend */
	pm_notifier(hungtask_pm_notify, 0);

	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
	hung_task_sysctl_init();

	return 0;
}
subsys_initcall(hung_task_init);