// SPDX-License-Identifier: GPL-2.0-only
/*
 * Detect Hung Task
 *
 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
 *
 */

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/export.h>
#include <linux/panic_notifier.h>
#include <linux/sysctl.h>
#include <linux/suspend.h>
#include <linux/utsname.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/sched/sysctl.h>
#include <linux/hung_task.h>
#include <linux/rwsem.h>

#include <trace/events/sched.h>

/*
 * The number of tasks checked:
 */
static int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;

/*
 * Total number of tasks detected as hung since boot:
 */
static unsigned long __read_mostly sysctl_hung_task_detect_count;

/*
 * Limit the number of tasks checked in a batch.
 *
 * This value controls the preemptibility of khungtaskd since preemption
 * is disabled during the critical section. It also controls the size of
 * the RCU grace period. So it needs to be upper-bounded.
 */
#define HUNG_TASK_LOCK_BREAK (HZ / 10)

/*
 * Zero means infinite timeout - no checking done:
 */
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
EXPORT_SYMBOL_GPL(sysctl_hung_task_timeout_secs);

/*
 * Zero (the default value) means use sysctl_hung_task_timeout_secs:
 */
static unsigned long __read_mostly sysctl_hung_task_check_interval_secs;

static int __read_mostly sysctl_hung_task_warnings = 10;

static int __read_mostly did_panic;
static bool hung_task_show_lock;
static bool hung_task_call_panic;
static bool hung_task_show_all_bt;

static struct task_struct *watchdog_task;

#ifdef CONFIG_SMP
/*
 * Should we dump all CPUs' backtraces in a hung task event?
 * Defaults to 0, can be changed via sysctl.
 */
static unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
#else
#define sysctl_hung_task_all_cpu_backtrace 0
#endif /* CONFIG_SMP */

/*
 * Should we panic (and reboot, if panic_timeout= is set) when a
 * hung task is detected:
 */
static unsigned int __read_mostly sysctl_hung_task_panic =
	IS_ENABLED(CONFIG_BOOTPARAM_HUNG_TASK_PANIC);

static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
	did_panic = 1;

	return NOTIFY_DONE;
}

static struct notifier_block panic_block = {
	.notifier_call = hung_task_panic,
};

static bool task_is_hung(struct task_struct *t, unsigned long timeout)
{
	unsigned long switch_count = t->nvcsw + t->nivcsw;
	unsigned int state = READ_ONCE(t->__state);

	/*
	 * Skip the TASK_KILLABLE tasks -- they can be killed.
	 * Skip the TASK_IDLE tasks -- they are genuinely idle.
	 * Skip the TASK_FROZEN tasks -- they were reasonably stopped from
	 * scheduling by the freezer.
	 */
	if (!(state & TASK_UNINTERRUPTIBLE) ||
	    (state & (TASK_WAKEKILL | TASK_NOLOAD | TASK_FROZEN)))
		return false;

	/*
	 * When a freshly created task is scheduled once and changes its state
	 * to TASK_UNINTERRUPTIBLE without ever having been switched out, it
	 * mustn't be checked.
	 */
	if (unlikely(!switch_count))
		return false;

	if (switch_count != t->last_switch_count) {
		t->last_switch_count = switch_count;
		t->last_switch_time = jiffies;
		return false;
	}
	if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
		return false;

	return true;
}
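
/*
 * A worked example of the test above, under assumed values (HZ = 250,
 * timeout = 120 seconds): a task sitting in plain TASK_UNINTERRUPTIBLE
 * whose context switch count (nvcsw + nivcsw) is unchanged between two
 * consecutive scans is reported as hung once jiffies has advanced at
 * least 120 * 250 = 30000 ticks past its recorded last_switch_time.
 */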

#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
static void debug_show_blocker(struct task_struct *task, unsigned long timeout)
{
	struct task_struct *g, *t;
	unsigned long owner, blocker, blocker_type;
	const char *rwsem_blocked_by, *rwsem_blocked_as;

	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held");

	blocker = READ_ONCE(task->blocker);
	if (!blocker)
		return;

	blocker_type = hung_task_get_blocker_type(blocker);

	switch (blocker_type) {
	case BLOCKER_TYPE_MUTEX:
		owner = mutex_get_owner(hung_task_blocker_to_lock(blocker));
		break;
	case BLOCKER_TYPE_SEM:
		owner = sem_last_holder(hung_task_blocker_to_lock(blocker));
		break;
	case BLOCKER_TYPE_RWSEM_READER:
	case BLOCKER_TYPE_RWSEM_WRITER:
		owner = (unsigned long)rwsem_owner(
				hung_task_blocker_to_lock(blocker));
		rwsem_blocked_as = (blocker_type == BLOCKER_TYPE_RWSEM_READER) ?
					"reader" : "writer";
		rwsem_blocked_by = is_rwsem_reader_owned(
					hung_task_blocker_to_lock(blocker)) ?
					"reader" : "writer";
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}

	if (unlikely(!owner)) {
		switch (blocker_type) {
		case BLOCKER_TYPE_MUTEX:
			pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n",
			       task->comm, task->pid);
			break;
		case BLOCKER_TYPE_SEM:
			pr_err("INFO: task %s:%d is blocked on a semaphore, but the last holder is not found.\n",
			       task->comm, task->pid);
			break;
		case BLOCKER_TYPE_RWSEM_READER:
		case BLOCKER_TYPE_RWSEM_WRITER:
			pr_err("INFO: task %s:%d is blocked on an rw-semaphore, but the owner is not found.\n",
			       task->comm, task->pid);
			break;
		}
		return;
	}

	/* Ensure the owner information is correct. */
	for_each_process_thread(g, t) {
		if ((unsigned long)t != owner)
			continue;

		switch (blocker_type) {
		case BLOCKER_TYPE_MUTEX:
			pr_err("INFO: task %s:%d is blocked on a mutex likely owned by task %s:%d.\n",
			       task->comm, task->pid, t->comm, t->pid);
			break;
		case BLOCKER_TYPE_SEM:
			pr_err("INFO: task %s:%d is blocked on a semaphore likely last held by task %s:%d.\n",
			       task->comm, task->pid, t->comm, t->pid);
			break;
		case BLOCKER_TYPE_RWSEM_READER:
		case BLOCKER_TYPE_RWSEM_WRITER:
			pr_err("INFO: task %s:%d <%s> is blocked on an rw-semaphore likely owned by task %s:%d <%s>.\n",
			       task->comm, task->pid, rwsem_blocked_as, t->comm,
			       t->pid, rwsem_blocked_by);
			break;
		}
		/* Avoid a duplicated task dump: skip if the task is also hung. */
		if (!task_is_hung(t, timeout))
			sched_show_task(t);
		return;
	}
}
#else
static inline void debug_show_blocker(struct task_struct *task, unsigned long timeout)
{
}
#endif

static void check_hung_task(struct task_struct *t, unsigned long timeout)
{
	if (!task_is_hung(t, timeout))
		return;

	/*
	 * This counter tracks the total number of tasks detected as hung
	 * since boot.
	 */
	sysctl_hung_task_detect_count++;

	trace_sched_process_hang(t);

	if (sysctl_hung_task_panic) {
		console_verbose();
		hung_task_show_lock = true;
		hung_task_call_panic = true;
	}

	/*
	 * Ok, the task did not get scheduled for longer than the timeout
	 * (2 minutes by default), complain:
	 */
	if (sysctl_hung_task_warnings || hung_task_call_panic) {
		if (sysctl_hung_task_warnings > 0)
			sysctl_hung_task_warnings--;
		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
		       t->comm, t->pid, (jiffies - t->last_switch_time) / HZ);
		pr_err("      %s %s %.*s\n",
			print_tainted(), init_utsname()->release,
			(int)strcspn(init_utsname()->version, " "),
			init_utsname()->version);
		if (t->flags & PF_POSTCOREDUMP)
			pr_err("      Blocked by coredump.\n");
		pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
			" disables this message.\n");
		sched_show_task(t);
		debug_show_blocker(t, timeout);
		hung_task_show_lock = true;

		if (sysctl_hung_task_all_cpu_backtrace)
			hung_task_show_all_bt = true;
		if (!sysctl_hung_task_warnings)
			pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
	}

	touch_nmi_watchdog();
}
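
/*
 * A report produced above looks roughly like this (hypothetical task name,
 * PID, kernel version and timing, shown only to illustrate the format):
 *
 *   INFO: task dd:2135 blocked for more than 122 seconds.
 *         Not tainted 6.16.0-rc1 #12
 *   "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 *   ...(stack trace from sched_show_task())...
 *   INFO: task dd:2135 is blocked on a mutex likely owned by task foo:2134.
 */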

/*
 * To avoid extending the RCU grace period for an unbounded amount of time,
 * periodically exit the critical section and enter a new one.
 *
 * For preemptible RCU it is sufficient to call rcu_read_unlock() in order
 * to exit the grace period. For classic RCU, a reschedule is required.
 */
static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
{
	bool can_cont;

	get_task_struct(g);
	get_task_struct(t);
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();
	can_cont = pid_alive(g) && pid_alive(t);
	put_task_struct(t);
	put_task_struct(g);

	return can_cont;
}

/*
 * Check whether a TASK_UNINTERRUPTIBLE task does not get woken up for
 * a really long time (120 seconds by default). If that happens, print
 * out a warning.
 */
static void check_hung_uninterruptible_tasks(unsigned long timeout)
{
	int max_count = sysctl_hung_task_check_count;
	unsigned long last_break = jiffies;
	struct task_struct *g, *t;

	/*
	 * If the system crashed already then all bets are off,
	 * do not report extra hung tasks:
	 */
	if (test_taint(TAINT_DIE) || did_panic)
		return;

	hung_task_show_lock = false;
	rcu_read_lock();
	for_each_process_thread(g, t) {
		if (!max_count--)
			goto unlock;
		if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) {
			if (!rcu_lock_break(g, t))
				goto unlock;
			last_break = jiffies;
		}

		check_hung_task(t, timeout);
	}
 unlock:
	rcu_read_unlock();
	if (hung_task_show_lock)
		debug_show_all_locks();

	if (hung_task_show_all_bt) {
		hung_task_show_all_bt = false;
		trigger_all_cpu_backtrace();
	}

	if (hung_task_call_panic)
		panic("hung_task: blocked tasks");
}

static long hung_timeout_jiffies(unsigned long last_checked,
				 unsigned long timeout)
{
	/* timeout of 0 will disable the watchdog */
	return timeout ? last_checked - jiffies + timeout * HZ :
		MAX_SCHEDULE_TIMEOUT;
}
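
/*
 * A worked example for hung_timeout_jiffies() (assumed values): if the last
 * scan completed 30 seconds ago and timeout is 120 seconds, the result is
 * (jiffies - 30 * HZ) - jiffies + 120 * HZ = 90 * HZ, i.e. the watchdog
 * sleeps another 90 seconds. A zero timeout yields MAX_SCHEDULE_TIMEOUT,
 * which disables periodic checking entirely.
 */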

#ifdef CONFIG_SYSCTL
/*
 * Process an update of the timeout sysctls; on a successful write, wake
 * the watchdog so the new value takes effect immediately.
 */
static int proc_dohung_task_timeout_secs(const struct ctl_table *table, int write,
					 void *buffer,
					 size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);

	if (ret || !write)
		goto out;

	wake_up_process(watchdog_task);

 out:
	return ret;
}

/*
 * This is needed for the proc_doulongvec_minmax of
 * sysctl_hung_task_timeout_secs and sysctl_hung_task_check_interval_secs.
 */
static const unsigned long hung_task_timeout_max = (LONG_MAX / HZ);
static const struct ctl_table hung_task_sysctls[] = {
#ifdef CONFIG_SMP
	{
		.procname	= "hung_task_all_cpu_backtrace",
		.data		= &sysctl_hung_task_all_cpu_backtrace,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#endif /* CONFIG_SMP */
	{
		.procname	= "hung_task_panic",
		.data		= &sysctl_hung_task_panic,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{
		.procname	= "hung_task_check_count",
		.data		= &sysctl_hung_task_check_count,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{
		.procname	= "hung_task_timeout_secs",
		.data		= &sysctl_hung_task_timeout_secs,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_dohung_task_timeout_secs,
		.extra2		= (void *)&hung_task_timeout_max,
	},
	{
		.procname	= "hung_task_check_interval_secs",
		.data		= &sysctl_hung_task_check_interval_secs,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_dohung_task_timeout_secs,
		.extra2		= (void *)&hung_task_timeout_max,
	},
	{
		.procname	= "hung_task_warnings",
		.data		= &sysctl_hung_task_warnings,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_NEG_ONE,
	},
	{
		.procname	= "hung_task_detect_count",
		.data		= &sysctl_hung_task_detect_count,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0444,
		.proc_handler	= proc_doulongvec_minmax,
	},
};

static void __init hung_task_sysctl_init(void)
{
	register_sysctl_init("kernel", hung_task_sysctls);
}
#else
#define hung_task_sysctl_init() do { } while (0)
#endif /* CONFIG_SYSCTL */
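
/*
 * Example of tuning the detector at run time through the sysctls registered
 * above (hypothetical values, run as root):
 *
 *   # warn after 30 seconds of un-scheduled D state instead of the default
 *   echo 30 > /proc/sys/kernel/hung_task_timeout_secs
 *
 *   # panic when a hung task is detected (pairs with panic_timeout= to reboot)
 *   echo 1 > /proc/sys/kernel/hung_task_panic
 *
 *   # read-only count of all tasks detected as hung since boot
 *   cat /proc/sys/kernel/hung_task_detect_count
 */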

static atomic_t reset_hung_task = ATOMIC_INIT(0);

void reset_hung_task_detector(void)
{
	atomic_set(&reset_hung_task, 1);
}
EXPORT_SYMBOL_GPL(reset_hung_task_detector);

static bool hung_detector_suspended;

static int hungtask_pm_notify(struct notifier_block *self,
			      unsigned long action, void *hcpu)
{
	switch (action) {
	case PM_SUSPEND_PREPARE:
	case PM_HIBERNATION_PREPARE:
	case PM_RESTORE_PREPARE:
		hung_detector_suspended = true;
		break;
	case PM_POST_SUSPEND:
	case PM_POST_HIBERNATION:
	case PM_POST_RESTORE:
		hung_detector_suspended = false;
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

/*
 * kthread which checks for tasks stuck in D state
 */
static int watchdog(void *dummy)
{
	unsigned long hung_last_checked = jiffies;

	set_user_nice(current, 0);

	for ( ; ; ) {
		unsigned long timeout = sysctl_hung_task_timeout_secs;
		unsigned long interval = sysctl_hung_task_check_interval_secs;
		long t;

		/* A zero interval means "check once per timeout period". */
		if (interval == 0)
			interval = timeout;
		/* Cap the interval so tasks are checked at least once per timeout. */
		interval = min_t(unsigned long, interval, timeout);
		t = hung_timeout_jiffies(hung_last_checked, interval);
		if (t <= 0) {
			if (!atomic_xchg(&reset_hung_task, 0) &&
			    !hung_detector_suspended)
				check_hung_uninterruptible_tasks(timeout);
			hung_last_checked = jiffies;
			continue;
		}
		schedule_timeout_interruptible(t);
	}

	return 0;
}

static int __init hung_task_init(void)
{
	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);

	/* Disable the hung task detector while the system is suspended. */
	pm_notifier(hungtask_pm_notify, 0);

	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
	hung_task_sysctl_init();

	return 0;
}
subsys_initcall(hung_task_init);