// SPDX-License-Identifier: GPL-2.0-only
/*
 * Detect Hung Task
 *
 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
 *
 */

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/export.h>
#include <linux/panic_notifier.h>
#include <linux/sysctl.h>
#include <linux/suspend.h>
#include <linux/utsname.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/sched/sysctl.h>
#include <linux/hung_task.h>
#include <linux/rwsem.h>

#include <trace/events/sched.h>

/*
 * The number of tasks checked:
 */
static int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;

/*
 * Total number of tasks detected as hung since boot:
 */
static unsigned long __read_mostly sysctl_hung_task_detect_count;

/*
 * Limit the number of tasks checked in a batch.
 *
 * This value controls the preemptibility of khungtaskd since preemption
 * is disabled during the critical section. It also controls the size of
 * the RCU grace period, so it needs an upper bound.
 */
#define HUNG_TASK_LOCK_BREAK (HZ / 10)

/*
 * Zero means infinite timeout - no checking done:
 */
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
EXPORT_SYMBOL_GPL(sysctl_hung_task_timeout_secs);

/*
 * Zero (default value) means use sysctl_hung_task_timeout_secs:
 */
static unsigned long __read_mostly sysctl_hung_task_check_interval_secs;

static int __read_mostly sysctl_hung_task_warnings = 10;

static int __read_mostly did_panic;
static bool hung_task_show_lock;
static bool hung_task_call_panic;
static bool hung_task_show_all_bt;

static struct task_struct *watchdog_task;

#ifdef CONFIG_SMP
/*
 * Should we dump all CPUs' backtraces in a hung task event?
 * Defaults to 0, can be changed via sysctl.
 */
static unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
#else
#define sysctl_hung_task_all_cpu_backtrace 0
#endif /* CONFIG_SMP */

/*
 * Should we panic (and reboot, if panic_timeout= is set) when a
 * hung task is detected:
 */
static unsigned int __read_mostly sysctl_hung_task_panic =
	CONFIG_BOOTPARAM_HUNG_TASK_PANIC;

static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
	did_panic = 1;

	return NOTIFY_DONE;
}

static struct notifier_block panic_block = {
	.notifier_call = hung_task_panic,
};

static bool task_is_hung(struct task_struct *t, unsigned long timeout)
{
	unsigned long switch_count = t->nvcsw + t->nivcsw;
	unsigned int state = READ_ONCE(t->__state);

	/*
	 * Skip the TASK_KILLABLE tasks -- these can be killed.
	 * Skip the TASK_IDLE tasks -- those are genuinely idle.
	 * Skip the TASK_FROZEN tasks -- they reasonably stop scheduling by freezer.
	 */
	if (!(state & TASK_UNINTERRUPTIBLE) ||
	    (state & (TASK_WAKEKILL | TASK_NOLOAD | TASK_FROZEN)))
		return false;

	/*
	 * When a freshly created task is scheduled once and changes its state
	 * to TASK_UNINTERRUPTIBLE without ever having been switched out, it
	 * mustn't be checked.
	 */
	if (unlikely(!switch_count))
		return false;

	if (switch_count != t->last_switch_count) {
		t->last_switch_count = switch_count;
		t->last_switch_time = jiffies;
		return false;
	}
	if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
		return false;

	return true;
}
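
/*
 * Worked example of the logic above (HZ = 250 assumed for illustration):
 * a task in D state whose nvcsw + nivcsw sum is unchanged since the last
 * scan keeps its recorded last_switch_time; it is reported as hung only
 * once jiffies passes last_switch_time + timeout * HZ, i.e. after
 * 120 * 250 = 30000 ticks for the default 120-second timeout.
 */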

#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
static void debug_show_blocker(struct task_struct *task, unsigned long timeout)
{
	struct task_struct *g, *t;
	unsigned long owner, blocker, blocker_type;
	const char *rwsem_blocked_by, *rwsem_blocked_as;

	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held");

	blocker = READ_ONCE(task->blocker);
	if (!blocker)
		return;

	blocker_type = hung_task_get_blocker_type(blocker);

	switch (blocker_type) {
	case BLOCKER_TYPE_MUTEX:
		owner = mutex_get_owner(hung_task_blocker_to_lock(blocker));
		break;
	case BLOCKER_TYPE_SEM:
		owner = sem_last_holder(hung_task_blocker_to_lock(blocker));
		break;
	case BLOCKER_TYPE_RWSEM_READER:
	case BLOCKER_TYPE_RWSEM_WRITER:
		owner = (unsigned long)rwsem_owner(
				hung_task_blocker_to_lock(blocker));
		rwsem_blocked_as = (blocker_type == BLOCKER_TYPE_RWSEM_READER) ?
					"reader" : "writer";
		rwsem_blocked_by = is_rwsem_reader_owned(
					hung_task_blocker_to_lock(blocker)) ?
					"reader" : "writer";
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}

	if (unlikely(!owner)) {
		switch (blocker_type) {
		case BLOCKER_TYPE_MUTEX:
			pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n",
			       task->comm, task->pid);
			break;
		case BLOCKER_TYPE_SEM:
			pr_err("INFO: task %s:%d is blocked on a semaphore, but the last holder is not found.\n",
			       task->comm, task->pid);
			break;
		case BLOCKER_TYPE_RWSEM_READER:
		case BLOCKER_TYPE_RWSEM_WRITER:
			pr_err("INFO: task %s:%d is blocked on an rw-semaphore, but the owner is not found.\n",
			       task->comm, task->pid);
			break;
		}
		return;
	}

	/* Ensure the owner information is correct. */
	for_each_process_thread(g, t) {
		if ((unsigned long)t != owner)
			continue;

		switch (blocker_type) {
		case BLOCKER_TYPE_MUTEX:
			pr_err("INFO: task %s:%d is blocked on a mutex likely owned by task %s:%d.\n",
			       task->comm, task->pid, t->comm, t->pid);
			break;
		case BLOCKER_TYPE_SEM:
			pr_err("INFO: task %s:%d is blocked on a semaphore likely last held by task %s:%d.\n",
			       task->comm, task->pid, t->comm, t->pid);
			break;
		case BLOCKER_TYPE_RWSEM_READER:
		case BLOCKER_TYPE_RWSEM_WRITER:
			pr_err("INFO: task %s:%d <%s> is blocked on an rw-semaphore likely owned by task %s:%d <%s>.\n",
			       task->comm, task->pid, rwsem_blocked_as, t->comm,
			       t->pid, rwsem_blocked_by);
			break;
		}
		/* Avoid a duplicated task dump: skip if the owner is also hung. */
		if (!task_is_hung(t, timeout))
			sched_show_task(t);
		return;
	}
}
#else
static inline void debug_show_blocker(struct task_struct *task, unsigned long timeout)
{
}
#endif
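
/*
 * Example of the resulting console output (task names and PIDs are
 * illustrative):
 *
 *   INFO: task foo:1234 is blocked on a mutex likely owned by task bar:5678.
 */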

static void check_hung_task(struct task_struct *t, unsigned long timeout,
			    unsigned long prev_detect_count)
{
	unsigned long total_hung_task;

	if (!task_is_hung(t, timeout))
		return;

	/*
	 * This counter tracks the total number of tasks detected as hung
	 * since boot.
	 */
	sysctl_hung_task_detect_count++;

	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
	trace_sched_process_hang(t);

	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
		console_verbose();
		hung_task_show_lock = true;
		hung_task_call_panic = true;
	}

	/*
	 * Ok, the task did not get scheduled for more than 2 minutes,
	 * complain:
	 */
	if (sysctl_hung_task_warnings || hung_task_call_panic) {
		if (sysctl_hung_task_warnings > 0)
			sysctl_hung_task_warnings--;
		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
		       t->comm, t->pid, (jiffies - t->last_switch_time) / HZ);
		pr_err("      %s %s %.*s\n",
		       print_tainted(), init_utsname()->release,
		       (int)strcspn(init_utsname()->version, " "),
		       init_utsname()->version);
		if (t->flags & PF_POSTCOREDUMP)
			pr_err("      Blocked by coredump.\n");
		pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
		       " disables this message.\n");
		sched_show_task(t);
		debug_show_blocker(t, timeout);
		hung_task_show_lock = true;

		if (sysctl_hung_task_all_cpu_backtrace)
			hung_task_show_all_bt = true;
		if (!sysctl_hung_task_warnings)
			pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
	}

	touch_nmi_watchdog();
}

/*
 * To avoid extending the RCU grace period for an unbounded amount of time,
 * periodically exit the critical section and enter a new one.
 *
 * For preemptible RCU it is sufficient to call rcu_read_unlock in order
 * to exit the grace period. For classic RCU, a reschedule is required.
 */
static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
{
	bool can_cont;

	get_task_struct(g);
	get_task_struct(t);
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();
	can_cont = pid_alive(g) && pid_alive(t);
	put_task_struct(t);
	put_task_struct(g);

	return can_cont;
}

/*
 * Check whether a TASK_UNINTERRUPTIBLE task does not get woken up for
 * a really long time (120 seconds by default). If that happens, print
 * out a warning.
 */
static void check_hung_uninterruptible_tasks(unsigned long timeout)
{
	int max_count = sysctl_hung_task_check_count;
	unsigned long last_break = jiffies;
	struct task_struct *g, *t;
	unsigned long prev_detect_count = sysctl_hung_task_detect_count;

	/*
	 * If the system crashed already then all bets are off,
	 * do not report extra hung tasks:
	 */
	if (test_taint(TAINT_DIE) || did_panic)
		return;

	hung_task_show_lock = false;
	rcu_read_lock();
	for_each_process_thread(g, t) {
		if (!max_count--)
			goto unlock;
		if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) {
			if (!rcu_lock_break(g, t))
				goto unlock;
			last_break = jiffies;
		}

		check_hung_task(t, timeout, prev_detect_count);
	}
unlock:
	rcu_read_unlock();
	if (hung_task_show_lock)
		debug_show_all_locks();

	if (hung_task_show_all_bt) {
		hung_task_show_all_bt = false;
		trigger_all_cpu_backtrace();
	}

	if (hung_task_call_panic)
		panic("hung_task: blocked tasks");
}
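
/*
 * Runtime tuning examples (values are illustrative):
 *
 *   echo 300 > /proc/sys/kernel/hung_task_timeout_secs   # report after 5 min
 *   echo 0   > /proc/sys/kernel/hung_task_timeout_secs   # disable checking
 *   cat /proc/sys/kernel/hung_task_detect_count          # hung tasks since boot
 */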

static long hung_timeout_jiffies(unsigned long last_checked,
				 unsigned long timeout)
{
	/* timeout of 0 will disable the watchdog */
	return timeout ? last_checked - jiffies + timeout * HZ :
		MAX_SCHEDULE_TIMEOUT;
}

#ifdef CONFIG_SYSCTL
/*
 * Process updating of timeout sysctl
 */
static int proc_dohung_task_timeout_secs(const struct ctl_table *table, int write,
					 void *buffer,
					 size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);

	if (ret || !write)
		goto out;

	wake_up_process(watchdog_task);

out:
	return ret;
}

/*
 * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
 * and hung_task_check_interval_secs
 */
static const unsigned long hung_task_timeout_max = (LONG_MAX / HZ);

static const struct ctl_table hung_task_sysctls[] = {
#ifdef CONFIG_SMP
	{
		.procname	= "hung_task_all_cpu_backtrace",
		.data		= &sysctl_hung_task_all_cpu_backtrace,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#endif /* CONFIG_SMP */
	{
		.procname	= "hung_task_panic",
		.data		= &sysctl_hung_task_panic,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_INT_MAX,
	},
	{
		.procname	= "hung_task_check_count",
		.data		= &sysctl_hung_task_check_count,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{
		.procname	= "hung_task_timeout_secs",
		.data		= &sysctl_hung_task_timeout_secs,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_dohung_task_timeout_secs,
		.extra2		= (void *)&hung_task_timeout_max,
	},
	{
		.procname	= "hung_task_check_interval_secs",
		.data		= &sysctl_hung_task_check_interval_secs,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_dohung_task_timeout_secs,
		.extra2		= (void *)&hung_task_timeout_max,
	},
	{
		.procname	= "hung_task_warnings",
		.data		= &sysctl_hung_task_warnings,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_NEG_ONE,
	},
	{
		.procname	= "hung_task_detect_count",
		.data		= &sysctl_hung_task_detect_count,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0444,
		.proc_handler	= proc_doulongvec_minmax,
	},
};

static void __init hung_task_sysctl_init(void)
{
	register_sysctl_init("kernel", hung_task_sysctls);
}
#else
#define hung_task_sysctl_init() do { } while (0)
#endif /* CONFIG_SYSCTL */

static atomic_t reset_hung_task = ATOMIC_INIT(0);

void reset_hung_task_detector(void)
{
	atomic_set(&reset_hung_task, 1);
}
EXPORT_SYMBOL_GPL(reset_hung_task_detector);

static bool hung_detector_suspended;

static int hungtask_pm_notify(struct notifier_block *self,
			      unsigned long action, void *hcpu)
{
	switch (action) {
	case PM_SUSPEND_PREPARE:
	case PM_HIBERNATION_PREPARE:
	case PM_RESTORE_PREPARE:
		hung_detector_suspended = true;
		break;
	case PM_POST_SUSPEND:
	case PM_POST_HIBERNATION:
	case PM_POST_RESTORE:
		hung_detector_suspended = false;
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}
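
/*
 * Sleep-time illustration for the loop below (HZ = 250 assumed): with
 * timeout = 120s and check_interval = 0, the interval defaults to the
 * timeout, so hung_timeout_jiffies() yields roughly
 * last_checked - jiffies + 120 * 250 jiffies of sleep; a non-positive
 * result means the interval has already elapsed and a scan runs now.
 */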

/*
 * kthread which checks for tasks stuck in D state
 */
static int watchdog(void *dummy)
{
	unsigned long hung_last_checked = jiffies;

	set_user_nice(current, 0);

	for ( ; ; ) {
		unsigned long timeout = sysctl_hung_task_timeout_secs;
		unsigned long interval = sysctl_hung_task_check_interval_secs;
		long t;

		if (interval == 0)
			interval = timeout;
		interval = min_t(unsigned long, interval, timeout);
		t = hung_timeout_jiffies(hung_last_checked, interval);
		if (t <= 0) {
			if (!atomic_xchg(&reset_hung_task, 0) &&
			    !hung_detector_suspended)
				check_hung_uninterruptible_tasks(timeout);
			hung_last_checked = jiffies;
			continue;
		}
		schedule_timeout_interruptible(t);
	}

	return 0;
}

static int __init hung_task_init(void)
{
	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);

	/* Disable hung task detector on suspend */
	pm_notifier(hungtask_pm_notify, 0);

	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
	hung_task_sysctl_init();

	return 0;
}
subsys_initcall(hung_task_init);
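
/*
 * Build-time defaults: CONFIG_DEFAULT_HUNG_TASK_TIMEOUT seeds
 * hung_task_timeout_secs and CONFIG_BOOTPARAM_HUNG_TASK_PANIC seeds
 * hung_task_panic (both referenced above).
 */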