// SPDX-License-Identifier: GPL-2.0-only
/*
 * Detect Hung Task
 *
 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
 */

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/export.h>
#include <linux/panic_notifier.h>
#include <linux/sysctl.h>
#include <linux/suspend.h>
#include <linux/utsname.h>
#include <linux/sched/signal.h>
#include <linux/sched/debug.h>
#include <linux/sched/sysctl.h>
#include <linux/hung_task.h>
#include <linux/rwsem.h>

#include <trace/events/sched.h>

/*
 * The number of tasks checked:
 */
static int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;

/*
 * Total number of tasks detected as hung since boot:
 */
static unsigned long __read_mostly sysctl_hung_task_detect_count;

/*
 * Limit the number of tasks checked in a batch.
 *
 * This value controls the preemptibility of khungtaskd since preemption
 * is disabled during the critical section. It also controls the size of
 * the RCU grace period, so it needs an upper bound.
 */
#define HUNG_TASK_LOCK_BREAK (HZ / 10)

/*
 * Zero means infinite timeout - no checking done:
 */
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;

/*
 * Zero (default value) means use sysctl_hung_task_timeout_secs:
 */
static unsigned long __read_mostly sysctl_hung_task_check_interval_secs;

static int __read_mostly sysctl_hung_task_warnings = 10;

static int __read_mostly did_panic;
static bool hung_task_show_lock;
static bool hung_task_call_panic;
static bool hung_task_show_all_bt;

static struct task_struct *watchdog_task;

#ifdef CONFIG_SMP
/*
 * Should we dump all CPUs' backtraces in a hung task event?
 * Defaults to 0, can be changed via sysctl.
 */
static unsigned int __read_mostly sysctl_hung_task_all_cpu_backtrace;
#else
#define sysctl_hung_task_all_cpu_backtrace 0
#endif /* CONFIG_SMP */

/*
 * Should we panic (and reboot, if panic_timeout= is set) when a
 * hung task is detected:
 */
static unsigned int __read_mostly sysctl_hung_task_panic =
	CONFIG_BOOTPARAM_HUNG_TASK_PANIC;

static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
	did_panic = 1;

	return NOTIFY_DONE;
}

static struct notifier_block panic_block = {
	.notifier_call = hung_task_panic,
};

static bool task_is_hung(struct task_struct *t, unsigned long timeout)
{
	unsigned long switch_count = t->nvcsw + t->nivcsw;
	unsigned int state = READ_ONCE(t->__state);

	/*
	 * skip the TASK_KILLABLE tasks -- these can be killed
	 * skip the TASK_IDLE tasks -- those are genuinely idle
	 * skip the TASK_FROZEN tasks -- they were deliberately stopped by the freezer
	 */
	if (!(state & TASK_UNINTERRUPTIBLE) ||
	    (state & (TASK_WAKEKILL | TASK_NOLOAD | TASK_FROZEN)))
		return false;

	/*
	 * A freshly created task may have been scheduled once and changed
	 * its state to TASK_UNINTERRUPTIBLE without ever being switched
	 * out; such a task mustn't be checked.
	 */
	if (unlikely(!switch_count))
		return false;

	if (switch_count != t->last_switch_count) {
		t->last_switch_count = switch_count;
		t->last_switch_time = jiffies;
		return false;
	}
	if (time_is_after_jiffies(t->last_switch_time + timeout * HZ))
		return false;

	return true;
}
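/*
 * Illustration of the state filter above, using the composite states
 * defined in <linux/sched.h>: TASK_KILLABLE is (TASK_WAKEKILL |
 * TASK_UNINTERRUPTIBLE) and TASK_IDLE is (TASK_UNINTERRUPTIBLE |
 * TASK_NOLOAD), so both carry one of the bits masked out above and are
 * never reported; only a plain TASK_UNINTERRUPTIBLE sleeper whose
 * context switch count stays unchanged for a full timeout qualifies.
 */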
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
static void debug_show_blocker(struct task_struct *task, unsigned long timeout)
{
	struct task_struct *g, *t;
	unsigned long owner, blocker, blocker_type;
	const char *rwsem_blocked_by, *rwsem_blocked_as;

	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "No rcu lock held");

	blocker = READ_ONCE(task->blocker);
	if (!blocker)
		return;

	blocker_type = hung_task_get_blocker_type(blocker);

	switch (blocker_type) {
	case BLOCKER_TYPE_MUTEX:
		owner = mutex_get_owner(hung_task_blocker_to_lock(blocker));
		break;
	case BLOCKER_TYPE_SEM:
		owner = sem_last_holder(hung_task_blocker_to_lock(blocker));
		break;
	case BLOCKER_TYPE_RWSEM_READER:
	case BLOCKER_TYPE_RWSEM_WRITER:
		owner = (unsigned long)rwsem_owner(
				hung_task_blocker_to_lock(blocker));
		rwsem_blocked_as = (blocker_type == BLOCKER_TYPE_RWSEM_READER) ?
					"reader" : "writer";
		rwsem_blocked_by = is_rwsem_reader_owned(
					hung_task_blocker_to_lock(blocker)) ?
					"reader" : "writer";
		break;
	default:
		WARN_ON_ONCE(1);
		return;
	}

	if (unlikely(!owner)) {
		switch (blocker_type) {
		case BLOCKER_TYPE_MUTEX:
			pr_err("INFO: task %s:%d is blocked on a mutex, but the owner is not found.\n",
			       task->comm, task->pid);
			break;
		case BLOCKER_TYPE_SEM:
			pr_err("INFO: task %s:%d is blocked on a semaphore, but the last holder is not found.\n",
			       task->comm, task->pid);
			break;
		case BLOCKER_TYPE_RWSEM_READER:
		case BLOCKER_TYPE_RWSEM_WRITER:
			pr_err("INFO: task %s:%d is blocked on an rw-semaphore, but the owner is not found.\n",
			       task->comm, task->pid);
			break;
		}
		return;
	}

	/* Ensure the owner information is correct. */
	for_each_process_thread(g, t) {
		if ((unsigned long)t != owner)
			continue;

		switch (blocker_type) {
		case BLOCKER_TYPE_MUTEX:
			pr_err("INFO: task %s:%d is blocked on a mutex likely owned by task %s:%d.\n",
			       task->comm, task->pid, t->comm, t->pid);
			break;
		case BLOCKER_TYPE_SEM:
			pr_err("INFO: task %s:%d is blocked on a semaphore likely last held by task %s:%d.\n",
			       task->comm, task->pid, t->comm, t->pid);
			break;
		case BLOCKER_TYPE_RWSEM_READER:
		case BLOCKER_TYPE_RWSEM_WRITER:
			pr_err("INFO: task %s:%d <%s> is blocked on an rw-semaphore likely owned by task %s:%d <%s>.\n",
			       task->comm, task->pid, rwsem_blocked_as, t->comm,
			       t->pid, rwsem_blocked_by);
			break;
		}
		/* Avoid a duplicate task dump: skip if the owner is itself hung. */
		if (!task_is_hung(t, timeout))
			sched_show_task(t);
		return;
	}
}
#else
static inline void debug_show_blocker(struct task_struct *task, unsigned long timeout)
{
}
#endif
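/*
 * Note on the encoding assumed above: task->blocker packs the address
 * of the lock being waited on together with a BLOCKER_TYPE_* tag in
 * its low bits (see <linux/hung_task.h>, which provides
 * hung_task_get_blocker_type() and hung_task_blocker_to_lock() to
 * unpack the two halves). The lock slowpaths publish the field when a
 * task starts sleeping on the lock and clear it again on wakeup.
 */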
static void check_hung_task(struct task_struct *t, unsigned long timeout,
			    unsigned long prev_detect_count)
{
	unsigned long total_hung_task;

	if (!task_is_hung(t, timeout))
		return;

	/*
	 * This counter tracks the total number of tasks detected as hung
	 * since boot.
	 */
	sysctl_hung_task_detect_count++;

	total_hung_task = sysctl_hung_task_detect_count - prev_detect_count;
	trace_sched_process_hang(t);

	if (sysctl_hung_task_panic && total_hung_task >= sysctl_hung_task_panic) {
		console_verbose();
		hung_task_show_lock = true;
		hung_task_call_panic = true;
	}

	/*
	 * Ok, the task did not get scheduled for more than the configured
	 * timeout (2 minutes by default), complain:
	 */
	if (sysctl_hung_task_warnings || hung_task_call_panic) {
		if (sysctl_hung_task_warnings > 0)
			sysctl_hung_task_warnings--;
		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
		       t->comm, t->pid, (jiffies - t->last_switch_time) / HZ);
		pr_err("      %s %s %.*s\n",
		       print_tainted(), init_utsname()->release,
		       (int)strcspn(init_utsname()->version, " "),
		       init_utsname()->version);
		if (t->flags & PF_POSTCOREDUMP)
			pr_err("      Blocked by coredump.\n");
		pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
		       " disables this message.\n");
		sched_show_task(t);
		debug_show_blocker(t, timeout);
		hung_task_show_lock = true;

		if (sysctl_hung_task_all_cpu_backtrace)
			hung_task_show_all_bt = true;
		if (!sysctl_hung_task_warnings)
			pr_info("Future hung task reports are suppressed, see sysctl kernel.hung_task_warnings\n");
	}

	touch_nmi_watchdog();
}

/*
 * To avoid extending the RCU grace period for an unbounded amount of time,
 * periodically exit the critical section and enter a new one.
 *
 * For preemptible RCU it is sufficient to call rcu_read_unlock in order
 * to exit the grace period. For classic RCU, a reschedule is required.
 */
static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
{
	bool can_cont;

	get_task_struct(g);
	get_task_struct(t);
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();
	can_cont = pid_alive(g) && pid_alive(t);
	put_task_struct(t);
	put_task_struct(g);

	return can_cont;
}

/*
 * Check whether a TASK_UNINTERRUPTIBLE task does not get woken up for
 * a really long time (120 seconds by default). If that happens, print
 * out a warning.
 */
static void check_hung_uninterruptible_tasks(unsigned long timeout)
{
	int max_count = sysctl_hung_task_check_count;
	unsigned long last_break = jiffies;
	struct task_struct *g, *t;
	unsigned long prev_detect_count = sysctl_hung_task_detect_count;

	/*
	 * If the system crashed already then all bets are off,
	 * do not report extra hung tasks:
	 */
	if (test_taint(TAINT_DIE) || did_panic)
		return;

	hung_task_show_lock = false;
	rcu_read_lock();
	for_each_process_thread(g, t) {
		if (!max_count--)
			goto unlock;
		if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) {
			if (!rcu_lock_break(g, t))
				goto unlock;
			last_break = jiffies;
		}

		check_hung_task(t, timeout, prev_detect_count);
	}
 unlock:
	rcu_read_unlock();
	if (hung_task_show_lock)
		debug_show_all_locks();

	if (hung_task_show_all_bt) {
		hung_task_show_all_bt = false;
		trigger_all_cpu_backtrace();
	}

	if (hung_task_call_panic)
		panic("hung_task: blocked tasks");
}
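/*
 * Worked example for hung_timeout_jiffies() below (illustrative
 * numbers): with HZ = 1000 and timeout = 120 seconds, a check that
 * last ran 30 seconds ago gives last_checked - jiffies + 120 * 1000 =
 * 90000 jiffies, so the watchdog sleeps roughly another 90 seconds.
 * Once the full period has elapsed the result drops to <= 0 and the
 * next scan runs immediately.
 */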
static long hung_timeout_jiffies(unsigned long last_checked,
				 unsigned long timeout)
{
	/* timeout of 0 will disable the watchdog */
	return timeout ? last_checked - jiffies + timeout * HZ :
		MAX_SCHEDULE_TIMEOUT;
}

#ifdef CONFIG_SYSCTL
/*
 * Process an update of the timeout sysctls and kick the watchdog thread
 * so that a shortened timeout takes effect immediately.
 */
static int proc_dohung_task_timeout_secs(const struct ctl_table *table, int write,
					 void *buffer,
					 size_t *lenp, loff_t *ppos)
{
	int ret;

	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);

	if (ret || !write)
		goto out;

	wake_up_process(watchdog_task);

 out:
	return ret;
}

/*
 * This is needed for proc_doulongvec_minmax of sysctl_hung_task_timeout_secs
 * and sysctl_hung_task_check_interval_secs.
 */
static const unsigned long hung_task_timeout_max = (LONG_MAX / HZ);

static const struct ctl_table hung_task_sysctls[] = {
#ifdef CONFIG_SMP
	{
		.procname	= "hung_task_all_cpu_backtrace",
		.data		= &sysctl_hung_task_all_cpu_backtrace,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
#endif /* CONFIG_SMP */
	{
		.procname	= "hung_task_panic",
		.data		= &sysctl_hung_task_panic,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_INT_MAX,
	},
	{
		.procname	= "hung_task_check_count",
		.data		= &sysctl_hung_task_check_count,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{
		.procname	= "hung_task_timeout_secs",
		.data		= &sysctl_hung_task_timeout_secs,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_dohung_task_timeout_secs,
		.extra2		= (void *)&hung_task_timeout_max,
	},
	{
		.procname	= "hung_task_check_interval_secs",
		.data		= &sysctl_hung_task_check_interval_secs,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_dohung_task_timeout_secs,
		.extra2		= (void *)&hung_task_timeout_max,
	},
	{
		.procname	= "hung_task_warnings",
		.data		= &sysctl_hung_task_warnings,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_NEG_ONE,
	},
	{
		.procname	= "hung_task_detect_count",
		.data		= &sysctl_hung_task_detect_count,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0444,
		.proc_handler	= proc_doulongvec_minmax,
	},
};

static void __init hung_task_sysctl_init(void)
{
	register_sysctl_init("kernel", hung_task_sysctls);
}
#else
#define hung_task_sysctl_init() do { } while (0)
#endif /* CONFIG_SYSCTL */

static atomic_t reset_hung_task = ATOMIC_INIT(0);

void reset_hung_task_detector(void)
{
	atomic_set(&reset_hung_task, 1);
}
EXPORT_SYMBOL_GPL(reset_hung_task_detector);

static bool hung_detector_suspended;

static int hungtask_pm_notify(struct notifier_block *self,
			      unsigned long action, void *hcpu)
{
	switch (action) {
	case PM_SUSPEND_PREPARE:
	case PM_HIBERNATION_PREPARE:
	case PM_RESTORE_PREPARE:
		hung_detector_suspended = true;
		break;
	case PM_POST_SUSPEND:
	case PM_POST_HIBERNATION:
	case PM_POST_RESTORE:
		hung_detector_suspended = false;
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}
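/*
 * While hung_detector_suspended is set by the PM notifier above, the
 * loop below still wakes up on schedule but skips the expensive task
 * scan; checking resumes transparently once the matching PM_POST_*
 * event has been delivered.
 */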
/*
 * kthread which checks for tasks stuck in D state
 */
static int watchdog(void *dummy)
{
	unsigned long hung_last_checked = jiffies;

	set_user_nice(current, 0);

	for ( ; ; ) {
		unsigned long timeout = sysctl_hung_task_timeout_secs;
		unsigned long interval = sysctl_hung_task_check_interval_secs;
		long t;

		/* An interval of 0 means "check once per timeout period". */
		if (interval == 0)
			interval = timeout;
		interval = min_t(unsigned long, interval, timeout);
		t = hung_timeout_jiffies(hung_last_checked, interval);
		if (t <= 0) {
			if (!atomic_xchg(&reset_hung_task, 0) &&
			    !hung_detector_suspended)
				check_hung_uninterruptible_tasks(timeout);
			hung_last_checked = jiffies;
			continue;
		}
		schedule_timeout_interruptible(t);
	}

	return 0;
}

static int __init hung_task_init(void)
{
	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);

	/* Disable the hung task detector across suspend/resume */
	pm_notifier(hungtask_pm_notify, 0);

	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
	hung_task_sysctl_init();

	return 0;
}
subsys_initcall(hung_task_init);
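/*
 * Example of tuning the detector at run time through the sysctls
 * registered above (shell commands, illustrative values):
 *
 *   echo 300 > /proc/sys/kernel/hung_task_timeout_secs     # report after 5 min
 *   echo 60  > /proc/sys/kernel/hung_task_check_interval_secs
 *   echo 1   > /proc/sys/kernel/hung_task_panic            # panic on first hang
 *   cat /proc/sys/kernel/hung_task_detect_count            # hangs since boot
 */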