/*
 *  linux/mm/oom_kill.c
 *
 *  Copyright (C)  1998,2000  Rik van Riel
 *      Thanks go out to Claus Fischer for some serious inspiration and
 *      for goading me into coding this file...
 *  Copyright (C)  2010  Google, Inc.
 *      Rewritten by David Rientjes
 *
 *  The routines in this file are used to kill a process when
 *  we're seriously out of memory. This gets called from __alloc_pages()
 *  in mm/page_alloc.c when we really run out of memory.
 *
 *  Since we won't call these routines often (on a well-configured
 *  machine) this file will double as a 'coding guide' and a signpost
 *  for newbie kernel hackers.  It features several pointers to major
 *  kernel subsystems and hints as to where to find out what things do.
 */

#include <linux/oom.h>
#include <linux/mm.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/swap.h>
#include <linux/timex.h>
#include <linux/jiffies.h>
#include <linux/cpuset.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/memcontrol.h>
#include <linux/mempolicy.h>
#include <linux/security.h>
#include <linux/ptrace.h>

int sysctl_panic_on_oom;
int sysctl_oom_kill_allocating_task;
int sysctl_oom_dump_tasks = 1;
static DEFINE_SPINLOCK(zone_scan_lock);

#ifdef CONFIG_NUMA
/**
 * has_intersects_mems_allowed() - check task eligibility for kill
 * @tsk: task struct of which task to consider
 * @mask: nodemask passed to page allocator for mempolicy ooms
 *
 * Task eligibility is determined by whether or not a candidate task, @tsk,
 * shares the same mempolicy nodes as current if it is bound by such a policy
 * and whether or not it has the same set of allowed cpuset nodes.
 */
static bool has_intersects_mems_allowed(struct task_struct *tsk,
                                        const nodemask_t *mask)
{
        struct task_struct *start = tsk;

        do {
                if (mask) {
                        /*
                         * If this is a mempolicy-constrained oom, tsk's
                         * cpuset is irrelevant.  Only return true if its
                         * mempolicy intersects current's, otherwise it may
                         * be needlessly killed.
                         */
                        if (mempolicy_nodemask_intersects(tsk, mask))
                                return true;
                } else {
                        /*
                         * This is not a mempolicy-constrained oom, so only
                         * check the mems of tsk's cpuset.
                         */
                        if (cpuset_mems_allowed_intersects(current, tsk))
                                return true;
                }
        } while_each_thread(start, tsk);

        return false;
}
#else
static bool has_intersects_mems_allowed(struct task_struct *tsk,
                                        const nodemask_t *mask)
{
        return true;
}
#endif /* CONFIG_NUMA */

/*
 * The process p may have detached its own ->mm while exiting or through
 * use_mm(), but one or more of its subthreads may still have a valid
 * pointer.  Return p, or any of its subthreads with a valid ->mm, with
 * task_lock() held.
 */
struct task_struct *find_lock_task_mm(struct task_struct *p)
{
        struct task_struct *t = p;

        do {
                task_lock(t);
                if (likely(t->mm))
                        return t;
                task_unlock(t);
        } while_each_thread(p, t);

        return NULL;
}

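/*
 * A minimal usage sketch for find_lock_task_mm(): the returned thread (if
 * any) comes back with task_lock() held, so ->mm is stable until the caller
 * drops the lock, e.g.:
 *
 *      struct task_struct *t;
 *      unsigned long rss = 0;
 *
 *      t = find_lock_task_mm(p);
 *      if (t) {
 *              rss = get_mm_rss(t->mm);
 *              task_unlock(t);
 *      }
 *
 * oom_badness() and dump_tasks() below follow this pattern.
 */
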
/* return true if the task is not adequate as a candidate victim task. */
static bool oom_unkillable_task(struct task_struct *p,
                const struct mem_cgroup *mem, const nodemask_t *nodemask)
{
        if (is_global_init(p))
                return true;
        if (p->flags & PF_KTHREAD)
                return true;

        /* For mem_cgroup_out_of_memory(): only tasks in the oom memcg are eligible */
        if (mem && !task_in_mem_cgroup(p, mem))
                return true;

        /* p may not have freeable memory in nodemask */
        if (!has_intersects_mems_allowed(p, nodemask))
                return true;

        return false;
}

/**
 * oom_badness - heuristic function to determine which candidate task to kill
 * @p: task of which the badness score is calculated
 * @mem: the memory controller being oom killed, if constrained
 * @nodemask: nodemask passed to page allocator for mempolicy ooms
 * @totalpages: total present RAM allowed for page allocation
 *
 * The heuristic for determining which task to kill is made to be as simple and
 * predictable as possible.  The goal is to return the highest value for the
 * task consuming the most memory to avoid subsequent oom failures.
 */
unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
                         const nodemask_t *nodemask, unsigned long totalpages)
{
        int points;

        if (oom_unkillable_task(p, mem, nodemask))
                return 0;

        p = find_lock_task_mm(p);
        if (!p)
                return 0;

        /*
         * Shortcut check for a thread sharing p->mm that is OOM_SCORE_ADJ_MIN
         * so the entire heuristic doesn't need to be executed for something
         * that cannot be killed.
         */
        if (atomic_read(&p->mm->oom_disable_count)) {
                task_unlock(p);
                return 0;
        }

        /*
         * When the PF_OOM_ORIGIN bit is set, it indicates the task should have
         * priority for oom killing.
         */
        if (p->flags & PF_OOM_ORIGIN) {
                task_unlock(p);
                return 1000;
        }

        /*
         * The memory controller may have a limit of 0 bytes, so avoid a divide
         * by zero, if necessary.
         */
        if (!totalpages)
                totalpages = 1;

        /*
         * The baseline for the badness score is the proportion of RAM that each
         * task's rss, pagetable and swap space use.
         */
        points = get_mm_rss(p->mm) + p->mm->nr_ptes;
        points += get_mm_counter(p->mm, MM_SWAPENTS);

        points *= 1000;
        points /= totalpages;
        task_unlock(p);

        /*
         * Root processes get a 3% bonus, just like the __vm_enough_memory()
         * implementation used by LSMs.
         */
        if (has_capability_noaudit(p, CAP_SYS_ADMIN))
                points -= 30;

        /*
         * /proc/pid/oom_score_adj ranges from -1000 to +1000 such that it may
         * either completely disable oom killing or always prefer a certain
         * task.
         */
        points += p->signal->oom_score_adj;

        /*
         * Never return 0 for an eligible task that may be killed since it's
         * possible that no single user task uses more than 0.1% of memory and
         * no single admin task uses more than 3.0%.
         */
        if (points <= 0)
                return 1;
        return (points < 1000) ? points : 1000;
}

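/*
 * A rough worked example of the scoring above; the numbers are invented,
 * only the arithmetic mirrors oom_badness():
 *
 *      totalpages                       = 1,000,000 pages
 *      rss + nr_ptes + swapents         =   250,000 pages
 *
 *      points = 250,000 * 1000 / 1,000,000     = 250  (25% of memory)
 *      root task bonus: points -= 30           = 220
 *      oom_score_adj of -100: points += -100   = 120
 *
 * The result is finally clamped to the range [1, 1000], so an eligible
 * task never reports a score of 0.
 */
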
/*
 * Determine the type of allocation constraint.
 */
#ifdef CONFIG_NUMA
static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
                                gfp_t gfp_mask, nodemask_t *nodemask,
                                unsigned long *totalpages)
{
        struct zone *zone;
        struct zoneref *z;
        enum zone_type high_zoneidx = gfp_zone(gfp_mask);
        bool cpuset_limited = false;
        int nid;

        /* Default to all available memory */
        *totalpages = totalram_pages + total_swap_pages;

        if (!zonelist)
                return CONSTRAINT_NONE;
        /*
         * A __GFP_THISNODE allocation only reaches the oom killer when
         * __GFP_NOFAIL is also set, so avoid killing current; a random task
         * gets killed in this case instead.  Ideally this would be
         * CONSTRAINT_THISNODE, but there is no way to handle that yet.
         */
        if (gfp_mask & __GFP_THISNODE)
                return CONSTRAINT_NONE;

        /*
         * This is not a __GFP_THISNODE allocation, so a truncated nodemask in
         * the page allocator means a mempolicy is in effect.  Cpuset policy
         * is enforced in get_page_from_freelist().
         */
        if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask)) {
                *totalpages = total_swap_pages;
                for_each_node_mask(nid, *nodemask)
                        *totalpages += node_spanned_pages(nid);
                return CONSTRAINT_MEMORY_POLICY;
        }

        /* Check whether this allocation failure is caused by a cpuset's wall function */
        for_each_zone_zonelist_nodemask(zone, z, zonelist,
                        high_zoneidx, nodemask)
                if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
                        cpuset_limited = true;

        if (cpuset_limited) {
                *totalpages = total_swap_pages;
                for_each_node_mask(nid, cpuset_current_mems_allowed)
                        *totalpages += node_spanned_pages(nid);
                return CONSTRAINT_CPUSET;
        }
        return CONSTRAINT_NONE;
}
#else
static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
                                gfp_t gfp_mask, nodemask_t *nodemask,
                                unsigned long *totalpages)
{
        *totalpages = totalram_pages + total_swap_pages;
        return CONSTRAINT_NONE;
}
#endif

/*
 * Simple selection loop.  We choose the process with the highest number of
 * 'points'.  We expect the caller will lock the tasklist.
 *
 * (not docbooked, we don't want this one cluttering up the manual)
 */
static struct task_struct *select_bad_process(unsigned int *ppoints,
                unsigned long totalpages, struct mem_cgroup *mem,
                const nodemask_t *nodemask)
{
        struct task_struct *g, *p;
        struct task_struct *chosen = NULL;
        *ppoints = 0;

        do_each_thread(g, p) {
                unsigned int points;

                if (!p->mm)
                        continue;
                if (oom_unkillable_task(p, mem, nodemask))
                        continue;

                /*
                 * This task already has access to memory reserves and is
                 * being killed.  Don't allow any other task access to the
                 * memory reserve.
                 *
                 * Note: this may have a chance of deadlock if it gets
                 * blocked waiting for another task which itself is waiting
                 * for memory.  Is there a better alternative?
                 */
                if (test_tsk_thread_flag(p, TIF_MEMDIE))
                        return ERR_PTR(-1UL);

                if (p->flags & PF_EXITING) {
                        /*
                         * If p is the current task and is in the process of
                         * releasing memory, we allow the "kill" to set
                         * TIF_MEMDIE, which will allow it to gain access to
                         * memory reserves.  Otherwise, it may stall forever.
                         *
                         * The loop isn't broken here, however, in case other
                         * threads are found to have already been oom killed.
                         */
                        if (p == current) {
                                chosen = p;
                                *ppoints = 1000;
                        } else {
                                /*
                                 * If this task is not being ptraced on exit,
                                 * then wait for it to finish before killing
                                 * some other task unnecessarily.
                                 */
                                if (!(task_ptrace(p->group_leader) &
                                                        PT_TRACE_EXIT))
                                        return ERR_PTR(-1UL);
                        }
                }

                points = oom_badness(p, mem, nodemask, totalpages);
                if (points > *ppoints) {
                        chosen = p;
                        *ppoints = points;
                }
        } while_each_thread(g, p);

        return chosen;
}

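/*
 * A note on the return convention above, mirrored by the callers later in
 * this file: select_bad_process() returns NULL when no eligible victim
 * exists, a task pointer when one was chosen, and ERR_PTR(-1UL) when the
 * scan should be aborted because a victim is already on its way out
 * (TIF_MEMDIE already set, or an untraced PF_EXITING task).  Roughly, a
 * caller such as out_of_memory() does:
 *
 *      p = select_bad_process(&points, totalpages, NULL, mpol_mask);
 *      if (PTR_ERR(p) == -1UL)
 *              goto out;
 *      if (!p)
 *              panic("Out of memory and no killable processes...\n");
 */
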
/**
 * dump_tasks - dump current memory state of all system tasks
 * @mem: current's memory controller, if constrained
 * @nodemask: nodemask passed to page allocator for mempolicy ooms
 *
 * Dumps the current memory state of all eligible tasks.  Tasks not in the same
 * memcg, not in the same cpuset, or bound to a disjoint set of mempolicy nodes
 * are not shown.
 * State information includes task's pid, uid, tgid, vm size, rss, cpu, oom_adj
 * value, oom_score_adj value, and name.
 *
 * Call with tasklist_lock read-locked.
 */
static void dump_tasks(const struct mem_cgroup *mem, const nodemask_t *nodemask)
{
        struct task_struct *p;
        struct task_struct *task;

        pr_info("[ pid ] uid tgid total_vm rss cpu oom_adj oom_score_adj name\n");
        for_each_process(p) {
                if (oom_unkillable_task(p, mem, nodemask))
                        continue;

                task = find_lock_task_mm(p);
                if (!task) {
                        /*
                         * This is a kthread or all of p's threads have already
                         * detached their mm's.  There's no need to report
                         * them; they can't be oom killed anyway.
                         */
                        continue;
                }

                pr_info("[%5d] %5d %5d %8lu %8lu %3u %3d %5d %s\n",
                        task->pid, task_uid(task), task->tgid,
                        task->mm->total_vm, get_mm_rss(task->mm),
                        task_cpu(task), task->signal->oom_adj,
                        task->signal->oom_score_adj, task->comm);
                task_unlock(task);
        }
}

static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order,
                        struct mem_cgroup *mem, const nodemask_t *nodemask)
{
        task_lock(current);
        pr_warning("%s invoked oom-killer: gfp_mask=0x%x, order=%d, "
                "oom_adj=%d, oom_score_adj=%d\n",
                current->comm, gfp_mask, order, current->signal->oom_adj,
                current->signal->oom_score_adj);
        cpuset_print_task_mems_allowed(current);
        task_unlock(current);
        dump_stack();
        mem_cgroup_print_oom_info(mem, p);
        show_mem(SHOW_MEM_FILTER_NODES);
        if (sysctl_oom_dump_tasks)
                dump_tasks(mem, nodemask);
}

#define K(x) ((x) << (PAGE_SHIFT-10))
static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem)
{
        struct task_struct *q;
        struct mm_struct *mm;

        p = find_lock_task_mm(p);
        if (!p)
                return 1;

        /* mm cannot be safely dereferenced after task_unlock(p) */
        mm = p->mm;

        pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
                task_pid_nr(p), p->comm, K(p->mm->total_vm),
                K(get_mm_counter(p->mm, MM_ANONPAGES)),
                K(get_mm_counter(p->mm, MM_FILEPAGES)));
        task_unlock(p);

        /*
         * Kill all processes sharing p->mm in other thread groups, if any.
         * They don't get access to memory reserves or a higher scheduler
         * priority, though, to avoid depletion of all memory or task
         * starvation.  This prevents mm->mmap_sem livelock when an oom killed
         * task cannot exit because it requires the semaphore and it's
         * contended by another thread trying to allocate memory itself.
         * That thread will now get access to memory reserves since it has a
         * pending fatal signal.
         */
        for_each_process(q)
                if (q->mm == mm && !same_thread_group(q, p)) {
                        task_lock(q);   /* Protect ->comm from prctl() */
                        pr_err("Kill process %d (%s) sharing same memory\n",
                                task_pid_nr(q), q->comm);
                        task_unlock(q);
                        force_sig(SIGKILL, q);
                }

        set_tsk_thread_flag(p, TIF_MEMDIE);
        force_sig(SIGKILL, p);

        return 0;
}
#undef K

static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
                            unsigned int points, unsigned long totalpages,
                            struct mem_cgroup *mem, nodemask_t *nodemask,
                            const char *message)
{
        struct task_struct *victim = p;
        struct task_struct *child;
        struct task_struct *t = p;
        unsigned int victim_points = 0;

        if (printk_ratelimit())
                dump_header(p, gfp_mask, order, mem, nodemask);

        /*
         * If the task is already exiting, don't alarm the sysadmin or kill
         * its children or threads, just set TIF_MEMDIE so it can die quickly.
         */
        if (p->flags & PF_EXITING) {
                set_tsk_thread_flag(p, TIF_MEMDIE);
                return 0;
        }

        task_lock(p);
        pr_err("%s: Kill process %d (%s) score %d or sacrifice child\n",
                message, task_pid_nr(p), p->comm, points);
        task_unlock(p);

        /*
         * If any of p's children has a different mm and is eligible for kill,
         * the one with the highest badness() score is sacrificed for its
         * parent.  This attempts to lose the minimal amount of work done while
         * still freeing memory.
         */
        do {
                list_for_each_entry(child, &t->children, sibling) {
                        unsigned int child_points;

                        if (child->mm == p->mm)
                                continue;
                        /*
                         * oom_badness() returns 0 if the thread is unkillable
                         */
                        child_points = oom_badness(child, mem, nodemask,
                                                                totalpages);
                        if (child_points > victim_points) {
                                victim = child;
                                victim_points = child_points;
                        }
                }
        } while_each_thread(p, t);

        return oom_kill_task(victim, mem);
}

/*
 * Determines whether the kernel must panic because of the panic_on_oom sysctl.
 */
static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask,
                                int order, const nodemask_t *nodemask)
{
        if (likely(!sysctl_panic_on_oom))
                return;
        if (sysctl_panic_on_oom != 2) {
                /*
                 * panic_on_oom == 1 only affects CONSTRAINT_NONE, the kernel
                 * does not panic for cpuset, mempolicy, or memcg allocation
                 * failures.
                 */
                if (constraint != CONSTRAINT_NONE)
                        return;
        }
        read_lock(&tasklist_lock);
        dump_header(NULL, gfp_mask, order, NULL, nodemask);
        read_unlock(&tasklist_lock);
        panic("Out of memory: %s panic_on_oom is enabled\n",
                sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide");
}

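/*
 * For reference, the sysctl semantics implemented above (these are the
 * values this file already handles, exposed via /proc/sys/vm/panic_on_oom
 * through the sysctl_panic_on_oom variable declared at the top):
 *
 *      vm.panic_on_oom = 0     never panic; run the oom killer instead
 *      vm.panic_on_oom = 1     panic only for system-wide (CONSTRAINT_NONE)
 *                              ooms; cpuset, mempolicy and memcg ooms still
 *                              kill a task
 *      vm.panic_on_oom = 2     always panic, even for constrained ooms
 */
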
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
{
        unsigned long limit;
        unsigned int points = 0;
        struct task_struct *p;

        /*
         * If current has a pending SIGKILL, then automatically select it.  The
         * goal is to allow it to allocate so that it may quickly exit and free
         * its memory.
         */
        if (fatal_signal_pending(current)) {
                set_thread_flag(TIF_MEMDIE);
                return;
        }

        check_panic_on_oom(CONSTRAINT_MEMCG, gfp_mask, 0, NULL);
        limit = mem_cgroup_get_limit(mem) >> PAGE_SHIFT;
        read_lock(&tasklist_lock);
retry:
        p = select_bad_process(&points, limit, mem, NULL);
        if (!p || PTR_ERR(p) == -1UL)
                goto out;

        if (oom_kill_process(p, gfp_mask, 0, points, limit, mem, NULL,
                                "Memory cgroup out of memory"))
                goto retry;
out:
        read_unlock(&tasklist_lock);
}
#endif

static BLOCKING_NOTIFIER_HEAD(oom_notify_list);

int register_oom_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_register(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(register_oom_notifier);

int unregister_oom_notifier(struct notifier_block *nb)
{
        return blocking_notifier_chain_unregister(&oom_notify_list, nb);
}
EXPORT_SYMBOL_GPL(unregister_oom_notifier);

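/*
 * A minimal usage sketch for the notifier hooks above; the names
 * example_oom_notify and example_shrink_cache are hypothetical.  The
 * contract, as used by out_of_memory() below, is that the void * argument
 * points to an unsigned long which each callback increments by the number
 * of pages it freed; a non-zero total causes the oom kill to be skipped:
 *
 *      static int example_oom_notify(struct notifier_block *nb,
 *                                    unsigned long unused, void *parm)
 *      {
 *              unsigned long *freed = parm;
 *
 *              *freed += example_shrink_cache();
 *              return NOTIFY_OK;
 *      }
 *
 *      static struct notifier_block example_oom_nb = {
 *              .notifier_call  = example_oom_notify,
 *      };
 *
 *      register_oom_notifier(&example_oom_nb);
 */
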
/*
 * Try to acquire the OOM killer lock for the zones in zonelist.  Returns zero
 * if a parallel OOM killing is already taking place that includes a zone in
 * the zonelist.  Otherwise, locks all zones in the zonelist and returns 1.
 */
int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
{
        struct zoneref *z;
        struct zone *zone;
        int ret = 1;

        spin_lock(&zone_scan_lock);
        for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
                if (zone_is_oom_locked(zone)) {
                        ret = 0;
                        goto out;
                }
        }

        for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
                /*
                 * Lock each zone in the zonelist under zone_scan_lock so a
                 * parallel invocation of try_set_zonelist_oom() doesn't succeed
                 * when it shouldn't.
                 */
                zone_set_flag(zone, ZONE_OOM_LOCKED);
        }

out:
        spin_unlock(&zone_scan_lock);
        return ret;
}

/*
 * Clears the ZONE_OOM_LOCKED flag for all zones in the zonelist so that failed
 * allocation attempts with zonelists containing them may now recall the OOM
 * killer, if necessary.
 */
void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_mask)
{
        struct zoneref *z;
        struct zone *zone;

        spin_lock(&zone_scan_lock);
        for_each_zone_zonelist(zone, z, zonelist, gfp_zone(gfp_mask)) {
                zone_clear_flag(zone, ZONE_OOM_LOCKED);
        }
        spin_unlock(&zone_scan_lock);
}

/*
 * Try to acquire the oom killer lock for all system zones.  Returns zero if a
 * parallel oom killing is taking place, otherwise locks all zones and returns
 * non-zero.
 */
static int try_set_system_oom(void)
{
        struct zone *zone;
        int ret = 1;

        spin_lock(&zone_scan_lock);
        for_each_populated_zone(zone)
                if (zone_is_oom_locked(zone)) {
                        ret = 0;
                        goto out;
                }
        for_each_populated_zone(zone)
                zone_set_flag(zone, ZONE_OOM_LOCKED);
out:
        spin_unlock(&zone_scan_lock);
        return ret;
}

/*
 * Clears ZONE_OOM_LOCKED for all system zones so that failed allocation
 * attempts or page faults may now recall the oom killer, if necessary.
 */
static void clear_system_oom(void)
{
        struct zone *zone;

        spin_lock(&zone_scan_lock);
        for_each_populated_zone(zone)
                zone_clear_flag(zone, ZONE_OOM_LOCKED);
        spin_unlock(&zone_scan_lock);
}

/**
 * out_of_memory - kill the "best" process when we run out of memory
 * @zonelist: zonelist pointer
 * @gfp_mask: memory allocation flags
 * @order: amount of memory being requested as a power of 2
 * @nodemask: nodemask passed to page allocator
 *
 * If we run out of memory, we have the choice between either
 * killing a random task (bad), letting the system crash (worse)
 * OR try to be smart about which process to kill.  Note that we
 * don't have to be perfect here, we just have to be good.
 */
void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
                int order, nodemask_t *nodemask)
{
        const nodemask_t *mpol_mask;
        struct task_struct *p;
        unsigned long totalpages;
        unsigned long freed = 0;
        unsigned int points;
        enum oom_constraint constraint = CONSTRAINT_NONE;
        int killed = 0;

        blocking_notifier_call_chain(&oom_notify_list, 0, &freed);
        if (freed > 0)
                /* Got some memory back in the last second. */
                return;

        /*
         * If current has a pending SIGKILL, then automatically select it.  The
         * goal is to allow it to allocate so that it may quickly exit and free
         * its memory.
         */
        if (fatal_signal_pending(current)) {
                set_thread_flag(TIF_MEMDIE);
                return;
        }

        /*
         * Check if there were limitations on the allocation (only relevant for
         * NUMA) that may require different handling.
         */
        constraint = constrained_alloc(zonelist, gfp_mask, nodemask,
                                                &totalpages);
        mpol_mask = (constraint == CONSTRAINT_MEMORY_POLICY) ? nodemask : NULL;
        check_panic_on_oom(constraint, gfp_mask, order, mpol_mask);

        read_lock(&tasklist_lock);
        if (sysctl_oom_kill_allocating_task &&
            !oom_unkillable_task(current, NULL, nodemask) &&
            current->mm && !atomic_read(&current->mm->oom_disable_count)) {
                /*
                 * oom_kill_process() needs tasklist_lock held.  If it returns
                 * non-zero, current could not be killed so we must fall back
                 * to the tasklist scan.
                 */
                if (!oom_kill_process(current, gfp_mask, order, 0, totalpages,
                                NULL, nodemask,
                                "Out of memory (oom_kill_allocating_task)"))
                        goto out;
        }

retry:
        p = select_bad_process(&points, totalpages, NULL, mpol_mask);
        if (PTR_ERR(p) == -1UL)
                goto out;

        /* Found nothing?!?! Either we hang forever, or we panic. */
        if (!p) {
                dump_header(NULL, gfp_mask, order, NULL, mpol_mask);
                read_unlock(&tasklist_lock);
                panic("Out of memory and no killable processes...\n");
        }

        if (oom_kill_process(p, gfp_mask, order, points, totalpages, NULL,
                                nodemask, "Out of memory"))
                goto retry;
        killed = 1;
out:
        read_unlock(&tasklist_lock);

        /*
         * Give the killed task a good chance of exiting before we retry the
         * allocation, unless the current task is the one that was killed.
         */
        if (killed && !test_thread_flag(TIF_MEMDIE))
                schedule_timeout_uninterruptible(1);
}

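/*
 * A rough sketch of how a caller in mm/page_alloc.c (the __alloc_pages()
 * slow path mentioned at the top of this file) is expected to pair the
 * zonelist OOM lock with out_of_memory(); if the lock cannot be taken, a
 * parallel oom kill already covers these zones, so the caller just waits
 * briefly and retries the allocation.  pagefault_out_of_memory() below
 * follows the same pattern with the system-wide variants:
 *
 *      if (!try_set_zonelist_oom(zonelist, gfp_mask)) {
 *              schedule_timeout_uninterruptible(1);
 *              return NULL;
 *      }
 *      out_of_memory(zonelist, gfp_mask, order, nodemask);
 *      clear_zonelist_oom(zonelist, gfp_mask);
 */
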
/*
 * The pagefault handler calls here because it is out of memory, so kill a
 * memory-hogging task.  If a populated zone has ZONE_OOM_LOCKED set, a
 * parallel oom killing is already in progress so do nothing.  If a task is
 * found with TIF_MEMDIE set, it has been killed so do nothing and allow it to
 * exit.
 */
void pagefault_out_of_memory(void)
{
        if (try_set_system_oom()) {
                out_of_memory(NULL, 0, 0, NULL);
                clear_system_oom();
        }
        if (!test_thread_flag(TIF_MEMDIE))
                schedule_timeout_uninterruptible(1);
}