// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/kernel/power/snapshot.c
 *
 * This file provides system snapshot/restore functionality for swsusp.
 *
 * Copyright (C) 1998-2005 Pavel Machek <pavel@ucw.cz>
 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
 */

#define pr_fmt(fmt) "PM: hibernation: " fmt

#include <linux/version.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/suspend.h>
#include <linux/delay.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/pm.h>
#include <linux/device.h>
#include <linux/init.h>
#include <linux/memblock.h>
#include <linux/nmi.h>
#include <linux/syscalls.h>
#include <linux/console.h>
#include <linux/highmem.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/compiler.h>
#include <linux/ktime.h>
#include <linux/set_memory.h>

#include <linux/uaccess.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
#include <asm/io.h>

#include "power.h"

#if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_ARCH_HAS_SET_MEMORY)
static bool hibernate_restore_protection;
static bool hibernate_restore_protection_active;

void enable_restore_image_protection(void)
{
	hibernate_restore_protection = true;
}

static inline void hibernate_restore_protection_begin(void)
{
	hibernate_restore_protection_active = hibernate_restore_protection;
}

static inline void hibernate_restore_protection_end(void)
{
	hibernate_restore_protection_active = false;
}

static inline void hibernate_restore_protect_page(void *page_address)
{
	if (hibernate_restore_protection_active)
		set_memory_ro((unsigned long)page_address, 1);
}

static inline void hibernate_restore_unprotect_page(void *page_address)
{
	if (hibernate_restore_protection_active)
		set_memory_rw((unsigned long)page_address, 1);
}
#else
static inline void hibernate_restore_protection_begin(void) {}
static inline void hibernate_restore_protection_end(void) {}
static inline void hibernate_restore_protect_page(void *page_address) {}
static inline void hibernate_restore_unprotect_page(void *page_address) {}
#endif /* CONFIG_STRICT_KERNEL_RWX && CONFIG_ARCH_HAS_SET_MEMORY */


/*
 * The calls to set_direct_map_*() should not fail because remapping a page
 * here means that we only update protection bits in an existing PTE.
 * It is still worth having a warning here if something changes and this
 * will no longer be the case.
 */
static inline void hibernate_map_page(struct page *page)
{
	if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) {
		int ret = set_direct_map_default_noflush(page);

		if (ret)
			pr_warn_once("Failed to remap page\n");
	} else {
		debug_pagealloc_map_pages(page, 1);
	}
}

static inline void hibernate_unmap_page(struct page *page)
{
	if (IS_ENABLED(CONFIG_ARCH_HAS_SET_DIRECT_MAP)) {
		unsigned long addr = (unsigned long)page_address(page);
		int ret = set_direct_map_invalid_noflush(page);

		if (ret)
			pr_warn_once("Failed to remap page\n");

		flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
	} else {
		debug_pagealloc_unmap_pages(page, 1);
	}
}

static int swsusp_page_is_free(struct page *);
static void swsusp_set_page_forbidden(struct page *);
static void swsusp_unset_page_forbidden(struct page *);

/*
 * Number of bytes to reserve for memory allocations made by device drivers
 * from their ->freeze() and ->freeze_noirq() callbacks so that they don't
 * cause image creation to fail (tunable via /sys/power/reserved_size).
 */
unsigned long reserved_size;

void __init hibernate_reserved_size_init(void)
{
	reserved_size = SPARE_PAGES * PAGE_SIZE;
}

/*
 * Preferred image size in bytes (tunable via /sys/power/image_size).
 * When it is set to N, swsusp will do its best to ensure the image
 * size will not exceed N bytes, but if that is impossible, it will
 * try to create the smallest image possible.
 */
unsigned long image_size;

void __init hibernate_image_size_init(void)
{
	image_size = ((totalram_pages() * 2) / 5) * PAGE_SIZE;
}
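
/*
 * Both limits above are exported through sysfs and take values in bytes;
 * for example, the defaults could be overridden at run time along the
 * lines of:
 *
 *	echo 0 > /sys/power/reserved_size
 *	echo $((800*1024*1024)) > /sys/power/image_size
 */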

/*
 * List of PBEs needed for restoring the pages that were allocated before
 * the suspend and included in the suspend image, but have also been
 * allocated by the "resume" kernel, so their contents cannot be written
 * directly to their "original" page frames.
 */
struct pbe *restore_pblist;

/* struct linked_page is used to build chains of pages */

#define LINKED_PAGE_DATA_SIZE	(PAGE_SIZE - sizeof(void *))

struct linked_page {
	struct linked_page *next;
	char data[LINKED_PAGE_DATA_SIZE];
} __packed;

/*
 * List of "safe" pages (i.e. pages that were not used by the image kernel
 * before hibernation) that may be used as temporary storage for image kernel
 * memory contents.
 */
static struct linked_page *safe_pages_list;

/* Pointer to an auxiliary buffer (1 page) */
static void *buffer;

#define PG_ANY		0
#define PG_SAFE		1
#define PG_UNSAFE_CLEAR	1
#define PG_UNSAFE_KEEP	0

static unsigned int allocated_unsafe_pages;

/**
 * get_image_page - Allocate a page for a hibernation image.
 * @gfp_mask: GFP mask for the allocation.
 * @safe_needed: Get pages that were not used before hibernation (restore only)
 *
 * During image restoration, for storing the PBE list and the image data, we can
 * only use memory pages that do not conflict with the pages used before
 * hibernation. The "unsafe" pages have PageNosaveFree set and we count them
 * using allocated_unsafe_pages.
 *
 * Each allocated image page is marked as PageNosave and PageNosaveFree so that
 * swsusp_free() can release it.
 */
static void *get_image_page(gfp_t gfp_mask, int safe_needed)
{
	void *res;

	res = (void *)get_zeroed_page(gfp_mask);
	if (safe_needed)
		while (res && swsusp_page_is_free(virt_to_page(res))) {
			/* The page is unsafe, mark it for swsusp_free() */
			swsusp_set_page_forbidden(virt_to_page(res));
			allocated_unsafe_pages++;
			res = (void *)get_zeroed_page(gfp_mask);
		}
	if (res) {
		swsusp_set_page_forbidden(virt_to_page(res));
		swsusp_set_page_free(virt_to_page(res));
	}
	return res;
}

static void *__get_safe_page(gfp_t gfp_mask)
{
	if (safe_pages_list) {
		void *ret = safe_pages_list;

		safe_pages_list = safe_pages_list->next;
		memset(ret, 0, PAGE_SIZE);
		return ret;
	}
	return get_image_page(gfp_mask, PG_SAFE);
}

unsigned long get_safe_page(gfp_t gfp_mask)
{
	return (unsigned long)__get_safe_page(gfp_mask);
}

static struct page *alloc_image_page(gfp_t gfp_mask)
{
	struct page *page;

	page = alloc_page(gfp_mask);
	if (page) {
		swsusp_set_page_forbidden(page);
		swsusp_set_page_free(page);
	}
	return page;
}

static void recycle_safe_page(void *page_address)
{
	struct linked_page *lp = page_address;

	lp->next = safe_pages_list;
	safe_pages_list = lp;
}

/**
 * free_image_page - Free a page allocated for hibernation image.
 * @addr: Address of the page to free.
 * @clear_nosave_free: If set, clear the PageNosaveFree bit for the page.
 *
 * The page to free should have been allocated by get_image_page() (page flags
 * set by it are affected).
 */
static inline void free_image_page(void *addr, int clear_nosave_free)
{
	struct page *page;

	BUG_ON(!virt_addr_valid(addr));

	page = virt_to_page(addr);

	swsusp_unset_page_forbidden(page);
	if (clear_nosave_free)
		swsusp_unset_page_free(page);

	__free_page(page);
}

static inline void free_list_of_pages(struct linked_page *list,
				      int clear_page_nosave)
{
	while (list) {
		struct linked_page *lp = list->next;

		free_image_page(list, clear_page_nosave);
		list = lp;
	}
}

/*
 * struct chain_allocator is used for allocating small objects out of
 * a linked list of pages called 'the chain'.
 *
 * The chain grows each time there is no room for a new object in
 * the current page. The allocated objects cannot be freed individually.
 * It is only possible to free them all at once, by freeing the entire
 * chain.
 *
 * NOTE: The chain allocator may be inefficient if the allocated objects
 * are not much smaller than PAGE_SIZE.
 */
struct chain_allocator {
	struct linked_page *chain;	/* the chain */
	unsigned int used_space;	/* total size of objects allocated out
					   of the current page */
	gfp_t gfp_mask;		/* mask for allocating pages */
	int safe_needed;	/* if set, only "safe" pages are allocated */
};

static void chain_init(struct chain_allocator *ca, gfp_t gfp_mask,
		       int safe_needed)
{
	ca->chain = NULL;
	ca->used_space = LINKED_PAGE_DATA_SIZE;
	ca->gfp_mask = gfp_mask;
	ca->safe_needed = safe_needed;
}

static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
{
	void *ret;

	if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
		struct linked_page *lp;

		lp = ca->safe_needed ? __get_safe_page(ca->gfp_mask) :
					get_image_page(ca->gfp_mask, PG_ANY);
		if (!lp)
			return NULL;

		lp->next = ca->chain;
		ca->chain = lp;
		ca->used_space = 0;
	}
	ret = ca->chain->data + ca->used_space;
	ca->used_space += size;
	return ret;
}
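
/*
 * For a sense of scale: with 4 KiB pages, LINKED_PAGE_DATA_SIZE is 4088
 * bytes, so a single page of the chain can hold roughly 170 of the 24-byte
 * struct rtree_node objects defined below (numbers assume a 64-bit kernel).
 */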

/**
 * Data types related to memory bitmaps.
 *
 * A memory bitmap is a structure consisting of many linked lists of
 * objects. The main list's elements are of type struct zone_bitmap
 * and each of them corresponds to one zone. For each zone bitmap
 * object there is a list of objects of type struct bm_block that
 * represent blocks of the bitmap in which information is stored.
 *
 * struct memory_bitmap contains a pointer to the main list of zone
 * bitmap objects, a struct bm_position used for browsing the bitmap,
 * and a pointer to the list of pages used for allocating all of the
 * zone bitmap objects and bitmap block objects.
 *
 * NOTE: It has to be possible to lay out the bitmap in memory
 * using only allocations of order 0. Additionally, the bitmap is
 * designed to work with an arbitrary number of zones (this is over the
 * top for now, but let's avoid making unnecessary assumptions ;-).
 *
 * struct zone_bitmap contains a pointer to a list of bitmap block
 * objects and a pointer to the bitmap block object that has been
 * most recently used for setting bits. Additionally, it contains the
 * PFNs that correspond to the start and end of the represented zone.
 *
 * struct bm_block contains a pointer to the memory page in which
 * information is stored (in the form of a block of bitmap).
 * It also contains the pfns that correspond to the start and end of
 * the represented memory area.
 *
 * The memory bitmap is organized as a radix tree to guarantee fast random
 * access to the bits. There is one radix tree for each zone (as returned
 * from create_mem_extents).
 *
 * One radix tree is represented by one struct mem_zone_bm_rtree. There are
 * two linked lists for the nodes of the tree, one for the inner nodes and
 * one for the leaf nodes. The linked leaf nodes are used for fast linear
 * access of the memory bitmap.
 *
 * The struct rtree_node represents one node of the radix tree.
 */

#define BM_END_OF_MAP	(~0UL)

#define BM_BITS_PER_BLOCK	(PAGE_SIZE * BITS_PER_BYTE)
#define BM_BLOCK_SHIFT		(PAGE_SHIFT + 3)
#define BM_BLOCK_MASK		((1UL << BM_BLOCK_SHIFT) - 1)

/*
 * struct rtree_node is a wrapper struct to link the nodes
 * of the rtree together for easy linear iteration over
 * bits and easy freeing
 */
struct rtree_node {
	struct list_head list;
	unsigned long *data;
};

/*
 * struct mem_zone_bm_rtree represents a bitmap used for one
 * populated memory zone.
 */
struct mem_zone_bm_rtree {
	struct list_head list;		/* Link Zones together         */
	struct list_head nodes;		/* Radix Tree inner nodes      */
	struct list_head leaves;	/* Radix Tree leaves           */
	unsigned long start_pfn;	/* Zone start page frame       */
	unsigned long end_pfn;		/* Zone end page frame + 1     */
	struct rtree_node *rtree;	/* Radix Tree Root             */
	int levels;			/* Number of Radix Tree Levels */
	unsigned int blocks;		/* Number of Bitmap Blocks     */
};

/* struct bm_position is used for browsing memory bitmaps */

struct bm_position {
	struct mem_zone_bm_rtree *zone;
	struct rtree_node *node;
	unsigned long node_pfn;
	int node_bit;
};

struct memory_bitmap {
	struct list_head zones;
	struct linked_page *p_list;	/* list of pages used to store zone
					   bitmap objects and bitmap block
					   objects */
	struct bm_position cur;	/* most recently used bit position */
};

/* Functions that operate on memory bitmaps */

#define BM_ENTRIES_PER_LEVEL	(PAGE_SIZE / sizeof(unsigned long))
#if BITS_PER_LONG == 32
#define BM_RTREE_LEVEL_SHIFT	(PAGE_SHIFT - 2)
#else
#define BM_RTREE_LEVEL_SHIFT	(PAGE_SHIFT - 3)
#endif
#define BM_RTREE_LEVEL_MASK	((1UL << BM_RTREE_LEVEL_SHIFT) - 1)
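
/*
 * For example, with 4 KiB pages on a 64-bit kernel: BM_BITS_PER_BLOCK is
 * 32768, so each leaf node covers 32768 page frames (128 MiB of memory),
 * and BM_ENTRIES_PER_LEVEL is 512, so each inner node fans out to 512
 * children (BM_RTREE_LEVEL_SHIFT == 9). A pfn at offset 100000 from the
 * start of its zone thus lives in leaf number 100000 >> 15 == 3, at bit
 * 100000 & BM_BLOCK_MASK == 1696.
 */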

/**
 * alloc_rtree_node - Allocate a new node and add it to the radix tree.
 *
 * This function is used to allocate inner nodes as well as the
 * leaf nodes of the radix tree. It also adds the node to the
 * corresponding linked list passed in by the *list parameter.
 */
static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed,
					   struct chain_allocator *ca,
					   struct list_head *list)
{
	struct rtree_node *node;

	node = chain_alloc(ca, sizeof(struct rtree_node));
	if (!node)
		return NULL;

	node->data = get_image_page(gfp_mask, safe_needed);
	if (!node->data)
		return NULL;

	list_add_tail(&node->list, list);

	return node;
}

/**
 * add_rtree_block - Add a new leaf node to the radix tree.
 *
 * The leaf nodes need to be allocated in order to keep the leaves
 * linked list in order. This is guaranteed by the zone->blocks
 * counter.
 */
static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask,
			   int safe_needed, struct chain_allocator *ca)
{
	struct rtree_node *node, *block, **dst;
	unsigned int levels_needed, block_nr;
	int i;

	block_nr = zone->blocks;
	levels_needed = 0;

	/* How many levels do we need for this block nr? */
	while (block_nr) {
		levels_needed += 1;
		block_nr >>= BM_RTREE_LEVEL_SHIFT;
	}

	/* Make sure the rtree has enough levels */
	for (i = zone->levels; i < levels_needed; i++) {
		node = alloc_rtree_node(gfp_mask, safe_needed, ca,
					&zone->nodes);
		if (!node)
			return -ENOMEM;

		node->data[0] = (unsigned long)zone->rtree;
		zone->rtree = node;
		zone->levels += 1;
	}

	/* Allocate new block */
	block = alloc_rtree_node(gfp_mask, safe_needed, ca, &zone->leaves);
	if (!block)
		return -ENOMEM;

	/* Now walk the rtree to insert the block */
	node = zone->rtree;
	dst = &zone->rtree;
	block_nr = zone->blocks;
	for (i = zone->levels; i > 0; i--) {
		int index;

		if (!node) {
			node = alloc_rtree_node(gfp_mask, safe_needed, ca,
						&zone->nodes);
			if (!node)
				return -ENOMEM;
			*dst = node;
		}

		index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
		index &= BM_RTREE_LEVEL_MASK;
		dst = (struct rtree_node **)&((*dst)->data[index]);
		node = *dst;
	}

	zone->blocks += 1;
	*dst = block;

	return 0;
}

static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
			       int clear_nosave_free);

/**
 * create_zone_bm_rtree - Create a radix tree for one zone.
 *
 * Allocates the mem_zone_bm_rtree structure and initializes it.
 * This function also allocates and builds the radix tree for the
 * zone.
 */
static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask,
						      int safe_needed,
						      struct chain_allocator *ca,
						      unsigned long start,
						      unsigned long end)
{
	struct mem_zone_bm_rtree *zone;
	unsigned int i, nr_blocks;
	unsigned long pages;

	pages = end - start;
	zone = chain_alloc(ca, sizeof(struct mem_zone_bm_rtree));
	if (!zone)
		return NULL;

	INIT_LIST_HEAD(&zone->nodes);
	INIT_LIST_HEAD(&zone->leaves);
	zone->start_pfn = start;
	zone->end_pfn = end;
	nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);

	for (i = 0; i < nr_blocks; i++) {
		if (add_rtree_block(zone, gfp_mask, safe_needed, ca)) {
			free_zone_bm_rtree(zone, PG_UNSAFE_CLEAR);
			return NULL;
		}
	}

	return zone;
}

/**
 * free_zone_bm_rtree - Free the memory of the radix tree.
 *
 * Free all node pages of the radix tree. The mem_zone_bm_rtree
 * structure itself is not freed here nor are the rtree_node
 * structs.
 */
static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone,
			       int clear_nosave_free)
{
	struct rtree_node *node;

	list_for_each_entry(node, &zone->nodes, list)
		free_image_page(node->data, clear_nosave_free);

	list_for_each_entry(node, &zone->leaves, list)
		free_image_page(node->data, clear_nosave_free);
}

static void memory_bm_position_reset(struct memory_bitmap *bm)
{
	bm->cur.zone = list_entry(bm->zones.next, struct mem_zone_bm_rtree,
				  list);
	bm->cur.node = list_entry(bm->cur.zone->leaves.next,
				  struct rtree_node, list);
	bm->cur.node_pfn = 0;
	bm->cur.node_bit = 0;
}

static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);

struct mem_extent {
	struct list_head hook;
	unsigned long start;
	unsigned long end;
};

/**
 * free_mem_extents - Free a list of memory extents.
 * @list: List of extents to free.
 */
static void free_mem_extents(struct list_head *list)
{
	struct mem_extent *ext, *aux;

	list_for_each_entry_safe(ext, aux, list, hook) {
		list_del(&ext->hook);
		kfree(ext);
	}
}

/**
 * create_mem_extents - Create a list of memory extents.
 * @list: List to put the extents into.
 * @gfp_mask: Mask to use for memory allocations.
 *
 * The extents represent contiguous ranges of PFNs.
 */
static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
{
	struct zone *zone;

	INIT_LIST_HEAD(list);

	for_each_populated_zone(zone) {
		unsigned long zone_start, zone_end;
		struct mem_extent *ext, *cur, *aux;

		zone_start = zone->zone_start_pfn;
		zone_end = zone_end_pfn(zone);

		list_for_each_entry(ext, list, hook)
			if (zone_start <= ext->end)
				break;

		if (&ext->hook == list || zone_end < ext->start) {
			/* New extent is necessary */
			struct mem_extent *new_ext;

			new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
			if (!new_ext) {
				free_mem_extents(list);
				return -ENOMEM;
			}
			new_ext->start = zone_start;
			new_ext->end = zone_end;
			list_add_tail(&new_ext->hook, &ext->hook);
			continue;
		}

		/* Merge this zone's range of PFNs with the existing one */
		if (zone_start < ext->start)
			ext->start = zone_start;
		if (zone_end > ext->end)
			ext->end = zone_end;

		/* More merging may be possible */
		cur = ext;
		list_for_each_entry_safe_continue(cur, aux, list, hook) {
			if (zone_end < cur->start)
				break;
			if (zone_end < cur->end)
				ext->end = cur->end;
			list_del(&cur->hook);
			kfree(cur);
		}
	}

	return 0;
}

/**
 * memory_bm_create - Allocate memory for a memory bitmap.
 */
static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask,
			    int safe_needed)
{
	struct chain_allocator ca;
	struct list_head mem_extents;
	struct mem_extent *ext;
	int error;

	chain_init(&ca, gfp_mask, safe_needed);
	INIT_LIST_HEAD(&bm->zones);

	error = create_mem_extents(&mem_extents, gfp_mask);
	if (error)
		return error;

	list_for_each_entry(ext, &mem_extents, hook) {
		struct mem_zone_bm_rtree *zone;

		zone = create_zone_bm_rtree(gfp_mask, safe_needed, &ca,
					    ext->start, ext->end);
		if (!zone) {
			error = -ENOMEM;
			goto Error;
		}
		list_add_tail(&zone->list, &bm->zones);
	}

	bm->p_list = ca.chain;
	memory_bm_position_reset(bm);
Exit:
	free_mem_extents(&mem_extents);
	return error;

Error:
	bm->p_list = ca.chain;
	memory_bm_free(bm, PG_UNSAFE_CLEAR);
	goto Exit;
}

/**
 * memory_bm_free - Free memory occupied by the memory bitmap.
 * @bm: Memory bitmap.
 */
static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
{
	struct mem_zone_bm_rtree *zone;

	list_for_each_entry(zone, &bm->zones, list)
		free_zone_bm_rtree(zone, clear_nosave_free);

	free_list_of_pages(bm->p_list, clear_nosave_free);

	INIT_LIST_HEAD(&bm->zones);
}

/**
 * memory_bm_find_bit - Find the bit for a given PFN in a memory bitmap.
 *
 * Find the bit in memory bitmap @bm that corresponds to the given PFN.
 * The cur.zone, cur.node and cur.node_pfn members of @bm are updated.
 *
 * Walk the radix tree to find the page containing the bit that represents @pfn
 * and return the position of the bit in @addr and @bit_nr.
 */
static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
			      void **addr, unsigned int *bit_nr)
{
	struct mem_zone_bm_rtree *curr, *zone;
	struct rtree_node *node;
	int i, block_nr;

	zone = bm->cur.zone;

	if (pfn >= zone->start_pfn && pfn < zone->end_pfn)
		goto zone_found;

	zone = NULL;

	/* Find the right zone */
	list_for_each_entry(curr, &bm->zones, list) {
		if (pfn >= curr->start_pfn && pfn < curr->end_pfn) {
			zone = curr;
			break;
		}
	}

	if (!zone)
		return -EFAULT;

zone_found:
	/*
	 * We have found the zone. Now walk the radix tree to find the leaf node
	 * for our PFN.
	 */

	/*
	 * If the zone we wish to scan is the current zone and the
	 * pfn falls into the current node then we do not need to walk
	 * the tree.
	 */
	node = bm->cur.node;
	if (zone == bm->cur.zone &&
	    ((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn)
		goto node_found;

	node = zone->rtree;
	block_nr = (pfn - zone->start_pfn) >> BM_BLOCK_SHIFT;

	for (i = zone->levels; i > 0; i--) {
		int index;

		index = block_nr >> ((i - 1) * BM_RTREE_LEVEL_SHIFT);
		index &= BM_RTREE_LEVEL_MASK;
		BUG_ON(node->data[index] == 0);
		node = (struct rtree_node *)node->data[index];
	}

node_found:
	/* Update last position */
	bm->cur.zone = zone;
	bm->cur.node = node;
	bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;

	/* Set return values */
	*addr = node->data;
	*bit_nr = (pfn - zone->start_pfn) & BM_BLOCK_MASK;

	return 0;
}

static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
{
	void *addr;
	unsigned int bit;
	int error;

	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
	BUG_ON(error);
	set_bit(bit, addr);
}

static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
{
	void *addr;
	unsigned int bit;
	int error;

	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
	if (!error)
		set_bit(bit, addr);

	return error;
}

static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
{
	void *addr;
	unsigned int bit;
	int error;

	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
	BUG_ON(error);
	clear_bit(bit, addr);
}

static void memory_bm_clear_current(struct memory_bitmap *bm)
{
	int bit;

	bit = max(bm->cur.node_bit - 1, 0);
	clear_bit(bit, bm->cur.node->data);
}

static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
{
	void *addr;
	unsigned int bit;
	int error;

	error = memory_bm_find_bit(bm, pfn, &addr, &bit);
	BUG_ON(error);
	return test_bit(bit, addr);
}

static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
{
	void *addr;
	unsigned int bit;

	return !memory_bm_find_bit(bm, pfn, &addr, &bit);
}

/*
 * rtree_next_node - Jump to the next leaf node.
 *
 * Set the position to the beginning of the next node in the
 * memory bitmap. This is either the next node in the current
 * zone's radix tree or the first node in the radix tree of the
 * next zone.
 *
 * Return true if there is a next node, false otherwise.
 */
static bool rtree_next_node(struct memory_bitmap *bm)
{
	if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) {
		bm->cur.node = list_entry(bm->cur.node->list.next,
					  struct rtree_node, list);
		bm->cur.node_pfn += BM_BITS_PER_BLOCK;
		bm->cur.node_bit = 0;
		touch_softlockup_watchdog();
		return true;
	}

	/* No more nodes, goto next zone */
	if (!list_is_last(&bm->cur.zone->list, &bm->zones)) {
		bm->cur.zone = list_entry(bm->cur.zone->list.next,
					  struct mem_zone_bm_rtree, list);
		bm->cur.node = list_entry(bm->cur.zone->leaves.next,
					  struct rtree_node, list);
		bm->cur.node_pfn = 0;
		bm->cur.node_bit = 0;
		return true;
	}

	/* No more zones */
	return false;
}

/**
 * memory_bm_next_pfn - Find the next set bit in a memory bitmap.
 * @bm: Memory bitmap.
 *
 * Starting from the last returned position this function searches for the next
 * set bit in @bm and returns the PFN represented by it. If no more bits are
 * set, BM_END_OF_MAP is returned.
 *
 * It is required to run memory_bm_position_reset() before the first call to
 * this function for the given memory bitmap.
 */
static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
{
	unsigned long bits, pfn, pages;
	int bit;

	do {
		pages = bm->cur.zone->end_pfn - bm->cur.zone->start_pfn;
		bits = min(pages - bm->cur.node_pfn, BM_BITS_PER_BLOCK);
		bit = find_next_bit(bm->cur.node->data, bits,
				    bm->cur.node_bit);
		if (bit < bits) {
			pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
			bm->cur.node_bit = bit + 1;
			return pfn;
		}
	} while (rtree_next_node(bm));

	return BM_END_OF_MAP;
}
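
/*
 * A typical iteration over all set bits, as done for instance in
 * clear_or_poison_free_pages() and copy_data_pages() below:
 *
 *	memory_bm_position_reset(bm);
 *	for (pfn = memory_bm_next_pfn(bm); pfn != BM_END_OF_MAP;
 *	     pfn = memory_bm_next_pfn(bm))
 *		handle(pfn);	(placeholder for the per-PFN work)
 */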

/*
 * This structure represents a range of page frames the contents of which
 * should not be saved during hibernation.
 */
struct nosave_region {
	struct list_head list;
	unsigned long start_pfn;
	unsigned long end_pfn;
};

static LIST_HEAD(nosave_regions);

static void recycle_zone_bm_rtree(struct mem_zone_bm_rtree *zone)
{
	struct rtree_node *node;

	list_for_each_entry(node, &zone->nodes, list)
		recycle_safe_page(node->data);

	list_for_each_entry(node, &zone->leaves, list)
		recycle_safe_page(node->data);
}

static void memory_bm_recycle(struct memory_bitmap *bm)
{
	struct mem_zone_bm_rtree *zone;
	struct linked_page *p_list;

	list_for_each_entry(zone, &bm->zones, list)
		recycle_zone_bm_rtree(zone);

	p_list = bm->p_list;
	while (p_list) {
		struct linked_page *lp = p_list;

		p_list = lp->next;
		recycle_safe_page(lp);
	}
}

/**
 * register_nosave_region - Register a region of unsaveable memory.
 *
 * Register a range of page frames the contents of which should not be saved
 * during hibernation (to be used in the early initialization code).
 */
void __init register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
{
	struct nosave_region *region;

	if (start_pfn >= end_pfn)
		return;

	if (!list_empty(&nosave_regions)) {
		/* Try to extend the previous region (they should be sorted) */
		region = list_entry(nosave_regions.prev,
				    struct nosave_region, list);
		if (region->end_pfn == start_pfn) {
			region->end_pfn = end_pfn;
			goto Report;
		}
	}
	/* This allocation cannot fail */
	region = memblock_alloc(sizeof(struct nosave_region),
				SMP_CACHE_BYTES);
	if (!region)
		panic("%s: Failed to allocate %zu bytes\n", __func__,
		      sizeof(struct nosave_region));
	region->start_pfn = start_pfn;
	region->end_pfn = end_pfn;
	list_add_tail(&region->list, &nosave_regions);
Report:
	pr_info("Registered nosave memory: [mem %#010llx-%#010llx]\n",
		(unsigned long long) start_pfn << PAGE_SHIFT,
		((unsigned long long) end_pfn << PAGE_SHIFT) - 1);
}

/*
 * Set bits in this map correspond to the page frames the contents of which
 * should not be saved during the suspend.
 */
static struct memory_bitmap *forbidden_pages_map;

/* Set bits in this map correspond to free page frames. */
static struct memory_bitmap *free_pages_map;

/*
 * Each page frame allocated for creating the image is marked by setting the
 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
 */

void swsusp_set_page_free(struct page *page)
{
	if (free_pages_map)
		memory_bm_set_bit(free_pages_map, page_to_pfn(page));
}

static int swsusp_page_is_free(struct page *page)
{
	return free_pages_map ?
		memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
}

void swsusp_unset_page_free(struct page *page)
{
	if (free_pages_map)
		memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
}

static void swsusp_set_page_forbidden(struct page *page)
{
	if (forbidden_pages_map)
		memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
}

int swsusp_page_is_forbidden(struct page *page)
{
	return forbidden_pages_map ?
		memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
}

static void swsusp_unset_page_forbidden(struct page *page)
{
	if (forbidden_pages_map)
		memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
}

/**
 * mark_nosave_pages - Mark pages that should not be saved.
 * @bm: Memory bitmap.
 *
 * Set the bits in @bm that correspond to the page frames the contents of which
 * should not be saved.
 */
static void mark_nosave_pages(struct memory_bitmap *bm)
{
	struct nosave_region *region;

	if (list_empty(&nosave_regions))
		return;

	list_for_each_entry(region, &nosave_regions, list) {
		unsigned long pfn;

		pr_debug("Marking nosave pages: [mem %#010llx-%#010llx]\n",
			 (unsigned long long) region->start_pfn << PAGE_SHIFT,
			 ((unsigned long long) region->end_pfn << PAGE_SHIFT)
				- 1);

		for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
			if (pfn_valid(pfn)) {
				/*
				 * It is safe to ignore the result of
				 * mem_bm_set_bit_check() here, since we won't
				 * touch the PFNs for which the error is
				 * returned anyway.
				 */
				mem_bm_set_bit_check(bm, pfn);
			}
	}
}

/**
 * create_basic_memory_bitmaps - Create bitmaps to hold basic page information.
 *
 * Create bitmaps needed for marking page frames that should not be saved and
 * free page frames. The forbidden_pages_map and free_pages_map pointers are
 * only modified if everything goes well, because we don't want the bits to be
 * touched before both bitmaps are set up.
 */
int create_basic_memory_bitmaps(void)
{
	struct memory_bitmap *bm1, *bm2;
	int error = 0;

	if (forbidden_pages_map && free_pages_map)
		return 0;
	else
		BUG_ON(forbidden_pages_map || free_pages_map);

	bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
	if (!bm1)
		return -ENOMEM;

	error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
	if (error)
		goto Free_first_object;

	bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
	if (!bm2)
		goto Free_first_bitmap;

	error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
	if (error)
		goto Free_second_object;

	forbidden_pages_map = bm1;
	free_pages_map = bm2;
	mark_nosave_pages(forbidden_pages_map);

	pr_debug("Basic memory bitmaps created\n");

	return 0;

Free_second_object:
	kfree(bm2);
Free_first_bitmap:
	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
Free_first_object:
	kfree(bm1);
	return -ENOMEM;
}

/**
 * free_basic_memory_bitmaps - Free memory bitmaps holding basic information.
 *
 * Free memory bitmaps allocated by create_basic_memory_bitmaps(). The
 * auxiliary pointers are necessary so that the bitmaps themselves are not
 * referred to while they are being freed.
 */
void free_basic_memory_bitmaps(void)
{
	struct memory_bitmap *bm1, *bm2;

	if (WARN_ON(!(forbidden_pages_map && free_pages_map)))
		return;

	bm1 = forbidden_pages_map;
	bm2 = free_pages_map;
	forbidden_pages_map = NULL;
	free_pages_map = NULL;
	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
	kfree(bm1);
	memory_bm_free(bm2, PG_UNSAFE_CLEAR);
	kfree(bm2);

	pr_debug("Basic memory bitmaps freed\n");
}

static void clear_or_poison_free_page(struct page *page)
{
	if (page_poisoning_enabled_static())
		__kernel_poison_pages(page, 1);
	else if (want_init_on_free())
		clear_highpage(page);
}

void clear_or_poison_free_pages(void)
{
	struct memory_bitmap *bm = free_pages_map;
	unsigned long pfn;

	if (WARN_ON(!(free_pages_map)))
		return;

	if (page_poisoning_enabled() || want_init_on_free()) {
		memory_bm_position_reset(bm);
		pfn = memory_bm_next_pfn(bm);
		while (pfn != BM_END_OF_MAP) {
			if (pfn_valid(pfn))
				clear_or_poison_free_page(pfn_to_page(pfn));

			pfn = memory_bm_next_pfn(bm);
		}
		memory_bm_position_reset(bm);
		pr_info("free pages cleared after restore\n");
	}
}

/**
 * snapshot_additional_pages - Estimate the number of extra pages needed.
 * @zone: Memory zone to carry out the computation for.
 *
 * Estimate the number of additional pages needed for setting up the
 * hibernation image data structures for @zone (usually, the returned value is
 * greater than the exact number).
 */
unsigned int snapshot_additional_pages(struct zone *zone)
{
	unsigned int rtree, nodes;

	rtree = nodes = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
	rtree += DIV_ROUND_UP(rtree * sizeof(struct rtree_node),
			      LINKED_PAGE_DATA_SIZE);
	while (nodes > 1) {
		nodes = DIV_ROUND_UP(nodes, BM_ENTRIES_PER_LEVEL);
		rtree += nodes;
	}

	return 2 * rtree;
}
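
/*
 * For instance, on a 64-bit kernel with 4 KiB pages, a zone spanning 4 GiB
 * (1048576 page frames) needs 32 bitmap pages per bitmap, one extra chain
 * page for the rtree_node objects and one inner rtree node, i.e. 34 pages,
 * so the estimate above comes out at 68 pages (272 KiB) for the two bitmaps.
 */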

#ifdef CONFIG_HIGHMEM
/**
 * count_free_highmem_pages - Compute the total number of free highmem pages.
 *
 * The returned number is system-wide.
 */
static unsigned int count_free_highmem_pages(void)
{
	struct zone *zone;
	unsigned int cnt = 0;

	for_each_populated_zone(zone)
		if (is_highmem(zone))
			cnt += zone_page_state(zone, NR_FREE_PAGES);

	return cnt;
}

/**
 * saveable_highmem_page - Check if a highmem page is saveable.
 *
 * Determine whether a highmem page should be included in a hibernation image.
 *
 * We should save the page if it isn't Nosave or NosaveFree, or Reserved,
 * and it isn't part of a free chunk of pages.
 */
static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
{
	struct page *page;

	if (!pfn_valid(pfn))
		return NULL;

	page = pfn_to_online_page(pfn);
	if (!page || page_zone(page) != zone)
		return NULL;

	BUG_ON(!PageHighMem(page));

	if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
		return NULL;

	if (PageReserved(page) || PageOffline(page))
		return NULL;

	if (page_is_guard(page))
		return NULL;

	return page;
}

/**
 * count_highmem_pages - Compute the total number of saveable highmem pages.
 */
static unsigned int count_highmem_pages(void)
{
	struct zone *zone;
	unsigned int n = 0;

	for_each_populated_zone(zone) {
		unsigned long pfn, max_zone_pfn;

		if (!is_highmem(zone))
			continue;

		mark_free_pages(zone);
		max_zone_pfn = zone_end_pfn(zone);
		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
			if (saveable_highmem_page(zone, pfn))
				n++;
	}
	return n;
}
#else
static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
{
	return NULL;
}
#endif /* CONFIG_HIGHMEM */

/**
 * saveable_page - Check if the given page is saveable.
 *
 * Determine whether a non-highmem page should be included in a hibernation
 * image.
 *
 * We should save the page if it isn't Nosave, and is not in the range
 * of pages statically defined as 'unsaveable', and it isn't part of
 * a free chunk of pages.
 */
static struct page *saveable_page(struct zone *zone, unsigned long pfn)
{
	struct page *page;

	if (!pfn_valid(pfn))
		return NULL;

	page = pfn_to_online_page(pfn);
	if (!page || page_zone(page) != zone)
		return NULL;

	BUG_ON(PageHighMem(page));

	if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
		return NULL;

	if (PageOffline(page))
		return NULL;

	if (PageReserved(page)
	    && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
		return NULL;

	if (page_is_guard(page))
		return NULL;

	return page;
}

/**
 * count_data_pages - Compute the total number of saveable non-highmem pages.
 */
static unsigned int count_data_pages(void)
{
	struct zone *zone;
	unsigned long pfn, max_zone_pfn;
	unsigned int n = 0;

	for_each_populated_zone(zone) {
		if (is_highmem(zone))
			continue;

		mark_free_pages(zone);
		max_zone_pfn = zone_end_pfn(zone);
		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
			if (saveable_page(zone, pfn))
				n++;
	}
	return n;
}

/*
 * This is needed because copy_page and memcpy are not usable for copying
 * task structs.
 */
static inline void do_copy_page(long *dst, long *src)
{
	int n;

	for (n = PAGE_SIZE / sizeof(long); n; n--)
		*dst++ = *src++;
}

/**
 * safe_copy_page - Copy a page in a safe way.
 *
 * Check if the page we are going to copy is marked as present in the kernel
 * page tables. This is always the case if CONFIG_DEBUG_PAGEALLOC or
 * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
 * always returns 'true'.
 */
static void safe_copy_page(void *dst, struct page *s_page)
{
	if (kernel_page_present(s_page)) {
		do_copy_page(dst, page_address(s_page));
	} else {
		hibernate_map_page(s_page);
		do_copy_page(dst, page_address(s_page));
		hibernate_unmap_page(s_page);
	}
}

#ifdef CONFIG_HIGHMEM
static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn)
{
	return is_highmem(zone) ?
		saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
}

static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
	struct page *s_page, *d_page;
	void *src, *dst;

	s_page = pfn_to_page(src_pfn);
	d_page = pfn_to_page(dst_pfn);
	if (PageHighMem(s_page)) {
		src = kmap_atomic(s_page);
		dst = kmap_atomic(d_page);
		do_copy_page(dst, src);
		kunmap_atomic(dst);
		kunmap_atomic(src);
	} else {
		if (PageHighMem(d_page)) {
			/*
			 * The page pointed to by src may contain some kernel
			 * data modified by kmap_atomic()
			 */
			safe_copy_page(buffer, s_page);
			dst = kmap_atomic(d_page);
			copy_page(dst, buffer);
			kunmap_atomic(dst);
		} else {
			safe_copy_page(page_address(d_page), s_page);
		}
	}
}
#else
#define page_is_saveable(zone, pfn)	saveable_page(zone, pfn)

static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
{
	safe_copy_page(page_address(pfn_to_page(dst_pfn)),
		       pfn_to_page(src_pfn));
}
#endif /* CONFIG_HIGHMEM */

static void copy_data_pages(struct memory_bitmap *copy_bm,
			    struct memory_bitmap *orig_bm)
{
	struct zone *zone;
	unsigned long pfn;

	for_each_populated_zone(zone) {
		unsigned long max_zone_pfn;

		mark_free_pages(zone);
		max_zone_pfn = zone_end_pfn(zone);
		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
			if (page_is_saveable(zone, pfn))
				memory_bm_set_bit(orig_bm, pfn);
	}
	memory_bm_position_reset(orig_bm);
	memory_bm_position_reset(copy_bm);
	for(;;) {
		pfn = memory_bm_next_pfn(orig_bm);
		if (unlikely(pfn == BM_END_OF_MAP))
			break;
		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
	}
}

/* Total number of image pages */
static unsigned int nr_copy_pages;
/* Number of pages needed for saving the original pfns of the image pages */
static unsigned int nr_meta_pages;
/*
 * Numbers of normal and highmem page frames allocated for hibernation image
 * before suspending devices.
 */
static unsigned int alloc_normal, alloc_highmem;
/*
 * Memory bitmap used for marking saveable pages (during hibernation) or
 * hibernation image pages (during restore)
 */
static struct memory_bitmap orig_bm;
/*
 * Memory bitmap used during hibernation for marking allocated page frames that
 * will contain copies of saveable pages. During restore it is initially used
 * for marking hibernation image pages, but then the set bits from it are
 * duplicated in @orig_bm and it is released. On highmem systems it is next
 * used for marking "safe" highmem pages, but it has to be reinitialized for
 * this purpose.
 */
static struct memory_bitmap copy_bm;

/**
 * swsusp_free - Free pages allocated for hibernation image.
 *
 * Image pages are allocated before snapshot creation, so they need to be
 * released after resume.
 */
void swsusp_free(void)
{
	unsigned long fb_pfn, fr_pfn;

	if (!forbidden_pages_map || !free_pages_map)
		goto out;

	memory_bm_position_reset(forbidden_pages_map);
	memory_bm_position_reset(free_pages_map);

loop:
	fr_pfn = memory_bm_next_pfn(free_pages_map);
	fb_pfn = memory_bm_next_pfn(forbidden_pages_map);

	/*
	 * Find the next bit set in both bitmaps. This is guaranteed to
	 * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP.
	 */
	do {
		if (fb_pfn < fr_pfn)
			fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
		if (fr_pfn < fb_pfn)
			fr_pfn = memory_bm_next_pfn(free_pages_map);
	} while (fb_pfn != fr_pfn);

	if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) {
		struct page *page = pfn_to_page(fr_pfn);

		memory_bm_clear_current(forbidden_pages_map);
		memory_bm_clear_current(free_pages_map);
		hibernate_restore_unprotect_page(page_address(page));
		__free_page(page);
		goto loop;
	}

out:
	nr_copy_pages = 0;
	nr_meta_pages = 0;
	restore_pblist = NULL;
	buffer = NULL;
	alloc_normal = 0;
	alloc_highmem = 0;
	hibernate_restore_protection_end();
}

/* Helper functions used for the shrinking of memory. */

#define GFP_IMAGE	(GFP_KERNEL | __GFP_NOWARN)

/**
 * preallocate_image_pages - Allocate a number of pages for hibernation image.
 * @nr_pages: Number of page frames to allocate.
 * @mask: GFP flags to use for the allocation.
 *
 * Return value: Number of page frames actually allocated
 */
static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
{
	unsigned long nr_alloc = 0;

	while (nr_pages > 0) {
		struct page *page;

		page = alloc_image_page(mask);
		if (!page)
			break;
		memory_bm_set_bit(&copy_bm, page_to_pfn(page));
		if (PageHighMem(page))
			alloc_highmem++;
		else
			alloc_normal++;
		nr_pages--;
		nr_alloc++;
	}

	return nr_alloc;
}

static unsigned long preallocate_image_memory(unsigned long nr_pages,
					      unsigned long avail_normal)
{
	unsigned long alloc;

	if (avail_normal <= alloc_normal)
		return 0;

	alloc = avail_normal - alloc_normal;
	if (nr_pages < alloc)
		alloc = nr_pages;

	return preallocate_image_pages(alloc, GFP_IMAGE);
}

#ifdef CONFIG_HIGHMEM
static unsigned long preallocate_image_highmem(unsigned long nr_pages)
{
	return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
}

/**
 * __fraction - Compute (an approximation of) x * (multiplier / base).
 */
static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
{
	return div64_u64(x * multiplier, base);
}

static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
						  unsigned long highmem,
						  unsigned long total)
{
	unsigned long alloc = __fraction(nr_pages, highmem, total);

	return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
}
#else /* CONFIG_HIGHMEM */
static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
{
	return 0;
}

static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
							  unsigned long highmem,
							  unsigned long total)
{
	return 0;
}
#endif /* CONFIG_HIGHMEM */

/**
 * free_unnecessary_pages - Release preallocated pages not needed for the image.
 */
static unsigned long free_unnecessary_pages(void)
{
	unsigned long save, to_free_normal, to_free_highmem, free;

	save = count_data_pages();
	if (alloc_normal >= save) {
		to_free_normal = alloc_normal - save;
		save = 0;
	} else {
		to_free_normal = 0;
		save -= alloc_normal;
	}
	save += count_highmem_pages();
	if (alloc_highmem >= save) {
		to_free_highmem = alloc_highmem - save;
	} else {
		to_free_highmem = 0;
		save -= alloc_highmem;
		if (to_free_normal > save)
			to_free_normal -= save;
		else
			to_free_normal = 0;
	}
	free = to_free_normal + to_free_highmem;

	memory_bm_position_reset(&copy_bm);

	while (to_free_normal > 0 || to_free_highmem > 0) {
		unsigned long pfn = memory_bm_next_pfn(&copy_bm);
		struct page *page = pfn_to_page(pfn);

		if (PageHighMem(page)) {
			if (!to_free_highmem)
				continue;
			to_free_highmem--;
			alloc_highmem--;
		} else {
			if (!to_free_normal)
				continue;
			to_free_normal--;
			alloc_normal--;
		}
		memory_bm_clear_bit(&copy_bm, pfn);
		swsusp_unset_page_forbidden(page);
		swsusp_unset_page_free(page);
		__free_page(page);
	}

	return free;
}

/**
 * minimum_image_size - Estimate the minimum acceptable size of an image.
 * @saveable: Number of saveable pages in the system.
 *
 * We want to avoid attempting to free too much memory too hard, so estimate the
 * minimum acceptable size of a hibernation image to use as the lower limit for
 * preallocating memory.
 *
 * We assume that the minimum image size should be proportional to
 *
 * [number of saveable pages] - [number of pages that can be freed in theory]
 *
 * where the second term is the sum of (1) reclaimable slab pages, (2) active
 * and (3) inactive anonymous pages, (4) active and (5) inactive file pages.
 */
static unsigned long minimum_image_size(unsigned long saveable)
{
	unsigned long size;

	size = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B)
		+ global_node_page_state(NR_ACTIVE_ANON)
		+ global_node_page_state(NR_INACTIVE_ANON)
		+ global_node_page_state(NR_ACTIVE_FILE)
		+ global_node_page_state(NR_INACTIVE_FILE);

	return saveable <= size ? 0 : saveable - size;
}

/**
 * hibernate_preallocate_memory - Preallocate memory for hibernation image.
 *
 * To create a hibernation image it is necessary to make a copy of every page
 * frame in use. We also need a number of page frames to be free during
 * hibernation for allocations made while saving the image and for device
 * drivers, in case they need to allocate memory from their hibernation
 * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough
 * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through
 * /sys/power/reserved_size), respectively). To make this happen, we compute the
 * total number of available page frames and allocate at least
 *
 * ([page frames total] + PAGES_FOR_IO + [metadata pages]) / 2
 *  + 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE)
 *
 * of them, which corresponds to the maximum size of a hibernation image.
 *
 * If image_size is set below the number following from the above formula,
 * the preallocation of memory is continued until the total number of saveable
 * pages in the system is below the requested image size or the minimum
 * acceptable image size returned by minimum_image_size(), whichever is greater.
 */
int hibernate_preallocate_memory(void)
{
	struct zone *zone;
	unsigned long saveable, size, max_size, count, highmem, pages = 0;
	unsigned long alloc, save_highmem, pages_highmem, avail_normal;
	ktime_t start, stop;
	int error;

	pr_info("Preallocating image memory\n");
	start = ktime_get();

	error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
	if (error) {
		pr_err("Cannot allocate original bitmap\n");
		goto err_out;
	}

	error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
	if (error) {
		pr_err("Cannot allocate copy bitmap\n");
		goto err_out;
	}

	alloc_normal = 0;
	alloc_highmem = 0;

	/* Count the number of saveable data pages. */
	save_highmem = count_highmem_pages();
	saveable = count_data_pages();

	/*
	 * Compute the total number of page frames we can use (count) and the
	 * number of pages needed for image metadata (size).
	 */
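	/*
	 * In round numbers: max_size computed below ends up a bit under half
	 * of the usable page frames, so with the default image_size of 2/5 of
	 * total RAM (see hibernate_image_size_init()) the image size is
	 * normally capped by image_size rather than by max_size.
	 */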
	count = saveable;
	saveable += save_highmem;
	highmem = save_highmem;
	size = 0;
	for_each_populated_zone(zone) {
		size += snapshot_additional_pages(zone);
		if (is_highmem(zone))
			highmem += zone_page_state(zone, NR_FREE_PAGES);
		else
			count += zone_page_state(zone, NR_FREE_PAGES);
	}
	avail_normal = count;
	count += highmem;
	count -= totalreserve_pages;

	/* Compute the maximum number of saveable pages to leave in memory. */
	max_size = (count - (size + PAGES_FOR_IO)) / 2
			- 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
	/* Compute the desired number of image pages specified by image_size. */
	size = DIV_ROUND_UP(image_size, PAGE_SIZE);
	if (size > max_size)
		size = max_size;
	/*
	 * If the desired number of image pages is at least as large as the
	 * current number of saveable pages in memory, allocate page frames for
	 * the image and we're done.
	 */
	if (size >= saveable) {
		pages = preallocate_image_highmem(save_highmem);
		pages += preallocate_image_memory(saveable - pages, avail_normal);
		goto out;
	}

	/* Estimate the minimum size of the image. */
	pages = minimum_image_size(saveable);
	/*
	 * To avoid excessive pressure on the normal zone, leave room in it to
	 * accommodate an image of the minimum size (unless it's already too
	 * small, in which case don't preallocate pages from it at all).
	 */
	if (avail_normal > pages)
		avail_normal -= pages;
	else
		avail_normal = 0;
	if (size < pages)
		size = min_t(unsigned long, pages, max_size);

	/*
	 * Let the memory management subsystem know that we're going to need a
	 * large number of page frames to allocate and make it free some memory.
	 * NOTE: If this is not done, performance will be hurt badly in some
	 * test cases.
	 */
	shrink_all_memory(saveable - size);

	/*
	 * The number of saveable pages in memory was too high, so apply some
	 * pressure to decrease it. First, make room for the largest possible
	 * image and fail if that doesn't work. Next, try to decrease the size
	 * of the image as much as indicated by 'size' using allocations from
	 * highmem and non-highmem zones separately.
	 */
	pages_highmem = preallocate_image_highmem(highmem / 2);
	alloc = count - max_size;
	if (alloc > pages_highmem)
		alloc -= pages_highmem;
	else
		alloc = 0;
	pages = preallocate_image_memory(alloc, avail_normal);
	if (pages < alloc) {
		/* We have exhausted non-highmem pages, try highmem. */
		alloc -= pages;
		pages += pages_highmem;
		pages_highmem = preallocate_image_highmem(alloc);
		if (pages_highmem < alloc) {
			pr_err("Image allocation is %lu pages short\n",
				alloc - pages_highmem);
			goto err_out;
		}
		pages += pages_highmem;
		/*
		 * size is the desired number of saveable pages to leave in
		 * memory, so try to preallocate (all memory - size) pages.
		 */
		alloc = (count - pages) - size;
		pages += preallocate_image_highmem(alloc);
	} else {
		/*
		 * There are approximately max_size saveable pages at this point
		 * and we want to reduce this number down to size.
		 */
		alloc = max_size - size;
		size = preallocate_highmem_fraction(alloc, highmem, count);
		pages_highmem += size;
		alloc -= size;
		size = preallocate_image_memory(alloc, avail_normal);
		pages_highmem += preallocate_image_highmem(alloc - size);
		pages += pages_highmem + size;
	}

	/*
	 * We only need as many page frames for the image as there are saveable
	 * pages in memory, but we have allocated more. Release the excessive
	 * ones now.
	 */
	pages -= free_unnecessary_pages();

out:
	stop = ktime_get();
	pr_info("Allocated %lu pages for snapshot\n", pages);
	swsusp_show_speed(start, stop, pages, "Allocated");

	return 0;

err_out:
	swsusp_free();
	return -ENOMEM;
}

#ifdef CONFIG_HIGHMEM
/**
 * count_pages_for_highmem - Count non-highmem pages needed for copying highmem.
 *
 * Compute the number of non-highmem pages that will be necessary for creating
 * copies of highmem pages.
 */
static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
{
	unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;

	if (free_highmem >= nr_highmem)
		nr_highmem = 0;
	else
		nr_highmem -= free_highmem;

	return nr_highmem;
}
#else
static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
#endif /* CONFIG_HIGHMEM */

/**
 * enough_free_mem - Check if there is enough free memory for the image.
 */
static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
{
	struct zone *zone;
	unsigned int free = alloc_normal;

	for_each_populated_zone(zone)
		if (!is_highmem(zone))
			free += zone_page_state(zone, NR_FREE_PAGES);

	nr_pages += count_pages_for_highmem(nr_highmem);
	pr_debug("Normal pages needed: %u + %u, available pages: %u\n",
		 nr_pages, PAGES_FOR_IO, free);

	return free > nr_pages + PAGES_FOR_IO;
}

#ifdef CONFIG_HIGHMEM
/**
 * get_highmem_buffer - Allocate a buffer for highmem pages.
 *
 * If there are some highmem pages in the hibernation image, we may need a
 * buffer to copy them and/or load their data.
 */
static inline int get_highmem_buffer(int safe_needed)
{
	buffer = get_image_page(GFP_ATOMIC, safe_needed);
	return buffer ? 0 : -ENOMEM;
}

/**
 * alloc_highmem_pages - Allocate some highmem pages for the image.
 *
 * Try to allocate as many pages as needed, but if the number of free highmem
 * pages is less than that, allocate them all.
 */
static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm,
					       unsigned int nr_highmem)
{
	unsigned int to_alloc = count_free_highmem_pages();

	if (to_alloc > nr_highmem)
		to_alloc = nr_highmem;

	nr_highmem -= to_alloc;
	while (to_alloc-- > 0) {
		struct page *page;

		page = alloc_image_page(__GFP_HIGHMEM|__GFP_KSWAPD_RECLAIM);
		memory_bm_set_bit(bm, page_to_pfn(page));
	}
	return nr_highmem;
}
#else
static inline int get_highmem_buffer(int safe_needed) { return 0; }

static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm,
					       unsigned int n) { return 0; }
#endif /* CONFIG_HIGHMEM */

/**
 * swsusp_alloc - Allocate memory for hibernation image.
 *
 * We first try to allocate as many highmem pages as there are
 * saveable highmem pages in the system. If that fails, we allocate
 * non-highmem pages for the copies of the remaining highmem ones.
 *
 * In this approach it is likely that the copies of highmem pages will
 * also be located in the high memory, because of the way in which
 * copy_data_pages() works.
 */
static int swsusp_alloc(struct memory_bitmap *copy_bm,
			unsigned int nr_pages, unsigned int nr_highmem)
{
	if (nr_highmem > 0) {
		if (get_highmem_buffer(PG_ANY))
			goto err_out;
		if (nr_highmem > alloc_highmem) {
			nr_highmem -= alloc_highmem;
			nr_pages += alloc_highmem_pages(copy_bm, nr_highmem);
		}
	}
	if (nr_pages > alloc_normal) {
		nr_pages -= alloc_normal;
		while (nr_pages-- > 0) {
			struct page *page;

			page = alloc_image_page(GFP_ATOMIC);
			if (!page)
				goto err_out;
			memory_bm_set_bit(copy_bm, page_to_pfn(page));
		}
	}

	return 0;

 err_out:
	swsusp_free();
	return -ENOMEM;
}

asmlinkage __visible int swsusp_save(void)
{
	unsigned int nr_pages, nr_highmem;

	pr_info("Creating image:\n");

	drain_local_pages(NULL);
	nr_pages = count_data_pages();
	nr_highmem = count_highmem_pages();
	pr_info("Need to copy %u pages\n", nr_pages + nr_highmem);

	if (!enough_free_mem(nr_pages, nr_highmem)) {
		pr_err("Not enough free memory\n");
		return -ENOMEM;
	}

	if (swsusp_alloc(&copy_bm, nr_pages, nr_highmem)) {
		pr_err("Memory allocation failed\n");
		return -ENOMEM;
	}

	/*
	 * While allocating the suspend pagedir, new cold pages may appear.
	 * Kill them.
	 */
	drain_local_pages(NULL);
	copy_data_pages(&copy_bm, &orig_bm);

	/*
	 * End of critical section. From now on, we can write to memory,
	 * but we should not touch disk. This especially means we must _not_
	 * touch swap space! Except we must write out our image of course.
	 */

	nr_pages += nr_highmem;
	nr_copy_pages = nr_pages;
	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);

	pr_info("Image created (%u pages copied)\n", nr_pages);

	return 0;
}

#ifndef CONFIG_ARCH_HIBERNATION_HEADER
static int init_header_complete(struct swsusp_info *info)
{
	memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
	info->version_code = LINUX_VERSION_CODE;
	return 0;
}

static const char *check_image_kernel(struct swsusp_info *info)
{
	if (info->version_code != LINUX_VERSION_CODE)
		return "kernel version";
	if (strcmp(info->uts.sysname, init_utsname()->sysname))
		return "system type";
	if (strcmp(info->uts.release, init_utsname()->release))
		return "kernel release";
	if (strcmp(info->uts.version, init_utsname()->version))
		return "version";
	if (strcmp(info->uts.machine, init_utsname()->machine))
		return "machine";
	return NULL;
}
#endif /* CONFIG_ARCH_HIBERNATION_HEADER */

unsigned long snapshot_get_image_size(void)
{
	return nr_copy_pages + nr_meta_pages + 1;
}

static int init_header(struct swsusp_info *info)
{
	memset(info, 0, sizeof(struct swsusp_info));
	info->num_physpages = get_num_physpages();
	info->image_pages = nr_copy_pages;
	info->pages = snapshot_get_image_size();
	info->size = info->pages;
	info->size <<= PAGE_SHIFT;
	return init_header_complete(info);
}

/**
 * pack_pfns - Prepare PFNs for saving.
 * @buf: Memory buffer to store the PFNs in.
 * @bm: Memory bitmap.
 *
 * PFNs corresponding to set bits in @bm are stored in the area of memory
 * pointed to by @buf (1 page at a time).
 */
static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
{
	int j;

	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
		buf[j] = memory_bm_next_pfn(bm);
		if (unlikely(buf[j] == BM_END_OF_MAP))
			break;
	}
}

/**
 * snapshot_read_next - Get the address to read the next image page from.
 * @handle: Snapshot handle to be used for the reading.
 *
 * On the first call, @handle should point to a zeroed snapshot_handle
 * structure. The structure gets populated then and a pointer to it should be
 * passed to this function on every subsequent call.
 *
 * On success, the function returns a positive number. Then, the caller
 * is allowed to read up to the returned number of bytes from the memory
 * location computed by the data_of() macro.
 *
 * The function returns 0 to indicate the end of the data stream condition,
 * and negative numbers are returned on errors. If that happens, the structure
 * pointed to by @handle is not updated and should not be used any more.
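 *
 * A caller might drive the stream roughly as follows (an illustrative sketch
 * only; error handling and the actual output routine are omitted, and
 * write_out() is a hypothetical helper, not part of this file):
 *
 *	struct snapshot_handle handle = {};
 *	int n = snapshot_read_next(&handle);
 *
 *	while (n > 0) {
 *		write_out(data_of(handle), n);
 *		n = snapshot_read_next(&handle);
 *	}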
 */
int snapshot_read_next(struct snapshot_handle *handle)
{
	if (handle->cur > nr_meta_pages + nr_copy_pages)
		return 0;

	if (!buffer) {
		/* This makes the buffer be freed by swsusp_free() */
		buffer = get_image_page(GFP_ATOMIC, PG_ANY);
		if (!buffer)
			return -ENOMEM;
	}
	if (!handle->cur) {
		int error;

		error = init_header((struct swsusp_info *)buffer);
		if (error)
			return error;
		handle->buffer = buffer;
		memory_bm_position_reset(&orig_bm);
		memory_bm_position_reset(&copy_bm);
	} else if (handle->cur <= nr_meta_pages) {
		clear_page(buffer);
		pack_pfns(buffer, &orig_bm);
	} else {
		struct page *page;

		page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
		if (PageHighMem(page)) {
			/*
			 * Highmem pages are copied to the buffer,
			 * because we can't return with a kmapped
			 * highmem page (we may not be called again).
			 */
			void *kaddr;

			kaddr = kmap_atomic(page);
			copy_page(buffer, kaddr);
			kunmap_atomic(kaddr);
			handle->buffer = buffer;
		} else {
			handle->buffer = page_address(page);
		}
	}
	handle->cur++;
	return PAGE_SIZE;
}

static void duplicate_memory_bitmap(struct memory_bitmap *dst,
				    struct memory_bitmap *src)
{
	unsigned long pfn;

	memory_bm_position_reset(src);
	pfn = memory_bm_next_pfn(src);
	while (pfn != BM_END_OF_MAP) {
		memory_bm_set_bit(dst, pfn);
		pfn = memory_bm_next_pfn(src);
	}
}

/**
 * mark_unsafe_pages - Mark pages that were used before hibernation.
 *
 * Mark the pages that cannot be used for storing the image during restoration,
 * because they conflict with the pages that had been used before hibernation.
 */
static void mark_unsafe_pages(struct memory_bitmap *bm)
{
	unsigned long pfn;

	/* Clear the "free"/"unsafe" bit for all PFNs */
	memory_bm_position_reset(free_pages_map);
	pfn = memory_bm_next_pfn(free_pages_map);
	while (pfn != BM_END_OF_MAP) {
		memory_bm_clear_current(free_pages_map);
		pfn = memory_bm_next_pfn(free_pages_map);
	}

	/* Mark pages that correspond to the "original" PFNs as "unsafe" */
	duplicate_memory_bitmap(free_pages_map, bm);

	allocated_unsafe_pages = 0;
}

static int check_header(struct swsusp_info *info)
{
	const char *reason;

	reason = check_image_kernel(info);
	if (!reason && info->num_physpages != get_num_physpages())
		reason = "memory size";
	if (reason) {
		pr_err("Image mismatch: %s\n", reason);
		return -EPERM;
	}
	return 0;
}

/**
 * load_header - Check the image header and copy the data from it.
 */
static int load_header(struct swsusp_info *info)
{
	int error;

	restore_pblist = NULL;
	error = check_header(info);
	if (!error) {
		nr_copy_pages = info->image_pages;
		nr_meta_pages = info->pages - info->image_pages - 1;
	}
	return error;
}

/**
 * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
 * @buf: Area of memory containing the PFNs.
 * @bm: Memory bitmap.
 *
 * For each element of the array pointed to by @buf (1 page at a time), set the
 * corresponding bit in @bm.
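 *
 * For example, on a 64-bit kernel with 4 KiB pages each such page carries up
 * to PAGE_SIZE / sizeof(long) = 512 PFN entries (illustrative numbers); a
 * BM_END_OF_MAP entry terminates the array early.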
 */
static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
{
	int j;

	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
		if (unlikely(buf[j] == BM_END_OF_MAP))
			break;

		if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j]))
			memory_bm_set_bit(bm, buf[j]);
		else
			return -EFAULT;
	}

	return 0;
}

#ifdef CONFIG_HIGHMEM
/*
 * struct highmem_pbe is used for creating the list of highmem pages that
 * should be restored atomically during the resume from disk, because the page
 * frames they have occupied before the suspend are in use.
 */
struct highmem_pbe {
	struct page *copy_page;	/* data is here now */
	struct page *orig_page;	/* data was here before the suspend */
	struct highmem_pbe *next;
};

/*
 * List of highmem PBEs needed for restoring the highmem pages that were
 * allocated before the suspend and included in the suspend image, but have
 * also been allocated by the "resume" kernel, so their contents cannot be
 * written directly to their "original" page frames.
 */
static struct highmem_pbe *highmem_pblist;

/**
 * count_highmem_image_pages - Compute the number of highmem pages in the image.
 * @bm: Memory bitmap.
 *
 * The bits in @bm that correspond to image pages are assumed to be set.
 */
static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
{
	unsigned long pfn;
	unsigned int cnt = 0;

	memory_bm_position_reset(bm);
	pfn = memory_bm_next_pfn(bm);
	while (pfn != BM_END_OF_MAP) {
		if (PageHighMem(pfn_to_page(pfn)))
			cnt++;

		pfn = memory_bm_next_pfn(bm);
	}
	return cnt;
}

static unsigned int safe_highmem_pages;

static struct memory_bitmap *safe_highmem_bm;

/**
 * prepare_highmem_image - Allocate memory for loading highmem data from image.
 * @bm: Pointer to an uninitialized memory bitmap structure.
 * @nr_highmem_p: Pointer to the number of highmem image pages.
 *
 * Try to allocate as many highmem pages as there are highmem image pages
 * (@nr_highmem_p points to the variable containing the number of highmem image
 * pages). The pages that are "safe" (ie. will not be overwritten when the
 * hibernation image is restored entirely) have the corresponding bits set in
 * @bm (it must be uninitialized).
 *
 * NOTE: This function should not be called if there are no highmem image pages.
 */
static int prepare_highmem_image(struct memory_bitmap *bm,
				 unsigned int *nr_highmem_p)
{
	unsigned int to_alloc;

	if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
		return -ENOMEM;

	if (get_highmem_buffer(PG_SAFE))
		return -ENOMEM;

	to_alloc = count_free_highmem_pages();
	if (to_alloc > *nr_highmem_p)
		to_alloc = *nr_highmem_p;
	else
		*nr_highmem_p = to_alloc;

	safe_highmem_pages = 0;
	while (to_alloc-- > 0) {
		struct page *page;

		page = alloc_page(__GFP_HIGHMEM);
		if (!swsusp_page_is_free(page)) {
			/* The page is "safe", set its bit in the bitmap */
			memory_bm_set_bit(bm, page_to_pfn(page));
			safe_highmem_pages++;
		}
		/* Mark the page as allocated */
		swsusp_set_page_forbidden(page);
		swsusp_set_page_free(page);
	}
	memory_bm_position_reset(bm);
	safe_highmem_bm = bm;
	return 0;
}

static struct page *last_highmem_page;

/**
 * get_highmem_page_buffer - Prepare a buffer to store a highmem image page.
 *
 * For a given highmem image page get a buffer that snapshot_write_next() should
 * return to its caller to write to.
 *
 * If the page is to be saved to its "original" page frame or a copy of
 * the page is to be made in the highmem, @buffer is returned. Otherwise,
 * the copy of the page is to be made in normal memory, so the address of
 * the copy is returned.
 *
 * If @buffer is returned, the caller of snapshot_write_next() will write
 * the page's contents to @buffer, so they will have to be copied to the
 * right location on the next call to snapshot_write_next() and it is done
 * with the help of copy_last_highmem_page(). For this purpose, if
 * @buffer is returned, @last_highmem_page is set to the page to which
 * the data will have to be copied from @buffer.
 */
static void *get_highmem_page_buffer(struct page *page,
				     struct chain_allocator *ca)
{
	struct highmem_pbe *pbe;
	void *kaddr;

	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
		/*
		 * We have allocated the "original" page frame and we can
		 * use it directly to store the loaded page.
		 */
		last_highmem_page = page;
		return buffer;
	}
	/*
	 * The "original" page frame has not been allocated and we have to
	 * use a "safe" page frame to store the loaded page.
	 */
	pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
	if (!pbe) {
		swsusp_free();
		return ERR_PTR(-ENOMEM);
	}
	pbe->orig_page = page;
	if (safe_highmem_pages > 0) {
		struct page *tmp;

		/* Copy of the page will be stored in high memory */
		kaddr = buffer;
		tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
		safe_highmem_pages--;
		last_highmem_page = tmp;
		pbe->copy_page = tmp;
	} else {
		/* Copy of the page will be stored in normal memory */
		kaddr = safe_pages_list;
		safe_pages_list = safe_pages_list->next;
		pbe->copy_page = virt_to_page(kaddr);
	}
	pbe->next = highmem_pblist;
	highmem_pblist = pbe;
	return kaddr;
}

/**
 * copy_last_highmem_page - Copy the most recent highmem image page.
 *
 * Copy the contents of a highmem image from @buffer, where the caller of
 * snapshot_write_next() has stored them, to the right location represented by
 * @last_highmem_page.
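 *
 * If the previous call to get_highmem_page_buffer() handed out a normal-memory
 * "safe" page rather than @buffer, @last_highmem_page is left NULL and this
 * function is a no-op.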
 */
static void copy_last_highmem_page(void)
{
	if (last_highmem_page) {
		void *dst;

		dst = kmap_atomic(last_highmem_page);
		copy_page(dst, buffer);
		kunmap_atomic(dst);
		last_highmem_page = NULL;
	}
}

static inline int last_highmem_page_copied(void)
{
	return !last_highmem_page;
}

static inline void free_highmem_data(void)
{
	if (safe_highmem_bm)
		memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);

	if (buffer)
		free_image_page(buffer, PG_UNSAFE_CLEAR);
}
#else
static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }

static inline int prepare_highmem_image(struct memory_bitmap *bm,
					unsigned int *nr_highmem_p) { return 0; }

static inline void *get_highmem_page_buffer(struct page *page,
					    struct chain_allocator *ca)
{
	return ERR_PTR(-EINVAL);
}

static inline void copy_last_highmem_page(void) {}
static inline int last_highmem_page_copied(void) { return 1; }
static inline void free_highmem_data(void) {}
#endif /* CONFIG_HIGHMEM */

#define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))

/**
 * prepare_image - Make room for loading hibernation image.
 * @new_bm: Uninitialized memory bitmap structure.
 * @bm: Memory bitmap with unsafe pages marked.
 *
 * Use @bm to mark the pages that will be overwritten in the process of
 * restoring the system memory state from the suspend image ("unsafe" pages)
 * and allocate memory for the image.
 *
 * The idea is to allocate a new memory bitmap first and then allocate
 * as many pages as needed for image data, but without specifying what those
 * pages will be used for just yet. Instead, we mark them all as allocated and
 * create a list of "safe" pages to be used later. On systems with high
 * memory a list of "safe" highmem pages is created too.
 */
static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
{
	unsigned int nr_pages, nr_highmem;
	struct linked_page *lp;
	int error;

	/* If there is no highmem, the buffer will not be necessary */
	free_image_page(buffer, PG_UNSAFE_CLEAR);
	buffer = NULL;

	nr_highmem = count_highmem_image_pages(bm);
	mark_unsafe_pages(bm);

	error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
	if (error)
		goto Free;

	duplicate_memory_bitmap(new_bm, bm);
	memory_bm_free(bm, PG_UNSAFE_KEEP);
	if (nr_highmem > 0) {
		error = prepare_highmem_image(bm, &nr_highmem);
		if (error)
			goto Free;
	}
	/*
	 * Reserve some safe pages for potential later use.
	 *
	 * NOTE: This way we make sure there will be enough safe pages for the
	 * chain_alloc() in get_buffer(). It is a bit wasteful, but
	 * nr_copy_pages cannot be greater than 50% of the memory anyway.
	 *
	 * nr_copy_pages cannot be less than allocated_unsafe_pages too.
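	 *
	 * For instance, on a 64-bit configuration with 4 KiB pages a linked
	 * page holds (PAGE_SIZE - 8) / sizeof(struct pbe) = 170 PBEs
	 * (illustrative numbers, assuming a 24-byte struct pbe), so only
	 * about 1/170th as many pages are reserved here as are preallocated
	 * for image data below.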
	 */
	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
	while (nr_pages > 0) {
		lp = get_image_page(GFP_ATOMIC, PG_SAFE);
		if (!lp) {
			error = -ENOMEM;
			goto Free;
		}
		lp->next = safe_pages_list;
		safe_pages_list = lp;
		nr_pages--;
	}
	/* Preallocate memory for the image */
	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
	while (nr_pages > 0) {
		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
		if (!lp) {
			error = -ENOMEM;
			goto Free;
		}
		if (!swsusp_page_is_free(virt_to_page(lp))) {
			/* The page is "safe", add it to the list */
			lp->next = safe_pages_list;
			safe_pages_list = lp;
		}
		/* Mark the page as allocated */
		swsusp_set_page_forbidden(virt_to_page(lp));
		swsusp_set_page_free(virt_to_page(lp));
		nr_pages--;
	}
	return 0;

 Free:
	swsusp_free();
	return error;
}

/**
 * get_buffer - Get the address to store the next image data page.
 *
 * Get the address that snapshot_write_next() should return to its caller to
 * write to.
 */
static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
{
	struct pbe *pbe;
	struct page *page;
	unsigned long pfn = memory_bm_next_pfn(bm);

	if (pfn == BM_END_OF_MAP)
		return ERR_PTR(-EFAULT);

	page = pfn_to_page(pfn);
	if (PageHighMem(page))
		return get_highmem_page_buffer(page, ca);

	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
		/*
		 * We have allocated the "original" page frame and we can
		 * use it directly to store the loaded page.
		 */
		return page_address(page);

	/*
	 * The "original" page frame has not been allocated and we have to
	 * use a "safe" page frame to store the loaded page.
	 */
	pbe = chain_alloc(ca, sizeof(struct pbe));
	if (!pbe) {
		swsusp_free();
		return ERR_PTR(-ENOMEM);
	}
	pbe->orig_address = page_address(page);
	pbe->address = safe_pages_list;
	safe_pages_list = safe_pages_list->next;
	pbe->next = restore_pblist;
	restore_pblist = pbe;
	return pbe->address;
}

/**
 * snapshot_write_next - Get the address to store the next image page.
 * @handle: Snapshot handle structure to guide the writing.
 *
 * On the first call, @handle should point to a zeroed snapshot_handle
 * structure. The structure gets populated then and a pointer to it should be
 * passed to this function on every subsequent call.
 *
 * On success, the function returns a positive number. Then, the caller
 * is allowed to write up to the returned number of bytes to the memory
 * location computed by the data_of() macro.
 *
 * The function returns 0 to indicate the "end of file" condition. Negative
 * numbers are returned on errors, in which case the structure pointed to by
 * @handle is not updated and should not be used any more.
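 *
 * A caller might feed the image back roughly as follows (an illustrative
 * sketch only; error handling and the actual input routine are omitted, and
 * read_in() is a hypothetical helper, not part of this file):
 *
 *	struct snapshot_handle handle = {};
 *	int n = snapshot_write_next(&handle);
 *
 *	while (n > 0) {
 *		read_in(data_of(handle), n);
 *		n = snapshot_write_next(&handle);
 *	}
 *	snapshot_write_finalize(&handle);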
 */
int snapshot_write_next(struct snapshot_handle *handle)
{
	static struct chain_allocator ca;
	int error = 0;

	/* Check if we have already loaded the entire image */
	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
		return 0;

	handle->sync_read = 1;

	if (!handle->cur) {
		if (!buffer)
			/* This makes the buffer be freed by swsusp_free() */
			buffer = get_image_page(GFP_ATOMIC, PG_ANY);

		if (!buffer)
			return -ENOMEM;

		handle->buffer = buffer;
	} else if (handle->cur == 1) {
		error = load_header(buffer);
		if (error)
			return error;

		safe_pages_list = NULL;

		error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
		if (error)
			return error;

		hibernate_restore_protection_begin();
	} else if (handle->cur <= nr_meta_pages + 1) {
		error = unpack_orig_pfns(buffer, &copy_bm);
		if (error)
			return error;

		if (handle->cur == nr_meta_pages + 1) {
			error = prepare_image(&orig_bm, &copy_bm);
			if (error)
				return error;

			chain_init(&ca, GFP_ATOMIC, PG_SAFE);
			memory_bm_position_reset(&orig_bm);
			restore_pblist = NULL;
			handle->buffer = get_buffer(&orig_bm, &ca);
			handle->sync_read = 0;
			if (IS_ERR(handle->buffer))
				return PTR_ERR(handle->buffer);
		}
	} else {
		copy_last_highmem_page();
		hibernate_restore_protect_page(handle->buffer);
		handle->buffer = get_buffer(&orig_bm, &ca);
		if (IS_ERR(handle->buffer))
			return PTR_ERR(handle->buffer);
		if (handle->buffer != buffer)
			handle->sync_read = 0;
	}
	handle->cur++;
	return PAGE_SIZE;
}

/**
 * snapshot_write_finalize - Complete the loading of a hibernation image.
 *
 * Must be called after the last call to snapshot_write_next() in case the last
 * page in the image happens to be a highmem page and its contents should be
 * stored in highmem. Additionally, it recycles bitmap memory that's not
 * necessary any more.
 */
void snapshot_write_finalize(struct snapshot_handle *handle)
{
	copy_last_highmem_page();
	hibernate_restore_protect_page(handle->buffer);
	/* Do that only if we have loaded the image entirely */
	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
		memory_bm_recycle(&orig_bm);
		free_highmem_data();
	}
}

int snapshot_image_loaded(struct snapshot_handle *handle)
{
	return !(!nr_copy_pages || !last_highmem_page_copied() ||
			handle->cur <= nr_meta_pages + nr_copy_pages);
}

#ifdef CONFIG_HIGHMEM
/* Assumes that @buf is ready and points to a "safe" page */
static inline void swap_two_pages_data(struct page *p1, struct page *p2,
				       void *buf)
{
	void *kaddr1, *kaddr2;

	kaddr1 = kmap_atomic(p1);
	kaddr2 = kmap_atomic(p2);
	copy_page(buf, kaddr1);
	copy_page(kaddr1, kaddr2);
	copy_page(kaddr2, buf);
	kunmap_atomic(kaddr2);
	kunmap_atomic(kaddr1);
}

/**
 * restore_highmem - Put highmem image pages into their original locations.
 *
 * For each highmem page that was in use before hibernation and is included in
 * the image, and also has been allocated by the "restore" kernel, swap its
 * current contents with the previous (ie. "before hibernation") ones.
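 * The swap itself is done by swap_two_pages_data() above, using a single
 * extra "safe" image page as scratch space.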
 *
 * If the restore eventually fails, we can call this function once again and
 * restore the highmem state as seen by the restore kernel.
 */
int restore_highmem(void)
{
	struct highmem_pbe *pbe = highmem_pblist;
	void *buf;

	if (!pbe)
		return 0;

	buf = get_image_page(GFP_ATOMIC, PG_SAFE);
	if (!buf)
		return -ENOMEM;

	while (pbe) {
		swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
		pbe = pbe->next;
	}
	free_image_page(buf, PG_UNSAFE_CLEAR);
	return 0;
}
#endif /* CONFIG_HIGHMEM */