1 /* 2 * linux/kernel/power/snapshot.c 3 * 4 * This file provides system snapshot/restore functionality for swsusp. 5 * 6 * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz> 7 * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> 8 * 9 * This file is released under the GPLv2. 10 * 11 */ 12 13 #include <linux/version.h> 14 #include <linux/module.h> 15 #include <linux/mm.h> 16 #include <linux/suspend.h> 17 #include <linux/delay.h> 18 #include <linux/bitops.h> 19 #include <linux/spinlock.h> 20 #include <linux/kernel.h> 21 #include <linux/pm.h> 22 #include <linux/device.h> 23 #include <linux/init.h> 24 #include <linux/bootmem.h> 25 #include <linux/syscalls.h> 26 #include <linux/console.h> 27 #include <linux/highmem.h> 28 #include <linux/list.h> 29 30 #include <asm/uaccess.h> 31 #include <asm/mmu_context.h> 32 #include <asm/pgtable.h> 33 #include <asm/tlbflush.h> 34 #include <asm/io.h> 35 36 #include "power.h" 37 38 static int swsusp_page_is_free(struct page *); 39 static void swsusp_set_page_forbidden(struct page *); 40 static void swsusp_unset_page_forbidden(struct page *); 41 42 /* 43 * Preferred image size in bytes (tunable via /sys/power/image_size). 44 * When it is set to N, swsusp will do its best to ensure the image 45 * size will not exceed N bytes, but if that is impossible, it will 46 * try to create the smallest image possible. 47 */ 48 unsigned long image_size = 500 * 1024 * 1024; 49 50 /* List of PBEs needed for restoring the pages that were allocated before 51 * the suspend and included in the suspend image, but have also been 52 * allocated by the "resume" kernel, so their contents cannot be written 53 * directly to their "original" page frames. 54 */ 55 struct pbe *restore_pblist; 56 57 /* Pointer to an auxiliary buffer (1 page) */ 58 static void *buffer; 59 60 /** 61 * @safe_needed - on resume, for storing the PBE list and the image, 62 * we can only use memory pages that do not conflict with the pages 63 * used before suspend. The unsafe pages have PageNosaveFree set 64 * and we count them using unsafe_pages. 65 * 66 * Each allocated image page is marked as PageNosave and PageNosaveFree 67 * so that swsusp_free() can release it. 
68 */ 69 70 #define PG_ANY 0 71 #define PG_SAFE 1 72 #define PG_UNSAFE_CLEAR 1 73 #define PG_UNSAFE_KEEP 0 74 75 static unsigned int allocated_unsafe_pages; 76 77 static void *get_image_page(gfp_t gfp_mask, int safe_needed) 78 { 79 void *res; 80 81 res = (void *)get_zeroed_page(gfp_mask); 82 if (safe_needed) 83 while (res && swsusp_page_is_free(virt_to_page(res))) { 84 /* The page is unsafe, mark it for swsusp_free() */ 85 swsusp_set_page_forbidden(virt_to_page(res)); 86 allocated_unsafe_pages++; 87 res = (void *)get_zeroed_page(gfp_mask); 88 } 89 if (res) { 90 swsusp_set_page_forbidden(virt_to_page(res)); 91 swsusp_set_page_free(virt_to_page(res)); 92 } 93 return res; 94 } 95 96 unsigned long get_safe_page(gfp_t gfp_mask) 97 { 98 return (unsigned long)get_image_page(gfp_mask, PG_SAFE); 99 } 100 101 static struct page *alloc_image_page(gfp_t gfp_mask) 102 { 103 struct page *page; 104 105 page = alloc_page(gfp_mask); 106 if (page) { 107 swsusp_set_page_forbidden(page); 108 swsusp_set_page_free(page); 109 } 110 return page; 111 } 112 113 /** 114 * free_image_page - free page represented by @addr, allocated with 115 * get_image_page (page flags set by it must be cleared) 116 */ 117 118 static inline void free_image_page(void *addr, int clear_nosave_free) 119 { 120 struct page *page; 121 122 BUG_ON(!virt_addr_valid(addr)); 123 124 page = virt_to_page(addr); 125 126 swsusp_unset_page_forbidden(page); 127 if (clear_nosave_free) 128 swsusp_unset_page_free(page); 129 130 __free_page(page); 131 } 132 133 /* struct linked_page is used to build chains of pages */ 134 135 #define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) 136 137 struct linked_page { 138 struct linked_page *next; 139 char data[LINKED_PAGE_DATA_SIZE]; 140 } __attribute__((packed)); 141 142 static inline void 143 free_list_of_pages(struct linked_page *list, int clear_page_nosave) 144 { 145 while (list) { 146 struct linked_page *lp = list->next; 147 148 free_image_page(list, clear_page_nosave); 149 list = lp; 150 } 151 } 152 153 /** 154 * struct chain_allocator is used for allocating small objects out of 155 * a linked list of pages called 'the chain'. 156 * 157 * The chain grows each time when there is no room for a new object in 158 * the current page. The allocated objects cannot be freed individually. 159 * It is only possible to free them all at once, by freeing the entire 160 * chain. 161 * 162 * NOTE: The chain allocator may be inefficient if the allocated objects 163 * are not much smaller than PAGE_SIZE. 
 */

struct chain_allocator {
	struct linked_page *chain;	/* the chain */
	unsigned int used_space;	/* total size of objects allocated out
					 * of the current page
					 */
	gfp_t gfp_mask;		/* mask for allocating pages */
	int safe_needed;	/* if set, only "safe" pages are allocated */
};

static void
chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
{
	ca->chain = NULL;
	ca->used_space = LINKED_PAGE_DATA_SIZE;
	ca->gfp_mask = gfp_mask;
	ca->safe_needed = safe_needed;
}

static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
{
	void *ret;

	if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
		struct linked_page *lp;

		lp = get_image_page(ca->gfp_mask, ca->safe_needed);
		if (!lp)
			return NULL;

		lp->next = ca->chain;
		ca->chain = lp;
		ca->used_space = 0;
	}
	ret = ca->chain->data + ca->used_space;
	ca->used_space += size;
	return ret;
}

/**
 * Data types related to memory bitmaps.
 *
 * Memory bitmap is a structure consisting of many linked lists of
 * objects.  The main list's elements are of type struct zone_bitmap
 * and each of them corresponds to one zone.  For each zone bitmap
 * object there is a list of objects of type struct bm_block that
 * represent each block of bitmap in which information is stored.
 *
 * struct memory_bitmap contains a pointer to the main list of zone
 * bitmap objects, a struct bm_position used for browsing the bitmap,
 * and a pointer to the list of pages used for allocating all of the
 * zone bitmap objects and bitmap block objects.
 *
 * NOTE: It has to be possible to lay out the bitmap in memory
 * using only allocations of order 0.  Additionally, the bitmap is
 * designed to work with an arbitrary number of zones (this is over the
 * top for now, but let's avoid making unnecessary assumptions ;-).
 *
 * struct zone_bitmap contains a pointer to a list of bitmap block
 * objects and a pointer to the bitmap block object that has been
 * most recently used for setting bits.  Additionally, it contains the
 * pfns that correspond to the start and end of the represented zone.
 *
 * struct bm_block contains a pointer to the memory page in which
 * information is stored (in the form of a block of bitmap).
 * It also contains the pfns that correspond to the start and end of
 * the represented memory area.
232 */ 233 234 #define BM_END_OF_MAP (~0UL) 235 236 #define BM_BITS_PER_BLOCK (PAGE_SIZE << 3) 237 238 struct bm_block { 239 struct list_head hook; /* hook into a list of bitmap blocks */ 240 unsigned long start_pfn; /* pfn represented by the first bit */ 241 unsigned long end_pfn; /* pfn represented by the last bit plus 1 */ 242 unsigned long *data; /* bitmap representing pages */ 243 }; 244 245 static inline unsigned long bm_block_bits(struct bm_block *bb) 246 { 247 return bb->end_pfn - bb->start_pfn; 248 } 249 250 /* strcut bm_position is used for browsing memory bitmaps */ 251 252 struct bm_position { 253 struct bm_block *block; 254 int bit; 255 }; 256 257 struct memory_bitmap { 258 struct list_head blocks; /* list of bitmap blocks */ 259 struct linked_page *p_list; /* list of pages used to store zone 260 * bitmap objects and bitmap block 261 * objects 262 */ 263 struct bm_position cur; /* most recently used bit position */ 264 }; 265 266 /* Functions that operate on memory bitmaps */ 267 268 static void memory_bm_position_reset(struct memory_bitmap *bm) 269 { 270 bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook); 271 bm->cur.bit = 0; 272 } 273 274 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free); 275 276 /** 277 * create_bm_block_list - create a list of block bitmap objects 278 * @nr_blocks - number of blocks to allocate 279 * @list - list to put the allocated blocks into 280 * @ca - chain allocator to be used for allocating memory 281 */ 282 static int create_bm_block_list(unsigned long pages, 283 struct list_head *list, 284 struct chain_allocator *ca) 285 { 286 unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK); 287 288 while (nr_blocks-- > 0) { 289 struct bm_block *bb; 290 291 bb = chain_alloc(ca, sizeof(struct bm_block)); 292 if (!bb) 293 return -ENOMEM; 294 list_add(&bb->hook, list); 295 } 296 297 return 0; 298 } 299 300 struct mem_extent { 301 struct list_head hook; 302 unsigned long start; 303 unsigned long end; 304 }; 305 306 /** 307 * free_mem_extents - free a list of memory extents 308 * @list - list of extents to empty 309 */ 310 static void free_mem_extents(struct list_head *list) 311 { 312 struct mem_extent *ext, *aux; 313 314 list_for_each_entry_safe(ext, aux, list, hook) { 315 list_del(&ext->hook); 316 kfree(ext); 317 } 318 } 319 320 /** 321 * create_mem_extents - create a list of memory extents representing 322 * contiguous ranges of PFNs 323 * @list - list to put the extents into 324 * @gfp_mask - mask to use for memory allocations 325 */ 326 static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) 327 { 328 struct zone *zone; 329 330 INIT_LIST_HEAD(list); 331 332 for_each_populated_zone(zone) { 333 unsigned long zone_start, zone_end; 334 struct mem_extent *ext, *cur, *aux; 335 336 zone_start = zone->zone_start_pfn; 337 zone_end = zone->zone_start_pfn + zone->spanned_pages; 338 339 list_for_each_entry(ext, list, hook) 340 if (zone_start <= ext->end) 341 break; 342 343 if (&ext->hook == list || zone_end < ext->start) { 344 /* New extent is necessary */ 345 struct mem_extent *new_ext; 346 347 new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask); 348 if (!new_ext) { 349 free_mem_extents(list); 350 return -ENOMEM; 351 } 352 new_ext->start = zone_start; 353 new_ext->end = zone_end; 354 list_add_tail(&new_ext->hook, &ext->hook); 355 continue; 356 } 357 358 /* Merge this zone's range of PFNs with the existing one */ 359 if (zone_start < ext->start) 360 ext->start = zone_start; 361 if (zone_end > 
ext->end) 362 ext->end = zone_end; 363 364 /* More merging may be possible */ 365 cur = ext; 366 list_for_each_entry_safe_continue(cur, aux, list, hook) { 367 if (zone_end < cur->start) 368 break; 369 if (zone_end < cur->end) 370 ext->end = cur->end; 371 list_del(&cur->hook); 372 kfree(cur); 373 } 374 } 375 376 return 0; 377 } 378 379 /** 380 * memory_bm_create - allocate memory for a memory bitmap 381 */ 382 static int 383 memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) 384 { 385 struct chain_allocator ca; 386 struct list_head mem_extents; 387 struct mem_extent *ext; 388 int error; 389 390 chain_init(&ca, gfp_mask, safe_needed); 391 INIT_LIST_HEAD(&bm->blocks); 392 393 error = create_mem_extents(&mem_extents, gfp_mask); 394 if (error) 395 return error; 396 397 list_for_each_entry(ext, &mem_extents, hook) { 398 struct bm_block *bb; 399 unsigned long pfn = ext->start; 400 unsigned long pages = ext->end - ext->start; 401 402 bb = list_entry(bm->blocks.prev, struct bm_block, hook); 403 404 error = create_bm_block_list(pages, bm->blocks.prev, &ca); 405 if (error) 406 goto Error; 407 408 list_for_each_entry_continue(bb, &bm->blocks, hook) { 409 bb->data = get_image_page(gfp_mask, safe_needed); 410 if (!bb->data) { 411 error = -ENOMEM; 412 goto Error; 413 } 414 415 bb->start_pfn = pfn; 416 if (pages >= BM_BITS_PER_BLOCK) { 417 pfn += BM_BITS_PER_BLOCK; 418 pages -= BM_BITS_PER_BLOCK; 419 } else { 420 /* This is executed only once in the loop */ 421 pfn += pages; 422 } 423 bb->end_pfn = pfn; 424 } 425 } 426 427 bm->p_list = ca.chain; 428 memory_bm_position_reset(bm); 429 Exit: 430 free_mem_extents(&mem_extents); 431 return error; 432 433 Error: 434 bm->p_list = ca.chain; 435 memory_bm_free(bm, PG_UNSAFE_CLEAR); 436 goto Exit; 437 } 438 439 /** 440 * memory_bm_free - free memory occupied by the memory bitmap @bm 441 */ 442 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) 443 { 444 struct bm_block *bb; 445 446 list_for_each_entry(bb, &bm->blocks, hook) 447 if (bb->data) 448 free_image_page(bb->data, clear_nosave_free); 449 450 free_list_of_pages(bm->p_list, clear_nosave_free); 451 452 INIT_LIST_HEAD(&bm->blocks); 453 } 454 455 /** 456 * memory_bm_find_bit - find the bit in the bitmap @bm that corresponds 457 * to given pfn. The cur_zone_bm member of @bm and the cur_block member 458 * of @bm->cur_zone_bm are updated. 459 */ 460 static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, 461 void **addr, unsigned int *bit_nr) 462 { 463 struct bm_block *bb; 464 465 /* 466 * Check if the pfn corresponds to the current bitmap block and find 467 * the block where it fits if this is not the case. 
468 */ 469 bb = bm->cur.block; 470 if (pfn < bb->start_pfn) 471 list_for_each_entry_continue_reverse(bb, &bm->blocks, hook) 472 if (pfn >= bb->start_pfn) 473 break; 474 475 if (pfn >= bb->end_pfn) 476 list_for_each_entry_continue(bb, &bm->blocks, hook) 477 if (pfn >= bb->start_pfn && pfn < bb->end_pfn) 478 break; 479 480 if (&bb->hook == &bm->blocks) 481 return -EFAULT; 482 483 /* The block has been found */ 484 bm->cur.block = bb; 485 pfn -= bb->start_pfn; 486 bm->cur.bit = pfn + 1; 487 *bit_nr = pfn; 488 *addr = bb->data; 489 return 0; 490 } 491 492 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) 493 { 494 void *addr; 495 unsigned int bit; 496 int error; 497 498 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 499 BUG_ON(error); 500 set_bit(bit, addr); 501 } 502 503 static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn) 504 { 505 void *addr; 506 unsigned int bit; 507 int error; 508 509 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 510 if (!error) 511 set_bit(bit, addr); 512 return error; 513 } 514 515 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) 516 { 517 void *addr; 518 unsigned int bit; 519 int error; 520 521 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 522 BUG_ON(error); 523 clear_bit(bit, addr); 524 } 525 526 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) 527 { 528 void *addr; 529 unsigned int bit; 530 int error; 531 532 error = memory_bm_find_bit(bm, pfn, &addr, &bit); 533 BUG_ON(error); 534 return test_bit(bit, addr); 535 } 536 537 static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) 538 { 539 void *addr; 540 unsigned int bit; 541 542 return !memory_bm_find_bit(bm, pfn, &addr, &bit); 543 } 544 545 /** 546 * memory_bm_next_pfn - find the pfn that corresponds to the next set bit 547 * in the bitmap @bm. If the pfn cannot be found, BM_END_OF_MAP is 548 * returned. 549 * 550 * It is required to run memory_bm_position_reset() before the first call to 551 * this function. 552 */ 553 554 static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) 555 { 556 struct bm_block *bb; 557 int bit; 558 559 bb = bm->cur.block; 560 do { 561 bit = bm->cur.bit; 562 bit = find_next_bit(bb->data, bm_block_bits(bb), bit); 563 if (bit < bm_block_bits(bb)) 564 goto Return_pfn; 565 566 bb = list_entry(bb->hook.next, struct bm_block, hook); 567 bm->cur.block = bb; 568 bm->cur.bit = 0; 569 } while (&bb->hook != &bm->blocks); 570 571 memory_bm_position_reset(bm); 572 return BM_END_OF_MAP; 573 574 Return_pfn: 575 bm->cur.bit = bit + 1; 576 return bb->start_pfn + bit; 577 } 578 579 /** 580 * This structure represents a range of page frames the contents of which 581 * should not be saved during the suspend. 
 */

struct nosave_region {
	struct list_head list;
	unsigned long start_pfn;
	unsigned long end_pfn;
};

static LIST_HEAD(nosave_regions);

/**
 * register_nosave_region - register a range of page frames the contents
 * of which should not be saved during the suspend (to be used in the early
 * initialization code)
 */

void __init
__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
			 int use_kmalloc)
{
	struct nosave_region *region;

	if (start_pfn >= end_pfn)
		return;

	if (!list_empty(&nosave_regions)) {
		/* Try to extend the previous region (they should be sorted) */
		region = list_entry(nosave_regions.prev,
					struct nosave_region, list);
		if (region->end_pfn == start_pfn) {
			region->end_pfn = end_pfn;
			goto Report;
		}
	}
	if (use_kmalloc) {
		/* during init, this shouldn't fail */
		region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
		BUG_ON(!region);
	} else
		/* This allocation cannot fail */
		region = alloc_bootmem_low(sizeof(struct nosave_region));
	region->start_pfn = start_pfn;
	region->end_pfn = end_pfn;
	list_add_tail(&region->list, &nosave_regions);
 Report:
	printk(KERN_INFO "PM: Registered nosave memory: %016lx - %016lx\n",
		start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
}

/*
 * Set bits in this map correspond to the page frames the contents of which
 * should not be saved during the suspend.
 */
static struct memory_bitmap *forbidden_pages_map;

/* Set bits in this map correspond to free page frames. */
static struct memory_bitmap *free_pages_map;

/*
 * Each page frame allocated for creating the image is marked by setting the
 * corresponding bits in forbidden_pages_map and free_pages_map simultaneously.
 */

void swsusp_set_page_free(struct page *page)
{
	if (free_pages_map)
		memory_bm_set_bit(free_pages_map, page_to_pfn(page));
}

static int swsusp_page_is_free(struct page *page)
{
	return free_pages_map ?
		memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
}

void swsusp_unset_page_free(struct page *page)
{
	if (free_pages_map)
		memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
}

static void swsusp_set_page_forbidden(struct page *page)
{
	if (forbidden_pages_map)
		memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
}

int swsusp_page_is_forbidden(struct page *page)
{
	return forbidden_pages_map ?
		memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
}

static void swsusp_unset_page_forbidden(struct page *page)
{
	if (forbidden_pages_map)
		memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
}

/**
 * mark_nosave_pages - set bits corresponding to the page frames the
 * contents of which should not be saved in a given bitmap.
684 */ 685 686 static void mark_nosave_pages(struct memory_bitmap *bm) 687 { 688 struct nosave_region *region; 689 690 if (list_empty(&nosave_regions)) 691 return; 692 693 list_for_each_entry(region, &nosave_regions, list) { 694 unsigned long pfn; 695 696 pr_debug("PM: Marking nosave pages: %016lx - %016lx\n", 697 region->start_pfn << PAGE_SHIFT, 698 region->end_pfn << PAGE_SHIFT); 699 700 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) 701 if (pfn_valid(pfn)) { 702 /* 703 * It is safe to ignore the result of 704 * mem_bm_set_bit_check() here, since we won't 705 * touch the PFNs for which the error is 706 * returned anyway. 707 */ 708 mem_bm_set_bit_check(bm, pfn); 709 } 710 } 711 } 712 713 /** 714 * create_basic_memory_bitmaps - create bitmaps needed for marking page 715 * frames that should not be saved and free page frames. The pointers 716 * forbidden_pages_map and free_pages_map are only modified if everything 717 * goes well, because we don't want the bits to be used before both bitmaps 718 * are set up. 719 */ 720 721 int create_basic_memory_bitmaps(void) 722 { 723 struct memory_bitmap *bm1, *bm2; 724 int error = 0; 725 726 BUG_ON(forbidden_pages_map || free_pages_map); 727 728 bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 729 if (!bm1) 730 return -ENOMEM; 731 732 error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY); 733 if (error) 734 goto Free_first_object; 735 736 bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL); 737 if (!bm2) 738 goto Free_first_bitmap; 739 740 error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY); 741 if (error) 742 goto Free_second_object; 743 744 forbidden_pages_map = bm1; 745 free_pages_map = bm2; 746 mark_nosave_pages(forbidden_pages_map); 747 748 pr_debug("PM: Basic memory bitmaps created\n"); 749 750 return 0; 751 752 Free_second_object: 753 kfree(bm2); 754 Free_first_bitmap: 755 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 756 Free_first_object: 757 kfree(bm1); 758 return -ENOMEM; 759 } 760 761 /** 762 * free_basic_memory_bitmaps - free memory bitmaps allocated by 763 * create_basic_memory_bitmaps(). The auxiliary pointers are necessary 764 * so that the bitmaps themselves are not referred to while they are being 765 * freed. 766 */ 767 768 void free_basic_memory_bitmaps(void) 769 { 770 struct memory_bitmap *bm1, *bm2; 771 772 BUG_ON(!(forbidden_pages_map && free_pages_map)); 773 774 bm1 = forbidden_pages_map; 775 bm2 = free_pages_map; 776 forbidden_pages_map = NULL; 777 free_pages_map = NULL; 778 memory_bm_free(bm1, PG_UNSAFE_CLEAR); 779 kfree(bm1); 780 memory_bm_free(bm2, PG_UNSAFE_CLEAR); 781 kfree(bm2); 782 783 pr_debug("PM: Basic memory bitmaps freed\n"); 784 } 785 786 /** 787 * snapshot_additional_pages - estimate the number of additional pages 788 * be needed for setting up the suspend image data structures for given 789 * zone (usually the returned value is greater than the exact number) 790 */ 791 792 unsigned int snapshot_additional_pages(struct zone *zone) 793 { 794 unsigned int res; 795 796 res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK); 797 res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE); 798 return 2 * res; 799 } 800 801 #ifdef CONFIG_HIGHMEM 802 /** 803 * count_free_highmem_pages - compute the total number of free highmem 804 * pages, system-wide. 
805 */ 806 807 static unsigned int count_free_highmem_pages(void) 808 { 809 struct zone *zone; 810 unsigned int cnt = 0; 811 812 for_each_populated_zone(zone) 813 if (is_highmem(zone)) 814 cnt += zone_page_state(zone, NR_FREE_PAGES); 815 816 return cnt; 817 } 818 819 /** 820 * saveable_highmem_page - Determine whether a highmem page should be 821 * included in the suspend image. 822 * 823 * We should save the page if it isn't Nosave or NosaveFree, or Reserved, 824 * and it isn't a part of a free chunk of pages. 825 */ 826 static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) 827 { 828 struct page *page; 829 830 if (!pfn_valid(pfn)) 831 return NULL; 832 833 page = pfn_to_page(pfn); 834 if (page_zone(page) != zone) 835 return NULL; 836 837 BUG_ON(!PageHighMem(page)); 838 839 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) || 840 PageReserved(page)) 841 return NULL; 842 843 return page; 844 } 845 846 /** 847 * count_highmem_pages - compute the total number of saveable highmem 848 * pages. 849 */ 850 851 static unsigned int count_highmem_pages(void) 852 { 853 struct zone *zone; 854 unsigned int n = 0; 855 856 for_each_zone(zone) { 857 unsigned long pfn, max_zone_pfn; 858 859 if (!is_highmem(zone)) 860 continue; 861 862 mark_free_pages(zone); 863 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; 864 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 865 if (saveable_highmem_page(zone, pfn)) 866 n++; 867 } 868 return n; 869 } 870 #else 871 static inline void *saveable_highmem_page(struct zone *z, unsigned long p) 872 { 873 return NULL; 874 } 875 #endif /* CONFIG_HIGHMEM */ 876 877 /** 878 * saveable_page - Determine whether a non-highmem page should be included 879 * in the suspend image. 880 * 881 * We should save the page if it isn't Nosave, and is not in the range 882 * of pages statically defined as 'unsaveable', and it isn't a part of 883 * a free chunk of pages. 884 */ 885 static struct page *saveable_page(struct zone *zone, unsigned long pfn) 886 { 887 struct page *page; 888 889 if (!pfn_valid(pfn)) 890 return NULL; 891 892 page = pfn_to_page(pfn); 893 if (page_zone(page) != zone) 894 return NULL; 895 896 BUG_ON(PageHighMem(page)); 897 898 if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page)) 899 return NULL; 900 901 if (PageReserved(page) 902 && (!kernel_page_present(page) || pfn_is_nosave(pfn))) 903 return NULL; 904 905 return page; 906 } 907 908 /** 909 * count_data_pages - compute the total number of saveable non-highmem 910 * pages. 911 */ 912 913 static unsigned int count_data_pages(void) 914 { 915 struct zone *zone; 916 unsigned long pfn, max_zone_pfn; 917 unsigned int n = 0; 918 919 for_each_zone(zone) { 920 if (is_highmem(zone)) 921 continue; 922 923 mark_free_pages(zone); 924 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; 925 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 926 if (saveable_page(zone, pfn)) 927 n++; 928 } 929 return n; 930 } 931 932 /* This is needed, because copy_page and memcpy are not usable for copying 933 * task structs. 934 */ 935 static inline void do_copy_page(long *dst, long *src) 936 { 937 int n; 938 939 for (n = PAGE_SIZE / sizeof(long); n; n--) 940 *dst++ = *src++; 941 } 942 943 944 /** 945 * safe_copy_page - check if the page we are going to copy is marked as 946 * present in the kernel page tables (this always is the case if 947 * CONFIG_DEBUG_PAGEALLOC is not set and in that case 948 * kernel_page_present() always returns 'true'). 
949 */ 950 static void safe_copy_page(void *dst, struct page *s_page) 951 { 952 if (kernel_page_present(s_page)) { 953 do_copy_page(dst, page_address(s_page)); 954 } else { 955 kernel_map_pages(s_page, 1, 1); 956 do_copy_page(dst, page_address(s_page)); 957 kernel_map_pages(s_page, 1, 0); 958 } 959 } 960 961 962 #ifdef CONFIG_HIGHMEM 963 static inline struct page * 964 page_is_saveable(struct zone *zone, unsigned long pfn) 965 { 966 return is_highmem(zone) ? 967 saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn); 968 } 969 970 static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 971 { 972 struct page *s_page, *d_page; 973 void *src, *dst; 974 975 s_page = pfn_to_page(src_pfn); 976 d_page = pfn_to_page(dst_pfn); 977 if (PageHighMem(s_page)) { 978 src = kmap_atomic(s_page, KM_USER0); 979 dst = kmap_atomic(d_page, KM_USER1); 980 do_copy_page(dst, src); 981 kunmap_atomic(src, KM_USER0); 982 kunmap_atomic(dst, KM_USER1); 983 } else { 984 if (PageHighMem(d_page)) { 985 /* Page pointed to by src may contain some kernel 986 * data modified by kmap_atomic() 987 */ 988 safe_copy_page(buffer, s_page); 989 dst = kmap_atomic(d_page, KM_USER0); 990 memcpy(dst, buffer, PAGE_SIZE); 991 kunmap_atomic(dst, KM_USER0); 992 } else { 993 safe_copy_page(page_address(d_page), s_page); 994 } 995 } 996 } 997 #else 998 #define page_is_saveable(zone, pfn) saveable_page(zone, pfn) 999 1000 static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) 1001 { 1002 safe_copy_page(page_address(pfn_to_page(dst_pfn)), 1003 pfn_to_page(src_pfn)); 1004 } 1005 #endif /* CONFIG_HIGHMEM */ 1006 1007 static void 1008 copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) 1009 { 1010 struct zone *zone; 1011 unsigned long pfn; 1012 1013 for_each_zone(zone) { 1014 unsigned long max_zone_pfn; 1015 1016 mark_free_pages(zone); 1017 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; 1018 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1019 if (page_is_saveable(zone, pfn)) 1020 memory_bm_set_bit(orig_bm, pfn); 1021 } 1022 memory_bm_position_reset(orig_bm); 1023 memory_bm_position_reset(copy_bm); 1024 for(;;) { 1025 pfn = memory_bm_next_pfn(orig_bm); 1026 if (unlikely(pfn == BM_END_OF_MAP)) 1027 break; 1028 copy_data_page(memory_bm_next_pfn(copy_bm), pfn); 1029 } 1030 } 1031 1032 /* Total number of image pages */ 1033 static unsigned int nr_copy_pages; 1034 /* Number of pages needed for saving the original pfns of the image pages */ 1035 static unsigned int nr_meta_pages; 1036 1037 /** 1038 * swsusp_free - free pages allocated for the suspend. 1039 * 1040 * Suspend pages are alocated before the atomic copy is made, so we 1041 * need to release them after the resume. 1042 */ 1043 1044 void swsusp_free(void) 1045 { 1046 struct zone *zone; 1047 unsigned long pfn, max_zone_pfn; 1048 1049 for_each_zone(zone) { 1050 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; 1051 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1052 if (pfn_valid(pfn)) { 1053 struct page *page = pfn_to_page(pfn); 1054 1055 if (swsusp_page_is_forbidden(page) && 1056 swsusp_page_is_free(page)) { 1057 swsusp_unset_page_forbidden(page); 1058 swsusp_unset_page_free(page); 1059 __free_page(page); 1060 } 1061 } 1062 } 1063 nr_copy_pages = 0; 1064 nr_meta_pages = 0; 1065 restore_pblist = NULL; 1066 buffer = NULL; 1067 } 1068 1069 /** 1070 * swsusp_shrink_memory - Try to free as much memory as needed 1071 * 1072 * ... 
but do not OOM-kill anyone 1073 * 1074 * Notice: all userland should be stopped before it is called, or 1075 * livelock is possible. 1076 */ 1077 1078 #define SHRINK_BITE 10000 1079 static inline unsigned long __shrink_memory(long tmp) 1080 { 1081 if (tmp > SHRINK_BITE) 1082 tmp = SHRINK_BITE; 1083 return shrink_all_memory(tmp); 1084 } 1085 1086 int swsusp_shrink_memory(void) 1087 { 1088 long tmp; 1089 struct zone *zone; 1090 unsigned long pages = 0; 1091 unsigned int i = 0; 1092 char *p = "-\\|/"; 1093 struct timeval start, stop; 1094 1095 printk(KERN_INFO "PM: Shrinking memory... "); 1096 do_gettimeofday(&start); 1097 do { 1098 long size, highmem_size; 1099 1100 highmem_size = count_highmem_pages(); 1101 size = count_data_pages() + PAGES_FOR_IO + SPARE_PAGES; 1102 tmp = size; 1103 size += highmem_size; 1104 for_each_populated_zone(zone) { 1105 tmp += snapshot_additional_pages(zone); 1106 if (is_highmem(zone)) { 1107 highmem_size -= 1108 zone_page_state(zone, NR_FREE_PAGES); 1109 } else { 1110 tmp -= zone_page_state(zone, NR_FREE_PAGES); 1111 tmp += zone->lowmem_reserve[ZONE_NORMAL]; 1112 } 1113 } 1114 1115 if (highmem_size < 0) 1116 highmem_size = 0; 1117 1118 tmp += highmem_size; 1119 if (tmp > 0) { 1120 tmp = __shrink_memory(tmp); 1121 if (!tmp) 1122 return -ENOMEM; 1123 pages += tmp; 1124 } else if (size > image_size / PAGE_SIZE) { 1125 tmp = __shrink_memory(size - (image_size / PAGE_SIZE)); 1126 pages += tmp; 1127 } 1128 printk("\b%c", p[i++%4]); 1129 } while (tmp > 0); 1130 do_gettimeofday(&stop); 1131 printk("\bdone (%lu pages freed)\n", pages); 1132 swsusp_show_speed(&start, &stop, pages, "Freed"); 1133 1134 return 0; 1135 } 1136 1137 #ifdef CONFIG_HIGHMEM 1138 /** 1139 * count_pages_for_highmem - compute the number of non-highmem pages 1140 * that will be necessary for creating copies of highmem pages. 1141 */ 1142 1143 static unsigned int count_pages_for_highmem(unsigned int nr_highmem) 1144 { 1145 unsigned int free_highmem = count_free_highmem_pages(); 1146 1147 if (free_highmem >= nr_highmem) 1148 nr_highmem = 0; 1149 else 1150 nr_highmem -= free_highmem; 1151 1152 return nr_highmem; 1153 } 1154 #else 1155 static unsigned int 1156 count_pages_for_highmem(unsigned int nr_highmem) { return 0; } 1157 #endif /* CONFIG_HIGHMEM */ 1158 1159 /** 1160 * enough_free_mem - Make sure we have enough free memory for the 1161 * snapshot image. 1162 */ 1163 1164 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) 1165 { 1166 struct zone *zone; 1167 unsigned int free = 0, meta = 0; 1168 1169 for_each_zone(zone) { 1170 meta += snapshot_additional_pages(zone); 1171 if (!is_highmem(zone)) 1172 free += zone_page_state(zone, NR_FREE_PAGES); 1173 } 1174 1175 nr_pages += count_pages_for_highmem(nr_highmem); 1176 pr_debug("PM: Normal pages needed: %u + %u + %u, available pages: %u\n", 1177 nr_pages, PAGES_FOR_IO, meta, free); 1178 1179 return free > nr_pages + PAGES_FOR_IO + meta; 1180 } 1181 1182 #ifdef CONFIG_HIGHMEM 1183 /** 1184 * get_highmem_buffer - if there are some highmem pages in the suspend 1185 * image, we may need the buffer to copy them and/or load their data. 1186 */ 1187 1188 static inline int get_highmem_buffer(int safe_needed) 1189 { 1190 buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed); 1191 return buffer ? 0 : -ENOMEM; 1192 } 1193 1194 /** 1195 * alloc_highmem_image_pages - allocate some highmem pages for the image. 
1196 * Try to allocate as many pages as needed, but if the number of free 1197 * highmem pages is lesser than that, allocate them all. 1198 */ 1199 1200 static inline unsigned int 1201 alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem) 1202 { 1203 unsigned int to_alloc = count_free_highmem_pages(); 1204 1205 if (to_alloc > nr_highmem) 1206 to_alloc = nr_highmem; 1207 1208 nr_highmem -= to_alloc; 1209 while (to_alloc-- > 0) { 1210 struct page *page; 1211 1212 page = alloc_image_page(__GFP_HIGHMEM); 1213 memory_bm_set_bit(bm, page_to_pfn(page)); 1214 } 1215 return nr_highmem; 1216 } 1217 #else 1218 static inline int get_highmem_buffer(int safe_needed) { return 0; } 1219 1220 static inline unsigned int 1221 alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } 1222 #endif /* CONFIG_HIGHMEM */ 1223 1224 /** 1225 * swsusp_alloc - allocate memory for the suspend image 1226 * 1227 * We first try to allocate as many highmem pages as there are 1228 * saveable highmem pages in the system. If that fails, we allocate 1229 * non-highmem pages for the copies of the remaining highmem ones. 1230 * 1231 * In this approach it is likely that the copies of highmem pages will 1232 * also be located in the high memory, because of the way in which 1233 * copy_data_pages() works. 1234 */ 1235 1236 static int 1237 swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, 1238 unsigned int nr_pages, unsigned int nr_highmem) 1239 { 1240 int error; 1241 1242 error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY); 1243 if (error) 1244 goto Free; 1245 1246 error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY); 1247 if (error) 1248 goto Free; 1249 1250 if (nr_highmem > 0) { 1251 error = get_highmem_buffer(PG_ANY); 1252 if (error) 1253 goto Free; 1254 1255 nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem); 1256 } 1257 while (nr_pages-- > 0) { 1258 struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD); 1259 1260 if (!page) 1261 goto Free; 1262 1263 memory_bm_set_bit(copy_bm, page_to_pfn(page)); 1264 } 1265 return 0; 1266 1267 Free: 1268 swsusp_free(); 1269 return -ENOMEM; 1270 } 1271 1272 /* Memory bitmap used for marking saveable pages (during suspend) or the 1273 * suspend image pages (during resume) 1274 */ 1275 static struct memory_bitmap orig_bm; 1276 /* Memory bitmap used on suspend for marking allocated pages that will contain 1277 * the copies of saveable pages. During resume it is initially used for 1278 * marking the suspend image pages, but then its set bits are duplicated in 1279 * @orig_bm and it is released. Next, on systems with high memory, it may be 1280 * used for marking "safe" highmem pages, but it has to be reinitialized for 1281 * this purpose. 
 */
static struct memory_bitmap copy_bm;

asmlinkage int swsusp_save(void)
{
	unsigned int nr_pages, nr_highmem;

	printk(KERN_INFO "PM: Creating hibernation image:\n");

	drain_local_pages(NULL);
	nr_pages = count_data_pages();
	nr_highmem = count_highmem_pages();
	printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);

	if (!enough_free_mem(nr_pages, nr_highmem)) {
		printk(KERN_ERR "PM: Not enough free memory\n");
		return -ENOMEM;
	}

	if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
		printk(KERN_ERR "PM: Memory allocation failed\n");
		return -ENOMEM;
	}

	/* During allocation of the suspend pagedir, new cold pages may appear.
	 * Kill them.
	 */
	drain_local_pages(NULL);
	copy_data_pages(&copy_bm, &orig_bm);

	/*
	 * End of critical section. From now on, we can write to memory,
	 * but we should not touch disk. This specially means we must _not_
	 * touch swap space! Except we must write out our image of course.
	 */

	nr_pages += nr_highmem;
	nr_copy_pages = nr_pages;
	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);

	printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n",
		nr_pages);

	return 0;
}

#ifndef CONFIG_ARCH_HIBERNATION_HEADER
static int init_header_complete(struct swsusp_info *info)
{
	memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
	info->version_code = LINUX_VERSION_CODE;
	return 0;
}

static char *check_image_kernel(struct swsusp_info *info)
{
	if (info->version_code != LINUX_VERSION_CODE)
		return "kernel version";
	if (strcmp(info->uts.sysname, init_utsname()->sysname))
		return "system type";
	if (strcmp(info->uts.release, init_utsname()->release))
		return "kernel release";
	if (strcmp(info->uts.version, init_utsname()->version))
		return "version";
	if (strcmp(info->uts.machine, init_utsname()->machine))
		return "machine";
	return NULL;
}
#endif /* CONFIG_ARCH_HIBERNATION_HEADER */

unsigned long snapshot_get_image_size(void)
{
	return nr_copy_pages + nr_meta_pages + 1;
}

static int init_header(struct swsusp_info *info)
{
	memset(info, 0, sizeof(struct swsusp_info));
	info->num_physpages = num_physpages;
	info->image_pages = nr_copy_pages;
	info->pages = snapshot_get_image_size();
	info->size = info->pages;
	info->size <<= PAGE_SHIFT;
	return init_header_complete(info);
}

/**
 * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
 * are stored in the array @buf[] (1 page at a time)
 */

static inline void
pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
{
	int j;

	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
		buf[j] = memory_bm_next_pfn(bm);
		if (unlikely(buf[j] == BM_END_OF_MAP))
			break;
	}
}

/**
 * snapshot_read_next - used for reading the system memory snapshot.
 *
 * On the first call to it @handle should point to a zeroed
 * snapshot_handle structure. The structure gets updated and a pointer
 * to it should be passed to this function every next time.
 *
 * The @count parameter should contain the number of bytes the caller
 * wants to read from the snapshot. It must not be zero.
1394 * 1395 * On success the function returns a positive number. Then, the caller 1396 * is allowed to read up to the returned number of bytes from the memory 1397 * location computed by the data_of() macro. The number returned 1398 * may be smaller than @count, but this only happens if the read would 1399 * cross a page boundary otherwise. 1400 * 1401 * The function returns 0 to indicate the end of data stream condition, 1402 * and a negative number is returned on error. In such cases the 1403 * structure pointed to by @handle is not updated and should not be used 1404 * any more. 1405 */ 1406 1407 int snapshot_read_next(struct snapshot_handle *handle, size_t count) 1408 { 1409 if (handle->cur > nr_meta_pages + nr_copy_pages) 1410 return 0; 1411 1412 if (!buffer) { 1413 /* This makes the buffer be freed by swsusp_free() */ 1414 buffer = get_image_page(GFP_ATOMIC, PG_ANY); 1415 if (!buffer) 1416 return -ENOMEM; 1417 } 1418 if (!handle->offset) { 1419 int error; 1420 1421 error = init_header((struct swsusp_info *)buffer); 1422 if (error) 1423 return error; 1424 handle->buffer = buffer; 1425 memory_bm_position_reset(&orig_bm); 1426 memory_bm_position_reset(©_bm); 1427 } 1428 if (handle->prev < handle->cur) { 1429 if (handle->cur <= nr_meta_pages) { 1430 memset(buffer, 0, PAGE_SIZE); 1431 pack_pfns(buffer, &orig_bm); 1432 } else { 1433 struct page *page; 1434 1435 page = pfn_to_page(memory_bm_next_pfn(©_bm)); 1436 if (PageHighMem(page)) { 1437 /* Highmem pages are copied to the buffer, 1438 * because we can't return with a kmapped 1439 * highmem page (we may not be called again). 1440 */ 1441 void *kaddr; 1442 1443 kaddr = kmap_atomic(page, KM_USER0); 1444 memcpy(buffer, kaddr, PAGE_SIZE); 1445 kunmap_atomic(kaddr, KM_USER0); 1446 handle->buffer = buffer; 1447 } else { 1448 handle->buffer = page_address(page); 1449 } 1450 } 1451 handle->prev = handle->cur; 1452 } 1453 handle->buf_offset = handle->cur_offset; 1454 if (handle->cur_offset + count >= PAGE_SIZE) { 1455 count = PAGE_SIZE - handle->cur_offset; 1456 handle->cur_offset = 0; 1457 handle->cur++; 1458 } else { 1459 handle->cur_offset += count; 1460 } 1461 handle->offset += count; 1462 return count; 1463 } 1464 1465 /** 1466 * mark_unsafe_pages - mark the pages that cannot be used for storing 1467 * the image during resume, because they conflict with the pages that 1468 * had been used before suspend 1469 */ 1470 1471 static int mark_unsafe_pages(struct memory_bitmap *bm) 1472 { 1473 struct zone *zone; 1474 unsigned long pfn, max_zone_pfn; 1475 1476 /* Clear page flags */ 1477 for_each_zone(zone) { 1478 max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages; 1479 for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) 1480 if (pfn_valid(pfn)) 1481 swsusp_unset_page_free(pfn_to_page(pfn)); 1482 } 1483 1484 /* Mark pages that correspond to the "original" pfns as "unsafe" */ 1485 memory_bm_position_reset(bm); 1486 do { 1487 pfn = memory_bm_next_pfn(bm); 1488 if (likely(pfn != BM_END_OF_MAP)) { 1489 if (likely(pfn_valid(pfn))) 1490 swsusp_set_page_free(pfn_to_page(pfn)); 1491 else 1492 return -EFAULT; 1493 } 1494 } while (pfn != BM_END_OF_MAP); 1495 1496 allocated_unsafe_pages = 0; 1497 1498 return 0; 1499 } 1500 1501 static void 1502 duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src) 1503 { 1504 unsigned long pfn; 1505 1506 memory_bm_position_reset(src); 1507 pfn = memory_bm_next_pfn(src); 1508 while (pfn != BM_END_OF_MAP) { 1509 memory_bm_set_bit(dst, pfn); 1510 pfn = memory_bm_next_pfn(src); 1511 } 1512 } 
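/*
 * Illustrative sketch (not part of the original file): the canonical way to
 * walk every PFN recorded in a memory bitmap, as done by
 * duplicate_memory_bitmap() above and by copy_data_pages().  The position
 * must be reset before the first memory_bm_next_pfn() call.  The helper name
 * is hypothetical and the function is never called; it only documents the
 * iteration idiom.
 */
static unsigned int __maybe_unused memory_bm_count_set_bits(struct memory_bitmap *bm)
{
	unsigned long pfn;
	unsigned int cnt = 0;

	memory_bm_position_reset(bm);
	for (pfn = memory_bm_next_pfn(bm); pfn != BM_END_OF_MAP;
	     pfn = memory_bm_next_pfn(bm))
		cnt++;

	return cnt;
}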
1513 1514 static int check_header(struct swsusp_info *info) 1515 { 1516 char *reason; 1517 1518 reason = check_image_kernel(info); 1519 if (!reason && info->num_physpages != num_physpages) 1520 reason = "memory size"; 1521 if (reason) { 1522 printk(KERN_ERR "PM: Image mismatch: %s\n", reason); 1523 return -EPERM; 1524 } 1525 return 0; 1526 } 1527 1528 /** 1529 * load header - check the image header and copy data from it 1530 */ 1531 1532 static int 1533 load_header(struct swsusp_info *info) 1534 { 1535 int error; 1536 1537 restore_pblist = NULL; 1538 error = check_header(info); 1539 if (!error) { 1540 nr_copy_pages = info->image_pages; 1541 nr_meta_pages = info->pages - info->image_pages - 1; 1542 } 1543 return error; 1544 } 1545 1546 /** 1547 * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set 1548 * the corresponding bit in the memory bitmap @bm 1549 */ 1550 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) 1551 { 1552 int j; 1553 1554 for (j = 0; j < PAGE_SIZE / sizeof(long); j++) { 1555 if (unlikely(buf[j] == BM_END_OF_MAP)) 1556 break; 1557 1558 if (memory_bm_pfn_present(bm, buf[j])) 1559 memory_bm_set_bit(bm, buf[j]); 1560 else 1561 return -EFAULT; 1562 } 1563 1564 return 0; 1565 } 1566 1567 /* List of "safe" pages that may be used to store data loaded from the suspend 1568 * image 1569 */ 1570 static struct linked_page *safe_pages_list; 1571 1572 #ifdef CONFIG_HIGHMEM 1573 /* struct highmem_pbe is used for creating the list of highmem pages that 1574 * should be restored atomically during the resume from disk, because the page 1575 * frames they have occupied before the suspend are in use. 1576 */ 1577 struct highmem_pbe { 1578 struct page *copy_page; /* data is here now */ 1579 struct page *orig_page; /* data was here before the suspend */ 1580 struct highmem_pbe *next; 1581 }; 1582 1583 /* List of highmem PBEs needed for restoring the highmem pages that were 1584 * allocated before the suspend and included in the suspend image, but have 1585 * also been allocated by the "resume" kernel, so their contents cannot be 1586 * written directly to their "original" page frames. 1587 */ 1588 static struct highmem_pbe *highmem_pblist; 1589 1590 /** 1591 * count_highmem_image_pages - compute the number of highmem pages in the 1592 * suspend image. The bits in the memory bitmap @bm that correspond to the 1593 * image pages are assumed to be set. 1594 */ 1595 1596 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) 1597 { 1598 unsigned long pfn; 1599 unsigned int cnt = 0; 1600 1601 memory_bm_position_reset(bm); 1602 pfn = memory_bm_next_pfn(bm); 1603 while (pfn != BM_END_OF_MAP) { 1604 if (PageHighMem(pfn_to_page(pfn))) 1605 cnt++; 1606 1607 pfn = memory_bm_next_pfn(bm); 1608 } 1609 return cnt; 1610 } 1611 1612 /** 1613 * prepare_highmem_image - try to allocate as many highmem pages as 1614 * there are highmem image pages (@nr_highmem_p points to the variable 1615 * containing the number of highmem image pages). The pages that are 1616 * "safe" (ie. will not be overwritten when the suspend image is 1617 * restored) have the corresponding bits set in @bm (it must be 1618 * unitialized). 1619 * 1620 * NOTE: This function should not be called if there are no highmem 1621 * image pages. 
1622 */ 1623 1624 static unsigned int safe_highmem_pages; 1625 1626 static struct memory_bitmap *safe_highmem_bm; 1627 1628 static int 1629 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) 1630 { 1631 unsigned int to_alloc; 1632 1633 if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE)) 1634 return -ENOMEM; 1635 1636 if (get_highmem_buffer(PG_SAFE)) 1637 return -ENOMEM; 1638 1639 to_alloc = count_free_highmem_pages(); 1640 if (to_alloc > *nr_highmem_p) 1641 to_alloc = *nr_highmem_p; 1642 else 1643 *nr_highmem_p = to_alloc; 1644 1645 safe_highmem_pages = 0; 1646 while (to_alloc-- > 0) { 1647 struct page *page; 1648 1649 page = alloc_page(__GFP_HIGHMEM); 1650 if (!swsusp_page_is_free(page)) { 1651 /* The page is "safe", set its bit the bitmap */ 1652 memory_bm_set_bit(bm, page_to_pfn(page)); 1653 safe_highmem_pages++; 1654 } 1655 /* Mark the page as allocated */ 1656 swsusp_set_page_forbidden(page); 1657 swsusp_set_page_free(page); 1658 } 1659 memory_bm_position_reset(bm); 1660 safe_highmem_bm = bm; 1661 return 0; 1662 } 1663 1664 /** 1665 * get_highmem_page_buffer - for given highmem image page find the buffer 1666 * that suspend_write_next() should set for its caller to write to. 1667 * 1668 * If the page is to be saved to its "original" page frame or a copy of 1669 * the page is to be made in the highmem, @buffer is returned. Otherwise, 1670 * the copy of the page is to be made in normal memory, so the address of 1671 * the copy is returned. 1672 * 1673 * If @buffer is returned, the caller of suspend_write_next() will write 1674 * the page's contents to @buffer, so they will have to be copied to the 1675 * right location on the next call to suspend_write_next() and it is done 1676 * with the help of copy_last_highmem_page(). For this purpose, if 1677 * @buffer is returned, @last_highmem page is set to the page to which 1678 * the data will have to be copied from @buffer. 1679 */ 1680 1681 static struct page *last_highmem_page; 1682 1683 static void * 1684 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) 1685 { 1686 struct highmem_pbe *pbe; 1687 void *kaddr; 1688 1689 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { 1690 /* We have allocated the "original" page frame and we can 1691 * use it directly to store the loaded page. 1692 */ 1693 last_highmem_page = page; 1694 return buffer; 1695 } 1696 /* The "original" page frame has not been allocated and we have to 1697 * use a "safe" page frame to store the loaded page. 1698 */ 1699 pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); 1700 if (!pbe) { 1701 swsusp_free(); 1702 return ERR_PTR(-ENOMEM); 1703 } 1704 pbe->orig_page = page; 1705 if (safe_highmem_pages > 0) { 1706 struct page *tmp; 1707 1708 /* Copy of the page will be stored in high memory */ 1709 kaddr = buffer; 1710 tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm)); 1711 safe_highmem_pages--; 1712 last_highmem_page = tmp; 1713 pbe->copy_page = tmp; 1714 } else { 1715 /* Copy of the page will be stored in normal memory */ 1716 kaddr = safe_pages_list; 1717 safe_pages_list = safe_pages_list->next; 1718 pbe->copy_page = virt_to_page(kaddr); 1719 } 1720 pbe->next = highmem_pblist; 1721 highmem_pblist = pbe; 1722 return kaddr; 1723 } 1724 1725 /** 1726 * copy_last_highmem_page - copy the contents of a highmem image from 1727 * @buffer, where the caller of snapshot_write_next() has place them, 1728 * to the right location represented by @last_highmem_page . 
1729 */ 1730 1731 static void copy_last_highmem_page(void) 1732 { 1733 if (last_highmem_page) { 1734 void *dst; 1735 1736 dst = kmap_atomic(last_highmem_page, KM_USER0); 1737 memcpy(dst, buffer, PAGE_SIZE); 1738 kunmap_atomic(dst, KM_USER0); 1739 last_highmem_page = NULL; 1740 } 1741 } 1742 1743 static inline int last_highmem_page_copied(void) 1744 { 1745 return !last_highmem_page; 1746 } 1747 1748 static inline void free_highmem_data(void) 1749 { 1750 if (safe_highmem_bm) 1751 memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR); 1752 1753 if (buffer) 1754 free_image_page(buffer, PG_UNSAFE_CLEAR); 1755 } 1756 #else 1757 static inline int get_safe_write_buffer(void) { return 0; } 1758 1759 static unsigned int 1760 count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } 1761 1762 static inline int 1763 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) 1764 { 1765 return 0; 1766 } 1767 1768 static inline void * 1769 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) 1770 { 1771 return ERR_PTR(-EINVAL); 1772 } 1773 1774 static inline void copy_last_highmem_page(void) {} 1775 static inline int last_highmem_page_copied(void) { return 1; } 1776 static inline void free_highmem_data(void) {} 1777 #endif /* CONFIG_HIGHMEM */ 1778 1779 /** 1780 * prepare_image - use the memory bitmap @bm to mark the pages that will 1781 * be overwritten in the process of restoring the system memory state 1782 * from the suspend image ("unsafe" pages) and allocate memory for the 1783 * image. 1784 * 1785 * The idea is to allocate a new memory bitmap first and then allocate 1786 * as many pages as needed for the image data, but not to assign these 1787 * pages to specific tasks initially. Instead, we just mark them as 1788 * allocated and create a lists of "safe" pages that will be used 1789 * later. On systems with high memory a list of "safe" highmem pages is 1790 * also created. 1791 */ 1792 1793 #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) 1794 1795 static int 1796 prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) 1797 { 1798 unsigned int nr_pages, nr_highmem; 1799 struct linked_page *sp_list, *lp; 1800 int error; 1801 1802 /* If there is no highmem, the buffer will not be necessary */ 1803 free_image_page(buffer, PG_UNSAFE_CLEAR); 1804 buffer = NULL; 1805 1806 nr_highmem = count_highmem_image_pages(bm); 1807 error = mark_unsafe_pages(bm); 1808 if (error) 1809 goto Free; 1810 1811 error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); 1812 if (error) 1813 goto Free; 1814 1815 duplicate_memory_bitmap(new_bm, bm); 1816 memory_bm_free(bm, PG_UNSAFE_KEEP); 1817 if (nr_highmem > 0) { 1818 error = prepare_highmem_image(bm, &nr_highmem); 1819 if (error) 1820 goto Free; 1821 } 1822 /* Reserve some safe pages for potential later use. 1823 * 1824 * NOTE: This way we make sure there will be enough safe pages for the 1825 * chain_alloc() in get_buffer(). It is a bit wasteful, but 1826 * nr_copy_pages cannot be greater than 50% of the memory anyway. 
1827 */ 1828 sp_list = NULL; 1829 /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ 1830 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; 1831 nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); 1832 while (nr_pages > 0) { 1833 lp = get_image_page(GFP_ATOMIC, PG_SAFE); 1834 if (!lp) { 1835 error = -ENOMEM; 1836 goto Free; 1837 } 1838 lp->next = sp_list; 1839 sp_list = lp; 1840 nr_pages--; 1841 } 1842 /* Preallocate memory for the image */ 1843 safe_pages_list = NULL; 1844 nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; 1845 while (nr_pages > 0) { 1846 lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); 1847 if (!lp) { 1848 error = -ENOMEM; 1849 goto Free; 1850 } 1851 if (!swsusp_page_is_free(virt_to_page(lp))) { 1852 /* The page is "safe", add it to the list */ 1853 lp->next = safe_pages_list; 1854 safe_pages_list = lp; 1855 } 1856 /* Mark the page as allocated */ 1857 swsusp_set_page_forbidden(virt_to_page(lp)); 1858 swsusp_set_page_free(virt_to_page(lp)); 1859 nr_pages--; 1860 } 1861 /* Free the reserved safe pages so that chain_alloc() can use them */ 1862 while (sp_list) { 1863 lp = sp_list->next; 1864 free_image_page(sp_list, PG_UNSAFE_CLEAR); 1865 sp_list = lp; 1866 } 1867 return 0; 1868 1869 Free: 1870 swsusp_free(); 1871 return error; 1872 } 1873 1874 /** 1875 * get_buffer - compute the address that snapshot_write_next() should 1876 * set for its caller to write to. 1877 */ 1878 1879 static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) 1880 { 1881 struct pbe *pbe; 1882 struct page *page; 1883 unsigned long pfn = memory_bm_next_pfn(bm); 1884 1885 if (pfn == BM_END_OF_MAP) 1886 return ERR_PTR(-EFAULT); 1887 1888 page = pfn_to_page(pfn); 1889 if (PageHighMem(page)) 1890 return get_highmem_page_buffer(page, ca); 1891 1892 if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) 1893 /* We have allocated the "original" page frame and we can 1894 * use it directly to store the loaded page. 1895 */ 1896 return page_address(page); 1897 1898 /* The "original" page frame has not been allocated and we have to 1899 * use a "safe" page frame to store the loaded page. 1900 */ 1901 pbe = chain_alloc(ca, sizeof(struct pbe)); 1902 if (!pbe) { 1903 swsusp_free(); 1904 return ERR_PTR(-ENOMEM); 1905 } 1906 pbe->orig_address = page_address(page); 1907 pbe->address = safe_pages_list; 1908 safe_pages_list = safe_pages_list->next; 1909 pbe->next = restore_pblist; 1910 restore_pblist = pbe; 1911 return pbe->address; 1912 } 1913 1914 /** 1915 * snapshot_write_next - used for writing the system memory snapshot. 1916 * 1917 * On the first call to it @handle should point to a zeroed 1918 * snapshot_handle structure. The structure gets updated and a pointer 1919 * to it should be passed to this function every next time. 1920 * 1921 * The @count parameter should contain the number of bytes the caller 1922 * wants to write to the image. It must not be zero. 1923 * 1924 * On success the function returns a positive number. Then, the caller 1925 * is allowed to write up to the returned number of bytes to the memory 1926 * location computed by the data_of() macro. The number returned 1927 * may be smaller than @count, but this only happens if the write would 1928 * cross a page boundary otherwise. 1929 * 1930 * The function returns 0 to indicate the "end of file" condition, 1931 * and a negative number is returned on error. 
In such cases the
 * structure pointed to by @handle is not updated and should not be used
 * any more.
 */

int snapshot_write_next(struct snapshot_handle *handle, size_t count)
{
	static struct chain_allocator ca;
	int error = 0;

	/* Check if we have already loaded the entire image */
	if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
		return 0;

	if (handle->offset == 0) {
		if (!buffer)
			/* This makes the buffer be freed by swsusp_free() */
			buffer = get_image_page(GFP_ATOMIC, PG_ANY);

		if (!buffer)
			return -ENOMEM;

		handle->buffer = buffer;
	}
	handle->sync_read = 1;
	if (handle->prev < handle->cur) {
		if (handle->prev == 0) {
			error = load_header(buffer);
			if (error)
				return error;

			error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
			if (error)
				return error;

		} else if (handle->prev <= nr_meta_pages) {
			error = unpack_orig_pfns(buffer, &copy_bm);
			if (error)
				return error;

			if (handle->prev == nr_meta_pages) {
				error = prepare_image(&orig_bm, &copy_bm);
				if (error)
					return error;

				chain_init(&ca, GFP_ATOMIC, PG_SAFE);
				memory_bm_position_reset(&orig_bm);
				restore_pblist = NULL;
				handle->buffer = get_buffer(&orig_bm, &ca);
				handle->sync_read = 0;
				if (IS_ERR(handle->buffer))
					return PTR_ERR(handle->buffer);
			}
		} else {
			copy_last_highmem_page();
			handle->buffer = get_buffer(&orig_bm, &ca);
			if (IS_ERR(handle->buffer))
				return PTR_ERR(handle->buffer);
			if (handle->buffer != buffer)
				handle->sync_read = 0;
		}
		handle->prev = handle->cur;
	}
	handle->buf_offset = handle->cur_offset;
	if (handle->cur_offset + count >= PAGE_SIZE) {
		count = PAGE_SIZE - handle->cur_offset;
		handle->cur_offset = 0;
		handle->cur++;
	} else {
		handle->cur_offset += count;
	}
	handle->offset += count;
	return count;
}

/**
 * snapshot_write_finalize - must be called after the last call to
 * snapshot_write_next() in case the last page in the image happens
 * to be a highmem page and its contents should be stored in the
 * highmem.  Additionally, it releases the memory that will not be
 * used any more.
2012 */ 2013 2014 void snapshot_write_finalize(struct snapshot_handle *handle) 2015 { 2016 copy_last_highmem_page(); 2017 /* Free only if we have loaded the image entirely */ 2018 if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) { 2019 memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); 2020 free_highmem_data(); 2021 } 2022 } 2023 2024 int snapshot_image_loaded(struct snapshot_handle *handle) 2025 { 2026 return !(!nr_copy_pages || !last_highmem_page_copied() || 2027 handle->cur <= nr_meta_pages + nr_copy_pages); 2028 } 2029 2030 #ifdef CONFIG_HIGHMEM 2031 /* Assumes that @buf is ready and points to a "safe" page */ 2032 static inline void 2033 swap_two_pages_data(struct page *p1, struct page *p2, void *buf) 2034 { 2035 void *kaddr1, *kaddr2; 2036 2037 kaddr1 = kmap_atomic(p1, KM_USER0); 2038 kaddr2 = kmap_atomic(p2, KM_USER1); 2039 memcpy(buf, kaddr1, PAGE_SIZE); 2040 memcpy(kaddr1, kaddr2, PAGE_SIZE); 2041 memcpy(kaddr2, buf, PAGE_SIZE); 2042 kunmap_atomic(kaddr1, KM_USER0); 2043 kunmap_atomic(kaddr2, KM_USER1); 2044 } 2045 2046 /** 2047 * restore_highmem - for each highmem page that was allocated before 2048 * the suspend and included in the suspend image, and also has been 2049 * allocated by the "resume" kernel swap its current (ie. "before 2050 * resume") contents with the previous (ie. "before suspend") one. 2051 * 2052 * If the resume eventually fails, we can call this function once 2053 * again and restore the "before resume" highmem state. 2054 */ 2055 2056 int restore_highmem(void) 2057 { 2058 struct highmem_pbe *pbe = highmem_pblist; 2059 void *buf; 2060 2061 if (!pbe) 2062 return 0; 2063 2064 buf = get_image_page(GFP_ATOMIC, PG_SAFE); 2065 if (!buf) 2066 return -ENOMEM; 2067 2068 while (pbe) { 2069 swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf); 2070 pbe = pbe->next; 2071 } 2072 free_image_page(buf, PG_UNSAFE_CLEAR); 2073 return 0; 2074 } 2075 #endif /* CONFIG_HIGHMEM */ 2076
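/*
 * Illustrative sketch (not part of the original file): how a caller such as
 * the swap or userland interface is expected to drive snapshot_read_next(),
 * per the protocol described in its kerneldoc above.  Error handling is
 * reduced to the minimum; both the helper and its write_page() callback are
 * hypothetical, and data_of() is the accessor defined in power.h.
 */
static int __maybe_unused snapshot_dump_sketch(struct snapshot_handle *handle,
					       int (*write_page)(void *buf))
{
	int ret;

	/* The handle must be zeroed before the first call */
	memset(handle, 0, sizeof(struct snapshot_handle));
	for (;;) {
		ret = snapshot_read_next(handle, PAGE_SIZE);
		if (ret <= 0)
			return ret;	/* 0 means end of image, < 0 is an error */
		ret = write_page(data_of(*handle));
		if (ret)
			return ret;
	}
}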