xref: /linux/kernel/power/snapshot.c (revision 757dea93e136b219af09d3cd56a81063fdbdef1a)
1 /*
2  * linux/kernel/power/snapshot.c
3  *
4  * This file provides system snapshot/restore functionality for swsusp.
5  *
6  * Copyright (C) 1998-2005 Pavel Machek <pavel@suse.cz>
7  * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl>
8  *
9  * This file is released under the GPLv2.
10  *
11  */
12 
13 #include <linux/version.h>
14 #include <linux/module.h>
15 #include <linux/mm.h>
16 #include <linux/suspend.h>
17 #include <linux/smp_lock.h>
18 #include <linux/delay.h>
19 #include <linux/bitops.h>
20 #include <linux/spinlock.h>
21 #include <linux/kernel.h>
22 #include <linux/pm.h>
23 #include <linux/device.h>
24 #include <linux/init.h>
25 #include <linux/bootmem.h>
26 #include <linux/syscalls.h>
27 #include <linux/console.h>
28 #include <linux/highmem.h>
29 
30 #include <asm/uaccess.h>
31 #include <asm/mmu_context.h>
32 #include <asm/pgtable.h>
33 #include <asm/tlbflush.h>
34 #include <asm/io.h>
35 
36 #include "power.h"
37 
38 static int swsusp_page_is_free(struct page *);
39 static void swsusp_set_page_forbidden(struct page *);
40 static void swsusp_unset_page_forbidden(struct page *);
41 
42 /* List of PBEs needed for restoring the pages that were allocated before
43  * the suspend and included in the suspend image, but have also been
44  * allocated by the "resume" kernel, so their contents cannot be written
45  * directly to their "original" page frames.
46  */
47 struct pbe *restore_pblist;
48 
49 /* Pointer to an auxiliary buffer (1 page) */
50 static void *buffer;
51 
52 /**
53  *	@safe_needed - on resume, for storing the PBE list and the image,
54  *	we can only use memory pages that do not conflict with the pages
55  *	used before suspend.  The unsafe pages have PageNosaveFree set
56  *	and we count them using unsafe_pages.
57  *
58  *	Each allocated image page is marked as PageNosave and PageNosaveFree
59  *	so that swsusp_free() can release it.
60  */
61 
62 #define PG_ANY		0
63 #define PG_SAFE		1
64 #define PG_UNSAFE_CLEAR	1
65 #define PG_UNSAFE_KEEP	0
66 
67 static unsigned int allocated_unsafe_pages;
68 
69 static void *get_image_page(gfp_t gfp_mask, int safe_needed)
70 {
71 	void *res;
72 
73 	res = (void *)get_zeroed_page(gfp_mask);
74 	if (safe_needed)
75 		while (res && swsusp_page_is_free(virt_to_page(res))) {
76 			/* The page is unsafe, mark it for swsusp_free() */
77 			swsusp_set_page_forbidden(virt_to_page(res));
78 			allocated_unsafe_pages++;
79 			res = (void *)get_zeroed_page(gfp_mask);
80 		}
81 	if (res) {
82 		swsusp_set_page_forbidden(virt_to_page(res));
83 		swsusp_set_page_free(virt_to_page(res));
84 	}
85 	return res;
86 }
87 
88 unsigned long get_safe_page(gfp_t gfp_mask)
89 {
90 	return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
91 }
92 
93 static struct page *alloc_image_page(gfp_t gfp_mask)
94 {
95 	struct page *page;
96 
97 	page = alloc_page(gfp_mask);
98 	if (page) {
99 		swsusp_set_page_forbidden(page);
100 		swsusp_set_page_free(page);
101 	}
102 	return page;
103 }
104 
105 /**
106  *	free_image_page - free page represented by @addr, allocated with
107  *	get_image_page (page flags set by it must be cleared)
108  */
109 
110 static inline void free_image_page(void *addr, int clear_nosave_free)
111 {
112 	struct page *page;
113 
114 	BUG_ON(!virt_addr_valid(addr));
115 
116 	page = virt_to_page(addr);
117 
118 	swsusp_unset_page_forbidden(page);
119 	if (clear_nosave_free)
120 		swsusp_unset_page_free(page);
121 
122 	__free_page(page);
123 }
124 
125 /* struct linked_page is used to build chains of pages */
126 
127 #define LINKED_PAGE_DATA_SIZE	(PAGE_SIZE - sizeof(void *))
128 
129 struct linked_page {
130 	struct linked_page *next;
131 	char data[LINKED_PAGE_DATA_SIZE];
132 } __attribute__((packed));
133 
134 static inline void
135 free_list_of_pages(struct linked_page *list, int clear_page_nosave)
136 {
137 	while (list) {
138 		struct linked_page *lp = list->next;
139 
140 		free_image_page(list, clear_page_nosave);
141 		list = lp;
142 	}
143 }
144 
145 /**
146   *	struct chain_allocator is used for allocating small objects out of
147   *	a linked list of pages called 'the chain'.
148   *
149   *	The chain grows each time when there is no room for a new object in
150   *	the current page.  The allocated objects cannot be freed individually.
151   *	It is only possible to free them all at once, by freeing the entire
152   *	chain.
153   *
154   *	NOTE: The chain allocator may be inefficient if the allocated objects
155   *	are not much smaller than PAGE_SIZE.
156   */
157 
158 struct chain_allocator {
159 	struct linked_page *chain;	/* the chain */
160 	unsigned int used_space;	/* total size of objects allocated out
161 					 * of the current page
162 					 */
163 	gfp_t gfp_mask;		/* mask for allocating pages */
164 	int safe_needed;	/* if set, only "safe" pages are allocated */
165 };
166 
167 static void
168 chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
169 {
170 	ca->chain = NULL;
171 	ca->used_space = LINKED_PAGE_DATA_SIZE;
172 	ca->gfp_mask = gfp_mask;
173 	ca->safe_needed = safe_needed;
174 }
175 
176 static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
177 {
178 	void *ret;
179 
180 	if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
181 		struct linked_page *lp;
182 
183 		lp = get_image_page(ca->gfp_mask, ca->safe_needed);
184 		if (!lp)
185 			return NULL;
186 
187 		lp->next = ca->chain;
188 		ca->chain = lp;
189 		ca->used_space = 0;
190 	}
191 	ret = ca->chain->data + ca->used_space;
192 	ca->used_space += size;
193 	return ret;
194 }
195 
196 static void chain_free(struct chain_allocator *ca, int clear_page_nosave)
197 {
198 	free_list_of_pages(ca->chain, clear_page_nosave);
199 	memset(ca, 0, sizeof(struct chain_allocator));
200 }
201 
202 /**
203  *	Data types related to memory bitmaps.
204  *
205  *	Memory bitmap is a structure consiting of many linked lists of
206  *	objects.  The main list's elements are of type struct zone_bitmap
207  *	and each of them corresonds to one zone.  For each zone bitmap
208  *	object there is a list of objects of type struct bm_block that
209  *	represent each blocks of bit chunks in which information is
210  *	stored.
211  *
212  *	struct memory_bitmap contains a pointer to the main list of zone
213  *	bitmap objects, a struct bm_position used for browsing the bitmap,
214  *	and a pointer to the list of pages used for allocating all of the
215  *	zone bitmap objects and bitmap block objects.
216  *
217  *	NOTE: It has to be possible to lay out the bitmap in memory
218  *	using only allocations of order 0.  Additionally, the bitmap is
219  *	designed to work with arbitrary number of zones (this is over the
220  *	top for now, but let's avoid making unnecessary assumptions ;-).
221  *
222  *	struct zone_bitmap contains a pointer to a list of bitmap block
223  *	objects and a pointer to the bitmap block object that has been
224  *	most recently used for setting bits.  Additionally, it contains the
225  *	pfns that correspond to the start and end of the represented zone.
226  *
227  *	struct bm_block contains a pointer to the memory page in which
228  *	information is stored (in the form of a block of bit chunks
229  *	of type unsigned long each).  It also contains the pfns that
230  *	correspond to the start and end of the represented memory area and
231  *	the number of bit chunks in the block.
232  */
233 
234 #define BM_END_OF_MAP	(~0UL)
235 
236 #define BM_CHUNKS_PER_BLOCK	(PAGE_SIZE / sizeof(long))
237 #define BM_BITS_PER_CHUNK	(sizeof(long) << 3)
238 #define BM_BITS_PER_BLOCK	(PAGE_SIZE << 3)
239 
240 struct bm_block {
241 	struct bm_block *next;		/* next element of the list */
242 	unsigned long start_pfn;	/* pfn represented by the first bit */
243 	unsigned long end_pfn;	/* pfn represented by the last bit plus 1 */
244 	unsigned int size;	/* number of bit chunks */
245 	unsigned long *data;	/* chunks of bits representing pages */
246 };
247 
248 struct zone_bitmap {
249 	struct zone_bitmap *next;	/* next element of the list */
250 	unsigned long start_pfn;	/* minimal pfn in this zone */
251 	unsigned long end_pfn;		/* maximal pfn in this zone plus 1 */
252 	struct bm_block *bm_blocks;	/* list of bitmap blocks */
253 	struct bm_block *cur_block;	/* recently used bitmap block */
254 };
255 
256 /* strcut bm_position is used for browsing memory bitmaps */
257 
258 struct bm_position {
259 	struct zone_bitmap *zone_bm;
260 	struct bm_block *block;
261 	int chunk;
262 	int bit;
263 };
264 
265 struct memory_bitmap {
266 	struct zone_bitmap *zone_bm_list;	/* list of zone bitmaps */
267 	struct linked_page *p_list;	/* list of pages used to store zone
268 					 * bitmap objects and bitmap block
269 					 * objects
270 					 */
271 	struct bm_position cur;	/* most recently used bit position */
272 };
273 
274 /* Functions that operate on memory bitmaps */
275 
276 static inline void memory_bm_reset_chunk(struct memory_bitmap *bm)
277 {
278 	bm->cur.chunk = 0;
279 	bm->cur.bit = -1;
280 }
281 
282 static void memory_bm_position_reset(struct memory_bitmap *bm)
283 {
284 	struct zone_bitmap *zone_bm;
285 
286 	zone_bm = bm->zone_bm_list;
287 	bm->cur.zone_bm = zone_bm;
288 	bm->cur.block = zone_bm->bm_blocks;
289 	memory_bm_reset_chunk(bm);
290 }
291 
292 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
293 
294 /**
295  *	create_bm_block_list - create a list of block bitmap objects
296  */
297 
298 static inline struct bm_block *
299 create_bm_block_list(unsigned int nr_blocks, struct chain_allocator *ca)
300 {
301 	struct bm_block *bblist = NULL;
302 
303 	while (nr_blocks-- > 0) {
304 		struct bm_block *bb;
305 
306 		bb = chain_alloc(ca, sizeof(struct bm_block));
307 		if (!bb)
308 			return NULL;
309 
310 		bb->next = bblist;
311 		bblist = bb;
312 	}
313 	return bblist;
314 }
315 
316 /**
317  *	create_zone_bm_list - create a list of zone bitmap objects
318  */
319 
320 static inline struct zone_bitmap *
321 create_zone_bm_list(unsigned int nr_zones, struct chain_allocator *ca)
322 {
323 	struct zone_bitmap *zbmlist = NULL;
324 
325 	while (nr_zones-- > 0) {
326 		struct zone_bitmap *zbm;
327 
328 		zbm = chain_alloc(ca, sizeof(struct zone_bitmap));
329 		if (!zbm)
330 			return NULL;
331 
332 		zbm->next = zbmlist;
333 		zbmlist = zbm;
334 	}
335 	return zbmlist;
336 }
337 
338 /**
339   *	memory_bm_create - allocate memory for a memory bitmap
340   */
341 
342 static int
343 memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
344 {
345 	struct chain_allocator ca;
346 	struct zone *zone;
347 	struct zone_bitmap *zone_bm;
348 	struct bm_block *bb;
349 	unsigned int nr;
350 
351 	chain_init(&ca, gfp_mask, safe_needed);
352 
353 	/* Compute the number of zones */
354 	nr = 0;
355 	for_each_zone(zone)
356 		if (populated_zone(zone))
357 			nr++;
358 
359 	/* Allocate the list of zones bitmap objects */
360 	zone_bm = create_zone_bm_list(nr, &ca);
361 	bm->zone_bm_list = zone_bm;
362 	if (!zone_bm) {
363 		chain_free(&ca, PG_UNSAFE_CLEAR);
364 		return -ENOMEM;
365 	}
366 
367 	/* Initialize the zone bitmap objects */
368 	for_each_zone(zone) {
369 		unsigned long pfn;
370 
371 		if (!populated_zone(zone))
372 			continue;
373 
374 		zone_bm->start_pfn = zone->zone_start_pfn;
375 		zone_bm->end_pfn = zone->zone_start_pfn + zone->spanned_pages;
376 		/* Allocate the list of bitmap block objects */
377 		nr = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
378 		bb = create_bm_block_list(nr, &ca);
379 		zone_bm->bm_blocks = bb;
380 		zone_bm->cur_block = bb;
381 		if (!bb)
382 			goto Free;
383 
384 		nr = zone->spanned_pages;
385 		pfn = zone->zone_start_pfn;
386 		/* Initialize the bitmap block objects */
387 		while (bb) {
388 			unsigned long *ptr;
389 
390 			ptr = get_image_page(gfp_mask, safe_needed);
391 			bb->data = ptr;
392 			if (!ptr)
393 				goto Free;
394 
395 			bb->start_pfn = pfn;
396 			if (nr >= BM_BITS_PER_BLOCK) {
397 				pfn += BM_BITS_PER_BLOCK;
398 				bb->size = BM_CHUNKS_PER_BLOCK;
399 				nr -= BM_BITS_PER_BLOCK;
400 			} else {
401 				/* This is executed only once in the loop */
402 				pfn += nr;
403 				bb->size = DIV_ROUND_UP(nr, BM_BITS_PER_CHUNK);
404 			}
405 			bb->end_pfn = pfn;
406 			bb = bb->next;
407 		}
408 		zone_bm = zone_bm->next;
409 	}
410 	bm->p_list = ca.chain;
411 	memory_bm_position_reset(bm);
412 	return 0;
413 
414  Free:
415 	bm->p_list = ca.chain;
416 	memory_bm_free(bm, PG_UNSAFE_CLEAR);
417 	return -ENOMEM;
418 }
419 
420 /**
421   *	memory_bm_free - free memory occupied by the memory bitmap @bm
422   */
423 
424 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
425 {
426 	struct zone_bitmap *zone_bm;
427 
428 	/* Free the list of bit blocks for each zone_bitmap object */
429 	zone_bm = bm->zone_bm_list;
430 	while (zone_bm) {
431 		struct bm_block *bb;
432 
433 		bb = zone_bm->bm_blocks;
434 		while (bb) {
435 			if (bb->data)
436 				free_image_page(bb->data, clear_nosave_free);
437 			bb = bb->next;
438 		}
439 		zone_bm = zone_bm->next;
440 	}
441 	free_list_of_pages(bm->p_list, clear_nosave_free);
442 	bm->zone_bm_list = NULL;
443 }
444 
445 /**
446  *	memory_bm_find_bit - find the bit in the bitmap @bm that corresponds
447  *	to given pfn.  The cur_zone_bm member of @bm and the cur_block member
448  *	of @bm->cur_zone_bm are updated.
449  */
450 
451 static void memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
452 				void **addr, unsigned int *bit_nr)
453 {
454 	struct zone_bitmap *zone_bm;
455 	struct bm_block *bb;
456 
457 	/* Check if the pfn is from the current zone */
458 	zone_bm = bm->cur.zone_bm;
459 	if (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
460 		zone_bm = bm->zone_bm_list;
461 		/* We don't assume that the zones are sorted by pfns */
462 		while (pfn < zone_bm->start_pfn || pfn >= zone_bm->end_pfn) {
463 			zone_bm = zone_bm->next;
464 
465 			BUG_ON(!zone_bm);
466 		}
467 		bm->cur.zone_bm = zone_bm;
468 	}
469 	/* Check if the pfn corresponds to the current bitmap block */
470 	bb = zone_bm->cur_block;
471 	if (pfn < bb->start_pfn)
472 		bb = zone_bm->bm_blocks;
473 
474 	while (pfn >= bb->end_pfn) {
475 		bb = bb->next;
476 
477 		BUG_ON(!bb);
478 	}
479 	zone_bm->cur_block = bb;
480 	pfn -= bb->start_pfn;
481 	*bit_nr = pfn % BM_BITS_PER_CHUNK;
482 	*addr = bb->data + pfn / BM_BITS_PER_CHUNK;
483 }
484 
485 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
486 {
487 	void *addr;
488 	unsigned int bit;
489 
490 	memory_bm_find_bit(bm, pfn, &addr, &bit);
491 	set_bit(bit, addr);
492 }
493 
494 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
495 {
496 	void *addr;
497 	unsigned int bit;
498 
499 	memory_bm_find_bit(bm, pfn, &addr, &bit);
500 	clear_bit(bit, addr);
501 }
502 
503 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
504 {
505 	void *addr;
506 	unsigned int bit;
507 
508 	memory_bm_find_bit(bm, pfn, &addr, &bit);
509 	return test_bit(bit, addr);
510 }
511 
512 /* Two auxiliary functions for memory_bm_next_pfn */
513 
514 /* Find the first set bit in the given chunk, if there is one */
515 
516 static inline int next_bit_in_chunk(int bit, unsigned long *chunk_p)
517 {
518 	bit++;
519 	while (bit < BM_BITS_PER_CHUNK) {
520 		if (test_bit(bit, chunk_p))
521 			return bit;
522 
523 		bit++;
524 	}
525 	return -1;
526 }
527 
528 /* Find a chunk containing some bits set in given block of bits */
529 
530 static inline int next_chunk_in_block(int n, struct bm_block *bb)
531 {
532 	n++;
533 	while (n < bb->size) {
534 		if (bb->data[n])
535 			return n;
536 
537 		n++;
538 	}
539 	return -1;
540 }
541 
542 /**
543  *	memory_bm_next_pfn - find the pfn that corresponds to the next set bit
544  *	in the bitmap @bm.  If the pfn cannot be found, BM_END_OF_MAP is
545  *	returned.
546  *
547  *	It is required to run memory_bm_position_reset() before the first call to
548  *	this function.
549  */
550 
551 static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
552 {
553 	struct zone_bitmap *zone_bm;
554 	struct bm_block *bb;
555 	int chunk;
556 	int bit;
557 
558 	do {
559 		bb = bm->cur.block;
560 		do {
561 			chunk = bm->cur.chunk;
562 			bit = bm->cur.bit;
563 			do {
564 				bit = next_bit_in_chunk(bit, bb->data + chunk);
565 				if (bit >= 0)
566 					goto Return_pfn;
567 
568 				chunk = next_chunk_in_block(chunk, bb);
569 				bit = -1;
570 			} while (chunk >= 0);
571 			bb = bb->next;
572 			bm->cur.block = bb;
573 			memory_bm_reset_chunk(bm);
574 		} while (bb);
575 		zone_bm = bm->cur.zone_bm->next;
576 		if (zone_bm) {
577 			bm->cur.zone_bm = zone_bm;
578 			bm->cur.block = zone_bm->bm_blocks;
579 			memory_bm_reset_chunk(bm);
580 		}
581 	} while (zone_bm);
582 	memory_bm_position_reset(bm);
583 	return BM_END_OF_MAP;
584 
585  Return_pfn:
586 	bm->cur.chunk = chunk;
587 	bm->cur.bit = bit;
588 	return bb->start_pfn + chunk * BM_BITS_PER_CHUNK + bit;
589 }
590 
591 /**
592  *	This structure represents a range of page frames the contents of which
593  *	should not be saved during the suspend.
594  */
595 
596 struct nosave_region {
597 	struct list_head list;
598 	unsigned long start_pfn;
599 	unsigned long end_pfn;
600 };
601 
602 static LIST_HEAD(nosave_regions);
603 
604 /**
605  *	register_nosave_region - register a range of page frames the contents
606  *	of which should not be saved during the suspend (to be used in the early
607  *	initialization code)
608  */
609 
610 void __init
611 register_nosave_region(unsigned long start_pfn, unsigned long end_pfn)
612 {
613 	struct nosave_region *region;
614 
615 	if (start_pfn >= end_pfn)
616 		return;
617 
618 	if (!list_empty(&nosave_regions)) {
619 		/* Try to extend the previous region (they should be sorted) */
620 		region = list_entry(nosave_regions.prev,
621 					struct nosave_region, list);
622 		if (region->end_pfn == start_pfn) {
623 			region->end_pfn = end_pfn;
624 			goto Report;
625 		}
626 	}
627 	/* This allocation cannot fail */
628 	region = alloc_bootmem_low(sizeof(struct nosave_region));
629 	region->start_pfn = start_pfn;
630 	region->end_pfn = end_pfn;
631 	list_add_tail(&region->list, &nosave_regions);
632  Report:
633 	printk("swsusp: Registered nosave memory region: %016lx - %016lx\n",
634 		start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
635 }
636 
637 /*
638  * Set bits in this map correspond to the page frames the contents of which
639  * should not be saved during the suspend.
640  */
641 static struct memory_bitmap *forbidden_pages_map;
642 
643 /* Set bits in this map correspond to free page frames. */
644 static struct memory_bitmap *free_pages_map;
645 
646 /*
647  * Each page frame allocated for creating the image is marked by setting the
648  * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
649  */
650 
651 void swsusp_set_page_free(struct page *page)
652 {
653 	if (free_pages_map)
654 		memory_bm_set_bit(free_pages_map, page_to_pfn(page));
655 }
656 
657 static int swsusp_page_is_free(struct page *page)
658 {
659 	return free_pages_map ?
660 		memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
661 }
662 
663 void swsusp_unset_page_free(struct page *page)
664 {
665 	if (free_pages_map)
666 		memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
667 }
668 
669 static void swsusp_set_page_forbidden(struct page *page)
670 {
671 	if (forbidden_pages_map)
672 		memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
673 }
674 
675 int swsusp_page_is_forbidden(struct page *page)
676 {
677 	return forbidden_pages_map ?
678 		memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
679 }
680 
681 static void swsusp_unset_page_forbidden(struct page *page)
682 {
683 	if (forbidden_pages_map)
684 		memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
685 }
686 
687 /**
688  *	mark_nosave_pages - set bits corresponding to the page frames the
689  *	contents of which should not be saved in a given bitmap.
690  */
691 
692 static void mark_nosave_pages(struct memory_bitmap *bm)
693 {
694 	struct nosave_region *region;
695 
696 	if (list_empty(&nosave_regions))
697 		return;
698 
699 	list_for_each_entry(region, &nosave_regions, list) {
700 		unsigned long pfn;
701 
702 		printk("swsusp: Marking nosave pages: %016lx - %016lx\n",
703 				region->start_pfn << PAGE_SHIFT,
704 				region->end_pfn << PAGE_SHIFT);
705 
706 		for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
707 			memory_bm_set_bit(bm, pfn);
708 	}
709 }
710 
711 /**
712  *	create_basic_memory_bitmaps - create bitmaps needed for marking page
713  *	frames that should not be saved and free page frames.  The pointers
714  *	forbidden_pages_map and free_pages_map are only modified if everything
715  *	goes well, because we don't want the bits to be used before both bitmaps
716  *	are set up.
717  */
718 
719 int create_basic_memory_bitmaps(void)
720 {
721 	struct memory_bitmap *bm1, *bm2;
722 	int error = 0;
723 
724 	BUG_ON(forbidden_pages_map || free_pages_map);
725 
726 	bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
727 	if (!bm1)
728 		return -ENOMEM;
729 
730 	error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
731 	if (error)
732 		goto Free_first_object;
733 
734 	bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
735 	if (!bm2)
736 		goto Free_first_bitmap;
737 
738 	error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
739 	if (error)
740 		goto Free_second_object;
741 
742 	forbidden_pages_map = bm1;
743 	free_pages_map = bm2;
744 	mark_nosave_pages(forbidden_pages_map);
745 
746 	printk("swsusp: Basic memory bitmaps created\n");
747 
748 	return 0;
749 
750  Free_second_object:
751 	kfree(bm2);
752  Free_first_bitmap:
753  	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
754  Free_first_object:
755 	kfree(bm1);
756 	return -ENOMEM;
757 }
758 
759 /**
760  *	free_basic_memory_bitmaps - free memory bitmaps allocated by
761  *	create_basic_memory_bitmaps().  The auxiliary pointers are necessary
762  *	so that the bitmaps themselves are not referred to while they are being
763  *	freed.
764  */
765 
766 void free_basic_memory_bitmaps(void)
767 {
768 	struct memory_bitmap *bm1, *bm2;
769 
770 	BUG_ON(!(forbidden_pages_map && free_pages_map));
771 
772 	bm1 = forbidden_pages_map;
773 	bm2 = free_pages_map;
774 	forbidden_pages_map = NULL;
775 	free_pages_map = NULL;
776 	memory_bm_free(bm1, PG_UNSAFE_CLEAR);
777 	kfree(bm1);
778 	memory_bm_free(bm2, PG_UNSAFE_CLEAR);
779 	kfree(bm2);
780 
781 	printk("swsusp: Basic memory bitmaps freed\n");
782 }
783 
784 /**
785  *	snapshot_additional_pages - estimate the number of additional pages
786  *	be needed for setting up the suspend image data structures for given
787  *	zone (usually the returned value is greater than the exact number)
788  */
789 
790 unsigned int snapshot_additional_pages(struct zone *zone)
791 {
792 	unsigned int res;
793 
794 	res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
795 	res += DIV_ROUND_UP(res * sizeof(struct bm_block), PAGE_SIZE);
796 	return 2 * res;
797 }
798 
799 #ifdef CONFIG_HIGHMEM
800 /**
801  *	count_free_highmem_pages - compute the total number of free highmem
802  *	pages, system-wide.
803  */
804 
805 static unsigned int count_free_highmem_pages(void)
806 {
807 	struct zone *zone;
808 	unsigned int cnt = 0;
809 
810 	for_each_zone(zone)
811 		if (populated_zone(zone) && is_highmem(zone))
812 			cnt += zone_page_state(zone, NR_FREE_PAGES);
813 
814 	return cnt;
815 }
816 
817 /**
818  *	saveable_highmem_page - Determine whether a highmem page should be
819  *	included in the suspend image.
820  *
821  *	We should save the page if it isn't Nosave or NosaveFree, or Reserved,
822  *	and it isn't a part of a free chunk of pages.
823  */
824 
825 static struct page *saveable_highmem_page(unsigned long pfn)
826 {
827 	struct page *page;
828 
829 	if (!pfn_valid(pfn))
830 		return NULL;
831 
832 	page = pfn_to_page(pfn);
833 
834 	BUG_ON(!PageHighMem(page));
835 
836 	if (swsusp_page_is_forbidden(page) ||  swsusp_page_is_free(page) ||
837 	    PageReserved(page))
838 		return NULL;
839 
840 	return page;
841 }
842 
843 /**
844  *	count_highmem_pages - compute the total number of saveable highmem
845  *	pages.
846  */
847 
848 unsigned int count_highmem_pages(void)
849 {
850 	struct zone *zone;
851 	unsigned int n = 0;
852 
853 	for_each_zone(zone) {
854 		unsigned long pfn, max_zone_pfn;
855 
856 		if (!is_highmem(zone))
857 			continue;
858 
859 		mark_free_pages(zone);
860 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
861 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
862 			if (saveable_highmem_page(pfn))
863 				n++;
864 	}
865 	return n;
866 }
867 #else
868 static inline void *saveable_highmem_page(unsigned long pfn) { return NULL; }
869 static inline unsigned int count_highmem_pages(void) { return 0; }
870 #endif /* CONFIG_HIGHMEM */
871 
872 /**
873  *	saveable - Determine whether a non-highmem page should be included in
874  *	the suspend image.
875  *
876  *	We should save the page if it isn't Nosave, and is not in the range
877  *	of pages statically defined as 'unsaveable', and it isn't a part of
878  *	a free chunk of pages.
879  */
880 
881 static struct page *saveable_page(unsigned long pfn)
882 {
883 	struct page *page;
884 
885 	if (!pfn_valid(pfn))
886 		return NULL;
887 
888 	page = pfn_to_page(pfn);
889 
890 	BUG_ON(PageHighMem(page));
891 
892 	if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
893 		return NULL;
894 
895 	if (PageReserved(page) && pfn_is_nosave(pfn))
896 		return NULL;
897 
898 	return page;
899 }
900 
901 /**
902  *	count_data_pages - compute the total number of saveable non-highmem
903  *	pages.
904  */
905 
906 unsigned int count_data_pages(void)
907 {
908 	struct zone *zone;
909 	unsigned long pfn, max_zone_pfn;
910 	unsigned int n = 0;
911 
912 	for_each_zone(zone) {
913 		if (is_highmem(zone))
914 			continue;
915 
916 		mark_free_pages(zone);
917 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
918 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
919 			if(saveable_page(pfn))
920 				n++;
921 	}
922 	return n;
923 }
924 
925 /* This is needed, because copy_page and memcpy are not usable for copying
926  * task structs.
927  */
928 static inline void do_copy_page(long *dst, long *src)
929 {
930 	int n;
931 
932 	for (n = PAGE_SIZE / sizeof(long); n; n--)
933 		*dst++ = *src++;
934 }
935 
936 #ifdef CONFIG_HIGHMEM
937 static inline struct page *
938 page_is_saveable(struct zone *zone, unsigned long pfn)
939 {
940 	return is_highmem(zone) ?
941 			saveable_highmem_page(pfn) : saveable_page(pfn);
942 }
943 
944 static inline void
945 copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
946 {
947 	struct page *s_page, *d_page;
948 	void *src, *dst;
949 
950 	s_page = pfn_to_page(src_pfn);
951 	d_page = pfn_to_page(dst_pfn);
952 	if (PageHighMem(s_page)) {
953 		src = kmap_atomic(s_page, KM_USER0);
954 		dst = kmap_atomic(d_page, KM_USER1);
955 		do_copy_page(dst, src);
956 		kunmap_atomic(src, KM_USER0);
957 		kunmap_atomic(dst, KM_USER1);
958 	} else {
959 		src = page_address(s_page);
960 		if (PageHighMem(d_page)) {
961 			/* Page pointed to by src may contain some kernel
962 			 * data modified by kmap_atomic()
963 			 */
964 			do_copy_page(buffer, src);
965 			dst = kmap_atomic(pfn_to_page(dst_pfn), KM_USER0);
966 			memcpy(dst, buffer, PAGE_SIZE);
967 			kunmap_atomic(dst, KM_USER0);
968 		} else {
969 			dst = page_address(d_page);
970 			do_copy_page(dst, src);
971 		}
972 	}
973 }
974 #else
975 #define page_is_saveable(zone, pfn)	saveable_page(pfn)
976 
977 static inline void
978 copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
979 {
980 	do_copy_page(page_address(pfn_to_page(dst_pfn)),
981 			page_address(pfn_to_page(src_pfn)));
982 }
983 #endif /* CONFIG_HIGHMEM */
984 
985 static void
986 copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
987 {
988 	struct zone *zone;
989 	unsigned long pfn;
990 
991 	for_each_zone(zone) {
992 		unsigned long max_zone_pfn;
993 
994 		mark_free_pages(zone);
995 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
996 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
997 			if (page_is_saveable(zone, pfn))
998 				memory_bm_set_bit(orig_bm, pfn);
999 	}
1000 	memory_bm_position_reset(orig_bm);
1001 	memory_bm_position_reset(copy_bm);
1002 	do {
1003 		pfn = memory_bm_next_pfn(orig_bm);
1004 		if (likely(pfn != BM_END_OF_MAP))
1005 			copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
1006 	} while (pfn != BM_END_OF_MAP);
1007 }
1008 
1009 /* Total number of image pages */
1010 static unsigned int nr_copy_pages;
1011 /* Number of pages needed for saving the original pfns of the image pages */
1012 static unsigned int nr_meta_pages;
1013 
1014 /**
1015  *	swsusp_free - free pages allocated for the suspend.
1016  *
1017  *	Suspend pages are alocated before the atomic copy is made, so we
1018  *	need to release them after the resume.
1019  */
1020 
1021 void swsusp_free(void)
1022 {
1023 	struct zone *zone;
1024 	unsigned long pfn, max_zone_pfn;
1025 
1026 	for_each_zone(zone) {
1027 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1028 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1029 			if (pfn_valid(pfn)) {
1030 				struct page *page = pfn_to_page(pfn);
1031 
1032 				if (swsusp_page_is_forbidden(page) &&
1033 				    swsusp_page_is_free(page)) {
1034 					swsusp_unset_page_forbidden(page);
1035 					swsusp_unset_page_free(page);
1036 					__free_page(page);
1037 				}
1038 			}
1039 	}
1040 	nr_copy_pages = 0;
1041 	nr_meta_pages = 0;
1042 	restore_pblist = NULL;
1043 	buffer = NULL;
1044 }
1045 
1046 #ifdef CONFIG_HIGHMEM
1047 /**
1048   *	count_pages_for_highmem - compute the number of non-highmem pages
1049   *	that will be necessary for creating copies of highmem pages.
1050   */
1051 
1052 static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
1053 {
1054 	unsigned int free_highmem = count_free_highmem_pages();
1055 
1056 	if (free_highmem >= nr_highmem)
1057 		nr_highmem = 0;
1058 	else
1059 		nr_highmem -= free_highmem;
1060 
1061 	return nr_highmem;
1062 }
1063 #else
1064 static unsigned int
1065 count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
1066 #endif /* CONFIG_HIGHMEM */
1067 
1068 /**
1069  *	enough_free_mem - Make sure we have enough free memory for the
1070  *	snapshot image.
1071  */
1072 
1073 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
1074 {
1075 	struct zone *zone;
1076 	unsigned int free = 0, meta = 0;
1077 
1078 	for_each_zone(zone) {
1079 		meta += snapshot_additional_pages(zone);
1080 		if (!is_highmem(zone))
1081 			free += zone_page_state(zone, NR_FREE_PAGES);
1082 	}
1083 
1084 	nr_pages += count_pages_for_highmem(nr_highmem);
1085 	pr_debug("swsusp: Normal pages needed: %u + %u + %u, available pages: %u\n",
1086 		nr_pages, PAGES_FOR_IO, meta, free);
1087 
1088 	return free > nr_pages + PAGES_FOR_IO + meta;
1089 }
1090 
1091 #ifdef CONFIG_HIGHMEM
1092 /**
1093  *	get_highmem_buffer - if there are some highmem pages in the suspend
1094  *	image, we may need the buffer to copy them and/or load their data.
1095  */
1096 
1097 static inline int get_highmem_buffer(int safe_needed)
1098 {
1099 	buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
1100 	return buffer ? 0 : -ENOMEM;
1101 }
1102 
1103 /**
1104  *	alloc_highmem_image_pages - allocate some highmem pages for the image.
1105  *	Try to allocate as many pages as needed, but if the number of free
1106  *	highmem pages is lesser than that, allocate them all.
1107  */
1108 
1109 static inline unsigned int
1110 alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
1111 {
1112 	unsigned int to_alloc = count_free_highmem_pages();
1113 
1114 	if (to_alloc > nr_highmem)
1115 		to_alloc = nr_highmem;
1116 
1117 	nr_highmem -= to_alloc;
1118 	while (to_alloc-- > 0) {
1119 		struct page *page;
1120 
1121 		page = alloc_image_page(__GFP_HIGHMEM);
1122 		memory_bm_set_bit(bm, page_to_pfn(page));
1123 	}
1124 	return nr_highmem;
1125 }
1126 #else
1127 static inline int get_highmem_buffer(int safe_needed) { return 0; }
1128 
1129 static inline unsigned int
1130 alloc_highmem_image_pages(struct memory_bitmap *bm, unsigned int n) { return 0; }
1131 #endif /* CONFIG_HIGHMEM */
1132 
1133 /**
1134  *	swsusp_alloc - allocate memory for the suspend image
1135  *
1136  *	We first try to allocate as many highmem pages as there are
1137  *	saveable highmem pages in the system.  If that fails, we allocate
1138  *	non-highmem pages for the copies of the remaining highmem ones.
1139  *
1140  *	In this approach it is likely that the copies of highmem pages will
1141  *	also be located in the high memory, because of the way in which
1142  *	copy_data_pages() works.
1143  */
1144 
1145 static int
1146 swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
1147 		unsigned int nr_pages, unsigned int nr_highmem)
1148 {
1149 	int error;
1150 
1151 	error = memory_bm_create(orig_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
1152 	if (error)
1153 		goto Free;
1154 
1155 	error = memory_bm_create(copy_bm, GFP_ATOMIC | __GFP_COLD, PG_ANY);
1156 	if (error)
1157 		goto Free;
1158 
1159 	if (nr_highmem > 0) {
1160 		error = get_highmem_buffer(PG_ANY);
1161 		if (error)
1162 			goto Free;
1163 
1164 		nr_pages += alloc_highmem_image_pages(copy_bm, nr_highmem);
1165 	}
1166 	while (nr_pages-- > 0) {
1167 		struct page *page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
1168 
1169 		if (!page)
1170 			goto Free;
1171 
1172 		memory_bm_set_bit(copy_bm, page_to_pfn(page));
1173 	}
1174 	return 0;
1175 
1176  Free:
1177 	swsusp_free();
1178 	return -ENOMEM;
1179 }
1180 
1181 /* Memory bitmap used for marking saveable pages (during suspend) or the
1182  * suspend image pages (during resume)
1183  */
1184 static struct memory_bitmap orig_bm;
1185 /* Memory bitmap used on suspend for marking allocated pages that will contain
1186  * the copies of saveable pages.  During resume it is initially used for
1187  * marking the suspend image pages, but then its set bits are duplicated in
1188  * @orig_bm and it is released.  Next, on systems with high memory, it may be
1189  * used for marking "safe" highmem pages, but it has to be reinitialized for
1190  * this purpose.
1191  */
1192 static struct memory_bitmap copy_bm;
1193 
1194 asmlinkage int swsusp_save(void)
1195 {
1196 	unsigned int nr_pages, nr_highmem;
1197 
1198 	printk("swsusp: critical section: \n");
1199 
1200 	drain_local_pages();
1201 	nr_pages = count_data_pages();
1202 	nr_highmem = count_highmem_pages();
1203 	printk("swsusp: Need to copy %u pages\n", nr_pages + nr_highmem);
1204 
1205 	if (!enough_free_mem(nr_pages, nr_highmem)) {
1206 		printk(KERN_ERR "swsusp: Not enough free memory\n");
1207 		return -ENOMEM;
1208 	}
1209 
1210 	if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
1211 		printk(KERN_ERR "swsusp: Memory allocation failed\n");
1212 		return -ENOMEM;
1213 	}
1214 
1215 	/* During allocating of suspend pagedir, new cold pages may appear.
1216 	 * Kill them.
1217 	 */
1218 	drain_local_pages();
1219 	copy_data_pages(&copy_bm, &orig_bm);
1220 
1221 	/*
1222 	 * End of critical section. From now on, we can write to memory,
1223 	 * but we should not touch disk. This specially means we must _not_
1224 	 * touch swap space! Except we must write out our image of course.
1225 	 */
1226 
1227 	nr_pages += nr_highmem;
1228 	nr_copy_pages = nr_pages;
1229 	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
1230 
1231 	printk("swsusp: critical section/: done (%d pages copied)\n", nr_pages);
1232 
1233 	return 0;
1234 }
1235 
1236 static void init_header(struct swsusp_info *info)
1237 {
1238 	memset(info, 0, sizeof(struct swsusp_info));
1239 	info->version_code = LINUX_VERSION_CODE;
1240 	info->num_physpages = num_physpages;
1241 	memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
1242 	info->cpus = num_online_cpus();
1243 	info->image_pages = nr_copy_pages;
1244 	info->pages = nr_copy_pages + nr_meta_pages + 1;
1245 	info->size = info->pages;
1246 	info->size <<= PAGE_SHIFT;
1247 }
1248 
1249 /**
1250  *	pack_pfns - pfns corresponding to the set bits found in the bitmap @bm
1251  *	are stored in the array @buf[] (1 page at a time)
1252  */
1253 
1254 static inline void
1255 pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
1256 {
1257 	int j;
1258 
1259 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1260 		buf[j] = memory_bm_next_pfn(bm);
1261 		if (unlikely(buf[j] == BM_END_OF_MAP))
1262 			break;
1263 	}
1264 }
1265 
1266 /**
1267  *	snapshot_read_next - used for reading the system memory snapshot.
1268  *
1269  *	On the first call to it @handle should point to a zeroed
1270  *	snapshot_handle structure.  The structure gets updated and a pointer
1271  *	to it should be passed to this function every next time.
1272  *
1273  *	The @count parameter should contain the number of bytes the caller
1274  *	wants to read from the snapshot.  It must not be zero.
1275  *
1276  *	On success the function returns a positive number.  Then, the caller
1277  *	is allowed to read up to the returned number of bytes from the memory
1278  *	location computed by the data_of() macro.  The number returned
1279  *	may be smaller than @count, but this only happens if the read would
1280  *	cross a page boundary otherwise.
1281  *
1282  *	The function returns 0 to indicate the end of data stream condition,
1283  *	and a negative number is returned on error.  In such cases the
1284  *	structure pointed to by @handle is not updated and should not be used
1285  *	any more.
1286  */
1287 
1288 int snapshot_read_next(struct snapshot_handle *handle, size_t count)
1289 {
1290 	if (handle->cur > nr_meta_pages + nr_copy_pages)
1291 		return 0;
1292 
1293 	if (!buffer) {
1294 		/* This makes the buffer be freed by swsusp_free() */
1295 		buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1296 		if (!buffer)
1297 			return -ENOMEM;
1298 	}
1299 	if (!handle->offset) {
1300 		init_header((struct swsusp_info *)buffer);
1301 		handle->buffer = buffer;
1302 		memory_bm_position_reset(&orig_bm);
1303 		memory_bm_position_reset(&copy_bm);
1304 	}
1305 	if (handle->prev < handle->cur) {
1306 		if (handle->cur <= nr_meta_pages) {
1307 			memset(buffer, 0, PAGE_SIZE);
1308 			pack_pfns(buffer, &orig_bm);
1309 		} else {
1310 			struct page *page;
1311 
1312 			page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
1313 			if (PageHighMem(page)) {
1314 				/* Highmem pages are copied to the buffer,
1315 				 * because we can't return with a kmapped
1316 				 * highmem page (we may not be called again).
1317 				 */
1318 				void *kaddr;
1319 
1320 				kaddr = kmap_atomic(page, KM_USER0);
1321 				memcpy(buffer, kaddr, PAGE_SIZE);
1322 				kunmap_atomic(kaddr, KM_USER0);
1323 				handle->buffer = buffer;
1324 			} else {
1325 				handle->buffer = page_address(page);
1326 			}
1327 		}
1328 		handle->prev = handle->cur;
1329 	}
1330 	handle->buf_offset = handle->cur_offset;
1331 	if (handle->cur_offset + count >= PAGE_SIZE) {
1332 		count = PAGE_SIZE - handle->cur_offset;
1333 		handle->cur_offset = 0;
1334 		handle->cur++;
1335 	} else {
1336 		handle->cur_offset += count;
1337 	}
1338 	handle->offset += count;
1339 	return count;
1340 }
1341 
1342 /**
1343  *	mark_unsafe_pages - mark the pages that cannot be used for storing
1344  *	the image during resume, because they conflict with the pages that
1345  *	had been used before suspend
1346  */
1347 
1348 static int mark_unsafe_pages(struct memory_bitmap *bm)
1349 {
1350 	struct zone *zone;
1351 	unsigned long pfn, max_zone_pfn;
1352 
1353 	/* Clear page flags */
1354 	for_each_zone(zone) {
1355 		max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1356 		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1357 			if (pfn_valid(pfn))
1358 				swsusp_unset_page_free(pfn_to_page(pfn));
1359 	}
1360 
1361 	/* Mark pages that correspond to the "original" pfns as "unsafe" */
1362 	memory_bm_position_reset(bm);
1363 	do {
1364 		pfn = memory_bm_next_pfn(bm);
1365 		if (likely(pfn != BM_END_OF_MAP)) {
1366 			if (likely(pfn_valid(pfn)))
1367 				swsusp_set_page_free(pfn_to_page(pfn));
1368 			else
1369 				return -EFAULT;
1370 		}
1371 	} while (pfn != BM_END_OF_MAP);
1372 
1373 	allocated_unsafe_pages = 0;
1374 
1375 	return 0;
1376 }
1377 
1378 static void
1379 duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
1380 {
1381 	unsigned long pfn;
1382 
1383 	memory_bm_position_reset(src);
1384 	pfn = memory_bm_next_pfn(src);
1385 	while (pfn != BM_END_OF_MAP) {
1386 		memory_bm_set_bit(dst, pfn);
1387 		pfn = memory_bm_next_pfn(src);
1388 	}
1389 }
1390 
1391 static inline int check_header(struct swsusp_info *info)
1392 {
1393 	char *reason = NULL;
1394 
1395 	if (info->version_code != LINUX_VERSION_CODE)
1396 		reason = "kernel version";
1397 	if (info->num_physpages != num_physpages)
1398 		reason = "memory size";
1399 	if (strcmp(info->uts.sysname,init_utsname()->sysname))
1400 		reason = "system type";
1401 	if (strcmp(info->uts.release,init_utsname()->release))
1402 		reason = "kernel release";
1403 	if (strcmp(info->uts.version,init_utsname()->version))
1404 		reason = "version";
1405 	if (strcmp(info->uts.machine,init_utsname()->machine))
1406 		reason = "machine";
1407 	if (reason) {
1408 		printk(KERN_ERR "swsusp: Resume mismatch: %s\n", reason);
1409 		return -EPERM;
1410 	}
1411 	return 0;
1412 }
1413 
1414 /**
1415  *	load header - check the image header and copy data from it
1416  */
1417 
1418 static int
1419 load_header(struct swsusp_info *info)
1420 {
1421 	int error;
1422 
1423 	restore_pblist = NULL;
1424 	error = check_header(info);
1425 	if (!error) {
1426 		nr_copy_pages = info->image_pages;
1427 		nr_meta_pages = info->pages - info->image_pages - 1;
1428 	}
1429 	return error;
1430 }
1431 
1432 /**
1433  *	unpack_orig_pfns - for each element of @buf[] (1 page at a time) set
1434  *	the corresponding bit in the memory bitmap @bm
1435  */
1436 
1437 static inline void
1438 unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1439 {
1440 	int j;
1441 
1442 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1443 		if (unlikely(buf[j] == BM_END_OF_MAP))
1444 			break;
1445 
1446 		memory_bm_set_bit(bm, buf[j]);
1447 	}
1448 }
1449 
1450 /* List of "safe" pages that may be used to store data loaded from the suspend
1451  * image
1452  */
1453 static struct linked_page *safe_pages_list;
1454 
1455 #ifdef CONFIG_HIGHMEM
1456 /* struct highmem_pbe is used for creating the list of highmem pages that
1457  * should be restored atomically during the resume from disk, because the page
1458  * frames they have occupied before the suspend are in use.
1459  */
1460 struct highmem_pbe {
1461 	struct page *copy_page;	/* data is here now */
1462 	struct page *orig_page;	/* data was here before the suspend */
1463 	struct highmem_pbe *next;
1464 };
1465 
1466 /* List of highmem PBEs needed for restoring the highmem pages that were
1467  * allocated before the suspend and included in the suspend image, but have
1468  * also been allocated by the "resume" kernel, so their contents cannot be
1469  * written directly to their "original" page frames.
1470  */
1471 static struct highmem_pbe *highmem_pblist;
1472 
1473 /**
1474  *	count_highmem_image_pages - compute the number of highmem pages in the
1475  *	suspend image.  The bits in the memory bitmap @bm that correspond to the
1476  *	image pages are assumed to be set.
1477  */
1478 
1479 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
1480 {
1481 	unsigned long pfn;
1482 	unsigned int cnt = 0;
1483 
1484 	memory_bm_position_reset(bm);
1485 	pfn = memory_bm_next_pfn(bm);
1486 	while (pfn != BM_END_OF_MAP) {
1487 		if (PageHighMem(pfn_to_page(pfn)))
1488 			cnt++;
1489 
1490 		pfn = memory_bm_next_pfn(bm);
1491 	}
1492 	return cnt;
1493 }
1494 
1495 /**
1496  *	prepare_highmem_image - try to allocate as many highmem pages as
1497  *	there are highmem image pages (@nr_highmem_p points to the variable
1498  *	containing the number of highmem image pages).  The pages that are
1499  *	"safe" (ie. will not be overwritten when the suspend image is
1500  *	restored) have the corresponding bits set in @bm (it must be
1501  *	unitialized).
1502  *
1503  *	NOTE: This function should not be called if there are no highmem
1504  *	image pages.
1505  */
1506 
1507 static unsigned int safe_highmem_pages;
1508 
1509 static struct memory_bitmap *safe_highmem_bm;
1510 
1511 static int
1512 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1513 {
1514 	unsigned int to_alloc;
1515 
1516 	if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
1517 		return -ENOMEM;
1518 
1519 	if (get_highmem_buffer(PG_SAFE))
1520 		return -ENOMEM;
1521 
1522 	to_alloc = count_free_highmem_pages();
1523 	if (to_alloc > *nr_highmem_p)
1524 		to_alloc = *nr_highmem_p;
1525 	else
1526 		*nr_highmem_p = to_alloc;
1527 
1528 	safe_highmem_pages = 0;
1529 	while (to_alloc-- > 0) {
1530 		struct page *page;
1531 
1532 		page = alloc_page(__GFP_HIGHMEM);
1533 		if (!swsusp_page_is_free(page)) {
1534 			/* The page is "safe", set its bit the bitmap */
1535 			memory_bm_set_bit(bm, page_to_pfn(page));
1536 			safe_highmem_pages++;
1537 		}
1538 		/* Mark the page as allocated */
1539 		swsusp_set_page_forbidden(page);
1540 		swsusp_set_page_free(page);
1541 	}
1542 	memory_bm_position_reset(bm);
1543 	safe_highmem_bm = bm;
1544 	return 0;
1545 }
1546 
1547 /**
1548  *	get_highmem_page_buffer - for given highmem image page find the buffer
1549  *	that suspend_write_next() should set for its caller to write to.
1550  *
1551  *	If the page is to be saved to its "original" page frame or a copy of
1552  *	the page is to be made in the highmem, @buffer is returned.  Otherwise,
1553  *	the copy of the page is to be made in normal memory, so the address of
1554  *	the copy is returned.
1555  *
1556  *	If @buffer is returned, the caller of suspend_write_next() will write
1557  *	the page's contents to @buffer, so they will have to be copied to the
1558  *	right location on the next call to suspend_write_next() and it is done
1559  *	with the help of copy_last_highmem_page().  For this purpose, if
1560  *	@buffer is returned, @last_highmem page is set to the page to which
1561  *	the data will have to be copied from @buffer.
1562  */
1563 
1564 static struct page *last_highmem_page;
1565 
1566 static void *
1567 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1568 {
1569 	struct highmem_pbe *pbe;
1570 	void *kaddr;
1571 
1572 	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
1573 		/* We have allocated the "original" page frame and we can
1574 		 * use it directly to store the loaded page.
1575 		 */
1576 		last_highmem_page = page;
1577 		return buffer;
1578 	}
1579 	/* The "original" page frame has not been allocated and we have to
1580 	 * use a "safe" page frame to store the loaded page.
1581 	 */
1582 	pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
1583 	if (!pbe) {
1584 		swsusp_free();
1585 		return NULL;
1586 	}
1587 	pbe->orig_page = page;
1588 	if (safe_highmem_pages > 0) {
1589 		struct page *tmp;
1590 
1591 		/* Copy of the page will be stored in high memory */
1592 		kaddr = buffer;
1593 		tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
1594 		safe_highmem_pages--;
1595 		last_highmem_page = tmp;
1596 		pbe->copy_page = tmp;
1597 	} else {
1598 		/* Copy of the page will be stored in normal memory */
1599 		kaddr = safe_pages_list;
1600 		safe_pages_list = safe_pages_list->next;
1601 		pbe->copy_page = virt_to_page(kaddr);
1602 	}
1603 	pbe->next = highmem_pblist;
1604 	highmem_pblist = pbe;
1605 	return kaddr;
1606 }
1607 
1608 /**
1609  *	copy_last_highmem_page - copy the contents of a highmem image from
1610  *	@buffer, where the caller of snapshot_write_next() has place them,
1611  *	to the right location represented by @last_highmem_page .
1612  */
1613 
1614 static void copy_last_highmem_page(void)
1615 {
1616 	if (last_highmem_page) {
1617 		void *dst;
1618 
1619 		dst = kmap_atomic(last_highmem_page, KM_USER0);
1620 		memcpy(dst, buffer, PAGE_SIZE);
1621 		kunmap_atomic(dst, KM_USER0);
1622 		last_highmem_page = NULL;
1623 	}
1624 }
1625 
1626 static inline int last_highmem_page_copied(void)
1627 {
1628 	return !last_highmem_page;
1629 }
1630 
1631 static inline void free_highmem_data(void)
1632 {
1633 	if (safe_highmem_bm)
1634 		memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
1635 
1636 	if (buffer)
1637 		free_image_page(buffer, PG_UNSAFE_CLEAR);
1638 }
1639 #else
1640 static inline int get_safe_write_buffer(void) { return 0; }
1641 
1642 static unsigned int
1643 count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
1644 
1645 static inline int
1646 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1647 {
1648 	return 0;
1649 }
1650 
1651 static inline void *
1652 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1653 {
1654 	return NULL;
1655 }
1656 
1657 static inline void copy_last_highmem_page(void) {}
1658 static inline int last_highmem_page_copied(void) { return 1; }
1659 static inline void free_highmem_data(void) {}
1660 #endif /* CONFIG_HIGHMEM */
1661 
1662 /**
1663  *	prepare_image - use the memory bitmap @bm to mark the pages that will
1664  *	be overwritten in the process of restoring the system memory state
1665  *	from the suspend image ("unsafe" pages) and allocate memory for the
1666  *	image.
1667  *
1668  *	The idea is to allocate a new memory bitmap first and then allocate
1669  *	as many pages as needed for the image data, but not to assign these
1670  *	pages to specific tasks initially.  Instead, we just mark them as
1671  *	allocated and create a lists of "safe" pages that will be used
1672  *	later.  On systems with high memory a list of "safe" highmem pages is
1673  *	also created.
1674  */
1675 
1676 #define PBES_PER_LINKED_PAGE	(LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
1677 
1678 static int
1679 prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
1680 {
1681 	unsigned int nr_pages, nr_highmem;
1682 	struct linked_page *sp_list, *lp;
1683 	int error;
1684 
1685 	/* If there is no highmem, the buffer will not be necessary */
1686 	free_image_page(buffer, PG_UNSAFE_CLEAR);
1687 	buffer = NULL;
1688 
1689 	nr_highmem = count_highmem_image_pages(bm);
1690 	error = mark_unsafe_pages(bm);
1691 	if (error)
1692 		goto Free;
1693 
1694 	error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
1695 	if (error)
1696 		goto Free;
1697 
1698 	duplicate_memory_bitmap(new_bm, bm);
1699 	memory_bm_free(bm, PG_UNSAFE_KEEP);
1700 	if (nr_highmem > 0) {
1701 		error = prepare_highmem_image(bm, &nr_highmem);
1702 		if (error)
1703 			goto Free;
1704 	}
1705 	/* Reserve some safe pages for potential later use.
1706 	 *
1707 	 * NOTE: This way we make sure there will be enough safe pages for the
1708 	 * chain_alloc() in get_buffer().  It is a bit wasteful, but
1709 	 * nr_copy_pages cannot be greater than 50% of the memory anyway.
1710 	 */
1711 	sp_list = NULL;
1712 	/* nr_copy_pages cannot be lesser than allocated_unsafe_pages */
1713 	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
1714 	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
1715 	while (nr_pages > 0) {
1716 		lp = get_image_page(GFP_ATOMIC, PG_SAFE);
1717 		if (!lp) {
1718 			error = -ENOMEM;
1719 			goto Free;
1720 		}
1721 		lp->next = sp_list;
1722 		sp_list = lp;
1723 		nr_pages--;
1724 	}
1725 	/* Preallocate memory for the image */
1726 	safe_pages_list = NULL;
1727 	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
1728 	while (nr_pages > 0) {
1729 		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
1730 		if (!lp) {
1731 			error = -ENOMEM;
1732 			goto Free;
1733 		}
1734 		if (!swsusp_page_is_free(virt_to_page(lp))) {
1735 			/* The page is "safe", add it to the list */
1736 			lp->next = safe_pages_list;
1737 			safe_pages_list = lp;
1738 		}
1739 		/* Mark the page as allocated */
1740 		swsusp_set_page_forbidden(virt_to_page(lp));
1741 		swsusp_set_page_free(virt_to_page(lp));
1742 		nr_pages--;
1743 	}
1744 	/* Free the reserved safe pages so that chain_alloc() can use them */
1745 	while (sp_list) {
1746 		lp = sp_list->next;
1747 		free_image_page(sp_list, PG_UNSAFE_CLEAR);
1748 		sp_list = lp;
1749 	}
1750 	return 0;
1751 
1752  Free:
1753 	swsusp_free();
1754 	return error;
1755 }
1756 
1757 /**
1758  *	get_buffer - compute the address that snapshot_write_next() should
1759  *	set for its caller to write to.
1760  */
1761 
1762 static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
1763 {
1764 	struct pbe *pbe;
1765 	struct page *page = pfn_to_page(memory_bm_next_pfn(bm));
1766 
1767 	if (PageHighMem(page))
1768 		return get_highmem_page_buffer(page, ca);
1769 
1770 	if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
1771 		/* We have allocated the "original" page frame and we can
1772 		 * use it directly to store the loaded page.
1773 		 */
1774 		return page_address(page);
1775 
1776 	/* The "original" page frame has not been allocated and we have to
1777 	 * use a "safe" page frame to store the loaded page.
1778 	 */
1779 	pbe = chain_alloc(ca, sizeof(struct pbe));
1780 	if (!pbe) {
1781 		swsusp_free();
1782 		return NULL;
1783 	}
1784 	pbe->orig_address = page_address(page);
1785 	pbe->address = safe_pages_list;
1786 	safe_pages_list = safe_pages_list->next;
1787 	pbe->next = restore_pblist;
1788 	restore_pblist = pbe;
1789 	return pbe->address;
1790 }
1791 
1792 /**
1793  *	snapshot_write_next - used for writing the system memory snapshot.
1794  *
1795  *	On the first call to it @handle should point to a zeroed
1796  *	snapshot_handle structure.  The structure gets updated and a pointer
1797  *	to it should be passed to this function every next time.
1798  *
1799  *	The @count parameter should contain the number of bytes the caller
1800  *	wants to write to the image.  It must not be zero.
1801  *
1802  *	On success the function returns a positive number.  Then, the caller
1803  *	is allowed to write up to the returned number of bytes to the memory
1804  *	location computed by the data_of() macro.  The number returned
1805  *	may be smaller than @count, but this only happens if the write would
1806  *	cross a page boundary otherwise.
1807  *
1808  *	The function returns 0 to indicate the "end of file" condition,
1809  *	and a negative number is returned on error.  In such cases the
1810  *	structure pointed to by @handle is not updated and should not be used
1811  *	any more.
1812  */
1813 
1814 int snapshot_write_next(struct snapshot_handle *handle, size_t count)
1815 {
1816 	static struct chain_allocator ca;
1817 	int error = 0;
1818 
1819 	/* Check if we have already loaded the entire image */
1820 	if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages)
1821 		return 0;
1822 
1823 	if (handle->offset == 0) {
1824 		if (!buffer)
1825 			/* This makes the buffer be freed by swsusp_free() */
1826 			buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1827 
1828 		if (!buffer)
1829 			return -ENOMEM;
1830 
1831 		handle->buffer = buffer;
1832 	}
1833 	handle->sync_read = 1;
1834 	if (handle->prev < handle->cur) {
1835 		if (handle->prev == 0) {
1836 			error = load_header(buffer);
1837 			if (error)
1838 				return error;
1839 
1840 			error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
1841 			if (error)
1842 				return error;
1843 
1844 		} else if (handle->prev <= nr_meta_pages) {
1845 			unpack_orig_pfns(buffer, &copy_bm);
1846 			if (handle->prev == nr_meta_pages) {
1847 				error = prepare_image(&orig_bm, &copy_bm);
1848 				if (error)
1849 					return error;
1850 
1851 				chain_init(&ca, GFP_ATOMIC, PG_SAFE);
1852 				memory_bm_position_reset(&orig_bm);
1853 				restore_pblist = NULL;
1854 				handle->buffer = get_buffer(&orig_bm, &ca);
1855 				handle->sync_read = 0;
1856 				if (!handle->buffer)
1857 					return -ENOMEM;
1858 			}
1859 		} else {
1860 			copy_last_highmem_page();
1861 			handle->buffer = get_buffer(&orig_bm, &ca);
1862 			if (handle->buffer != buffer)
1863 				handle->sync_read = 0;
1864 		}
1865 		handle->prev = handle->cur;
1866 	}
1867 	handle->buf_offset = handle->cur_offset;
1868 	if (handle->cur_offset + count >= PAGE_SIZE) {
1869 		count = PAGE_SIZE - handle->cur_offset;
1870 		handle->cur_offset = 0;
1871 		handle->cur++;
1872 	} else {
1873 		handle->cur_offset += count;
1874 	}
1875 	handle->offset += count;
1876 	return count;
1877 }
1878 
1879 /**
1880  *	snapshot_write_finalize - must be called after the last call to
1881  *	snapshot_write_next() in case the last page in the image happens
1882  *	to be a highmem page and its contents should be stored in the
1883  *	highmem.  Additionally, it releases the memory that will not be
1884  *	used any more.
1885  */
1886 
1887 void snapshot_write_finalize(struct snapshot_handle *handle)
1888 {
1889 	copy_last_highmem_page();
1890 	/* Free only if we have loaded the image entirely */
1891 	if (handle->prev && handle->cur > nr_meta_pages + nr_copy_pages) {
1892 		memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
1893 		free_highmem_data();
1894 	}
1895 }
1896 
1897 int snapshot_image_loaded(struct snapshot_handle *handle)
1898 {
1899 	return !(!nr_copy_pages || !last_highmem_page_copied() ||
1900 			handle->cur <= nr_meta_pages + nr_copy_pages);
1901 }
1902 
1903 #ifdef CONFIG_HIGHMEM
1904 /* Assumes that @buf is ready and points to a "safe" page */
1905 static inline void
1906 swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
1907 {
1908 	void *kaddr1, *kaddr2;
1909 
1910 	kaddr1 = kmap_atomic(p1, KM_USER0);
1911 	kaddr2 = kmap_atomic(p2, KM_USER1);
1912 	memcpy(buf, kaddr1, PAGE_SIZE);
1913 	memcpy(kaddr1, kaddr2, PAGE_SIZE);
1914 	memcpy(kaddr2, buf, PAGE_SIZE);
1915 	kunmap_atomic(kaddr1, KM_USER0);
1916 	kunmap_atomic(kaddr2, KM_USER1);
1917 }
1918 
1919 /**
1920  *	restore_highmem - for each highmem page that was allocated before
1921  *	the suspend and included in the suspend image, and also has been
1922  *	allocated by the "resume" kernel swap its current (ie. "before
1923  *	resume") contents with the previous (ie. "before suspend") one.
1924  *
1925  *	If the resume eventually fails, we can call this function once
1926  *	again and restore the "before resume" highmem state.
1927  */
1928 
1929 int restore_highmem(void)
1930 {
1931 	struct highmem_pbe *pbe = highmem_pblist;
1932 	void *buf;
1933 
1934 	if (!pbe)
1935 		return 0;
1936 
1937 	buf = get_image_page(GFP_ATOMIC, PG_SAFE);
1938 	if (!buf)
1939 		return -ENOMEM;
1940 
1941 	while (pbe) {
1942 		swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
1943 		pbe = pbe->next;
1944 	}
1945 	free_image_page(buf, PG_UNSAFE_CLEAR);
1946 	return 0;
1947 }
1948 #endif /* CONFIG_HIGHMEM */
1949