Linux Kernel  3.7.1
snapshot.c
1 /*
2  * linux/kernel/power/snapshot.c
3  *
4  * This file provides system snapshot/restore functionality for swsusp.
5  *
6  * Copyright (C) 1998-2005 Pavel Machek <[email protected]>
7  * Copyright (C) 2006 Rafael J. Wysocki <[email protected]>
8  *
9  * This file is released under the GPLv2.
10  *
11  */
12 
13 #include <linux/version.h>
14 #include <linux/module.h>
15 #include <linux/mm.h>
16 #include <linux/suspend.h>
17 #include <linux/delay.h>
18 #include <linux/bitops.h>
19 #include <linux/spinlock.h>
20 #include <linux/kernel.h>
21 #include <linux/pm.h>
22 #include <linux/device.h>
23 #include <linux/init.h>
24 #include <linux/bootmem.h>
25 #include <linux/syscalls.h>
26 #include <linux/console.h>
27 #include <linux/highmem.h>
28 #include <linux/list.h>
29 #include <linux/slab.h>
30 
31 #include <asm/uaccess.h>
32 #include <asm/mmu_context.h>
33 #include <asm/pgtable.h>
34 #include <asm/tlbflush.h>
35 #include <asm/io.h>
36 
37 #include "power.h"
38 
39 static int swsusp_page_is_free(struct page *);
40 static void swsusp_set_page_forbidden(struct page *);
41 static void swsusp_unset_page_forbidden(struct page *);
42 
43 /*
44  * Number of bytes to reserve for memory allocations made by device drivers
45  * from their ->freeze() and ->freeze_noirq() callbacks so that they don't
46  * cause image creation to fail (tunable via /sys/power/reserved_size).
47  */
48 unsigned long reserved_size;
49 
50 void __init hibernate_reserved_size_init(void)
51 {
52  reserved_size = SPARE_PAGES * PAGE_SIZE;
53 }
54 
55 /*
56  * Preferred image size in bytes (tunable via /sys/power/image_size).
57  * When it is set to N, swsusp will do its best to ensure the image
58  * size will not exceed N bytes, but if that is impossible, it will
59  * try to create the smallest image possible.
60  */
61 unsigned long image_size;
62 
63 void __init hibernate_image_size_init(void)
64 {
65  image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE;
66 }
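/*
 * Illustrative example (not part of the original file): with the default
 * above, a machine with 1 GiB of RAM and 4 KiB pages has
 * totalram_pages == 262144, so image_size is initialized to
 * ((262144 * 2) / 5) * 4096 bytes, i.e. roughly 409 MiB, or about 40%
 * of RAM.
 */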
67 
68 /* List of PBEs needed for restoring the pages that were allocated before
69  * the suspend and included in the suspend image, but have also been
70  * allocated by the "resume" kernel, so their contents cannot be written
71  * directly to their "original" page frames.
72  */
73 struct pbe *restore_pblist;
74 
75 /* Pointer to an auxiliary buffer (1 page) */
76 static void *buffer;
77 
88 #define PG_ANY 0
89 #define PG_SAFE 1
90 #define PG_UNSAFE_CLEAR 1
91 #define PG_UNSAFE_KEEP 0
92 
93 static unsigned int allocated_unsafe_pages;
94 
95 static void *get_image_page(gfp_t gfp_mask, int safe_needed)
96 {
97  void *res;
98 
99  res = (void *)get_zeroed_page(gfp_mask);
100  if (safe_needed)
101  while (res && swsusp_page_is_free(virt_to_page(res))) {
102  /* The page is unsafe, mark it for swsusp_free() */
103  swsusp_set_page_forbidden(virt_to_page(res));
104  allocated_unsafe_pages++;
105  res = (void *)get_zeroed_page(gfp_mask);
106  }
107  if (res) {
108  swsusp_set_page_forbidden(virt_to_page(res));
109  swsusp_set_page_free(virt_to_page(res));
110  }
111  return res;
112 }
113 
114 unsigned long get_safe_page(gfp_t gfp_mask)
115 {
116  return (unsigned long)get_image_page(gfp_mask, PG_SAFE);
117 }
118 
119 static struct page *alloc_image_page(gfp_t gfp_mask)
120 {
121  struct page *page;
122 
123  page = alloc_page(gfp_mask);
124  if (page) {
125  swsusp_set_page_forbidden(page);
126  swsusp_set_page_free(page);
127  }
128  return page;
129 }
130 
136 static inline void free_image_page(void *addr, int clear_nosave_free)
137 {
138  struct page *page;
139 
140  BUG_ON(!virt_addr_valid(addr));
141 
142  page = virt_to_page(addr);
143 
144  swsusp_unset_page_forbidden(page);
145  if (clear_nosave_free)
146  swsusp_unset_page_free(page);
147 
148  __free_page(page);
149 }
150 
151 /* struct linked_page is used to build chains of pages */
152 
153 #define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *))
154 
155 struct linked_page {
156  struct linked_page *next;
157  char data[LINKED_PAGE_DATA_SIZE];
158 } __attribute__((packed));
159 
160 static inline void
161 free_list_of_pages(struct linked_page *list, int clear_page_nosave)
162 {
163  while (list) {
164  struct linked_page *lp = list->next;
165 
166  free_image_page(list, clear_page_nosave);
167  list = lp;
168  }
169 }
170 
184 struct chain_allocator {
185  struct linked_page *chain; /* the chain */
186  unsigned int used_space; /* total size of objects allocated out
187  * of the current page
188  */
189  gfp_t gfp_mask; /* mask for allocating pages */
190  int safe_needed; /* if set, only "safe" pages are allocated */
191 };
192 
193 static void
194 chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed)
195 {
196  ca->chain = NULL;
197  ca->used_space = LINKED_PAGE_DATA_SIZE;
198  ca->gfp_mask = gfp_mask;
199  ca->safe_needed = safe_needed;
200 }
201 
202 static void *chain_alloc(struct chain_allocator *ca, unsigned int size)
203 {
204  void *ret;
205 
206  if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) {
207  struct linked_page *lp;
208 
209  lp = get_image_page(ca->gfp_mask, ca->safe_needed);
210  if (!lp)
211  return NULL;
212 
213  lp->next = ca->chain;
214  ca->chain = lp;
215  ca->used_space = 0;
216  }
217  ret = ca->chain->data + ca->used_space;
218  ca->used_space += size;
219  return ret;
220 }
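/*
 * Illustrative sketch (not part of the original file): the chain allocator
 * above carves small objects out of a linked list of image pages; objects
 * are never freed individually, the whole chain is released at once.
 * Typical use, mirroring what memory_bm_create() does below:
 *
 *	struct chain_allocator ca;
 *	struct bm_block *bb;
 *
 *	chain_init(&ca, GFP_KERNEL, PG_ANY);
 *	bb = chain_alloc(&ca, sizeof(struct bm_block));
 *	if (!bb)
 *		return -ENOMEM;
 *	...
 *	free_list_of_pages(ca.chain, PG_UNSAFE_KEEP);
 */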
221 
252 #define BM_END_OF_MAP (~0UL)
253 
254 #define BM_BITS_PER_BLOCK (PAGE_SIZE * BITS_PER_BYTE)
255 
256 struct bm_block {
257  struct list_head hook; /* hook into a list of bitmap blocks */
258  unsigned long start_pfn; /* pfn represented by the first bit */
259  unsigned long end_pfn; /* pfn represented by the last bit plus 1 */
260  unsigned long *data; /* bitmap representing pages */
261 };
262 
263 static inline unsigned long bm_block_bits(struct bm_block *bb)
264 {
265  return bb->end_pfn - bb->start_pfn;
266 }
267 
268 /* struct bm_position is used for browsing memory bitmaps */
269 
270 struct bm_position {
271  struct bm_block *block;
272  int bit;
273 };
274 
275 struct memory_bitmap {
276  struct list_head blocks; /* list of bitmap blocks */
277  struct linked_page *p_list; /* list of pages used to store zone
278  * bitmap objects and bitmap block
279  * objects
280  */
281  struct bm_position cur; /* most recently used bit position */
282 };
283 
284 /* Functions that operate on memory bitmaps */
285 
286 static void memory_bm_position_reset(struct memory_bitmap *bm)
287 {
288  bm->cur.block = list_entry(bm->blocks.next, struct bm_block, hook);
289  bm->cur.bit = 0;
290 }
291 
292 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free);
293 
300 static int create_bm_block_list(unsigned long pages,
301  struct list_head *list,
302  struct chain_allocator *ca)
303 {
304  unsigned int nr_blocks = DIV_ROUND_UP(pages, BM_BITS_PER_BLOCK);
305 
306  while (nr_blocks-- > 0) {
307  struct bm_block *bb;
308 
309  bb = chain_alloc(ca, sizeof(struct bm_block));
310  if (!bb)
311  return -ENOMEM;
312  list_add(&bb->hook, list);
313  }
314 
315  return 0;
316 }
317 
318 struct mem_extent {
319  struct list_head hook;
320  unsigned long start;
321  unsigned long end;
322 };
323 
328 static void free_mem_extents(struct list_head *list)
329 {
330  struct mem_extent *ext, *aux;
331 
332  list_for_each_entry_safe(ext, aux, list, hook) {
333  list_del(&ext->hook);
334  kfree(ext);
335  }
336 }
337 
344 static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
345 {
346  struct zone *zone;
347 
348  INIT_LIST_HEAD(list);
349 
350  for_each_populated_zone(zone) {
351  unsigned long zone_start, zone_end;
352  struct mem_extent *ext, *cur, *aux;
353 
354  zone_start = zone->zone_start_pfn;
355  zone_end = zone->zone_start_pfn + zone->spanned_pages;
356 
357  list_for_each_entry(ext, list, hook)
358  if (zone_start <= ext->end)
359  break;
360 
361  if (&ext->hook == list || zone_end < ext->start) {
362  /* New extent is necessary */
363  struct mem_extent *new_ext;
364 
365  new_ext = kzalloc(sizeof(struct mem_extent), gfp_mask);
366  if (!new_ext) {
367  free_mem_extents(list);
368  return -ENOMEM;
369  }
370  new_ext->start = zone_start;
371  new_ext->end = zone_end;
372  list_add_tail(&new_ext->hook, &ext->hook);
373  continue;
374  }
375 
376  /* Merge this zone's range of PFNs with the existing one */
377  if (zone_start < ext->start)
378  ext->start = zone_start;
379  if (zone_end > ext->end)
380  ext->end = zone_end;
381 
382  /* More merging may be possible */
383  cur = ext;
384  list_for_each_entry_safe_continue(cur, aux, list, hook) {
385  if (zone_end < cur->start)
386  break;
387  if (zone_end < cur->end)
388  ext->end = cur->end;
389  list_del(&cur->hook);
390  kfree(cur);
391  }
392  }
393 
394  return 0;
395 }
396 
400 static int
401 memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed)
402 {
403  struct chain_allocator ca;
404  struct list_head mem_extents;
405  struct mem_extent *ext;
406  int error;
407 
408  chain_init(&ca, gfp_mask, safe_needed);
409  INIT_LIST_HEAD(&bm->blocks);
410 
411  error = create_mem_extents(&mem_extents, gfp_mask);
412  if (error)
413  return error;
414 
415  list_for_each_entry(ext, &mem_extents, hook) {
416  struct bm_block *bb;
417  unsigned long pfn = ext->start;
418  unsigned long pages = ext->end - ext->start;
419 
420  bb = list_entry(bm->blocks.prev, struct bm_block, hook);
421 
422  error = create_bm_block_list(pages, bm->blocks.prev, &ca);
423  if (error)
424  goto Error;
425 
426  list_for_each_entry_continue(bb, &bm->blocks, hook) {
427  bb->data = get_image_page(gfp_mask, safe_needed);
428  if (!bb->data) {
429  error = -ENOMEM;
430  goto Error;
431  }
432 
433  bb->start_pfn = pfn;
434  if (pages >= BM_BITS_PER_BLOCK) {
435  pfn += BM_BITS_PER_BLOCK;
436  pages -= BM_BITS_PER_BLOCK;
437  } else {
438  /* This is executed only once in the loop */
439  pfn += pages;
440  }
441  bb->end_pfn = pfn;
442  }
443  }
444 
445  bm->p_list = ca.chain;
446  memory_bm_position_reset(bm);
447  Exit:
448  free_mem_extents(&mem_extents);
449  return error;
450 
451  Error:
452  bm->p_list = ca.chain;
453  memory_bm_free(bm, PG_UNSAFE_CLEAR);
454  goto Exit;
455 }
456 
460 static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free)
461 {
462  struct bm_block *bb;
463 
464  list_for_each_entry(bb, &bm->blocks, hook)
465  if (bb->data)
466  free_image_page(bb->data, clear_nosave_free);
467 
468  free_list_of_pages(bm->p_list, clear_nosave_free);
469 
470  INIT_LIST_HEAD(&bm->blocks);
471 }
472 
478 static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn,
479  void **addr, unsigned int *bit_nr)
480 {
481  struct bm_block *bb;
482 
483  /*
484  * Check if the pfn corresponds to the current bitmap block and find
485  * the block where it fits if this is not the case.
486  */
487  bb = bm->cur.block;
488  if (pfn < bb->start_pfn)
489  list_for_each_entry_continue_reverse(bb, &bm->blocks, hook)
490  if (pfn >= bb->start_pfn)
491  break;
492 
493  if (pfn >= bb->end_pfn)
494  list_for_each_entry_continue(bb, &bm->blocks, hook)
495  if (pfn >= bb->start_pfn && pfn < bb->end_pfn)
496  break;
497 
498  if (&bb->hook == &bm->blocks)
499  return -EFAULT;
500 
501  /* The block has been found */
502  bm->cur.block = bb;
503  pfn -= bb->start_pfn;
504  bm->cur.bit = pfn + 1;
505  *bit_nr = pfn;
506  *addr = bb->data;
507  return 0;
508 }
509 
510 static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn)
511 {
512  void *addr;
513  unsigned int bit;
514  int error;
515 
516  error = memory_bm_find_bit(bm, pfn, &addr, &bit);
517  BUG_ON(error);
518  set_bit(bit, addr);
519 }
520 
521 static int mem_bm_set_bit_check(struct memory_bitmap *bm, unsigned long pfn)
522 {
523  void *addr;
524  unsigned int bit;
525  int error;
526 
527  error = memory_bm_find_bit(bm, pfn, &addr, &bit);
528  if (!error)
529  set_bit(bit, addr);
530  return error;
531 }
532 
533 static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn)
534 {
535  void *addr;
536  unsigned int bit;
537  int error;
538 
539  error = memory_bm_find_bit(bm, pfn, &addr, &bit);
540  BUG_ON(error);
541  clear_bit(bit, addr);
542 }
543 
544 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
545 {
546  void *addr;
547  unsigned int bit;
548  int error;
549 
550  error = memory_bm_find_bit(bm, pfn, &addr, &bit);
551  BUG_ON(error);
552  return test_bit(bit, addr);
553 }
554 
555 static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn)
556 {
557  void *addr;
558  unsigned int bit;
559 
560  return !memory_bm_find_bit(bm, pfn, &addr, &bit);
561 }
562 
572 static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
573 {
574  struct bm_block *bb;
575  int bit;
576 
577  bb = bm->cur.block;
578  do {
579  bit = bm->cur.bit;
580  bit = find_next_bit(bb->data, bm_block_bits(bb), bit);
581  if (bit < bm_block_bits(bb))
582  goto Return_pfn;
583 
584  bb = list_entry(bb->hook.next, struct bm_block, hook);
585  bm->cur.block = bb;
586  bm->cur.bit = 0;
587  } while (&bb->hook != &bm->blocks);
588 
589  memory_bm_position_reset(bm);
590  return BM_END_OF_MAP;
591 
592  Return_pfn:
593  bm->cur.bit = bit + 1;
594  return bb->start_pfn + bit;
595 }
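/*
 * Illustrative sketch (not part of the original file): a complete walk over
 * the set bits of a memory bitmap, the same pattern used by
 * copy_data_pages() and duplicate_memory_bitmap() below:
 *
 *	unsigned long pfn;
 *
 *	memory_bm_position_reset(bm);
 *	for (;;) {
 *		pfn = memory_bm_next_pfn(bm);
 *		if (pfn == BM_END_OF_MAP)
 *			break;
 *		... every pfn returned here has its bit set in bm ...
 *	}
 */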
596 
602 struct nosave_region {
603  struct list_head list;
604  unsigned long start_pfn;
605  unsigned long end_pfn;
606 };
607 
608 static LIST_HEAD(nosave_regions);
609 
616 void __init
617 __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn,
618  int use_kmalloc)
619 {
620  struct nosave_region *region;
621 
622  if (start_pfn >= end_pfn)
623  return;
624 
625  if (!list_empty(&nosave_regions)) {
626  /* Try to extend the previous region (they should be sorted) */
627  region = list_entry(nosave_regions.prev,
628  struct nosave_region, list);
629  if (region->end_pfn == start_pfn) {
630  region->end_pfn = end_pfn;
631  goto Report;
632  }
633  }
634  if (use_kmalloc) {
635  /* during init, this shouldn't fail */
636  region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL);
637  BUG_ON(!region);
638  } else
639  /* This allocation cannot fail */
640  region = alloc_bootmem(sizeof(struct nosave_region));
641  region->start_pfn = start_pfn;
642  region->end_pfn = end_pfn;
643  list_add_tail(&region->list, &nosave_regions);
644  Report:
645  printk(KERN_INFO "PM: Registered nosave memory: %016lx - %016lx\n",
646  start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT);
647 }
648 
649 /*
650  * Set bits in this map correspond to the page frames the contents of which
651  * should not be saved during the suspend.
652  */
653 static struct memory_bitmap *forbidden_pages_map;
654 
655 /* Set bits in this map correspond to free page frames. */
656 static struct memory_bitmap *free_pages_map;
657 
658 /*
659  * Each page frame allocated for creating the image is marked by setting the
660  * corresponding bits in forbidden_pages_map and free_pages_map simultaneously
661  */
662 
663 void swsusp_set_page_free(struct page *page)
664 {
665  if (free_pages_map)
666  memory_bm_set_bit(free_pages_map, page_to_pfn(page));
667 }
668 
669 static int swsusp_page_is_free(struct page *page)
670 {
671  return free_pages_map ?
672  memory_bm_test_bit(free_pages_map, page_to_pfn(page)) : 0;
673 }
674 
675 void swsusp_unset_page_free(struct page *page)
676 {
677  if (free_pages_map)
678  memory_bm_clear_bit(free_pages_map, page_to_pfn(page));
679 }
680 
681 static void swsusp_set_page_forbidden(struct page *page)
682 {
683  if (forbidden_pages_map)
684  memory_bm_set_bit(forbidden_pages_map, page_to_pfn(page));
685 }
686 
687 int swsusp_page_is_forbidden(struct page *page)
688 {
689  return forbidden_pages_map ?
690  memory_bm_test_bit(forbidden_pages_map, page_to_pfn(page)) : 0;
691 }
692 
693 static void swsusp_unset_page_forbidden(struct page *page)
694 {
695  if (forbidden_pages_map)
696  memory_bm_clear_bit(forbidden_pages_map, page_to_pfn(page));
697 }
698 
704 static void mark_nosave_pages(struct memory_bitmap *bm)
705 {
706  struct nosave_region *region;
707 
708  if (list_empty(&nosave_regions))
709  return;
710 
711  list_for_each_entry(region, &nosave_regions, list) {
712  unsigned long pfn;
713 
714  pr_debug("PM: Marking nosave pages: [mem %#010llx-%#010llx]\n",
715  (unsigned long long) region->start_pfn << PAGE_SHIFT,
716  ((unsigned long long) region->end_pfn << PAGE_SHIFT)
717  - 1);
718 
719  for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
720  if (pfn_valid(pfn)) {
721  /*
722  * It is safe to ignore the result of
723  * mem_bm_set_bit_check() here, since we won't
724  * touch the PFNs for which the error is
725  * returned anyway.
726  */
727  mem_bm_set_bit_check(bm, pfn);
728  }
729  }
730 }
731 
740 int create_basic_memory_bitmaps(void)
741 {
742  struct memory_bitmap *bm1, *bm2;
743  int error = 0;
744 
745  BUG_ON(forbidden_pages_map || free_pages_map);
746 
747  bm1 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
748  if (!bm1)
749  return -ENOMEM;
750 
751  error = memory_bm_create(bm1, GFP_KERNEL, PG_ANY);
752  if (error)
753  goto Free_first_object;
754 
755  bm2 = kzalloc(sizeof(struct memory_bitmap), GFP_KERNEL);
756  if (!bm2)
757  goto Free_first_bitmap;
758 
759  error = memory_bm_create(bm2, GFP_KERNEL, PG_ANY);
760  if (error)
761  goto Free_second_object;
762 
763  forbidden_pages_map = bm1;
764  free_pages_map = bm2;
765  mark_nosave_pages(forbidden_pages_map);
766 
767  pr_debug("PM: Basic memory bitmaps created\n");
768 
769  return 0;
770 
771  Free_second_object:
772  kfree(bm2);
773  Free_first_bitmap:
774  memory_bm_free(bm1, PG_UNSAFE_CLEAR);
775  Free_first_object:
776  kfree(bm1);
777  return -ENOMEM;
778 }
779 
787 void free_basic_memory_bitmaps(void)
788 {
789  struct memory_bitmap *bm1, *bm2;
790 
791  BUG_ON(!(forbidden_pages_map && free_pages_map));
792 
793  bm1 = forbidden_pages_map;
794  bm2 = free_pages_map;
795  forbidden_pages_map = NULL;
796  free_pages_map = NULL;
797  memory_bm_free(bm1, PG_UNSAFE_CLEAR);
798  kfree(bm1);
799  memory_bm_free(bm2, PG_UNSAFE_CLEAR);
800  kfree(bm2);
801 
802  pr_debug("PM: Basic memory bitmaps freed\n");
803 }
804 
811 unsigned int snapshot_additional_pages(struct zone *zone)
812 {
813  unsigned int res;
814 
815  res = DIV_ROUND_UP(zone->spanned_pages, BM_BITS_PER_BLOCK);
816  res += DIV_ROUND_UP(res * sizeof(struct bm_block),
817  LINKED_PAGE_DATA_SIZE);
818  return 2 * res;
819 }
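/*
 * Illustrative example (not part of the original file): with 4 KiB pages
 * BM_BITS_PER_BLOCK is 32768, so a zone spanning 262144 page frames
 * (1 GiB) needs DIV_ROUND_UP(262144, 32768) = 8 bitmap data pages plus
 * the page(s) holding the eight struct bm_block descriptors; the total is
 * doubled because two bitmaps (orig_bm and copy_bm) are created for the
 * image.
 */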
820 
821 #ifdef CONFIG_HIGHMEM
822 
827 static unsigned int count_free_highmem_pages(void)
828 {
829  struct zone *zone;
830  unsigned int cnt = 0;
831 
832  for_each_populated_zone(zone)
833  if (is_highmem(zone))
834  cnt += zone_page_state(zone, NR_FREE_PAGES);
835 
836  return cnt;
837 }
838 
846 static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn)
847 {
848  struct page *page;
849 
850  if (!pfn_valid(pfn))
851  return NULL;
852 
853  page = pfn_to_page(pfn);
854  if (page_zone(page) != zone)
855  return NULL;
856 
857  BUG_ON(!PageHighMem(page));
858 
859  if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page) ||
860  PageReserved(page))
861  return NULL;
862 
863  if (page_is_guard(page))
864  return NULL;
865 
866  return page;
867 }
868 
874 static unsigned int count_highmem_pages(void)
875 {
876  struct zone *zone;
877  unsigned int n = 0;
878 
879  for_each_populated_zone(zone) {
880  unsigned long pfn, max_zone_pfn;
881 
882  if (!is_highmem(zone))
883  continue;
884 
885  mark_free_pages(zone);
886  max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
887  for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
888  if (saveable_highmem_page(zone, pfn))
889  n++;
890  }
891  return n;
892 }
893 #else
894 static inline void *saveable_highmem_page(struct zone *z, unsigned long p)
895 {
896  return NULL;
897 }
898 #endif /* CONFIG_HIGHMEM */
899 
908 static struct page *saveable_page(struct zone *zone, unsigned long pfn)
909 {
910  struct page *page;
911 
912  if (!pfn_valid(pfn))
913  return NULL;
914 
915  page = pfn_to_page(pfn);
916  if (page_zone(page) != zone)
917  return NULL;
918 
919  BUG_ON(PageHighMem(page));
920 
921  if (swsusp_page_is_forbidden(page) || swsusp_page_is_free(page))
922  return NULL;
923 
924  if (PageReserved(page)
925  && (!kernel_page_present(page) || pfn_is_nosave(pfn)))
926  return NULL;
927 
928  if (page_is_guard(page))
929  return NULL;
930 
931  return page;
932 }
933 
939 static unsigned int count_data_pages(void)
940 {
941  struct zone *zone;
942  unsigned long pfn, max_zone_pfn;
943  unsigned int n = 0;
944 
945  for_each_populated_zone(zone) {
946  if (is_highmem(zone))
947  continue;
948 
949  mark_free_pages(zone);
950  max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
951  for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
952  if (saveable_page(zone, pfn))
953  n++;
954  }
955  return n;
956 }
957 
958 /* This is needed, because copy_page and memcpy are not usable for copying
959  * task structs.
960  */
961 static inline void do_copy_page(long *dst, long *src)
962 {
963  int n;
964 
965  for (n = PAGE_SIZE / sizeof(long); n; n--)
966  *dst++ = *src++;
967 }
968 
969 
976 static void safe_copy_page(void *dst, struct page *s_page)
977 {
978  if (kernel_page_present(s_page)) {
979  do_copy_page(dst, page_address(s_page));
980  } else {
981  kernel_map_pages(s_page, 1, 1);
982  do_copy_page(dst, page_address(s_page));
983  kernel_map_pages(s_page, 1, 0);
984  }
985 }
986 
987 
988 #ifdef CONFIG_HIGHMEM
989 static inline struct page *
990 page_is_saveable(struct zone *zone, unsigned long pfn)
991 {
992  return is_highmem(zone) ?
993  saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
994 }
995 
996 static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
997 {
998  struct page *s_page, *d_page;
999  void *src, *dst;
1000 
1001  s_page = pfn_to_page(src_pfn);
1002  d_page = pfn_to_page(dst_pfn);
1003  if (PageHighMem(s_page)) {
1004  src = kmap_atomic(s_page);
1005  dst = kmap_atomic(d_page);
1006  do_copy_page(dst, src);
1007  kunmap_atomic(dst);
1008  kunmap_atomic(src);
1009  } else {
1010  if (PageHighMem(d_page)) {
1011  /* Page pointed to by src may contain some kernel
1012  * data modified by kmap_atomic()
1013  */
1014  safe_copy_page(buffer, s_page);
1015  dst = kmap_atomic(d_page);
1016  copy_page(dst, buffer);
1017  kunmap_atomic(dst);
1018  } else {
1019  safe_copy_page(page_address(d_page), s_page);
1020  }
1021  }
1022 }
1023 #else
1024 #define page_is_saveable(zone, pfn) saveable_page(zone, pfn)
1025 
1026 static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
1027 {
1028  safe_copy_page(page_address(pfn_to_page(dst_pfn)),
1029  pfn_to_page(src_pfn));
1030 }
1031 #endif /* CONFIG_HIGHMEM */
1032 
1033 static void
1034 copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
1035 {
1036  struct zone *zone;
1037  unsigned long pfn;
1038 
1039  for_each_populated_zone(zone) {
1040  unsigned long max_zone_pfn;
1041 
1042  mark_free_pages(zone);
1043  max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1044  for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1045  if (page_is_saveable(zone, pfn))
1046  memory_bm_set_bit(orig_bm, pfn);
1047  }
1048  memory_bm_position_reset(orig_bm);
1049  memory_bm_position_reset(copy_bm);
1050  for(;;) {
1051  pfn = memory_bm_next_pfn(orig_bm);
1052  if (unlikely(pfn == BM_END_OF_MAP))
1053  break;
1054  copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
1055  }
1056 }
1057 
1058 /* Total number of image pages */
1059 static unsigned int nr_copy_pages;
1060 /* Number of pages needed for saving the original pfns of the image pages */
1061 static unsigned int nr_meta_pages;
1062 /*
1063  * Numbers of normal and highmem page frames allocated for hibernation image
1064  * before suspending devices.
1065  */
1066 static unsigned int alloc_normal, alloc_highmem;
1067 /*
1068  * Memory bitmap used for marking saveable pages (during hibernation) or
1069  * hibernation image pages (during restore)
1070  */
1071 static struct memory_bitmap orig_bm;
1072 /*
1073  * Memory bitmap used during hibernation for marking allocated page frames that
1074  * will contain copies of saveable pages. During restore it is initially used
1075  * for marking hibernation image pages, but then the set bits from it are
1076  * duplicated in @orig_bm and it is released. On highmem systems it is next
1077  * used for marking "safe" highmem pages, but it has to be reinitialized for
1078  * this purpose.
1079  */
1080 static struct memory_bitmap copy_bm;
1081 
1089 void swsusp_free(void)
1090 {
1091  struct zone *zone;
1092  unsigned long pfn, max_zone_pfn;
1093 
1094  for_each_populated_zone(zone) {
1095  max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1096  for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1097  if (pfn_valid(pfn)) {
1098  struct page *page = pfn_to_page(pfn);
1099 
1100  if (swsusp_page_is_forbidden(page) &&
1101  swsusp_page_is_free(page)) {
1102  swsusp_unset_page_forbidden(page);
1103  swsusp_unset_page_free(page);
1104  __free_page(page);
1105  }
1106  }
1107  }
1108  nr_copy_pages = 0;
1109  nr_meta_pages = 0;
1110  restore_pblist = NULL;
1111  buffer = NULL;
1112  alloc_normal = 0;
1113  alloc_highmem = 0;
1114 }
1115 
1116 /* Helper functions used for the shrinking of memory. */
1117 
1118 #define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN)
1119 
1127 static unsigned long preallocate_image_pages(unsigned long nr_pages, gfp_t mask)
1128 {
1129  unsigned long nr_alloc = 0;
1130 
1131  while (nr_pages > 0) {
1132  struct page *page;
1133 
1134  page = alloc_image_page(mask);
1135  if (!page)
1136  break;
1137  memory_bm_set_bit(&copy_bm, page_to_pfn(page));
1138  if (PageHighMem(page))
1139  alloc_highmem++;
1140  else
1141  alloc_normal++;
1142  nr_pages--;
1143  nr_alloc++;
1144  }
1145 
1146  return nr_alloc;
1147 }
1148 
1149 static unsigned long preallocate_image_memory(unsigned long nr_pages,
1150  unsigned long avail_normal)
1151 {
1152  unsigned long alloc;
1153 
1154  if (avail_normal <= alloc_normal)
1155  return 0;
1156 
1157  alloc = avail_normal - alloc_normal;
1158  if (nr_pages < alloc)
1159  alloc = nr_pages;
1160 
1161  return preallocate_image_pages(alloc, GFP_IMAGE);
1162 }
1163 
1164 #ifdef CONFIG_HIGHMEM
1165 static unsigned long preallocate_image_highmem(unsigned long nr_pages)
1166 {
1167  return preallocate_image_pages(nr_pages, GFP_IMAGE | __GFP_HIGHMEM);
1168 }
1169 
1173 static unsigned long __fraction(u64 x, u64 multiplier, u64 base)
1174 {
1175  x *= multiplier;
1176  do_div(x, base);
1177  return (unsigned long)x;
1178 }
1179 
1180 static unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
1181  unsigned long highmem,
1182  unsigned long total)
1183 {
1184  unsigned long alloc = __fraction(nr_pages, highmem, total);
1185 
1186  return preallocate_image_pages(alloc, GFP_IMAGE | __GFP_HIGHMEM);
1187 }
1188 #else /* CONFIG_HIGHMEM */
1189 static inline unsigned long preallocate_image_highmem(unsigned long nr_pages)
1190 {
1191  return 0;
1192 }
1193 
1194 static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages,
1195  unsigned long highmem,
1196  unsigned long total)
1197 {
1198  return 0;
1199 }
1200 #endif /* CONFIG_HIGHMEM */
1201 
1205 static void free_unnecessary_pages(void)
1206 {
1207  unsigned long save, to_free_normal, to_free_highmem;
1208 
1209  save = count_data_pages();
1210  if (alloc_normal >= save) {
1211  to_free_normal = alloc_normal - save;
1212  save = 0;
1213  } else {
1214  to_free_normal = 0;
1215  save -= alloc_normal;
1216  }
1217  save += count_highmem_pages();
1218  if (alloc_highmem >= save) {
1219  to_free_highmem = alloc_highmem - save;
1220  } else {
1221  to_free_highmem = 0;
1222  save -= alloc_highmem;
1223  if (to_free_normal > save)
1224  to_free_normal -= save;
1225  else
1226  to_free_normal = 0;
1227  }
1228 
1229  memory_bm_position_reset(&copy_bm);
1230 
1231  while (to_free_normal > 0 || to_free_highmem > 0) {
1232  unsigned long pfn = memory_bm_next_pfn(&copy_bm);
1233  struct page *page = pfn_to_page(pfn);
1234 
1235  if (PageHighMem(page)) {
1236  if (!to_free_highmem)
1237  continue;
1238  to_free_highmem--;
1239  alloc_highmem--;
1240  } else {
1241  if (!to_free_normal)
1242  continue;
1243  to_free_normal--;
1244  alloc_normal--;
1245  }
1246  memory_bm_clear_bit(&copy_bm, pfn);
1247  swsusp_unset_page_forbidden(page);
1248  swsusp_unset_page_free(page);
1249  __free_page(page);
1250  }
1251 }
1252 
1269 static unsigned long minimum_image_size(unsigned long saveable)
1270 {
1271  unsigned long size;
1272 
1273  size = global_page_state(NR_SLAB_RECLAIMABLE)
1274  + global_page_state(NR_ACTIVE_ANON)
1275  + global_page_state(NR_INACTIVE_ANON)
1276  + global_page_state(NR_ACTIVE_FILE)
1277  + global_page_state(NR_INACTIVE_FILE)
1278  - global_page_state(NR_FILE_MAPPED);
1279 
1280  return saveable <= size ? 0 : saveable - size;
1281 }
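/*
 * Illustrative example (not part of the original file): the estimate above
 * treats reclaimable slab, anonymous and file LRU pages (minus mapped
 * file pages) as freeable.  If 'saveable' is 200000 pages and that
 * freeable sum is 150000 pages, the minimum image size is
 * 200000 - 150000 = 50000 pages; if the freeable sum exceeds 'saveable',
 * the minimum is 0.
 */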
1282 
1305 int hibernate_preallocate_memory(void)
1306 {
1307  struct zone *zone;
1308  unsigned long saveable, size, max_size, count, highmem, pages = 0;
1309  unsigned long alloc, save_highmem, pages_highmem, avail_normal;
1310  struct timeval start, stop;
1311  int error;
1312 
1313  printk(KERN_INFO "PM: Preallocating image memory... ");
1314  do_gettimeofday(&start);
1315 
1316  error = memory_bm_create(&orig_bm, GFP_IMAGE, PG_ANY);
1317  if (error)
1318  goto err_out;
1319 
1320  error = memory_bm_create(&copy_bm, GFP_IMAGE, PG_ANY);
1321  if (error)
1322  goto err_out;
1323 
1324  alloc_normal = 0;
1325  alloc_highmem = 0;
1326 
1327  /* Count the number of saveable data pages. */
1328  save_highmem = count_highmem_pages();
1329  saveable = count_data_pages();
1330 
1331  /*
1332  * Compute the total number of page frames we can use (count) and the
1333  * number of pages needed for image metadata (size).
1334  */
1335  count = saveable;
1336  saveable += save_highmem;
1337  highmem = save_highmem;
1338  size = 0;
1339  for_each_populated_zone(zone) {
1340  size += snapshot_additional_pages(zone);
1341  if (is_highmem(zone))
1342  highmem += zone_page_state(zone, NR_FREE_PAGES);
1343  else
1344  count += zone_page_state(zone, NR_FREE_PAGES);
1345  }
1346  avail_normal = count;
1347  count += highmem;
1348  count -= totalreserve_pages;
1349 
1350  /* Add number of pages required for page keys (s390 only). */
1351  size += page_key_additional_pages(saveable);
1352 
1353  /* Compute the maximum number of saveable pages to leave in memory. */
1354  max_size = (count - (size + PAGES_FOR_IO)) / 2
1355  - 2 * DIV_ROUND_UP(reserved_size, PAGE_SIZE);
1356  /* Compute the desired number of image pages specified by image_size. */
1357  size = DIV_ROUND_UP(image_size, PAGE_SIZE);
1358  if (size > max_size)
1359  size = max_size;
1360  /*
1361  * If the desired number of image pages is at least as large as the
1362  * current number of saveable pages in memory, allocate page frames for
1363  * the image and we're done.
1364  */
1365  if (size >= saveable) {
1366  pages = preallocate_image_highmem(save_highmem);
1367  pages += preallocate_image_memory(saveable - pages, avail_normal);
1368  goto out;
1369  }
1370 
1371  /* Estimate the minimum size of the image. */
1372  pages = minimum_image_size(saveable);
1373  /*
1374  * To avoid excessive pressure on the normal zone, leave room in it to
1375  * accommodate an image of the minimum size (unless it's already too
1376  * small, in which case don't preallocate pages from it at all).
1377  */
1378  if (avail_normal > pages)
1379  avail_normal -= pages;
1380  else
1381  avail_normal = 0;
1382  if (size < pages)
1383  size = min_t(unsigned long, pages, max_size);
1384 
1385  /*
1386  * Let the memory management subsystem know that we're going to need a
1387  * large number of page frames to allocate and make it free some memory.
1388  * NOTE: If this is not done, performance will be hurt badly in some
1389  * test cases.
1390  */
1391  shrink_all_memory(saveable - size);
1392 
1393  /*
1394  * The number of saveable pages in memory was too high, so apply some
1395  * pressure to decrease it. First, make room for the largest possible
1396  * image and fail if that doesn't work. Next, try to decrease the size
1397  * of the image as much as indicated by 'size' using allocations from
1398  * highmem and non-highmem zones separately.
1399  */
1400  pages_highmem = preallocate_image_highmem(highmem / 2);
1401  alloc = (count - max_size) - pages_highmem;
1402  pages = preallocate_image_memory(alloc, avail_normal);
1403  if (pages < alloc) {
1404  /* We have exhausted non-highmem pages, try highmem. */
1405  alloc -= pages;
1406  pages += pages_highmem;
1407  pages_highmem = preallocate_image_highmem(alloc);
1408  if (pages_highmem < alloc)
1409  goto err_out;
1410  pages += pages_highmem;
1411  /*
1412  * size is the desired number of saveable pages to leave in
1413  * memory, so try to preallocate (all memory - size) pages.
1414  */
1415  alloc = (count - pages) - size;
1416  pages += preallocate_image_highmem(alloc);
1417  } else {
1418  /*
1419  * There are approximately max_size saveable pages at this point
1420  * and we want to reduce this number down to size.
1421  */
1422  alloc = max_size - size;
1423  size = preallocate_highmem_fraction(alloc, highmem, count);
1424  pages_highmem += size;
1425  alloc -= size;
1426  size = preallocate_image_memory(alloc, avail_normal);
1427  pages_highmem += preallocate_image_highmem(alloc - size);
1428  pages += pages_highmem + size;
1429  }
1430 
1431  /*
1432  * We only need as many page frames for the image as there are saveable
1433  * pages in memory, but we have allocated more. Release the excessive
1434  * ones now.
1435  */
1436  free_unnecessary_pages();
1437 
1438  out:
1439  do_gettimeofday(&stop);
1440  printk(KERN_CONT "done (allocated %lu pages)\n", pages);
1441  swsusp_show_speed(&start, &stop, pages, "Allocated");
1442 
1443  return 0;
1444 
1445  err_out:
1446  printk(KERN_CONT "\n");
1447  swsusp_free();
1448  return -ENOMEM;
1449 }
1450 
1451 #ifdef CONFIG_HIGHMEM
1452 
1457 static unsigned int count_pages_for_highmem(unsigned int nr_highmem)
1458 {
1459  unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem;
1460 
1461  if (free_highmem >= nr_highmem)
1462  nr_highmem = 0;
1463  else
1464  nr_highmem -= free_highmem;
1465 
1466  return nr_highmem;
1467 }
1468 #else
1469 static unsigned int
1470 count_pages_for_highmem(unsigned int nr_highmem) { return 0; }
1471 #endif /* CONFIG_HIGHMEM */
1472 
1478 static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem)
1479 {
1480  struct zone *zone;
1481  unsigned int free = alloc_normal;
1482 
1483  for_each_populated_zone(zone)
1484  if (!is_highmem(zone))
1485  free += zone_page_state(zone, NR_FREE_PAGES);
1486 
1487  nr_pages += count_pages_for_highmem(nr_highmem);
1488  pr_debug("PM: Normal pages needed: %u + %u, available pages: %u\n",
1489  nr_pages, PAGES_FOR_IO, free);
1490 
1491  return free > nr_pages + PAGES_FOR_IO;
1492 }
1493 
1494 #ifdef CONFIG_HIGHMEM
1495 
1500 static inline int get_highmem_buffer(int safe_needed)
1501 {
1502  buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed);
1503  return buffer ? 0 : -ENOMEM;
1504 }
1505 
1512 static inline unsigned int
1513 alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem)
1514 {
1515  unsigned int to_alloc = count_free_highmem_pages();
1516 
1517  if (to_alloc > nr_highmem)
1518  to_alloc = nr_highmem;
1519 
1520  nr_highmem -= to_alloc;
1521  while (to_alloc-- > 0) {
1522  struct page *page;
1523 
1524  page = alloc_image_page(__GFP_HIGHMEM);
1525  memory_bm_set_bit(bm, page_to_pfn(page));
1526  }
1527  return nr_highmem;
1528 }
1529 #else
1530 static inline int get_highmem_buffer(int safe_needed) { return 0; }
1531 
1532 static inline unsigned int
1533 alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; }
1534 #endif /* CONFIG_HIGHMEM */
1535 
1548 static int
1549 swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm,
1550  unsigned int nr_pages, unsigned int nr_highmem)
1551 {
1552  if (nr_highmem > 0) {
1553  if (get_highmem_buffer(PG_ANY))
1554  goto err_out;
1555  if (nr_highmem > alloc_highmem) {
1556  nr_highmem -= alloc_highmem;
1557  nr_pages += alloc_highmem_pages(copy_bm, nr_highmem);
1558  }
1559  }
1560  if (nr_pages > alloc_normal) {
1561  nr_pages -= alloc_normal;
1562  while (nr_pages-- > 0) {
1563  struct page *page;
1564 
1565  page = alloc_image_page(GFP_ATOMIC | __GFP_COLD);
1566  if (!page)
1567  goto err_out;
1568  memory_bm_set_bit(copy_bm, page_to_pfn(page));
1569  }
1570  }
1571 
1572  return 0;
1573 
1574  err_out:
1575  swsusp_free();
1576  return -ENOMEM;
1577 }
1578 
1579 asmlinkage int swsusp_save(void)
1580 {
1581  unsigned int nr_pages, nr_highmem;
1582 
1583  printk(KERN_INFO "PM: Creating hibernation image:\n");
1584 
1585  drain_local_pages(NULL);
1586  nr_pages = count_data_pages();
1587  nr_highmem = count_highmem_pages();
1588  printk(KERN_INFO "PM: Need to copy %u pages\n", nr_pages + nr_highmem);
1589 
1590  if (!enough_free_mem(nr_pages, nr_highmem)) {
1591  printk(KERN_ERR "PM: Not enough free memory\n");
1592  return -ENOMEM;
1593  }
1594 
1595  if (swsusp_alloc(&orig_bm, &copy_bm, nr_pages, nr_highmem)) {
1596  printk(KERN_ERR "PM: Memory allocation failed\n");
1597  return -ENOMEM;
1598  }
1599 
1600  /* While allocating the suspend pagedir, new cold pages may appear.
1601  * Kill them.
1602  */
1603  drain_local_pages(NULL);
1604  copy_data_pages(&copy_bm, &orig_bm);
1605 
1606  /*
1607  * End of critical section. From now on, we can write to memory,
1608  * but we should not touch disk. This specially means we must _not_
1609  * touch swap space! Except we must write out our image of course.
1610  */
1611 
1612  nr_pages += nr_highmem;
1613  nr_copy_pages = nr_pages;
1614  nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
1615 
1616  printk(KERN_INFO "PM: Hibernation image created (%d pages copied)\n",
1617  nr_pages);
1618 
1619  return 0;
1620 }
1621 
1622 #ifndef CONFIG_ARCH_HIBERNATION_HEADER
1623 static int init_header_complete(struct swsusp_info *info)
1624 {
1625  memcpy(&info->uts, init_utsname(), sizeof(struct new_utsname));
1626  info->version_code = LINUX_VERSION_CODE;
1627  return 0;
1628 }
1629 
1630 static char *check_image_kernel(struct swsusp_info *info)
1631 {
1632  if (info->version_code != LINUX_VERSION_CODE)
1633  return "kernel version";
1634  if (strcmp(info->uts.sysname,init_utsname()->sysname))
1635  return "system type";
1636  if (strcmp(info->uts.release,init_utsname()->release))
1637  return "kernel release";
1638  if (strcmp(info->uts.version,init_utsname()->version))
1639  return "version";
1640  if (strcmp(info->uts.machine,init_utsname()->machine))
1641  return "machine";
1642  return NULL;
1643 }
1644 #endif /* CONFIG_ARCH_HIBERNATION_HEADER */
1645 
1646 unsigned long snapshot_get_image_size(void)
1647 {
1648  return nr_copy_pages + nr_meta_pages + 1;
1649 }
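/*
 * Illustrative note (not part of the original file): the size returned
 * above reflects the image layout produced by snapshot_read_next(): one
 * swsusp_info header page, followed by nr_meta_pages pages of packed
 * original PFNs, followed by the nr_copy_pages data pages themselves.
 */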
1650 
1651 static int init_header(struct swsusp_info *info)
1652 {
1653  memset(info, 0, sizeof(struct swsusp_info));
1654  info->num_physpages = num_physpages;
1655  info->image_pages = nr_copy_pages;
1656  info->pages = snapshot_get_image_size();
1657  info->size = info->pages;
1658  info->size <<= PAGE_SHIFT;
1659  return init_header_complete(info);
1660 }
1661 
1667 static inline void
1668 pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
1669 {
1670  int j;
1671 
1672  for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1673  buf[j] = memory_bm_next_pfn(bm);
1674  if (unlikely(buf[j] == BM_END_OF_MAP))
1675  break;
1676  /* Save page key for data page (s390 only). */
1677  page_key_read(buf + j);
1678  }
1679 }
1680 
1698 int snapshot_read_next(struct snapshot_handle *handle)
1699 {
1700  if (handle->cur > nr_meta_pages + nr_copy_pages)
1701  return 0;
1702 
1703  if (!buffer) {
1704  /* This makes the buffer be freed by swsusp_free() */
1705  buffer = get_image_page(GFP_ATOMIC, PG_ANY);
1706  if (!buffer)
1707  return -ENOMEM;
1708  }
1709  if (!handle->cur) {
1710  int error;
1711 
1712  error = init_header((struct swsusp_info *)buffer);
1713  if (error)
1714  return error;
1715  handle->buffer = buffer;
1716  memory_bm_position_reset(&orig_bm);
1717  memory_bm_position_reset(&copy_bm);
1718  } else if (handle->cur <= nr_meta_pages) {
1719  clear_page(buffer);
1720  pack_pfns(buffer, &orig_bm);
1721  } else {
1722  struct page *page;
1723 
1724  page = pfn_to_page(memory_bm_next_pfn(&copy_bm));
1725  if (PageHighMem(page)) {
1726  /* Highmem pages are copied to the buffer,
1727  * because we can't return with a kmapped
1728  * highmem page (we may not be called again).
1729  */
1730  void *kaddr;
1731 
1732  kaddr = kmap_atomic(page);
1733  copy_page(buffer, kaddr);
1734  kunmap_atomic(kaddr);
1735  handle->buffer = buffer;
1736  } else {
1737  handle->buffer = page_address(page);
1738  }
1739  }
1740  handle->cur++;
1741  return PAGE_SIZE;
1742 }
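/*
 * Illustrative sketch (not part of the original file): a caller (such as
 * the swap writing code) is expected to drive the handle roughly like
 * this - every successful call exposes one page of image data in
 * handle->buffer and returns PAGE_SIZE, 0 means the whole image has been
 * produced, and a negative value is an error:
 *
 *	struct snapshot_handle handle;
 *	int ret;
 *
 *	memset(&handle, 0, sizeof(handle));
 *	while ((ret = snapshot_read_next(&handle)) > 0) {
 *		... write PAGE_SIZE bytes from handle->buffer ...
 *	}
 *	if (ret < 0)
 *		... handle the error ...
 */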
1743 
1750 static int mark_unsafe_pages(struct memory_bitmap *bm)
1751 {
1752  struct zone *zone;
1753  unsigned long pfn, max_zone_pfn;
1754 
1755  /* Clear page flags */
1756  for_each_populated_zone(zone) {
1757  max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
1758  for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
1759  if (pfn_valid(pfn))
1760  swsusp_unset_page_free(pfn_to_page(pfn));
1761  }
1762 
1763  /* Mark pages that correspond to the "original" pfns as "unsafe" */
1764  memory_bm_position_reset(bm);
1765  do {
1766  pfn = memory_bm_next_pfn(bm);
1767  if (likely(pfn != BM_END_OF_MAP)) {
1768  if (likely(pfn_valid(pfn)))
1769  swsusp_set_page_free(pfn_to_page(pfn));
1770  else
1771  return -EFAULT;
1772  }
1773  } while (pfn != BM_END_OF_MAP);
1774 
1775  allocated_unsafe_pages = 0;
1776 
1777  return 0;
1778 }
1779 
1780 static void
1781 duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src)
1782 {
1783  unsigned long pfn;
1784 
1785  memory_bm_position_reset(src);
1786  pfn = memory_bm_next_pfn(src);
1787  while (pfn != BM_END_OF_MAP) {
1788  memory_bm_set_bit(dst, pfn);
1789  pfn = memory_bm_next_pfn(src);
1790  }
1791 }
1792 
1793 static int check_header(struct swsusp_info *info)
1794 {
1795  char *reason;
1796 
1797  reason = check_image_kernel(info);
1798  if (!reason && info->num_physpages != num_physpages)
1799  reason = "memory size";
1800  if (reason) {
1801  printk(KERN_ERR "PM: Image mismatch: %s\n", reason);
1802  return -EPERM;
1803  }
1804  return 0;
1805 }
1806 
1811 static int
1812 load_header(struct swsusp_info *info)
1813 {
1814  int error;
1815 
1816  restore_pblist = NULL;
1817  error = check_header(info);
1818  if (!error) {
1819  nr_copy_pages = info->image_pages;
1820  nr_meta_pages = info->pages - info->image_pages - 1;
1821  }
1822  return error;
1823 }
1824 
1829 static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
1830 {
1831  int j;
1832 
1833  for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
1834  if (unlikely(buf[j] == BM_END_OF_MAP))
1835  break;
1836 
1837  /* Extract and buffer page key for data page (s390 only). */
1838  page_key_memorize(buf + j);
1839 
1840  if (memory_bm_pfn_present(bm, buf[j]))
1841  memory_bm_set_bit(bm, buf[j]);
1842  else
1843  return -EFAULT;
1844  }
1845 
1846  return 0;
1847 }
1848 
1849 /* List of "safe" pages that may be used to store data loaded from the suspend
1850  * image
1851  */
1852 static struct linked_page *safe_pages_list;
1853 
1854 #ifdef CONFIG_HIGHMEM
1855 /* struct highmem_pbe is used for creating the list of highmem pages that
1856  * should be restored atomically during the resume from disk, because the page
1857  * frames they have occupied before the suspend are in use.
1858  */
1859 struct highmem_pbe {
1860  struct page *copy_page; /* data is here now */
1861  struct page *orig_page; /* data was here before the suspend */
1862  struct highmem_pbe *next;
1863 };
1864 
1865 /* List of highmem PBEs needed for restoring the highmem pages that were
1866  * allocated before the suspend and included in the suspend image, but have
1867  * also been allocated by the "resume" kernel, so their contents cannot be
1868  * written directly to their "original" page frames.
1869  */
1870 static struct highmem_pbe *highmem_pblist;
1871 
1878 static unsigned int count_highmem_image_pages(struct memory_bitmap *bm)
1879 {
1880  unsigned long pfn;
1881  unsigned int cnt = 0;
1882 
1883  memory_bm_position_reset(bm);
1884  pfn = memory_bm_next_pfn(bm);
1885  while (pfn != BM_END_OF_MAP) {
1886  if (PageHighMem(pfn_to_page(pfn)))
1887  cnt++;
1888 
1889  pfn = memory_bm_next_pfn(bm);
1890  }
1891  return cnt;
1892 }
1893 
1906 static unsigned int safe_highmem_pages;
1907 
1908 static struct memory_bitmap *safe_highmem_bm;
1909 
1910 static int
1911 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
1912 {
1913  unsigned int to_alloc;
1914 
1915  if (memory_bm_create(bm, GFP_ATOMIC, PG_SAFE))
1916  return -ENOMEM;
1917 
1918  if (get_highmem_buffer(PG_SAFE))
1919  return -ENOMEM;
1920 
1921  to_alloc = count_free_highmem_pages();
1922  if (to_alloc > *nr_highmem_p)
1923  to_alloc = *nr_highmem_p;
1924  else
1925  *nr_highmem_p = to_alloc;
1926 
1927  safe_highmem_pages = 0;
1928  while (to_alloc-- > 0) {
1929  struct page *page;
1930 
1931  page = alloc_page(__GFP_HIGHMEM);
1932  if (!swsusp_page_is_free(page)) {
1933  /* The page is "safe", set its bit in the bitmap */
1934  memory_bm_set_bit(bm, page_to_pfn(page));
1935  safe_highmem_pages++;
1936  }
1937  /* Mark the page as allocated */
1938  swsusp_set_page_forbidden(page);
1939  swsusp_set_page_free(page);
1940  }
1941  memory_bm_position_reset(bm);
1942  safe_highmem_bm = bm;
1943  return 0;
1944 }
1945 
1963 static struct page *last_highmem_page;
1964 
1965 static void *
1966 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
1967 {
1968  struct highmem_pbe *pbe;
1969  void *kaddr;
1970 
1971  if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) {
1972  /* We have allocated the "original" page frame and we can
1973  * use it directly to store the loaded page.
1974  */
1975  last_highmem_page = page;
1976  return buffer;
1977  }
1978  /* The "original" page frame has not been allocated and we have to
1979  * use a "safe" page frame to store the loaded page.
1980  */
1981  pbe = chain_alloc(ca, sizeof(struct highmem_pbe));
1982  if (!pbe) {
1983  swsusp_free();
1984  return ERR_PTR(-ENOMEM);
1985  }
1986  pbe->orig_page = page;
1987  if (safe_highmem_pages > 0) {
1988  struct page *tmp;
1989 
1990  /* Copy of the page will be stored in high memory */
1991  kaddr = buffer;
1992  tmp = pfn_to_page(memory_bm_next_pfn(safe_highmem_bm));
1993  safe_highmem_pages--;
1994  last_highmem_page = tmp;
1995  pbe->copy_page = tmp;
1996  } else {
1997  /* Copy of the page will be stored in normal memory */
1998  kaddr = safe_pages_list;
1999  safe_pages_list = safe_pages_list->next;
2000  pbe->copy_page = virt_to_page(kaddr);
2001  }
2002  pbe->next = highmem_pblist;
2003  highmem_pblist = pbe;
2004  return kaddr;
2005 }
2006 
2013 static void copy_last_highmem_page(void)
2014 {
2015  if (last_highmem_page) {
2016  void *dst;
2017 
2018  dst = kmap_atomic(last_highmem_page);
2019  copy_page(dst, buffer);
2020  kunmap_atomic(dst);
2021  last_highmem_page = NULL;
2022  }
2023 }
2024 
2025 static inline int last_highmem_page_copied(void)
2026 {
2027  return !last_highmem_page;
2028 }
2029 
2030 static inline void free_highmem_data(void)
2031 {
2032  if (safe_highmem_bm)
2033  memory_bm_free(safe_highmem_bm, PG_UNSAFE_CLEAR);
2034 
2035  if (buffer)
2036  free_image_page(buffer, PG_UNSAFE_CLEAR);
2037 }
2038 #else
2039 static inline int get_safe_write_buffer(void) { return 0; }
2040 
2041 static unsigned int
2042 count_highmem_image_pages(struct memory_bitmap *bm) { return 0; }
2043 
2044 static inline int
2045 prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p)
2046 {
2047  return 0;
2048 }
2049 
2050 static inline void *
2051 get_highmem_page_buffer(struct page *page, struct chain_allocator *ca)
2052 {
2053  return ERR_PTR(-EINVAL);
2054 }
2055 
2056 static inline void copy_last_highmem_page(void) {}
2057 static inline int last_highmem_page_copied(void) { return 1; }
2058 static inline void free_highmem_data(void) {}
2059 #endif /* CONFIG_HIGHMEM */
2060 
2075 #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe))
2076 
2077 static int
2078 prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
2079 {
2080  unsigned int nr_pages, nr_highmem;
2081  struct linked_page *sp_list, *lp;
2082  int error;
2083 
2084  /* If there is no highmem, the buffer will not be necessary */
2085  free_image_page(buffer, PG_UNSAFE_CLEAR);
2086  buffer = NULL;
2087 
2088  nr_highmem = count_highmem_image_pages(bm);
2089  error = mark_unsafe_pages(bm);
2090  if (error)
2091  goto Free;
2092 
2093  error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE);
2094  if (error)
2095  goto Free;
2096 
2097  duplicate_memory_bitmap(new_bm, bm);
2098  memory_bm_free(bm, PG_UNSAFE_KEEP);
2099  if (nr_highmem > 0) {
2100  error = prepare_highmem_image(bm, &nr_highmem);
2101  if (error)
2102  goto Free;
2103  }
2104  /* Reserve some safe pages for potential later use.
2105  *
2106  * NOTE: This way we make sure there will be enough safe pages for the
2107  * chain_alloc() in get_buffer(). It is a bit wasteful, but
2108  * nr_copy_pages cannot be greater than 50% of the memory anyway.
2109  */
2110  sp_list = NULL;
2111  /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */
2112  nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2113  nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
2114  while (nr_pages > 0) {
2115  lp = get_image_page(GFP_ATOMIC, PG_SAFE);
2116  if (!lp) {
2117  error = -ENOMEM;
2118  goto Free;
2119  }
2120  lp->next = sp_list;
2121  sp_list = lp;
2122  nr_pages--;
2123  }
2124  /* Preallocate memory for the image */
2125  safe_pages_list = NULL;
2126  nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
2127  while (nr_pages > 0) {
2128  lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
2129  if (!lp) {
2130  error = -ENOMEM;
2131  goto Free;
2132  }
2133  if (!swsusp_page_is_free(virt_to_page(lp))) {
2134  /* The page is "safe", add it to the list */
2135  lp->next = safe_pages_list;
2136  safe_pages_list = lp;
2137  }
2138  /* Mark the page as allocated */
2139  swsusp_set_page_forbidden(virt_to_page(lp));
2140  swsusp_set_page_free(virt_to_page(lp));
2141  nr_pages--;
2142  }
2143  /* Free the reserved safe pages so that chain_alloc() can use them */
2144  while (sp_list) {
2145  lp = sp_list->next;
2146  free_image_page(sp_list, PG_UNSAFE_CLEAR);
2147  sp_list = lp;
2148  }
2149  return 0;
2150 
2151  Free:
2152  swsusp_free();
2153  return error;
2154 }
2155 
2161 static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca)
2162 {
2163  struct pbe *pbe;
2164  struct page *page;
2165  unsigned long pfn = memory_bm_next_pfn(bm);
2166 
2167  if (pfn == BM_END_OF_MAP)
2168  return ERR_PTR(-EFAULT);
2169 
2170  page = pfn_to_page(pfn);
2171  if (PageHighMem(page))
2172  return get_highmem_page_buffer(page, ca);
2173 
2174  if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page))
2175  /* We have allocated the "original" page frame and we can
2176  * use it directly to store the loaded page.
2177  */
2178  return page_address(page);
2179 
2180  /* The "original" page frame has not been allocated and we have to
2181  * use a "safe" page frame to store the loaded page.
2182  */
2183  pbe = chain_alloc(ca, sizeof(struct pbe));
2184  if (!pbe) {
2185  swsusp_free();
2186  return ERR_PTR(-ENOMEM);
2187  }
2188  pbe->orig_address = page_address(page);
2189  pbe->address = safe_pages_list;
2190  safe_pages_list = safe_pages_list->next;
2191  pbe->next = restore_pblist;
2192  restore_pblist = pbe;
2193  return pbe->address;
2194 }
2195 
2213 int snapshot_write_next(struct snapshot_handle *handle)
2214 {
2215  static struct chain_allocator ca;
2216  int error = 0;
2217 
2218  /* Check if we have already loaded the entire image */
2219  if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
2220  return 0;
2221 
2222  handle->sync_read = 1;
2223 
2224  if (!handle->cur) {
2225  if (!buffer)
2226  /* This makes the buffer be freed by swsusp_free() */
2227  buffer = get_image_page(GFP_ATOMIC, PG_ANY);
2228 
2229  if (!buffer)
2230  return -ENOMEM;
2231 
2232  handle->buffer = buffer;
2233  } else if (handle->cur == 1) {
2234  error = load_header(buffer);
2235  if (error)
2236  return error;
2237 
2238  error = memory_bm_create(&copy_bm, GFP_ATOMIC, PG_ANY);
2239  if (error)
2240  return error;
2241 
2242  /* Allocate buffer for page keys. */
2243  error = page_key_alloc(nr_copy_pages);
2244  if (error)
2245  return error;
2246 
2247  } else if (handle->cur <= nr_meta_pages + 1) {
2248  error = unpack_orig_pfns(buffer, &copy_bm);
2249  if (error)
2250  return error;
2251 
2252  if (handle->cur == nr_meta_pages + 1) {
2253  error = prepare_image(&orig_bm, &copy_bm);
2254  if (error)
2255  return error;
2256 
2257  chain_init(&ca, GFP_ATOMIC, PG_SAFE);
2258  memory_bm_position_reset(&orig_bm);
2259  restore_pblist = NULL;
2260  handle->buffer = get_buffer(&orig_bm, &ca);
2261  handle->sync_read = 0;
2262  if (IS_ERR(handle->buffer))
2263  return PTR_ERR(handle->buffer);
2264  }
2265  } else {
2266  copy_last_highmem_page();
2267  /* Restore page key for data page (s390 only). */
2268  page_key_write(handle->buffer);
2269  handle->buffer = get_buffer(&orig_bm, &ca);
2270  if (IS_ERR(handle->buffer))
2271  return PTR_ERR(handle->buffer);
2272  if (handle->buffer != buffer)
2273  handle->sync_read = 0;
2274  }
2275  handle->cur++;
2276  return PAGE_SIZE;
2277 }
2278 
2287 void snapshot_write_finalize(struct snapshot_handle *handle)
2288 {
2289  copy_last_highmem_page();
2290  /* Restore page key for data page (s390 only). */
2291  page_key_write(handle->buffer);
2292  page_key_free();
2293  /* Free only if we have loaded the image entirely */
2294  if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
2295  memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR);
2296  free_highmem_data();
2297  }
2298 }
2299 
2300 int snapshot_image_loaded(struct snapshot_handle *handle)
2301 {
2302  return !(!nr_copy_pages || !last_highmem_page_copied() ||
2303  handle->cur <= nr_meta_pages + nr_copy_pages);
2304 }
2305 
2306 #ifdef CONFIG_HIGHMEM
2307 /* Assumes that @buf is ready and points to a "safe" page */
2308 static inline void
2309 swap_two_pages_data(struct page *p1, struct page *p2, void *buf)
2310 {
2311  void *kaddr1, *kaddr2;
2312 
2313  kaddr1 = kmap_atomic(p1);
2314  kaddr2 = kmap_atomic(p2);
2315  copy_page(buf, kaddr1);
2316  copy_page(kaddr1, kaddr2);
2317  copy_page(kaddr2, buf);
2318  kunmap_atomic(kaddr2);
2319  kunmap_atomic(kaddr1);
2320 }
2321 
2332 int restore_highmem(void)
2333 {
2334  struct highmem_pbe *pbe = highmem_pblist;
2335  void *buf;
2336 
2337  if (!pbe)
2338  return 0;
2339 
2340  buf = get_image_page(GFP_ATOMIC, PG_SAFE);
2341  if (!buf)
2342  return -ENOMEM;
2343 
2344  while (pbe) {
2345  swap_two_pages_data(pbe->copy_page, pbe->orig_page, buf);
2346  pbe = pbe->next;
2347  }
2348  free_image_page(buf, PG_UNSAFE_CLEAR);
2349  return 0;
2350 }
2351 #endif /* CONFIG_HIGHMEM */