Linux Kernel 3.7.1
sparse-vmemmap.c
/*
 * Virtual Memory Map support
 *
 * (C) 2007 sgi. Christoph Lameter.
 *
 * Virtual memory maps allow VM primitives pfn_to_page, page_to_pfn,
 * virt_to_page, page_address() to be implemented as a base offset
 * calculation without memory access.
 *
 * However, virtual mappings need a page table and TLBs. Many Linux
 * architectures already map their physical space using 1-1 mappings
 * via TLBs. For those arches the virtual memory map is essentially
 * for free if we use the same page size as the 1-1 mappings. In that
 * case the overhead consists of a few additional pages that are
 * allocated to create a view of memory for vmemmap.
 *
 * The architecture is expected to provide a vmemmap_populate() function
 * to instantiate the mapping.
 */
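
/*
 * For illustration (not part of this file): with CONFIG_SPARSEMEM_VMEMMAP the
 * generic helpers in include/asm-generic/memory_model.h become exactly such a
 * base offset calculation against the fixed vmemmap base:
 *
 *        #define __pfn_to_page(pfn)        (vmemmap + (pfn))
 *        #define __page_to_pfn(page)       (unsigned long)((page) - vmemmap)
 *
 * Translating a pfn to its struct page therefore touches no memory at all.
 */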
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <asm/dma.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>

/*
 * Allocate a block of memory to be used to back the virtual memory map
 * or to back the page tables that are used to create the mapping.
 * Uses the main allocators if they are available, else bootmem.
 */

static void * __init_refok __earlyonly_bootmem_alloc(int node,
                                unsigned long size,
                                unsigned long align,
                                unsigned long goal)
{
        return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal);
}

static void *vmemmap_buf;
static void *vmemmap_buf_end;

void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{
        /* If the main allocator is up use that, otherwise fall back to bootmem. */
        if (slab_is_available()) {
                struct page *page;

                if (node_state(node, N_HIGH_MEMORY))
                        page = alloc_pages_node(node,
                                GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
                                get_order(size));
                else
                        page = alloc_pages(
                                GFP_KERNEL | __GFP_ZERO | __GFP_REPEAT,
                                get_order(size));
                if (page)
                        return page_address(page);
                return NULL;
        } else
                return __earlyonly_bootmem_alloc(node, size, size,
                                __pa(MAX_DMA_ADDRESS));
}

/* All early-stage callers must request the same size for the buffer to work. */
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
{
        void *ptr;

        if (!vmemmap_buf)
                return vmemmap_alloc_block(size, node);

        /* Take the allocation from the preallocated buffer. */
        ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size);
        if (ptr + size > vmemmap_buf_end)
                return vmemmap_alloc_block(size, node);

        vmemmap_buf = ptr + size;

        return ptr;
}

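/*
 * For illustration: during early boot sparse_mem_maps_populate_node() below
 * points vmemmap_buf at one large bootmem block, so successive calls carve
 * contiguous, size-aligned chunks off its front, e.g.
 *
 *        void *a = vmemmap_alloc_block_buf(PAGE_SIZE, node);
 *        void *b = vmemmap_alloc_block_buf(PAGE_SIZE, node);
 *
 * yields b == a + PAGE_SIZE for as long as the buffer lasts, after which the
 * calls fall through to vmemmap_alloc_block().
 */
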
void __meminit vmemmap_verify(pte_t *pte, int node,
                                unsigned long start, unsigned long end)
{
        unsigned long pfn = pte_pfn(*pte);
        int actual_node = early_pfn_to_nid(pfn);

        if (node_distance(actual_node, node) > LOCAL_DISTANCE)
                printk(KERN_WARNING "[%lx-%lx] potential offnode "
                        "page_structs\n", start, end - 1);
}

pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
{
        pte_t *pte = pte_offset_kernel(pmd, addr);
        if (pte_none(*pte)) {
                pte_t entry;
                void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
                if (!p)
                        return NULL;
                entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
                set_pte_at(&init_mm, addr, pte, entry);
        }
        return pte;
}

pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
{
        pmd_t *pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd)) {
                void *p = vmemmap_alloc_block(PAGE_SIZE, node);
                if (!p)
                        return NULL;
                pmd_populate_kernel(&init_mm, pmd, p);
        }
        return pmd;
}

pud_t * __meminit vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node)
{
        pud_t *pud = pud_offset(pgd, addr);
        if (pud_none(*pud)) {
                void *p = vmemmap_alloc_block(PAGE_SIZE, node);
                if (!p)
                        return NULL;
                pud_populate(&init_mm, pud, p);
        }
        return pud;
}

pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
{
        pgd_t *pgd = pgd_offset_k(addr);
        if (pgd_none(*pgd)) {
                void *p = vmemmap_alloc_block(PAGE_SIZE, node);
                if (!p)
                        return NULL;
                pgd_populate(&init_mm, pgd, p);
        }
        return pgd;
}

int __meminit vmemmap_populate_basepages(struct page *start_page,
                                unsigned long size, int node)
{
        unsigned long addr = (unsigned long)start_page;
        unsigned long end = (unsigned long)(start_page + size);
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;

        for (; addr < end; addr += PAGE_SIZE) {
                pgd = vmemmap_pgd_populate(addr, node);
                if (!pgd)
                        return -ENOMEM;
                pud = vmemmap_pud_populate(pgd, addr, node);
                if (!pud)
                        return -ENOMEM;
                pmd = vmemmap_pmd_populate(pud, addr, node);
                if (!pmd)
                        return -ENOMEM;
                pte = vmemmap_pte_populate(pmd, addr, node);
                if (!pte)
                        return -ENOMEM;
                vmemmap_verify(pte, node, addr, addr + PAGE_SIZE);
        }

        return 0;
}

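/*
 * A minimal sketch (not part of this file) of the arch hook named in the
 * header comment, assuming an architecture that maps the vmemmap with base
 * pages only; such arches can implement vmemmap_populate() as a thin wrapper:
 *
 *        int __meminit vmemmap_populate(struct page *start_page,
 *                                       unsigned long size, int node)
 *        {
 *                return vmemmap_populate_basepages(start_page, size, node);
 *        }
 */
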
struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
{
        struct page *map = pfn_to_page(pnum * PAGES_PER_SECTION);
        int error = vmemmap_populate(map, PAGES_PER_SECTION, nid);
        if (error)
                return NULL;

        return map;
}

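/*
 * For illustration, assuming x86_64-style values (SECTION_SIZE_BITS = 27,
 * 4 KiB pages, sizeof(struct page) == 64): PAGES_PER_SECTION = 32768, so each
 * section's slice of the memory map costs 32768 * 64 bytes = 2 MiB. Rounding
 * the per-section size up to PMD_SIZE, as sparse_mem_maps_populate_node()
 * below does, keeps each slice huge-page aligned within the buffer.
 */
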
void __init sparse_mem_maps_populate_node(struct page **map_map,
                                unsigned long pnum_begin,
                                unsigned long pnum_end,
                                unsigned long map_count, int nodeid)
{
        unsigned long pnum;
        unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
        void *vmemmap_buf_start;

        size = ALIGN(size, PMD_SIZE);
        vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count,
                         PMD_SIZE, __pa(MAX_DMA_ADDRESS));

        if (vmemmap_buf_start) {
                vmemmap_buf = vmemmap_buf_start;
                vmemmap_buf_end = vmemmap_buf_start + size * map_count;
        }

        for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
                struct mem_section *ms;

                if (!present_section_nr(pnum))
                        continue;

                map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
                if (map_map[pnum])
                        continue;
                ms = __nr_to_section(pnum);
                printk(KERN_ERR "%s: sparsemem memory map backing failed, "
                        "some memory will not be available.\n", __func__);
                ms->section_mem_map = 0;
        }

        if (vmemmap_buf_start) {
                /* Free whatever is left of the preallocated buffer. */
                free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf);
                vmemmap_buf = NULL;
                vmemmap_buf_end = NULL;
        }
}