Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
numa.c
Go to the documentation of this file.
/*
 * linux/arch/alpha/mm/numa.c
 *
 * DISCONTIGMEM NUMA alpha support.
 *
 * Copyright (C) 2001 Andrea Arcangeli <[email protected]> SuSE
 */
8 
9 #include <linux/types.h>
10 #include <linux/kernel.h>
11 #include <linux/mm.h>
12 #include <linux/bootmem.h>
13 #include <linux/swap.h>
14 #include <linux/initrd.h>
15 #include <linux/pfn.h>
16 #include <linux/module.h>
17 
18 #include <asm/hwrpb.h>
19 #include <asm/pgalloc.h>
20 
22 EXPORT_SYMBOL(node_data);
23 
/*
 * Debug tracing for the DISCONTIGMEM setup code.
 *
 * Tracing is compiled out by default; turn the #undef below into a
 * #define to route DBGDCONT() through printk().  When disabled the macro
 * expands to an empty statement, so its arguments are never evaluated and
 * it is still safe as the sole body of an if/else.
 */
#undef DEBUG_DISCONTIG
#ifdef DEBUG_DISCONTIG
#define DBGDCONT(args...) printk(args)
#else
#define DBGDCONT(args...) do { } while (0)
#endif
30 
/*
 * Walk every cluster descriptor of a memory descriptor table.
 *
 *   memdesc:  pointer to the table; its ->cluster array and ->numclusters
 *             count are read
 *   _cluster: cursor, pointed at each descriptor in turn
 *   i:        index of the current descriptor, starting at 0
 *
 * memdesc is evaluated more than once, so pass an expression without
 * side effects.
 */
#define for_each_mem_cluster(memdesc, _cluster, i)		\
	for ((_cluster) = (memdesc)->cluster, (i) = 0;		\
	     (i) < (memdesc)->numclusters; (i)++, (_cluster)++)
34 
35 static void __init show_mem_layout(void)
36 {
37  struct memclust_struct * cluster;
38  struct memdesc_struct * memdesc;
39  int i;
40 
41  /* Find free clusters, and init and free the bootmem accordingly. */
42  memdesc = (struct memdesc_struct *)
43  (hwrpb->mddt_offset + (unsigned long) hwrpb);
44 
45  printk("Raw memory layout:\n");
46  for_each_mem_cluster(memdesc, cluster, i) {
47  printk(" memcluster %2d, usage %1lx, start %8lu, end %8lu\n",
48  i, cluster->usage, cluster->start_pfn,
49  cluster->start_pfn + cluster->numpages);
50  }
51 }
52 
53 static void __init
54 setup_memory_node(int nid, void *kernel_end)
55 {
56  extern unsigned long mem_size_limit;
57  struct memclust_struct * cluster;
58  struct memdesc_struct * memdesc;
59  unsigned long start_kernel_pfn, end_kernel_pfn;
60  unsigned long bootmap_size, bootmap_pages, bootmap_start;
61  unsigned long start, end;
62  unsigned long node_pfn_start, node_pfn_end;
63  unsigned long node_min_pfn, node_max_pfn;
64  int i;
65  unsigned long node_datasz = PFN_UP(sizeof(pg_data_t));
66  int show_init = 0;
67 
68  /* Find the bounds of current node */
69  node_pfn_start = (node_mem_start(nid)) >> PAGE_SHIFT;
70  node_pfn_end = node_pfn_start + (node_mem_size(nid) >> PAGE_SHIFT);
71 
72  /* Find free clusters, and init and free the bootmem accordingly. */
73  memdesc = (struct memdesc_struct *)
74  (hwrpb->mddt_offset + (unsigned long) hwrpb);
75 
76  /* find the bounds of this node (node_min_pfn/node_max_pfn) */
77  node_min_pfn = ~0UL;
78  node_max_pfn = 0UL;
79  for_each_mem_cluster(memdesc, cluster, i) {
80  /* Bit 0 is console/PALcode reserved. Bit 1 is
81  non-volatile memory -- we might want to mark
82  this for later. */
83  if (cluster->usage & 3)
84  continue;
85 
86  start = cluster->start_pfn;
87  end = start + cluster->numpages;
88 
89  if (start >= node_pfn_end || end <= node_pfn_start)
90  continue;
91 
92  if (!show_init) {
93  show_init = 1;
94  printk("Initializing bootmem allocator on Node ID %d\n", nid);
95  }
96  printk(" memcluster %2d, usage %1lx, start %8lu, end %8lu\n",
97  i, cluster->usage, cluster->start_pfn,
98  cluster->start_pfn + cluster->numpages);
99 
100  if (start < node_pfn_start)
101  start = node_pfn_start;
102  if (end > node_pfn_end)
103  end = node_pfn_end;
104 
105  if (start < node_min_pfn)
106  node_min_pfn = start;
107  if (end > node_max_pfn)
108  node_max_pfn = end;
109  }
110 
111  if (mem_size_limit && node_max_pfn > mem_size_limit) {
112  static int msg_shown = 0;
113  if (!msg_shown) {
114  msg_shown = 1;
115  printk("setup: forcing memory size to %ldK (from %ldK).\n",
116  mem_size_limit << (PAGE_SHIFT - 10),
117  node_max_pfn << (PAGE_SHIFT - 10));
118  }
119  node_max_pfn = mem_size_limit;
120  }
121 
122  if (node_min_pfn >= node_max_pfn)
123  return;
124 
125  /* Update global {min,max}_low_pfn from node information. */
126  if (node_min_pfn < min_low_pfn)
127  min_low_pfn = node_min_pfn;
128  if (node_max_pfn > max_low_pfn)
129  max_pfn = max_low_pfn = node_max_pfn;
130 
131  num_physpages += node_max_pfn - node_min_pfn;
132 
133 #if 0 /* we'll try this one again in a little while */
134  /* Cute trick to make sure our local node data is on local memory */
135  node_data[nid] = (pg_data_t *)(__va(node_min_pfn << PAGE_SHIFT));
136 #endif
137  /* Quasi-mark the pg_data_t as in-use */
138  node_min_pfn += node_datasz;
139  if (node_min_pfn >= node_max_pfn) {
140  printk(" not enough mem to reserve NODE_DATA");
141  return;
142  }
143  NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
144 
145  printk(" Detected node memory: start %8lu, end %8lu\n",
146  node_min_pfn, node_max_pfn);
147 
148  DBGDCONT(" DISCONTIG: node_data[%d] is at 0x%p\n", nid, NODE_DATA(nid));
149  DBGDCONT(" DISCONTIG: NODE_DATA(%d)->bdata is at 0x%p\n", nid, NODE_DATA(nid)->bdata);
150 
151  /* Find the bounds of kernel memory. */
152  start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS);
153  end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end));
154  bootmap_start = -1;
155 
156  if (!nid && (node_max_pfn < end_kernel_pfn || node_min_pfn > start_kernel_pfn))
157  panic("kernel loaded out of ram");
158 
159  /* Zone start phys-addr must be 2^(MAX_ORDER-1) aligned.
160  Note that we round this down, not up - node memory
161  has much larger alignment than 8Mb, so it's safe. */
162  node_min_pfn &= ~((1UL << (MAX_ORDER-1))-1);
163 
164  /* We need to know how many physically contiguous pages
165  we'll need for the bootmap. */
166  bootmap_pages = bootmem_bootmap_pages(node_max_pfn-node_min_pfn);
167 
168  /* Now find a good region where to allocate the bootmap. */
169  for_each_mem_cluster(memdesc, cluster, i) {
170  if (cluster->usage & 3)
171  continue;
172 
173  start = cluster->start_pfn;
174  end = start + cluster->numpages;
175 
176  if (start >= node_max_pfn || end <= node_min_pfn)
177  continue;
178 
179  if (end > node_max_pfn)
180  end = node_max_pfn;
181  if (start < node_min_pfn)
182  start = node_min_pfn;
183 
184  if (start < start_kernel_pfn) {
185  if (end > end_kernel_pfn
186  && end - end_kernel_pfn >= bootmap_pages) {
187  bootmap_start = end_kernel_pfn;
188  break;
189  } else if (end > start_kernel_pfn)
190  end = start_kernel_pfn;
191  } else if (start < end_kernel_pfn)
192  start = end_kernel_pfn;
193  if (end - start >= bootmap_pages) {
194  bootmap_start = start;
195  break;
196  }
197  }
198 
199  if (bootmap_start == -1)
200  panic("couldn't find a contiguous place for the bootmap");
201 
202  /* Allocate the bootmap and mark the whole MM as reserved. */
203  bootmap_size = init_bootmem_node(NODE_DATA(nid), bootmap_start,
204  node_min_pfn, node_max_pfn);
205  DBGDCONT(" bootmap_start %lu, bootmap_size %lu, bootmap_pages %lu\n",
206  bootmap_start, bootmap_size, bootmap_pages);
207 
208  /* Mark the free regions. */
209  for_each_mem_cluster(memdesc, cluster, i) {
210  if (cluster->usage & 3)
211  continue;
212 
213  start = cluster->start_pfn;
214  end = cluster->start_pfn + cluster->numpages;
215 
216  if (start >= node_max_pfn || end <= node_min_pfn)
217  continue;
218 
219  if (end > node_max_pfn)
220  end = node_max_pfn;
221  if (start < node_min_pfn)
222  start = node_min_pfn;
223 
224  if (start < start_kernel_pfn) {
225  if (end > end_kernel_pfn) {
226  free_bootmem_node(NODE_DATA(nid), PFN_PHYS(start),
227  (PFN_PHYS(start_kernel_pfn)
228  - PFN_PHYS(start)));
229  printk(" freeing pages %ld:%ld\n",
230  start, start_kernel_pfn);
231  start = end_kernel_pfn;
232  } else if (end > start_kernel_pfn)
233  end = start_kernel_pfn;
234  } else if (start < end_kernel_pfn)
235  start = end_kernel_pfn;
236  if (start >= end)
237  continue;
238 
239  free_bootmem_node(NODE_DATA(nid), PFN_PHYS(start), PFN_PHYS(end) - PFN_PHYS(start));
240  printk(" freeing pages %ld:%ld\n", start, end);
241  }
242 
243  /* Reserve the bootmap memory. */
244  reserve_bootmem_node(NODE_DATA(nid), PFN_PHYS(bootmap_start),
245  bootmap_size, BOOTMEM_DEFAULT);
246  printk(" reserving pages %ld:%ld\n", bootmap_start, bootmap_start+PFN_UP(bootmap_size));
247 
248  node_set_online(nid);
249 }
250 
251 void __init
252 setup_memory(void *kernel_end)
253 {
254  int nid;
255 
256  show_mem_layout();
257 
259 
260  min_low_pfn = ~0UL;
261  max_low_pfn = 0UL;
262  for (nid = 0; nid < MAX_NUMNODES; nid++)
263  setup_memory_node(nid, kernel_end);
264 
265 #ifdef CONFIG_BLK_DEV_INITRD
267  if (initrd_start) {
268  extern void *move_initrd(unsigned long);
269 
271  printk("Initial ramdisk at: 0x%p (%lu bytes)\n",
272  (void *) initrd_start, INITRD_SIZE);
273 
274  if ((void *)initrd_end > phys_to_virt(PFN_PHYS(max_low_pfn))) {
275  if (!move_initrd(PFN_PHYS(max_low_pfn)))
276  printk("initrd extends beyond end of memory "
277  "(0x%08lx > 0x%p)\ndisabling initrd\n",
278  initrd_end,
280  } else {
281  nid = kvaddr_to_nid(initrd_start);
283  virt_to_phys((void *)initrd_start),
284  INITRD_SIZE, BOOTMEM_DEFAULT);
285  }
286  }
287 #endif /* CONFIG_BLK_DEV_INITRD */
288 }
289 
290 void __init paging_init(void)
291 {
292  unsigned int nid;
293  unsigned long zones_size[MAX_NR_ZONES] = {0, };
294  unsigned long dma_local_pfn;
295 
296  /*
297  * The old global MAX_DMA_ADDRESS per-arch API doesn't fit
298  * in the NUMA model, for now we convert it to a pfn and
299  * we interpret this pfn as a local per-node information.
300  * This issue isn't very important since none of these machines
301  * have legacy ISA slots anyways.
302  */
303  dma_local_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
304 
305  for_each_online_node(nid) {
306  bootmem_data_t *bdata = &bootmem_node_data[nid];
307  unsigned long start_pfn = bdata->node_min_pfn;
308  unsigned long end_pfn = bdata->node_low_pfn;
309 
310  if (dma_local_pfn >= end_pfn - start_pfn)
311  zones_size[ZONE_DMA] = end_pfn - start_pfn;
312  else {
313  zones_size[ZONE_DMA] = dma_local_pfn;
314  zones_size[ZONE_NORMAL] = (end_pfn - start_pfn) - dma_local_pfn;
315  }
316  node_set_state(nid, N_NORMAL_MEMORY);
317  free_area_init_node(nid, zones_size, start_pfn, NULL);
318  }
319 
320  /* Initialize the kernel's ZERO_PGE. */
321  memset((void *)ZERO_PGE, 0, PAGE_SIZE);
322 }
323 
324 void __init mem_init(void)
325 {
326  unsigned long codesize, reservedpages, datasize, initsize, pfn;
327  extern int page_is_ram(unsigned long) __init;
328  extern char _text, _etext, _data, _edata;
329  extern char __init_begin, __init_end;
330  unsigned long nid, i;
331  high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
332 
333  reservedpages = 0;
334  for_each_online_node(nid) {
335  /*
336  * This will free up the bootmem, ie, slot 0 memory
337  */
338  totalram_pages += free_all_bootmem_node(NODE_DATA(nid));
339 
340  pfn = NODE_DATA(nid)->node_start_pfn;
341  for (i = 0; i < node_spanned_pages(nid); i++, pfn++)
342  if (page_is_ram(pfn) &&
343  PageReserved(nid_page_nr(nid, i)))
344  reservedpages++;
345  }
346 
347  codesize = (unsigned long) &_etext - (unsigned long) &_text;
348  datasize = (unsigned long) &_edata - (unsigned long) &_data;
349  initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
350 
351  printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, "
352  "%luk data, %luk init)\n",
353  nr_free_pages() << (PAGE_SHIFT-10),
354  num_physpages << (PAGE_SHIFT-10),
355  codesize >> 10,
356  reservedpages << (PAGE_SHIFT-10),
357  datasize >> 10,
358  initsize >> 10);
359 #if 0
360  mem_stress();
361 #endif
362 }