Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
bootmem.c
Go to the documentation of this file.
1 /*
2  * bootmem - A boot-time physical memory allocator and configurator
3  *
4  * Copyright (C) 1999 Ingo Molnar
5  * 1999 Kanoj Sarcar, SGI
6  * 2008 Johannes Weiner
7  *
8  * Access to this subsystem has to be serialized externally (which is true
9  * for the boot process anyway).
10  */
11 #include <linux/init.h>
12 #include <linux/pfn.h>
13 #include <linux/slab.h>
14 #include <linux/bootmem.h>
15 #include <linux/export.h>
16 #include <linux/kmemleak.h>
17 #include <linux/range.h>
18 #include <linux/memblock.h>
19 
20 #include <asm/bug.h>
21 #include <asm/io.h>
22 #include <asm/processor.h>
23 
24 #include "internal.h"
25 
26 #ifndef CONFIG_NEED_MULTIPLE_NODES
28  .bdata = &bootmem_node_data[0]
29 };
30 EXPORT_SYMBOL(contig_page_data);
31 #endif
32 
33 unsigned long max_low_pfn;
34 unsigned long min_low_pfn;
35 unsigned long max_pfn;
36 
38 
39 static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
40 
41 static int bootmem_debug;
42 
43 static int __init bootmem_debug_setup(char *buf)
44 {
45  bootmem_debug = 1;
46  return 0;
47 }
48 early_param("bootmem_debug", bootmem_debug_setup);
49 
/*
 * Debug printk that is only emitted when bootmem_debug is set.  Every
 * message is prefixed with "bootmem::" and the name of the calling function.
 */
#define bdebug(fmt, ...) ({					\
	if (unlikely(bootmem_debug))				\
		printk(KERN_INFO "bootmem::%s " fmt,		\
		       __func__, ##__VA_ARGS__);		\
})
56 
57 static unsigned long __init bootmap_bytes(unsigned long pages)
58 {
59  unsigned long bytes = DIV_ROUND_UP(pages, 8);
60 
61  return ALIGN(bytes, sizeof(long));
62 }
63 
68 unsigned long __init bootmem_bootmap_pages(unsigned long pages)
69 {
70  unsigned long bytes = bootmap_bytes(pages);
71 
72  return PAGE_ALIGN(bytes) >> PAGE_SHIFT;
73 }
74 
75 /*
76  * link bdata in order
77  */
78 static void __init link_bootmem(bootmem_data_t *bdata)
79 {
81 
82  list_for_each_entry(ent, &bdata_list, list) {
83  if (bdata->node_min_pfn < ent->node_min_pfn) {
84  list_add_tail(&bdata->list, &ent->list);
85  return;
86  }
87  }
88 
89  list_add_tail(&bdata->list, &bdata_list);
90 }
91 
92 /*
93  * Called once to set up the allocator itself.
94  */
95 static unsigned long __init init_bootmem_core(bootmem_data_t *bdata,
96  unsigned long mapstart, unsigned long start, unsigned long end)
97 {
98  unsigned long mapsize;
99 
100  mminit_validate_memmodel_limits(&start, &end);
101  bdata->node_bootmem_map = phys_to_virt(PFN_PHYS(mapstart));
102  bdata->node_min_pfn = start;
103  bdata->node_low_pfn = end;
104  link_bootmem(bdata);
105 
106  /*
107  * Initially all pages are reserved - setup_arch() has to
108  * register free RAM areas explicitly.
109  */
110  mapsize = bootmap_bytes(end - start);
111  memset(bdata->node_bootmem_map, 0xff, mapsize);
112 
113  bdebug("nid=%td start=%lx map=%lx end=%lx mapsize=%lx\n",
114  bdata - bootmem_node_data, start, mapstart, end, mapsize);
115 
116  return mapsize;
117 }
118 
128 unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
129  unsigned long startpfn, unsigned long endpfn)
130 {
131  return init_bootmem_core(pgdat->bdata, freepfn, startpfn, endpfn);
132 }
133 
141 unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
142 {
143  max_low_pfn = pages;
144  min_low_pfn = start;
145  return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
146 }
147 
148 /*
149  * free_bootmem_late - free bootmem pages directly to page allocator
150  * @addr: starting address of the range
151  * @size: size of the range in bytes
152  *
153  * This is only useful when the bootmem allocator has already been torn
154  * down, but we are still initializing the system. Pages are given directly
155  * to the page allocator, no bootmem metadata is updated because it is gone.
156  */
157 void __init free_bootmem_late(unsigned long addr, unsigned long size)
158 {
159  unsigned long cursor, end;
160 
161  kmemleak_free_part(__va(addr), size);
162 
163  cursor = PFN_UP(addr);
164  end = PFN_DOWN(addr + size);
165 
166  for (; cursor < end; cursor++) {
167  __free_pages_bootmem(pfn_to_page(cursor), 0);
168  totalram_pages++;
169  }
170 }
171 
172 static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
173 {
174  struct page *page;
175  unsigned long start, end, pages, count = 0;
176 
177  if (!bdata->node_bootmem_map)
178  return 0;
179 
180  start = bdata->node_min_pfn;
181  end = bdata->node_low_pfn;
182 
183  bdebug("nid=%td start=%lx end=%lx\n",
184  bdata - bootmem_node_data, start, end);
185 
186  while (start < end) {
187  unsigned long *map, idx, vec;
188 
189  map = bdata->node_bootmem_map;
190  idx = start - bdata->node_min_pfn;
191  vec = ~map[idx / BITS_PER_LONG];
192  /*
193  * If we have a properly aligned and fully unreserved
194  * BITS_PER_LONG block of pages in front of us, free
195  * it in one go.
196  */
197  if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) {
198  int order = ilog2(BITS_PER_LONG);
199 
200  __free_pages_bootmem(pfn_to_page(start), order);
201  count += BITS_PER_LONG;
202  start += BITS_PER_LONG;
203  } else {
204  unsigned long off = 0;
205 
206  vec >>= start & (BITS_PER_LONG - 1);
207  while (vec) {
208  if (vec & 1) {
209  page = pfn_to_page(start + off);
210  __free_pages_bootmem(page, 0);
211  count++;
212  }
213  vec >>= 1;
214  off++;
215  }
216  start = ALIGN(start + 1, BITS_PER_LONG);
217  }
218  }
219 
220  page = virt_to_page(bdata->node_bootmem_map);
221  pages = bdata->node_low_pfn - bdata->node_min_pfn;
222  pages = bootmem_bootmap_pages(pages);
223  count += pages;
224  while (pages--)
225  __free_pages_bootmem(page++, 0);
226 
227  bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);
228 
229  return count;
230 }
231 
239 {
240  register_page_bootmem_info_node(pgdat);
241  return free_all_bootmem_core(pgdat->bdata);
242 }
243 
249 unsigned long __init free_all_bootmem(void)
250 {
251  unsigned long total_pages = 0;
252  bootmem_data_t *bdata;
253 
254  list_for_each_entry(bdata, &bdata_list, list)
255  total_pages += free_all_bootmem_core(bdata);
256 
257  return total_pages;
258 }
259 
260 static void __init __free(bootmem_data_t *bdata,
261  unsigned long sidx, unsigned long eidx)
262 {
263  unsigned long idx;
264 
265  bdebug("nid=%td start=%lx end=%lx\n", bdata - bootmem_node_data,
266  sidx + bdata->node_min_pfn,
267  eidx + bdata->node_min_pfn);
268 
269  if (bdata->hint_idx > sidx)
270  bdata->hint_idx = sidx;
271 
272  for (idx = sidx; idx < eidx; idx++)
273  if (!test_and_clear_bit(idx, bdata->node_bootmem_map))
274  BUG();
275 }
276 
277 static int __init __reserve(bootmem_data_t *bdata, unsigned long sidx,
278  unsigned long eidx, int flags)
279 {
280  unsigned long idx;
281  int exclusive = flags & BOOTMEM_EXCLUSIVE;
282 
283  bdebug("nid=%td start=%lx end=%lx flags=%x\n",
284  bdata - bootmem_node_data,
285  sidx + bdata->node_min_pfn,
286  eidx + bdata->node_min_pfn,
287  flags);
288 
289  for (idx = sidx; idx < eidx; idx++)
290  if (test_and_set_bit(idx, bdata->node_bootmem_map)) {
291  if (exclusive) {
292  __free(bdata, sidx, idx);
293  return -EBUSY;
294  }
295  bdebug("silent double reserve of PFN %lx\n",
296  idx + bdata->node_min_pfn);
297  }
298  return 0;
299 }
300 
301 static int __init mark_bootmem_node(bootmem_data_t *bdata,
302  unsigned long start, unsigned long end,
303  int reserve, int flags)
304 {
305  unsigned long sidx, eidx;
306 
307  bdebug("nid=%td start=%lx end=%lx reserve=%d flags=%x\n",
308  bdata - bootmem_node_data, start, end, reserve, flags);
309 
310  BUG_ON(start < bdata->node_min_pfn);
311  BUG_ON(end > bdata->node_low_pfn);
312 
313  sidx = start - bdata->node_min_pfn;
314  eidx = end - bdata->node_min_pfn;
315 
316  if (reserve)
317  return __reserve(bdata, sidx, eidx, flags);
318  else
319  __free(bdata, sidx, eidx);
320  return 0;
321 }
322 
323 static int __init mark_bootmem(unsigned long start, unsigned long end,
324  int reserve, int flags)
325 {
326  unsigned long pos;
327  bootmem_data_t *bdata;
328 
329  pos = start;
330  list_for_each_entry(bdata, &bdata_list, list) {
331  int err;
332  unsigned long max;
333 
334  if (pos < bdata->node_min_pfn ||
335  pos >= bdata->node_low_pfn) {
336  BUG_ON(pos != start);
337  continue;
338  }
339 
340  max = min(bdata->node_low_pfn, end);
341 
342  err = mark_bootmem_node(bdata, pos, max, reserve, flags);
343  if (reserve && err) {
344  mark_bootmem(start, pos, 0, 0);
345  return err;
346  }
347 
348  if (max == end)
349  return 0;
350  pos = bdata->node_low_pfn;
351  }
352  BUG();
353 }
354 
365 void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
366  unsigned long size)
367 {
368  unsigned long start, end;
369 
370  kmemleak_free_part(__va(physaddr), size);
371 
372  start = PFN_UP(physaddr);
373  end = PFN_DOWN(physaddr + size);
374 
375  mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
376 }
377 
387 void __init free_bootmem(unsigned long addr, unsigned long size)
388 {
389  unsigned long start, end;
390 
391  kmemleak_free_part(__va(addr), size);
392 
393  start = PFN_UP(addr);
394  end = PFN_DOWN(addr + size);
395 
396  mark_bootmem(start, end, 0, 0);
397 }
398 
410 int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
411  unsigned long size, int flags)
412 {
413  unsigned long start, end;
414 
415  start = PFN_DOWN(physaddr);
416  end = PFN_UP(physaddr + size);
417 
418  return mark_bootmem_node(pgdat->bdata, start, end, 1, flags);
419 }
420 
431 int __init reserve_bootmem(unsigned long addr, unsigned long size,
432  int flags)
433 {
434  unsigned long start, end;
435 
436  start = PFN_DOWN(addr);
437  end = PFN_UP(addr + size);
438 
439  return mark_bootmem(start, end, 1, flags);
440 }
441 
442 int __weak __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
443  int flags)
444 {
445  return reserve_bootmem(phys, len, flags);
446 }
447 
448 static unsigned long __init align_idx(struct bootmem_data *bdata,
449  unsigned long idx, unsigned long step)
450 {
451  unsigned long base = bdata->node_min_pfn;
452 
453  /*
454  * Align the index with respect to the node start so that the
455  * combination of both satisfies the requested alignment.
456  */
457 
458  return ALIGN(base + idx, step) - base;
459 }
460 
461 static unsigned long __init align_off(struct bootmem_data *bdata,
462  unsigned long off, unsigned long align)
463 {
464  unsigned long base = PFN_PHYS(bdata->node_min_pfn);
465 
466  /* Same as align_idx for byte offsets */
467 
468  return ALIGN(base + off, align) - base;
469 }
470 
471 static void * __init alloc_bootmem_bdata(struct bootmem_data *bdata,
472  unsigned long size, unsigned long align,
473  unsigned long goal, unsigned long limit)
474 {
475  unsigned long fallback = 0;
476  unsigned long min, max, start, sidx, midx, step;
477 
478  bdebug("nid=%td size=%lx [%lu pages] align=%lx goal=%lx limit=%lx\n",
479  bdata - bootmem_node_data, size, PAGE_ALIGN(size) >> PAGE_SHIFT,
480  align, goal, limit);
481 
482  BUG_ON(!size);
483  BUG_ON(align & (align - 1));
484  BUG_ON(limit && goal + size > limit);
485 
486  if (!bdata->node_bootmem_map)
487  return NULL;
488 
489  min = bdata->node_min_pfn;
490  max = bdata->node_low_pfn;
491 
492  goal >>= PAGE_SHIFT;
493  limit >>= PAGE_SHIFT;
494 
495  if (limit && max > limit)
496  max = limit;
497  if (max <= min)
498  return NULL;
499 
500  step = max(align >> PAGE_SHIFT, 1UL);
501 
502  if (goal && min < goal && goal < max)
503  start = ALIGN(goal, step);
504  else
505  start = ALIGN(min, step);
506 
507  sidx = start - bdata->node_min_pfn;
508  midx = max - bdata->node_min_pfn;
509 
510  if (bdata->hint_idx > sidx) {
511  /*
512  * Handle the valid case of sidx being zero and still
513  * catch the fallback below.
514  */
515  fallback = sidx + 1;
516  sidx = align_idx(bdata, bdata->hint_idx, step);
517  }
518 
519  while (1) {
520  int merge;
521  void *region;
522  unsigned long eidx, i, start_off, end_off;
523 find_block:
524  sidx = find_next_zero_bit(bdata->node_bootmem_map, midx, sidx);
525  sidx = align_idx(bdata, sidx, step);
526  eidx = sidx + PFN_UP(size);
527 
528  if (sidx >= midx || eidx > midx)
529  break;
530 
531  for (i = sidx; i < eidx; i++)
532  if (test_bit(i, bdata->node_bootmem_map)) {
533  sidx = align_idx(bdata, i, step);
534  if (sidx == i)
535  sidx += step;
536  goto find_block;
537  }
538 
539  if (bdata->last_end_off & (PAGE_SIZE - 1) &&
540  PFN_DOWN(bdata->last_end_off) + 1 == sidx)
541  start_off = align_off(bdata, bdata->last_end_off, align);
542  else
543  start_off = PFN_PHYS(sidx);
544 
545  merge = PFN_DOWN(start_off) < sidx;
546  end_off = start_off + size;
547 
548  bdata->last_end_off = end_off;
549  bdata->hint_idx = PFN_UP(end_off);
550 
551  /*
552  * Reserve the area now:
553  */
554  if (__reserve(bdata, PFN_DOWN(start_off) + merge,
555  PFN_UP(end_off), BOOTMEM_EXCLUSIVE))
556  BUG();
557 
558  region = phys_to_virt(PFN_PHYS(bdata->node_min_pfn) +
559  start_off);
560  memset(region, 0, size);
561  /*
562  * The min_count is set to 0 so that bootmem allocated blocks
563  * are never reported as leaks.
564  */
565  kmemleak_alloc(region, size, 0, 0);
566  return region;
567  }
568 
569  if (fallback) {
570  sidx = align_idx(bdata, fallback - 1, step);
571  fallback = 0;
572  goto find_block;
573  }
574 
575  return NULL;
576 }
577 
578 static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
579  unsigned long size, unsigned long align,
580  unsigned long goal, unsigned long limit)
581 {
583  return kzalloc(size, GFP_NOWAIT);
584 
585 #ifdef CONFIG_HAVE_ARCH_BOOTMEM
586  {
587  bootmem_data_t *p_bdata;
588 
589  p_bdata = bootmem_arch_preferred_node(bdata, size, align,
590  goal, limit);
591  if (p_bdata)
592  return alloc_bootmem_bdata(p_bdata, size, align,
593  goal, limit);
594  }
595 #endif
596  return NULL;
597 }
598 
599 static void * __init alloc_bootmem_core(unsigned long size,
600  unsigned long align,
601  unsigned long goal,
602  unsigned long limit)
603 {
604  bootmem_data_t *bdata;
605  void *region;
606 
607  region = alloc_arch_preferred_bootmem(NULL, size, align, goal, limit);
608  if (region)
609  return region;
610 
611  list_for_each_entry(bdata, &bdata_list, list) {
612  if (goal && bdata->node_low_pfn <= PFN_DOWN(goal))
613  continue;
614  if (limit && bdata->node_min_pfn >= PFN_DOWN(limit))
615  break;
616 
617  region = alloc_bootmem_bdata(bdata, size, align, goal, limit);
618  if (region)
619  return region;
620  }
621 
622  return NULL;
623 }
624 
625 static void * __init ___alloc_bootmem_nopanic(unsigned long size,
626  unsigned long align,
627  unsigned long goal,
628  unsigned long limit)
629 {
630  void *ptr;
631 
632 restart:
633  ptr = alloc_bootmem_core(size, align, goal, limit);
634  if (ptr)
635  return ptr;
636  if (goal) {
637  goal = 0;
638  goto restart;
639  }
640 
641  return NULL;
642 }
643 
657 void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
658  unsigned long goal)
659 {
660  unsigned long limit = 0;
661 
662  return ___alloc_bootmem_nopanic(size, align, goal, limit);
663 }
664 
665 static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
666  unsigned long goal, unsigned long limit)
667 {
668  void *mem = ___alloc_bootmem_nopanic(size, align, goal, limit);
669 
670  if (mem)
671  return mem;
672  /*
673  * Whoops, we cannot satisfy the allocation request.
674  */
675  printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
676  panic("Out of memory");
677  return NULL;
678 }
679 
693 void * __init __alloc_bootmem(unsigned long size, unsigned long align,
694  unsigned long goal)
695 {
696  unsigned long limit = 0;
697 
698  return ___alloc_bootmem(size, align, goal, limit);
699 }
700 
702  unsigned long size, unsigned long align,
703  unsigned long goal, unsigned long limit)
704 {
705  void *ptr;
706 
707 again:
708  ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size,
709  align, goal, limit);
710  if (ptr)
711  return ptr;
712 
713  /* do not panic in alloc_bootmem_bdata() */
714  if (limit && goal + size > limit)
715  limit = 0;
716 
717  ptr = alloc_bootmem_bdata(pgdat->bdata, size, align, goal, limit);
718  if (ptr)
719  return ptr;
720 
721  ptr = alloc_bootmem_core(size, align, goal, limit);
722  if (ptr)
723  return ptr;
724 
725  if (goal) {
726  goal = 0;
727  goto again;
728  }
729 
730  return NULL;
731 }
732 
733 void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
734  unsigned long align, unsigned long goal)
735 {
737  return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
738 
739  return ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0);
740 }
741 
742 void * __init ___alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
743  unsigned long align, unsigned long goal,
744  unsigned long limit)
745 {
746  void *ptr;
747 
748  ptr = ___alloc_bootmem_node_nopanic(pgdat, size, align, goal, 0);
749  if (ptr)
750  return ptr;
751 
752  printk(KERN_ALERT "bootmem alloc of %lu bytes failed!\n", size);
753  panic("Out of memory");
754  return NULL;
755 }
756 
772 void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
773  unsigned long align, unsigned long goal)
774 {
776  return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
777 
778  return ___alloc_bootmem_node(pgdat, size, align, goal, 0);
779 }
780 
781 void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
782  unsigned long align, unsigned long goal)
783 {
784 #ifdef MAX_DMA32_PFN
785  unsigned long end_pfn;
786 
788  return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
789 
790  /* update goal according ...MAX_DMA32_PFN */
791  end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;
792 
793  if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
794  (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
795  void *ptr;
796  unsigned long new_goal;
797 
798  new_goal = MAX_DMA32_PFN << PAGE_SHIFT;
799  ptr = alloc_bootmem_bdata(pgdat->bdata, size, align,
800  new_goal, 0);
801  if (ptr)
802  return ptr;
803  }
804 #endif
805 
806  return __alloc_bootmem_node(pgdat, size, align, goal);
807 
808 }
809 
810 #ifndef ARCH_LOW_ADDRESS_LIMIT
811 #define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL
812 #endif
813 
827 void * __init __alloc_bootmem_low(unsigned long size, unsigned long align,
828  unsigned long goal)
829 {
830  return ___alloc_bootmem(size, align, goal, ARCH_LOW_ADDRESS_LIMIT);
831 }
832 
848 void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
849  unsigned long align, unsigned long goal)
850 {
852  return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
853 
854  return ___alloc_bootmem_node(pgdat, size, align,
855  goal, ARCH_LOW_ADDRESS_LIMIT);
856 }