17 #include <linux/stddef.h>
25 #include <linux/compiler.h>
26 #include <linux/kernel.h>
28 #include <linux/module.h>
32 #include <linux/slab.h>
34 #include <linux/oom.h>
37 #include <linux/sysctl.h>
44 #include <linux/mempolicy.h>
58 #include <linux/prefetch.h>
62 #include <asm/tlbflush.h>
63 #include <asm/div64.h>
66 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
71 #ifdef CONFIG_HAVE_MEMORYLESS_NODES
111 #ifdef CONFIG_PM_SLEEP
121 static gfp_t saved_gfp_mask;
126 if (saved_gfp_mask) {
134 WARN_ON(!mutex_is_locked(&pm_mutex));
140 bool pm_suspended_storage(void)
142 if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS)
148 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
152 static void __free_pages_ok(struct page *page, unsigned int order);
166 #ifdef CONFIG_ZONE_DMA
169 #ifdef CONFIG_ZONE_DMA32
172 #ifdef CONFIG_HIGHMEM
180 static char * const zone_names[MAX_NR_ZONES] = {
181 #ifdef CONFIG_ZONE_DMA
184 #ifdef CONFIG_ZONE_DMA32
188 #ifdef CONFIG_HIGHMEM
200 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
201 static unsigned long __meminitdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
202 static unsigned long __meminitdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
203 static unsigned long __initdata required_kernelcore;
204 static unsigned long __initdata required_movablecore;
238 #ifdef CONFIG_DEBUG_VM
239 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
246 seq = zone_span_seqbegin(zone);
249 else if (pfn < zone->zone_start_pfn)
251 } while (zone_span_seqretry(zone, seq));
256 static int page_is_consistent(struct zone *zone, struct page *page)
260 if (zone != page_zone(page))
268 static int bad_range(struct zone *zone, struct page *page)
270 if (page_outside_zone_boundaries(zone, page))
272 if (!page_is_consistent(zone, page))
278 static inline int bad_range(struct zone *zone, struct page *page)
284 static void bad_page(struct page *page)
286 static unsigned long resume;
287 static unsigned long nr_shown;
288 static unsigned long nr_unshown;
291 if (PageHWPoison(page)) {
292 reset_page_mapcount(page);
300 if (nr_shown == 60) {
307 "BUG: Bad page state: %lu messages suppressed\n",
324 reset_page_mapcount(page);
343 static void free_compound_page(struct page *page)
345 __free_pages_ok(page, compound_order(page));
351 int nr_pages = 1 << order;
353 set_compound_page_dtor(page, free_compound_page);
354 set_compound_order(page, order);
356 for (i = 1; i < nr_pages; i++) {
357 struct page *p = page + i;
359 set_page_count(p, 0);
365 static int destroy_compound_page(struct page *page, unsigned long order)
368 int nr_pages = 1 << order;
371 if (unlikely(compound_order(page) != order) ||
377 __ClearPageHead(page);
379 for (i = 1; i < nr_pages; i++) {
380 struct page *p = page + i;
392 static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
401 for (i = 0; i < (1 << order); i++)
402 clear_highpage(page + i);
405 #ifdef CONFIG_DEBUG_PAGEALLOC
406 unsigned int _debug_guardpage_minorder;
408 static int __init debug_guardpage_minorder_setup(char *buf)
412 if (kstrtoul(buf, 10, &res) < 0 || res > MAX_ORDER / 2) {
416 _debug_guardpage_minorder = res;
420 __setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup);
422 static inline void set_page_guard_flag(struct page *page)
427 static inline void clear_page_guard_flag(struct page *page)
432 static inline void set_page_guard_flag(struct page *page) { }
433 static inline void clear_page_guard_flag(struct page *page) { }
436 static inline void set_page_order(struct page *page, int order)
438 set_page_private(page, order);
439 __SetPageBuddy(page);
442 static inline void rmv_page_order(struct page *page)
444 __ClearPageBuddy(page);
445 set_page_private(page, 0);
465 static inline unsigned long
466 __find_buddy_index(unsigned long page_idx, unsigned int order)
468 return page_idx ^ (1 << order);
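/*
 * The buddy of a block is found by flipping bit 'order' of its index:
 * for order 2, index 12 (0b1100) pairs with index 8 (0b1000), and the
 * merged order-3 block starts at 12 & 8 == 8, which is what the
 * combined_idx computation in __free_one_page() below relies on.
 */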
484 static inline int page_is_buddy(struct page *page, struct page *buddy,
490 if (page_zone_id(page) != page_zone_id(buddy))
493 if (page_is_guard(buddy) && page_order(buddy) == order) {
498 if (PageBuddy(buddy) && page_order(buddy) == order) {
529 static inline void __free_one_page(struct page *page,
530 struct zone *zone, unsigned int order,
533 unsigned long page_idx;
534 unsigned long combined_idx;
539 if (unlikely(destroy_compound_page(page, order)))
546 VM_BUG_ON(page_idx & ((1 << order) - 1));
550 buddy_idx = __find_buddy_index(page_idx, order);
551 buddy = page + (buddy_idx - page_idx);
552 if (!page_is_buddy(page, buddy, order))
558 if (page_is_guard(buddy)) {
559 clear_page_guard_flag(buddy);
560 set_page_private(page, 0);
561 __mod_zone_freepage_state(zone, 1 << order,
566 rmv_page_order(buddy);
568 combined_idx = buddy_idx & page_idx;
569 page = page + (combined_idx - page_idx);
570 page_idx = combined_idx;
573 set_page_order(page, order);
584 struct page *higher_page, *higher_buddy;
585 combined_idx = buddy_idx & page_idx;
586 higher_page = page + (combined_idx - page_idx);
587 buddy_idx = __find_buddy_index(combined_idx, order + 1);
588 higher_buddy = higher_page + (buddy_idx - combined_idx);
589 if (page_is_buddy(higher_page, higher_buddy, order + 1)) {
591 &zone->free_area[order].free_list[migratetype]);
596 list_add(&page->lru, &zone->free_area[order].free_list[migratetype]);
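/*
 * Placement heuristic visible above: if the buddy of the newly merged
 * block at the next higher order is itself free, the page is queued at
 * the tail of the free list so it is handed out late and has a better
 * chance to merge; otherwise it is added at the head.
 */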
601 static inline int free_pages_check(struct page *page)
607 (mem_cgroup_bad_page_check(page)))) {
627 static void free_pcppages_bulk(struct zone *zone, int count,
634 spin_lock(&zone->lock);
653 list = &pcp->lists[migratetype];
654 } while (list_empty(list));
658 batch_free = to_free;
666 mt = get_freepage_migratetype(page);
668 __free_one_page(page, zone, 0, mt);
669 trace_mm_page_pcpu_drain(page, 0, mt);
672 } while (--to_free && --batch_free && !list_empty(list));
675 spin_unlock(&zone->lock);
678 static void free_one_page(struct zone *zone, struct page *page, int order,
681 spin_lock(&zone->lock);
685 __free_one_page(page, zone, order, migratetype);
687 __mod_zone_freepage_state(zone, 1 << order, migratetype);
688 spin_unlock(&zone->lock);
691 static bool free_pages_prepare(struct page *page, unsigned int order)
696 trace_mm_page_free(page, order);
701 for (i = 0; i < (1 << order); i++)
702 bad += free_pages_check(page + i);
706 if (!PageHighMem(page)) {
717 static void __free_pages_ok(struct page *page, unsigned int order)
722 if (!free_pages_prepare(page, order))
726 __count_vm_events(PGFREE, 1 << order);
727 migratetype = get_pageblock_migratetype(page);
728 set_freepage_migratetype(page, migratetype);
729 free_one_page(page_zone(page), page, order, migratetype);
735 unsigned int nr_pages = 1 << order;
739 for (loop = 0; loop < nr_pages; loop++) {
740 struct page *p = &page[loop];
742 if (loop + 1 < nr_pages)
744 __ClearPageReserved(p);
745 set_page_count(p, 0);
748 set_page_refcounted(page);
754 void __init init_cma_reserved_pageblock(struct page *page)
757 struct page *p = page;
760 __ClearPageReserved(p);
761 set_page_count(p, 0);
764 set_page_refcounted(page);
785 static inline void expand(struct zone *zone, struct page *page,
797 #ifdef CONFIG_DEBUG_PAGEALLOC
798 if (high < debug_guardpage_minorder()) {
805 INIT_LIST_HEAD(&page[size].lru);
806 set_page_guard_flag(&page[size]);
807 set_page_private(&page[size], high);
809 __mod_zone_freepage_state(zone, -(1 << high),
814 list_add(&page[size].lru, &area->free_list[migratetype]);
816 set_page_order(&page[size], high);
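/*
 * expand() halves the surplus of a larger block back onto the free
 * lists: serving an order-0 request from an order-3 block leaves
 * order-2, order-1 and order-0 remainders behind. Under
 * CONFIG_DEBUG_PAGEALLOC, remainders below debug_guardpage_minorder
 * become guard pages instead of ordinary free pages.
 */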
823 static inline int check_new_page(struct page *page)
829 (mem_cgroup_bad_page_check(page)))) {
836 static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
840 for (i = 0; i < (1 << order); i++) {
841 struct page *p = page + i;
846 set_page_private(page, 0);
847 set_page_refcounted(page);
853 prep_zero_page(page, order, gfp_flags);
866 struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
869 unsigned int current_order;
874 for (current_order = order; current_order < MAX_ORDER; ++current_order) {
875 area = &(zone->free_area[current_order]);
876 if (list_empty(&area->free_list[migratetype]))
882 rmv_page_order(page);
884 expand(zone, page, order, current_order, area, migratetype);
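/*
 * __rmqueue_smallest() walks the free lists upward from the requested
 * order and takes the first non-empty one; any excess is split back
 * down by the expand() call above.
 */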
915 struct page *start_page, struct page *end_page,
922 #ifndef CONFIG_HOLES_IN_ZONE
930 BUG_ON(page_zone(start_page) != page_zone(end_page));
933 for (page = start_page; page <= end_page;) {
935 VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone));
942 if (!PageBuddy(page)) {
947 order = page_order(page);
948 list_move(&page->lru,
949 &zone->free_area[order].free_list[migratetype]);
950 set_freepage_migratetype(page, migratetype);
952 pages_moved += 1 << order;
961 unsigned long start_pfn, end_pfn;
962 struct page *start_page, *end_page;
968 end_pfn = start_pfn + pageblock_nr_pages - 1;
971 if (start_pfn < zone->zone_start_pfn)
979 static void change_pageblock_range(struct page *pageblock_page,
980 int start_order, int migratetype)
984 while (nr_pageblocks--) {
991 static inline struct page *
992 __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
1000 for (current_order = MAX_ORDER-1; current_order >= order;
1003 migratetype = fallbacks[start_migratetype][i];
1009 area = &(zone->free_area[current_order]);
1010 if (list_empty(&area->free_list[migratetype]))
1032 page_group_by_mobility_disabled)) {
1039 page_group_by_mobility_disabled)
1043 migratetype = start_migratetype;
1048 rmv_page_order(page);
1053 change_pageblock_range(page, current_order,
1056 expand(zone, page, order, current_order, area,
1058 ? migratetype : start_migratetype);
1060 trace_mm_page_alloc_extfrag(page, order, current_order,
1061 start_migratetype, migratetype);
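/*
 * Fallback path: when the preferred migratetype has no free pages, the
 * largest available block of a fallback type is stolen, and for large
 * enough steals the whole pageblock is converted to the new migratetype
 * (unless page_group_by_mobility_disabled), which limits long-term
 * mixing of movable and unmovable pages.
 */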
1074 static struct page *__rmqueue(struct zone *zone, unsigned int order,
1080 page = __rmqueue_smallest(zone, order, migratetype);
1083 page = __rmqueue_fallback(zone, order, migratetype);
1096 trace_mm_page_alloc_zone_locked(page, order, migratetype);
1105 static int rmqueue_bulk(struct zone *zone, unsigned int order,
1106 unsigned long count, struct list_head *list,
1107 int migratetype, int cold)
1109 int mt = migratetype, i;
1111 spin_lock(&zone->lock);
1112 for (i = 0; i < count; ++i) {
1113 struct page *page = __rmqueue(zone, order, migratetype);
1127 list_add(&page->lru, list);
1131 mt = get_pageblock_migratetype(page);
1135 set_freepage_migratetype(page, mt);
1142 spin_unlock(&zone->lock);
1157 unsigned long flags;
1162 to_drain = pcp->batch;
1164 to_drain = pcp->count;
1166 free_pcppages_bulk(zone, to_drain, pcp);
1167 pcp->count -= to_drain;
1180 static void drain_pages(unsigned int cpu)
1182 unsigned long flags;
1194 free_pcppages_bulk(zone, pcp->count, pcp);
1237 bool has_pcps = false;
1240 if (pcp->pcp.count) {
1246 cpumask_set_cpu(cpu, &cpus_with_pcps);
1248 cpumask_clear_cpu(cpu, &cpus_with_pcps);
1253 #ifdef CONFIG_HIBERNATION
1257 unsigned long pfn, max_zone_pfn;
1258 unsigned long flags;
1281 for (i = 0; i < (1UL << order); i++)
1285 spin_unlock_irqrestore(&zone->lock, flags);
1295 struct zone *zone = page_zone(page);
1297 unsigned long flags;
1300 if (!free_pages_prepare(page, 0))
1303 migratetype = get_pageblock_migratetype(page);
1304 set_freepage_migratetype(page, migratetype);
1306 __count_vm_event(PGFREE);
1317 free_one_page(zone, page, 0, migratetype);
1327 list_add(&page->lru, &pcp->lists[migratetype]);
1330 free_pcppages_bulk(zone, pcp->batch, pcp);
1346 trace_mm_page_free_batched(page, cold);
1366 #ifdef CONFIG_KMEMCHECK
1375 for (i = 1; i < (1 << order); i++)
1376 set_page_refcounted(page + i);
1387 unsigned long watermark;
1391 BUG_ON(!PageBuddy(page));
1393 zone = page_zone(page);
1394 order = page_order(page);
1404 rmv_page_order(page);
1406 mt = get_pageblock_migratetype(page);
1408 __mod_zone_freepage_state(zone, -(1UL << alloc_order), mt);
1410 if (alloc_order != order)
1411 expand(zone, page, alloc_order, order,
1416 struct page *endpage = page + (1 << order) - 1;
1418 int mt = get_pageblock_migratetype(page);
1425 return 1UL << alloc_order;
1443 BUG_ON(!PageBuddy(page));
1444 order = page_order(page);
1451 set_page_refcounted(page);
1462 struct page *buffered_rmqueue(struct zone *preferred_zone,
1463 struct zone *zone, int order, gfp_t gfp_flags,
1466 unsigned long flags;
1471 if (likely(order == 0)) {
1477 list = &pcp->lists[migratetype];
1478 if (list_empty(list)) {
1479 pcp->count += rmqueue_bulk(zone, 0,
1508 page = __rmqueue(zone, order, migratetype);
1509 spin_unlock(&zone->lock);
1512 __mod_zone_freepage_state(zone, -(1 << order),
1513 get_pageblock_migratetype(page));
1521 if (prep_new_page(page, order, gfp_flags))
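/*
 * Order-0 requests are served from the per-cpu pageset and only call
 * rmqueue_bulk() (which takes zone->lock once per batch) when the local
 * list is empty; larger orders always take zone->lock and use
 * __rmqueue() directly.
 */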
1530 #ifdef CONFIG_FAIL_PAGE_ALLOC
1533 struct fault_attr attr;
1535 u32 ignore_gfp_highmem;
1538 } fail_page_alloc = {
1539 .attr = FAULT_ATTR_INITIALIZER,
1540 .ignore_gfp_wait = 1,
1541 .ignore_gfp_highmem = 1,
1545 static int __init setup_fail_page_alloc(char *str)
1549 __setup("fail_page_alloc=", setup_fail_page_alloc);
1551 static bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
1553 if (order < fail_page_alloc.min_order)
1555 if (gfp_mask & __GFP_NOFAIL)
1557 if (fail_page_alloc.ignore_gfp_highmem && (gfp_mask & __GFP_HIGHMEM))
1559 if (fail_page_alloc.ignore_gfp_wait && (gfp_mask & __GFP_WAIT))
1562 return should_fail(&fail_page_alloc.attr, 1 << order);
1565 #ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
1567 static int __init fail_page_alloc_debugfs(void)
1572 dir = fault_create_debugfs_attr("fail_page_alloc", NULL,
1573 &fail_page_alloc.attr);
1575 return PTR_ERR(dir);
1578 &fail_page_alloc.ignore_gfp_wait))
1581 &fail_page_alloc.ignore_gfp_highmem))
1584 &fail_page_alloc.min_order))
1600 static inline bool should_fail_alloc_page(gfp_t gfp_mask, unsigned int order)
1611 static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
1612 int classzone_idx, int alloc_flags, long free_pages)
1619 free_pages -= (1 << order) - 1;
1629 if (free_pages <= min + lowmem_reserve)
1631 for (o = 0; o < order; o++) {
1633 free_pages -= z->free_area[o].nr_free << o;
1638 if (free_pages <= min)
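/*
 * Watermark check: after discounting the request itself and the zone's
 * lowmem_reserve, the loop above removes pages held at orders below the
 * request (and, in the full function, halves min at each step), so an
 * order-N allocation only passes if enough free memory remains in
 * blocks of order N or larger.
 */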
1644 #ifdef CONFIG_MEMORY_ISOLATION
1645 static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
1647 if (unlikely(zone->nr_pageblock_isolate))
1652 static inline unsigned long nr_zone_isolate_freepages(struct zone *zone)
1659 int classzone_idx, int alloc_flags)
1661 return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
1666 int classzone_idx, int alloc_flags)
1680 free_pages -= nr_zone_isolate_freepages(z);
1681 return __zone_watermark_ok(z, order, mark, classzone_idx, alloc_flags,
1710 struct zonelist_cache *zlc;
1717 if (time_after(jiffies, zlc->last_full_zap + HZ)) {
1725 return allowednodes;
1750 static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
1753 struct zonelist_cache *zlc;
1773 static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
1775 struct zonelist_cache *zlc;
1791 static void zlc_clear_zones_full(struct zonelist *zonelist)
1793 struct zonelist_cache *zlc;
1802 static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
1807 static void __paginginit init_zone_allows_reclaim(int nid)
1815 zone_reclaim_mode = 1;
1820 static nodemask_t *zlc_setup(struct zonelist *zonelist, int alloc_flags)
1825 static int zlc_zone_worth_trying(struct zonelist *zonelist, struct zoneref *z,
1831 static void zlc_mark_zone_full(struct zonelist *zonelist, struct zoneref *z)
1835 static void zlc_clear_zones_full(struct zonelist *zonelist)
1839 static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
1844 static inline void init_zone_allows_reclaim(int nid)
1853 static struct page *
1854 get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
1855 struct zonelist *zonelist, int high_zoneidx, int alloc_flags,
1856 struct zone *preferred_zone, int migratetype)
1859 struct page *page = NULL;
1864 int did_zlc_setup = 0;
1866 classzone_idx = zone_idx(preferred_zone);
1873 high_zoneidx, nodemask) {
1875 !zlc_zone_worth_trying(zonelist, z, allowednodes))
1878 !cpuset_zone_allowed_softwall(zone, gfp_mask))
1908 goto this_zone_full;
1917 classzone_idx, alloc_flags))
1926 allowednodes = zlc_setup(zonelist, alloc_flags);
1931 if (zone_reclaim_mode == 0 ||
1932 !zone_allows_reclaim(preferred_zone, zone))
1933 goto this_zone_full;
1940 !zlc_zone_worth_trying(zonelist, z, allowednodes))
1943 ret = zone_reclaim(zone, gfp_mask, order);
1954 classzone_idx, alloc_flags))
1955 goto this_zone_full;
1960 page = buffered_rmqueue(preferred_zone, zone, order,
1961 gfp_mask, migratetype);
1966 zlc_mark_zone_full(zonelist, z);
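/*
 * The zlc_* helpers implement the zonelist cache: zones that turned out
 * to be full are remembered so later fast-path passes can skip them,
 * and the cache is zapped roughly once per second (last_full_zap + HZ
 * above).
 */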
1992 static inline bool should_suppress_show_mem(void)
2008 unsigned int filter = SHOW_MEM_FILTER_NODES;
2011 debug_guardpage_minorder() > 0)
2022 filter &= ~SHOW_MEM_FILTER_NODES;
2024 filter &= ~SHOW_MEM_FILTER_NODES;
2040 pr_warn("%s: page allocation failure: order:%d, mode:0x%x\n",
2041 current->comm, order, gfp_mask);
2044 if (!should_suppress_show_mem())
2049 should_alloc_retry(gfp_t gfp_mask, unsigned int order,
2050 unsigned long did_some_progress,
2051 unsigned long pages_reclaimed)
2058 if (gfp_mask & __GFP_NOFAIL)
2066 if (!did_some_progress && pm_suspended_storage())
2084 if (gfp_mask & __GFP_REPEAT && pages_reclaimed < (1 << order))
2090 static inline struct page *
2091 __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
2092 struct zonelist *zonelist, enum zone_type high_zoneidx,
2093 nodemask_t *nodemask, struct zone *preferred_zone,
2110 order, zonelist, high_zoneidx,
2112 preferred_zone, migratetype);
2116 if (!(gfp_mask & __GFP_NOFAIL)) {
2141 #ifdef CONFIG_COMPACTION
2143 static struct page *
2144 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
2145 struct zonelist *zonelist, enum zone_type high_zoneidx,
2146 nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
2147 int migratetype, bool sync_migration,
2148 bool *contended_compaction, bool *deferred_compaction,
2149 unsigned long *did_some_progress)
2151 struct page *page = NULL;
2156 if (compaction_deferred(preferred_zone, order)) {
2157 *deferred_compaction = true;
2162 *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
2163 nodemask, sync_migration,
2164 contended_compaction, &page);
2169 prep_new_page(page, order, gfp_mask);
2178 page = get_page_from_freelist(gfp_mask, nodemask,
2179 order, zonelist, high_zoneidx,
2180 alloc_flags & ~ALLOC_NO_WATERMARKS,
2181 preferred_zone, migratetype);
2184 preferred_zone->compact_blockskip_flush = false;
2185 preferred_zone->compact_considered = 0;
2186 preferred_zone->compact_defer_shift = 0;
2187 if (order >= preferred_zone->compact_order_failed)
2188 preferred_zone->compact_order_failed = order + 1;
2189 count_vm_event(COMPACTSUCCESS);
2198 count_vm_event(COMPACTFAIL);
2205 defer_compaction(preferred_zone, order);
2213 static inline struct page *
2214 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
2215 struct zonelist *zonelist, enum zone_type high_zoneidx,
2216 nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
2217 int migratetype, bool sync_migration,
2218 bool *contended_compaction, bool *deferred_compaction,
2219 unsigned long *did_some_progress)
2227 __perform_reclaim(gfp_t gfp_mask, unsigned int order, struct zonelist *zonelist,
2236 cpuset_memory_pressure_bump();
2254 static inline struct page *
2255 __alloc_pages_direct_reclaim(gfp_t gfp_mask, unsigned int order,
2256 struct zonelist *zonelist, enum zone_type high_zoneidx,
2257 nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
2258 int migratetype, unsigned long *did_some_progress)
2260 struct page *page = NULL;
2261 bool drained = false;
2263 *did_some_progress = __perform_reclaim(gfp_mask, order, zonelist,
2265 if (unlikely(!(*did_some_progress)))
2270 zlc_clear_zones_full(zonelist);
2273 page = get_page_from_freelist(gfp_mask, nodemask, order,
2274 zonelist, high_zoneidx,
2275 alloc_flags & ~ALLOC_NO_WATERMARKS,
2276 preferred_zone, migratetype);
2282 if (!page && !drained) {
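/*
 * If reclaim made progress but the retry above still failed, the freed
 * pages may be stranded on per-cpu lists; they are drained and the
 * freelist walk is retried exactly once (tracked by 'drained').
 */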
2295 static inline struct page *
2296 __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
2297 struct zonelist *zonelist, enum zone_type high_zoneidx,
2298 nodemask_t *nodemask, struct zone *preferred_zone,
2304 page = get_page_from_freelist(gfp_mask, nodemask, order,
2305 zonelist, high_zoneidx, ALLOC_NO_WATERMARKS,
2306 preferred_zone, migratetype);
2308 if (!page && gfp_mask & __GFP_NOFAIL)
2310 } while (!page && (gfp_mask & __GFP_NOFAIL));
2316 void wake_all_kswapd(unsigned int order, struct zonelist *zonelist,
2328 gfp_to_alloc_flags(gfp_t gfp_mask)
2350 alloc_flags |= ALLOC_HARDER;
2355 alloc_flags &= ~ALLOC_CPUSET;
2357 alloc_flags |= ALLOC_HARDER;
2367 alloc_flags |= ALLOC_NO_WATERMARKS;
2371 alloc_flags |= ALLOC_CMA;
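/*
 * gfp_to_alloc_flags() translates GFP bits into internal ALLOC_* flags:
 * ALLOC_HARDER for callers allowed to dig deeper into reserves,
 * ALLOC_NO_WATERMARKS for tasks that are themselves freeing memory,
 * and ALLOC_CMA when movable allocations may use CMA pageblocks.
 */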
2381 static inline struct page *
2382 __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
2383 struct zonelist *zonelist, enum zone_type high_zoneidx,
2384 nodemask_t *nodemask, struct zone *preferred_zone,
2388 struct page *page = NULL;
2390 unsigned long pages_reclaimed = 0;
2391 unsigned long did_some_progress;
2392 bool sync_migration = false;
2393 bool deferred_compaction = false;
2394 bool contended_compaction = false;
2402 if (order >= MAX_ORDER) {
2420 wake_all_kswapd(order, zonelist, high_zoneidx,
2428 alloc_flags = gfp_to_alloc_flags(gfp_mask);
2434 if (!(alloc_flags & ALLOC_CPUSET) && !nodemask)
2435 first_zones_zonelist(zonelist, high_zoneidx, NULL,
2440 page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist,
2441 high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS,
2442 preferred_zone, migratetype);
2447 if (alloc_flags & ALLOC_NO_WATERMARKS) {
2455 page = __alloc_pages_high_priority(gfp_mask, order,
2456 zonelist, high_zoneidx, nodemask,
2457 preferred_zone, migratetype);
2472 if (test_thread_flag(TIF_MEMDIE) && !(gfp_mask & __GFP_NOFAIL))
2479 page = __alloc_pages_direct_compact(gfp_mask, order,
2480 zonelist, high_zoneidx,
2482 alloc_flags, preferred_zone,
2483 migratetype, sync_migration,
2484 &contended_compaction,
2485 &deferred_compaction,
2486 &did_some_progress);
2489 sync_migration = true;
2497 if ((deferred_compaction || contended_compaction) &&
2498 (gfp_mask & __GFP_NO_KSWAPD))
2502 page = __alloc_pages_direct_reclaim(gfp_mask, order,
2503 zonelist, high_zoneidx,
2505 alloc_flags, preferred_zone,
2506 migratetype, &did_some_progress);
2514 if (!did_some_progress) {
2515 if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
2516 if (oom_killer_disabled)
2520 !(gfp_mask & __GFP_NOFAIL))
2522 page = __alloc_pages_may_oom(gfp_mask, order,
2523 zonelist, high_zoneidx,
2524 nodemask, preferred_zone,
2529 if (!(gfp_mask & __GFP_NOFAIL)) {
2552 pages_reclaimed += did_some_progress;
2553 if (should_alloc_retry(gfp_mask, order, did_some_progress,
2564 page = __alloc_pages_direct_compact(gfp_mask, order,
2565 zonelist, high_zoneidx,
2567 alloc_flags, preferred_zone,
2568 migratetype, sync_migration,
2569 &contended_compaction,
2570 &deferred_compaction,
2571 &did_some_progress);
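/*
 * Slow-path order as seen above: wake kswapd, retry the freelists with
 * adjusted flags, try async compaction, direct reclaim, possibly the
 * OOM killer, and finally retry compaction in sync mode before giving
 * up.
 */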
2591 struct zonelist *zonelist, nodemask_t *nodemask)
2593 enum zone_type high_zoneidx = gfp_zone(gfp_mask);
2594 struct zone *preferred_zone;
2595 struct page *page = NULL;
2596 int migratetype = allocflags_to_migratetype(gfp_mask);
2597 unsigned int cpuset_mems_cookie;
2606 if (should_fail_alloc_page(gfp_mask, order))
2618 cpuset_mems_cookie = get_mems_allowed();
2621 first_zones_zonelist(zonelist, high_zoneidx,
2624 if (!preferred_zone)
2632 page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
2633 zonelist, high_zoneidx, alloc_flags,
2634 preferred_zone, migratetype);
2636 page = __alloc_pages_slowpath(gfp_mask, order,
2637 zonelist, high_zoneidx, nodemask,
2638 preferred_zone, migratetype);
2640 trace_mm_page_alloc(page, order, gfp_mask, migratetype);
2649 if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
2667 VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
2684 if (put_page_testzero(page)) {
2688 __free_pages_ok(page, order);
2704 static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
2711 while (used < alloc_end) {
2716 return (void *)addr;
2738 return make_alloc_exact(addr, order, size);
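/*
 * alloc_pages_exact() rounds the request up to a power-of-two order;
 * make_alloc_exact() then splits the block and frees the tail pages so
 * only PAGE_ALIGN(size) bytes stay allocated.
 */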
2757 struct page *p = alloc_pages_node(nid, gfp_mask, order);
2760 return make_alloc_exact((unsigned long)page_address(p), order, size);
2773 unsigned long addr = (unsigned long)virt;
2776 while (addr < end) {
2783 static unsigned int nr_free_zone_pages(int offset)
2789 unsigned int sum = 0;
2808 return nr_free_zone_pages(gfp_zone(GFP_USER));
2820 static inline void show_node(struct zone *zone)
2823 printk("Node %d ", zone_to_nid(zone));
2833 val->freehigh = nr_free_highpages();
2840 void si_meminfo_node(struct sysinfo *val, int nid)
2846 #ifdef CONFIG_HIGHMEM
2865 unsigned int cpuset_mems_cookie;
2867 if (!(flags & SHOW_MEM_FILTER_NODES))
2871 cpuset_mems_cookie = get_mems_allowed();
2873 } while (!put_mems_allowed(cpuset_mems_cookie));
2878 #define K(x) ((x) << (PAGE_SHIFT-10))
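/*
 * K(x) converts a page count to kilobytes: with 4 KiB pages
 * (PAGE_SHIFT == 12) it is x << 2, i.e. x * 4.
 */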
2903 printk("CPU %4d: hi:%5d, btch:%4d usd:%4d\n",
2904 cpu, pageset->pcp.high,
2905 pageset->pcp.batch, pageset->pcp.count);
2909 printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
2910 " active_file:%lu inactive_file:%lu isolated_file:%lu\n"
2912 " dirty:%lu writeback:%lu unstable:%lu\n"
2913 " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
2914 " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
2946 " active_anon:%lukB"
2947 " inactive_anon:%lukB"
2948 " active_file:%lukB"
2949 " inactive_file:%lukB"
2950 " unevictable:%lukB"
2951 " isolated(anon):%lukB"
2952 " isolated(file):%lukB"
2959 " slab_reclaimable:%lukB"
2960 " slab_unreclaimable:%lukB"
2961 " kernel_stack:%lukB"
2966 " writeback_tmp:%lukB"
2967 " pages_scanned:%lu"
2968 " all_unreclaimable? %s"
3000 printk("lowmem_reserve[]:");
3001 for (i = 0; i < MAX_NR_ZONES; i++)
3015 for (order = 0; order < MAX_ORDER; order++) {
3019 spin_unlock_irqrestore(&zone->lock, flags);
3020 for (order = 0; order < MAX_ORDER; order++)
3021 printk("%lu*%lukB ", nr[order], K(1UL) << order);
3022 printk("= %lukB\n", K(total));
3030 static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
3032 zoneref->zone = zone;
3041 static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
3046 BUG_ON(zone_type >= MAX_NR_ZONES);
3052 if (populated_zone(zone)) {
3053 zoneref_set_zone(zone,
3055 check_highest_zone(zone_type);
3058 } while (zone_type);
3072 #define ZONELIST_ORDER_DEFAULT 0
3073 #define ZONELIST_ORDER_NODE 1
3074 #define ZONELIST_ORDER_ZONE 2
3080 static char zonelist_order_name[3][8] = {"Default", "Node", "Zone"};
3087 #define NUMA_ZONELIST_ORDER_LEN 16
3098 static int __parse_numa_zonelist_order(char *s)
3100 if (*s == 'd' || *s == 'D') {
3102 } else if (*s == 'n' || *s == 'N') {
3104 } else if (*s == 'z' || *s == 'Z') {
3108 "Ignoring invalid numa_zonelist_order value: "
3115 static __init int setup_numa_zonelist_order(char *s)
3122 ret = __parse_numa_zonelist_order(s);
3128 early_param("numa_zonelist_order", setup_numa_zonelist_order);
3148 int oldval = user_zonelist_order;
3149 if (__parse_numa_zonelist_order((char*)table->data)) {
3155 user_zonelist_order = oldval;
3156 } else if (oldval != user_zonelist_order) {
3168 #define MAX_NODE_LOAD (nr_online_nodes)
3185 static int find_next_best_node(int node, nodemask_t *used_node_mask)
3212 if (!cpumask_empty(tmp))
3217 val += node_load[n];
3219 if (val < min_val) {
3226 node_set(best_node, *used_node_mask);
3237 static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
3240 struct zonelist *zonelist;
3245 j = build_zonelists_node(NODE_DATA(node), zonelist, j,
3254 static void build_thisnode_zonelists(pg_data_t *pgdat)
3257 struct zonelist *zonelist;
3260 j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
3273 static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
3278 struct zonelist *zonelist;
3282 for (zone_type = MAX_NR_ZONES - 1; zone_type >= 0; zone_type--) {
3283 for (j = 0; j < nr_nodes; j++) {
3284 node = node_order[j];
3286 if (populated_zone(z)) {
3289 check_highest_zone(zone_type);
3297 static int default_zonelist_order(void)
3313 for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
3315 if (populated_zone(z)) {
3331 if (!low_kmem_size ||
3332 low_kmem_size > total_size/2)
3339 average_size = total_size /
3344 for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
3346 if (populated_zone(z)) {
3352 if (low_kmem_size &&
3353 total_size > average_size &&
3354 low_kmem_size > total_size * 70/100)
3360 static void set_zonelist_order(void)
3363 current_zonelist_order = default_zonelist_order();
3365 current_zonelist_order = user_zonelist_order;
3368 static void build_zonelists(pg_data_t *pgdat)
3373 int local_node, prev_node;
3374 struct zonelist *zonelist;
3375 int order = current_zonelist_order;
3387 prev_node = local_node;
3390 memset(node_order, 0, sizeof(node_order));
3393 while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
3406 build_zonelists_in_node_order(pgdat, node);
3408 node_order[j++] = node;
3413 build_zonelists_in_zone_order(pgdat, j);
3416 build_thisnode_zonelists(pgdat);
3420 static void build_zonelist_cache(pg_data_t *pgdat)
3422 struct zonelist *zonelist;
3423 struct zonelist_cache *zlc;
3430 zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z);
3433 #ifdef CONFIG_HAVE_MEMORYLESS_NODES
3440 int local_memory_node(int node)
3454 static void set_zonelist_order(void)
3459 static void build_zonelists(pg_data_t *pgdat)
3461 int node, local_node;
3463 struct zonelist *zonelist;
3468 j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
3478 for (node = local_node + 1; node < MAX_NUMNODES; node++) {
3481 j = build_zonelists_node(NODE_DATA(node), zonelist, j,
3484 for (node = 0; node < local_node; node++) {
3487 j = build_zonelists_node(NODE_DATA(node), zonelist, j,
3496 static void build_zonelist_cache(pg_data_t *pgdat)
3518 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch);
3520 static void setup_zone_pageset(struct zone *zone);
3529 static int __build_all_zonelists(void *data)
3536 memset(node_load, 0, sizeof(node_load));
3540 build_zonelists(self);
3541 build_zonelist_cache(self);
3547 build_zonelists(pgdat);
3548 build_zonelist_cache(pgdat);
3565 setup_pageset(&per_cpu(boot_pageset, cpu), 0);
3567 #ifdef CONFIG_HAVE_MEMORYLESS_NODES
3590 set_zonelist_order();
3593 __build_all_zonelists(NULL);
3594 mminit_verify_zonelist();
3599 #ifdef CONFIG_MEMORY_HOTPLUG
3601 setup_zone_pageset(zone);
3603 stop_machine(__build_all_zonelists, pgdat, NULL);
3619 printk("Built %i zonelists in %s order, mobility grouping %s. "
3620 "Total pages: %ld\n",
3622 zonelist_order_name[current_zonelist_order],
3641 #define PAGES_PER_WAITQUEUE 256
3643 #ifndef CONFIG_MEMORY_HOTPLUG
3644 static inline unsigned long wait_table_hash_nr_entries(
unsigned long pages)
3646 unsigned long size = 1;
3650 while (size < pages)
3658 size = min(size, 4096UL);
3660 return max(size, 4UL);
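/*
 * Wait-table sizing: one hash entry per PAGES_PER_WAITQUEUE (256)
 * pages, rounded up to a power of two and clamped to [4, 4096]. A
 * 1 GiB zone of 4 KiB pages (262144 pages) therefore gets 1024
 * entries.
 */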
3680 static inline unsigned long wait_table_hash_nr_entries(unsigned long pages)
3691 static inline unsigned long wait_table_bits(unsigned long size)
3696 #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
3701 static int pageblock_is_reserved(unsigned long start_pfn, unsigned long end_pfn)
3705 for (pfn = start_pfn; pfn < end_pfn; pfn++) {
3719 static void setup_zone_migrate_reserve(struct zone *zone)
3721 unsigned long start_pfn, pfn, end_pfn, block_end_pfn;
3723 unsigned long block_migratetype;
3745 reserve = min(2, reserve);
3753 if (page_to_nid(page) != zone_to_nid(zone))
3756 block_migratetype = get_pageblock_migratetype(page);
3765 if (pageblock_is_reserved(pfn, block_end_pfn))
3805 unsigned long end_pfn = start_pfn + size;
3813 for (pfn = start_pfn; pfn < end_pfn; pfn++) {
3826 set_page_links(page, zone, nid, pfn);
3827 mminit_verify_page_links(page, zone, nid, pfn);
3828 init_page_count(page);
3829 reset_page_mapcount(page);
3830 SetPageReserved(page);
3850 INIT_LIST_HEAD(&page->lru);
3851 #ifdef WANT_PAGE_VIRTUAL
3853 if (!is_highmem_idx(zone))
3859 static void __meminit zone_init_free_lists(struct zone *zone)
3863 INIT_LIST_HEAD(&zone->free_area[order].free_list[t]);
3868 #ifndef __HAVE_ARCH_MEMMAP_INIT
3869 #define memmap_init(size, nid, zone, start_pfn) \
3870 memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
3873 static int __meminit zone_batchsize(struct zone *zone)
3923 static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
3928 memset(p, 0, sizeof(*p));
3935 INIT_LIST_HEAD(&pcp->lists[migratetype]);
3955 static void __meminit setup_zone_pageset(struct zone *zone)
3964 setup_pageset(pcp, zone_batchsize(zone));
3967 setup_pagelist_highmark(pcp,
3982 setup_zone_pageset(zone);
3986 int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
3997 wait_table_hash_nr_entries(zone_size_pages);
4028 static __meminit void zone_pcp_init(struct zone *zone)
4035 zone->pageset = &boot_pageset;
4040 zone_batchsize(zone));
4044 unsigned long zone_start_pfn,
4050 ret = zone_wait_table_init(zone, size);
4058 "Initialising map node %d zone %lu pfns %lu -> %lu\n",
4061 zone_start_pfn, (zone_start_pfn + size));
4063 zone_init_free_lists(zone);
4068 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4069 #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
4076 int __meminit __early_pfn_to_nid(unsigned long pfn)
4078 unsigned long start_pfn, end_pfn;
4081 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
4082 if (start_pfn <= pfn && pfn < end_pfn)
4089 int __meminit early_pfn_to_nid(unsigned long pfn)
4093 nid = __early_pfn_to_nid(pfn);
4100 #ifdef CONFIG_NODES_SPAN_OTHER_NODES
4105 nid = __early_pfn_to_nid(pfn);
4106 if (nid >= 0 && nid != node)
4121 void __init free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn)
4123 unsigned long start_pfn, end_pfn;
4126 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid) {
4127 start_pfn = min(start_pfn, max_low_pfn);
4128 end_pfn = min(end_pfn, max_low_pfn);
4130 if (start_pfn < end_pfn)
4145 void __init sparse_memory_present_with_active_regions(int nid)
4147 unsigned long start_pfn, end_pfn;
4150 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
4165 void __meminit get_pfn_range_for_nid(unsigned int nid,
4166 unsigned long *start_pfn, unsigned long *end_pfn)
4168 unsigned long this_start_pfn, this_end_pfn;
4174 for_each_mem_pfn_range(i, nid, &this_start_pfn, &this_end_pfn, NULL) {
4175 *start_pfn = min(*start_pfn, this_start_pfn);
4176 *end_pfn = max(*end_pfn, this_end_pfn);
4179 if (*start_pfn == -1UL)
4188 static void __init find_usable_zone_for_movable(void)
4191 for (zone_index = MAX_NR_ZONES - 1; zone_index >= 0; zone_index--) {
4195 if (arch_zone_highest_possible_pfn[zone_index] >
4196 arch_zone_lowest_possible_pfn[zone_index])
4201 movable_zone = zone_index;
4214 static void __meminit adjust_zone_range_for_zone_movable(int nid,
4215 unsigned long zone_type,
4218 unsigned long *zone_start_pfn,
4219 unsigned long *zone_end_pfn)
4222 if (zone_movable_pfn[nid]) {
4225 *zone_start_pfn = zone_movable_pfn[nid];
4226 *zone_end_pfn = min(node_end_pfn,
4227 arch_zone_highest_possible_pfn[movable_zone]);
4230 } else if (*zone_start_pfn < zone_movable_pfn[nid] &&
4231 *zone_end_pfn > zone_movable_pfn[nid]) {
4232 *zone_end_pfn = zone_movable_pfn[nid];
4235 } else if (*zone_start_pfn >= zone_movable_pfn[nid])
4236 *zone_start_pfn = *zone_end_pfn;
4244 static unsigned long __meminit zone_spanned_pages_in_node(int nid,
4245 unsigned long zone_type,
4249 unsigned long zone_start_pfn, zone_end_pfn;
4252 get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
4253 zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
4254 zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
4255 adjust_zone_range_for_zone_movable(nid, zone_type,
4256 node_start_pfn, node_end_pfn,
4257 &zone_start_pfn, &zone_end_pfn);
4260 if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
4264 zone_end_pfn = min(zone_end_pfn, node_end_pfn);
4265 zone_start_pfn = max(zone_start_pfn, node_start_pfn);
4268 return zone_end_pfn - zone_start_pfn;
4275 unsigned long __meminit __absent_pages_in_range(int nid,
4276 unsigned long range_start_pfn,
4277 unsigned long range_end_pfn)
4279 unsigned long nr_absent = range_end_pfn - range_start_pfn;
4280 unsigned long start_pfn, end_pfn;
4283 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
4284 start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
4285 end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
4286 nr_absent -= end_pfn - start_pfn;
4298 unsigned long __init absent_pages_in_range(unsigned long start_pfn,
4299 unsigned long end_pfn)
4301 return __absent_pages_in_range(MAX_NUMNODES, start_pfn, end_pfn);
4305 static unsigned long __meminit zone_absent_pages_in_node(int nid,
4306 unsigned long zone_type,
4307 unsigned long *ignored)
4309 unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
4310 unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
4312 unsigned long zone_start_pfn, zone_end_pfn;
4314 get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
4315 zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
4316 zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
4318 adjust_zone_range_for_zone_movable(nid, zone_type,
4319 node_start_pfn, node_end_pfn,
4320 &zone_start_pfn, &zone_end_pfn);
4321 return __absent_pages_in_range(nid, zone_start_pfn, zone_end_pfn);
4325 static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
4326 unsigned long zone_type,
4327 unsigned long *zones_size)
4332 static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
4333 unsigned long zone_type,
4334 unsigned long *zholes_size)
4345 unsigned long *zones_size, unsigned long *zholes_size)
4347 unsigned long realtotalpages, totalpages = 0;
4350 for (i = 0; i < MAX_NR_ZONES; i++)
4351 totalpages += zone_spanned_pages_in_node(pgdat->node_id, i,
4355 realtotalpages = totalpages;
4356 for (i = 0; i < MAX_NR_ZONES; i++)
4358 zone_absent_pages_in_node(pgdat->node_id, i,
4365 #ifndef CONFIG_SPARSEMEM
4375 unsigned long usemapsize;
4380 usemapsize = roundup(usemapsize, 8 * sizeof(unsigned long));
4382 return usemapsize / 8;
4386 struct zone *zone, unsigned long zonesize)
4395 static inline void setup_usemap(struct pglist_data *pgdat,
4396 struct zone *zone, unsigned long zonesize) {}
4399 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
4407 if (pageblock_order)
4413 order = MAX_ORDER - 1;
4420 pageblock_order = order;
4445 unsigned long *zones_size, unsigned long *zholes_size)
4452 pgdat_resize_init(pgdat);
4457 for (j = 0; j < MAX_NR_ZONES; j++) {
4459 unsigned long size, realsize, memmap_pages;
4461 size = zone_spanned_pages_in_node(nid, j, zones_size);
4462 realsize = size - zone_absent_pages_in_node(nid, j,
4472 if (realsize >= memmap_pages) {
4473 realsize -= memmap_pages;
4476 " %s zone: %lu pages used for memmap\n",
4477 zone_names[j], memmap_pages);
4480 " %s zone: %lu pages exceeds realsize %lu\n",
4481 zone_names[j], memmap_pages, realsize);
4484 if (j == 0 && realsize > dma_reserve) {
4485 realsize -= dma_reserve;
4487 zone_names[0], dma_reserve);
4490 if (!is_highmem_idx(j))
4491 nr_kernel_pages += realsize;
4492 nr_all_pages += realsize;
4498 zone->min_unmapped_pages = (realsize*sysctl_min_unmapped_ratio)
4500 zone->min_slab_pages = (realsize * sysctl_min_slab_ratio) / 100;
4502 zone->name = zone_names[j];
4505 zone_seqlock_init(zone);
4508 zone_pcp_init(zone);
4514 setup_usemap(pgdat, zone, size);
4519 zone_start_pfn += size;
4529 #ifdef CONFIG_FLAT_NODE_MEM_MAP
4531 if (!pgdat->node_mem_map) {
4543 size = (end - start) * sizeof(struct page);
4549 #ifndef CONFIG_NEED_MULTIPLE_NODES
4555 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4565 unsigned long node_start_pfn, unsigned long *zholes_size)
4574 init_zone_allows_reclaim(nid);
4575 calculate_node_totalpages(pgdat, zones_size, zholes_size);
4577 alloc_node_mem_map(pgdat);
4578 #ifdef CONFIG_FLAT_NODE_MEM_MAP
4579 printk(KERN_DEBUG "free_area_init_node: node %d, pgdat %08lx, node_mem_map %08lx\n",
4580 nid, (unsigned long)pgdat,
4581 (unsigned long)pgdat->node_mem_map);
4584 free_area_init_core(pgdat, zones_size, zholes_size);
4587 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
4589 #if MAX_NUMNODES > 1
4593 static void __init setup_nr_node_ids(void)
4603 static inline void setup_nr_node_ids(void)
4627 unsigned long __init node_map_pfn_alignment(void)
4629 unsigned long accl_mask = 0, last_end = 0;
4634 for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, &nid) {
4635 if (!start || last_nid < 0 || last_nid == nid) {
4646 mask = ~((1 << __ffs(start)) - 1);
4647 while (mask && last_end <= (start & (mask << 1)))
4655 return ~accl_mask + 1;
4659 static unsigned long __init find_min_pfn_for_node(int nid)
4662 unsigned long start_pfn;
4665 for_each_mem_pfn_range(i, nid, &start_pfn, NULL, NULL)
4666 min_pfn = min(min_pfn, start_pfn);
4670 "Could not find start_pfn for node %d\n", nid);
4683 unsigned long __init find_min_pfn_with_active_regions(void)
4685 return find_min_pfn_for_node(MAX_NUMNODES);
4693 static unsigned long __init early_calculate_totalpages(void)
4695 unsigned long totalpages = 0;
4696 unsigned long start_pfn, end_pfn;
4699 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
4700 unsigned long pages = end_pfn - start_pfn;
4702 totalpages += pages;
4715 static void __init find_zone_movable_pfns_for_nodes(void)
4718 unsigned long usable_startpfn;
4719 unsigned long kernelcore_node, kernelcore_remaining;
4722 unsigned long totalpages = early_calculate_totalpages();
4733 if (required_movablecore) {
4734 unsigned long corepages;
4740 required_movablecore =
4742 corepages = totalpages - required_movablecore;
4744 required_kernelcore = max(required_kernelcore, corepages);
4748 if (!required_kernelcore)
4752 find_usable_zone_for_movable();
4753 usable_startpfn = arch_zone_lowest_possible_pfn[movable_zone];
4757 kernelcore_node = required_kernelcore / usable_nodes;
4759 unsigned long start_pfn, end_pfn;
4766 if (required_kernelcore < kernelcore_node)
4767 kernelcore_node = required_kernelcore / usable_nodes;
4774 kernelcore_remaining = kernelcore_node;
4777 for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
4778 unsigned long size_pages;
4780 start_pfn = max(start_pfn, zone_movable_pfn[nid]);
4781 if (start_pfn >= end_pfn)
4785 if (start_pfn < usable_startpfn) {
4786 unsigned long kernel_pages;
4787 kernel_pages = min(end_pfn, usable_startpfn)
4790 kernelcore_remaining -= min(kernel_pages,
4791 kernelcore_remaining);
4792 required_kernelcore -= min(kernel_pages,
4793 required_kernelcore);
4796 if (end_pfn <= usable_startpfn) {
4804 zone_movable_pfn[nid] = end_pfn;
4807 start_pfn = usable_startpfn;
4815 size_pages = end_pfn - start_pfn;
4816 if (size_pages > kernelcore_remaining)
4817 size_pages = kernelcore_remaining;
4818 zone_movable_pfn[nid] = start_pfn + size_pages;
4825 required_kernelcore -= min(required_kernelcore,
4827 kernelcore_remaining -= size_pages;
4828 if (!kernelcore_remaining)
4840 if (usable_nodes && required_kernelcore > usable_nodes)
4845 zone_movable_pfn[nid] =
4856 #ifdef CONFIG_HIGHMEM
4859 for (zone_type = 0; zone_type <= ZONE_NORMAL; zone_type++) {
4882 void __init free_area_init_nodes(unsigned long *max_zone_pfn)
4884 unsigned long start_pfn, end_pfn;
4888 memset(arch_zone_lowest_possible_pfn, 0,
4889 sizeof(arch_zone_lowest_possible_pfn));
4890 memset(arch_zone_highest_possible_pfn, 0,
4891 sizeof(arch_zone_highest_possible_pfn));
4892 arch_zone_lowest_possible_pfn[0] = find_min_pfn_with_active_regions();
4893 arch_zone_highest_possible_pfn[0] = max_zone_pfn[0];
4894 for (i = 1; i < MAX_NR_ZONES; i++) {
4897 arch_zone_lowest_possible_pfn[i] =
4898 arch_zone_highest_possible_pfn[i-1];
4899 arch_zone_highest_possible_pfn[i] =
4900 max(max_zone_pfn[i], arch_zone_lowest_possible_pfn[i]);
4906 memset(zone_movable_pfn, 0, sizeof(zone_movable_pfn));
4907 find_zone_movable_pfns_for_nodes();
4910 printk("Zone ranges:\n");
4911 for (i = 0; i < MAX_NR_ZONES; i++) {
4915 if (arch_zone_lowest_possible_pfn[i] ==
4916 arch_zone_highest_possible_pfn[i])
4920 arch_zone_lowest_possible_pfn[i] << PAGE_SHIFT,
4921 (arch_zone_highest_possible_pfn[i]
4926 printk("Movable zone start for each node\n");
4928 if (zone_movable_pfn[i])
4929 printk(" Node %d: %#010lx\n", i,
4934 printk("Early memory node ranges\n");
4935 for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
4936 printk(" node %3d: [mem %#010lx-%#010lx]\n", nid,
4940 mminit_verify_pageflags_layout();
4941 setup_nr_node_ids();
4945 find_min_pfn_for_node(nid), NULL);
4950 check_for_regular_memory(pgdat);
4954 static int __init cmdline_parse_core(char *p, unsigned long *core)
4956 unsigned long long coremem;
4973 static int __init cmdline_parse_kernelcore(char *p)
4975 return cmdline_parse_core(p, &required_kernelcore);
4982 static int __init cmdline_parse_movablecore(char *p)
4984 return cmdline_parse_core(p, &required_movablecore);
4987 early_param("kernelcore", cmdline_parse_kernelcore);
4988 early_param("movablecore", cmdline_parse_movablecore);
5005 dma_reserve = new_dma_reserve;
5015 unsigned long action, void *hcpu)
5017 int cpu = (unsigned long)hcpu;
5029 vm_events_fold_cpu(cpu);
5038 refresh_cpu_vm_stats(cpu);
5052 static void calculate_totalreserve_pages(void)
5055 unsigned long reserve_pages = 0;
5056 enum zone_type i, j;
5059 for (i = 0; i < MAX_NR_ZONES; i++) {
5061 unsigned long max = 0;
5064 for (j = i; j < MAX_NR_ZONES; j++) {
5074 reserve_pages += max;
5088 totalreserve_pages = reserve_pages;
5097 static void setup_per_zone_lowmem_reserve(void)
5100 enum zone_type j, idx;
5103 for (j = 0; j < MAX_NR_ZONES; j++) {
5111 struct zone *lower_zone;
5115 if (sysctl_lowmem_reserve_ratio[idx] < 1)
5116 sysctl_lowmem_reserve_ratio[idx] = 1;
5120 sysctl_lowmem_reserve_ratio[idx];
5127 calculate_totalreserve_pages();
5130 static void __setup_per_zone_wmarks(void)
5132 unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
5133 unsigned long lowmem_pages = 0;
5135 unsigned long flags;
5139 if (!is_highmem(zone))
5148 do_div(tmp, lowmem_pages);
5149 if (is_highmem(zone)) {
5162 if (min_pages < SWAP_CLUSTER_MAX)
5163 min_pages = SWAP_CLUSTER_MAX;
5164 if (min_pages > 128)
5182 setup_zone_migrate_reserve(zone);
5183 spin_unlock_irqrestore(&zone->lock, flags);
5187 calculate_totalreserve_pages();
5200 __setup_per_zone_wmarks();
5225 static void __meminit calculate_zone_inactive_ratio(struct zone *zone)
5227 unsigned int gb, ratio;
5239 static void __meminit setup_per_zone_inactive_ratio(void)
5244 calculate_zone_inactive_ratio(zone);
5273 unsigned long lowmem_kbytes;
5277 min_free_kbytes = int_sqrt(lowmem_kbytes * 16);
5278 if (min_free_kbytes < 128)
5279 min_free_kbytes = 128;
5280 if (min_free_kbytes > 65536)
5281 min_free_kbytes = 65536;
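/*
 * min_free_kbytes scales with the square root of low memory:
 * int_sqrt(lowmem_kbytes * 16). For 8 GiB of lowmem (8388608 kB) that
 * is sqrt(134217728) ~= 11585 kB, and the result is clamped to the
 * [128, 65536] range enforced above.
 */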
5283 refresh_zone_stat_thresholds();
5284 setup_per_zone_lowmem_reserve();
5285 setup_per_zone_inactive_ratio();
5296 void __user *buffer, size_t *length, loff_t *ppos)
5306 void __user *buffer, size_t *length, loff_t *ppos)
5317 sysctl_min_unmapped_ratio) / 100;
5322 void __user *buffer, size_t *length, loff_t *ppos)
5333 sysctl_min_slab_ratio) / 100;
5348 void __user *buffer, size_t *length, loff_t *ppos)
5351 setup_per_zone_lowmem_reserve();
5362 void __user *buffer, size_t *length, loff_t *ppos)
5369 if (!write || (ret < 0))
5375 setup_pagelist_highmark(
5385 static int __init set_hashdist(char *str)
5392 __setup("hashdist=", set_hashdist);
5402 unsigned long bucketsize,
5403 unsigned long numentries,
5406 unsigned int *_hash_shift,
5407 unsigned int *_hash_mask,
5408 unsigned long low_limit,
5409 unsigned long high_limit)
5411 unsigned long long max = high_limit;
5412 unsigned long log2qty, size;
5418 numentries = nr_kernel_pages;
5424 if (scale > PAGE_SHIFT)
5427 numentries <<= (PAGE_SHIFT - scale);
5433 if (!(numentries >> *_hash_shift)) {
5434 numentries = 1UL << *_hash_shift;
5444 max = ((unsigned long long)nr_all_pages << PAGE_SHIFT) >> 4;
5447 max = min(max, 0x80000000ULL);
5449 if (numentries < low_limit)
5450 numentries = low_limit;
5451 if (numentries > max)
5454 log2qty = ilog2(numentries);
5457 size = bucketsize << log2qty;
5473 } while (!table && size > PAGE_SIZE && --log2qty);
5476 panic("Failed to allocate %s hash table\n", tablename);
5478 printk(KERN_INFO "%s hash table entries: %ld (order: %d, %lu bytes)\n",
5481 ilog2(size) - PAGE_SHIFT,
5485 *_hash_shift = log2qty;
5487 *_hash_mask = (1 << log2qty) - 1;
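/*
 * Hash sizing sketch (assuming the usual meaning of 'scale'): the
 * default entry count is derived from nr_kernel_pages, shifted by
 * (PAGE_SHIFT - scale) so that roughly one entry exists per 2^scale
 * bytes of low memory, then rounded to a power of two and capped both
 * by the nr_all_pages-based limit (>> 4 above) and by high_limit.
 */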
5493 static inline unsigned long *get_pageblock_bitmap(struct zone *zone,
5496 #ifdef CONFIG_SPARSEMEM
5497 return __pfn_to_section(pfn)->pageblock_flags;
5503 static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
5505 #ifdef CONFIG_SPARSEMEM
5506 pfn &= (PAGES_PER_SECTION-1);
5522 int start_bitidx, int end_bitidx)
5526 unsigned long pfn, bitidx;
5527 unsigned long flags = 0;
5528 unsigned long value = 1;
5530 zone = page_zone(page);
5532 bitmap = get_pageblock_bitmap(zone, pfn);
5533 bitidx = pfn_to_bitidx(zone, pfn);
5535 for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
5536 if (test_bit(bitidx + start_bitidx, bitmap))
5550 int start_bitidx, int end_bitidx)
5554 unsigned long pfn, bitidx;
5555 unsigned long value = 1;
5557 zone = page_zone(page);
5559 bitmap = get_pageblock_bitmap(zone, pfn);
5560 bitidx = pfn_to_bitidx(zone, pfn);
5564 for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
5566 __set_bit(bitidx + start_bitidx, bitmap);
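/*
 * Pageblock flags (the migratetype and related bits) live in a separate
 * bitmap with a few bits per pageblock; the get/set helpers above
 * translate a pfn to a bit index and walk the requested bit range one
 * bit at a time.
 */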
5581 unsigned long pfn, iter, found;
5590 mt = get_pageblock_migratetype(page);
5596 unsigned long check = pfn + iter;
5609 if (PageBuddy(page))
5610 iter += (1 << page_order(page)) - 1;
5650 zone = page_zone(page);
5661 static unsigned long pfn_max_align_down(unsigned long pfn)
5667 static unsigned long pfn_max_align_up(unsigned long pfn)
5674 static int __alloc_contig_migrate_range(struct compact_control *cc,
5675 unsigned long start, unsigned long end)
5678 unsigned long nr_reclaimed;
5679 unsigned long pfn = start;
5680 unsigned int tries = 0;
5685 while (pfn < end || !list_empty(&cc->migratepages)) {
5686 if (fatal_signal_pending(current)) {
5691 if (list_empty(&cc->migratepages)) {
5692 cc->nr_migratepages = 0;
5693 pfn = isolate_migratepages_range(cc->zone, cc,
5700 } else if (++tries == 5) {
5701 ret = ret < 0 ? ret : -EBUSY;
5707 cc->nr_migratepages -= nr_reclaimed;
5715 return ret > 0 ? 0 : ret;
5721 static inline void __update_cma_watermarks(struct zone *zone, int count)
5723 unsigned long flags;
5725 zone->min_cma_pages += count;
5726 spin_unlock_irqrestore(&zone->lock, flags);
5735 static int __reclaim_pages(struct zone *zone, gfp_t gfp_mask, int count)
5737 enum zone_type high_zoneidx = gfp_zone(gfp_mask);
5738 struct zonelist *zonelist = node_zonelist(0, gfp_mask);
5739 int did_some_progress = 0;
5746 __update_cma_watermarks(zone, count);
5750 wake_all_kswapd(order, zonelist, high_zoneidx, zone_idx(zone));
5752 did_some_progress = __perform_reclaim(gfp_mask, order, zonelist,
5754 if (!did_some_progress) {
5761 __update_cma_watermarks(zone, -count);
5786 int alloc_contig_range(unsigned long start, unsigned long end,
5787 unsigned migratetype)
5789 struct zone *zone = page_zone(pfn_to_page(start));
5790 unsigned long outer_start, outer_end;
5793 struct compact_control cc = {
5794 .nr_migratepages = 0,
5798 .ignore_skip_hint = true,
5800 INIT_LIST_HEAD(&cc.migratepages);
5827 pfn_max_align_up(end), migratetype);
5831 ret = __alloc_contig_migrate_range(&cc, start, end);
5856 outer_start = start;
5858 if (++order >= MAX_ORDER) {
5867 pr_warn("alloc_contig_range test_pages_isolated(%lx, %lx) failed\n",
5880 outer_end = isolate_freepages_range(&cc, outer_start, end);
5887 if (start != outer_start)
5888 free_contig_range(outer_start, start - outer_start);
5889 if (end != outer_end)
5890 free_contig_range(end, outer_end - end);
5894 pfn_max_align_up(end), migratetype);
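/*
 * alloc_contig_range() in outline: isolate the pageblocks covering the
 * range, migrate any in-use pages out with __alloc_contig_migrate_range(),
 * pull the now-free pages off the buddy lists via isolate_freepages_range(),
 * trim the excess at both ends with free_contig_range(), and finally
 * undo the pageblock isolation.
 */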
5898 void free_contig_range(unsigned long pfn, unsigned nr_pages)
5900 for (; nr_pages--; ++pfn)
5905 #ifdef CONFIG_MEMORY_HOTPLUG
5906 static int __meminit __zone_pcp_update(void *data)
5908 struct zone *zone = data;
5910 unsigned long batch = zone_batchsize(zone), flags;
5921 free_pcppages_bulk(zone, pcp->count, pcp);
5922 drain_zonestat(zone, pset);
5923 setup_pageset(pset, batch);
5929 void __meminit zone_pcp_update(struct zone *zone)
5931 stop_machine(__zone_pcp_update, zone, NULL);
5935 #ifdef CONFIG_MEMORY_HOTREMOVE
5936 void zone_pcp_reset(struct zone *zone)
5938 unsigned long flags;
5944 if (zone->pageset != &boot_pageset) {
5947 drain_zonestat(zone, pset);
5950 zone->pageset = &boot_pageset;
5959 __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
5965 unsigned long flags;
5967 for (pfn = start_pfn; pfn < end_pfn; pfn++)
5975 while (pfn < end_pfn) {
5981 BUG_ON(page_count(page));
5982 BUG_ON(!PageBuddy(page));
5983 order = page_order(page);
5984 #ifdef CONFIG_DEBUG_VM
5986 pfn, 1 << order, end_pfn);
5989 rmv_page_order(page);
5993 for (i = 0; i < (1 << order); i++)
5994 SetPageReserved((page+i));
5995 pfn += (1 << order);
5997 spin_unlock_irqrestore(&zone->lock, flags);
6001 #ifdef CONFIG_MEMORY_FAILURE
6002 bool is_free_buddy_page(struct page *page)
6004 struct zone *zone = page_zone(page);
6006 unsigned long flags;
6010 for (order = 0; order < MAX_ORDER; order++) {
6011 struct page *page_head = page - (pfn & ((1 << order) - 1));
6013 if (PageBuddy(page_head) && page_order(page_head) >= order)
6016 spin_unlock_irqrestore(&zone->lock, flags);
6037 #ifdef CONFIG_PAGEFLAGS_EXTENDED
6038 {1UL << PG_head, "head" },
6039 {1UL << PG_tail, "tail" },
6049 {1UL << PG_mlocked, "mlocked" },
6051 #ifdef CONFIG_ARCH_USES_PG_UNCACHED
6052 {1UL << PG_uncached, "uncached" },
6054 #ifdef CONFIG_MEMORY_FAILURE
6055 {1UL << PG_hwpoison, "hwpoison" },
6057 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
6058 {1UL << PG_compound_lock, "compound_lock" },
6062 static void dump_page_flags(unsigned long flags)
6064 const char *delim = "";
6073 flags &= (1UL << NR_PAGEFLAGS) - 1;
6077 mask = pageflag_names[i].mask;
6078 if ((flags & mask) != mask)
6082 printk("%s%s", delim, pageflag_names[i].name);
6088 printk("%s%#lx", delim, flags);
6096 "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
6099 dump_page_flags(page->flags);
6100 mem_cgroup_print_bad_page(page);