#include <linux/list.h>
#include <linux/module.h>
#include <linux/sysctl.h>
#include <linux/mempolicy.h>
#include <linux/slab.h>
#include <asm/pgtable.h>

static unsigned long __initdata default_hstate_max_huge_pages;
static unsigned long __initdata default_hstate_size;

static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
        bool free = (spool->count == 0) && (spool->used_hpages == 0);

        spin_unlock(&spool->lock);

        struct hugepage_subpool *spool;

        spool->max_hpages = nr_blocks;
        spool->used_hpages = 0;

        spin_lock(&spool->lock);
        unlock_or_release_subpool(spool);
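/*
 * Subpool accounting helpers below: charge and uncharge huge pages against a
 * hugetlbfs subpool under spool->lock. The get side checks the request
 * against max_hpages, and the put side may end up freeing the subpool
 * through unlock_or_release_subpool().
 */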
static int hugepage_subpool_get_pages(struct hugepage_subpool *spool,
                                      long delta)
        spin_lock(&spool->lock);
        if ((spool->used_hpages + delta) <= spool->max_hpages) {
                spool->used_hpages += delta;
        spin_unlock(&spool->lock);

static void hugepage_subpool_put_pages(struct hugepage_subpool *spool,
                                       long delta)
        spin_lock(&spool->lock);
        spool->used_hpages -= delta;
        unlock_or_release_subpool(spool);

static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
        return HUGETLBFS_SB(inode->i_sb)->spool;

static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
        return subpool_inode(vma->vm_file->f_dentry->d_inode);
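/*
 * The region_* helpers below maintain a sorted list of [from, to) file
 * regions that carry huge page reservations: region_add/region_chg merge
 * and account new ranges, region_truncate drops everything past 'end',
 * and region_count reports how many pages of [f, t) are already covered.
 */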
        if (&rg->link == head)

        INIT_LIST_HEAD(&nrg->link);
        list_add(&nrg->link, rg->link.prev);

        if (&rg->link == head)

static long region_truncate(struct list_head *head, long end)
        if (end > rg->from) {

        if (&rg->link == head)

static long region_count(struct list_head *head, long f, long t)
        seg_to = min(rg->to, t);
        chg += seg_to - seg_from;

                                 unsigned long address)
        return vma_hugecache_offset(hstate_vma(vma), vma, address);

        if (!is_vm_hugetlb_page(vma))

#ifndef vma_mmu_pagesize

#define HPAGE_RESV_OWNER    (1UL << 0)
#define HPAGE_RESV_UNMAPPED (1UL << 1)
#define HPAGE_RESV_MASK (HPAGE_RESV_OWNER | HPAGE_RESV_UNMAPPED)
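/*
 * The HPAGE_RESV_* bits above are stashed in the low bits of
 * vma->vm_private_data, alongside the resv_map pointer used by private
 * mappings; the helpers below mask them in and out of that word.
 */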
static unsigned long get_vma_private_data(struct vm_area_struct *vma)

static struct resv_map *resv_map_alloc(void)
        kref_init(&resv_map->refs);
        INIT_LIST_HEAD(&resv_map->regions);

static void resv_map_release(struct kref *ref)
        region_truncate(&resv_map->regions, 0);

        return (struct resv_map *)(get_vma_private_data(vma) &

        set_vma_private_data(vma, (get_vma_private_data(vma) &

        set_vma_private_data(vma, get_vma_private_data(vma) | flags);

        return (get_vma_private_data(vma) & flag) != 0;

static void decrement_hugepage_resv_vma(struct hstate *h,
                                        struct vm_area_struct *vma)
        h->resv_huge_pages--;

        h->resv_huge_pages--;
static void copy_gigantic_page(struct page *dst, struct page *src)
        struct hstate *h = page_hstate(src);

        for (i = 0; i < pages_per_huge_page(h); ) {
                copy_highpage(dst, src);
                dst = mem_map_next(dst, dst_base, i);
                src = mem_map_next(src, src_base, i);

        struct hstate *h = page_hstate(src);
                copy_gigantic_page(dst, src);

        for (i = 0; i < pages_per_huge_page(h); i++) {
                copy_highpage(dst + i, src + i);

static void enqueue_huge_page(struct hstate *h, struct page *page)
        int nid = page_to_nid(page);
        list_move(&page->lru, &h->hugepage_freelists[nid]);
        h->free_huge_pages++;
        h->free_huge_pages_node[nid]++;

static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
        if (list_empty(&h->hugepage_freelists[nid]))
        page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
        list_move(&page->lru, &h->hugepage_activelist);
        set_page_refcounted(page);
        h->free_huge_pages--;
        h->free_huge_pages_node[nid]--;
static struct page *dequeue_huge_page_vma(struct hstate *h,
                                struct vm_area_struct *vma,
                                unsigned long address, int avoid_reserve)
        struct page *page = NULL;
        unsigned int cpuset_mems_cookie;

        cpuset_mems_cookie = get_mems_allowed();
        zonelist = huge_zonelist(vma, address,
                                        htlb_alloc_mask, &mpol, &nodemask);

        if (!vma_has_reserves(vma) &&
                        h->free_huge_pages - h->resv_huge_pages == 0)

        if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)

                        MAX_NR_ZONES - 1, nodemask) {
                if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) {
                        page = dequeue_huge_page_node(h, zone_to_nid(zone));
                                decrement_hugepage_resv_vma(h, vma);

        if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
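/*
 * dequeue_huge_page_vma() above walks the zonelist chosen by the VMA's
 * memory policy and takes a page from the per-node free list of the first
 * allowed zone. The get_mems_allowed()/put_mems_allowed() cookie pair makes
 * the lookup retry if the task's cpuset changed mid-walk and no page was
 * found.
 */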
static void update_and_free_page(struct hstate *h, struct page *page)
        h->nr_huge_pages_node[page_to_nid(page)]--;
        for (i = 0; i < pages_per_huge_page(h); i++) {
        VM_BUG_ON(hugetlb_cgroup_from_page(page));
        set_compound_page_dtor(page, NULL);
        set_page_refcounted(page);

static void free_huge_page(struct page *page)
        struct hstate *h = page_hstate(page);
        int nid = page_to_nid(page);
        struct hugepage_subpool *spool =
                (struct hugepage_subpool *)page_private(page);

        set_page_private(page, 0);
        BUG_ON(page_mapcount(page));

        spin_lock(&hugetlb_lock);
                                pages_per_huge_page(h), page);
                update_and_free_page(h, page);
                h->surplus_huge_pages--;
                h->surplus_huge_pages_node[nid]--;
                enqueue_huge_page(h, page);
        spin_unlock(&hugetlb_lock);
        hugepage_subpool_put_pages(spool, 1);
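/*
 * free_huge_page() above is the compound page destructor installed by
 * prep_new_huge_page(). Surplus pages are handed back to the buddy
 * allocator via update_and_free_page(); regular pool pages go back on the
 * per-node free list, and the owning subpool gets its one-page credit
 * returned.
 */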
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
        INIT_LIST_HEAD(&page->lru);
        set_compound_page_dtor(page, free_huge_page);
        spin_lock(&hugetlb_lock);
        set_hugetlb_cgroup(page, NULL);
        h->nr_huge_pages_node[nid]++;
        spin_unlock(&hugetlb_lock);

static void prep_compound_gigantic_page(struct page *page, unsigned long order)
        int nr_pages = 1 << order;
        struct page *p = page + 1;

        set_compound_order(page, order);
        for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
                set_page_count(p, 0);
                p->first_page = page;

        compound_page_dtor *dtor;

        if (!PageCompound(page))

        page = compound_head(page);
        dtor = get_compound_page_dtor(page);

        return dtor == free_huge_page;
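/*
 * The check above (PageHuge()) identifies hugetlbfs pages by looking at the
 * compound destructor of the head page: only hugetlb pages use
 * free_huge_page as their dtor.
 */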
static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
        page = alloc_pages_exact_node(nid,
        prep_new_huge_page(h, page, nid);

static int next_node_allowed(int nid, nodemask_t *nodes_allowed)

static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed)
        nid = next_node_allowed(nid, nodes_allowed);

static int hstate_next_node_to_alloc(struct hstate *h,
                                     nodemask_t *nodes_allowed)
        nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed);
        h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);

static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
        start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
        next_nid = start_nid;

                page = alloc_fresh_huge_page_node(h, next_nid);
                next_nid = hstate_next_node_to_alloc(h, nodes_allowed);
        } while (next_nid != start_nid);

        count_vm_event(HTLB_BUDDY_PGALLOC);

        count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
        nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed);
        h->next_nid_to_free = next_node_allowed(nid, nodes_allowed);

        start_nid = hstate_next_node_to_free(h, nodes_allowed);
        next_nid = start_nid;

                if ((!acct_surplus || h->surplus_huge_pages_node[next_nid]) &&
                    !list_empty(&h->hugepage_freelists[next_nid])) {
                                list_entry(h->hugepage_freelists[next_nid].next,
                        h->free_huge_pages--;
                        h->free_huge_pages_node[next_nid]--;
                                h->surplus_huge_pages--;
                                h->surplus_huge_pages_node[next_nid]--;
                        update_and_free_page(h, page);
                next_nid = hstate_next_node_to_free(h, nodes_allowed);
        } while (next_nid != start_nid);

static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
        spin_lock(&hugetlb_lock);
        if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
                spin_unlock(&hugetlb_lock);

        h->surplus_huge_pages++;
        spin_unlock(&hugetlb_lock);

        page = alloc_pages_exact_node(nid,

        spin_lock(&hugetlb_lock);
                INIT_LIST_HEAD(&page->lru);
                r_nid = page_to_nid(page);
                set_compound_page_dtor(page, free_huge_page);
                set_hugetlb_cgroup(page, NULL);
                h->nr_huge_pages_node[r_nid]++;
                h->surplus_huge_pages_node[r_nid]++;
                __count_vm_event(HTLB_BUDDY_PGALLOC);

                h->surplus_huge_pages--;
                __count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
        spin_unlock(&hugetlb_lock);
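/*
 * alloc_buddy_huge_page() above optimistically bumps the pool and surplus
 * counters before dropping hugetlb_lock for the actual buddy allocation,
 * then re-takes the lock and either finishes setting the page up (dtor,
 * cgroup, per-node counters) or rolls the surplus count back on failure.
 */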
        spin_lock(&hugetlb_lock);
        page = dequeue_huge_page_node(h, nid);
        spin_unlock(&hugetlb_lock);

                page = alloc_buddy_huge_page(h, nid);

static int gather_surplus_pages(struct hstate *h, int delta)
        int needed, allocated;
        bool alloc_ok = true;

        needed = (h->resv_huge_pages + delta) - h->free_huge_pages;
                h->resv_huge_pages += delta;

        INIT_LIST_HEAD(&surplus_list);

        spin_unlock(&hugetlb_lock);
        for (i = 0; i < needed; i++) {
                list_add(&page->lru, &surplus_list);

        spin_lock(&hugetlb_lock);
        needed = (h->resv_huge_pages + delta) -
                        (h->free_huge_pages + allocated);

        needed += allocated;
        h->resv_huge_pages += delta;

                put_page_testzero(page);
                enqueue_huge_page(h, page);
        spin_unlock(&hugetlb_lock);

        if (!list_empty(&surplus_list)) {
        spin_lock(&hugetlb_lock);
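/*
 * gather_surplus_pages() above tries to back a reservation of 'delta' pages
 * that the free pool cannot cover. It allocates surplus pages from the buddy
 * allocator with hugetlb_lock dropped, re-checks how many are still needed
 * once the lock is re-taken, enqueues what is used and frees any excess.
 */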
static void return_unused_surplus_pages(struct hstate *h,
                                        unsigned long unused_resv_pages)
        unsigned long nr_pages;

        h->resv_huge_pages -= unused_resv_pages;

        nr_pages = min(unused_resv_pages, h->surplus_huge_pages);

        while (nr_pages--) {

static long vma_needs_reservation(struct hstate *h,
                        struct vm_area_struct *vma, unsigned long addr)
                pgoff_t idx = vma_hugecache_offset(h, vma, addr);
                return region_chg(&inode->i_mapping->private_list,

                pgoff_t idx = vma_hugecache_offset(h, vma, addr);
                struct resv_map *reservations = vma_resv_map(vma);

                err = region_chg(&reservations->regions, idx, idx + 1);

static void vma_commit_reservation(struct hstate *h,
                        struct vm_area_struct *vma, unsigned long addr)
        struct inode *inode = mapping->host;

                pgoff_t idx = vma_hugecache_offset(h, vma, addr);
                region_add(&inode->i_mapping->private_list, idx, idx + 1);

                pgoff_t idx = vma_hugecache_offset(h, vma, addr);
                struct resv_map *reservations = vma_resv_map(vma);

                region_add(&reservations->regions, idx, idx + 1);
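/*
 * Reservations are handled in two steps by the functions above:
 * vma_needs_reservation() asks (via region_chg) whether a page at this
 * offset is already reserved, and vma_commit_reservation() records it
 * (via region_add) once the page has actually been allocated. Shared
 * mappings track this in the inode's region list, private mappings in the
 * per-VMA resv_map.
 */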
                                unsigned long addr, int avoid_reserve)
        struct hugepage_subpool *spool = subpool_vma(vma);

        chg = vma_needs_reservation(h, vma, addr);

        if (hugepage_subpool_get_pages(spool, chg))

                hugepage_subpool_put_pages(spool, chg);

        spin_lock(&hugetlb_lock);
        page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve);
                spin_unlock(&hugetlb_lock);

        spin_unlock(&hugetlb_lock);

                        pages_per_huge_page(h),
                hugepage_subpool_put_pages(spool, chg);

        spin_lock(&hugetlb_lock);
        list_move(&page->lru, &h->hugepage_activelist);
        spin_unlock(&hugetlb_lock);

        set_page_private(page, (unsigned long)spool);

        vma_commit_reservation(h, vma, addr);
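/*
 * The fragments above belong to alloc_huge_page(): it charges the subpool
 * when the page is not covered by an existing reservation, tries the
 * pre-allocated pool first under hugetlb_lock, falls back to a surplus
 * (buddy) allocation, records the owning subpool in page_private() and
 * finally commits the reservation.
 */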
        struct huge_bootmem_page *m;

        list_add(&m->list, &huge_boot_pages);

static void prep_compound_huge_page(struct page *page, int order)
                prep_compound_gigantic_page(page, order);

static void __init gather_bootmem_prealloc(void)
        struct huge_bootmem_page *m;

                struct hstate *h = m->hstate;
#ifdef CONFIG_HIGHMEM
                                sizeof(struct huge_bootmem_page));
                __ClearPageReserved(page);
                WARN_ON(page_count(page) != 1);
                prep_compound_huge_page(page, h->order);
                prep_new_huge_page(h, page, page_to_nid(page));
                totalram_pages += 1 << h->order;

static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
        for (i = 0; i < h->max_huge_pages; ++i) {
                } else if (!alloc_fresh_huge_page(h,
        h->max_huge_pages = i;

static void __init hugetlb_init_hstates(void)
        for_each_hstate(h) {
                        hugetlb_hstate_alloc_pages(h);

static char * __init memfmt(char *buf, unsigned long n)
        if (n >= (1UL << 30))
                sprintf(buf, "%lu GB", n >> 30);
        else if (n >= (1UL << 20))
                sprintf(buf, "%lu MB", n >> 20);
        else
                sprintf(buf, "%lu KB", n >> 10);

static void __init report_hugepages(void)
        for_each_hstate(h) {
                        "pre-allocated %ld pages\n",
                        h->free_huge_pages);
#ifdef CONFIG_HIGHMEM
static void try_to_free_low(struct hstate *h, unsigned long count,
                            nodemask_t *nodes_allowed)
                struct list_head *freel = &h->hugepage_freelists[i];
                        if (count >= h->nr_huge_pages)
                        if (PageHighMem(page))
                        update_and_free_page(h, page);
                        h->free_huge_pages--;
                        h->free_huge_pages_node[page_to_nid(page)]--;
#else
static inline void try_to_free_low(struct hstate *h, unsigned long count,
                                   nodemask_t *nodes_allowed)

static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed,
                               int delta)
        int start_nid, next_nid;

                start_nid = hstate_next_node_to_alloc(h, nodes_allowed);

                start_nid = hstate_next_node_to_free(h, nodes_allowed);
        next_nid = start_nid;

                        if (!h->surplus_huge_pages_node[nid]) {
                                next_nid = hstate_next_node_to_alloc(h,

                        if (h->surplus_huge_pages_node[nid] >=
                                        h->nr_huge_pages_node[nid]) {
                                next_nid = hstate_next_node_to_free(h,

        h->surplus_huge_pages += delta;
        h->surplus_huge_pages_node[nid] += delta;
        } while (next_nid != start_nid);
#define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
                                        nodemask_t *nodes_allowed)
        unsigned long min_count, ret;

                return h->max_huge_pages;

        spin_lock(&hugetlb_lock);
                if (!adjust_pool_surplus(h, nodes_allowed, -1))

                spin_unlock(&hugetlb_lock);
                ret = alloc_fresh_huge_page(h, nodes_allowed);
                spin_lock(&hugetlb_lock);

        min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages;
        min_count = max(count, min_count);
        try_to_free_low(h, min_count, nodes_allowed);
                if (!free_pool_huge_page(h, nodes_allowed, 0))
                if (!adjust_pool_surplus(h, nodes_allowed, 1))
        spin_unlock(&hugetlb_lock);
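/*
 * set_max_huge_pages() above grows the pool by first converting surplus
 * pages to persistent ones (adjust_pool_surplus(-1)) and then allocating
 * fresh huge pages, and shrinks it by freeing pages down to min_count,
 * which keeps enough pages around to honour existing reservations.
 */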
#define HSTATE_ATTR_RO(_name) \
        static struct kobj_attribute _name##_attr = __ATTR_RO(_name)

#define HSTATE_ATTR(_name) \
        static struct kobj_attribute _name##_attr = \
                __ATTR(_name, 0644, _name##_show, _name##_store)

static struct kobject *hugepages_kobj;

static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp);

static struct hstate *kobj_to_hstate(struct kobject *kobj, int *nidp)
                if (hstate_kobjs[i] == kobj) {

        return kobj_to_node_hstate(kobj, nidp);

        unsigned long nr_huge_pages;

        h = kobj_to_hstate(kobj, &nid);
                nr_huge_pages = h->nr_huge_pages;

                nr_huge_pages = h->nr_huge_pages_node[nid];

        return sprintf(buf, "%lu\n", nr_huge_pages);
static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
                        struct kobject *kobj, struct kobj_attribute *attr,
                        const char *buf, size_t len)
        unsigned long count;

        h = kobj_to_hstate(kobj, &nid);

        if (!(obey_mempolicy &&
                        init_nodemask_of_mempolicy(nodes_allowed))) {
        } else if (nodes_allowed) {
                count += h->nr_huge_pages - h->nr_huge_pages_node[nid];
                init_nodemask_of_node(nodes_allowed, nid);

        h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);

        return nr_hugepages_show_common(kobj, attr, buf);

        return nr_hugepages_store_common(false, kobj, attr, buf, len);

        return nr_hugepages_show_common(kobj, attr, buf);

static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj,
                        struct kobj_attribute *attr, const char *buf, size_t len)
        return nr_hugepages_store_common(true, kobj, attr, buf, len);

static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
                        struct kobj_attribute *attr, char *buf)
        struct hstate *h = kobj_to_hstate(kobj, NULL);
        return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages);

static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
                        struct kobj_attribute *attr, const char *buf, size_t count)
        unsigned long input;
        struct hstate *h = kobj_to_hstate(kobj, NULL);

        spin_lock(&hugetlb_lock);
        h->nr_overcommit_huge_pages = input;
        spin_unlock(&hugetlb_lock);
        unsigned long free_huge_pages;

        h = kobj_to_hstate(kobj, &nid);
                free_huge_pages = h->free_huge_pages;

                free_huge_pages = h->free_huge_pages_node[nid];

        return sprintf(buf, "%lu\n", free_huge_pages);

        struct hstate *h = kobj_to_hstate(kobj, NULL);
        return sprintf(buf, "%lu\n", h->resv_huge_pages);

        unsigned long surplus_huge_pages;

        h = kobj_to_hstate(kobj, &nid);
                surplus_huge_pages = h->surplus_huge_pages;

                surplus_huge_pages = h->surplus_huge_pages_node[nid];

        return sprintf(buf, "%lu\n", surplus_huge_pages);

static struct attribute *hstate_attrs[] = {
        &nr_hugepages_attr.attr,
        &nr_overcommit_hugepages_attr.attr,
        &free_hugepages_attr.attr,
        &resv_hugepages_attr.attr,
        &surplus_hugepages_attr.attr,
        &nr_hugepages_mempolicy_attr.attr,

        .attrs = hstate_attrs,
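/*
 * These attributes form the per-hstate sysfs group exposed under
 * /sys/kernel/mm/hugepages/hugepages-<size>kB/ (and, below, a reduced set
 * registered per NUMA node).
 */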
static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent,
                                    struct kobject **hstate_kobjs,
                                    struct attribute_group *hstate_attr_group)
        if (!hstate_kobjs[hi])

static void __init hugetlb_sysfs_init(void)
        if (!hugepages_kobj)

        for_each_hstate(h) {
                err = hugetlb_sysfs_add_hstate(h, hugepages_kobj,
                                               hstate_kobjs, &hstate_attr_group);

struct node_hstate {
        struct kobject *hugepages_kobj;

static struct attribute *per_node_hstate_attrs[] = {
        &nr_hugepages_attr.attr,
        &free_hugepages_attr.attr,
        &surplus_hugepages_attr.attr,

        .attrs = per_node_hstate_attrs,
static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
        struct node_hstate *nhs = &node_hstates[nid];
                if (nhs->hstate_kobjs[i] == kobj) {

void hugetlb_unregister_node(struct node *node)
        struct node_hstate *nhs = &node_hstates[node->dev.id];

        if (!nhs->hugepages_kobj)

        for_each_hstate(h) {
                if (nhs->hstate_kobjs[idx]) {
                        nhs->hstate_kobjs[idx] = NULL;

        nhs->hugepages_kobj = NULL;

static void hugetlb_unregister_all_nodes(void)
        register_hugetlbfs_with_node(NULL, NULL);

void hugetlb_register_node(struct node *node)
        struct node_hstate *nhs = &node_hstates[node->dev.id];

        if (nhs->hugepages_kobj)

        if (!nhs->hugepages_kobj)

        for_each_hstate(h) {
                err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
                                               &per_node_hstate_attr_group);
                                h->name, node->dev.id);
                        hugetlb_unregister_node(node);

static void hugetlb_register_all_nodes(void)
                if (node->dev.id == nid)
                        hugetlb_register_node(node);

        register_hugetlbfs_with_node(hugetlb_register_node,
                                     hugetlb_unregister_node);

static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)

static void hugetlb_unregister_all_nodes(void) { }

static void hugetlb_register_all_nodes(void) { }
static void __exit hugetlb_exit(void)
        hugetlb_unregister_all_nodes();

        for_each_hstate(h) {

static int __init hugetlb_init(void)
        if (default_hstate_max_huge_pages)
                default_hstate.max_huge_pages = default_hstate_max_huge_pages;

        hugetlb_init_hstates();

        gather_bootmem_prealloc();

        hugetlb_sysfs_init();

        hugetlb_register_all_nodes();

        BUG_ON(hugetlb_max_hstate >= HUGE_MAX_HSTATE);
        h = &hstates[hugetlb_max_hstate++];
        h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
        h->nr_huge_pages = 0;
        h->free_huge_pages = 0;
                INIT_LIST_HEAD(&h->hugepage_freelists[i]);
        INIT_LIST_HEAD(&h->hugepage_activelist);

        snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",

static int __init hugetlb_nrpages_setup(char *s)
        static unsigned long *last_mhp;

        if (!hugetlb_max_hstate)
                mhp = &default_hstate_max_huge_pages;
        else
                mhp = &parsed_hstate->max_huge_pages;

        if (mhp == last_mhp) {
                        "interleaving hugepagesz=, ignoring\n");

        if (sscanf(s, "%lu", mhp) <= 0)

        if (hugetlb_max_hstate && parsed_hstate->order >= MAX_ORDER)
                hugetlb_hstate_alloc_pages(parsed_hstate);

__setup("hugepages=", hugetlb_nrpages_setup);
static int __init hugetlb_default_setup(char *s)
        default_hstate_size = memparse(s, &s);

__setup("default_hugepagesz=", hugetlb_default_setup);

static unsigned int cpuset_mems_nr(unsigned int *array)
        unsigned int nr = 0;

#ifdef CONFIG_SYSCTL
static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
                        struct ctl_table *table, int write,
                        void __user *buffer, size_t *length, loff_t *ppos)
        struct hstate *h = &default_hstate;

        tmp = h->max_huge_pages;

        if (!(obey_mempolicy &&
                        init_nodemask_of_mempolicy(nodes_allowed))) {

        h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed);

int hugetlb_sysctl_handler(struct ctl_table *table, int write,
                        void __user *buffer, size_t *length, loff_t *ppos)
        return hugetlb_sysctl_handler_common(false, table, write,
                                             buffer, length, ppos);

int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
                        void __user *buffer, size_t *length, loff_t *ppos)
        return hugetlb_sysctl_handler_common(true, table, write,
                                             buffer, length, ppos);

int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
                        void __user *buffer,
                        size_t *length, loff_t *ppos)

int hugetlb_overcommit_handler(struct ctl_table *table, int write,
                        void __user *buffer,
                        size_t *length, loff_t *ppos)
        struct hstate *h = &default_hstate;

        tmp = h->nr_overcommit_huge_pages;

        spin_lock(&hugetlb_lock);
        h->nr_overcommit_huge_pages = tmp;
        spin_unlock(&hugetlb_lock);
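/*
 * The handlers above back the vm.nr_hugepages, vm.nr_hugepages_mempolicy,
 * vm.hugepages_treat_as_movable and vm.nr_overcommit_hugepages sysctls,
 * all operating on the default huge page size (default_hstate).
 */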
        struct hstate *h = &default_hstate;
                        "HugePages_Total: %5lu\n"
                        "HugePages_Free: %5lu\n"
                        "HugePages_Rsvd: %5lu\n"
                        "HugePages_Surp: %5lu\n"
                        "Hugepagesize: %8lu kB\n",
                        h->surplus_huge_pages,

        struct hstate *h = &default_hstate;
                "Node %d HugePages_Total: %5u\n"
                "Node %d HugePages_Free: %5u\n"
                "Node %d HugePages_Surp: %5u\n",
                nid, h->nr_huge_pages_node[nid],
                nid, h->free_huge_pages_node[nid],
                nid, h->surplus_huge_pages_node[nid]);

        struct hstate *h = &default_hstate;
        return h->nr_huge_pages * pages_per_huge_page(h);

static int hugetlb_acct_memory(struct hstate *h, long delta)
        spin_lock(&hugetlb_lock);
                if (gather_surplus_pages(h, delta) < 0)

                if (delta > cpuset_mems_nr(h->free_huge_pages_node)) {
                        return_unused_surplus_pages(h, delta);

                return_unused_surplus_pages(h, (unsigned long) -delta);
        spin_unlock(&hugetlb_lock);
        struct resv_map *reservations = vma_resv_map(vma);
                kref_get(&reservations->refs);

        struct resv_map *reservations = vma_resv_map(vma);
                kref_put(&reservations->refs, resv_map_release);

        struct resv_map *reservations = vma_resv_map(vma);
        struct hugepage_subpool *spool = subpool_vma(vma);
        unsigned long start;

                start = vma_hugecache_offset(h, vma, vma->vm_start);
                end = vma_hugecache_offset(h, vma, vma->vm_end);

                reserve = (end - start) -
                        region_count(&reservations->regions, start, end);

                        hugetlb_acct_memory(h, -reserve);
                        hugepage_subpool_put_pages(spool, reserve);

static int hugetlb_vm_op_fault(struct vm_area_struct *vma, struct vm_fault *vmf)

        .fault = hugetlb_vm_op_fault,
        .open = hugetlb_vm_op_open,
        .close = hugetlb_vm_op_close,
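/*
 * hugetlb faults are handled by hugetlb_fault() from the architecture's
 * fault path, never through the generic ->fault() callback; the stub above
 * exists only to populate the vm_operations_struct and should never be
 * called.
 */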
        entry = arch_make_huge_pte(entry, vma, page, writable);

                        unsigned long address, pte_t *ptep)

        struct page *ptepage;

        cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;

        for (addr = vma->vm_start; addr < vma->vm_end; addr += sz) {
                if (dst_pte == src_pte)
                if (!huge_pte_none(huge_ptep_get(src_pte))) {
                        entry = huge_ptep_get(src_pte);
                        page_dup_rmap(ptepage);

static int is_hugetlb_entry_migration(pte_t pte)
        swp = pte_to_swp_entry(pte);
        if (non_swap_entry(swp) && is_migration_entry(swp))

static int is_hugetlb_entry_hwpoisoned(pte_t pte)
        swp = pte_to_swp_entry(pte);
        if (non_swap_entry(swp) && is_hwpoison_entry(swp))

                        unsigned long start, unsigned long end,
                        struct page *ref_page)
        int force_flush = 0;
        const unsigned long mmun_start = start;
        const unsigned long mmun_end = end;

        WARN_ON(!is_vm_hugetlb_page(vma));

        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
        for (address = start; address < end; address += sz) {
                pte = huge_ptep_get(ptep);
                if (huge_pte_none(pte))
                if (unlikely(is_hugetlb_entry_hwpoisoned(pte)))
                        if (page != ref_page)

        if (address < end && !ref_page)
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
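/*
 * __unmap_hugepage_range() above tears down huge PTEs for [start, end) with
 * mmu notifiers bracketing the walk. When ref_page is set, only the mapping
 * of that particular page is removed (used by the COW "unmap from other
 * processes" fallback below).
 */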
                        unsigned long end, struct page *ref_page)

                        unsigned long end, struct page *ref_page)

                        struct page *page, unsigned long address)
        mapping = vma->vm_file->f_dentry->d_inode->i_mapping;

        vma_interval_tree_foreach(iter_vma, &mapping->i_mmap, pgoff, pgoff) {
                if (iter_vma == vma)

                        unsigned long address, pte_t *ptep, pte_t pte,
                        struct page *pagecache_page)
        struct page *old_page, *new_page;
        int outside_reserve = 0;
        unsigned long mmun_start;
        unsigned long mmun_end;

        avoidcopy = (page_mapcount(old_page) == 1);
        if (PageAnon(old_page))
                set_huge_ptep_writable(vma, address, ptep);

        if (!(vma->vm_flags & VM_MAYSHARE) &&
                        old_page != pagecache_page)
                outside_reserve = 1;

        new_page = alloc_huge_page(vma, address, outside_reserve);

        if (IS_ERR(new_page)) {
                long err = PTR_ERR(new_page);

                if (outside_reserve) {
                        BUG_ON(huge_pte_none(pte));
                        if (unmap_ref_private(mm, vma, old_page, address)) {
                                BUG_ON(huge_pte_none(pte));
                        goto retry_avoidcopy;

                        return VM_FAULT_OOM;
                        return VM_FAULT_SIGBUS;

                return VM_FAULT_OOM;

        copy_user_huge_page(new_page, old_page, address, vma,
                            pages_per_huge_page(h));
        __SetPageUptodate(new_page);

        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

        huge_ptep_clear_flush(vma, address, ptep);
                        make_huge_pte(vma, new_page, 1));
        hugepage_add_new_anon_rmap(new_page, vma, address);
        new_page = old_page;

        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
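/*
 * hugetlb_cow() above: if the old page is only mapped once and is anonymous,
 * the PTE is simply made writable. Otherwise a new huge page is allocated
 * and copied into. A private mapping faulting on a page it does not own
 * (outside_reserve) may unmap the page from other processes via
 * unmap_ref_private() and retry rather than fail.
 */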
static struct page *hugetlbfs_pagecache_page(struct hstate *h,
                        struct vm_area_struct *vma, unsigned long address)
        mapping = vma->vm_file->f_mapping;
        idx = vma_hugecache_offset(h, vma, address);

static bool hugetlbfs_pagecache_present(struct hstate *h,
                        struct vm_area_struct *vma, unsigned long address)
        mapping = vma->vm_file->f_mapping;
        idx = vma_hugecache_offset(h, vma, address);

        return page != NULL;

                        unsigned long address, pte_t *ptep, unsigned int flags)
        int ret = VM_FAULT_SIGBUS;

                        "PID %d killed due to inadequate hugepage pool\n",

        mapping = vma->vm_file->f_mapping;
        idx = vma_hugecache_offset(h, vma, address);

                page = alloc_huge_page(vma, address, 0);
                        ret = PTR_ERR(page);
                        ret = VM_FAULT_SIGBUS;
                clear_huge_page(page, address, pages_per_huge_page(h));
                __SetPageUptodate(page);

                        struct inode *inode = mapping->host;

                        err = add_to_page_cache(page, mapping, idx, GFP_KERNEL);

                        spin_lock(&inode->i_lock);
                        inode->i_blocks += blocks_per_huge_page(h);
                        spin_unlock(&inode->i_lock);

                                goto backout_unlocked;

        if (unlikely(PageHWPoison(page))) {
                ret = VM_FAULT_HWPOISON |
                goto backout_unlocked;

        if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED))
                if (vma_needs_reservation(h, vma, address) < 0) {
                        goto backout_unlocked;

        if (!huge_pte_none(huge_ptep_get(ptep)))
                hugepage_add_new_anon_rmap(page, vma, address);
                page_dup_rmap(page);
        new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)

        if ((flags & FAULT_FLAG_WRITE) && !(vma->vm_flags & VM_SHARED)) {
                ret = hugetlb_cow(mm, vma, address, ptep, new_pte, page);
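/*
 * hugetlb_no_page() above: look the page up in (or add it to) the hugetlbfs
 * page cache for shared mappings, or allocate an anonymous huge page for
 * private ones, zero it, map it, and perform copy-on-write straight away
 * for a write fault on a private mapping.
 */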
                        unsigned long address, unsigned int flags)
        struct page *page = NULL;
        struct page *pagecache_page = NULL;

        entry = huge_ptep_get(ptep);
        if (unlikely(is_hugetlb_entry_migration(entry))) {
        } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry)))
                return VM_FAULT_HWPOISON_LARGE |

                return VM_FAULT_OOM;

        entry = huge_ptep_get(ptep);
        if (huge_pte_none(entry)) {
                ret = hugetlb_no_page(mm, vma, address, ptep, flags);

        if ((flags & FAULT_FLAG_WRITE) && !pte_write(entry)) {
                if (vma_needs_reservation(h, vma, address) < 0) {

                if (!(vma->vm_flags & VM_MAYSHARE))
                        pagecache_page = hugetlbfs_pagecache_page(h,

        if (page != pagecache_page)

                goto out_page_table_lock;

        if (flags & FAULT_FLAG_WRITE) {
                        ret = hugetlb_cow(mm, vma, address, ptep, entry,
                        goto out_page_table_lock;

                                        flags & FAULT_FLAG_WRITE))

out_page_table_lock:

        if (pagecache_page) {
        if (page != pagecache_page)

                        pud_t *pud, int write)

                        unsigned long *position, int *length, int i,
        unsigned long pfn_offset;
        unsigned long vaddr = *position;

        while (vaddr < vma->vm_end && remainder) {
                absent = !pte || huge_pte_none(huge_ptep_get(pte));

                if (absent && (flags & FOLL_DUMP) &&
                                !hugetlbfs_pagecache_present(h, vma, vaddr)) {

                                ((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) {
                                (flags & FOLL_WRITE) ? FAULT_FLAG_WRITE : 0);
                        if (!(ret & VM_FAULT_ERROR))

                page = pte_page(huge_ptep_get(pte));
                        pages[i] = mem_map_offset(page, pfn_offset);

                if (vaddr < vma->vm_end && remainder &&
                                pfn_offset < pages_per_huge_page(h)) {

        *length = remainder;
                        unsigned long address, unsigned long end,
                        pgprot_t newprot)
        unsigned long start = address;

                if (!huge_pte_none(huge_ptep_get(ptep))) {

        struct hugepage_subpool *spool = subpool_inode(inode);

        if (vm_flags & VM_NORESERVE)

        if (!vma || vma->vm_flags & VM_MAYSHARE)
                chg = region_chg(&inode->i_mapping->private_list, from, to);

                struct resv_map *resv_map = resv_map_alloc();

                set_vma_resv_map(vma, resv_map);

        if (hugepage_subpool_get_pages(spool, chg)) {

        ret = hugetlb_acct_memory(h, chg);
                hugepage_subpool_put_pages(spool, chg);

        if (!vma || vma->vm_flags & VM_MAYSHARE)
                region_add(&inode->i_mapping->private_list, from, to);
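/*
 * These fragments come from hugetlb_reserve_pages(): shared mappings record
 * the reservation in the inode's region list so all mappers see it, while
 * private mappings get a freshly allocated resv_map hung off the VMA. The
 * charge is taken from the subpool and accounted via hugetlb_acct_memory()
 * before the region is finally committed with region_add().
 */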
        long chg = region_truncate(&inode->i_mapping->private_list, offset);
        struct hugepage_subpool *spool = subpool_inode(inode);

        spin_lock(&inode->i_lock);
        inode->i_blocks -= (blocks_per_huge_page(h) * freed);
        spin_unlock(&inode->i_lock);

        hugepage_subpool_put_pages(spool, (chg - freed));
        hugetlb_acct_memory(h, -(chg - freed));

#ifdef CONFIG_MEMORY_FAILURE

static int is_hugepage_on_freelist(struct page *hpage)
        struct hstate *h = page_hstate(hpage);
        int nid = page_to_nid(hpage);

int dequeue_hwpoisoned_huge_page(struct page *hpage)
        struct hstate *h = page_hstate(hpage);
        int nid = page_to_nid(hpage);

        spin_lock(&hugetlb_lock);
        if (is_hugepage_on_freelist(hpage)) {
                set_page_refcounted(hpage);
                h->free_huge_pages--;
                h->free_huge_pages_node[nid]--;
        spin_unlock(&hugetlb_lock);