36 #include <linux/export.h>
38 #include <linux/rbtree.h>
39 #include <linux/slab.h>
51 #include <linux/oom.h>
57 #include <asm/uaccess.h>
62 #define MEM_CGROUP_RECLAIM_RETRIES 5
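/*
 * MEM_CGROUP_RECLAIM_RETRIES bounds how many reclaim passes a single
 * charge attempt makes before it gives up (and, if allowed, falls back
 * to the memcg OOM path).
 */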
65 #ifdef CONFIG_MEMCG_SWAP
70 #ifdef CONFIG_MEMCG_SWAP_ENABLED
71 static int really_do_swap_account __initdata = 1;
73 static int really_do_swap_account __initdata = 0;
77 #define do_swap_account 0
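/* With CONFIG_MEMCG_SWAP disabled, do_swap_account is a constant 0 and
 * all swap-accounting paths compile away. */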
95 static const char * const mem_cgroup_stat_names[] = {
110 static const char * const mem_cgroup_events_names[] = {
129 #define THRESHOLDS_EVENTS_TARGET 128
130 #define SOFTLIMIT_EVENTS_TARGET 1024
131 #define NUMAINFO_EVENTS_TARGET 1024
225 static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
239 struct cgroup_subsys_state css;
330 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
347 static struct move_charge_struct {
351 unsigned long precharge;
352 unsigned long moved_charge;
353 unsigned long moved_swap;
361 static bool move_anon(void)
364 &mc.to->move_charge_at_immigrate);
367 static bool move_file(void)
370 &mc.to->move_charge_at_immigrate);
377 #define MEM_CGROUP_MAX_RECLAIM_LOOPS 100
378 #define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2
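/* Caps on the hierarchy walks performed by soft-limit reclaim, so a
 * misbehaving tree cannot keep reclaim looping indefinitely. */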
391 #define _OOM_TYPE (2)
392 #define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val))
393 #define MEMFILE_TYPE(val) ((val) >> 16 & 0xffff)
394 #define MEMFILE_ATTR(val) ((val) & 0xffff)
396 #define OOM_CONTROL (0)
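/*
 * Illustrative example of the MEMFILE_* encoding above (not part of the
 * original file): MEMFILE_PRIVATE(_OOM_TYPE, OOM_CONTROL) evaluates to
 * (2 << 16) | 0, and MEMFILE_TYPE()/MEMFILE_ATTR() recover 2 and 0 from it.
 */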
401 #define MEM_CGROUP_RECLAIM_NOSWAP_BIT 0x0
402 #define MEM_CGROUP_RECLAIM_NOSWAP (1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
403 #define MEM_CGROUP_RECLAIM_SHRINK_BIT 0x1
404 #define MEM_CGROUP_RECLAIM_SHRINK (1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
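/* Flags passed to mem_cgroup_reclaim(): NOSWAP forbids swapping anon
 * pages out, SHRINK marks reclaim triggered by shrinking a limit. */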
406 static void mem_cgroup_get(struct mem_cgroup *memcg);
407 static void mem_cgroup_put(struct mem_cgroup *memcg);
410 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *s)
415 static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
417 return (memcg == root_mem_cgroup);
421 #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
423 void sock_update_memcg(struct sock *sk)
429 BUG_ON(!sk->sk_prot->proto_cgroup);
441 mem_cgroup_get(sk->sk_cgrp->memcg);
447 cg_proto = sk->sk_prot->proto_cgroup(memcg);
448 if (!mem_cgroup_is_root(memcg) && memcg_proto_active(cg_proto)) {
449 mem_cgroup_get(memcg);
457 void sock_release_memcg(struct sock *sk)
463 mem_cgroup_put(memcg);
469 if (!memcg || mem_cgroup_is_root(memcg))
472 return &memcg->tcp_mem.cg_proto;
476 static void disarm_sock_keys(struct mem_cgroup *memcg)
478 if (!memcg_proto_activated(&memcg->tcp_mem.cg_proto))
483 static void disarm_sock_keys(struct mem_cgroup *memcg)
488 static void drain_all_stock_async(struct mem_cgroup *memcg);
491 mem_cgroup_zoneinfo(struct mem_cgroup *memcg, int nid, int zid)
493 return &memcg->info.nodeinfo[nid]->zoneinfo[zid];
504 int nid = page_to_nid(page);
505 int zid = page_zonenum(page);
507 return mem_cgroup_zoneinfo(memcg, nid, zid);
511 soft_limit_tree_node_zone(int nid, int zid)
513 return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
517 soft_limit_tree_from_page(struct page *page)
519 int nid = page_to_nid(page);
520 int zid = page_zonenum(page);
522 return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
526 __mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
529 unsigned long long new_usage_in_excess)
560 __mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
571 mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
575 spin_lock(&mctz->lock);
576 __mem_cgroup_remove_exceeded(memcg, mz, mctz);
577 spin_unlock(&mctz->lock);
583 unsigned long long excess;
586 int nid = page_to_nid(page);
587 int zid = page_zonenum(page);
588 mctz = soft_limit_tree_from_page(page);
595 mz = mem_cgroup_zoneinfo(memcg, nid, zid);
596 excess = res_counter_soft_limit_excess(&memcg->res);
602 spin_lock(&mctz->lock);
605 __mem_cgroup_remove_exceeded(memcg, mz, mctz);
610 __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
611 spin_unlock(&mctz->lock);
616 static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
623 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
624 mz = mem_cgroup_zoneinfo(memcg, node, zone);
625 mctz = soft_limit_tree_node_zone(node, zone);
626 mem_cgroup_remove_exceeded(memcg, mz, mctz);
649 __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
650 if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
651 !css_tryget(&mz->memcg->css))
662 spin_lock(&mctz->lock);
663 mz = __mem_cgroup_largest_soft_limit_node(mctz);
664 spin_unlock(&mctz->lock);
687 static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
696 #ifdef CONFIG_HOTPLUG_CPU
705 static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
708 int val = (charge) ? 1 : -1;
712 static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
715 unsigned long val = 0;
720 #ifdef CONFIG_HOTPLUG_CPU
728 static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
729 bool anon, int nr_pages)
749 nr_pages = -nr_pages;
767 mem_cgroup_zone_nr_lru_pages(struct mem_cgroup *memcg, int nid, int zid,
768 unsigned int lru_mask)
772 unsigned long ret = 0;
774 mz = mem_cgroup_zoneinfo(memcg, nid, zid);
777 if (BIT(lru) & lru_mask)
784 mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
785 int nid, unsigned int lru_mask)
790 for (zid = 0; zid < MAX_NR_ZONES; zid++)
791 total += mem_cgroup_zone_nr_lru_pages(memcg,
797 static unsigned long mem_cgroup_nr_lru_pages(struct mem_cgroup *memcg,
798 unsigned int lru_mask)
804 total += mem_cgroup_node_nr_lru_pages(memcg, nid, lru_mask);
816 if ((long)next - (long)val < 0) {
840 static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
844 if (unlikely(mem_cgroup_event_ratelimit(memcg,
849 do_softlimit = mem_cgroup_event_ratelimit(memcg,
852 do_numainfo = mem_cgroup_event_ratelimit(memcg,
859 mem_cgroup_update_tree(memcg, page);
870 return mem_cgroup_from_css(
871 cgroup_subsys_state(cont, mem_cgroup_subsys_id));
884 return mem_cgroup_from_css(task_subsys_state(p, mem_cgroup_subsys_id));
903 } while (!css_tryget(&memcg->css));
932 if (mem_cgroup_disabled())
936 root = root_mem_cgroup;
938 if (prev && !reclaim)
941 if (prev && prev != root)
952 struct cgroup_subsys_state *css;
955 int nid = zone_to_nid(reclaim->zone);
959 mz = mem_cgroup_zoneinfo(root, nid, zid);
961 if (prev && reclaim->generation != iter->generation)
969 if (css == &root->css || css_tryget(css))
970 memcg = mem_cgroup_from_css(css);
979 else if (!prev && memcg)
998 root = root_mem_cgroup;
999 if (prev && prev != root)
1000 css_put(&prev->css);
1008 #define for_each_mem_cgroup_tree(iter, root) \
1009 for (iter = mem_cgroup_iter(root, NULL, NULL); \
1011 iter = mem_cgroup_iter(root, iter, NULL))
1013 #define for_each_mem_cgroup(iter) \
1014 for (iter = mem_cgroup_iter(NULL, NULL, NULL); \
1016 iter = mem_cgroup_iter(NULL, iter, NULL))
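/*
 * Both iterators above visit every memcg in the (sub)hierarchy via
 * mem_cgroup_iter(); a loop that is exited early must drop the reference
 * on the current position with mem_cgroup_iter_break().
 */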
1060 if (mem_cgroup_disabled()) {
1065 mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone));
1073 if (unlikely(lruvec->zone != zone))
1074 lruvec->zone = zone;
1101 struct page_cgroup *pc;
1104 if (mem_cgroup_disabled()) {
1110 memcg = pc->mem_cgroup;
1121 if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup)
1122 pc->mem_cgroup = memcg = root_mem_cgroup;
1124 mz = page_cgroup_zoneinfo(memcg, page);
1132 if (unlikely(lruvec->zone != zone))
1133 lruvec->zone = zone;
1152 if (mem_cgroup_disabled())
1157 *lru_size += nr_pages;
1168 if (root_memcg == memcg)
1175 static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
1205 css_get(&curr->css);
1216 ret = mem_cgroup_same_or_subtree(memcg, curr);
1217 css_put(&curr->css);
1223 unsigned long inactive_ratio;
1233 inactive_ratio = int_sqrt(10 * gb);
1237 return inactive * inactive_ratio < active;
1248 return (active > inactive);
1251 #define mem_cgroup_from_res_counter(counter, member) \
1252 container_of(counter, struct mem_cgroup, member)
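/* container_of() helper: e.g. mem_cgroup_from_res_counter(counter, res)
 * maps a res_counter embedded as ->res back to its owning mem_cgroup. */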
1261 static unsigned long mem_cgroup_margin(struct mem_cgroup *memcg)
1263 unsigned long long margin;
1265 margin = res_counter_margin(&memcg->res);
1267 margin = min(margin, res_counter_margin(&memcg->memsw));
1273 struct cgroup *cgrp = memcg->css.cgroup;
1276 if (cgrp->parent == NULL)
1300 static void mem_cgroup_start_move(struct mem_cgroup *memcg)
1307 static void mem_cgroup_end_move(struct mem_cgroup *memcg)
1331 static bool mem_cgroup_stolen(struct mem_cgroup *memcg)
1337 static bool mem_cgroup_under_move(struct mem_cgroup *memcg)
1346 spin_lock(&mc.lock);
1352 ret = mem_cgroup_same_or_subtree(memcg, from)
1353 || mem_cgroup_same_or_subtree(memcg, to);
1355 spin_unlock(&mc.lock);
1359 static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
1361 if (mc.moving_task && current != mc.moving_task) {
1362 if (mem_cgroup_under_move(memcg)) {
1381 static void move_lock_mem_cgroup(struct mem_cgroup *memcg,
1382 unsigned long *flags)
1387 static void move_unlock_mem_cgroup(struct mem_cgroup *memcg,
1388 unsigned long *flags)
1390 spin_unlock_irqrestore(&memcg->move_lock, *flags);
1403 struct cgroup *task_cgrp;
1404 struct cgroup *mem_cgrp;
1418 mem_cgrp = memcg->css.cgroup;
1419 task_cgrp = task_cgroup(p, mem_cgroup_subsys_id);
1448 printk(KERN_INFO "memory: usage %llukB, limit %llukB, failcnt %llu\n",
1463 static int mem_cgroup_count_children(struct mem_cgroup *memcg)
1495 limit = min(limit, memsw);
1505 unsigned long chosen_points = 0;
1506 unsigned long totalpages;
1507 unsigned int points = 0;
1515 if (fatal_signal_pending(current)) {
1521 totalpages = mem_cgroup_get_limit(memcg) >> PAGE_SHIFT ? : 1;
1523 struct cgroup *cgroup = iter->css.cgroup;
1524 struct cgroup_iter it;
1533 put_task_struct(chosen);
1544 put_task_struct(chosen);
1550 if (points > chosen_points) {
1552 put_task_struct(chosen);
1554 chosen_points = points;
1563 points = chosen_points * 1000 / totalpages;
1565 NULL, "Memory cgroup out of memory");
1568 static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
1570 unsigned long flags)
1572 unsigned long total = 0;
1573 bool noswap = false;
1583 drain_all_stock_async(memcg);
1584 total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap);
1590 if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK))
1592 if (mem_cgroup_margin(memcg))
1614 static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
1615 int nid, bool noswap)
1617 if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_FILE))
1621 if (mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL_ANON))
1626 #if MAX_NUMNODES > 1
1634 static void mem_cgroup_may_update_nodemask(struct mem_cgroup *memcg)
1651 if (!test_mem_cgroup_node_reclaimable(memcg, nid, false))
1675 mem_cgroup_may_update_nodemask(memcg);
1678 node = next_node(node, memcg->scan_nodes);
1700 static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
1711 nid = next_node(nid, memcg->scan_nodes)) {
1713 if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
1723 if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
1735 static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
1737 return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
1741 static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
1744 unsigned long *total_scanned)
1749 unsigned long excess;
1750 unsigned long nr_scanned;
1756 excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
1776 if (total >= (excess >> 2) ||
1777 (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
1782 if (!mem_cgroup_reclaimable(victim, false))
1784 total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
1786 *total_scanned += nr_scanned;
1787 if (!res_counter_soft_limit_excess(&root_memcg->res))
1799 static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg)
1824 if (iter == failed) {
1836 static int mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
1863 atomic_add_unless(&iter->under_oom, -1, 0);
1881 oom_wait_info = container_of(wait, struct oom_wait_info, wait);
1882 oom_wait_memcg = oom_wait_info->memcg;
1888 if (!mem_cgroup_same_or_subtree(oom_wait_memcg, wake_memcg)
1889 && !mem_cgroup_same_or_subtree(wake_memcg, oom_wait_memcg))
1894 static void memcg_wakeup_oom(struct mem_cgroup *memcg)
1900 static void memcg_oom_recover(struct mem_cgroup *memcg)
1903 memcg_wakeup_oom(memcg);
1913 bool locked, need_to_kill;
1915 owait.memcg = memcg;
1916 owait.wait.flags = 0;
1917 owait.wait.func = memcg_oom_wake_function;
1919 INIT_LIST_HEAD(&owait.wait.task_list);
1920 need_to_kill = true;
1921 mem_cgroup_mark_under_oom(memcg);
1924 spin_lock(&memcg_oom_lock);
1925 locked = mem_cgroup_oom_lock(memcg);
1933 need_to_kill = false;
1935 mem_cgroup_oom_notify(memcg);
1936 spin_unlock(&memcg_oom_lock);
1945 spin_lock(&memcg_oom_lock);
1947 mem_cgroup_oom_unlock(memcg);
1948 memcg_wakeup_oom(memcg);
1949 spin_unlock(&memcg_oom_lock);
1951 mem_cgroup_unmark_under_oom(memcg);
1985 bool *locked, unsigned long *flags)
1988 struct page_cgroup *pc;
1992 memcg = pc->mem_cgroup;
1993 if (unlikely(!memcg || !PageCgroupUsed(pc)))
2001 if (!mem_cgroup_stolen(memcg))
2004 move_lock_mem_cgroup(memcg, flags);
2005 if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
2006 move_unlock_mem_cgroup(memcg, flags);
2021 move_unlock_mem_cgroup(pc->mem_cgroup, flags);
2031 if (mem_cgroup_disabled())
2034 memcg = pc->mem_cgroup;
2035 if (unlikely(!memcg || !PageCgroupUsed(pc)))
2053 #define CHARGE_BATCH 32U
2059 #define FLUSHING_CACHED_CHARGE 0
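/*
 * Per-cpu charge stock: consume_stock()/refill_stock() keep up to
 * CHARGE_BATCH pre-charged pages per cpu so most charges avoid touching
 * the res_counter; FLUSHING_CACHED_CHARGE is the flag bit set while a
 * cpu's stock is being drained.
 */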
2070 static bool consume_stock(struct mem_cgroup *memcg)
2117 static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
2121 if (stock->cached != memcg) {
2134 static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
2148 if (!mem_cgroup_same_or_subtree(root_memcg, memcg))
2152 drain_local_stock(&stock->work);
2177 static void drain_all_stock_async(struct mem_cgroup *root_memcg)
2184 drain_all_stock(root_memcg, false);
2189 static void drain_all_stock_sync(struct mem_cgroup *root_memcg)
2193 drain_all_stock(root_memcg, true);
2201 static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *memcg, int cpu)
2213 unsigned long x = per_cpu(memcg->stat->events[i], cpu);
2225 int cpu = (unsigned long)hcpu;
2236 mem_cgroup_drain_pcp_counter(iter, cpu);
2238 stock = &per_cpu(memcg_stock, cpu);
2253 static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
2254 unsigned int nr_pages, bool oom_check)
2256 unsigned long csize = nr_pages * PAGE_SIZE;
2259 unsigned long flags = 0;
2289 ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
2290 if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
2301 if (nr_pages == 1 && ret)
2308 if (mem_cgroup_wait_acct_move(mem_over_limit))
2315 if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask, get_order(csize)))
2342 static int __mem_cgroup_try_charge(struct mm_struct *mm,
2344 unsigned int nr_pages,
2359 || fatal_signal_pending(current)))
2369 *ptr = root_mem_cgroup;
2374 if (mem_cgroup_is_root(memcg))
2376 if (nr_pages == 1 && consume_stock(memcg))
2378 css_get(&memcg->css);
2396 memcg = root_mem_cgroup;
2397 if (mem_cgroup_is_root(memcg)) {
2401 if (nr_pages == 1 && consume_stock(memcg)) {
2414 if (!css_tryget(&memcg->css)) {
2425 if (fatal_signal_pending(current)) {
2426 css_put(&memcg->css);
2431 if (oom && !nr_oom_retries) {
2436 ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, oom_check);
2442 css_put(&memcg->css);
2446 css_put(&memcg->css);
2450 css_put(&memcg->css);
2457 css_put(&memcg->css);
2462 if (batch > nr_pages)
2463 refill_stock(memcg, batch - nr_pages);
2464 css_put(&memcg->css);
2472 *ptr = root_mem_cgroup;
2481 static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
2482 unsigned int nr_pages)
2484 if (!mem_cgroup_is_root(memcg)) {
2497 static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
2498 unsigned int nr_pages)
2502 if (mem_cgroup_is_root(memcg))
2508 memcg->memsw.parent, bytes);
2517 static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
2519 struct cgroup_subsys_state *css;
2527 return mem_cgroup_from_css(css);
2533 struct page_cgroup *pc;
2540 lock_page_cgroup(pc);
2541 if (PageCgroupUsed(pc)) {
2542 memcg = pc->mem_cgroup;
2543 if (memcg && !css_tryget(&memcg->css))
2545 } else if (PageSwapCache(page)) {
2546 ent.val = page_private(page);
2547 id = lookup_swap_cgroup_id(ent);
2549 memcg = mem_cgroup_lookup(id);
2550 if (memcg && !css_tryget(&memcg->css))
2554 unlock_page_cgroup(pc);
2558 static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2560 unsigned int nr_pages,
2567 bool was_on_lru = false;
2570 lock_page_cgroup(pc);
2582 zone = page_zone(page);
2584 if (PageLRU(page)) {
2587 del_page_from_lru_list(page, lruvec, page_lru(page));
2592 pc->mem_cgroup = memcg;
2601 SetPageCgroupUsed(pc);
2608 add_page_to_lru_list(page, lruvec, page_lru(page));
2618 mem_cgroup_charge_statistics(memcg, anon, nr_pages);
2619 unlock_page_cgroup(pc);
2626 memcg_check_events(memcg, page);
2629 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
2631 #define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
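/* page_cgroup flag bits that must not be copied from the head page to
 * the tail pages when a transparent huge page is split. */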
2638 void mem_cgroup_split_huge_fixup(struct page *head)
2641 struct page_cgroup *pc;
2644 if (mem_cgroup_disabled())
2648 pc->mem_cgroup = head_pc->mem_cgroup;
2650 pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
2670 static int mem_cgroup_move_account(struct page *page,
2671 unsigned int nr_pages,
2672 struct page_cgroup *pc,
2676 unsigned long flags;
2678 bool anon = PageAnon(page);
2689 if (nr_pages > 1 && !PageTransHuge(page))
2692 lock_page_cgroup(pc);
2695 if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
2698 move_lock_mem_cgroup(from, &flags);
2700 if (!anon && page_mapped(page)) {
2707 mem_cgroup_charge_statistics(from, anon, -nr_pages);
2710 pc->mem_cgroup = to;
2711 mem_cgroup_charge_statistics(to, anon, nr_pages);
2719 move_unlock_mem_cgroup(from, &flags);
2722 unlock_page_cgroup(pc);
2726 memcg_check_events(to, page);
2727 memcg_check_events(from, page);
2736 static int mem_cgroup_move_parent(struct page *page,
2737 struct page_cgroup *pc,
2741 unsigned int nr_pages;
2746 if (mem_cgroup_is_root(child))
2750 if (!get_page_unless_zero(page))
2762 parent = root_mem_cgroup;
2765 flags = compound_lock_irqsave(page);
2767 ret = mem_cgroup_move_account(page, nr_pages,
2770 __mem_cgroup_cancel_local_charge(child, nr_pages);
2773 compound_unlock_irqrestore(page, flags);
2787 static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
2791 unsigned int nr_pages = 1;
2795 if (PageTransHuge(page)) {
2796 nr_pages <<= compound_order(page);
2805 ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
2808 __mem_cgroup_commit_charge(memcg, page, nr_pages, ctype, false);
2815 if (mem_cgroup_disabled())
2820 return mem_cgroup_charge_common(page, mm, gfp_mask,
2830 static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
2836 struct page_cgroup *pc;
2847 if (PageCgroupUsed(pc))
2855 ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);
2856 css_put(&memcg->css);
2861 ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
2871 if (mem_cgroup_disabled())
2879 if (!PageSwapCache(page)) {
2882 ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, memcgp, true);
2887 return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
2892 if (mem_cgroup_disabled())
2896 __mem_cgroup_cancel_charge(memcg, 1);
2900 __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
2903 if (mem_cgroup_disabled())
2909 __mem_cgroup_commit_charge(memcg, page, 1, ctype, true);
2919 mem_cgroup_uncharge_swap(ent);
2932 __mem_cgroup_commit_charge_swapin(page, memcg,
2943 if (mem_cgroup_disabled())
2945 if (PageCompound(page))
2948 if (!PageSwapCache(page))
2949 ret = mem_cgroup_charge_common(page, mm, gfp_mask, type);
2951 ret = __mem_cgroup_try_charge_swapin(mm, page,
2954 __mem_cgroup_commit_charge_swapin(page, memcg, type);
2959 static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
2960 unsigned int nr_pages,
2963 struct memcg_batch_info *batch = NULL;
2964 bool uncharge_memsw = true;
2968 uncharge_memsw = false;
2970 batch = &current->memcg_batch;
2977 batch->memcg = memcg;
2986 if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
2987 goto direct_uncharge;
2990 goto direct_uncharge;
2997 if (batch->memcg != memcg)
2998 goto direct_uncharge;
3002 batch->memsw_nr_pages++;
3008 if (unlikely(batch->memcg != memcg))
3009 memcg_oom_recover(memcg);
3016 __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
3020 unsigned int nr_pages = 1;
3021 struct page_cgroup *pc;
3024 if (mem_cgroup_disabled())
3029 if (PageTransHuge(page)) {
3030 nr_pages <<= compound_order(page);
3040 lock_page_cgroup(pc);
3042 memcg = pc->mem_cgroup;
3044 if (!PageCgroupUsed(pc))
3047 anon = PageAnon(page);
3060 if (page_mapped(page))
3069 if (!end_migration && PageCgroupMigration(pc))
3073 if (!PageAnon(page)) {
3074 if (page->mapping && !page_is_file_cache(page))
3076 } else if (page_mapped(page))
3083 mem_cgroup_charge_statistics(memcg, anon, -nr_pages);
3085 ClearPageCgroupUsed(pc);
3093 unlock_page_cgroup(pc);
3098 memcg_check_events(memcg, page);
3100 mem_cgroup_swap_statistics(memcg, true);
3101 mem_cgroup_get(memcg);
3108 if (!end_migration && !mem_cgroup_is_root(memcg))
3109 mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
3114 unlock_page_cgroup(pc);
3121 if (page_mapped(page))
3124 if (PageSwapCache(page))
3146 current->memcg_batch.do_batch++;
3148 if (current->memcg_batch.do_batch == 1) {
3150 current->memcg_batch.nr_pages = 0;
3151 current->memcg_batch.memsw_nr_pages = 0;
3157 struct memcg_batch_info *batch = &current->memcg_batch;
3159 if (!batch->do_batch)
3163 if (batch->do_batch)
3172 if (batch->nr_pages)
3174 batch->nr_pages * PAGE_SIZE);
3175 if (batch->memsw_nr_pages)
3177 batch->memsw_nr_pages * PAGE_SIZE);
3178 memcg_oom_recover(batch->memcg);
3180 batch->memcg = NULL;
3189 mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
3197 memcg = __mem_cgroup_uncharge_common(page, ctype, false);
3204 swap_cgroup_record(ent, css_id(&memcg->css));
3208 #ifdef CONFIG_MEMCG_SWAP
3221 id = swap_cgroup_record(ent, 0);
3223 memcg = mem_cgroup_lookup(id);
3229 if (!mem_cgroup_is_root(memcg))
3231 mem_cgroup_swap_statistics(memcg, false);
3232 mem_cgroup_put(memcg);
3254 unsigned short old_id, new_id;
3259 if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
3260 mem_cgroup_swap_statistics(from, false);
3261 mem_cgroup_swap_statistics(to, true);
3276 static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
3291 struct page_cgroup *pc;
3297 if (mem_cgroup_disabled())
3301 lock_page_cgroup(pc);
3302 if (PageCgroupUsed(pc)) {
3303 memcg = pc->mem_cgroup;
3304 css_get(&memcg->css);
3335 SetPageCgroupMigration(pc);
3337 unlock_page_cgroup(pc);
3361 __mem_cgroup_commit_charge(memcg, newpage, 1, ctype, false);
3366 struct page *oldpage, struct page *newpage, bool migration_ok)
3369 struct page_cgroup *pc;
3376 if (!migration_ok) {
3383 anon = PageAnon(used);
3384 __mem_cgroup_uncharge_common(unused,
3388 css_put(&memcg->css);
3395 lock_page_cgroup(pc);
3396 ClearPageCgroupMigration(pc);
3397 unlock_page_cgroup(pc);
3424 struct page *newpage)
3427 struct page_cgroup *pc;
3430 if (mem_cgroup_disabled())
3435 lock_page_cgroup(pc);
3436 if (PageCgroupUsed(pc)) {
3437 memcg = pc->mem_cgroup;
3438 mem_cgroup_charge_statistics(memcg, false, -1);
3439 ClearPageCgroupUsed(pc);
3441 unlock_page_cgroup(pc);
3454 __mem_cgroup_commit_charge(memcg, newpage, 1, type, true);
3457 #ifdef CONFIG_DEBUG_VM
3458 static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
3460 struct page_cgroup *pc;
3468 if (likely(pc) && PageCgroupUsed(pc))
3473 bool mem_cgroup_bad_page_check(struct page *page)
3475 if (mem_cgroup_disabled())
3478 return lookup_page_cgroup_used(page) != NULL;
3481 void mem_cgroup_print_bad_page(struct page *page)
3483 struct page_cgroup *pc;
3485 pc = lookup_page_cgroup_used(page);
3488 pc, pc->flags, pc->mem_cgroup);
3495 static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
3496 unsigned long long val)
3499 u64 memswlimit, memlimit;
3501 int children = mem_cgroup_count_children(memcg);
3502 u64 curusage, oldusage;
3515 while (retry_count) {
3516 if (signal_pending(current)) {
3527 if (memswlimit < val) {
3537 ret = res_counter_set_limit(&memcg->res, val);
3539 if (memswlimit == val)
3550 MEM_CGROUP_RECLAIM_SHRINK);
3553 if (curusage >= oldusage)
3556 oldusage = curusage;
3558 if (!ret && enlarge)
3559 memcg_oom_recover(memcg);
3564 static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
3565 unsigned long long val)
3568 u64 memlimit, memswlimit, oldusage, curusage;
3569 int children = mem_cgroup_count_children(memcg);
3576 while (retry_count) {
3577 if (signal_pending(current)) {
3588 if (memlimit > val) {
3594 if (memswlimit < val)
3596 ret = res_counter_set_limit(&memcg->memsw, val);
3598 if (memlimit == val)
3609 MEM_CGROUP_RECLAIM_NOSWAP |
3610 MEM_CGROUP_RECLAIM_SHRINK);
3613 if (curusage >= oldusage)
3616 oldusage = curusage;
3618 if (!ret && enlarge)
3619 memcg_oom_recover(memcg);
3625 unsigned long *total_scanned)
3627 unsigned long nr_reclaimed = 0;
3629 unsigned long reclaimed;
3632 unsigned long long excess;
3633 unsigned long nr_scanned;
3638 mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
3648 mz = mem_cgroup_largest_soft_limit_node(mctz);
3653 reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
3654 gfp_mask, &nr_scanned);
3655 nr_reclaimed += reclaimed;
3656 *total_scanned += nr_scanned;
3657 spin_lock(&mctz->lock);
3678 __mem_cgroup_largest_soft_limit_node(mctz);
3680 css_put(&next_mz->memcg->css);
3685 __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
3686 excess = res_counter_soft_limit_excess(&mz->memcg->res);
3696 __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
3697 spin_unlock(&mctz->lock);
3698 css_put(&mz->memcg->css);
3705 if (!nr_reclaimed &&
3709 } while (!nr_reclaimed);
3711 css_put(&next_mz->memcg->css);
3712 return nr_reclaimed;
3721 static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
3722 int node, int zid, enum lru_list lru)
3724 struct lruvec *lruvec;
3725 unsigned long flags, loop;
3730 zone = &NODE_DATA(node)->node_zones[zid];
3739 struct page_cgroup *pc;
3743 if (list_empty(list)) {
3744 spin_unlock_irqrestore(&zone->lru_lock, flags);
3749 list_move(&page->lru, list);
3751 spin_unlock_irqrestore(&zone->lru_lock, flags);
3754 spin_unlock_irqrestore(&zone->lru_lock, flags);
3758 if (mem_cgroup_move_parent(page, pc, memcg)) {
3765 return !list_empty(list);
3772 static int mem_cgroup_force_empty(struct mem_cgroup *memcg, bool free_all)
3777 struct cgroup *cgrp = memcg->css.cgroup;
3779 css_get(&memcg->css);
3792 drain_all_stock_sync(memcg);
3794 mem_cgroup_start_move(memcg);
3796 for (zid = 0; !ret && zid < MAX_NR_ZONES; zid++) {
3799 ret = mem_cgroup_force_empty_list(memcg,
3808 mem_cgroup_end_move(memcg);
3809 memcg_oom_recover(memcg);
3814 css_put(&memcg->css);
3830 if (signal_pending(current)) {
3834 progress = try_to_free_mem_cgroup_pages(memcg, GFP_KERNEL,
3848 static int mem_cgroup_force_empty_write(struct cgroup *cont, unsigned int event)
3854 static u64 mem_cgroup_hierarchy_read(struct cgroup *cont, struct cftype *cft)
3859 static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
3864 struct cgroup *parent = cont->parent;
3884 (val == 1 || val == 0)) {
3885 if (list_empty(&cont->children))
3899 static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
3907 val += mem_cgroup_read_stat(iter, idx);
3918 if (!mem_cgroup_is_root(memcg)) {
3934 static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
3936 size_t nbytes, loff_t *ppos)
3952 val = mem_cgroup_usage(memcg, false);
3958 val = mem_cgroup_usage(memcg, true);
3966 len = scnprintf(str, sizeof(str), "%llu\n", (unsigned long long)val);
3973 static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
3978 unsigned long long val;
3989 if (mem_cgroup_is_root(memcg)) {
3998 ret = mem_cgroup_resize_limit(memcg, val);
4000 ret = mem_cgroup_resize_memsw_limit(memcg, val);
4012 ret = res_counter_set_soft_limit(&memcg->res, val);
4023 static void memcg_get_hierarchical_limit(struct mem_cgroup *memcg,
4024 unsigned long long *mem_limit, unsigned long long *memsw_limit)
4026 struct cgroup *cgroup;
4027 unsigned long long min_limit, min_memsw_limit, tmp;
4031 cgroup = memcg->css.cgroup;
4035 while (cgroup->parent) {
4036 cgroup = cgroup->parent;
4041 min_limit = min(min_limit, tmp);
4043 min_memsw_limit = min(min_memsw_limit, tmp);
4046 *mem_limit = min_limit;
4047 *memsw_limit = min_memsw_limit;
4050 static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
4064 res_counter_reset_max(&memcg->res);
4066 res_counter_reset_max(&memcg->memsw);
4070 res_counter_reset_failcnt(&memcg->res);
4072 res_counter_reset_failcnt(&memcg->memsw);
4079 static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp,
4086 static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
4087 struct cftype *cft, u64 val)
4105 static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
4106 struct cftype *cft, u64 val)
4113 static int memcg_numa_stat_show(struct cgroup *cont, struct cftype *cft,
4117 unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
4118 unsigned long node_nr;
4121 total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL);
4124 node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL);
4129 file_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_FILE);
4132 node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
4138 anon_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_ANON);
4141 node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
4148 seq_printf(m, "unevictable=%lu", unevictable_nr);
4150 node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
4159 static const char * const mem_cgroup_lru_names[] = {
4167 static inline void mem_cgroup_lru_names_not_uptodate(void)
4172 static int memcg_stat_show(struct cgroup *cont, struct cftype *cft,
4182 seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i],
4183 mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
4187 seq_printf(m, "%s %lu\n", mem_cgroup_events_names[i],
4188 mem_cgroup_read_events(memcg, i));
4191 seq_printf(m, "%s %lu\n", mem_cgroup_lru_names[i],
4192 mem_cgroup_nr_lru_pages(memcg, BIT(i)) * PAGE_SIZE);
4196 unsigned long long limit, memsw_limit;
4197 memcg_get_hierarchical_limit(memcg, &limit, &memsw_limit);
4198 seq_printf(m, "hierarchical_memory_limit %llu\n", limit);
4200 seq_printf(m, "hierarchical_memsw_limit %llu\n",
4210 val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
4211 seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val);
4214 for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
4215 unsigned long long val = 0;
4218 val += mem_cgroup_read_events(mi, i);
4220 mem_cgroup_events_names[i], val);
4223 for (i = 0; i < NR_LRU_LISTS; i++) {
4224 unsigned long long val = 0;
4227 val += mem_cgroup_nr_lru_pages(mi, BIT(i)) * PAGE_SIZE;
4231 #ifdef CONFIG_DEBUG_VM
4240 for (zid = 0; zid < MAX_NR_ZONES; zid++) {
4241 mz = mem_cgroup_zoneinfo(memcg, nid, zid);
4242 rstat = &mz->lruvec.reclaim_stat;
4249 seq_printf(m, "recent_rotated_anon %lu\n", recent_rotated[0]);
4250 seq_printf(m, "recent_rotated_file %lu\n", recent_rotated[1]);
4251 seq_printf(m, "recent_scanned_anon %lu\n", recent_scanned[0]);
4252 seq_printf(m, "recent_scanned_file %lu\n", recent_scanned[1]);
4259 static u64 mem_cgroup_swappiness_read(struct cgroup *cgrp, struct cftype *cft)
4266 static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft,
4275 if (cgrp->parent == NULL)
4296 static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
4311 usage = mem_cgroup_usage(memcg, swap);
4350 __mem_cgroup_threshold(memcg, false);
4352 __mem_cgroup_threshold(memcg, true);
4358 static int compare_thresholds(const void *a, const void *b)
4366 static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg)
4380 mem_cgroup_oom_notify_cb(iter);
4383 static int mem_cgroup_usage_register_event(struct cgroup *cgrp,
4406 usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
4410 __mem_cgroup_threshold(memcg, type == _MEMSWAP);
4425 memcpy(new->entries, thresholds->primary->entries, (size - 1) *
4430 new->entries[size - 1].eventfd = eventfd;
4431 new->entries[size - 1].threshold = threshold;
4435 compare_thresholds, NULL);
4438 new->current_threshold = -1;
4439 for (i = 0; i < size; i++) {
4440 if (new->entries[i].threshold <= usage) {
4446 ++new->current_threshold;
4466 static void mem_cgroup_usage_unregister_event(struct cgroup *cgrp,
4487 usage = mem_cgroup_usage(memcg, type == _MEMSWAP);
4490 __mem_cgroup_threshold(memcg, type == _MEMSWAP);
4494 for (i = 0; i < thresholds->primary->size; i++) {
4495 if (thresholds->primary->entries[i].eventfd != eventfd)
4499 new = thresholds->spare;
4511 new->current_threshold = -1;
4512 for (i = 0, j = 0; i < thresholds->primary->size; i++) {
4513 if (thresholds->primary->entries[i].eventfd == eventfd)
4516 new->entries[j] = thresholds->primary->entries[i];
4517 if (new->entries[j].threshold <= usage) {
4523 ++new->current_threshold;
4545 static int mem_cgroup_oom_register_event(struct cgroup *cgrp,
4546 struct cftype *cft, struct eventfd_ctx *eventfd, const char *args)
4557 spin_lock(&memcg_oom_lock);
4565 spin_unlock(&memcg_oom_lock);
4570 static void mem_cgroup_oom_unregister_event(struct cgroup *cgrp,
4579 spin_lock(&memcg_oom_lock);
4588 spin_unlock(&memcg_oom_lock);
4591 static int mem_cgroup_oom_control_read(struct cgroup *cgrp,
4592 struct cftype *cft, struct cgroup_map_cb *cb)
4599 cb->fill(cb, "under_oom", 1);
4601 cb->fill(cb, "under_oom", 0);
4605 static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
4606 struct cftype *cft, u64 val)
4612 if (!cgrp->parent || !((val == 0) || (val == 1)))
4626 memcg_oom_recover(memcg);
4631 #ifdef CONFIG_MEMCG_KMEM
4632 static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
4634 return mem_cgroup_sockets_init(memcg, ss);
4637 static void kmem_cgroup_destroy(struct mem_cgroup *memcg)
4639 mem_cgroup_sockets_destroy(memcg);
4642 static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
4647 static void kmem_cgroup_destroy(struct mem_cgroup *memcg)
4652 static struct cftype mem_cgroup_files[] = {
4654 .name = "usage_in_bytes",
4656 .read = mem_cgroup_read,
4657 .register_event = mem_cgroup_usage_register_event,
4658 .unregister_event = mem_cgroup_usage_unregister_event,
4661 .name = "max_usage_in_bytes",
4663 .trigger = mem_cgroup_reset,
4664 .read = mem_cgroup_read,
4667 .name = "limit_in_bytes",
4669 .write_string = mem_cgroup_write,
4670 .read = mem_cgroup_read,
4673 .name = "soft_limit_in_bytes",
4675 .write_string = mem_cgroup_write,
4676 .read = mem_cgroup_read,
4681 .trigger = mem_cgroup_reset,
4682 .read = mem_cgroup_read,
4686 .read_seq_string = memcg_stat_show,
4689 .name = "force_empty",
4690 .trigger = mem_cgroup_force_empty_write,
4693 .name = "use_hierarchy",
4694 .write_u64 = mem_cgroup_hierarchy_write,
4695 .read_u64 = mem_cgroup_hierarchy_read,
4698 .name = "swappiness",
4699 .read_u64 = mem_cgroup_swappiness_read,
4700 .write_u64 = mem_cgroup_swappiness_write,
4703 .name = "move_charge_at_immigrate",
4704 .read_u64 = mem_cgroup_move_charge_read,
4705 .write_u64 = mem_cgroup_move_charge_write,
4708 .name = "oom_control",
4709 .read_map = mem_cgroup_oom_control_read,
4710 .write_u64 = mem_cgroup_oom_control_write,
4711 .register_event = mem_cgroup_oom_register_event,
4712 .unregister_event = mem_cgroup_oom_unregister_event,
4717 .name = "numa_stat",
4718 .read_seq_string = memcg_numa_stat_show,
4721 #ifdef CONFIG_MEMCG_SWAP
4723 .name = "memsw.usage_in_bytes",
4725 .read = mem_cgroup_read,
4726 .register_event = mem_cgroup_usage_register_event,
4727 .unregister_event = mem_cgroup_usage_unregister_event,
4730 .name = "memsw.max_usage_in_bytes",
4732 .trigger = mem_cgroup_reset,
4733 .read = mem_cgroup_read,
4736 .name = "memsw.limit_in_bytes",
4738 .write_string = mem_cgroup_write,
4739 .read = mem_cgroup_read,
4742 .name = "memsw.failcnt",
4744 .trigger = mem_cgroup_reset,
4745 .read = mem_cgroup_read,
4751 static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
4755 int zone, tmp = node;
4766 pn = kzalloc_node(sizeof(*pn), GFP_KERNEL, tmp);
4770 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
4781 static void free_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
4786 static struct mem_cgroup *mem_cgroup_alloc(void)
4792 if (size < PAGE_SIZE)
4807 if (size < PAGE_SIZE)
4836 disarm_sock_keys(memcg);
4837 if (size < PAGE_SIZE)
4863 static void __mem_cgroup_free(struct mem_cgroup *memcg)
4867 mem_cgroup_remove_from_trees(memcg);
4871 free_mem_cgroup_per_zone_info(memcg, node);
4886 __mem_cgroup_free(memcg);
4888 mem_cgroup_put(parent);
4892 static void mem_cgroup_put(struct mem_cgroup *memcg)
4894 __mem_cgroup_put(memcg, 1);
4902 if (!memcg->res.parent)
4908 #ifdef CONFIG_MEMCG_SWAP
4909 static void __init enable_swap_cgroup(void)
4911 if (!mem_cgroup_disabled() && really_do_swap_account)
4915 static void __init enable_swap_cgroup(void)
4920 static int mem_cgroup_soft_limit_tree_init(void)
4930 rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
4934 soft_limit_tree.rb_tree_per_node[node] = rtpn;
4936 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
4946 if (!soft_limit_tree.rb_tree_per_node[node])
4948 kfree(soft_limit_tree.rb_tree_per_node[node]);
4949 soft_limit_tree.rb_tree_per_node[node] = NULL;
4955 static struct cgroup_subsys_state * __ref
4956 mem_cgroup_create(struct cgroup *cont)
4962 memcg = mem_cgroup_alloc();
4964 return ERR_PTR(error);
4967 if (alloc_mem_cgroup_per_zone_info(memcg, node))
4971 if (cont->parent == NULL) {
4973 enable_swap_cgroup();
4975 if (mem_cgroup_soft_limit_tree_init())
4977 root_mem_cgroup = memcg;
4999 mem_cgroup_get(parent);
5008 if (parent && parent != root_mem_cgroup)
5012 INIT_LIST_HEAD(&memcg->oom_notify);
5017 memcg->move_charge_at_immigrate = 0;
5028 mem_cgroup_put(memcg);
5029 return ERR_PTR(error);
5033 __mem_cgroup_free(memcg);
5034 return ERR_PTR(error);
5037 static int mem_cgroup_pre_destroy(struct cgroup *cont)
5041 return mem_cgroup_force_empty(memcg, false);
5044 static void mem_cgroup_destroy(struct cgroup *cont)
5048 kmem_cgroup_destroy(memcg);
5050 mem_cgroup_put(memcg);
5055 #define PRECHARGE_COUNT_AT_ONCE 256
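/* mem_cgroup_do_precharge() charges up to this many pages to mc.to in a
 * single res_counter operation and falls back to one page at a time if
 * that bulk charge fails. */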
5056 static int mem_cgroup_do_precharge(unsigned long count)
5059 int batch_count = PRECHARGE_COUNT_AT_ONCE;
5062 if (mem_cgroup_is_root(memcg)) {
5079 PAGE_SIZE * count, &dummy)) {
5089 if (signal_pending(current)) {
5093 if (!batch_count--) {
5094 batch_count = PRECHARGE_COUNT_AT_ONCE;
5097 ret = __mem_cgroup_try_charge(NULL,
5130 enum mc_target_type {
5136 static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
5141 if (!page || !page_mapped(page))
5143 if (PageAnon(page)) {
5147 } else if (!move_file())
5150 if (!get_page_unless_zero(page))
5157 static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
5160 struct page *page = NULL;
5163 if (!move_anon() || non_swap_entry(ent))
5176 static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
5183 static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
5186 struct page *page = NULL;
5195 mapping = vma->vm_file->f_mapping;
5197 pgoff = linear_page_index(vma, addr);
5206 if (radix_tree_exceptional_entry(page)) {
5216 static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
5217 unsigned long addr, pte_t ptent, union mc_target *target)
5219 struct page *page = NULL;
5220 struct page_cgroup *pc;
5221 enum mc_target_type ret = MC_TARGET_NONE;
5225 page = mc_handle_present_pte(vma, addr, ptent);
5226 else if (is_swap_pte(ptent))
5227 page = mc_handle_swap_pte(vma, addr, ptent, &ent);
5229 page = mc_handle_file_pte(vma, addr, ptent, &ent);
5231 if (!page && !ent.val)
5240 if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
5241 ret = MC_TARGET_PAGE;
5243 target->page = page;
5245 if (!ret || !target)
5249 if (ent.val && !ret &&
5250 css_id(&mc.from->css) == lookup_swap_cgroup_id(ent)) {
5251 ret = MC_TARGET_SWAP;
5258 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
5264 static enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
5265 unsigned long addr, pmd_t pmd, union mc_target *target)
5267 struct page *page = NULL;
5268 struct page_cgroup *pc;
5269 enum mc_target_type ret = MC_TARGET_NONE;
5276 if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
5277 ret = MC_TARGET_PAGE;
5280 target->page = page;
5286 static inline enum mc_target_type get_mctgt_type_thp(struct vm_area_struct *vma,
5287 unsigned long addr, pmd_t pmd, union mc_target *target)
5289 return MC_TARGET_NONE;
5293 static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
5294 unsigned long addr, unsigned long end,
5295 struct mm_walk *walk)
5301 if (pmd_trans_huge_lock(pmd, vma) == 1) {
5302 if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
5304 spin_unlock(&vma->vm_mm->page_table_lock);
5308 if (pmd_trans_unstable(pmd))
5310 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
5312 if (get_mctgt_type(vma, addr, *pte, NULL))
5314 pte_unmap_unlock(pte - 1, ptl);
5320 static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
5322 unsigned long precharge;
5327 struct mm_walk mem_cgroup_count_precharge_walk = {
5328 .pmd_entry = mem_cgroup_count_precharge_pte_range,
5332 if (is_vm_hugetlb_page(vma))
5335 &mem_cgroup_count_precharge_walk);
5339 precharge = mc.precharge;
5345 static int mem_cgroup_precharge_mc(struct mm_struct *mm)
5347 unsigned long precharge = mem_cgroup_count_precharge(mm);
5351 return mem_cgroup_do_precharge(precharge);
5355 static void __mem_cgroup_clear_mc(void)
5362 __mem_cgroup_cancel_charge(mc.to, mc.precharge);
5369 if (mc.moved_charge) {
5370 __mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
5371 mc.moved_charge = 0;
5374 if (mc.moved_swap) {
5376 if (!mem_cgroup_is_root(mc.from))
5378 PAGE_SIZE * mc.moved_swap);
5379 __mem_cgroup_put(mc.from, mc.moved_swap);
5381 if (!mem_cgroup_is_root(mc.to)) {
5387 PAGE_SIZE * mc.moved_swap);
5392 memcg_oom_recover(from);
5393 memcg_oom_recover(to);
5397 static void mem_cgroup_clear_mc(void)
5406 __mem_cgroup_clear_mc();
5407 spin_lock(&mc.lock);
5410 spin_unlock(&mc.lock);
5411 mem_cgroup_end_move(from);
5414 static int mem_cgroup_can_attach(struct cgroup *cgroup,
5431 if (mm->owner == p) {
5437 mem_cgroup_start_move(from);
5438 spin_lock(&mc.lock);
5441 spin_unlock(&mc.lock);
5444 ret = mem_cgroup_precharge_mc(mm);
5446 mem_cgroup_clear_mc();
5453 static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
5456 mem_cgroup_clear_mc();
5459 static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
5460 unsigned long addr, unsigned long end,
5461 struct mm_walk *walk)
5468 union mc_target target;
5470 struct page_cgroup *pc;
5482 if (pmd_trans_huge_lock(pmd, vma) == 1) {
5484 spin_unlock(&vma->vm_mm->page_table_lock);
5487 target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
5488 if (target_type == MC_TARGET_PAGE) {
5493 pc, mc.from, mc.to)) {
5501 spin_unlock(&vma->vm_mm->page_table_lock);
5505 if (pmd_trans_unstable(pmd))
5508 pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
5510 pte_t ptent = *(pte++);
5516 switch (get_mctgt_type(vma, addr, ptent, &target)) {
5517 case MC_TARGET_PAGE:
5522 if (!mem_cgroup_move_account(page, 1, pc,
5532 case MC_TARGET_SWAP:
5534 if (!mem_cgroup_move_swap_account(ent, mc.from, mc.to)) {
5544 pte_unmap_unlock(pte - 1, ptl);
5554 ret = mem_cgroup_do_precharge(1);
5562 static void mem_cgroup_move_charge(struct mm_struct *mm)
5576 __mem_cgroup_clear_mc();
5582 struct mm_walk mem_cgroup_move_charge_walk = {
5583 .pmd_entry = mem_cgroup_move_charge_pte_range,
5587 if (is_vm_hugetlb_page(vma))
5590 &mem_cgroup_move_charge_walk);
5601 static void mem_cgroup_move_task(struct cgroup *cont,
5609 mem_cgroup_move_charge(mm);
5613 mem_cgroup_clear_mc();
5616 static int mem_cgroup_can_attach(struct cgroup *cgroup,
5621 static void mem_cgroup_cancel_attach(struct cgroup *cgroup,
5625 static void mem_cgroup_move_task(struct cgroup *cont,
5633 .subsys_id = mem_cgroup_subsys_id,
5634 .create = mem_cgroup_create,
5635 .pre_destroy = mem_cgroup_pre_destroy,
5636 .destroy = mem_cgroup_destroy,
5637 .can_attach = mem_cgroup_can_attach,
5638 .cancel_attach = mem_cgroup_cancel_attach,
5639 .attach = mem_cgroup_move_task,
5640 .base_cftypes = mem_cgroup_files,
5643 .__DEPRECATED_clear_css_refs = true,
5646 #ifdef CONFIG_MEMCG_SWAP
5647 static int __init enable_swap_account(char *s)
5651 really_do_swap_account = 1;
5652 else if (!strcmp(s, "0"))
5653 really_do_swap_account = 0;
5656 __setup("swapaccount=", enable_swap_account);