38 #include <linux/kernel.h>
41 #include <linux/kernel-page-flags.h>
42 #include <linux/sched.h>
45 #include <linux/export.h>
52 #include <linux/slab.h>
66 #if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE)
79 static int hwpoison_filter_dev(
struct page *
p)
84 if (hwpoison_filter_dev_major == ~0
U &&
85 hwpoison_filter_dev_minor == ~0
U)
94 mapping = page_mapping(p);
98 dev = mapping->
host->i_sb->s_dev;
99 if (hwpoison_filter_dev_major != ~0
U &&
100 hwpoison_filter_dev_major !=
MAJOR(dev))
102 if (hwpoison_filter_dev_minor != ~0
U &&
103 hwpoison_filter_dev_minor !=
MINOR(dev))
109 static int hwpoison_filter_flags(
struct page *p)
111 if (!hwpoison_filter_flags_mask)
115 hwpoison_filter_flags_value)
131 #ifdef CONFIG_MEMCG_SWAP
134 static int hwpoison_filter_task(
struct page *p)
137 struct cgroup_subsys_state *css;
140 if (!hwpoison_filter_memcg)
149 if (!css->cgroup->dentry)
152 ino = css->cgroup->dentry->d_inode->i_ino;
155 if (ino != hwpoison_filter_memcg)
/*
 * Stub variant of hwpoison_filter_task().
 *
 * A first definition with the same name starts right after
 * "#ifdef CONFIG_MEMCG_SWAP" above, so this one is presumably the #else
 * fallback (NOTE(review): the #else line itself is not visible here).
 *
 * @p is ignored and the function always returns 0, so the
 * "if (hwpoison_filter_task(p))" check made by the caller never triggers
 * for this variant.
 */
161 static int hwpoison_filter_task(
struct page *p) {
return 0; }
166 if (!hwpoison_filter_enable)
169 if (hwpoison_filter_dev(p))
172 if (hwpoison_filter_flags(p))
175 if (hwpoison_filter_task(p))
201 "MCE %#lx: Killing %s:%d due to hardware memory corruption\n",
205 si.si_addr = (
void *)addr;
206 #ifdef __ARCH_SI_TRAPNO
207 si.si_trapno = trapno;
209 si.si_addr_lsb = compound_trans_order(compound_head(page)) +
PAGE_SHIFT;
211 if ((flags & MF_ACTION_REQUIRED) && t ==
current) {
241 if (PageLRU(p) || is_free_buddy_page(p))
257 if (page_count(p) == 1)
303 static void add_to_kill(
struct task_struct *tsk,
struct page *p,
306 struct to_kill **tkc)
317 "MCE: Out of memory while machine check handling\n");
331 pr_info(
"MCE: Unable to find user space address %lx in %s\n",
348 static void kill_procs(
struct list_head *to_kill,
int forcekill,
int trapno,
349 int fail,
struct page *page,
unsigned long pfn,
352 struct to_kill *tk, *
next;
363 "MCE %#lx: forcibly killing %s:%d because of failure to unmap corrupted page\n",
364 pfn, tk->
tsk->comm, tk->
tsk->pid);
374 else if (kill_proc(tk->
tsk, tk->
addr, trapno,
375 pfn, page, flags) < 0)
377 "MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
378 pfn, tk->
tsk->comm, tk->
tsk->pid);
380 put_task_struct(tk->
tsk);
385 static int task_early_kill(
struct task_struct *tsk)
391 return sysctl_memory_failure_early_kill;
397 static void collect_procs_anon(
struct page *page,
struct list_head *to_kill,
398 struct to_kill **tkc)
414 if (!task_early_kill(tsk))
416 anon_vma_interval_tree_foreach(vmac, &av->
rb_root,
422 add_to_kill(tsk, page, vma, to_kill, tkc);
432 static void collect_procs_file(
struct page *page,
struct list_head *to_kill,
433 struct to_kill **tkc)
444 if (!task_early_kill(tsk))
447 vma_interval_tree_foreach(vma, &mapping->
i_mmap, pgoff,
457 add_to_kill(tsk, page, vma, to_kill, tkc);
470 static void collect_procs(
struct page *page,
struct list_head *tokill)
481 collect_procs_anon(page, tokill, &tk);
483 collect_procs_file(page, tokill, &tk);
498 static const char *action_name[] = {
511 static int delete_from_lru_cache(
struct page *p)
519 ClearPageUnevictable(p);
534 static int me_kernel(
struct page *p,
unsigned long pfn)
542 static int me_unknown(
struct page *p,
unsigned long pfn)
551 static int me_pagecache_clean(
struct page *p,
unsigned long pfn)
557 delete_from_lru_cache(p);
573 mapping = page_mapping(p);
586 if (mapping->
a_ops->error_remove_page) {
587 err = mapping->
a_ops->error_remove_page(mapping, p);
591 }
else if (page_has_private(p) &&
593 pr_info(
"MCE %#lx: failed to release buffers\n", pfn);
616 static int me_pagecache_dirty(
struct page *p,
unsigned long pfn)
657 mapping_set_error(mapping,
EIO);
660 return me_pagecache_clean(p, pfn);
682 static int me_swapcache_dirty(
struct page *p,
unsigned long pfn)
686 ClearPageUptodate(p);
688 if (!delete_from_lru_cache(p))
694 static int me_swapcache_clean(
struct page *p,
unsigned long pfn)
698 if (!delete_from_lru_cache(p))
710 static int me_huge_page(
struct page *p,
unsigned long pfn)
713 struct page *hpage = compound_head(p);
724 if (!(page_mapping(hpage) || PageAnon(hpage))) {
725 res = dequeue_hwpoisoned_huge_page(hpage);
/*
 * Shorthand single-bit masks, one per PG_* page flag bit number.
 *
 * They keep the first two columns of the page-state table entries that
 * follow compact, e.g. { sc|dirty, sc, ... } or { lru|dirty, lru, ... }.
 * Each macro expands to (1UL << PG_xxx), so the values can be OR-ed together
 * and compared against a page's flags word.
 *
 * NOTE(review): these are very generic lower-case names (dirty, head, tail,
 * slab, ...); presumably they are #undef'd once the table has been defined --
 * the #undef lines are not visible in this excerpt, confirm before reusing
 * any of these identifiers further down the file.
 */
745 #define dirty (1UL << PG_dirty)
746 #define sc (1UL << PG_swapcache)
747 #define unevict (1UL << PG_unevictable)
748 #define mlock (1UL << PG_mlocked)
749 #define writeback (1UL << PG_writeback)
750 #define lru (1UL << PG_lru)
751 #define swapbacked (1UL << PG_swapbacked)
752 #define head (1UL << PG_head)
753 #define tail (1UL << PG_tail)
754 #define compound (1UL << PG_compound)
755 #define slab (1UL << PG_slab)
756 #define reserved (1UL << PG_reserved)
758 static struct page_state {
762 int (*
action)(
struct page *
p,
unsigned long pfn);
775 {
slab,
slab,
"kernel slab", me_kernel },
777 #ifdef CONFIG_PAGEFLAGS_EXTENDED
778 {
head,
head,
"huge", me_huge_page },
779 {
tail,
tail,
"huge", me_huge_page },
785 {
sc|
dirty,
sc,
"swapcache", me_swapcache_clean },
791 {
mlock,
mlock,
"mlocked LRU", me_pagecache_clean },
794 {
lru|
dirty,
lru,
"clean LRU", me_pagecache_clean },
799 { 0, 0,
"unknown page state", me_unknown },
815 static void action_result(
unsigned long pfn,
char *
msg,
int result)
821 PageDirty(page) ?
"dirty " :
"",
822 msg, action_name[result]);
825 static int page_action(
struct page_state *
ps,
struct page *p,
831 result = ps->action(p, pfn);
832 action_result(pfn, ps->msg, result);
834 count = page_count(p) - 1;
835 if (ps->action == me_swapcache_dirty && result ==
DELAYED)
839 "MCE %#lx: %s page still referenced by %d users\n",
840 pfn, ps->msg, count);
856 static int hwpoison_user_mappings(
struct page *p,
unsigned long pfn,
857 int trapno,
int flags)
863 int kill = 1, forcekill;
864 struct page *hpage = compound_head(p);
867 if (PageReserved(p) || PageSlab(p))
874 if (!page_mapped(hpage))
880 if (PageSwapCache(p)) {
882 "MCE %#lx: keeping poisoned page in swap cache\n", pfn);
892 mapping = page_mapping(hpage);
893 if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping &&
894 mapping_cap_writeback_dirty(mapping)) {
901 "MCE %#lx: corrupted page was clean: dropped without side effects\n",
914 if (PageTransHuge(hpage)) {
925 if (!
PageHuge(hpage) && PageAnon(hpage)) {
934 "MCE %#lx: failed to split THP\n", pfn);
953 collect_procs(ppage, &tokill);
961 pfn, page_mapcount(ppage));
976 forcekill = PageDirty(ppage) || (flags & MF_MUST_KILL);
977 kill_procs(&tokill, forcekill, trapno,
983 static void set_page_hwpoison_huge_page(
struct page *hpage)
986 int nr_pages = 1 << compound_trans_order(hpage);
987 for (i = 0; i < nr_pages; i++)
988 SetPageHWPoison(hpage + i);
991 static void clear_page_hwpoison_huge_page(
struct page *hpage)
994 int nr_pages = 1 << compound_trans_order(hpage);
995 for (i = 0; i < nr_pages; i++)
996 ClearPageHWPoison(hpage + i);
1019 struct page_state *
ps;
1023 unsigned int nr_pages;
1025 if (!sysctl_memory_failure_recovery)
1026 panic(
"Memory failure from trap %d on page %lx", trapno, pfn);
1030 "MCE %#lx: memory outside kernel control\n",
1036 hpage = compound_head(p);
1037 if (TestSetPageHWPoison(p)) {
1042 nr_pages = 1 << compound_trans_order(hpage);
1043 atomic_long_add(nr_pages, &mce_bad_pages);
1059 if (!(flags & MF_COUNT_INCREASED) &&
1060 !get_page_unless_zero(hpage)) {
1061 if (is_free_buddy_page(p)) {
1062 action_result(pfn,
"free buddy",
DELAYED);
1070 if (!PageHWPoison(hpage)
1072 || (p != hpage && TestSetPageHWPoison(hpage))) {
1073 atomic_long_sub(nr_pages, &mce_bad_pages);
1076 set_page_hwpoison_huge_page(hpage);
1077 res = dequeue_hwpoisoned_huge_page(hpage);
1078 action_result(pfn,
"free huge",
1083 action_result(pfn,
"high order kernel",
IGNORED);
1096 if (!
PageHuge(p) && !PageTransTail(p)) {
1103 if (is_free_buddy_page(p)) {
1104 action_result(pfn,
"free buddy, 2nd try",
1108 action_result(pfn,
"non LRU",
IGNORED);
1124 if (!PageHWPoison(p)) {
1130 if (TestClearPageHWPoison(p))
1131 atomic_long_sub(nr_pages, &mce_bad_pages);
1141 if (
PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) {
1142 action_result(pfn,
"hugepage already hardware poisoned",
1155 set_page_hwpoison_huge_page(hpage);
1157 wait_on_page_writeback(p);
1163 if (hwpoison_user_mappings(p, pfn, trapno, flags) !=
SWAP_SUCCESS) {
1172 if (PageLRU(p) && !PageSwapCache(p) && p->
mapping ==
NULL) {
1173 action_result(pfn,
"already truncated LRU",
IGNORED);
1179 for (ps = error_states;; ps++) {
1180 if ((p->
flags & ps->mask) == ps->res) {
1181 res = page_action(ps, p, pfn);
/*
 * Size of the memory-failure FIFO, kept as a power of two:
 * MEMORY_FAILURE_FIFO_SIZE is 1 << MEMORY_FAILURE_FIFO_ORDER, i.e. 16.
 *
 * NOTE(review): presumably this sizes the fifo that is read with kfifo_get()
 * further down; the fifo declaration is not visible in this excerpt, so
 * whether the unit is entries or bytes should be confirmed there.
 */
1191 #define MEMORY_FAILURE_FIFO_ORDER 4
1192 #define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER)
1229 unsigned long proc_flags;
1241 pr_err(
"Memory failure: buffer overflow when queuing memory failure at 0x%#lx\n",
1243 spin_unlock_irqrestore(&mf_cpu->
lock, proc_flags);
1252 unsigned long proc_flags;
1258 gotten =
kfifo_get(&mf_cpu->fifo, &entry);
1259 spin_unlock_irqrestore(&mf_cpu->
lock, proc_flags);
1266 static int __init memory_failure_init(
void)
1299 unsigned int nr_pages;
1305 page = compound_head(p);
1307 if (!PageHWPoison(p)) {
1308 pr_info(
"MCE: Page was already unpoisoned %#lx\n", pfn);
1312 nr_pages = 1 << compound_trans_order(page);
1314 if (!get_page_unless_zero(page)) {
1322 pr_info(
"MCE: Memory failure is now running on free hugepage %#lx\n", pfn);
1325 if (TestClearPageHWPoison(p))
1326 atomic_long_sub(nr_pages, &mce_bad_pages);
1327 pr_info(
"MCE: Software-unpoisoned free page %#lx\n", pfn);
1338 if (TestClearPageHWPoison(page)) {
1339 pr_info(
"MCE: Software-unpoisoned page %#lx\n", pfn);
1340 atomic_long_sub(nr_pages, &mce_bad_pages);
1343 clear_page_hwpoison_huge_page(page);
1355 static struct page *new_page(
struct page *p,
unsigned long private,
int **
x)
1357 int nid = page_to_nid(p);
1371 static int get_any_page(
struct page *p,
unsigned long pfn,
int flags)
1375 if (flags & MF_COUNT_INCREASED)
1393 if (!get_page_unless_zero(compound_head(p))) {
1395 pr_info(
"%s: %#lx free huge page\n", __func__, pfn);
1396 ret = dequeue_hwpoisoned_huge_page(compound_head(p));
1397 }
else if (is_free_buddy_page(p)) {
1398 pr_info(
"%s: %#lx free buddy page\n", __func__, pfn);
1403 pr_info(
"%s: %#lx: unknown zero refcount page type %lx\n",
1404 __func__, pfn, p->
flags);
1416 static int soft_offline_huge_page(
struct page *page,
int flags)
1420 struct page *hpage = compound_head(page);
1422 ret = get_any_page(page, pfn, flags);
1428 if (PageHWPoison(hpage)) {
1430 pr_info(
"soft offline: %#lx hugepage already poisoned\n", pfn);
1439 pr_info(
"soft offline: %#lx: migration failed %d, type %lx\n",
1440 pfn, ret, page->
flags);
1444 if (!PageHWPoison(hpage))
1445 atomic_long_add(1 << compound_trans_order(hpage),
1447 set_page_hwpoison_huge_page(hpage);
1448 dequeue_hwpoisoned_huge_page(hpage);
1482 return soft_offline_huge_page(page, flags);
1483 if (PageTransHuge(hpage)) {
1485 pr_info(
"soft offline: %#lx: failed to split THP\n",
1491 ret = get_any_page(page, pfn, flags);
1500 if (!PageLRU(page)) {
1510 ret = get_any_page(page, pfn, 0);
1516 if (!PageLRU(page)) {
1517 pr_info(
"soft_offline: %#lx: unknown non LRU page type %lx\n",
1523 wait_on_page_writeback(page);
1528 if (PageHWPoison(page)) {
1531 pr_info(
"soft offline: %#lx page already poisoned\n", pfn);
1548 pr_info(
"soft_offline: %#lx: invalidated\n", pfn);
1566 page_is_file_cache(page));
1567 list_add(&page->
lru, &pagelist);
1572 pr_info(
"soft offline: %#lx: migration failed %d, type %lx\n",
1573 pfn, ret, page->
flags);
1578 pr_info(
"soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
1579 pfn, ret, page_count(page), page->
flags);
1585 atomic_long_add(1, &mce_bad_pages);
1586 SetPageHWPoison(page);