11 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 #include <linux/capability.h>
22 #include <linux/kernel.h>
24 #include <linux/string.h>
25 #include <linux/device.h>
28 #include <linux/ctype.h>
29 #include <linux/sched.h>
31 #include <linux/types.h>
32 #include <linux/slab.h>
35 #include <linux/poll.h>
43 #include <linux/export.h>
45 #include <asm/processor.h>
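/*
 * Lockdep rule for the MCE log: readers must either be in an RCU-sched
 * read-side section or hold mce_chrdev_read_mutex, which is exactly what
 * the helper below checks.
 */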
53 #define rcu_dereference_check_mce(p) \
54 rcu_dereference_index_check((p), \
55 rcu_read_lock_sched_held() || \
56 lockdep_is_held(&mce_chrdev_read_mutex))
58 #define CREATE_TRACE_POINTS
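/*
 * mce_helper presumably names the optional user-space "trigger" program
 * spawned when new machine-check records are logged, with mce_need_notify
 * marking that such a notification is pending.
 */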
91 static unsigned long mce_need_notify;
92 static char mce_helper[128];
93 static char *mce_helper_argv[2] = { mce_helper, NULL };
98 static int cpu_missing;
124 m->cpuid = cpuid_eax(1);
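/*
 * mcelog is the legacy fixed-size buffer behind /dev/mcelog; .recordlen
 * is presumably there so user-space readers can detect changes in the
 * size of struct mce.
 */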
139 static struct mce_log mcelog = {
142 .recordlen = sizeof(struct mce),
151 trace_mce_record(mce);
154 if (ret == NOTIFY_STOP)
170 (unsigned long *)&mcelog.flags);
174 if (mcelog.entry[entry].finished) {
185 memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
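/*
 * drain_mcelog_buffer() appears to replay already-buffered records to a
 * newly registered decoder so that errors logged early are not lost.
 */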
194 static void drain_mcelog_buffer(void)
204 for (i = prev; i < next; i++) {
217 pr_err("skipping error being logged currently!\n");
225 memset(mcelog.entry + prev, 0, (next - prev) * sizeof(*m));
228 } while (next != prev);
235 drain_mcelog_buffer();
245 static void print_mce(struct mce *m)
249 pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
258 print_symbol("{%s}", m->ip);
273 pr_emerg(HW_ERR "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x microcode %x\n",
282 if (ret == NOTIFY_STOP)
288 #define PANIC_TIMEOUT 5
292 static int fake_panic;
296 static void wait_for_panic(void)
302 while (timeout-- > 0)
306 panic("Panicing machine check CPU died");
309 static void mce_panic(char *msg, struct mce *final, char *exp)
346 if (!final || memcmp(m, final, sizeof(struct mce))) {
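/*
 * MSR access is funneled through mce_rdmsrl()/mce_wrmsrl(); msr_to_offset()
 * lets the error injector redirect the access into a per-CPU struct mce,
 * and a failing rdmsrl_safe() is warned about once and read as zero.
 */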
371 static int msr_to_offset(u32 msr)
389 static u64 mce_rdmsrl(u32 msr)
394 int offset = msr_to_offset(msr);
401 if (rdmsrl_safe(msr, &v)) {
402 WARN_ONCE(1, "mce: Unable to read msr %d!\n", msr);
414 static void mce_wrmsrl(u32 msr, u64 v)
417 int offset = msr_to_offset(msr);
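/*
 * mce_gather_info() fills in the fields shared by all records (CPU id,
 * timestamp, MCG status) and the faulting IP, taken either from regs or
 * from rip_msr on CPUs that provide it.
 */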
431 static inline void mce_gather_info(struct mce *m, struct pt_regs *regs)
450 if (v8086_mode(regs))
455 m->ip = mce_rdmsrl(rip_msr);
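/*
 * A small per-CPU ring carries page frame numbers of memory errors out of
 * the machine-check context; mce_schedule_work() later hands them to the
 * memory poisoning code in process context.
 */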
464 #define MCE_RING_SIZE 16
474 static int mce_ring_empty(void)
481 static int mce_ring_get(unsigned long *pfn)
500 static int mce_ring_add(unsigned long pfn)
506 if (next == r->start)
521 static void mce_schedule_work(void)
523 if (!mce_ring_empty()) {
538 static void mce_report_event(struct pt_regs *regs)
558 static void mce_read_aux(struct mce *m, int i)
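/*
 * Polling path (machine_check_poll()): all banks are scanned for
 * corrected errors outside the exception handler, with the MCP_* flags
 * deciding what is kept and logged.
 */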
600 mce_gather_info(&m, NULL);
602 for (i = 0; i < banks; i++) {
622 if (!(flags & MCP_UC) &&
628 if (!(flags & MCP_TIMESTAMP))
634 if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce)
656 static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
661 for (i = 0; i < banks; i++) {
665 if (quirk_no_way_out)
666 quirk_no_way_out(i, m, regs);
688 static int mce_timed_out(u64 *t)
699 if (!monarch_timeout)
704 mce_panic("Timeout synchronizing machine check over CPUs",
739 static void mce_reign(void)
743 int global_worst = 0;
755 if (severity > global_worst) {
757 global_worst = severity;
768 mce_panic("Fatal Machine check", m, msg);
781 mce_panic("Machine check from unknown source", NULL, NULL);
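/*
 * mce_start()/mce_end() implement the rendezvous of all CPUs on a
 * broadcast #MC: the first CPU in becomes the Monarch, the rest wait as
 * subjects, and mce_reign() lets the Monarch pick the globally worst
 * severity. mce_timed_out() escalates to a panic when a CPU never checks
 * in, subject to monarch_timeout and the tolerance level.
 */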
800 static int mce_start(int *no_way_out)
820 if (mce_timed_out(&timeout)) {
845 if (mce_timed_out(&timeout)) {
865 static int mce_end(int order)
889 if (mce_timed_out(&timeout))
902 if (mce_timed_out(&timeout))
934 static int mce_usable_address(struct mce *m)
945 static void mce_clear_state(unsigned long *toclear)
949 for (i = 0; i < banks; i++) {
960 #define MCE_INFO_MAX 16
982 mce_panic("Too many concurrent recoverable errors", NULL, NULL);
985 static struct mce_info *mce_find_info(void)
995 static void mce_clear_info(struct mce_info *mi)
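/*
 * do_machine_check(), the #MC exception handler proper: gather the data,
 * grade each bank's severity, clear what was consumed, and then recover,
 * kill the offending task (kill_it) or panic depending on the outcome.
 */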
1014 struct mce m, *final;
1035 char *msg = "Unknown";
1044 mce_gather_info(&m, regs);
1049 memset(valid_banks, 0, sizeof(valid_banks));
1050 no_way_out = mce_no_way_out(&m, &msg, valid_banks, regs);
1067 order = mce_start(&no_way_out);
1068 for (i = 0; i < banks; i++) {
1113 mce_read_aux(&m, i);
1127 if (severity > worst) {
1137 mce_clear_state(toclear);
1143 if (mce_end(order) < 0)
1154 mce_panic("Fatal machine check on current CPU", &m, msg);
1159 } else if (kill_it) {
1165 mce_report_event(regs);
1173 #ifndef CONFIG_MEMORY_FAILURE
1177 BUG_ON(flags & MF_ACTION_REQUIRED);
1178 pr_err("Uncorrected memory error in page 0x%lx ignored\n"
1179 "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n",
1197 struct mce_info *mi = mce_find_info();
1198 int flags = MF_ACTION_REQUIRED;
1201 mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
1206 pr_err("Uncorrected hardware memory error in user-access at %llx",
1214 flags |= MF_MUST_KILL;
1216 pr_err("Memory error not recovered");
1231 while (mce_ring_get(&pfn))
1235 #ifdef CONFIG_X86_MCE_INTEL
1265 static unsigned long check_interval = 5 * 60;
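/*
 * Corrected errors are polled from a per-CPU timer: check_interval
 * defaults to five minutes, mce_timer_fn() halves the period while errors
 * keep being found, and mce_adjust_timer gives the Intel CMCI code a hook
 * to substitute its own interval.
 */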
1270 static unsigned long mce_adjust_timer_default(unsigned long interval)
1275 static unsigned long (*mce_adjust_timer)(unsigned long interval) =
1276 mce_adjust_timer_default;
1278 static void mce_timer_fn(unsigned long data)
1297 iv = max(iv / 2, (unsigned long) HZ/100);
1300 iv = mce_adjust_timer(iv);
1319 if (timer_pending(t)) {
1331 static void mce_timer_delete_all(void)
1377 static int __cpuinit __mcheck_cpu_mce_banks_init(void)
1384 for (i = 0; i < banks; i++) {
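/*
 * __mcheck_cpu_cap_init() reads how many banks the CPU reports (MCG_CAP)
 * and sets up the per-bank control array, capping it at MAX_NR_BANKS.
 */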
1396 static int __cpuinit __mcheck_cpu_cap_init(void)
1405 pr_info("CPU supports %d MCE banks\n", b);
1407 if (b > MAX_NR_BANKS) {
1408 pr_warn("Using only %u machine check banks out of %u\n",
1414 WARN_ON(banks != 0 && b != banks);
1417 int err = __mcheck_cpu_mce_banks_init();
1433 static void __mcheck_cpu_init_generic(void)
1435 mce_banks_t all_banks;
1442 bitmap_fill(all_banks, MAX_NR_BANKS);
1451 for (i = 0; i < banks; i++) {
1469 static void quirk_sandybridge_ifu(int bank, struct mce *m, struct pt_regs *regs)
1493 pr_info("unknown CPU type - not enabling MCE support\n");
1499 if (c->x86 == 15 && banks > 4) {
1507 if (c->x86 <= 17 && mce_bootlog < 0) {
1518 if (c->x86 == 6 && banks > 0)
1525 if (c->x86 == 0x15 &&
1538 need_toggle = !(hwcr & BIT(18));
1544 rdmsrl(msrs[i], val);
1549 wrmsrl(msrs[i], val);
1577 monarch_timeout < 0)
1584 if (c->x86 == 6 && c->x86_model <= 13 && mce_bootlog < 0)
1588 quirk_no_way_out = quirk_sandybridge_ifu;
1590 if (monarch_timeout < 0)
1591 monarch_timeout = 0;
1592 if (mce_bootlog != 0)
1593 mce_panic_timeout = 30;
1617 static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
1632 static void mce_start_timer(unsigned int cpu, struct timer_list *t)
1634 unsigned long iv = mce_adjust_timer(check_interval * HZ);
1638 if (mce_ignore_ce || !iv)
1645 static void __mcheck_cpu_init_timer(void)
1651 mce_start_timer(cpu, t);
1657 pr_err("CPU#%d: Unexpected int18 (Machine Check)\n",
1663 unexpected_machine_check;
1674 if (__mcheck_cpu_ancient_init(c))
1680 if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
1687 __mcheck_cpu_init_generic();
1688 __mcheck_cpu_init_vendor(c);
1689 __mcheck_cpu_init_timer();
1691 init_irq_work(&__get_cpu_var(mce_irq_work), &mce_irq_work_cb);
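/*
 * Legacy /dev/mcelog character device: open() can be exclusive, read()
 * drains the buffer (checking APEI records first), poll() wakes readers
 * when new entries arrive and ioctl() reports record length and flags.
 */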
1699 static int mce_chrdev_open_count;
1700 static int mce_chrdev_open_exclu;
1704 spin_lock(&mce_chrdev_state_lock);
1706 if (mce_chrdev_open_exclu ||
1708 spin_unlock(&mce_chrdev_state_lock);
1714 mce_chrdev_open_exclu = 1;
1715 mce_chrdev_open_count++;
1717 spin_unlock(&mce_chrdev_state_lock);
1722 static int mce_chrdev_release(struct inode *inode, struct file *file)
1724 spin_lock(&mce_chrdev_state_lock);
1726 mce_chrdev_open_count--;
1727 mce_chrdev_open_exclu = 0;
1729 spin_unlock(&mce_chrdev_state_lock);
1734 static void collect_tscs(void *data)
1736 unsigned long *cpu_tsc = (unsigned long *)data;
1741 static int mce_apei_read_done;
1744 static int __mce_read_apei(char __user **ubuf, size_t usize)
1750 if (usize < sizeof(struct mce))
1756 mce_apei_read_done = 1;
1776 mce_apei_read_done = 1;
1779 *ubuf += sizeof(struct mce);
1784 static ssize_t mce_chrdev_read(struct file *filp, char __user *ubuf,
1785 size_t usize, loff_t *off)
1788 unsigned long *cpu_tsc;
1798 if (!mce_apei_read_done) {
1799 err = __mce_read_apei(&buf, usize);
1800 if (err || buf != ubuf)
1808 if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
1814 for (i = prev; i < next; i++) {
1815 unsigned long start = jiffies;
1816 struct mce *m = &mcelog.entry[i];
1820 memset(m, 0, sizeof(*m));
1833 (next - prev) * sizeof(struct mce));
1836 } while (next != prev);
1847 struct mce *m = &mcelog.entry[i];
1853 memset(m, 0, sizeof(*m));
1864 return err ? err : buf - ubuf;
1867 static unsigned int mce_chrdev_poll(struct file *file, poll_table *wait)
1869 poll_wait(file, &mce_chrdev_wait, wait);
1877 static long mce_chrdev_ioctl(struct file *f, unsigned int cmd,
1887 return put_user(sizeof(struct mce), p);
1894 flags = mcelog.flags;
1904 static ssize_t (*mce_write)(struct file *filp, const char __user *ubuf,
1905 size_t usize, loff_t *off);
1908 const char __user *ubuf,
1909 size_t usize, loff_t *off))
1916 size_t usize, loff_t *off)
1919 return mce_write(filp, ubuf, usize, off);
1925 .open = mce_chrdev_open,
1926 .release = mce_chrdev_release,
1927 .read = mce_chrdev_read,
1929 .poll = mce_chrdev_poll,
1930 .unlocked_ioctl = mce_chrdev_ioctl,
1934 static struct miscdevice mce_chrdev_device = {
1952 static int __init mcheck_enable(char *str)
1962 else if (!strcmp(str, "no_cmci"))
1963 mce_cmci_disabled = 1;
1964 else if (!strcmp(str, "dont_log_ce"))
1965 mce_dont_log_ce = 1;
1966 else if (!strcmp(str, "ignore_ce"))
1968 else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
1969 mce_bootlog = (str[0] == 'b');
1970 else if (!strcmp(str, "bios_cmci_threshold"))
1971 mce_bios_cmci_threshold = 1;
1979 pr_info("mce argument %s ignored. Please use /sys\n", str);
1984 __setup("mce", mcheck_enable);
2001 static int mce_disable_error_reporting(void)
2005 for (i = 0; i < banks; i++) {
2014 static int mce_syscore_suspend(void)
2016 return mce_disable_error_reporting();
2019 static void mce_syscore_shutdown(void)
2021 mce_disable_error_reporting();
2029 static void mce_syscore_resume(void)
2031 __mcheck_cpu_init_generic();
2036 .suspend = mce_syscore_suspend,
2037 .shutdown = mce_syscore_shutdown,
2038 .resume = mce_syscore_resume,
2045 static void mce_cpu_restart(void *data)
2049 __mcheck_cpu_init_generic();
2050 __mcheck_cpu_init_timer();
2054 static void mce_restart(void)
2056 mce_timer_delete_all();
2061 static void mce_disable_cmci(void *data)
2068 static void mce_enable_ce(void *all)
2075 __mcheck_cpu_init_timer();
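/*
 * Per-CPU sysfs interface on the "machinecheck" bus: each CPU gets a
 * device carrying per-bank ctl attributes plus knobs such as
 * check_interval, ignore_ce, cmci_disabled and the user-space trigger.
 */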
2078 static struct bus_type mce_subsys = {
2079 .name = "machinecheck",
2080 .dev_name = "machinecheck",
2096 return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
2100 const char *buf, size_t size)
2107 attr_to_bank(attr)->ctl = new;
2118 return strlen(mce_helper) + 1;
2122 const char *buf, size_t siz)
2126 strncpy(mce_helper, buf, sizeof(mce_helper));
2127 mce_helper[sizeof(mce_helper)-1] = 0;
2128 p = strchr(mce_helper, '\n');
2133 return strlen(mce_helper) + !!p;
2138 const char *buf, size_t size)
2145 if (mce_ignore_ce ^ !!new) {
2148 mce_timer_delete_all();
2162 const char *buf, size_t size)
2169 if (mce_cmci_disabled ^ !!new) {
2173 mce_cmci_disabled = 1;
2176 mce_cmci_disabled = 0;
2185 const char *buf, size_t size)
2213 &dev_attr_tolerant.attr,
2214 &dev_attr_check_interval.attr,
2216 &dev_attr_monarch_timeout.attr,
2217 &dev_attr_dont_log_ce.attr,
2218 &dev_attr_ignore_ce.attr,
2219 &dev_attr_cmci_disabled.attr,
2225 static void mce_device_release(struct device *dev)
2231 static __cpuinit int mce_device_create(unsigned int cpu)
2244 dev->bus = &mce_subsys;
2245 dev->release = &mce_device_release;
2251 for (i = 0; mce_device_attrs[i]; i++) {
2256 for (j = 0; j < banks; j++) {
2261 cpumask_set_cpu(cpu, mce_device_initialized);
2277 static __cpuinit void mce_device_remove(unsigned int cpu)
2285 for (i = 0; mce_device_attrs[i]; i++)
2288 for (i = 0; i < banks; i++)
2292 cpumask_clear_cpu(cpu, mce_device_initialized);
2297 static void __cpuinit mce_disable_cpu(void *h)
2299 unsigned long action = *(unsigned long *)h;
2307 for (i = 0; i < banks; i++) {
2315 static void __cpuinit mce_reenable_cpu(void *h)
2317 unsigned long action = *(unsigned long *)h;
2323 if (!(action & CPU_TASKS_FROZEN))
2325 for (i = 0; i < banks; i++) {
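/*
 * CPU hotplug callback: create or remove the per-CPU device and disable
 * or re-enable the banks and the polling timer as CPUs go offline and
 * come back online.
 */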
2335 mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
2337 unsigned int cpu = (unsigned long)hcpu;
2340 switch (action & ~CPU_TASKS_FROZEN) {
2342 mce_device_create(cpu);
2349 mce_device_remove(cpu);
2358 mce_start_timer(cpu, t);
2371 .notifier_call = mce_cpu_callback,
2374 static __init void mce_init_banks(void)
2378 for (i = 0; i < banks; i++) {
2386 a->attr.mode = 0644;
2387 a->show = show_bank;
2388 a->store = set_bank;
2392 static __init int mcheck_init_device(void)
2400 zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL);
2409 err = mce_device_create(i);
2427 static int __init mcheck_disable(char *str)
2432 __setup("nomce", mcheck_disable);
2434 #ifdef CONFIG_DEBUG_FS
2437 static struct dentry *dmce;
2445 static void mce_reset(void)
2454 static int fake_panic_get(void *data, u64 *val)
2460 static int fake_panic_set(void *data, u64 val)
2468 fake_panic_set, "%llu\n");
2470 static int __init mcheck_debugfs_init(void)
2472 struct dentry *dmce, *ffake_panic;