1 #include <linux/module.h>
2 #include <linux/slab.h>
8 static u8 xec_mask = 0xf;
9 static u8 nb_err_cpumask = 0xf;
11 static bool report_gart_errors;
16 report_gart_errors =
v;
31 nb_bus_decoder =
NULL;
/*
 * Four-entry table of short labels; both the array and the strings it
 * points to are read-only.
 *
 * NOTE(review): presumably indexed by the TT field of the MCE error code
 * through the TT_MSG() macro used elsewhere in this file -- the macro
 * definition is not visible here, confirm.
 */
const char * const tt_msgs[] = {
	[0] = "INSN",
	[1] = "DATA",
	[2] = "GEN",
	[3] = "RESV",
};
/*
 * Four-entry table of short labels; both the array and the strings it
 * points to are read-only.
 *
 * NOTE(review): presumably indexed by the LL field of the MCE error code
 * (the decoders in this file compare that field against LL_L1 / 1) --
 * confirm against the LL()/LL_MSG() definitions, which are not visible here.
 */
const char * const ll_msgs[] = {
	[0] = "RESV",
	[1] = "L1",
	[2] = "L2",
	[3] = "L3/GEN",
};
51 "GEN",
"RD",
"WR",
"DRD",
"DWR",
"IRD",
"PRF",
"EV",
"SNP"
/*
 * Four-entry table of short labels; both the array and the strings it
 * points to are read-only.
 *
 * NOTE(review): no user of this table is visible in this excerpt;
 * presumably indexed by a 2-bit field of the MCE error code -- confirm.
 */
const char * const pp_msgs[] = {
	[0] = "SRC",
	[1] = "RES",
	[2] = "OBS",
	[3] = "GEN",
};
/*
 * Two-entry table: index 0 is "no timeout", index 1 is "timed out".
 * Both the array and the strings it points to are read-only.
 *
 * NOTE(review): no user of this table is visible in this excerpt;
 * presumably indexed by a single timeout bit of the MCE error code -- confirm.
 */
const char * const to_msgs[] = {
	[0] = "no timeout",
	[1] = "timed out",
};
/*
 * Four-entry table of short labels; both the array and the strings it
 * points to are read-only.
 *
 * NOTE(review): no user of this table is visible in this excerpt;
 * presumably indexed by a 2-bit field of the MCE error code -- confirm.
 */
const char * const ii_msgs[] = {
	[0] = "MEM",
	[1] = "RESV",
	[2] = "IO",
	[3] = "GEN",
};
67 static const char *
const f15h_ic_mce_desc[] = {
68 "UC during a demand linefill from L2",
69 "Parity error during data load from IC",
70 "Parity error for IC valid bit",
71 "Main tag parity error",
72 "Parity error in prediction queue",
73 "PFB data/address parity error",
74 "Parity error in the branch status reg",
75 "PFB promotion address error",
76 "Tag error during probe/victimization",
77 "Parity error for IC probe tag valid bit",
78 "PFB non-cacheable bit parity error",
79 "PFB valid bit parity error",
80 "Microcode Patch Buffer",
87 static const char *
const f15h_cu_mce_desc[] = {
88 "Fill ECC error on data fills",
89 "Fill parity error on insn fills",
90 "Prefetcher request FIFO parity error",
91 "PRQ address parity error",
92 "PRQ data parity error",
95 "WCB Data parity error",
96 "VB Data ECC or parity error",
98 "Hard L2 Tag ECC error",
99 "Multiple hits on L2 tag",
101 "PRB address parity error"
104 static const char *
const nb_mce_desc[] = {
105 "DRAM ECC error detected on the NB",
106 "CRC error detected on HT link",
107 "Link-defined sync error packets detected on HT link",
110 "Invalid GART PTE entry during GART table walk",
111 "Unsupported atomic RMW received from an IO link",
112 "Watchdog timeout due to lack of progress",
113 "DRAM ECC error detected on the NB",
114 "SVM DMA Exclusion Vector error",
115 "HT data error detected on link",
116 "Protocol error (link, L3, probe filter)",
117 "NB internal arrays parity error",
118 "DRAM addr/ctl signals parity error",
119 "IO link transmission error",
120 "L3 data cache ECC error",
121 "L3 cache tag error",
122 "L3 LRU parity bits error",
123 "ECC Error in the Probe Filter directory"
126 static const char *
const fr_ex_mce_desc[] = {
127 "CPU Watchdog timer expire",
128 "Wakeup array dest tag",
132 "Retire dispatch queue",
133 "Mapper checkpoint array",
134 "Physical register file EX0 port",
135 "Physical register file EX1 port",
136 "Physical register file AG0 port",
137 "Physical register file AG1 port",
138 "Flag register file",
142 static bool f12h_dc_mce(
u16 ec,
u8 xec)
151 pr_cont(
"during L1 linefill from L2.\n");
152 else if (ll ==
LL_L1)
160 static bool f10h_dc_mce(
u16 ec,
u8 xec)
163 pr_cont(
"during data scrub.\n");
166 return f12h_dc_mce(ec, xec);
169 static bool k8_dc_mce(
u16 ec,
u8 xec)
172 pr_cont(
"during system linefill.\n");
176 return f10h_dc_mce(ec, xec);
179 static bool f14h_dc_mce(
u16 ec,
u8 xec)
192 pr_cont(
"Data/Tag parity error due to %s.\n",
193 (r4 ==
R4_DRD ?
"load/hw prf" :
"store"));
196 pr_cont(
"Copyback parity error on a tag miss.\n");
199 pr_cont(
"Tag parity error during snoop.\n");
209 pr_cont(
"System read data error on a ");
231 static bool f15h_dc_mce(
u16 ec,
u8 xec)
239 pr_cont(
"Data Array access error.\n");
243 pr_cont(
"UC error during a linefill from L2/NB.\n");
248 pr_cont(
"STQ access error.\n");
252 pr_cont(
"SCB access error.\n");
260 pr_cont(
"LDQ access error.\n");
269 pr_cont(
"System Read Data Error.\n");
271 pr_cont(
" Internal error condition type %d.\n", xec);
278 static void amd_decode_dc_mce(
struct mce *
m)
289 ((xec == 2) ?
"locked miss"
290 : (xec ?
"multimatch" :
"parity")));
293 }
else if (fam_ops->
dc_mce(ec, xec))
299 static bool k8_ic_mce(
u16 ec,
u8 xec)
308 pr_cont(
"during a linefill from L2.\n");
309 else if (ll == 0x1) {
312 pr_cont(
"Parity error during data load.\n");
316 pr_cont(
"Copyback Parity/Victim error.\n");
333 static bool f14h_ic_mce(
u16 ec,
u8 xec)
339 if (
TT(ec) != 0 ||
LL(ec) != 1)
343 pr_cont(
"Data/tag array parity error for a tag hit.\n");
345 pr_cont(
"Tag error during snoop/victimization.\n");
352 static bool f15h_ic_mce(
u16 ec,
u8 xec)
361 pr_cont(
"%s.\n", f15h_ic_mce_desc[xec]);
365 pr_cont(
"%s.\n", f15h_ic_mce_desc[xec-2]);
369 pr_cont(
"%s.\n", f15h_ic_mce_desc[xec-4]);
373 pr_cont(
"Decoder %s parity error.\n", f15h_ic_mce_desc[xec-4]);
382 static void amd_decode_ic_mce(
struct mce *m)
391 (xec ?
"multimatch" :
"parity error"));
395 pr_cont(
"during %s.\n", (k8 ?
"system linefill" :
"NB data read"));
396 }
else if (fam_ops->
ic_mce(ec, xec))
402 static void amd_decode_bu_mce(
struct mce *m)
410 pr_cont(
" in the write data buffers.\n");
412 pr_cont(
" in the victim data buffers.\n");
415 else if (xec == 0x0) {
417 pr_cont(
": %s error in a Page Descriptor Cache or "
418 "Guest TLB.\n",
TT_MSG(ec));
420 pr_cont(
": %s/ECC error in data read from NB: %s.\n",
426 pr_cont(
": %s error during data copyback.\n",
429 pr_cont(
": %s parity/ECC error during data "
430 "access from L2.\n",
R4_MSG(ec));
444 static void amd_decode_cu_mce(
struct mce *m)
453 pr_cont(
"Data parity TLB read error.\n");
455 pr_cont(
"Poison data provided for TLB fill.\n");
462 pr_cont(
"Error during attempted NB data read.\n");
466 pr_cont(
"%s.\n", f15h_cu_mce_desc[xec - 0x4]);
470 pr_cont(
"%s.\n", f15h_cu_mce_desc[xec - 0x7]);
484 static void amd_decode_ls_mce(
struct mce *m)
490 pr_emerg(
"You shouldn't be seeing an LS MCE on this cpu family,"
491 " please report on LKML.\n");
527 if (xec == 0x0 || xec == 0x8) {
532 pr_cont(
"%s.\n", nb_mce_desc[xec]);
535 nb_bus_decoder(node_id, m);
542 pr_cont(
"GART Table Walk data error.\n");
544 pr_cont(
"DMA Exclusion Vector Table Walk error.\n");
551 pr_cont(
"Compute Unit Data Error.\n");
564 pr_cont(
"%s.\n", nb_mce_desc[xec - offset]);
572 static void amd_decode_fr_mce(
struct mce *m)
577 if (c->
x86 == 0xf || c->
x86 == 0x11)
581 (c->
x86 == 0x15 ?
"Execution Unit" :
"FIROB"));
583 if (xec == 0x0 || xec == 0
xc)
584 pr_cont(
"%s.\n", fr_ex_mce_desc[xec]);
586 pr_cont(
"%s parity error.\n", fr_ex_mce_desc[xec]);
596 static void amd_decode_fp_mce(
struct mce *m)
608 pr_cont(
"Physical Register File");
620 pr_cont(
"Status Register File");
636 static inline void amd_decode_err_code(
u16 ec)
659 static bool amd_filter_mce(
struct mce *m)
666 if (m->
bank == 4 && xec == 0x5 && !report_gart_errors)
674 struct mce *m = (
struct mce *)data;
678 if (amd_filter_mce(m))
695 ecc = (m->
status >> 45) & 0x3;
697 pr_cont(
"|%sECC", ((ecc == 2) ?
"C" :
"U"));
706 amd_decode_dc_mce(m);
710 amd_decode_ic_mce(m);
715 amd_decode_cu_mce(m);
717 amd_decode_bu_mce(m);
721 amd_decode_ls_mce(m);
729 amd_decode_fr_mce(m);
733 amd_decode_fp_mce(m);
740 amd_decode_err_code(m->
status & 0xffff);
750 static int __init mce_amd_init(
void)
757 if (c->
x86 < 0xf || c->
x86 > 0x15)
766 fam_ops->
dc_mce = k8_dc_mce;
767 fam_ops->
ic_mce = k8_ic_mce;
771 fam_ops->
dc_mce = f10h_dc_mce;
772 fam_ops->
ic_mce = k8_ic_mce;
776 fam_ops->
dc_mce = k8_dc_mce;
777 fam_ops->
ic_mce = k8_ic_mce;
781 fam_ops->
dc_mce = f12h_dc_mce;
782 fam_ops->
ic_mce = k8_ic_mce;
786 nb_err_cpumask = 0x3;
787 fam_ops->
dc_mce = f14h_dc_mce;
788 fam_ops->
ic_mce = f14h_ic_mce;
793 fam_ops->
dc_mce = f15h_dc_mce;
794 fam_ops->
ic_mce = f15h_ic_mce;
803 pr_info(
"MCE: In-kernel MCE decoding enabled.\n");
812 static void __exit mce_amd_exit(
void)