27 #include <linux/module.h>
29 #include <linux/pci.h>
31 #include <linux/slab.h>
40 #define I5400_REVISION " Ver: 1.0.0"
42 #define EDAC_MOD_STR "i5400_edac"
/*
 * i5400_printk() - driver-level logging wrapper.
 *
 * Expands to edac_printk() with the string literal "i5400" inserted as the
 * second argument; `level`, `fmt` and the variadic arguments are forwarded
 * unchanged.  The GNU `##arg` form drops the preceding comma when no
 * variadic arguments are supplied.
 */
44 #define i5400_printk(level, fmt, arg...) \
45 edac_printk(level, "i5400", fmt, ##arg)
/*
 * i5400_mc_printk() - per-memory-controller logging wrapper.
 *
 * Expands to edac_mc_chipset_printk() with `mci` first and the string
 * literal "i5400" as the third argument; `level`, `fmt` and the variadic
 * arguments are forwarded unchanged.  The GNU `##arg` form drops the
 * preceding comma when no variadic arguments are supplied.
 */
47 #define i5400_mc_printk(mci, level, fmt, arg...) \
48 edac_mc_chipset_printk(mci, level, "i5400", fmt, ##arg)
/*
 * Topology limits used by this driver.
 *
 * MAX_CHANNELS is derived rather than hard-coded:
 * MAX_BRANCHES * CHANNELS_PER_BRANCH = 2 * 2 = 4.
 */
51 #define MAX_BRANCHES 2
52 #define CHANNELS_PER_BRANCH 2
53 #define DIMMS_PER_CHANNEL 4
54 #define MAX_CHANNELS (MAX_BRANCHES * CHANNELS_PER_BRANCH)
70 #define MAXDIMMPERCH 0x57
75 #define REC_ECC_LOCATOR_ODD(x) ((x) & 0x3fe00)
/*
 * FBD error-register constants.
 *
 * The 0x.. values are presumably PCI configuration-space offsets of the
 * corresponding registers -- TODO confirm against the chipset datasheet.
 * FERR_FAT_FBDCHAN is not an offset: it is a two-bit mask covering
 * bits 29:28 (3 << 28).
 */
82 #define FERR_FAT_FBD 0x98
83 #define FERR_FAT_FBDCHAN (3<<28)
85 #define NERR_FAT_FBD 0x9c
86 #define FERR_NF_FBD 0xa0
89 #define NERR_NF_FBD 0xa4
92 #define EMASK_FBD 0xa8
97 #define MCERR_FBD 0xb8
110 #define AMBPRESENT_0 0x64
111 #define AMBPRESENT_1 0x66
118 #define NRECFGLOG 0x74
119 #define RECFGLOG 0x78
120 #define NRECMEMA 0xbe
121 #define NRECMEMB 0xc0
122 #define NRECFB_DIMMA 0xc4
123 #define NRECFB_DIMMB 0xc8
124 #define NRECFB_DIMMC 0xcc
125 #define NRECFB_DIMMD 0xd0
126 #define NRECFB_DIMME 0xd4
127 #define NRECFB_DIMMF 0xd8
/*
 * RECFB_DIMMA..RECFB_DIMMF constants (presumably register offsets, like the
 * neighbouring defines -- TODO confirm against the chipset datasheet).
 *
 * NOTE(review): RECFB_DIMMA and RECFB_DIMME are both 0xf8.  B..F step by 4
 * starting at 0xec, so if the same stride applies to A it would be 0xe8.
 * Verify the intended value before using RECFB_DIMMA.
 */
131 #define RECFB_DIMMA 0xf8
132 #define RECFB_DIMMB 0xec
133 #define RECFB_DIMMC 0xf0
134 #define RECFB_DIMMD 0xf4
135 #define RECFB_DIMME 0xf8
136 #define RECFB_DIMMF 0xfC
178 static const char *error_name[] = {
179 [0] =
"Memory Write error on non-redundant retry",
180 [1] =
"Memory or FB-DIMM configuration CRC read error",
182 [3] =
"Uncorrectable Data ECC on Replay",
183 [4] =
"Aliased Uncorrectable Non-Mirrored Demand Data ECC",
185 [6] =
"Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
186 [7] =
"Aliased Uncorrectable Patrol Data ECC",
187 [8] =
"Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC",
189 [10] =
"Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
190 [11] =
"Non-Aliased Uncorrectable Patrol Data ECC",
191 [12] =
"Memory Write error on first attempt",
192 [13] =
"FB-DIMM Configuration Write error on first attempt",
193 [14] =
"Memory or FB-DIMM configuration CRC read error",
194 [15] =
"Channel Failed-Over Occurred",
195 [16] =
"Correctable Non-Mirrored Demand Data ECC",
197 [18] =
"Correctable Resilver- or Spare-Copy Data ECC",
198 [19] =
"Correctable Patrol Data ECC",
199 [20] =
"FB-DIMM Northbound parity error on FB-DIMM Sync Status",
200 [21] =
"SPD protocol Error",
201 [22] =
"Non-Redundant Fast Reset Timeout",
202 [23] =
"Refresh error",
203 [24] =
"Memory Write error on redundant retry",
204 [25] =
"Redundant Fast Reset Timeout",
205 [26] =
"Correctable Counter Threshold Exceeded",
206 [27] =
"DIMM-Spare Copy Completed",
207 [28] =
"DIMM-Isolation Completed",
211 #define ERROR_FAT_MASK (EMASK_M1 | \
216 #define ERROR_NF_CORRECTABLE (EMASK_M27 | \
222 #define ERROR_NF_DIMM_SPARE (EMASK_M29 | \
224 #define ERROR_NF_SPD_PROTOCOL (EMASK_M22)
225 #define ERROR_NF_NORTH_CRC (EMASK_M21)
228 #define ERROR_NF_RECOVERABLE (EMASK_M26 | \
242 #define ERROR_NF_UNCORRECTABLE (EMASK_M4)
245 #define ERROR_NF_MASK (ERROR_NF_CORRECTABLE | \
246 ERROR_NF_UNCORRECTABLE | \
247 ERROR_NF_RECOVERABLE | \
248 ERROR_NF_DIMM_SPARE | \
249 ERROR_NF_SPD_PROTOCOL | \
/*
 * ENABLE_EMASK_ALL is the union of the fatal and non-fatal error masks
 * (ERROR_FAT_MASK | ERROR_NF_MASK).
 * FERR_FAT_MASK is a plain alias for ERROR_FAT_MASK (no bit translation).
 */
257 #define ENABLE_EMASK_ALL (ERROR_FAT_MASK | ERROR_NF_MASK)
260 #define FERR_FAT_MASK ERROR_FAT_MASK
263 static inline int to_nf_mask(
unsigned int mask)
268 static inline int from_nf_ferr(
unsigned int mask)
271 (mask & ((1 << 28) - 1) << 3);
/*
 * FERR_NF_* counterparts of the ERROR_NF_* masks: each one is the matching
 * ERROR_NF_* value passed through to_nf_mask().
 *
 * Because to_nf_mask() is a function, these expand to call expressions and
 * are not integer constant expressions; they cannot be used where C requires
 * a compile-time constant (case labels, static initializers, array sizes).
 */
274 #define FERR_NF_MASK to_nf_mask(ERROR_NF_MASK)
275 #define FERR_NF_CORRECTABLE to_nf_mask(ERROR_NF_CORRECTABLE)
276 #define FERR_NF_DIMM_SPARE to_nf_mask(ERROR_NF_DIMM_SPARE)
277 #define FERR_NF_SPD_PROTOCOL to_nf_mask(ERROR_NF_SPD_PROTOCOL)
278 #define FERR_NF_NORTH_CRC to_nf_mask(ERROR_NF_NORTH_CRC)
279 #define FERR_NF_RECOVERABLE to_nf_mask(ERROR_NF_RECOVERABLE)
280 #define FERR_NF_UNCORRECTABLE to_nf_mask(ERROR_NF_UNCORRECTABLE)
/*
 * Field decoders for an MTR value (`mtr`):
 *
 *   MTR_DIMMS_PRESENT         non-zero when bit 10 is set
 *   MTR_DIMMS_ETHROTTLE       non-zero when bit 9 is set
 *   MTR_DRAM_WIDTH            8 when bit 8 is set, otherwise 4
 *   MTR_DRAM_BANKS            8 when bit 6 is set, otherwise 4
 *   MTR_DRAM_BANKS_ADDR_BITS  3 when MTR_DRAM_BANKS is 8, otherwise 2
 *   MTR_DIMM_RANK             bit 5 as 0 or 1
 *   MTR_DIMM_RANK_ADDR_BITS   2 when bit 5 is set, otherwise 1
 *   MTR_DIMM_ROWS             bits 3:2 (0..3)
 *   MTR_DIMM_ROWS_ADDR_BITS   MTR_DIMM_ROWS + 13 (13..16)
 *   MTR_DIMM_COLS             bits 1:0 (0..3)
 *   MTR_DIMM_COLS_ADDR_BITS   MTR_DIMM_COLS + 10 (10..13)
 *
 * MTR_DIMMS_PRESENT and MTR_DIMMS_ETHROTTLE return the masked bit itself
 * (0x400 / 0x200), not 0/1, so use them only in a boolean context.
 * Every macro evaluates `mtr` exactly once.
 */
285 #define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 10))
286 #define MTR_DIMMS_ETHROTTLE(mtr) ((mtr) & (1 << 9))
287 #define MTR_DRAM_WIDTH(mtr) (((mtr) & (1 << 8)) ? 8 : 4)
288 #define MTR_DRAM_BANKS(mtr) (((mtr) & (1 << 6)) ? 8 : 4)
289 #define MTR_DRAM_BANKS_ADDR_BITS(mtr) ((MTR_DRAM_BANKS(mtr) == 8) ? 3 : 2)
290 #define MTR_DIMM_RANK(mtr) (((mtr) >> 5) & 0x1)
291 #define MTR_DIMM_RANK_ADDR_BITS(mtr) (MTR_DIMM_RANK(mtr) ? 2 : 1)
292 #define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3)
293 #define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13)
294 #define MTR_DIMM_COLS(mtr) ((mtr) & 0x3)
295 #define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10)
298 static inline int extract_fbdchan_indx(
u32 x)
300 return (x>>28) & 0x3;
379 return ((info->
nrecmema) >> 12) & 0x7;
383 return ((info->
nrecmema) >> 8) & 0xf;
395 static inline const char *rdwr_str(
int rdwr)
397 return rdwr ?
"Write" :
"Read";
401 return ((info->
nrecmemb) >> 16) & 0x1fff;
409 return ((info->
recmema) >> 12) & 0x7;
413 return ((info->
recmema) >> 8) & 0xf;
421 return ((info->
recmemb) >> 16) & 0x1fff;
425 return (info->
recmemb) & 0xffff;
428 static struct edac_pci_ctl_info *i5400_pci;
511 static void i5400_proccess_non_recoverable_info(
struct mem_ctl_info *mci,
513 unsigned long allErrors)
534 type =
"NON-FATAL uncorrected";
536 type =
"NON-FATAL recoverable";
544 bank = nrec_bank(info);
545 rank = nrec_rank(info);
546 buf_id = nrec_buf_id(info);
547 rdwr = nrec_rdwr(info);
548 ras = nrec_ras(info);
549 cas = nrec_cas(info);
551 edac_dbg(0,
"\t\tDIMM= %d Channels= %d,%d (Branch= %d DRAM Bank= %d Buffer ID = %d rdwr= %s ras= %d cas= %d)\n",
552 rank, channel, channel + 1, branch >> 1, bank,
553 buf_id, rdwr_str(rdwr), ras, cas);
560 "Bank=%d Buffer ID = %d RAS=%d CAS=%d Err=0x%lx (%s)",
561 bank, buf_id, ras, cas, allErrors, error_name[errnum]);
564 branch >> 1, -1, rank,
565 rdwr ?
"Write error" :
"Read error",
576 static void i5400_process_nonfatal_error_info(
struct mem_ctl_info *mci,
580 unsigned long allErrors;
597 i5400_proccess_non_recoverable_info(mci, info, allErrors);
603 edac_dbg(0,
"\tCorrected bits= 0x%lx\n", allErrors);
615 bank = rec_bank(info);
616 rank = rec_rank(info);
617 rdwr = rec_rdwr(info);
624 edac_dbg(0,
"\t\tDIMM= %d Channel= %d (Branch %d DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
625 rank, channel, branch >> 1, bank,
626 rdwr_str(rdwr), ras, cas);
630 "Corrected error (Branch=%d DRAM-Bank=%d RDWR=%s "
631 "RAS=%d CAS=%d, CE Err=0x%lx (%s))",
632 branch >> 1, bank, rdwr_str(rdwr), ras, cas,
633 allErrors, error_name[errnum]);
636 branch >> 1, channel % 2, rank,
637 rdwr ?
"Write error" :
"Read error",
649 "Non-Fatal misc error (Branch=%d Err=%#lx (%s))",
650 branch >> 1, allErrors, error_name[errnum]);
657 static void i5400_process_error_info(
struct mem_ctl_info *mci,
663 i5400_proccess_non_recoverable_info(mci, info, allErrors);
666 i5400_process_nonfatal_error_info(mci, info);
679 i5400_get_error_info(mci, &info);
690 i5400_get_error_info(mci, &info);
691 i5400_process_error_info(mci, &info);
717 static int i5400_get_devices(
struct mem_ctl_info *mci,
int dev_idx)
736 "'system address,Process Bus' "
738 "vendor 0x%x device 0x%x ERR func 1 "
758 "'system address,Process Bus' "
760 "vendor 0x%x device 0x%x ERR func 2 "
775 edac_dbg(1,
"System Address, processor bus- PCI Bus ID: %s %x:%x\n",
778 edac_dbg(1,
"Branchmap, control and errors - PCI Bus ID: %s %x:%x\n",
782 edac_dbg(1,
"FSB Error Regs - PCI Bus ID: %s %x:%x\n",
790 "MC: 'BRANCH 0' device not found:"
791 "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n",
809 "MC: 'BRANCH 1' device not found:"
810 "vendor 0x%x device 0x%x Func 0 "
837 static int determine_amb_present_reg(
struct i5400_pvt *pvt,
int channel)
861 static int determine_mtr(
struct i5400_pvt *pvt,
int dimm,
int channel)
872 edac_dbg(0,
"ERROR: trying to access an invalid dimm: %d\n",
887 static void decode_mtr(
int slot_row,
u16 mtr)
893 edac_dbg(2,
"\tMTR%d=0x%x: DIMMs are %sPresent\n",
894 slot_row, mtr, ans ?
"" :
"NOT ");
900 edac_dbg(2,
"\t\tELECTRICAL THROTTLING is %s\n",
918 static void handle_channel(
struct i5400_pvt *pvt,
int dimm,
int channel,
925 mtr = determine_mtr(pvt, dimm, channel);
927 amb_present_reg = determine_amb_present_reg(pvt, channel);
930 if (amb_present_reg & (1 << dimm)) {
956 static void calculate_dimm_size(
struct i5400_pvt *pvt)
960 char *
p, *mem_buffer;
979 for (dimm = max_dimms - 1; dimm >= 0; dimm--) {
984 n =
snprintf(p, space,
"---------------------------"
985 "-------------------------------");
992 n =
snprintf(p, space,
"dimm %2d ", dimm);
996 for (channel = 0; channel < pvt->
maxch; channel++) {
998 handle_channel(pvt, dimm, channel, dinfo);
1009 n =
snprintf(p, space,
"---------------------------"
1010 "-------------------------------");
1021 for (channel = 0; channel < pvt->
maxch; channel++) {
1022 n =
snprintf(p, space,
"channel %d | ", channel);
1035 n =
snprintf(p, space,
" branch %d | ", branch);
1051 static void i5400_get_mc_regs(
struct mem_ctl_info *mci)
1071 edac_dbg(2,
"AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n",
1077 edac_dbg(2,
"\nTOLM (number of 256M regions) =%u (0x%x)\n",
1080 actual_tolm = (
u32) ((1000
l * pvt->
tolm) >> (30 - 28));
1081 edac_dbg(2,
"Actual TOLM byte addr=%u.%03u GB (0x%x)\n",
1082 actual_tolm/1000, actual_tolm % 1000, pvt->
tolm << 28);
1088 limit = (pvt->
mir0 >> 4) & 0x0fff;
1089 way0 = pvt->
mir0 & 0x1;
1090 way1 = pvt->
mir0 & 0x2;
1091 edac_dbg(2,
"MIR0: limit= 0x%x WAY1= %u WAY0= %x\n",
1093 limit = (pvt->
mir1 >> 4) & 0xfff;
1094 way0 = pvt->
mir1 & 0x1;
1095 way1 = pvt->
mir1 & 0x2;
1096 edac_dbg(2,
"MIR1: limit= 0x%x WAY1= %u WAY0= %x\n",
1101 int where =
MTR0 + (slot_row *
sizeof(
u16));
1104 pci_read_config_word(pvt->
branch_0, where,
1107 edac_dbg(2,
"MTR%d where=0x%x B0 value=0x%x\n",
1108 slot_row, where, pvt->
b0_mtr[slot_row]);
1111 pvt->
b1_mtr[slot_row] = 0;
1116 pci_read_config_word(pvt->
branch_1, where,
1118 edac_dbg(2,
"MTR%d where=0x%x B1 value=0x%x\n",
1119 slot_row, where, pvt->
b1_mtr[slot_row]);
1123 edac_dbg(2,
"Memory Technology Registers:\n");
1126 decode_mtr(slot_row, pvt->
b0_mtr[slot_row]);
1143 decode_mtr(slot_row, pvt->
b1_mtr[slot_row]);
1147 edac_dbg(2,
"\t\tAMB-Branch 1-present0 0x%x:\n",
1151 edac_dbg(2,
"\t\tAMB-Branch 1-present1 0x%x:\n",
1157 calculate_dimm_size(pvt);
1181 channel_count = pvt->
maxch;
1190 for (channel = 0; channel < mci->
layers[0].size * mci->
layers[1].size;
1192 for (slot = 0; slot < mci->
layers[2].size; slot++) {
1193 mtr = determine_mtr(pvt, slot, channel);
1200 channel / 2, channel % 2, slot);
1204 edac_dbg(2,
"dimm (branch %d channel %d slot %d): %d.%03d GB\n",
1205 channel / 2, channel % 2, slot,
1206 size_mb / 1000, size_mb % 1000);
1229 return (ndimms == 0);
1236 static void i5400_enable_error_reporting(
struct mem_ctl_info *mci)
1261 static int i5400_probe1(
struct pci_dev *pdev,
int dev_idx)
1270 edac_dbg(0,
"MC: pdev bus %u dev=0x%x fn=0x%x\n",
1285 layers[0].is_virt_csrow =
false;
1288 layers[1].is_virt_csrow =
false;
1291 layers[2].is_virt_csrow =
true;
1296 edac_dbg(0,
"MC: mci = %p\n", mci);
1306 if (i5400_get_devices(mci, dev_idx))
1310 i5400_get_mc_regs(mci);
1327 if (i5400_init_dimms(mci)) {
1328 edac_dbg(0,
"MC: Setting mci->edac_cap to EDAC_FLAG_NONE because i5400_init_dimms() returned nonzero value\n");
1331 edac_dbg(1,
"MC: Enable error reporting now\n");
1332 i5400_enable_error_reporting(mci);
1337 edac_dbg(0,
"MC: failed edac_mc_add_mc()\n");
1344 i5400_clear_error(mci);
1350 "%s(): Unable to create PCI control\n",
1353 "%s(): PCI error report via EDAC not setup\n",
1362 i5400_put_devices(mci);
1410 i5400_put_devices(mci);
1432 .name =
"i5400_edac",
1433 .probe = i5400_init_one,
1435 .id_table = i5400_pci_tbl,
1442 static int __init i5400_init(
void)
1451 pci_rc = pci_register_driver(&i5400_driver);
1453 return (pci_rc < 0) ? pci_rc : 0;
1460 static void __exit i5400_exit(
void)