Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
i5000_edac.c
Go to the documentation of this file.
1 /*
2  * Intel 5000(P/V/X) class Memory Controllers kernel module
3  *
4  * This file may be distributed under the terms of the
5  * GNU General Public License.
6  *
7  * Written by Douglas Thompson Linux Networx (http://lnxi.com)
9  *
10  * This module is based on the following document:
11  *
12  * Intel 5000X Chipset Memory Controller Hub (MCH) - Datasheet
13  * http://developer.intel.com/design/chipsets/datashts/313070.htm
14  *
15  */
16 
17 #include <linux/module.h>
18 #include <linux/init.h>
19 #include <linux/pci.h>
20 #include <linux/pci_ids.h>
21 #include <linux/slab.h>
22 #include <linux/edac.h>
23 #include <asm/mmzone.h>
24 
25 #include "edac_core.h"
26 
27 /*
28  * Alter this version for the I5000 module when modifications are made
29  */
30 #define I5000_REVISION " Ver: 2.0.12"
31 #define EDAC_MOD_STR "i5000_edac"
32 
33 #define i5000_printk(level, fmt, arg...) \
34  edac_printk(level, "i5000", fmt, ##arg)
35 
36 #define i5000_mc_printk(mci, level, fmt, arg...) \
37  edac_mc_chipset_printk(mci, level, "i5000", fmt, ##arg)
38 
39 #ifndef PCI_DEVICE_ID_INTEL_FBD_0
40 #define PCI_DEVICE_ID_INTEL_FBD_0 0x25F5
41 #endif
42 #ifndef PCI_DEVICE_ID_INTEL_FBD_1
43 #define PCI_DEVICE_ID_INTEL_FBD_1 0x25F6
44 #endif
45 
46 /* Device 16,
47  * Function 0: System Address
48  * Function 1: Memory Branch Map, Control, Errors Register
49  * Function 2: FSB Error Registers
50  *
51  * All 3 functions of Device 16 (0,1,2) share the SAME DID
52  */
53 #define PCI_DEVICE_ID_INTEL_I5000_DEV16 0x25F0
54 
55 /* OFFSETS for Function 0 */
56 
57 /* OFFSETS for Function 1 */
58 #define AMBASE 0x48
59 #define MAXCH 0x56
60 #define MAXDIMMPERCH 0x57
61 #define TOLM 0x6C
62 #define REDMEMB 0x7C
63 #define RED_ECC_LOCATOR(x) ((x) & 0x3FFFF)
64 #define REC_ECC_LOCATOR_EVEN(x) ((x) & 0x001FF)
65 #define REC_ECC_LOCATOR_ODD(x) ((x) & 0x3FE00)
66 #define MIR0 0x80
67 #define MIR1 0x84
68 #define MIR2 0x88
69 #define AMIR0 0x8C
70 #define AMIR1 0x90
71 #define AMIR2 0x94
72 
73 #define FERR_FAT_FBD 0x98
74 #define NERR_FAT_FBD 0x9C
75 #define EXTRACT_FBDCHAN_INDX(x) (((x)>>28) & 0x3)
76 #define FERR_FAT_FBDCHAN 0x30000000
77 #define FERR_FAT_M3ERR 0x00000004
78 #define FERR_FAT_M2ERR 0x00000002
79 #define FERR_FAT_M1ERR 0x00000001
80 #define FERR_FAT_MASK (FERR_FAT_M1ERR | \
81  FERR_FAT_M2ERR | \
82  FERR_FAT_M3ERR)
83 
84 #define FERR_NF_FBD 0xA0
85 
86 /* Thermal and SPD or BFD errors */
87 #define FERR_NF_M28ERR 0x01000000
88 #define FERR_NF_M27ERR 0x00800000
89 #define FERR_NF_M26ERR 0x00400000
90 #define FERR_NF_M25ERR 0x00200000
91 #define FERR_NF_M24ERR 0x00100000
92 #define FERR_NF_M23ERR 0x00080000
93 #define FERR_NF_M22ERR 0x00040000
94 #define FERR_NF_M21ERR 0x00020000
95 
96 /* Correctable errors */
97 #define FERR_NF_M20ERR 0x00010000
98 #define FERR_NF_M19ERR 0x00008000
99 #define FERR_NF_M18ERR 0x00004000
100 #define FERR_NF_M17ERR 0x00002000
101 
102 /* Non-Retry or redundant Retry errors */
103 #define FERR_NF_M16ERR 0x00001000
104 #define FERR_NF_M15ERR 0x00000800
105 #define FERR_NF_M14ERR 0x00000400
106 #define FERR_NF_M13ERR 0x00000200
107 
108 /* Uncorrectable errors */
109 #define FERR_NF_M12ERR 0x00000100
110 #define FERR_NF_M11ERR 0x00000080
111 #define FERR_NF_M10ERR 0x00000040
112 #define FERR_NF_M9ERR 0x00000020
113 #define FERR_NF_M8ERR 0x00000010
114 #define FERR_NF_M7ERR 0x00000008
115 #define FERR_NF_M6ERR 0x00000004
116 #define FERR_NF_M5ERR 0x00000002
117 #define FERR_NF_M4ERR 0x00000001
118 
119 #define FERR_NF_UNCORRECTABLE (FERR_NF_M12ERR | \
120  FERR_NF_M11ERR | \
121  FERR_NF_M10ERR | \
122  FERR_NF_M9ERR | \
123  FERR_NF_M8ERR | \
124  FERR_NF_M7ERR | \
125  FERR_NF_M6ERR | \
126  FERR_NF_M5ERR | \
127  FERR_NF_M4ERR)
128 #define FERR_NF_CORRECTABLE (FERR_NF_M20ERR | \
129  FERR_NF_M19ERR | \
130  FERR_NF_M18ERR | \
131  FERR_NF_M17ERR)
132 #define FERR_NF_DIMM_SPARE (FERR_NF_M27ERR | \
133  FERR_NF_M28ERR)
134 #define FERR_NF_THERMAL (FERR_NF_M26ERR | \
135  FERR_NF_M25ERR | \
136  FERR_NF_M24ERR | \
137  FERR_NF_M23ERR)
138 #define FERR_NF_SPD_PROTOCOL (FERR_NF_M22ERR)
139 #define FERR_NF_NORTH_CRC (FERR_NF_M21ERR)
140 #define FERR_NF_NON_RETRY (FERR_NF_M13ERR | \
141  FERR_NF_M14ERR | \
142  FERR_NF_M15ERR)
143 
144 #define NERR_NF_FBD 0xA4
145 #define FERR_NF_MASK (FERR_NF_UNCORRECTABLE | \
146  FERR_NF_CORRECTABLE | \
147  FERR_NF_DIMM_SPARE | \
148  FERR_NF_THERMAL | \
149  FERR_NF_SPD_PROTOCOL | \
150  FERR_NF_NORTH_CRC | \
151  FERR_NF_NON_RETRY)
152 
153 #define EMASK_FBD 0xA8
154 #define EMASK_FBD_M28ERR 0x08000000
155 #define EMASK_FBD_M27ERR 0x04000000
156 #define EMASK_FBD_M26ERR 0x02000000
157 #define EMASK_FBD_M25ERR 0x01000000
158 #define EMASK_FBD_M24ERR 0x00800000
159 #define EMASK_FBD_M23ERR 0x00400000
160 #define EMASK_FBD_M22ERR 0x00200000
161 #define EMASK_FBD_M21ERR 0x00100000
162 #define EMASK_FBD_M20ERR 0x00080000
163 #define EMASK_FBD_M19ERR 0x00040000
164 #define EMASK_FBD_M18ERR 0x00020000
165 #define EMASK_FBD_M17ERR 0x00010000
166 
167 #define EMASK_FBD_M15ERR 0x00004000
168 #define EMASK_FBD_M14ERR 0x00002000
169 #define EMASK_FBD_M13ERR 0x00001000
170 #define EMASK_FBD_M12ERR 0x00000800
171 #define EMASK_FBD_M11ERR 0x00000400
172 #define EMASK_FBD_M10ERR 0x00000200
173 #define EMASK_FBD_M9ERR 0x00000100
174 #define EMASK_FBD_M8ERR 0x00000080
175 #define EMASK_FBD_M7ERR 0x00000040
176 #define EMASK_FBD_M6ERR 0x00000020
177 #define EMASK_FBD_M5ERR 0x00000010
178 #define EMASK_FBD_M4ERR 0x00000008
179 #define EMASK_FBD_M3ERR 0x00000004
180 #define EMASK_FBD_M2ERR 0x00000002
181 #define EMASK_FBD_M1ERR 0x00000001
182 
183 #define ENABLE_EMASK_FBD_FATAL_ERRORS (EMASK_FBD_M1ERR | \
184  EMASK_FBD_M2ERR | \
185  EMASK_FBD_M3ERR)
186 
187 #define ENABLE_EMASK_FBD_UNCORRECTABLE (EMASK_FBD_M4ERR | \
188  EMASK_FBD_M5ERR | \
189  EMASK_FBD_M6ERR | \
190  EMASK_FBD_M7ERR | \
191  EMASK_FBD_M8ERR | \
192  EMASK_FBD_M9ERR | \
193  EMASK_FBD_M10ERR | \
194  EMASK_FBD_M11ERR | \
195  EMASK_FBD_M12ERR)
196 #define ENABLE_EMASK_FBD_CORRECTABLE (EMASK_FBD_M17ERR | \
197  EMASK_FBD_M18ERR | \
198  EMASK_FBD_M19ERR | \
199  EMASK_FBD_M20ERR)
200 #define ENABLE_EMASK_FBD_DIMM_SPARE (EMASK_FBD_M27ERR | \
201  EMASK_FBD_M28ERR)
202 #define ENABLE_EMASK_FBD_THERMALS (EMASK_FBD_M26ERR | \
203  EMASK_FBD_M25ERR | \
204  EMASK_FBD_M24ERR | \
205  EMASK_FBD_M23ERR)
206 #define ENABLE_EMASK_FBD_SPD_PROTOCOL (EMASK_FBD_M22ERR)
207 #define ENABLE_EMASK_FBD_NORTH_CRC (EMASK_FBD_M21ERR)
208 #define ENABLE_EMASK_FBD_NON_RETRY (EMASK_FBD_M15ERR | \
209  EMASK_FBD_M14ERR | \
210  EMASK_FBD_M13ERR)
211 
212 #define ENABLE_EMASK_ALL (ENABLE_EMASK_FBD_NON_RETRY | \
213  ENABLE_EMASK_FBD_NORTH_CRC | \
214  ENABLE_EMASK_FBD_SPD_PROTOCOL | \
215  ENABLE_EMASK_FBD_THERMALS | \
216  ENABLE_EMASK_FBD_DIMM_SPARE | \
217  ENABLE_EMASK_FBD_FATAL_ERRORS | \
218  ENABLE_EMASK_FBD_CORRECTABLE | \
219  ENABLE_EMASK_FBD_UNCORRECTABLE)
220 
221 #define ERR0_FBD 0xAC
222 #define ERR1_FBD 0xB0
223 #define ERR2_FBD 0xB4
224 #define MCERR_FBD 0xB8
225 #define NRECMEMA 0xBE
226 #define NREC_BANK(x) (((x)>>12) & 0x7)
227 #define NREC_RDWR(x) (((x)>>11) & 1)
228 #define NREC_RANK(x) (((x)>>8) & 0x7)
229 #define NRECMEMB 0xC0
230 #define NREC_CAS(x) (((x)>>16) & 0xFFFFFF)
231 #define NREC_RAS(x) ((x) & 0x7FFF)
232 #define NRECFGLOG 0xC4
233 #define NREEECFBDA 0xC8
234 #define NREEECFBDB 0xCC
235 #define NREEECFBDC 0xD0
236 #define NREEECFBDD 0xD4
237 #define NREEECFBDE 0xD8
238 #define REDMEMA 0xDC
239 #define RECMEMA 0xE2
240 #define REC_BANK(x) (((x)>>12) & 0x7)
241 #define REC_RDWR(x) (((x)>>11) & 1)
242 #define REC_RANK(x) (((x)>>8) & 0x7)
243 #define RECMEMB 0xE4
244 #define REC_CAS(x) (((x)>>16) & 0xFFFFFF)
245 #define REC_RAS(x) ((x) & 0x7FFF)
246 #define RECFGLOG 0xE8
247 #define RECFBDA 0xEC
248 #define RECFBDB 0xF0
249 #define RECFBDC 0xF4
250 #define RECFBDD 0xF8
251 #define RECFBDE 0xFC
252 
253 /* OFFSETS for Function 2 */
254 
255 /*
256  * Device 21,
257  * Function 0: Memory Map Branch 0
258  *
259  * Device 22,
260  * Function 0: Memory Map Branch 1
261  */
262 #define PCI_DEVICE_ID_I5000_BRANCH_0 0x25F5
263 #define PCI_DEVICE_ID_I5000_BRANCH_1 0x25F6
264 
265 #define AMB_PRESENT_0 0x64
266 #define AMB_PRESENT_1 0x66
267 #define MTR0 0x80
268 #define MTR1 0x84
269 #define MTR2 0x88
270 #define MTR3 0x8C
271 
272 #define NUM_MTRS 4
273 #define CHANNELS_PER_BRANCH 2
274 #define MAX_BRANCHES 2
275 
276 /* Defines to extract the various fields from the
277  * MTRx - Memory Technology Registers
278  *
 * Each macro takes the raw MTR value and decodes one field of it; the
 * *_ADDR_BITS variants convert the decoded field into a count of
 * address bits.
 */
279 #define MTR_DIMMS_PRESENT(mtr) ((mtr) & (0x1 << 8)) /* non-zero when bit 8 is set */
280 #define MTR_DRAM_WIDTH(mtr) ((((mtr) >> 6) & 0x1) ? 8 : 4) /* bit 6: x8 when set, else x4 */
281 #define MTR_DRAM_BANKS(mtr) ((((mtr) >> 5) & 0x1) ? 8 : 4) /* bit 5: 8 banks when set, else 4 */
282 #define MTR_DRAM_BANKS_ADDR_BITS(mtr) ((MTR_DRAM_BANKS(mtr) == 8) ? 3 : 2) /* 3 bits for 8 banks, 2 for 4 */
283 #define MTR_DIMM_RANK(mtr) (((mtr) >> 4) & 0x1) /* bit 4: 1 = double rank, 0 = single */
284 #define MTR_DIMM_RANK_ADDR_BITS(mtr) (MTR_DIMM_RANK(mtr) ? 2 : 1) /* 2 when bit 4 is set, else 1 */
285 #define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3) /* bits 3:2, encoded row count */
286 #define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13) /* encoding 0 means 13 row bits */
287 #define MTR_DIMM_COLS(mtr) ((mtr) & 0x3) /* bits 1:0, encoded column count */
288 #define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10) /* encoding 0 means 10 column bits */
289 
290 /* enables the report of miscellaneous messages as CE errors - default off */
291 static int misc_messages;
292 
293 /* Enumeration of supported devices */
295  I5000P = 0,
296  I5000V = 1, /* future */
297  I5000X = 2 /* future */
298 };
299 
300 /* Device name and register DID (Device ID) */
302  const char *ctl_name; /* name for this device */
303  u16 fsb_mapping_errors; /* DID for the branchmap,control */
304 };
305 
306 /* Table of devices attributes supported by this driver */
307 static const struct i5000_dev_info i5000_devs[] = {
308  [I5000P] = {
309  .ctl_name = "I5000",
310  .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I5000_DEV16,
311  },
312 };
313 
315  int megabytes; /* size, 0 means not present */
317 };
318 
319 #define MAX_CHANNELS 6 /* max possible channels */
320 #define MAX_CSROWS (8*2) /* max possible csrows per channel */
321 
322 /* driver private data structure */
323 struct i5000_pvt {
324  struct pci_dev *system_address; /* 16.0 */
325  struct pci_dev *branchmap_werrors; /* 16.1 */
326  struct pci_dev *fsb_error_regs; /* 16.2 */
327  struct pci_dev *branch_0; /* 21.0 */
328  struct pci_dev *branch_1; /* 22.0 */
329 
330  u16 tolm; /* top of low memory */
331  union {
332  u64 ambase; /* AMB BAR */
333  struct {
336  } u __packed;
337  };
338 
340 
341  u16 b0_mtr[NUM_MTRS]; /* Memory Technology Reg */
342  u16 b0_ambpresent0; /* Branch 0, Channel 0 */
343  u16 b0_ambpresent1; /* Branch 0, Channel 1 */
344 
345  u16 b1_mtr[NUM_MTRS]; /* Memory Technology Reg */
346  u16 b1_ambpresent0; /* Branch 1, Channel 0 */
347  u16 b1_ambpresent1; /* Branch 1, Channel 1 */
348 
349  /* DIMM information matrix, allocating architecture maximums */
351 
352  /* Actual values for this controller */
353  int maxch; /* Max channels */
354  int maxdimmperch; /* Max DIMMs per channel */
355 };
356 
357 /* I5000 MCH error information retrieved from Hardware */
359 
360  /* These registers are always read from the MC */
361  u32 ferr_fat_fbd; /* First Errors Fatal */
362  u32 nerr_fat_fbd; /* Next Errors Fatal */
363  u32 ferr_nf_fbd; /* First Errors Non-Fatal */
364  u32 nerr_nf_fbd; /* Next Errors Non-Fatal */
365 
366  /* These registers are input ONLY if there was a Recoverable Error */
367  u32 redmemb; /* Recoverable Mem Data Error log B */
368  u16 recmema; /* Recoverable Mem Error log A */
369  u32 recmemb; /* Recoverable Mem Error log B */
370 
371  /* These registers are input ONLY if there was a
372  * Non-Recoverable Error */
373  u16 nrecmema; /* Non-Recoverable Mem log A */
374  u16 nrecmemb; /* Non-Recoverable Mem log B */
375 
376 };
377 
378 static struct edac_pci_ctl_info *i5000_pci;
379 
380 /*
381  * i5000_get_error_info Retrieve the hardware error information from
382  * the hardware and cache it in the 'info'
383  * structure
384  */
385 static void i5000_get_error_info(struct mem_ctl_info *mci,
386  struct i5000_error_info *info)
387 {
388  struct i5000_pvt *pvt;
389  u32 value;
390 
391  pvt = mci->pvt_info;
392 
393  /* read in the 1st FATAL error register */
394  pci_read_config_dword(pvt->branchmap_werrors, FERR_FAT_FBD, &value);
395 
396  /* Mask only the bits that the doc says are valid
397  */
398  value &= (FERR_FAT_FBDCHAN | FERR_FAT_MASK);
399 
400  /* If there is an error, then read in the */
401  /* NEXT FATAL error register and the Memory Error Log Register A */
402  if (value & FERR_FAT_MASK) {
403  info->ferr_fat_fbd = value;
404 
405  /* harvest the various error data we need */
406  pci_read_config_dword(pvt->branchmap_werrors,
407  NERR_FAT_FBD, &info->nerr_fat_fbd);
408  pci_read_config_word(pvt->branchmap_werrors,
409  NRECMEMA, &info->nrecmema);
410  pci_read_config_word(pvt->branchmap_werrors,
411  NRECMEMB, &info->nrecmemb);
412 
413  /* Clear the error bits, by writing them back */
414  pci_write_config_dword(pvt->branchmap_werrors,
415  FERR_FAT_FBD, value);
416  } else {
417  info->ferr_fat_fbd = 0;
418  info->nerr_fat_fbd = 0;
419  info->nrecmema = 0;
420  info->nrecmemb = 0;
421  }
422 
423  /* read in the 1st NON-FATAL error register */
424  pci_read_config_dword(pvt->branchmap_werrors, FERR_NF_FBD, &value);
425 
426  /* If there is an error, then read in the 1st NON-FATAL error
427  * register as well */
428  if (value & FERR_NF_MASK) {
429  info->ferr_nf_fbd = value;
430 
431  /* harvest the various error data we need */
432  pci_read_config_dword(pvt->branchmap_werrors,
433  NERR_NF_FBD, &info->nerr_nf_fbd);
434  pci_read_config_word(pvt->branchmap_werrors,
435  RECMEMA, &info->recmema);
436  pci_read_config_dword(pvt->branchmap_werrors,
437  RECMEMB, &info->recmemb);
438  pci_read_config_dword(pvt->branchmap_werrors,
439  REDMEMB, &info->redmemb);
440 
441  /* Clear the error bits, by writing them back */
442  pci_write_config_dword(pvt->branchmap_werrors,
443  FERR_NF_FBD, value);
444  } else {
445  info->ferr_nf_fbd = 0;
446  info->nerr_nf_fbd = 0;
447  info->recmema = 0;
448  info->recmemb = 0;
449  info->redmemb = 0;
450  }
451 }
452 
453 /*
454  * i5000_process_fatal_error_info(struct mem_ctl_info *mci,
455  * struct i5000_error_info *info,
456  * int handle_errors);
457  *
458  * handle the Intel FATAL errors, if any
459  */
460 static void i5000_process_fatal_error_info(struct mem_ctl_info *mci,
461  struct i5000_error_info *info,
462  int handle_errors)
463 {
464  char msg[EDAC_MC_LABEL_LEN + 1 + 160];
465  char *specific = NULL;
466  u32 allErrors;
467  int channel;
468  int bank;
469  int rank;
470  int rdwr;
471  int ras, cas;
472 
473  /* mask off the Error bits that are possible */
474  allErrors = (info->ferr_fat_fbd & FERR_FAT_MASK);
475  if (!allErrors)
476  return; /* if no error, return now */
477 
478  channel = EXTRACT_FBDCHAN_INDX(info->ferr_fat_fbd);
479 
480  /* Use the NON-Recoverable macros to extract data */
481  bank = NREC_BANK(info->nrecmema);
482  rank = NREC_RANK(info->nrecmema);
483  rdwr = NREC_RDWR(info->nrecmema);
484  ras = NREC_RAS(info->nrecmemb);
485  cas = NREC_CAS(info->nrecmemb);
486 
487  edac_dbg(0, "\t\tCSROW= %d Channel= %d (DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
488  rank, channel, bank,
489  rdwr ? "Write" : "Read", ras, cas);
490 
491  /* Only 1 bit will be on */
492  switch (allErrors) {
493  case FERR_FAT_M1ERR:
494  specific = "Alert on non-redundant retry or fast "
495  "reset timeout";
496  break;
497  case FERR_FAT_M2ERR:
498  specific = "Northbound CRC error on non-redundant "
499  "retry";
500  break;
501  case FERR_FAT_M3ERR:
502  {
503  static int done;
504 
505  /*
506  * This error is generated to inform that the intelligent
507  * throttling is disabled and the temperature passed the
508  * specified middle point. Since this is something the BIOS
509  * should take care of, we'll warn only once to avoid
510  * worthlessly flooding the log.
511  */
512  if (done)
513  return;
514  done++;
515 
516  specific = ">Tmid Thermal event with intelligent "
517  "throttling disabled";
518  }
519  break;
520  }
521 
522  /* Form out message */
523  snprintf(msg, sizeof(msg),
524  "Bank=%d RAS=%d CAS=%d FATAL Err=0x%x (%s)",
525  bank, ras, cas, allErrors, specific);
526 
527  /* Call the helper to output message */
528  edac_mc_handle_error(HW_EVENT_ERR_FATAL, mci, 1, 0, 0, 0,
529  channel >> 1, channel & 1, rank,
530  rdwr ? "Write error" : "Read error",
531  msg);
532 }
533 
534 /*
535  * i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
536  * struct i5000_error_info *info,
537  * int handle_errors);
538  *
539  * handle the Intel NON-FATAL errors, if any
540  */
541 static void i5000_process_nonfatal_error_info(struct mem_ctl_info *mci,
542  struct i5000_error_info *info,
543  int handle_errors)
544 {
545  char msg[EDAC_MC_LABEL_LEN + 1 + 170];
546  char *specific = NULL;
547  u32 allErrors;
548  u32 ue_errors;
549  u32 ce_errors;
550  u32 misc_errors;
551  int branch;
552  int channel;
553  int bank;
554  int rank;
555  int rdwr;
556  int ras, cas;
557 
558  /* mask off the Error bits that are possible */
559  allErrors = (info->ferr_nf_fbd & FERR_NF_MASK);
560  if (!allErrors)
561  return; /* if no error, return now */
562 
563  /* ONLY ONE of the possible error bits will be set, as per the docs */
564  ue_errors = allErrors & FERR_NF_UNCORRECTABLE;
565  if (ue_errors) {
566  edac_dbg(0, "\tUncorrected bits= 0x%x\n", ue_errors);
567 
568  branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd);
569 
570  /*
571  * According with i5000 datasheet, bit 28 has no significance
572  * for errors M4Err-M12Err and M17Err-M21Err, on FERR_NF_FBD
573  */
574  channel = branch & 2;
575 
576  bank = NREC_BANK(info->nrecmema);
577  rank = NREC_RANK(info->nrecmema);
578  rdwr = NREC_RDWR(info->nrecmema);
579  ras = NREC_RAS(info->nrecmemb);
580  cas = NREC_CAS(info->nrecmemb);
581 
582  edac_dbg(0, "\t\tCSROW= %d Channels= %d,%d (Branch= %d DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
583  rank, channel, channel + 1, branch >> 1, bank,
584  rdwr ? "Write" : "Read", ras, cas);
585 
586  switch (ue_errors) {
587  case FERR_NF_M12ERR:
588  specific = "Non-Aliased Uncorrectable Patrol Data ECC";
589  break;
590  case FERR_NF_M11ERR:
591  specific = "Non-Aliased Uncorrectable Spare-Copy "
592  "Data ECC";
593  break;
594  case FERR_NF_M10ERR:
595  specific = "Non-Aliased Uncorrectable Mirrored Demand "
596  "Data ECC";
597  break;
598  case FERR_NF_M9ERR:
599  specific = "Non-Aliased Uncorrectable Non-Mirrored "
600  "Demand Data ECC";
601  break;
602  case FERR_NF_M8ERR:
603  specific = "Aliased Uncorrectable Patrol Data ECC";
604  break;
605  case FERR_NF_M7ERR:
606  specific = "Aliased Uncorrectable Spare-Copy Data ECC";
607  break;
608  case FERR_NF_M6ERR:
609  specific = "Aliased Uncorrectable Mirrored Demand "
610  "Data ECC";
611  break;
612  case FERR_NF_M5ERR:
613  specific = "Aliased Uncorrectable Non-Mirrored Demand "
614  "Data ECC";
615  break;
616  case FERR_NF_M4ERR:
617  specific = "Uncorrectable Data ECC on Replay";
618  break;
619  }
620 
621  /* Form out message */
622  snprintf(msg, sizeof(msg),
623  "Rank=%d Bank=%d RAS=%d CAS=%d, UE Err=0x%x (%s)",
624  rank, bank, ras, cas, ue_errors, specific);
625 
626  /* Call the helper to output message */
628  channel >> 1, -1, rank,
629  rdwr ? "Write error" : "Read error",
630  msg);
631  }
632 
633  /* Check correctable errors */
634  ce_errors = allErrors & FERR_NF_CORRECTABLE;
635  if (ce_errors) {
636  edac_dbg(0, "\tCorrected bits= 0x%x\n", ce_errors);
637 
638  branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd);
639 
640  channel = 0;
641  if (REC_ECC_LOCATOR_ODD(info->redmemb))
642  channel = 1;
643 
644  /* Convert channel to be based from zero, instead of
645  * from branch base of 0 */
646  channel += branch;
647 
648  bank = REC_BANK(info->recmema);
649  rank = REC_RANK(info->recmema);
650  rdwr = REC_RDWR(info->recmema);
651  ras = REC_RAS(info->recmemb);
652  cas = REC_CAS(info->recmemb);
653 
654  edac_dbg(0, "\t\tCSROW= %d Channel= %d (Branch %d DRAM Bank= %d rdwr= %s ras= %d cas= %d)\n",
655  rank, channel, branch >> 1, bank,
656  rdwr ? "Write" : "Read", ras, cas);
657 
658  switch (ce_errors) {
659  case FERR_NF_M17ERR:
660  specific = "Correctable Non-Mirrored Demand Data ECC";
661  break;
662  case FERR_NF_M18ERR:
663  specific = "Correctable Mirrored Demand Data ECC";
664  break;
665  case FERR_NF_M19ERR:
666  specific = "Correctable Spare-Copy Data ECC";
667  break;
668  case FERR_NF_M20ERR:
669  specific = "Correctable Patrol Data ECC";
670  break;
671  }
672 
673  /* Form out message */
674  snprintf(msg, sizeof(msg),
675  "Rank=%d Bank=%d RDWR=%s RAS=%d "
676  "CAS=%d, CE Err=0x%x (%s))", branch >> 1, bank,
677  rdwr ? "Write" : "Read", ras, cas, ce_errors,
678  specific);
679 
680  /* Call the helper to output message */
682  channel >> 1, channel % 2, rank,
683  rdwr ? "Write error" : "Read error",
684  msg);
685  }
686 
687  if (!misc_messages)
688  return;
689 
690  misc_errors = allErrors & (FERR_NF_NON_RETRY | FERR_NF_NORTH_CRC |
692  if (misc_errors) {
693  switch (misc_errors) {
694  case FERR_NF_M13ERR:
695  specific = "Non-Retry or Redundant Retry FBD Memory "
696  "Alert or Redundant Fast Reset Timeout";
697  break;
698  case FERR_NF_M14ERR:
699  specific = "Non-Retry or Redundant Retry FBD "
700  "Configuration Alert";
701  break;
702  case FERR_NF_M15ERR:
703  specific = "Non-Retry or Redundant Retry FBD "
704  "Northbound CRC error on read data";
705  break;
706  case FERR_NF_M21ERR:
707  specific = "FBD Northbound CRC error on "
708  "FBD Sync Status";
709  break;
710  case FERR_NF_M22ERR:
711  specific = "SPD protocol error";
712  break;
713  case FERR_NF_M27ERR:
714  specific = "DIMM-spare copy started";
715  break;
716  case FERR_NF_M28ERR:
717  specific = "DIMM-spare copy completed";
718  break;
719  }
720  branch = EXTRACT_FBDCHAN_INDX(info->ferr_nf_fbd);
721 
722  /* Form out message */
723  snprintf(msg, sizeof(msg),
724  "Err=%#x (%s)", misc_errors, specific);
725 
726  /* Call the helper to output message */
728  branch >> 1, -1, -1,
729  "Misc error", msg);
730  }
731 }
732 
/*
 * i5000_process_error_info
 *
 *	Report everything recorded in 'info', a register snapshot that
 *	was previously retrieved from hardware.  'handle_errors' is
 *	forwarded untouched to both handlers.
 */
static void i5000_process_error_info(struct mem_ctl_info *mci,
				     struct i5000_error_info *info,
				     int handle_errors)
{
	/* fatal errors are reported ahead of the non-fatal ones */
	i5000_process_fatal_error_info(mci, info, handle_errors);
	i5000_process_nonfatal_error_info(mci, info, handle_errors);
}
747 
748 /*
749  * i5000_clear_error Retrieve any error from the hardware
750  * but do NOT process that error.
751  * Used for 'clearing' out of previous errors
752  * Called by the Core module.
753  */
754 static void i5000_clear_error(struct mem_ctl_info *mci)
755 {
756  struct i5000_error_info info;
757 
758  i5000_get_error_info(mci, &info);
759 }
760 
761 /*
762  * i5000_check_error Retrieve and process errors reported by the
763  * hardware. Called by the Core module.
764  */
765 static void i5000_check_error(struct mem_ctl_info *mci)
766 {
767  struct i5000_error_info info;
768  edac_dbg(4, "MC%d\n", mci->mc_idx);
769  i5000_get_error_info(mci, &info);
770  i5000_process_error_info(mci, &info, 1);
771 }
772 
773 /*
774  * i5000_get_devices Find and perform 'get' operation on the MCH's
775  * device/functions we want to reference for this driver
776  *
777  * Need to 'get' device 16 func 1 and func 2
778  */
779 static int i5000_get_devices(struct mem_ctl_info *mci, int dev_idx)
780 {
781  //const struct i5000_dev_info *i5000_dev = &i5000_devs[dev_idx];
782  struct i5000_pvt *pvt;
783  struct pci_dev *pdev;
784 
785  pvt = mci->pvt_info;
786 
787  /* Attempt to 'get' the MCH register we want */
788  pdev = NULL;
789  while (1) {
792 
793  /* End of list, leave */
794  if (pdev == NULL) {
796  "'system address,Process Bus' "
797  "device not found:"
798  "vendor 0x%x device 0x%x FUNC 1 "
799  "(broken BIOS?)\n",
802 
803  return 1;
804  }
805 
806  /* Scan for device 16 func 1 */
807  if (PCI_FUNC(pdev->devfn) == 1)
808  break;
809  }
810 
811  pvt->branchmap_werrors = pdev;
812 
813  /* Attempt to 'get' the MCH register we want */
814  pdev = NULL;
815  while (1) {
818 
819  if (pdev == NULL) {
821  "MC: 'branchmap,control,errors' "
822  "device not found:"
823  "vendor 0x%x device 0x%x Func 2 "
824  "(broken BIOS?)\n",
827 
829  return 1;
830  }
831 
832  /* Scan for device 16 func 2 */
833  if (PCI_FUNC(pdev->devfn) == 2)
834  break;
835  }
836 
837  pvt->fsb_error_regs = pdev;
838 
839  edac_dbg(1, "System Address, processor bus- PCI Bus ID: %s %x:%x\n",
840  pci_name(pvt->system_address),
841  pvt->system_address->vendor, pvt->system_address->device);
842  edac_dbg(1, "Branchmap, control and errors - PCI Bus ID: %s %x:%x\n",
843  pci_name(pvt->branchmap_werrors),
844  pvt->branchmap_werrors->vendor,
845  pvt->branchmap_werrors->device);
846  edac_dbg(1, "FSB Error Regs - PCI Bus ID: %s %x:%x\n",
847  pci_name(pvt->fsb_error_regs),
848  pvt->fsb_error_regs->vendor, pvt->fsb_error_regs->device);
849 
850  pdev = NULL;
853 
854  if (pdev == NULL) {
856  "MC: 'BRANCH 0' device not found:"
857  "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n",
859 
862  return 1;
863  }
864 
865  pvt->branch_0 = pdev;
866 
867  /* If this device claims to have more than 2 channels then
868  * fetch Branch 1's information
869  */
870  if (pvt->maxch >= CHANNELS_PER_BRANCH) {
871  pdev = NULL;
874 
875  if (pdev == NULL) {
877  "MC: 'BRANCH 1' device not found:"
878  "vendor 0x%x device 0x%x Func 0 "
879  "(broken BIOS?)\n",
882 
885  pci_dev_put(pvt->branch_0);
886  return 1;
887  }
888 
889  pvt->branch_1 = pdev;
890  }
891 
892  return 0;
893 }
894 
895 /*
896  * i5000_put_devices 'put' all the devices that we have
897  * reserved via 'get'
898  */
899 static void i5000_put_devices(struct mem_ctl_info *mci)
900 {
901  struct i5000_pvt *pvt;
902 
903  pvt = mci->pvt_info;
904 
905  pci_dev_put(pvt->branchmap_werrors); /* FUNC 1 */
906  pci_dev_put(pvt->fsb_error_regs); /* FUNC 2 */
907  pci_dev_put(pvt->branch_0); /* DEV 21 */
908 
909  /* Only if more than 2 channels do we release the second branch */
910  if (pvt->maxch >= CHANNELS_PER_BRANCH)
911  pci_dev_put(pvt->branch_1); /* DEV 22 */
912 }
913 
914 /*
915  * determine_amb_resent
916  *
917  * the information is contained in NUM_MTRS different registers
918  * determineing which of the NUM_MTRS requires knowing
919  * which channel is in question
920  *
921  * 2 branches, each with 2 channels
922  * b0_ambpresent0 for channel '0'
923  * b0_ambpresent1 for channel '1'
924  * b1_ambpresent0 for channel '2'
925  * b1_ambpresent1 for channel '3'
926  */
927 static int determine_amb_present_reg(struct i5000_pvt *pvt, int channel)
928 {
929  int amb_present;
930 
931  if (channel < CHANNELS_PER_BRANCH) {
932  if (channel & 0x1)
933  amb_present = pvt->b0_ambpresent1;
934  else
935  amb_present = pvt->b0_ambpresent0;
936  } else {
937  if (channel & 0x1)
938  amb_present = pvt->b1_ambpresent1;
939  else
940  amb_present = pvt->b1_ambpresent0;
941  }
942 
943  return amb_present;
944 }
945 
946 /*
947  * determine_mtr(pvt, csrow, channel)
948  *
949  * return the proper MTR register as determine by the csrow and channel desired
950  */
951 static int determine_mtr(struct i5000_pvt *pvt, int slot, int channel)
952 {
953  int mtr;
954 
955  if (channel < CHANNELS_PER_BRANCH)
956  mtr = pvt->b0_mtr[slot];
957  else
958  mtr = pvt->b1_mtr[slot];
959 
960  return mtr;
961 }
962 
963 /*
964  */
965 static void decode_mtr(int slot_row, u16 mtr)
966 {
967  int ans;
968 
969  ans = MTR_DIMMS_PRESENT(mtr);
970 
971  edac_dbg(2, "\tMTR%d=0x%x: DIMMs are %sPresent\n",
972  slot_row, mtr, ans ? "" : "NOT ");
973  if (!ans)
974  return;
975 
976  edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
977  edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
978  edac_dbg(2, "\t\tNUMRANK: %s\n",
979  MTR_DIMM_RANK(mtr) ? "double" : "single");
980  edac_dbg(2, "\t\tNUMROW: %s\n",
981  MTR_DIMM_ROWS(mtr) == 0 ? "8,192 - 13 rows" :
982  MTR_DIMM_ROWS(mtr) == 1 ? "16,384 - 14 rows" :
983  MTR_DIMM_ROWS(mtr) == 2 ? "32,768 - 15 rows" :
984  "reserved");
985  edac_dbg(2, "\t\tNUMCOL: %s\n",
986  MTR_DIMM_COLS(mtr) == 0 ? "1,024 - 10 columns" :
987  MTR_DIMM_COLS(mtr) == 1 ? "2,048 - 11 columns" :
988  MTR_DIMM_COLS(mtr) == 2 ? "4,096 - 12 columns" :
989  "reserved");
990 }
991 
992 static void handle_channel(struct i5000_pvt *pvt, int slot, int channel,
993  struct i5000_dimm_info *dinfo)
994 {
995  int mtr;
996  int amb_present_reg;
997  int addrBits;
998 
999  mtr = determine_mtr(pvt, slot, channel);
1000  if (MTR_DIMMS_PRESENT(mtr)) {
1001  amb_present_reg = determine_amb_present_reg(pvt, channel);
1002 
1003  /* Determine if there is a DIMM present in this DIMM slot */
1004  if (amb_present_reg) {
1005  dinfo->dual_rank = MTR_DIMM_RANK(mtr);
1006 
1007  /* Start with the number of bits for a Bank
1008  * on the DRAM */
1009  addrBits = MTR_DRAM_BANKS_ADDR_BITS(mtr);
1010  /* Add the number of ROW bits */
1011  addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr);
1012  /* add the number of COLUMN bits */
1013  addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr);
1014 
1015  /* Dual-rank memories have twice the size */
1016  if (dinfo->dual_rank)
1017  addrBits++;
1018 
1019  addrBits += 6; /* add 64 bits per DIMM */
1020  addrBits -= 20; /* divide by 2^^20 */
1021  addrBits -= 3; /* 8 bits per bytes */
1022 
1023  dinfo->megabytes = 1 << addrBits;
1024  }
1025  }
1026 }
1027 
1028 /*
1029  * calculate_dimm_size
1030  *
1031  * also will output a DIMM matrix map, if debug is enabled, for viewing
1032  * how the DIMMs are populated
1033  */
1034 static void calculate_dimm_size(struct i5000_pvt *pvt)
1035 {
1036  struct i5000_dimm_info *dinfo;
1037  int slot, channel, branch;
1038  char *p, *mem_buffer;
1039  int space, n;
1040 
1041  /* ================= Generate some debug output ================= */
1042  space = PAGE_SIZE;
1043  mem_buffer = p = kmalloc(space, GFP_KERNEL);
1044  if (p == NULL) {
1045  i5000_printk(KERN_ERR, "MC: %s:%s() kmalloc() failed\n",
1046  __FILE__, __func__);
1047  return;
1048  }
1049 
1050  /* Scan all the actual slots
1051  * and calculate the information for each DIMM
1052  * Start with the highest slot first, to display it first
1053  * and work toward the 0th slot
1054  */
1055  for (slot = pvt->maxdimmperch - 1; slot >= 0; slot--) {
1056 
1057  /* on an odd slot, first output a 'boundary' marker,
1058  * then reset the message buffer */
1059  if (slot & 0x1) {
1060  n = snprintf(p, space, "--------------------------"
1061  "--------------------------------");
1062  p += n;
1063  space -= n;
1064  edac_dbg(2, "%s\n", mem_buffer);
1065  p = mem_buffer;
1066  space = PAGE_SIZE;
1067  }
1068  n = snprintf(p, space, "slot %2d ", slot);
1069  p += n;
1070  space -= n;
1071 
1072  for (channel = 0; channel < pvt->maxch; channel++) {
1073  dinfo = &pvt->dimm_info[slot][channel];
1074  handle_channel(pvt, slot, channel, dinfo);
1075  if (dinfo->megabytes)
1076  n = snprintf(p, space, "%4d MB %dR| ",
1077  dinfo->megabytes, dinfo->dual_rank + 1);
1078  else
1079  n = snprintf(p, space, "%4d MB | ", 0);
1080  p += n;
1081  space -= n;
1082  }
1083  p += n;
1084  space -= n;
1085  edac_dbg(2, "%s\n", mem_buffer);
1086  p = mem_buffer;
1087  space = PAGE_SIZE;
1088  }
1089 
1090  /* Output the last bottom 'boundary' marker */
1091  n = snprintf(p, space, "--------------------------"
1092  "--------------------------------");
1093  p += n;
1094  space -= n;
1095  edac_dbg(2, "%s\n", mem_buffer);
1096  p = mem_buffer;
1097  space = PAGE_SIZE;
1098 
1099  /* now output the 'channel' labels */
1100  n = snprintf(p, space, " ");
1101  p += n;
1102  space -= n;
1103  for (channel = 0; channel < pvt->maxch; channel++) {
1104  n = snprintf(p, space, "channel %d | ", channel);
1105  p += n;
1106  space -= n;
1107  }
1108  edac_dbg(2, "%s\n", mem_buffer);
1109  p = mem_buffer;
1110  space = PAGE_SIZE;
1111 
1112  n = snprintf(p, space, " ");
1113  p += n;
1114  for (branch = 0; branch < MAX_BRANCHES; branch++) {
1115  n = snprintf(p, space, " branch %d | ", branch);
1116  p += n;
1117  space -= n;
1118  }
1119 
1120  /* output the last message and free buffer */
1121  edac_dbg(2, "%s\n", mem_buffer);
1122  kfree(mem_buffer);
1123 }
1124 
1125 /*
1126  * i5000_get_mc_regs read in the necessary registers and
1127  * cache locally
1128  *
1129  * Fills in the private data members
1130  */
1131 static void i5000_get_mc_regs(struct mem_ctl_info *mci)
1132 {
1133  struct i5000_pvt *pvt;
1134  u32 actual_tolm;
1135  u16 limit;
1136  int slot_row;
1137  int maxch;
1138  int maxdimmperch;
1139  int way0, way1;
1140 
1141  pvt = mci->pvt_info;
1142 
1143  pci_read_config_dword(pvt->system_address, AMBASE,
1144  &pvt->u.ambase_bottom);
1145  pci_read_config_dword(pvt->system_address, AMBASE + sizeof(u32),
1146  &pvt->u.ambase_top);
1147 
1148  maxdimmperch = pvt->maxdimmperch;
1149  maxch = pvt->maxch;
1150 
1151  edac_dbg(2, "AMBASE= 0x%lx MAXCH= %d MAX-DIMM-Per-CH= %d\n",
1152  (long unsigned int)pvt->ambase, pvt->maxch, pvt->maxdimmperch);
1153 
1154  /* Get the Branch Map regs */
1155  pci_read_config_word(pvt->branchmap_werrors, TOLM, &pvt->tolm);
1156  pvt->tolm >>= 12;
1157  edac_dbg(2, "TOLM (number of 256M regions) =%u (0x%x)\n",
1158  pvt->tolm, pvt->tolm);
1159 
1160  actual_tolm = pvt->tolm << 28;
1161  edac_dbg(2, "Actual TOLM byte addr=%u (0x%x)\n",
1162  actual_tolm, actual_tolm);
1163 
1164  pci_read_config_word(pvt->branchmap_werrors, MIR0, &pvt->mir0);
1165  pci_read_config_word(pvt->branchmap_werrors, MIR1, &pvt->mir1);
1166  pci_read_config_word(pvt->branchmap_werrors, MIR2, &pvt->mir2);
1167 
1168  /* Get the MIR[0-2] regs */
1169  limit = (pvt->mir0 >> 4) & 0x0FFF;
1170  way0 = pvt->mir0 & 0x1;
1171  way1 = pvt->mir0 & 0x2;
1172  edac_dbg(2, "MIR0: limit= 0x%x WAY1= %u WAY0= %x\n",
1173  limit, way1, way0);
1174  limit = (pvt->mir1 >> 4) & 0x0FFF;
1175  way0 = pvt->mir1 & 0x1;
1176  way1 = pvt->mir1 & 0x2;
1177  edac_dbg(2, "MIR1: limit= 0x%x WAY1= %u WAY0= %x\n",
1178  limit, way1, way0);
1179  limit = (pvt->mir2 >> 4) & 0x0FFF;
1180  way0 = pvt->mir2 & 0x1;
1181  way1 = pvt->mir2 & 0x2;
1182  edac_dbg(2, "MIR2: limit= 0x%x WAY1= %u WAY0= %x\n",
1183  limit, way1, way0);
1184 
1185  /* Get the MTR[0-3] regs */
1186  for (slot_row = 0; slot_row < NUM_MTRS; slot_row++) {
1187  int where = MTR0 + (slot_row * sizeof(u32));
1188 
1189  pci_read_config_word(pvt->branch_0, where,
1190  &pvt->b0_mtr[slot_row]);
1191 
1192  edac_dbg(2, "MTR%d where=0x%x B0 value=0x%x\n",
1193  slot_row, where, pvt->b0_mtr[slot_row]);
1194 
1195  if (pvt->maxch >= CHANNELS_PER_BRANCH) {
1196  pci_read_config_word(pvt->branch_1, where,
1197  &pvt->b1_mtr[slot_row]);
1198  edac_dbg(2, "MTR%d where=0x%x B1 value=0x%x\n",
1199  slot_row, where, pvt->b1_mtr[slot_row]);
1200  } else {
1201  pvt->b1_mtr[slot_row] = 0;
1202  }
1203  }
1204 
1205  /* Read and dump branch 0's MTRs */
1206  edac_dbg(2, "Memory Technology Registers:\n");
1207  edac_dbg(2, " Branch 0:\n");
1208  for (slot_row = 0; slot_row < NUM_MTRS; slot_row++) {
1209  decode_mtr(slot_row, pvt->b0_mtr[slot_row]);
1210  }
1211  pci_read_config_word(pvt->branch_0, AMB_PRESENT_0,
1212  &pvt->b0_ambpresent0);
1213  edac_dbg(2, "\t\tAMB-Branch 0-present0 0x%x:\n", pvt->b0_ambpresent0);
1214  pci_read_config_word(pvt->branch_0, AMB_PRESENT_1,
1215  &pvt->b0_ambpresent1);
1216  edac_dbg(2, "\t\tAMB-Branch 0-present1 0x%x:\n", pvt->b0_ambpresent1);
1217 
1218  /* Only if we have 2 branchs (4 channels) */
1219  if (pvt->maxch < CHANNELS_PER_BRANCH) {
1220  pvt->b1_ambpresent0 = 0;
1221  pvt->b1_ambpresent1 = 0;
1222  } else {
1223  /* Read and dump branch 1's MTRs */
1224  edac_dbg(2, " Branch 1:\n");
1225  for (slot_row = 0; slot_row < NUM_MTRS; slot_row++) {
1226  decode_mtr(slot_row, pvt->b1_mtr[slot_row]);
1227  }
1228  pci_read_config_word(pvt->branch_1, AMB_PRESENT_0,
1229  &pvt->b1_ambpresent0);
1230  edac_dbg(2, "\t\tAMB-Branch 1-present0 0x%x:\n",
1231  pvt->b1_ambpresent0);
1232  pci_read_config_word(pvt->branch_1, AMB_PRESENT_1,
1233  &pvt->b1_ambpresent1);
1234  edac_dbg(2, "\t\tAMB-Branch 1-present1 0x%x:\n",
1235  pvt->b1_ambpresent1);
1236  }
1237 
1238  /* Go and determine the size of each DIMM and place in an
1239  * orderly matrix */
1240  calculate_dimm_size(pvt);
1241 }
1242 
1243 /*
1244  * i5000_init_csrows Initialize the 'csrows' table within
1245  * the mci control structure with the
1246  * addressing of memory.
1247  *
1248  * return:
1249  * 0 success
1250  * 1 no actual memory found on this MC
1251  */
1252 static int i5000_init_csrows(struct mem_ctl_info *mci)
1253 {
1254  struct i5000_pvt *pvt;
1255  struct dimm_info *dimm;
1256  int empty, channel_count;
1257  int max_csrows;
1258  int mtr;
1259  int csrow_megs;
1260  int channel;
1261  int slot;
1262 
1263  pvt = mci->pvt_info;
1264 
1265  channel_count = pvt->maxch;
1266  max_csrows = pvt->maxdimmperch * 2;
1267 
1268  empty = 1; /* Assume NO memory */
1269 
1270  /*
1271  * FIXME: The memory layout used to map slot/channel into the
1272  * real memory architecture is weird: branch+slot are "csrows"
1273  * and channel is channel. That required an extra array (dimm_info)
1274  * to map the dimms. A good cleanup would be to remove this array,
1275  * and do a loop here with branch, channel, slot
1276  */
1277  for (slot = 0; slot < max_csrows; slot++) {
1278  for (channel = 0; channel < pvt->maxch; channel++) {
1279 
1280  mtr = determine_mtr(pvt, slot, channel);
1281 
1282  if (!MTR_DIMMS_PRESENT(mtr))
1283  continue;
1284 
1285  dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
1286  channel / MAX_BRANCHES,
1287  channel % MAX_BRANCHES, slot);
1288 
1289  csrow_megs = pvt->dimm_info[slot][channel].megabytes;
1290  dimm->grain = 8;
1291 
1292  /* Assume DDR2 for now */
1293  dimm->mtype = MEM_FB_DDR2;
1294 
1295  /* ask what device type on this row */
1296  if (MTR_DRAM_WIDTH(mtr))
1297  dimm->dtype = DEV_X8;
1298  else
1299  dimm->dtype = DEV_X4;
1300 
1301  dimm->edac_mode = EDAC_S8ECD8ED;
1302  dimm->nr_pages = csrow_megs << 8;
1303  }
1304 
1305  empty = 0;
1306  }
1307 
1308  return empty;
1309 }
1310 
1311 /*
1312  * i5000_enable_error_reporting
1313  * Turn on the memory reporting features of the hardware
1314  */
1315 static void i5000_enable_error_reporting(struct mem_ctl_info *mci)
1316 {
1317  struct i5000_pvt *pvt;
1318  u32 fbd_error_mask;
1319 
1320  pvt = mci->pvt_info;
1321 
1322  /* Read the FBD Error Mask Register */
1323  pci_read_config_dword(pvt->branchmap_werrors, EMASK_FBD,
1324  &fbd_error_mask);
1325 
1326  /* Enable with a '0' */
1327  fbd_error_mask &= ~(ENABLE_EMASK_ALL);
1328 
1329  pci_write_config_dword(pvt->branchmap_werrors, EMASK_FBD,
1330  fbd_error_mask);
1331 }
1332 
1333 /*
1334  * i5000_get_dimm_and_channel_counts(pdev, &nr_csrows, &num_channels)
1335  *
1336  * ask the device how many channels are present and how many CSROWS
1337  * as well
1338  */
1339 static void i5000_get_dimm_and_channel_counts(struct pci_dev *pdev,
1340  int *num_dimms_per_channel,
1341  int *num_channels)
1342 {
1343  u8 value;
1344 
1345  /* Need to retrieve just how many channels and dimms per channel are
1346  * supported on this memory controller
1347  */
1348  pci_read_config_byte(pdev, MAXDIMMPERCH, &value);
1349  *num_dimms_per_channel = (int)value;
1350 
1351  pci_read_config_byte(pdev, MAXCH, &value);
1352  *num_channels = (int)value;
1353 }
1354 
1355 /*
1356  * i5000_probe1 Probe for ONE instance of device to see if it is
1357  * present.
1358  * return:
1359  * 0 for FOUND a device
1360  * < 0 for error code
1361  */
1362 static int i5000_probe1(struct pci_dev *pdev, int dev_idx)
1363 {
1364  struct mem_ctl_info *mci;
1365  struct edac_mc_layer layers[3];
1366  struct i5000_pvt *pvt;
1367  int num_channels;
1368  int num_dimms_per_channel;
1369 
1370  edac_dbg(0, "MC: pdev bus %u dev=0x%x fn=0x%x\n",
1371  pdev->bus->number,
1372  PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1373 
1374  /* We only are looking for func 0 of the set */
1375  if (PCI_FUNC(pdev->devfn) != 0)
1376  return -ENODEV;
1377 
1378  /* Ask the devices for the number of CSROWS and CHANNELS so
1379  * that we can calculate the memory resources, etc
1380  *
1381  * The Chipset will report what it can handle which will be greater
1382  * or equal to what the motherboard manufacturer will implement.
1383  *
1384  * As we don't have a motherboard identification routine to determine
1385  * actual number of slots/dimms per channel, we thus utilize the
1386  * resource as specified by the chipset. Thus, we might have
1387  * have more DIMMs per channel than actually on the mobo, but this
1388  * allows the driver to support up to the chipset max, without
1389  * some fancy mobo determination.
1390  */
1391  i5000_get_dimm_and_channel_counts(pdev, &num_dimms_per_channel,
1392  &num_channels);
1393 
1394  edac_dbg(0, "MC: Number of Branches=2 Channels= %d DIMMS= %d\n",
1395  num_channels, num_dimms_per_channel);
1396 
1397  /* allocate a new MC control structure */
1398 
1399  layers[0].type = EDAC_MC_LAYER_BRANCH;
1400  layers[0].size = MAX_BRANCHES;
1401  layers[0].is_virt_csrow = false;
1402  layers[1].type = EDAC_MC_LAYER_CHANNEL;
1403  layers[1].size = num_channels / MAX_BRANCHES;
1404  layers[1].is_virt_csrow = false;
1405  layers[2].type = EDAC_MC_LAYER_SLOT;
1406  layers[2].size = num_dimms_per_channel;
1407  layers[2].is_virt_csrow = true;
1408  mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
1409  if (mci == NULL)
1410  return -ENOMEM;
1411 
1412  edac_dbg(0, "MC: mci = %p\n", mci);
1413 
1414  mci->pdev = &pdev->dev; /* record ptr to the generic device */
1415 
1416  pvt = mci->pvt_info;
1417  pvt->system_address = pdev; /* Record this device in our private */
1418  pvt->maxch = num_channels;
1419  pvt->maxdimmperch = num_dimms_per_channel;
1420 
1421  /* 'get' the pci devices we want to reserve for our use */
1422  if (i5000_get_devices(mci, dev_idx))
1423  goto fail0;
1424 
1425  /* Time to get serious */
1426  i5000_get_mc_regs(mci); /* retrieve the hardware registers */
1427 
1428  mci->mc_idx = 0;
1429  mci->mtype_cap = MEM_FLAG_FB_DDR2;
1431  mci->edac_cap = EDAC_FLAG_NONE;
1432  mci->mod_name = "i5000_edac.c";
1433  mci->mod_ver = I5000_REVISION;
1434  mci->ctl_name = i5000_devs[dev_idx].ctl_name;
1435  mci->dev_name = pci_name(pdev);
1436  mci->ctl_page_to_phys = NULL;
1437 
1438  /* Set the function pointer to an actual operation function */
1439  mci->edac_check = i5000_check_error;
1440 
1441  /* initialize the MC control structure 'csrows' table
1442  * with the mapping and control information */
1443  if (i5000_init_csrows(mci)) {
1444  edac_dbg(0, "MC: Setting mci->edac_cap to EDAC_FLAG_NONE because i5000_init_csrows() returned nonzero value\n");
1445  mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */
1446  } else {
1447  edac_dbg(1, "MC: Enable error reporting now\n");
1448  i5000_enable_error_reporting(mci);
1449  }
1450 
1451  /* add this new MC control structure to EDAC's list of MCs */
1452  if (edac_mc_add_mc(mci)) {
1453  edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
1454  /* FIXME: perhaps some code should go here that disables error
1455  * reporting if we just enabled it
1456  */
1457  goto fail1;
1458  }
1459 
1460  i5000_clear_error(mci);
1461 
1462  /* allocating generic PCI control info */
1463  i5000_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
1464  if (!i5000_pci) {
1466  "%s(): Unable to create PCI control\n",
1467  __func__);
1469  "%s(): PCI error report via EDAC not setup\n",
1470  __func__);
1471  }
1472 
1473  return 0;
1474 
1475  /* Error exit unwinding stack */
1476 fail1:
1477 
1478  i5000_put_devices(mci);
1479 
1480 fail0:
1481  edac_mc_free(mci);
1482  return -ENODEV;
1483 }
1484 
1485 /*
1486  * i5000_init_one constructor for one instance of device
1487  *
1488  * returns:
1489  * negative on error
1490  * count (>= 0)
1491  */
1492 static int __devinit i5000_init_one(struct pci_dev *pdev,
1493  const struct pci_device_id *id)
1494 {
1495  int rc;
1496 
1497  edac_dbg(0, "MC:\n");
1498 
1499  /* wake up device */
1500  rc = pci_enable_device(pdev);
1501  if (rc)
1502  return rc;
1503 
1504  /* now probe and enable the device */
1505  return i5000_probe1(pdev, id->driver_data);
1506 }
1507 
1508 /*
1509  * i5000_remove_one destructor for one instance of device
1510  *
1511  */
1512 static void __devexit i5000_remove_one(struct pci_dev *pdev)
1513 {
1514  struct mem_ctl_info *mci;
1515 
1516  edac_dbg(0, "\n");
1517 
1518  if (i5000_pci)
1519  edac_pci_release_generic_ctl(i5000_pci);
1520 
1521  if ((mci = edac_mc_del_mc(&pdev->dev)) == NULL)
1522  return;
1523 
1524  /* retrieve references to resources, and free those resources */
1525  i5000_put_devices(mci);
1526  edac_mc_free(mci);
1527 }
1528 
1529 /*
1530  * pci_device_id table for which devices we are looking for
1531  *
1532  * The "E500P" device is the first device supported.
1533  */
1534 static DEFINE_PCI_DEVICE_TABLE(i5000_pci_tbl) = {
1536  .driver_data = I5000P},
1537 
1538  {0,} /* 0 terminated list. */
1539 };
1540 
1541 MODULE_DEVICE_TABLE(pci, i5000_pci_tbl);
1542 
1543 /*
1544  * i5000_driver pci_driver structure for this module
1545  *
1546  */
1547 static struct pci_driver i5000_driver = {
1548  .name = KBUILD_BASENAME,
1549  .probe = i5000_init_one,
1550  .remove = __devexit_p(i5000_remove_one),
1551  .id_table = i5000_pci_tbl,
1552 };
1553 
1554 /*
1555  * i5000_init Module entry function
1556  * Try to initialize this module for its devices
1557  */
1558 static int __init i5000_init(void)
1559 {
1560  int pci_rc;
1561 
1562  edac_dbg(2, "MC:\n");
1563 
1564  /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1565  opstate_init();
1566 
1567  pci_rc = pci_register_driver(&i5000_driver);
1568 
1569  return (pci_rc < 0) ? pci_rc : 0;
1570 }
1571 
1572 /*
1573  * i5000_exit() Module exit function
1574  * Unregister the driver
1575  */
1576 static void __exit i5000_exit(void)
1577 {
1578  edac_dbg(2, "MC:\n");
1579  pci_unregister_driver(&i5000_driver);
1580 }
1581 
1582 module_init(i5000_init);
1583 module_exit(i5000_exit);
1584 
1585 MODULE_LICENSE("GPL");
1587  ("Linux Networx (http://lnxi.com) Doug Thompson <[email protected]>");
1588 MODULE_DESCRIPTION("MC Driver for Intel I5000 memory controllers - "
1589  I5000_REVISION);
1590 
1591 module_param(edac_op_state, int, 0444);
1592 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
1593 module_param(misc_messages, int, 0444);
1594 MODULE_PARM_DESC(misc_messages, "Log miscellaneous non fatal messages");
1595