Linux Kernel 3.7.1
i7300_edac.c
1 /*
2  * Intel 7300 class Memory Controllers kernel module (Clarksboro)
3  *
4  * This file may be distributed under the terms of the
5  * GNU General Public License version 2 only.
6  *
7  * Copyright (c) 2010 by:
8  * Mauro Carvalho Chehab <[email protected]>
9  *
10  * Red Hat Inc. http://www.redhat.com
11  *
12  * Intel 7300 Chipset Memory Controller Hub (MCH) - Datasheet
13  * http://www.intel.com/Assets/PDF/datasheet/318082.pdf
14  *
15  * TODO: The chipset also allows checking for PCI Express errors. Currently,
16  * the driver covers only memory errors.
17  *
18  * This driver uses the "csrows" EDAC attribute to represent the DIMM slot#
19  */
20 
21 #include <linux/module.h>
22 #include <linux/init.h>
23 #include <linux/pci.h>
24 #include <linux/pci_ids.h>
25 #include <linux/slab.h>
26 #include <linux/edac.h>
27 #include <linux/mmzone.h>
28 
29 #include "edac_core.h"
30 
31 /*
32  * Alter this version for the I7300 module when modifications are made
33  */
34 #define I7300_REVISION " Ver: 1.0.0"
35 
36 #define EDAC_MOD_STR "i7300_edac"
37 
38 #define i7300_printk(level, fmt, arg...) \
39  edac_printk(level, "i7300", fmt, ##arg)
40 
41 #define i7300_mc_printk(mci, level, fmt, arg...) \
42  edac_mc_chipset_printk(mci, level, "i7300", fmt, ##arg)
43 
44 /***********************************************
45  * i7300 Limit constants Structs and static vars
46  ***********************************************/
47 
48 /*
49  * Memory topology is organized as:
50  * Branch 0 - 2 channels: channels 0 and 1 (FDB0 PCI dev 21.0)
51  * Branch 1 - 2 channels: channels 2 and 3 (FDB1 PCI dev 22.0)
 52  * Each channel can have up to 8 DIMM sets (called SLOTS)
 53  * Slots should generally be filled in pairs,
 54  * except in Single Channel mode of operation,
 55  * where only slot 0 on channel 0 is filled.
 56  * In normal operation mode, the two channels on a branch should be
 57  * filled together for the same SLOT#
 58  * When in mirrored mode, Branch 1 replicates the memory at Branch 0, so the
 59  * four channels on both branches should be filled
60  */
61 
62 /* Limits for i7300 */
63 #define MAX_SLOTS 8
64 #define MAX_BRANCHES 2
65 #define MAX_CH_PER_BRANCH 2
66 #define MAX_CHANNELS (MAX_CH_PER_BRANCH * MAX_BRANCHES)
67 #define MAX_MIR 3
68 
69 #define to_channel(ch, branch) ((((branch)) << 1) | (ch))
70 
71 #define to_csrow(slot, ch, branch) \
72  (to_channel(ch, branch) | ((slot) << 2))
73 
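/*
 * Worked example (illustrative): with the mappings above, slot 3 on
 * channel 1 of branch 1 gives to_channel(1, 1) = (1 << 1) | 1 = 3 and
 * to_csrow(3, 1, 1) = 3 | (3 << 2) = 15.
 */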
74 /* Device name and register DID (Device ID) */
 75 struct i7300_dev_info {
76  const char *ctl_name; /* name for this device */
77  u16 fsb_mapping_errors; /* DID for the branchmap,control */
78 };
79 
 80 /* Table of device attributes supported by this driver */
81 static const struct i7300_dev_info i7300_devs[] = {
82  {
83  .ctl_name = "I7300",
84  .fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I7300_MCH_ERR,
85  },
86 };
87 
 88 struct i7300_dimm_info {
89  int megabytes; /* size, 0 means not present */
90 };
91 
92 /* driver private data structure */
93 struct i7300_pvt {
94  struct pci_dev *pci_dev_16_0_fsb_ctlr; /* 16.0 */
95  struct pci_dev *pci_dev_16_1_fsb_addr_map; /* 16.1 */
96  struct pci_dev *pci_dev_16_2_fsb_err_regs; /* 16.2 */
97  struct pci_dev *pci_dev_2x_0_fbd_branch[MAX_BRANCHES]; /* 21.0 and 22.0 */
98 
99  u16 tolm; /* top of low memory */
100  u64 ambase; /* AMB BAR */
101 
102  u32 mc_settings; /* Report several settings */
 103  u32 mc_settings_a;
104 
105  u16 mir[MAX_MIR]; /* Memory Interleave Reg*/
106 
 107  u16 mtr[MAX_SLOTS][MAX_BRANCHES]; /* Memory Technology Reg */
108  u16 ambpresent[MAX_CHANNELS]; /* AMB present regs */
109 
110  /* DIMM information matrix, allocating architecture maximums */
 111  struct i7300_dimm_info dimm_info[MAX_SLOTS][MAX_CHANNELS];
112 
113  /* Temporary buffer for use when preparing error messages */
 114  char *tmp_prt_buffer;
115 };
116 
117 /* FIXME: Why do we need to have this static? */
118 static struct edac_pci_ctl_info *i7300_pci;
119 
120 /***************************************************
121  * i7300 Register definitions for memory enumeration
122  ***************************************************/
123 
124 /*
125  * Device 16,
126  * Function 0: System Address (not documented)
127  * Function 1: Memory Branch Map, Control, Errors Register
128  */
129 
130  /* OFFSETS for Function 0 */
131 #define AMBASE 0x48 /* AMB Mem Mapped Reg Region Base */
132 #define MAXCH 0x56 /* Max Channel Number */
133 #define MAXDIMMPERCH 0x57 /* Max DIMM PER Channel Number */
134 
135  /* OFFSETS for Function 1 */
136 #define MC_SETTINGS 0x40
137  #define IS_MIRRORED(mc) ((mc) & (1 << 16))
138  #define IS_ECC_ENABLED(mc) ((mc) & (1 << 5))
139  #define IS_RETRY_ENABLED(mc) ((mc) & (1 << 31))
140  #define IS_SCRBALGO_ENHANCED(mc) ((mc) & (1 << 8))
141 
142 #define MC_SETTINGS_A 0x58
143  #define IS_SINGLE_MODE(mca) ((mca) & (1 << 14))
144 
145 #define TOLM 0x6C
146 
147 #define MIR0 0x80
148 #define MIR1 0x84
149 #define MIR2 0x88
150 
151 /*
 152  * Note: Other Intel EDAC drivers use AMBPRESENT to identify the available
 153  * memory. From datasheet item 7.3.1 (FB-DIMM technology & organization), it
 154  * seems that we cannot use this information directly for the same purpose.
 155  * Each memory slot may have up to 2 AMB interfaces, one for the incoming and
 156  * another for the outgoing interface to the next slot.
 157  * For now, the driver just stores the AMB present registers, but relies only
 158  * on the MTR info to detect memory.
 159  * The datasheet is also not clear about how to map each AMBPRESENT register
 160  * to one of the 4 available channels.
161  */
162 #define AMBPRESENT_0 0x64
163 #define AMBPRESENT_1 0x66
164 
165 static const u16 mtr_regs[MAX_SLOTS] = {
166  0x80, 0x84, 0x88, 0x8c,
167  0x82, 0x86, 0x8a, 0x8e
168 };
169 
170 /*
 171  * Defines to extract the various fields from the
172  * MTRx - Memory Technology Registers
173  */
174 #define MTR_DIMMS_PRESENT(mtr) ((mtr) & (1 << 8))
175 #define MTR_DIMMS_ETHROTTLE(mtr) ((mtr) & (1 << 7))
176 #define MTR_DRAM_WIDTH(mtr) (((mtr) & (1 << 6)) ? 8 : 4)
177 #define MTR_DRAM_BANKS(mtr) (((mtr) & (1 << 5)) ? 8 : 4)
178 #define MTR_DIMM_RANKS(mtr) (((mtr) & (1 << 4)) ? 1 : 0)
179 #define MTR_DIMM_ROWS(mtr) (((mtr) >> 2) & 0x3)
180 #define MTR_DRAM_BANKS_ADDR_BITS 2
181 #define MTR_DIMM_ROWS_ADDR_BITS(mtr) (MTR_DIMM_ROWS(mtr) + 13)
182 #define MTR_DIMM_COLS(mtr) ((mtr) & 0x3)
183 #define MTR_DIMM_COLS_ADDR_BITS(mtr) (MTR_DIMM_COLS(mtr) + 10)
184 
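/*
 * Worked example (illustrative): an MTR value of 0x0131 decodes as
 * DIMM present (bit 8 set), x4 width (bit 6 clear), 8 banks (bit 5 set),
 * double rank (bit 4 set), 13 row address bits (rows field = 0) and
 * 11 column address bits (cols field = 1).
 */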
185 /************************************************
186  * i7300 Register definitions for error detection
187  ************************************************/
188 
189 /*
190  * Device 16.1: FBD Error Registers
191  */
192 #define FERR_FAT_FBD 0x98
193 static const char *ferr_fat_fbd_name[] = {
194  [22] = "Non-Redundant Fast Reset Timeout",
195  [2] = ">Tmid Thermal event with intelligent throttling disabled",
196  [1] = "Memory or FBD configuration CRC read error",
197  [0] = "Memory Write error on non-redundant retry or "
198  "FBD configuration Write error on retry",
199 };
200 #define GET_FBD_FAT_IDX(fbderr) (((fbderr) >> 28) & 3)
201 #define FERR_FAT_FBD_ERR_MASK ((1 << 0) | (1 << 1) | (1 << 2) | (1 << 22))
202 
203 #define FERR_NF_FBD 0xa0
204 static const char *ferr_nf_fbd_name[] = {
205  [24] = "DIMM-Spare Copy Completed",
206  [23] = "DIMM-Spare Copy Initiated",
207  [22] = "Redundant Fast Reset Timeout",
208  [21] = "Memory Write error on redundant retry",
209  [18] = "SPD protocol Error",
210  [17] = "FBD Northbound parity error on FBD Sync Status",
211  [16] = "Correctable Patrol Data ECC",
212  [15] = "Correctable Resilver- or Spare-Copy Data ECC",
213  [14] = "Correctable Mirrored Demand Data ECC",
214  [13] = "Correctable Non-Mirrored Demand Data ECC",
215  [11] = "Memory or FBD configuration CRC read error",
216  [10] = "FBD Configuration Write error on first attempt",
217  [9] = "Memory Write error on first attempt",
218  [8] = "Non-Aliased Uncorrectable Patrol Data ECC",
219  [7] = "Non-Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
220  [6] = "Non-Aliased Uncorrectable Mirrored Demand Data ECC",
221  [5] = "Non-Aliased Uncorrectable Non-Mirrored Demand Data ECC",
222  [4] = "Aliased Uncorrectable Patrol Data ECC",
223  [3] = "Aliased Uncorrectable Resilver- or Spare-Copy Data ECC",
224  [2] = "Aliased Uncorrectable Mirrored Demand Data ECC",
225  [1] = "Aliased Uncorrectable Non-Mirrored Demand Data ECC",
226  [0] = "Uncorrectable Data ECC on Replay",
227 };
228 #define GET_FBD_NF_IDX(fbderr) (((fbderr) >> 28) & 3)
229 #define FERR_NF_FBD_ERR_MASK ((1 << 24) | (1 << 23) | (1 << 22) | (1 << 21) |\
230  (1 << 18) | (1 << 17) | (1 << 16) | (1 << 15) |\
231  (1 << 14) | (1 << 13) | (1 << 11) | (1 << 10) |\
232  (1 << 9) | (1 << 8) | (1 << 7) | (1 << 6) |\
233  (1 << 5) | (1 << 4) | (1 << 3) | (1 << 2) |\
234  (1 << 1) | (1 << 0))
235 
236 #define EMASK_FBD 0xa8
237 #define EMASK_FBD_ERR_MASK ((1 << 27) | (1 << 26) | (1 << 25) | (1 << 24) |\
238  (1 << 22) | (1 << 21) | (1 << 20) | (1 << 19) |\
239  (1 << 18) | (1 << 17) | (1 << 16) | (1 << 14) |\
240  (1 << 13) | (1 << 12) | (1 << 11) | (1 << 10) |\
241  (1 << 9) | (1 << 8) | (1 << 7) | (1 << 6) |\
242  (1 << 5) | (1 << 4) | (1 << 3) | (1 << 2) |\
243  (1 << 1) | (1 << 0))
244 
245 /*
246  * Device 16.2: Global Error Registers
247  */
248 
249 #define FERR_GLOBAL_HI 0x48
250 static const char *ferr_global_hi_name[] = {
251  [3] = "FSB 3 Fatal Error",
252  [2] = "FSB 2 Fatal Error",
253  [1] = "FSB 1 Fatal Error",
254  [0] = "FSB 0 Fatal Error",
255 };
256 #define ferr_global_hi_is_fatal(errno) 1
257 
258 #define FERR_GLOBAL_LO 0x40
259 static const char *ferr_global_lo_name[] = {
260  [31] = "Internal MCH Fatal Error",
261  [30] = "Intel QuickData Technology Device Fatal Error",
262  [29] = "FSB1 Fatal Error",
263  [28] = "FSB0 Fatal Error",
264  [27] = "FBD Channel 3 Fatal Error",
265  [26] = "FBD Channel 2 Fatal Error",
266  [25] = "FBD Channel 1 Fatal Error",
267  [24] = "FBD Channel 0 Fatal Error",
 268  [23] = "PCI Express Device 7 Fatal Error",
269  [22] = "PCI Express Device 6 Fatal Error",
270  [21] = "PCI Express Device 5 Fatal Error",
271  [20] = "PCI Express Device 4 Fatal Error",
272  [19] = "PCI Express Device 3 Fatal Error",
273  [18] = "PCI Express Device 2 Fatal Error",
274  [17] = "PCI Express Device 1 Fatal Error",
275  [16] = "ESI Fatal Error",
276  [15] = "Internal MCH Non-Fatal Error",
 277  [14] = "Intel QuickData Technology Device Non-Fatal Error",
278  [13] = "FSB1 Non-Fatal Error",
279  [12] = "FSB 0 Non-Fatal Error",
280  [11] = "FBD Channel 3 Non-Fatal Error",
281  [10] = "FBD Channel 2 Non-Fatal Error",
282  [9] = "FBD Channel 1 Non-Fatal Error",
283  [8] = "FBD Channel 0 Non-Fatal Error",
284  [7] = "PCI Express Device 7 Non-Fatal Error",
285  [6] = "PCI Express Device 6 Non-Fatal Error",
286  [5] = "PCI Express Device 5 Non-Fatal Error",
287  [4] = "PCI Express Device 4 Non-Fatal Error",
288  [3] = "PCI Express Device 3 Non-Fatal Error",
289  [2] = "PCI Express Device 2 Non-Fatal Error",
290  [1] = "PCI Express Device 1 Non-Fatal Error",
291  [0] = "ESI Non-Fatal Error",
292 };
293 #define ferr_global_lo_is_fatal(errno) ((errno < 16) ? 0 : 1)
294 
295 #define NRECMEMA 0xbe
296  #define NRECMEMA_BANK(v) (((v) >> 12) & 7)
297  #define NRECMEMA_RANK(v) (((v) >> 8) & 15)
298 
299 #define NRECMEMB 0xc0
300  #define NRECMEMB_IS_WR(v) ((v) & (1 << 31))
301  #define NRECMEMB_CAS(v) (((v) >> 16) & 0x1fff)
302  #define NRECMEMB_RAS(v) ((v) & 0xffff)
303 
304 #define REDMEMA 0xdc
305 
306 #define REDMEMB 0x7c
 307  #define IS_SECOND_CH(v) ((v) & (1 << 17))
308 
309 #define RECMEMA 0xe0
310  #define RECMEMA_BANK(v) (((v) >> 12) & 7)
311  #define RECMEMA_RANK(v) (((v) >> 8) & 15)
312 
313 #define RECMEMB 0xe4
314  #define RECMEMB_IS_WR(v) ((v) & (1 << 31))
315  #define RECMEMB_CAS(v) (((v) >> 16) & 0x1fff)
316  #define RECMEMB_RAS(v) ((v) & 0xffff)
317 
318 /********************************************
319  * i7300 Functions related to error detection
320  ********************************************/
321 
334 static const char *get_err_from_table(const char *table[], int size, int pos)
335 {
336  if (unlikely(pos >= size))
337  return "Reserved";
338 
339  if (unlikely(!table[pos]))
340  return "Reserved";
341 
342  return table[pos];
343 }
344 
345 #define GET_ERR_FROM_TABLE(table, pos) \
346  get_err_from_table(table, ARRAY_SIZE(table), pos)
347 
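/*
 * i7300_process_error_global() - Check and report global (FSB/MCH) errors.
 * Reads FERR_GLOBAL_HI and FERR_GLOBAL_LO on device 16.2, reports the first
 * error bit found via i7300_mc_printk() and writes the register value back
 * to clear the bit.
 */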
354 static void i7300_process_error_global(struct mem_ctl_info *mci)
355 {
356  struct i7300_pvt *pvt;
357  u32 errnum, error_reg;
358  unsigned long errors;
359  const char *specific;
360  bool is_fatal;
361 
362  pvt = mci->pvt_info;
363 
364  /* read in the 1st FATAL error register */
365  pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
366  FERR_GLOBAL_HI, &error_reg);
367  if (unlikely(error_reg)) {
368  errors = error_reg;
369  errnum = find_first_bit(&errors,
370  ARRAY_SIZE(ferr_global_hi_name));
371  specific = GET_ERR_FROM_TABLE(ferr_global_hi_name, errnum);
372  is_fatal = ferr_global_hi_is_fatal(errnum);
373 
374  /* Clear the error bit */
375  pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
376  FERR_GLOBAL_HI, error_reg);
377 
378  goto error_global;
379  }
380 
381  pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
382  FERR_GLOBAL_LO, &error_reg);
383  if (unlikely(error_reg)) {
384  errors = error_reg;
385  errnum = find_first_bit(&errors,
386  ARRAY_SIZE(ferr_global_lo_name));
387  specific = GET_ERR_FROM_TABLE(ferr_global_lo_name, errnum);
388  is_fatal = ferr_global_lo_is_fatal(errnum);
389 
390  /* Clear the error bit */
391  pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
392  FERR_GLOBAL_LO, error_reg);
393 
394  goto error_global;
395  }
396  return;
397 
398 error_global:
399  i7300_mc_printk(mci, KERN_EMERG, "%s misc error: %s\n",
400  is_fatal ? "Fatal" : "NOT fatal", specific);
401 }
402 
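/*
 * i7300_process_fbd_error() - Check and report FB-DIMM errors.
 * Reads FERR_FAT_FBD and FERR_NF_FBD on device 16.1, decodes the failing
 * branch/bank/rank/CAS/RAS from the NRECMEMx and RECMEMx registers, and
 * reports the event via edac_mc_handle_error().
 */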
409 static void i7300_process_fbd_error(struct mem_ctl_info *mci)
410 {
411  struct i7300_pvt *pvt;
412  u32 errnum, value, error_reg;
413  u16 val16;
414  unsigned branch, channel, bank, rank, cas, ras;
415  u32 syndrome;
416 
417  unsigned long errors;
418  const char *specific;
419  bool is_wr;
420 
421  pvt = mci->pvt_info;
422 
423  /* read in the 1st FATAL error register */
424  pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
425  FERR_FAT_FBD, &error_reg);
426  if (unlikely(error_reg & FERR_FAT_FBD_ERR_MASK)) {
427  errors = error_reg & FERR_FAT_FBD_ERR_MASK ;
428  errnum = find_first_bit(&errors,
429  ARRAY_SIZE(ferr_fat_fbd_name));
430  specific = GET_ERR_FROM_TABLE(ferr_fat_fbd_name, errnum);
431  branch = (GET_FBD_FAT_IDX(error_reg) == 2) ? 1 : 0;
432 
433  pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
434  NRECMEMA, &val16);
435  bank = NRECMEMA_BANK(val16);
436  rank = NRECMEMA_RANK(val16);
437 
438  pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
439  NRECMEMB, &value);
440  is_wr = NRECMEMB_IS_WR(value);
441  cas = NRECMEMB_CAS(value);
442  ras = NRECMEMB_RAS(value);
443 
444  /* Clean the error register */
445  pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
446  FERR_FAT_FBD, error_reg);
447 
 448  snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
449  "Bank=%d RAS=%d CAS=%d Err=0x%lx (%s))",
450  bank, ras, cas, errors, specific);
451 
452  edac_mc_handle_error(HW_EVENT_ERR_FATAL, mci, 1, 0, 0, 0,
453  branch, -1, rank,
454  is_wr ? "Write error" : "Read error",
455  pvt->tmp_prt_buffer);
456 
457  }
458 
459  /* read in the 1st NON-FATAL error register */
460  pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
461  FERR_NF_FBD, &error_reg);
462  if (unlikely(error_reg & FERR_NF_FBD_ERR_MASK)) {
463  errors = error_reg & FERR_NF_FBD_ERR_MASK;
464  errnum = find_first_bit(&errors,
465  ARRAY_SIZE(ferr_nf_fbd_name));
466  specific = GET_ERR_FROM_TABLE(ferr_nf_fbd_name, errnum);
467  branch = (GET_FBD_NF_IDX(error_reg) == 2) ? 1 : 0;
468 
469  pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
470  REDMEMA, &syndrome);
471 
472  pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map,
473  RECMEMA, &val16);
474  bank = RECMEMA_BANK(val16);
475  rank = RECMEMA_RANK(val16);
476 
477  pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
478  RECMEMB, &value);
479  is_wr = RECMEMB_IS_WR(value);
480  cas = RECMEMB_CAS(value);
481  ras = RECMEMB_RAS(value);
482 
483  pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
484  REDMEMB, &value);
485  channel = (branch << 1);
486  if (IS_SECOND_CH(value))
487  channel++;
488 
489  /* Clear the error bit */
490  pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
491  FERR_NF_FBD, error_reg);
492 
493  /* Form out message */
 494  snprintf(pvt->tmp_prt_buffer, PAGE_SIZE,
495  "DRAM-Bank=%d RAS=%d CAS=%d, Err=0x%lx (%s))",
496  bank, ras, cas, errors, specific);
497 
 498  edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 0, 0,
499  syndrome,
 500  branch, channel % 2, rank,
501  is_wr ? "Write error" : "Read error",
502  pvt->tmp_prt_buffer);
503  }
504  return;
505 }
506 
511 static void i7300_check_error(struct mem_ctl_info *mci)
512 {
513  i7300_process_error_global(mci);
514  i7300_process_fbd_error(mci);
 515 }
516 
521 static void i7300_clear_error(struct mem_ctl_info *mci)
522 {
523  struct i7300_pvt *pvt = mci->pvt_info;
524  u32 value;
525  /*
526  * All error values are RWC - we need to read and write 1 to the
 527  * bit that we want to clean up
528  */
529 
530  /* Clear global error registers */
531  pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
532  FERR_GLOBAL_HI, &value);
533  pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
534  FERR_GLOBAL_HI, value);
535 
536  pci_read_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
537  FERR_GLOBAL_LO, &value);
538  pci_write_config_dword(pvt->pci_dev_16_2_fsb_err_regs,
539  FERR_GLOBAL_LO, value);
540 
541  /* Clear FBD error registers */
542  pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
543  FERR_FAT_FBD, &value);
544  pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
545  FERR_FAT_FBD, value);
546 
547  pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
548  FERR_NF_FBD, &value);
549  pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
550  FERR_NF_FBD, value);
551 }
552 
558 static void i7300_enable_error_reporting(struct mem_ctl_info *mci)
559 {
560  struct i7300_pvt *pvt = mci->pvt_info;
561  u32 fbd_error_mask;
562 
563  /* Read the FBD Error Mask Register */
564  pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
565  EMASK_FBD, &fbd_error_mask);
566 
567  /* Enable with a '0' */
568  fbd_error_mask &= ~(EMASK_FBD_ERR_MASK);
569 
570  pci_write_config_dword(pvt->pci_dev_16_1_fsb_addr_map,
571  EMASK_FBD, fbd_error_mask);
572 }
573 
574 /************************************************
 575  * i7300 Functions related to memory enumeration
576  ************************************************/
577 
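/*
 * decode_mtr() - Decode the Memory Technology Register of one slot/channel.
 * Fills the dimm_info size and the EDAC dimm description (pages, grain,
 * memory type, ECC mode, device width). Returns the raw MTR value; the
 * caller checks it with MTR_DIMMS_PRESENT() to see whether a DIMM exists.
 */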
587 static int decode_mtr(struct i7300_pvt *pvt,
588  int slot, int ch, int branch,
589  struct i7300_dimm_info *dinfo,
590  struct dimm_info *dimm)
591 {
592  int mtr, ans, addrBits, channel;
593 
594  channel = to_channel(ch, branch);
595 
596  mtr = pvt->mtr[slot][branch];
597  ans = MTR_DIMMS_PRESENT(mtr) ? 1 : 0;
598 
599  edac_dbg(2, "\tMTR%d CH%d: DIMMs are %sPresent (mtr)\n",
600  slot, channel, ans ? "" : "NOT ");
601 
602  /* Determine if there is a DIMM present in this DIMM slot */
603  if (!ans)
604  return 0;
605 
606  /* Start with the number of bits for a Bank
607  * on the DRAM */
608  addrBits = MTR_DRAM_BANKS_ADDR_BITS;
 609  /* Add the number of ROW bits */
610  addrBits += MTR_DIMM_ROWS_ADDR_BITS(mtr);
611  /* add the number of COLUMN bits */
612  addrBits += MTR_DIMM_COLS_ADDR_BITS(mtr);
613  /* add the number of RANK bits */
614  addrBits += MTR_DIMM_RANKS(mtr);
615 
 616  addrBits += 6; /* each location is 64 bits (2^6) wide */
 617  addrBits -= 20; /* divide by 2^20 to get MiB */
 618  addrBits -= 3; /* 8 bits per byte */
619 
620  dinfo->megabytes = 1 << addrBits;
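 /*
  * Worked example (illustrative): a single-rank DIMM with 14 row and
  * 11 column address bits gives 2 + 14 + 11 + 0 = 27 address bits;
  * adding 6 (64-bit locations) and subtracting 20 + 3 (bytes to MiB)
  * leaves 10, i.e. 1 << 10 = 1024 MB.
  */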
621 
622  edac_dbg(2, "\t\tWIDTH: x%d\n", MTR_DRAM_WIDTH(mtr));
623 
624  edac_dbg(2, "\t\tELECTRICAL THROTTLING is %s\n",
625  MTR_DIMMS_ETHROTTLE(mtr) ? "enabled" : "disabled");
626 
627  edac_dbg(2, "\t\tNUMBANK: %d bank(s)\n", MTR_DRAM_BANKS(mtr));
628  edac_dbg(2, "\t\tNUMRANK: %s\n",
629  MTR_DIMM_RANKS(mtr) ? "double" : "single");
630  edac_dbg(2, "\t\tNUMROW: %s\n",
631  MTR_DIMM_ROWS(mtr) == 0 ? "8,192 - 13 rows" :
632  MTR_DIMM_ROWS(mtr) == 1 ? "16,384 - 14 rows" :
633  MTR_DIMM_ROWS(mtr) == 2 ? "32,768 - 15 rows" :
634  "65,536 - 16 rows");
635  edac_dbg(2, "\t\tNUMCOL: %s\n",
636  MTR_DIMM_COLS(mtr) == 0 ? "1,024 - 10 columns" :
637  MTR_DIMM_COLS(mtr) == 1 ? "2,048 - 11 columns" :
638  MTR_DIMM_COLS(mtr) == 2 ? "4,096 - 12 columns" :
639  "reserved");
640  edac_dbg(2, "\t\tSIZE: %d MB\n", dinfo->megabytes);
641 
642  /*
643  * The type of error detection actually depends of the
644  * mode of operation. When it is just one single memory chip, at
645  * socket 0, channel 0, it uses 8-byte-over-32-byte SECDED+ code.
646  * In normal or mirrored mode, it uses Lockstep mode,
647  * with the possibility of using an extended algorithm for x8 memories
648  * See datasheet Sections 7.3.6 to 7.3.8
649  */
650 
651  dimm->nr_pages = MiB_TO_PAGES(dinfo->megabytes);
652  dimm->grain = 8;
653  dimm->mtype = MEM_FB_DDR2;
654  if (IS_SINGLE_MODE(pvt->mc_settings_a)) {
655  dimm->edac_mode = EDAC_SECDED;
656  edac_dbg(2, "\t\tECC code is 8-byte-over-32-byte SECDED+ code\n");
657  } else {
658  edac_dbg(2, "\t\tECC code is on Lockstep mode\n");
659  if (MTR_DRAM_WIDTH(mtr) == 8)
660  dimm->edac_mode = EDAC_S8ECD8ED;
661  else
662  dimm->edac_mode = EDAC_S4ECD4ED;
663  }
664 
 665  /* ask what device type is on this row */
666  if (MTR_DRAM_WIDTH(mtr) == 8) {
667  edac_dbg(2, "\t\tScrub algorithm for x8 is on %s mode\n",
 668  IS_SCRBALGO_ENHANCED(pvt->mc_settings) ?
669  "enhanced" : "normal");
670 
671  dimm->dtype = DEV_X8;
672  } else
673  dimm->dtype = DEV_X4;
674 
675  return mtr;
676 }
677 
684 static void print_dimm_size(struct i7300_pvt *pvt)
685 {
686 #ifdef CONFIG_EDAC_DEBUG
687  struct i7300_dimm_info *dinfo;
688  char *p;
689  int space, n;
690  int channel, slot;
691 
692  space = PAGE_SIZE;
693  p = pvt->tmp_prt_buffer;
694 
695  n = snprintf(p, space, " ");
696  p += n;
697  space -= n;
698  for (channel = 0; channel < MAX_CHANNELS; channel++) {
699  n = snprintf(p, space, "channel %d | ", channel);
700  p += n;
701  space -= n;
702  }
703  edac_dbg(2, "%s\n", pvt->tmp_prt_buffer);
704  p = pvt->tmp_prt_buffer;
705  space = PAGE_SIZE;
706  n = snprintf(p, space, "-------------------------------"
707  "------------------------------");
708  p += n;
709  space -= n;
710  edac_dbg(2, "%s\n", pvt->tmp_prt_buffer);
711  p = pvt->tmp_prt_buffer;
712  space = PAGE_SIZE;
713 
714  for (slot = 0; slot < MAX_SLOTS; slot++) {
715  n = snprintf(p, space, "csrow/SLOT %d ", slot);
716  p += n;
717  space -= n;
718 
719  for (channel = 0; channel < MAX_CHANNELS; channel++) {
720  dinfo = &pvt->dimm_info[slot][channel];
721  n = snprintf(p, space, "%4d MB | ", dinfo->megabytes);
722  p += n;
723  space -= n;
724  }
725 
726  edac_dbg(2, "%s\n", pvt->tmp_prt_buffer);
727  p = pvt->tmp_prt_buffer;
728  space = PAGE_SIZE;
729  }
730 
731  n = snprintf(p, space, "-------------------------------"
732  "------------------------------");
733  p += n;
734  space -= n;
735  edac_dbg(2, "%s\n", pvt->tmp_prt_buffer);
736  p = pvt->tmp_prt_buffer;
737  space = PAGE_SIZE;
738 #endif
739 }
740 
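/*
 * i7300_init_csrows() - Enumerate the memory attached to both branches.
 * Caches the AMBPRESENT and MTR registers and decodes every slot/channel
 * combination via decode_mtr(). Returns 0 if at least one DIMM was found,
 * -ENODEV otherwise.
 */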
747 static int i7300_init_csrows(struct mem_ctl_info *mci)
748 {
749  struct i7300_pvt *pvt;
750  struct i7300_dimm_info *dinfo;
751  int rc = -ENODEV;
752  int mtr;
753  int ch, branch, slot, channel;
754  struct dimm_info *dimm;
755 
756  pvt = mci->pvt_info;
757 
758  edac_dbg(2, "Memory Technology Registers:\n");
759 
760  /* Get the AMB present registers for the four channels */
761  for (branch = 0; branch < MAX_BRANCHES; branch++) {
 762  /* Read this branch's two AMB-present registers */
763  channel = to_channel(0, branch);
764  pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch],
765  AMBPRESENT_0,
766  &pvt->ambpresent[channel]);
767  edac_dbg(2, "\t\tAMB-present CH%d = 0x%x:\n",
768  channel, pvt->ambpresent[channel]);
769 
770  channel = to_channel(1, branch);
771  pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch],
772  AMBPRESENT_1,
773  &pvt->ambpresent[channel]);
774  edac_dbg(2, "\t\tAMB-present CH%d = 0x%x:\n",
775  channel, pvt->ambpresent[channel]);
776  }
777 
778  /* Get the set of MTR[0-7] regs by each branch */
779  for (slot = 0; slot < MAX_SLOTS; slot++) {
780  int where = mtr_regs[slot];
781  for (branch = 0; branch < MAX_BRANCHES; branch++) {
782  pci_read_config_word(pvt->pci_dev_2x_0_fbd_branch[branch],
783  where,
784  &pvt->mtr[slot][branch]);
785  for (ch = 0; ch < MAX_CH_PER_BRANCH; ch++) {
786  int channel = to_channel(ch, branch);
787 
788  dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms,
789  mci->n_layers, branch, ch, slot);
790 
791  dinfo = &pvt->dimm_info[slot][channel];
792 
793  mtr = decode_mtr(pvt, slot, ch, branch,
794  dinfo, dimm);
795 
796  /* if no DIMMS on this row, continue */
797  if (!MTR_DIMMS_PRESENT(mtr))
798  continue;
799 
800  rc = 0;
801 
802  }
803  }
804  }
805 
806  return rc;
807 }
808 
814 static void decode_mir(int mir_no, u16 mir[MAX_MIR])
815 {
816  if (mir[mir_no] & 3)
817  edac_dbg(2, "MIR%d: limit= 0x%x Branch(es) that participate: %s %s\n",
818  mir_no,
819  (mir[mir_no] >> 4) & 0xfff,
820  (mir[mir_no] & 1) ? "B0" : "",
821  (mir[mir_no] & 2) ? "B1" : "");
822 }
823 
830 static int i7300_get_mc_regs(struct mem_ctl_info *mci)
831 {
832  struct i7300_pvt *pvt;
833  u32 actual_tolm;
834  int i, rc;
835 
836  pvt = mci->pvt_info;
837 
838  pci_read_config_dword(pvt->pci_dev_16_0_fsb_ctlr, AMBASE,
839  (u32 *) &pvt->ambase);
840 
841  edac_dbg(2, "AMBASE= 0x%lx\n", (long unsigned int)pvt->ambase);
842 
843  /* Get the Branch Map regs */
844  pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, TOLM, &pvt->tolm);
845  pvt->tolm >>= 12;
846  edac_dbg(2, "TOLM (number of 256M regions) =%u (0x%x)\n",
847  pvt->tolm, pvt->tolm);
848 
849  actual_tolm = (u32) ((1000l * pvt->tolm) >> (30 - 28));
850  edac_dbg(2, "Actual TOLM byte addr=%u.%03u GB (0x%x)\n",
851  actual_tolm/1000, actual_tolm % 1000, pvt->tolm << 28);
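 /*
  * Illustrative example: a TOLM field of 8 means 8 x 256 MB = 2 GB of
  * low memory, so actual_tolm = (1000 * 8) >> 2 = 2000 and the line
  * above prints "2.000 GB".
  */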
852 
853  /* Get memory controller settings */
854  pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, MC_SETTINGS,
855  &pvt->mc_settings);
856  pci_read_config_dword(pvt->pci_dev_16_1_fsb_addr_map, MC_SETTINGS_A,
857  &pvt->mc_settings_a);
858 
859  if (IS_SINGLE_MODE(pvt->mc_settings_a))
860  edac_dbg(0, "Memory controller operating on single mode\n");
861  else
862  edac_dbg(0, "Memory controller operating on %smirrored mode\n",
863  IS_MIRRORED(pvt->mc_settings) ? "" : "non-");
864 
865  edac_dbg(0, "Error detection is %s\n",
866  IS_ECC_ENABLED(pvt->mc_settings) ? "enabled" : "disabled");
867  edac_dbg(0, "Retry is %s\n",
868  IS_RETRY_ENABLED(pvt->mc_settings) ? "enabled" : "disabled");
869 
870  /* Get Memory Interleave Range registers */
871  pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR0,
872  &pvt->mir[0]);
873  pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR1,
874  &pvt->mir[1]);
875  pci_read_config_word(pvt->pci_dev_16_1_fsb_addr_map, MIR2,
876  &pvt->mir[2]);
877 
878  /* Decode the MIR regs */
879  for (i = 0; i < MAX_MIR; i++)
880  decode_mir(i, pvt->mir);
881 
882  rc = i7300_init_csrows(mci);
883  if (rc < 0)
884  return rc;
885 
886  /* Go and determine the size of each DIMM and place in an
887  * orderly matrix */
888  print_dimm_size(pvt);
889 
890  return 0;
891 }
892 
893 /*************************************************
894  * i7300 Functions related to device probe/release
895  *************************************************/
896 
901 static void i7300_put_devices(struct mem_ctl_info *mci)
902 {
903  struct i7300_pvt *pvt;
904  int branch;
905 
906  pvt = mci->pvt_info;
907 
908  /* Decrement usage count for devices */
 909  for (branch = 0; branch < MAX_BRANCHES; branch++)
910  pci_dev_put(pvt->pci_dev_2x_0_fbd_branch[branch]);
 911  pci_dev_put(pvt->pci_dev_16_2_fsb_err_regs);
 912  pci_dev_put(pvt->pci_dev_16_1_fsb_addr_map);
913 }
914 
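/*
 * i7300_get_devices() - Reserve (pci_get_device) the extra PCI functions
 * used by this driver: functions 1 and 2 of device 16 (branch map and
 * global error registers) and function 0 of the two FBD branch controllers
 * (devices 21.0 and 22.0). Returns 0 on success, or -ENODEV after releasing
 * any device already taken.
 */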
926 static int __devinit i7300_get_devices(struct mem_ctl_info *mci)
927 {
928  struct i7300_pvt *pvt;
929  struct pci_dev *pdev;
930 
931  pvt = mci->pvt_info;
932 
933  /* Attempt to 'get' the MCH register we want */
934  pdev = NULL;
935  while (!pvt->pci_dev_16_1_fsb_addr_map ||
 936  !pvt->pci_dev_16_2_fsb_err_regs) {
 937  pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
 938  PCI_DEVICE_ID_INTEL_I7300_MCH_ERR, pdev);
939  if (!pdev) {
940  /* End of list, leave */
 941  i7300_printk(KERN_ERR,
942  "'system address,Process Bus' "
943  "device not found:"
944  "vendor 0x%x device 0x%x ERR funcs "
945  "(broken BIOS?)\n",
948  goto error;
949  }
950 
951  /* Store device 16 funcs 1 and 2 */
952  switch (PCI_FUNC(pdev->devfn)) {
953  case 1:
954  pvt->pci_dev_16_1_fsb_addr_map = pdev;
955  break;
956  case 2:
957  pvt->pci_dev_16_2_fsb_err_regs = pdev;
958  break;
959  }
960  }
961 
962  edac_dbg(1, "System Address, processor bus- PCI Bus ID: %s %x:%x\n",
963  pci_name(pvt->pci_dev_16_0_fsb_ctlr),
964  pvt->pci_dev_16_0_fsb_ctlr->vendor,
965  pvt->pci_dev_16_0_fsb_ctlr->device);
966  edac_dbg(1, "Branchmap, control and errors - PCI Bus ID: %s %x:%x\n",
967  pci_name(pvt->pci_dev_16_1_fsb_addr_map),
968  pvt->pci_dev_16_1_fsb_addr_map->vendor,
969  pvt->pci_dev_16_1_fsb_addr_map->device);
970  edac_dbg(1, "FSB Error Regs - PCI Bus ID: %s %x:%x\n",
971  pci_name(pvt->pci_dev_16_2_fsb_err_regs),
972  pvt->pci_dev_16_2_fsb_err_regs->vendor,
973  pvt->pci_dev_16_2_fsb_err_regs->device);
974 
 975  pvt->pci_dev_2x_0_fbd_branch[0] = pci_get_device(PCI_VENDOR_ID_INTEL,
 976  PCI_DEVICE_ID_INTEL_I7300_MCH_FB0,
977  NULL);
978  if (!pvt->pci_dev_2x_0_fbd_branch[0]) {
 979  i7300_printk(KERN_ERR,
980  "MC: 'BRANCH 0' device not found:"
981  "vendor 0x%x device 0x%x Func 0 (broken BIOS?)\n",
983  goto error;
984  }
985 
 986  pvt->pci_dev_2x_0_fbd_branch[1] = pci_get_device(PCI_VENDOR_ID_INTEL,
 987  PCI_DEVICE_ID_INTEL_I7300_MCH_FB1,
988  NULL);
989  if (!pvt->pci_dev_2x_0_fbd_branch[1]) {
 990  i7300_printk(KERN_ERR,
991  "MC: 'BRANCH 1' device not found:"
992  "vendor 0x%x device 0x%x Func 0 "
993  "(broken BIOS?)\n",
996  goto error;
997  }
998 
999  return 0;
1000 
1001 error:
1002  i7300_put_devices(mci);
1003  return -ENODEV;
1004 }
1005 
1011 static int __devinit i7300_init_one(struct pci_dev *pdev,
1012  const struct pci_device_id *id)
1013 {
1014  struct mem_ctl_info *mci;
1015  struct edac_mc_layer layers[3];
1016  struct i7300_pvt *pvt;
1017  int rc;
1018 
1019  /* wake up device */
1020  rc = pci_enable_device(pdev);
1021  if (rc == -EIO)
1022  return rc;
1023 
1024  edac_dbg(0, "MC: pdev bus %u dev=0x%x fn=0x%x\n",
1025  pdev->bus->number,
1026  PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1027 
 1028  /* We are only looking for func 0 of the set */
1029  if (PCI_FUNC(pdev->devfn) != 0)
1030  return -ENODEV;
1031 
1032  /* allocate a new MC control structure */
1033  layers[0].type = EDAC_MC_LAYER_BRANCH;
1034  layers[0].size = MAX_BRANCHES;
1035  layers[0].is_virt_csrow = false;
1036  layers[1].type = EDAC_MC_LAYER_CHANNEL;
1037  layers[1].size = MAX_CH_PER_BRANCH;
1038  layers[1].is_virt_csrow = true;
1039  layers[2].type = EDAC_MC_LAYER_SLOT;
1040  layers[2].size = MAX_SLOTS;
1041  layers[2].is_virt_csrow = true;
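 /*
  * The three layers describe a branch x channel x slot geometry:
  * 2 branches x 2 channels x 8 slots = 32 possible DIMMs, matching the
  * dimm_info[MAX_SLOTS][MAX_CHANNELS] matrix in struct i7300_pvt.
  */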
1042  mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, sizeof(*pvt));
1043  if (mci == NULL)
1044  return -ENOMEM;
1045 
1046  edac_dbg(0, "MC: mci = %p\n", mci);
1047 
1048  mci->pdev = &pdev->dev; /* record ptr to the generic device */
1049 
1050  pvt = mci->pvt_info;
1051  pvt->pci_dev_16_0_fsb_ctlr = pdev; /* Record this device in our private */
1052 
 1053  pvt->tmp_prt_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
1054  if (!pvt->tmp_prt_buffer) {
1055  edac_mc_free(mci);
1056  return -ENOMEM;
1057  }
1058 
1059  /* 'get' the pci devices we want to reserve for our use */
1060  if (i7300_get_devices(mci))
1061  goto fail0;
1062 
1063  mci->mc_idx = 0;
1064  mci->mtype_cap = MEM_FLAG_FB_DDR2;
 1065  mci->edac_ctl_cap = EDAC_FLAG_NONE;
1066  mci->edac_cap = EDAC_FLAG_NONE;
1067  mci->mod_name = "i7300_edac.c";
1068  mci->mod_ver = I7300_REVISION;
1069  mci->ctl_name = i7300_devs[0].ctl_name;
1070  mci->dev_name = pci_name(pdev);
1071  mci->ctl_page_to_phys = NULL;
1072 
1073  /* Set the function pointer to an actual operation function */
1074  mci->edac_check = i7300_check_error;
1075 
1076  /* initialize the MC control structure 'csrows' table
1077  * with the mapping and control information */
1078  if (i7300_get_mc_regs(mci)) {
1079  edac_dbg(0, "MC: Setting mci->edac_cap to EDAC_FLAG_NONE because i7300_init_csrows() returned nonzero value\n");
1080  mci->edac_cap = EDAC_FLAG_NONE; /* no csrows found */
1081  } else {
1082  edac_dbg(1, "MC: Enable error reporting now\n");
1083  i7300_enable_error_reporting(mci);
1084  }
1085 
1086  /* add this new MC control structure to EDAC's list of MCs */
1087  if (edac_mc_add_mc(mci)) {
1088  edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
1089  /* FIXME: perhaps some code should go here that disables error
1090  * reporting if we just enabled it
1091  */
1092  goto fail1;
1093  }
1094 
1095  i7300_clear_error(mci);
1096 
1097  /* allocating generic PCI control info */
1098  i7300_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
1099  if (!i7300_pci) {
 1100  printk(KERN_WARNING
1101  "%s(): Unable to create PCI control\n",
1102  __func__);
 1103  printk(KERN_WARNING
1104  "%s(): PCI error report via EDAC not setup\n",
1105  __func__);
1106  }
1107 
1108  return 0;
1109 
1110  /* Error exit unwinding stack */
1111 fail1:
1112 
1113  i7300_put_devices(mci);
1114 
1115 fail0:
1116  kfree(pvt->tmp_prt_buffer);
1117  edac_mc_free(mci);
1118  return -ENODEV;
1119 }
1120 
1125 static void __devexit i7300_remove_one(struct pci_dev *pdev)
1126 {
1127  struct mem_ctl_info *mci;
1128  char *tmp;
1129 
1130  edac_dbg(0, "\n");
1131 
1132  if (i7300_pci)
1133  edac_pci_release_generic_ctl(i7300_pci);
1134 
1135  mci = edac_mc_del_mc(&pdev->dev);
1136  if (!mci)
1137  return;
1138 
1139  tmp = ((struct i7300_pvt *)mci->pvt_info)->tmp_prt_buffer;
1140 
1141  /* retrieve references to resources, and free those resources */
1142  i7300_put_devices(mci);
1143 
1144  kfree(tmp);
1145  edac_mc_free(mci);
1146 }
1147 
1148 /*
1149  * pci_device_id: table for which devices we are looking for
1150  *
1151  * Has only 8086:360c PCI ID
1152  */
1153 static DEFINE_PCI_DEVICE_TABLE(i7300_pci_tbl) = {
 1154  {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_I7300_MCH_ERR)},
1155  {0,} /* 0 terminated list. */
1156 };
1157 
1158 MODULE_DEVICE_TABLE(pci, i7300_pci_tbl);
1159 
1160 /*
1161  * i7300_driver: pci_driver structure for this module
1162  */
1163 static struct pci_driver i7300_driver = {
1164  .name = "i7300_edac",
1165  .probe = i7300_init_one,
1166  .remove = __devexit_p(i7300_remove_one),
1167  .id_table = i7300_pci_tbl,
1168 };
1169 
1173 static int __init i7300_init(void)
1174 {
1175  int pci_rc;
1176 
1177  edac_dbg(2, "\n");
1178 
1179  /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1180  opstate_init();
1181 
1182  pci_rc = pci_register_driver(&i7300_driver);
1183 
1184  return (pci_rc < 0) ? pci_rc : 0;
1185 }
1186 
1190 static void __exit i7300_exit(void)
1191 {
1192  edac_dbg(2, "\n");
1193  pci_unregister_driver(&i7300_driver);
1194 }
1195 
1196 module_init(i7300_init);
1197 module_exit(i7300_exit);
1198 
1199 MODULE_LICENSE("GPL");
1200 MODULE_AUTHOR("Mauro Carvalho Chehab <[email protected]>");
1201 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
1202 MODULE_DESCRIPTION("MC Driver for Intel I7300 memory controllers - "
1203  I7300_REVISION);
1204 
1205 module_param(edac_op_state, int, 0444);
1206 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");