Linux Kernel 3.7.1
i7core_edac.c
1 /* Intel i7 core/Nehalem Memory Controller kernel module
2  *
3  * This driver supports the memory controllers found on the Intel
4  * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5  * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
6  * and Westmere-EP.
7  *
8  * This file may be distributed under the terms of the
9  * GNU General Public License version 2 only.
10  *
11  * Copyright (c) 2009-2010 by:
12  * Mauro Carvalho Chehab <[email protected]>
13  *
14  * Red Hat Inc. http://www.redhat.com
15  *
16  * Forked and adapted from the i5400_edac driver
17  *
18  * Based on the following public Intel datasheets:
19  * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20  * Datasheet, Volume 2:
21  * http://download.intel.com/design/processor/datashts/320835.pdf
22  * Intel Xeon Processor 5500 Series Datasheet Volume 2
23  * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
24  * also available at:
25  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
26  */
27 
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/dmi.h>
35 #include <linux/edac.h>
36 #include <linux/mmzone.h>
37 #include <linux/smp.h>
38 #include <asm/mce.h>
39 #include <asm/processor.h>
40 #include <asm/div64.h>
41 
42 #include "edac_core.h"
43 
44 /* Static vars */
45 static LIST_HEAD(i7core_edac_list);
46 static DEFINE_MUTEX(i7core_edac_lock);
47 static int probed;
48 
49 static int use_pci_fixup;
50 module_param(use_pci_fixup, int, 0444);
51 MODULE_PARM_DESC(use_pci_fixup, "Enable PCI fixup to seek for hidden devices");
52 /*
53  * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
54  * registers start at bus 255, and are not reported by BIOS.
55  * We currently find devices with only 2 sockets. In order to support systems
56  * with more sockets on the QPI (QuickPath Interconnect), just increment this number.
57  */
58 #define MAX_SOCKET_BUSES 2
59 
60 
61 /*
62  * Alter this version for the module when modifications are made
63  */
64 #define I7CORE_REVISION " Ver: 1.0.0"
65 #define EDAC_MOD_STR "i7core_edac"
66 
67 /*
68  * Debug macros
69  */
70 #define i7core_printk(level, fmt, arg...) \
71  edac_printk(level, "i7core", fmt, ##arg)
72 
73 #define i7core_mc_printk(mci, level, fmt, arg...) \
74  edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
75 
76 /*
77  * i7core Memory Controller Registers
78  */
79 
80  /* OFFSETS for Device 0 Function 0 */
81 
82 #define MC_CFG_CONTROL 0x90
83  #define MC_CFG_UNLOCK 0x02
84  #define MC_CFG_LOCK 0x00
85 
86  /* OFFSETS for Device 3 Function 0 */
87 
88 #define MC_CONTROL 0x48
89 #define MC_STATUS 0x4c
90 #define MC_MAX_DOD 0x64
91 
92 /*
93  * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
94  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
95  */
96 
97 #define MC_TEST_ERR_RCV1 0x60
98  #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
99 
100 #define MC_TEST_ERR_RCV0 0x64
101  #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
102  #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
103 
104 /* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
105 #define MC_SSRCONTROL 0x48
106  #define SSR_MODE_DISABLE 0x00
107  #define SSR_MODE_ENABLE 0x01
108  #define SSR_MODE_MASK 0x03
109 
110 #define MC_SCRUB_CONTROL 0x4c
111  #define STARTSCRUB (1 << 24)
112  #define SCRUBINTERVAL_MASK 0xffffff
113 
114 #define MC_COR_ECC_CNT_0 0x80
115 #define MC_COR_ECC_CNT_1 0x84
116 #define MC_COR_ECC_CNT_2 0x88
117 #define MC_COR_ECC_CNT_3 0x8c
118 #define MC_COR_ECC_CNT_4 0x90
119 #define MC_COR_ECC_CNT_5 0x94
120 
121 #define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
122 #define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
123 
124 
125  /* OFFSETS for Devices 4,5 and 6 Function 0 */
126 
127 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
128  #define THREE_DIMMS_PRESENT (1 << 24)
129  #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
130  #define QUAD_RANK_PRESENT (1 << 22)
131  #define REGISTERED_DIMM (1 << 15)
132 
133 #define MC_CHANNEL_MAPPER 0x60
134  #define RDLCH(r, ch) ((((r) >> (3 + (ch * 6))) & 0x07) - 1)
135  #define WRLCH(r, ch) ((((r) >> (ch * 6)) & 0x07) - 1)
136 
137 #define MC_CHANNEL_RANK_PRESENT 0x7c
138  #define RANK_PRESENT_MASK 0xffff
139 
140 #define MC_CHANNEL_ADDR_MATCH 0xf0
141 #define MC_CHANNEL_ERROR_MASK 0xf8
142 #define MC_CHANNEL_ERROR_INJECT 0xfc
143  #define INJECT_ADDR_PARITY 0x10
144  #define INJECT_ECC 0x08
145  #define MASK_CACHELINE 0x06
146  #define MASK_FULL_CACHELINE 0x06
147  #define MASK_MSB32_CACHELINE 0x04
148  #define MASK_LSB32_CACHELINE 0x02
149  #define NO_MASK_CACHELINE 0x00
150  #define REPEAT_EN 0x01
151 
152  /* OFFSETS for Devices 4,5 and 6 Function 1 */
153 
154 #define MC_DOD_CH_DIMM0 0x48
155 #define MC_DOD_CH_DIMM1 0x4c
156 #define MC_DOD_CH_DIMM2 0x50
157  #define RANKOFFSET_MASK ((1 << 12) | (1 << 11) | (1 << 10))
158  #define RANKOFFSET(x) ((x & RANKOFFSET_MASK) >> 10)
159  #define DIMM_PRESENT_MASK (1 << 9)
160  #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
161  #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
162  #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
163  #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
164  #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
165  #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
166  #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
167  #define MC_DOD_NUMCOL_MASK 3
168  #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
169 
170 #define MC_RANK_PRESENT 0x7c
171 
172 #define MC_SAG_CH_0 0x80
173 #define MC_SAG_CH_1 0x84
174 #define MC_SAG_CH_2 0x88
175 #define MC_SAG_CH_3 0x8c
176 #define MC_SAG_CH_4 0x90
177 #define MC_SAG_CH_5 0x94
178 #define MC_SAG_CH_6 0x98
179 #define MC_SAG_CH_7 0x9c
180 
181 #define MC_RIR_LIMIT_CH_0 0x40
182 #define MC_RIR_LIMIT_CH_1 0x44
183 #define MC_RIR_LIMIT_CH_2 0x48
184 #define MC_RIR_LIMIT_CH_3 0x4C
185 #define MC_RIR_LIMIT_CH_4 0x50
186 #define MC_RIR_LIMIT_CH_5 0x54
187 #define MC_RIR_LIMIT_CH_6 0x58
188 #define MC_RIR_LIMIT_CH_7 0x5C
189 #define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
190 
191 #define MC_RIR_WAY_CH 0x80
192  #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
193  #define MC_RIR_WAY_RANK_MASK 0x7
194 
195 /*
196  * i7core structs
197  */
198 
199 #define NUM_CHANS 3
200 #define MAX_DIMMS 3 /* Max DIMMS per channel */
201 #define MAX_MCR_FUNC 4
202 #define MAX_CHAN_FUNC 3
203 
204 struct i7core_info {
205  u32 mc_control;
206  u32 mc_status;
207  u32 max_dod;
208  u32 ch_map;
209 };
210 
211 
212 struct i7core_inject {
213  int enable;
214 
215  u32 section;
216  u32 type;
217  u32 eccmask;
218 
219  /* Error address mask */
220  int channel, dimm, rank, bank, page, col;
221 };
222 
223 struct i7core_channel {
224  bool is_3dimms_present;
225  bool is_single_4rank;
226  bool has_4rank;
227  u32 dimms;
228 };
229 
230 struct pci_id_descr {
231  int dev;
232  int func;
233  int dev_id;
234  int optional;
235 };
236 
237 struct pci_id_table {
238  const struct pci_id_descr *descr;
239  int n_devs;
240 };
241 
242 struct i7core_dev {
243  struct list_head list;
244  u8 socket;
245  struct pci_dev **pdev;
246  int n_devs;
247  struct mem_ctl_info *mci;
248 };
249 
250 struct i7core_pvt {
251  struct device *addrmatch_dev, *chancounts_dev;
252 
253  struct pci_dev *pci_noncore;
254  struct pci_dev *pci_mcr[MAX_MCR_FUNC + 1];
255  struct pci_dev *pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];
256 
257  struct i7core_dev *i7core_dev;
258 
259  struct i7core_info info;
260  struct i7core_inject inject;
261  struct i7core_channel channel[NUM_CHANS];
262 
263  int ce_count_available;
264 
265  /* ECC corrected errors counts per udimm */
266  unsigned long udimm_ce_count[MAX_DIMMS];
267  int udimm_last_ce_count[MAX_DIMMS];
268  /* ECC corrected errors counts per rdimm */
269  unsigned long rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
270  int rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];
271 
272  bool is_registered, enable_scrub;
273 
274  /* Fifo double buffers */
275  struct mce mce_entry[MCE_LOG_LEN];
276  struct mce mce_outentry[MCE_LOG_LEN];
277 
278  /* Fifo in/out counters */
279  unsigned mce_in, mce_out;
280 
281  /* Count of errors that could not be retrieved (overrun) */
282  unsigned mce_overrun;
283 
284  /* DCLK Frequency used for computing scrub rate */
285  int dclk_freq;
286 
287  /* Struct to control EDAC polling */
288  struct edac_pci_ctl_info *i7core_pci;
289 };
290 
291 #define PCI_DESCR(device, function, device_id) \
292  .dev = (device), \
293  .func = (function), \
294  .dev_id = (device_id)
295 
296 static const struct pci_id_descr pci_dev_descr_i7core_nehalem[] = {
297  /* Memory controller */
300  /* Exists only for RDIMM */
301  { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1 },
303 
304  /* Channel 0 */
309 
310  /* Channel 1 */
315 
316  /* Channel 2 */
321 
322  /* Generic Non-core registers */
323  /*
324  * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
325  * On Xeon 55xx, however, it has a different id (8086:2c40). So,
326  * the probing code needs to test for the other address in case of
327  * failure of this one
328  */
330 
331 };
332 
333 static const struct pci_id_descr pci_dev_descr_lynnfield[] = {
337 
342 
347 
348  /*
349  * This is the PCI device that has an alternate address on some
350  * processors, like the Core i7 860
351  */
353 };
354 
355 static const struct pci_id_descr pci_dev_descr_i7core_westmere[] = {
356  /* Memory controller */
359  /* Exists only for RDIMM */
360  { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2), .optional = 1 },
362 
363  /* Channel 0 */
368 
369  /* Channel 1 */
374 
375  /* Channel 2 */
380 
381  /* Generic Non-core registers */
383 
384 };
385 
386 #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
387 static const struct pci_id_table pci_dev_table[] = {
388  PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem),
389  PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield),
390  PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere),
391  {0,} /* 0 terminated list. */
392 };
393 
394 /*
395  * pci_device_id table for which devices we are looking for
396  */
397 static DEFINE_PCI_DEVICE_TABLE(i7core_pci_tbl) = {
400  {0,} /* 0 terminated list. */
401 };
402 
403 /****************************************************************************
404  Ancillary status routines
405  ****************************************************************************/
406 
407  /* MC_CONTROL bits */
408 #define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
409 #define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
410 
411  /* MC_STATUS bits */
412 #define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 4))
413 #define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
414 
415  /* MC_MAX_DOD read functions */
416 static inline int numdimms(u32 dimms)
417 {
418  return (dimms & 0x3) + 1;
419 }
420 
421 static inline int numrank(u32 rank)
422 {
423  static int ranks[4] = { 1, 2, 4, -EINVAL };
424 
425  return ranks[rank & 0x3];
426 }
427 
428 static inline int numbank(u32 bank)
429 {
430  static int banks[4] = { 4, 8, 16, -EINVAL };
431 
432  return banks[bank & 0x3];
433 }
434 
435 static inline int numrow(u32 row)
436 {
437  static int rows[8] = {
438  1 << 12, 1 << 13, 1 << 14, 1 << 15,
439  1 << 16, -EINVAL, -EINVAL, -EINVAL,
440  };
441 
442  return rows[row & 0x7];
443 }
444 
445 static inline int numcol(u32 col)
446 {
447  static int cols[8] = {
448  1 << 10, 1 << 11, 1 << 12, -EINVAL,
449  };
450  return cols[col & 0x3];
451 }
452 
453 static struct i7core_dev *get_i7core_dev(u8 socket)
454 {
455  struct i7core_dev *i7core_dev;
456 
457  list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
458  if (i7core_dev->socket == socket)
459  return i7core_dev;
460  }
461 
462  return NULL;
463 }
464 
465 static struct i7core_dev *alloc_i7core_dev(u8 socket,
466  const struct pci_id_table *table)
467 {
468  struct i7core_dev *i7core_dev;
469 
470  i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
471  if (!i7core_dev)
472  return NULL;
473 
474  i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * table->n_devs,
475  GFP_KERNEL);
476  if (!i7core_dev->pdev) {
477  kfree(i7core_dev);
478  return NULL;
479  }
480 
481  i7core_dev->socket = socket;
482  i7core_dev->n_devs = table->n_devs;
483  list_add_tail(&i7core_dev->list, &i7core_edac_list);
484 
485  return i7core_dev;
486 }
487 
488 static void free_i7core_dev(struct i7core_dev *i7core_dev)
489 {
490  list_del(&i7core_dev->list);
491  kfree(i7core_dev->pdev);
492  kfree(i7core_dev);
493 }
494 
495 /****************************************************************************
496  Memory check routines
497  ****************************************************************************/
498 
499 static int get_dimm_config(struct mem_ctl_info *mci)
500 {
501  struct i7core_pvt *pvt = mci->pvt_info;
502  struct pci_dev *pdev;
503  int i, j;
504  enum edac_type mode;
505  enum mem_type mtype;
506  struct dimm_info *dimm;
507 
508  /* Get data from the MC register, function 0 */
509  pdev = pvt->pci_mcr[0];
510  if (!pdev)
511  return -ENODEV;
512 
513  /* Device 3 function 0 reads */
514  pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
515  pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
516  pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
517  pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
518 
519  edac_dbg(0, "QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
520  pvt->i7core_dev->socket, pvt->info.mc_control,
521  pvt->info.mc_status, pvt->info.max_dod, pvt->info.ch_map);
522 
523  if (ECC_ENABLED(pvt)) {
524  edac_dbg(0, "ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
525  if (ECCx8(pvt))
526  mode = EDAC_S8ECD8ED;
527  else
528  mode = EDAC_S4ECD4ED;
529  } else {
530  edac_dbg(0, "ECC disabled\n");
531  mode = EDAC_NONE;
532  }
533 
534  /* FIXME: need to handle the error codes */
535  edac_dbg(0, "DOD Max limits: DIMMS: %d, %d-ranked, %d-banked x%x x 0x%x\n",
536  numdimms(pvt->info.max_dod),
537  numrank(pvt->info.max_dod >> 2),
538  numbank(pvt->info.max_dod >> 4),
539  numrow(pvt->info.max_dod >> 6),
540  numcol(pvt->info.max_dod >> 9));
541 
542  for (i = 0; i < NUM_CHANS; i++) {
543  u32 data, dimm_dod[3], value[8];
544 
545  if (!pvt->pci_ch[i][0])
546  continue;
547 
548  if (!CH_ACTIVE(pvt, i)) {
549  edac_dbg(0, "Channel %i is not active\n", i);
550  continue;
551  }
552  if (CH_DISABLED(pvt, i)) {
553  edac_dbg(0, "Channel %i is disabled\n", i);
554  continue;
555  }
556 
557  /* Devices 4-6 function 0 */
558  pci_read_config_dword(pvt->pci_ch[i][0],
559  MC_CHANNEL_DIMM_INIT_PARAMS, &data);
560 
561 
562  if (data & THREE_DIMMS_PRESENT)
563  pvt->channel[i].is_3dimms_present = true;
564 
565  if (data & SINGLE_QUAD_RANK_PRESENT)
566  pvt->channel[i].is_single_4rank = true;
567 
568  if (data & QUAD_RANK_PRESENT)
569  pvt->channel[i].has_4rank = true;
570 
571  if (data & REGISTERED_DIMM)
572  mtype = MEM_RDDR3;
573  else
574  mtype = MEM_DDR3;
575 
576  /* Devices 4-6 function 1 */
577  pci_read_config_dword(pvt->pci_ch[i][1],
578  MC_DOD_CH_DIMM0, &dimm_dod[0]);
579  pci_read_config_dword(pvt->pci_ch[i][1],
580  MC_DOD_CH_DIMM1, &dimm_dod[1]);
581  pci_read_config_dword(pvt->pci_ch[i][1],
582  MC_DOD_CH_DIMM2, &dimm_dod[2]);
583 
584  edac_dbg(0, "Ch%d phy rd%d, wr%d (0x%08x): %s%s%s%cDIMMs\n",
585  i,
586  RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
587  data,
588  pvt->channel[i].is_3dimms_present ? "3DIMMS " : "",
589  pvt->channel[i].is_3dimms_present ? "SINGLE_4R " : "",
590  pvt->channel[i].has_4rank ? "HAS_4R " : "",
591  (data & REGISTERED_DIMM) ? 'R' : 'U');
592 
593  for (j = 0; j < 3; j++) {
594  u32 banks, ranks, rows, cols;
595  u32 size, npages;
596 
597  if (!DIMM_PRESENT(dimm_dod[j]))
598  continue;
599 
600  dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers,
601  i, j, 0);
602  banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
603  ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
604  rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
605  cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
606 
607  /* DDR3 has 8 I/O banks */
608  size = (rows * cols * banks * ranks) >> (20 - 3);
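 /*
  * For example (illustrative numbers, not taken from any particular DIMM):
  * a module with rows = 1 << 14, cols = 1 << 10, 8 banks and 2 ranks has
  * 2^28 addresses of 8 bytes each, so size = 2^28 >> (20 - 3) = 2048 MB.
  */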
609 
610  edac_dbg(0, "\tdimm %d %d Mb offset: %x, bank: %d, rank: %d, row: %#x, col: %#x\n",
611  j, size,
612  RANKOFFSET(dimm_dod[j]),
613  banks, ranks, rows, cols);
614 
615  npages = MiB_TO_PAGES(size);
616 
617  dimm->nr_pages = npages;
618 
619  switch (banks) {
620  case 4:
621  dimm->dtype = DEV_X4;
622  break;
623  case 8:
624  dimm->dtype = DEV_X8;
625  break;
626  case 16:
627  dimm->dtype = DEV_X16;
628  break;
629  default:
630  dimm->dtype = DEV_UNKNOWN;
631  }
632 
633  snprintf(dimm->label, sizeof(dimm->label),
634  "CPU#%uChannel#%u_DIMM#%u",
635  pvt->i7core_dev->socket, i, j);
636  dimm->grain = 8;
637  dimm->edac_mode = mode;
638  dimm->mtype = mtype;
639  }
640 
641  pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
642  pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
643  pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
644  pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
645  pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
646  pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
647  pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
648  pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
649  edac_dbg(1, "\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
650  for (j = 0; j < 8; j++)
651  edac_dbg(1, "\t\t%#x\t%#x\t%#x\n",
652  (value[j] >> 27) & 0x1,
653  (value[j] >> 24) & 0x7,
654  (value[j] & ((1 << 24) - 1)));
655  }
656 
657  return 0;
658 }
659 
660 /****************************************************************************
661  Error insertion routines
662  ****************************************************************************/
663 
664 #define to_mci(k) container_of(k, struct mem_ctl_info, dev)
665 
666 /* The i7core has independent error injection features per channel.
667  However, to keep the code simpler, we don't allow enabling error injection
668  on more than one channel.
669  Also, since a change to an inject parameter is only applied at enable time,
670  we disable error injection on every write to the sysfs nodes that
671  control the error injection.
672  */
673 static int disable_inject(const struct mem_ctl_info *mci)
674 {
675  struct i7core_pvt *pvt = mci->pvt_info;
676 
677  pvt->inject.enable = 0;
678 
679  if (!pvt->pci_ch[pvt->inject.channel][0])
680  return -ENODEV;
681 
682  pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
683  MC_CHANNEL_ERROR_INJECT, 0);
684 
685  return 0;
686 }
687 
688 /*
689  * i7core inject inject.section
690  *
691  * accept and store error injection inject.section value
692  * bit 0 - refers to the lower 32-byte half cacheline
693  * bit 1 - refers to the upper 32-byte half cacheline
694  */
695 static ssize_t i7core_inject_section_store(struct device *dev,
696  struct device_attribute *mattr,
697  const char *data, size_t count)
698 {
699  struct mem_ctl_info *mci = to_mci(dev);
700  struct i7core_pvt *pvt = mci->pvt_info;
701  unsigned long value;
702  int rc;
703 
704  if (pvt->inject.enable)
705  disable_inject(mci);
706 
707  rc = strict_strtoul(data, 10, &value);
708  if ((rc < 0) || (value > 3))
709  return -EIO;
710 
711  pvt->inject.section = (u32) value;
712  return count;
713 }
714 
715 static ssize_t i7core_inject_section_show(struct device *dev,
716  struct device_attribute *mattr,
717  char *data)
718 {
719  struct mem_ctl_info *mci = to_mci(dev);
720  struct i7core_pvt *pvt = mci->pvt_info;
721  return sprintf(data, "0x%08x\n", pvt->inject.section);
722 }
723 
724 /*
725  * i7core inject.type
726  *
727  * accept and store error injection inject.type value
728  * bit 0 - repeat enable - Enable error repetition
729  * bit 1 - inject ECC error
730  * bit 2 - inject parity error
731  */
732 static ssize_t i7core_inject_type_store(struct device *dev,
733  struct device_attribute *mattr,
734  const char *data, size_t count)
735 {
736  struct mem_ctl_info *mci = to_mci(dev);
737 struct i7core_pvt *pvt = mci->pvt_info;
738  unsigned long value;
739  int rc;
740 
741  if (pvt->inject.enable)
742  disable_inject(mci);
743 
744  rc = strict_strtoul(data, 10, &value);
745  if ((rc < 0) || (value > 7))
746  return -EIO;
747 
748  pvt->inject.type = (u32) value;
749  return count;
750 }
751 
752 static ssize_t i7core_inject_type_show(struct device *dev,
753  struct device_attribute *mattr,
754  char *data)
755 {
756  struct mem_ctl_info *mci = to_mci(dev);
757  struct i7core_pvt *pvt = mci->pvt_info;
758 
759  return sprintf(data, "0x%08x\n", pvt->inject.type);
760 }
761 
762 /*
763  * i7core_inject_inject.eccmask_store
764  *
765  * The type of error (UE/CE) will depend on the inject.eccmask value:
766  * Any bits set to a 1 will flip the corresponding ECC bit
767  * Correctable errors can be injected by flipping 1 bit or the bits within
768  * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
769  * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
770  * uncorrectable error to be injected.
771  */
772 static ssize_t i7core_inject_eccmask_store(struct device *dev,
773  struct device_attribute *mattr,
774  const char *data, size_t count)
775 {
776  struct mem_ctl_info *mci = to_mci(dev);
777  struct i7core_pvt *pvt = mci->pvt_info;
778  unsigned long value;
779  int rc;
780 
781  if (pvt->inject.enable)
782  disable_inject(mci);
783 
784  rc = strict_strtoul(data, 10, &value);
785  if (rc < 0)
786  return -EIO;
787 
788  pvt->inject.eccmask = (u32) value;
789  return count;
790 }
791 
792 static ssize_t i7core_inject_eccmask_show(struct device *dev,
793  struct device_attribute *mattr,
794  char *data)
795 {
796  struct mem_ctl_info *mci = to_mci(dev);
797  struct i7core_pvt *pvt = mci->pvt_info;
798 
799  return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
800 }
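
 /*
  * Illustrative eccmask values (an example added here, not part of the
  * original driver or datasheet text; derived from the description above):
  *   0x00000001 - flips a single ECC bit                  -> correctable
  *   0x00000101 - flips bits 0 and 8, both within the
  *                first symbol pair (7:0 / 15:8)          -> still correctable
  *   0x01000001 - flips bits in two different symbol
  *                pairs (bit 0 and bit 24)                 -> uncorrectable
  */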
801 
802 /*
803  * i7core_addrmatch
804  *
805  * The set of sysfs nodes (channel, dimm, rank, bank, page and col) that
806  * select which address the error will be injected into.
807  * Each parameter accepts either a number below its limit or "any";
808  * "any" (stored internally as -1) makes the memory controller ignore
809  * that criterion when matching the address for error injection.
810  * See i7core_inject_enable_store() for how the match mask is built.
811  */
812 
813 #define DECLARE_ADDR_MATCH(param, limit) \
814 static ssize_t i7core_inject_store_##param( \
815  struct device *dev, \
816  struct device_attribute *mattr, \
817  const char *data, size_t count) \
818 { \
819  struct mem_ctl_info *mci = dev_get_drvdata(dev); \
820  struct i7core_pvt *pvt; \
821  long value; \
822  int rc; \
823  \
824  edac_dbg(1, "\n"); \
825  pvt = mci->pvt_info; \
826  \
827  if (pvt->inject.enable) \
828  disable_inject(mci); \
829  \
830  if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
831  value = -1; \
832  else { \
833  rc = strict_strtoul(data, 10, &value); \
834  if ((rc < 0) || (value >= limit)) \
835  return -EIO; \
836  } \
837  \
838  pvt->inject.param = value; \
839  \
840  return count; \
841 } \
842  \
843 static ssize_t i7core_inject_show_##param( \
844  struct device *dev, \
845  struct device_attribute *mattr, \
846  char *data) \
847 { \
848  struct mem_ctl_info *mci = dev_get_drvdata(dev); \
849  struct i7core_pvt *pvt; \
850  \
851  pvt = mci->pvt_info; \
852  edac_dbg(1, "pvt=%p\n", pvt); \
853  if (pvt->inject.param < 0) \
854  return sprintf(data, "any\n"); \
855  else \
856  return sprintf(data, "%d\n", pvt->inject.param);\
857 }
858 
859 #define ATTR_ADDR_MATCH(param) \
860  static DEVICE_ATTR(param, S_IRUGO | S_IWUSR, \
861  i7core_inject_show_##param, \
862  i7core_inject_store_##param)
863 
864 DECLARE_ADDR_MATCH(channel, 3);
865 DECLARE_ADDR_MATCH(dimm, 3);
866 DECLARE_ADDR_MATCH(rank, 4);
867 DECLARE_ADDR_MATCH(bank, 32);
868 DECLARE_ADDR_MATCH(page, 0x10000);
869 DECLARE_ADDR_MATCH(col, 0x4000);
870 
871 ATTR_ADDR_MATCH(channel);
872 ATTR_ADDR_MATCH(dimm);
873 ATTR_ADDR_MATCH(rank);
874 ATTR_ADDR_MATCH(bank);
875 ATTR_ADDR_MATCH(page);
876 ATTR_ADDR_MATCH(col);
877 
878 static int write_and_test(struct pci_dev *dev, const int where, const u32 val)
879 {
880  u32 read;
881  int count;
882 
883  edac_dbg(0, "setting pci %02x:%02x.%x reg=%02x value=%08x\n",
884  dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
885  where, val);
886 
887  for (count = 0; count < 10; count++) {
888  if (count)
889  msleep(100);
890  pci_write_config_dword(dev, where, val);
891  pci_read_config_dword(dev, where, &read);
892 
893  if (read == val)
894  return 0;
895  }
896 
897  i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
898  "write=%08x. Read=%08x\n",
899  dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
900  where, val, read);
901 
902  return -EINVAL;
903 }
904 
905 /*
906  * This routine prepares the Memory Controller for error injection.
907  * The error will be injected when some process tries to write to the
908  * memory that matches the given criteria.
909  * The criteria can be set in terms of a mask where dimm, rank, bank, page
910  * and col can be specified.
911  * A -1 value for any of the mask items will make the MCU ignore
912  * that matching criterion for error injection.
913  *
914  * Note that the error will only happen after a write operation to memory
915  * that matches the condition. If REPEAT_EN is not enabled in the
916  * inject mask, it will produce just one error. Otherwise, it will repeat
917  * until the inject mask is cleared.
918  *
919  * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
920  * is reliable enough to check if the MC is using the
921  * three channels. However, this is not clear in the datasheet.
922  */
923 static ssize_t i7core_inject_enable_store(struct device *dev,
924  struct device_attribute *mattr,
925  const char *data, size_t count)
926 {
927  struct mem_ctl_info *mci = to_mci(dev);
928  struct i7core_pvt *pvt = mci->pvt_info;
929  u32 injectmask;
930  u64 mask = 0;
931  int rc;
932  long enable;
933 
934  if (!pvt->pci_ch[pvt->inject.channel][0])
935  return 0;
936 
937  rc = strict_strtoul(data, 10, &enable);
938  if ((rc < 0))
939  return 0;
940 
941  if (enable) {
942  pvt->inject.enable = 1;
943  } else {
944  disable_inject(mci);
945  return count;
946  }
947 
948  /* Sets pvt->inject.dimm mask */
949  if (pvt->inject.dimm < 0)
950  mask |= 1LL << 41;
951  else {
952  if (pvt->channel[pvt->inject.channel].dimms > 2)
953  mask |= (pvt->inject.dimm & 0x3LL) << 35;
954  else
955  mask |= (pvt->inject.dimm & 0x1LL) << 36;
956  }
957 
958  /* Sets pvt->inject.rank mask */
959  if (pvt->inject.rank < 0)
960  mask |= 1LL << 40;
961  else {
962  if (pvt->channel[pvt->inject.channel].dimms > 2)
963  mask |= (pvt->inject.rank & 0x1LL) << 34;
964  else
965  mask |= (pvt->inject.rank & 0x3LL) << 34;
966  }
967 
968  /* Sets pvt->inject.bank mask */
969  if (pvt->inject.bank < 0)
970  mask |= 1LL << 39;
971  else
972  mask |= (pvt->inject.bank & 0x15LL) << 30;
973 
974  /* Sets pvt->inject.page mask */
975  if (pvt->inject.page < 0)
976  mask |= 1LL << 38;
977  else
978  mask |= (pvt->inject.page & 0xffff) << 14;
979 
980  /* Sets pvt->inject.column mask */
981  if (pvt->inject.col < 0)
982  mask |= 1LL << 37;
983  else
984  mask |= (pvt->inject.col & 0x3fff);
985 
986  /*
987  * bit 0: REPEAT_EN
988  * bits 1-2: MASK_HALF_CACHELINE
989  * bit 3: INJECT_ECC
990  * bit 4: INJECT_ADDR_PARITY
991  */
992 
993  injectmask = (pvt->inject.type & 1) |
994  (pvt->inject.section & 0x3) << 1 |
995  (pvt->inject.type & 0x6) << (3 - 1);
996 
997  /* Unlock writes to registers - this register is write only */
998  pci_write_config_dword(pvt->pci_noncore,
999  MC_CFG_CONTROL, 0x2);
1000 
1001  write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1002  MC_CHANNEL_ADDR_MATCH, mask);
1003  write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1004  MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1005 
1006  write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1007  MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1008 
1009  write_and_test(pvt->pci_ch[pvt->inject.channel][0],
1010  MC_CHANNEL_ERROR_INJECT, injectmask);
1011 
1012  /*
1013  * This is something undocumented, based on my tests
1014  * Without writing 8 to this register, errors aren't injected. Not sure
1015  * why.
1016  */
1017  pci_write_config_dword(pvt->pci_noncore,
1018  MC_CFG_CONTROL, 8);
1019 
1020  edac_dbg(0, "Error inject addr match 0x%016llx, ecc 0x%08x, inject 0x%08x\n",
1021  mask, pvt->inject.eccmask, injectmask);
1022 
1023 
1024  return count;
1025 }
1026 
1027 static ssize_t i7core_inject_enable_show(struct device *dev,
1028  struct device_attribute *mattr,
1029  char *data)
1030 {
1031  struct mem_ctl_info *mci = to_mci(dev);
1032  struct i7core_pvt *pvt = mci->pvt_info;
1033  u32 injectmask;
1034 
1035  if (!pvt->pci_ch[pvt->inject.channel][0])
1036  return 0;
1037 
1038  pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
1039  MC_CHANNEL_ERROR_INJECT, &injectmask);
1040 
1041  edac_dbg(0, "Inject error read: 0x%018x\n", injectmask);
1042 
1043  if (injectmask & 0x0c)
1044  pvt->inject.enable = 1;
1045 
1046  return sprintf(data, "%d\n", pvt->inject.enable);
1047 }
1048 
1049 #define DECLARE_COUNTER(param) \
1050 static ssize_t i7core_show_counter_##param( \
1051  struct device *dev, \
1052  struct device_attribute *mattr, \
1053  char *data) \
1054 { \
1055  struct mem_ctl_info *mci = dev_get_drvdata(dev); \
1056  struct i7core_pvt *pvt = mci->pvt_info; \
1057  \
1058  edac_dbg(1, "\n"); \
1059  if (!pvt->ce_count_available || (pvt->is_registered)) \
1060  return sprintf(data, "data unavailable\n"); \
1061  return sprintf(data, "%lu\n", \
1062  pvt->udimm_ce_count[param]); \
1063 }
1064 
1065 #define ATTR_COUNTER(param) \
1066  static DEVICE_ATTR(udimm##param, S_IRUGO | S_IWUSR, \
1067  i7core_show_counter_##param, \
1068  NULL)
1069 
1070 DECLARE_COUNTER(0);
1071 DECLARE_COUNTER(1);
1072 DECLARE_COUNTER(2);
1073 
1074 ATTR_COUNTER(0);
1075 ATTR_COUNTER(1);
1076 ATTR_COUNTER(2);
1077 
1078 /*
1079  * inject_addrmatch device sysfs struct
1080  */
1081 
1082 static struct attribute *i7core_addrmatch_attrs[] = {
1083  &dev_attr_channel.attr,
1084  &dev_attr_dimm.attr,
1085  &dev_attr_rank.attr,
1086  &dev_attr_bank.attr,
1087  &dev_attr_page.attr,
1088  &dev_attr_col.attr,
1089  NULL
1090 };
1091 
1092 static struct attribute_group addrmatch_grp = {
1093  .attrs = i7core_addrmatch_attrs,
1094 };
1095 
1096 static const struct attribute_group *addrmatch_groups[] = {
1097  &addrmatch_grp,
1098  NULL
1099 };
1100 
1101 static void addrmatch_release(struct device *device)
1102 {
1103  edac_dbg(1, "Releasing device %s\n", dev_name(device));
1104  kfree(device);
1105 }
1106 
1107 static struct device_type addrmatch_type = {
1108  .groups = addrmatch_groups,
1109  .release = addrmatch_release,
1110 };
1111 
1112 /*
1113  * all_channel_counts sysfs struct
1114  */
1115 
1116 static struct attribute *i7core_udimm_counters_attrs[] = {
1117  &dev_attr_udimm0.attr,
1118  &dev_attr_udimm1.attr,
1119  &dev_attr_udimm2.attr,
1120  NULL
1121 };
1122 
1123 static struct attribute_group all_channel_counts_grp = {
1124  .attrs = i7core_udimm_counters_attrs,
1125 };
1126 
1127 static const struct attribute_group *all_channel_counts_groups[] = {
1128  &all_channel_counts_grp,
1129  NULL
1130 };
1131 
1132 static void all_channel_counts_release(struct device *device)
1133 {
1134  edac_dbg(1, "Releasing device %s\n", dev_name(device));
1135  kfree(device);
1136 }
1137 
1138 static struct device_type all_channel_counts_type = {
1139  .groups = all_channel_counts_groups,
1140  .release = all_channel_counts_release,
1141 };
1142 
1143 /*
1144  * inject sysfs attributes
1145  */
1146 
1147 static DEVICE_ATTR(inject_section, S_IRUGO | S_IWUSR,
1148  i7core_inject_section_show, i7core_inject_section_store);
1149 
1150 static DEVICE_ATTR(inject_type, S_IRUGO | S_IWUSR,
1151  i7core_inject_type_show, i7core_inject_type_store);
1152 
1153 
1154 static DEVICE_ATTR(inject_eccmask, S_IRUGO | S_IWUSR,
1155  i7core_inject_eccmask_show, i7core_inject_eccmask_store);
1156 
1157 static DEVICE_ATTR(inject_enable, S_IRUGO | S_IWUSR,
1158  i7core_inject_enable_show, i7core_inject_enable_store);
1159 
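 /*
  * Typical use of the injection interface (an illustrative sketch, not part
  * of the original file; the exact sysfs location depends on the EDAC core,
  * e.g. /sys/devices/system/edac/mc/mc0/):
  *
  *   echo 0   > inject_addrmatch/channel   # restrict the match to channel 0
  *   echo any > inject_addrmatch/dimm      # don't match on dimm/rank/...
  *   echo 1   > inject_eccmask             # flip one ECC bit (correctable)
  *   echo 2   > inject_type                # bit 1: inject an ECC error
  *   echo 3   > inject_section             # both 32-byte half cachelines
  *   echo 1   > inject_enable              # arm; fires on next matching write
  */
 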
1160 static int i7core_create_sysfs_devices(struct mem_ctl_info *mci)
1161 {
1162  struct i7core_pvt *pvt = mci->pvt_info;
1163  int rc;
1164 
1165  rc = device_create_file(&mci->dev, &dev_attr_inject_section);
1166  if (rc < 0)
1167  return rc;
1168  rc = device_create_file(&mci->dev, &dev_attr_inject_type);
1169  if (rc < 0)
1170  return rc;
1171  rc = device_create_file(&mci->dev, &dev_attr_inject_eccmask);
1172  if (rc < 0)
1173  return rc;
1174  rc = device_create_file(&mci->dev, &dev_attr_inject_enable);
1175  if (rc < 0)
1176  return rc;
1177 
1178  pvt->addrmatch_dev = kzalloc(sizeof(*pvt->addrmatch_dev), GFP_KERNEL);
1179  if (!pvt->addrmatch_dev)
1180  return -ENOMEM;
1181 
1182  pvt->addrmatch_dev->type = &addrmatch_type;
1183  pvt->addrmatch_dev->bus = mci->dev.bus;
1184  device_initialize(pvt->addrmatch_dev);
1185  pvt->addrmatch_dev->parent = &mci->dev;
1186  dev_set_name(pvt->addrmatch_dev, "inject_addrmatch");
1187  dev_set_drvdata(pvt->addrmatch_dev, mci);
1188 
1189  edac_dbg(1, "creating %s\n", dev_name(pvt->addrmatch_dev));
1190 
1191  rc = device_add(pvt->addrmatch_dev);
1192  if (rc < 0)
1193  return rc;
1194 
1195  if (!pvt->is_registered) {
1196  pvt->chancounts_dev = kzalloc(sizeof(*pvt->chancounts_dev),
1197  GFP_KERNEL);
1198  if (!pvt->chancounts_dev) {
1199  put_device(pvt->addrmatch_dev);
1200  device_del(pvt->addrmatch_dev);
1201  return -ENOMEM;
1202  }
1203 
1204  pvt->chancounts_dev->type = &all_channel_counts_type;
1205  pvt->chancounts_dev->bus = mci->dev.bus;
1206  device_initialize(pvt->chancounts_dev);
1207  pvt->chancounts_dev->parent = &mci->dev;
1208  dev_set_name(pvt->chancounts_dev, "all_channel_counts");
1209  dev_set_drvdata(pvt->chancounts_dev, mci);
1210 
1211  edac_dbg(1, "creating %s\n", dev_name(pvt->chancounts_dev));
1212 
1213  rc = device_add(pvt->chancounts_dev);
1214  if (rc < 0)
1215  return rc;
1216  }
1217  return 0;
1218 }
1219 
1220 static void i7core_delete_sysfs_devices(struct mem_ctl_info *mci)
1221 {
1222  struct i7core_pvt *pvt = mci->pvt_info;
1223 
1224  edac_dbg(1, "\n");
1225 
1226  device_remove_file(&mci->dev, &dev_attr_inject_section);
1227  device_remove_file(&mci->dev, &dev_attr_inject_type);
1228  device_remove_file(&mci->dev, &dev_attr_inject_eccmask);
1229  device_remove_file(&mci->dev, &dev_attr_inject_enable);
1230 
1231  if (!pvt->is_registered) {
1232  put_device(pvt->chancounts_dev);
1233  device_del(pvt->chancounts_dev);
1234  }
1235  put_device(pvt->addrmatch_dev);
1236  device_del(pvt->addrmatch_dev);
1237 }
1238 
1239 /****************************************************************************
1240  Device initialization routines: put/get, init/exit
1241  ****************************************************************************/
1242 
1243 /*
1244  * i7core_put_all_devices 'put' all the devices that we have
1245  * reserved via 'get'
1246  */
1247 static void i7core_put_devices(struct i7core_dev *i7core_dev)
1248 {
1249  int i;
1250 
1251  edac_dbg(0, "\n");
1252  for (i = 0; i < i7core_dev->n_devs; i++) {
1253  struct pci_dev *pdev = i7core_dev->pdev[i];
1254  if (!pdev)
1255  continue;
1256  edac_dbg(0, "Removing dev %02x:%02x.%d\n",
1257  pdev->bus->number,
1258  PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1259  pci_dev_put(pdev);
1260  }
1261 }
1262 
1263 static void i7core_put_all_devices(void)
1264 {
1265  struct i7core_dev *i7core_dev, *tmp;
1266 
1267  list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
1268  i7core_put_devices(i7core_dev);
1269  free_i7core_dev(i7core_dev);
1270  }
1271 }
1272 
1273 static void __init i7core_xeon_pci_fixup(const struct pci_id_table *table)
1274 {
1275  struct pci_dev *pdev = NULL;
1276  int i;
1277 
1278  /*
1279  * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core pci buses
1280  * aren't announced by ACPI. So, we need to use a legacy scan probing
1281  * to detect them.
1282  */
1283  while (table && table->descr) {
1284  pdev = pci_get_device(PCI_VENDOR_ID_INTEL, table->descr[0].dev_id, NULL);
1285  if (unlikely(!pdev)) {
1286  for (i = 0; i < MAX_SOCKET_BUSES; i++)
1287  pcibios_scan_specific_bus(255-i);
1288  }
1289  pci_dev_put(pdev);
1290  table++;
1291  }
1292 }
1293 
1294 static unsigned i7core_pci_lastbus(void)
1295 {
1296  int last_bus = 0, bus;
1297  struct pci_bus *b = NULL;
1298 
1299  while ((b = pci_find_next_bus(b)) != NULL) {
1300  bus = b->number;
1301  edac_dbg(0, "Found bus %d\n", bus);
1302  if (bus > last_bus)
1303  last_bus = bus;
1304  }
1305 
1306  edac_dbg(0, "Last bus %d\n", last_bus);
1307 
1308  return last_bus;
1309 }
1310 
1311 /*
1312  * i7core_get_all_devices Find and perform 'get' operation on the MCH's
1313  * device/functions we want to reference for this driver
1314  *
1315  * Need to 'get' device 16 func 1 and func 2
1316  */
1317 static int i7core_get_onedevice(struct pci_dev **prev,
1318  const struct pci_id_table *table,
1319  const unsigned devno,
1320  const unsigned last_bus)
1321 {
1322  struct i7core_dev *i7core_dev;
1323  const struct pci_id_descr *dev_descr = &table->descr[devno];
1324 
1325  struct pci_dev *pdev = NULL;
1326  u8 bus = 0;
1327  u8 socket = 0;
1328 
1329  pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1330  dev_descr->dev_id, *prev);
1331 
1332  /*
1333  * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
1334  * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1335  * to probe for the alternate address in case of failure
1336  */
1337  if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
1338  pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1339  PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT, *prev);
1340 
1341  if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE && !pdev)
1342  pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1343  PCI_DEVICE_ID_INTEL_LYNNFIELD_NONCORE_ALT,
1344  *prev);
1345 
1346  if (!pdev) {
1347  if (*prev) {
1348  *prev = pdev;
1349  return 0;
1350  }
1351 
1352  if (dev_descr->optional)
1353  return 0;
1354 
1355  if (devno == 0)
1356  return -ENODEV;
1357 
1359  "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1360  dev_descr->dev, dev_descr->func,
1361  PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1362 
1363  /* End of list, leave */
1364  return -ENODEV;
1365  }
1366  bus = pdev->bus->number;
1367 
1368  socket = last_bus - bus;
1369 
1370  i7core_dev = get_i7core_dev(socket);
1371  if (!i7core_dev) {
1372  i7core_dev = alloc_i7core_dev(socket, table);
1373  if (!i7core_dev) {
1374  pci_dev_put(pdev);
1375  return -ENOMEM;
1376  }
1377  }
1378 
1379  if (i7core_dev->pdev[devno]) {
1381  "Duplicated device for "
1382  "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1383  bus, dev_descr->dev, dev_descr->func,
1384  PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1385  pci_dev_put(pdev);
1386  return -ENODEV;
1387  }
1388 
1389  i7core_dev->pdev[devno] = pdev;
1390 
1391  /* Sanity check */
1392  if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1393  PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1395  "Device PCI ID %04x:%04x "
1396  "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1397  PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1398  bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1399  bus, dev_descr->dev, dev_descr->func);
1400  return -ENODEV;
1401  }
1402 
1403  /* Be sure that the device is enabled */
1404  if (unlikely(pci_enable_device(pdev) < 0)) {
1406  "Couldn't enable "
1407  "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1408  bus, dev_descr->dev, dev_descr->func,
1409  PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1410  return -ENODEV;
1411  }
1412 
1413  edac_dbg(0, "Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1414  socket, bus, dev_descr->dev,
1415  dev_descr->func,
1416  PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1417 
1418  /*
1419  * As stated on drivers/pci/search.c, the reference count for
1420  * @from is always decremented if it is not %NULL. So, as we need
1421  * to get all devices up to null, we need to do a get for the device
1422  */
1423  pci_dev_get(pdev);
1424 
1425  *prev = pdev;
1426 
1427  return 0;
1428 }
1429 
1430 static int i7core_get_all_devices(void)
1431 {
1432  int i, rc, last_bus;
1433  struct pci_dev *pdev = NULL;
1434  const struct pci_id_table *table = pci_dev_table;
1435 
1436  last_bus = i7core_pci_lastbus();
1437 
1438  while (table && table->descr) {
1439  for (i = 0; i < table->n_devs; i++) {
1440  pdev = NULL;
1441  do {
1442  rc = i7core_get_onedevice(&pdev, table, i,
1443  last_bus);
1444  if (rc < 0) {
1445  if (i == 0) {
1446  i = table->n_devs;
1447  break;
1448  }
1449  i7core_put_all_devices();
1450  return -ENODEV;
1451  }
1452  } while (pdev);
1453  }
1454  table++;
1455  }
1456 
1457  return 0;
1458 }
1459 
1460 static int mci_bind_devs(struct mem_ctl_info *mci,
1461  struct i7core_dev *i7core_dev)
1462 {
1463  struct i7core_pvt *pvt = mci->pvt_info;
1464  struct pci_dev *pdev;
1465  int i, func, slot;
1466  char *family;
1467 
1468  pvt->is_registered = false;
1469  pvt->enable_scrub = false;
1470  for (i = 0; i < i7core_dev->n_devs; i++) {
1471  pdev = i7core_dev->pdev[i];
1472  if (!pdev)
1473  continue;
1474 
1475  func = PCI_FUNC(pdev->devfn);
1476  slot = PCI_SLOT(pdev->devfn);
1477  if (slot == 3) {
1478  if (unlikely(func > MAX_MCR_FUNC))
1479  goto error;
1480  pvt->pci_mcr[func] = pdev;
1481  } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1482  if (unlikely(func > MAX_CHAN_FUNC))
1483  goto error;
1484  pvt->pci_ch[slot - 4][func] = pdev;
1485  } else if (!slot && !func) {
1486  pvt->pci_noncore = pdev;
1487 
1488  /* Detect the processor family */
1489  switch (pdev->device) {
1491  family = "Xeon 35xx/ i7core";
1492  pvt->enable_scrub = false;
1493  break;
1495  family = "i7-800/i5-700";
1496  pvt->enable_scrub = false;
1497  break;
1499  family = "Xeon 34xx";
1500  pvt->enable_scrub = false;
1501  break;
1503  family = "Xeon 55xx";
1504  pvt->enable_scrub = true;
1505  break;
1507  family = "Xeon 56xx / i7-900";
1508  pvt->enable_scrub = true;
1509  break;
1510  default:
1511  family = "unknown";
1512  pvt->enable_scrub = false;
1513  }
1514  edac_dbg(0, "Detected a processor type %s\n", family);
1515  } else
1516  goto error;
1517 
1518  edac_dbg(0, "Associated fn %d.%d, dev = %p, socket %d\n",
1519  PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1520  pdev, i7core_dev->socket);
1521 
1522  if (PCI_SLOT(pdev->devfn) == 3 &&
1523  PCI_FUNC(pdev->devfn) == 2)
1524  pvt->is_registered = true;
1525  }
1526 
1527  return 0;
1528 
1529 error:
1530  i7core_printk(KERN_ERR, "Device %d, function %d "
1531  "is out of the expected range\n",
1532  slot, func);
1533  return -EINVAL;
1534 }
1535 
1536 /****************************************************************************
1537  Error check routines
1538  ****************************************************************************/
1539 
1540 static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
1541  const int chan,
1542  const int new0,
1543  const int new1,
1544  const int new2)
1545 {
1546  struct i7core_pvt *pvt = mci->pvt_info;
1547  int add0 = 0, add1 = 0, add2 = 0;
1548  /* Updates CE counters if it is not the first time here */
1549  if (pvt->ce_count_available) {
1550  /* Updates CE counters */
1551 
1552  add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
1553  add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
1554  add0 = new0 - pvt->rdimm_last_ce_count[chan][0];
1555 
1556  if (add2 < 0)
1557  add2 += 0x7fff;
1558  pvt->rdimm_ce_count[chan][2] += add2;
1559 
1560  if (add1 < 0)
1561  add1 += 0x7fff;
1562  pvt->rdimm_ce_count[chan][1] += add1;
1563 
1564  if (add0 < 0)
1565  add0 += 0x7fff;
1566  pvt->rdimm_ce_count[chan][0] += add0;
1567  } else
1568  pvt->ce_count_available = 1;
1569 
1570  /* Store the new values */
1571  pvt->rdimm_last_ce_count[chan][2] = new2;
1572  pvt->rdimm_last_ce_count[chan][1] = new1;
1573  pvt->rdimm_last_ce_count[chan][0] = new0;
1574 
1575  /* Update the EDAC core */
1576  if (add0 != 0)
1577  edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add0,
1578  0, 0, 0,
1579  chan, 0, -1, "error", "");
1580  if (add1 != 0)
1581  edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add1,
1582  0, 0, 0,
1583  chan, 1, -1, "error", "");
1584  if (add2 != 0)
1585  edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, add2,
1586  0, 0, 0,
1587  chan, 2, -1, "error", "");
1588 }
1589 
1590 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1591 {
1592  struct i7core_pvt *pvt = mci->pvt_info;
1593  u32 rcv[3][2];
1594  int i, new0, new1, new2;
1595 
1596  /*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/
1597  pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
1598  &rcv[0][0]);
1599  pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
1600  &rcv[0][1]);
1601  pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
1602  &rcv[1][0]);
1603  pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
1604  &rcv[1][1]);
1605  pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
1606  &rcv[2][0]);
1607  pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
1608  &rcv[2][1]);
1609  for (i = 0 ; i < 3; i++) {
1610  edac_dbg(3, "MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1611  (i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
1612  /*if the channel has 3 dimms*/
1613  if (pvt->channel[i].dimms > 2) {
1614  new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
1615  new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
1616  new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
1617  } else {
1618  new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
1619  DIMM_BOT_COR_ERR(rcv[i][0]);
1620  new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
1621  DIMM_BOT_COR_ERR(rcv[i][1]);
1622  new2 = 0;
1623  }
1624 
1625  i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
1626  }
1627 }
1628 
1629 /* This function is based on the device 3 function 4 registers as described on:
1630  * Intel Xeon Processor 5500 Series Datasheet Volume 2
1631  * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1632  * also available at:
1633  * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1634  */
1635 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
1636 {
1637  struct i7core_pvt *pvt = mci->pvt_info;
1638  u32 rcv1, rcv0;
1639  int new0, new1, new2;
1640 
1641  if (!pvt->pci_mcr[4]) {
1642  edac_dbg(0, "MCR registers not found\n");
1643  return;
1644  }
1645 
1646  /* Corrected test errors */
1647  pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
1648  pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);
1649 
1650  /* Store the new values */
1651  new2 = DIMM2_COR_ERR(rcv1);
1652  new1 = DIMM1_COR_ERR(rcv0);
1653  new0 = DIMM0_COR_ERR(rcv0);
1654 
1655  /* Updates CE counters if it is not the first time here */
1656  if (pvt->ce_count_available) {
1657  /* Updates CE counters */
1658  int add0, add1, add2;
1659 
1660  add2 = new2 - pvt->udimm_last_ce_count[2];
1661  add1 = new1 - pvt->udimm_last_ce_count[1];
1662  add0 = new0 - pvt->udimm_last_ce_count[0];
1663 
1664  if (add2 < 0)
1665  add2 += 0x7fff;
1666  pvt->udimm_ce_count[2] += add2;
1667 
1668  if (add1 < 0)
1669  add1 += 0x7fff;
1670  pvt->udimm_ce_count[1] += add1;
1671 
1672  if (add0 < 0)
1673  add0 += 0x7fff;
1674  pvt->udimm_ce_count[0] += add0;
1675 
1676  if (add0 | add1 | add2)
1677  i7core_printk(KERN_ERR, "New Corrected error(s): "
1678  "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1679  add0, add1, add2);
1680  } else
1681  pvt->ce_count_available = 1;
1682 
1683  /* Store the new values */
1684  pvt->udimm_last_ce_count[2] = new2;
1685  pvt->udimm_last_ce_count[1] = new1;
1686  pvt->udimm_last_ce_count[0] = new0;
1687 }
1688 
1689 /*
1690  * According to tables E-11 and E-12 of chapter E.3.3 of the Intel 64 and IA-32
1691  * Architectures Software Developer's Manual Volume 3B,
1692  * Nehalem is defined as family 0x06, model 0x1a.
1693  *
1694  * The MCA registers used here are the following ones:
1695  * struct mce field MCA Register
1696  * m->status MSR_IA32_MC8_STATUS
1697  * m->addr MSR_IA32_MC8_ADDR
1698  * m->misc MSR_IA32_MC8_MISC
1699  * In the case of Nehalem, the error information is encoded in the .status
1700  * and .misc fields.
1701  */
1702 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1703  const struct mce *m)
1704 {
1705  struct i7core_pvt *pvt = mci->pvt_info;
1706  char *type, *optype, *err;
1707  enum hw_event_mc_err_type tp_event;
1708  unsigned long error = m->status & 0x1ff0000l;
1709  bool uncorrected_error = m->mcgstatus & 1ll << 61;
1710  bool ripv = m->mcgstatus & 1;
1711  u32 optypenum = (m->status >> 4) & 0x07;
1712  u32 core_err_cnt = (m->status >> 38) & 0x7fff;
1713  u32 dimm = (m->misc >> 16) & 0x3;
1714  u32 channel = (m->misc >> 18) & 0x3;
1715  u32 syndrome = m->misc >> 32;
1716  u32 errnum = find_first_bit(&error, 32);
1717 
1718  if (uncorrected_error) {
1719  if (ripv) {
1720  type = "FATAL";
1721  tp_event = HW_EVENT_ERR_FATAL;
1722  } else {
1723  type = "NON_FATAL";
1724  tp_event = HW_EVENT_ERR_UNCORRECTED;
1725  }
1726  } else {
1727  type = "CORRECTED";
1728  tp_event = HW_EVENT_ERR_CORRECTED;
1729  }
1730 
1731  switch (optypenum) {
1732  case 0:
1733  optype = "generic undef request";
1734  break;
1735  case 1:
1736  optype = "read error";
1737  break;
1738  case 2:
1739  optype = "write error";
1740  break;
1741  case 3:
1742  optype = "addr/cmd error";
1743  break;
1744  case 4:
1745  optype = "scrubbing error";
1746  break;
1747  default:
1748  optype = "reserved";
1749  break;
1750  }
1751 
1752  switch (errnum) {
1753  case 16:
1754  err = "read ECC error";
1755  break;
1756  case 17:
1757  err = "RAS ECC error";
1758  break;
1759  case 18:
1760  err = "write parity error";
1761  break;
1762  case 19:
1763  err = "redundacy loss";
1764  break;
1765  case 20:
1766  err = "reserved";
1767  break;
1768  case 21:
1769  err = "memory range error";
1770  break;
1771  case 22:
1772  err = "RTID out of range";
1773  break;
1774  case 23:
1775  err = "address parity error";
1776  break;
1777  case 24:
1778  err = "byte enable parity error";
1779  break;
1780  default:
1781  err = "unknown";
1782  }
1783 
1784  /*
1785  * Call the helper to output message
1786  * FIXME: what to do if core_err_cnt > 1? Currently, it generates
1787  * only one event
1788  */
1789  if (uncorrected_error || !pvt->is_registered)
1790  edac_mc_handle_error(tp_event, mci, core_err_cnt,
1791  m->addr >> PAGE_SHIFT,
1792  m->addr & ~PAGE_MASK,
1793  syndrome,
1794  channel, dimm, -1,
1795  err, optype);
1796 }
1797 
1798 /*
1799  * i7core_check_error Retrieve and process errors reported by the
1800  * hardware. Called by the Core module.
1801  */
1802 static void i7core_check_error(struct mem_ctl_info *mci)
1803 {
1804  struct i7core_pvt *pvt = mci->pvt_info;
1805  int i;
1806  unsigned count = 0;
1807  struct mce *m;
1808 
1809  /*
1810  * MCE first step: Copy all mce errors into a temporary buffer
1811  * We use a double buffering here, to reduce the risk of
1812  * losing an error.
1813  */
1814  smp_rmb();
1815  count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1816  % MCE_LOG_LEN;
1817  if (!count)
1818  goto check_ce_error;
1819 
1820  m = pvt->mce_outentry;
1821  if (pvt->mce_in + count > MCE_LOG_LEN) {
1822  unsigned l = MCE_LOG_LEN - pvt->mce_in;
1823 
1824  memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1825  smp_wmb();
1826  pvt->mce_in = 0;
1827  count -= l;
1828  m += l;
1829  }
1830  memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1831  smp_wmb();
1832  pvt->mce_in += count;
1833 
1834  smp_rmb();
1835  if (pvt->mce_overrun) {
1836  i7core_printk(KERN_ERR, "Lost %d memory errors\n",
1837  pvt->mce_overrun);
1838  smp_wmb();
1839  pvt->mce_overrun = 0;
1840  }
1841 
1842  /*
1843  * MCE second step: parse errors and display
1844  */
1845  for (i = 0; i < count; i++)
1846  i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
1847 
1848  /*
1849  * Now, let's increment CE error counts
1850  */
1851 check_ce_error:
1852  if (!pvt->is_registered)
1853  i7core_udimm_check_mc_ecc_err(mci);
1854  else
1855  i7core_rdimm_check_mc_ecc_err(mci);
1856 }
1857 
1858 /*
1859  * i7core_mce_check_error Replicates mcelog routine to get errors
1860  * This routine simply queues mcelog errors, and
1861  * returns. The error itself should be handled later
1862  * by i7core_check_error.
1863  * WARNING: As this routine should be called at NMI time, extra care should
1864  * be taken to avoid deadlocks, and to be as fast as possible.
1865  */
1866 static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
1867  void *data)
1868 {
1869  struct mce *mce = (struct mce *)data;
1870  struct i7core_dev *i7_dev;
1871  struct mem_ctl_info *mci;
1872  struct i7core_pvt *pvt;
1873 
1874  i7_dev = get_i7core_dev(mce->socketid);
1875  if (!i7_dev)
1876  return NOTIFY_BAD;
1877 
1878  mci = i7_dev->mci;
1879  pvt = mci->pvt_info;
1880 
1881  /*
1882  * Just let mcelog handle it if the error is
1883  * outside the memory controller
1884  */
1885  if (((mce->status & 0xffff) >> 7) != 1)
1886  return NOTIFY_DONE;
1887 
1888  /* Bank 8 registers are the only ones that we know how to handle */
1889  if (mce->bank != 8)
1890  return NOTIFY_DONE;
1891 
1892  smp_rmb();
1893  if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1894  smp_wmb();
1895  pvt->mce_overrun++;
1896  return NOTIFY_DONE;
1897  }
1898 
1899  /* Copy memory error at the ringbuffer */
1900  memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1901  smp_wmb();
1902  pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1903 
1904  /* Handle fatal errors immediately */
1905  if (mce->mcgstatus & 1)
1906  i7core_check_error(mci);
1907 
1908  /* Advise mcelog that the errors were handled */
1909  return NOTIFY_STOP;
1910 }
1911 
1912 static struct notifier_block i7_mce_dec = {
1913  .notifier_call = i7core_mce_check_error,
1914 };
1915 
1916 struct memdev_dmi_entry {
1917  u8 type;
1918  u8 length;
1919  u16 handle;
1920  u16 phys_mem_array_handle;
1921  u16 mem_err_info_handle;
1922  u16 total_width;
1923  u16 data_width;
1924  u16 size;
1925  u8 form;
1926  u8 device_set;
1927  u8 device_locator;
1928  u8 bank_locator;
1929  u8 memory_type;
1930  u16 type_detail;
1931  u16 speed;
1932  u8 manufacturer;
1933  u8 serial_number;
1934  u8 asset_tag;
1935  u8 part_number;
1936  u8 attributes;
1937  u32 extended_size;
1938  u16 conf_mem_clk_speed;
1939 } __attribute__((__packed__));
1940 
1941 
1942 /*
1943  * Decode the DRAM Clock Frequency, be paranoid, make sure that all
1944  * memory devices show the same speed, and if they don't then consider
1945  * all speeds to be invalid.
1946  */
1947 static void decode_dclk(const struct dmi_header *dh, void *_dclk_freq)
1948 {
1949  int *dclk_freq = _dclk_freq;
1950  u16 dmi_mem_clk_speed;
1951 
1952  if (*dclk_freq == -1)
1953  return;
1954 
1955  if (dh->type == DMI_ENTRY_MEM_DEVICE) {
1956  struct memdev_dmi_entry *memdev_dmi_entry =
1957  (struct memdev_dmi_entry *)dh;
1958  unsigned long conf_mem_clk_speed_offset =
1959  (unsigned long)&memdev_dmi_entry->conf_mem_clk_speed -
1960  (unsigned long)&memdev_dmi_entry->type;
1961  unsigned long speed_offset =
1962  (unsigned long)&memdev_dmi_entry->speed -
1963  (unsigned long)&memdev_dmi_entry->type;
1964 
1965  /* Check that a DIMM is present */
1966  if (memdev_dmi_entry->size == 0)
1967  return;
1968 
1969  /*
1970  * Pick the configured speed if it's available, otherwise
1971  * pick the DIMM speed, or we don't have a speed.
1972  */
1973  if (memdev_dmi_entry->length > conf_mem_clk_speed_offset) {
1974  dmi_mem_clk_speed =
1975  memdev_dmi_entry->conf_mem_clk_speed;
1976  } else if (memdev_dmi_entry->length > speed_offset) {
1977  dmi_mem_clk_speed = memdev_dmi_entry->speed;
1978  } else {
1979  *dclk_freq = -1;
1980  return;
1981  }
1982 
1983  if (*dclk_freq == 0) {
1984  /* First pass, speed was 0 */
1985  if (dmi_mem_clk_speed > 0) {
1986  /* Set speed if a valid speed is read */
1987  *dclk_freq = dmi_mem_clk_speed;
1988  } else {
1989  /* Otherwise we don't have a valid speed */
1990  *dclk_freq = -1;
1991  }
1992  } else if (*dclk_freq > 0 &&
1993  *dclk_freq != dmi_mem_clk_speed) {
1994  /*
1995  * If we have a speed, check that all DIMMS are the same
1996  * speed, otherwise set the speed as invalid.
1997  */
1998  *dclk_freq = -1;
1999  }
2000  }
2001 }
2002 
2003 /*
2004  * The default DCLK frequency is used as a fallback if we
2005  * fail to find anything reliable in the DMI. The value
2006  * is taken straight from the datasheet.
2007  */
2008 #define DEFAULT_DCLK_FREQ 800
2009 
2010 static int get_dclk_freq(void)
2011 {
2012  int dclk_freq = 0;
2013 
2014  dmi_walk(decode_dclk, (void *)&dclk_freq);
2015 
2016  if (dclk_freq < 1)
2017  return DEFAULT_DCLK_FREQ;
2018 
2019  return dclk_freq;
2020 }
2021 
2022 /*
2023  * set_sdram_scrub_rate This routine sets byte/sec bandwidth scrub rate
2024  * to hardware according to SCRUBINTERVAL formula
2025  * found in datasheet.
2026  */
2027 static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
2028 {
2029  struct i7core_pvt *pvt = mci->pvt_info;
2030  struct pci_dev *pdev;
2031  u32 dw_scrub;
2032  u32 dw_ssr;
2033 
2034  /* Get data from the MC register, function 2 */
2035  pdev = pvt->pci_mcr[2];
2036  if (!pdev)
2037  return -ENODEV;
2038 
2039  pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &dw_scrub);
2040 
2041  if (new_bw == 0) {
2042  /* Prepare to disable patrol scrub */
2043  dw_scrub &= ~STARTSCRUB;
2044  /* Stop the patrol scrub engine */
2045  write_and_test(pdev, MC_SCRUB_CONTROL,
2046  dw_scrub & ~SCRUBINTERVAL_MASK);
2047 
2048  /* Get current status of scrub rate and set bit to disable */
2049  pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
2050  dw_ssr &= ~SSR_MODE_MASK;
2051  dw_ssr |= SSR_MODE_DISABLE;
2052  } else {
2053  const int cache_line_size = 64;
2054  const u32 freq_dclk_mhz = pvt->dclk_freq;
2055  unsigned long long scrub_interval;
2056  /*
2057  * Translate the desired scrub rate to a register value and
2058  * program the corresponding register value.
2059  */
2060  scrub_interval = (unsigned long long)freq_dclk_mhz *
2061  cache_line_size * 1000000;
2062  do_div(scrub_interval, new_bw);
2063 
2064  if (!scrub_interval || scrub_interval > SCRUBINTERVAL_MASK)
2065  return -EINVAL;
2066 
2067  dw_scrub = SCRUBINTERVAL_MASK & scrub_interval;
2068 
2069  /* Start the patrol scrub engine */
2070  pci_write_config_dword(pdev, MC_SCRUB_CONTROL,
2071  STARTSCRUB | dw_scrub);
2072 
2073  /* Get current status of scrub rate and set bit to enable */
2074  pci_read_config_dword(pdev, MC_SSRCONTROL, &dw_ssr);
2075  dw_ssr &= ~SSR_MODE_MASK;
2076  dw_ssr |= SSR_MODE_ENABLE;
2077  }
2078  /* Disable or enable scrubbing */
2079  pci_write_config_dword(pdev, MC_SSRCONTROL, dw_ssr);
2080 
2081  return new_bw;
2082 }
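/*
 * Editor's note -- worked example, not taken from the datasheet: with the
 * default DCLK of 800 MHz and a requested bandwidth of 102400 bytes/sec,
 * scrub_interval = 800 * 64 * 1000000 / 102400 = 500000, i.e., by
 * dimensional analysis, roughly the number of DCLK cycles between
 * consecutive cache-line scrubs. That fits in SCRUBINTERVAL_MASK and is
 * what gets programmed together with STARTSCRUB.
 */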
2083 
2084 /*
2085  * get_sdram_scrub_rate This routine converts the current scrub rate value
2086  * into byte/sec bandwidth according to the
2087  * SCRUBINTERVAL formula found in the datasheet.
2088  */
2089 static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
2090 {
2091  struct i7core_pvt *pvt = mci->pvt_info;
2092  struct pci_dev *pdev;
2093  const u32 cache_line_size = 64;
2094  const u32 freq_dclk_mhz = pvt->dclk_freq;
2095  unsigned long long scrub_rate;
2096  u32 scrubval;
2097 
2098  /* Get data from the MC register, function 2 */
2099  pdev = pvt->pci_mcr[2];
2100  if (!pdev)
2101  return -ENODEV;
2102 
2103  /* Get current scrub control data */
2104  pci_read_config_dword(pdev, MC_SCRUB_CONTROL, &scrubval);
2105 
2106  /* Mask the highest 8 bits to 0 */
2107  scrubval &= SCRUBINTERVAL_MASK;
2108  if (!scrubval)
2109  return 0;
2110 
2111  /* Convert the scrub interval value into byte/sec bandwidth */
2112  scrub_rate = (unsigned long long)freq_dclk_mhz *
2113  1000000 * cache_line_size;
2114  do_div(scrub_rate, scrubval);
2115  return (int)scrub_rate;
2116 }
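/*
 * Editor's note -- round-trip check of the example above: reading back
 * scrubval = 500000 with the same 800 MHz DCLK gives
 * 800 * 1000000 * 64 / 500000 = 102400 bytes/sec, so get_sdram_scrub_rate()
 * reports the same bandwidth that set_sdram_scrub_rate() programmed
 * (up to the truncation introduced by do_div()).
 */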
2117 
2118 static void enable_sdram_scrub_setting(struct mem_ctl_info *mci)
2119 {
2120  struct i7core_pvt *pvt = mci->pvt_info;
2121  u32 pci_lock;
2122 
2123  /* Unlock writes to pci registers */
2124  pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2125  pci_lock &= ~0x3;
2126  pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2127  pci_lock | MC_CFG_UNLOCK);
2128 
2129  mci->set_sdram_scrub_rate = set_sdram_scrub_rate;
2130  mci->get_sdram_scrub_rate = get_sdram_scrub_rate;
2131 }
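/*
 * Editor's note -- usage sketch; the exact path is an assumption that
 * depends on the EDAC core version and the mc instance number: once these
 * callbacks are installed, the scrub rate is normally reachable from
 * user space, e.g.
 *
 *	# echo 102400 > /sys/devices/system/edac/mc/mc0/sdram_scrub_rate
 *	# cat /sys/devices/system/edac/mc/mc0/sdram_scrub_rate
 */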
2132 
2133 static void disable_sdram_scrub_setting(struct mem_ctl_info *mci)
2134 {
2135  struct i7core_pvt *pvt = mci->pvt_info;
2136  u32 pci_lock;
2137 
2138  /* Lock writes to pci registers */
2139  pci_read_config_dword(pvt->pci_noncore, MC_CFG_CONTROL, &pci_lock);
2140  pci_lock &= ~0x3;
2141  pci_write_config_dword(pvt->pci_noncore, MC_CFG_CONTROL,
2142  pci_lock | MC_CFG_LOCK);
2143 }
2144 
2145 static void i7core_pci_ctl_create(struct i7core_pvt *pvt)
2146 {
2147  pvt->i7core_pci = edac_pci_create_generic_ctl(
2148  &pvt->i7core_dev->pdev[0]->dev,
2149  EDAC_MOD_STR);
2150  if (unlikely(!pvt->i7core_pci))
2151  i7core_printk(KERN_WARNING,
2152  "Unable to setup PCI error report via EDAC\n");
2153 }
2154 
2155 static void i7core_pci_ctl_release(struct i7core_pvt *pvt)
2156 {
2157  if (likely(pvt->i7core_pci))
2158  edac_pci_release_generic_ctl(pvt->i7core_pci);
2159  else
2160  i7core_printk(KERN_ERR,
2161  "Couldn't find mem_ctl_info for socket %d\n",
2162  pvt->i7core_dev->socket);
2163  pvt->i7core_pci = NULL;
2164 }
2165 
2166 static void i7core_unregister_mci(struct i7core_dev *i7core_dev)
2167 {
2168  struct mem_ctl_info *mci = i7core_dev->mci;
2169  struct i7core_pvt *pvt;
2170 
2171  if (unlikely(!mci || !mci->pvt_info)) {
2172  edac_dbg(0, "MC: dev = %p\n", &i7core_dev->pdev[0]->dev);
2173 
2174  i7core_printk(KERN_ERR, "Couldn't find mci handler\n");
2175  return;
2176  }
2177 
2178  pvt = mci->pvt_info;
2179 
2180  edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);
2181 
2182  /* Disable scrubrate setting */
2183  if (pvt->enable_scrub)
2184  disable_sdram_scrub_setting(mci);
2185 
2186  /* Disable EDAC polling */
2187  i7core_pci_ctl_release(pvt);
2188 
2189  /* Remove MC sysfs nodes */
2190  i7core_delete_sysfs_devices(mci);
2191  edac_mc_del_mc(mci->pdev);
2192 
2193  edac_dbg(1, "%s: free mci struct\n", mci->ctl_name);
2194  kfree(mci->ctl_name);
2195  edac_mc_free(mci);
2196  i7core_dev->mci = NULL;
2197 }
2198 
2199 static int i7core_register_mci(struct i7core_dev *i7core_dev)
2200 {
2201  struct mem_ctl_info *mci;
2202  struct i7core_pvt *pvt;
2203  int rc;
2204  struct edac_mc_layer layers[2];
2205 
2206  /* allocate a new MC control structure */
2207 
2208  layers[0].type = EDAC_MC_LAYER_CHANNEL;
2209  layers[0].size = NUM_CHANS;
2210  layers[0].is_virt_csrow = false;
2211  layers[1].type = EDAC_MC_LAYER_SLOT;
2212  layers[1].size = MAX_DIMMS;
2213  layers[1].is_virt_csrow = true;
2214  mci = edac_mc_alloc(i7core_dev->socket, ARRAY_SIZE(layers), layers,
2215  sizeof(*pvt));
2216  if (unlikely(!mci))
2217  return -ENOMEM;
2218 
2219  edac_dbg(0, "MC: mci = %p, dev = %p\n", mci, &i7core_dev->pdev[0]->dev);
2220 
2221  pvt = mci->pvt_info;
2222  memset(pvt, 0, sizeof(*pvt));
2223 
2224  /* Associate i7core_dev and mci for future use */
2225  pvt->i7core_dev = i7core_dev;
2226  i7core_dev->mci = mci;
2227 
2228  /*
2229  * FIXME: how to handle RDDR3 at MCI level? It is possible to have
2230  * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
2231  * memory channels
2232  */
2233  mci->mtype_cap = MEM_FLAG_DDR3;
2234  mci->edac_ctl_cap = EDAC_FLAG_NONE;
2235  mci->edac_cap = EDAC_FLAG_NONE;
2236  mci->mod_name = "i7core_edac.c";
2237  mci->mod_ver = I7CORE_REVISION;
2238  mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
2239  i7core_dev->socket);
2240  mci->dev_name = pci_name(i7core_dev->pdev[0]);
2241  mci->ctl_page_to_phys = NULL;
2242 
2243  /* Store pci devices at mci for faster access */
2244  rc = mci_bind_devs(mci, i7core_dev);
2245  if (unlikely(rc < 0))
2246  goto fail0;
2247 
2248 
2249  /* Get dimm basic config */
2250  get_dimm_config(mci);
2251  /* record ptr to the generic device */
2252  mci->pdev = &i7core_dev->pdev[0]->dev;
2253  /* Set the function pointer to an actual operation function */
2254  mci->edac_check = i7core_check_error;
2255 
2256  /* Enable scrubrate setting */
2257  if (pvt->enable_scrub)
2258  enable_sdram_scrub_setting(mci);
2259 
2260  /* add this new MC control structure to EDAC's list of MCs */
2261  if (unlikely(edac_mc_add_mc(mci))) {
2262  edac_dbg(0, "MC: failed edac_mc_add_mc()\n");
2263  /* FIXME: perhaps some code should go here that disables error
2264  * reporting if we just enabled it
2265  */
2266 
2267  rc = -EINVAL;
2268  goto fail0;
2269  }
2270  if (i7core_create_sysfs_devices(mci)) {
2271  edac_dbg(0, "MC: failed to create sysfs nodes\n");
2272  edac_mc_del_mc(mci->pdev);
2273  rc = -EINVAL;
2274  goto fail0;
2275  }
2276 
2277  /* Default error mask is any memory */
2278  pvt->inject.channel = 0;
2279  pvt->inject.dimm = -1;
2280  pvt->inject.rank = -1;
2281  pvt->inject.bank = -1;
2282  pvt->inject.page = -1;
2283  pvt->inject.col = -1;
2284 
2285  /* allocating generic PCI control info */
2286  i7core_pci_ctl_create(pvt);
2287 
2288  /* DCLK for scrub rate setting */
2289  pvt->dclk_freq = get_dclk_freq();
2290 
2291  return 0;
2292 
2293 fail0:
2294  kfree(mci->ctl_name);
2295  edac_mc_free(mci);
2296  i7core_dev->mci = NULL;
2297  return rc;
2298 }
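/*
 * Editor's note -- hedged summary of the layer setup above: describing the
 * controller as a channel layer of NUM_CHANS entries and a slot layer of
 * MAX_DIMMS entries asks edac_mc_alloc() for NUM_CHANS * MAX_DIMMS DIMM
 * slots, which get_dimm_config() later fills in; only the slot layer is
 * marked as a virtual csrow because the DIMM, rather than a chip-select
 * row, is the natural unit for error accounting on these controllers.
 */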
2299 
2300 /*
2301  * i7core_probe Probe for ONE instance of the device to see if it is
2302  * present.
2303  * return:
2304  * 0 if a device was found
2305  * < 0 for an error code
2306  */
2307 
2308 static int __devinit i7core_probe(struct pci_dev *pdev,
2309  const struct pci_device_id *id)
2310 {
2311  int rc, count = 0;
2312  struct i7core_dev *i7core_dev;
2313 
2314  /* get the pci devices we want to reserve for our use */
2315  mutex_lock(&i7core_edac_lock);
2316 
2317  /*
2318  * All memory controllers are allocated at the first pass.
2319  */
2320  if (unlikely(probed >= 1)) {
2321  mutex_unlock(&i7core_edac_lock);
2322  return -ENODEV;
2323  }
2324  probed++;
2325 
2326  rc = i7core_get_all_devices();
2327  if (unlikely(rc < 0))
2328  goto fail0;
2329 
2330  list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
2331  count++;
2332  rc = i7core_register_mci(i7core_dev);
2333  if (unlikely(rc < 0))
2334  goto fail1;
2335  }
2336 
2337  /*
2338  * Nehalem-EX uses a different memory controller. However, as the
2339  * memory controller is not visible on some Nehalem/Nehalem-EP, we
2340  * need to indirectly probe via an X58 PCI device. The same devices
2341  * are found on (some) Nehalem-EX. So, on those machines, the
2342  * probe routine needs to return -ENODEV, as the actual Memory
2343  * Controller registers won't be detected.
2344  */
2345  if (!count) {
2346  rc = -ENODEV;
2347  goto fail1;
2348  }
2349 
2350  i7core_printk(KERN_INFO,
2351  "Driver loaded, %d memory controller(s) found.\n",
2352  count);
2353 
2354  mutex_unlock(&i7core_edac_lock);
2355  return 0;
2356 
2357 fail1:
2358  list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2359  i7core_unregister_mci(i7core_dev);
2360 
2361  i7core_put_all_devices();
2362 fail0:
2363  mutex_unlock(&i7core_edac_lock);
2364  return rc;
2365 }
2366 
2367 /*
2368  * i7core_remove destructor for one instance of device
2369  *
2370  */
2371 static void __devexit i7core_remove(struct pci_dev *pdev)
2372 {
2373  struct i7core_dev *i7core_dev;
2374 
2375  edac_dbg(0, "\n");
2376 
2377  /*
2378  * There is a problem here: the pdev value passed in for removal will be
2379  * wrong, since it points to the X58 device used to detect that the machine
2380  * is a Nehalem or newer design. However, due to the way several PCI
2381  * devices are grouped together to provide MC functionality, we need
2382  * to use a different method for releasing the devices
2383  */
2384 
2385  mutex_lock(&i7core_edac_lock);
2386 
2387  if (unlikely(!probed)) {
2388  mutex_unlock(&i7core_edac_lock);
2389  return;
2390  }
2391 
2392  list_for_each_entry(i7core_dev, &i7core_edac_list, list)
2393  i7core_unregister_mci(i7core_dev);
2394 
2395  /* Release PCI resources */
2396  i7core_put_all_devices();
2397 
2398  probed--;
2399 
2400  mutex_unlock(&i7core_edac_lock);
2401 }
2402 
2403 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
2404 
2405 /*
2406  * i7core_driver pci_driver structure for this module
2407  *
2408  */
2409 static struct pci_driver i7core_driver = {
2410  .name = "i7core_edac",
2411  .probe = i7core_probe,
2412  .remove = __devexit_p(i7core_remove),
2413  .id_table = i7core_pci_tbl,
2414 };
2415 
2416 /*
2417  * i7core_init Module entry function
2418  * Try to initialize this module for its devices
2419  */
2420 static int __init i7core_init(void)
2421 {
2422  int pci_rc;
2423 
2424  edac_dbg(2, "\n");
2425 
2426  /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2427  opstate_init();
2428 
2429  if (use_pci_fixup)
2430  i7core_xeon_pci_fixup(pci_dev_table);
2431 
2432  pci_rc = pci_register_driver(&i7core_driver);
2433 
2434  if (pci_rc >= 0) {
2435  mce_register_decode_chain(&i7_mce_dec);
2436  return 0;
2437  }
2438 
2439  i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
2440  pci_rc);
2441 
2442  return pci_rc;
2443 }
2444 
2445 /*
2446  * i7core_exit() Module exit function
2447  * Unregister the driver
2448  */
2449 static void __exit i7core_exit(void)
2450 {
2451  edac_dbg(2, "\n");
2452  pci_unregister_driver(&i7core_driver);
2453  mce_unregister_decode_chain(&i7_mce_dec);
2454 }
2455 
2456 module_init(i7core_init);
2457 module_exit(i7core_exit);
2458 
2459 MODULE_LICENSE("GPL");
2460 MODULE_AUTHOR("Mauro Carvalho Chehab <[email protected]>");
2461 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2462 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2463  I7CORE_REVISION);
2464 
2465 module_param(edac_op_state, int, 0444);
2466 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");
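/*
 * Editor's note -- usage sketch, assuming the module is built as
 * i7core_edac.ko:
 *
 *	# modprobe i7core_edac edac_op_state=0
 *	# rmmod i7core_edac
 *
 * edac_op_state is read once at load time by opstate_init(); 0 selects
 * polled operation and 1 selects NMI, per the parameter description above.
 */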