Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
edac.h
Go to the documentation of this file.
1 /*
2  * Generic EDAC defs
3  *
4  * Author: Dave Jiang <[email protected]>
5  *
6  * 2006-2008 (c) MontaVista Software, Inc. This file is licensed under
7  * the terms of the GNU General Public License version 2. This program
8  * is licensed "as is" without any warranty of any kind, whether express
9  * or implied.
10  *
11  */
12 #ifndef _LINUX_EDAC_H_
13 #define _LINUX_EDAC_H_
14 
15 #include <linux/atomic.h>
16 #include <linux/device.h>
17 #include <linux/kobject.h>
18 #include <linux/completion.h>
19 #include <linux/workqueue.h>
20 #include <linux/debugfs.h>
21 
22 struct device;
23 
/*
 * Values for the global edac_op_state. opstate_init() in this header
 * compares edac_op_state against EDAC_OPSTATE_POLL and EDAC_OPSTATE_NMI.
 */
24 #define EDAC_OPSTATE_INVAL -1
25 #define EDAC_OPSTATE_POLL 0
26 #define EDAC_OPSTATE_NMI 1
27 #define EDAC_OPSTATE_INT 2
28 
/* Global EDAC state; only declared here, the definitions live elsewhere. */
29 extern int edac_op_state;
30 extern int edac_err_assert;
31 extern atomic_t edac_handlers;
32 extern struct bus_type edac_subsys;
33 
/*
 * Core entry points, declared only.
 * NOTE(review): edac_get_sysfs_subsys()/edac_put_sysfs_subsys() look like a
 * get/put pair around the EDAC bus_type - confirm against the implementation.
 */
34 extern int edac_handler_set(void);
35 extern void edac_atomic_assert_error(void);
36 extern struct bus_type *edac_get_sysfs_subsys(void);
37 extern void edac_put_sysfs_subsys(void);
38 
39 static inline void opstate_init(void)
40 {
41  switch (edac_op_state) {
42  case EDAC_OPSTATE_POLL:
43  case EDAC_OPSTATE_NMI:
44  break;
45  default:
47  }
48  return;
49 }
50 
/*
 * EDAC_MC_LABEL_LEN sizes struct dimm_info's label[] buffer, which is declared
 * with EDAC_MC_LABEL_LEN + 1 bytes (the extra byte presumably holds the NUL
 * terminator - confirm).
 * MC_PROC_NAME_MAX_LEN has no user in the visible part of this header.
 */
51 #define EDAC_MC_LABEL_LEN 31
52 #define MC_PROC_NAME_MAX_LEN 7
53 
/*
 * enum dev_type - data width of the DRAM devices on a memory stick
 *
 * The xN suffix is the number of data bits each device outputs (x4 and x8
 * being the common ones). Enumerators are listed in the same order as the
 * DEV_FLAG_* macros in this header, which wrap each value in BIT().
 */
enum dev_type {
	DEV_UNKNOWN = 0,	/* width not known */
	DEV_X1,
	DEV_X2,
	DEV_X4,
	DEV_X8,
	DEV_X16,
	DEV_X32,		/* Do these parts exist? */
	DEV_X64			/* Do these parts exist? */
};
77 
/*
 * Each DEV_FLAG_* wraps the like-named DEV_* constant in BIT().
 * BIT() is not defined in this header (presumably 1UL << n from the kernel
 * bit-operation helpers - confirm).
 */
78 #define DEV_FLAG_UNKNOWN BIT(DEV_UNKNOWN)
79 #define DEV_FLAG_X1 BIT(DEV_X1)
80 #define DEV_FLAG_X2 BIT(DEV_X2)
81 #define DEV_FLAG_X4 BIT(DEV_X4)
82 #define DEV_FLAG_X8 BIT(DEV_X8)
83 #define DEV_FLAG_X16 BIT(DEV_X16)
84 #define DEV_FLAG_X32 BIT(DEV_X32)
85 #define DEV_FLAG_X64 BIT(DEV_X64)
86 
104 };
105 
/*
 * enum mem_type - kind of memory module
 *
 * Enumerators follow the order of the MEM_FLAG_* macros in this header,
 * which wrap each value in BIT().
 */
enum mem_type {
	MEM_EMPTY = 0,		/* nothing populated */
	MEM_RESERVED,		/* reserved type */
	MEM_UNKNOWN,		/* type could not be determined */
	MEM_FPM,		/* fast page mode */
	MEM_EDO,		/* extended data out */
	MEM_BEDO,		/* burst EDO */
	MEM_SDR,		/* single data rate SDRAM */
	MEM_RDR,		/* registered SDR SDRAM */
	MEM_DDR,		/* double data rate SDRAM */
	MEM_RDDR,		/* registered DDR SDRAM */
	MEM_RMBS,		/* Rambus DRAM */
	MEM_DDR2,		/* DDR2 SDRAM */
	MEM_FB_DDR2,		/* fully-buffered DDR2 */
	MEM_RDDR2,		/* registered DDR2 */
	MEM_XDR,		/* Rambus XDR */
	MEM_DDR3,		/* DDR3 SDRAM */
	MEM_RDDR3,		/* registered DDR3 */
};
165 
/*
 * Each MEM_FLAG_* wraps the like-named MEM_* constant in BIT().
 * NOTE(review): presumably OR-ed together in mem_ctl_info.mtype_cap
 * ("memory types supported by mc") - confirm against the drivers.
 */
166 #define MEM_FLAG_EMPTY BIT(MEM_EMPTY)
167 #define MEM_FLAG_RESERVED BIT(MEM_RESERVED)
168 #define MEM_FLAG_UNKNOWN BIT(MEM_UNKNOWN)
169 #define MEM_FLAG_FPM BIT(MEM_FPM)
170 #define MEM_FLAG_EDO BIT(MEM_EDO)
171 #define MEM_FLAG_BEDO BIT(MEM_BEDO)
172 #define MEM_FLAG_SDR BIT(MEM_SDR)
173 #define MEM_FLAG_RDR BIT(MEM_RDR)
174 #define MEM_FLAG_DDR BIT(MEM_DDR)
175 #define MEM_FLAG_RDDR BIT(MEM_RDDR)
176 #define MEM_FLAG_RMBS BIT(MEM_RMBS)
177 #define MEM_FLAG_DDR2 BIT(MEM_DDR2)
178 #define MEM_FLAG_FB_DDR2 BIT(MEM_FB_DDR2)
179 #define MEM_FLAG_RDDR2 BIT(MEM_RDDR2)
180 #define MEM_FLAG_XDR BIT(MEM_XDR)
181 #define MEM_FLAG_DDR3 BIT(MEM_DDR3)
182 #define MEM_FLAG_RDDR3 BIT(MEM_RDDR3)
183 
/*
 * enum edac_type - error detection / correction capability
 *
 * Nine of the enumerators are the ones wrapped by the EDAC_FLAG_* macros in
 * this header, in the same order.
 * NOTE(review): EDAC_RESERVED has no EDAC_FLAG_* counterpart here; its
 * presence and position (between EDAC_NONE and EDAC_PARITY) must be confirmed
 * against the upstream header, because it shifts every later value by one.
 */
enum edac_type {
	EDAC_UNKNOWN = 0,	/* unknown whether ECC is available */
	EDAC_NONE,		/* no ECC support */
	EDAC_RESERVED,		/* reserved ECC type */
	EDAC_PARITY,		/* detects parity errors */
	EDAC_EC,		/* error checking, no correction */
	EDAC_SECDED,		/* single-bit correction, double-bit detection */
	EDAC_S2ECD2ED,		/* chipkill, x2 devices */
	EDAC_S4ECD4ED,		/* chipkill, x4 devices */
	EDAC_S8ECD8ED,		/* chipkill, x8 devices */
	EDAC_S16ECD16ED,	/* chipkill, x16 devices */
};
209 
/*
 * Each EDAC_FLAG_* wraps the like-named EDAC_* constant in BIT().
 * NOTE(review): presumably the bits stored in mem_ctl_info.edac_ctl_cap and
 * mem_ctl_info.edac_cap - confirm against the drivers.
 */
210 #define EDAC_FLAG_UNKNOWN BIT(EDAC_UNKNOWN)
211 #define EDAC_FLAG_NONE BIT(EDAC_NONE)
212 #define EDAC_FLAG_PARITY BIT(EDAC_PARITY)
213 #define EDAC_FLAG_EC BIT(EDAC_EC)
214 #define EDAC_FLAG_SECDED BIT(EDAC_SECDED)
215 #define EDAC_FLAG_S2ECD2ED BIT(EDAC_S2ECD2ED)
216 #define EDAC_FLAG_S4ECD4ED BIT(EDAC_S4ECD4ED)
217 #define EDAC_FLAG_S8ECD8ED BIT(EDAC_S8ECD8ED)
218 #define EDAC_FLAG_S16ECD16ED BIT(EDAC_S16ECD16ED)
219 
244 };
245 
/*
 * Each SCRUB_FLAG_* wraps a SCRUB_* constant in BIT().
 * NOTE(review): SCRUB_FLAG_SW_TUN expands to SCRUB_SW_SCRUB_TUNABLE, while its
 * hardware counterpart SCRUB_FLAG_HW_TUN uses SCRUB_HW_TUNABLE. The enum that
 * should define these names is not visible in this extract - verify that
 * SCRUB_SW_SCRUB_TUNABLE actually exists (SCRUB_SW_TUNABLE may be intended).
 */
246 #define SCRUB_FLAG_SW_PROG BIT(SCRUB_SW_PROG)
247 #define SCRUB_FLAG_SW_SRC BIT(SCRUB_SW_SRC)
248 #define SCRUB_FLAG_SW_PROG_SRC BIT(SCRUB_SW_PROG_SRC)
249 #define SCRUB_FLAG_SW_TUN BIT(SCRUB_SW_SCRUB_TUNABLE)
250 #define SCRUB_FLAG_HW_PROG BIT(SCRUB_HW_PROG)
251 #define SCRUB_FLAG_HW_SRC BIT(SCRUB_HW_SRC)
252 #define SCRUB_FLAG_HW_PROG_SRC BIT(SCRUB_HW_PROG_SRC)
253 #define SCRUB_FLAG_HW_TUN BIT(SCRUB_HW_TUNABLE)
254 
255 /* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */
256 
257 /* EDAC internal operation states
 *
 * The three OP_RUNNING_* values share the 0x2xx range and differ only in the
 * low byte.
 * NOTE(review): presumably the values held in mem_ctl_info.op_state ("the
 * internal state of this controller instance") - confirm.
 */
258 #define OP_ALLOC 0x100
259 #define OP_RUNNING_POLL 0x201
260 #define OP_RUNNING_INTERRUPT 0x202
261 #define OP_RUNNING_POLL_INTR 0x203
262 #define OP_OFFLINE 0x300
263 
264 /*
265  * Concepts used in the EDAC subsystem
266  *
267  * There are several things to be aware of that aren't at all obvious:
268  *
269  * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc..
270  *
271  * These are some of the many terms that are thrown about that don't always
272  * mean what people think they mean (Inconceivable!). In the interest of
273  * creating a common ground for discussion, terms and their definitions
274  * will be established.
275  *
276  * Memory devices: The individual DRAM chips on a memory stick. These
277  * devices commonly output 4 and 8 bits each (x4, x8).
278  * Grouping several of these in parallel provides the
279  * number of bits that the memory controller expects:
280  * typically 72 bits, in order to provide 64 bits +
281  * 8 bits of ECC data.
282  *
283  * Memory Stick: A printed circuit board that aggregates multiple
284  * memory devices in parallel. In general, this is the
285  * Field Replaceable Unit (FRU) which gets replaced, in
286  * the case of excessive errors. Most often it is also
287  * called DIMM (Dual Inline Memory Module).
288  *
289  * Memory Socket: A physical connector on the motherboard that accepts
290  * a single memory stick. Also called a "slot" in several
291  * datasheets.
292  *
293  * Channel: A memory controller channel, responsible for communicating
294  * with a group of DIMMs. Each channel has its own
295  * independent control (command) and data bus, and can
296  * be used independently or grouped with other channels.
297  *
298  * Branch: It is typically the highest hierarchy on a
299  * Fully-Buffered DIMM memory controller.
300  * Typically, it contains two channels.
301  * Two channels at the same branch can be used in single
302  * mode or in lockstep mode.
303  * When lockstep is enabled, the cacheline is doubled,
304  * but it generally brings some performance penalty.
305  * Also, it is generally not possible to point to just one
306  * memory stick when an error occurs, as the error
307  * correction code is calculated using two DIMMs instead
308  * of one. Due to that, it is capable of correcting more
309  * errors than on single mode.
310  *
311  * Single-channel: The data accessed by the memory controller is contained
312  * in one DIMM only. E.g. if the data is 64 bits wide,
313  * the data flows to the CPU using one 64 bits parallel
314  * access.
315  * Typically used with SDR, DDR, DDR2 and DDR3 memories.
316  * FB-DIMM and RAMBUS use a different concept for channel,
317  * so this concept doesn't apply there.
318  *
319  * Double-channel: The data size accessed by the memory controller is
320  * interlaced into two dimms, accessed at the same time.
321  * E. g. if the DIMM is 64 bits-wide (72 bits with ECC),
322  * the data flows to the CPU using a 128 bits parallel
323  * access.
324  *
325  * Chip-select row: This is the name of the DRAM signal used to select the
326  * DRAM ranks to be accessed. Common chip-select rows for
327  * single channel are 64 bits, for dual channel 128 bits.
328  * It may not be visible by the memory controller, as some
329  * DIMM types have a memory buffer that can hide direct
330  * access to it from the Memory Controller.
331  *
332  * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory.
333  * Motherboards commonly drive two chip-select pins to
334  * a memory stick. A single-ranked stick, will occupy
335  * only one of those rows. The other will be unused.
336  *
337  * Double-Ranked stick: A double-ranked stick has two chip-select rows which
338  * access different sets of memory devices. The two
339  * rows cannot be accessed concurrently.
340  *
341  * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick.
342  * A double-sided stick has two chip-select rows which
343  * access different sets of memory devices. The two
344  * rows cannot be accessed concurrently. "Double-sided"
345  * is irrespective of the memory devices being mounted
346  * on both sides of the memory stick.
347  *
348  * Socket set: All of the memory sticks that are required for
349  * a single memory access or all of the memory sticks
350  * spanned by a chip-select row. A single socket set
351  * has two chip-select rows and if double-sided sticks
352  * are used these will occupy those chip-select rows.
353  *
354  * Bank: This term is avoided because it is unclear when
355  * needing to distinguish between chip-select rows and
356  * socket sets.
357  *
358  * Controller pages:
359  *
360  * Physical pages:
361  *
362  * Virtual pages:
363  *
364  *
365  * STRUCTURE ORGANIZATION AND CHOICES
366  *
367  *
368  *
369  * PS - I enjoyed writing all that about as much as you enjoyed reading it.
370  */
371 
388 };
389 
401  unsigned size;
403 };
404 
405 /*
406  * Maximum number of layers used by the memory controller to uniquely
407  * identify a single memory stick.
408  * NOTE: Changing this constant requires not only changing the value
409  * below, but also changing the existing core code, as some of that
410  * code is optimized for 3 layers (e.g. EDAC_DIMM_OFF() in this header
411  * only handles 1, 2 or 3 layers).
 */
412 #define EDAC_MAX_LAYERS 3
413 
/**
 * EDAC_DIMM_OFF - linear offset of an element addressed by layer indexes
 * @layers:	array (or pointer expression) whose elements have a ->size
 *		member; element [1] and [2] give the size of layers 1 and 2
 * @nlayers:	number of layers in use: 1, 2 or 3
 * @layer0:	index within layer 0
 * @layer1:	index within layer 1 (not evaluated when @nlayers is 1)
 * @layer2:	index within layer 2 (only evaluated when @nlayers is 3)
 *
 * Evaluates to:
 *   1 layer:  layer0
 *   2 layers: layer1 + size[1] * layer0
 *   3 layers: layer2 + size[2] * (layer1 + size[1] * layer0)
 * and to -EINVAL for any other @nlayers.
 *
 * Every macro parameter is parenthesised so that arbitrary expressions
 * (e.g. "base + n" for @layers) expand correctly. @nlayers can be evaluated
 * up to three times, so it must not have side effects. No range check is
 * made on the indexes.
 */
#define EDAC_DIMM_OFF(layers, nlayers, layer0, layer1, layer2) ({		\
	int __i;							\
	if ((nlayers) == 1)						\
		__i = (layer0);						\
	else if ((nlayers) == 2)					\
		__i = (layer1) + ((layers)[1].size * (layer0));		\
	else if ((nlayers) == 3)					\
		__i = (layer2) + ((layers)[2].size * ((layer1) +	\
			    ((layers)[1].size * (layer0))));		\
	else								\
		__i = -EINVAL;						\
	__i;								\
})
450 
/**
 * EDAC_DIMM_PTR - fetch the element of a pointer array addressed by layers
 * @layers:	layer description array, passed through to EDAC_DIMM_OFF()
 * @var:	array of pointers to index; the result has the type of *@var
 * @nlayers:	number of layers in use
 * @layer0:	index within layer 0
 * @layer1:	index within layer 1
 * @layer2:	index within layer 2
 *
 * Computes the linear offset with EDAC_DIMM_OFF() and evaluates to
 * @var[offset], or to NULL when the offset is negative (i.e. @nlayers is
 * not supported). The offset is not checked against the length of @var.
 *
 * The local is named ___i (three underscores) so that it does not collide
 * with the __i declared inside EDAC_DIMM_OFF().
 */
#define EDAC_DIMM_PTR(layers, var, nlayers, layer0, layer1, layer2) ({	\
	typeof(*(var)) __p;						\
	int ___i = EDAC_DIMM_OFF(layers, nlayers, layer0, layer1, layer2); \
	if (___i < 0)							\
		__p = NULL;						\
	else								\
		__p = (var)[___i];					\
	__p;								\
})
479 
/*
 * struct dimm_info - per-DIMM descriptor.
 *
 * Embeds its own struct device, a label buffer of EDAC_MC_LABEL_LEN + 1
 * bytes, a back-pointer to the owning mem_ctl_info, the device / memory /
 * EDAC type enums, and the page count for the DIMM.
 */
480 struct dimm_info {
481  struct device dev;
482 
483  char label[EDAC_MC_LABEL_LEN + 1]; /* DIMM label on motherboard */
484 
485  /* Memory location data */
 /*
  * NOTE(review): listing line 486 is absent from this extract, so the
  * member that the comment above introduces is missing here - restore it
  * from the upstream header before using this definition.
  */
487 
488  struct mem_ctl_info *mci; /* the parent */
489 
490  u32 grain; /* granularity of reported error in bytes */
491  enum dev_type dtype; /* memory device type */
492  enum mem_type mtype; /* memory dimm type */
493  enum edac_type edac_mode; /* EDAC mode for this dimm */
494 
495  u32 nr_pages; /* number of pages on this dimm */
496 
497  unsigned csrow, cschannel; /* Points to the old API data */
498 };
499 
515 struct rank_info {
516  int chan_idx;
517  struct csrow_info *csrow;
518  struct dimm_info *dimm;
519 
520  u32 ce_count; /* Correctable Errors for this csrow */
521 };
522 
/*
 * struct csrow_info - per chip-select-row descriptor.
 *
 * Embeds its own struct device, the page range covered by the row, error
 * counters, a back-pointer to the owning mem_ctl_info, and an array of
 * rank_info pointers (one per channel).
 */
523 struct csrow_info {
524  struct device dev;
525 
526  /* Used only by edac_mc_find_csrow_by_page() */
527  unsigned long first_page; /* first page number in csrow */
528  unsigned long last_page; /* last page number in csrow */
529  unsigned long page_mask; /* used for interleaving -
530  * 0UL for non intlv */
531 
532  int csrow_idx; /* the chip-select row */
533 
534  u32 ue_count; /* Uncorrectable Errors for this csrow */
535  u32 ce_count; /* Correctable Errors for this csrow */
536 
537  struct mem_ctl_info *mci; /* the parent */
538 
539  /* channel information for this csrow */
 /*
  * NOTE(review): listing line 540 is absent from this extract; a member
  * (presumably the length of channels[] - confirm) is missing here.
  */
541  struct rank_info **channels;
542 };
543 
544 /*
545  * struct errcount_attribute - used to store the several error counts
546  */
548  int n_layers;
551 };
552 
553 /* MEMORY controller information structure
 *
 * One instance per memory controller: embeds a struct device and a
 * struct bus_type, links into a global list, records capability bitmaps,
 * driver callbacks, the csrow and DIMM pointer arrays, identification
 * strings and the controller's internal operation state.
 *
 * NOTE(review): the embedded listing numbers skip at several places inside
 * this struct (580, 586, 590, 601, 613-614, 632, 640-642, 644, 659), so
 * members are missing from this extract. Each gap is marked below; restore
 * the members from the upstream header before relying on this layout.
554  */
555 struct mem_ctl_info {
556  struct device dev;
557  struct bus_type bus;
558 
559  struct list_head link; /* for global list of mem_ctl_info structs */
560 
561  struct module *owner; /* Module owner of this control struct */
562 
563  unsigned long mtype_cap; /* memory types supported by mc */
564  unsigned long edac_ctl_cap; /* Mem controller EDAC capabilities */
565  unsigned long edac_cap; /* configuration capabilities - this is
566  * closely related to edac_ctl_cap. The
567  * difference is that the controller may be
568  * capable of s4ecd4ed which would be listed
569  * in edac_ctl_cap, but if channels aren't
570  * capable of s4ecd4ed then the edac_cap would
571  * not have that capability.
572  */
573  unsigned long scrub_cap; /* chipset scrub capabilities */
574  enum scrub_type scrub_mode; /* current scrub mode */
575 
576  /* Translates sdram memory scrub rate given in bytes/sec to the
577  internal representation and configures whatever else needs
578  to be configured.
579  */
 /* NOTE(review): listing line 580 missing - the callback described above is absent. */
581 
582  /* Get the current sdram memory scrub rate from the internal
583  representation and converts it to the closest matching
584  bandwidth in bytes/sec.
585  */
 /* NOTE(review): listing line 586 missing - the callback described above is absent. */
587 
588 
589  /* pointer to edac checking routine */
 /* NOTE(review): listing line 590 missing - the checking-routine pointer is absent. */
591 
592  /*
593  * Remaps memory pages: controller pages to physical pages.
594  * For most MC's, this will be NULL.
595  */
596  /* FIXME - why not send the phys page to begin with? */
597  unsigned long (*ctl_page_to_phys) (struct mem_ctl_info * mci,
598  unsigned long page);
599  int mc_idx;
600  struct csrow_info **csrows;
 /* NOTE(review): listing line 601 missing - a member after csrows is absent. */
602 
603  /*
604  * Memory Controller hierarchy
605  *
606  * There are basically two types of memory controller: the ones that
607  * see memory sticks ("dimms"), and the ones that see memory ranks.
608  * All old memory controllers enumerate memories per rank, but most
609  * of the recent drivers enumerate memories per DIMM, instead.
610  * When the memory controller is per rank, mem_is_per_rank is true.
611  */
612  unsigned n_layers;
 /*
  * NOTE(review): listing lines 613-614 missing. The comment above refers to
  * mem_is_per_rank, which is not declared in this extract; the layer
  * description array that goes with n_layers is presumably missing too.
  */
615 
616  /*
617  * DIMM info. Will eventually remove the entire csrows_info some day
618  */
619  unsigned tot_dimms;
620  struct dimm_info **dimms;
621 
622  /*
623  * FIXME - what about controllers on other busses? - IDs must be
624  * unique. dev pointer should be sufficiently unique, but
625  * BUS:SLOT.FUNC numbers may not be unique.
626  */
627  struct device *pdev;
628  const char *mod_name;
629  const char *mod_ver;
630  const char *ctl_name;
631  const char *dev_name;
 /* NOTE(review): listing line 632 missing - a member after dev_name is absent. */
633  void *pvt_info;
634  unsigned long start_time; /* mci load start time (in jiffies) */
635 
636  /*
637  * drivers shouldn't access those fields directly, as the core
638  * already handles that.
639  */
 /* NOTE(review): listing lines 640-642 missing - the core-managed fields the comment above refers to are absent. */
643 
 /* NOTE(review): listing line 644 missing - one more member is absent here. */
645 
646  /* Additional top controller level attributes, but specified
647  * by the low level driver.
648  *
649  * Set by the low level driver to provide attributes at the
650  * controller level.
651  * An array of structures, NULL terminated
652  *
653  * If attributes are desired, then set to array of attributes
654  * If no attributes are desired, leave NULL
655  */
656  const struct mcidev_sysfs_attribute *mc_driver_sysfs_attributes;
657 
658  /* work struct for this MC */
 /* NOTE(review): listing line 659 missing - the work struct member itself is absent. */
660 
661  /* the internal state of this controller instance */
662  int op_state;
663 
 /* Fake error-injection state, only built with CONFIG_EDAC_DEBUG. */
664 #ifdef CONFIG_EDAC_DEBUG
665  struct dentry *debugfs;
666  u8 fake_inject_layer[EDAC_MAX_LAYERS];
667  u32 fake_inject_ue;
668  u16 fake_inject_count;
669 #endif
670 };
671 
672 #endif