Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
cper.c
Go to the documentation of this file.
1 /*
2  * UEFI Common Platform Error Record (CPER) support
3  *
4  * Copyright (C) 2010, Intel Corp.
5  * Author: Huang Ying <[email protected]>
6  *
7  * CPER is the format used to describe platform hardware error by
8  * various APEI tables, such as ERST, BERT and HEST etc.
9  *
10  * For more information about CPER, please refer to Appendix N of UEFI
11  * Specification version 2.3.
12  *
13  * This program is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU General Public License version
15  * 2 as published by the Free Software Foundation.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with this program; if not, write to the Free Software
24  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25  */
26 
27 #include <linux/kernel.h>
28 #include <linux/module.h>
29 #include <linux/time.h>
30 #include <linux/cper.h>
31 #include <linux/acpi.h>
32 #include <linux/aer.h>
33 
/*
 * CPER record IDs need to be unique even across reboots, because the
 * record ID is used as the index for ERST storage, and CPER records
 * from multiple boots may co-exist in ERST.
 */
40 {
41  static atomic64_t seq;
42 
43  if (!atomic64_read(&seq))
44  atomic64_set(&seq, ((u64)get_seconds()) << 32);
45 
46  return atomic64_inc_return(&seq);
47 }
49 
/*
 * Names of the CPER error severities, indexed by the numeric severity
 * value (see cper_severity_str()). The table is only ever read, so both
 * the strings and the pointers to them are const.
 */
static const char * const cper_severity_strs[] = {
	[0] = "recoverable",
	[1] = "fatal",
	[2] = "corrected",
	[3] = "info",
};
56 
57 static const char *cper_severity_str(unsigned int severity)
58 {
59  return severity < ARRAY_SIZE(cper_severity_strs) ?
60  cper_severity_strs[severity] : "unknown";
61 }
62 
/*
 * cper_print_bits - print strings for set bits
 * @pfx: prefix for each line, including log level and prefix string
 * @bits: bit mask
 * @strs: string array, indexed by bit position; NULL entries are skipped
 * @strs_size: number of entries in @strs
 *
 * For each set bit in @bits, print the corresponding string in @strs,
 * separated by ", ". When appending a string would take the current line
 * past 80 characters, the line is flushed and a new one is started, with
 * @pfx printed at the beginning of each line.
 */
void cper_print_bits(const char *pfx, unsigned int bits,
		     const char *strs[], unsigned int strs_size)
{
	/* @bits cannot select more names than it has bit positions. */
	const unsigned int max_bits = 8 * sizeof(bits);
	unsigned int i;
	int len = 0;
	const char *str;
	char buf[84];

	/*
	 * Clamp the walk so that "1U << i" below never shifts by the full
	 * width of the type, which is undefined behaviour in C.
	 */
	if (strs_size > max_bits)
		strs_size = max_bits;

	for (i = 0; i < strs_size; i++) {
		if (!(bits & (1U << i)))
			continue;
		str = strs[i];
		if (!str)
			continue;
		/* Flush first if ", <str>" would not fit on this line. */
		if (len && len + strlen(str) + 2 > 80) {
			printk("%s\n", buf);
			len = 0;
		}
		if (!len)
			len = snprintf(buf, sizeof(buf), "%s%s", pfx, str);
		else
			len += snprintf(buf+len, sizeof(buf)-len, ", %s", str);
	}
	if (len)
		printk("%s\n", buf);
}
99 
/*
 * Processor type names, indexed by the proc_type field of the generic
 * processor error section. Read-only, hence the const pointers.
 */
static const char * const cper_proc_type_strs[] = {
	[0] = "IA32/X64",
	[1] = "IA64",
};
104 
/*
 * Instruction set names, indexed by the proc_isa field of the generic
 * processor error section. Read-only, hence the const pointers.
 */
static const char * const cper_proc_isa_strs[] = {
	[0] = "IA32",
	[1] = "IA64",
	[2] = "X64",
};
110 
/*
 * Names for the bits of the proc_error_type mask, indexed by bit position.
 * The element type stays "const char *" because the table is handed to
 * cper_print_bits(), which takes a "const char *[]".
 */
static const char *cper_proc_error_type_strs[] = {
	[0] = "cache error",
	[1] = "TLB error",
	[2] = "bus error",
	[3] = "micro-architectural error",
};
117 
/*
 * Operation names, indexed by the operation field of the generic
 * processor error section. Read-only, hence the const pointers.
 */
static const char * const cper_proc_op_strs[] = {
	[0] = "unknown or generic",
	[1] = "data read",
	[2] = "data write",
	[3] = "instruction execution",
};
124 
/*
 * Names for the bits of the processor error flags mask, indexed by bit
 * position. The element type stays "const char *" because the table is
 * handed to cper_print_bits(), which takes a "const char *[]".
 */
static const char *cper_proc_flag_strs[] = {
	[0] = "restartable",
	[1] = "precise IP",
	[2] = "overflow",
	[3] = "corrected",
};
131 
/*
 * cper_print_proc_generic - print a generic processor error section
 * @pfx: prefix emitted at the start of every printed line
 * @proc: processor error section to print
 *
 * Emits one printk() line per field. Processor type, ISA and operation are
 * shown both as a number and by name, using the cper_proc_*_strs tables
 * ("unknown" when the value is past the end of the table). The error_type
 * and flags masks are printed in hex and then expanded bit by bit with
 * cper_print_bits().
 *
 * NOTE(review): the listing numbers embedded in this text skip 135, 139,
 * 143, 149, 153, ... and the closing braces after the two cper_print_bits()
 * calls have no visible opener, so lines were lost when this listing was
 * extracted (presumably a guard in front of each field; confirm against
 * the upstream file before relying on this text).
 */
132 static void cper_print_proc_generic(const char *pfx,
133  const struct cper_sec_proc_generic *proc)
134 {
136  printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type,
137  proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ?
138  cper_proc_type_strs[proc->proc_type] : "unknown");
140  printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa,
141  proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ?
142  cper_proc_isa_strs[proc->proc_isa] : "unknown");
144  printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type);
145  cper_print_bits(pfx, proc->proc_error_type,
146  cper_proc_error_type_strs,
147  ARRAY_SIZE(cper_proc_error_type_strs));
148  }
150  printk("%s""operation: %d, %s\n", pfx, proc->operation,
151  proc->operation < ARRAY_SIZE(cper_proc_op_strs) ?
152  cper_proc_op_strs[proc->operation] : "unknown");
154  printk("%s""flags: 0x%02x\n", pfx, proc->flags);
155  cper_print_bits(pfx, proc->flags, cper_proc_flag_strs,
156  ARRAY_SIZE(cper_proc_flag_strs));
157  }
159  printk("%s""level: %d\n", pfx, proc->level);
161  printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version);
163  printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id);
165  printk("%s""target_address: 0x%016llx\n",
166  pfx, proc->target_addr);
168  printk("%s""requestor_id: 0x%016llx\n",
169  pfx, proc->requestor_id);
171  printk("%s""responder_id: 0x%016llx\n",
172  pfx, proc->responder_id);
174  printk("%s""IP: 0x%016llx\n", pfx, proc->ip);
175 }
176 
/*
 * Memory error type names, indexed by the error_type field of the memory
 * error section. Read-only, hence the const pointers.
 */
static const char * const cper_mem_err_type_strs[] = {
	[0]  = "unknown",
	[1]  = "no error",
	[2]  = "single-bit ECC",
	[3]  = "multi-bit ECC",
	[4]  = "single-symbol chipkill ECC",
	[5]  = "multi-symbol chipkill ECC",
	[6]  = "master abort",
	[7]  = "target abort",
	[8]  = "parity error",
	[9]  = "watchdog timeout",
	[10] = "invalid address",
	[11] = "mirror Broken",
	[12] = "memory sparing",
	[13] = "scrub corrected error",
	[14] = "scrub uncorrected error",
};
194 
/*
 * cper_print_mem - print a memory error section
 * @pfx: prefix emitted at the start of every printed line
 * @mem: memory error section to print
 *
 * Emits one printk() line per field. The error type is shown both as a
 * number and by name from cper_mem_err_type_strs ("unknown" when the value
 * is past the end of the table).
 *
 * NOTE(review): the listing numbers embedded in this text skip every other
 * line from 197 onwards, the "u8 etype" declaration sits in the middle of
 * the statements, and the closing brace after the error_type print has no
 * visible opener, so lines were lost when this listing was extracted
 * (presumably a guard in front of each field; confirm against the
 * upstream file before relying on this text).
 */
195 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
196 {
198  printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status);
200  printk("%s""physical_address: 0x%016llx\n",
201  pfx, mem->physical_addr);
203  printk("%s""physical_address_mask: 0x%016llx\n",
204  pfx, mem->physical_addr_mask);
206  printk("%s""node: %d\n", pfx, mem->node);
208  printk("%s""card: %d\n", pfx, mem->card);
210  printk("%s""module: %d\n", pfx, mem->module);
212  printk("%s""bank: %d\n", pfx, mem->bank);
214  printk("%s""device: %d\n", pfx, mem->device);
216  printk("%s""row: %d\n", pfx, mem->row);
218  printk("%s""column: %d\n", pfx, mem->column);
220  printk("%s""bit_position: %d\n", pfx, mem->bit_pos);
222  printk("%s""requestor_id: 0x%016llx\n", pfx, mem->requestor_id);
224  printk("%s""responder_id: 0x%016llx\n", pfx, mem->responder_id);
226  printk("%s""target_id: 0x%016llx\n", pfx, mem->target_id);
228  u8 etype = mem->error_type;
229  printk("%s""error_type: %d, %s\n", pfx, etype,
230  etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
231  cper_mem_err_type_strs[etype] : "unknown");
232  }
233 }
234 
/*
 * PCIe port type names, indexed by the port_type field of the PCIe error
 * section. Indices 2 and 3 are deliberately "unknown". Read-only, hence
 * the const pointers.
 */
static const char * const cper_pcie_port_type_strs[] = {
	[0]  = "PCIe end point",
	[1]  = "legacy PCI end point",
	[2]  = "unknown",
	[3]  = "unknown",
	[4]  = "root port",
	[5]  = "upstream switch port",
	[6]  = "downstream switch port",
	[7]  = "PCIe to PCI/PCI-X bridge",
	[8]  = "PCI/PCI-X to PCIe bridge",
	[9]  = "root complex integrated endpoint device",
	[10] = "root complex event collector",
};
248 
/*
 * cper_print_pcie - print a PCIe error section
 * @pfx: prefix emitted at the start of every printed line
 * @pcie: PCIe error section to print
 * @gdata: generic data entry that carries @pcie; its error_severity is
 *         passed on to cper_print_aer()
 *
 * Emits one printk() line per field or field group. The port type is shown
 * both as a number and by name from cper_pcie_port_type_strs ("unknown"
 * when the value is past the end of the table). The AER registers are
 * printed through cper_print_aer() only when CONFIG_ACPI_APEI_PCIEAER is
 * set.
 *
 * NOTE(review): the listing numbers embedded in this text skip 252, 256,
 * 259, 262, 276, 279 and 284, the "const __u8 *p" and "aer_regs"
 * declarations sit in the middle of the statements, and two closing braces
 * have no visible opener, so lines were lost when this listing was
 * extracted (presumably a guard in front of each field group; confirm
 * against the upstream file before relying on this text).
 */
249 static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie,
250  const struct acpi_hest_generic_data *gdata)
251 {
253  printk("%s""port_type: %d, %s\n", pfx, pcie->port_type,
254  pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ?
255  cper_pcie_port_type_strs[pcie->port_type] : "unknown");
257  printk("%s""version: %d.%d\n", pfx,
258  pcie->version.major, pcie->version.minor);
260  printk("%s""command: 0x%04x, status: 0x%04x\n", pfx,
261  pcie->command, pcie->status);
263  const __u8 *p;
264  printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx,
265  pcie->device_id.segment, pcie->device_id.bus,
266  pcie->device_id.device, pcie->device_id.function);
267  printk("%s""slot: %d\n", pfx,
268  pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT);
269  printk("%s""secondary_bus: 0x%02x\n", pfx,
270  pcie->device_id.secondary_bus);
271  printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx,
272  pcie->device_id.vendor_id, pcie->device_id.device_id);
273  p = pcie->device_id.class_code;
274  printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]);
275  }
277  printk("%s""serial number: 0x%04x, 0x%04x\n", pfx,
278  pcie->serial_number.lower, pcie->serial_number.upper);
280  printk(
281  "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n",
282  pfx, pcie->bridge.secondary_status, pcie->bridge.control);
283 #ifdef CONFIG_ACPI_APEI_PCIEAER
285  struct aer_capability_regs *aer_regs = (void *)pcie->aer_info;
286  cper_print_aer(pfx, gdata->error_severity, aer_regs);
287  }
288 #endif
289 }
290 
/*
 * Names for the bits of a generic data entry's flags mask, indexed by bit
 * position. The element type stays "const char *" because the table is
 * handed to cper_print_bits(), which takes a "const char *[]".
 */
static const char *apei_estatus_section_flag_strs[] = {
	[0] = "primary",
	[1] = "containment warning",
	[2] = "reset",
	[3] = "threshold exceeded",
	[4] = "resource not accessible",
	[5] = "latent error",
};
299 
/*
 * apei_estatus_print_section - print one generic data entry and its payload
 * @pfx: prefix emitted at the start of every printed line
 * @gdata: generic data entry header; the section payload follows it directly
 * @sec_no: section number shown in the output
 *
 * Prints the entry's severity, flags, FRU id and FRU text, then dispatches
 * on the section type GUID to the matching printer (generic processor,
 * platform memory or PCIe). A payload shorter than the structure the
 * printer expects is not printed; a firmware warning is logged instead.
 * Unrecognised section types are reported with their GUID.
 *
 * NOTE(review): the listing numbers embedded in this text skip 312 and 314,
 * immediately before the fru_id and fru_text prints, so lines were lost
 * when this listing was extracted (presumably guards on those two prints;
 * confirm against the upstream file before relying on this text).
 */
300 static void apei_estatus_print_section(
301  const char *pfx, const struct acpi_hest_generic_data *gdata, int sec_no)
302 {
303  uuid_le *sec_type = (uuid_le *)gdata->section_type;
304  __u16 severity;
305 
306  severity = gdata->error_severity;
307  printk("%s""section: %d, severity: %d, %s\n", pfx, sec_no, severity,
308  cper_severity_str(severity));
309  printk("%s""flags: 0x%02x\n", pfx, gdata->flags);
310  cper_print_bits(pfx, gdata->flags, apei_estatus_section_flag_strs,
311  ARRAY_SIZE(apei_estatus_section_flag_strs));
313  printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id);
/* "%.20s" caps the print at 20 characters even if fru_text has no NUL. */
315  printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text);
316 
/*
 * In each branch below, "gdata + 1" is the first byte after the entry
 * header, i.e. the start of the section payload, and error_data_length
 * must cover the whole payload structure before it is dereferenced.
 */
317  if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) {
318  struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1);
319  printk("%s""section_type: general processor error\n", pfx);
320  if (gdata->error_data_length >= sizeof(*proc_err))
321  cper_print_proc_generic(pfx, proc_err);
322  else
323  goto err_section_too_small;
324  } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
325  struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
326  printk("%s""section_type: memory error\n", pfx);
327  if (gdata->error_data_length >= sizeof(*mem_err))
328  cper_print_mem(pfx, mem_err);
329  else
330  goto err_section_too_small;
331  } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) {
332  struct cper_sec_pcie *pcie = (void *)(gdata + 1);
333  printk("%s""section_type: PCIe error\n", pfx);
334  if (gdata->error_data_length >= sizeof(*pcie))
335  cper_print_pcie(pfx, pcie, gdata);
336  else
337  goto err_section_too_small;
338  } else
339  printk("%s""section type: unknown, %pUl\n", pfx, sec_type);
340 
341  return;
342 
/* Shared exit for a payload shorter than its section structure. */
343 err_section_too_small:
344  pr_err(FW_WARN "error section length is too small\n");
345 }
346 
347 void apei_estatus_print(const char *pfx,
348  const struct acpi_hest_generic_status *estatus)
349 {
350  struct acpi_hest_generic_data *gdata;
351  unsigned int data_len, gedata_len;
352  int sec_no = 0;
353  __u16 severity;
354 
355  printk("%s""APEI generic hardware error status\n", pfx);
356  severity = estatus->error_severity;
357  printk("%s""severity: %d, %s\n", pfx, severity,
358  cper_severity_str(severity));
359  data_len = estatus->data_length;
360  gdata = (struct acpi_hest_generic_data *)(estatus + 1);
361  while (data_len > sizeof(*gdata)) {
362  gedata_len = gdata->error_data_length;
363  apei_estatus_print_section(pfx, gdata, sec_no);
364  data_len -= gedata_len + sizeof(*gdata);
365  gdata = (void *)(gdata + 1) + gedata_len;
366  sec_no++;
367  }
368 }
370 
372 {
373  if (estatus->data_length &&
374  estatus->data_length < sizeof(struct acpi_hest_generic_data))
375  return -EINVAL;
376  if (estatus->raw_data_length &&
377  estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length)
378  return -EINVAL;
379 
380  return 0;
381 }
383 
385 {
386  struct acpi_hest_generic_data *gdata;
387  unsigned int data_len, gedata_len;
388  int rc;
389 
390  rc = apei_estatus_check_header(estatus);
391  if (rc)
392  return rc;
393  data_len = estatus->data_length;
394  gdata = (struct acpi_hest_generic_data *)(estatus + 1);
395  while (data_len > sizeof(*gdata)) {
396  gedata_len = gdata->error_data_length;
397  if (gedata_len > data_len - sizeof(*gdata))
398  return -EINVAL;
399  data_len -= gedata_len + sizeof(*gdata);
400  gdata = (void *)(gdata + 1) + gedata_len;
401  }
402  if (data_len)
403  return -EINVAL;
404 
405  return 0;
406 }