Linux Kernel
3.7.1
|
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>
#include <linux/atomic.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
Go to the source code of this file.
Data Structures | |
struct | eeh_stats |
Macros | |
#define | EEH_MAX_FAILS 2100000 |
#define | PCI_BUS_RESET_WAIT_MSEC (60*1000) |
#define | EEH_PCI_REGS_LOG_LEN 4096 |
#define | IS_BRIDGE(class_code) (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) |
#define | PCI_BUS_RST_HOLD_TIME_MSEC 250 |
#define | PCI_BUS_SETTLE_TIME_MSEC 1800 |
Functions | |
EXPORT_SYMBOL (eeh_subsystem_enabled) | |
DEFINE_MUTEX (eeh_mutex) | |
void | eeh_slot_error_detail (struct eeh_pe *pe, int severity) |
int | eeh_dev_check_failure (struct eeh_dev *edev) |
EXPORT_SYMBOL_GPL (eeh_dev_check_failure) | |
unsigned long | eeh_check_failure (const volatile void __iomem *token, unsigned long val) |
EXPORT_SYMBOL (eeh_check_failure) | |
int | eeh_pci_enable (struct eeh_pe *pe, int function) |
int | pcibios_set_pcie_reset_state (struct pci_dev *dev, enum pcie_reset_state state) |
int | eeh_reset_pe (struct eeh_pe *pe) |
void | eeh_save_bars (struct eeh_dev *edev) |
int __init | eeh_ops_register (struct eeh_ops *ops) |
Variables | |
struct eeh_ops * | eeh_ops = NULL |
int | eeh_subsystem_enabled |
int | eeh_probe_mode |
@name: name of EEH platform operations | 
eeh_ops_unregister - Unregister platform dependent EEH operations. Unregister the platform dependent EEH operation callback functions. | 
int __exit | eeh_ops_unregister (const char *name) |
core_initcall_sync (eeh_init) | |
void | eeh_add_device_tree_early (struct device_node *dn) |
EXPORT_SYMBOL_GPL (eeh_add_device_tree_early) | |
void | eeh_add_device_tree_late (struct pci_bus *bus) |
EXPORT_SYMBOL_GPL (eeh_add_device_tree_late) | |
void | eeh_remove_bus_device (struct pci_dev *dev, int purge_pe) |
EXPORT_SYMBOL_GPL (eeh_remove_bus_device) | |
__initcall (eeh_init_proc) | |
#define EEH_MAX_FAILS 2100000 |
Overview: EEH, or "Extended Error Handling", is a PCI bridge technology for dealing with PCI bus errors that can't be dealt with within the usual PCI framework, except by check-stopping the CPU. Systems that are designed for high-availability/reliability cannot afford to crash due to a "mere" PCI error, thus the need for EEH. An EEH-capable bridge operates by converting a detected error into a "slot freeze", taking the PCI adapter off-line, making the slot behave, from the OS's point of view, as if the slot were "empty": all reads return 0xff's and all writes are silently ignored. EEH slot isolation events can be triggered by parity errors on the address or data busses (e.g. during posted writes), which in turn might be caused by low voltage on the bus, dust, vibration, humidity, radioactivity or plain-old failed hardware.
Note, however, that one of the leading causes of EEH slot freeze events is buggy device drivers, buggy device microcode, or buggy device hardware. This is because any attempt by the device to bus-master data to a memory address that is not assigned to the device will trigger a slot freeze. (The idea is to prevent devices-gone-wild from corrupting system memory). Buggy hardware/drivers will have a miserable time co-existing with EEH.
Ideally, a PCI device driver, when suspecting that an isolation event has occurred (e.g. by reading 0xff's), will then ask EEH whether this is the case, and then take appropriate steps to reset the PCI slot, the PCI device, and then resume operations. However, until that day, the checking is done here, with the eeh_check_failure() routine embedded in the MMIO macros. If the slot is found to be isolated, an "EEH Event" is synthesized and sent out for processing.
#define IS_BRIDGE | ( | class_code | ) | (((class_code)<<16) == PCI_BASE_CLASS_BRIDGE) |
#define PCI_BUS_RST_HOLD_TIME_MSEC 250 |
#define PCI_BUS_SETTLE_TIME_MSEC 1800 |
__initcall | ( | eeh_init_proc | ) |
core_initcall_sync | ( | eeh_init | ) |
DEFINE_MUTEX | ( | eeh_mutex | ) |
void eeh_add_device_tree_early | ( | struct device_node * | dn | ) |
eeh_check_failure - Check if all 1's data is due to EEH slot freeze. @token: I/O token, should be address in the form 0xA.... @val: value, should be all 1's (XXX why do we need this arg??)
Check for an EEH failure at the given token address. Call this routine if the result of a read was all 0xff's and you want to find out if this is due to an EEH slot freeze event. This routine will query firmware for the EEH status.
Note this routine is safe to call in an interrupt context.
eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze. @edev: eeh device
Check for an EEH failure for the given device node. Call this routine if the result of a read was all 0xff's and you want to find out if this is due to an EEH slot freeze. This routine will query firmware for the EEH status.
Returns 0 if there has not been an EEH error; otherwise returns a non-zero value and queues up a slot isolation event notification.
It is safe to call this routine in an interrupt context.
eeh_slot_error_detail - Generate combined log including driver log and error log. @pe: EEH PE. @severity: temporary or permanent error log
This routine should be called to generate the combined log, which is composed of the driver log and the error log. The driver log is derived from the config space of the corresponding PCI device, while the error log is fetched through a platform dependent function call.
EXPORT_SYMBOL | ( | eeh_subsystem_enabled | ) |
EXPORT_SYMBOL | ( | eeh_check_failure | ) |
EXPORT_SYMBOL_GPL | ( | eeh_dev_check_failure | ) |
EXPORT_SYMBOL_GPL | ( | eeh_add_device_tree_early | ) |
EXPORT_SYMBOL_GPL | ( | eeh_add_device_tree_late | ) |
EXPORT_SYMBOL_GPL | ( | eeh_remove_bus_device | ) |
int pcibios_set_pcie_reset_state | ( | struct pci_dev * | dev, |
enum pcie_reset_state | state | ||
) |