32 #include <linux/kernel.h>
33 #include <linux/module.h>
49 #include <linux/pci.h>
54 #include <asm/tlbflush.h>
/* Prefix string pasted in front of this file's pr_err()/pr_info() messages. */
59 #define GHES_PFX "GHES: "
/*
 * Two upper bounds, both 65536.
 * NOTE(review): presumably byte limits on an error status block and on the
 * per-source preallocation size -- the use sites are not fully visible here;
 * confirm.
 */
61 #define GHES_ESTATUS_MAX_SIZE 65536
62 #define GHES_ESOURCE_PREALLOC_MAX_SIZE 65536
/*
 * NOTE(review): presumably the minimum allocation order used when creating
 * ghes_estatus_pool -- the creation call is not visible here; confirm.
 */
64 #define GHES_ESTATUS_POOL_MIN_ALLOC_ORDER 3
/* NOTE(review): name suggests an estimated average cache entry size; confirm. */
67 #define GHES_ESTATUS_CACHE_AVG_SIZE 512
/*
 * NOTE(review): presumably the number of slots in ghes_estatus_caches, which
 * ghes_estatus_cache_add() indexes by slot -- confirm against the array
 * definition.
 */
69 #define GHES_ESTATUS_CACHES_SIZE 4
/*
 * 10,000,000,000; going by the _NSEC suffix this is nanoseconds, i.e. 10 s.
 * NOTE(review): presumably the maximum time an entry may stay in the cache --
 * confirm at the comparison site.
 */
71 #define GHES_ESTATUS_IN_CACHE_MAX_NSEC 10000000000ULL
/*
 * 1.5 times GHES_ESTATUS_CACHES_SIZE using integer arithmetic (4 * 3 / 2 = 6).
 * NOTE(review): presumably the cap checked against ghes_estatus_cache_alloced;
 * confirm.
 */
73 #define GHES_ESTATUS_CACHE_ALLOCED_MAX (GHES_ESTATUS_CACHES_SIZE * 3 / 2)
/*
 * Total size of a cache entry: one struct ghes_estatus_cache header plus
 * estatus_len additional bytes.
 */
75 #define GHES_ESTATUS_CACHE_LEN(estatus_len) \
76 (sizeof(struct ghes_estatus_cache) + (estatus_len))
/*
 * Given a pointer to a struct ghes_estatus_cache, yield a
 * struct acpi_hest_generic_status pointer to the memory that starts
 * immediately after that header (header pointer + 1).
 */
77 #define GHES_ESTATUS_FROM_CACHE(estatus_cache) \
78 ((struct acpi_hest_generic_status *) \
79 ((struct ghes_estatus_cache *)(estatus_cache) + 1))
/*
 * Total size of a queued node: one struct ghes_estatus_node header plus
 * estatus_len additional bytes.
 */
81 #define GHES_ESTATUS_NODE_LEN(estatus_len) \
82 (sizeof(struct ghes_estatus_node) + (estatus_len))
/*
 * Given a pointer to a struct ghes_estatus_node, yield a
 * struct acpi_hest_generic_status pointer to the memory that starts
 * immediately after that header (header pointer + 1).
 */
83 #define GHES_ESTATUS_FROM_NODE(estatus_node) \
84 ((struct acpi_hest_generic_status *) \
85 ((struct ghes_estatus_node *)(estatus_node) + 1))
/*
 * Two distinct single-bit values (bit 0 and bit 1).
 * NOTE(review): presumably flag bits kept in a per-source flags field -- the
 * field and its users are not visible here; confirm.
 */
95 #define GHES_TO_CLEAR 0x0001
96 #define GHES_EXITING 0x0002
/*
 * Page layout relative to a base address: the NMI page is the base itself and
 * the IRQ page is the one PAGE_SIZE bytes after it, two pages in total.
 * NOTE(review): presumably base is ghes_ioremap_area->addr, as read by the
 * iounmap helpers -- confirm.
 */
157 #define GHES_IOREMAP_PAGES 2
158 #define GHES_IOREMAP_NMI_PAGE(base) (base)
159 #define GHES_IOREMAP_IRQ_PAGE(base) ((base) + PAGE_SIZE)
/*
 * Virtual memory area for the atomic ioremap pages; ghes_ioremap_init() logs
 * an error when it is NULL, and the iounmap helpers read its ->addr as base.
 */
162 static struct vm_struct *ghes_ioremap_area;
/*
 * Pool that ghes_estatus_pool_expand() adds PAGE_SIZE chunks to with
 * gen_pool_add(), and that queued estatus nodes are returned to with
 * gen_pool_free().
 */
178 static struct gen_pool *ghes_estatus_pool;
/*
 * Running total of requested pool size: increased by PAGE_ALIGN(len) in
 * ghes_estatus_pool_expand() and decreased by PAGE_ALIGN(len) in
 * ghes_estatus_pool_shrink().
 */
179 static unsigned long ghes_estatus_pool_size_request;
/* irq_work item; ghes_init() binds it to ghes_proc_in_irq via init_irq_work(). */
181 static struct irq_work ghes_proc_irq_work;
/*
 * NOTE(review): presumably counts currently allocated estatus cache entries
 * (cf. GHES_ESTATUS_CACHE_ALLOCED_MAX) -- its updates are not visible here;
 * confirm.
 */
184 static atomic_t ghes_estatus_cache_alloced;
186 static int ghes_ioremap_init(
void)
190 if (!ghes_ioremap_area) {
191 pr_err(
GHES_PFX "Failed to allocate virtual memory area for atomic ioremap.\n");
198 static void ghes_ioremap_exit(
void)
203 static void __iomem *ghes_ioremap_pfn_nmi(
u64 pfn)
214 static void __iomem *ghes_ioremap_pfn_irq(
u64 pfn)
225 static void ghes_iounmap_nmi(
void __iomem *vaddr_ptr)
227 unsigned long vaddr = (
unsigned long __force)vaddr_ptr;
228 void *
base = ghes_ioremap_area->
addr;
235 static void ghes_iounmap_irq(
void __iomem *vaddr_ptr)
237 unsigned long vaddr = (
unsigned long __force)vaddr_ptr;
238 void *base = ghes_ioremap_area->
addr;
245 static int ghes_estatus_pool_init(
void)
248 if (!ghes_estatus_pool)
253 static void ghes_estatus_pool_free_chunk_page(
struct gen_pool *
pool,
260 static void ghes_estatus_pool_exit(
void)
263 ghes_estatus_pool_free_chunk_page,
NULL);
267 static int ghes_estatus_pool_expand(
unsigned long len)
272 ghes_estatus_pool_size_request +=
PAGE_ALIGN(len);
274 if (size >= ghes_estatus_pool_size_request)
277 for (i = 0; i <
pages; i++) {
281 ret = gen_pool_add(ghes_estatus_pool, addr,
PAGE_SIZE, -1);
289 static void ghes_estatus_pool_shrink(
unsigned long len)
291 ghes_estatus_pool_size_request -=
PAGE_ALIGN(len);
307 error_block_length =
generic->error_block_length;
310 "Error status block length is too long: %u for "
311 "generic hardware error source: %d.\n",
312 error_block_length, generic->header.source_id);
324 apei_unmap_generic_address(&generic->error_status_address);
330 static void ghes_fini(
struct ghes *ghes)
333 apei_unmap_generic_address(&ghes->
generic->error_status_address);
343 static inline int ghes_severity(
int severity)
364 unsigned long flags = 0;
373 vaddr = ghes_ioremap_pfn_nmi(paddr >>
PAGE_SHIFT);
376 vaddr = ghes_ioremap_pfn_irq(paddr >>
PAGE_SHIFT);
379 trunk =
min(trunk, len);
388 ghes_iounmap_nmi(vaddr);
391 ghes_iounmap_irq(vaddr);
392 spin_unlock_irqrestore(&ghes_ioremap_lock_irq, flags);
397 static int ghes_read_estatus(
struct ghes *ghes,
int silent)
406 if (!silent && printk_ratelimit())
408 "Failed to read error status block address for hardware error source: %d.\n",
415 ghes_copy_tofrom_phys(ghes->
estatus, buf_paddr,
417 if (!ghes->
estatus->block_status)
424 len = apei_estatus_len(ghes->
estatus);
425 if (len <
sizeof(*ghes->
estatus))
427 if (len > ghes->
generic->error_block_length)
431 ghes_copy_tofrom_phys(ghes->
estatus + 1,
432 buf_paddr +
sizeof(*ghes->
estatus),
433 len -
sizeof(*ghes->
estatus), 1);
439 if (rc && !silent && printk_ratelimit())
441 "Failed to read error status block!\n");
445 static void ghes_clear_estatus(
struct ghes *ghes)
447 ghes->
estatus->block_status = 0;
451 sizeof(ghes->
estatus->block_status), 0);
467 #ifdef CONFIG_X86_MCE
471 #ifdef CONFIG_ACPI_APEI_MEMORY_FAILURE
481 #ifdef CONFIG_ACPI_APEI_PCIEAER
497 devfn, aer_severity);
505 static void __ghes_print_estatus(
const char *pfx,
510 unsigned int curr_seqno;
521 snprintf(pfx_seq,
sizeof(pfx_seq),
"%s{%u}" HW_ERR, pfx, curr_seqno);
522 printk(
"%s""Hardware error from APEI Generic Hardware Error Source: %d\n",
523 pfx_seq, generic->header.source_id);
527 static int ghes_print_estatus(
const char *pfx,
537 ratelimit = &ratelimit_corrected;
539 ratelimit = &ratelimit_uncorrected;
541 __ghes_print_estatus(pfx,
generic, estatus);
555 unsigned long long now;
559 len = apei_estatus_len(estatus);
568 if (
memcmp(estatus, cache_estatus, len))
594 len = apei_estatus_len(estatus);
602 memcpy(cache_estatus, estatus, len);
620 static void ghes_estatus_cache_rcu_free(
struct rcu_head *
head)
625 ghes_estatus_cache_free(cache);
628 static void ghes_estatus_cache_add(
636 new_cache = ghes_estatus_cache_alloc(
generic, estatus);
637 if (new_cache ==
NULL)
648 duration = now - cache->
time_in;
657 if (period > max_period) {
665 if (slot != -1 &&
cmpxchg(ghes_estatus_caches + slot,
666 slot_cache, new_cache) == slot_cache) {
668 call_rcu(&slot_cache->
rcu, ghes_estatus_cache_rcu_free);
670 ghes_estatus_cache_free(new_cache);
674 static int ghes_proc(
struct ghes *ghes)
678 rc = ghes_read_estatus(ghes, 0);
681 if (!ghes_estatus_cached(ghes->
estatus)) {
687 ghes_clear_estatus(ghes);
691 static void ghes_add_timer(
struct ghes *ghes)
694 unsigned long expire;
696 if (!g->
notify.poll_interval) {
706 static void ghes_poll_func(
unsigned long data)
708 struct ghes *ghes = (
void *)data;
712 ghes_add_timer(ghes);
715 static irqreturn_t ghes_irq_func(
int irq,
void *data)
717 struct ghes *ghes =
data;
720 rc = ghes_proc(ghes);
728 unsigned long event,
void *data)
731 int ret = NOTIFY_DONE;
734 list_for_each_entry_rcu(ghes, &ghes_sci,
list) {
735 if (!ghes_proc(ghes))
765 llnode = llist_del_all(&ghes_estatus_llist);
770 llnode = llist_nodes_reverse(llnode);
776 len = apei_estatus_len(estatus);
778 ghes_do_proc(estatus);
779 if (!ghes_estatus_cached(estatus)) {
780 generic = estatus_node->
generic;
781 if (ghes_print_estatus(
NULL,
generic, estatus))
782 ghes_estatus_cache_add(
generic, estatus);
784 gen_pool_free(ghes_estatus_pool, (
unsigned long)estatus_node,
790 static void ghes_print_queued_estatus(
void)
798 llnode = llist_del_all(&ghes_estatus_llist);
803 llnode = llist_nodes_reverse(llnode);
808 len = apei_estatus_len(estatus);
810 generic = estatus_node->
generic;
811 ghes_print_estatus(
NULL,
generic, estatus);
812 llnode = llnode->
next;
816 static int ghes_notify_nmi(
unsigned int cmd,
struct pt_regs *
regs)
818 struct ghes *ghes, *ghes_global =
NULL;
819 int sev, sev_global = -1;
823 list_for_each_entry_rcu(ghes, &ghes_nmi,
list) {
824 if (ghes_read_estatus(ghes, 1)) {
825 ghes_clear_estatus(ghes);
828 sev = ghes_severity(ghes->
estatus->error_severity);
829 if (sev > sev_global) {
841 ghes_print_queued_estatus();
847 panic(
"Fatal hardware error!");
850 list_for_each_entry_rcu(ghes, &ghes_nmi,
list) {
851 #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
858 #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
859 if (ghes_estatus_cached(ghes->
estatus))
862 len = apei_estatus_len(ghes->
estatus);
870 llist_add(&estatus_node->
llnode, &ghes_estatus_llist);
874 ghes_clear_estatus(ghes);
876 #ifdef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
886 .notifier_call = ghes_notify_sci,
889 static unsigned long ghes_esource_prealloc_size(
892 unsigned long block_length, prealloc_records, prealloc_size;
894 block_length =
min_t(
unsigned long, generic->error_block_length,
896 prealloc_records =
max_t(
unsigned long,
897 generic->records_to_preallocate, 1);
898 prealloc_size =
min_t(
unsigned long, block_length * prealloc_records,
901 return prealloc_size;
907 struct ghes *ghes =
NULL;
912 if (!generic->enabled)
915 switch (generic->notify.type) {
922 pr_warning(
GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n",
923 generic->header.source_id);
927 generic->notify.type, generic->header.source_id);
932 if (generic->error_block_length <
935 generic->error_block_length,
936 generic->header.source_id);
939 ghes = ghes_new(
generic);
945 switch (generic->notify.type) {
947 ghes->
timer.function = ghes_poll_func;
950 ghes_add_timer(ghes);
955 pr_err(
GHES_PFX "Failed to map GSI to IRQ for generic hardware error source: %d\n",
956 generic->header.source_id);
960 0,
"GHES IRQ", ghes)) {
961 pr_err(
GHES_PFX "Failed to register IRQ for generic hardware error source: %d\n",
962 generic->header.source_id);
968 if (list_empty(&ghes_sci))
970 list_add_rcu(&ghes->
list, &ghes_sci);
974 len = ghes_esource_prealloc_size(
generic);
975 ghes_estatus_pool_expand(len);
977 if (list_empty(&ghes_nmi))
980 list_add_rcu(&ghes->
list, &ghes_nmi);
986 platform_set_drvdata(ghes_dev, ghes);
1003 ghes = platform_get_drvdata(ghes_dev);
1007 switch (generic->notify.type) {
1016 list_del_rcu(&ghes->
list);
1017 if (list_empty(&ghes_sci))
1023 list_del_rcu(&ghes->
list);
1024 if (list_empty(&ghes_nmi))
1032 len = ghes_esource_prealloc_size(
generic);
1033 ghes_estatus_pool_shrink(len);
1043 platform_set_drvdata(ghes_dev,
NULL);
1053 .probe = ghes_probe,
1054 .remove = ghes_remove,
1057 static int __init ghes_init(
void)
1074 init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq);
1076 rc = ghes_ioremap_init();
1080 rc = ghes_estatus_pool_init();
1082 goto err_ioremap_exit;
1095 pr_info(
GHES_PFX "APEI firmware first mode is enabled by APEI bit and WHEA _OSC.\n");
1097 pr_info(
GHES_PFX "APEI firmware first mode is enabled by WHEA _OSC.\n");
1099 pr_info(
GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n");
1105 ghes_estatus_pool_exit();
1107 ghes_ioremap_exit();
1112 static void __exit ghes_exit(
void)
1115 ghes_estatus_pool_exit();
1116 ghes_ioremap_exit();