#include <linux/types.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <asm/delay.h>
#include <asm/machvec.h>
#include <asm/ptrace.h>
#include <asm/kexec.h>
#include <asm/hw_irq.h>

#if defined(IA64_MCA_DEBUG_INFO)
# define IA64_MCA_DEBUG(fmt...) printk(fmt)
#else
# define IA64_MCA_DEBUG(fmt...)
#endif

#define NOTIFY_INIT(event, regs, arg, spin)                     \
do {                                                            \
        if ((notify_die((event), "INIT", (regs), (arg), 0, 0)   \
                        == NOTIFY_STOP) && ((spin) == 1))       \
                ia64_mca_spin(__func__);                        \
} while (0)

#define NOTIFY_MCA(event, regs, arg, spin)                      \
do {                                                            \
        if ((notify_die((event), "MCA", (regs), (arg), 0, 0)    \
                        == NOTIFY_STOP) && ((spin) == 1))       \
                ia64_mca_spin(__func__);                        \
} while (0)
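
/*
 * Illustrative call site (a sketch, not part of this excerpt): the
 * do/while(0) wrapper lets either macro be used as a single statement,
 * even in an unbraced branch:
 *
 *      if (sos->monarch)
 *              NOTIFY_MCA(DIE_MCA_MONARCH_ENTER, regs, (long)&nd, 1);
 */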

static int monarch_cpu = -1;

#define MAX_CPE_POLL_INTERVAL (15*60*HZ)
#define MIN_CPE_POLL_INTERVAL (2*60*HZ)
#define CMC_POLL_INTERVAL     (1*60*HZ)
#define CPE_HISTORY_LENGTH    5
#define CMC_HISTORY_LENGTH    5
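
/*
 * These constants drive the interrupt-storm heuristic in the CMC/CPE
 * handlers below: each handler records arrival times in a small ring
 * and, roughly (a sketch of the check as it appears later),
 *
 *      if (now - cmc_history[i] <= HZ)
 *              count++;
 *      if (count >= CMC_HISTORY_LENGTH)
 *              ...switch from interrupt mode to timer polling...
 *
 * i.e. CMC_HISTORY_LENGTH interrupts within one second force polling
 * at CMC_POLL_INTERVAL, while CPE polling adapts between MIN_ and
 * MAX_CPE_POLL_INTERVAL.
 */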

static int cmc_polling_enabled = 1;
static int cpe_poll_enabled = 1;

#define mprintk(fmt...) ia64_mca_printk(fmt)

#define MLOGBUF_SIZE (512+256*NR_CPUS)
#define MLOGBUF_MSGMAX 256
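
/*
 * mlogbuf is a single-writer (MCA/INIT context), single-reader (normal
 * context) ring buffer.  One slot is always left free, so start == end
 * means empty and (end + 1) % MLOGBUF_SIZE == start means full.  A
 * minimal sketch of the producer step, matching the copy loop in the
 * printk wrapper below:
 *
 *      next = (mlogbuf_end + 1) % MLOGBUF_SIZE;
 *      if (next != mlogbuf_start) {
 *              mlogbuf[mlogbuf_end] = c;
 *              mlogbuf_end = next;
 *      }
 */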

static unsigned long mlogbuf_start;
static unsigned long mlogbuf_end;
static unsigned int mlogbuf_finished = 0;
static unsigned long mlogbuf_timestamp = 0;

static int loglevel_save = -1;

#define BREAK_LOGLEVEL(__console_loglevel)              \
        oops_in_progress = 1;                           \
        if (loglevel_save < 0)                          \
                loglevel_save = __console_loglevel;     \
        __console_loglevel = 15;

#define RESTORE_LOGLEVEL(__console_loglevel)            \
        if (loglevel_save >= 0) {                       \
                __console_loglevel = loglevel_save;     \
                loglevel_save = -1;                     \
        }                                               \
        mlogbuf_finished = 0;                           \
        oops_in_progress = 0;
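
/*
 * Expected bracketing (a sketch): an MCA/INIT dump raises the console
 * loglevel so nothing is filtered, then restores the saved level once
 * the buffered output has been flushed:
 *
 *      BREAK_LOGLEVEL(console_loglevel);
 *      ...emergency output via mprintk()/ia64_mlogbuf_dump()...
 *      RESTORE_LOGLEVEL(console_loglevel);
 */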

        printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args);

        spin_lock(&mlogbuf_wlock);
        for (p = temp_buf; *p; p++) {
                unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE;
                if (next != mlogbuf_start) {
                        mlogbuf[mlogbuf_end] = *p;
                        mlogbuf_end = next;
                } else {
                        break;
                }
        }
        mlogbuf[mlogbuf_end] = '\0';
        spin_unlock(&mlogbuf_wlock);

        unsigned int printed_len;

        while (mlogbuf_start != mlogbuf_end) {

                index = mlogbuf_start;
                while (index != mlogbuf_end) {

                mlogbuf_start = index;

                mlogbuf_timestamp = 0;
                spin_unlock_irqrestore(&mlogbuf_rlock, flags);

static void ia64_mlogbuf_finish(int wait)
{
        printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, "
                "MCA/INIT might be dodgy or fail.\n");

        if (wait) {
                printk("Delaying for 5 seconds...\n");
                udelay(5*1000000);
        }

        mlogbuf_finished = 1;

static void ia64_mlogbuf_dump_from_init(void)
{
        if (mlogbuf_finished)
                return;

        if (mlogbuf_timestamp &&
            time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) {
                printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT "
                        " and the system seems to be messed up.\n");
                ia64_mlogbuf_finish(0);
                return;
        }

        if (!spin_trylock(&mlogbuf_rlock)) {
                printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. "
                        "Generated messages other than stack dump will be "
                        "buffered to mlogbuf and will be printed later.\n");
                printk(KERN_ERR "INIT: If messages would not printed after "
                        "this INIT, wait 30sec and assert INIT again.\n");
                if (!mlogbuf_timestamp)
                        mlogbuf_timestamp = jiffies;
                return;
        }
        spin_unlock(&mlogbuf_rlock);

ia64_mca_spin(const char *func)
{
        ia64_mlogbuf_finish(0);

#define IA64_MAX_LOGS      2    /* double-buffered: current + previous */
#define IA64_MAX_LOG_TYPES 4    /* MCA, INIT, CMC, CPE */

#define IA64_LOG_ALLOCATE(it, size) \
        {ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \
                (ia64_err_rec_t *)alloc_bootmem(size); \
        ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \
                (ia64_err_rec_t *)alloc_bootmem(size);}
#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock)
#define IA64_LOG_LOCK(it)      spin_lock_irqsave(&ia64_state_log[it].isl_lock, s)
#define IA64_LOG_UNLOCK(it)    spin_unlock_irqrestore(&ia64_state_log[it].isl_lock, s)
#define IA64_LOG_NEXT_INDEX(it)    ia64_state_log[it].isl_index
#define IA64_LOG_CURR_INDEX(it)    1 - ia64_state_log[it].isl_index
#define IA64_LOG_INDEX_INC(it) \
        {ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \
        ia64_state_log[it].isl_count++;}
#define IA64_LOG_INDEX_DEC(it) \
        ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index
#define IA64_LOG_NEXT_BUFFER(it)   (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]))
#define IA64_LOG_CURR_BUFFER(it)   (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))
#define IA64_LOG_COUNT(it)         ia64_state_log[it].isl_count
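
/*
 * Each SAL record type is double-buffered: isl_index names the "next"
 * buffer and 1 - isl_index the "current" one, so flipping the index
 * retires a record while a nested MCA can still be logged into the
 * other buffer.  The usual pattern (a sketch of how the accessors
 * combine, cf. ia64_log_get() below):
 *
 *      IA64_LOG_LOCK(sal_info_type);
 *      ...SAL copies the record into IA64_LOG_NEXT_BUFFER(sal_info_type)...
 *      IA64_LOG_INDEX_INC(sal_info_type);
 *      IA64_LOG_UNLOCK(sal_info_type);
 */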

ia64_log_init(int sal_info_type)

        max_size = ia64_sal_get_state_info_size(sal_info_type);

ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe)

        total_len = ia64_sal_get_state_info(sal_info_type, (u64 *)log_buffer);

        IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. Record length = %ld\n",
                __func__, sal_info_type, total_len);

        *buffer = (u8 *) log_buffer;

ia64_mca_log_sal_error_record(int sal_info_type)
{
#ifdef IA64_MCA_DEBUG_INFO
        static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" };
#endif

        size = ia64_log_get(sal_info_type, &buffer, irq_safe);

                IA64_MCA_DEBUG("CPU %d: SAL log contains %s error record\n",
                        smp_processor_id(),
                        sal_info_type < ARRAY_SIZE(rec_name) ? rec_name[sal_info_type] : "UNKNOWN");

        ia64_sal_clear_state_info(sal_info_type);

        u64 curr_start, curr_end;

        while (curr <= last) {
                if ((ip >= curr_start) && (ip <= curr_end)) {

ia64_mca_cpe_int_handler (int cpe_irq, void *arg)

        spin_lock(&cpe_history_lock);
        if (!cpe_poll_enabled && cpe_vector >= 0) {

                        if (now - cpe_history[i] <= HZ)

                if (count >= CPE_HISTORY_LENGTH) {

                        cpe_poll_enabled = 1;
                        spin_unlock(&cpe_history_lock);

                        printk(KERN_WARNING "WARNING: Switching to polling CPE handler; error records may be lost\n");

                cpe_history[index++] = now;
                if (index == CPE_HISTORY_LENGTH)

        spin_unlock(&cpe_history_lock);
612 "Error interrupt vector with SAL (status %ld)\n", isrv.status);
617 "vector %#x registered\n", __func__, cpev);

        IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x registered.\n",

ia64_mca_cmc_vector_disable (void *dummy)

        IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x disabled.\n",

ia64_mca_cmc_vector_enable (void *dummy)

        IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x enabled.\n",

ia64_mca_wakeup(int cpu)

ia64_mca_wakeup_all(void)

                ia64_mca_wakeup(cpu);

ia64_mca_rendez_int_handler(int rendez_irq, void *arg)

        ia64_sal_mc_rendez();

ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg)

copy_reg(const u64 *fr, u64 fnat, unsigned long *tr, unsigned long *tnat)
{
        u64 fslot, tslot, nat;
        *tr = *fr;
        fslot = ((unsigned long)fr >> 3) & 63;
        tslot = ((unsigned long)tr >> 3) & 63;
        *tnat &= ~(1UL << tslot);
        nat = (fnat >> fslot) & 1;
        *tnat |= (nat << tslot);
}
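
/*
 * Why ">> 3 & 63": the RSE keeps one NaT bit per 8-byte register slot,
 * 64 slots per NaT collection, so bits 3..8 of a save address give the
 * bit position.  E.g. (illustrative) a register saved at an address
 * ending in 0x28 occupies slot (0x28 >> 3) & 63 = 5, so copy_reg()
 * moves bit fslot (= 5 here) of fnat into bit tslot of *tnat.
 */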

ia64_mca_modify_comm(const struct task_struct *previous_current)
{
        char *p, comm[sizeof(current->comm)];
        if (previous_current->pid)
                snprintf(comm, sizeof(comm), "%s %d",
                        current->comm, previous_current->pid);
        else {
                int l;
                if ((p = strchr(previous_current->comm, ' ')))
                        l = p - previous_current->comm;
                else
                        l = strlen(previous_current->comm);
                snprintf(comm, sizeof(comm), "%s %*s %d",
                        current->comm, l, previous_current->comm,
                        task_thread_info(previous_current)->cpu);
        }
        memcpy(current->comm, comm, sizeof(current->comm));
}

ia64_mca_modify_original_stack(struct pt_regs *regs,
                const struct switch_stack *sw,
                struct ia64_sal_os_state *sos,
                const char *type)
{
        unsigned size = sizeof(struct pt_regs) +
                        sizeof(struct switch_stack) + 16;
        unsigned long *old_bspstore, *old_bsp;
        unsigned long *new_bspstore, *new_bsp;
        unsigned long old_unat, old_rnat, new_rnat, nat;

        if (va.f.reg == 0) {

        if (va.f.reg == 0) {

        if (va.f.reg == 0) {

        old_bspstore = (unsigned long *)ar_bspstore;
        old_bsp = (unsigned long *)ar_bsp;
        slots = ia64_rse_num_regs(old_bspstore, old_bsp);

        new_bsp = ia64_rse_skip_regs(new_bspstore, slots);
        regs->loadrs = (new_bsp - new_bspstore) * 8 << 16;

        msg = "occurred in user space";

        ia64_mca_modify_comm(previous_current);

        msg = "inconsistent previous current and r13";

        msg = "inconsistent r12 and r13";

        msg = "inconsistent ar.bspstore and r13";

        va.p = old_bspstore;

        msg = "old_bspstore is in the wrong region";

        msg = "inconsistent ar.bsp and r13";

        size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8;
        if (ar_bspstore + size > r12) {
                msg = "no room for blocked state";

        ia64_mca_modify_comm(previous_current);

        p = (char *)r12 - sizeof(*regs);
        old_regs = (struct pt_regs *)p;
        memcpy(old_regs, regs, sizeof(*regs));
        finish_pt_regs(old_regs, sos, &old_unat);

        memcpy(old_sw, sw, sizeof(*sw));
        old_sw->b0 = (u64)ia64_leave_kernel;

        previous_current->thread.ksp = (u64)p - 16;

        if (ia64_rse_is_rnat_slot(new_bspstore)) {

        if (ia64_rse_is_rnat_slot(old_bspstore)) {
                *old_bspstore++ = old_rnat;

        nat = (new_rnat >> ia64_rse_slot_num(new_bspstore)) & 1UL;
        old_rnat &= ~(1UL << ia64_rse_slot_num(old_bspstore));
        old_rnat |= (nat << ia64_rse_slot_num(old_bspstore));
        *old_bspstore++ = *new_bspstore++;

        return previous_current;

        finish_pt_regs(regs, sos, &old_unat);
        return previous_current;

ia64_wait_for_slaves(int monarch, const char *type)

        for (i = 0; i < 5000; i++) {

        ia64_mlogbuf_finish(0);

static void mca_insert_tr(u64 iord)

        psr = ia64_clear_ic();

        if (old_rr != p->rr) {

        ia64_ptr(iord, p->ifa, p->itir >> 2);

        ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2);

        ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2);

        if (old_rr != p->rr) {

        previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");

        ia64_wait_for_slaves(cpu, "MCA");

        ia64_mca_wakeup_all();

        ia64_mlogbuf_finish(1);

static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd);
static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd);
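
/*
 * Enabling or disabling the CMC vector requires an on_each_cpu()
 * broadcast, which cannot be issued from the interrupt handler that
 * detects the storm, so the mode switch is deferred to keventd via the
 * two work items above.  Sketch of the hand-off (cf. the handlers
 * below):
 *
 *      schedule_work(&cmc_disable_work);       // storm: fall back to polling
 *      schedule_work(&cmc_enable_work);        // quiet: back to interrupts
 */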

ia64_mca_cmc_int_handler(int cmc_irq, void *arg)

        IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",

        spin_lock(&cmc_history_lock);
        if (!cmc_polling_enabled) {

                        if (now - cmc_history[i] <= HZ)

                if (count >= CMC_HISTORY_LENGTH) {

                        cmc_polling_enabled = 1;
                        spin_unlock(&cmc_history_lock);

                        ia64_mca_cmc_vector_disable(NULL);

                        printk(KERN_WARNING "WARNING: Switching to polling CMC handler; error records may be lost\n");

                cmc_history[index++] = now;
                if (index == CMC_HISTORY_LENGTH)

        spin_unlock(&cmc_history_lock);

ia64_mca_cmc_int_caller(int cmc_irq, void *arg)
{
        static int start_count = -1;

        if (start_count == -1)

        ia64_mca_cmc_int_handler(cmc_irq, arg);

        cpuid = cpumask_next(cpuid+1, cpu_online_mask);

        if (cpuid < nr_cpu_ids) {

                cmc_polling_enabled = 0;

ia64_mca_cmc_poll (unsigned long dummy)

ia64_mca_cpe_int_caller(int cpe_irq, void *arg)
{
        static int start_count = -1;

        if (start_count == -1)

        ia64_mca_cpe_int_handler(cpe_irq, arg);

        cpuid = cpumask_next(cpuid+1, cpu_online_mask);

                } else if (cpe_vector < 0) {

                        cpe_poll_enabled = 0;

                if (cpe_poll_enabled)
                        mod_timer(&cpe_poll_timer, jiffies + poll_time);
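
/*
 * Poll-interval adaptation (a sketch of the intent, pieced together
 * from the branches above): a pass that finds no new CPE records backs
 * the interval off toward MAX_CPE_POLL_INTERVAL; a pass that does find
 * records keeps polling at MIN_CPE_POLL_INTERVAL when no CPE vector
 * exists (cpe_vector < 0); and when a vector is available the caller
 * clears cpe_poll_enabled so the timer is not re-armed and interrupt
 * delivery takes over again.
 */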

ia64_mca_cpe_poll (unsigned long dummy)

        ia64_mlogbuf_dump_from_init();

        printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g);

        previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "INIT");

        mprintk("Slave on cpu %d returning to normal service.\n", cpu);

        mprintk("Delaying for 5 seconds...\n");

        ia64_wait_for_slaves(cpu, "INIT");

        mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu);

ia64_mca_disable_cpe_polling(char *str)
{
        cpe_poll_enabled = 0;
        return 1;
}

__setup("disable_cpe_poll", ia64_mca_disable_cpe_polling);

static struct irqaction cmci_irqaction = {
        .handler = ia64_mca_cmc_int_handler,

static struct irqaction cmcp_irqaction = {
        .handler = ia64_mca_cmc_int_caller,

static struct irqaction mca_rdzv_irqaction = {
        .handler = ia64_mca_rendez_int_handler,

static struct irqaction mca_wkup_irqaction = {
        .handler = ia64_mca_wakeup_int_handler,

static struct irqaction mca_cpe_irqaction = {
        .handler = ia64_mca_cpe_int_handler,

static struct irqaction mca_cpep_irqaction = {
        .handler = ia64_mca_cpe_int_caller,

format_mca_init_stack(void *mca_data, unsigned long offset,
                const char *type, int cpu)

        INIT_LIST_HEAD(&p->tasks);

        static int first_time = 1;

        data = mca_bootmem();

                panic("Could not allocate MCA memory for cpu %d\n", cpu);

static void __cpuinit ia64_mca_cmc_vector_adjust(void *dummy)
{
        unsigned long flags;

        if (!cmc_polling_enabled)
                ia64_mca_cmc_vector_enable(NULL);

        int hotcpu = (unsigned long) hcpu;

        .notifier_call = mca_cpu_callback
1986 "%ld to %ld milliseconds\n", timeout, isrv.
v0);
1992 "with SAL (status %ld)\n", rc);
2004 "(status %ld)\n", rc);
2008 IA64_MCA_DEBUG(
"%s: registered MCA rendezvous spinloop and wakeup mech.\n", __func__);
2025 "(status %ld)\n", rc);
2029 IA64_MCA_DEBUG(
"%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __func__,
2054 "(status %ld)\n", rc);
2058 printk(
KERN_ERR "Failed to register default monarch INIT process\n");
2062 IA64_MCA_DEBUG(
"%s: registered OS INIT handler with SAL\n", __func__);

ia64_mca_late_init(void)

        cmc_poll_timer.function = ia64_mca_cmc_poll;

        cmc_polling_enabled = 0;

        cpe_poll_timer.function = ia64_mca_cpe_poll;

        if (cpe_vector >= 0) {
                irq = local_vector_to_irq(cpe_vector);

                cpe_poll_enabled = 0;

                        "interrupt handler, vector %d\n", __func__, cpe_vector);

        if (cpe_poll_enabled) {
                ia64_mca_cpe_poll(0UL);