#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/slab.h>

#include <asm/ptrace.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/timer.h>
#include <asm/irq_regs.h>
#include <asm/pgtable.h>
#include <asm/oplib.h>
#include <asm/uaccess.h>
#include <asm/sections.h>
#include <asm/hypervisor.h>
82 "Cpu%dClkTck\t: %016lx\n",
88 static volatile unsigned long callin_flag = 0;
94 __local_per_cpu_offset = __per_cpu_offset(cpuid);
107 __asm__ __volatile__(
"membar #Sync\n\t"
108 "flush %%g6" : : :
"memory");
120 notify_cpu_starting(cpuid);
135 panic(
"SMP bolixed\n");
146 #define SLAVE (SMP_CACHE_BYTES/sizeof(unsigned long))
148 #define NUM_ROUNDS 64
152 static unsigned long go[
SLAVE + 1];
154 #define DEBUG_TICK_SYNC 0
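/*
 * Tick-counter synchronization between the boot (master) CPU and a
 * freshly started slave.  get_delta() brackets a read of the master's
 * timestamp (tm) between two local reads (t0, t1), keeps the round
 * with the smallest window, and uses the midpoint of the best window
 * to estimate the slave's offset from the master.
 */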
static inline long get_delta (long *rt, long *master)
	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
	unsigned long tcenter, t0, t1, tm;

		if (t1 - t0 < best_t1 - best_t0)
			best_t0 = t0, best_t1 = t1, best_tm = tm;

	*rt = best_t1 - best_t0;
	*master = best_tm - best_t0;

	tcenter = (best_t0/2 + best_t1/2);
	if (best_t0 % 2 + best_t1 % 2 == 2)
		tcenter++;
	return tcenter - best_tm;
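/*
 * Slave side of the synchronization loop: each round measures the
 * offset with get_delta() and slews the local tick counter by adj;
 * adjust_latency accumulates a damped (divided by 4) correction so
 * measurement jitter does not cause over-steering.
 */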
	long i, delta, adj, adjust_latency = 0, done = 0;
	unsigned long flags, rt, master_time_stamp;

		delta = get_delta(&rt, &master_time_stamp);
			adjust_latency += -delta;
			adj = -delta + adjust_latency/4;

			t[i].master = master_time_stamp;
			t[i].lat = adjust_latency/4;

		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
		       t[i].rt, t[i].master, t[i].diff, t[i].lat);

	       "(last diff %ld cycles, maxerr %lu cycles)\n",

static void smp_start_sync_tick_client(int cpu);

static void smp_synchronize_one_tick(int cpu)
	smp_start_sync_tick_client(cpu);

	spin_unlock_irqrestore(&itc_sync_lock, flags);
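/*
 * On sun4v logical domains (LDoms), a new CPU is not started via the
 * firmware.  Instead, a hypervisor trampoline descriptor (hdesc) is
 * built with 4MB TTE mappings covering the kernel image, and the CPU
 * is started through the hypervisor with its trap table address and
 * the new thread's register state.
 */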
#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
static unsigned long kimage_addr_to_ra(void *p)
	unsigned long val = (unsigned long) p;

static void __cpuinit ldom_startcpu_cpuid(unsigned int cpu,
					  unsigned long thread_reg,
					  void **descrp)
	extern unsigned long sparc64_ttable_tl0;
	unsigned long trampoline_ra;
	u64 tte_vaddr, tte_data;
	unsigned long hv_err;

	hdesc = kzalloc(sizeof(*hdesc) +

		hdesc->maps[i].vaddr = tte_vaddr;
		hdesc->maps[i].tte = tte_data;
		tte_vaddr += 0x400000;
		tte_data += 0x400000;

				 kimage_addr_to_ra(&sparc64_ttable_tl0),
		       "gives error %lu\n", hv_err);
	unsigned long entry =
		(unsigned long)(&sparc64_cpu_startup);
	unsigned long cookie =
		(unsigned long)(&cpu_new_thread);

#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
		ldom_startcpu_cpuid(cpu,
				    (unsigned long) cpu_new_thread,
			prom_startcpu_cpuid(cpu, entry, cookie);

		for (timeout = 0; timeout < 50000; timeout++) {

		printk("Processor %d is stuck.\n", cpu);

	cpu_new_thread = NULL;
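/*
 * Spitfire cross-call delivery: the three mondo words are written to
 * the outgoing interrupt dispatch registers with stxa and the dispatch
 * is triggered, then the dispatch status register is polled until the
 * busy bit clears (a NACK causes a retry).  On Starfire the logical
 * cpu number must first be remapped to the physical UPA id, as below.
 */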
		cpu = (((cpu & 0x3c) << 1) |
			((cpu & 0x40) >> 4) |

	target = (cpu << 14) | 0x70;

		"wrpr %1, %2, %%pstate\n\t"
		"stxa %4, [%0] %3\n\t"
		"stxa %5, [%0+%8] %3\n\t"
		"stxa %6, [%0+%8] %3\n\t"
		"stxa %%g0, [%7] %3\n\t"
		"ldxa [%%g1] 0x7f, %%g0\n\t"
425 "r" (data0),
"r" (data1),
"r" (data2),
"r" (target),
426 "r" (0x10),
"0" (tmp)
		__asm__ __volatile__("ldxa [%%g0] %1, %0"

		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
	} while (result & 0x1);

	__asm__ __volatile__("wrpr %0, 0x0, %%pstate"

		printk("CPU[%d]: mondo stuckage result[%016llx]\n",

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

	for (i = 0; i < cnt; i++)
		spitfire_xcall_helper(data0, data1, data2, pstate, cpu_list[i]);
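/*
 * Cheetah (and Jalapeno/JBUS) delivery can target multiple CPUs with a
 * single dispatch: the mondo data is written once, each target is added
 * to the dispatch window, and busy_mask tracks the per-target busy bits
 * (two status bits per target) in the dispatch status register.
 */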
static void cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
	__asm__ ("rdpr %%ver, %0" : "=r" (ver));

	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));

	__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"

	__asm__ __volatile__("stxa %0, [%3] %6\n\t"
			     "stxa %1, [%4] %6\n\t"
			     "stxa %2, [%5] %6\n\t"
			     : "r" (mondo[0]), "r" (mondo[1]), "r" (mondo[2]),
			       "r" (0x40), "r" (0x50), "r" (0x60),

		for (i = 0; i < cnt; i++) {
			target = (nr << 14) | 0x70;
				busy_mask |= (0x1UL << (nr * 2));
				target |= (nack_busy_id << 24);
				busy_mask |= (0x1UL <<

				"stxa %%g0, [%0] %1\n\t"

			if (nack_busy_id == 32) {

		u64 dispatch_stat, nack_mask;

		stuck = 100000 * nack_busy_id;
		nack_mask = busy_mask << 1;

			__asm__ __volatile__("ldxa [%%g0] %1, %0"
					     : "=r" (dispatch_stat)

			if (!(dispatch_stat & (busy_mask | nack_mask))) {
				__asm__ __volatile__("wrpr %0, 0x0, %%pstate"

					for (i = 0; i < cnt; i++) {
						if (cpu_list[i] == 0xffff)
						cpu_list[i] = 0xffff;
		} while (dispatch_stat & busy_mask);

		__asm__ __volatile__("wrpr %0, 0x0, %%pstate"

		if (dispatch_stat & busy_mask) {
			printk("CPU[%d]: mondo stuckage result[%016llx]\n",

			int i, this_busy_nack = 0;

			for (i = 0; i < cnt; i++) {
					check_mask = (0x2UL << (2*nr));
					check_mask = (0x2UL <<

				if ((dispatch_stat & check_mask) == 0)
					cpu_list[i] = 0xffff;

				if (this_busy_nack == 64)
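/*
 * sun4v delivery hands the hypervisor a CPU list and mondo block by
 * physical address; HV_EWOULDBLOCK results are retried as long as some
 * forward progress is made (n_sent growing), and CPUs reported in
 * error state are dropped from the list (marked 0xffff) and recorded
 * in saw_cpu_error.
 */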
static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
	int retries, this_cpu, prev_sent, i, saw_cpu_error;

		int forward_progress, n_sent;

			for (i = 0; i < cnt; i++) {
				if (likely(cpu_list[i] == 0xffff))

		forward_progress = 0;
		if (n_sent > prev_sent)
			forward_progress = 1;

			for (i = 0; i < cnt; i++) {
					saw_cpu_error = (cpu + 1);
					cpu_list[i] = 0xffff;

				goto fatal_mondo_error;
				goto fatal_mondo_timeout;
		goto fatal_mondo_cpu_error;

fatal_mondo_cpu_error:
	       "(including %d) were in error state\n",
	       this_cpu, saw_cpu_error - 1);

	       " progress after %d retries.\n",
	goto dump_cpu_list_and_out;

	       "mondo_block_pa(%lx)\n",

dump_cpu_list_and_out:
	for (i = 0; i < cnt; i++)
		printk("%u ", cpu_list[i]);
	int this_cpu, i, cnt;

	xcall_deliver_impl(tb, cnt);

	u64 data0 = (((u64)ctx)<<32 | (((u64)func) & 0xffffffff));

	xcall_deliver(data0, data1, data2, mask);

static void smp_cross_call(unsigned long *func, u32 ctx, u64 data1, u64 data2)
	smp_cross_call_masked(func, ctx, data1, data2, cpu_online_mask);

static void smp_start_sync_tick_client(int cpu)
	xcall_deliver((u64) &xcall_sync_tick, 0, 0,

	xcall_deliver((u64) &xcall_call_function, 0, 0, mask);

	xcall_deliver((u64) &xcall_call_function_single, 0, 0,

	clear_softint(1 << irq);
	generic_smp_call_function_interrupt();

	clear_softint(1 << irq);
	generic_smp_call_function_single_interrupt();
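/*
 * tsb_sync() runs on each CPU that may be using the mm and reloads the
 * translation storage buffer with tsb_context_switch(), so that a
 * resized TSB becomes visible everywhere.
 */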
static void tsb_sync(void *info)
		tsb_context_switch(mm);

extern unsigned long xcall_kgdb_capture;

#ifdef DCACHE_ALIASING_POSSIBLE
extern unsigned long xcall_flush_dcache_page_cheetah;

#ifdef CONFIG_DEBUG_DCFLUSH
extern atomic_t dcpage_flushes_xcall;
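/*
 * On processors where D-cache aliasing is possible
 * (DCACHE_ALIASING_POSSIBLE), remote copies of a page are flushed by
 * cross-call.  For the Spitfire variant, bit 32 of data0 tells the
 * handler whether the page currently has a mapping.
 */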
static inline void __local_flush_dcache_page(struct page *page)
#ifdef DCACHE_ALIASING_POSSIBLE
			     page_mapping(page) != NULL));

	if (page_mapping(page) != NULL &&

#ifdef CONFIG_DEBUG_DCFLUSH

	if (cpu == this_cpu) {
		__local_flush_dcache_page(page);

			data0 = ((u64)&xcall_flush_dcache_page_spitfire);
			if (page_mapping(page) != NULL)
				data0 |= ((u64)1 << 32);
#ifdef DCACHE_ALIASING_POSSIBLE
			data0 = ((u64)&xcall_flush_dcache_page_cheetah);

			xcall_deliver(data0, __pa(pg_addr),
#ifdef CONFIG_DEBUG_DCFLUSH

#ifdef CONFIG_DEBUG_DCFLUSH

		data0 = ((u64)&xcall_flush_dcache_page_spitfire);
		if (page_mapping(page) != NULL)
			data0 |= ((u64)1 << 32);
#ifdef DCACHE_ALIASING_POSSIBLE
		data0 = ((u64)&xcall_flush_dcache_page_cheetah);

		xcall_deliver(data0, __pa(pg_addr),
#ifdef CONFIG_DEBUG_DCFLUSH

	__local_flush_dcache_page(page);
	clear_softint(1 << irq);

	smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);

	smp_cross_call(&xcall_kgdb_capture, 0, 0, 0);

	smp_cross_call(&xcall_fetch_glob_regs, 0, 0, 0);

	    sun4v_chip_type >= SUN4V_CHIP_NIAGARA4)
		smp_cross_call(&xcall_fetch_glob_pmu_n4, 0, 0, 0);
		smp_cross_call(&xcall_fetch_glob_pmu, 0, 0, 0);

		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));
		goto local_flush_and_out;

	smp_cross_call_masked(&xcall_flush_tlb_mm,

local_flush_and_out:

		cpumask_copy(mm_cpumask(mm), cpumask_of(cpu));

	smp_cross_call_masked(&xcall_flush_tlb_pending,
			      ctx, nr, (unsigned long) vaddrs,

	smp_cross_call(&xcall_flush_tlb_kernel_range,
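/*
 * smp_capture() sends every other online CPU into a spin loop (the
 * "penguin jail") and waits until all have checked in via
 * smp_capture_registry; smp_release() lets them out again.  This gives
 * the caller a fully quiesced machine.
 */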
static unsigned long penguins_are_doing_time;

#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Sending penguins to jail...",

		penguins_are_doing_time = 1;
		smp_cross_call(&xcall_capture, 0, 0, 0);
		while (atomic_read(&smp_capture_registry) != ncpus)
#ifdef CAPTURE_DEBUG

#ifdef CAPTURE_DEBUG
		printk("CPU[%d]: Giving pardon to "
		       "imprisoned penguins\n",

		penguins_are_doing_time = 0;

	clear_softint(1 << irq);

	__asm__ __volatile__("flushw");

	while (penguins_are_doing_time)
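/* The delivery implementation is chosen once, based on tlb_type:
 * spitfire, cheetah/cheetah_plus, or hypervisor (sun4v).
 */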
		xcall_deliver_impl = spitfire_xcall_deliver;
		xcall_deliver_impl = cheetah_xcall_deliver;
		xcall_deliver_impl = hypervisor_xcall_deliver;

	cpumask_set_cpu(cpu, &smp_commenced_mask);

		smp_synchronize_one_tick(cpu);

#ifdef CONFIG_HOTPLUG_CPU
void cpu_play_dead(void)
	cpumask_clear_cpu(cpu, &smp_commenced_mask);

		"rdpr %%pstate, %0\n\t"
		"wrpr %0, %1, %%pstate"

		cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i));
	cpumask_clear(&per_cpu(cpu_sibling_map, cpu));

	for (i = 0; i < 100; i++) {

#if defined(CONFIG_SUN_LDOMS)
		unsigned long hv_err;

	} while (--limit > 0);

	xcall_deliver((u64) &xcall_receive_signal, 0, 0,

	clear_softint(1 << irq);
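/*
 * Per-cpu area setup: pcpu_alloc_bootmem() prefers node-local bootmem
 * when NUMA information is available, and setup_per_cpu_areas() first
 * tries the embed first-chunk allocator, falling back to page-sized
 * units if that fails (hence the "falling back to page size" warning
 * below).
 */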
static void * __init pcpu_alloc_bootmem(unsigned int cpu, size_t size,
#ifdef CONFIG_NEED_MULTIPLE_NODES
		pr_info("cpu %d has no node %d or node-local memory\n",

		pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n",
			 cpu, size, __pa(ptr));

		pr_debug("per cpu data for cpu%d %lu bytes on node%d at "
			 "%016lx\n", cpu, size, node, __pa(ptr));

static void __init pcpu_free_bootmem(void *ptr, size_t size)

static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)

static void __init pcpu_populate_pte(unsigned long addr)

	unsigned long delta;

		pr_warning("PERCPU: %s allocator failed (%d), "
			   "falling back to page size\n",

		panic("cannot initialize percpu area (err=%d)", rc);