28 #include <asm/cputable.h>
32 #include <asm/processor.h>
34 #include <asm/ptrace.h>
39 #include "../platforms/cell/interrupt.h"
42 #define PPU_PROFILING 0
43 #define SPU_PROFILING_CYCLES 1
44 #define SPU_PROFILING_EVENTS 2
46 #define SPU_EVENT_NUM_START 4100
47 #define SPU_EVENT_NUM_STOP 4399
48 #define SPU_PROFILE_EVENT_ADDR 4363
49 #define SPU_PROFILE_EVENT_ADDR_MASK_A 0x146
50 #define SPU_PROFILE_EVENT_ADDR_MASK_B 0x186
52 #define NUM_SPUS_PER_NODE 8
53 #define SPU_CYCLES_EVENT_NUM 2
55 #define PPU_CYCLES_EVENT_NUM 1
56 #define PPU_CYCLES_GRP_NUM 1
59 #define CBE_COUNT_ALL_CYCLES 0x42800000
64 #define NUM_DEBUG_BUS_WORDS 4
65 #define NUM_INPUT_BUS_WORDS 2
67 #define MAX_SPU_COUNT 0xFFFFFF
/*
 * Interval cycle count: ten below the largest 32-bit value.
 *
 * The expansion is parenthesized so it behaves as a single operand.
 * Without the parentheses, an expression such as "n * NUM_INTERVAL_CYC"
 * or "a - NUM_INTERVAL_CYC" would bind to only part of the expansion.
 */
#define NUM_INTERVAL_CYC (0xFFFFFFFF - 10)
/*
 * Loaded from ctr[0].count during setup and later handed to
 * calculate_lfsr() when SPU cycle profiling is started.
 */
80 static unsigned int spu_cycle_reset;
/*
 * NOTE(review): presumably holds one of PPU_PROFILING,
 * SPU_PROFILING_CYCLES or SPU_PROFILING_EVENTS; no assignment is
 * visible in this part of the file -- confirm.
 */
81 static unsigned int profiling_mode;
/*
 * Index used by spu_evnt_swap() as an offset when computing the current
 * and next physical SPU; reset to 0 during setup and read by the SPU
 * interrupt handler when deriving spu_num.
 */
82 static int spu_evnt_phys_spu_indx;
/*
 * Bit-field extractors for a packed 32-bit value x
 * (NOTE(review): presumably the event's unit mask -- confirm at callers).
 *
 *   GET_SUB_UNIT      bits 15..12
 *   GET_BUS_WORD      bits  7..4
 *   GET_BUS_TYPE      bits  9..8
 *   GET_POLARITY      bit   1
 *   GET_COUNT_CYCLES  bit   0
 *   GET_INPUT_CONTROL bit   2
 *
 * The argument is parenthesized so that an expression argument such as
 * "a | b" is masked as a whole; "&" binds tighter than "|", so an
 * unparenthesized x would mask only part of the expression.
 */
#define GET_SUB_UNIT(x) (((x) & 0x0000f000) >> 12)
#define GET_BUS_WORD(x) (((x) & 0x000000f0) >> 4)
#define GET_BUS_TYPE(x) (((x) & 0x00000300) >> 8)
#define GET_POLARITY(x) (((x) & 0x00000002) >> 1)
#define GET_COUNT_CYCLES(x) ((x) & 0x00000001)
#define GET_INPUT_CONTROL(x) (((x) & 0x00000004) >> 2)
/*
 * Hardware-thread selector: cell_virtual_cntr() saves the current value,
 * flips it with "1 ^ hdw_thread", and uses the old/new values to index
 * pmc_cntrl[] and to offset the cpu passed to per_cpu(pmc_values, ...).
 */
168 static u32 hdw_thread;
/*
 * Copy of the interrupt mask built when PPU profiling is started; passed
 * along again by the virtual-counter timer and the interrupt handlers.
 */
170 static u32 virt_cntr_inter_mask;
/*
 * Timer whose callback is spu_evnt_swap(); armed for jiffies + HZ / 25
 * and re-armed with the same interval at the end of the callback.
 */
172 static struct timer_list timer_spu_event_swap;
/* Result of rtas_token("ibm,cbe-perftools"); checked against RTAS_UNKNOWN_SERVICE. */
180 static int pm_rtas_token;
/* Result of rtas_token("ibm,cbe-spu-perftools"); checked against RTAS_UNKNOWN_SERVICE. */
181 static int spu_rtas_token;
/* Set from num_ctrs during setup; upper bound of the per-counter loops. */
184 static int num_counters;
/*
 * Set to 1 by the global start paths and to 0 by the global stop paths;
 * the interrupt handlers only process samples while it is 1.
 */
185 static int oprofile_running;
/* Bitmask of counters in use: bit i is set during setup and tested before enable_ctr(). */
188 static u32 ctr_enabled;
202 passthru, paddr >> 32, paddr & 0xffffffff, length);
205 static void pm_rtas_reset_signals(
u32 node)
221 pm_signal_local.signal_group = 21;
222 pm_signal_local.bus_word = 1;
223 pm_signal_local.sub_unit = 0;
224 pm_signal_local.bit = 0;
240 static int pm_rtas_activate_signals(
u32 node,
u32 count)
255 for (j = 0; j <
count; j++) {
260 pm_signal_local[
i].signal_group
287 static void set_pm_event(
u32 ctr,
int event,
u32 unit_mask)
304 pm_regs.pm07_cntrl[ctr] = 0;
312 signal_bit = (
event % 100);
320 pm_regs.pm07_cntrl[ctr] = 0;
334 if (input_control == 0) {
335 if (signal_bit > 31) {
339 else if (bus_word == 0
xc)
350 pm_regs.pm07_cntrl[ctr] = 0;
355 if (bus_word & (1 << i)) {
356 pm_regs.debug_bus_control |=
357 (bus_type << (30 - (2 *
i)));
360 if (input_bus[j] == 0xff) {
362 pm_regs.group_control |=
363 (i << (30 - (2 *
j)));
374 static void write_pm_cntrl(
int cpu)
382 if (pm_regs.pm_cntrl.enable == 1)
385 if (pm_regs.pm_cntrl.stop_at_max == 1)
388 if (pm_regs.pm_cntrl.trace_mode != 0)
391 if (pm_regs.pm_cntrl.trace_buf_ovflw == 1)
393 if (pm_regs.pm_cntrl.freeze == 1)
418 pm_regs.pm_cntrl.count_mode =
424 pm_regs.pm_cntrl.count_mode =
455 static void cell_virtual_cntr(
unsigned long data)
457 int i, prev_hdw_thread, next_hdw_thread;
468 prev_hdw_thread = hdw_thread;
471 hdw_thread = 1 ^ hdw_thread;
472 next_hdw_thread = hdw_thread;
474 pm_regs.group_control = 0;
475 pm_regs.debug_bus_control = 0;
484 for (i = 0; i < num_counters; i++)
486 pmc_cntrl[next_hdw_thread][i].evnts,
487 pmc_cntrl[next_hdw_thread][i].masks);
503 for (i = 0; i < num_counters; i++) {
504 per_cpu(pmc_values, cpu + prev_hdw_thread)[
i]
507 if (
per_cpu(pmc_values, cpu + next_hdw_thread)[i]
524 next_hdw_thread)[i]);
532 for (i = 0; i < num_counters; i++) {
533 if (pmc_cntrl[next_hdw_thread][i].
enabled) {
548 virt_cntr_inter_mask);
552 spin_unlock_irqrestore(&cntr_lock, flags);
557 static void start_virt_cntrs(
void)
560 timer_virt_cntr.function = cell_virtual_cntr;
561 timer_virt_cntr.data = 0
UL;
562 timer_virt_cntr.expires =
jiffies +
HZ / 10;
569 spu_cycle_reset = ctr[0].
count;
575 spu_rtas_token =
rtas_token(
"ibm,cbe-spu-perftools");
577 if (
unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
579 "%s: rtas token ibm,cbe-spu-perftools unknown\n",
593 static void spu_evnt_swap(
unsigned long data)
596 int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx;
613 cur_spu_evnt_phys_spu_indx = spu_evnt_phys_spu_indx;
616 spu_evnt_phys_spu_indx = 0;
629 + cur_spu_evnt_phys_spu_indx;
631 + spu_evnt_phys_spu_indx;
640 spu_pm_cnt[cur_phys_spu]
647 if (spu_pm_cnt[nxt_phys_spu] >= 0xFFFFFFFF)
661 "SPU event swap\n", __func__);
667 enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
675 spin_unlock_irqrestore(&cntr_lock, flags);
678 mod_timer(&timer_spu_event_swap, jiffies +
HZ / 25);
681 static void start_spu_event_swap(
void)
684 timer_spu_event_swap.function = spu_evnt_swap;
685 timer_spu_event_swap.data = 0
UL;
686 timer_spu_event_swap.expires =
jiffies +
HZ / 25;
697 spu_evnt_phys_spu_indx = 0;
704 pm_rtas_token =
rtas_token(
"ibm,cbe-perftools");
706 if (
unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
708 "%s: rtas token ibm,cbe-perftools unknown\n",
717 pm_regs.pm_cntrl.trace_buf_ovflw = 1;
723 pm_regs.pm_cntrl.trace_mode = 2;
725 pm_regs.pm_cntrl.spu_addr_trace = 0x1;
745 set_pm_event(0, ctr[0].event, ctr[0].unit_mask);
747 reset_value[0] = 0xFFFFFFFF - ctr[0].
count;
754 spu_pm_cnt[i] = reset_value[0];
765 num_counters = num_ctrs;
769 "%s: Oprofile, number of specified events " \
770 "exceeds number of physical counters\n",
778 for (i = 0; i < num_ctrs; ++
i) {
780 pmc_cntrl[0][
i].evnts = ctr[
i].
event;
782 pmc_cntrl[0][
i].enabled = ctr[
i].
enabled;
783 pmc_cntrl[0][
i].vcntr =
i;
793 for (i = 0; i < num_ctrs; ++i) {
794 if ((ctr[i].event >= 2100) && (ctr[i].event <= 2111))
795 pmc_cntrl[1][
i].evnts = ctr[
i].
event + 19;
796 else if (ctr[i].event == 2203)
797 pmc_cntrl[1][
i].evnts = ctr[
i].
event;
798 else if ((ctr[i].event >= 2200) && (ctr[i].event <= 2215))
799 pmc_cntrl[1][
i].evnts = ctr[
i].
event + 16;
801 pmc_cntrl[1][
i].evnts = ctr[
i].
event;
804 pmc_cntrl[1][
i].enabled = ctr[
i].
enabled;
805 pmc_cntrl[1][
i].vcntr =
i;
818 for (i = 0; i < num_counters; ++
i) {
822 reset_value[
i] = 0xFFFFFFFF - ctr[
i].
count;
824 pmc_cntrl[0][i].evnts,
825 pmc_cntrl[0][i].masks);
828 ctr_enabled |= (1 <<
i);
834 for (i = 0; i < num_counters; ++i) {
835 per_cpu(pmc_values, cpu)[
i] = reset_value[
i];
852 pm_regs.group_control = 0;
853 pm_regs.debug_bus_control = 0;
854 pm_regs.pm_cntrl.stop_at_max = 1;
855 pm_regs.pm_cntrl.trace_mode = 0;
856 pm_regs.pm_cntrl.freeze = 1;
857 pm_regs.pm_cntrl.trace_buf_ovflw = 0;
858 pm_regs.pm_cntrl.spu_addr_trace = 0;
866 pm_rtas_token =
rtas_token(
"ibm,cbe-perftools");
868 if (
unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
870 "%s: rtas token ibm,cbe-perftools unknown\n",
877 ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs);
881 spu_cycle_reset = ctr[0].
count;
889 cell_reg_setup_spu_events(ctr, sys, num_ctrs);
892 ret = cell_reg_setup_ppu(ctr, sys, num_ctrs);
930 for (i = 0; i < num_counters; ++
i) {
931 if (ctr_enabled & (1 << i)) {
958 #define MAXLFSR 0xFFFFFF
961 static int initial_lfsr[] = {
962 8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
963 15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
964 4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
965 3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
966 9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026,
967 2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556,
968 3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769,
969 14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893,
970 11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017,
971 6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756,
972 15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558,
973 7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401,
974 16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720,
975 15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042,
976 15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955,
977 10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934,
978 3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783,
979 3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278,
980 8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051,
981 8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741,
982 4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972,
983 16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302,
984 2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384,
985 14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469,
986 1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697,
987 6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398,
988 10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140,
989 10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214,
990 14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386,
991 7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087,
992 9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130,
993 14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300,
994 13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475,
995 5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950,
996 3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003,
997 6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
998 7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
999 6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
1055 #define V2_16 (0x1 << 16)
1056 #define V2_19 (0x1 << 19)
1057 #define V2_22 (0x1 << 22)
1059 static int calculate_lfsr(
int n)
1069 else if (((n -
V2_16) >> 19) == 0)
1070 index = ((n -
V2_16) >> 12) + 1;
1072 index = ((n -
V2_16 -
V2_19) >> 15 ) + 1 + 128;
1079 if ((index >=
ENTRIES) || (index < 0))
1082 return initial_lfsr[
index];
1085 static int pm_rtas_activate_spu_profiling(
u32 node)
1094 for (i = 0; i <
ARRAY_SIZE(pm_signal_local); i++) {
1096 pm_signal_local[
i].signal_group = 41;
1098 pm_signal_local[
i].bus_word = 1 << i / 2;
1100 pm_signal_local[
i].sub_unit =
i;
1101 pm_signal_local[
i].bit = 63;
1118 #ifdef CONFIG_CPU_FREQ
1120 oprof_cpufreq_notify(
struct notifier_block *nb,
unsigned long val,
void *data)
1144 static void cell_global_stop_spu_cycles(
void)
1147 unsigned int lfsr_value;
1150 oprofile_running = 0;
1153 #ifdef CONFIG_CPU_FREQ
1166 lfsr_value = 0x8f100000;
1174 "%s: rtas call ibm,cbe-spu-perftools " \
1175 "failed, return = %d\n",
1176 __func__, rtn_value);
1186 static void cell_global_stop_spu_events(
void)
1189 oprofile_running = 0;
1212 static void cell_global_stop_ppu(
void)
1222 oprofile_running = 0;
1241 static void cell_global_stop(
void)
1244 cell_global_stop_ppu();
1246 cell_global_stop_spu_events();
1248 cell_global_stop_spu_cycles();
1254 unsigned int lfsr_value;
1258 unsigned int cpu_khzfreq = 0;
1265 #ifdef CONFIG_CPU_FREQ
1294 lfsr_value = calculate_lfsr(spu_cycle_reset);
1297 if (lfsr_value == 0)
1298 lfsr_value = calculate_lfsr(1);
1300 lfsr_value = lfsr_value << 8;
1321 "%s: rtas call ibm,cbe-spu-perftools failed, " \
1322 "return = %d\n", __func__, ret);
1332 oprofile_running = 1;
1336 cell_global_stop_spu_cycles();
1344 u32 interrupt_mask = 0;
1374 if (ctr_enabled & 1) {
1376 enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
1396 start_spu_event_swap();
1398 oprofile_running = 1;
1407 u32 interrupt_mask = 0;
1419 for (i = 0; i < num_counters; ++
i) {
1420 if (ctr_enabled & (1 << i)) {
1422 enable_ctr(cpu, i, pm_regs.pm07_cntrl);
1435 virt_cntr_inter_mask = interrupt_mask;
1436 oprofile_running = 1;
1453 return cell_global_start_spu_cycles(ctr);
1455 return cell_global_start_spu_events(ctr);
1457 return cell_global_start_ppu(ctr);
1488 static void cell_handle_interrupt_spu(
struct pt_regs *
regs,
1494 u64 trace_buffer[2];
1495 u64 last_trace_buffer;
1498 unsigned long sample_array_lock_flags;
1500 unsigned long flags;
1514 trace_entry = 0xfedcba;
1515 last_trace_buffer = 0xdeadbeaf;
1517 if ((oprofile_running == 1) && (interrupt_mask != 0)) {
1557 trace_entry = trace_buffer[0]
1558 & 0x00000000FFFF0000;
1563 sample = trace_entry >> 14;
1564 last_trace_buffer = trace_buffer[0];
1566 spu_num = spu_evnt_phys_spu_indx
1573 sample_array_lock_flags);
1575 spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
1576 sample_array_lock_flags);
1586 virt_cntr_inter_mask);
1600 write_pm_cntrl(cpu);
1603 spin_unlock_irqrestore(&cntr_lock, flags);
1606 static void cell_handle_interrupt_ppu(
struct pt_regs *regs,
1612 unsigned long flags = 0;
1642 if ((oprofile_running == 1) && (interrupt_mask != 0)) {
1646 for (i = 0; i < num_counters; ++
i) {
1648 && ctr[i].enabled) {
1663 virt_cntr_inter_mask);
1676 spin_unlock_irqrestore(&cntr_lock, flags);
1679 static void cell_handle_interrupt(
struct pt_regs *regs,
1683 cell_handle_interrupt_ppu(regs, ctr);
1685 cell_handle_interrupt_spu(regs, ctr);
1693 static int cell_sync_start(
void)
1702 static int cell_sync_stop(
void)
1712 .reg_setup = cell_reg_setup,
1713 .cpu_setup = cell_cpu_setup,
1714 .global_start = cell_global_start,
1715 .global_stop = cell_global_stop,
1716 .sync_start = cell_sync_start,
1717 .sync_stop = cell_sync_stop,
1718 .handle_interrupt = cell_handle_interrupt,