Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
perf_event.c
Go to the documentation of this file.
1 /*
2  * Performance events support for SH-4A performance counters
3  *
4  * Copyright (C) 2009, 2010 Paul Mundt
5  *
6  * This file is subject to the terms and conditions of the GNU General Public
7  * License. See the file "COPYING" in the main directory of this archive
8  * for more details.
9  */
10 #include <linux/kernel.h>
11 #include <linux/init.h>
12 #include <linux/io.h>
13 #include <linux/irq.h>
14 #include <linux/perf_event.h>
15 #include <asm/processor.h>
16 
/*
 * Per-counter MMIO registers, indexed by counter number:
 * CCBR (counter control) and PMCTR (counter value).
 */
#define PPC_CCBR(idx)	(0xff200800 + (sizeof(u32) * idx))
#define PPC_PMCTR(idx)	(0xfc100000 + (sizeof(u32) * idx))

/* CCBR bits */
#define CCBR_CIT_MASK	(0x7ff << 6)	/* event selection (CIT) field */
#define CCBR_DUC	(1 << 3)	/* counting gate; set on the enable paths below */
#define CCBR_CMDS	(1 << 1)	/* set when a counter is enabled */
#define CCBR_PPCE	(1 << 0)	/* set when a counter is enabled */
24 
#ifdef CONFIG_CPU_SHX3
/*
 * The PMCAT location for SH-X3 CPUs was quietly moved, while the CCBR
 * and PMCTR locations remain tentatively constant. This change remains
 * wholly undocumented, and was simply found through trial and error.
 *
 * Early cuts of SH-X3 still appear to use the SH-X/SH-X2 locations, and
 * it's unclear when this ceased to be the case. For now we always use
 * the new location (if future parts keep up with this trend then
 * scanning for them at runtime also remains a viable option.)
 *
 * The gap in the register space also suggests that there are other
 * undocumented counters, so this will need to be revisited at a later
 * point in time.
 */
#define PPC_PMCAT	0xfc100240
#else
#define PPC_PMCAT	0xfc100080
#endif
44 
/*
 * PMCAT per-counter flag bits. Naming follows OVFn/CNNn/CLRn per
 * counter n -- presumably overflow status / connect / clear controls;
 * only the CLRn bits are used below (to reset a counter on enable).
 */
#define PMCAT_OVF3	(1 << 27)
#define PMCAT_CNN3	(1 << 26)
#define PMCAT_CLR3	(1 << 25)
#define PMCAT_OVF2	(1 << 19)
#define PMCAT_CLR2	(1 << 17)
#define PMCAT_OVF1	(1 << 11)
#define PMCAT_CNN1	(1 << 10)
#define PMCAT_CLR1	(1 << 9)
#define PMCAT_OVF0	(1 << 3)
#define PMCAT_CLR0	(1 << 1)
55 
56 static struct sh_pmu sh4a_pmu;
57 
58 /*
59  * Supported raw event codes:
60  *
61  * Event Code Description
62  * ---------- -----------
63  *
64  * 0x0000 number of elapsed cycles
65  * 0x0200 number of elapsed cycles in privileged mode
66  * 0x0280 number of elapsed cycles while SR.BL is asserted
67  * 0x0202 instruction execution
68  * 0x0203 instruction execution in parallel
69  * 0x0204 number of unconditional branches
70  * 0x0208 number of exceptions
71  * 0x0209 number of interrupts
72  * 0x0220 UTLB miss caused by instruction fetch
73  * 0x0222 UTLB miss caused by operand access
74  * 0x02a0 number of ITLB misses
75  * 0x0028 number of accesses to instruction memories
76  * 0x0029 number of accesses to instruction cache
77  * 0x002a instruction cache miss
 78  * 0x022e number of accesses to instruction X/Y memory
79  * 0x0030 number of reads to operand memories
80  * 0x0038 number of writes to operand memories
81  * 0x0031 number of operand cache read accesses
82  * 0x0039 number of operand cache write accesses
83  * 0x0032 operand cache read miss
84  * 0x003a operand cache write miss
85  * 0x0236 number of reads to operand X/Y memory
86  * 0x023e number of writes to operand X/Y memory
87  * 0x0237 number of reads to operand U memory
88  * 0x023f number of writes to operand U memory
89  * 0x0337 number of U memory read buffer misses
90  * 0x02b4 number of wait cycles due to operand read access
91  * 0x02bc number of wait cycles due to operand write access
92  * 0x0033 number of wait cycles due to operand cache read miss
93  * 0x003b number of wait cycles due to operand cache write miss
94  */
95 
/*
 * Special reserved bits used by hardware emulators, read values will
 * vary, but writes must always be 0.
 */
#define PMCAT_EMU_CLR_MASK	((1 << 24) | (1 << 16) | (1 << 8) | (1 << 0))
101 
102 static const int sh4a_general_events[] = {
103  [PERF_COUNT_HW_CPU_CYCLES] = 0x0000,
104  [PERF_COUNT_HW_INSTRUCTIONS] = 0x0202,
105  [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0029, /* I-cache */
106  [PERF_COUNT_HW_CACHE_MISSES] = 0x002a, /* I-cache */
110 };
111 
112 #define C(x) PERF_COUNT_HW_CACHE_##x
113 
114 static const int sh4a_cache_events
118 {
119  [ C(L1D) ] = {
120  [ C(OP_READ) ] = {
121  [ C(RESULT_ACCESS) ] = 0x0031,
122  [ C(RESULT_MISS) ] = 0x0032,
123  },
124  [ C(OP_WRITE) ] = {
125  [ C(RESULT_ACCESS) ] = 0x0039,
126  [ C(RESULT_MISS) ] = 0x003a,
127  },
128  [ C(OP_PREFETCH) ] = {
129  [ C(RESULT_ACCESS) ] = 0,
130  [ C(RESULT_MISS) ] = 0,
131  },
132  },
133 
134  [ C(L1I) ] = {
135  [ C(OP_READ) ] = {
136  [ C(RESULT_ACCESS) ] = 0x0029,
137  [ C(RESULT_MISS) ] = 0x002a,
138  },
139  [ C(OP_WRITE) ] = {
140  [ C(RESULT_ACCESS) ] = -1,
141  [ C(RESULT_MISS) ] = -1,
142  },
143  [ C(OP_PREFETCH) ] = {
144  [ C(RESULT_ACCESS) ] = 0,
145  [ C(RESULT_MISS) ] = 0,
146  },
147  },
148 
149  [ C(LL) ] = {
150  [ C(OP_READ) ] = {
151  [ C(RESULT_ACCESS) ] = 0x0030,
152  [ C(RESULT_MISS) ] = 0,
153  },
154  [ C(OP_WRITE) ] = {
155  [ C(RESULT_ACCESS) ] = 0x0038,
156  [ C(RESULT_MISS) ] = 0,
157  },
158  [ C(OP_PREFETCH) ] = {
159  [ C(RESULT_ACCESS) ] = 0,
160  [ C(RESULT_MISS) ] = 0,
161  },
162  },
163 
164  [ C(DTLB) ] = {
165  [ C(OP_READ) ] = {
166  [ C(RESULT_ACCESS) ] = 0x0222,
167  [ C(RESULT_MISS) ] = 0x0220,
168  },
169  [ C(OP_WRITE) ] = {
170  [ C(RESULT_ACCESS) ] = 0,
171  [ C(RESULT_MISS) ] = 0,
172  },
173  [ C(OP_PREFETCH) ] = {
174  [ C(RESULT_ACCESS) ] = 0,
175  [ C(RESULT_MISS) ] = 0,
176  },
177  },
178 
179  [ C(ITLB) ] = {
180  [ C(OP_READ) ] = {
181  [ C(RESULT_ACCESS) ] = 0,
182  [ C(RESULT_MISS) ] = 0x02a0,
183  },
184  [ C(OP_WRITE) ] = {
185  [ C(RESULT_ACCESS) ] = -1,
186  [ C(RESULT_MISS) ] = -1,
187  },
188  [ C(OP_PREFETCH) ] = {
189  [ C(RESULT_ACCESS) ] = -1,
190  [ C(RESULT_MISS) ] = -1,
191  },
192  },
193 
194  [ C(BPU) ] = {
195  [ C(OP_READ) ] = {
196  [ C(RESULT_ACCESS) ] = -1,
197  [ C(RESULT_MISS) ] = -1,
198  },
199  [ C(OP_WRITE) ] = {
200  [ C(RESULT_ACCESS) ] = -1,
201  [ C(RESULT_MISS) ] = -1,
202  },
203  [ C(OP_PREFETCH) ] = {
204  [ C(RESULT_ACCESS) ] = -1,
205  [ C(RESULT_MISS) ] = -1,
206  },
207  },
208 
209  [ C(NODE) ] = {
210  [ C(OP_READ) ] = {
211  [ C(RESULT_ACCESS) ] = -1,
212  [ C(RESULT_MISS) ] = -1,
213  },
214  [ C(OP_WRITE) ] = {
215  [ C(RESULT_ACCESS) ] = -1,
216  [ C(RESULT_MISS) ] = -1,
217  },
218  [ C(OP_PREFETCH) ] = {
219  [ C(RESULT_ACCESS) ] = -1,
220  [ C(RESULT_MISS) ] = -1,
221  },
222  },
223 };
224 
225 static int sh4a_event_map(int event)
226 {
227  return sh4a_general_events[event];
228 }
229 
230 static u64 sh4a_pmu_read(int idx)
231 {
232  return __raw_readl(PPC_PMCTR(idx));
233 }
234 
235 static void sh4a_pmu_disable(struct hw_perf_event *hwc, int idx)
236 {
237  unsigned int tmp;
238 
239  tmp = __raw_readl(PPC_CCBR(idx));
240  tmp &= ~(CCBR_CIT_MASK | CCBR_DUC);
241  __raw_writel(tmp, PPC_CCBR(idx));
242 }
243 
244 static void sh4a_pmu_enable(struct hw_perf_event *hwc, int idx)
245 {
246  unsigned int tmp;
247 
248  tmp = __raw_readl(PPC_PMCAT);
249  tmp &= ~PMCAT_EMU_CLR_MASK;
250  tmp |= idx ? PMCAT_CLR1 : PMCAT_CLR0;
251  __raw_writel(tmp, PPC_PMCAT);
252 
253  tmp = __raw_readl(PPC_CCBR(idx));
254  tmp |= (hwc->config << 6) | CCBR_CMDS | CCBR_PPCE;
255  __raw_writel(tmp, PPC_CCBR(idx));
256 
258 }
259 
260 static void sh4a_pmu_disable_all(void)
261 {
262  int i;
263 
264  for (i = 0; i < sh4a_pmu.num_events; i++)
266 }
267 
268 static void sh4a_pmu_enable_all(void)
269 {
270  int i;
271 
272  for (i = 0; i < sh4a_pmu.num_events; i++)
274 }
275 
276 static struct sh_pmu sh4a_pmu = {
277  .name = "sh4a",
278  .num_events = 2,
279  .event_map = sh4a_event_map,
280  .max_events = ARRAY_SIZE(sh4a_general_events),
281  .raw_event_mask = 0x3ff,
282  .cache_events = &sh4a_cache_events,
283  .read = sh4a_pmu_read,
284  .disable = sh4a_pmu_disable,
285  .enable = sh4a_pmu_enable,
286  .disable_all = sh4a_pmu_disable_all,
287  .enable_all = sh4a_pmu_enable_all,
288 };
289 
290 static int __init sh4a_pmu_init(void)
291 {
292  /*
293  * Make sure this CPU actually has perf counters.
294  */
295  if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) {
296  pr_notice("HW perf events unsupported, software events only.\n");
297  return -ENODEV;
298  }
299 
300  return register_sh_pmu(&sh4a_pmu);
301 }
302 early_initcall(sh4a_pmu_init);