Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
perf_event_knc.c
Go to the documentation of this file.
1 /* Driver for Intel Xeon Phi "Knights Corner" PMU */
2 
3 #include <linux/perf_event.h>
4 #include <linux/types.h>
5 
6 #include <asm/hardirq.h>
7 
8 #include "perf_event.h"
9 
10 static const u64 knc_perfmon_event_map[] =
11 {
12  [PERF_COUNT_HW_CPU_CYCLES] = 0x002a,
13  [PERF_COUNT_HW_INSTRUCTIONS] = 0x0016,
15  [PERF_COUNT_HW_CACHE_MISSES] = 0x0029,
17  [PERF_COUNT_HW_BRANCH_MISSES] = 0x002b,
18 };
19 
20 static __initconst u64 knc_hw_cache_event_ids
24 {
25  [ C(L1D) ] = {
26  [ C(OP_READ) ] = {
27  /* On Xeon Phi event "0" is a valid DATA_READ */
28  /* (L1 Data Cache Reads) Instruction. */
29  /* We code this as ARCH_PERFMON_EVENTSEL_INT as this */
30  /* bit will always be set in x86_pmu_hw_config(). */
31  [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
32  /* DATA_READ */
33  [ C(RESULT_MISS) ] = 0x0003, /* DATA_READ_MISS */
34  },
35  [ C(OP_WRITE) ] = {
36  [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
37  [ C(RESULT_MISS) ] = 0x0004, /* DATA_WRITE_MISS */
38  },
39  [ C(OP_PREFETCH) ] = {
40  [ C(RESULT_ACCESS) ] = 0x0011, /* L1_DATA_PF1 */
41  [ C(RESULT_MISS) ] = 0x001c, /* L1_DATA_PF1_MISS */
42  },
43  },
44  [ C(L1I ) ] = {
45  [ C(OP_READ) ] = {
46  [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
47  [ C(RESULT_MISS) ] = 0x000e, /* CODE_CACHE_MISS */
48  },
49  [ C(OP_WRITE) ] = {
50  [ C(RESULT_ACCESS) ] = -1,
51  [ C(RESULT_MISS) ] = -1,
52  },
53  [ C(OP_PREFETCH) ] = {
54  [ C(RESULT_ACCESS) ] = 0x0,
55  [ C(RESULT_MISS) ] = 0x0,
56  },
57  },
58  [ C(LL ) ] = {
59  [ C(OP_READ) ] = {
60  [ C(RESULT_ACCESS) ] = 0,
61  [ C(RESULT_MISS) ] = 0x10cb, /* L2_READ_MISS */
62  },
63  [ C(OP_WRITE) ] = {
64  [ C(RESULT_ACCESS) ] = 0x10cc, /* L2_WRITE_HIT */
65  [ C(RESULT_MISS) ] = 0,
66  },
67  [ C(OP_PREFETCH) ] = {
68  [ C(RESULT_ACCESS) ] = 0x10fc, /* L2_DATA_PF2 */
69  [ C(RESULT_MISS) ] = 0x10fe, /* L2_DATA_PF2_MISS */
70  },
71  },
72  [ C(DTLB) ] = {
73  [ C(OP_READ) ] = {
74  [ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
75  /* DATA_READ */
76  /* see note on L1 OP_READ */
77  [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
78  },
79  [ C(OP_WRITE) ] = {
80  [ C(RESULT_ACCESS) ] = 0x0001, /* DATA_WRITE */
81  [ C(RESULT_MISS) ] = 0x0002, /* DATA_PAGE_WALK */
82  },
83  [ C(OP_PREFETCH) ] = {
84  [ C(RESULT_ACCESS) ] = 0x0,
85  [ C(RESULT_MISS) ] = 0x0,
86  },
87  },
88  [ C(ITLB) ] = {
89  [ C(OP_READ) ] = {
90  [ C(RESULT_ACCESS) ] = 0x000c, /* CODE_READ */
91  [ C(RESULT_MISS) ] = 0x000d, /* CODE_PAGE_WALK */
92  },
93  [ C(OP_WRITE) ] = {
94  [ C(RESULT_ACCESS) ] = -1,
95  [ C(RESULT_MISS) ] = -1,
96  },
97  [ C(OP_PREFETCH) ] = {
98  [ C(RESULT_ACCESS) ] = -1,
99  [ C(RESULT_MISS) ] = -1,
100  },
101  },
102  [ C(BPU ) ] = {
103  [ C(OP_READ) ] = {
104  [ C(RESULT_ACCESS) ] = 0x0012, /* BRANCHES */
105  [ C(RESULT_MISS) ] = 0x002b, /* BRANCHES_MISPREDICTED */
106  },
107  [ C(OP_WRITE) ] = {
108  [ C(RESULT_ACCESS) ] = -1,
109  [ C(RESULT_MISS) ] = -1,
110  },
111  [ C(OP_PREFETCH) ] = {
112  [ C(RESULT_ACCESS) ] = -1,
113  [ C(RESULT_MISS) ] = -1,
114  },
115  },
116 };
117 
118 
119 static u64 knc_pmu_event_map(int hw_event)
120 {
121  return knc_perfmon_event_map[hw_event];
122 }
123 
124 static struct event_constraint knc_event_constraints[] =
125 {
126  INTEL_EVENT_CONSTRAINT(0xc3, 0x1), /* HWP_L2HIT */
127  INTEL_EVENT_CONSTRAINT(0xc4, 0x1), /* HWP_L2MISS */
128  INTEL_EVENT_CONSTRAINT(0xc8, 0x1), /* L2_READ_HIT_E */
129  INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* L2_READ_HIT_M */
130  INTEL_EVENT_CONSTRAINT(0xca, 0x1), /* L2_READ_HIT_S */
131  INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* L2_READ_MISS */
132  INTEL_EVENT_CONSTRAINT(0xcc, 0x1), /* L2_WRITE_HIT */
133  INTEL_EVENT_CONSTRAINT(0xce, 0x1), /* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
134  INTEL_EVENT_CONSTRAINT(0xcf, 0x1), /* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
135  INTEL_EVENT_CONSTRAINT(0xd7, 0x1), /* L2_VICTIM_REQ_WITH_DATA */
136  INTEL_EVENT_CONSTRAINT(0xe3, 0x1), /* SNP_HITM_BUNIT */
137  INTEL_EVENT_CONSTRAINT(0xe6, 0x1), /* SNP_HIT_L2 */
138  INTEL_EVENT_CONSTRAINT(0xe7, 0x1), /* SNP_HITM_L2 */
139  INTEL_EVENT_CONSTRAINT(0xf1, 0x1), /* L2_DATA_READ_MISS_CACHE_FILL */
140  INTEL_EVENT_CONSTRAINT(0xf2, 0x1), /* L2_DATA_WRITE_MISS_CACHE_FILL */
141  INTEL_EVENT_CONSTRAINT(0xf6, 0x1), /* L2_DATA_READ_MISS_MEM_FILL */
142  INTEL_EVENT_CONSTRAINT(0xf7, 0x1), /* L2_DATA_WRITE_MISS_MEM_FILL */
143  INTEL_EVENT_CONSTRAINT(0xfc, 0x1), /* L2_DATA_PF2 */
144  INTEL_EVENT_CONSTRAINT(0xfd, 0x1), /* L2_DATA_PF2_DROP */
145  INTEL_EVENT_CONSTRAINT(0xfe, 0x1), /* L2_DATA_PF2_MISS */
146  INTEL_EVENT_CONSTRAINT(0xff, 0x1), /* L2_DATA_HIT_INFLIGHT_PF2 */
148 };
149 
/* MSR numbers of the KNC global performance-monitoring registers. */
#define MSR_KNC_IA32_PERF_GLOBAL_STATUS		0x2d
#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL	0x2e
#define MSR_KNC_IA32_PERF_GLOBAL_CTRL		0x2f

/*
 * One bit per counter.
 * NOTE(review): presumably the per-counter enable bits of
 * MSR_KNC_IA32_PERF_GLOBAL_CTRL -- confirm against the PMU documentation.
 */
#define KNC_ENABLE_COUNTER0			0x1
#define KNC_ENABLE_COUNTER1			0x2
156 
157 static void knc_pmu_disable_all(void)
158 {
159  u64 val;
160 
161  rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
163  wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
164 }
165 
166 static void knc_pmu_enable_all(int added)
167 {
168  u64 val;
169 
170  rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
172  wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
173 }
174 
175 static inline void
176 knc_pmu_disable_event(struct perf_event *event)
177 {
178  struct hw_perf_event *hwc = &event->hw;
179  u64 val;
180 
181  val = hwc->config;
183 
184  (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
185 }
186 
187 static void knc_pmu_enable_event(struct perf_event *event)
188 {
189  struct hw_perf_event *hwc = &event->hw;
190  u64 val;
191 
192  val = hwc->config;
194 
195  (void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
196 }
197 
198 static inline u64 knc_pmu_get_status(void)
199 {
200  u64 status;
201 
202  rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_STATUS, status);
203 
204  return status;
205 }
206 
207 static inline void knc_pmu_ack_status(u64 ack)
208 {
210 }
211 
212 static int knc_pmu_handle_irq(struct pt_regs *regs)
213 {
214  struct perf_sample_data data;
215  struct cpu_hw_events *cpuc;
216  int handled = 0;
217  int bit, loops;
218  u64 status;
219 
220  cpuc = &__get_cpu_var(cpu_hw_events);
221 
222  knc_pmu_disable_all();
223 
224  status = knc_pmu_get_status();
225  if (!status) {
226  knc_pmu_enable_all(0);
227  return handled;
228  }
229 
230  loops = 0;
231 again:
232  knc_pmu_ack_status(status);
233  if (++loops > 100) {
234  WARN_ONCE(1, "perf: irq loop stuck!\n");
236  goto done;
237  }
238 
239  inc_irq_stat(apic_perf_irqs);
240 
241  for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
242  struct perf_event *event = cpuc->events[bit];
243 
244  handled++;
245 
246  if (!test_bit(bit, cpuc->active_mask))
247  continue;
248 
249  if (!intel_pmu_save_and_restart(event))
250  continue;
251 
252  perf_sample_data_init(&data, 0, event->hw.last_period);
253 
254  if (perf_event_overflow(event, &data, regs))
255  x86_pmu_stop(event, 0);
256  }
257 
258  /*
259  * Repeat if there is more work to be done:
260  */
261  status = knc_pmu_get_status();
262  if (status)
263  goto again;
264 
265 done:
266  knc_pmu_enable_all(0);
267 
268  return handled;
269 }
270 
271 
272 PMU_FORMAT_ATTR(event, "config:0-7" );
273 PMU_FORMAT_ATTR(umask, "config:8-15" );
274 PMU_FORMAT_ATTR(edge, "config:18" );
275 PMU_FORMAT_ATTR(inv, "config:23" );
276 PMU_FORMAT_ATTR(cmask, "config:24-31" );
277 
278 static struct attribute *intel_knc_formats_attr[] = {
279  &format_attr_event.attr,
280  &format_attr_umask.attr,
281  &format_attr_edge.attr,
282  &format_attr_inv.attr,
283  &format_attr_cmask.attr,
284  NULL,
285 };
286 
287 static __initconst struct x86_pmu knc_pmu = {
288  .name = "knc",
289  .handle_irq = knc_pmu_handle_irq,
290  .disable_all = knc_pmu_disable_all,
291  .enable_all = knc_pmu_enable_all,
292  .enable = knc_pmu_enable_event,
293  .disable = knc_pmu_disable_event,
294  .hw_config = x86_pmu_hw_config,
295  .schedule_events = x86_schedule_events,
296  .eventsel = MSR_KNC_EVNTSEL0,
297  .perfctr = MSR_KNC_PERFCTR0,
298  .event_map = knc_pmu_event_map,
299  .max_events = ARRAY_SIZE(knc_perfmon_event_map),
300  .apic = 1,
301  .max_period = (1ULL << 39) - 1,
302  .version = 0,
303  .num_counters = 2,
304  .cntval_bits = 40,
305  .cntval_mask = (1ULL << 40) - 1,
306  .get_event_constraints = x86_get_event_constraints,
307  .event_constraints = knc_event_constraints,
308  .format_attrs = intel_knc_formats_attr,
309 };
310 
312 {
313  x86_pmu = knc_pmu;
314 
315  memcpy(hw_cache_event_ids, knc_hw_cache_event_ids,
316  sizeof(hw_cache_event_ids));
317 
318  return 0;
319 }