Linux Kernel  3.7.1
perf_event.c
#undef DEBUG

/*
 * ARM performance counter support.
 *
 * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
 * Copyright (C) 2010 ARM Ltd., Will Deacon <[email protected]>
 *
 * This code is based on the sparc64 perf event code, which is in turn based
 * on the x86 code. Callchain code is based on the ARM OProfile backtrace
 * code.
 */
#define pr_fmt(fmt) "hw perfevents: " fmt

#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/uaccess.h>

#include <asm/irq_regs.h>
#include <asm/pmu.h>
#include <asm/stacktrace.h>

static int
armpmu_map_cache_event(const unsigned (*cache_map)
                                      [PERF_COUNT_HW_CACHE_MAX]
                                      [PERF_COUNT_HW_CACHE_OP_MAX]
                                      [PERF_COUNT_HW_CACHE_RESULT_MAX],
                       u64 config)
{
        unsigned int cache_type, cache_op, cache_result, ret;

        cache_type = (config >> 0) & 0xff;
        if (cache_type >= PERF_COUNT_HW_CACHE_MAX)
                return -EINVAL;

        cache_op = (config >> 8) & 0xff;
        if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX)
                return -EINVAL;

        cache_result = (config >> 16) & 0xff;
        if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
                return -EINVAL;

        ret = (int)(*cache_map)[cache_type][cache_op][cache_result];

        if (ret == CACHE_OP_UNSUPPORTED)
                return -ENOENT;

        return ret;
}
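
/*
 * Worked example: the perf ABI packs a PERF_TYPE_HW_CACHE config as
 * (type) | (op << 8) | (result << 16). An L1 data-cache read miss,
 *
 *      config = PERF_COUNT_HW_CACHE_L1D |
 *               (PERF_COUNT_HW_CACHE_OP_READ << 8) |
 *               (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
 *
 * decodes above to cache_type == 0, cache_op == 0 and cache_result == 1, and
 * is looked up as (*cache_map)[0][0][1], which the PMU back-end fills in with
 * its hardware event number (or CACHE_OP_UNSUPPORTED).
 */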

static int
armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config)
{
        int mapping = (*event_map)[config];
        return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping;
}

static int
armpmu_map_raw_event(u32 raw_event_mask, u64 config)
{
        return (int)(config & raw_event_mask);
}

int
armpmu_map_event(struct perf_event *event,
                 const unsigned (*event_map)[PERF_COUNT_HW_MAX],
                 const unsigned (*cache_map)
                                [PERF_COUNT_HW_CACHE_MAX]
                                [PERF_COUNT_HW_CACHE_OP_MAX]
                                [PERF_COUNT_HW_CACHE_RESULT_MAX],
                 u32 raw_event_mask)
{
        u64 config = event->attr.config;

        switch (event->attr.type) {
        case PERF_TYPE_HARDWARE:
                return armpmu_map_hw_event(event_map, config);
        case PERF_TYPE_HW_CACHE:
                return armpmu_map_cache_event(cache_map, config);
        case PERF_TYPE_RAW:
                return armpmu_map_raw_event(raw_event_mask, config);
        }

        return -ENOENT;
}

int
armpmu_event_set_period(struct perf_event *event,
                        struct hw_perf_event *hwc,
                        int idx)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0;

        /* The period may have been changed by PERF_EVENT_IOC_PERIOD */
        if (unlikely(period != hwc->last_period))
                left = period - (hwc->last_period - left);

        if (unlikely(left <= -period)) {
                left = period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (unlikely(left <= 0)) {
                left += period;
                local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }

        if (left > (s64)armpmu->max_period)
                left = armpmu->max_period;

        local64_set(&hwc->prev_count, (u64)-left);

        armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);

        perf_event_update_userpage(event);

        return ret;
}
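
/*
 * Worked example: for a 32-bit counter (max_period == 0xffffffff) and a
 * sample_period of 0x10000, "left" is 0x10000 and the counter is programmed
 * to (u64)(-0x10000) & 0xffffffff == 0xffff0000, so it overflows and raises
 * the PMU interrupt after exactly 0x10000 further events. prev_count is set
 * to the same starting value so that armpmu_event_update() can compute the
 * delta later.
 */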

u64
armpmu_event_update(struct perf_event *event,
                    struct hw_perf_event *hwc,
                    int idx)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        u64 delta, prev_raw_count, new_raw_count;

again:
        prev_raw_count = local64_read(&hwc->prev_count);
        new_raw_count = armpmu->read_counter(idx);

        if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                            new_raw_count) != prev_raw_count)
                goto again;

        delta = (new_raw_count - prev_raw_count) & armpmu->max_period;

        local64_add(delta, &event->count);
        local64_sub(delta, &hwc->period_left);

        return new_raw_count;
}
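
/*
 * Worked example of the wrap-safe delta: with a 32-bit counter, a new read
 * of 0x00000010 after a prev_count of 0xfffffff0 gives
 * (0x00000010 - 0xfffffff0) & 0xffffffff == 0x20, i.e. the 32 events counted
 * across the wrap. The local64_cmpxchg() retry loop guards against a racing
 * update of prev_count, e.g. from the overflow interrupt handler.
 */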

static void
armpmu_read(struct perf_event *event)
{
        struct hw_perf_event *hwc = &event->hw;

        /* Don't read disabled counters! */
        if (hwc->idx < 0)
                return;

        armpmu_event_update(event, hwc, hwc->idx);
}

static void
armpmu_stop(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;

        /*
         * ARM pmu always has to update the counter, so ignore
         * PERF_EF_UPDATE, see comments in armpmu_start().
         */
        if (!(hwc->state & PERF_HES_STOPPED)) {
                armpmu->disable(hwc, hwc->idx);
                armpmu_event_update(event, hwc, hwc->idx);
                hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
        }
}

static void
armpmu_start(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;

        /*
         * ARM pmu always has to reprogram the period, so ignore
         * PERF_EF_RELOAD, see the comment below.
         */
        if (flags & PERF_EF_RELOAD)
                WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));

        hwc->state = 0;
        /*
         * Set the period again. Some counters can't be stopped, so when we
         * were stopped we simply disabled the IRQ source and the counter
         * may have been left counting. If we don't do this step then we may
         * get an interrupt too soon or *way* too late if the overflow has
         * happened since disabling.
         */
        armpmu_event_set_period(event, hwc, hwc->idx);
        armpmu->enable(hwc, hwc->idx);
}

static void
armpmu_del(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct pmu_hw_events *hw_events = armpmu->get_hw_events();
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;

        WARN_ON(idx < 0);

        armpmu_stop(event, PERF_EF_UPDATE);
        hw_events->events[idx] = NULL;
        clear_bit(idx, hw_events->used_mask);

        perf_event_update_userpage(event);
}

static int
armpmu_add(struct perf_event *event, int flags)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct pmu_hw_events *hw_events = armpmu->get_hw_events();
        struct hw_perf_event *hwc = &event->hw;
        int idx;
        int err = 0;

        perf_pmu_disable(event->pmu);

        /* If we don't have space for the counter then finish early. */
        idx = armpmu->get_event_idx(hw_events, hwc);
        if (idx < 0) {
                err = idx;
                goto out;
        }

        /*
         * If there is an event in the counter we are going to use then make
         * sure it is disabled.
         */
        event->hw.idx = idx;
        armpmu->disable(hwc, idx);
        hw_events->events[idx] = event;

        hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
        if (flags & PERF_EF_START)
                armpmu_start(event, PERF_EF_RELOAD);

        /* Propagate our changes to the userspace mapping. */
        perf_event_update_userpage(event);

out:
        perf_pmu_enable(event->pmu);
        return err;
}

static int
validate_event(struct pmu_hw_events *hw_events,
               struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event fake_event = event->hw;
        struct pmu *leader_pmu = event->group_leader->pmu;

        if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF)
                return 1;

        return armpmu->get_event_idx(hw_events, &fake_event) >= 0;
}

static int
validate_group(struct perf_event *event)
{
        struct perf_event *sibling, *leader = event->group_leader;
        struct pmu_hw_events fake_pmu;
        DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS);

        /*
         * Initialise the fake PMU. We only need to populate the
         * used_mask for the purposes of validation.
         */
        memset(fake_used_mask, 0, sizeof(fake_used_mask));
        fake_pmu.used_mask = fake_used_mask;

        if (!validate_event(&fake_pmu, leader))
                return -EINVAL;

        list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
                if (!validate_event(&fake_pmu, sibling))
                        return -EINVAL;
        }

        if (!validate_event(&fake_pmu, event))
                return -EINVAL;

        return 0;
}
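
/*
 * Example: on a PMU exposing two counters, adding a third hardware event to
 * a group makes the third get_event_idx() call on the fake PMU fail, so
 * validate_group() returns -EINVAL and the group is rejected up front rather
 * than being left permanently unschedulable.
 */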

static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
{
        struct arm_pmu *armpmu = (struct arm_pmu *) dev;
        struct platform_device *plat_device = armpmu->plat_device;
        struct arm_pmu_platdata *plat = dev_get_platdata(&plat_device->dev);

        if (plat && plat->handle_irq)
                return plat->handle_irq(irq, dev, armpmu->handle_irq);
        else
                return armpmu->handle_irq(irq, dev);
}

static void
armpmu_release_hardware(struct arm_pmu *armpmu)
{
        armpmu->free_irq();
        pm_runtime_put_sync(&armpmu->plat_device->dev);
}

static int
armpmu_reserve_hardware(struct arm_pmu *armpmu)
{
        int err;
        struct platform_device *pmu_device = armpmu->plat_device;

        if (!pmu_device)
                return -ENODEV;

        pm_runtime_get_sync(&pmu_device->dev);
        err = armpmu->request_irq(armpmu_dispatch_irq);
        if (err) {
                armpmu_release_hardware(armpmu);
                return err;
        }

        return 0;
}

static void
hw_perf_event_destroy(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        atomic_t *active_events = &armpmu->active_events;
        struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex;

        if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) {
                armpmu_release_hardware(armpmu);
                mutex_unlock(pmu_reserve_mutex);
        }
}

static int
event_requires_mode_exclusion(struct perf_event_attr *attr)
{
        return attr->exclude_idle || attr->exclude_user ||
               attr->exclude_kernel || attr->exclude_hv;
}

static int
__hw_perf_event_init(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        struct hw_perf_event *hwc = &event->hw;
        int mapping, err;

        mapping = armpmu->map_event(event);

        if (mapping < 0) {
                pr_debug("event %x:%llx not supported\n", event->attr.type,
                         event->attr.config);
                return mapping;
        }

        /*
         * We don't assign an index until we actually place the event onto
         * hardware. Use -1 to signify that we haven't decided where to put it
         * yet. For SMP systems, each core has its own PMU so we can't do any
         * clever allocation or constraints checking at this point.
         */
        hwc->idx = -1;
        hwc->config_base = 0;
        hwc->config = 0;
        hwc->event_base = 0;

        /*
         * Check whether we need to exclude the counter from certain modes.
         */
        if ((!armpmu->set_event_filter ||
             armpmu->set_event_filter(hwc, &event->attr)) &&
             event_requires_mode_exclusion(&event->attr)) {
                pr_debug("ARM performance counters do not support "
                         "mode exclusion\n");
                return -EOPNOTSUPP;
        }

        /*
         * Store the event encoding into the config_base field.
         */
        hwc->config_base |= (unsigned long)mapping;

        if (!hwc->sample_period) {
                /*
                 * For non-sampling runs, limit the sample_period to half
                 * of the counter width. That way, the new counter value
                 * is far less likely to overtake the previous one unless
                 * you have some serious IRQ latency issues.
                 */
                hwc->sample_period = armpmu->max_period >> 1;
                hwc->last_period = hwc->sample_period;
                local64_set(&hwc->period_left, hwc->sample_period);
        }

        err = 0;
        if (event->group_leader != event) {
                err = validate_group(event);
                if (err)
                        return -EINVAL;
        }

        return err;
}
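
/*
 * Example: with 32-bit counters (max_period == 0xffffffff), a counting
 * (non-sampling) event gets sample_period == 0x7fffffff above, so the
 * counter is reprogrammed roughly every 2^31 events and a late overflow
 * interrupt cannot be confused with a second wrap of the counter.
 */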

static int armpmu_event_init(struct perf_event *event)
{
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
        int err = 0;
        atomic_t *active_events = &armpmu->active_events;

        /* does not support taken branch sampling */
        if (has_branch_stack(event))
                return -EOPNOTSUPP;

        if (armpmu->map_event(event) == -ENOENT)
                return -ENOENT;

        event->destroy = hw_perf_event_destroy;

        if (!atomic_inc_not_zero(active_events)) {
                mutex_lock(&armpmu->reserve_mutex);
                if (atomic_read(active_events) == 0)
                        err = armpmu_reserve_hardware(armpmu);

                if (!err)
                        atomic_inc(active_events);
                mutex_unlock(&armpmu->reserve_mutex);
        }

        if (err)
                return err;

        err = __hw_perf_event_init(event);
        if (err)
                hw_perf_event_destroy(event);

        return err;
}

static void armpmu_enable(struct pmu *pmu)
{
        struct arm_pmu *armpmu = to_arm_pmu(pmu);
        struct pmu_hw_events *hw_events = armpmu->get_hw_events();
        int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);

        if (enabled)
                armpmu->start();
}

static void armpmu_disable(struct pmu *pmu)
{
        struct arm_pmu *armpmu = to_arm_pmu(pmu);
        armpmu->stop();
}

#ifdef CONFIG_PM_RUNTIME
static int armpmu_runtime_resume(struct device *dev)
{
        struct arm_pmu_platdata *plat = dev_get_platdata(dev);

        if (plat && plat->runtime_resume)
                return plat->runtime_resume(dev);

        return 0;
}

static int armpmu_runtime_suspend(struct device *dev)
{
        struct arm_pmu_platdata *plat = dev_get_platdata(dev);

        if (plat && plat->runtime_suspend)
                return plat->runtime_suspend(dev);

        return 0;
}
#endif

const struct dev_pm_ops armpmu_dev_pm_ops = {
        SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL)
};

static void __init armpmu_init(struct arm_pmu *armpmu)
{
        atomic_set(&armpmu->active_events, 0);
        mutex_init(&armpmu->reserve_mutex);

        armpmu->pmu = (struct pmu) {
                .pmu_enable     = armpmu_enable,
                .pmu_disable    = armpmu_disable,
                .event_init     = armpmu_event_init,
                .add            = armpmu_add,
                .del            = armpmu_del,
                .start          = armpmu_start,
                .stop           = armpmu_stop,
                .read           = armpmu_read,
        };
}

int armpmu_register(struct arm_pmu *armpmu, char *name, int type)
{
        armpmu_init(armpmu);
        pr_info("enabled with %s PMU driver, %d counters available\n",
                armpmu->name, armpmu->num_events);
        return perf_pmu_register(&armpmu->pmu, name, type);
}
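
/*
 * A CPU PMU back-end is expected to fill in a struct arm_pmu and hand it to
 * armpmu_register(). A minimal sketch; the "my_pmu_*" callbacks and the
 * field values here are purely illustrative, not taken from any real
 * back-end:
 *
 *      static struct arm_pmu my_pmu = {
 *              .name           = "my-pmu",
 *              .handle_irq     = my_pmu_handle_irq,
 *              .enable         = my_pmu_enable_event,
 *              .disable        = my_pmu_disable_event,
 *              .read_counter   = my_pmu_read_counter,
 *              .write_counter  = my_pmu_write_counter,
 *              .get_event_idx  = my_pmu_get_event_idx,
 *              .start          = my_pmu_start,
 *              .stop           = my_pmu_stop,
 *              .map_event      = my_pmu_map_event,
 *              .request_irq    = my_pmu_request_irq,
 *              .free_irq       = my_pmu_free_irq,
 *              .get_hw_events  = my_pmu_get_hw_events,
 *              .num_events     = 4,
 *              .max_period     = (1LLU << 32) - 1,
 *      };
 *
 *      armpmu_register(&my_pmu, "my-pmu", -1);
 *
 * These are exactly the hooks this file invokes (armpmu->enable(),
 * armpmu->read_counter(), armpmu->get_event_idx(), ...); a negative type
 * asks perf_pmu_register() to allocate a dynamic PMU type.
 */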

/*
 * Callchain handling code.
 */

/*
 * The registers we're interested in are at the end of the variable
 * length saved register structure. The fp points at the end of this
 * structure so the address of this struct is:
 * (struct frame_tail *)(xxx->fp)-1
 *
 * This code has been adapted from the ARM OProfile support.
 */
struct frame_tail {
        struct frame_tail __user *fp;
        unsigned long sp;
        unsigned long lr;
} __attribute__((packed));
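
/*
 * Illustration: frame_tail mirrors the three words saved just below the
 * address held in the frame pointer, so
 *
 *      tail = (struct frame_tail __user *)fp - 1;
 *
 * points at the current frame's saved {fp, sp, lr}. tail->lr is the return
 * address recorded in the callchain, and tail->fp, followed by
 * user_backtrace() below, is the caller's frame pointer at a higher address.
 */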

/*
 * Get the return address for a single stackframe and return a pointer to the
 * next frame tail.
 */
static struct frame_tail __user *
user_backtrace(struct frame_tail __user *tail,
               struct perf_callchain_entry *entry)
{
        struct frame_tail buftail;

        /* Also check accessibility of one struct frame_tail beyond */
        if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
                return NULL;
        if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
                return NULL;

        perf_callchain_store(entry, buftail.lr);

        /*
         * Frame pointers should strictly progress back up the stack
         * (towards higher addresses).
         */
        if (tail + 1 >= buftail.fp)
                return NULL;

        return buftail.fp - 1;
}

void
perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
        struct frame_tail __user *tail;


        tail = (struct frame_tail __user *)regs->ARM_fp - 1;

        while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
               tail && !((unsigned long)tail & 0x3))
                tail = user_backtrace(tail, entry);
}

/*
 * Gets called by walk_stackframe() for every stackframe. This will be called
 * whilst unwinding the stackframe and is like a subroutine return so we use
 * the PC.
 */
static int
callchain_trace(struct stackframe *fr,
                void *data)
{
        struct perf_callchain_entry *entry = data;
        perf_callchain_store(entry, fr->pc);
        return 0;
}

void
perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
        struct stackframe fr;

        fr.fp = regs->ARM_fp;
        fr.sp = regs->ARM_sp;
        fr.lr = regs->ARM_lr;
        fr.pc = regs->ARM_pc;
        walk_stackframe(&fr, callchain_trace, entry);
}