18 #include <linux/poll.h>
19 #include <linux/slab.h>
20 #include <linux/hash.h>
24 #include <linux/ptrace.h>
25 #include <linux/reboot.h>
27 #include <linux/device.h>
28 #include <linux/export.h>
36 #include <linux/perf_event.h>
38 #include <linux/hw_breakpoint.h>
43 #include <asm/irq_regs.h>
52 static void remote_function(void *data)
104 static int cpu_function_call(int cpu, int (*func) (void *info), void *info)
118 #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\
119 PERF_FLAG_FD_OUTPUT |\
120 PERF_FLAG_PID_CGROUP)
125 #define PERF_SAMPLE_BRANCH_PERM_PLM \
126 (PERF_SAMPLE_BRANCH_KERNEL |\
127 PERF_SAMPLE_BRANCH_HV)
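/*
 * Illustrative userspace sketch (not part of this file): a minimal
 * perf_event_open() call of the kind this core code services, counting
 * instructions for the calling process.  The syscall has no glibc
 * wrapper, hence the syscall(2) shim; error handling is minimal.
 */
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.disabled = 1;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;

	/* pid == 0, cpu == -1: measure the calling process on any CPU. */
	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd == -1) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	printf("measuring this printf\n");
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("instructions: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}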
158 int sysctl_perf_event_paranoid __read_mostly = 1;
161 int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024);
166 #define DEFAULT_MAX_SAMPLE_RATE 100000
168 static int max_samples_per_tick __read_mostly = DIV_ROUND_UP(DEFAULT_MAX_SAMPLE_RATE, HZ);
172 void __user *buffer, size_t *lenp,
180 max_samples_per_tick = DIV_ROUND_UP(sysctl_perf_event_sample_rate, HZ);
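/*
 * Illustrative sketch of the sysctl arithmetic above: the per-tick
 * sample budget is the sample rate divided (rounded up) by the timer
 * frequency.  With the default of 100000 samples/sec and an assumed
 * HZ of 1000, that allows 100 samples per tick.
 */
#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	int hz = 1000;			/* assumed CONFIG_HZ for the example */
	int sample_rate = 100000;	/* DEFAULT_MAX_SAMPLE_RATE */

	printf("max_samples_per_tick = %d\n", DIV_ROUND_UP(sample_rate, hz));
	return 0;
}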
207 static inline u64 perf_clock(void)
234 #ifdef CONFIG_CGROUP_PERF
241 static inline struct perf_cgroup *
244 return container_of(task_subsys_state(task, perf_subsys_id),
245 struct perf_cgroup, css);
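/*
 * Illustrative sketch of the container_of() pattern used above: given a
 * pointer to an embedded member (here, the css embedded in struct
 * perf_cgroup), recover the enclosing structure.  Standalone userspace
 * version with a hypothetical structure, for demonstration only.
 */
#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct css { int refcnt; };
struct my_cgroup {
	int id;
	struct css css;		/* embedded member, like perf_cgroup::css */
};

int main(void)
{
	struct my_cgroup grp = { .id = 7 };
	struct css *member = &grp.css;
	struct my_cgroup *back = container_of(member, struct my_cgroup, css);

	printf("recovered id = %d\n", back->id);	/* prints 7 */
	return 0;
}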
254 return !event->cgrp || event->cgrp == cpuctx->cgrp;
259 return css_tryget(&event->cgrp->css);
264 css_put(&event->cgrp->css);
269 perf_put_cgroup(event);
275 return event->cgrp != NULL;
280 struct perf_cgroup_info *t;
286 static inline void __update_cgrp_time(struct perf_cgroup *cgrp)
288 struct perf_cgroup_info *info;
295 info->time += now - info->timestamp;
296 info->timestamp = now;
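/*
 * Illustrative sketch of the accounting pattern in __update_cgrp_time()
 * above: accumulate the time elapsed since the last update, then move
 * the timestamp forward so the next update only adds the new delta.
 * Userspace stand-in; the clock values here are made up.
 */
#include <stdint.h>
#include <stdio.h>

struct time_info {
	uint64_t time;		/* total accumulated time */
	uint64_t timestamp;	/* point at which 'time' was last updated */
};

static void update_time(struct time_info *info, uint64_t now)
{
	info->time += now - info->timestamp;
	info->timestamp = now;
}

int main(void)
{
	struct time_info info = { .time = 0, .timestamp = 100 };

	update_time(&info, 150);	/* adds 50 */
	update_time(&info, 175);	/* adds 25 */
	printf("accumulated: %llu\n", (unsigned long long)info.time); /* 75 */
	return 0;
}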
299 static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
301 struct perf_cgroup *cgrp_out = cpuctx->cgrp;
303 __update_cgrp_time(cgrp_out);
306 static inline void update_cgrp_time_from_event(struct perf_event *event)
308 struct perf_cgroup *cgrp;
314 if (!is_cgroup_event(event))
317 cgrp = perf_cgroup_from_task(current);
321 if (cgrp == event->cgrp)
322 __update_cgrp_time(event->cgrp);
326 perf_cgroup_set_timestamp(struct task_struct *task,
329 struct perf_cgroup *cgrp;
330 struct perf_cgroup_info *info;
340 cgrp = perf_cgroup_from_task(task);
345 #define PERF_CGROUP_SWOUT 0x1
346 #define PERF_CGROUP_SWIN 0x2
373 list_for_each_entry_rcu(pmu, &pmus, entry) {
385 if (cpuctx->ctx.nr_cgroups > 0) {
386 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
389 if (mode & PERF_CGROUP_SWOUT) {
398 if (mode & PERF_CGROUP_SWIN) {
405 cpuctx->cgrp = perf_cgroup_from_task(task);
406 cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
409 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
418 static inline void perf_cgroup_sched_out(struct task_struct *task,
421 struct perf_cgroup *cgrp1;
422 struct perf_cgroup *cgrp2 = NULL;
427 cgrp1 = perf_cgroup_from_task(task);
434 cgrp2 = perf_cgroup_from_task(next);
448 struct perf_cgroup *cgrp1;
449 struct perf_cgroup *cgrp2 = NULL;
454 cgrp1 = perf_cgroup_from_task(task);
457 cgrp2 = perf_cgroup_from_task(prev);
468 static inline int perf_cgroup_connect(int fd, struct perf_event *event,
472 struct perf_cgroup *cgrp;
473 struct cgroup_subsys_state *css;
474 struct fd f = fdget(fd);
490 if (!perf_tryget_cgroup(event)) {
501 if (group_leader && group_leader->cgrp != cgrp) {
502 perf_detach_cgroup(event);
511 perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
513 struct perf_cgroup_info *t;
515 event->shadow_ctx_time = now - t->timestamp;
519 perf_cgroup_defer_enabled(struct perf_event *event)
527 if (is_cgroup_event(event) && !perf_cgroup_match(event))
528 event->cgrp_defer_enabled = 1;
532 perf_cgroup_mark_enabled(struct perf_event *event,
538 if (!event->cgrp_defer_enabled)
541 event->cgrp_defer_enabled = 0;
543 event->tstamp_enabled = tstamp - event->total_time_enabled;
546 sub->tstamp_enabled = tstamp - sub->total_time_enabled;
547 sub->cgrp_defer_enabled = 0;
559 static inline void perf_detach_cgroup(struct perf_event *event)
562 static inline int is_cgroup_event(struct perf_event *event)
567 static inline u64 perf_cgroup_event_cgrp_time(struct perf_event *event)
572 static inline void update_cgrp_time_from_event(struct perf_event *event)
576 static inline void update_cgrp_time_from_cpuctx(struct perf_cpu_context *cpuctx)
580 static inline void perf_cgroup_sched_out(struct task_struct *task,
585 static inline void perf_cgroup_sched_in(struct task_struct *prev,
598 perf_cgroup_set_timestamp(struct task_struct *task,
609 perf_cgroup_set_shadow_time(struct perf_event *event, u64 now)
613 static inline u64 perf_cgroup_event_time(struct perf_event *event)
619 perf_cgroup_defer_enabled(struct perf_event *event)
624 perf_cgroup_mark_enabled(struct perf_event *event,
651 static void perf_pmu_rotate_start(struct pmu *pmu)
673 put_task_struct(ctx->task);
692 event = event->parent;
703 event = event->parent;
705 return task_pid_nr_ns(p, event->ns);
717 id = event->parent->id;
728 perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
767 perf_pin_task_context(struct task_struct *task, int ctxn)
772 ctx = perf_lock_task_context(task, ctxn, &flags);
794 u64 now = perf_clock();
804 if (is_cgroup_event(event))
805 return perf_cgroup_event_time(event);
807 return ctx ? ctx->time : 0;
814 static void update_event_times(struct perf_event *event)
832 if (is_cgroup_event(event))
833 run_end = perf_cgroup_event_time(event);
837 run_end = event->tstamp_stopped;
839 event->total_time_enabled = run_end - event->tstamp_enabled;
842 run_end = event->tstamp_stopped;
844 run_end = perf_event_time(event);
846 event->total_time_running = run_end - event->tstamp_running;
853 static void update_group_times(struct perf_event *leader)
857 update_event_times(leader);
859 update_event_times(event);
865 if (event->attr.pinned)
866 return &ctx->pinned_groups;
868 return &ctx->flexible_groups;
886 if (event->group_leader == event) {
889 if (is_software_event(event))
892 list = ctx_group_list(event, ctx);
896 if (is_cgroup_event(event))
899 if (has_branch_stack(event))
902 list_add_rcu(&event->event_entry, &ctx->event_list);
904 perf_pmu_rotate_start(ctx->pmu);
906 if (event->attr.inherit_stat)
914 static void perf_event__read_size(struct perf_event *event)
927 entry += sizeof(u64);
930 nr += event->group_leader->nr_siblings;
935 event->read_size = size;
938 static void perf_event__header_size(struct perf_event *event)
940 struct perf_sample_data *data;
944 perf_event__read_size(event);
947 size += sizeof(data->ip);
950 size += sizeof(data->addr);
953 size += sizeof(data->period);
956 size += event->read_size;
958 event->header_size = size;
961 static void perf_event__id_header_size(struct perf_event *event)
963 struct perf_sample_data *data;
964 u64 sample_type = event->attr.sample_type;
968 size += sizeof(data->tid_entry);
971 size += sizeof(data->time);
974 size += sizeof(data->id);
977 size += sizeof(data->stream_id);
980 size += sizeof(data->cpu_entry);
982 event->id_header_size = size;
985 static void perf_group_attach(struct perf_event *event)
997 if (group_leader == event)
1001 !is_software_event(event))
1004 list_add_tail(&event->group_entry, &group_leader->sibling_list);
1005 group_leader->nr_siblings++;
1007 perf_event__header_size(group_leader);
1010 perf_event__header_size(pos);
1029 if (is_cgroup_event(event)) {
1031 cpuctx = __get_cpu_context(ctx);
1037 if (!ctx->nr_cgroups)
1041 if (has_branch_stack(event))
1042 ctx->nr_branch_stack--;
1045 if (event->attr.inherit_stat)
1048 list_del_rcu(&event->event_entry);
1050 if (event->group_leader == event)
1051 list_del_init(&event->group_entry);
1053 update_group_times(event);
1066 static void perf_group_detach(struct perf_event *event)
1082 if (event->group_leader != event) {
1083 list_del_init(&event->group_entry);
1084 event->group_leader->nr_siblings--;
1088 if (!list_empty(&event->group_entry))
1089 list = &event->group_entry;
1098 list_move_tail(&sibling->group_entry, list);
1099 sibling->group_leader = sibling;
1102 sibling->group_flags = event->group_flags;
1106 perf_event__header_size(event->group_leader);
1109 perf_event__header_size(tmp);
1116 && perf_cgroup_match(event);
1124 u64 tstamp = perf_event_time(event);
1133 && !event_filter_match(event)) {
1134 delta = tstamp - event->tstamp_stopped;
1135 event->tstamp_running += delta;
1136 event->tstamp_stopped = tstamp;
1143 if (event->pending_disable) {
1144 event->pending_disable = 0;
1147 event->tstamp_stopped = tstamp;
1148 event->pmu->del(event, 0);
1151 if (!is_software_event(event))
1154 if (event->attr.freq && event->attr.sample_freq)
1161 group_sched_out(struct perf_event *group_event,
1166 int state = group_event->state;
1168 event_sched_out(group_event, cpuctx, ctx);
1174 event_sched_out(event, cpuctx, ctx);
1177 cpuctx->exclusive = 0;
1186 static int __perf_remove_from_context(void *info)
1193 event_sched_out(event, cpuctx, ctx);
1194 list_del_event(event, ctx);
1218 static void perf_remove_from_context(struct perf_event *event)
1230 cpu_function_call(event->cpu, __perf_remove_from_context, event);
1235 if (!task_function_call(task, __perf_remove_from_context, event))
1252 list_del_event(event, ctx);
1282 update_context_time(ctx);
1283 update_cgrp_time_from_event(event);
1284 update_group_times(event);
1285 if (event == event->group_leader)
1286 group_sched_out(event, cpuctx, ctx);
1288 event_sched_out(event, cpuctx, ctx);
1346 update_group_times(event);
1353 static void perf_set_shadow_time(struct perf_event *event,
1382 if (is_cgroup_event(event))
1383 perf_cgroup_set_shadow_time(event, tstamp);
1385 event->shadow_ctx_time = tstamp - ctx->timestamp;
1388 #define MAX_INTERRUPTS (~0ULL)
1397 u64 tstamp = perf_event_time(event);
1411 perf_log_throttle(event, 1);
1412 event->hw.interrupts = 0;
1426 event->tstamp_running += tstamp - event->tstamp_stopped;
1428 perf_set_shadow_time(event, ctx, tstamp);
1430 if (!is_software_event(event))
1433 if (event->attr.freq && event->attr.sample_freq)
1436 if (event->attr.exclusive)
1443 group_sched_in(struct perf_event *group_event,
1448 struct pmu *pmu = group_event->pmu;
1450 bool simulate = false;
1457 if (event_sched_in(group_event, cpuctx, ctx)) {
1466 if (event_sched_in(event, cpuctx, ctx)) {
1467 partial_group = event;
1491 if (event == partial_group)
1495 event->tstamp_running += now - event->tstamp_stopped;
1496 event->tstamp_stopped = now;
1498 event_sched_out(event, cpuctx, ctx);
1501 event_sched_out(group_event, cpuctx, ctx);
1511 static int group_can_go_on(struct perf_event *event,
1539 static void add_event_to_ctx(struct perf_event *event,
1542 u64 tstamp = perf_event_time(event);
1544 list_add_event(event, ctx);
1545 perf_group_attach(event);
1546 event->tstamp_enabled = tstamp;
1547 event->tstamp_running = tstamp;
1548 event->tstamp_stopped = tstamp;
1575 static int __perf_install_in_context(void *info)
1583 perf_ctx_lock(cpuctx, task_ctx);
1590 task_ctx_sched_out(task_ctx);
1596 if (ctx->task && task_ctx != ctx) {
1605 task = task_ctx->task;
1610 update_context_time(ctx);
1616 update_cgrp_time_from_event(event);
1618 add_event_to_ctx(event, ctx);
1623 perf_event_sched_in(cpuctx, task_ctx, task);
1626 perf_ctx_unlock(cpuctx, task_ctx);
1651 if (event->cpu != -1)
1659 cpu_function_call(cpu, __perf_install_in_context, event);
1664 if (!task_function_call(task, __perf_install_in_context, event))
1681 add_event_to_ctx(event, ctx);
1693 static void __perf_event_mark_enabled(struct perf_event *event)
1696 u64 tstamp = perf_event_time(event);
1699 event->tstamp_enabled = tstamp - event->total_time_enabled;
1702 sub->tstamp_enabled = tstamp - sub->total_time_enabled;
1709 static int __perf_event_enable(void *info)
1713 struct perf_event *leader = event->group_leader;
1721 update_context_time(ctx);
1729 perf_cgroup_set_timestamp(current, ctx);
1731 __perf_event_mark_enabled(event);
1733 if (!event_filter_match(event)) {
1734 if (is_cgroup_event(event))
1735 perf_cgroup_defer_enabled(event);
1746 if (!group_can_go_on(event, cpuctx, 1)) {
1749 if (event == leader)
1750 err = group_sched_in(event, cpuctx, ctx);
1752 err = event_sched_in(event, cpuctx, ctx);
1760 if (leader != event)
1761 group_sched_out(leader, cpuctx, ctx);
1762 if (leader->attr.pinned) {
1763 update_group_times(leader);
1792 cpu_function_call(event->cpu, __perf_event_enable, event);
1812 __perf_event_mark_enabled(event);
1818 if (!task_function_call(task, __perf_event_enable, event))
1846 if (event->attr.inherit || !is_sampling_event(event))
1867 update_context_time(ctx);
1868 update_cgrp_time_from_cpuctx(cpuctx);
1873 if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) {
1875 group_sched_out(event, cpuctx, ctx);
1880 group_sched_out(event, cpuctx, ctx);
1899 return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
1900 && ctx1->parent_gen == ctx2->parent_gen
1901 && !ctx1->pin_count && !ctx2->pin_count;
1904 static void __perf_event_sync_stat(struct perf_event *event,
1909 if (!event->attr.inherit_stat)
1919 switch (event->state) {
1921 event->pmu->read(event);
1925 update_event_times(event);
1940 swap(event->total_time_enabled, next_event->total_time_enabled);
1941 swap(event->total_time_running, next_event->total_time_running);
1950 #define list_next_entry(pos, member) \
1951 list_entry(pos->member.next, typeof(*pos), member)
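/*
 * Illustrative userspace sketch of the list_entry()/list_next_entry()
 * idiom defined above: walk from one node's embedded list_head to the
 * structure containing the next node.  The list primitives below are
 * minimal stand-ins for the kernel's, for demonstration only.
 */
#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *next, *prev; };

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))
#define list_entry(ptr, type, member)	container_of(ptr, type, member)
#define list_next_entry(pos, member) \
	list_entry((pos)->member.next, typeof(*(pos)), member)

struct item {
	int val;
	struct list_head node;
};

int main(void)
{
	struct item a = { .val = 1 }, b = { .val = 2 };

	/* hand-build a two-element circular list: a <-> b */
	a.node.next = &b.node;  a.node.prev = &b.node;
	b.node.next = &a.node;  b.node.prev = &a.node;

	struct item *next = list_next_entry(&a, node);
	printf("next of a has val %d\n", next->val);	/* prints 2 */
	return 0;
}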
1961 update_context_time(ctx);
1969 while (&event->event_entry != &ctx->event_list &&
1970 &next_event->event_entry != &next_ctx->event_list) {
1972 __perf_event_sync_stat(event, next_event);
1979 static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
1991 cpuctx = __get_cpu_context(ctx);
1997 next_ctx = next->perf_event_ctxp[ctxn];
1998 if (parent && next_ctx &&
2011 if (context_equiv(ctx, next_ctx)) {
2016 task->perf_event_ctxp[ctxn] = next_ctx;
2017 next->perf_event_ctxp[ctxn] = ctx;
2022 perf_event_sync_stat(ctx, next_ctx);
2037 #define for_each_task_context_nr(ctxn) \
2038 for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
2057 perf_event_context_sched_out(task, ctxn, next);
2065 perf_cgroup_sched_out(task, next);
2088 ctx_sched_out(&cpuctx->ctx, cpuctx, event_type);
2100 if (!event_filter_match(event))
2104 if (is_cgroup_event(event))
2105 perf_cgroup_mark_enabled(event, ctx);
2107 if (group_can_go_on(event, cpuctx, 1))
2108 group_sched_in(event, cpuctx, ctx);
2115 update_group_times(event);
2136 if (!event_filter_match(event))
2140 if (is_cgroup_event(event))
2141 perf_cgroup_mark_enabled(event, ctx);
2143 if (group_can_go_on(event, cpuctx, can_add_hw)) {
2144 if (group_sched_in(event, cpuctx, ctx))
2165 perf_cgroup_set_timestamp(task, ctx);
2170 if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED))
2171 ctx_pinned_sched_in(ctx, cpuctx);
2174 if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE))
2175 ctx_flexible_sched_in(ctx, cpuctx);
2184 ctx_sched_in(ctx, cpuctx, event_type, task);
2192 cpuctx = __get_cpu_context(ctx);
2196 perf_ctx_lock(cpuctx, ctx);
2203 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
2208 perf_event_sched_in(cpuctx, cpuctx->task_ctx, task);
2211 perf_ctx_unlock(cpuctx, ctx);
2217 perf_pmu_rotate_start(ctx->pmu);
2236 static void perf_branch_stack_sched_in(struct task_struct *prev,
2241 unsigned long flags;
2251 list_for_each_entry_rcu(pmu, &pmus, entry) {
2258 if (cpuctx->ctx.nr_branch_stack > 0
2261 pmu = cpuctx->ctx.pmu;
2263 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
2271 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
2298 ctx = task->perf_event_ctxp[ctxn];
2302 perf_event_context_sched_in(ctx, task);
2310 perf_cgroup_sched_in(prev, task);
2314 perf_branch_stack_sched_in(prev, task);
2323 int count_fls, nsec_fls, frequency_fls, sec_fls;
2325 count_fls = fls64(count);
2326 nsec_fls = fls64(nsec);
2327 frequency_fls = fls64(frequency);
2344 #define REDUCE_FLS(a, b) \
2346 if (a##_fls > b##_fls) { \
2359 while (count_fls + sec_fls > 64 && nsec_fls + frequency_fls > 64) {
2364 if (count_fls + sec_fls > 64) {
2367 while (count_fls + sec_fls > 64) {
2372 dividend = count * sec;
2374 dividend = count * sec;
2376 while (nsec_fls + frequency_fls > 64) {
2387 return div64_u64(dividend, divisor);
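/*
 * Illustrative sketch of the frequency -> period conversion computed
 * above (the fls()/REDUCE_FLS dance in the original only guards the
 * 64-bit multiplications against overflow).  Given 'count' events
 * observed in 'nsec' nanoseconds and a requested 'frequency' samples
 * per second, the sample period is count * NSEC_PER_SEC / (nsec * freq).
 * Standalone userspace version; overflow handling omitted.
 */
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC	1000000000ULL

static uint64_t calc_period(uint64_t frequency, uint64_t nsec, uint64_t count)
{
	return (count * NSEC_PER_SEC) / (nsec * frequency);
}

int main(void)
{
	/* 20M events in 10ms (2e9 events/sec), 4000 samples/sec wanted ... */
	uint64_t period = calc_period(4000, NSEC_PER_SEC / 100, 20000000);

	/* ... yields one sample every 500000 events. */
	printf("sample period = %llu\n", (unsigned long long)period);
	return 0;
}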
2399 period = perf_calculate_period(event, nsec, count);
2401 delta = (s64)(period - hwc->sample_period);
2402 delta = (delta + 7) / 8;
2404 sample_period = hwc->sample_period + delta;
2409 hwc->sample_period = sample_period;
2411 if (local64_read(&hwc->period_left) > 8*sample_period) {
2440 if (!(ctx->nr_freq || needs_unthr))
2446 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
2450 if (!event_filter_match(event))
2456 hwc->interrupts = 0;
2457 perf_log_throttle(event, 1);
2458 event->pmu->start(event, 0);
2461 if (!event->attr.freq || !event->attr.sample_freq)
2470 delta = now - hwc->freq_count_stamp;
2471 hwc->freq_count_stamp = now;
2481 perf_adjust_period(event, period, delta, false);
2511 int rotate = 0, remove = 1;
2513 if (cpuctx->ctx.nr_events) {
2515 if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
2529 perf_ctx_lock(cpuctx, cpuctx->task_ctx);
2532 cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
2534 ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE);
2536 rotate_ctx(&cpuctx->ctx);
2540 perf_event_sched_in(cpuctx, ctx, current);
2543 perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
2563 perf_adjust_freq_unthr_context(ctx, throttled);
2567 perf_adjust_freq_unthr_context(ctx, throttled);
2571 perf_rotate_context(cpuctx);
2575 static int event_enable_on_exec(struct perf_event *event,
2578 if (!event->attr.enable_on_exec)
2581 event->attr.enable_on_exec = 0;
2585 __perf_event_mark_enabled(event);
2597 unsigned long flags;
2615 task_ctx_sched_out(ctx);
2618 ret = event_enable_on_exec(event, ctx);
2634 perf_event_context_sched_in(ctx, ctx->task);
2642 static void __perf_event_read(void *info)
2660 update_context_time(ctx);
2661 update_cgrp_time_from_event(event);
2663 update_event_times(event);
2665 event->pmu->read(event);
2669 static inline u64 perf_event_count(struct perf_event *event)
2682 __perf_event_read, event, 1);
2685 unsigned long flags;
2694 update_context_time(ctx);
2695 update_cgrp_time_from_event(event);
2697 update_event_times(event);
2701 return perf_event_count(event);
2718 alloc_perf_context(struct pmu *pmu, struct task_struct *task)
2726 __perf_event_init_context(ctx);
2737 find_lively_task_by_vpid(pid_t vpid)
2752 return ERR_PTR(-ESRCH);
2761 put_task_struct(task);
2762 return ERR_PTR(err);
2770 find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
2774 unsigned long flags;
2804 ctx = perf_lock_task_context(task, ctxn, &flags);
2810 ctx = alloc_perf_context(pmu, task);
2823 else if (task->perf_event_ctxp[ctxn])
2844 return ERR_PTR(err);
2847 static void perf_event_free_filter(struct perf_event *event);
2849 static void free_event_rcu(struct rcu_head *head)
2856 perf_event_free_filter(event);
2862 static void free_event(struct perf_event *event)
2866 if (!event->parent) {
2868 static_key_slow_dec_deferred(&perf_sched_events);
2869 if (event->attr.mmap || event->attr.mmap_data)
2871 if (event->attr.comm)
2873 if (event->attr.task)
2877 if (is_cgroup_event(event)) {
2879 static_key_slow_dec_deferred(&perf_sched_events);
2882 if (has_branch_stack(event)) {
2883 static_key_slow_dec_deferred(&perf_sched_events);
2892 ring_buffer_put(event->rb);
2896 if (is_cgroup_event(event))
2897 perf_detach_cgroup(event);
2900 event->destroy(event);
2903 put_ctx(event->ctx);
2905 call_rcu(&event->rcu_head, free_event_rcu);
2927 perf_group_detach(event);
2929 perf_remove_from_context(event);
2941 static void put_event(struct perf_event *event)
2945 if (!atomic_long_dec_and_test(&event->refcount))
2976 list_del_init(&event->owner_entry);
2978 put_task_struct(owner);
2999 total += perf_event_read(event);
3000 *enabled += event->total_time_enabled +
3002 *running += event->total_time_running +
3006 total += perf_event_read(child);
3007 *enabled += child->total_time_enabled;
3008 *running += child->total_time_running;
3016 static int perf_event_read_group(struct perf_event *event,
3017 u64 read_format, char __user *buf)
3019 struct perf_event *leader = event->group_leader, *sub;
3020 int n = 0, size = 0, ret = -EFAULT;
3028 values[n++] = 1 + leader->nr_siblings;
3032 values[n++] = running;
3033 values[n++] = count;
3035 values[n++] = primary_event_id(leader);
3037 size = n * sizeof(u64);
3048 if (read_format & PERF_FORMAT_ID)
3049 values[n++] = primary_event_id(sub);
3051 size = n * sizeof(u64);
3066 static int perf_event_read_one(struct perf_event *event,
3067 u64 read_format, char __user *buf)
3074 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
3076 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
3077 values[n++] = running;
3078 if (read_format & PERF_FORMAT_ID)
3079 values[n++] = primary_event_id(event);
3084 return n * sizeof(u64);
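/*
 * Illustrative userspace sketch of the buffer layout produced by
 * perf_event_read_one() above: for a non-group event, read(2) returns
 * the counter value followed by the optional fields selected in
 * attr.read_format, in this fixed order.
 */
#include <stdint.h>
#include <stdio.h>

/* bit values copied from the perf ABI for the sketch */
#define PERF_FORMAT_TOTAL_TIME_ENABLED	(1U << 0)
#define PERF_FORMAT_TOTAL_TIME_RUNNING	(1U << 1)
#define PERF_FORMAT_ID			(1U << 2)

struct read_one {
	uint64_t value;		/* always present */
	uint64_t time_enabled;	/* if PERF_FORMAT_TOTAL_TIME_ENABLED */
	uint64_t time_running;	/* if PERF_FORMAT_TOTAL_TIME_RUNNING */
	uint64_t id;		/* if PERF_FORMAT_ID */
};

static void decode(const uint64_t *buf, uint64_t read_format, struct read_one *out)
{
	int n = 0;

	out->value = buf[n++];
	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		out->time_enabled = buf[n++];
	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		out->time_running = buf[n++];
	if (read_format & PERF_FORMAT_ID)
		out->id = buf[n++];
}

int main(void)
{
	uint64_t raw[4] = { 12345, 1000, 900, 42 };
	struct read_one r = { 0 };

	decode(raw, PERF_FORMAT_TOTAL_TIME_ENABLED |
		    PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID, &r);
	printf("value=%llu enabled=%llu running=%llu id=%llu\n",
	       (unsigned long long)r.value, (unsigned long long)r.time_enabled,
	       (unsigned long long)r.time_running, (unsigned long long)r.id);
	return 0;
}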
3091 perf_read_hw(struct perf_event *event, char __user *buf, size_t count)
3093 u64 read_format = event->attr.read_format;
3109 ret = perf_event_read_group(event, read_format, buf);
3111 ret = perf_event_read_one(event, read_format, buf);
3117 perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
3121 return perf_read_hw(event, buf, count);
3150 ring_buffer_attach(event, rb);
3157 poll_wait(file, &event->waitq, wait);
3162 static void perf_event_reset(struct perf_event *event)
3164 (void)perf_event_read(event);
3175 static void perf_event_for_each_child(struct perf_event *event,
3192 struct perf_event *sibling;
3196 event = event->group_leader;
3198 perf_event_for_each_child(event, func);
3200 perf_event_for_each_child(sibling, func);
3204 static int perf_event_period(struct perf_event *event, u64 __user *arg)
3210 if (!is_sampling_event(event))
3220 if (event->attr.freq) {
3221 if (value > sysctl_perf_event_sample_rate) {
3226 event->attr.sample_freq = value;
3228 event->attr.sample_period = value;
3229 event->hw.sample_period = value;
3239 static inline int perf_fget_light(int fd, struct fd *p)
3241 struct fd f = fdget(fd);
3245 if (f.file->f_op != &perf_fops) {
3253 static int perf_event_set_output(struct perf_event *event,
3254 struct perf_event *output_event);
3255 static int perf_event_set_filter(struct perf_event *event, void __user *arg);
3257 static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
3260 void (*func)(struct perf_event *);
3271 func = perf_event_reset;
3278 return perf_event_period(event, (u64 __user *)arg);
3284 struct perf_event *output_event;
3286 ret = perf_fget_light(arg, &output);
3289 output_event = output.file->private_data;
3290 ret = perf_event_set_output(event, output_event);
3293 ret = perf_event_set_output(event, NULL);
3299 return perf_event_set_filter(event, (void __user *)arg);
3306 perf_event_for_each(event, func);
3308 perf_event_for_each_child(event, func);
3315 struct perf_event *event;
3327 struct perf_event *event;
3337 static int perf_event_index(struct perf_event *event)
3345 return event->pmu->event_idx(event);
3348 static void calc_timer_values(struct perf_event *event,
3355 *now = perf_clock();
3356 ctx_time = event->shadow_ctx_time + *now;
3357 *enabled = ctx_time - event->tstamp_enabled;
3358 *running = ctx_time - event->tstamp_running;
3386 calc_timer_values(event, &now, &enabled, &running);
3400 userpg->index = perf_event_index(event);
3401 userpg->offset = perf_event_count(event);
3420 static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
3422 struct perf_event *event = vma->vm_file->private_data;
3424 int ret = VM_FAULT_SIGBUS;
3426 if (vmf->flags & FAULT_FLAG_MKWRITE) {
3427 if (vmf->pgoff == 0)
3437 if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
3444 get_page(vmf->page);
3445 vmf->page->mapping = vma->vm_file->f_mapping;
3446 vmf->page->index = vmf->pgoff;
3455 static void ring_buffer_attach(struct perf_event *event,
3458 unsigned long flags;
3460 if (!list_empty(&event->rb_entry))
3464 if (!list_empty(&event->rb_entry))
3469 spin_unlock_irqrestore(&rb->event_lock, flags);
3472 static void ring_buffer_detach(struct perf_event *event,
3475 unsigned long flags;
3477 if (list_empty(&event->rb_entry))
3481 list_del_init(&event->rb_entry);
3483 spin_unlock_irqrestore(&rb->event_lock, flags);
3486 static void ring_buffer_wakeup(struct perf_event *event)
3510 static struct ring_buffer *ring_buffer_get(struct perf_event *event)
3525 static void ring_buffer_put(struct ring_buffer *rb)
3527 struct perf_event *event, *n;
3528 unsigned long flags;
3535 list_del_init(&event->rb_entry);
3538 spin_unlock_irqrestore(&rb->event_lock, flags);
3545 struct perf_event *event = vma->vm_file->private_data;
3552 struct perf_event *event = vma->vm_file->private_data;
3555 unsigned long size = perf_data_size(event->rb);
3559 atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
3560 vma->vm_mm->pinned_vm -= event->mmap_locked;
3562 ring_buffer_detach(event, rb);
3565 ring_buffer_put(rb);
3570 static const struct vm_operations_struct perf_mmap_vmops = {
3571 .open = perf_mmap_open,
3572 .close = perf_mmap_close,
3573 .fault = perf_mmap_fault,
3574 .page_mkwrite = perf_mmap_fault,
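/*
 * Illustrative userspace sketch of the mapping that perf_mmap() below
 * services: the caller maps 1 + 2^n pages, where the first page is the
 * struct perf_event_mmap_page control page and the remaining pages are
 * the sample ring buffer.  A software clock event is assumed here so
 * the example works without hardware PMU access; error paths are terse.
 */
#include <linux/perf_event.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	struct perf_event_mmap_page *pg;
	size_t page = (size_t)sysconf(_SC_PAGESIZE);
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_SOFTWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;
	attr.exclude_kernel = 1;

	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd == -1) {
		perror("perf_event_open");
		return 1;
	}

	/* 1 control page + 2^3 data pages, as enforced by perf_mmap(). */
	pg = mmap(NULL, (1 + 8) * page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (pg == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Burn some CPU so the software clock event generates samples. */
	for (volatile unsigned long i = 0; i < 50000000; i++)
		;

	/*
	 * data_head is advanced by the kernel as it writes records into the
	 * data pages following this control page; a real consumer would
	 * read records up to data_head and then publish data_tail.
	 */
	__sync_synchronize();
	printf("ring buffer head: %llu bytes\n", (unsigned long long)pg->data_head);

	munmap(pg, (1 + 8) * page);
	close(fd);
	return 0;
}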
3580 unsigned long user_locked, user_lock_limit;
3582 unsigned long locked, lock_limit;
3584 unsigned long vma_size;
3586 long user_extra, extra;
3587 int ret = 0, flags = 0;
3594 if (event->cpu == -1 && event->attr.inherit)
3610 if (vma_size != PAGE_SIZE * (1 + nr_pages))
3619 if (event->rb->nr_pages == nr_pages)
3626 user_extra = nr_pages + 1;
3627 user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
3634 user_locked = atomic_long_read(&user->locked_vm) + user_extra;
3637 if (user_locked > user_lock_limit)
3638 extra = user_locked - user_lock_limit;
3644 if ((locked > lock_limit) && perf_paranoid_tracepoint_raw() &&
3656 event->attr.watermark ? event->attr.wakeup_watermark : 0,
3665 atomic_long_add(user_extra, &user->locked_vm);
3666 event->mmap_locked = extra;
3668 vma->vm_mm->pinned_vm += event->mmap_locked;
3677 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
3678 vma->vm_ops = &perf_mmap_vmops;
3683 static int perf_fasync(int fd, struct file *filp, int on)
3701 .release = perf_release,
3704 .unlocked_ioctl = perf_ioctl,
3705 .compat_ioctl = perf_ioctl,
3707 .fasync = perf_fasync,
3719 ring_buffer_wakeup(event);
3721 if (event->pending_kill) {
3723 event->pending_kill = 0;
3727 static void perf_pending_event(struct irq_work *entry)
3730 struct perf_event, pending);
3732 if (event->pending_disable) {
3733 event->pending_disable = 0;
3737 if (event->pending_wakeup) {
3738 event->pending_wakeup = 0;
3752 perf_guest_cbs = cbs;
3759 perf_guest_cbs = NULL;
3779 static void perf_sample_regs_user(struct perf_regs_user *regs_user,
3802 static u64 perf_ustack_task_size(struct pt_regs *regs)
3813 perf_sample_ustack_size(u16 stack_size, u16 header_size,
3833 stack_size = min(stack_size, (u16) task_size);
3836 header_size += 2 * sizeof(u64);
3839 if ((u16) (header_size + stack_size) < header_size) {
3844 stack_size = USHRT_MAX - header_size - sizeof(u64);
3880 rem = __output_copy_user(handle, (void *) sp, dump_size);
3881 dyn_size = dump_size - rem;
3891 struct perf_sample_data *data,
3892 struct perf_event *event)
3894 u64 sample_type = event->attr.sample_type;
3897 header->size += event->id_header_size;
3899 if (sample_type & PERF_SAMPLE_TID) {
3901 data->tid_entry.pid = perf_event_pid(event, current);
3902 data->tid_entry.tid = perf_event_tid(event, current);
3905 if (sample_type & PERF_SAMPLE_TIME)
3906 data->time = perf_clock();
3908 if (sample_type & PERF_SAMPLE_ID)
3909 data->id = primary_event_id(event);
3911 if (sample_type & PERF_SAMPLE_STREAM_ID)
3912 data->stream_id = event->id;
3916 data->cpu_entry.reserved = 0;
3921 struct perf_sample_data *data,
3922 struct perf_event *event)
3924 if (event->attr.sample_id_all)
3925 __perf_event_header__init_id(header, data, event);
3929 struct perf_sample_data *data)
3931 u64 sample_type = data->type;
3933 if (sample_type & PERF_SAMPLE_TID)
3936 if (sample_type & PERF_SAMPLE_TIME)
3939 if (sample_type & PERF_SAMPLE_ID)
3942 if (sample_type & PERF_SAMPLE_STREAM_ID)
3951 struct perf_sample_data *sample)
3953 if (event->attr.sample_id_all)
3954 __perf_event__output_id_sample(handle, sample);
3958 struct perf_event *event,
3959 u64 enabled, u64 running)
3961 u64 read_format = event->attr.read_format;
3965 values[n++] = perf_event_count(event);
3966 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
3967 values[n++] = enabled +
3970 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
3971 values[n++] = running +
3974 if (read_format & PERF_FORMAT_ID)
3975 values[n++] = primary_event_id(event);
3977 __output_copy(handle, values, n * sizeof(u64));
3984 struct perf_event *event,
3985 u64 enabled, u64 running)
3987 struct perf_event *leader = event->group_leader, *sub;
3988 u64 read_format = event->attr.read_format;
3992 values[n++] = 1 + leader->nr_siblings;
3994 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
3997 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
3998 values[n++] = running;
4000 if (leader != event)
4001 leader->pmu->read(leader);
4003 values[n++] = perf_event_count(leader);
4004 if (read_format & PERF_FORMAT_ID)
4005 values[n++] = primary_event_id(leader);
4007 __output_copy(handle, values, n * sizeof(u64));
4013 sub->pmu->read(sub);
4015 values[n++] = perf_event_count(sub);
4016 if (read_format & PERF_FORMAT_ID)
4017 values[n++] = primary_event_id(sub);
4019 __output_copy(handle, values, n * sizeof(u64));
4023 #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
4024 PERF_FORMAT_TOTAL_TIME_RUNNING)
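/*
 * Illustrative sketch of the group read layout emitted by
 * perf_output_read_group() above (and by read(2) with PERF_FORMAT_GROUP):
 * a count of members, the optional total times, then one
 * { value, optional id } pair per member, leader first.
 */
#include <stdint.h>
#include <stdio.h>

#define PERF_FORMAT_TOTAL_TIME_ENABLED	(1U << 0)
#define PERF_FORMAT_TOTAL_TIME_RUNNING	(1U << 1)
#define PERF_FORMAT_ID			(1U << 2)

static void dump_group(const uint64_t *buf, uint64_t read_format)
{
	int n = 0;
	uint64_t i, nr = buf[n++];

	if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
		printf("time_enabled %llu\n", (unsigned long long)buf[n++]);
	if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
		printf("time_running %llu\n", (unsigned long long)buf[n++]);

	for (i = 0; i < nr; i++) {
		printf("counter %llu: value %llu",
		       (unsigned long long)i, (unsigned long long)buf[n++]);
		if (read_format & PERF_FORMAT_ID)
			printf(" id %llu", (unsigned long long)buf[n++]);
		printf("\n");
	}
}

int main(void)
{
	/* nr=2, time_enabled, time_running, then (value, id) x 2 */
	uint64_t buf[] = { 2, 5000, 4500, 111, 1, 222, 2 };

	dump_group(buf, PERF_FORMAT_TOTAL_TIME_ENABLED |
			PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID);
	return 0;
}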
4027 struct perf_event *event)
4029 u64 enabled = 0, running = 0, now;
4030 u64 read_format = event->attr.read_format;
4042 calc_timer_values(event, &now, &enabled, &running);
4045 perf_output_read_group(handle, event, enabled, running);
4047 perf_output_read_one(handle, event, enabled, running);
4052 struct perf_sample_data *data,
4053 struct perf_event *event)
4055 u64 sample_type = data->type;
4059 if (sample_type & PERF_SAMPLE_IP)
4062 if (sample_type & PERF_SAMPLE_TID)
4065 if (sample_type & PERF_SAMPLE_TIME)
4068 if (sample_type & PERF_SAMPLE_ADDR)
4071 if (sample_type & PERF_SAMPLE_ID)
4074 if (sample_type & PERF_SAMPLE_STREAM_ID)
4080 if (sample_type & PERF_SAMPLE_PERIOD)
4084 perf_output_read(handle, event);
4087 if (data->callchain) {
4090 if (data->callchain)
4091 size += data->callchain->nr;
4093 size *= sizeof(u64);
4095 __output_copy(handle, data->callchain, size);
4105 __output_copy(handle, data->raw->data,
4112 .size = sizeof(u32),
4119 if (!event->attr.watermark) {
4120 int wakeup_events = event->attr.wakeup_events;
4122 if (wakeup_events) {
4126 if (events >= wakeup_events) {
4134 if (data->br_stack) {
4137 size = data->br_stack->nr
4152 u64 abi = data->regs_user.abi;
4161 u64 mask = event->attr.sample_regs_user;
4162 perf_output_sample_regs(handle,
4163 data->regs_user.regs,
4169 perf_output_sample_ustack(handle,
4170 data->stack_user_size,
4171 data->regs_user.regs);
4175 struct perf_sample_data *data,
4176 struct perf_event *event,
4179 u64 sample_type = event->attr.sample_type;
4182 header->size = sizeof(*header) + event->header_size;
4187 __perf_event_header__init_id(header, data, event);
4189 if (sample_type & PERF_SAMPLE_IP)
4192 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
4197 if (data->callchain)
4198 size += data->callchain->nr;
4200 header->size += size * sizeof(u64);
4203 if (sample_type & PERF_SAMPLE_RAW) {
4204 int size = sizeof(u32);
4207 size += data->raw->size;
4209 size += sizeof(u32);
4216 int size = sizeof(u64);
4217 if (data->br_stack) {
4218 size += data->br_stack->nr
4226 int size = sizeof(u64);
4228 perf_sample_regs_user(&data->regs_user, regs);
4230 if (data->regs_user.regs) {
4231 u64 mask = event->attr.sample_regs_user;
4246 u16 stack_size = event->attr.sample_stack_user;
4250 perf_sample_regs_user(uregs, regs);
4252 stack_size = perf_sample_ustack_size(stack_size, header->size,
4261 size += sizeof(u64) + stack_size;
4263 data->stack_user_size = stack_size;
4268 static void perf_event_output(struct perf_event *event,
4269 struct perf_sample_data *data,
4303 perf_event_read_event(struct perf_event *event,
4307 struct perf_sample_data sample;
4312 .size = sizeof(read_event) + event->read_size,
4314 .pid = perf_event_pid(event, task),
4315 .tid = perf_event_tid(event, task),
4325 perf_output_read(&handle, event);
4352 static void perf_event_task_output(struct perf_event *event,
4356 struct perf_sample_data sample;
4367 task_event->event_id.pid = perf_event_pid(event, task);
4370 task_event->event_id.tid = perf_event_tid(event, task);
4382 static int perf_event_task_match(struct perf_event *event)
4387 if (!event_filter_match(event))
4390 if (event->attr.comm || event->attr.mmap ||
4391 event->attr.mmap_data || event->attr.task)
4400 struct perf_event *event;
4402 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
4403 if (perf_event_task_match(event))
4404 perf_event_task_output(event, task_event);
4416 list_for_each_entry_rcu(pmu, &pmus, entry) {
4420 perf_event_task_ctx(&cpuctx->ctx, task_event);
4430 perf_event_task_ctx(ctx, task_event);
4437 static void perf_event_task(struct task_struct *task,
4455 .size = sizeof(task_event.event_id),
4461 .time = perf_clock(),
4465 perf_event_task_event(&task_event);
4470 perf_event_task(task, NULL, 1);
4490 static void perf_event_comm_output(struct perf_event *event,
4494 struct perf_sample_data sample;
4495 int size = comm_event->event_id.header.size;
4505 comm_event->event_id.pid = perf_event_pid(event, comm_event->task);
4506 comm_event->event_id.tid = perf_event_tid(event, comm_event->task);
4509 __output_copy(&handle, comm_event->comm,
4519 static int perf_event_comm_match(struct perf_event *event)
4524 if (!event_filter_match(event))
4527 if (event->attr.comm)
4536 struct perf_event *event;
4538 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
4539 if (perf_event_comm_match(event))
4540 perf_event_comm_output(event, comm_event);
4553 memset(comm, 0, sizeof(comm));
4554 strlcpy(comm, comm_event->task->comm, sizeof(comm));
4557 comm_event->comm = comm;
4562 list_for_each_entry_rcu(pmu, &pmus, entry) {
4566 perf_event_comm_ctx(&cpuctx->ctx, comm_event);
4574 perf_event_comm_ctx(ctx, comm_event);
4588 ctx = task->perf_event_ctxp[ctxn];
4592 perf_event_enable_on_exec(ctx);
4613 perf_event_comm_event(&comm_event);
4637 static void perf_event_mmap_output(struct perf_event *event,
4641 struct perf_sample_data sample;
4642 int size = mmap_event->event_id.header.size;
4655 __output_copy(&handle, mmap_event->file_name,
4665 static int perf_event_mmap_match(struct perf_event *event,
4672 if (!event_filter_match(event))
4675 if ((!executable && event->attr.mmap_data) ||
4676 (executable && event->attr.mmap))
4686 struct perf_event *event;
4688 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
4689 if (perf_event_mmap_match(event, mmap_event, executable))
4690 perf_event_mmap_output(event, mmap_event);
4699 struct file *file = vma->vm_file;
4707 memset(tmp, 0, sizeof(tmp));
4717 name = strncpy(tmp, "//enomem", sizeof(tmp));
4722 name = strncpy(tmp, "//toolong", sizeof(tmp));
4733 name = strncpy(tmp, "[vdso]", sizeof(tmp));
4737 name = strncpy(tmp, "[heap]", sizeof(tmp));
4741 name = strncpy(tmp, "[stack]", sizeof(tmp));
4745 name = strncpy(tmp, "//anon", sizeof(tmp));
4758 list_for_each_entry_rcu(pmu, &pmus, entry) {
4762 perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
4771 perf_event_mmap_ctx(ctx, mmap_event,
4807 perf_event_mmap_event(&mmap_event);
4814 static void perf_log_throttle(struct perf_event *event, int enable)
4817 struct perf_sample_data sample;
4831 .time = perf_clock(),
4832 .id = primary_event_id(event),
4833 .stream_id = event->id,
4855 static int __perf_event_overflow(struct perf_event *event,
4856 int throttle, struct perf_sample_data *data,
4868 if (unlikely(!is_sampling_event(event)))
4872 if (seq != hwc->interrupts_seq) {
4873 hwc->interrupts_seq = seq;
4874 hwc->interrupts = 1;
4878 && hwc->interrupts >= max_samples_per_tick)) {
4881 perf_log_throttle(event, 0);
4886 if (event->attr.freq) {
4887 u64 now = perf_clock();
4888 s64 delta = now - hwc->freq_time_stamp;
4890 hwc->freq_time_stamp = now;
4893 perf_adjust_period(event, delta, hwc->last_period, true);
4901 event->pending_kill = POLL_IN;
4905 event->pending_disable = 1;
4909 if (event->overflow_handler)
4910 event->overflow_handler(event, data, regs);
4912 perf_event_output(event, data, regs);
4914 if (event->fasync && event->pending_kill) {
4915 event->pending_wakeup = 1;
4923 struct perf_sample_data *data,
4926 return __perf_event_overflow(event, 1, data, regs);
4951 static u64 perf_swevent_set_period(struct perf_event *event)
4954 u64 period = hwc->last_period;
4958 hwc->last_period = hwc->sample_period;
4965 nr = div64_u64(period + val, period);
4974 static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
4975 struct perf_sample_data *data,
4982 overflow = perf_swevent_set_period(event);
4988 if (__perf_event_overflow(event, throttle,
5000 static void perf_swevent_event(struct perf_event *event, u64 nr,
5001 struct perf_sample_data *data,
5011 if (!is_sampling_event(event))
5014 if ((event->attr.sample_type & PERF_SAMPLE_PERIOD) && !event->attr.freq) {
5016 return perf_swevent_overflow(event, 1, data, regs);
5018 data->period = event->hw.last_period;
5020 if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
5021 return perf_swevent_overflow(event, 1, data, regs);
5026 perf_swevent_overflow(event, 0, data, regs);
5029 static int perf_exclude_event(struct perf_event *event,
5046 static int perf_swevent_match(struct perf_event *event,
5049 struct perf_sample_data *data,
5052 if (event->attr.type != type)
5055 if (event->attr.config != event_id)
5058 if (perf_exclude_event(event, regs))
5064 static inline u64 swevent_hash(u64 type, u32 event_id)
5066 u64 val = event_id | (type << 32);
5074 u64 hash = swevent_hash(type, event_id);
5089 return __find_swevent_head(hlist, type, event_id);
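/*
 * Illustrative sketch of the bucket selection in swevent_hash() above:
 * the 32-bit event id and the event type are packed into one 64-bit key
 * and hashed down to a small table index.  The multiplicative hash below
 * stands in for the kernel's hash_64(); the multiplier and bucket count
 * are assumptions for the example.
 */
#include <stdint.h>
#include <stdio.h>

#define HLIST_BITS	8			/* assumed bucket-count exponent */
#define MULTIPLIER	0x61C8864680B583EBULL	/* golden-ratio style constant */

static uint64_t swevent_hash(uint64_t type, uint32_t event_id)
{
	uint64_t val = event_id | (type << 32);

	return (val * MULTIPLIER) >> (64 - HLIST_BITS);
}

int main(void)
{
	/* e.g. PERF_TYPE_SOFTWARE (1), PERF_COUNT_SW_PAGE_FAULTS (2) */
	printf("bucket = %llu\n", (unsigned long long)swevent_hash(1, 2));
	return 0;
}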
5094 find_swevent_head(struct swevent_htable *swhash, struct perf_event *event)
5097 u32 event_id = event->attr.config;
5098 u64 type = event->attr.type;
5106 lockdep_is_held(&event->ctx->lock));
5110 return __find_swevent_head(hlist, type, event_id);
5115 struct perf_sample_data *data,
5119 struct perf_event *event;
5124 head = find_swevent_head_rcu(swhash, type, event_id);
5128 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
5129 if (perf_swevent_match(event, type, event_id, data, regs))
5130 perf_swevent_event(event, nr, data, regs);
5140 return get_recursion_context(swhash->recursion);
5148 put_recursion_context(swhash->recursion, rctx);
5153 struct perf_sample_data data;
5161 perf_sample_data_init(&data, addr, 0);
5169 static void perf_swevent_read(struct perf_event *event)
5173 static int perf_swevent_add(struct perf_event *event, int flags)
5179 if (is_sampling_event(event)) {
5180 hwc->last_period = hwc->sample_period;
5181 perf_swevent_set_period(event);
5186 head = find_swevent_head(swhash, event);
5190 hlist_add_head_rcu(&event->hlist_entry, head);
5195 static void perf_swevent_del(struct perf_event *event, int flags)
5197 hlist_del_rcu(&event->hlist_entry);
5200 static void perf_swevent_start(struct perf_event *event, int flags)
5202 event->hw.state = 0;
5205 static void perf_swevent_stop(struct perf_event *event, int flags)
5229 static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
5236 swevent_hlist_release(swhash);
5241 static void swevent_hlist_put(struct perf_event *event)
5245 if (event->cpu != -1) {
5246 swevent_hlist_put_cpu(event, event->cpu);
5251 swevent_hlist_put_cpu(event, cpu);
5254 static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
5261 if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
5278 static int swevent_hlist_get(struct perf_event *event)
5281 int cpu, failed_cpu;
5283 if (event->cpu != -1)
5284 return swevent_hlist_get_cpu(event, event->cpu);
5288 err = swevent_hlist_get_cpu(event, cpu);
5299 if (cpu == failed_cpu)
5301 swevent_hlist_put_cpu(event, cpu);
5310 static void sw_perf_event_destroy(struct perf_event *event)
5312 u64 event_id = event->attr.config;
5317 swevent_hlist_put(event);
5320 static int perf_swevent_init(struct perf_event *event)
5322 int event_id = event->attr.config;
5330 if (has_branch_stack(event))
5345 if (!event->parent) {
5348 err = swevent_hlist_get(event);
5353 event->destroy = sw_perf_event_destroy;
5359 static int perf_swevent_event_idx(struct perf_event *event)
5364 static struct pmu perf_swevent = {
5367 .event_init = perf_swevent_init,
5368 .add = perf_swevent_add,
5369 .del = perf_swevent_del,
5370 .start = perf_swevent_start,
5371 .stop = perf_swevent_stop,
5372 .read = perf_swevent_read,
5374 .event_idx = perf_swevent_event_idx,
5377 #ifdef CONFIG_EVENT_TRACING
5379 static int perf_tp_filter_match(struct perf_event *event,
5380 struct perf_sample_data *data)
5382 void *record = data->raw->data;
5389 static int perf_tp_event_match(struct perf_event *event,
5390 struct perf_sample_data *data,
5398 if (event->attr.exclude_kernel)
5407 void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
5411 struct perf_sample_data data;
5412 struct perf_event *event;
5420 perf_sample_data_init(&data, addr, 0);
5423 hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
5424 if (perf_tp_event_match(event, &data, regs))
5425 perf_swevent_event(event, count, &data, regs);
5432 if (task && task != current) {
5441 list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
5444 if (event->attr.config != entry->type)
5446 if (perf_tp_event_match(event, &data, regs))
5447 perf_swevent_event(event, count, &data, regs);
5457 static void tp_perf_event_destroy(struct perf_event *event)
5462 static int perf_tp_event_init(struct perf_event *event)
5472 if (has_branch_stack(event))
5479 event->destroy = tp_perf_event_destroy;
5484 static struct pmu perf_tracepoint = {
5487 .event_init = perf_tp_event_init,
5490 .start = perf_swevent_start,
5491 .stop = perf_swevent_stop,
5492 .read = perf_swevent_read,
5494 .event_idx = perf_swevent_event_idx,
5497 static inline void perf_tp_register(void)
5502 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
5511 if (IS_ERR(filter_str))
5512 return PTR_ERR(filter_str);
5514 ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
5520 static void perf_event_free_filter(struct perf_event *event)
5522 ftrace_profile_free_filter(event);
5527 static inline void perf_tp_register(void)
5531 static int perf_event_set_filter(struct perf_event *event, void __user *arg)
5536 static void perf_event_free_filter(struct perf_event *event)
5542 #ifdef CONFIG_HAVE_HW_BREAKPOINT
5543 void perf_bp_event(struct perf_event *bp, void *data)
5545 struct perf_sample_data sample;
5548 perf_sample_data_init(&sample, bp->attr.bp_addr, 0);
5550 if (!bp->hw.state && !perf_exclude_event(bp, regs))
5551 perf_swevent_event(bp, 1, &sample, regs);
5562 struct perf_sample_data data;
5564 struct perf_event *event;
5572 event->pmu->read(event);
5574 perf_sample_data_init(&data, 0, event->hw.last_period);
5577 if (regs && !perf_exclude_event(event, regs)) {
5578 if (!(event->attr.exclude_idle && is_idle_task(current)))
5579 if (__perf_event_overflow(event, 1, &data, regs))
5583 period = max_t(u64, 10000, event->hw.sample_period);
5584 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
5589 static void perf_swevent_start_hrtimer(struct perf_event *event)
5594 if (!is_sampling_event(event))
5604 period = max_t(u64, 10000, hwc->sample_period);
5607 ns_to_ktime(period), 0,
5611 static void perf_swevent_cancel_hrtimer(struct perf_event *event)
5615 if (is_sampling_event(event)) {
5617 local64_set(&hwc->period_left, ktime_to_ns(remaining));
5623 static void perf_swevent_init_hrtimer(struct perf_event *event)
5627 if (!is_sampling_event(event))
5631 hwc->hrtimer.function = perf_swevent_hrtimer;
5637 if (event->attr.freq) {
5638 long freq = event->attr.sample_freq;
5641 hwc->sample_period = event->attr.sample_period;
5642 local64_set(&hwc->period_left, hwc->sample_period);
5643 event->attr.freq = 0;
5651 static void cpu_clock_event_update(struct perf_event *event)
5661 static void cpu_clock_event_start(struct perf_event *event, int flags)
5664 perf_swevent_start_hrtimer(event);
5667 static void cpu_clock_event_stop(struct perf_event *event, int flags)
5669 perf_swevent_cancel_hrtimer(event);
5670 cpu_clock_event_update(event);
5673 static int cpu_clock_event_add(struct perf_event *event, int flags)
5676 cpu_clock_event_start(event, flags);
5681 static void cpu_clock_event_del(struct perf_event *event, int flags)
5683 cpu_clock_event_stop(event, flags);
5686 static void cpu_clock_event_read(struct perf_event *event)
5688 cpu_clock_event_update(event);
5691 static int cpu_clock_event_init(struct perf_event *event)
5702 if (has_branch_stack(event))
5705 perf_swevent_init_hrtimer(event);
5710 static struct pmu perf_cpu_clock = {
5713 .event_init = cpu_clock_event_init,
5714 .add = cpu_clock_event_add,
5715 .del = cpu_clock_event_del,
5716 .start = cpu_clock_event_start,
5717 .stop = cpu_clock_event_stop,
5718 .read = cpu_clock_event_read,
5720 .event_idx = perf_swevent_event_idx,
5727 static void task_clock_event_update(struct perf_event *event, u64 now)
5737 static void task_clock_event_start(struct perf_event *event, int flags)
5739 local64_set(&event->hw.prev_count, event->ctx->time);
5740 perf_swevent_start_hrtimer(event);
5743 static void task_clock_event_stop(struct perf_event *event, int flags)
5745 perf_swevent_cancel_hrtimer(event);
5746 task_clock_event_update(event, event->ctx->time);
5749 static int task_clock_event_add(struct perf_event *event, int flags)
5752 task_clock_event_start(event, flags);
5757 static void task_clock_event_del(struct perf_event *event, int flags)
5762 static void task_clock_event_read(struct perf_event *event)
5764 u64 now = perf_clock();
5765 u64 delta = now - event->ctx->timestamp;
5768 task_clock_event_update(event, time);
5771 static int task_clock_event_init(struct perf_event *event)
5782 if (has_branch_stack(event))
5785 perf_swevent_init_hrtimer(event);
5790 static struct pmu perf_task_clock = {
5793 .event_init = task_clock_event_init,
5794 .add = task_clock_event_add,
5795 .del = task_clock_event_del,
5796 .start = task_clock_event_start,
5797 .stop = task_clock_event_stop,
5798 .read = task_clock_event_read,
5800 .event_idx = perf_swevent_event_idx,
5803 static void perf_pmu_nop_void(struct pmu *pmu)
5807 static int perf_pmu_nop_int(struct pmu *pmu)
5812 static void perf_pmu_start_txn(struct pmu *pmu)
5817 static int perf_pmu_commit_txn(struct pmu *pmu)
5823 static void perf_pmu_cancel_txn(struct pmu *pmu)
5828 static int perf_event_idx_default(struct perf_event *event)
5830 return event->hw.idx + 1;
5837 static void *find_pmu_context(int ctxn)
5852 static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
5866 static void free_pmu_context(struct pmu *pmu)
5876 update_pmu_context(i, pmu);
5885 static struct idr pmu_idr;
5900 static int pmu_bus_running;
5902 .name = "event_source",
5903 .dev_attrs = pmu_dev_attrs,
5906 static void pmu_dev_release(struct device *dev)
5911 static int pmu_dev_alloc(struct pmu *pmu)
5926 pmu->dev->bus = &pmu_bus;
5927 pmu->dev->release = pmu_dev_release;
5971 if (pmu_bus_running) {
5972 ret = pmu_dev_alloc(pmu);
5980 goto got_cpu_context;
5990 __perf_event_init_context(&cpuctx->ctx);
5994 cpuctx->ctx.pmu = pmu;
6024 pmu->event_idx = perf_event_idx_default;
6026 list_add_rcu(&pmu->entry, &pmus);
6049 list_del_rcu(&pmu->entry);
6064 free_pmu_context(pmu);
6069 struct pmu *pmu = NULL;
6073 idx = srcu_read_lock(&pmus_srcu);
6086 list_for_each_entry_rcu(pmu, &pmus, entry) {
6099 srcu_read_unlock(&pmus_srcu, idx);
6107 static struct perf_event *
6110 struct perf_event *group_leader,
6111 struct perf_event *parent_event,
6116 struct perf_event *event;
6120 if ((unsigned)cpu >= nr_cpu_ids) {
6121 if (!task || cpu != -1)
6134 group_leader = event;
6137 INIT_LIST_HEAD(&event->child_list);
6139 INIT_LIST_HEAD(&event->group_entry);
6140 INIT_LIST_HEAD(&event->event_entry);
6141 INIT_LIST_HEAD(&event->sibling_list);
6142 INIT_LIST_HEAD(&event->rb_entry);
6145 init_irq_work(&event->pending, perf_pending_event);
6149 atomic_long_set(&event->refcount, 1);
6151 event->attr = *attr;
6152 event->group_leader = group_leader;
6156 event->parent = parent_event;
6158 event->ns = get_pid_ns(current->nsproxy->pid_ns);
6165 #ifdef CONFIG_HAVE_HW_BREAKPOINT
6170 event->hw.bp_target = task;
6174 if (!overflow_handler && parent_event) {
6175 overflow_handler = parent_event->overflow_handler;
6176 context = parent_event->overflow_handler_context;
6179 event->overflow_handler = overflow_handler;
6180 event->overflow_handler_context = context;
6190 hwc->sample_period = 1;
6191 hwc->last_period = hwc->sample_period;
6193 local64_set(&hwc->period_left, hwc->sample_period);
6207 else if (IS_ERR(pmu))
6214 return ERR_PTR(err);
6217 if (!event->parent) {
6219 static_key_slow_inc(&perf_sched_events.key);
6220 if (event->attr.mmap || event->attr.mmap_data)
6222 if (event->attr.comm)
6224 if (event->attr.task)
6226 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
6230 return ERR_PTR(err);
6233 if (has_branch_stack(event)) {
6234 static_key_slow_inc(&perf_sched_events.key);
6256 memset(attr, 0, sizeof(*attr));
6258 ret = get_user(size, &uattr->size);
6277 if (size > sizeof(*attr)) {
6282 addr = (void __user *)uattr + sizeof(*attr);
6283 end = (void __user *)uattr + size;
6285 for (; addr < end; addr++) {
6292 size = sizeof(*attr);
6350 if (!arch_perf_have_user_stack_dump())
6368 put_user(sizeof(*attr), &uattr->size);
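/*
 * Illustrative userspace sketch of the forward/backward compatible copy
 * done by perf_copy_attr() above: the caller states how big its struct
 * is; smaller (older) callers are accepted as-is, larger (newer) callers
 * are only accepted if every byte beyond the size the kernel knows about
 * is zero, otherwise the supported size is reported back.  The sizes and
 * helper name below are hypothetical.
 */
#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>

#define KNOWN_SIZE	96	/* hypothetical sizeof(struct perf_event_attr) */
#define FIRST_SIZE	64	/* hypothetical first published ABI size */

static int copy_attr(const unsigned char *user_buf, uint32_t user_size,
		     unsigned char *attr, uint32_t *size_out)
{
	uint32_t i;

	if (user_size == 0)
		user_size = KNOWN_SIZE;
	if (user_size < FIRST_SIZE)
		return -EINVAL;

	if (user_size > KNOWN_SIZE) {
		/* newer userspace: the tail must be all zeroes */
		for (i = KNOWN_SIZE; i < user_size; i++) {
			if (user_buf[i]) {
				*size_out = KNOWN_SIZE;	/* report what we support */
				return -E2BIG;
			}
		}
		user_size = KNOWN_SIZE;
	}

	memset(attr, 0, KNOWN_SIZE);
	memcpy(attr, user_buf, user_size);
	return 0;
}

int main(void)
{
	unsigned char user[128] = { 0 }, attr[KNOWN_SIZE];
	uint32_t supported = 0;

	printf("same size:  %d\n", copy_attr(user, KNOWN_SIZE, attr, &supported));
	user[120] = 1;	/* pretend a future field is set */
	printf("bigger+set: %d (kernel supports %u bytes)\n",
	       copy_attr(user, 128, attr, &supported), supported);
	return 0;
}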
6374 perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
6383 if (event == output_event)
6389 if (output_event->cpu != event->cpu)
6395 if (output_event->cpu == -1 && output_event->ctx != event->ctx)
6406 rb = ring_buffer_get(output_event);
6414 ring_buffer_detach(event, old_rb);
6420 ring_buffer_put(old_rb);
6435 pid_t, pid, int, cpu, int, group_fd, unsigned long, flags)
6437 struct perf_event *group_leader = NULL, *output_event = NULL;
6438 struct perf_event *event, *sibling;
6441 struct file *event_file = NULL;
6442 struct fd group = {NULL, 0};
6453 err = perf_copy_attr(attr_uptr, &attr);
6463 if (attr.sample_freq > sysctl_perf_event_sample_rate)
6480 if (group_fd != -1) {
6481 err = perf_fget_light(group_fd, &group);
6484 group_leader = group.file->private_data;
6486 output_event = group_leader;
6488 group_leader = NULL;
6491 if (pid != -1 && !(flags & PERF_FLAG_PID_CGROUP)) {
6492 task = find_lively_task_by_vpid(pid);
6494 err = PTR_ERR(task);
6501 event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
6503 if (IS_ERR(event)) {
6504 err = PTR_ERR(event);
6508 if (flags & PERF_FLAG_PID_CGROUP) {
6509 err = perf_cgroup_connect(pid, event, &attr, group_leader);
6518 static_key_slow_inc(&perf_sched_events.key);
6528 (is_software_event(event) != is_software_event(group_leader))) {
6529 if (is_software_event(event)) {
6538 pmu = group_leader->pmu;
6539 } else if (is_software_event(group_leader) &&
6553 ctx = find_get_context(pmu, task, event->cpu);
6560 put_task_struct(task);
6574 if (group_leader->group_leader != group_leader)
6581 if (group_leader->ctx->type != ctx->type)
6584 if (group_leader->ctx != ctx)
6596 err = perf_event_set_output(event, output_event);
6602 if (IS_ERR(event_file)) {
6603 err = PTR_ERR(event_file);
6611 perf_remove_from_context(group_leader);
6614 perf_remove_from_context(sibling);
6626 perf_install_in_context(ctx, group_leader, event->cpu);
6630 perf_install_in_context(ctx, sibling, event->cpu);
6635 perf_install_in_context(ctx, event, event->cpu);
6637 perf_unpin_context(ctx);
6651 perf_event__header_size(event);
6652 perf_event__id_header_size(event);
6665 perf_unpin_context(ctx);
6672 put_task_struct(task);
6694 struct perf_event *event;
6701 event = perf_event_alloc(attr, cpu, task, NULL, NULL,
6702 overflow_handler, context);
6703 if (IS_ERR(event)) {
6704 err = PTR_ERR(event);
6708 ctx = find_get_context(event->pmu, task, cpu);
6716 perf_install_in_context(ctx, event, cpu);
6718 perf_unpin_context(ctx);
6726 return ERR_PTR(err);
6743 perf_remove_from_context(event);
6745 list_add(&event->event_entry, &events);
6756 perf_install_in_context(dst_ctx, event, dst_cpu);
6763 static void sync_child_event(struct perf_event *child_event,
6766 struct perf_event *parent_event = child_event->parent;
6769 if (child_event->attr.inherit_stat)
6770 perf_event_read_event(child_event, child);
6772 child_val = perf_event_count(child_event);
6779 &parent_event->child_total_time_enabled);
6781 &parent_event->child_total_time_running);
6788 list_del_init(&child_event->child_list);
6795 put_event(parent_event);
6799 __perf_event_exit_task(struct perf_event *child_event,
6803 if (child_event->parent) {
6805 perf_group_detach(child_event);
6809 perf_remove_from_context(child_event);
6816 if (child_event->parent) {
6817 sync_child_event(child_event, child);
6818 free_event(child_event);
6822 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
6824 struct perf_event *child_event, *tmp;
6826 unsigned long flags;
6828 if (likely(!child->perf_event_ctxp[ctxn])) {
6829 perf_event_task(child, NULL, 0);
6848 task_ctx_sched_out(child_ctx);
6849 child->perf_event_ctxp[ctxn] = NULL;
6855 unclone_ctx(child_ctx);
6856 update_context_time(child_ctx);
6864 perf_event_task(child, child_ctx, 0);
6881 __perf_event_exit_task(child_event, child_ctx, child);
6885 __perf_event_exit_task(child_event, child_ctx, child);
6893 !list_empty(&child_ctx->flexible_groups))
6912 list_del_init(&event->owner_entry);
6920 event->owner = NULL;
6925 perf_event_exit_task_context(child, ctxn);
6928 static void perf_free_event(struct perf_event *event,
6931 struct perf_event *parent = event->parent;
6937 list_del_init(&event->child_list);
6942 perf_group_detach(event);
6943 list_del_event(event, ctx);
6958 ctx = task->perf_event_ctxp[ctxn];
6966 perf_free_event(event, ctx);
6970 perf_free_event(event, ctx);
6993 static struct perf_event *
6994 inherit_event(struct perf_event *parent_event,
6998 struct perf_event *group_leader,
7001 struct perf_event *child_event;
7002 unsigned long flags;
7010 if (parent_event->parent)
7011 parent_event = parent_event->parent;
7013 child_event = perf_event_alloc(&parent_event->attr,
7016 group_leader, parent_event,
7018 if (IS_ERR(child_event))
7022 free_event(child_event);
7038 if (parent_event->attr.freq) {
7039 u64 sample_period = parent_event->hw.sample_period;
7042 hwc->sample_period = sample_period;
7043 hwc->last_period = sample_period;
7048 child_event->ctx = child_ctx;
7049 child_event->overflow_handler = parent_event->overflow_handler;
7050 child_event->overflow_handler_context
7051 = parent_event->overflow_handler_context;
7056 perf_event__header_size(child_event);
7057 perf_event__id_header_size(child_event);
7063 add_event_to_ctx(child_event, child_ctx);
7071 list_add_tail(&child_event->child_list, &parent_event->child_list);
7077 static int inherit_group(struct perf_event *parent_event,
7083 struct perf_event *leader;
7084 struct perf_event *sub;
7085 struct perf_event *child_ctr;
7087 leader = inherit_event(parent_event, parent, parent_ctx,
7088 child, NULL, child_ctx);
7090 return PTR_ERR(leader);
7092 child_ctr = inherit_event(sub, parent, parent_ctx,
7093 child, leader, child_ctx);
7094 if (IS_ERR(child_ctr))
7095 return PTR_ERR(child_ctr);
7101 inherit_task_group(struct perf_event *event, struct task_struct *parent,
7109 if (!event->attr.inherit) {
7114 child_ctx = child->perf_event_ctxp[ctxn];
7123 child_ctx = alloc_perf_context(event->pmu, child);
7127 child->perf_event_ctxp[ctxn] = child_ctx;
7130 ret = inherit_group(event, parent, parent_ctx,
7146 struct perf_event *event;
7148 int inherited_all = 1;
7149 unsigned long flags;
7152 if (likely(!parent->perf_event_ctxp[ctxn]))
7159 parent_ctx = perf_pin_task_context(parent, ctxn);
7179 ret = inherit_task_group(event, parent, parent_ctx,
7180 child, ctxn, &inherited_all);
7195 ret = inherit_task_group(event, parent, parent_ctx,
7196 child, ctxn, &inherited_all);
7204 child_ctx = child->perf_event_ctxp[ctxn];
7206 if (child_ctx && inherited_all) {
7228 perf_unpin_context(parent_ctx);
7229 put_ctx(parent_ctx);
7241 memset(child->perf_event_ctxp, 0, sizeof(child->perf_event_ctxp));
7243 INIT_LIST_HEAD(&child->perf_event_list);
7254 static void __init perf_event_init_all_cpus(void)
7262 INIT_LIST_HEAD(&per_cpu(rotation_list, cpu));
7266 static void __cpuinit perf_event_init_cpu(int cpu)
7281 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC
7282 static void perf_pmu_rotate_stop(struct pmu *pmu)
7291 static void __perf_event_exit_context(void *__info)
7296 perf_pmu_rotate_stop(ctx->pmu);
7299 __perf_remove_from_context(event);
7301 __perf_remove_from_context(event);
7304 static void perf_event_exit_cpu_context(int cpu)
7310 idx = srcu_read_lock(&pmus_srcu);
7311 list_for_each_entry_rcu(pmu, &pmus, entry) {
7318 srcu_read_unlock(&pmus_srcu, idx);
7321 static void perf_event_exit_cpu(int cpu)
7326 swevent_hlist_release(swhash);
7329 perf_event_exit_cpu_context(cpu);
7332 static inline void perf_event_exit_cpu(int cpu) { }
7336 perf_reboot(struct notifier_block *notifier, unsigned long val, void *v)
7341 perf_event_exit_cpu(cpu);
7358 unsigned int cpu = (long)hcpu;
7364 perf_event_init_cpu(cpu);
7369 perf_event_exit_cpu(cpu);
7385 perf_event_init_all_cpus();
7395 WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
7398 jump_label_rate_limit(&perf_sched_events, HZ);
7408 static int __init perf_event_sysfs_init(void)
7423 ret = pmu_dev_alloc(pmu);
7424 WARN(ret, "Failed to register pmu: %s, reason %d\n", pmu->name, ret);
7426 pmu_bus_running = 1;
7436 #ifdef CONFIG_CGROUP_PERF
7437 static struct cgroup_subsys_state *perf_cgroup_create(struct cgroup *cont)
7439 struct perf_cgroup *jc;
7454 static void perf_cgroup_destroy(struct cgroup *cont)
7456 struct perf_cgroup *jc;
7457 jc = container_of(cgroup_subsys_state(cont, perf_subsys_id),
7458 struct perf_cgroup, css);
7463 static int __perf_cgroup_move(void *info)
7470 static void perf_cgroup_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
7474 cgroup_taskset_for_each(task, cgrp, tset)
7475 task_function_call(task, __perf_cgroup_move, task);
7478 static void perf_cgroup_exit(struct cgroup *cgrp, struct cgroup *old_cgrp,
7489 task_function_call(task, __perf_cgroup_move, task);
7492 struct cgroup_subsys perf_subsys = {
7493 .name = "perf_event",
7494 .subsys_id = perf_subsys_id,
7495 .create = perf_cgroup_create,
7496 .destroy = perf_cgroup_destroy,
7497 .exit = perf_cgroup_exit,
7498 .attach = perf_cgroup_attach,
7505 .broken_hierarchy = true,