Linux Kernel 3.7.1
trace_event_perf.c
/*
 * trace event based perf event profiling/tracing
 *
 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <[email protected]>
 * Copyright (C) 2009-2010 Frederic Weisbecker <[email protected]>
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"

static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];

/*
 * Force it to be aligned to unsigned long to avoid misaligned access
 * surprises
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
        perf_trace_t;

/* Count the events in use (per event id, not per instance) */
static int total_ref_count;

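/*
 * Sizing note (a sketch of how these buffers are used): perf_trace_t is
 * PERF_MAX_TRACE_SIZE bytes, and one per-cpu buffer is kept per recursion
 * context.  PERF_NR_CONTEXTS corresponds to the perf software-event
 * recursion levels (task, softirq, hardirq, NMI), so an event firing from
 * an interrupt does not scribble over a half-built record from task
 * context on the same CPU.
 */
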
static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
                                 struct perf_event *p_event)
{
        /* The ftrace function trace is allowed only for root. */
        if (ftrace_event_is_function(tp_event) &&
            perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
                return -EPERM;

        /* No tracing, just counting, so no obvious leak */
        if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
                return 0;

        /* Some events are ok to be traced by non-root users... */
        if (p_event->attach_state == PERF_ATTACH_TASK) {
                if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
                        return 0;
        }

        /*
         * ...otherwise raw tracepoint data can be a severe data leak,
         * only allow root to have these.
         */
        if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
                return -EPERM;

        return 0;
}

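/*
 * Rough summary of the checks above: pure counters (no PERF_SAMPLE_RAW)
 * are always allowed; per-task events on TRACE_EVENT_FL_CAP_ANY events
 * are allowed; everything else that hands raw tracepoint payloads to
 * userspace falls back to the paranoia policy plus CAP_SYS_ADMIN.  The
 * perf_paranoid_*() helpers are thin comparisons against the
 * kernel.perf_event_paranoid sysctl.
 */
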
static int perf_trace_event_reg(struct ftrace_event_call *tp_event,
                                struct perf_event *p_event)
{
        struct hlist_head __percpu *list;
        int ret = -ENOMEM;
        int cpu;

        p_event->tp_event = tp_event;
        if (tp_event->perf_refcount++ > 0)
                return 0;

        list = alloc_percpu(struct hlist_head);
        if (!list)
                goto fail;

        for_each_possible_cpu(cpu)
                INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));

        tp_event->perf_events = list;

        if (!total_ref_count) {
                char __percpu *buf;
                int i;

                for (i = 0; i < PERF_NR_CONTEXTS; i++) {
                        buf = (char __percpu *)alloc_percpu(perf_trace_t);
                        if (!buf)
                                goto fail;

                        perf_trace_buf[i] = buf;
                }
        }

        ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL);
        if (ret)
                goto fail;

        total_ref_count++;
        return 0;

fail:
        if (!total_ref_count) {
                int i;

                for (i = 0; i < PERF_NR_CONTEXTS; i++) {
                        free_percpu(perf_trace_buf[i]);
                        perf_trace_buf[i] = NULL;
                }
        }

        if (!--tp_event->perf_refcount) {
                free_percpu(tp_event->perf_events);
                tp_event->perf_events = NULL;
        }

        return ret;
}

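/*
 * Refcounting sketch: tp_event->perf_refcount counts perf users of this
 * particular event and gates the TRACE_REG_PERF_REGISTER /
 * TRACE_REG_PERF_UNREGISTER calls, while total_ref_count counts events of
 * any type and gates the allocation of the shared perf_trace_buf buffers
 * above.  Only the first user of an event pays the registration cost, and
 * only the very first perf trace user pays for the buffers.
 */
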
static void perf_trace_event_unreg(struct perf_event *p_event)
{
        struct ftrace_event_call *tp_event = p_event->tp_event;
        int i;

        if (--tp_event->perf_refcount > 0)
                goto out;

        tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);

        /*
         * Ensure our callback won't be called anymore. The buffers
         * will be freed after that.
         */
        tracepoint_synchronize_unregister();

        free_percpu(tp_event->perf_events);
        tp_event->perf_events = NULL;

        if (!--total_ref_count) {
                for (i = 0; i < PERF_NR_CONTEXTS; i++) {
                        free_percpu(perf_trace_buf[i]);
                        perf_trace_buf[i] = NULL;
                }
        }
out:
        module_put(tp_event->mod);
}

static int perf_trace_event_open(struct perf_event *p_event)
{
        struct ftrace_event_call *tp_event = p_event->tp_event;
        return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event);
}

static void perf_trace_event_close(struct perf_event *p_event)
{
        struct ftrace_event_call *tp_event = p_event->tp_event;
        tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event);
}

static int perf_trace_event_init(struct ftrace_event_call *tp_event,
                                 struct perf_event *p_event)
{
        int ret;

        ret = perf_trace_event_perm(tp_event, p_event);
        if (ret)
                return ret;

        ret = perf_trace_event_reg(tp_event, p_event);
        if (ret)
                return ret;

        ret = perf_trace_event_open(p_event);
        if (ret) {
                perf_trace_event_unreg(p_event);
                return ret;
        }

        return 0;
}

int perf_trace_init(struct perf_event *p_event)
{
        struct ftrace_event_call *tp_event;
        int event_id = p_event->attr.config;
        int ret = -EINVAL;

        mutex_lock(&event_mutex);
        list_for_each_entry(tp_event, &ftrace_events, list) {
                if (tp_event->event.type == event_id &&
                    tp_event->class && tp_event->class->reg &&
                    try_module_get(tp_event->mod)) {
                        ret = perf_trace_event_init(tp_event, p_event);
                        if (ret)
                                module_put(tp_event->mod);
                        break;
                }
        }
        mutex_unlock(&event_mutex);

        return ret;
}

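/*
 * For reference, attr.config here is the numeric id a user reads from
 * debugfs, e.g. /sys/kernel/debug/tracing/events/sched/sched_switch/id,
 * and passes to perf_event_open() with type PERF_TYPE_TRACEPOINT.  A
 * minimal userspace sketch (illustrative only, error handling omitted):
 *
 *      struct perf_event_attr attr = {
 *              .type        = PERF_TYPE_TRACEPOINT,
 *              .size        = sizeof(attr),
 *              .config      = id,              // value read from .../id
 *              .sample_type = PERF_SAMPLE_RAW,
 *      };
 *      fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
 */
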
void perf_trace_destroy(struct perf_event *p_event)
{
        mutex_lock(&event_mutex);
        perf_trace_event_close(p_event);
        perf_trace_event_unreg(p_event);
        mutex_unlock(&event_mutex);
}

int perf_trace_add(struct perf_event *p_event, int flags)
{
        struct ftrace_event_call *tp_event = p_event->tp_event;
        struct hlist_head __percpu *pcpu_list;
        struct hlist_head *list;

        pcpu_list = tp_event->perf_events;
        if (WARN_ON_ONCE(!pcpu_list))
                return -EINVAL;

        if (!(flags & PERF_EF_START))
                p_event->hw.state = PERF_HES_STOPPED;

        list = this_cpu_ptr(pcpu_list);
        hlist_add_head_rcu(&p_event->hlist_entry, list);

        return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event);
}

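/*
 * The per-cpu hlist built here is what the tracepoint side walks when an
 * event fires: perf_tp_event() in kernel/events/core.c does roughly
 *
 *      hlist_for_each_entry_rcu(event, node, head, hlist_entry)
 *              perf_swevent_event(event, count, &data, regs);
 *
 * so adding or removing an event is just an RCU hlist operation on the
 * CPU the event is scheduled on.
 */
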
void perf_trace_del(struct perf_event *p_event, int flags)
{
        struct ftrace_event_call *tp_event = p_event->tp_event;
        hlist_del_rcu(&p_event->hlist_entry);
        tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
}

__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
                                       struct pt_regs *regs, int *rctxp)
{
        struct trace_entry *entry;
        unsigned long flags;
        char *raw_data;
        int pc;

        BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

        pc = preempt_count();

        *rctxp = perf_swevent_get_recursion_context();
        if (*rctxp < 0)
                return NULL;

        raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);

        /* zero the dead bytes from align to not leak stack to user */
        memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));

        entry = (struct trace_entry *)raw_data;
        local_save_flags(flags);
        tracing_generic_entry_update(entry, flags, pc);
        entry->type = type;

        return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);

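/*
 * Typical usage, as in the probes generated from include/trace/ftrace.h
 * (an illustrative sketch; field setup elided):
 *
 *      entry = perf_trace_buf_prepare(__entry_size, event_call->event.type,
 *                                     regs, &rctx);
 *      if (!entry)
 *              return;
 *      ... fill in the event-specific fields of *entry ...
 *      perf_trace_buf_submit(entry, __entry_size, rctx, 0, __count, regs,
 *                            head, NULL);
 *
 * perf_trace_buf_submit() hands the record to perf_tp_event() and drops
 * the recursion context taken above.
 */
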
#ifdef CONFIG_FUNCTION_TRACER
static void
perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
                          struct ftrace_ops *ops, struct pt_regs *pt_regs)
{
        struct ftrace_entry *entry;
        struct hlist_head *head;
        struct pt_regs regs;
        int rctx;

#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
                    sizeof(u64)) - sizeof(u32))

        BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE);

        perf_fetch_caller_regs(&regs);

        entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx);
        if (!entry)
                return;

        entry->ip = ip;
        entry->parent_ip = parent_ip;

        head = this_cpu_ptr(event_function.perf_events);
        perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
                              1, &regs, head, NULL);

#undef ENTRY_SIZE
}

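/*
 * ENTRY_SIZE arithmetic, spelled out (assuming a 64-bit build where
 * struct ftrace_entry is 8 bytes of struct trace_entry plus two 8-byte
 * instruction pointers = 24 bytes): 24 + 4 = 28, ALIGN(28, 8) = 32,
 * 32 - 4 = 28.  The extra u32 accounts for the size field that perf
 * prepends to PERF_SAMPLE_RAW data, so size plus payload stays
 * u64-aligned in the ring buffer.
 */
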
static int perf_ftrace_function_register(struct perf_event *event)
{
        struct ftrace_ops *ops = &event->ftrace_ops;

        ops->flags |= FTRACE_OPS_FL_CONTROL;
        ops->func = perf_ftrace_function_call;
        return register_ftrace_function(ops);
}

static int perf_ftrace_function_unregister(struct perf_event *event)
{
        struct ftrace_ops *ops = &event->ftrace_ops;
        int ret = unregister_ftrace_function(ops);
        ftrace_free_filter(ops);
        return ret;
}

static void perf_ftrace_function_enable(struct perf_event *event)
{
        ftrace_function_local_enable(&event->ftrace_ops);
}

static void perf_ftrace_function_disable(struct perf_event *event)
{
        ftrace_function_local_disable(&event->ftrace_ops);
}

int perf_ftrace_event_register(struct ftrace_event_call *call,
                               enum trace_reg type, void *data)
{
        switch (type) {
        case TRACE_REG_REGISTER:
        case TRACE_REG_UNREGISTER:
                break;
        case TRACE_REG_PERF_REGISTER:
        case TRACE_REG_PERF_UNREGISTER:
                return 0;
        case TRACE_REG_PERF_OPEN:
                return perf_ftrace_function_register(data);
        case TRACE_REG_PERF_CLOSE:
                return perf_ftrace_function_unregister(data);
        case TRACE_REG_PERF_ADD:
                perf_ftrace_function_enable(data);
                return 0;
        case TRACE_REG_PERF_DEL:
                perf_ftrace_function_disable(data);
                return 0;
        }

        return -EINVAL;
}
#endif /* CONFIG_FUNCTION_TRACER */
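
/*
 * Wiring note (a sketch): perf_ftrace_event_register() serves as the
 * ->class->reg() callback of the "function" trace event, so the generic
 * TRACE_REG_PERF_OPEN/CLOSE/ADD/DEL calls issued by the code earlier in
 * this file end up registering and toggling the per-event ftrace_ops
 * that routes every traced function into perf_ftrace_function_call().
 */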