Linux Kernel 3.7.1
cpu_buffer.c
/**
 * @file cpu_buffer.c
 *
 * @remark Copyright 2002-2009 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 * @author Barry Kasindorf <barry.kasindorf@amd.com>
 * @author Robert Richter <robert.richter@amd.com>
 *
 * Each CPU has a local buffer that stores PC value/event
 * pairs. We also log context switches when we notice them.
 * Eventually each CPU's buffer is processed into the global
 * event buffer by sync_buffer().
 *
 * We use a local buffer for two reasons: an NMI or similar
 * interrupt cannot synchronise, and high sampling rates
 * would lead to catastrophic global synchronisation if
 * a global buffer was used.
 */

#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/errno.h>

#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
#include "oprof.h"

#define OP_BUFFER_FLAGS 0

static struct ring_buffer *op_ring_buffer;
DEFINE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer);

static void wq_sync_buffer(struct work_struct *work);

#define DEFAULT_TIMER_EXPIRE (HZ / 10)
static int work_enabled;

unsigned long oprofile_get_cpu_buffer_size(void)
{
        return oprofile_cpu_buffer_size;
}

void oprofile_cpu_buffer_inc_smpl_lost(void)
{
        struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);

        cpu_buf->sample_lost_overflow++;
}

void free_cpu_buffers(void)
{
        if (op_ring_buffer)
                ring_buffer_free(op_ring_buffer);
        op_ring_buffer = NULL;
}

#define RB_EVENT_HDR_SIZE  4

int alloc_cpu_buffers(void)
{
        int i;

        unsigned long buffer_size = oprofile_cpu_buffer_size;
        unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
                                                 RB_EVENT_HDR_SIZE);

        op_ring_buffer = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
        if (!op_ring_buffer)
                goto fail;

        for_each_possible_cpu(i) {
                struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);

                b->last_task = NULL;
                b->last_is_kernel = -1;
                b->tracing = 0;
                b->buffer_size = buffer_size;
                b->sample_received = 0;
                b->sample_lost_overflow = 0;
                b->backtrace_aborted = 0;
                b->sample_invalid_eip = 0;
                b->cpu = i;
                INIT_DELAYED_WORK(&b->work, wq_sync_buffer);
        }
        return 0;

fail:
        free_cpu_buffers();
        return -ENOMEM;
}

void start_cpu_work(void)
{
        int i;

        work_enabled = 1;

        for_each_online_cpu(i) {
                struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);

                /*
                 * Spread the work by 1 jiffy per cpu so they don't all
                 * fire at once.
                 */
                schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
        }
}

void end_cpu_work(void)
{
        work_enabled = 0;
}

void flush_cpu_work(void)
{
        int i;

        for_each_online_cpu(i) {
                struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);

                /* these works are per-cpu, no need for flush_sync */
                flush_delayed_work(&b->work);
        }
}

/*
 * This function prepares the cpu buffer to write a sample.
 *
 * Struct op_entry is used during operations on the ring buffer while
 * struct op_sample contains the data that is stored in the ring
 * buffer. Struct entry can be uninitialized. The function reserves a
 * data array that is specified by size. Use
 * op_cpu_buffer_write_commit() after preparing the sample. In case of
 * errors a NULL pointer is returned, otherwise a pointer to the
 * sample.
 */
struct op_sample
*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size)
{
        entry->event = ring_buffer_lock_reserve
                (op_ring_buffer, sizeof(struct op_sample) +
                 size * sizeof(entry->sample->data[0]));
        if (!entry->event)
                return NULL;
        entry->sample = ring_buffer_event_data(entry->event);
        entry->size = size;
        entry->data = entry->sample->data;

        return entry->sample;
}

int op_cpu_buffer_write_commit(struct op_entry *entry)
{
        return ring_buffer_unlock_commit(op_ring_buffer, entry->event);
}
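
/*
 * A minimal writer-side usage sketch (illustration only, not part of
 * the build): reserve a sample with room for two data words, fill it,
 * and commit it.  This mirrors the pattern used by op_add_code()
 * further down; "my_code" and "my_data" are hypothetical values.
 */
#if 0
static void example_write_two_words(unsigned long my_code, unsigned long my_data)
{
        struct op_entry entry;
        struct op_sample *sample;

        sample = op_cpu_buffer_write_reserve(&entry, 2);
        if (!sample)
                return;                         /* ring buffer full, drop it */

        sample->eip = ESCAPE_CODE;              /* marks an extended record */
        sample->event = 0;
        op_cpu_buffer_add_data(&entry, my_code);        /* fills data[0] */
        op_cpu_buffer_add_data(&entry, my_data);        /* fills data[1] */
        op_cpu_buffer_write_commit(&entry);
}
#endif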

struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
{
        struct ring_buffer_event *e;
        e = ring_buffer_consume(op_ring_buffer, cpu, NULL, NULL);
        if (!e)
                return NULL;

        entry->event = e;
        entry->sample = ring_buffer_event_data(e);
        entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample))
                / sizeof(entry->sample->data[0]);
        entry->data = entry->sample->data;
        return entry->sample;
}

unsigned long op_cpu_buffer_entries(int cpu)
{
        return ring_buffer_entries_cpu(op_ring_buffer, cpu);
}

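/*
 * A minimal reader-side sketch (illustration only, not part of the
 * build): how a consumer such as sync_buffer() in buffer_sync.c drains
 * one cpu's entries with the helpers above.  process_sample() is a
 * hypothetical callback.
 */
#if 0
static void example_drain_cpu(int cpu,
                              void (*process_sample)(unsigned long eip,
                                                     unsigned long event))
{
        struct op_entry entry;
        struct op_sample *sample;
        unsigned long pending = op_cpu_buffer_entries(cpu);

        while (pending--) {
                /* consumes the event; no commit is needed on the read side */
                sample = op_cpu_buffer_read_entry(&entry, cpu);
                if (!sample)
                        break;
                process_sample(sample->eip, sample->event);
        }
}
#endif
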
static int
op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace,
            int is_kernel, struct task_struct *task)
{
        struct op_entry entry;
        struct op_sample *sample;
        unsigned long flags;
        int size;

        flags = 0;

        if (backtrace)
                flags |= TRACE_BEGIN;

        /* notice a switch from user->kernel or vice versa */
        is_kernel = !!is_kernel;
        if (cpu_buf->last_is_kernel != is_kernel) {
                cpu_buf->last_is_kernel = is_kernel;
                flags |= KERNEL_CTX_SWITCH;
                if (is_kernel)
                        flags |= IS_KERNEL;
        }

        /* notice a task switch */
        if (cpu_buf->last_task != task) {
                cpu_buf->last_task = task;
                flags |= USER_CTX_SWITCH;
        }

        if (!flags)
                /* nothing to do */
                return 0;

        if (flags & USER_CTX_SWITCH)
                size = 1;
        else
                size = 0;

        sample = op_cpu_buffer_write_reserve(&entry, size);
        if (!sample)
                return -ENOMEM;

        sample->eip = ESCAPE_CODE;
        sample->event = flags;

        if (size)
                op_cpu_buffer_add_data(&entry, (unsigned long)task);

        op_cpu_buffer_write_commit(&entry);

        return 0;
}

static inline int
op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
              unsigned long pc, unsigned long event)
{
        struct op_entry entry;
        struct op_sample *sample;

        sample = op_cpu_buffer_write_reserve(&entry, 0);
        if (!sample)
                return -ENOMEM;

        sample->eip = pc;
        sample->event = event;

        return op_cpu_buffer_write_commit(&entry);
}

/*
 * This must be safe from any context.
 *
 * is_kernel is needed because on some architectures you cannot
 * tell if you are in kernel or user space simply by looking at
 * pc. We tag this in the buffer by generating kernel enter/exit
 * events whenever is_kernel changes
 */
static int
log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
           unsigned long backtrace, int is_kernel, unsigned long event,
           struct task_struct *task)
{
        struct task_struct *tsk = task ? task : current;
        cpu_buf->sample_received++;

        if (pc == ESCAPE_CODE) {
                cpu_buf->sample_invalid_eip++;
                return 0;
        }

        if (op_add_code(cpu_buf, backtrace, is_kernel, tsk))
                goto fail;

        if (op_add_sample(cpu_buf, pc, event))
                goto fail;

        return 1;

fail:
        cpu_buf->sample_lost_overflow++;
        return 0;
}

static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
{
        cpu_buf->tracing = 1;
}

static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
{
        cpu_buf->tracing = 0;
}

static inline void
__oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
                          unsigned long event, int is_kernel,
                          struct task_struct *task)
{
        struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
        unsigned long backtrace = oprofile_backtrace_depth;

        /*
         * if log_sample() fails we can't backtrace since we lost the
         * source of this event
         */
        if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, task))
                /* failed */
                return;

        if (!backtrace)
                return;

        oprofile_begin_trace(cpu_buf);
        oprofile_ops.backtrace(regs, backtrace);
        oprofile_end_trace(cpu_buf);
}

void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs,
                                unsigned long event, int is_kernel,
                                struct task_struct *task)
{
        __oprofile_add_ext_sample(pc, regs, event, is_kernel, task);
}

void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
                             unsigned long event, int is_kernel)
{
        __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}

void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
{
        int is_kernel;
        unsigned long pc;

        if (likely(regs)) {
                is_kernel = !user_mode(regs);
                pc = profile_pc(regs);
        } else {
                is_kernel = 0;    /* This value will not be used */
                pc = ESCAPE_CODE; /* as this causes an early return. */
        }

        __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}

/*
 * Add samples with data to the ring buffer.
 *
 * Use oprofile_add_data(&entry, val) to add data and
 * oprofile_write_commit(&entry) to commit the sample.
 */
void
oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs,
                       unsigned long pc, int code, int size)
{
        struct op_sample *sample;
        int is_kernel = !user_mode(regs);
        struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);

        cpu_buf->sample_received++;

        /* no backtraces for samples with data */
        if (op_add_code(cpu_buf, 0, is_kernel, current))
                goto fail;

        sample = op_cpu_buffer_write_reserve(entry, size + 2);
        if (!sample)
                goto fail;
        sample->eip = ESCAPE_CODE;
        sample->event = 0;              /* no flags */

        op_cpu_buffer_add_data(entry, code);
        op_cpu_buffer_add_data(entry, pc);

        return;

fail:
        entry->event = NULL;
        cpu_buf->sample_lost_overflow++;
}

int oprofile_add_data(struct op_entry *entry, unsigned long val)
{
        if (!entry->event)
                return 0;
        return op_cpu_buffer_add_data(entry, val);
}

int oprofile_add_data64(struct op_entry *entry, u64 val)
{
        if (!entry->event)
                return 0;
        if (op_cpu_buffer_get_size(entry) < 2)
                /*
                 * the function returns 0 to indicate a too small
                 * buffer, even if there is some space left
                 */
                return 0;
        if (!op_cpu_buffer_add_data(entry, (u32)val))
                return 0;
        return op_cpu_buffer_add_data(entry, (u32)(val >> 32));
}

int oprofile_write_commit(struct op_entry *entry)
{
        if (!entry->event)
                return -EINVAL;
        return op_cpu_buffer_write_commit(entry);
}

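/*
 * A minimal sketch (illustration only, not part of the build) of how
 * an architecture driver might use oprofile_write_reserve(),
 * oprofile_add_data() and oprofile_write_commit() to log a sample
 * carrying "count" extra words.  MY_SAMPLE_CODE and the val[] payload
 * are hypothetical.
 */
#if 0
static void example_log_extended_sample(struct pt_regs * const regs,
                                        unsigned long pc,
                                        unsigned long *val, int count)
{
        struct op_entry entry;
        int i;

        oprofile_write_reserve(&entry, regs, pc, MY_SAMPLE_CODE, count);
        for (i = 0; i < count; i++)
                if (!oprofile_add_data(&entry, val[i]))
                        break;          /* reserve failed or no room left */
        oprofile_write_commit(&entry);
}
#endif
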
void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
        struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
        log_sample(cpu_buf, pc, 0, is_kernel, event, NULL);
}

void oprofile_add_trace(unsigned long pc)
{
        struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);

        if (!cpu_buf->tracing)
                return;

        /*
         * broken frame can give an eip with the same value as an
         * escape code, abort the trace if we get it
         */
        if (pc == ESCAPE_CODE)
                goto fail;

        if (op_add_sample(cpu_buf, pc, 0))
                goto fail;

        return;
fail:
        cpu_buf->tracing = 0;
        cpu_buf->backtrace_aborted++;
        return;
}

/*
 * This serves to avoid cpu buffer overflow, and makes sure
 * the task mortuary progresses
 *
 * By using schedule_delayed_work_on and then schedule_delayed_work
 * we guarantee this will stay on the correct cpu
 */
static void wq_sync_buffer(struct work_struct *work)
{
        struct oprofile_cpu_buffer *b =
                container_of(work, struct oprofile_cpu_buffer, work.work);
        if (b->cpu != smp_processor_id() && !cpu_online(b->cpu)) {
                cancel_delayed_work(&b->work);
                return;
        }
        sync_buffer(b->cpu);

        /* don't re-add the work if we're shutting down */
        if (work_enabled)
                schedule_delayed_work(&b->work, DEFAULT_TIMER_EXPIRE);
}