Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
trace_kprobe.c
Go to the documentation of this file.
1 /*
2  * Kprobes-based tracing events
3  *
4  * Created by Masami Hiramatsu <[email protected]>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18  */
19 
20 #include <linux/module.h>
21 #include <linux/uaccess.h>
22 
23 #include "trace_probe.h"
24 
/* Event group (system) name applied when a probe definition names no group. */
#define KPROBE_EVENT_SYSTEM	"kprobes"
26 
31 struct trace_probe {
32  struct list_head list;
33  struct kretprobe rp; /* Use rp.kp for kprobe use */
34  unsigned long nhit;
35  unsigned int flags; /* For TP_FLAG_* */
36  const char *symbol; /* symbol name */
39  ssize_t size; /* trace entry size */
40  unsigned int nr_args;
41  struct probe_arg args[];
42 };
43 
/* Bytes needed for a trace_probe that carries @n trailing probe_arg slots. */
#define SIZEOF_TRACE_PROBE(n)				\
	(offsetof(struct trace_probe, args) +		\
	 ((n) * sizeof(struct probe_arg)))
47 
48 
49 static __kprobes int trace_probe_is_return(struct trace_probe *tp)
50 {
51  return tp->rp.handler != NULL;
52 }
53 
54 static __kprobes const char *trace_probe_symbol(struct trace_probe *tp)
55 {
56  return tp->symbol ? tp->symbol : "unknown";
57 }
58 
59 static __kprobes unsigned long trace_probe_offset(struct trace_probe *tp)
60 {
61  return tp->rp.kp.offset;
62 }
63 
64 static __kprobes bool trace_probe_is_enabled(struct trace_probe *tp)
65 {
66  return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
67 }
68 
69 static __kprobes bool trace_probe_is_registered(struct trace_probe *tp)
70 {
71  return !!(tp->flags & TP_FLAG_REGISTERED);
72 }
73 
74 static __kprobes bool trace_probe_has_gone(struct trace_probe *tp)
75 {
76  return !!(kprobe_gone(&tp->rp.kp));
77 }
78 
79 static __kprobes bool trace_probe_within_module(struct trace_probe *tp,
80  struct module *mod)
81 {
82  int len = strlen(mod->name);
83  const char *name = trace_probe_symbol(tp);
84  return strncmp(mod->name, name, len) == 0 && name[len] == ':';
85 }
86 
87 static __kprobes bool trace_probe_is_on_module(struct trace_probe *tp)
88 {
89  return !!strchr(trace_probe_symbol(tp), ':');
90 }
91 
/* Trace event (un)registration helpers, defined later in this file. */
static int register_probe_event(struct trace_probe *tp);
static void unregister_probe_event(struct trace_probe *tp);

/*
 * probe_list holds every trace_probe added by register_trace_probe();
 * probe_lock is taken around registration, deletion and listing.
 */
static DEFINE_MUTEX(probe_lock);
static LIST_HEAD(probe_list);

/* k*probe handlers installed by alloc_trace_probe(). */
static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
static int kretprobe_dispatcher(struct kretprobe_instance *ri,
				struct pt_regs *regs);
101 
102 /*
103  * Allocate new trace_probe and initialize it (including kprobes).
104  */
105 static struct trace_probe *alloc_trace_probe(const char *group,
106  const char *event,
107  void *addr,
108  const char *symbol,
109  unsigned long offs,
110  int nargs, bool is_return)
111 {
112  struct trace_probe *tp;
113  int ret = -ENOMEM;
114 
115  tp = kzalloc(SIZEOF_TRACE_PROBE(nargs), GFP_KERNEL);
116  if (!tp)
117  return ERR_PTR(ret);
118 
119  if (symbol) {
120  tp->symbol = kstrdup(symbol, GFP_KERNEL);
121  if (!tp->symbol)
122  goto error;
123  tp->rp.kp.symbol_name = tp->symbol;
124  tp->rp.kp.offset = offs;
125  } else
126  tp->rp.kp.addr = addr;
127 
128  if (is_return)
129  tp->rp.handler = kretprobe_dispatcher;
130  else
131  tp->rp.kp.pre_handler = kprobe_dispatcher;
132 
133  if (!event || !is_good_name(event)) {
134  ret = -EINVAL;
135  goto error;
136  }
137 
138  tp->call.class = &tp->class;
139  tp->call.name = kstrdup(event, GFP_KERNEL);
140  if (!tp->call.name)
141  goto error;
142 
143  if (!group || !is_good_name(group)) {
144  ret = -EINVAL;
145  goto error;
146  }
147 
148  tp->class.system = kstrdup(group, GFP_KERNEL);
149  if (!tp->class.system)
150  goto error;
151 
152  INIT_LIST_HEAD(&tp->list);
153  return tp;
154 error:
155  kfree(tp->call.name);
156  kfree(tp->symbol);
157  kfree(tp);
158  return ERR_PTR(ret);
159 }
160 
161 static void free_trace_probe(struct trace_probe *tp)
162 {
163  int i;
164 
165  for (i = 0; i < tp->nr_args; i++)
167 
168  kfree(tp->call.class->system);
169  kfree(tp->call.name);
170  kfree(tp->symbol);
171  kfree(tp);
172 }
173 
174 static struct trace_probe *find_trace_probe(const char *event,
175  const char *group)
176 {
177  struct trace_probe *tp;
178 
179  list_for_each_entry(tp, &probe_list, list)
180  if (strcmp(tp->call.name, event) == 0 &&
181  strcmp(tp->call.class->system, group) == 0)
182  return tp;
183  return NULL;
184 }
185 
186 /* Enable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
187 static int enable_trace_probe(struct trace_probe *tp, int flag)
188 {
189  int ret = 0;
190 
191  tp->flags |= flag;
192  if (trace_probe_is_enabled(tp) && trace_probe_is_registered(tp) &&
193  !trace_probe_has_gone(tp)) {
194  if (trace_probe_is_return(tp))
195  ret = enable_kretprobe(&tp->rp);
196  else
197  ret = enable_kprobe(&tp->rp.kp);
198  }
199 
200  return ret;
201 }
202 
203 /* Disable trace_probe - @flag must be TP_FLAG_TRACE or TP_FLAG_PROFILE */
204 static void disable_trace_probe(struct trace_probe *tp, int flag)
205 {
206  tp->flags &= ~flag;
207  if (!trace_probe_is_enabled(tp) && trace_probe_is_registered(tp)) {
208  if (trace_probe_is_return(tp))
209  disable_kretprobe(&tp->rp);
210  else
211  disable_kprobe(&tp->rp.kp);
212  }
213 }
214 
215 /* Internal register function - just handle k*probes and flags */
216 static int __register_trace_probe(struct trace_probe *tp)
217 {
218  int i, ret;
219 
220  if (trace_probe_is_registered(tp))
221  return -EINVAL;
222 
223  for (i = 0; i < tp->nr_args; i++)
224  traceprobe_update_arg(&tp->args[i]);
225 
226  /* Set/clear disabled flag according to tp->flag */
227  if (trace_probe_is_enabled(tp))
228  tp->rp.kp.flags &= ~KPROBE_FLAG_DISABLED;
229  else
230  tp->rp.kp.flags |= KPROBE_FLAG_DISABLED;
231 
232  if (trace_probe_is_return(tp))
233  ret = register_kretprobe(&tp->rp);
234  else
235  ret = register_kprobe(&tp->rp.kp);
236 
237  if (ret == 0)
238  tp->flags |= TP_FLAG_REGISTERED;
239  else {
240  pr_warning("Could not insert probe at %s+%lu: %d\n",
241  trace_probe_symbol(tp), trace_probe_offset(tp), ret);
242  if (ret == -ENOENT && trace_probe_is_on_module(tp)) {
243  pr_warning("This probe might be able to register after"
244  "target module is loaded. Continue.\n");
245  ret = 0;
246  } else if (ret == -EILSEQ) {
247  pr_warning("Probing address(0x%p) is not an "
248  "instruction boundary.\n",
249  tp->rp.kp.addr);
250  ret = -EINVAL;
251  }
252  }
253 
254  return ret;
255 }
256 
257 /* Internal unregister function - just handle k*probes and flags */
258 static void __unregister_trace_probe(struct trace_probe *tp)
259 {
260  if (trace_probe_is_registered(tp)) {
261  if (trace_probe_is_return(tp))
262  unregister_kretprobe(&tp->rp);
263  else
264  unregister_kprobe(&tp->rp.kp);
265  tp->flags &= ~TP_FLAG_REGISTERED;
266  /* Cleanup kprobe for reuse */
267  if (tp->rp.kp.symbol_name)
268  tp->rp.kp.addr = NULL;
269  }
270 }
271 
272 /* Unregister a trace_probe and probe_event: call with locking probe_lock */
273 static int unregister_trace_probe(struct trace_probe *tp)
274 {
275  /* Enabled event can not be unregistered */
276  if (trace_probe_is_enabled(tp))
277  return -EBUSY;
278 
279  __unregister_trace_probe(tp);
280  list_del(&tp->list);
281  unregister_probe_event(tp);
282 
283  return 0;
284 }
285 
286 /* Register a trace_probe and probe_event */
287 static int register_trace_probe(struct trace_probe *tp)
288 {
289  struct trace_probe *old_tp;
290  int ret;
291 
292  mutex_lock(&probe_lock);
293 
294  /* Delete old (same name) event if exist */
295  old_tp = find_trace_probe(tp->call.name, tp->call.class->system);
296  if (old_tp) {
297  ret = unregister_trace_probe(old_tp);
298  if (ret < 0)
299  goto end;
300  free_trace_probe(old_tp);
301  }
302 
303  /* Register new event */
304  ret = register_probe_event(tp);
305  if (ret) {
306  pr_warning("Failed to register probe event(%d)\n", ret);
307  goto end;
308  }
309 
310  /* Register k*probe */
311  ret = __register_trace_probe(tp);
312  if (ret < 0)
313  unregister_probe_event(tp);
314  else
315  list_add_tail(&tp->list, &probe_list);
316 
317 end:
318  mutex_unlock(&probe_lock);
319  return ret;
320 }
321 
322 /* Module notifier call back, checking event on the module */
323 static int trace_probe_module_callback(struct notifier_block *nb,
324  unsigned long val, void *data)
325 {
326  struct module *mod = data;
327  struct trace_probe *tp;
328  int ret;
329 
330  if (val != MODULE_STATE_COMING)
331  return NOTIFY_DONE;
332 
333  /* Update probes on coming module */
334  mutex_lock(&probe_lock);
335  list_for_each_entry(tp, &probe_list, list) {
336  if (trace_probe_within_module(tp, mod)) {
337  /* Don't need to check busy - this should have gone. */
338  __unregister_trace_probe(tp);
339  ret = __register_trace_probe(tp);
340  if (ret)
341  pr_warning("Failed to re-register probe %s on"
342  "%s: %d\n",
343  tp->call.name, mod->name, ret);
344  }
345  }
346  mutex_unlock(&probe_lock);
347 
348  return NOTIFY_DONE;
349 }
350 
351 static struct notifier_block trace_probe_module_nb = {
352  .notifier_call = trace_probe_module_callback,
353  .priority = 1 /* Invoked after kprobe module callback */
354 };
355 
356 static int create_trace_probe(int argc, char **argv)
357 {
358  /*
359  * Argument syntax:
360  * - Add kprobe: p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS]
361  * - Add kretprobe: r[:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS]
362  * Fetch args:
363  * $retval : fetch return value
364  * $stack : fetch stack address
365  * $stackN : fetch Nth of stack (N:0-)
366  * @ADDR : fetch memory at ADDR (ADDR should be in kernel)
367  * @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
368  * %REG : fetch register REG
369  * Dereferencing memory fetch:
370  * +|-offs(ARG) : fetch memory at ARG +|- offs address.
371  * Alias name of args:
372  * NAME=FETCHARG : set NAME as alias of FETCHARG.
373  * Type of args:
374  * FETCHARG:TYPE : use TYPE instead of unsigned long.
375  */
376  struct trace_probe *tp;
377  int i, ret = 0;
378  bool is_return = false, is_delete = false;
379  char *symbol = NULL, *event = NULL, *group = NULL;
380  char *arg;
381  unsigned long offset = 0;
382  void *addr = NULL;
383  char buf[MAX_EVENT_NAME_LEN];
384 
385  /* argc must be >= 1 */
386  if (argv[0][0] == 'p')
387  is_return = false;
388  else if (argv[0][0] == 'r')
389  is_return = true;
390  else if (argv[0][0] == '-')
391  is_delete = true;
392  else {
393  pr_info("Probe definition must be started with 'p', 'r' or"
394  " '-'.\n");
395  return -EINVAL;
396  }
397 
398  if (argv[0][1] == ':') {
399  event = &argv[0][2];
400  if (strchr(event, '/')) {
401  group = event;
402  event = strchr(group, '/') + 1;
403  event[-1] = '\0';
404  if (strlen(group) == 0) {
405  pr_info("Group name is not specified\n");
406  return -EINVAL;
407  }
408  }
409  if (strlen(event) == 0) {
410  pr_info("Event name is not specified\n");
411  return -EINVAL;
412  }
413  }
414  if (!group)
415  group = KPROBE_EVENT_SYSTEM;
416 
417  if (is_delete) {
418  if (!event) {
419  pr_info("Delete command needs an event name.\n");
420  return -EINVAL;
421  }
422  mutex_lock(&probe_lock);
423  tp = find_trace_probe(event, group);
424  if (!tp) {
425  mutex_unlock(&probe_lock);
426  pr_info("Event %s/%s doesn't exist.\n", group, event);
427  return -ENOENT;
428  }
429  /* delete an event */
430  ret = unregister_trace_probe(tp);
431  if (ret == 0)
432  free_trace_probe(tp);
433  mutex_unlock(&probe_lock);
434  return ret;
435  }
436 
437  if (argc < 2) {
438  pr_info("Probe point is not specified.\n");
439  return -EINVAL;
440  }
441  if (isdigit(argv[1][0])) {
442  if (is_return) {
443  pr_info("Return probe point must be a symbol.\n");
444  return -EINVAL;
445  }
446  /* an address specified */
447  ret = strict_strtoul(&argv[1][0], 0, (unsigned long *)&addr);
448  if (ret) {
449  pr_info("Failed to parse address.\n");
450  return ret;
451  }
452  } else {
453  /* a symbol specified */
454  symbol = argv[1];
455  /* TODO: support .init module functions */
456  ret = traceprobe_split_symbol_offset(symbol, &offset);
457  if (ret) {
458  pr_info("Failed to parse symbol.\n");
459  return ret;
460  }
461  if (offset && is_return) {
462  pr_info("Return probe must be used without offset.\n");
463  return -EINVAL;
464  }
465  }
466  argc -= 2; argv += 2;
467 
468  /* setup a probe */
469  if (!event) {
470  /* Make a new event name */
471  if (symbol)
472  snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld",
473  is_return ? 'r' : 'p', symbol, offset);
474  else
475  snprintf(buf, MAX_EVENT_NAME_LEN, "%c_0x%p",
476  is_return ? 'r' : 'p', addr);
477  event = buf;
478  }
479  tp = alloc_trace_probe(group, event, addr, symbol, offset, argc,
480  is_return);
481  if (IS_ERR(tp)) {
482  pr_info("Failed to allocate trace_probe.(%d)\n",
483  (int)PTR_ERR(tp));
484  return PTR_ERR(tp);
485  }
486 
487  /* parse arguments */
488  ret = 0;
489  for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
490  /* Increment count for freeing args in error case */
491  tp->nr_args++;
492 
493  /* Parse argument name */
494  arg = strchr(argv[i], '=');
495  if (arg) {
496  *arg++ = '\0';
497  tp->args[i].name = kstrdup(argv[i], GFP_KERNEL);
498  } else {
499  arg = argv[i];
500  /* If argument name is omitted, set "argN" */
501  snprintf(buf, MAX_EVENT_NAME_LEN, "arg%d", i + 1);
502  tp->args[i].name = kstrdup(buf, GFP_KERNEL);
503  }
504 
505  if (!tp->args[i].name) {
506  pr_info("Failed to allocate argument[%d] name.\n", i);
507  ret = -ENOMEM;
508  goto error;
509  }
510 
511  if (!is_good_name(tp->args[i].name)) {
512  pr_info("Invalid argument[%d] name: %s\n",
513  i, tp->args[i].name);
514  ret = -EINVAL;
515  goto error;
516  }
517 
518  if (traceprobe_conflict_field_name(tp->args[i].name,
519  tp->args, i)) {
520  pr_info("Argument[%d] name '%s' conflicts with "
521  "another field.\n", i, argv[i]);
522  ret = -EINVAL;
523  goto error;
524  }
525 
526  /* Parse fetch argument */
527  ret = traceprobe_parse_probe_arg(arg, &tp->size, &tp->args[i],
528  is_return, true);
529  if (ret) {
530  pr_info("Parse error at argument[%d]. (%d)\n", i, ret);
531  goto error;
532  }
533  }
534 
535  ret = register_trace_probe(tp);
536  if (ret)
537  goto error;
538  return 0;
539 
540 error:
541  free_trace_probe(tp);
542  return ret;
543 }
544 
545 static int release_all_trace_probes(void)
546 {
547  struct trace_probe *tp;
548  int ret = 0;
549 
550  mutex_lock(&probe_lock);
551  /* Ensure no probe is in use. */
552  list_for_each_entry(tp, &probe_list, list)
553  if (trace_probe_is_enabled(tp)) {
554  ret = -EBUSY;
555  goto end;
556  }
557  /* TODO: Use batch unregistration */
558  while (!list_empty(&probe_list)) {
559  tp = list_entry(probe_list.next, struct trace_probe, list);
560  unregister_trace_probe(tp);
561  free_trace_probe(tp);
562  }
563 
564 end:
565  mutex_unlock(&probe_lock);
566 
567  return ret;
568 }
569 
570 /* Probes listing interfaces */
571 static void *probes_seq_start(struct seq_file *m, loff_t *pos)
572 {
573  mutex_lock(&probe_lock);
574  return seq_list_start(&probe_list, *pos);
575 }
576 
577 static void *probes_seq_next(struct seq_file *m, void *v, loff_t *pos)
578 {
579  return seq_list_next(v, &probe_list, pos);
580 }
581 
582 static void probes_seq_stop(struct seq_file *m, void *v)
583 {
584  mutex_unlock(&probe_lock);
585 }
586 
587 static int probes_seq_show(struct seq_file *m, void *v)
588 {
589  struct trace_probe *tp = v;
590  int i;
591 
592  seq_printf(m, "%c", trace_probe_is_return(tp) ? 'r' : 'p');
593  seq_printf(m, ":%s/%s", tp->call.class->system, tp->call.name);
594 
595  if (!tp->symbol)
596  seq_printf(m, " 0x%p", tp->rp.kp.addr);
597  else if (tp->rp.kp.offset)
598  seq_printf(m, " %s+%u", trace_probe_symbol(tp),
599  tp->rp.kp.offset);
600  else
601  seq_printf(m, " %s", trace_probe_symbol(tp));
602 
603  for (i = 0; i < tp->nr_args; i++)
604  seq_printf(m, " %s=%s", tp->args[i].name, tp->args[i].comm);
605  seq_printf(m, "\n");
606 
607  return 0;
608 }
609 
610 static const struct seq_operations probes_seq_op = {
611  .start = probes_seq_start,
612  .next = probes_seq_next,
613  .stop = probes_seq_stop,
614  .show = probes_seq_show
615 };
616 
617 static int probes_open(struct inode *inode, struct file *file)
618 {
619  int ret;
620 
621  if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
622  ret = release_all_trace_probes();
623  if (ret < 0)
624  return ret;
625  }
626 
627  return seq_open(file, &probes_seq_op);
628 }
629 
630 static ssize_t probes_write(struct file *file, const char __user *buffer,
631  size_t count, loff_t *ppos)
632 {
633  return traceprobe_probes_write(file, buffer, count, ppos,
634  create_trace_probe);
635 }
636 
637 static const struct file_operations kprobe_events_ops = {
638  .owner = THIS_MODULE,
639  .open = probes_open,
640  .read = seq_read,
641  .llseek = seq_lseek,
642  .release = seq_release,
643  .write = probes_write,
644 };
645 
646 /* Probes profiling interfaces */
647 static int probes_profile_seq_show(struct seq_file *m, void *v)
648 {
649  struct trace_probe *tp = v;
650 
651  seq_printf(m, " %-44s %15lu %15lu\n", tp->call.name, tp->nhit,
652  tp->rp.kp.nmissed);
653 
654  return 0;
655 }
656 
657 static const struct seq_operations profile_seq_op = {
658  .start = probes_seq_start,
659  .next = probes_seq_next,
660  .stop = probes_seq_stop,
661  .show = probes_profile_seq_show
662 };
663 
664 static int profile_open(struct inode *inode, struct file *file)
665 {
666  return seq_open(file, &profile_seq_op);
667 }
668 
669 static const struct file_operations kprobe_profile_ops = {
670  .owner = THIS_MODULE,
671  .open = profile_open,
672  .read = seq_read,
673  .llseek = seq_lseek,
674  .release = seq_release,
675 };
676 
677 /* Sum up total data length for dynamic arraies (strings) */
678 static __kprobes int __get_data_size(struct trace_probe *tp,
679  struct pt_regs *regs)
680 {
681  int i, ret = 0;
682  u32 len;
683 
684  for (i = 0; i < tp->nr_args; i++)
685  if (unlikely(tp->args[i].fetch_size.fn)) {
686  call_fetch(&tp->args[i].fetch_size, regs, &len);
687  ret += len;
688  }
689 
690  return ret;
691 }
692 
693 /* Store the value of each argument */
694 static __kprobes void store_trace_args(int ent_size, struct trace_probe *tp,
695  struct pt_regs *regs,
696  u8 *data, int maxlen)
697 {
698  int i;
699  u32 end = tp->size;
700  u32 *dl; /* Data (relative) location */
701 
702  for (i = 0; i < tp->nr_args; i++) {
703  if (unlikely(tp->args[i].fetch_size.fn)) {
704  /*
705  * First, we set the relative location and
706  * maximum data length to *dl
707  */
708  dl = (u32 *)(data + tp->args[i].offset);
709  *dl = make_data_rloc(maxlen, end - tp->args[i].offset);
710  /* Then try to fetch string or dynamic array data */
711  call_fetch(&tp->args[i].fetch, regs, dl);
712  /* Reduce maximum length */
713  end += get_rloc_len(*dl);
714  maxlen -= get_rloc_len(*dl);
715  /* Trick here, convert data_rloc to data_loc */
716  *dl = convert_rloc_to_loc(*dl,
717  ent_size + tp->args[i].offset);
718  } else
719  /* Just fetching data normally */
720  call_fetch(&tp->args[i].fetch, regs,
721  data + tp->args[i].offset);
722  }
723 }
724 
725 /* Kprobe handler */
726 static __kprobes void kprobe_trace_func(struct kprobe *kp, struct pt_regs *regs)
727 {
728  struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
730  struct ring_buffer_event *event;
731  struct ring_buffer *buffer;
732  int size, dsize, pc;
733  unsigned long irq_flags;
734  struct ftrace_event_call *call = &tp->call;
735 
736  tp->nhit++;
737 
738  local_save_flags(irq_flags);
739  pc = preempt_count();
740 
741  dsize = __get_data_size(tp, regs);
742  size = sizeof(*entry) + tp->size + dsize;
743 
744  event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
745  size, irq_flags, pc);
746  if (!event)
747  return;
748 
749  entry = ring_buffer_event_data(event);
750  entry->ip = (unsigned long)kp->addr;
751  store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
752 
753  if (!filter_current_check_discard(buffer, call, entry, event))
755  irq_flags, pc, regs);
756 }
757 
758 /* Kretprobe handler */
759 static __kprobes void kretprobe_trace_func(struct kretprobe_instance *ri,
760  struct pt_regs *regs)
761 {
762  struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
764  struct ring_buffer_event *event;
765  struct ring_buffer *buffer;
766  int size, pc, dsize;
767  unsigned long irq_flags;
768  struct ftrace_event_call *call = &tp->call;
769 
770  local_save_flags(irq_flags);
771  pc = preempt_count();
772 
773  dsize = __get_data_size(tp, regs);
774  size = sizeof(*entry) + tp->size + dsize;
775 
776  event = trace_current_buffer_lock_reserve(&buffer, call->event.type,
777  size, irq_flags, pc);
778  if (!event)
779  return;
780 
781  entry = ring_buffer_event_data(event);
782  entry->func = (unsigned long)tp->rp.kp.addr;
783  entry->ret_ip = (unsigned long)ri->ret_addr;
784  store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
785 
786  if (!filter_current_check_discard(buffer, call, entry, event))
788  irq_flags, pc, regs);
789 }
790 
791 /* Event entry printers */
792 enum print_line_t
794  struct trace_event *event)
795 {
797  struct trace_seq *s = &iter->seq;
798  struct trace_probe *tp;
799  u8 *data;
800  int i;
801 
802  field = (struct kprobe_trace_entry_head *)iter->ent;
803  tp = container_of(event, struct trace_probe, call.event);
804 
805  if (!trace_seq_printf(s, "%s: (", tp->call.name))
806  goto partial;
807 
808  if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
809  goto partial;
810 
811  if (!trace_seq_puts(s, ")"))
812  goto partial;
813 
814  data = (u8 *)&field[1];
815  for (i = 0; i < tp->nr_args; i++)
816  if (!tp->args[i].type->print(s, tp->args[i].name,
817  data + tp->args[i].offset, field))
818  goto partial;
819 
820  if (!trace_seq_puts(s, "\n"))
821  goto partial;
822 
823  return TRACE_TYPE_HANDLED;
824 partial:
826 }
827 
828 enum print_line_t
830  struct trace_event *event)
831 {
833  struct trace_seq *s = &iter->seq;
834  struct trace_probe *tp;
835  u8 *data;
836  int i;
837 
838  field = (struct kretprobe_trace_entry_head *)iter->ent;
839  tp = container_of(event, struct trace_probe, call.event);
840 
841  if (!trace_seq_printf(s, "%s: (", tp->call.name))
842  goto partial;
843 
844  if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
845  goto partial;
846 
847  if (!trace_seq_puts(s, " <- "))
848  goto partial;
849 
850  if (!seq_print_ip_sym(s, field->func, flags & ~TRACE_ITER_SYM_OFFSET))
851  goto partial;
852 
853  if (!trace_seq_puts(s, ")"))
854  goto partial;
855 
856  data = (u8 *)&field[1];
857  for (i = 0; i < tp->nr_args; i++)
858  if (!tp->args[i].type->print(s, tp->args[i].name,
859  data + tp->args[i].offset, field))
860  goto partial;
861 
862  if (!trace_seq_puts(s, "\n"))
863  goto partial;
864 
865  return TRACE_TYPE_HANDLED;
866 partial:
868 }
869 
870 
871 static int kprobe_event_define_fields(struct ftrace_event_call *event_call)
872 {
873  int ret, i;
875  struct trace_probe *tp = (struct trace_probe *)event_call->data;
876 
877  DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0);
878  /* Set argument names as fields */
879  for (i = 0; i < tp->nr_args; i++) {
880  ret = trace_define_field(event_call, tp->args[i].type->fmttype,
881  tp->args[i].name,
882  sizeof(field) + tp->args[i].offset,
883  tp->args[i].type->size,
884  tp->args[i].type->is_signed,
885  FILTER_OTHER);
886  if (ret)
887  return ret;
888  }
889  return 0;
890 }
891 
892 static int kretprobe_event_define_fields(struct ftrace_event_call *event_call)
893 {
894  int ret, i;
896  struct trace_probe *tp = (struct trace_probe *)event_call->data;
897 
898  DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0);
899  DEFINE_FIELD(unsigned long, ret_ip, FIELD_STRING_RETIP, 0);
900  /* Set argument names as fields */
901  for (i = 0; i < tp->nr_args; i++) {
902  ret = trace_define_field(event_call, tp->args[i].type->fmttype,
903  tp->args[i].name,
904  sizeof(field) + tp->args[i].offset,
905  tp->args[i].type->size,
906  tp->args[i].type->is_signed,
907  FILTER_OTHER);
908  if (ret)
909  return ret;
910  }
911  return 0;
912 }
913 
914 static int __set_print_fmt(struct trace_probe *tp, char *buf, int len)
915 {
916  int i;
917  int pos = 0;
918 
919  const char *fmt, *arg;
920 
921  if (!trace_probe_is_return(tp)) {
922  fmt = "(%lx)";
923  arg = "REC->" FIELD_STRING_IP;
924  } else {
925  fmt = "(%lx <- %lx)";
926  arg = "REC->" FIELD_STRING_FUNC ", REC->" FIELD_STRING_RETIP;
927  }
928 
929  /* When len=0, we just calculate the needed length */
930 #define LEN_OR_ZERO (len ? len - pos : 0)
931 
932  pos += snprintf(buf + pos, LEN_OR_ZERO, "\"%s", fmt);
933 
934  for (i = 0; i < tp->nr_args; i++) {
935  pos += snprintf(buf + pos, LEN_OR_ZERO, " %s=%s",
936  tp->args[i].name, tp->args[i].type->fmt);
937  }
938 
939  pos += snprintf(buf + pos, LEN_OR_ZERO, "\", %s", arg);
940 
941  for (i = 0; i < tp->nr_args; i++) {
942  if (strcmp(tp->args[i].type->name, "string") == 0)
943  pos += snprintf(buf + pos, LEN_OR_ZERO,
944  ", __get_str(%s)",
945  tp->args[i].name);
946  else
947  pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s",
948  tp->args[i].name);
949  }
950 
951 #undef LEN_OR_ZERO
952 
953  /* return the length of print_fmt */
954  return pos;
955 }
956 
957 static int set_print_fmt(struct trace_probe *tp)
958 {
959  int len;
960  char *print_fmt;
961 
962  /* First: called with 0 length to calculate the needed length */
963  len = __set_print_fmt(tp, NULL, 0);
964  print_fmt = kmalloc(len + 1, GFP_KERNEL);
965  if (!print_fmt)
966  return -ENOMEM;
967 
968  /* Second: actually write the @print_fmt */
969  __set_print_fmt(tp, print_fmt, len + 1);
970  tp->call.print_fmt = print_fmt;
971 
972  return 0;
973 }
974 
975 #ifdef CONFIG_PERF_EVENTS
976 
977 /* Kprobe profile handler */
978 static __kprobes void kprobe_perf_func(struct kprobe *kp,
979  struct pt_regs *regs)
980 {
981  struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
982  struct ftrace_event_call *call = &tp->call;
984  struct hlist_head *head;
985  int size, __size, dsize;
986  int rctx;
987 
988  dsize = __get_data_size(tp, regs);
989  __size = sizeof(*entry) + tp->size + dsize;
990  size = ALIGN(__size + sizeof(u32), sizeof(u64));
991  size -= sizeof(u32);
992  if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
993  "profile buffer not large enough"))
994  return;
995 
996  entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
997  if (!entry)
998  return;
999 
1000  entry->ip = (unsigned long)kp->addr;
1001  memset(&entry[1], 0, dsize);
1002  store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1003 
1004  head = this_cpu_ptr(call->perf_events);
1005  perf_trace_buf_submit(entry, size, rctx,
1006  entry->ip, 1, regs, head, NULL);
1007 }
1008 
1009 /* Kretprobe profile handler */
1010 static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
1011  struct pt_regs *regs)
1012 {
1013  struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1014  struct ftrace_event_call *call = &tp->call;
1016  struct hlist_head *head;
1017  int size, __size, dsize;
1018  int rctx;
1019 
1020  dsize = __get_data_size(tp, regs);
1021  __size = sizeof(*entry) + tp->size + dsize;
1022  size = ALIGN(__size + sizeof(u32), sizeof(u64));
1023  size -= sizeof(u32);
1024  if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
1025  "profile buffer not large enough"))
1026  return;
1027 
1028  entry = perf_trace_buf_prepare(size, call->event.type, regs, &rctx);
1029  if (!entry)
1030  return;
1031 
1032  entry->func = (unsigned long)tp->rp.kp.addr;
1033  entry->ret_ip = (unsigned long)ri->ret_addr;
1034  store_trace_args(sizeof(*entry), tp, regs, (u8 *)&entry[1], dsize);
1035 
1036  head = this_cpu_ptr(call->perf_events);
1037  perf_trace_buf_submit(entry, size, rctx,
1038  entry->ret_ip, 1, regs, head, NULL);
1039 }
1040 #endif /* CONFIG_PERF_EVENTS */
1041 
1042 static __kprobes
1043 int kprobe_register(struct ftrace_event_call *event,
1044  enum trace_reg type, void *data)
1045 {
1046  struct trace_probe *tp = (struct trace_probe *)event->data;
1047 
1048  switch (type) {
1049  case TRACE_REG_REGISTER:
1050  return enable_trace_probe(tp, TP_FLAG_TRACE);
1051  case TRACE_REG_UNREGISTER:
1052  disable_trace_probe(tp, TP_FLAG_TRACE);
1053  return 0;
1054 
1055 #ifdef CONFIG_PERF_EVENTS
1056  case TRACE_REG_PERF_REGISTER:
1057  return enable_trace_probe(tp, TP_FLAG_PROFILE);
1058  case TRACE_REG_PERF_UNREGISTER:
1059  disable_trace_probe(tp, TP_FLAG_PROFILE);
1060  return 0;
1061  case TRACE_REG_PERF_OPEN:
1062  case TRACE_REG_PERF_CLOSE:
1063  case TRACE_REG_PERF_ADD:
1064  case TRACE_REG_PERF_DEL:
1065  return 0;
1066 #endif
1067  }
1068  return 0;
1069 }
1070 
1071 static __kprobes
1072 int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
1073 {
1074  struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp);
1075 
1076  if (tp->flags & TP_FLAG_TRACE)
1077  kprobe_trace_func(kp, regs);
1078 #ifdef CONFIG_PERF_EVENTS
1079  if (tp->flags & TP_FLAG_PROFILE)
1080  kprobe_perf_func(kp, regs);
1081 #endif
1082  return 0; /* We don't tweek kernel, so just return 0 */
1083 }
1084 
1085 static __kprobes
1086 int kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
1087 {
1088  struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp);
1089 
1090  if (tp->flags & TP_FLAG_TRACE)
1091  kretprobe_trace_func(ri, regs);
1092 #ifdef CONFIG_PERF_EVENTS
1093  if (tp->flags & TP_FLAG_PROFILE)
1094  kretprobe_perf_func(ri, regs);
1095 #endif
1096  return 0; /* We don't tweek kernel, so just return 0 */
1097 }
1098 
1099 static struct trace_event_functions kretprobe_funcs = {
1100  .trace = print_kretprobe_event
1101 };
1102 
1103 static struct trace_event_functions kprobe_funcs = {
1104  .trace = print_kprobe_event
1105 };
1106 
1107 static int register_probe_event(struct trace_probe *tp)
1108 {
1109  struct ftrace_event_call *call = &tp->call;
1110  int ret;
1111 
1112  /* Initialize ftrace_event_call */
1113  INIT_LIST_HEAD(&call->class->fields);
1114  if (trace_probe_is_return(tp)) {
1115  call->event.funcs = &kretprobe_funcs;
1116  call->class->define_fields = kretprobe_event_define_fields;
1117  } else {
1118  call->event.funcs = &kprobe_funcs;
1119  call->class->define_fields = kprobe_event_define_fields;
1120  }
1121  if (set_print_fmt(tp) < 0)
1122  return -ENOMEM;
1123  ret = register_ftrace_event(&call->event);
1124  if (!ret) {
1125  kfree(call->print_fmt);
1126  return -ENODEV;
1127  }
1128  call->flags = 0;
1129  call->class->reg = kprobe_register;
1130  call->data = tp;
1131  ret = trace_add_event_call(call);
1132  if (ret) {
1133  pr_info("Failed to register kprobe event: %s\n", call->name);
1134  kfree(call->print_fmt);
1136  }
1137  return ret;
1138 }
1139 
1140 static void unregister_probe_event(struct trace_probe *tp)
1141 {
1142  /* tp->event is unregistered in trace_remove_event_call() */
1144  kfree(tp->call.print_fmt);
1145 }
1146 
1147 /* Make a debugfs interface for controlling probe points */
1148 static __init int init_kprobe_trace(void)
1149 {
1150  struct dentry *d_tracer;
1151  struct dentry *entry;
1152 
1153  if (register_module_notifier(&trace_probe_module_nb))
1154  return -EINVAL;
1155 
1156  d_tracer = tracing_init_dentry();
1157  if (!d_tracer)
1158  return 0;
1159 
1160  entry = debugfs_create_file("kprobe_events", 0644, d_tracer,
1161  NULL, &kprobe_events_ops);
1162 
1163  /* Event list interface */
1164  if (!entry)
1165  pr_warning("Could not create debugfs "
1166  "'kprobe_events' entry\n");
1167 
1168  /* Profile interface */
1169  entry = debugfs_create_file("kprobe_profile", 0444, d_tracer,
1170  NULL, &kprobe_profile_ops);
1171 
1172  if (!entry)
1173  pr_warning("Could not create debugfs "
1174  "'kprobe_profile' entry\n");
1175  return 0;
1176 }
1177 fs_initcall(init_kprobe_trace);
1178 
1179 
1180 #ifdef CONFIG_FTRACE_STARTUP_TEST
1181 
1182 /*
1183  * The "__used" keeps gcc from removing the function symbol
1184  * from the kallsyms table.
1185  */
1186 static __used int kprobe_trace_selftest_target(int a1, int a2, int a3,
1187  int a4, int a5, int a6)
1188 {
1189  return a1 + a2 + a3 + a4 + a5 + a6;
1190 }
1191 
1192 static __init int kprobe_trace_self_tests_init(void)
1193 {
1194  int ret, warn = 0;
1195  int (*target)(int, int, int, int, int, int);
1196  struct trace_probe *tp;
1197 
1198  target = kprobe_trace_selftest_target;
1199 
1200  pr_info("Testing kprobe tracing: ");
1201 
1202  ret = traceprobe_command("p:testprobe kprobe_trace_selftest_target "
1203  "$stack $stack0 +0($stack)",
1204  create_trace_probe);
1205  if (WARN_ON_ONCE(ret)) {
1206  pr_warning("error on probing function entry.\n");
1207  warn++;
1208  } else {
1209  /* Enable trace point */
1210  tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
1211  if (WARN_ON_ONCE(tp == NULL)) {
1212  pr_warning("error on getting new probe.\n");
1213  warn++;
1214  } else
1215  enable_trace_probe(tp, TP_FLAG_TRACE);
1216  }
1217 
1218  ret = traceprobe_command("r:testprobe2 kprobe_trace_selftest_target "
1219  "$retval", create_trace_probe);
1220  if (WARN_ON_ONCE(ret)) {
1221  pr_warning("error on probing function return.\n");
1222  warn++;
1223  } else {
1224  /* Enable trace point */
1225  tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
1226  if (WARN_ON_ONCE(tp == NULL)) {
1227  pr_warning("error on getting new probe.\n");
1228  warn++;
1229  } else
1230  enable_trace_probe(tp, TP_FLAG_TRACE);
1231  }
1232 
1233  if (warn)
1234  goto end;
1235 
1236  ret = target(1, 2, 3, 4, 5, 6);
1237 
1238  /* Disable trace points before removing it */
1239  tp = find_trace_probe("testprobe", KPROBE_EVENT_SYSTEM);
1240  if (WARN_ON_ONCE(tp == NULL)) {
1241  pr_warning("error on getting test probe.\n");
1242  warn++;
1243  } else
1244  disable_trace_probe(tp, TP_FLAG_TRACE);
1245 
1246  tp = find_trace_probe("testprobe2", KPROBE_EVENT_SYSTEM);
1247  if (WARN_ON_ONCE(tp == NULL)) {
1248  pr_warning("error on getting 2nd test probe.\n");
1249  warn++;
1250  } else
1251  disable_trace_probe(tp, TP_FLAG_TRACE);
1252 
1253  ret = traceprobe_command("-:testprobe", create_trace_probe);
1254  if (WARN_ON_ONCE(ret)) {
1255  pr_warning("error on deleting a probe.\n");
1256  warn++;
1257  }
1258 
1259  ret = traceprobe_command("-:testprobe2", create_trace_probe);
1260  if (WARN_ON_ONCE(ret)) {
1261  pr_warning("error on deleting a probe.\n");
1262  warn++;
1263  }
1264 
1265 end:
1266  release_all_trace_probes();
1267  if (warn)
1268  pr_cont("NG: Some tests are failed. Please check them.\n");
1269  else
1270  pr_cont("OK\n");
1271  return 0;
1272 }
1273 
1274 late_initcall(kprobe_trace_self_tests_init);
1275 
1276 #endif