Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
builtin-top.c
Go to the documentation of this file.
1 /*
2  * builtin-top.c
3  *
4  * Builtin top command: Display a continuously updated profile of
5  * any workload, CPU or specific PID.
6  *
7  * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <[email protected]>
8  * 2011, Red Hat Inc, Arnaldo Carvalho de Melo <[email protected]>
9  *
10  * Improvements and fixes by:
11  *
12  * Arjan van de Ven <[email protected]>
13  * Yanmin Zhang <[email protected]>
14  * Wu Fengguang <[email protected]>
15  * Mike Galbraith <[email protected]>
16  * Paul Mackerras <[email protected]>
17  *
18  * Released under the GPL v2. (and only v2, not any later version)
19  */
20 #include "builtin.h"
21 
22 #include "perf.h"
23 
24 #include "util/annotate.h"
25 #include "util/cache.h"
26 #include "util/color.h"
27 #include "util/evlist.h"
28 #include "util/evsel.h"
29 #include "util/session.h"
30 #include "util/symbol.h"
31 #include "util/thread.h"
32 #include "util/thread_map.h"
33 #include "util/top.h"
34 #include "util/util.h"
35 #include <linux/rbtree.h>
36 #include "util/parse-options.h"
37 #include "util/parse-events.h"
38 #include "util/cpumap.h"
39 #include "util/xyarray.h"
40 #include "util/sort.h"
41 #include "util/intlist.h"
42 
43 #include "util/debug.h"
44 
45 #include <assert.h>
46 #include <elf.h>
47 #include <fcntl.h>
48 
49 #include <stdio.h>
50 #include <termios.h>
51 #include <unistd.h>
52 #include <inttypes.h>
53 
54 #include <errno.h>
55 #include <time.h>
56 #include <sched.h>
57 
58 #include <sys/syscall.h>
59 #include <sys/ioctl.h>
60 #include <sys/poll.h>
61 #include <sys/prctl.h>
62 #include <sys/wait.h>
63 #include <sys/uio.h>
64 #include <sys/utsname.h>
65 #include <sys/mman.h>
66 
67 #include <linux/unistd.h>
68 #include <linux/types.h>
69 
/*
 * Fill in the terminal size in *ws.
 *
 * Order of preference as coded below: the LINES and COLUMNS environment
 * variables (used only when both are set and both parse to non-zero
 * values), then the TIOCGWINSZ ioctl on fd 1 when that ioctl is defined,
 * and finally a fixed 25 rows x 80 columns fallback.
 *
 * NOTE(review): the function header (listing line 70) is missing from this
 * extract. `ws` is used as a pointer with ws_row/ws_col members, presumably
 * a struct winsize * parameter -- confirm against the upstream file.
 */
71 {
72  char *s = getenv("LINES");
73 
74  if (s != NULL) {
75  ws->ws_row = atoi(s);
76  s = getenv("COLUMNS");
77  if (s != NULL) {
78  ws->ws_col = atoi(s);
79  if (ws->ws_row && ws->ws_col)
80  return;
81  }
82  }
83 #ifdef TIOCGWINSZ
84  if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
85  ws->ws_row && ws->ws_col)
86  return;
87 #endif
	/* Neither the environment nor the ioctl gave a usable size. */
88  ws->ws_row = 25;
89  ws->ws_col = 80;
90 }
91 
92 static void perf_top__update_print_entries(struct perf_top *top)
93 {
94  if (top->print_entries > 9)
95  top->print_entries -= 9;
96 }
97 
/*
 * SIGWINCH handler (also called directly from the key handler): recompute
 * how many entries to print from the window height stored in top->winsize.
 *
 * @arg is the struct perf_top being displayed; @sig and @info are unused.
 *
 * If print_entries is zero, or print_entries + 4 exceeds winsize.ws_row,
 * print_entries is set to ws_row. Otherwise print_entries grows by 4 and
 * ws_row is set to match it. perf_top__update_print_entries() then applies
 * its own adjustment.
 *
 * NOTE(review): listing line 103 is missing from this extract (between the
 * declaration and the test below); presumably it refreshes top->winsize --
 * confirm against the upstream file.
 */
98 static void perf_top__sig_winch(int sig __maybe_unused,
99  siginfo_t *info __maybe_unused, void *arg)
100 {
101  struct perf_top *top = arg;
102 
104  if (!top->print_entries
105  || (top->print_entries+4) > top->winsize.ws_row) {
106  top->print_entries = top->winsize.ws_row;
107  } else {
108  top->print_entries += 4;
109  top->winsize.ws_row = top->print_entries;
110  }
111  perf_top__update_print_entries(top);
112 }
113 
114 static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
115 {
116  struct symbol *sym;
117  struct annotation *notes;
118  struct map *map;
119  int err = -1;
120 
121  if (!he || !he->ms.sym)
122  return -1;
123 
124  sym = he->ms.sym;
125  map = he->ms.map;
126 
127  /*
128  * We can't annotate with just /proc/kallsyms
129  */
130  if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
131  pr_err("Can't annotate %s: No vmlinux file was found in the "
132  "path\n", sym->name);
133  sleep(1);
134  return -1;
135  }
136 
137  notes = symbol__annotation(sym);
138  if (notes->src != NULL) {
139  pthread_mutex_lock(&notes->lock);
140  goto out_assign;
141  }
142 
143  pthread_mutex_lock(&notes->lock);
144 
145  if (symbol__alloc_hist(sym) < 0) {
146  pthread_mutex_unlock(&notes->lock);
147  pr_err("Not enough memory for annotating '%s' symbol!\n",
148  sym->name);
149  sleep(1);
150  return err;
151  }
152 
153  err = symbol__annotate(sym, map, 0);
154  if (err == 0) {
155 out_assign:
156  top->sym_filter_entry = he;
157  }
158 
159  pthread_mutex_unlock(&notes->lock);
160  return err;
161 }
162 
/*
 * Judging by its name and by the "zero counters of active symbol" comment
 * at a call site, this is meant to reset the annotation counters of the
 * symbol behind @he.
 *
 * NOTE(review): listing line 166, which would use `sym`, is missing from
 * this extract, so as shown the function only loads he->ms.sym. Confirm
 * the dropped statement against the upstream file.
 */
163 static void __zero_source_counters(struct hist_entry *he)
164 {
165  struct symbol *sym = he->ms.sym;
167 }
168 
169 static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip)
170 {
171  struct utsname uts;
172  int err = uname(&uts);
173 
174  ui__warning("Out of bounds address found:\n\n"
175  "Addr: %" PRIx64 "\n"
176  "DSO: %s %c\n"
177  "Map: %" PRIx64 "-%" PRIx64 "\n"
178  "Symbol: %" PRIx64 "-%" PRIx64 " %c %s\n"
179  "Arch: %s\n"
180  "Kernel: %s\n"
181  "Tools: %s\n\n"
182  "Not all samples will be on the annotation output.\n\n"
183  "Please report to [email protected]\n",
184  ip, map->dso->long_name, dso__symtab_origin(map->dso),
185  map->start, map->end, sym->start, sym->end,
186  sym->binding == STB_GLOBAL ? 'g' :
187  sym->binding == STB_LOCAL ? 'l' : 'w', sym->name,
188  err ? "[unknown]" : uts.machine,
189  err ? "[unknown]" : uts.release, perf_version_string);
190  if (use_browser <= 0)
191  sleep(5);
192 
193  map->erange_warned = true;
194 }
195 
196 static void perf_top__record_precise_ip(struct perf_top *top,
197  struct hist_entry *he,
198  int counter, u64 ip)
199 {
200  struct annotation *notes;
201  struct symbol *sym;
202  int err;
203 
204  if (he == NULL || he->ms.sym == NULL ||
205  ((top->sym_filter_entry == NULL ||
206  top->sym_filter_entry->ms.sym != he->ms.sym) && use_browser != 1))
207  return;
208 
209  sym = he->ms.sym;
210  notes = symbol__annotation(sym);
211 
212  if (pthread_mutex_trylock(&notes->lock))
213  return;
214 
215  if (notes->src == NULL && symbol__alloc_hist(sym) < 0) {
216  pthread_mutex_unlock(&notes->lock);
217  pr_err("Not enough memory for annotating '%s' symbol!\n",
218  sym->name);
219  sleep(1);
220  return;
221  }
222 
223  ip = he->ms.map->map_ip(he->ms.map, ip);
224  err = symbol__inc_addr_samples(sym, he->ms.map, counter, ip);
225 
226  pthread_mutex_unlock(&notes->lock);
227 
228  if (err == -ERANGE && !he->ms.map->erange_warned)
229  ui__warn_map_erange(he->ms.map, sym, ip);
230 }
231 
232 static void perf_top__show_details(struct perf_top *top)
233 {
234  struct hist_entry *he = top->sym_filter_entry;
235  struct annotation *notes;
236  struct symbol *symbol;
237  int more;
238 
239  if (!he)
240  return;
241 
242  symbol = he->ms.sym;
243  notes = symbol__annotation(symbol);
244 
245  pthread_mutex_lock(&notes->lock);
246 
247  if (notes->src == NULL)
248  goto out_unlock;
249 
250  printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
251  printf(" Events Pcnt (>=%d%%)\n", top->sym_pcnt_filter);
252 
253  more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel->idx,
254  0, top->sym_pcnt_filter, top->print_entries, 4);
255  if (top->zero)
256  symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
257  else
258  symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
259  if (more != 0)
260  printf("%d lines not displayed, maybe increase display entries [e]\n", more);
261 out_unlock:
262  pthread_mutex_unlock(&notes->lock);
263 }
264 
/*
 * Control sequence written with puts() before the symbol table is printed:
 * ESC [ H moves the cursor to the top-left corner and ESC [ 2 J erases the
 * whole display. Spelled with octal escapes so the ESC bytes cannot be lost
 * by editors or text tooling (an empty literal here would make the
 * "clear" a plain newline).
 */
static const char CONSOLE_CLEAR[] = "\033[H\033[2J";
266 
/*
 * Add (or account into) a hist entry for location @al in @evsel's hists,
 * weighting it by sample->period.
 *
 * Returns the hist entry, or NULL when __hists__add_entry() fails.
 *
 * NOTE(review): listing line 277 is missing from this extract (between the
 * NULL check and the return); confirm the dropped statement against the
 * upstream file.
 */
267 static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
268  struct addr_location *al,
269  struct perf_sample *sample)
270 {
271  struct hist_entry *he;
272 
273  he = __hists__add_entry(&evsel->hists, al, NULL, sample->period);
274  if (he == NULL)
275  return NULL;
276 
278  return he;
279 }
280 
/*
 * Redraw the stdio (non-TUI) display: clear sequence, header line, a dotted
 * separator, an optional lost-events warning, and then either the
 * annotation view (when a symbol is selected) or the histogram table.
 *
 * NOTE(review): listing lines 292, 300, 311-313 and 316 are missing from
 * this extract, so several statements below appear truncated (argument
 * lists without the call they belong to). Restore them from the upstream
 * file before building.
 */
281 static void perf_top__print_sym_table(struct perf_top *top)
282 {
283  char bf[160];
284  int printed = 0;
285  const int win_width = top->winsize.ws_col - 1;
286 
287  puts(CONSOLE_CLEAR);
288 
289  perf_top__header_snprintf(top, bf, sizeof(bf));
290  printf("%s\n", bf);
291 
	/* NOTE(review): listing line 292 is missing here. */
293 
294  printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
295 
	/* Warn once each time the count of PERF_RECORD_LOST events changes. */
296  if (top->sym_evsel->hists.stats.nr_lost_warned !=
297  top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST]) {
298  top->sym_evsel->hists.stats.nr_lost_warned =
299  top->sym_evsel->hists.stats.nr_events[PERF_RECORD_LOST];
	/* NOTE(review): listing line 300 (start of the call that takes the arguments below) is missing. */
301  "WARNING: LOST %d chunks, Check IO/CPU overload",
302  top->sym_evsel->hists.stats.nr_lost_warned);
303  ++printed;
304  }
305 
	/* A selected symbol replaces the table with its annotation view. */
306  if (top->sym_filter_entry) {
307  perf_top__show_details(top);
308  return;
309  }
310 
	/* NOTE(review): listing lines 311-313 and 316 are missing; the fragments below are tails of dropped calls. */
314  top->hide_user_symbols,
315  top->hide_kernel_symbols);
317  top->winsize.ws_row - 3);
318  putchar('\n');
319  hists__fprintf(&top->sym_evsel->hists, false,
320  top->winsize.ws_row - 4 - printed, win_width, stdout);
321 }
322 
/*
 * Prompt on stdout with @msg and read one line from stdin as a
 * non-negative decimal integer.
 *
 * @target: written only when every character of the line (newline
 *          stripped) is a decimal digit; an empty line therefore stores 0.
 * @msg:    prompt text, printed as "\n<msg>: ".
 *
 * On EOF, read error, or any non-digit character @target is left
 * untouched. The line buffer is released on every path, including a failed
 * getline(), which may still have allocated it.
 */
static void prompt_integer(int *target, const char *msg)
{
	char *buf = NULL, *p;
	size_t dummy = 0;
	int tmp;

	fprintf(stdout, "\n%s: ", msg);
	if (getline(&buf, &dummy, stdin) < 0)
		goto out_free;

	p = strchr(buf, '\n');
	if (p)
		*p = 0;

	for (p = buf; *p; p++) {
		/* <ctype.h> functions need an unsigned char value, not a possibly negative char. */
		if (!isdigit((unsigned char)*p))
			goto out_free;
	}

	tmp = strtoul(buf, NULL, 10);
	*target = tmp;
out_free:
	free(buf);
}
348 
/*
 * Prompt for an integer with @msg and store it in @target only when it
 * lies in the range 0..100. If the prompt yields nothing usable the
 * local default of 0 is what gets range-checked and stored.
 */
static void prompt_percent(int *target, const char *msg)
{
	int value = 0;

	prompt_integer(&value, msg);

	if (value < 0 || value > 100)
		return;

	*target = value;
}
357 
358 static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
359 {
360  char *buf = malloc(0), *p;
361  struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
362  struct rb_node *next;
363  size_t dummy = 0;
364 
365  /* zero counters of active symbol */
366  if (syme) {
367  __zero_source_counters(syme);
368  top->sym_filter_entry = NULL;
369  }
370 
371  fprintf(stdout, "\n%s: ", msg);
372  if (getline(&buf, &dummy, stdin) < 0)
373  goto out_free;
374 
375  p = strchr(buf, '\n');
376  if (p)
377  *p = 0;
378 
379  next = rb_first(&top->sym_evsel->hists.entries);
380  while (next) {
381  n = rb_entry(next, struct hist_entry, rb_node);
382  if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
383  found = n;
384  break;
385  }
386  next = rb_next(&n->rb_node);
387  }
388 
389  if (!found) {
390  fprintf(stderr, "Sorry, %s is not active.\n", buf);
391  sleep(1);
392  } else
393  perf_top__parse_source(top, found);
394 
395 out_free:
396  free(buf);
397 }
398 
399 static void perf_top__print_mapped_keys(struct perf_top *top)
400 {
401  char *name = NULL;
402 
403  if (top->sym_filter_entry) {
404  struct symbol *sym = top->sym_filter_entry->ms.sym;
405  name = sym->name;
406  }
407 
408  fprintf(stdout, "\nMapped keys:\n");
409  fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", top->delay_secs);
410  fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top->print_entries);
411 
412  if (top->evlist->nr_entries > 1)
413  fprintf(stdout, "\t[E] active event counter. \t(%s)\n", perf_evsel__name(top->sym_evsel));
414 
415  fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter);
416 
417  fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->sym_pcnt_filter);
418  fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
419  fprintf(stdout, "\t[S] stop annotation.\n");
420 
421  fprintf(stdout,
422  "\t[K] hide kernel_symbols symbols. \t(%s)\n",
423  top->hide_kernel_symbols ? "yes" : "no");
424  fprintf(stdout,
425  "\t[U] hide user symbols. \t(%s)\n",
426  top->hide_user_symbols ? "yes" : "no");
427  fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", top->zero ? 1 : 0);
428  fprintf(stdout, "\t[qQ] quit.\n");
429 }
430 
431 static int perf_top__key_mapped(struct perf_top *top, int c)
432 {
433  switch (c) {
434  case 'd':
435  case 'e':
436  case 'f':
437  case 'z':
438  case 'q':
439  case 'Q':
440  case 'K':
441  case 'U':
442  case 'F':
443  case 's':
444  case 'S':
445  return 1;
446  case 'E':
447  return top->evlist->nr_entries > 1 ? 1 : 0;
448  default:
449  break;
450  }
451 
452  return 0;
453 }
454 
/*
 * Act on key @c pressed in the stdio display.
 *
 * If @c is not a mapped key, the key help is printed and one more key is
 * read with the terminal temporarily switched out of canonical/echo mode;
 * if that key is unmapped too, the function returns without doing anything.
 * Otherwise the switch below dispatches on the key.
 *
 * NOTE(review): listing line 554 (the body of the 'U' case) is missing from
 * this extract; confirm against the upstream file.
 */
455 static void perf_top__handle_keypress(struct perf_top *top, int c)
456 {
457  if (!perf_top__key_mapped(top, c)) {
458  struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
459  struct termios tc, save;
460 
461  perf_top__print_mapped_keys(top);
462  fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
463  fflush(stdout);
464 
	/* Read a single key without waiting for Enter and without echo. */
465  tcgetattr(0, &save);
466  tc = save;
467  tc.c_lflag &= ~(ICANON | ECHO);
468  tc.c_cc[VMIN] = 0;
469  tc.c_cc[VTIME] = 0;
470  tcsetattr(0, TCSANOW, &tc);
471 
472  poll(&stdin_poll, 1, -1);
473  c = getc(stdin);
474 
475  tcsetattr(0, TCSAFLUSH, &save);
476  if (!perf_top__key_mapped(top, c))
477  return;
478  }
479 
480  switch (c) {
481  case 'd':
	/* Refresh delay, clamped to at least one second. */
482  prompt_integer(&top->delay_secs, "Enter display delay");
483  if (top->delay_secs < 1)
484  top->delay_secs = 1;
485  break;
486  case 'e':
	/* 0 entries installs the SIGWINCH handler; any other value restores the default disposition. */
487  prompt_integer(&top->print_entries, "Enter display entries (lines)");
488  if (top->print_entries == 0) {
489  struct sigaction act = {
490  .sa_sigaction = perf_top__sig_winch,
491  .sa_flags = SA_SIGINFO,
492  };
493  perf_top__sig_winch(SIGWINCH, NULL, top);
494  sigaction(SIGWINCH, &act, NULL);
495  } else {
496  perf_top__sig_winch(SIGWINCH, NULL, top);
497  signal(SIGWINCH, SIG_DFL);
498  }
499  break;
500  case 'E':
501  if (top->evlist->nr_entries > 1) {
502  /* Select 0 as the default event: */
503  int counter = 0;
504 
505  fprintf(stderr, "\nAvailable events:");
506 
	/* Note: top->sym_evsel itself is used as the list cursor here. */
507  list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
508  fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));
509 
510  prompt_integer(&counter, "Enter details event counter");
511 
512  if (counter >= top->evlist->nr_entries) {
513  top->sym_evsel = perf_evlist__first(top->evlist);
514  fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));
515  sleep(1);
516  break;
517  }
	/* Leave top->sym_evsel pointing at the event whose idx matches. */
518  list_for_each_entry(top->sym_evsel, &top->evlist->entries, node)
519  if (top->sym_evsel->idx == counter)
520  break;
521  } else
522  top->sym_evsel = perf_evlist__first(top->evlist);
523  break;
524  case 'f':
525  prompt_integer(&top->count_filter, "Enter display event count filter");
526  break;
527  case 'F':
528  prompt_percent(&top->sym_pcnt_filter,
529  "Enter details display event filter (percent)");
530  break;
531  case 'K':
532  top->hide_kernel_symbols = !top->hide_kernel_symbols;
533  break;
534  case 'q':
535  case 'Q':
536  printf("exiting.\n");
537  if (top->dump_symtab)
538  perf_session__fprintf_dsos(top->session, stderr);
539  exit(0);
540  case 's':
541  perf_top__prompt_symbol(top, "Enter details symbol");
542  break;
543  case 'S':
	/* Stop annotating: deselect first, then zero the counters. */
544  if (!top->sym_filter_entry)
545  break;
546  else {
547  struct hist_entry *syme = top->sym_filter_entry;
548 
549  top->sym_filter_entry = NULL;
550  __zero_source_counters(syme);
551  }
552  break;
553  case 'U':
	/* NOTE(review): listing line 554 is missing here. */
555  break;
556  case 'z':
557  top->zero = !top->zero;
558  break;
559  default:
560  break;
561  }
562 }
563 
/*
 * Callback taking the struct perf_top as @arg. The visible part switches
 * t->sym_evsel to the evlist's selected event when one is set.
 *
 * NOTE(review): listing lines 567 and 572-576 are missing from this
 * extract; by the function name they presumably re-sort/collapse the newly
 * collected samples -- confirm against the upstream file.
 */
564 static void perf_top__sort_new_samples(void *arg)
565 {
566  struct perf_top *t = arg;
568 
569  if (t->evlist->selected != NULL)
570  t->sym_evsel = t->evlist->selected;
571 
577 }
578 
/*
 * Display thread entry point for the TUI. @arg is the struct perf_top.
 *
 * Sorts the initial samples, copies the --uid string into every event's
 * hists, runs the browser, and then terminates the whole process with
 * exit(0) after exit_browser(); the trailing return is never reached.
 *
 * NOTE(review): listing line 595 (start of the browser call whose trailing
 * arguments appear below, and presumably the only user of `help`) is
 * missing from this extract.
 */
579 static void *display_thread_tui(void *arg)
580 {
581  struct perf_evsel *pos;
582  struct perf_top *top = arg;
583  const char *help = "For a higher level overview, try: perf top --sort comm,dso";
584 
585  perf_top__sort_new_samples(top);
586 
587  /*
588  * Initialize the uid_filter_str, in the future the TUI will allow
589  * Zooming in/out UIDs. For now just use whatever the user passed
590  * via --uid.
591  */
592  list_for_each_entry(pos, &top->evlist->entries, node)
593  pos->hists.uid_filter_str = top->target.uid_str;
594 
	/* NOTE(review): listing line 595 is missing here. */
596  perf_top__sort_new_samples,
597  top, top->delay_secs);
598 
599  exit_browser(0);
600  exit(0);
601  return NULL;
602 }
603 
/*
 * Display thread entry point for the stdio interface. @arg is the struct
 * perf_top.
 *
 * Loops forever: puts the terminal in non-canonical, no-echo mode, redraws
 * the table every top->delay_secs seconds (or on EINTR), and when stdin
 * becomes readable restores the terminal, handles the key and starts over.
 * The final return is unreachable because of the `goto repeat`.
 *
 * NOTE(review): listing line 617 (just before the `repeat` label) is
 * missing from this extract; confirm against the upstream file.
 */
604 static void *display_thread(void *arg)
605 {
606  struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
607  struct termios tc, save;
608  struct perf_top *top = arg;
609  int delay_msecs, c;
610 
	/* `save` keeps the original settings; `tc` is the raw-ish variant. */
611  tcgetattr(0, &save);
612  tc = save;
613  tc.c_lflag &= ~(ICANON | ECHO);
614  tc.c_cc[VMIN] = 0;
615  tc.c_cc[VTIME] = 0;
616 
618 repeat:
	/* Re-read the delay each round: the 'd' key may have changed it. */
619  delay_msecs = top->delay_secs * 1000;
620  tcsetattr(0, TCSANOW, &tc);
621  /* trash return*/
622  getc(stdin);
623 
624  while (1) {
625  perf_top__print_sym_table(top);
626  /*
627  * Either timeout expired or we got an EINTR due to SIGWINCH,
628  * refresh screen in both cases.
629  */
630  switch (poll(&stdin_poll, 1, delay_msecs)) {
631  case 0:
632  continue;
633  case -1:
634  if (errno == EINTR)
635  continue;
636  /* Fall thru */
637  default:
638  goto process_hotkey;
639  }
640  }
641 process_hotkey:
642  c = getc(stdin);
643  tcsetattr(0, TCSAFLUSH, &save);
644 
645  perf_top__handle_keypress(top, c);
646  goto repeat;
647 
648  return NULL;
649 }
650 
/*
 * Symbols whose samples are tagged to be skipped: symbol_filter() sets
 * sym->ignore for an exact name match. The list is NULL-terminated.
 * Both the strings and the table itself are read-only.
 */
static const char *const skip_symbols[] = {
	"intel_idle",
	"default_idle",
	"native_safe_halt",
	"cpu_idle",
	"enter_idle",
	"exit_idle",
	"mwait_idle",
	"mwait_idle_with_hints",
	"poll_idle",
	"ppc64_runlatch_off",
	"pseries_dedicated_idle_sleep",
	NULL
};
666 
667 static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym)
668 {
669  const char *name = sym->name;
670  int i;
671 
672  /*
673  * ppc64 uses function descriptors and appends a '.' to the
674  * start of every instruction address. Remove it.
675  */
676  if (name[0] == '.')
677  name++;
678 
679  if (!strcmp(name, "_text") ||
680  !strcmp(name, "_etext") ||
681  !strcmp(name, "_sinittext") ||
682  !strncmp("init_module", name, 11) ||
683  !strncmp("cleanup_module", name, 14) ||
684  strstr(name, "_text_start") ||
685  strstr(name, "_text_end"))
686  return 1;
687 
688  for (i = 0; skip_symbols[i]; i++) {
689  if (!strcmp(skip_symbols[i], name)) {
690  sym->ignore = true;
691  break;
692  }
693  }
694 
695  return 0;
696 }
697 
/*
 * Process one sample event for perf top.
 *
 * @tool is embedded in struct perf_top (recovered via container_of).
 * Samples without a machine are only reported: once per guest pid when
 * perf_guest is set, otherwise as an "unprocessable" count. Remaining
 * samples are resolved with perf_event__preprocess_sample() using
 * symbol_filter; filtered or unresolvable ones are dropped. Unless the
 * symbol is marked ignore, a hist entry is added and, when sorting
 * includes symbols, the precise IP is recorded for annotation.
 *
 * NOTE(review): listing lines 739-740, 768, 770, 785 and 801-802 are
 * missing from this extract, so several conditions and calls below are
 * truncated. Restore them from the upstream file before building.
 */
698 static void perf_event__process_sample(struct perf_tool *tool,
699  const union perf_event *event,
700  struct perf_evsel *evsel,
701  struct perf_sample *sample,
702  struct machine *machine)
703 {
704  struct perf_top *top = container_of(tool, struct perf_top, tool);
705  struct symbol *parent = NULL;
706  u64 ip = event->ip.ip;
707  struct addr_location al;
708  int err;
709 
710  if (!machine && perf_guest) {
	/* Function-static list: remembers guest pids already reported. */
711  static struct intlist *seen;
712 
713  if (!seen)
714  seen = intlist__new();
715 
716  if (!intlist__has_entry(seen, event->ip.pid)) {
717  pr_err("Can't find guest [%d]'s kernel information\n",
718  event->ip.pid);
719  intlist__add(seen, event->ip.pid);
720  }
721  return;
722  }
723 
724  if (!machine) {
725  pr_err("%u unprocessable samples recorded.",
726  top->session->hists.stats.nr_unprocessable_samples++);
727  return;
728  }
729 
730  if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
731  top->exact_samples++;
732 
733  if (perf_event__preprocess_sample(event, machine, &al, sample,
734  symbol_filter) < 0 ||
735  al.filtered)
736  return;
737 
738  if (!top->kptr_restrict_warned &&
	/* NOTE(review): listing lines 739-740 (rest of this condition) are missing. */
741  ui__warning(
742 "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
743 "Check /proc/sys/kernel/kptr_restrict.\n\n"
744 "Kernel%s samples will not be resolved.\n",
745  !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ?
746  " modules" : "");
747  if (use_browser <= 0)
748  sleep(5);
749  top->kptr_restrict_warned = true;
750  }
751 
752  if (al.sym == NULL) {
753  const char *msg = "Kernel samples will not be resolved.\n";
754  /*
755  * As we do lazy loading of symtabs we only will know if the
756  * specified vmlinux file is invalid when we actually have a
757  * hit in kernel space and then try to load it. So if we get
758  * here and there are _no_ symbols in the DSO backing the
759  * kernel map, bail out.
760  *
761  * We may never get here, for instance, if we use -K/
762  * --hide-kernel-symbols, even if the user specifies an
763  * invalid --vmlinux ;-)
764  */
765  if (!top->kptr_restrict_warned && !top->vmlinux_warned &&
766  al.map == machine->vmlinux_maps[MAP__FUNCTION] &&
767  RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) {
	/* NOTE(review): listing lines 768 and 770 are missing around the call below. */
769  ui__warning("The %s file can't be used.\n%s",
771  } else {
772  ui__warning("A vmlinux file was not found.\n%s",
773  msg);
774  }
775 
776  if (use_browser <= 0)
777  sleep(5);
778  top->vmlinux_warned = true;
779  }
780  }
781 
782  if (al.sym == NULL || !al.sym->ignore) {
783  struct hist_entry *he;
784 
	/* NOTE(review): listing line 785 (start of this condition) is missing. */
786  sample->callchain) {
787  err = machine__resolve_callchain(machine, evsel,
788  al.thread, sample,
789  &parent);
790 
791  if (err)
792  return;
793  }
794 
795  he = perf_evsel__add_hist_entry(evsel, &al, sample);
796  if (he == NULL) {
797  pr_err("Problem incrementing symbol period, skipping event\n");
798  return;
799  }
800 
	/* NOTE(review): listing lines 801-802 (start of the statement ending below) are missing. */
803  sample->period);
804  if (err)
805  return;
806  }
807 
808  if (top->sort_has_symbols)
809  perf_top__record_precise_ip(top, he, evsel->idx, ip);
810  }
811 
812  return;
813 }
814 
/*
 * Drain the events currently available in mmap ring @idx.
 *
 * For each event: parse the sample (skipping it on a parse error), look up
 * its evsel by sample id, bump the per-origin sample counters, pick the
 * machine the event belongs to, and then either process it as a sample,
 * forward it to perf_event__process(), or count it as unknown.
 *
 * NOTE(review): listing lines 841, 847, 853 and 857 -- the `case` labels of
 * the switch on `origin` -- are missing from this extract; the counter
 * names below suggest user, kernel, guest-kernel and guest-user in that
 * order. Confirm against the upstream file.
 */
815 static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
816 {
817  struct perf_sample sample;
818  struct perf_evsel *evsel;
819  struct perf_session *session = top->session;
820  union perf_event *event;
821  struct machine *machine;
822  u8 origin;
823  int ret;
824 
825  while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
826  ret = perf_evlist__parse_sample(top->evlist, event, &sample);
827  if (ret) {
828  pr_err("Can't parse sample, err = %d\n", ret);
829  continue;
830  }
831 
832  evsel = perf_evlist__id2evsel(session->evlist, sample.id);
833  assert(evsel != NULL);
834 
835  origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
836 
837  if (event->header.type == PERF_RECORD_SAMPLE)
838  ++top->samples;
839 
840  switch (origin) {
	/* NOTE(review): case label (listing line 841) missing. */
842  ++top->us_samples;
843  if (top->hide_user_symbols)
844  continue;
845  machine = perf_session__find_host_machine(session);
846  break;
	/* NOTE(review): case label (listing line 847) missing. */
848  ++top->kernel_samples;
849  if (top->hide_kernel_symbols)
850  continue;
851  machine = perf_session__find_host_machine(session);
852  break;
	/* NOTE(review): case label (listing line 853) missing. */
854  ++top->guest_kernel_samples;
855  machine = perf_session__find_machine(session, event->ip.pid);
856  break;
	/* NOTE(review): case label (listing line 857) missing. */
858  ++top->guest_us_samples;
859  /*
860  * TODO: we don't process guest user from host side
861  * except simple counting.
862  */
863  /* Fall thru */
864  default:
865  continue;
866  }
867 
868 
869  if (event->header.type == PERF_RECORD_SAMPLE) {
870  perf_event__process_sample(&top->tool, event, evsel,
871  &sample, machine);
872  } else if (event->header.type < PERF_RECORD_MAX) {
873  hists__inc_nr_events(&evsel->hists, event->header.type);
874  perf_event__process(&top->tool, event, &sample, machine);
875  } else
876  ++session->hists.stats.nr_unknown_events;
877  }
878 }
879 
880 static void perf_top__mmap_read(struct perf_top *top)
881 {
882  int i;
883 
884  for (i = 0; i < top->evlist->nr_mmaps; i++)
885  perf_top__mmap_read_idx(top, i);
886 }
887 
/*
 * Configure and open every event in top->evlist, then mmap the evlist.
 *
 * Per event: sets up the sampling attributes, then tries
 * perf_evsel__open(). On failure it retries with progressively reduced
 * features (no exclude_guest/exclude_host, then no sample_id_all) and, for
 * the hardware cycles event, with a software fallback. Any unrecoverable
 * error is reported and ends the process through out_err.
 *
 * NOTE(review): out_err calls exit(0), i.e. the process exits with a
 * success status after a failure -- confirm that this is intended.
 *
 * NOTE(review): listing lines 899, 902, 915-916, 932 and 963 are missing
 * from this extract; restore them from the upstream file before building.
 */
888 static void perf_top__start_counters(struct perf_top *top)
889 {
890  struct perf_evsel *counter;
891  struct perf_evlist *evlist = top->evlist;
892 
893  if (top->group)
894  perf_evlist__set_leader(evlist);
895 
896  list_for_each_entry(counter, &evlist->entries, node) {
897  struct perf_event_attr *attr = &counter->attr;
898 
	/* NOTE(review): listing line 899 is missing here. */
900 
901  if (top->freq) {
	/* NOTE(review): listing line 902 is missing here. */
903  attr->freq = 1;
904  attr->sample_freq = top->freq;
905  }
906 
	/* With several events, samples must carry an id to be told apart. */
907  if (evlist->nr_entries > 1) {
908  attr->sample_type |= PERF_SAMPLE_ID;
909  attr->read_format |= PERF_FORMAT_ID;
910  }
911 
912  if (perf_target__has_cpu(&top->target))
913  attr->sample_type |= PERF_SAMPLE_CPU;
914 
	/* NOTE(review): listing lines 915-916 are missing here. */
917 
918  attr->mmap = 1;
919  attr->comm = 1;
920  attr->inherit = top->inherit;
	/* The three labels below are re-entry points for the retries further down. */
921 fallback_missing_features:
922  if (top->exclude_guest_missing)
923  attr->exclude_guest = attr->exclude_host = 0;
924 retry_sample_id:
925  attr->sample_id_all = top->sample_id_all_missing ? 0 : 1;
926 try_again:
927  if (perf_evsel__open(counter, top->evlist->cpus,
928  top->evlist->threads) < 0) {
	/* Save errno right away; later calls may overwrite it. */
929  int err = errno;
930 
931  if (err == EPERM || err == EACCES) {
	/* NOTE(review): listing line 932 (presumably the error report) is missing. */
933  goto out_err;
934  } else if (err == EINVAL) {
935  if (!top->exclude_guest_missing &&
936  (attr->exclude_guest || attr->exclude_host)) {
937  pr_debug("Old kernel, cannot exclude "
938  "guest or host samples.\n");
939  top->exclude_guest_missing = true;
940  goto fallback_missing_features;
941  } else if (!top->sample_id_all_missing) {
942  /*
943  * Old kernel, no attr->sample_id_type_all field
944  */
945  top->sample_id_all_missing = true;
946  goto retry_sample_id;
947  }
948  }
949  /*
950  * If it's cycles then fall back to hrtimer
951  * based cpu-clock-tick sw counter, which
952  * is always available even if no PMU support:
953  */
954  if ((err == ENOENT || err == ENXIO) &&
955  (attr->type == PERF_TYPE_HARDWARE) &&
956  (attr->config == PERF_COUNT_HW_CPU_CYCLES)) {
957 
958  if (verbose)
959  ui__warning("Cycles event not supported,\n"
960  "trying to fall back to cpu-clock-ticks\n");
961 
962  attr->type = PERF_TYPE_SOFTWARE;
	/* NOTE(review): listing line 963 (presumably the new attr->config) is missing. */
	/* Drop the cached name so it is regenerated for the new event. */
964  if (counter->name) {
965  free(counter->name);
966  counter->name = NULL;
967  }
968  goto try_again;
969  }
970 
971  if (err == ENOENT) {
972  ui__error("The %s event is not supported.\n",
973  perf_evsel__name(counter));
974  goto out_err;
975  } else if (err == EMFILE) {
976  ui__error("Too many events are opened.\n"
977  "Try again after reducing the number of events\n");
978  goto out_err;
979  }
980 
981  ui__error("The sys_perf_event_open() syscall "
982  "returned with %d (%s). /bin/dmesg "
983  "may provide additional information.\n"
984  "No CONFIG_PERF_EVENTS=y kernel support "
985  "configured?\n", err, strerror(err));
986  goto out_err;
987  }
988  }
989 
990  if (perf_evlist__mmap(evlist, top->mmap_pages, false) < 0) {
991  ui__error("Failed to mmap with %d (%s)\n",
992  errno, strerror(errno));
993  goto out_err;
994  }
995 
996  return;
997 
998 out_err:
999  exit_browser(0);
1000  exit(0);
1001 }
1002 
/*
 * Validate the callchain configuration against the sort keys.
 *
 * Returns -EINVAL when callchains were requested but "sym" is not among
 * the sort keys, or when the callchain parameters cannot be registered;
 * returns 0 otherwise.
 *
 * NOTE(review): listing line 1011 (the registration call whose failure is
 * reported below) is missing from this extract, leaving the braces
 * unbalanced as shown.
 */
1003 static int perf_top__setup_sample_type(struct perf_top *top)
1004 {
1005  if (!top->sort_has_symbols) {
1006  if (symbol_conf.use_callchain) {
1007  ui__error("Selected -g but \"sym\" not present in --sort/-s.");
1008  return -EINVAL;
1009  }
1010  } else if (!top->dont_use_callchains && callchain_param.mode != CHAIN_NONE) {
	/* NOTE(review): listing line 1011 is missing here. */
1012  ui__error("Can't register callchain params.\n");
1013  return -EINVAL;
1014  }
1015  }
1016 
1017  return 0;
1018 }
1019 
/*
 * Main body of `perf top`: create the session, synthesize existing
 * threads, open the counters, start the display thread and then read the
 * mmap rings forever.
 *
 * Returns -ENOMEM when the session cannot be created. As written the
 * reading loop never terminates, so out_delete is only reached when
 * perf_top__setup_sample_type() fails.
 *
 * NOTE(review): the function ends with `return 0`, so the error stored in
 * `ret` on the out_delete path is not propagated -- confirm whether
 * `return ret` was intended.
 *
 * NOTE(review): listing lines 1038, 1041, 1045 and 1078 are missing from
 * this extract; restore them from the upstream file before building.
 */
1020 static int __cmd_top(struct perf_top *top)
1021 {
1022  pthread_t thread;
1023  int ret;
1024  /*
1025  * FIXME: perf_session__new should allow passing a O_MMAP, so that all this
1026  * mmap reading, etc is encapsulated in it. Use O_WRONLY for now.
1027  */
1028  top->session = perf_session__new(NULL, O_WRONLY, false, false, NULL);
1029  if (top->session == NULL)
1030  return -ENOMEM;
1031 
1032  ret = perf_top__setup_sample_type(top);
1033  if (ret)
1034  goto out_delete;
1035 
1036  if (perf_target__has_task(&top->target))
1037  perf_event__synthesize_thread_map(&top->tool, top->evlist->threads,
	/* NOTE(review): listing line 1038 (middle argument of this call) is missing. */
1039  &top->session->host_machine);
1040  else
	/* NOTE(review): listing line 1041 (start of the else-branch call) is missing. */
1042  &top->session->host_machine);
1043  perf_top__start_counters(top);
1044  top->session->evlist = top->evlist;
	/* NOTE(review): listing line 1045 is missing here. */
1046 
1047  /* Wait for a minimal set of events before starting the snapshot */
1048  poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
1049 
1050  perf_top__mmap_read(top);
1051 
1052  if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
1053  display_thread), top)) {
1054  ui__error("Could not create display thread.\n");
1055  exit(-1);
1056  }
1057 
1058  if (top->realtime_prio) {
1059  struct sched_param param;
1060 
1061  param.sched_priority = top->realtime_prio;
1062  if (sched_setscheduler(0, SCHED_FIFO, &param)) {
1063  ui__error("Could not set realtime priority.\n");
1064  exit(-1);
1065  }
1066  }
1067 
1068  while (1) {
1069  u64 hits = top->samples;
1070 
1071  perf_top__mmap_read(top);
1072 
	/* No new samples in this pass: wait up to 100 ms for more. */
1073  if (hits == top->samples)
1074  ret = poll(top->evlist->pollfd, top->evlist->nr_fds, 100);
1075  }
1076 
1077 out_delete:
	/* NOTE(review): listing line 1078 (presumably releasing the session) is missing. */
1079  top->session = NULL;
1080 
1081  return 0;
1082 }
1083 
/*
 * Option callback for -G/--call-graph. opt->value is the struct perf_top.
 *
 * @arg has the form "output_type[,min_percent[,print_limit][,call_order]]"
 * and is tokenized in place with strtok(), so the caller's string is
 * modified. With @unset (--no-call-graph) callchains are disabled on the
 * perf_top and nothing else is parsed. Returns 0 on success, -1 on a
 * malformed argument or when the callchain parameters cannot be
 * registered.
 *
 * NOTE(review): the strncmp() calls use strlen(arg); after strtok() has cut
 * @arg at the first comma this is the length of the first token, so a
 * prefix of a mode name appears to be accepted -- confirm intent.
 *
 * NOTE(review): listing lines 1110, 1113, 1116, 1119, 1149, 1151 and 1155
 * (the assignments selecting mode/order and the registration call) are
 * missing from this extract, so the if/else chains below have no bodies as
 * shown. Restore them from the upstream file before building.
 */
1084 static int
1085 parse_callchain_opt(const struct option *opt, const char *arg, int unset)
1086 {
1087  struct perf_top *top = (struct perf_top *)opt->value;
1088  char *tok, *tok2;
1089  char *endptr;
1090 
1091  /*
1092  * --no-call-graph
1093  */
1094  if (unset) {
1095  top->dont_use_callchains = true;
1096  return 0;
1097  }
1098 
1099  symbol_conf.use_callchain = true;
1100 
1101  if (!arg)
1102  return 0;
1103 
1104  tok = strtok((char *)arg, ",");
1105  if (!tok)
1106  return -1;
1107 
1108  /* get the output mode */
1109  if (!strncmp(tok, "graph", strlen(arg)))
1111 
1112  else if (!strncmp(tok, "flat", strlen(arg)))
1114 
1115  else if (!strncmp(tok, "fractal", strlen(arg)))
1117 
1118  else if (!strncmp(tok, "none", strlen(arg))) {
1120  symbol_conf.use_callchain = false;
1121 
1122  return 0;
1123  } else
1124  return -1;
1125 
1126  /* get the min percentage */
1127  tok = strtok(NULL, ",");
1128  if (!tok)
1129  goto setup;
1130 
1131  callchain_param.min_percent = strtod(tok, &endptr);
1132  if (tok == endptr)
1133  return -1;
1134 
1135  /* get the print limit */
1136  tok2 = strtok(NULL, ",");
1137  if (!tok2)
1138  goto setup;
1139 
	/* A token starting with 'c' is taken as the call order, not a print limit. */
1140  if (tok2[0] != 'c') {
1141  callchain_param.print_limit = strtod(tok2, &endptr);
1142  tok2 = strtok(NULL, ",");
1143  if (!tok2)
1144  goto setup;
1145  }
1146 
1147  /* get the call chain order */
1148  if (!strcmp(tok2, "caller"))
1150  else if (!strcmp(tok2, "callee"))
1152  else
1153  return -1;
1154 setup:
1156  fprintf(stderr, "Can't register callchain params\n");
1157  return -1;
1158  }
1159  return 0;
1160 }
1161 
1162 int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
1163 {
1164  struct perf_evsel *pos;
1165  int status;
1166  char errbuf[BUFSIZ];
1167  struct perf_top top = {
1168  .count_filter = 5,
1169  .delay_secs = 2,
1170  .freq = 4000, /* 4 KHz */
1171  .mmap_pages = 128,
1172  .sym_pcnt_filter = 5,
1173  .target = {
1174  .uses_mmap = true,
1175  },
1176  };
1177  char callchain_default_opt[] = "fractal,0.5,callee";
1178  const struct option options[] = {
1179  OPT_CALLBACK('e', "event", &top.evlist, "event",
1180  "event selector. use 'perf list' to list available events",
1182  OPT_INTEGER('c', "count", &top.default_interval,
1183  "event period to sample"),
1184  OPT_STRING('p', "pid", &top.target.pid, "pid",
1185  "profile events on existing process id"),
1186  OPT_STRING('t', "tid", &top.target.tid, "tid",
1187  "profile events on existing thread id"),
1188  OPT_BOOLEAN('a', "all-cpus", &top.target.system_wide,
1189  "system-wide collection from all CPUs"),
1190  OPT_STRING('C', "cpu", &top.target.cpu_list, "cpu",
1191  "list of cpus to monitor"),
1192  OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
1193  "file", "vmlinux pathname"),
1194  OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
1195  "hide kernel symbols"),
1196  OPT_UINTEGER('m', "mmap-pages", &top.mmap_pages, "number of mmap data pages"),
1197  OPT_INTEGER('r', "realtime", &top.realtime_prio,
1198  "collect data with this RT SCHED_FIFO priority"),
1199  OPT_INTEGER('d', "delay", &top.delay_secs,
1200  "number of seconds to delay between refreshes"),
1201  OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
1202  "dump the symbol table used for profiling"),
1203  OPT_INTEGER('f', "count-filter", &top.count_filter,
1204  "only display functions with more events than this"),
1205  OPT_BOOLEAN('g', "group", &top.group,
1206  "put the counters into a counter group"),
1207  OPT_BOOLEAN('i', "inherit", &top.inherit,
1208  "child tasks inherit counters"),
1209  OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
1210  "symbol to annotate"),
1211  OPT_BOOLEAN('z', "zero", &top.zero,
1212  "zero history across updates"),
1213  OPT_INTEGER('F', "freq", &top.freq,
1214  "profile at this frequency"),
1215  OPT_INTEGER('E', "entries", &top.print_entries,
1216  "display this many functions"),
1217  OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
1218  "hide user symbols"),
1219  OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
1220  OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
1221  OPT_INCR('v', "verbose", &verbose,
1222  "be more verbose (show counter open errors, etc)"),
1223  OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
1224  "sort by key(s): pid, comm, dso, symbol, parent"),
1225  OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
1226  "Show a column with the number of samples"),
1227  OPT_CALLBACK_DEFAULT('G', "call-graph", &top, "output_type,min_percent, call_order",
1228  "Display callchains using output_type (graph, flat, fractal, or none), min percent threshold and callchain order. "
1229  "Default: fractal,0.5,callee", &parse_callchain_opt,
1230  callchain_default_opt),
1231  OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
1232  "Show a column with the sum of periods"),
1233  OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
1234  "only consider symbols in these dsos"),
1235  OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
1236  "only consider symbols in these comms"),
1237  OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
1238  "only consider these symbols"),
1239  OPT_BOOLEAN(0, "source", &symbol_conf.annotate_src,
1240  "Interleave source code with assembly code (default)"),
1241  OPT_BOOLEAN(0, "asm-raw", &symbol_conf.annotate_asm_raw,
1242  "Display raw encoding of assembly instructions (default)"),
1243  OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
1244  "Specify disassembler style (e.g. -M intel for intel syntax)"),
1245  OPT_STRING('u', "uid", &top.target.uid_str, "user", "user to profile"),
1246  OPT_END()
1247  };
1248  const char * const top_usage[] = {
1249  "perf top [<options>]",
1250  NULL
1251  };
1252 
1253  top.evlist = perf_evlist__new(NULL, NULL);
1254  if (top.evlist == NULL)
1255  return -ENOMEM;
1256 
1257  symbol_conf.exclude_other = false;
1258 
1259  argc = parse_options(argc, argv, options, top_usage, 0);
1260  if (argc)
1261  usage_with_options(top_usage, options);
1262 
1264  sort_order = "dso,symbol";
1265 
1266  setup_sorting(top_usage, options);
1267 
1268  if (top.use_stdio)
1269  use_browser = 0;
1270  else if (top.use_tui)
1271  use_browser = 1;
1272 
1273  setup_browser(false);
1274 
1275  status = perf_target__validate(&top.target);
1276  if (status) {
1277  perf_target__strerror(&top.target, status, errbuf, BUFSIZ);
1278  ui__warning("%s", errbuf);
1279  }
1280 
1281  status = perf_target__parse_uid(&top.target);
1282  if (status) {
1283  int saved_errno = errno;
1284 
1285  perf_target__strerror(&top.target, status, errbuf, BUFSIZ);
1286  ui__error("%s", errbuf);
1287 
1288  status = -saved_errno;
1289  goto out_delete_evlist;
1290  }
1291 
1292  if (perf_target__none(&top.target))
1293  top.target.system_wide = true;
1294 
1295  if (perf_evlist__create_maps(top.evlist, &top.target) < 0)
1296  usage_with_options(top_usage, options);
1297 
1298  if (!top.evlist->nr_entries &&
1299  perf_evlist__add_default(top.evlist) < 0) {
1300  ui__error("Not enough memory for event selector list\n");
1301  return -ENOMEM;
1302  }
1303 
1304  symbol_conf.nr_events = top.evlist->nr_entries;
1305 
1306  if (top.delay_secs < 1)
1307  top.delay_secs = 1;
1308 
1309  /*
1310  * User specified count overrides default frequency.
1311  */
1312  if (top.default_interval)
1313  top.freq = 0;
1314  else if (top.freq) {
1315  top.default_interval = top.freq;
1316  } else {
1317  ui__error("frequency and count are zero, aborting\n");
1318  exit(EXIT_FAILURE);
1319  }
1320 
1321  list_for_each_entry(pos, &top.evlist->entries, node) {
1322  /*
1323  * Fill in the ones not specifically initialized via -c:
1324  */
1325  if (!pos->attr.sample_period)
1326  pos->attr.sample_period = top.default_interval;
1327  }
1328 
1329  top.sym_evsel = perf_evlist__first(top.evlist);
1330 
1331  symbol_conf.priv_size = sizeof(struct annotation);
1332 
1334  if (symbol__init() < 0)
1335  return -1;
1336 
1340 
1341  /*
1342  * Avoid annotation data structures overhead when symbols aren't on the
1343  * sort list.
1344  */
1345  top.sort_has_symbols = sort_sym.list.next != NULL;
1346 
1348  if (top.print_entries == 0) {
1349  struct sigaction act = {
1350  .sa_sigaction = perf_top__sig_winch,
1351  .sa_flags = SA_SIGINFO,
1352  };
1353  perf_top__update_print_entries(&top);
1354  sigaction(SIGWINCH, &act, NULL);
1355  }
1356 
1357  status = __cmd_top(&top);
1358 
1359 out_delete_evlist:
1361 
1362  return status;
1363 }