Linux Kernel  3.7.1
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
builtin-record.c
Go to the documentation of this file.
1 /*
2  * builtin-record.c
3  *
4  * Builtin record command: Record the profile of a workload
5  * (or a CPU, or a PID) into the perf.data output file - for
6  * later analysis via perf report.
7  */
8 #define _FILE_OFFSET_BITS 64
9 
10 #include "builtin.h"
11 
12 #include "perf.h"
13 
14 #include "util/build-id.h"
15 #include "util/util.h"
16 #include "util/parse-options.h"
17 #include "util/parse-events.h"
18 
19 #include "util/header.h"
20 #include "util/event.h"
21 #include "util/evlist.h"
22 #include "util/evsel.h"
23 #include "util/debug.h"
24 #include "util/session.h"
25 #include "util/tool.h"
26 #include "util/symbol.h"
27 #include "util/cpumap.h"
28 #include "util/thread_map.h"
29 
30 #include <unistd.h>
31 #include <sched.h>
32 #include <sys/mman.h>
33 
37 };
38 
/*
 * Per-invocation state of 'perf record', threaded through every helper
 * via the embedded perf_tool (see container_of() in
 * process_synthesized_event()).
 *
 * NOTE(review): several members are elided from this view (source lines
 * 41-42, 44-45, 49-50, 52, 55, 57) — other functions in this file also
 * reference rec->opts, rec->bytes_written, rec->evlist, rec->session,
 * rec->realtime_prio, rec->write_mode, rec->append_file,
 * rec->no_buildid_cache and rec->post_processing_offset, which are
 * presumably declared in those elided lines; confirm against the full
 * source.
 */
struct perf_record {
    struct perf_tool tool;          /* callbacks; must stay first-class for container_of() */
    const char *output_name;        /* -o file name; NULL until defaulted in __cmd_record() */
    const char *progname;           /* argv[0], used by psignal() in the exit handler */
    int output;                     /* output file descriptor (perf.data or stdout) */
    unsigned int page_size;         /* sysconf(_SC_PAGE_SIZE), mmap data offset */
    bool no_buildid;                /* -B: skip buildid collection */
    bool force;                     /* -f: overwrite existing data file */
    bool file_new;                  /* false when appending to an existing perf.data */
    long samples;                   /* mmap batches read; polled-on progress counter */
};
59 
/*
 * Account for @size bytes that reached the output file without going
 * through write_output() (caller: __cmd_record() after
 * perf_event__synthesize_tracing_data() writes directly to the fd).
 */
static void advance_output(struct perf_record *rec, size_t size)
{
    rec->bytes_written += size;
}
64 
65 static int write_output(struct perf_record *rec, void *buf, size_t size)
66 {
67  while (size) {
68  int ret = write(rec->output, buf, size);
69 
70  if (ret < 0) {
71  pr_err("failed to write\n");
72  return -1;
73  }
74 
75  size -= ret;
76  buf += ret;
77 
78  rec->bytes_written += ret;
79  }
80 
81  return 0;
82 }
83 
/*
 * perf_tool callback: append one synthesized event (mmap, comm, ...) to
 * the output file.  Returns 0 on success, -1 if the write failed.
 *
 * NOTE(review): one parameter line (source line 86) is elided from this
 * view — presumably a 'struct perf_sample *sample __maybe_unused'
 * argument between 'event' and 'machine'; confirm against the full
 * source, the visible signature is incomplete.
 */
static int process_synthesized_event(struct perf_tool *tool,
    union perf_event *event,
    struct machine *machine __maybe_unused)
{
    /* Recover our state from the embedded tool member. */
    struct perf_record *rec = container_of(tool, struct perf_record, tool);
    if (write_output(rec, event, event->header.size) < 0)
        return -1;

    return 0;
}
95 
/*
 * Drain one kernel ring buffer: copy everything between our last read
 * position (md->prev) and the kernel's write head into the output file,
 * then publish the new tail so the kernel can reuse the space.
 *
 * Returns 0 on success (including "nothing to read"), -1 on write error.
 * On error the tail is NOT advanced, so the unread data stays in the
 * buffer.
 */
static int perf_record__mmap_read(struct perf_record *rec,
    struct perf_mmap *md)
{
    unsigned int head = perf_mmap__read_head(md);
    unsigned int old = md->prev;
    /* Data area starts one page past the mmap base (first page is the
     * perf_event_mmap_page control page). */
    unsigned char *data = md->base + rec->page_size;
    unsigned long size;
    void *buf;
    int rc = 0;

    if (old == head)
        return 0;       /* buffer empty */

    rec->samples++;     /* progress marker polled by __cmd_record()'s loop */

    size = head - old;

    /*
     * head/old are free-running counters; masking maps them into the
     * buffer.  If the chunk wraps past the end of the ring, write the
     * tail segment [old..end-of-buffer) first...
     */
    if ((old & md->mask) + size != (head & md->mask)) {
        buf = &data[old & md->mask];
        size = md->mask + 1 - (old & md->mask);
        old += size;

        if (write_output(rec, buf, size) < 0) {
            rc = -1;
            goto out;
        }
    }

    /* ...then the (possibly wrapped) remainder up to head. */
    buf = &data[old & md->mask];
    size = head - old;
    old += size;

    if (write_output(rec, buf, size) < 0) {
        rc = -1;
        goto out;
    }

    /* Publish the consumed position back to the kernel. */
    md->prev = old;
    perf_mmap__write_tail(md, old);

out:
    return rc;
}
139 
/* Flags shared between the signal handler and the main record loop. */
static volatile int done;                   /* request shutdown of the read loop */
static volatile int signr = -1;             /* which signal fired; -1 = none */
static volatile int child_finished;         /* workload child exited (SIGCHLD seen) */

/*
 * Async handler for SIGCHLD/SIGINT/SIGUSR1: remember which signal
 * arrived and ask the main loop to wind down.
 */
static void sig_handler(int sig)
{
    done = 1;
    signr = sig;

    if (sig == SIGCHLD)
        child_finished = 1;
}
152 
153 static void perf_record__sig_exit(int exit_status __maybe_unused, void *arg)
154 {
155  struct perf_record *rec = arg;
156  int status;
157 
158  if (rec->evlist->workload.pid > 0) {
159  if (!child_finished)
160  kill(rec->evlist->workload.pid, SIGTERM);
161 
162  wait(&status);
163  if (WIFSIGNALED(status))
164  psignal(WTERMSIG(status), rec->progname);
165  }
166 
167  if (signr == -1 || signr == SIGUSR1)
168  return;
169 
170  signal(signr, SIG_DFL);
171  kill(getpid(), signr);
172 }
173 
174 static bool perf_evlist__equal(struct perf_evlist *evlist,
175  struct perf_evlist *other)
176 {
177  struct perf_evsel *pos, *pair;
178 
179  if (evlist->nr_entries != other->nr_entries)
180  return false;
181 
182  pair = perf_evlist__first(other);
183 
184  list_for_each_entry(pos, &evlist->entries, node) {
185  if (memcmp(&pos->attr, &pair->attr, sizeof(pos->attr) != 0))
186  return false;
187  pair = perf_evsel__next(pair);
188  }
189 
190  return true;
191 }
192 
/*
 * Configure and open all counters in rec->evlist, mmap their ring
 * buffers, apply event filters, and attach the evlist to the session.
 * Contains layered fallbacks for older kernels (no exclude_guest, no
 * sample_id_all, no hardware cycles event).
 *
 * Returns 0 on success, a negative errno-style value on failure.
 *
 * NOTE(review): source lines 232, 275 and 349 are elided from this
 * view; each elision is flagged inline below.
 */
static int perf_record__open(struct perf_record *rec)
{
    struct perf_evsel *pos;
    struct perf_evlist *evlist = rec->evlist;
    struct perf_session *session = rec->session;
    struct perf_record_opts *opts = &rec->opts;
    int rc = 0;

    perf_evlist__config_attrs(evlist, opts);

    if (opts->group)
        perf_evlist__set_leader(evlist);

    list_for_each_entry(pos, &evlist->entries, node) {
        struct perf_event_attr *attr = &pos->attr;
        /*
         * Check if parse_single_tracepoint_event has already asked for
         * PERF_SAMPLE_TIME.
         *
         * XXX this is kludgy but short term fix for problems introduced by
         * eac23d1c that broke 'perf script' by having different sample_types
         * when using multiple tracepoint events when we use a perf binary
         * that tries to use sample_id_all on an older kernel.
         *
         * We need to move counter creation to perf_session, support
         * different sample_types, etc.
         */
        bool time_needed = attr->sample_type & PERF_SAMPLE_TIME;

        /* Fallback targets for older-kernel retries below. */
fallback_missing_features:
        if (opts->exclude_guest_missing)
            attr->exclude_guest = attr->exclude_host = 0;
retry_sample_id:
        attr->sample_id_all = opts->sample_id_all_missing ? 0 : 1;
try_again:
        if (perf_evsel__open(pos, evlist->cpus, evlist->threads) < 0) {
            int err = errno;    /* snapshot before any library call clobbers it */

            if (err == EPERM || err == EACCES) {
                /* NOTE(review): source line 232 elided here —
                 * presumably the ui__error() permission message;
                 * confirm against the full source. */
                rc = -err;
                goto out;
            } else if (err == ENODEV && opts->target.cpu_list) {
                pr_err("No such device - did you specify"
                       " an out-of-range profile CPU?\n");
                rc = -err;
                goto out;
            } else if (err == EINVAL) {
                if (!opts->exclude_guest_missing &&
                    (attr->exclude_guest || attr->exclude_host)) {
                    pr_debug("Old kernel, cannot exclude "
                             "guest or host samples.\n");
                    opts->exclude_guest_missing = true;
                    goto fallback_missing_features;
                } else if (!opts->sample_id_all_missing) {
                    /*
                     * Old kernel, no attr->sample_id_type_all field
                     */
                    opts->sample_id_all_missing = true;
                    if (!opts->sample_time && !opts->raw_samples && !time_needed)
                        attr->sample_type &= ~PERF_SAMPLE_TIME;

                    goto retry_sample_id;
                }
            }

            /*
             * If it's cycles then fall back to hrtimer
             * based cpu-clock-tick sw counter, which
             * is always available even if no PMU support.
             *
             * PPC returns ENXIO until 2.6.37 (behavior changed
             * with commit b0a873e).
             */
            if ((err == ENOENT || err == ENXIO)
                && attr->type == PERF_TYPE_HARDWARE
                && attr->config == PERF_COUNT_HW_CPU_CYCLES) {

                if (verbose)
                    ui__warning("The cycles event is not supported, "
                                "trying to fall back to cpu-clock-ticks\n");
                attr->type = PERF_TYPE_SOFTWARE;
                /* NOTE(review): source line 275 elided here —
                 * presumably 'attr->config = PERF_COUNT_SW_CPU_CLOCK;'
                 * to complete the software-clock fallback; without it
                 * this retry is incoherent.  Confirm against the full
                 * source. */
                if (pos->name) {
                    free(pos->name);
                    pos->name = NULL;
                }
                goto try_again;
            }

            if (err == ENOENT) {
                ui__error("The %s event is not supported.\n",
                          perf_evsel__name(pos));
                rc = -err;
                goto out;
            }

            printf("\n");
            error("sys_perf_event_open() syscall returned with %d "
                  "(%s) for event %s. /bin/dmesg may provide "
                  "additional information.\n",
                  err, strerror(err), perf_evsel__name(pos));

#if defined(__i386__) || defined(__x86_64__)
            if (attr->type == PERF_TYPE_HARDWARE &&
                err == EOPNOTSUPP) {
                pr_err("No hardware sampling interrupt available."
                       " No APIC? If so then you can boot the kernel"
                       " with the \"lapic\" boot parameter to"
                       " force-enable it.\n");
                rc = -err;
                goto out;
            }
#endif

            pr_err("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
            rc = -err;
            goto out;
        }
    }

    if (perf_evlist__apply_filters(evlist)) {
        error("failed to set filter with %d (%s)\n", errno,
              strerror(errno));
        rc = -1;
        goto out;
    }

    if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
        if (errno == EPERM) {
            pr_err("Permission error mapping pages.\n"
                   "Consider increasing "
                   "/proc/sys/kernel/perf_event_mlock_kb,\n"
                   "or try again with a smaller value of -m/--mmap_pages.\n"
                   "(current value: %d)\n", opts->mmap_pages);
            rc = -errno;
        } else if (!is_power_of_2(opts->mmap_pages)) {
            pr_err("--mmap_pages/-m value must be a power of two.");
            rc = -EINVAL;
        } else {
            pr_err("failed to mmap with %d (%s)\n", errno, strerror(errno));
            rc = -errno;
        }
        goto out;
    }

    if (rec->file_new)
        session->evlist = evlist;
    else {
        /* Appending: the on-disk event list must match exactly. */
        if (!perf_evlist__equal(session->evlist, evlist)) {
            fprintf(stderr, "incompatible append\n");
            rc = -1;
            goto out;
        }
    }

    /* NOTE(review): source line 349 elided here — presumably
     * 'perf_session__set_id_hdr_size(session);'; confirm against the
     * full source. */
out:
    return rc;
}
353 
/*
 * Post-process the recorded data (everything after
 * rec->post_processing_offset) to mark the DSOs that were actually hit,
 * so only their buildids are written out.  Returns 0 if the file is
 * empty.
 *
 * NOTE(review): source lines 362 and 364 are elided from this view —
 * presumably the '__perf_session__process_events(...)' call that the
 * visible 'size - rec->post_processing_offset,' argument belongs to,
 * and its closing argument/return.  As shown, the function body is
 * syntactically incomplete; confirm against the full source.
 */
static int process_buildids(struct perf_record *rec)
{
    /* Current file offset == total bytes written so far. */
    u64 size = lseek(rec->output, 0, SEEK_CUR);

    if (size == 0)
        return 0;

    rec->session->fd = rec->output;
        size - rec->post_processing_offset,
}
366 
/*
 * on_exit() hook: on successful exit (status == 0) and when not piping,
 * finalize the perf.data file — fix up the header's data size, collect
 * buildids, and tear down symbol state.
 *
 * NOTE(review): source lines 379 and 381-382 are elided from this view —
 * presumably the 'perf_session__write_header(rec->session, rec->evlist,'
 * call that the visible 'rec->output, true);' line belongs to, and a
 * 'perf_session__delete(rec->session);' cleanup.  As shown, the body is
 * syntactically incomplete; confirm against the full source.
 */
static void perf_record__exit(int status, void *arg)
{
    struct perf_record *rec = arg;

    if (status != 0)
        return;     /* nothing to finalize on failed runs */

    if (!rec->opts.pipe_output) {
        rec->session->header.data_size += rec->bytes_written;

        if (!rec->no_buildid)
            process_buildids(rec);
            rec->output, true);
        symbol__exit();
    }
}
386 
/*
 * Machine-iteration callback (see perf_session__process_machines() in
 * __cmd_record()): synthesize module and kernel mmap events for one
 * guest machine.  Host machines are skipped — the host is synthesized
 * directly by __cmd_record().
 */
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
    int err;
    struct perf_tool *tool = data;

    if (machine__is_host(machine))
        return;

    /*
     * As for guest kernel when processing subcommand record&report,
     * we arrange module mmap prior to guest kernel mmap and trigger
     * a preload dso because default guest module symbols are loaded
     * from guest kallsyms instead of /lib/modules/XXX/XXX. This
     * method is used to avoid symbol missing when the first addr is
     * in module instead of in guest kernel.
     */
    err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                         machine);
    if (err < 0)
        pr_err("Couldn't record guest kernel [%d]'s reference"
               " relocation symbol.\n", machine->pid);

    /*
     * We use _stext for guest kernel because guest kernel's /proc/kallsyms
     * have no _text sometimes.
     */
    err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                             machine, "_text");
    if (err < 0)
        /* fall back to _stext if _text wasn't found */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine, "_stext");
    if (err < 0)
        pr_err("Couldn't record guest kernel [%d]'s reference"
               " relocation symbol.\n", machine->pid);
}
422 
/*
 * Header-only marker event flushed after each pass over the mmap
 * buffers (see perf_record__mmap_read_all()).
 *
 * NOTE(review): the '.type' initializer (source line 425, presumably
 * '.type = PERF_RECORD_FINISHED_ROUND,') is elided from this view;
 * confirm against the full source.
 */
static struct perf_event_header finished_round_event = {
    .size = sizeof(struct perf_event_header),
};
427 
/*
 * Drain every mapped ring buffer once, then append a "finished round"
 * marker so the report side knows events up to this point can be
 * ordered.  Returns 0 on success, -1 on any write failure.
 *
 * NOTE(review): source line 442 is elided from this view — presumably a
 * condition guarding the finished_round_event write (e.g. a
 * perf_header__has_feat() check); confirm against the full source.
 */
static int perf_record__mmap_read_all(struct perf_record *rec)
{
    int i;
    int rc = 0;

    for (i = 0; i < rec->evlist->nr_mmaps; i++) {
        /* base is NULL for per-cpu slots that were never mapped */
        if (rec->evlist->mmap[i].base) {
            if (perf_record__mmap_read(rec, &rec->evlist->mmap[i]) != 0) {
                rc = -1;
                goto out;
            }
        }
    }

    rc = write_output(rec, &finished_round_event,
                      sizeof(finished_round_event));

out:
    return rc;
}
449 
/*
 * The record workhorse: set up signal/exit handlers, resolve the output
 * target (file, append, or pipe), create the session, open counters,
 * synthesize the initial metadata events (attrs, kernel/module mmaps,
 * threads), optionally start the forked workload, then loop draining
 * the mmap ring buffers until told to stop.
 *
 * Returns 0 on success, negative on error.
 *
 * NOTE(review): source lines 525, 528 and 531 are elided from this view
 * — each was presumably a perf_header__clear_feat() call forming the
 * body of the 'if' directly above it (HEADER_BUILD_ID, tracepoint data,
 * HEADER_BRANCH_STACK respectively).  As shown, those three 'if's
 * dangle onto the following statement; confirm against the full source.
 */
static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
{
    struct stat st;
    int flags;
    int err, output, feat;
    unsigned long waking = 0;           /* poll() wakeup count, reported at the end */
    const bool forks = argc > 0;        /* remaining argv is the workload command */
    struct machine *machine;
    struct perf_tool *tool = &rec->tool;
    struct perf_record_opts *opts = &rec->opts;
    struct perf_evlist *evsel_list = rec->evlist;
    const char *output_name = rec->output_name;
    struct perf_session *session;

    rec->progname = argv[0];

    rec->page_size = sysconf(_SC_PAGE_SIZE);

    /* Runs on any exit path: reaps the workload, re-raises fatal signals. */
    on_exit(perf_record__sig_exit, rec);
    signal(SIGCHLD, sig_handler);
    signal(SIGINT, sig_handler);
    signal(SIGUSR1, sig_handler);

    /* Default output: pipe if stdout is a FIFO, else "perf.data". */
    if (!output_name) {
        if (!fstat(STDOUT_FILENO, &st) && S_ISFIFO(st.st_mode))
            opts->pipe_output = true;
        else
            rec->output_name = output_name = "perf.data";
    }
    if (output_name) {
        if (!strcmp(output_name, "-"))
            opts->pipe_output = true;
        else if (!stat(output_name, &st) && st.st_size) {
            /* Existing non-empty file: rotate it to <name>.old on force. */
            if (rec->write_mode == WRITE_FORCE) {
                char oldname[PATH_MAX];
                snprintf(oldname, sizeof(oldname), "%s.old",
                         output_name);
                unlink(oldname);
                rename(output_name, oldname);
            }
        } else if (rec->write_mode == WRITE_APPEND) {
            /* Nothing to append to: fall back to a fresh file. */
            rec->write_mode = WRITE_FORCE;
        }
    }

    flags = O_CREAT|O_RDWR;
    if (rec->write_mode == WRITE_APPEND)
        rec->file_new = 0;
    else
        flags |= O_TRUNC;

    if (opts->pipe_output)
        output = STDOUT_FILENO;
    else
        output = open(output_name, flags, S_IRUSR | S_IWUSR);
    if (output < 0) {
        perror("failed to create output file");
        return -1;
    }

    rec->output = output;

    session = perf_session__new(output_name, O_WRONLY,
                                rec->write_mode == WRITE_FORCE, false, NULL);
    if (session == NULL) {
        pr_err("Not enough memory for reading perf file header\n");
        return -1;
    }

    rec->session = session;

    /* Enable every header feature, then selectively clear below. */
    for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
        perf_header__set_feat(&session->header, feat);

    if (rec->no_buildid)
        /* NOTE(review): source line 525 elided — presumably
         * perf_header__clear_feat(&session->header, HEADER_BUILD_ID); */

    if (!have_tracepoints(&evsel_list->entries))
        /* NOTE(review): source line 528 elided — presumably a
         * perf_header__clear_feat() for the tracing-data feature */

    if (!rec->opts.branch_stack)
        /* NOTE(review): source line 531 elided — presumably a
         * perf_header__clear_feat() for the branch-stack feature */

    if (!rec->file_new) {
        /* Append mode: load the existing header for compatibility checks. */
        err = perf_session__read_header(session, output);
        if (err < 0)
            goto out_delete_session;
    }

    if (forks) {
        /* Fork the workload now; it is started (exec'ed) further below. */
        err = perf_evlist__prepare_workload(evsel_list, opts, argv);
        if (err < 0) {
            pr_err("Couldn't run the workload!\n");
            goto out_delete_session;
        }
    }

    if (perf_record__open(rec) != 0) {
        err = -1;
        goto out_delete_session;
    }

    /*
     * perf_session__delete(session) will be called at perf_record__exit()
     */
    on_exit(perf_record__exit, rec);

    if (opts->pipe_output) {
        err = perf_header__write_pipe(output);
        if (err < 0)
            goto out_delete_session;
    } else if (rec->file_new) {
        err = perf_session__write_header(session, evsel_list,
                                         output, false);
        if (err < 0)
            goto out_delete_session;
    }

    if (!rec->no_buildid
        && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
        pr_err("Couldn't generate buildids. "
               "Use --no-buildid to profile anyway.\n");
        err = -1;
        goto out_delete_session;
    }

    /* Everything after this offset is sample data (see process_buildids). */
    rec->post_processing_offset = lseek(output, 0, SEEK_CUR);

    machine = perf_session__find_host_machine(session);
    if (!machine) {
        pr_err("Couldn't find native kernel information.\n");
        err = -1;
        goto out_delete_session;
    }

    if (opts->pipe_output) {
        /* Pipe mode has no header on disk, so stream the metadata inline. */
        err = perf_event__synthesize_attrs(tool, session,
                                           process_synthesized_event);
        if (err < 0) {
            pr_err("Couldn't synthesize attrs.\n");
            goto out_delete_session;
        }

        err = perf_event__synthesize_event_types(tool, process_synthesized_event,
                                                 machine);
        if (err < 0) {
            pr_err("Couldn't synthesize event_types.\n");
            goto out_delete_session;
        }

        if (have_tracepoints(&evsel_list->entries)) {
            /*
             * FIXME err <= 0 here actually means that
             * there were no tracepoints so its not really
             * an error, just that we don't need to
             * synthesize anything. We really have to
             * return this more properly and also
             * propagate errors that now are calling die()
             */
            err = perf_event__synthesize_tracing_data(tool, output, evsel_list,
                                                      process_synthesized_event);
            if (err <= 0) {
                pr_err("Couldn't record tracing data.\n");
                goto out_delete_session;
            }
            /* tracing data was written directly to the fd; account it */
            advance_output(rec, err);
        }
    }

    err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                             machine, "_text");
    if (err < 0)
        /* some kernels expose only _stext in kallsyms */
        err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
                                                 machine, "_stext");
    if (err < 0)
        pr_err("Couldn't record kernel reference relocation symbol\n"
               "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
               "Check /proc/kallsyms permission or run as root.\n");

    err = perf_event__synthesize_modules(tool, process_synthesized_event,
                                         machine);
    if (err < 0)
        pr_err("Couldn't record kernel module information.\n"
               "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
               "Check /proc/modules permission or run as root.\n");

    if (perf_guest)
        perf_session__process_machines(session, tool,
                                       perf_event__synthesize_guest_os);

    if (!opts->target.system_wide)
        err = perf_event__synthesize_thread_map(tool, evsel_list->threads,
                                                process_synthesized_event,
                                                machine);
    else
        err = perf_event__synthesize_threads(tool, process_synthesized_event,
                                             machine);

    if (err != 0)
        goto out_delete_session;

    if (rec->realtime_prio) {
        struct sched_param param;

        param.sched_priority = rec->realtime_prio;
        if (sched_setscheduler(0, SCHED_FIFO, &param)) {
            pr_err("Could not set realtime priority.\n");
            err = -1;
            goto out_delete_session;
        }
    }

    perf_evlist__enable(evsel_list);

    /*
     * Let the child rip
     */
    if (forks)
        perf_evlist__start_workload(evsel_list);

    /* Main capture loop: drain buffers, sleep in poll() when idle. */
    for (;;) {
        int hits = rec->samples;

        if (perf_record__mmap_read_all(rec) < 0) {
            err = -1;
            goto out_delete_session;
        }

        if (hits == rec->samples) {
            /* no new data this pass */
            if (done)
                break;
            err = poll(evsel_list->pollfd, evsel_list->nr_fds, -1);
            waking++;
        }

        /* Stop counters once shutdown is requested, then do a final drain. */
        if (done)
            perf_evlist__disable(evsel_list);
    }

    if (quiet || signr == SIGUSR1)
        return 0;

    fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);

    /*
     * Approximate RIP event size: 24 bytes.
     */
    fprintf(stderr,
            "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
            (double)rec->bytes_written / 1024.0 / 1024.0,
            output_name,
            rec->bytes_written / 24);

    return 0;

out_delete_session:
    perf_session__delete(session);
    return err;
}
709 
/* Build one branch_modes[] entry mapping a CLI keyword to a mode bit. */
#define BRANCH_OPT(n, m) \
    { .name = n, .mode = (m) }

/* Sentinel terminating branch_modes[] (NULL name ends the scan). */
#define BRANCH_END { .name = NULL }

/* One selectable branch-stack filter mode (see parse_branch_stack()). */
struct branch_mode {
    const char *name;   /* keyword matched case-insensitively against -j/-b args */
    int mode;           /* PERF_SAMPLE_BRANCH_* bit(s) OR'ed into the sample mask */
};

/*
 * NOTE(review): the table entries (source lines 721-727, BRANCH_OPT(...)
 * rows) are elided from this view — only the terminator is visible.
 * Confirm the full table against the original source.
 */
static const struct branch_mode branch_modes[] = {
    BRANCH_END
};
730 
731 static int
732 parse_branch_stack(const struct option *opt, const char *str, int unset)
733 {
734 #define ONLY_PLM \
735  (PERF_SAMPLE_BRANCH_USER |\
736  PERF_SAMPLE_BRANCH_KERNEL |\
737  PERF_SAMPLE_BRANCH_HV)
738 
739  uint64_t *mode = (uint64_t *)opt->value;
740  const struct branch_mode *br;
741  char *s, *os = NULL, *p;
742  int ret = -1;
743 
744  if (unset)
745  return 0;
746 
747  /*
748  * cannot set it twice, -b + --branch-filter for instance
749  */
750  if (*mode)
751  return -1;
752 
753  /* str may be NULL in case no arg is passed to -b */
754  if (str) {
755  /* because str is read-only */
756  s = os = strdup(str);
757  if (!s)
758  return -1;
759 
760  for (;;) {
761  p = strchr(s, ',');
762  if (p)
763  *p = '\0';
764 
765  for (br = branch_modes; br->name; br++) {
766  if (!strcasecmp(s, br->name))
767  break;
768  }
769  if (!br->name) {
770  ui__warning("unknown branch filter %s,"
771  " check man page\n", s);
772  goto error;
773  }
774 
775  *mode |= br->mode;
776 
777  if (!p)
778  break;
779 
780  s = p + 1;
781  }
782  }
783  ret = 0;
784 
785  /* default to any branch */
786  if ((*mode & ~ONLY_PLM) == 0) {
787  *mode = PERF_SAMPLE_BRANCH_ANY;
788  }
789 error:
790  free(os);
791  return ret;
792 }
793 
#ifdef LIBUNWIND_SUPPORT
/*
 * Parse a user-supplied stack dump size for '-g dwarf,<size>'.
 *
 * The value is rounded up to a multiple of sizeof(u64) and must be
 * non-zero and no larger than USHRT_MAX rounded down to a u64 multiple.
 * On success stores the sanitized size in *_size and returns 0; on any
 * parse/range failure prints an error and returns -1 (*_size untouched).
 *
 * Fix: max_size is unsigned long, so it must be printed with %lu —
 * the previous %ld was a signed/unsigned format mismatch (undefined
 * behavior per the C printf rules, harmless here only by accident).
 */
static int get_stack_size(char *str, unsigned long *_size)
{
    char *endptr;
    unsigned long size;
    unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));

    size = strtoul(str, &endptr, 0);

    do {
        if (*endptr)
            break;      /* trailing junk after the number */

        size = round_up(size, sizeof(u64));
        if (!size || size > max_size)
            break;

        *_size = size;
        return 0;

    } while (0);

    pr_err("callchain: Incorrect stack dump size (max %lu): %s\n",
           max_size, str);
    return -1;
}
#endif /* LIBUNWIND_SUPPORT */
821 
822 static int
823 parse_callchain_opt(const struct option *opt __maybe_unused, const char *arg,
824  int unset)
825 {
826  struct perf_record *rec = (struct perf_record *)opt->value;
827  char *tok, *name, *saveptr = NULL;
828  char *buf;
829  int ret = -1;
830 
831  /* --no-call-graph */
832  if (unset)
833  return 0;
834 
835  /* We specified default option if none is provided. */
836  BUG_ON(!arg);
837 
838  /* We need buffer that we know we can write to. */
839  buf = malloc(strlen(arg) + 1);
840  if (!buf)
841  return -ENOMEM;
842 
843  strcpy(buf, arg);
844 
845  tok = strtok_r((char *)buf, ",", &saveptr);
846  name = tok ? : (char *)buf;
847 
848  do {
849  /* Framepointer style */
850  if (!strncmp(name, "fp", sizeof("fp"))) {
851  if (!strtok_r(NULL, ",", &saveptr)) {
852  rec->opts.call_graph = CALLCHAIN_FP;
853  ret = 0;
854  } else
855  pr_err("callchain: No more arguments "
856  "needed for -g fp\n");
857  break;
858 
859 #ifdef LIBUNWIND_SUPPORT
860  /* Dwarf style */
861  } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
862  const unsigned long default_stack_dump_size = 8192;
863 
864  ret = 0;
865  rec->opts.call_graph = CALLCHAIN_DWARF;
866  rec->opts.stack_dump_size = default_stack_dump_size;
867 
868  tok = strtok_r(NULL, ",", &saveptr);
869  if (tok) {
870  unsigned long size = 0;
871 
872  ret = get_stack_size(tok, &size);
873  rec->opts.stack_dump_size = size;
874  }
875 
876  if (!ret)
877  pr_debug("callchain: stack dump size %d\n",
878  rec->opts.stack_dump_size);
879 #endif /* LIBUNWIND_SUPPORT */
880  } else {
881  pr_err("callchain: Unknown -g option "
882  "value: %s\n", arg);
883  break;
884  }
885 
886  } while (0);
887 
888  free(buf);
889 
890  if (!ret)
891  pr_debug("callchain: type %d\n", rec->opts.call_graph);
892 
893  return ret;
894 }
895 
/* Command-line synopses shown by usage_with_options() in cmd_record(). */
static const char * const record_usage[] = {
    "perf record [<options>] [<command>]",
    "perf record [<options>] -- <command> [<options>]",
    NULL
};
901 
902 /*
903  * XXX Ideally would be local to cmd_record() and passed to a perf_record__new
904  * because we need to have access to it in perf_record__exit, that is called
905  * after cmd_record() exits, but since record_options need to be accessible to
906  * builtin-script, leave it here.
907  *
908  * At least we don't ouch it in all the other functions here directly.
909  *
910  * Just say no to tons of global variables, sigh.
911  */
/*
 * Global record state with option defaults.  Global (rather than local
 * to cmd_record()) because perf_record__exit() runs after cmd_record()
 * returns and record_options below takes addresses into it.
 *
 * UINT_MAX / ULLONG_MAX act as "not set by the user" sentinels — see the
 * user_interval/user_freq checks in cmd_record().
 */
static struct perf_record record = {
    .opts = {
        .mmap_pages = UINT_MAX,         /* presumably "unset" sentinel too — confirm */
        .user_freq = UINT_MAX,          /* -F not given */
        .user_interval = ULLONG_MAX,    /* -c not given */
        .freq = 4000,                   /* default sampling frequency (Hz) */
        .target = {
            .uses_mmap = true,
        },
    },
    .write_mode = WRITE_FORCE,
    .file_new = true,
};
925 
/* Common prefix of the -g/--call-graph help string. */
#define CALLCHAIN_HELP "do call-graph (stack chain/backtrace) recording: "

/* "dwarf" is only offered when perf was built with libunwind support. */
#ifdef LIBUNWIND_SUPPORT
static const char callchain_help[] = CALLCHAIN_HELP "[fp] dwarf";
#else
static const char callchain_help[] = CALLCHAIN_HELP "[fp]";
#endif
933 
934 /*
935  * XXX Will stay a global variable till we fix builtin-script.c to stop messing
936  * with it and switch to use the library functions in perf_evlist that came
937  * from builtin-record.c, i.e. use perf_record_opts,
938  * perf_evlist__prepare_workload, etc instead of fork+exec'in 'perf record',
939  * using pipes, etc.
940  */
/*
 * Option table for 'perf record'.  Non-static because builtin-script.c
 * reuses it (see the XXX comments above in the original file).
 *
 * NOTE(review): source line 944 is elided from this view — presumably
 * the parse_events callback argument closing the first OPT_CALLBACK
 * entry ("parse_events_option),"); as shown, that entry is
 * syntactically incomplete.  Confirm against the full source.
 */
const struct option record_options[] = {
    OPT_CALLBACK('e', "event", &record.evlist, "event",
                 "event selector. use 'perf list' to list available events",
    OPT_CALLBACK(0, "filter", &record.evlist, "filter",
                 "event filter", parse_filter),
    OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
               "record events on existing process id"),
    OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
               "record events on existing thread id"),
    OPT_INTEGER('r', "realtime", &record.realtime_prio,
                "collect data with this RT SCHED_FIFO priority"),
    OPT_BOOLEAN('D', "no-delay", &record.opts.no_delay,
                "collect data without buffering"),
    OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
                "collect raw sample records from all opened counters"),
    OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
                "system-wide collection from all CPUs"),
    OPT_BOOLEAN('A', "append", &record.append_file,
                "append to the output file to do incremental profiling"),
    OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
               "list of cpus to monitor"),
    OPT_BOOLEAN('f', "force", &record.force,
                "overwrite existing data file (deprecated)"),
    OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
    OPT_STRING('o', "output", &record.output_name, "file",
               "output file name"),
    OPT_BOOLEAN('i', "no-inherit", &record.opts.no_inherit,
                "child tasks do not inherit counters"),
    OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
    OPT_UINTEGER('m', "mmap-pages", &record.opts.mmap_pages,
                 "number of mmap data pages"),
    OPT_BOOLEAN(0, "group", &record.opts.group,
                "put the counters into a counter group"),
    OPT_CALLBACK_DEFAULT('g', "call-graph", &record, "mode[,dump_size]",
                         callchain_help, &parse_callchain_opt,
                         "fp"),
    OPT_INCR('v', "verbose", &verbose,
             "be more verbose (show counter open errors, etc)"),
    OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
    OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
                "per thread counts"),
    OPT_BOOLEAN('d', "data", &record.opts.sample_address,
                "Sample addresses"),
    OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
    OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
    OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
                "don't sample"),
    OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,
                "do not update the buildid cache"),
    OPT_BOOLEAN('B', "no-buildid", &record.no_buildid,
                "do not collect buildids in perf.data"),
    OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
                 "monitor event in cgroup name only",
                 parse_cgroups),
    OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
               "user to profile"),

    OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
                       "branch any", "sample any taken branches",
                       parse_branch_stack),

    OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
                 "branch filter mask", "branch stack filter modes",
                 parse_branch_stack),
    OPT_END()
};
1008 
/*
 * Entry point for 'perf record': parse options, validate the target
 * (pid/tid/cpu/uid/system-wide), build the evsel/cpu/thread maps,
 * resolve period vs. frequency, then hand off to __cmd_record().
 *
 * Returns 0 on success, negative errno-style value on failure.
 *
 * NOTE(review): source lines 1024, 1046 and 1057 are elided from this
 * view; each elision is flagged inline below.
 */
int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
{
    int err = -ENOMEM;
    struct perf_evsel *pos;
    struct perf_evlist *evsel_list;
    struct perf_record *rec = &record;
    char errbuf[BUFSIZ];

    evsel_list = perf_evlist__new(NULL, NULL);
    if (evsel_list == NULL)
        return -ENOMEM;

    rec->evlist = evsel_list;

    argc = parse_options(argc, argv, record_options, record_usage,
    /* NOTE(review): source line 1024 elided — presumably the final
     * parse_options() argument and closing paren, e.g.
     * "PARSE_OPT_STOP_AT_NON_OPTION);".  As shown, the call is
     * syntactically incomplete. */
    if (!argc && perf_target__none(&rec->opts.target))
        usage_with_options(record_usage, record_options);

    /* -f and -A are mutually exclusive; -A selects append mode. */
    if (rec->force && rec->append_file) {
        ui__error("Can't overwrite and append at the same time."
                  " You need to choose between -f and -A");
        usage_with_options(record_usage, record_options);
    } else if (rec->append_file) {
        rec->write_mode = WRITE_APPEND;
    } else {
        rec->write_mode = WRITE_FORCE;
    }

    if (nr_cgroups && !rec->opts.target.system_wide) {
        ui__error("cgroup monitoring only available in"
                  " system-wide mode\n");
        usage_with_options(record_usage, record_options);
    }

    symbol__init();

    /* NOTE(review): source line 1046 elided — presumably an
     * 'if (symbol_conf.kptr_restrict)' guard for this warning. */
    pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
"check /proc/sys/kernel/kptr_restrict.\n\n"
"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
"file is not found in the buildid cache or in the vmlinux path.\n\n"
"Samples in kernel modules won't be resolved at all.\n\n"
"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
"even with a suitable vmlinux or kallsyms file.\n\n");

    if (rec->no_buildid_cache || rec->no_buildid)
        /* NOTE(review): source line 1057 elided — presumably
         * 'disable_buildid_cache();' as the body of this if. */

    /* No -e given: fall back to the default event (typically cycles). */
    if (evsel_list->nr_entries == 0 &&
        perf_evlist__add_default(evsel_list) < 0) {
        pr_err("Not enough memory for event selector list\n");
        goto out_symbol_exit;
    }

    err = perf_target__validate(&rec->opts.target);
    if (err) {
        /* validation problems are only warnings, not fatal */
        perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
        ui__warning("%s", errbuf);
    }

    err = perf_target__parse_uid(&rec->opts.target);
    if (err) {
        int saved_errno = errno;    /* strerror helper may clobber errno */

        perf_target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
        ui__error("%s", errbuf);

        err = -saved_errno;
        goto out_free_fd;
    }

    err = -ENOMEM;
    if (perf_evlist__create_maps(evsel_list, &rec->opts.target) < 0)
        usage_with_options(record_usage, record_options);

    list_for_each_entry(pos, &evsel_list->entries, node) {
        if (perf_header__push_event(pos->attr.config, perf_evsel__name(pos)))
            goto out_free_fd;
    }

    /* ULLONG_MAX/UINT_MAX mean "not set on the command line". */
    if (rec->opts.user_interval != ULLONG_MAX)
        rec->opts.default_interval = rec->opts.user_interval;
    if (rec->opts.user_freq != UINT_MAX)
        rec->opts.freq = rec->opts.user_freq;

    /*
     * User specified count overrides default frequency.
     */
    if (rec->opts.default_interval)
        rec->opts.freq = 0;
    else if (rec->opts.freq) {
        rec->opts.default_interval = rec->opts.freq;
    } else {
        ui__error("frequency and count are zero, aborting\n");
        err = -EINVAL;
        goto out_free_fd;
    }

    err = __cmd_record(&record, argc, argv);
out_free_fd:
    perf_evlist__delete_maps(evsel_list);
out_symbol_exit:
    symbol__exit();
    return err;
}