60 #include <sys/prctl.h>
/*
 * Single-space string constant.
 * NOTE(review): by its name, the field separator used when none is supplied;
 * the use site is not visible in this excerpt -- confirm.
 */
63 #define DEFAULT_SEPARATOR " "
/*
 * Placeholder texts.
 * NOTE(review): by their names, printed in place of a value for a counter that
 * is unsupported or was never counted; use sites are not visible here -- confirm.
 */
64 #define CNTR_NOT_SUPPORTED "<not supported>"
65 #define CNTR_NOT_COUNTED "<not counted>"
/* Number of measurement runs: bounds the run loop and is printed as "(%d runs)". */
73 static int run_count = 1;
/*
 * NOTE(review): presumably set by the "child tasks do not inherit counters"
 * option; the option binding is not visible in this excerpt.
 */
74 static bool no_inherit =
false;
/*
 * Bound to -c/--scale ("scale/normalize counters") and passed along when the
 * aggregated counts are read. On by default.
 */
75 static bool scale =
true;
/*
 * Bound to -A/--no-aggr ("disable CPU count aggregation"). Rejected together
 * with cgroups unless the target has a CPU.
 */
76 static bool no_aggr =
false;
/* Receives the result of fork() for the measured command; starts at -1. */
77 static pid_t child_pid = -1;
/*
 * NOTE(review): presumably set by the "null run - dont start any counters"
 * option; the option binding is not visible in this excerpt.
 */
78 static bool null_run =
false;
/*
 * Bound to -d/--detailed through OPT_INCR; add_default_attributes() compares
 * it against the thresholds 1, 2 and 3.
 */
79 static int detailed_run = 0;
/*
 * NOTE(review): presumably selects the thousands-grouped "%'18.0f" number
 * format; the code that reads it is not visible here. On by default.
 */
80 static bool big_num =
true;
/*
 * Starts at -1; stat__set_big_num() stores 0 or 1. A value of 1 is rejected
 * with "-B option not supported with -x".
 */
81 static int big_num_opt = -1;
/* Bound to -x/--field-separator; NULL unless given. Also compared against the literal "\\t". */
82 static const char *csv_sep =
NULL;
/* When true, the printers choose the unpadded CSV variants of their format strings. */
83 static bool csv_output =
false;
/*
 * Consulted in create_perf_stat_counter() together with "evsel == first".
 * NOTE(review): presumably the "put the counters into a counter group" option.
 */
84 static bool group =
false;
/*
 * Polled by the "while(!done) sleep(1)" wait loop.
 * NOTE(review): the writer is not visible here; presumably a signal handler.
 */
87 static volatile int done = 0;
93 static int perf_evsel__alloc_stat_priv(
struct perf_evsel *evsel)
99 static void perf_evsel__free_stat_priv(
struct perf_evsel *evsel)
/* Returns the nr field of the object that perf_evsel__cpus() yields for evsel. */
110 static inline int perf_evsel__nr_cpus(
struct perf_evsel *evsel)
112 return perf_evsel__cpus(evsel)->nr;
126 static struct stats walltime_nsecs_stats;
/*
 * Sets up the counter for evsel (only fragments of the body are visible in
 * this excerpt; the second parameter and the actual open calls are elided).
 *
 * Visible behavior: if an attempt fails with errno == EINVAL while
 * attr.exclude_guest or attr.exclude_host is set, exclude_guest_missing is
 * latched; when that flag is set, both attribute bits are cleared.
 * NOTE(review): the jump that retries after latching the flag is not visible
 * here -- confirm.
 */
128 static int create_perf_stat_counter(
struct perf_evsel *evsel,
132 bool exclude_guest_missing =
false;
/* Fallback path: drop both exclusion bits once they are known to be rejected. */
142 if (exclude_guest_missing)
143 evsel->
attr.exclude_guest = evsel->
attr.exclude_host = 0;
145 if (perf_target__has_cpu(&target)) {
152 if (!perf_target__has_task(&target) && (!
group || evsel == first)) {
/* EINVAL with an exclusion bit set is attributed to an old kernel (see the debug message). */
162 if (ret && errno ==
EINVAL) {
163 if (!exclude_guest_missing &&
164 (evsel->
attr.exclude_guest || evsel->
attr.exclude_host)) {
165 pr_debug(
"Old kernel, cannot exclude "
166 "guest or host samples.\n");
167 exclude_guest_missing =
true;
177 static inline int nsec_counter(
struct perf_evsel *evsel)
198 update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
200 update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
/*
 * Reads the aggregated values of counter (body partly elided in this excerpt).
 * count aliases counter->counts->aggr.values; a read call that receives
 * evsel_list->threads->nr and the global scale flag is checked for a negative
 * result, a three-iteration loop follows, and count is then handed to
 * update_shadow_stats().
 */
221 static int read_counter_aggr(
struct perf_evsel *counter)
224 u64 *count = counter->
counts->aggr.values;
228 evsel_list->
threads->nr, scale) < 0)
231 for (i = 0; i < 3; i++)
242 update_shadow_stats(counter, count);
256 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
260 count = counter->
counts->cpu[
cpu].values;
262 update_shadow_stats(counter, count);
270 unsigned long long t0,
t1;
273 int child_ready_pipe[2], go_pipe[2];
274 const bool forks = (
argc > 0);
277 if (forks && (
pipe(child_ready_pipe) < 0 ||
pipe(go_pipe) < 0)) {
278 perror(
"failed to create pipes");
283 if ((child_pid = fork()) < 0)
284 perror(
"failed to fork");
287 close(child_ready_pipe[0]);
296 execvp(
"", (
char **)argv);
301 close(child_ready_pipe[1]);
306 if (
read(go_pipe[0], &buf, 1) == -1)
307 perror(
"unable to read pipe");
309 execvp(argv[0], (
char **)argv);
315 if (perf_target__none(&target))
316 evsel_list->
threads->map[0] = child_pid;
321 close(child_ready_pipe[1]);
323 if (
read(child_ready_pipe[0], &buf, 1) == -1)
324 perror(
"unable to read pipe");
325 close(child_ready_pipe[0]);
331 first = perf_evlist__first(evsel_list);
334 if (create_perf_stat_counter(counter, first) < 0) {
343 ui__warning(
"%s event is not supported by the kernel.\n",
350 error(
"You may not have permission to collect %sstats.\n"
351 "\t Consider tweaking"
352 " /proc/sys/kernel/perf_event_paranoid or running as root.",
355 error(
"open_counter returned with %d (%s). "
356 "/bin/dmesg may provide additional information.\n",
362 pr_err(
"Not all events could be opened.\n");
369 error(
"failed to set filter with %d (%s)\n", errno,
382 if (WIFSIGNALED(status))
383 psignal(WTERMSIG(status), argv[0]);
385 while(!
done) sleep(1);
399 read_counter_aggr(counter);
405 return WEXITSTATUS(status);
/*
 * Writes pct to output in one of two forms: as a csv_sep-prefixed field with
 * two decimals, or in the "( +-N% )" form.
 * How pct is derived from total and avg, and the condition that picks the
 * form, are not visible in this excerpt.
 */
408 static void print_noise_pct(
double total,
double avg)
413 fprintf(output,
"%s%.2f%%", csv_sep, pct);
415 fprintf(output,
" ( +-%6.2f%% )", pct);
418 static void print_noise(
struct perf_evsel *evsel,
double avg)
/*
 * Prints a nanosecond-valued counter (body partly elided in this excerpt).
 * avg is divided by 1e6 into msecs; the format string is the unpadded variant
 * when csv_output is set and the column-aligned variant otherwise. The visible
 * fragments also emit the cgroup name after csv_sep and a "CPUs utilized"
 * annotation.
 */
429 static void nsec_printout(
int cpu,
struct perf_evsel *evsel,
double avg)
431 double msecs = avg / 1e6;
432 char cpustr[16] = {
'\0', };
433 const char *
fmt = csv_output ?
"%s%.6f%s%s" :
"%s%18.6f%s%-25s";
/* cpu indexes the map of the object returned by perf_evsel__cpus(). */
438 perf_evsel__cpus(evsel)->
map[cpu], csv_sep);
443 fprintf(output,
"%s%s", csv_sep, evsel->
cgrp->name);
449 fprintf(output,
" # %8.3f CPUs utilized ",
465 static const double grc_table[
GRC_MAX_NR][3] = {
472 if (ratio > grc_table[type][0])
474 else if (ratio > grc_table[type][1])
476 else if (ratio > grc_table[type][2])
/*
 * Prints the "frontend cycles idle" annotation: avg as a percentage of the
 * average of runtime_cycles_stats[cpu]. Body partly elided in this excerpt.
 */
482 static void print_stalled_cycles_frontend(
int cpu,
484 __maybe_unused,
double avg)
486 double total, ratio = 0.0;
489 total =
avg_stats(&runtime_cycles_stats[cpu]);
/* NOTE(review): presumably guarded by a check on total in the elided lines -- confirm. */
492 ratio = avg / total * 100.0;
498 fprintf(output,
" frontend cycles idle ");
/*
 * Prints the "backend cycles idle" annotation: avg as a percentage of the
 * average of runtime_cycles_stats[cpu]. Body partly elided in this excerpt.
 */
501 static void print_stalled_cycles_backend(
int cpu,
503 __maybe_unused,
double avg)
505 double total, ratio = 0.0;
508 total =
avg_stats(&runtime_cycles_stats[cpu]);
/* NOTE(review): presumably guarded by a check on total in the elided lines -- confirm. */
511 ratio = avg / total * 100.0;
517 fprintf(output,
" backend cycles idle ");
/*
 * Prints the "of all branches" annotation: avg as a percentage of the average
 * of runtime_branches_stats[cpu]. The remaining parameters and part of the
 * body are elided in this excerpt.
 */
520 static void print_branch_misses(
int cpu,
524 double total, ratio = 0.0;
527 total =
avg_stats(&runtime_branches_stats[cpu]);
/* NOTE(review): presumably guarded by a check on total in the elided lines -- confirm. */
530 ratio = avg / total * 100.0;
536 fprintf(output,
" of all branches ");
/*
 * Prints the "of all L1-dcache hits" annotation: avg as a percentage of the
 * average of runtime_l1_dcache_stats[cpu]. The remaining parameters and part
 * of the body are elided in this excerpt.
 */
539 static void print_l1_dcache_misses(
int cpu,
543 double total, ratio = 0.0;
546 total =
avg_stats(&runtime_l1_dcache_stats[cpu]);
/* NOTE(review): presumably guarded by a check on total in the elided lines -- confirm. */
549 ratio = avg / total * 100.0;
555 fprintf(output,
" of all L1-dcache hits ");
/*
 * Prints the "of all L1-icache hits" annotation: avg as a percentage of the
 * average of runtime_l1_icache_stats[cpu]. The remaining parameters and part
 * of the body are elided in this excerpt.
 */
558 static void print_l1_icache_misses(
int cpu,
562 double total, ratio = 0.0;
565 total =
avg_stats(&runtime_l1_icache_stats[cpu]);
/* NOTE(review): presumably guarded by a check on total in the elided lines -- confirm. */
568 ratio = avg / total * 100.0;
574 fprintf(output,
" of all L1-icache hits ");
/*
 * Prints the "of all dTLB cache hits" annotation: avg as a percentage of the
 * average of runtime_dtlb_cache_stats[cpu]. The remaining parameters and part
 * of the body are elided in this excerpt.
 */
577 static void print_dtlb_cache_misses(
int cpu,
581 double total, ratio = 0.0;
584 total =
avg_stats(&runtime_dtlb_cache_stats[cpu]);
/* NOTE(review): presumably guarded by a check on total in the elided lines -- confirm. */
587 ratio = avg / total * 100.0;
593 fprintf(output,
" of all dTLB cache hits ");
/*
 * Prints the "of all iTLB cache hits" annotation: avg as a percentage of the
 * average of runtime_itlb_cache_stats[cpu]. The remaining parameters and part
 * of the body are elided in this excerpt.
 */
596 static void print_itlb_cache_misses(
int cpu,
600 double total, ratio = 0.0;
603 total =
avg_stats(&runtime_itlb_cache_stats[cpu]);
/* NOTE(review): presumably guarded by a check on total in the elided lines -- confirm. */
606 ratio = avg / total * 100.0;
612 fprintf(output,
" of all iTLB cache hits ");
/*
 * Prints the "of all LL-cache hits" annotation: avg as a percentage of the
 * average of runtime_ll_cache_stats[cpu]. The remaining parameters and part
 * of the body are elided in this excerpt.
 */
615 static void print_ll_cache_misses(
int cpu,
619 double total, ratio = 0.0;
622 total =
avg_stats(&runtime_ll_cache_stats[cpu]);
/* NOTE(review): presumably guarded by a check on total in the elided lines -- confirm. */
625 ratio = avg / total * 100.0;
631 fprintf(output,
" of all LL-cache hits ");
/*
 * Prints the value avg for evsel and then a derived annotation (only fragments
 * of the body are visible in this excerpt; the conditions selecting each
 * annotation are mostly elided).
 *
 * Annotations visible here: insns per cycle, stalled cycles per insn, the
 * per-cache miss ratios delegated to the print_*_misses() helpers, percentage
 * of all cache refs, frontend/backend idle cycles, GHz, and a "<unit>/sec"
 * rate.
 */
634 static void abs_printout(
int cpu,
struct perf_evsel *evsel,
double avg)
636 double total, ratio = 0.0;
637 char cpustr[16] = {
'\0', };
/*
 * Two alternative formats; the first carries the ' printf flag (thousands
 * grouping). The selecting condition is not visible in this excerpt.
 */
643 fmt =
"%s%'18.0f%s%-25s";
645 fmt =
"%s%18.0f%s%-25s";
650 perf_evsel__cpus(evsel)->
map[cpu], csv_sep);
657 fprintf(output,
"%s%s", csv_sep, evsel->
cgrp->name);
663 total =
avg_stats(&runtime_cycles_stats[cpu]);
668 fprintf(output,
" # %5.2f insns per cycle ", ratio);
/* Take the larger of the front-end and back-end stalled-cycle averages. */
670 total =
avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
671 total =
max(total,
avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
675 fprintf(output,
"\n # %5.2f stalled cycles per insn", ratio);
/* Each miss-ratio helper below is reached only when the matching stats slot has n != 0. */
679 runtime_branches_stats[cpu].
n != 0) {
680 print_branch_misses(cpu, evsel, avg);
686 runtime_l1_dcache_stats[cpu].n != 0) {
687 print_l1_dcache_misses(cpu, evsel, avg);
693 runtime_l1_icache_stats[cpu].n != 0) {
694 print_l1_icache_misses(cpu, evsel, avg);
700 runtime_dtlb_cache_stats[cpu].n != 0) {
701 print_dtlb_cache_misses(cpu, evsel, avg);
707 runtime_itlb_cache_stats[cpu].n != 0) {
708 print_itlb_cache_misses(cpu, evsel, avg);
714 runtime_ll_cache_stats[cpu].n != 0) {
715 print_ll_cache_misses(cpu, evsel, avg);
717 runtime_cacherefs_stats[cpu].
n != 0) {
718 total =
avg_stats(&runtime_cacherefs_stats[cpu]);
721 ratio = avg * 100 / total;
723 fprintf(output,
" # %8.3f %% of all cache refs ", ratio);
726 print_stalled_cycles_frontend(cpu, evsel, avg);
728 print_stalled_cycles_backend(cpu, evsel, avg);
/* avg divided by the average of runtime_nsecs_stats[cpu], reported as GHz. */
730 total =
avg_stats(&runtime_nsecs_stats[cpu]);
733 ratio = 1.0 * avg / total;
735 fprintf(output,
" # %8.3f GHz ", ratio);
736 }
else if (runtime_nsecs_stats[cpu].
n != 0) {
/* Generic rate: 1000 * avg over the nsecs average, printed with a unit character. */
739 total =
avg_stats(&runtime_nsecs_stats[cpu]);
742 ratio = 1000.0 * avg / total;
748 fprintf(output,
" # %8.3f %c/sec ", ratio, unit);
/*
 * Prints the aggregated line for counter (body partly elided in this excerpt).
 * Visible steps: the value is routed to nsec_printout() when nsec_counter()
 * is true and to abs_printout() otherwise, both with cpu = -1; print_noise()
 * follows; a bracketed percentage computed as 100 * avg_running / avg_enabled
 * is printed at the end. The conditions around these steps are not visible.
 */
758 static void print_counter_aggr(
struct perf_evsel *counter)
/* Field width: 0 for CSV output, -24 (left-aligned, 24 wide) otherwise. */
769 csv_output ? 0 : -24,
773 fprintf(output,
"%s%s", csv_sep, counter->
cgrp->name);
779 if (nsec_counter(counter))
780 nsec_printout(-1, counter, avg);
782 abs_printout(-1, counter, avg);
784 print_noise(counter, avg);
792 double avg_enabled, avg_running;
797 fprintf(output,
" [%5.2f%%]", 100 * avg_running / avg_enabled);
/*
 * Per-CPU variant of the counter printout (body partly elided in this
 * excerpt). Iterates cpu over perf_evsel__nr_cpus(counter); when run == 0 or
 * ena == 0 it prints a "CPU"-prefixed row of string fields; the value path
 * routes to nsec_printout() or abs_printout() with the cpu index and then
 * calls print_noise() with 1.0.
 */
806 static void print_counter(
struct perf_evsel *counter)
811 for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
815 if (run == 0 || ena == 0) {
816 fprintf(output,
"CPU%*d%s%*s%s%*s",
818 perf_evsel__cpus(counter)->
map[cpu], csv_sep,
/* Field width: 0 for CSV output, -24 (left-aligned, 24 wide) otherwise. */
822 csv_output ? 0 : -24,
827 csv_sep, counter->
cgrp->name);
833 if (nsec_counter(counter))
834 nsec_printout(cpu, counter, val);
836 abs_printout(cpu, counter, val);
839 print_noise(counter, 1.0);
/*
 * Writes the report to output (body partly elided in this excerpt).
 * The header names what was measured: the command line rebuilt from argv when
 * the target has no task, otherwise the process id or thread id string from
 * target. The run count follows, then each counter goes through
 * print_counter() or print_counter_aggr(), and an elapsed-time line is printed
 * at the end.
 */
849 static void print_stat(
int argc,
const char **argv)
858 fprintf(output,
" Performance counter stats for ");
859 if (!perf_target__has_task(&target)) {
860 fprintf(output,
"\'%s", argv[0]);
861 for (i = 1; i <
argc; i++)
862 fprintf(output,
" %s", argv[i]);
863 }
else if (target.
pid)
/* target.pid and target.tid are printed with %s, i.e. they are strings here. */
864 fprintf(output,
"process id \'%s", target.
pid);
866 fprintf(output,
"thread id \'%s", target.
tid);
870 fprintf(output,
" (%d runs)", run_count);
876 print_counter(counter);
879 print_counter_aggr(counter);
885 fprintf(output,
" %17.9f seconds time elapsed",
/*
 * Signal number that sig_atexit() passes to kill(); starts at -1.
 * NOTE(review): presumably recorded by skip_signal(); the assignment is not
 * visible in this excerpt.
 */
896 static volatile int signr = -1;
898 static void skip_signal(
int signo)
/*
 * Sends signr to the current process via kill(getpid(), signr).
 * Any checks that precede the kill() are not visible in this excerpt.
 */
906 static void sig_atexit(
void)
915 kill(getpid(), signr);
/*
 * Option callback: stores 0 in big_num_opt when the option was negated
 * (unset != 0) and 1 otherwise. opt and s are marked __maybe_unused.
 * The return statement is not visible in this excerpt.
 */
918 static int stat__set_big_num(
const struct option *
opt __maybe_unused,
919 const char *
s __maybe_unused,
int unset)
921 big_num_opt = unset ? 0 : 1;
/*
 * Only the detailed_run checks are visible in this excerpt: three successive
 * tests against the thresholds 1, 2 and 3.
 * NOTE(review): presumably each test returns early so that higher -d levels
 * add more events -- confirm against the elided body.
 */
929 static int add_default_attributes(
void)
1049 if (detailed_run < 1)
1056 if (detailed_run < 2)
1063 if (detailed_run < 3)
1072 bool append_file =
false,
1075 const char *output_name =
NULL;
1076 const struct option options[] = {
1078 "event selector. use 'perf list' to list available events",
1083 "child tasks do not inherit counters"),
1085 "stat events on existing process id"),
1087 "stat events on existing thread id"),
1089 "system-wide collection from all CPUs"),
1091 "put the counters into a counter group"),
1092 OPT_BOOLEAN(
'c',
"scale", &scale,
"scale/normalize counters"),
1094 "be more verbose (show counter open errors, etc)"),
1096 "repeat command and print average + stddev (max: 100)"),
1098 "null run - dont start any counters"),
1099 OPT_INCR(
'd',
"detailed", &detailed_run,
1100 "detailed run - start a lot of events"),
1102 "call sync() before starting a run"),
1104 "print large numbers with thousands\' separators",
1107 "list of cpus to monitor in system-wide"),
1108 OPT_BOOLEAN(
'A',
"no-aggr", &no_aggr,
"disable CPU count aggregation"),
1109 OPT_STRING(
'x',
"field-separator", &csv_sep,
"separator",
1110 "print counts with custom separator"),
1113 OPT_STRING(
'o',
"output", &output_name,
"file",
"output file name"),
1114 OPT_BOOLEAN(0,
"append", &append_file,
"append to the output file"),
1116 "log output to fd, instead of stderr"),
1119 const char *
const stat_usage[] = {
1120 "perf stat [<options>] [<command>]",
1124 int status = -
ENOMEM, run_idx;
1127 setlocale(LC_ALL,
"");
1130 if (evsel_list ==
NULL)
1137 if (output_name &&
strcmp(output_name,
"-"))
1140 if (output_name && output_fd) {
1141 fprintf(stderr,
"cannot use both --output and --log-fd\n");
1145 if (output_fd < 0) {
1146 fprintf(stderr,
"argument to --log-fd must be a > 0\n");
1152 mode = append_file ?
"a" :
"w";
1154 output = fopen(output_name, mode);
1156 perror(
"failed to create output file");
1161 }
else if (output_fd > 0) {
1162 mode = append_file ?
"a" :
"w";
1163 output = fdopen(output_fd, mode);
1165 perror(
"Failed opening logfd");
1172 if (!
strcmp(csv_sep,
"\\t"))
1182 if (big_num_opt == 1) {
1183 fprintf(stderr,
"-B option not supported with -x\n");
1187 }
else if (big_num_opt == 0)
1190 if (!argc && !perf_target__has_task(&target))
1196 if ((no_aggr ||
nr_cgroups) && !perf_target__has_cpu(&target)) {
1197 fprintf(stderr,
"both cgroup and no-aggregation "
1198 "modes only available in system-wide mode\n");
1203 if (add_default_attributes())
1209 if (perf_target__has_task(&target))
1210 pr_err(
"Problems finding threads of monitor\n");
1211 if (perf_target__has_cpu(&target))
1212 perror(
"failed to parse CPUs map");
1219 if (perf_evsel__alloc_stat_priv(pos) < 0 ||
1231 signal(
SIGINT, skip_signal);
1236 for (run_idx = 0; run_idx < run_count; run_idx++) {
1237 if (run_count != 1 &&
verbose)
1244 status = run_perf_stat(argc, argv);
1248 print_stat(argc, argv);
1251 perf_evsel__free_stat_priv(pos);