#include <linux/rbtree.h>

static int alloc_flag;
static int caller_flag;

static int alloc_lines = -1;
static int caller_lines = -1;

static int *cpunode_map;
static int max_cpu_num;

static struct rb_root root_alloc_stat;
static struct rb_root root_alloc_sorted;
static struct rb_root root_caller_stat;
static struct rb_root root_caller_sorted;

static unsigned long total_requested, total_allocated;
static unsigned long nr_allocs, nr_cross_allocs;

#define PATH_SYS_NODE "/sys/devices/system/node"
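
/*
 * Size the cpu -> node table from the sysfs 'kernel_max' value and
 * allocate it.
 */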
static int init_cpunode_map(void)
{
        fp = fopen("/sys/devices/system/cpu/kernel_max", "r");

        if (fscanf(fp, "%d", &max_cpu_num) < 1) {
                pr_err("Failed to read 'kernel_max' from sysfs");

        cpunode_map = calloc(max_cpu_num, sizeof(int));
        if (!cpunode_map) {
                pr_err("%s: calloc failed\n", __func__);

        for (i = 0; i < max_cpu_num; i++)
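
/*
 * Scan the node directories under /sys/devices/system/node and record
 * each CPU's memory node in cpunode_map.
 */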
static int setup_cpunode_map(void)
{
        struct dirent *dent1, *dent2;

        if (init_cpunode_map())
                return -1;

        while ((dent1 = readdir(dir1)) != NULL) {
                if (dent1->d_type != DT_DIR ||
                    sscanf(dent1->d_name, "node%u", &mem) < 1)
                        continue;

                while ((dent2 = readdir(dir2)) != NULL) {
                        if (dent2->d_type != DT_LNK ||
                            sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
                                continue;
static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
                             int bytes_req, int bytes_alloc, int cpu)
{

                else if (ptr < data->ptr)

        if (data && data->ptr == ptr) {

                data = malloc(sizeof(*data));
                if (!data) {
                        pr_err("%s: malloc failed\n", __func__);

                rb_link_node(&data->node, parent, node);
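
/*
 * Accumulate per-callsite totals in root_caller_stat, keyed by call_site.
 */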
static int insert_caller_stat(unsigned long call_site,
                              int bytes_req, int bytes_alloc)
{
        struct rb_node **node = &root_caller_stat.rb_node;

                else if (call_site < data->call_site)

        if (data && data->call_site == call_site) {

                data = malloc(sizeof(*data));
                if (!data) {
                        pr_err("%s: malloc failed\n", __func__);

                rb_link_node(&data->node, parent, node);
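
/*
 * Handler for kmem:kmalloc and kmem:kmem_cache_alloc: account the
 * allocation under both its pointer and its call site.
 */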
static int perf_evsel__process_alloc_event(struct perf_evsel *evsel,
                                           struct perf_sample *sample)
{

        if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
            insert_caller_stat(call_site, bytes_req, bytes_alloc))
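
/*
 * Node-aware variant: additionally count allocations satisfied from a
 * memory node other than the requesting CPU's.
 */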
static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
                                                struct perf_sample *sample)
{
        int ret = perf_evsel__process_alloc_event(evsel, sample);

                int node1 = cpunode_map[sample->cpu],
                    node2 = perf_evsel__intval(evsel, sample, "node");

                if (node1 != node2)
                        nr_cross_allocs++;
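
/*
 * Binary search in one of the stat trees using the given comparator.
 */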
static struct alloc_stat *search_alloc_stat(unsigned long ptr,
                                            unsigned long call_site,
                                            struct rb_root *root,
                                            sort_fn_t sort_fn)
{

                cmp = sort_fn(&key, data);
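
/*
 * kfree/kmem_cache_free handler: look the allocation up by pointer and,
 * when it is freed on a CPU other than the one that allocated it, credit
 * a ping-pong to its call site.
 */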
static int perf_evsel__process_free_event(struct perf_evsel *evsel,
                                          struct perf_sample *sample)
{

        s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);

                s_caller = search_alloc_stat(0, s_alloc->call_site,
                                             &root_caller_stat, callsite_cmp);
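
/*
 * Per-sample callback: resolve the originating thread, then dispatch the
 * sample to the handler registered for its tracepoint.
 */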
        if (thread == NULL) {
                pr_debug("problem processing %d event, skipping it.\n",
                         event->header.type);

                return f(evsel, sample);
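
/*
 * perf_tool callbacks: every sample goes through process_sample_event,
 * and samples are processed in timestamp order.
 */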
        .sample          = process_sample_event,
        .ordered_samples = true,
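
/*
 * Internal fragmentation: the share of allocated bytes that the caller
 * did not actually request.
 */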
static double fragmentation(unsigned long n_req, unsigned long n_alloc)
{
        if (n_alloc == 0)
                return 0.0;
        else
                return 100.0 - (100.0 * n_req / n_alloc);
}
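
/*
 * Print one sorted tree as a table; the first column is either the
 * callsite symbol or the raw allocation pointer, depending on is_caller.
 */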
static void __print_result(struct rb_root *root, struct perf_session *session,
                           int n_lines, int is_caller)
{
        printf(" %-34s |", is_caller ? "Callsite" : "Alloc Ptr");
        printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n");

        machine = perf_session__find_host_machine(session);
        if (!machine) {
                pr_err("__print_result: couldn't find kernel information\n");

        while (next && n_lines--) {

                        sym = machine__find_kernel_function(machine, addr, &map, NULL);

                printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n",

                       (unsigned long)data->hit,

        printf(" ... | ... | ... | ... | ... | ... \n");
static void print_summary(void)
{
        printf("\nSUMMARY\n=======\n");
        printf("Total bytes requested: %lu\n", total_requested);
        printf("Total bytes allocated: %lu\n", total_allocated);
        printf("Total bytes wasted on internal fragmentation: %lu\n",
               total_allocated - total_requested);
        printf("Internal fragmentation: %f%%\n",
               fragmentation(total_requested, total_allocated));
        printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs);
}

static void print_result(struct perf_session *session)
{
        if (caller_flag)
                __print_result(&root_caller_sorted, session, caller_lines, 1);
        if (alloc_flag)
                __print_result(&root_alloc_sorted, session, alloc_lines, 0);
        print_summary();
}
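
/*
 * Move entries from the stat trees into the *_sorted trees, ordered by
 * the comparator list built from --sort.
 */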
                        cmp = sort->cmp(data, this);

                if (cmp > 0)
                        new = &((*new)->rb_left);
                else
                        new = &((*new)->rb_right);

        rb_link_node(&data->node, parent, new);

static void __sort_result(struct rb_root *root, struct rb_root *root_sorted,
                          struct list_head *sort_list)
{

                sort_insert(root_sorted, data, sort_list);

static void sort_result(void)
{
        __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort);
        __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
}
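
/*
 * 'perf kmem stat': register one handler per kmem tracepoint, process the
 * recorded events, then sort and print the statistics.
 */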
static int __cmd_kmem(const char *input_name)
{

                { "kmem:kmalloc",               perf_evsel__process_alloc_event, },
                { "kmem:kmem_cache_alloc",      perf_evsel__process_alloc_event, },
                { "kmem:kmalloc_node",          perf_evsel__process_alloc_node_event, },
                { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
                { "kmem:kfree",                 perf_evsel__process_free_event, },
                { "kmem:kmem_cache_free",       perf_evsel__process_free_event, },

                pr_err("Initializing perf session tracepoint handlers failed\n");

        print_result(session);
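
/*
 * Sort keys accepted by --sort; sort_dimension__add() copies the matching
 * dimension onto the requested sort list.
 */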
        &callsite_sort_dimension,

        &bytes_sort_dimension,
        &frag_sort_dimension,
        &pingpong_sort_dimension,

#define NUM_AVAIL_SORTS \
        (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))

static int sort_dimension__add(const char *tok, struct list_head *list)
{

                        sort = malloc(sizeof(*sort));
                        if (!sort) {
                                pr_err("%s: malloc failed\n", __func__);

                        memcpy(sort, avail_sorts[i], sizeof(*sort));
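
/*
 * Split a comma-separated --sort argument and add each key in turn.
 */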
static int setup_sorting(struct list_head *sort_list, const char *arg)
{
        char *str = strdup(arg);

        if (!str) {
                pr_err("%s: strdup failed\n", __func__);

                if (sort_dimension__add(tok, sort_list) < 0) {
                        error("Unknown --sort key: '%s'", tok);
static int parse_sort_opt(const struct option *opt __maybe_unused,
                          const char *arg, int unset __maybe_unused)
{
        if (caller_flag > alloc_flag)
                return setup_sorting(&caller_sort, arg);
        else
                return setup_sorting(&alloc_sort, arg);
}

static int parse_caller_opt(const struct option *opt __maybe_unused,
                            const char *arg __maybe_unused,
                            int unset __maybe_unused)
{
        caller_flag = (alloc_flag + 1);
        return 0;
}

static int parse_alloc_opt(const struct option *opt __maybe_unused,
                           const char *arg __maybe_unused,
                           int unset __maybe_unused)
{
        alloc_flag = (caller_flag + 1);
        return 0;
}

static int parse_line_opt(const struct option *opt __maybe_unused,
                          const char *arg, int unset __maybe_unused)
{
        lines = strtoul(arg, NULL, 10);

        if (caller_flag > alloc_flag)
                caller_lines = lines;
        else
                alloc_lines = lines;
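
/*
 * 'perf kmem record': build a 'perf record' command line with the kmem
 * tracepoints enabled, appending any extra user-supplied arguments.
 */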
static int __cmd_record(int argc, const char **argv)
{
        const char * const record_args[] = {
        "record", "-a", "-R", "-f", "-c", "1",
        "-e", "kmem:kmalloc",
        "-e", "kmem:kmalloc_node",

        "-e", "kmem:kmem_cache_alloc",
        "-e", "kmem:kmem_cache_alloc_node",
        "-e", "kmem:kmem_cache_free",
        };
        unsigned int rec_argc, i, j;
        const char **rec_argv;

        rec_argc = ARRAY_SIZE(record_args) + argc - 1;
        rec_argv = calloc(rec_argc + 1, sizeof(char *));

        if (rec_argv == NULL)
                return -ENOMEM;

        for (i = 0; i < ARRAY_SIZE(record_args); i++)
                rec_argv[i] = strdup(record_args[i]);

        for (j = 1; j < (unsigned int)argc; j++, i++)
                rec_argv[i] = argv[j];
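
/*
 * Command entry point: 'perf kmem record ...' forwards to perf record,
 * while 'perf kmem stat' sets up the cpu -> node map and default sort
 * order, then runs the analysis on the recorded data.
 */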
        const char *input_name = NULL;
        const struct option kmem_options[] = {
        OPT_STRING('i', "input", &input_name, "file", "input file name"),

                           "show per-callsite statistics", parse_caller_opt),

                           "show per-allocation statistics", parse_alloc_opt),

                     "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",

        OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),

        const char * const kmem_usage[] = {
                "perf kmem [<options>] {record|stat}",

        argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);

        if (!strncmp(argv[0], "rec", 3)) {
                return __cmd_record(argc, argv);
        } else if (!strcmp(argv[0], "stat")) {
                if (setup_cpunode_map())
                        return -1;

                if (list_empty(&caller_sort))
                        setup_sorting(&caller_sort, default_sort_order);
                if (list_empty(&alloc_sort))
                        setup_sorting(&alloc_sort, default_sort_order);

                return __cmd_kmem(input_name);