#include <linux/perf_event.h>
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/device.h>

#include <asm/stacktrace.h>
#include <asm/timer.h>
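/*
 * x86_perf_event_update(): re-read the counter with RDPMC and fold the
 * delta into the event count.  The counter is only x86_pmu.cntval_bits
 * wide, so both raw values are shifted up to bit 63 before subtracting
 * and the difference is shifted back down, which makes wrap-around come
 * out right.  The cmpxchg on hwc->prev_count guards against a concurrent
 * NMI updating the same event.
 */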
u64 prev_raw_count, new_raw_count;
rdpmcl(hwc->event_base_rdpmc, new_raw_count);
new_raw_count) != prev_raw_count)
delta = (new_raw_count << shift) - (prev_raw_count << shift);
return new_raw_count;
reg = &event->hw.extra_reg;
reg->config = event->attr.config1;
#ifdef CONFIG_X86_LOCAL_APIC
for (i--; i >= 0; i--)
for (i--; i >= 0; i--)
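/*
 * check_hw_exists(): probe the PMU before trusting it.  Each eventsel MSR
 * and the fixed-counter control MSR are read with rdmsrl_safe(); a counter
 * left enabled by the BIOS is reported as broken.  Counter 0 is then
 * written and read back, which also catches hypervisors that advertise a
 * PMU via CPUID but do not implement the MSRs.
 */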
static bool check_hw_exists(void)
reg = x86_pmu_config_addr(i);
ret = rdmsrl_safe(reg, &val);
ret = rdmsrl_safe(reg, &val);
if (val & (0x03 << i*4))
reg = x86_pmu_event_addr(0);
if (rdmsrl_safe(reg, &val))
ret = wrmsrl_safe(reg, val);
ret |= rdmsrl_safe(reg, &val_new);
if (ret || val != val_new)
printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
printk(KERN_ERR "Failed to access perfctr msr (MSR %x is %Lx)\n", reg, val_new);
static void hw_perf_event_destroy(struct perf_event *event)
static inline int x86_pmu_initialized(void)
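/*
 * set_ext_hw_attr(): generic cache events encode cache id, operation and
 * result as one byte each in attr->config; each field is range-checked and
 * then looked up in the hw_cache_event_ids table.
 */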
cache_type = (config >> 0) & 0xff;
cache_op = (config >> 8) & 0xff;
cache_result = (config >> 16) & 0xff;
return x86_pmu_extra_regs(val, event);
if (!is_sampling_event(event)) {
hwc->last_period = hwc->sample_period;
local64_set(&hwc->period_left, hwc->sample_period);
return x86_pmu_extra_regs(event->attr.config, event);
return set_ext_hw_attr(hwc, event);
!attr->freq && hwc->sample_period == 1) {
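/*
 * PEBS/precise_ip handling: a precise event that also requests a branch
 * stack is only accepted when the branch filter covers at least the same
 * privilege levels as the event itself (precise_br_compat()).  If
 * precise_ip > 1 and no branch stack was asked for, branch_sample_type is
 * filled in implicitly (all branches at the event's privilege level) so
 * the LBR can be used for the PEBS instruction-pointer fixup.
 */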
static inline int precise_br_compat(struct perf_event *event)
u64 m = event->attr.branch_sample_type;
if (!event->attr.exclude_user)
if (!event->attr.exclude_kernel)
if (event->attr.precise_ip) {
if (!event->attr.exclude_guest)
if (event->attr.precise_ip > precise)
if (event->attr.precise_ip > 1) {
u64 *br_type = &event->attr.branch_sample_type;
if (has_branch_stack(event)) {
if (!precise_br_compat(event))
if (!event->attr.exclude_user)
if (!event->attr.exclude_kernel)
if (!event->attr.exclude_user)
if (!event->attr.exclude_kernel)
static int __x86_pmu_event_init(struct perf_event *event)
if (!x86_pmu_initialized())
event->destroy = hw_perf_event_destroy;
event->hw.last_cpu = -1;
event->hw.last_tag = ~0ULL;
rdmsrl(x86_pmu_config_addr(idx), val);
val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
wrmsrl(x86_pmu_config_addr(idx), val);
static void x86_pmu_disable(struct pmu *pmu)
if (!x86_pmu_initialized())
static struct pmu pmu;
static inline int is_x86_event(struct perf_event *event)
return event->pmu == &pmu;
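/*
 * Counter scheduling.  Events are assigned to counters by a small
 * backtracking state machine: events are visited in order of increasing
 * constraint weight, a state is saved before each greedy choice (at most
 * SCHED_STATES_MAX deep), and on a dead end the last state is restored
 * and the next counter is tried.
 */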
#define SCHED_STATES_MAX 2
int num, int wmin, int wmax)
memset(sched, 0, sizeof(*sched));
for (idx = 0; idx < num; idx++) {
if (c[idx]->weight == wmin)
sched->state.weight = wmin;
sched->state.unassigned = num;
static void perf_sched_save_state(struct perf_sched *sched)
static bool perf_sched_restore_state(struct perf_sched *sched)
static bool __perf_sched_find_counter(struct perf_sched *sched)
if (!sched->state.unassigned)
idx = sched->state.counter;
perf_sched_save_state(sched);
static bool perf_sched_find_counter(struct perf_sched *sched)
while (!__perf_sched_find_counter(sched)) {
if (!perf_sched_restore_state(sched))
static bool perf_sched_next_event(struct perf_sched *sched)
if (!sched->state.unassigned || !--sched->state.unassigned)
sched->state.event++;
sched->state.event = 0;
sched->state.weight++;
sched->state.counter = 0;
int wmin, int wmax, int *assign)
perf_sched_init(&sched, constraints, n, wmin, wmax);
if (!perf_sched_find_counter(&sched))
assign[sched.state.event] = sched.state.counter;
} while (perf_sched_next_event(&sched));
return sched.state.unassigned;
int i, wmin, wmax, num = 0;
for (i = 0; i < n; i++) {
assign[i] = hwc->idx;
if (!assign || num) {
for (i = 0; i < n; i++) {
if (is_x86_event(leader)) {
if (!is_x86_event(event) ||
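/*
 * x86_assign_hw_event()/match_prev_assignment(): each assignment is stamped
 * with a per-counter generation tag.  On the next pmu_enable() an event
 * whose counter index and tag still match keeps its programming, so only
 * events that moved are reprogrammed.
 */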
static inline void x86_assign_hw_event(struct perf_event *event,
hwc->last_tag = ++cpuc->tags[i];
hwc->config_base = 0;
hwc->config_base = x86_pmu_config_addr(hwc->idx);
hwc->event_base = x86_pmu_event_addr(hwc->idx);
hwc->event_base_rdpmc = hwc->idx;
static inline int match_prev_assignment(struct hw_perf_event *hwc,
return hwc->idx == cpuc->assign[i] &&
       hwc->last_tag == cpuc->tags[i];
static void x86_pmu_enable(struct pmu *pmu)
if (!x86_pmu_initialized())
for (i = 0; i < n_running; i++) {
if (hwc->idx == -1 ||
    match_prev_assignment(hwc, cpuc, i))
for (i = 0; i < cpuc->n_events; i++) {
if (!match_prev_assignment(hwc, cpuc, i))
x86_assign_hw_event(event, cpuc, i);
else if (i < n_running)
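/*
 * x86_perf_event_set_period(): clamp the remaining period, record it in
 * last_period and write the negated value into the counter so the next
 * overflow (and NMI) fires after "left" events.
 */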
int ret = 0, idx = hwc->idx;
hwc->last_period = period;
hwc->last_period = period;
wrmsrl(hwc->event_base,
__x86_pmu_enable_event(&event->hw,
ret = n = collect_events(cpuc, event, false);
cpuc->n_txn += n - n0;
static void x86_pmu_start(struct perf_event *event, int flags)
int idx = event->hw.idx;
event->hw.state = 0;
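/*
 * perf_event_print_debug(): dump the global control/status/overflow MSRs
 * and every generic and fixed counter for the current CPU.
 */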
unsigned long flags;
pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl);
pr_info("CPU#%d: status: %016llx\n", cpu, status);
pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow);
pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed);
pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs);
rdmsrl(x86_pmu_config_addr(idx), pmc_ctrl);
rdmsrl(x86_pmu_event_addr(idx), pmc_count);
prev_left = per_cpu(pmc_prev_left[idx], cpu);
pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", cpu, idx, pmc_ctrl);
pr_info("CPU#%d: gen-PMC%d count: %016llx\n", cpu, idx, pmc_count);
pr_info("CPU#%d: gen-PMC%d left: %016llx\n", cpu, idx, prev_left);
pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", cpu, idx, pmc_count);
static void x86_pmu_del(struct perf_event *event, int flags)
for (i = 0; i < cpuc->n_events; i++) {
struct perf_sample_data data;
int idx, handled = 0;
perf_sample_data_init(&data, 0, event->hw.last_period);
unsigned int cpu = (long)hcpu;
int ret = NOTIFY_OK;
static void __init pmu_check_apic(void)
pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
pr_info("no hardware sampling interrupt available.\n");
pr_info("Performance Events: ");
pr_cont("no PMU driver, software events only.\n");
if (!check_hw_exists())
static inline void x86_pmu_read(struct perf_event *event)
static void x86_pmu_start_txn(struct pmu *pmu)
static void x86_pmu_cancel_txn(struct pmu *pmu)
static int x86_pmu_commit_txn(struct pmu *pmu)
if (!x86_pmu_initialized())
free_fake_cpuc(cpuc);
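/*
 * validate_event()/validate_group(): run the counter scheduler against a
 * throw-away cpu_hw_events to check that a new event, or the whole group
 * it joins, can actually be scheduled before it is accepted.
 */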
static int validate_event(struct perf_event *event)
fake_cpuc = allocate_fake_cpuc();
if (IS_ERR(fake_cpuc))
return PTR_ERR(fake_cpuc);
free_fake_cpuc(fake_cpuc);
static int validate_group(struct perf_event *event)
struct perf_event *leader = event->group_leader;
fake_cpuc = allocate_fake_cpuc();
if (IS_ERR(fake_cpuc))
return PTR_ERR(fake_cpuc);
n = collect_events(fake_cpuc, leader, true);
n = collect_events(fake_cpuc, event, false);
free_fake_cpuc(fake_cpuc);
static int x86_pmu_event_init(struct perf_event *event)
switch (event->attr.type) {
err = __x86_pmu_event_init(event);
if (event->group_leader != event)
err = validate_group(event);
err = validate_event(event);
event->destroy(event);
static int x86_pmu_event_idx(struct perf_event *event)
int idx = event->hw.idx;
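/*
 * The sysfs "rdpmc" attribute: change_rdpmc() toggles CR4.PCE on each CPU
 * so user space can be allowed (or forbidden) to read counters directly
 * with the RDPMC instruction.
 */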
static void change_rdpmc(void *info)
ret = kstrtoul(buf, 0, &val);
static struct attribute *x86_pmu_attrs[] = {
&dev_attr_rdpmc.attr,
.attrs = x86_pmu_attrs,
&x86_pmu_attr_group,
&x86_pmu_format_group,
static void x86_pmu_flush_branch_stack(void)
static struct pmu pmu = {
	.pmu_disable = x86_pmu_disable,
	.attr_groups = x86_pmu_attr_groups,
	.event_init = x86_pmu_event_init,
	.start = x86_pmu_start,
	.read = x86_pmu_read,
	.start_txn = x86_pmu_start_txn,
	.cancel_txn = x86_pmu_cancel_txn,
	.commit_txn = x86_pmu_commit_txn,
	.event_idx = x86_pmu_event_idx,
	.flush_branch_stack = x86_pmu_flush_branch_stack,
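/*
 * Callchain support.  Kernel-side callchains are produced by walking the
 * stack with the backtrace ops below; user-side callchains follow the
 * saved frame pointers, with a separate compat path for 32-bit tasks
 * (perf_callchain_user32).
 */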
static int backtrace_stack(void *data, char *name)
static void backtrace_address(void *data, unsigned long addr, int reliable)
perf_callchain_store(entry, addr);
.stack = backtrace_stack,
.address = backtrace_address,
perf_callchain_store(entry, regs->ip);
valid_user_frame(const void __user *fp, unsigned long size)
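/*
 * get_segment_base(): return the base address of a segment selector,
 * looking the descriptor up in current->active_mm->context.ldt for LDT
 * selectors, so non-flat (segmented) user code still yields usable
 * addresses.
 */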
static unsigned long get_segment_base(unsigned int segment)
int idx = segment >> 3;
if (idx > current->active_mm->context.size)
desc = current->active_mm->context.ldt;
return get_desc_base(desc + idx);
#ifdef CONFIG_COMPAT
#include <asm/compat.h>
unsigned long ss_base, cs_base;
cs_base = get_segment_base(regs->cs);
ss_base = get_segment_base(regs->ss);
fp = compat_ptr(ss_base + regs->bp);
unsigned long bytes;
frame.next_frame = 0;
frame.return_address = 0;
if (bytes != sizeof(frame))
if (!valid_user_frame(fp, sizeof(frame)))
perf_callchain_store(entry, cs_base + frame.return_address);
fp = compat_ptr(ss_base + frame.next_frame);
if (regs->flags & (X86_VM_MASK | PERF_EFLAGS_VM))
fp = (void __user *)regs->bp;
perf_callchain_store(entry, regs->ip);
if (perf_callchain_user32(regs, entry))
unsigned long bytes;
if (bytes != sizeof(frame))
if (!valid_user_frame(fp, sizeof(frame)))
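/*
 * code_segment_base(): in vm86 mode the CS base is simply 16 * the
 * selector; otherwise it comes from the descriptor.
 * perf_instruction_pointer() adds it to regs->ip so samples report a
 * linear address.
 */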
static unsigned long code_segment_base(struct pt_regs *regs)
if (regs->flags & X86_VM_MASK)
return 0x10 * regs->cs;
#ifdef CONFIG_X86_32
return get_segment_base(regs->cs);
return get_segment_base(regs->cs);
return regs->ip + code_segment_base(regs);
if (regs->flags & PERF_EFLAGS_EXACT)