perf: Rework the PMU methods

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 50e6ff3281fc885a13cb5cb0dee9476c90c8df58..9893a2f77b7ad56ff9736fe7dad1bfa5c98116df 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -37,15 +37,6 @@ struct pebs_record_nhm {
        u64 status, dla, dse, lat;
 };
 
-/*
- * Bits in the debugctlmsr controlling branch tracing.
- */
-#define X86_DEBUGCTL_TR                        (1 << 6)
-#define X86_DEBUGCTL_BTS               (1 << 7)
-#define X86_DEBUGCTL_BTINT             (1 << 8)
-#define X86_DEBUGCTL_BTS_OFF_OS                (1 << 9)
-#define X86_DEBUGCTL_BTS_OFF_USR       (1 << 10)
-
 /*
  * A debug store configuration.
  *
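
The removed X86_DEBUGCTL_* constants are not lost: the series switches to
the DEBUGCTLMSR_* names provided by <asm/msr-index.h>, which encode the
same IA32_DEBUGCTL bit positions. For reference, the replacements mirror
the defines removed above:

	/* from <asm/msr-index.h>; same values as the removed defines */
	#define DEBUGCTLMSR_TR			(1UL <<  6)
	#define DEBUGCTLMSR_BTS			(1UL <<  7)
	#define DEBUGCTLMSR_BTINT		(1UL <<  8)
	#define DEBUGCTLMSR_BTS_OFF_OS		(1UL <<  9)
	#define DEBUGCTLMSR_BTS_OFF_USR		(1UL << 10)
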
@@ -127,10 +118,8 @@ static int reserve_ds_buffers(void)
 
                err = -ENOMEM;
                ds = kzalloc(sizeof(*ds), GFP_KERNEL);
-               if (unlikely(!ds)) {
-                       kfree(buffer);
+               if (unlikely(!ds))
                        break;
-               }
                per_cpu(cpu_hw_events, cpu).ds = ds;
 
                if (x86_pmu.bts) {
@@ -195,15 +184,15 @@ static void intel_pmu_enable_bts(u64 config)
 
        debugctlmsr = get_debugctlmsr();
 
-       debugctlmsr |= X86_DEBUGCTL_TR;
-       debugctlmsr |= X86_DEBUGCTL_BTS;
-       debugctlmsr |= X86_DEBUGCTL_BTINT;
+       debugctlmsr |= DEBUGCTLMSR_TR;
+       debugctlmsr |= DEBUGCTLMSR_BTS;
+       debugctlmsr |= DEBUGCTLMSR_BTINT;
 
        if (!(config & ARCH_PERFMON_EVENTSEL_OS))
-               debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
+               debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS;
 
        if (!(config & ARCH_PERFMON_EVENTSEL_USR))
-               debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
+               debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR;
 
        update_debugctlmsr(debugctlmsr);
 }
@@ -219,8 +208,8 @@ static void intel_pmu_disable_bts(void)
        debugctlmsr = get_debugctlmsr();
 
        debugctlmsr &=
-               ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
-                 X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
+               ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT |
+                 DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR);
 
        update_debugctlmsr(debugctlmsr);
 }
@@ -318,7 +307,7 @@ intel_pebs_constraints(struct perf_event *event)
 {
        struct event_constraint *c;
 
-       if (!event->attr.precise)
+       if (!event->attr.precise_ip)
                return NULL;
 
        if (x86_pmu.pebs_constraints) {
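
The old boolean attr.precise becomes the 2-bit attr.precise_ip (0 =
arbitrary skid, 1 = constant skid, 2 = zero skid requested, 3 = zero skid
required). A minimal userspace sketch of requesting a precise cycles
event; illustrative only, with error handling elided:

	#include <linux/perf_event.h>
	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static int open_precise_cycles(void)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size   = sizeof(attr);
		attr.type   = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		/* ask for zero skid; the kernel may refuse with EOPNOTSUPP */
		attr.precise_ip = 2;

		/* measure the calling thread on any CPU */
		return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	}
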
@@ -335,28 +324,29 @@ static void intel_pmu_pebs_enable(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
-       u64 val = cpuc->pebs_enabled;
 
        hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
-       val |= 1ULL << hwc->idx;
-       wrmsrl(MSR_IA32_PEBS_ENABLE, val);
+       cpuc->pebs_enabled |= 1ULL << hwc->idx;
+       WARN_ON_ONCE(cpuc->enabled);
 
-       intel_pmu_lbr_enable(event);
+       if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+               intel_pmu_lbr_enable(event);
 }
 
 static void intel_pmu_pebs_disable(struct perf_event *event)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct hw_perf_event *hwc = &event->hw;
-       u64 val = cpuc->pebs_enabled;
 
-       val &= ~(1ULL << hwc->idx);
-       wrmsrl(MSR_IA32_PEBS_ENABLE, val);
+       cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+       if (cpuc->enabled)
+               wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
        hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
 
-       intel_pmu_lbr_disable(event);
+       if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
+               intel_pmu_lbr_disable(event);
 }
 
 static void intel_pmu_pebs_enable_all(void)
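
intel_pmu_pebs_enable() no longer writes MSR_IA32_PEBS_ENABLE itself
(hence the WARN_ON_ONCE(cpuc->enabled) above): the bit is merely
accumulated in cpuc->pebs_enabled and the MSR is written when the PMU as
a whole is (re)enabled. For reference, the *_all() helpers elsewhere in
this file look roughly like:

	static void intel_pmu_pebs_enable_all(void)
	{
		struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

		if (cpuc->pebs_enabled)
			wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
	}

	static void intel_pmu_pebs_disable_all(void)
	{
		struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

		if (cpuc->pebs_enabled)
			wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
	}
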
@@ -377,8 +367,6 @@ static void intel_pmu_pebs_disable_all(void)
 
 #include <asm/insn.h>
 
-#define MAX_INSN_SIZE  16
-
 static inline bool kernel_ip(unsigned long ip)
 {
 #ifdef CONFIG_X86_32
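
The local MAX_INSN_SIZE define is dropped in favour of the identical one
(16 bytes) in <asm/insn.h>. For context, the kernel_ip() helper truncated
by the hunk above reads in full roughly:

	static inline bool kernel_ip(unsigned long ip)
	{
	#ifdef CONFIG_X86_32
		return ip > PAGE_OFFSET;
	#else
		/* on 64-bit, kernel addresses have the top bit set */
		return (long)ip < 0;
	#endif
	}
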
@@ -395,10 +383,29 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
        unsigned long old_to, to = cpuc->lbr_entries[0].to;
        unsigned long ip = regs->ip;
 
+       /*
+        * We don't need a fixup if the PEBS assist is fault-like
+        */
+       if (!x86_pmu.intel_cap.pebs_trap)
+               return 1;
+
+       /*
+        * No LBR entry, no basic block, no rewinding
+        */
        if (!cpuc->lbr_stack.nr || !from || !to)
                return 0;
 
-       if (ip < to)
+       /*
+        * Basic blocks should never cross user/kernel boundaries
+        */
+       if (kernel_ip(ip) != kernel_ip(to))
+               return 0;
+
+       /*
+        * Unsigned math: either ip is before the start (impossible) or
+        * the basic block is larger than one page (sanity check)
+        */
+       if ((ip - to) > PAGE_SIZE)
                return 0;
 
        /*
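
The single unsigned compare above does double duty. A minimal sketch of
the trick (within_basic_block is a hypothetical name, not part of the
patch):

	static inline bool within_basic_block(unsigned long ip, unsigned long to)
	{
		/*
		 * If ip < to, ip - to wraps to a huge unsigned value and
		 * fails the test; otherwise it is the distance from the
		 * block start, which must not exceed one page.
		 */
		return (ip - to) <= PAGE_SIZE;
	}
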
@@ -416,7 +423,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
 
                old_to = to;
                if (!kernel_ip(ip)) {
-                       int bytes, size = min_t(int, MAX_INSN_SIZE, ip - to);
+                       int bytes, size = MAX_INSN_SIZE;
 
                        bytes = copy_from_user_nmi(buf, (void __user *)to, size);
                        if (bytes != size)
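
The min_t() clamp is gone because the instruction decoder wants a full
MAX_INSN_SIZE window regardless of how close 'to' is to 'ip'. For
context, the surrounding loop (only partially visible in these hunks)
walks forward from the last branch target, one decoded instruction at a
time, until it reaches the sampled IP, roughly:

	do {
		struct insn insn;
		u8 buf[MAX_INSN_SIZE];
		void *kaddr;

		old_to = to;
		if (!kernel_ip(ip)) {
			int bytes, size = MAX_INSN_SIZE;

			bytes = copy_from_user_nmi(buf, (void __user *)to, size);
			if (bytes != size)
				return 0;
			kaddr = buf;
		} else
			kaddr = (void *)to;

		kernel_insn_init(&insn, kaddr);
		insn_get_length(&insn);
		to += insn.length;
	} while (to < ip);

	if (to == ip) {
		/* old_to is the instruction that generated the event */
		regs->ip = old_to;
		return 1;
	}
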
@@ -436,49 +443,33 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
                return 1;
        }
 
+       /*
+        * Even though we decoded the basic block, the instruction stream
+        * never matched the given IP; either the TO or the IP got corrupted.
+        */
        return 0;
 }
 
 static int intel_pmu_save_and_restart(struct perf_event *event);
-static void intel_pmu_disable_event(struct perf_event *event);
 
-static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
+static void __intel_pmu_pebs_event(struct perf_event *event,
+                                  struct pt_regs *iregs, void *__pebs)
 {
-       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-       struct debug_store *ds = cpuc->ds;
-       struct perf_event *event = cpuc->events[0]; /* PMC0 only */
-       struct pebs_record_core *at, *top;
+       /*
+        * We cast to pebs_record_core since that is a subset of
+        * both formats and we don't use the other fields in this
+        * routine.
+        */
+       struct pebs_record_core *pebs = __pebs;
        struct perf_sample_data data;
        struct pt_regs regs;
-       int n;
-
-       if (!event || !ds || !x86_pmu.pebs)
-               return;
-
-       intel_pmu_pebs_disable_all();
-
-       at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
-       top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
-
-       if (top <= at)
-               goto out;
-
-       ds->pebs_index = ds->pebs_buffer_base;
 
        if (!intel_pmu_save_and_restart(event))
-               goto out;
+               return;
 
        perf_sample_data_init(&data, 0);
        data.period = event->hw.last_period;
 
-       n = top - at;
-
-       /*
-        * Should not happen, we program the threshold at 1 and do not
-        * set a reset value.
-        */
-       WARN_ON_ONCE(n > 1);
-
        /*
         * We use the interrupt regs as a base because the PEBS record
         * does not contain a full regs set, specifically it seems to
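
The cast in __intel_pmu_pebs_event() is safe because the Nehalem record
only appends fields to the Core one. From the declarations at the top of
this file (the nhm tail is visible in the first hunk):

	struct pebs_record_core {
		u64 flags, ip;
		u64 ax, bx, cx, dx;
		u64 si, di, bp, sp;
		u64 r8,  r9,  r10, r11;
		u64 r12, r13, r14, r15;
	};

	struct pebs_record_nhm {
		/* same leading fields as pebs_record_core ... */
		u64 flags, ip;
		u64 ax, bx, cx, dx;
		u64 si, di, bp, sp;
		u64 r8,  r9,  r10, r11;
		u64 r12, r13, r14, r15;
		/* ... plus the fmt1 extras */
		u64 status, dla, dse, lat;
	};
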
@@ -490,20 +481,58 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
         * A possible PERF_SAMPLE_REGS will have to transfer all regs.
         */
        regs = *iregs;
-       regs.ip = at->ip;
-       regs.bp = at->bp;
-       regs.sp = at->sp;
+       regs.ip = pebs->ip;
+       regs.bp = pebs->bp;
+       regs.sp = pebs->sp;
 
-       if (intel_pmu_pebs_fixup_ip(&regs))
+       if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
                regs.flags |= PERF_EFLAGS_EXACT;
        else
                regs.flags &= ~PERF_EFLAGS_EXACT;
 
        if (perf_event_overflow(event, 1, &data, &regs))
-               intel_pmu_disable_event(event);
+               x86_pmu_stop(event, 0);
+}
+
+static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
+{
+       struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+       struct debug_store *ds = cpuc->ds;
+       struct perf_event *event = cpuc->events[0]; /* PMC0 only */
+       struct pebs_record_core *at, *top;
+       int n;
+
+       if (!ds || !x86_pmu.pebs)
+               return;
+
+       at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
+       top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
+
+       /*
+        * Whatever else happens, drain the thing
+        */
+       ds->pebs_index = ds->pebs_buffer_base;
+
+       if (!test_bit(0, cpuc->active_mask))
+               return;
+
+       WARN_ON_ONCE(!event);
+
+       if (!event->attr.precise_ip)
+               return;
 
-out:
-       intel_pmu_pebs_enable_all();
+       n = top - at;
+       if (n <= 0)
+               return;
+
+       /*
+        * Should not happen, we program the threshold at 1 and do not
+        * set a reset value.
+        */
+       WARN_ON_ONCE(n > 1);
+       at += n - 1;
+
+       __intel_pmu_pebs_event(event, iregs, at);
 }
 
 static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
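
The WARN_ON_ONCE(n > 1) above relies on how the DS area is armed at
allocation time: the interrupt threshold sits one record past the buffer
base, so the PEBS interrupt fires for every record. Roughly, from
reserve_ds_buffers() earlier in this file:

	ds->pebs_buffer_base	     = (u64)(unsigned long)buffer;
	ds->pebs_index		     = ds->pebs_buffer_base;
	ds->pebs_absolute_maximum    = ds->pebs_buffer_base +
		max * x86_pmu.pebs_record_size;
	/* interrupt after a single record; no auto-reload value is set */
	ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
		x86_pmu.pebs_record_size;
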
@@ -511,25 +540,21 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        struct debug_store *ds = cpuc->ds;
        struct pebs_record_nhm *at, *top;
-       struct perf_sample_data data;
        struct perf_event *event = NULL;
-       struct pt_regs regs;
+       u64 status = 0;
        int bit, n;
 
        if (!ds || !x86_pmu.pebs)
                return;
 
-       intel_pmu_pebs_disable_all();
-
        at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
        top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
 
-       if (top <= at)
-               goto out;
-
        ds->pebs_index = ds->pebs_buffer_base;
 
        n = top - at;
+       if (n <= 0)
+               return;
 
        /*
         * Should not happen, we program the threshold at 1 and do not
@@ -538,40 +563,27 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
        WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
 
        for ( ; at < top; at++) {
-               for_each_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
-                       if (!cpuc->events[bit]->attr.precise)
+               for_each_set_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
+                       event = cpuc->events[bit];
+                       if (!test_bit(bit, cpuc->active_mask))
                                continue;
 
-                       event = cpuc->events[bit];
-               }
+                       WARN_ON_ONCE(!event);
 
-               if (!event)
-                       continue;
-
-               if (!intel_pmu_save_and_restart(event))
-                       continue;
+                       if (!event->attr.precise_ip)
+                               continue;
 
-               perf_sample_data_init(&data, 0);
-               data.period = event->hw.last_period;
+                       if (__test_and_set_bit(bit, (unsigned long *)&status))
+                               continue;
 
-               /*
-                * See the comment in intel_pmu_drain_pebs_core()
-                */
-               regs = *iregs;
-               regs.ip = at->ip;
-               regs.bp = at->bp;
-               regs.sp = at->sp;
+                       break;
+               }
 
-               if (intel_pmu_pebs_fixup_ip(&regs))
-                       regs.flags |= PERF_EFLAGS_EXACT;
-               else
-                       regs.flags &= ~PERF_EFLAGS_EXACT;
+               if (!event || bit >= MAX_PEBS_EVENTS)
+                       continue;
 
-               if (perf_event_overflow(event, 1, &data, &regs))
-                       intel_pmu_disable_event(event);
+               __intel_pmu_pebs_event(event, iregs, at);
        }
-out:
-       intel_pmu_pebs_enable_all();
 }
 
 /*
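
Each fmt1 record carries a status bitmask of the counters that
overflowed, and one overflow can be reported by several records. The
local 'status' mask makes each record credit the first still-unclaimed
counter. A worked illustration with hypothetical values:

	/*
	 * Two records drained back to back, both with at->status = 0x3
	 * (counters 0 and 1 overflowed):
	 *
	 *   record 0: bit 0 not yet in 'status' -> claimed, the sample
	 *             goes to cpuc->events[0];
	 *   record 1: bit 0 already claimed -> skipped; bit 1 claimed,
	 *             the sample goes to cpuc->events[1].
	 *
	 * Without the local mask, events[0] would absorb both records.
	 */
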
@@ -589,34 +601,26 @@ static void intel_ds_init(void)
        x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
        x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
        if (x86_pmu.pebs) {
-               int format = 0;
-
-               if (x86_pmu.version > 1) {
-                       u64 capabilities;
-                       /*
-                        * v2+ has a PEBS format field
-                        */
-                       rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
-                       format = (capabilities >> 8) & 0xf;
-               }
+               char pebs_type = x86_pmu.intel_cap.pebs_trap ?  '+' : '-';
+               int format = x86_pmu.intel_cap.pebs_format;
 
                switch (format) {
                case 0:
-                       printk(KERN_CONT "PEBS v0, ");
+                       printk(KERN_CONT "PEBS fmt0%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
                        x86_pmu.pebs_constraints = intel_core_pebs_events;
                        break;
 
                case 1:
-                       printk(KERN_CONT "PEBS v1, ");
+                       printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
                        x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
                        x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
                        x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
                        break;
 
                default:
-                       printk(KERN_CONT "PEBS unknown format: %d, ", format);
+                       printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type);
                        x86_pmu.pebs = 0;
                        break;
                }
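
x86_pmu.intel_cap replaces the open-coded MSR_IA32_PERF_CAPABILITIES
read; it is filled once at init by the companion perf_event.c change,
with a bitfield layout along these lines:

	union perf_capabilities {
		struct {
			u64	lbr_format    : 6;
			u64	pebs_trap     : 1;
			u64	pebs_arch_reg : 1;
			/* bits 8-11: the old (capabilities >> 8) & 0xf */
			u64	pebs_format   : 4;
			u64	smm_freeze    : 1;
		};
		u64	capabilities;
	};
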
@@ -625,7 +629,7 @@ static void intel_ds_init(void)
 
 #else /* CONFIG_CPU_SUP_INTEL */
 
-static int reseve_ds_buffers(void)
+static int reserve_ds_buffers(void)
 {
        return 0;
 }