perf_counter: Generic per counter interrupt throttle
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 7601c014f8f6497f5036c51436c53c2c25aa13ea..c4b543d1a86fe4dbd60cb1c040250ad0eec5e8bd 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -31,7 +31,6 @@ struct cpu_hw_counters {
        unsigned long           used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        unsigned long           active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
        unsigned long           interrupts;
-       u64                     throttle_ctrl;
        int                     enabled;
 };
 
@@ -42,8 +41,8 @@ struct x86_pmu {
        const char      *name;
        int             version;
        int             (*handle_irq)(struct pt_regs *, int);
-       u64             (*save_disable_all)(void);
-       void            (*restore_all)(u64);
+       void            (*disable_all)(void);
+       void            (*enable_all)(void);
        void            (*enable)(struct hw_perf_counter *, int);
        void            (*disable)(struct hw_perf_counter *, int);
        unsigned        eventsel;
@@ -56,6 +55,7 @@ struct x86_pmu {
        int             counter_bits;
        u64             counter_mask;
        u64             max_period;
+       u64             intel_ctrl;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
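
The ops-table change above drops the stateful save_disable_all()/restore_all(ctrl) pair in favour of stateless disable_all()/enable_all() callbacks; on Intel the original global-control MSR value is captured once at init into the new intel_ctrl field, so enable_all() can restore it without every caller having to carry a u64 around. A minimal user-space sketch of that pattern (all demo_* names are illustrative, not kernel code):

	#include <stdint.h>

	/* Stand-ins for the PMU global-control register and the ops table. */
	static uint64_t demo_global_ctrl = 0x3;		/* "MSR" with two counters enabled */

	struct demo_pmu {
		void	(*disable_all)(void);
		void	(*enable_all)(void);
		uint64_t saved_ctrl;			/* plays the role of intel_ctrl */
	};

	static struct demo_pmu demo_pmu;

	static void demo_disable_all(void)
	{
		demo_global_ctrl = 0;			/* wrmsrl(GLOBAL_CTRL, 0) in the diff */
	}

	static void demo_enable_all(void)
	{
		demo_global_ctrl = demo_pmu.saved_ctrl;	/* restore the value captured at init */
	}

	static void demo_pmu_init(void)
	{
		demo_pmu.saved_ctrl  = demo_global_ctrl;	/* rdmsrl(...GLOBAL_CTRL) at init time */
		demo_pmu.disable_all = demo_disable_all;
		demo_pmu.enable_all  = demo_enable_all;
	}
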
@@ -87,11 +87,15 @@ static u64 intel_pmu_raw_event(u64 event)
 {
 #define CORE_EVNTSEL_EVENT_MASK                0x000000FFULL
 #define CORE_EVNTSEL_UNIT_MASK         0x0000FF00ULL
+#define CORE_EVNTSEL_EDGE_MASK         0x00040000ULL
+#define CORE_EVNTSEL_INV_MASK          0x00800000ULL
 #define CORE_EVNTSEL_COUNTER_MASK      0xFF000000ULL
 
 #define CORE_EVNTSEL_MASK              \
        (CORE_EVNTSEL_EVENT_MASK |      \
         CORE_EVNTSEL_UNIT_MASK  |      \
+        CORE_EVNTSEL_EDGE_MASK  |      \
+        CORE_EVNTSEL_INV_MASK  |       \
         CORE_EVNTSEL_COUNTER_MASK)
 
        return event & CORE_EVNTSEL_MASK;
@@ -119,11 +123,15 @@ static u64 amd_pmu_raw_event(u64 event)
 {
 #define K7_EVNTSEL_EVENT_MASK  0x7000000FFULL
 #define K7_EVNTSEL_UNIT_MASK   0x00000FF00ULL
+#define K7_EVNTSEL_EDGE_MASK   0x000040000ULL
+#define K7_EVNTSEL_INV_MASK    0x000800000ULL
 #define K7_EVNTSEL_COUNTER_MASK        0x0FF000000ULL
 
 #define K7_EVNTSEL_MASK                        \
        (K7_EVNTSEL_EVENT_MASK |        \
         K7_EVNTSEL_UNIT_MASK  |        \
+        K7_EVNTSEL_EDGE_MASK  |        \
+        K7_EVNTSEL_INV_MASK   |        \
         K7_EVNTSEL_COUNTER_MASK)
 
        return event & K7_EVNTSEL_MASK;
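
Both raw_event hunks widen the set of bits a raw hardware event may carry: edge detect (bit 18) and invert (bit 23) are now passed through alongside the event select, unit mask and counter mask. A stand-alone sketch of the same masking, using the Core values from the diff (the helper itself is illustrative only):

	#include <stdint.h>

	#define DEMO_EVNTSEL_EVENT_MASK    0x000000FFULL
	#define DEMO_EVNTSEL_UNIT_MASK     0x0000FF00ULL
	#define DEMO_EVNTSEL_EDGE_MASK     0x00040000ULL	/* bit 18: edge detect */
	#define DEMO_EVNTSEL_INV_MASK      0x00800000ULL	/* bit 23: invert comparison */
	#define DEMO_EVNTSEL_COUNTER_MASK  0xFF000000ULL

	static uint64_t demo_raw_event(uint64_t event)
	{
		return event & (DEMO_EVNTSEL_EVENT_MASK |
				DEMO_EVNTSEL_UNIT_MASK  |
				DEMO_EVNTSEL_EDGE_MASK  |
				DEMO_EVNTSEL_INV_MASK   |
				DEMO_EVNTSEL_COUNTER_MASK);
	}
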
@@ -285,12 +293,13 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
                        return -EACCES;
                hwc->nmi = 1;
        }
+       perf_counters_lapic_init(hwc->nmi);
 
-       hwc->irq_period = hw_event->irq_period;
-       if ((s64)hwc->irq_period <= 0 || hwc->irq_period > x86_pmu.max_period)
+       if (!hwc->irq_period)
                hwc->irq_period = x86_pmu.max_period;
 
-       atomic64_set(&hwc->period_left, hwc->irq_period);
+       atomic64_set(&hwc->period_left,
+                       min(x86_pmu.max_period, hwc->irq_period));
 
        /*
         * Raw event type provide the config in the event structure
@@ -311,22 +320,19 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
        return 0;
 }
 
-static u64 intel_pmu_save_disable_all(void)
+static void intel_pmu_disable_all(void)
 {
-       u64 ctrl;
-
-       rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
-
-       return ctrl;
 }
 
-static u64 amd_pmu_save_disable_all(void)
+static void amd_pmu_disable_all(void)
 {
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
-       int enabled, idx;
+       int idx;
+
+       if (!cpuc->enabled)
+               return;
 
-       enabled = cpuc->enabled;
        cpuc->enabled = 0;
        /*
         * ensure we write the disable before we start disabling the
@@ -334,8 +340,6 @@ static u64 amd_pmu_save_disable_all(void)
         * right thing.
         */
        barrier();
-       if (!enabled)
-               goto out;
 
        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                u64 val;
@@ -348,37 +352,31 @@ static u64 amd_pmu_save_disable_all(void)
                val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
                wrmsrl(MSR_K7_EVNTSEL0 + idx, val);
        }
-
-out:
-       return enabled;
 }
 
-u64 hw_perf_save_disable(void)
+void hw_perf_disable(void)
 {
        if (!x86_pmu_initialized())
-               return 0;
-       return x86_pmu.save_disable_all();
+               return;
+       return x86_pmu.disable_all();
 }
-/*
- * Exported because of ACPI idle
- */
-EXPORT_SYMBOL_GPL(hw_perf_save_disable);
 
-static void intel_pmu_restore_all(u64 ctrl)
+static void intel_pmu_enable_all(void)
 {
-       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl);
+       wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
 }
 
-static void amd_pmu_restore_all(u64 ctrl)
+static void amd_pmu_enable_all(void)
 {
        struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
        int idx;
 
-       cpuc->enabled = ctrl;
-       barrier();
-       if (!ctrl)
+       if (cpuc->enabled)
                return;
 
+       cpuc->enabled = 1;
+       barrier();
+
        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
                u64 val;
 
@@ -392,16 +390,12 @@ static void amd_pmu_restore_all(u64 ctrl)
        }
 }
 
-void hw_perf_restore(u64 ctrl)
+void hw_perf_enable(void)
 {
        if (!x86_pmu_initialized())
                return;
-       x86_pmu.restore_all(ctrl);
+       x86_pmu.enable_all();
 }
-/*
- * Exported because of ACPI idle
- */
-EXPORT_SYMBOL_GPL(hw_perf_restore);
 
 static inline u64 intel_pmu_get_status(void)
 {
@@ -473,7 +467,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
                             struct hw_perf_counter *hwc, int idx)
 {
        s64 left = atomic64_read(&hwc->period_left);
-       s64 period = hwc->irq_period;
+       s64 period = min(x86_pmu.max_period, hwc->irq_period);
        int err;
 
        /*
@@ -488,6 +482,11 @@ x86_perf_counter_set_period(struct perf_counter *counter,
                left += period;
                atomic64_set(&hwc->period_left, left);
        }
+       /*
+        * Quirk: certain CPUs don't like it if just 1 event is left:
+        */
+       if (unlikely(left < 2))
+               left = 2;
 
        per_cpu(prev_left[idx], smp_processor_id()) = left;
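
Together with the __hw_perf_counter_init hunk earlier (irq_period now falls back to max_period when none is given), the hunk above clamps what actually gets programmed into the counter: the effective period is min(max_period, irq_period), and the residual count is never allowed below 2 because of the CPU quirk noted in the comment. A hedged plain-C sketch of that clamping, with illustrative names:

	#include <stdint.h>

	static int64_t demo_effective_left(int64_t left, int64_t irq_period,
					   int64_t max_period)
	{
		int64_t period = irq_period ? irq_period : max_period;

		if (period > max_period)	/* min(max_period, irq_period) */
			period = max_period;
		if (left <= 0)			/* counter wrapped: re-arm */
			left += period;
		if (left < 2)			/* quirk: some CPUs dislike 1 event left */
			left = 2;
		return left;
	}
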
 
@@ -613,8 +612,6 @@ try_generic:
                hwc->counter_base = x86_pmu.perfctr;
        }
 
-       perf_counters_lapic_init(hwc->nmi);
-
        x86_pmu.disable(hwc, idx);
 
        cpuc->counters[idx] = counter;
@@ -626,6 +623,18 @@ try_generic:
        return 0;
 }
 
+static void x86_pmu_unthrottle(struct perf_counter *counter)
+{
+       struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters);
+       struct hw_perf_counter *hwc = &counter->hw;
+
+       if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX ||
+                               cpuc->counters[hwc->idx] != counter))
+               return;
+
+       x86_pmu.enable(hwc, hwc->idx);
+}
+
 void perf_counter_print_debug(void)
 {
        u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
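
x86_pmu_unthrottle() is the new hook behind the commit subject: once the generic perf_counter layer decides a throttled counter may fire interrupts again, it is assumed to invoke the pmu's ->unthrottle() method, and the x86 side simply re-enables the hardware counter at its existing index after a sanity check that the counter still owns that slot. A compact sketch of the callback shape, with illustrative demo_* types:

	struct demo_counter;				/* opaque here */

	struct demo_pmu_ops {
		int	(*enable)(struct demo_counter *c);
		void	(*disable)(struct demo_counter *c);
		void	(*read)(struct demo_counter *c);
		void	(*unthrottle)(struct demo_counter *c);	/* new in this diff */
	};

The generic-side caller of ->unthrottle() lives outside this file and is not shown in this diff.
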
@@ -721,30 +730,35 @@ static void intel_pmu_save_and_restart(struct perf_counter *counter)
                intel_pmu_enable_counter(hwc, idx);
 }
 
-/*
- * Maximum interrupt frequency of 100KHz per CPU
- */
-#define PERFMON_MAX_INTERRUPTS (100000/HZ)
-
 /*
  * This handler is triggered by the local APIC, so the APIC IRQ handling
  * rules apply:
  */
 static int intel_pmu_handle_irq(struct pt_regs *regs, int nmi)
 {
-       int bit, cpu = smp_processor_id();
+       struct cpu_hw_counters *cpuc;
+       struct cpu_hw_counters;
+       int bit, cpu, loops;
        u64 ack, status;
-       struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
-       int ret = 0;
 
-       cpuc->throttle_ctrl = intel_pmu_save_disable_all();
+       cpu = smp_processor_id();
+       cpuc = &per_cpu(cpu_hw_counters, cpu);
 
+       perf_disable();
        status = intel_pmu_get_status();
-       if (!status)
-               goto out;
+       if (!status) {
+               perf_enable();
+               return 0;
+       }
 
-       ret = 1;
+       loops = 0;
 again:
+       if (++loops > 100) {
+               WARN_ONCE(1, "perfcounters: irq loop stuck!\n");
+               perf_counter_print_debug();
+               return 1;
+       }
+
        inc_irq_stat(apic_perf_irqs);
        ack = status;
        for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
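
The reworked intel_pmu_handle_irq() trades the old throttle bookkeeping for a simple safety bound: it keeps acking and re-reading the overflow status, but gives up with a one-time warning after 100 passes so a stuck status register cannot wedge the CPU in NMI context. A user-space sketch of that bounded ack loop (the demo_* helpers are stand-ins, not kernel APIs):

	static unsigned long long demo_status = 0x5;	/* pretend two counters overflowed */

	static unsigned long long demo_read_status(void)
	{
		return demo_status;
	}

	static void demo_ack(unsigned long long bits)
	{
		demo_status &= ~bits;		/* clearing mimics the ack MSR write */
	}

	static int demo_handle_irq(void)
	{
		unsigned long long ack, status = demo_read_status();
		int loops = 0;

		if (!status)
			return 0;		/* not ours */
	again:
		if (++loops > 100)		/* same bound as the diff */
			return 1;		/* bail out instead of looping forever */

		ack = status;
		/* ... service each overflowed counter here ... */
		demo_ack(ack);

		status = demo_read_status();
		if (status)
			goto again;
		return 1;
	}
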
@@ -767,90 +781,48 @@ again:
        status = intel_pmu_get_status();
        if (status)
                goto again;
-out:
-       /*
-        * Restore - do not reenable when global enable is off or throttled:
-        */
-       if (cpuc->throttle_ctrl) {
-               if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS) {
-                       intel_pmu_restore_all(cpuc->throttle_ctrl);
-               } else {
-                       pr_info("CPU#%d: perfcounters: max interrupt rate exceeded! Throttle on.\n", smp_processor_id());
-               }
-       }
 
-       return ret;
+       perf_enable();
+
+       return 1;
 }
 
 static int amd_pmu_handle_irq(struct pt_regs *regs, int nmi)
 {
-       int cpu = smp_processor_id();
-       struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu);
-       u64 val;
-       int handled = 0;
+       int cpu, idx, handled = 0;
+       struct cpu_hw_counters *cpuc;
        struct perf_counter *counter;
        struct hw_perf_counter *hwc;
-       int idx, throttle = 0;
-
-       cpuc->throttle_ctrl = cpuc->enabled;
-       cpuc->enabled = 0;
-       barrier();
+       u64 val;
 
-       if (cpuc->throttle_ctrl) {
-               if (++cpuc->interrupts >= PERFMON_MAX_INTERRUPTS)
-                       throttle = 1;
-       }
+       cpu = smp_processor_id();
+       cpuc = &per_cpu(cpu_hw_counters, cpu);
 
        for (idx = 0; idx < x86_pmu.num_counters; idx++) {
-               int disable = 0;
-
                if (!test_bit(idx, cpuc->active_mask))
                        continue;
 
                counter = cpuc->counters[idx];
                hwc = &counter->hw;
+
+               if (counter->hw_event.nmi != nmi)
+                       continue;
+
                val = x86_perf_counter_update(counter, hwc, idx);
                if (val & (1ULL << (x86_pmu.counter_bits - 1)))
-                       goto next;
+                       continue;
 
                /* counter overflow */
                x86_perf_counter_set_period(counter, hwc, idx);
                handled = 1;
                inc_irq_stat(apic_perf_irqs);
-               disable = perf_counter_overflow(counter, nmi, regs, 0);
-
-next:
-               if (disable || throttle)
+               if (perf_counter_overflow(counter, nmi, regs, 0))
                        amd_pmu_disable_counter(hwc, idx);
        }
 
-       if (cpuc->throttle_ctrl && !throttle)
-               cpuc->enabled = 1;
-
        return handled;
 }
 
-void perf_counter_unthrottle(void)
-{
-       struct cpu_hw_counters *cpuc;
-
-       if (!x86_pmu_initialized())
-               return;
-
-       cpuc = &__get_cpu_var(cpu_hw_counters);
-       if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) {
-               pr_info("CPU#%d: perfcounters: throttle off.\n", smp_processor_id());
-
-               /*
-                * Clear them before re-enabling irqs/NMIs again:
-                */
-               cpuc->interrupts = 0;
-               hw_perf_restore(cpuc->throttle_ctrl);
-       } else {
-               cpuc->interrupts = 0;
-       }
-}
-
 void smp_perf_counter_interrupt(struct pt_regs *regs)
 {
        irq_enter();
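
On the AMD side the per-CPU throttle state (throttle_ctrl, PERFMON_MAX_INTERRUPTS, perf_counter_unthrottle()) disappears entirely; throttling becomes a per-counter decision made by perf_counter_overflow(): a nonzero return disables just the overflowing counter, which ->unthrottle() later re-enables. A hedged sketch of that decision, with illustrative types rather than the kernel's:

	struct demo_hw_counter {
		int idx;
		int enabled;
	};

	/* Assume this returns nonzero when the counter exceeded its interrupt budget. */
	static int demo_counter_overflow(struct demo_hw_counter *hwc)
	{
		(void)hwc;
		return 0;
	}

	static void demo_disable_counter(struct demo_hw_counter *hwc)
	{
		hwc->enabled = 0;		/* mask only this counter */
	}

	static void demo_service_overflow(struct demo_hw_counter *hwc)
	{
		if (demo_counter_overflow(hwc))
			demo_disable_counter(hwc);	/* throttled until ->unthrottle() */
	}
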
@@ -900,7 +872,6 @@ perf_counter_nmi_handler(struct notifier_block *self,
 {
        struct die_args *args = __args;
        struct pt_regs *regs;
-       int ret;
 
        if (!atomic_read(&active_counters))
                return NOTIFY_DONE;
@@ -917,9 +888,16 @@ perf_counter_nmi_handler(struct notifier_block *self,
        regs = args->regs;
 
        apic_write(APIC_LVTPC, APIC_DM_NMI);
-       ret = x86_pmu.handle_irq(regs, 1);
+       /*
+        * Can't rely on the handled return value to say it was our NMI, two
+        * counters could trigger 'simultaneously' raising two back-to-back NMIs.
+        *
+        * If the first NMI handles both, the latter will be empty and daze
+        * the CPU.
+        */
+       x86_pmu.handle_irq(regs, 1);
 
-       return ret ? NOTIFY_STOP : NOTIFY_OK;
+       return NOTIFY_STOP;
 }
 
 static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
@@ -931,8 +909,8 @@ static __read_mostly struct notifier_block perf_counter_nmi_notifier = {
 static struct x86_pmu intel_pmu = {
        .name                   = "Intel",
        .handle_irq             = intel_pmu_handle_irq,
-       .save_disable_all       = intel_pmu_save_disable_all,
-       .restore_all            = intel_pmu_restore_all,
+       .disable_all            = intel_pmu_disable_all,
+       .enable_all             = intel_pmu_enable_all,
        .enable                 = intel_pmu_enable_counter,
        .disable                = intel_pmu_disable_counter,
        .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
@@ -951,8 +929,8 @@ static struct x86_pmu intel_pmu = {
 static struct x86_pmu amd_pmu = {
        .name                   = "AMD",
        .handle_irq             = amd_pmu_handle_irq,
-       .save_disable_all       = amd_pmu_save_disable_all,
-       .restore_all            = amd_pmu_restore_all,
+       .disable_all            = amd_pmu_disable_all,
+       .enable_all             = amd_pmu_enable_all,
        .enable                 = amd_pmu_enable_counter,
        .disable                = amd_pmu_disable_counter,
        .eventsel               = MSR_K7_EVNTSEL0,
@@ -1003,6 +981,8 @@ static int intel_pmu_init(void)
        x86_pmu.counter_bits = eax.split.bit_width;
        x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1;
 
+       rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
+
        return 0;
 }
 
@@ -1057,7 +1037,7 @@ void __init init_hw_perf_counters(void)
 
        pr_info("... counter mask:    %016Lx\n", perf_counter_mask);
 
-       perf_counters_lapic_init(0);
+       perf_counters_lapic_init(1);
        register_die_notifier(&perf_counter_nmi_notifier);
 }
 
@@ -1070,6 +1050,7 @@ static const struct pmu pmu = {
        .enable         = x86_pmu_enable,
        .disable        = x86_pmu_disable,
        .read           = x86_pmu_read,
+       .unthrottle     = x86_pmu_unthrottle,
 };
 
 const struct pmu *hw_perf_counter_init(struct perf_counter *counter)