perf: Generalize callchain_store()

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c77586061bcbc7642d2f7662f487f4432d2e3ff0..8af28caeafc1344063e3fa9b15c7b2be9fb82a97 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -106,6 +106,7 @@ struct cpu_hw_events {
 
        int                     n_events;
        int                     n_added;
+       int                     n_txn;
        int                     assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
        u64                     tags[X86_PMC_IDX_MAX];
        struct perf_event       *event_list[X86_PMC_IDX_MAX]; /* in enabled order */
@@ -219,6 +220,7 @@ struct x86_pmu {
                                                 struct perf_event *event);
        struct event_constraint *event_constraints;
        void            (*quirks)(void);
+       int             perfctr_second_write;
 
        int             (*cpu_prepare)(int cpu);
        void            (*cpu_starting)(int cpu);
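The new perfctr_second_write field is a per-model opt-in for the double counter write performed in x86_perf_event_set_period() further down. A minimal sketch of how a model-specific PMU description might set it, assuming the usual __initconst struct x86_pmu initializer pattern; the name example_pmu and the elided fields are placeholders, not code from this patch:

static __initconst const struct x86_pmu example_pmu = {
	.name			= "example",
	/* ... event handlers, counter geometry, constraints ... */

	/* Request the double write in x86_perf_event_set_period(). */
	.perfctr_second_write	= 1,
};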
@@ -294,10 +296,10 @@ x86_perf_event_update(struct perf_event *event)
         * count to the generic event atomically:
         */
 again:
-       prev_raw_count = atomic64_read(&hwc->prev_count);
+       prev_raw_count = local64_read(&hwc->prev_count);
        rdmsrl(hwc->event_base + idx, new_raw_count);
 
-       if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+       if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
                                        new_raw_count) != prev_raw_count)
                goto again;
 
@@ -312,8 +314,8 @@ again:
        delta = (new_raw_count << shift) - (prev_raw_count << shift);
        delta >>= shift;
 
-       atomic64_add(delta, &event->count);
-       atomic64_sub(delta, &hwc->period_left);
+       local64_add(delta, &event->count);
+       local64_sub(delta, &hwc->period_left);
 
        return new_raw_count;
 }
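The switch from atomic64_* to local64_* reflects that prev_count, period_left and event->count are only ever updated from the CPU that owns the event, so the cheaper local-CPU primitives suffice. The shift dance above sign-extends the raw counter difference so that a wrap of a counter narrower than 64 bits still yields the right delta. A standalone sketch of that arithmetic; the helper name and the width parameter are chosen purely for illustration:

#include <stdint.h>

/* Compute the signed delta between two raw reads of a `width`-bit counter. */
static int64_t counter_delta(uint64_t prev_raw, uint64_t new_raw, int width)
{
	int shift = 64 - width;		/* e.g. 16 for a 48-bit counter */
	int64_t delta;

	/* Shift out the unimplemented high bits, then sign-extend back. */
	delta = (new_raw << shift) - (prev_raw << shift);
	delta >>= shift;

	return delta;
}

For width = 48, a read that wraps from 0xffffffffffff to 0x5 still comes out as a delta of 6.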
@@ -437,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event)
        if (!hwc->sample_period) {
                hwc->sample_period = x86_pmu.max_period;
                hwc->last_period = hwc->sample_period;
-               atomic64_set(&hwc->period_left, hwc->sample_period);
+               local64_set(&hwc->period_left, hwc->sample_period);
        } else {
                /*
                 * If we have a PMU initialized but no APIC
@@ -884,7 +886,7 @@ static int
 x86_perf_event_set_period(struct perf_event *event)
 {
        struct hw_perf_event *hwc = &event->hw;
-       s64 left = atomic64_read(&hwc->period_left);
+       s64 left = local64_read(&hwc->period_left);
        s64 period = hwc->sample_period;
        int ret = 0, idx = hwc->idx;
 
@@ -896,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event)
         */
        if (unlikely(left <= -period)) {
                left = period;
-               atomic64_set(&hwc->period_left, left);
+               local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
 
        if (unlikely(left <= 0)) {
                left += period;
-               atomic64_set(&hwc->period_left, left);
+               local64_set(&hwc->period_left, left);
                hwc->last_period = period;
                ret = 1;
        }
@@ -922,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event)
         * The hw event starts counting from this event offset,
         * mark it to be able to extract future deltas:
         */
-       atomic64_set(&hwc->prev_count, (u64)-left);
+       local64_set(&hwc->prev_count, (u64)-left);
 
-       wrmsrl(hwc->event_base + idx,
+       wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+
+       /*
+        * Due to an erratum on certain CPUs, we need
+        * a second write to be sure the register
+        * is updated properly.
+        */
+       if (x86_pmu.perfctr_second_write) {
+               wrmsrl(hwc->event_base + idx,
                        (u64)(-left) & x86_pmu.cntval_mask);
+       }
 
        perf_event_update_userpage(event);
 
@@ -968,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event)
         * skip the schedulability test here, it will be performed
         * at commit time (->commit_txn) as a whole
         */
-       if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+       if (cpuc->group_flag & PERF_EVENT_TXN)
                goto out;
 
        ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -983,6 +994,7 @@ static int x86_pmu_enable(struct perf_event *event)
 out:
        cpuc->n_events = n;
        cpuc->n_added += n - n0;
+       cpuc->n_txn += n - n0;
 
        return 0;
 }
@@ -1089,6 +1101,14 @@ static void x86_pmu_disable(struct perf_event *event)
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int i;
 
+       /*
+        * If we're called during a txn, we don't need to do anything.
+        * The events never got scheduled and ->cancel_txn will truncate
+        * the event_list.
+        */
+       if (cpuc->group_flag & PERF_EVENT_TXN)
+               return;
+
        x86_pmu_stop(event);
 
        for (i = 0; i < cpuc->n_events; i++) {
@@ -1378,7 +1398,8 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-       cpuc->group_flag |= PERF_EVENT_TXN_STARTED;
+       cpuc->group_flag |= PERF_EVENT_TXN;
+       cpuc->n_txn = 0;
 }
 
 /*
@@ -1390,7 +1411,12 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-       cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED;
+       cpuc->group_flag &= ~PERF_EVENT_TXN;
+       /*
+        * Truncate the collected events.
+        */
+       cpuc->n_added -= cpuc->n_txn;
+       cpuc->n_events -= cpuc->n_txn;
 }
 
 /*
@@ -1419,6 +1445,8 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
         */
        memcpy(cpuc->assign, assign, n*sizeof(int));
 
+       cpuc->group_flag &= ~PERF_EVENT_TXN;
+
        return 0;
 }
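Taken together, start_txn()/commit_txn()/cancel_txn() let the core add a whole event group speculatively and validate schedulability once, with n_txn recording how many events must be dropped on failure. A rough sketch of the calling convention, on the assumption that the caller mirrors the core's group-scheduling path; example_group_add() and its error handling are illustrative, not kernel code:

static int example_group_add(const struct pmu *pmu,
			     struct perf_event **group, int n)
{
	int i;

	pmu->start_txn(pmu);			/* sets PERF_EVENT_TXN, n_txn = 0 */

	for (i = 0; i < n; i++) {
		if (pmu->enable(group[i]))	/* collected, not yet scheduled */
			goto fail;
	}

	if (!pmu->commit_txn(pmu))		/* schedule the group as a whole */
		return 0;
fail:
	while (--i >= 0)
		pmu->disable(group[i]);
	pmu->cancel_txn(pmu);			/* drop the n_txn collected events */
	return -EAGAIN;
}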
 
@@ -1543,12 +1571,6 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
  * callchain support
  */
 
-static inline
-void callchain_store(struct perf_callchain_entry *entry, u64 ip)
-{
-       if (entry->nr < PERF_MAX_STACK_DEPTH)
-               entry->ip[entry->nr++] = ip;
-}
 
 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
 static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
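The open-coded callchain_store() disappears because, per the commit subject, the helper is generalized and shared with other architectures. Presumably the replacement keeps the same bounds-checked store, along the lines of the sketch below; its exact location and signature are assumptions, not part of this diff:

/* Assumed shape of the shared helper this file now calls. */
static inline void
perf_callchain_store(struct perf_callchain_entry *entry, u64 ip)
{
	if (entry->nr < PERF_MAX_STACK_DEPTH)
		entry->ip[entry->nr++] = ip;
}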
@@ -1574,7 +1596,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 {
        struct perf_callchain_entry *entry = data;
 
-       callchain_store(entry, addr);
+       perf_callchain_store(entry, addr);
 }
 
 static const struct stacktrace_ops backtrace_ops = {
@@ -1585,13 +1607,11 @@ static const struct stacktrace_ops backtrace_ops = {
        .walk_stack             = print_context_stack_bp,
 };
 
-#include "../dumpstack.h"
-
 static void
 perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 {
-       callchain_store(entry, PERF_CONTEXT_KERNEL);
-       callchain_store(entry, regs->ip);
+       perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
+       perf_callchain_store(entry, regs->ip);
 
        dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
 }
@@ -1620,7 +1640,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
                if (fp < compat_ptr(regs->sp))
                        break;
 
-               callchain_store(entry, frame.return_address);
+               perf_callchain_store(entry, frame.return_address);
                fp = compat_ptr(frame.next_frame);
        }
        return 1;
@@ -1644,8 +1664,8 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
 
        fp = (void __user *)regs->bp;
 
-       callchain_store(entry, PERF_CONTEXT_USER);
-       callchain_store(entry, regs->ip);
+       perf_callchain_store(entry, PERF_CONTEXT_USER);
+       perf_callchain_store(entry, regs->ip);
 
        if (perf_callchain_user32(regs, entry))
                return;
@@ -1662,7 +1682,7 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
                if ((unsigned long)fp < regs->sp)
                        break;
 
-               callchain_store(entry, frame.return_address);
+               perf_callchain_store(entry, frame.return_address);
                fp = frame.next_frame;
        }
 }
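The user-space unwind above walks saved frame pointers: each iteration copies one frame from user memory, records frame.return_address, follows frame.next_frame, and stops when the pointer walks below the stack pointer or the callchain budget runs out. A sketch of the frame layout this relies on; the struct and helper names are illustrative assumptions, and real code must use an NMI-safe copy (the copy_from_user() below is only a stand-in):

struct user_stack_frame {
	struct user_stack_frame __user	*next_frame;	/* saved caller frame pointer */
	unsigned long			return_address;	/* saved return address */
};

/* Fetch one frame; returns 0 on success, nonzero on fault (sketch only). */
static unsigned long example_fetch_frame(struct user_stack_frame *frame,
					 const void __user *fp)
{
	return copy_from_user(frame, fp, sizeof(*frame));
}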
@@ -1677,9 +1697,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
 
        is_user = user_mode(regs);
 
-       if (is_user && current->state != TASK_RUNNING)
-               return;
-
        if (!is_user)
                perf_callchain_kernel(regs, entry);
 
@@ -1708,22 +1725,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
        return entry;
 }
 
-void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
-{
-       regs->ip = ip;
-       /*
-        * perf_arch_fetch_caller_regs adds another call, we need to increment
-        * the skip level
-        */
-       regs->bp = rewind_frame_pointer(skip + 1);
-       regs->cs = __KERNEL_CS;
-       /*
-        * We abuse bit 3 to pass exact information, see perf_misc_flags
-        * and the comment with PERF_EFLAGS_EXACT.
-        */
-       regs->flags = 0;
-}
-
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
        unsigned long ip;