perf: Reduce perf_disable() usage
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f2da20fda02ddf6fcd449a88ba399fe4ed44af2a..846070ce49c3db27346294a6ff10210b3586f999 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -530,7 +530,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
 /*
  * Setup the hardware configuration for a given attr_type
  */
-static int __hw_perf_event_init(struct perf_event *event)
+static int __x86_pmu_event_init(struct perf_event *event)
 {
        int err;
 
@@ -618,7 +618,7 @@ static void x86_pmu_enable_all(int added)
        }
 }
 
-static const struct pmu pmu;
+static struct pmu pmu;
 
 static inline int is_x86_event(struct perf_event *event)
 {
@@ -969,10 +969,11 @@ static int x86_pmu_enable(struct perf_event *event)
 
        hwc = &event->hw;
 
+       perf_disable();
        n0 = cpuc->n_events;
-       n = collect_events(cpuc, event, false);
-       if (n < 0)
-               return n;
+       ret = n = collect_events(cpuc, event, false);
+       if (ret < 0)
+               goto out;
 
        /*
         * If group events scheduling transaction was started,
@@ -980,23 +981,26 @@ static int x86_pmu_enable(struct perf_event *event)
         * at commit time(->commit_txn) as a whole
         */
        if (cpuc->group_flag & PERF_EVENT_TXN)
-               goto out;
+               goto done_collect;
 
        ret = x86_pmu.schedule_events(cpuc, n, assign);
        if (ret)
-               return ret;
+               goto out;
        /*
         * copy new assignment, now we know it is possible
         * will be used by hw_perf_enable()
         */
        memcpy(cpuc->assign, assign, n*sizeof(int));
 
-out:
+done_collect:
        cpuc->n_events = n;
        cpuc->n_added += n - n0;
        cpuc->n_txn += n - n0;
 
-       return 0;
+       ret = 0;
+out:
+       perf_enable();
+       return ret;
 }
 
 static int x86_pmu_start(struct perf_event *event)
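
The rewritten add path above now brackets the whole operation in perf_disable()/perf_enable() and unwinds through the out/done_collect labels. The following stand-alone sketch of that control flow is illustrative only; the *_stub helpers, the counters, and pmu_add_event() are invented stand-ins, not kernel code:

#include <stdio.h>

/* Hypothetical stand-ins for the kernel state and helpers used above. */
static int n_events, n_added;
static int txn_in_progress;

static void perf_disable_stub(void) { /* would disable the whole PMU */ }
static void perf_enable_stub(void)  { /* would re-enable the PMU */ }

static int collect_events_stub(void)   { return n_events + 1; }
static int schedule_events_stub(int n) { (void)n; return 0; }

static int pmu_add_event(void)
{
        int n, n0, ret;

        perf_disable_stub();

        n0 = n_events;
        ret = n = collect_events_stub();
        if (ret < 0)
                goto out;          /* nothing collected, just re-enable */

        if (txn_in_progress)
                goto done_collect; /* schedulability test deferred to ->commit_txn() */

        ret = schedule_events_stub(n);
        if (ret)
                goto out;          /* scheduling failed, counts stay untouched */

done_collect:
        n_events = n;
        n_added += n - n0;
        ret = 0;
out:
        perf_enable_stub();
        return ret;
}

int main(void)
{
        printf("add returned %d, n_events = %d\n", pmu_add_event(), n_events);
        return 0;
}
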
@@ -1154,7 +1158,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs)
                /*
                 * event overflow
                 */
-               handled         = 1;
+               handled++;
                data.period     = event->hw.last_period;
 
                if (!x86_perf_event_set_period(event))
@@ -1200,12 +1204,20 @@ void perf_events_lapic_init(void)
        apic_write(APIC_LVTPC, APIC_DM_NMI);
 }
 
+struct pmu_nmi_state {
+       unsigned int    marked;
+       int             handled;
+};
+
+static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
+
 static int __kprobes
 perf_event_nmi_handler(struct notifier_block *self,
                         unsigned long cmd, void *__args)
 {
        struct die_args *args = __args;
-       struct pt_regs *regs;
+       unsigned int this_nmi;
+       int handled;
 
        if (!atomic_read(&active_events))
                return NOTIFY_DONE;
@@ -1214,22 +1226,47 @@ perf_event_nmi_handler(struct notifier_block *self,
        case DIE_NMI:
        case DIE_NMI_IPI:
                break;
-
+       case DIE_NMIUNKNOWN:
+               this_nmi = percpu_read(irq_stat.__nmi_count);
+               if (this_nmi != __get_cpu_var(pmu_nmi).marked)
+                       /* let the kernel handle the unknown nmi */
+                       return NOTIFY_DONE;
+               /*
+                * This one is a PMU back-to-back nmi. Two events
+                * trigger 'simultaneously' raising two back-to-back
+                * NMIs. If the first NMI handles both, the latter
+                * will be empty and daze the CPU. So, we drop it to
+                * avoid false-positive 'unknown nmi' messages.
+                */
+               return NOTIFY_STOP;
        default:
                return NOTIFY_DONE;
        }
 
-       regs = args->regs;
-
        apic_write(APIC_LVTPC, APIC_DM_NMI);
-       /*
-        * Can't rely on the handled return value to say it was our NMI, two
-        * events could trigger 'simultaneously' raising two back-to-back NMIs.
-        *
-        * If the first NMI handles both, the latter will be empty and daze
-        * the CPU.
-        */
-       x86_pmu.handle_irq(regs);
+
+       handled = x86_pmu.handle_irq(args->regs);
+       if (!handled)
+               return NOTIFY_DONE;
+
+       this_nmi = percpu_read(irq_stat.__nmi_count);
+       if ((handled > 1) ||
+               /* the next nmi could be a back-to-back nmi */
+           ((__get_cpu_var(pmu_nmi).marked == this_nmi) &&
+            (__get_cpu_var(pmu_nmi).handled > 1))) {
+               /*
+                * We could have two subsequent back-to-back nmis: The
+                * first handles more than one counter, the 2nd
+                * handles only one counter and the 3rd handles no
+                * counter.
+                *
+                * This is the 2nd nmi because the previous was
+                * handling more than one counter. We will mark the
+                * next (3rd) and then drop it if unhandled.
+                */
+               __get_cpu_var(pmu_nmi).marked   = this_nmi + 1;
+               __get_cpu_var(pmu_nmi).handled  = handled;
+       }
 
        return NOTIFY_STOP;
 }
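
The marked/handled bookkeeping above is subtle, so here is a stand-alone model of the decision logic; nmi_count, note_handled() and swallow_unknown() are made-up names for illustration, mirroring the hunk above rather than reproducing it. It walks the three-NMI case from the comment: the first NMI handles two counters, the second handles one, and the third is empty and should be swallowed instead of being reported as an unknown NMI:

#include <stdio.h>

/* Hypothetical per-CPU state, shaped like struct pmu_nmi_state above. */
static unsigned int nmi_count;   /* stand-in for irq_stat.__nmi_count */
static unsigned int marked;
static int last_handled;

/* Called after the PMU NMI handler has handled 'handled' counters. */
static void note_handled(int handled)
{
        nmi_count++;
        if (handled > 1 || (marked == nmi_count && last_handled > 1)) {
                /* the next NMI may be an empty back-to-back one: mark it */
                marked = nmi_count + 1;
                last_handled = handled;
        }
}

/* Called for an unknown NMI: returns 1 if it should be dropped silently. */
static int swallow_unknown(void)
{
        nmi_count++;
        return nmi_count == marked;
}

int main(void)
{
        note_handled(2);                 /* 1st NMI handles two counters  */
        note_handled(1);                 /* 2nd (back-to-back) handles one */
        printf("3rd NMI dropped: %d\n",  /* 3rd is empty and gets swallowed */
               swallow_unknown());
        return 0;
}
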
@@ -1381,6 +1418,7 @@ void __init init_hw_perf_events(void)
        pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
        pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 
+       perf_pmu_register(&pmu);
        perf_cpu_notifier(x86_pmu_notifier);
 }
 
@@ -1394,10 +1432,11 @@ static inline void x86_pmu_read(struct perf_event *event)
  * Set the flag to make pmu::enable() not perform the
  * schedulability test, it will be performed at commit time
  */
-static void x86_pmu_start_txn(const struct pmu *pmu)
+static void x86_pmu_start_txn(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
+       perf_disable();
        cpuc->group_flag |= PERF_EVENT_TXN;
        cpuc->n_txn = 0;
 }
@@ -1407,7 +1446,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
  * Clear the flag and pmu::enable() will perform the
  * schedulability test.
  */
-static void x86_pmu_cancel_txn(const struct pmu *pmu)
+static void x86_pmu_cancel_txn(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
@@ -1417,6 +1456,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
         */
        cpuc->n_added -= cpuc->n_txn;
        cpuc->n_events -= cpuc->n_txn;
+       perf_enable();
 }
 
 /*
@@ -1424,7 +1464,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
  * Perform the group schedulability test as a whole
  * Return 0 if success
  */
-static int x86_pmu_commit_txn(const struct pmu *pmu)
+static int x86_pmu_commit_txn(struct pmu *pmu)
 {
        struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
        int assign[X86_PMC_IDX_MAX];
@@ -1446,22 +1486,10 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
        memcpy(cpuc->assign, assign, n*sizeof(int));
 
        cpuc->group_flag &= ~PERF_EVENT_TXN;
-
+       perf_enable();
        return 0;
 }
 
-static const struct pmu pmu = {
-       .enable         = x86_pmu_enable,
-       .disable        = x86_pmu_disable,
-       .start          = x86_pmu_start,
-       .stop           = x86_pmu_stop,
-       .read           = x86_pmu_read,
-       .unthrottle     = x86_pmu_unthrottle,
-       .start_txn      = x86_pmu_start_txn,
-       .cancel_txn     = x86_pmu_cancel_txn,
-       .commit_txn     = x86_pmu_commit_txn,
-};
-
 /*
  * validate that we can schedule this event
  */
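
Taken together, the transaction hooks above pair the perf_disable() in x86_pmu_start_txn() with a perf_enable() in either x86_pmu_commit_txn() or x86_pmu_cancel_txn(). The sketch below only models how a caller of an API shaped like this might add an event group transactionally; the names, the 4-counter limit and add_group() are invented for the example and are not the actual perf core:

#include <stdio.h>

/* Illustrative model of the transaction API shaped like the hooks above. */
static int disable_depth;
static int txn_flag;

static void perf_disable_stub(void) { disable_depth++; }
static void perf_enable_stub(void)  { disable_depth--; }

static void start_txn(void)  { perf_disable_stub(); txn_flag = 1; }
static void cancel_txn(void) { txn_flag = 0; perf_enable_stub(); }

static int commit_txn(int n)
{
        /* group-wide schedulability test: pretend only 4 counters exist */
        if (n > 4)
                return -1;      /* caller is expected to cancel_txn() */
        txn_flag = 0;
        perf_enable_stub();
        return 0;
}

/* How a caller might add a whole group atomically. */
static int add_group(int nr_events)
{
        start_txn();
        /* individual ->enable() calls would only collect events here,
         * deferring the schedulability test because txn_flag is set */
        if (commit_txn(nr_events)) {
                cancel_txn();
                return -1;
        }
        return 0;
}

int main(void)
{
        printf("group of 3: %d, group of 9: %d, disable depth: %d\n",
               add_group(3), add_group(9), disable_depth);
        return 0;
}
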
@@ -1536,12 +1564,22 @@ out:
        return ret;
 }
 
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+int x86_pmu_event_init(struct perf_event *event)
 {
-       const struct pmu *tmp;
+       struct pmu *tmp;
        int err;
 
-       err = __hw_perf_event_init(event);
+       switch (event->attr.type) {
+       case PERF_TYPE_RAW:
+       case PERF_TYPE_HARDWARE:
+       case PERF_TYPE_HW_CACHE:
+               break;
+
+       default:
+               return -ENOENT;
+       }
+
+       err = __x86_pmu_event_init(event);
        if (!err) {
                /*
                 * we temporarily connect event to its pmu
@@ -1561,27 +1599,28 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
        if (err) {
                if (event->destroy)
                        event->destroy(event);
-               return ERR_PTR(err);
        }
 
-       return &pmu;
+       return err;
 }
 
+static struct pmu pmu = {
+       .event_init     = x86_pmu_event_init,
+       .enable         = x86_pmu_enable,
+       .disable        = x86_pmu_disable,
+       .start          = x86_pmu_start,
+       .stop           = x86_pmu_stop,
+       .read           = x86_pmu_read,
+       .unthrottle     = x86_pmu_unthrottle,
+       .start_txn      = x86_pmu_start_txn,
+       .cancel_txn     = x86_pmu_cancel_txn,
+       .commit_txn     = x86_pmu_commit_txn,
+};
+
 /*
  * callchain support
  */
 
-static inline
-void callchain_store(struct perf_callchain_entry *entry, u64 ip)
-{
-       if (entry->nr < PERF_MAX_STACK_DEPTH)
-               entry->ip[entry->nr++] = ip;
-}
-
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
-
-
 static void
 backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
 {
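
With perf_pmu_register() and the ->event_init() hook introduced above, the x86 PMU claims only RAW/HARDWARE/HW_CACHE events and returns -ENOENT for everything else, letting a generic core offer the event to other PMUs. The toy dispatcher below illustrates that contract; struct fake_pmu, the type enum and init_event() are invented for the example and do not reflect the real perf core:

#include <errno.h>
#include <stdio.h>

/* Minimal model of the ->event_init() contract shown above. */
struct fake_pmu {
        const char *name;
        int (*event_init)(int type);
};

enum { TYPE_HARDWARE, TYPE_SOFTWARE, TYPE_TRACEPOINT };

static int hw_event_init(int type)
{
        /* like x86_pmu_event_init(): only claim the types we implement */
        return type == TYPE_HARDWARE ? 0 : -ENOENT;
}

static int sw_event_init(int type)
{
        return type == TYPE_SOFTWARE ? 0 : -ENOENT;
}

static struct fake_pmu pmus[] = {
        { "hw", hw_event_init },
        { "sw", sw_event_init },
};

/* Walk the registered PMUs until one accepts the event. */
static const char *init_event(int type)
{
        for (unsigned int i = 0; i < sizeof(pmus) / sizeof(pmus[0]); i++)
                if (pmus[i].event_init(type) == 0)
                        return pmus[i].name;
        return "none";
}

int main(void)
{
        printf("HARDWARE -> %s, TRACEPOINT -> %s\n",
               init_event(TYPE_HARDWARE), init_event(TYPE_TRACEPOINT));
        return 0;
}
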
@@ -1602,7 +1641,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable)
 {
        struct perf_callchain_entry *entry = data;
 
-       callchain_store(entry, addr);
+       perf_callchain_store(entry, addr);
 }
 
 static const struct stacktrace_ops backtrace_ops = {
@@ -1613,11 +1652,15 @@ static const struct stacktrace_ops backtrace_ops = {
        .walk_stack             = print_context_stack_bp,
 };
 
-static void
-perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
-       callchain_store(entry, PERF_CONTEXT_KERNEL);
-       callchain_store(entry, regs->ip);
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               /* TODO: We don't support guest os callchain now */
+               return;
+       }
+
+       perf_callchain_store(entry, regs->ip);
 
        dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
 }
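
The open-coded callchain_store() helper removed above did nothing more than a bounds check and an append, and perf_callchain_store() from the generic code plays the same role here. A self-contained equivalent for illustration; the struct layout and MAX_STACK_DEPTH value are placeholders, not the kernel's definitions:

#include <stdio.h>

#define MAX_STACK_DEPTH 8   /* illustrative; the kernel uses PERF_MAX_STACK_DEPTH */

struct callchain_entry {
        unsigned int       nr;
        unsigned long long ip[MAX_STACK_DEPTH];
};

/* Same behaviour as the removed callchain_store(): bounds-check and append. */
static void callchain_store(struct callchain_entry *entry, unsigned long long ip)
{
        if (entry->nr < MAX_STACK_DEPTH)
                entry->ip[entry->nr++] = ip;
}

int main(void)
{
        struct callchain_entry entry = { 0 };

        callchain_store(&entry, 0xffffffff81000000ULL);  /* e.g. a kernel ip */
        callchain_store(&entry, 0x0000000000400123ULL);  /* e.g. a user ip */
        printf("stored %u frames, top ip %#llx\n", entry.nr, entry.ip[0]);
        return 0;
}
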
@@ -1646,7 +1689,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
                if (fp < compat_ptr(regs->sp))
                        break;
 
-               callchain_store(entry, frame.return_address);
+               perf_callchain_store(entry, frame.return_address);
                fp = compat_ptr(frame.next_frame);
        }
        return 1;
@@ -1659,19 +1702,20 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
 }
 #endif
 
-static void
-perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
 {
        struct stack_frame frame;
        const void __user *fp;
 
-       if (!user_mode(regs))
-               regs = task_pt_regs(current);
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               /* TODO: We don't support guest os callchain now */
+               return;
+       }
 
        fp = (void __user *)regs->bp;
 
-       callchain_store(entry, PERF_CONTEXT_USER);
-       callchain_store(entry, regs->ip);
+       perf_callchain_store(entry, regs->ip);
 
        if (perf_callchain_user32(regs, entry))
                return;
@@ -1688,52 +1732,11 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
                if ((unsigned long)fp < regs->sp)
                        break;
 
-               callchain_store(entry, frame.return_address);
+               perf_callchain_store(entry, frame.return_address);
                fp = frame.next_frame;
        }
 }
 
-static void
-perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
-       int is_user;
-
-       if (!regs)
-               return;
-
-       is_user = user_mode(regs);
-
-       if (is_user && current->state != TASK_RUNNING)
-               return;
-
-       if (!is_user)
-               perf_callchain_kernel(regs, entry);
-
-       if (current->mm)
-               perf_callchain_user(regs, entry);
-}
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
-       struct perf_callchain_entry *entry;
-
-       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-               /* TODO: We don't support guest os callchain now */
-               return NULL;
-       }
-
-       if (in_nmi())
-               entry = &__get_cpu_var(pmc_nmi_entry);
-       else
-               entry = &__get_cpu_var(pmc_irq_entry);
-
-       entry->nr = 0;
-
-       perf_do_callchain(regs, entry);
-
-       return entry;
-}
-
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
        unsigned long ip;