struct perf_event *event);
struct event_constraint *event_constraints;
void (*quirks)(void);
+ int perfctr_second_write;
int (*cpu_prepare)(int cpu);
void (*cpu_starting)(int cpu);
* count to the generic event atomically:
*/
again:
- prev_raw_count = atomic64_read(&hwc->prev_count);
+ prev_raw_count = local64_read(&hwc->prev_count);
rdmsrl(hwc->event_base + idx, new_raw_count);
- if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
goto again;
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
- atomic64_add(delta, &event->count);
- atomic64_sub(delta, &hwc->period_left);
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
return new_raw_count;
}
if (!hwc->sample_period) {
hwc->sample_period = x86_pmu.max_period;
hwc->last_period = hwc->sample_period;
- atomic64_set(&hwc->period_left, hwc->sample_period);
+ local64_set(&hwc->period_left, hwc->sample_period);
} else {
/*
* If we have a PMU initialized but no APIC
/*
* Setup the hardware configuration for a given attr_type
*/
-static int __hw_perf_event_init(struct perf_event *event)
+static int __x86_pmu_event_init(struct perf_event *event)
{
int err;
}
}
-static const struct pmu pmu;
+static struct pmu pmu;
static inline int is_x86_event(struct perf_event *event)
{
x86_perf_event_set_period(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- s64 left = atomic64_read(&hwc->period_left);
+ s64 left = local64_read(&hwc->period_left);
s64 period = hwc->sample_period;
int ret = 0, idx = hwc->idx;
*/
if (unlikely(left <= -period)) {
left = period;
- atomic64_set(&hwc->period_left, left);
+ local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (unlikely(left <= 0)) {
left += period;
- atomic64_set(&hwc->period_left, left);
+ local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
* The hw event starts counting from this event offset,
* mark it to be able to extra future deltas:
*/
- atomic64_set(&hwc->prev_count, (u64)-left);
+ local64_set(&hwc->prev_count, (u64)-left);
- wrmsrl(hwc->event_base + idx,
+ wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+
+ /*
+ * Due to erratum on certan cpu we need
+ * a second write to be sure the register
+ * is updated properly
+ */
+ if (x86_pmu.perfctr_second_write) {
+ wrmsrl(hwc->event_base + idx,
(u64)(-left) & x86_pmu.cntval_mask);
+ }
perf_event_update_userpage(event);
hwc = &event->hw;
+ perf_disable();
n0 = cpuc->n_events;
- n = collect_events(cpuc, event, false);
- if (n < 0)
- return n;
+ ret = n = collect_events(cpuc, event, false);
+ if (ret < 0)
+ goto out;
/*
* If group events scheduling transaction was started,
* skip the schedulability test here, it will be peformed
* at commit time(->commit_txn) as a whole
*/
- if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
- goto out;
+ if (cpuc->group_flag & PERF_EVENT_TXN)
+ goto done_collect;
ret = x86_pmu.schedule_events(cpuc, n, assign);
if (ret)
- return ret;
+ goto out;
/*
* copy new assignment, now we know it is possible
* will be used by hw_perf_enable()
*/
memcpy(cpuc->assign, assign, n*sizeof(int));
-out:
+done_collect:
cpuc->n_events = n;
cpuc->n_added += n - n0;
cpuc->n_txn += n - n0;
- return 0;
+ ret = 0;
+out:
+ perf_enable();
+ return ret;
}
static int x86_pmu_start(struct perf_event *event)
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
*/
- if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+ if (cpuc->group_flag & PERF_EVENT_TXN)
return;
x86_pmu_stop(event);
/*
* event overflow
*/
- handled = 1;
+ handled++;
data.period = event->hw.last_period;
if (!x86_perf_event_set_period(event))
apic_write(APIC_LVTPC, APIC_DM_NMI);
}
+struct pmu_nmi_state {
+ unsigned int marked;
+ int handled;
+};
+
+static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi);
+
static int __kprobes
perf_event_nmi_handler(struct notifier_block *self,
unsigned long cmd, void *__args)
{
struct die_args *args = __args;
- struct pt_regs *regs;
+ unsigned int this_nmi;
+ int handled;
if (!atomic_read(&active_events))
return NOTIFY_DONE;
case DIE_NMI:
case DIE_NMI_IPI:
break;
-
+ case DIE_NMIUNKNOWN:
+ this_nmi = percpu_read(irq_stat.__nmi_count);
+ if (this_nmi != __get_cpu_var(pmu_nmi).marked)
+ /* let the kernel handle the unknown nmi */
+ return NOTIFY_DONE;
+ /*
+ * This one is a PMU back-to-back nmi. Two events
+ * trigger 'simultaneously' raising two back-to-back
+ * NMIs. If the first NMI handles both, the latter
+ * will be empty and daze the CPU. So, we drop it to
+ * avoid false-positive 'unknown nmi' messages.
+ */
+ return NOTIFY_STOP;
default:
return NOTIFY_DONE;
}
- regs = args->regs;
-
apic_write(APIC_LVTPC, APIC_DM_NMI);
- /*
- * Can't rely on the handled return value to say it was our NMI, two
- * events could trigger 'simultaneously' raising two back-to-back NMIs.
- *
- * If the first NMI handles both, the latter will be empty and daze
- * the CPU.
- */
- x86_pmu.handle_irq(regs);
+
+ handled = x86_pmu.handle_irq(args->regs);
+ if (!handled)
+ return NOTIFY_DONE;
+
+ this_nmi = percpu_read(irq_stat.__nmi_count);
+ if ((handled > 1) ||
+ /* the next nmi could be a back-to-back nmi */
+ ((__get_cpu_var(pmu_nmi).marked == this_nmi) &&
+ (__get_cpu_var(pmu_nmi).handled > 1))) {
+ /*
+ * We could have two subsequent back-to-back nmis: The
+ * first handles more than one counter, the 2nd
+ * handles only one counter and the 3rd handles no
+ * counter.
+ *
+ * This is the 2nd nmi because the previous was
+ * handling more than one counter. We will mark the
+ * next (3rd) and then drop it if unhandled.
+ */
+ __get_cpu_var(pmu_nmi).marked = this_nmi + 1;
+ __get_cpu_var(pmu_nmi).handled = handled;
+ }
return NOTIFY_STOP;
}
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
+ perf_pmu_register(&pmu);
perf_cpu_notifier(x86_pmu_notifier);
}
* Set the flag to make pmu::enable() not perform the
* schedulability test, it will be performed at commit time
*/
-static void x86_pmu_start_txn(const struct pmu *pmu)
+static void x86_pmu_start_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- cpuc->group_flag |= PERF_EVENT_TXN_STARTED;
+ perf_disable();
+ cpuc->group_flag |= PERF_EVENT_TXN;
cpuc->n_txn = 0;
}
* Clear the flag and pmu::enable() will perform the
* schedulability test.
*/
-static void x86_pmu_cancel_txn(const struct pmu *pmu)
+static void x86_pmu_cancel_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED;
+ cpuc->group_flag &= ~PERF_EVENT_TXN;
/*
* Truncate the collected events.
*/
cpuc->n_added -= cpuc->n_txn;
cpuc->n_events -= cpuc->n_txn;
+ perf_enable();
}
/*
* Perform the group schedulability test as a whole
* Return 0 if success
*/
-static int x86_pmu_commit_txn(const struct pmu *pmu)
+static int x86_pmu_commit_txn(struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int assign[X86_PMC_IDX_MAX];
*/
memcpy(cpuc->assign, assign, n*sizeof(int));
- /*
- * Clear out the txn count so that ->cancel_txn() which gets
- * run after ->commit_txn() doesn't undo things.
- */
- cpuc->n_txn = 0;
-
+ cpuc->group_flag &= ~PERF_EVENT_TXN;
+ perf_enable();
return 0;
}
-static const struct pmu pmu = {
- .enable = x86_pmu_enable,
- .disable = x86_pmu_disable,
- .start = x86_pmu_start,
- .stop = x86_pmu_stop,
- .read = x86_pmu_read,
- .unthrottle = x86_pmu_unthrottle,
- .start_txn = x86_pmu_start_txn,
- .cancel_txn = x86_pmu_cancel_txn,
- .commit_txn = x86_pmu_commit_txn,
-};
-
/*
* validate that we can schedule this event
*/
return ret;
}
-const struct pmu *hw_perf_event_init(struct perf_event *event)
+int x86_pmu_event_init(struct perf_event *event)
{
- const struct pmu *tmp;
+ struct pmu *tmp;
int err;
- err = __hw_perf_event_init(event);
+ switch (event->attr.type) {
+ case PERF_TYPE_RAW:
+ case PERF_TYPE_HARDWARE:
+ case PERF_TYPE_HW_CACHE:
+ break;
+
+ default:
+ return -ENOENT;
+ }
+
+ err = __x86_pmu_event_init(event);
if (!err) {
/*
* we temporarily connect event to its pmu
if (err) {
if (event->destroy)
event->destroy(event);
- return ERR_PTR(err);
}
- return &pmu;
+ return err;
}
+static struct pmu pmu = {
+ .event_init = x86_pmu_event_init,
+ .enable = x86_pmu_enable,
+ .disable = x86_pmu_disable,
+ .start = x86_pmu_start,
+ .stop = x86_pmu_stop,
+ .read = x86_pmu_read,
+ .unthrottle = x86_pmu_unthrottle,
+ .start_txn = x86_pmu_start_txn,
+ .cancel_txn = x86_pmu_cancel_txn,
+ .commit_txn = x86_pmu_commit_txn,
+};
+
/*
* callchain support
*/
-static inline
-void callchain_store(struct perf_callchain_entry *entry, u64 ip)
-{
- if (entry->nr < PERF_MAX_STACK_DEPTH)
- entry->ip[entry->nr++] = ip;
-}
-
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
-static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
-
-
static void
backtrace_warning_symbol(void *data, char *msg, unsigned long symbol)
{
{
struct perf_callchain_entry *entry = data;
- callchain_store(entry, addr);
+ perf_callchain_store(entry, addr);
}
static const struct stacktrace_ops backtrace_ops = {
.walk_stack = print_context_stack_bp,
};
-#include "../dumpstack.h"
-
-static void
-perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
- callchain_store(entry, PERF_CONTEXT_KERNEL);
- callchain_store(entry, regs->ip);
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ /* TODO: We don't support guest os callchain now */
+ return;
+ }
+
+ perf_callchain_store(entry, regs->ip);
dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
}
if (fp < compat_ptr(regs->sp))
break;
- callchain_store(entry, frame.return_address);
+ perf_callchain_store(entry, frame.return_address);
fp = compat_ptr(frame.next_frame);
}
return 1;
}
#endif
-static void
-perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
{
struct stack_frame frame;
const void __user *fp;
- if (!user_mode(regs))
- regs = task_pt_regs(current);
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ /* TODO: We don't support guest os callchain now */
+ return;
+ }
fp = (void __user *)regs->bp;
- callchain_store(entry, PERF_CONTEXT_USER);
- callchain_store(entry, regs->ip);
+ perf_callchain_store(entry, regs->ip);
if (perf_callchain_user32(regs, entry))
return;
if ((unsigned long)fp < regs->sp)
break;
- callchain_store(entry, frame.return_address);
+ perf_callchain_store(entry, frame.return_address);
fp = frame.next_frame;
}
}
-static void
-perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry)
-{
- int is_user;
-
- if (!regs)
- return;
-
- is_user = user_mode(regs);
-
- if (is_user && current->state != TASK_RUNNING)
- return;
-
- if (!is_user)
- perf_callchain_kernel(regs, entry);
-
- if (current->mm)
- perf_callchain_user(regs, entry);
-}
-
-struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
- struct perf_callchain_entry *entry;
-
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
- /* TODO: We don't support guest os callchain now */
- return NULL;
- }
-
- if (in_nmi())
- entry = &__get_cpu_var(pmc_nmi_entry);
- else
- entry = &__get_cpu_var(pmc_irq_entry);
-
- entry->nr = 0;
-
- perf_do_callchain(regs, entry);
-
- return entry;
-}
-
-void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
-{
- regs->ip = ip;
- /*
- * perf_arch_fetch_caller_regs adds another call, we need to increment
- * the skip level
- */
- regs->bp = rewind_frame_pointer(skip + 1);
- regs->cs = __KERNEL_CS;
- /*
- * We abuse bit 3 to pass exact information, see perf_misc_flags
- * and the comment with PERF_EFLAGS_EXACT.
- */
- regs->flags = 0;
-}
-
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
unsigned long ip;