Merge branch 'perf/urgent' into perf/core
author Ingo Molnar <mingo@elte.hu>
Fri, 26 Nov 2010 14:07:02 +0000 (15:07 +0100)
committer Ingo Molnar <mingo@elte.hu>
Fri, 26 Nov 2010 14:07:02 +0000 (15:07 +0100)
Conflicts:
arch/x86/kernel/apic/hw_nmi.c

Merge reason: Resolve conflict, queue up dependent patch.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
12 files changed:
arch/x86/Kconfig
arch/x86/kernel/apic/hw_nmi.c
arch/x86/kernel/cpu/perf_event.c
arch/x86/kernel/entry_64.S
arch/x86/kernel/hw_breakpoint.c
include/linux/hw_breakpoint.h
include/linux/perf_event.h
kernel/hw_breakpoint.c
kernel/irq_work.c
kernel/perf_event.c
tools/perf/builtin-record.c
tools/perf/util/symbol.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e8327686d3c55d86adb19b3c9c83bc9869e5c3cf..e330da21b84f0636751b7e18e921ecff9cc31f55 100644
@@ -21,7 +21,7 @@ config X86
        select HAVE_UNSTABLE_SCHED_CLOCK
        select HAVE_IDE
        select HAVE_OPROFILE
-       select HAVE_PERF_EVENTS if (!M386 && !M486)
+       select HAVE_PERF_EVENTS
        select HAVE_IRQ_WORK
        select HAVE_IOREMAP_PROT
        select HAVE_KPROBES
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 3e25afe9a62a85d222aad06c84eb93a9aed25e6b..a0e71cb4fa9ccf6fc790ff1a0d98743fcdbae1fc 100644
@@ -17,9 +17,6 @@
 #include <linux/nmi.h>
 #include <linux/module.h>
 
-/* For reliability, we're prepared to waste bits here. */
-static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(void)
 {
@@ -27,6 +24,10 @@ u64 hw_nmi_get_sample_period(void)
 }
 #endif
 
+
+/* For reliability, we're prepared to waste bits here. */
+static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
+
 #ifdef arch_trigger_all_cpu_backtrace
 void arch_trigger_all_cpu_backtrace(void)
 {
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5273c7b90b8b82126ebccad4722f74696df3da68..7c1a4c35fd419fa18b6d7409622de1f3bbddd791 100644
@@ -372,6 +372,20 @@ static void release_pmc_hardware(void) {}
 
 #endif
 
+static bool check_hw_exists(void)
+{
+       u64 val, val_new = 0;
+       int ret = 0;
+
+       val = 0xabcdUL;
+       ret |= checking_wrmsrl(x86_pmu.perfctr, val);
+       ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
+       if (ret || val != val_new)
+               return false;
+
+       return true;
+}
+
 static void reserve_ds_buffers(void);
 static void release_ds_buffers(void);
 
@@ -1363,6 +1377,12 @@ void __init init_hw_perf_events(void)
 
        pmu_check_apic();
 
+       /* sanity check that the hardware exists or is emulated */
+       if (!check_hw_exists()) {
+               pr_cont("Broken PMU hardware detected, software events only.\n");
+               return;
+       }
+
        pr_cont("%s PMU driver.\n", x86_pmu.name);
 
        if (x86_pmu.quirks)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index fe2690d71c0c9f1d27be243f2ca12183a6705357..e3ba417e869766bb258ef489aa389f4933abf2a3 100644
@@ -295,6 +295,7 @@ ENDPROC(native_usergs_sysret64)
        .endm
 
 /* save partial stack frame */
+       .pushsection .kprobes.text, "ax"
 ENTRY(save_args)
        XCPT_FRAME
        cld
@@ -334,6 +335,7 @@ ENTRY(save_args)
        ret
        CFI_ENDPROC
 END(save_args)
+       .popsection
 
 ENTRY(save_rest)
        PARTIAL_FRAME 1 REST_SKIP+8
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index ff15c9dcc25de8be8144fd4d15f1dfd3069314e4..42c59425450727284615b2a920611e21047ce715 100644
@@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
        dr6_p = (unsigned long *)ERR_PTR(args->err);
        dr6 = *dr6_p;
 
+       /* If it's a single step, TRAP bits are random */
+       if (dr6 & DR_STEP)
+               return NOTIFY_DONE;
+
        /* Do an early return if no trap bits are set in DR6 */
        if ((dr6 & DR_TRAP_BITS) == 0)
                return NOTIFY_DONE;
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index a2d6ea49ec564e825ebffc3ebb059f57079fc73d..d1e55fed2c7dc0281cd0cf653bb5a56a399265cb 100644
@@ -33,6 +33,8 @@ enum bp_type_idx {
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
+extern int __init init_hw_breakpoint(void);
+
 static inline void hw_breakpoint_init(struct perf_event_attr *attr)
 {
        memset(attr, 0, sizeof(*attr));
@@ -108,6 +110,8 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
 
 #else /* !CONFIG_HAVE_HW_BREAKPOINT */
 
+static inline int __init init_hw_breakpoint(void) { return 0; }
+
 static inline struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
                            perf_overflow_handler_t triggered,
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 40150f345982cd4fd40e8b80470505e569f05b03..de2c41758e29e20d6a709303f21a6d8242696fa0 100644
@@ -850,6 +850,7 @@ struct perf_event_context {
        int                             nr_active;
        int                             is_active;
        int                             nr_stat;
+       int                             rotate_disable;
        atomic_t                        refcount;
        struct task_struct              *task;
 
@@ -908,20 +909,6 @@ extern int perf_num_counters(void);
 extern const char *perf_pmu_name(void);
 extern void __perf_event_task_sched_in(struct task_struct *task);
 extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
-
-extern atomic_t perf_task_events;
-
-static inline void perf_event_task_sched_in(struct task_struct *task)
-{
-       COND_STMT(&perf_task_events, __perf_event_task_sched_in(task));
-}
-
-static inline
-void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
-{
-       COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next));
-}
-
 extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
@@ -1030,6 +1017,21 @@ have_event:
        __perf_sw_event(event_id, nr, nmi, regs, addr);
 }
 
+extern atomic_t perf_task_events;
+
+static inline void perf_event_task_sched_in(struct task_struct *task)
+{
+       COND_STMT(&perf_task_events, __perf_event_task_sched_in(task));
+}
+
+static inline
+void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
+{
+       perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
+
+       COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next));
+}
+
 extern void perf_event_mmap(struct vm_area_struct *vma);
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 2c9120f0afca9872cc4edba57d4e5cfe9311788f..e5325825aeb6e1e4ea0514ee37cfa53412ec4e3c 100644
@@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = {
        .read           = hw_breakpoint_pmu_read,
 };
 
-static int __init init_hw_breakpoint(void)
+int __init init_hw_breakpoint(void)
 {
        unsigned int **task_bp_pinned;
        int cpu, err_cpu;
@@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void)
 
        return -ENOMEM;
 }
-core_initcall(init_hw_breakpoint);
 
 
diff --git a/kernel/irq_work.c b/kernel/irq_work.c
index f16763ff8481b727bdf0263c241ddf2e1d6ba09a..90f881904bb1202ba1566f5db2e28eaf128551b9 100644
@@ -145,7 +145,9 @@ void irq_work_run(void)
                 * Clear the BUSY bit and return to the free state if
                 * no-one else claimed it meanwhile.
                 */
-               cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL);
+               (void)cmpxchg(&entry->next,
+                             next_flags(NULL, IRQ_WORK_BUSY),
+                             NULL);
        }
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 40c3aab648a1cd489aa25cb5313a1a5ce6a5e549..43f757ccf831a04414bcea3d500f2090259fa09b 100644
@@ -31,6 +31,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/irq_regs.h>
 
@@ -1286,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
        int ctxn;
 
-       perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
-
        for_each_task_context_nr(ctxn)
                perf_event_context_sched_out(task, ctxn, next);
 }
@@ -1621,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx)
 {
        raw_spin_lock(&ctx->lock);
 
-       /* Rotate the first entry last of non-pinned groups */
-       list_rotate_left(&ctx->flexible_groups);
+       /*
+        * Rotate the first entry last of non-pinned groups. Rotation might be
+        * disabled by the inheritance code.
+        */
+       if (!ctx->rotate_disable)
+               list_rotate_left(&ctx->flexible_groups);
 
        raw_spin_unlock(&ctx->lock);
 }
@@ -2234,11 +2237,6 @@ int perf_event_release_kernel(struct perf_event *event)
        raw_spin_unlock_irq(&ctx->lock);
        mutex_unlock(&ctx->mutex);
 
-       mutex_lock(&event->owner->perf_event_mutex);
-       list_del_init(&event->owner_entry);
-       mutex_unlock(&event->owner->perf_event_mutex);
-       put_task_struct(event->owner);
-
        free_event(event);
 
        return 0;
@@ -2251,9 +2249,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 static int perf_release(struct inode *inode, struct file *file)
 {
        struct perf_event *event = file->private_data;
+       struct task_struct *owner;
 
        file->private_data = NULL;
 
+       rcu_read_lock();
+       owner = ACCESS_ONCE(event->owner);
+       /*
+        * Matches the smp_wmb() in perf_event_exit_task(). If we observe
+        * !owner it means the list deletion is complete and we can indeed
+        * free this event, otherwise we need to serialize on
+        * owner->perf_event_mutex.
+        */
+       smp_read_barrier_depends();
+       if (owner) {
+               /*
+                * Since delayed_put_task_struct() also drops the last
+                * task reference we can safely take a new reference
+                * while holding the rcu_read_lock().
+                */
+               get_task_struct(owner);
+       }
+       rcu_read_unlock();
+
+       if (owner) {
+               mutex_lock(&owner->perf_event_mutex);
+               /*
+                * We have to re-check the event->owner field, if it is cleared
+                * we raced with perf_event_exit_task(), acquiring the mutex
+                * ensured they're done, and we can proceed with freeing the
+                * event.
+                */
+               if (event->owner)
+                       list_del_init(&event->owner_entry);
+               mutex_unlock(&owner->perf_event_mutex);
+               put_task_struct(owner);
+       }
+
        return perf_event_release_kernel(event);
 }
 
@@ -5668,7 +5700,7 @@ SYSCALL_DEFINE5(perf_event_open,
        mutex_unlock(&ctx->mutex);
 
        event->owner = current;
-       get_task_struct(current);
+
        mutex_lock(&current->perf_event_mutex);
        list_add_tail(&event->owner_entry, &current->perf_event_list);
        mutex_unlock(&current->perf_event_mutex);
@@ -5736,12 +5768,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
        ++ctx->generation;
        mutex_unlock(&ctx->mutex);
 
-       event->owner = current;
-       get_task_struct(current);
-       mutex_lock(&current->perf_event_mutex);
-       list_add_tail(&event->owner_entry, &current->perf_event_list);
-       mutex_unlock(&current->perf_event_mutex);
-
        return event;
 
 err_free:
@@ -5892,8 +5918,24 @@ again:
  */
 void perf_event_exit_task(struct task_struct *child)
 {
+       struct perf_event *event, *tmp;
        int ctxn;
 
+       mutex_lock(&child->perf_event_mutex);
+       list_for_each_entry_safe(event, tmp, &child->perf_event_list,
+                                owner_entry) {
+               list_del_init(&event->owner_entry);
+
+               /*
+                * Ensure the list deletion is visible before we clear
+                * the owner, closes a race against perf_release() where
+                * we need to serialize on the owner->perf_event_mutex.
+                */
+               smp_wmb();
+               event->owner = NULL;
+       }
+       mutex_unlock(&child->perf_event_mutex);
+
        for_each_task_context_nr(ctxn)
                perf_event_exit_task_context(child, ctxn);
 }
@@ -6113,6 +6155,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
        struct perf_event *event;
        struct task_struct *parent = current;
        int inherited_all = 1;
+       unsigned long flags;
        int ret = 0;
 
        child->perf_event_ctxp[ctxn] = NULL;
@@ -6153,6 +6196,15 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
                        break;
        }
 
+       /*
+        * We can't hold ctx->lock when iterating the ->flexible_group list due
+        * to allocations, but we need to prevent rotation because
+        * rotate_ctx() will change the list from interrupt context.
+        */
+       raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+       parent_ctx->rotate_disable = 1;
+       raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
        list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
                ret = inherit_task_group(event, parent, parent_ctx,
                                         child, ctxn, &inherited_all);
@@ -6160,6 +6212,10 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
                        break;
        }
 
+       raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+       parent_ctx->rotate_disable = 0;
+       raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
        child_ctx = child->perf_event_ctxp[ctxn];
 
        if (child_ctx && inherited_all) {
@@ -6312,6 +6368,8 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 
 void __init perf_event_init(void)
 {
+       int ret;
+
        perf_event_init_all_cpus();
        init_srcu_struct(&pmus_srcu);
        perf_pmu_register(&perf_swevent);
@@ -6319,4 +6377,7 @@ void __init perf_event_init(void)
        perf_pmu_register(&perf_task_clock);
        perf_tp_register();
        perf_cpu_notifier(perf_cpu_notify);
+
+       ret = init_hw_breakpoint();
+       WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 }
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index d9dd47885218b0dffc11460776153b3b7bb7a031..3d2cb4899807cf5bdadaba70a65167f16cdd4eef 100644
@@ -697,17 +697,18 @@ static int __cmd_record(int argc, const char **argv)
        if (err < 0)
                err = event__synthesize_kernel_mmap(process_synthesized_event,
                                                    session, machine, "_stext");
-       if (err < 0) {
-               pr_err("Couldn't record kernel reference relocation symbol.\n");
-               return err;
-       }
+       if (err < 0)
+               pr_err("Couldn't record kernel reference relocation symbol\n"
+                      "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+                      "Check /proc/kallsyms permission or run as root.\n");
 
        err = event__synthesize_modules(process_synthesized_event,
                                        session, machine);
-       if (err < 0) {
-               pr_err("Couldn't record kernel reference relocation symbol.\n");
-               return err;
-       }
+       if (err < 0)
+               pr_err("Couldn't record kernel module information.\n"
+                      "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+                      "Check /proc/modules permission or run as root.\n");
+
        if (perf_guest)
                perf_session__process_machines(session, event__synthesize_guest_os);
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b39f499e575a604198bf1bb11d11d6280a091548..0500895a45af530bf52dc949551ed73690978c61 100644
@@ -295,7 +295,9 @@ static void symbols__insert_by_name(struct rb_root *self, struct symbol *sym)
 {
        struct rb_node **p = &self->rb_node;
        struct rb_node *parent = NULL;
-       struct symbol_name_rb_node *symn = ((void *)sym) - sizeof(*parent), *s;
+       struct symbol_name_rb_node *symn, *s;
+
+       symn = container_of(sym, struct symbol_name_rb_node, sym);
 
        while (*p != NULL) {
                parent = *p;