perf: Deconstify struct pmu

[mv-sheeva.git] / kernel / perf_event.c
diff --git a/kernel/perf_event.c b/kernel/perf_event.c

index 615d024894cf6c110452f8c2d425238acde529dc..fb46fd13f31fb75fd6eca190a51b6a80646770ec 100644 (file)
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -75,7 +75,7 @@ static DEFINE_SPINLOCK(perf_resource_lock);
  /*
   * Architecture provided APIs - weak aliases:
   */
-extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
+extern __weak struct pmu *hw_perf_event_init(struct perf_event *event)
  {
         return NULL;
  }
@@ -214,7 +214,7 @@ static void perf_unpin_context(struct perf_event_context *ctx)
  
  static inline u64 perf_clock(void)
  {
-       return cpu_clock(raw_smp_processor_id());
+       return local_clock();
  }
  
  /*
@@ -402,11 +402,31 @@ static void perf_group_detach(struct perf_event *event)
         }
  }
  
+/*
+ * Tell whether @event may run on the current CPU: an event with
+ * cpu == -1 matches everywhere, any other event only matches on the
+ * CPU whose id it carries.
+ *
+ * Returns 1 on match, 0 otherwise.
+ */
+static inline int
+event_filter_match(struct perf_event *event)
+{
+       if (event->cpu == -1)
+               return 1;
+
+       return event->cpu == smp_processor_id();
+}
+
  static void
  event_sched_out(struct perf_event *event,
                   struct perf_cpu_context *cpuctx,
                   struct perf_event_context *ctx)
  {
+       u64 delta;
+       /*
+        * An event which could not be activated because of
+        * filter mismatch still needs to have its timings
+        * maintained, otherwise bogus information is returned
+        * via read() for time_enabled, time_running:
+        */
+       if (event->state == PERF_EVENT_STATE_INACTIVE
+           && !event_filter_match(event)) {
+               delta = ctx->time - event->tstamp_stopped;
+               event->tstamp_running += delta;
+               event->tstamp_stopped = ctx->time;
+       }
+
         if (event->state != PERF_EVENT_STATE_ACTIVE)
                 return;
  
@@ -432,9 +452,7 @@ group_sched_out(struct perf_event *group_event,
                 struct perf_event_context *ctx)
  {
         struct perf_event *event;
-
-       if (group_event->state != PERF_EVENT_STATE_ACTIVE)
-               return;
+       int state = group_event->state;
  
         event_sched_out(group_event, cpuctx, ctx);
  
@@ -444,7 +462,7 @@ group_sched_out(struct perf_event *group_event,
         list_for_each_entry(event, &group_event->sibling_list, group_entry)
                 event_sched_out(event, cpuctx, ctx);
  
-       if (group_event->attr.exclusive)
+       if (state == PERF_EVENT_STATE_ACTIVE && group_event->attr.exclusive)
                 cpuctx->exclusive = 0;
  }
  
@@ -673,7 +691,7 @@ group_sched_in(struct perf_event *group_event,
                struct perf_event_context *ctx)
  {
         struct perf_event *event, *partial_group = NULL;
-       const struct pmu *pmu = group_event->pmu;
+       struct pmu *pmu = group_event->pmu;
         bool txn = false;
  
         if (group_event->state == PERF_EVENT_STATE_OFF)
@@ -1763,6 +1781,216 @@ static u64 perf_event_read(struct perf_event *event)
         return perf_event_count(event);
  }
  
+/*
+ * Callchain support
+ */
+
+/*
+ * Container for the callchain buffers; allocated by
+ * alloc_callchain_buffers() and freed through an RCU callback.
+ */
+struct callchain_cpus_entries {
+       /* handed to call_rcu() by release_callchain_buffers() */
+       struct rcu_head                 rcu_head;
+       /* indexed by cpu id; each slot holds PERF_NR_CONTEXTS entries */
+       struct perf_callchain_entry     *cpu_entries[0];
+};
+
+static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
+static atomic_t nr_callchain_events;
+static DEFINE_MUTEX(callchain_mutex);
+struct callchain_cpus_entries *callchain_cpus_entries;
+
+
+/*
+ * Default kernel-side callchain walker: records nothing. It is __weak
+ * so that another definition can take its place at link time.
+ */
+__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
+                                 struct pt_regs *regs)
+{
+}
+
+/*
+ * Default user-side callchain walker: records nothing. It is __weak
+ * so that another definition can take its place at link time.
+ */
+__weak void perf_callchain_user(struct perf_callchain_entry *entry,
+                               struct pt_regs *regs)
+{
+}
+
+/*
+ * RCU callback queued by release_callchain_buffers(): free the entry
+ * array of every possible cpu, then the container that embeds @head.
+ */
+static void release_callchain_buffers_rcu(struct rcu_head *head)
+{
+       struct callchain_cpus_entries *old =
+               container_of(head, struct callchain_cpus_entries, rcu_head);
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               kfree(old->cpu_entries[cpu]);
+       }
+
+       kfree(old);
+}
+
+/*
+ * Unpublish the global callchain buffers and free them from an RCU
+ * callback, so that readers which picked the pointer up inside an
+ * rcu_read_lock() section can finish with it first.
+ *
+ * Both callers in this file (get_callchain_buffers() and
+ * put_callchain_buffers()) hold callchain_mutex.
+ */
+static void release_callchain_buffers(void)
+{
+       struct callchain_cpus_entries *entries;
+
+       entries = callchain_cpus_entries;
+       rcu_assign_pointer(callchain_cpus_entries, NULL);
+
+       /*
+        * Nothing is published when alloc_callchain_buffers() failed,
+        * yet we are still called on that path. Passing the rcu_head
+        * of a NULL container to call_rcu() would oops.
+        */
+       if (entries)
+               call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
+}
+
+/*
+ * Allocate the callchain buffers and publish them in
+ * callchain_cpus_entries: one table of per-cpu pointers, plus, for
+ * every possible cpu, an array of PERF_NR_CONTEXTS entries placed on
+ * that cpu's memory node.
+ *
+ * Returns 0 on success. On failure returns -ENOMEM, frees whatever was
+ * allocated so far and leaves callchain_cpus_entries untouched.
+ */
+static int alloc_callchain_buffers(void)
+{
+       int cpu;
+       int size;
+       struct callchain_cpus_entries *entries;
+
+       /*
+        * We can't use the percpu allocation API for data that can be
+        * accessed from NMI. Use a temporary manual per cpu allocation
+        * until that gets sorted out.
+        *
+        * cpu_entries[] is indexed by cpu id, so it has to be sized by
+        * nr_cpu_ids: with a sparse possible mask the highest cpu id
+        * can be larger than num_possible_cpus().
+        */
+       size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) *
+               nr_cpu_ids;
+
+       /* zeroed, so the failure path below may kfree() unused slots */
+       entries = kzalloc(size, GFP_KERNEL);
+       if (!entries)
+               return -ENOMEM;
+
+       size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
+
+       for_each_possible_cpu(cpu) {
+               entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
+                                                        cpu_to_node(cpu));
+               if (!entries->cpu_entries[cpu])
+                       goto fail;
+       }
+
+       rcu_assign_pointer(callchain_cpus_entries, entries);
+
+       return 0;
+
+fail:
+       for_each_possible_cpu(cpu)
+               kfree(entries->cpu_entries[cpu]);
+       kfree(entries);
+
+       return -ENOMEM;
+}
+
+/*
+ * Take a reference on the callchain buffers, allocating them for the
+ * first user. Serialized by callchain_mutex.
+ *
+ * The reference count is raised even when an error is returned.
+ *
+ * Returns 0 on success, -EINVAL if the count is found below 1 after
+ * the increment, -ENOMEM if the buffers are not available.
+ */
+static int get_callchain_buffers(void)
+{
+       int ret = 0;
+       int nr;
+
+       mutex_lock(&callchain_mutex);
+
+       nr = atomic_inc_return(&nr_callchain_events);
+       if (WARN_ON_ONCE(nr < 1)) {
+               ret = -EINVAL;
+       } else if (nr > 1) {
+               /* Not the first user: if the allocation failed, give up */
+               if (!callchain_cpus_entries)
+                       ret = -ENOMEM;
+       } else {
+               /* First user: set the buffers up */
+               ret = alloc_callchain_buffers();
+               if (ret)
+                       release_callchain_buffers();
+       }
+
+       mutex_unlock(&callchain_mutex);
+
+       return ret;
+}
+
+/*
+ * Drop a reference on the callchain buffers. When
+ * atomic_dec_and_mutex_lock() reports that this was the last one it
+ * returns with callchain_mutex held; release the buffers and unlock.
+ */
+static void put_callchain_buffers(void)
+{
+       if (!atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex))
+               return;
+
+       release_callchain_buffers();
+       mutex_unlock(&callchain_mutex);
+}
+
+/*
+ * Claim the recursion slot that corresponds to the current execution
+ * context: 3 for NMI, 2 for irq, 1 for softirq, 0 otherwise.
+ *
+ * @recursion: array of counters, one per context
+ *
+ * Returns the claimed slot index, or -1 if that slot is already taken.
+ */
+static int get_recursion_context(int *recursion)
+{
+       int *slot;
+       int rctx = 0;
+
+       if (in_nmi())
+               rctx = 3;
+       else if (in_irq())
+               rctx = 2;
+       else if (in_softirq())
+               rctx = 1;
+
+       slot = &recursion[rctx];
+       if (*slot)
+               return -1;
+
+       (*slot)++;
+       /* keep the compiler from moving later accesses above the claim */
+       barrier();
+
+       return rctx;
+}
+
+/*
+ * Give back the recursion slot @rctx previously obtained from
+ * get_recursion_context() on the same @recursion array.
+ */
+static inline void put_recursion_context(int *recursion, int rctx)
+{
+       int *slot = &recursion[rctx];
+
+       /* keep the compiler from moving earlier accesses below the release */
+       barrier();
+       (*slot)--;
+}
+
+/*
+ * Claim this cpu's callchain recursion slot and look up the matching
+ * entry in the global buffers.
+ *
+ * @rctx: receives the claimed slot index, or -1 on recursion
+ *
+ * Returns the entry, or NULL on recursion or when no buffers are
+ * published. In the latter case *rctx is still a claimed slot that the
+ * caller has to give back.
+ */
+static struct perf_callchain_entry *get_callchain_entry(int *rctx)
+{
+       struct callchain_cpus_entries *bufs;
+
+       *rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
+       if (*rctx == -1)
+               return NULL;
+
+       bufs = rcu_dereference(callchain_cpus_entries);
+       if (bufs)
+               return &bufs->cpu_entries[smp_processor_id()][*rctx];
+
+       return NULL;
+}
+
+/*
+ * Give back the callchain recursion slot @rctx of the current cpu,
+ * as claimed by get_callchain_entry().
+ */
+static void
+put_callchain_entry(int rctx)
+{
+       put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
+}
+
+/*
+ * Record the callchain for @regs into the callchain entry of the
+ * current cpu and context.
+ *
+ * When @regs is not user mode, the kernel chain is stored first and the
+ * user chain follows only if the current task has an mm.
+ *
+ * Returns the filled entry, or NULL on recursion or when no buffers
+ * are available.
+ */
+static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+       struct perf_callchain_entry *entry;
+       int rctx;
+
+       entry = get_callchain_entry(&rctx);
+       if (rctx == -1)
+               return NULL;
+
+       if (entry) {
+               entry->nr = 0;
+
+               if (!user_mode(regs)) {
+                       perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
+                       perf_callchain_kernel(entry, regs);
+                       /* continue with the user registers, if any */
+                       regs = current->mm ? task_pt_regs(current) : NULL;
+               }
+
+               if (regs) {
+                       perf_callchain_store(entry, PERF_CONTEXT_USER);
+                       perf_callchain_user(entry, regs);
+               }
+       }
+
+       /* the slot was claimed even when no entry was found */
+       put_callchain_entry(rctx);
+
+       return entry;
+}
+
  /*
   * Initialize the perf_event context in a task_struct:
   */
@@ -1895,6 +2123,8 @@ static void free_event(struct perf_event *event)
                         atomic_dec(&nr_comm_events);
                 if (event->attr.task)
                         atomic_dec(&nr_task_events);
+               if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
+                       put_callchain_buffers();
         }
  
         if (event->buffer) {
@@ -2937,55 +3167,6 @@ void perf_event_do_pending(void)
         __perf_pending_run();
  }
  
-DEFINE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
-
-/*
- * Callchain support -- arch specific
- */
-
-__weak struct perf_callchain_entry *perf_callchain_buffer(void)
-{
-       return &__get_cpu_var(perf_callchain_entry);
-}
-
-__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
-                                 struct pt_regs *regs)
-{
-}
-
-__weak void perf_callchain_user(struct perf_callchain_entry *entry,
-                               struct pt_regs *regs)
-{
-}
-
-static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
-{
-       struct perf_callchain_entry *entry;
-
-       entry = perf_callchain_buffer();
-       if (!entry)
-               return NULL;
-
-       entry->nr = 0;
-
-       if (!user_mode(regs)) {
-               perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
-               perf_callchain_kernel(entry, regs);
-               if (current->mm)
-                       regs = task_pt_regs(current);
-               else
-                       regs = NULL;
-       }
-
-       if (regs) {
-               perf_callchain_store(entry, PERF_CONTEXT_USER);
-               perf_callchain_user(entry, regs);
-       }
-
-       return entry;
-}
-
-
  /*
   * We assume there is only KVM supporting the callbacks.
   * Later on, we might change it to a list if there is
@@ -3480,14 +3661,20 @@ static void perf_event_output(struct perf_event *event, int nmi,
         struct perf_output_handle handle;
         struct perf_event_header header;
  
+       /* protect the callchain buffers */
+       rcu_read_lock();
+
         perf_prepare_sample(&header, data, event, regs);
  
         if (perf_output_begin(&handle, event, header.size, nmi, 1))
-               return;
+               goto exit;
  
         perf_output_sample(&handle, &header, data, event);
  
         perf_output_end(&handle);
+
+exit:
+       rcu_read_unlock();
  }
  
  /*
@@ -4243,32 +4430,16 @@ end:
  int perf_swevent_get_recursion_context(void)
  {
         struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-       int rctx;
-
-       if (in_nmi())
-               rctx = 3;
-       else if (in_irq())
-               rctx = 2;
-       else if (in_softirq())
-               rctx = 1;
-       else
-               rctx = 0;
-
-       if (cpuctx->recursion[rctx])
-               return -1;
-
-       cpuctx->recursion[rctx]++;
-       barrier();
  
-       return rctx;
+       return get_recursion_context(cpuctx->recursion);
  }
  EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
  
  void inline perf_swevent_put_recursion_context(int rctx)
  {
         struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
-       barrier();
-       cpuctx->recursion[rctx]--;
+
+       put_recursion_context(cpuctx->recursion, rctx);
  }
  
  void __perf_sw_event(u32 event_id, u64 nr, int nmi,
@@ -4330,7 +4501,7 @@ static int perf_swevent_int(struct perf_event *event)
         return 0;
  }
  
-static const struct pmu perf_ops_generic = {
+static struct pmu perf_ops_generic = {
         .enable         = perf_swevent_enable,
         .disable        = perf_swevent_disable,
         .start          = perf_swevent_int,
@@ -4443,7 +4614,7 @@ static void cpu_clock_perf_event_read(struct perf_event *event)
         cpu_clock_perf_event_update(event);
  }
  
-static const struct pmu perf_ops_cpu_clock = {
+static struct pmu perf_ops_cpu_clock = {
         .enable         = cpu_clock_perf_event_enable,
         .disable        = cpu_clock_perf_event_disable,
         .read           = cpu_clock_perf_event_read,
@@ -4500,7 +4671,7 @@ static void task_clock_perf_event_read(struct perf_event *event)
         task_clock_perf_event_update(event, time);
  }
  
-static const struct pmu perf_ops_task_clock = {
+static struct pmu perf_ops_task_clock = {
         .enable         = task_clock_perf_event_enable,
         .disable        = task_clock_perf_event_disable,
         .read           = task_clock_perf_event_read,
@@ -4614,7 +4785,7 @@ static int swevent_hlist_get(struct perf_event *event)
  
  #ifdef CONFIG_EVENT_TRACING
  
-static const struct pmu perf_ops_tracepoint = {
+static struct pmu perf_ops_tracepoint = {
         .enable         = perf_trace_enable,
         .disable        = perf_trace_disable,
         .start          = perf_swevent_int,
@@ -4678,7 +4849,7 @@ static void tp_perf_event_destroy(struct perf_event *event)
         perf_trace_destroy(event);
  }
  
-static const struct pmu *tp_perf_event_init(struct perf_event *event)
+static struct pmu *tp_perf_event_init(struct perf_event *event)
  {
         int err;
  
@@ -4725,7 +4896,7 @@ static void perf_event_free_filter(struct perf_event *event)
  
  #else
  
-static const struct pmu *tp_perf_event_init(struct perf_event *event)
+static struct pmu *tp_perf_event_init(struct perf_event *event)
  {
         return NULL;
  }
@@ -4747,7 +4918,7 @@ static void bp_perf_event_destroy(struct perf_event *event)
         release_bp_slot(event);
  }
  
-static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+static struct pmu *bp_perf_event_init(struct perf_event *bp)
  {
         int err;
  
@@ -4771,7 +4942,7 @@ void perf_bp_event(struct perf_event *bp, void *data)
                 perf_swevent_add(bp, 1, 1, &sample, regs);
  }
  #else
-static const struct pmu *bp_perf_event_init(struct perf_event *bp)
+static struct pmu *bp_perf_event_init(struct perf_event *bp)
  {
         return NULL;
  }
@@ -4793,9 +4964,9 @@ static void sw_perf_event_destroy(struct perf_event *event)
         swevent_hlist_put(event);
  }
  
-static const struct pmu *sw_perf_event_init(struct perf_event *event)
+static struct pmu *sw_perf_event_init(struct perf_event *event)
  {
-       const struct pmu *pmu = NULL;
+       struct pmu *pmu = NULL;
         u64 event_id = event->attr.config;
  
         /*
@@ -4857,7 +5028,7 @@ perf_event_alloc(struct perf_event_attr *attr,
                    perf_overflow_handler_t overflow_handler,
                    gfp_t gfpflags)
  {
-       const struct pmu *pmu;
+       struct pmu *pmu;
         struct perf_event *event;
         struct hw_perf_event *hwc;
         long err;
@@ -4968,6 +5139,13 @@ done:
                         atomic_inc(&nr_comm_events);
                 if (event->attr.task)
                         atomic_inc(&nr_task_events);
+               if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN) {
+                       err = get_callchain_buffers();
+                       if (err) {
+                               free_event(event);
+                               return ERR_PTR(err);
+                       }
+               }
         }
  
         return event;
@@ -5782,15 +5960,15 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
  {
         unsigned int cpu = (long)hcpu;
  
-       switch (action) {
+       switch (action & ~CPU_TASKS_FROZEN) {
  
         case CPU_UP_PREPARE:
-       case CPU_UP_PREPARE_FROZEN:
+       case CPU_DOWN_FAILED:
                 perf_event_init_cpu(cpu);
                 break;
  
+       case CPU_UP_CANCELED:
         case CPU_DOWN_PREPARE:
-       case CPU_DOWN_PREPARE_FROZEN:
                 perf_event_exit_cpu(cpu);
                 break;