perf/core: Fix locking for children siblings group read

diff --git a/kernel/events/core.c b/kernel/events/core.c
index bc63f8db1b0d218f09ae489fbfbeb042e82f5cf6..c17c0881fd36aef7200f3991b12bdd5a4322b765 100644
@@ -389,6 +389,7 @@ static atomic_t nr_switch_events __read_mostly;
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
 static struct srcu_struct pmus_srcu;
+static cpumask_var_t perf_online_mask;
 
 /*
  * perf event paranoia level:
@@ -1451,6 +1452,13 @@ static enum event_type_t get_event_type(struct perf_event *event)
 
        lockdep_assert_held(&ctx->lock);
 
+       /*
+        * It's 'group type', really, because if our group leader is
+        * pinned, so are we.
+        */
+       if (event->group_leader != event)
+               event = event->group_leader;
+
        event_type = event->attr.pinned ? EVENT_PINNED : EVENT_FLEXIBLE;
        if (!ctx->task)
                event_type |= EVENT_CPU;
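
For reference, the group shape this hunk classifies can be set up from user space: a pinned leader with a non-pinned sibling. With the change above the sibling is typed by its leader, so the whole group is treated as EVENT_PINNED. A minimal sketch, not part of the patch; the counter choices and the perf_open() helper are illustrative only, error handling trimmed:

/*
 * Sketch: a group whose leader is pinned but whose sibling is not,
 * which is the case the "group type" comment above is about.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

static int perf_open(struct perf_event_attr *attr, int group_fd)
{
	/* pid == 0: this task, cpu == -1: any CPU */
	return syscall(__NR_perf_event_open, attr, 0, -1, group_fd, 0);
}

int main(void)
{
	struct perf_event_attr leader, sibling;

	memset(&leader, 0, sizeof(leader));
	leader.size = sizeof(leader);
	leader.type = PERF_TYPE_HARDWARE;
	leader.config = PERF_COUNT_HW_CPU_CYCLES;
	leader.pinned = 1;			/* only the leader carries attr.pinned */

	memset(&sibling, 0, sizeof(sibling));
	sibling.size = sizeof(sibling);
	sibling.type = PERF_TYPE_HARDWARE;
	sibling.config = PERF_COUNT_HW_INSTRUCTIONS;

	int lfd = perf_open(&leader, -1);
	int sfd = perf_open(&sibling, lfd);	/* scheduled as part of the pinned group */

	printf("leader fd = %d, sibling fd = %d\n", lfd, sfd);
	return 0;
}
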
@@ -3807,14 +3815,6 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
                if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
                        return ERR_PTR(-EACCES);
 
-               /*
-                * We could be clever and allow to attach a event to an
-                * offline CPU and activate it when the CPU comes up, but
-                * that's for later.
-                */
-               if (!cpu_online(cpu))
-                       return ERR_PTR(-ENODEV);
-
                cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
                ctx = &cpuctx->ctx;
                get_ctx(ctx);
@@ -4372,7 +4372,9 @@ EXPORT_SYMBOL_GPL(perf_event_read_value);
 static int __perf_read_group_add(struct perf_event *leader,
                                        u64 read_format, u64 *values)
 {
+       struct perf_event_context *ctx = leader->ctx;
        struct perf_event *sub;
+       unsigned long flags;
        int n = 1; /* skip @nr */
        int ret;
 
@@ -4402,12 +4404,15 @@ static int __perf_read_group_add(struct perf_event *leader,
        if (read_format & PERF_FORMAT_ID)
                values[n++] = primary_event_id(leader);
 
+       raw_spin_lock_irqsave(&ctx->lock, flags);
+
        list_for_each_entry(sub, &leader->sibling_list, group_entry) {
                values[n++] += perf_event_count(sub);
                if (read_format & PERF_FORMAT_ID)
                        values[n++] = primary_event_id(sub);
        }
 
+       raw_spin_unlock_irqrestore(&ctx->lock, flags);
        return 0;
 }
 
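
The sibling_list walk that now runs under ctx->lock is the one that services a PERF_FORMAT_GROUP read from user space; as the subject line suggests, reading a child (inherited) group can otherwise race with modification of that child's sibling list, which the parent's ctx::mutex alone does not exclude. A rough sketch of the read path being protected, not part of the patch; buffer layout follows PERF_FORMAT_GROUP | PERF_FORMAT_ID as documented in perf_event_open(2), error handling trimmed:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

/* Layout returned for read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID. */
struct group_read {
	uint64_t nr;
	struct { uint64_t value, id; } cnt[2];	/* leader + one sibling */
};

int main(void)
{
	struct perf_event_attr attr;
	struct group_read gr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_TASK_CLOCK;
	attr.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;

	int lfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);

	attr.read_format = 0;
	attr.config = PERF_COUNT_SW_CONTEXT_SWITCHES;
	int sfd = syscall(__NR_perf_event_open, &attr, 0, -1, lfd, 0);

	/* One read() on the leader returns the whole group. */
	read(lfd, &gr, sizeof(gr));
	printf("nr=%llu leader=%llu sibling=%llu\n",
	       (unsigned long long)gr.nr,
	       (unsigned long long)gr.cnt[0].value,
	       (unsigned long long)gr.cnt[1].value);
	close(sfd);
	close(lfd);
	return 0;
}
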
@@ -7315,21 +7320,6 @@ int perf_event_account_interrupt(struct perf_event *event)
        return __perf_event_account_interrupt(event, 1);
 }
 
-static bool sample_is_allowed(struct perf_event *event, struct pt_regs *regs)
-{
-       /*
-        * Due to interrupt latency (AKA "skid"), we may enter the
-        * kernel before taking an overflow, even if the PMU is only
-        * counting user events.
-        * To avoid leaking information to userspace, we must always
-        * reject kernel samples when exclude_kernel is set.
-        */
-       if (event->attr.exclude_kernel && !user_mode(regs))
-               return false;
-
-       return true;
-}
-
 /*
  * Generic event overflow handling, sampling.
  */
@@ -7350,12 +7340,6 @@ static int __perf_event_overflow(struct perf_event *event,
 
        ret = __perf_event_account_interrupt(event, throttle);
 
-       /*
-        * For security, drop the skid kernel samples if necessary.
-        */
-       if (!sample_is_allowed(event, regs))
-               return ret;
-
        /*
         * XXX event_limit might not quite work as expected on inherited
         * events
@@ -7723,7 +7707,8 @@ static int swevent_hlist_get_cpu(int cpu)
        int err = 0;
 
        mutex_lock(&swhash->hlist_mutex);
-       if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
+       if (!swevent_hlist_deref(swhash) &&
+           cpumask_test_cpu(cpu, perf_online_mask)) {
                struct swevent_hlist *hlist;
 
                hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
@@ -7744,7 +7729,7 @@ static int swevent_hlist_get(void)
 {
        int err, cpu, failed_cpu;
 
-       get_online_cpus();
+       mutex_lock(&pmus_lock);
        for_each_possible_cpu(cpu) {
                err = swevent_hlist_get_cpu(cpu);
                if (err) {
@@ -7752,8 +7737,7 @@ static int swevent_hlist_get(void)
                        goto fail;
                }
        }
-       put_online_cpus();
-
+       mutex_unlock(&pmus_lock);
        return 0;
 fail:
        for_each_possible_cpu(cpu) {
@@ -7761,8 +7745,7 @@ fail:
                        break;
                swevent_hlist_put_cpu(cpu);
        }
-
-       put_online_cpus();
+       mutex_unlock(&pmus_lock);
        return err;
 }
 
@@ -8940,7 +8923,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
        pmu->hrtimer_interval_ms = timer;
 
        /* update all cpuctx for this PMU */
-       get_online_cpus();
+       cpus_read_lock();
        for_each_online_cpu(cpu) {
                struct perf_cpu_context *cpuctx;
                cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
@@ -8949,7 +8932,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
                cpu_function_call(cpu,
                        (remote_function_f)perf_mux_hrtimer_restart, cpuctx);
        }
-       put_online_cpus();
+       cpus_read_unlock();
        mutex_unlock(&mux_interval_mutex);
 
        return count;
@@ -9079,6 +9062,7 @@ skip_type:
                lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
                lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
                cpuctx->ctx.pmu = pmu;
+               cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask);
 
                __perf_mux_hrtimer_init(cpuctx, cpu);
        }
@@ -9903,12 +9887,10 @@ SYSCALL_DEFINE5(perf_event_open,
                goto err_task;
        }
 
-       get_online_cpus();
-
        if (task) {
                err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
                if (err)
-                       goto err_cpus;
+                       goto err_task;
 
                /*
                 * Reuse ptrace permission checks for now.
@@ -10094,6 +10076,23 @@ SYSCALL_DEFINE5(perf_event_open,
                goto err_locked;
        }
 
+       if (!task) {
+               /*
+                * Check if the @cpu we're creating an event for is online.
+                *
+                * We use the perf_cpu_context::ctx::mutex to serialize against
+                * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+                */
+               struct perf_cpu_context *cpuctx =
+                       container_of(ctx, struct perf_cpu_context, ctx);
+
+               if (!cpuctx->online) {
+                       err = -ENODEV;
+                       goto err_locked;
+               }
+       }
+
+
        /*
         * Must be under the same ctx::mutex as perf_install_in_context(),
         * because we need to serialize with concurrent event creation.
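
Together with the find_get_context() hunk earlier, this keeps the user-visible behaviour: opening a CPU-bound event on an offline CPU is still refused, only the check now happens here, under ctx::mutex, via cpuctx->online instead of an early cpu_online() test. A rough sketch of that case, not part of the patch; it assumes a CPU 1 that has been taken offline (e.g. through /sys/devices/system/cpu/cpu1/online) and may need CAP_SYS_ADMIN or a permissive perf_event_paranoid setting:

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;

	/* pid == -1, cpu == 1: a per-CPU event bound to CPU 1. */
	int fd = syscall(__NR_perf_event_open, &attr, -1, 1, -1, 0);
	if (fd < 0)
		printf("perf_event_open on CPU 1: %s\n", strerror(errno));
	else
		close(fd);
	return 0;
}
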
@@ -10183,8 +10182,6 @@ SYSCALL_DEFINE5(perf_event_open,
                put_task_struct(task);
        }
 
-       put_online_cpus();
-
        mutex_lock(&current->perf_event_mutex);
        list_add_tail(&event->owner_entry, &current->perf_event_list);
        mutex_unlock(&current->perf_event_mutex);
@@ -10218,8 +10215,6 @@ err_alloc:
 err_cred:
        if (task)
                mutex_unlock(&task->signal->cred_guard_mutex);
-err_cpus:
-       put_online_cpus();
 err_task:
        if (task)
                put_task_struct(task);
@@ -10274,6 +10269,21 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
                goto err_unlock;
        }
 
+       if (!task) {
+               /*
+                * Check if the @cpu we're creating an event for is online.
+                *
+                * We use the perf_cpu_context::ctx::mutex to serialize against
+                * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+                */
+               struct perf_cpu_context *cpuctx =
+                       container_of(ctx, struct perf_cpu_context, ctx);
+               if (!cpuctx->online) {
+                       err = -ENODEV;
+                       goto err_unlock;
+               }
+       }
+
        if (!exclusive_event_installable(event, ctx)) {
                err = -EBUSY;
                goto err_unlock;
@@ -10941,6 +10951,8 @@ static void __init perf_event_init_all_cpus(void)
        struct swevent_htable *swhash;
        int cpu;
 
+       zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);
+
        for_each_possible_cpu(cpu) {
                swhash = &per_cpu(swevent_htable, cpu);
                mutex_init(&swhash->hlist_mutex);
@@ -10956,7 +10968,7 @@ static void __init perf_event_init_all_cpus(void)
        }
 }
 
-int perf_event_init_cpu(unsigned int cpu)
+void perf_swevent_init_cpu(unsigned int cpu)
 {
        struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
@@ -10969,7 +10981,6 @@ int perf_event_init_cpu(unsigned int cpu)
                rcu_assign_pointer(swhash->swevent_hlist, hlist);
        }
        mutex_unlock(&swhash->hlist_mutex);
-       return 0;
 }
 
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
@@ -10987,19 +10998,22 @@ static void __perf_event_exit_context(void *__info)
 
 static void perf_event_exit_cpu_context(int cpu)
 {
+       struct perf_cpu_context *cpuctx;
        struct perf_event_context *ctx;
        struct pmu *pmu;
-       int idx;
 
-       idx = srcu_read_lock(&pmus_srcu);
-       list_for_each_entry_rcu(pmu, &pmus, entry) {
-               ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
+       mutex_lock(&pmus_lock);
+       list_for_each_entry(pmu, &pmus, entry) {
+               cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+               ctx = &cpuctx->ctx;
 
                mutex_lock(&ctx->mutex);
                smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
+               cpuctx->online = 0;
                mutex_unlock(&ctx->mutex);
        }
-       srcu_read_unlock(&pmus_srcu, idx);
+       cpumask_clear_cpu(cpu, perf_online_mask);
+       mutex_unlock(&pmus_lock);
 }
 #else
 
@@ -11007,6 +11021,29 @@ static void perf_event_exit_cpu_context(int cpu) { }
 
 #endif
 
+int perf_event_init_cpu(unsigned int cpu)
+{
+       struct perf_cpu_context *cpuctx;
+       struct perf_event_context *ctx;
+       struct pmu *pmu;
+
+       perf_swevent_init_cpu(cpu);
+
+       mutex_lock(&pmus_lock);
+       cpumask_set_cpu(cpu, perf_online_mask);
+       list_for_each_entry(pmu, &pmus, entry) {
+               cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+               ctx = &cpuctx->ctx;
+
+               mutex_lock(&ctx->mutex);
+               cpuctx->online = 1;
+               mutex_unlock(&ctx->mutex);
+       }
+       mutex_unlock(&pmus_lock);
+
+       return 0;
+}
+
 int perf_event_exit_cpu(unsigned int cpu)
 {
        perf_event_exit_cpu_context(cpu);
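
The new perf_event_init_cpu()/perf_event_exit_cpu() pair, together with perf_online_mask, is what lets the earlier hunks drop get_online_cpus(): CPU online state is now tracked under pmus_lock and the per-context ctx::mutex. For comparison, a generic sketch of how such an online/offline callback pair hooks into the CPU hotplug state machine; this uses a hypothetical dynamic state and is not how the perf callbacks above are registered (they are wired up statically in the cpuhp state table):

/*
 * Sketch only: a driver-style online/offline pair registered with the
 * hotplug state machine.  The shape mirrors perf_event_init_cpu() /
 * perf_event_exit_cpu() above, but the state and names are made up.
 */
#include <linux/init.h>
#include <linux/cpuhotplug.h>

static int example_cpu_online(unsigned int cpu)
{
	/* publish per-CPU state, like cpuctx->online = 1 above */
	return 0;
}

static int example_cpu_offline(unsigned int cpu)
{
	/* quiesce per-CPU state, like perf_event_exit_cpu_context() */
	return 0;
}

static int __init example_init(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "example/perf:online",
				example_cpu_online, example_cpu_offline);
	return ret < 0 ? ret : 0;
}
early_initcall(example_init);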