perf: Separate find_get_context() from event initialization
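
Two reworks are visible in the diff below: perf_event_alloc() loses its
context and gfp arguments, so callers now allocate and initialize the event
before calling find_get_context(); and the pmu callbacks move from
enable/disable/unthrottle to add/del/start/stop, each taking a flags word.
A minimal sketch of the reordered open path implied by the hunks
(illustrative, not the literal kernel code):

    event = perf_event_alloc(&attr, cpu, group_leader, NULL, NULL);
    if (IS_ERR(event)) {
            err = PTR_ERR(event);
            goto err_fd;            /* only the fd is held at this point */
    }

    ctx = find_get_context(pid, cpu);
    if (IS_ERR(ctx)) {
            err = PTR_ERR(ctx);
            goto err_alloc;         /* unwind in reverse order of setup */
    }
    ...
    err_alloc:
            free_event(event);
    err_fd:
            put_unused_fd(event_fd);
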
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 1a6cdbf0d0911257fb4d2817b25a00122b96ea25..a3c86a8335c487b555bc91f34302ed7bad49af1b 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
  */
 static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
 
-int perf_max_events __read_mostly = 1;
-static int perf_reserved_percpu __read_mostly;
-static int perf_overcommit __read_mostly = 1;
-
 static atomic_t nr_events __read_mostly;
 static atomic_t nr_mmap_events __read_mostly;
 static atomic_t nr_comm_events __read_mostly;
@@ -66,11 +62,6 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000;
 
 static atomic64_t perf_event_id;
 
-/*
- * Lock for (sysadmin-configurable) event reservations:
- */
-static DEFINE_SPINLOCK(perf_resource_lock);
-
 void __weak perf_event_print_debug(void)       { }
 
 void perf_pmu_disable(struct pmu *pmu)
@@ -424,7 +415,7 @@ event_sched_out(struct perf_event *event,
                event->state = PERF_EVENT_STATE_OFF;
        }
        event->tstamp_stopped = ctx->time;
-       event->pmu->disable(event);
+       event->pmu->del(event, 0);
        event->oncpu = -1;
 
        if (!is_software_event(event))
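
The ->disable() call above becomes ->del(). For reference, this is the
callback shape the rest of the diff targets; a sketch of the relevant
struct pmu members and flag bits, assuming the matching perf_event.h
change from this series (not shown on this page):

    struct pmu {
            ...
            int  (*add)   (struct perf_event *event, int flags); /* attach; start if PERF_EF_START */
            void (*del)   (struct perf_event *event, int flags); /* stop and detach */
            void (*start) (struct perf_event *event, int flags); /* PERF_EF_RELOAD: reprogram period */
            void (*stop)  (struct perf_event *event, int flags); /* PERF_EF_UPDATE: fold count */
            void (*read)  (struct perf_event *event);
            ...
    };

    #define PERF_EF_START   0x01    /* start counting as part of add() */
    #define PERF_EF_RELOAD  0x02    /* reload the period on start() */
    #define PERF_EF_UPDATE  0x04    /* update the count on stop() */

    #define PERF_HES_STOPPED 0x01   /* event->hw.state: not counting */
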
@@ -480,16 +471,6 @@ static void __perf_event_remove_from_context(void *info)
 
        list_del_event(event, ctx);
 
-       if (!ctx->task) {
-               /*
-                * Allow more per task events with respect to the
-                * reservation:
-                */
-               cpuctx->max_pertask =
-                       min(perf_max_events - ctx->nr_events,
-                           perf_max_events - perf_reserved_percpu);
-       }
-
        raw_spin_unlock(&ctx->lock);
 }
 
@@ -649,7 +630,7 @@ event_sched_in(struct perf_event *event,
         */
        smp_wmb();
 
-       if (event->pmu->enable(event)) {
+       if (event->pmu->add(event, PERF_EF_START)) {
                event->state = PERF_EVENT_STATE_INACTIVE;
                event->oncpu = -1;
                return -EAGAIN;
@@ -823,9 +804,6 @@ static void __perf_install_in_context(void *info)
                }
        }
 
-       if (!err && !ctx->task && cpuctx->max_pertask)
-               cpuctx->max_pertask--;
-
 unlock:
        raw_spin_unlock(&ctx->lock);
 }
@@ -849,6 +827,8 @@ perf_install_in_context(struct perf_event_context *ctx,
 {
        struct task_struct *task = ctx->task;
 
+       event->ctx = ctx;
+
        if (!task) {
                /*
                 * Per cpu events are installed via an smp call and
@@ -1482,22 +1462,6 @@ do {                                     \
        return div64_u64(dividend, divisor);
 }
 
-static void perf_event_stop(struct perf_event *event)
-{
-       if (!event->pmu->stop)
-               return event->pmu->disable(event);
-
-       return event->pmu->stop(event);
-}
-
-static int perf_event_start(struct perf_event *event)
-{
-       if (!event->pmu->start)
-               return event->pmu->enable(event);
-
-       return event->pmu->start(event);
-}
-
 static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
 {
        struct hw_perf_event *hwc = &event->hw;
@@ -1517,9 +1481,9 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
        hwc->sample_period = sample_period;
 
        if (local64_read(&hwc->period_left) > 8*sample_period) {
-               perf_event_stop(event);
+               event->pmu->stop(event, PERF_EF_UPDATE);
                local64_set(&hwc->period_left, 0);
-               perf_event_start(event);
+               event->pmu->start(event, PERF_EF_RELOAD);
        }
 }
 
@@ -1548,7 +1512,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
                 */
                if (interrupts == MAX_INTERRUPTS) {
                        perf_log_throttle(event, 1);
-                       event->pmu->unthrottle(event);
+                       event->pmu->start(event, 0);
                }
 
                if (!event->attr.freq || !event->attr.sample_freq)
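
Three of the new flags appear in these two hunks: PERF_EF_UPDATE asks
stop() to fold the current hardware count into event->count before
halting, PERF_EF_RELOAD asks start() to reprogram the just-cleared period
into the hardware, and unthrottling is now simply start() with no flags.
A hypothetical driver-side stop() honoring its flag (hw_counter_halt()
and example_pmu_read() are invented names):

    static void example_pmu_stop(struct perf_event *event, int flags)
    {
            hw_counter_halt(event);             /* hypothetical: halt the counter */
            event->hw.state |= PERF_HES_STOPPED;
            if (flags & PERF_EF_UPDATE)
                    example_pmu_read(event);    /* fold hw value into event->count */
    }
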
@@ -2506,6 +2470,9 @@ int perf_event_task_disable(void)
 
 static int perf_event_index(struct perf_event *event)
 {
+       if (event->hw.state & PERF_HES_STOPPED)
+               return 0;
+
        if (event->state != PERF_EVENT_STATE_ACTIVE)
                return 0;
 
@@ -4120,8 +4087,6 @@ static int __perf_event_overflow(struct perf_event *event, int nmi,
        struct hw_perf_event *hwc = &event->hw;
        int ret = 0;
 
-       throttle = (throttle && event->pmu->unthrottle != NULL);
-
        if (!throttle) {
                hwc->interrupts++;
        } else {
@@ -4246,7 +4211,7 @@ static void perf_swevent_overflow(struct perf_event *event, u64 overflow,
        }
 }
 
-static void perf_swevent_add(struct perf_event *event, u64 nr,
+static void perf_swevent_event(struct perf_event *event, u64 nr,
                               int nmi, struct perf_sample_data *data,
                               struct pt_regs *regs)
 {
@@ -4272,6 +4237,9 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
 static int perf_exclude_event(struct perf_event *event,
                              struct pt_regs *regs)
 {
+       if (event->hw.state & PERF_HES_STOPPED)
+               return 0;
+
        if (regs) {
                if (event->attr.exclude_user && user_mode(regs))
                        return 1;
@@ -4371,7 +4339,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
 
        hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
                if (perf_swevent_match(event, type, event_id, data, regs))
-                       perf_swevent_add(event, nr, nmi, data, regs);
+                       perf_swevent_event(event, nr, nmi, data, regs);
        }
 end:
        rcu_read_unlock();
@@ -4415,7 +4383,7 @@ static void perf_swevent_read(struct perf_event *event)
 {
 }
 
-static int perf_swevent_enable(struct perf_event *event)
+static int perf_swevent_add(struct perf_event *event, int flags)
 {
        struct hw_perf_event *hwc = &event->hw;
        struct perf_cpu_context *cpuctx;
@@ -4428,6 +4396,8 @@ static int perf_swevent_enable(struct perf_event *event)
                perf_swevent_set_period(event);
        }
 
+       hwc->state = !(flags & PERF_EF_START);
+
        head = find_swevent_head(cpuctx, event);
        if (WARN_ON_ONCE(!head))
                return -EINVAL;
@@ -4437,18 +4407,19 @@ static int perf_swevent_enable(struct perf_event *event)
        return 0;
 }
 
-static void perf_swevent_disable(struct perf_event *event)
+static void perf_swevent_del(struct perf_event *event, int flags)
 {
        hlist_del_rcu(&event->hlist_entry);
 }
 
-static void perf_swevent_void(struct perf_event *event)
+static void perf_swevent_start(struct perf_event *event, int flags)
 {
+       event->hw.state = 0;
 }
 
-static int perf_swevent_int(struct perf_event *event)
+static void perf_swevent_stop(struct perf_event *event, int flags)
 {
-       return 0;
+       event->hw.state = PERF_HES_STOPPED;
 }
 
 /* Deref the hlist from the update side */
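
Software PMUs have no hardware to halt, so start()/stop() above merely
toggle PERF_HES_STOPPED and the delivery paths check that bit. One caveat
worth flagging: in the perf_exclude_event() hunk earlier, a stopped event
makes the function return 0 ("do not exclude"), which reads inverted;
later kernels return 1 there so a stopped software event is actually
filtered out. The check with the sense that eventually shipped:

    if (event->hw.state & PERF_HES_STOPPED)
            return 1;       /* stopped events must not count */
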
@@ -4604,12 +4575,11 @@ static int perf_swevent_init(struct perf_event *event)
 
 static struct pmu perf_swevent = {
        .event_init     = perf_swevent_init,
-       .enable         = perf_swevent_enable,
-       .disable        = perf_swevent_disable,
-       .start          = perf_swevent_int,
-       .stop           = perf_swevent_void,
+       .add            = perf_swevent_add,
+       .del            = perf_swevent_del,
+       .start          = perf_swevent_start,
+       .stop           = perf_swevent_stop,
        .read           = perf_swevent_read,
-       .unthrottle     = perf_swevent_void, /* hwc->interrupts already reset */
 };
 
 #ifdef CONFIG_EVENT_TRACING
@@ -4657,7 +4627,7 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
 
        hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
                if (perf_tp_event_match(event, &data, regs))
-                       perf_swevent_add(event, count, 1, &data, regs);
+                       perf_swevent_event(event, count, 1, &data, regs);
        }
 
        perf_swevent_put_recursion_context(rctx);
@@ -4696,12 +4666,11 @@ static int perf_tp_event_init(struct perf_event *event)
 
 static struct pmu perf_tracepoint = {
        .event_init     = perf_tp_event_init,
-       .enable         = perf_trace_enable,
-       .disable        = perf_trace_disable,
-       .start          = perf_swevent_int,
-       .stop           = perf_swevent_void,
+       .add            = perf_trace_add,
+       .del            = perf_trace_del,
+       .start          = perf_swevent_start,
+       .stop           = perf_swevent_stop,
        .read           = perf_swevent_read,
-       .unthrottle     = perf_swevent_void,
 };
 
 static inline void perf_tp_register(void)
@@ -4757,8 +4726,8 @@ void perf_bp_event(struct perf_event *bp, void *data)
 
        perf_sample_data_init(&sample, bp->attr.bp_addr);
 
-       if (!perf_exclude_event(bp, regs))
-               perf_swevent_add(bp, 1, 1, &sample, regs);
+       if (!bp->hw.state && !perf_exclude_event(bp, regs))
+               perf_swevent_event(bp, 1, 1, &sample, regs);
 }
 #endif
 
@@ -4834,32 +4803,39 @@ static void perf_swevent_cancel_hrtimer(struct perf_event *event)
 
 static void cpu_clock_event_update(struct perf_event *event)
 {
-       int cpu = raw_smp_processor_id();
        s64 prev;
        u64 now;
 
-       now = cpu_clock(cpu);
+       now = local_clock();
        prev = local64_xchg(&event->hw.prev_count, now);
        local64_add(now - prev, &event->count);
 }
 
-static int cpu_clock_event_enable(struct perf_event *event)
+static void cpu_clock_event_start(struct perf_event *event, int flags)
 {
-       struct hw_perf_event *hwc = &event->hw;
-       int cpu = raw_smp_processor_id();
-
-       local64_set(&hwc->prev_count, cpu_clock(cpu));
+       local64_set(&event->hw.prev_count, local_clock());
        perf_swevent_start_hrtimer(event);
-
-       return 0;
 }
 
-static void cpu_clock_event_disable(struct perf_event *event)
+static void cpu_clock_event_stop(struct perf_event *event, int flags)
 {
        perf_swevent_cancel_hrtimer(event);
        cpu_clock_event_update(event);
 }
 
+static int cpu_clock_event_add(struct perf_event *event, int flags)
+{
+       if (flags & PERF_EF_START)
+               cpu_clock_event_start(event, flags);
+
+       return 0;
+}
+
+static void cpu_clock_event_del(struct perf_event *event, int flags)
+{
+       cpu_clock_event_stop(event, flags);
+}
+
 static void cpu_clock_event_read(struct perf_event *event)
 {
        cpu_clock_event_update(event);
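
The clock events show the intended composition: add()/del() handle
attach/detach and delegate the counting to start()/stop(), with del()
passing PERF_EF_UPDATE so the final delta is accounted. The same pattern,
with hypothetical names, fits any PMU whose start/stop do all the work:

    static int my_pmu_add(struct perf_event *event, int flags)
    {
            if (flags & PERF_EF_START)
                    my_pmu_start(event, flags);
            return 0;
    }

    static void my_pmu_del(struct perf_event *event, int flags)
    {
            my_pmu_stop(event, PERF_EF_UPDATE); /* account the final delta */
    }
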
@@ -4878,8 +4854,10 @@ static int cpu_clock_event_init(struct perf_event *event)
 
 static struct pmu perf_cpu_clock = {
        .event_init     = cpu_clock_event_init,
-       .enable         = cpu_clock_event_enable,
-       .disable        = cpu_clock_event_disable,
+       .add            = cpu_clock_event_add,
+       .del            = cpu_clock_event_del,
+       .start          = cpu_clock_event_start,
+       .stop           = cpu_clock_event_stop,
        .read           = cpu_clock_event_read,
 };
 
@@ -4897,25 +4875,29 @@ static void task_clock_event_update(struct perf_event *event, u64 now)
        local64_add(delta, &event->count);
 }
 
-static int task_clock_event_enable(struct perf_event *event)
+static void task_clock_event_start(struct perf_event *event, int flags)
 {
-       struct hw_perf_event *hwc = &event->hw;
-       u64 now;
-
-       now = event->ctx->time;
-
-       local64_set(&hwc->prev_count, now);
-
+       local64_set(&event->hw.prev_count, event->ctx->time);
        perf_swevent_start_hrtimer(event);
-
-       return 0;
 }
 
-static void task_clock_event_disable(struct perf_event *event)
+static void task_clock_event_stop(struct perf_event *event, int flags)
 {
        perf_swevent_cancel_hrtimer(event);
        task_clock_event_update(event, event->ctx->time);
+}
+
+static int task_clock_event_add(struct perf_event *event, int flags)
+{
+       if (flags & PERF_EF_START)
+               task_clock_event_start(event, flags);
 
+       return 0;
+}
+
+static void task_clock_event_del(struct perf_event *event, int flags)
+{
+       task_clock_event_stop(event, PERF_EF_UPDATE);
 }
 
 static void task_clock_event_read(struct perf_event *event)
@@ -4947,8 +4929,10 @@ static int task_clock_event_init(struct perf_event *event)
 
 static struct pmu perf_task_clock = {
        .event_init     = task_clock_event_init,
-       .enable         = task_clock_event_enable,
-       .disable        = task_clock_event_disable,
+       .add            = task_clock_event_add,
+       .del            = task_clock_event_del,
+       .start          = task_clock_event_start,
+       .stop           = task_clock_event_stop,
        .read           = task_clock_event_read,
 };
 
@@ -5056,20 +5040,17 @@ struct pmu *perf_init_event(struct perf_event *event)
  * Allocate and initialize an event structure
  */
 static struct perf_event *
-perf_event_alloc(struct perf_event_attr *attr,
-                  int cpu,
-                  struct perf_event_context *ctx,
+perf_event_alloc(struct perf_event_attr *attr, int cpu,
                   struct perf_event *group_leader,
                   struct perf_event *parent_event,
-                  perf_overflow_handler_t overflow_handler,
-                  gfp_t gfpflags)
+                  perf_overflow_handler_t overflow_handler)
 {
        struct pmu *pmu;
        struct perf_event *event;
        struct hw_perf_event *hwc;
        long err;
 
-       event = kzalloc(sizeof(*event), gfpflags);
+       event = kzalloc(sizeof(*event), GFP_KERNEL);
        if (!event)
                return ERR_PTR(-ENOMEM);
 
@@ -5094,7 +5075,6 @@ perf_event_alloc(struct perf_event_attr *attr,
        event->attr             = *attr;
        event->group_leader     = group_leader;
        event->pmu              = NULL;
-       event->ctx              = ctx;
        event->oncpu            = -1;
 
        event->parent           = parent_event;
@@ -5339,20 +5319,26 @@ SYSCALL_DEFINE5(perf_event_open,
        if (event_fd < 0)
                return event_fd;
 
+       event = perf_event_alloc(&attr, cpu, group_leader, NULL, NULL);
+       if (IS_ERR(event)) {
+               err = PTR_ERR(event);
+               goto err_fd;
+       }
+
        /*
         * Get the target context (task or percpu):
         */
        ctx = find_get_context(pid, cpu);
        if (IS_ERR(ctx)) {
                err = PTR_ERR(ctx);
-               goto err_fd;
+               goto err_alloc;
        }
 
        if (group_fd != -1) {
                group_leader = perf_fget_light(group_fd, &fput_needed);
                if (IS_ERR(group_leader)) {
                        err = PTR_ERR(group_leader);
-                       goto err_put_context;
+                       goto err_context;
                }
                group_file = group_leader->filp;
                if (flags & PERF_FLAG_FD_OUTPUT)
@@ -5372,37 +5358,30 @@ SYSCALL_DEFINE5(perf_event_open,
                 * becoming part of another group-sibling):
                 */
                if (group_leader->group_leader != group_leader)
-                       goto err_put_context;
+                       goto err_context;
                /*
                 * Do not allow to attach to a group in a different
                 * task or CPU context:
                 */
                if (group_leader->ctx != ctx)
-                       goto err_put_context;
+                       goto err_context;
                /*
                 * Only a group leader can be exclusive or pinned
                 */
                if (attr.exclusive || attr.pinned)
-                       goto err_put_context;
-       }
-
-       event = perf_event_alloc(&attr, cpu, ctx, group_leader,
-                                    NULL, NULL, GFP_KERNEL);
-       if (IS_ERR(event)) {
-               err = PTR_ERR(event);
-               goto err_put_context;
+                       goto err_context;
        }
 
        if (output_event) {
                err = perf_event_set_output(event, output_event);
                if (err)
-                       goto err_free_put_context;
+                       goto err_context;
        }
 
        event_file = anon_inode_getfile("[perf_event]", &perf_fops, event, O_RDWR);
        if (IS_ERR(event_file)) {
                err = PTR_ERR(event_file);
-               goto err_free_put_context;
+               goto err_context;
        }
 
        event->filp = event_file;
@@ -5428,11 +5407,11 @@ SYSCALL_DEFINE5(perf_event_open,
        fd_install(event_fd, event_file);
        return event_fd;
 
-err_free_put_context:
-       free_event(event);
-err_put_context:
+err_context:
        fput_light(group_file, fput_needed);
        put_ctx(ctx);
+err_alloc:
+       free_event(event);
 err_fd:
        put_unused_fd(event_fd);
        return err;
@@ -5450,25 +5429,24 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
                                 pid_t pid,
                                 perf_overflow_handler_t overflow_handler)
 {
-       struct perf_event *event;
        struct perf_event_context *ctx;
+       struct perf_event *event;
        int err;
 
        /*
         * Get the target context (task or percpu):
         */
 
+       event = perf_event_alloc(attr, cpu, NULL, NULL, overflow_handler);
+       if (IS_ERR(event)) {
+               err = PTR_ERR(event);
+               goto err;
+       }
+
        ctx = find_get_context(pid, cpu);
        if (IS_ERR(ctx)) {
                err = PTR_ERR(ctx);
-               goto err_exit;
-       }
-
-       event = perf_event_alloc(attr, cpu, ctx, NULL,
-                                NULL, overflow_handler, GFP_KERNEL);
-       if (IS_ERR(event)) {
-               err = PTR_ERR(event);
-               goto err_put_context;
+               goto err_free;
        }
 
        event->filp = NULL;
@@ -5486,9 +5464,9 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 
        return event;
 
- err_put_context:
-       put_ctx(ctx);
- err_exit:
+err_free:
+       free_event(event);
+err:
        return ERR_PTR(err);
 }
 EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
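
With allocation preceding find_get_context(), a failed context lookup only
has the event itself to free. The in-kernel API is otherwise unchanged; a
hedged usage sketch (values illustrative): count cycles on CPU 0, bound to
no task (pid == -1), with no overflow handler:

    struct perf_event_attr attr = {
            .type   = PERF_TYPE_HARDWARE,
            .config = PERF_COUNT_HW_CPU_CYCLES,
            .size   = sizeof(attr),
    };
    struct perf_event *event;

    event = perf_event_create_kernel_counter(&attr, 0, -1, NULL);
    if (IS_ERR(event))
            return PTR_ERR(event);
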
@@ -5516,9 +5494,9 @@ inherit_event(struct perf_event *parent_event,
                parent_event = parent_event->parent;
 
        child_event = perf_event_alloc(&parent_event->attr,
-                                          parent_event->cpu, child_ctx,
+                                          parent_event->cpu,
                                           group_leader, parent_event,
-                                          NULL, GFP_KERNEL);
+                                          NULL);
        if (IS_ERR(child_event))
                return child_event;
        get_ctx(child_ctx);
@@ -5543,6 +5521,7 @@ inherit_event(struct perf_event *parent_event,
                local64_set(&hwc->period_left, sample_period);
        }
 
+       child_event->ctx = child_ctx;
        child_event->overflow_handler = parent_event->overflow_handler;
 
        /*
@@ -5926,10 +5905,6 @@ static void __cpuinit perf_event_init_cpu(int cpu)
 
        cpuctx = &per_cpu(perf_cpu_context, cpu);
 
-       spin_lock(&perf_resource_lock);
-       cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
-       spin_unlock(&perf_resource_lock);
-
        mutex_lock(&cpuctx->hlist_mutex);
        if (cpuctx->hlist_refcount > 0) {
                struct swevent_hlist *hlist;
@@ -6004,101 +5979,3 @@ void __init perf_event_init(void)
        perf_tp_register();
        perf_cpu_notifier(perf_cpu_notify);
 }
-
-static ssize_t perf_show_reserve_percpu(struct sysdev_class *class,
-                                       struct sysdev_class_attribute *attr,
-                                       char *buf)
-{
-       return sprintf(buf, "%d\n", perf_reserved_percpu);
-}
-
-static ssize_t
-perf_set_reserve_percpu(struct sysdev_class *class,
-                       struct sysdev_class_attribute *attr,
-                       const char *buf,
-                       size_t count)
-{
-       struct perf_cpu_context *cpuctx;
-       unsigned long val;
-       int err, cpu, mpt;
-
-       err = strict_strtoul(buf, 10, &val);
-       if (err)
-               return err;
-       if (val > perf_max_events)
-               return -EINVAL;
-
-       spin_lock(&perf_resource_lock);
-       perf_reserved_percpu = val;
-       for_each_online_cpu(cpu) {
-               cpuctx = &per_cpu(perf_cpu_context, cpu);
-               raw_spin_lock_irq(&cpuctx->ctx.lock);
-               mpt = min(perf_max_events - cpuctx->ctx.nr_events,
-                         perf_max_events - perf_reserved_percpu);
-               cpuctx->max_pertask = mpt;
-               raw_spin_unlock_irq(&cpuctx->ctx.lock);
-       }
-       spin_unlock(&perf_resource_lock);
-
-       return count;
-}
-
-static ssize_t perf_show_overcommit(struct sysdev_class *class,
-                                   struct sysdev_class_attribute *attr,
-                                   char *buf)
-{
-       return sprintf(buf, "%d\n", perf_overcommit);
-}
-
-static ssize_t
-perf_set_overcommit(struct sysdev_class *class,
-                   struct sysdev_class_attribute *attr,
-                   const char *buf, size_t count)
-{
-       unsigned long val;
-       int err;
-
-       err = strict_strtoul(buf, 10, &val);
-       if (err)
-               return err;
-       if (val > 1)
-               return -EINVAL;
-
-       spin_lock(&perf_resource_lock);
-       perf_overcommit = val;
-       spin_unlock(&perf_resource_lock);
-
-       return count;
-}
-
-static SYSDEV_CLASS_ATTR(
-                               reserve_percpu,
-                               0644,
-                               perf_show_reserve_percpu,
-                               perf_set_reserve_percpu
-                       );
-
-static SYSDEV_CLASS_ATTR(
-                               overcommit,
-                               0644,
-                               perf_show_overcommit,
-                               perf_set_overcommit
-                       );
-
-static struct attribute *perfclass_attrs[] = {
-       &attr_reserve_percpu.attr,
-       &attr_overcommit.attr,
-       NULL
-};
-
-static struct attribute_group perfclass_attr_group = {
-       .attrs                  = perfclass_attrs,
-       .name                   = "perf_events",
-};
-
-static int __init perf_event_sysfs_init(void)
-{
-       return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
-                                 &perfclass_attr_group);
-}
-device_initcall(perf_event_sysfs_init);