From: Peter Zijlstra Date: Fri, 11 Jun 2010 11:35:08 +0000 (+0200) Subject: perf: Register PMU implementations X-Git-Url: https://git.karo-electronics.de/?a=commitdiff_plain;h=b0a873ebbf87bf38;p=linux-beck.git perf: Register PMU implementations Simple registration interface for struct pmu, this provides the infrastructure for removing all the weak functions. Signed-off-by: Peter Zijlstra Cc: paulus Cc: stephane eranian Cc: Robert Richter Cc: Will Deacon Cc: Paul Mundt Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Lin Ming Cc: Yanmin Cc: Deng-Cheng Zhu Cc: David Miller Cc: Michael Cree LKML-Reference: Signed-off-by: Ingo Molnar --- diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 56fa41590381..19660b5c298f 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -642,34 +642,39 @@ static int __hw_perf_event_init(struct perf_event *event) return 0; } -static struct pmu pmu = { - .enable = alpha_pmu_enable, - .disable = alpha_pmu_disable, - .read = alpha_pmu_read, - .unthrottle = alpha_pmu_unthrottle, -}; - - /* * Main entry point to initialise a HW performance event. */ -struct pmu *hw_perf_event_init(struct perf_event *event) +static int alpha_pmu_event_init(struct perf_event *event) { int err; + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + if (!alpha_pmu) - return ERR_PTR(-ENODEV); + return -ENODEV; /* Do the real initialisation work. */ err = __hw_perf_event_init(event); - if (err) - return ERR_PTR(err); - - return &pmu; + return err; } - +static struct pmu pmu = { + .event_init = alpha_pmu_event_init, + .enable = alpha_pmu_enable, + .disable = alpha_pmu_disable, + .read = alpha_pmu_read, + .unthrottle = alpha_pmu_unthrottle, +}; /* * Main entry point - enable HW performance counters. @@ -838,5 +843,7 @@ void __init init_hw_perf_events(void) /* And set up PMU specification */ alpha_pmu = &ev67_pmu; perf_max_events = alpha_pmu->num_pmcs; + + perf_pmu_register(&pmu); } diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 0671e92c5111..f62f9db35db3 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -306,12 +306,7 @@ out: return err; } -static struct pmu pmu = { - .enable = armpmu_enable, - .disable = armpmu_disable, - .unthrottle = armpmu_unthrottle, - .read = armpmu_read, -}; +static struct pmu pmu; static int validate_event(struct cpu_hw_events *cpuc, @@ -491,20 +486,29 @@ __hw_perf_event_init(struct perf_event *event) return err; } -struct pmu * -hw_perf_event_init(struct perf_event *event) +static int armpmu_event_init(struct perf_event *event) { int err = 0; + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + if (!armpmu) - return ERR_PTR(-ENODEV); + return -ENODEV; event->destroy = hw_perf_event_destroy; if (!atomic_inc_not_zero(&active_events)) { if (atomic_read(&active_events) > perf_max_events) { atomic_dec(&active_events); - return ERR_PTR(-ENOSPC); + return -ENOSPC; } mutex_lock(&pmu_reserve_mutex); @@ -518,15 +522,23 @@ hw_perf_event_init(struct perf_event *event) } if (err) - return ERR_PTR(err); + return err; err = __hw_perf_event_init(event); if (err) hw_perf_event_destroy(event); - return err ? ERR_PTR(err) : &pmu; + return err; } +static struct pmu pmu = { + .event_init = armpmu_event_init, + .enable = armpmu_enable, + .disable = armpmu_disable, + .unthrottle = armpmu_unthrottle, + .read = armpmu_read, +}; + void hw_perf_enable(void) { @@ -2994,6 +3006,8 @@ init_hw_perf_events(void) perf_max_events = -1; } + perf_pmu_register(&pmu); + return 0; } arch_initcall(init_hw_perf_events); diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c index 5f78681ad902..19131b2614b9 100644 --- a/arch/powerpc/kernel/perf_event.c +++ b/arch/powerpc/kernel/perf_event.c @@ -904,16 +904,6 @@ int power_pmu_commit_txn(struct pmu *pmu) return 0; } -struct pmu power_pmu = { - .enable = power_pmu_enable, - .disable = power_pmu_disable, - .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, - .start_txn = power_pmu_start_txn, - .cancel_txn = power_pmu_cancel_txn, - .commit_txn = power_pmu_commit_txn, -}; - /* * Return 1 if we might be able to put event on a limited PMC, * or 0 if not. @@ -1014,7 +1004,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -struct pmu *hw_perf_event_init(struct perf_event *event) +static int power_pmu_event_init(struct perf_event *event) { u64 ev; unsigned long flags; @@ -1026,25 +1016,27 @@ struct pmu *hw_perf_event_init(struct perf_event *event) struct cpu_hw_events *cpuhw; if (!ppmu) - return ERR_PTR(-ENXIO); + return -ENOENT; + switch (event->attr.type) { case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: ev = event->attr.config; break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } + event->hw.config_base = ev; event->hw.idx = 0; @@ -1081,7 +1073,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) */ ev = normal_pmc_alternative(ev, flags); if (!ev) - return ERR_PTR(-EINVAL); + return -EINVAL; } } @@ -1095,19 +1087,19 @@ struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, ctrs, events, cflags); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } events[n] = ev; ctrs[n] = event; cflags[n] = flags; if (check_excludes(ctrs, cflags, n, 1)) - return ERR_PTR(-EINVAL); + return -EINVAL; cpuhw = &get_cpu_var(cpu_hw_events); err = power_check_constraints(cpuhw, events, cflags, n + 1); put_cpu_var(cpu_hw_events); if (err) - return ERR_PTR(-EINVAL); + return -EINVAL; event->hw.config = events[n]; event->hw.event_base = cflags[n]; @@ -1132,11 +1124,20 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &power_pmu; + return err; } +struct pmu power_pmu = { + .event_init = power_pmu_event_init, + .enable = power_pmu_enable, + .disable = power_pmu_disable, + .read = power_pmu_read, + .unthrottle = power_pmu_unthrottle, + .start_txn = power_pmu_start_txn, + .cancel_txn = power_pmu_cancel_txn, + .commit_txn = power_pmu_commit_txn, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -1342,6 +1343,7 @@ int register_power_pmu(struct power_pmu *pmu) freeze_events_kernel = MMCR0_FCHV; #endif /* CONFIG_PPC64 */ + perf_pmu_register(&power_pmu); perf_cpu_notifier(power_pmu_notifier); return 0; diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c index d7619b5e7a6e..ea6a804e43fd 100644 --- a/arch/powerpc/kernel/perf_event_fsl_emb.c +++ b/arch/powerpc/kernel/perf_event_fsl_emb.c @@ -378,13 +378,6 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event) local_irq_restore(flags); } -static struct pmu fsl_emb_pmu = { - .enable = fsl_emb_pmu_enable, - .disable = fsl_emb_pmu_disable, - .read = fsl_emb_pmu_read, - .unthrottle = fsl_emb_pmu_unthrottle, -}; - /* * Release the PMU if this is the last perf_event. */ @@ -428,7 +421,7 @@ static int hw_perf_cache_event(u64 config, u64 *eventp) return 0; } -struct pmu *hw_perf_event_init(struct perf_event *event) +static int fsl_emb_pmu_event_init(struct perf_event *event) { u64 ev; struct perf_event *events[MAX_HWEVENTS]; @@ -441,14 +434,14 @@ struct pmu *hw_perf_event_init(struct perf_event *event) case PERF_TYPE_HARDWARE: ev = event->attr.config; if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); + return -EOPNOTSUPP; ev = ppmu->generic_events[ev]; break; case PERF_TYPE_HW_CACHE: err = hw_perf_cache_event(event->attr.config, &ev); if (err) - return ERR_PTR(err); + return err; break; case PERF_TYPE_RAW: @@ -456,12 +449,12 @@ struct pmu *hw_perf_event_init(struct perf_event *event) break; default: - return ERR_PTR(-EINVAL); + return -ENOENT; } event->hw.config = ppmu->xlate_event(ev); if (!(event->hw.config & FSL_EMB_EVENT_VALID)) - return ERR_PTR(-EINVAL); + return -EINVAL; /* * If this is in a group, check if it can go on with all the @@ -473,7 +466,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) n = collect_events(event->group_leader, ppmu->n_counter - 1, events); if (n < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; } if (event->hw.config & FSL_EMB_EVENT_RESTRICTED) { @@ -484,7 +477,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } if (num_restricted >= ppmu->n_restricted) - return ERR_PTR(-EINVAL); + return -EINVAL; } event->hw.idx = -1; @@ -497,7 +490,7 @@ struct pmu *hw_perf_event_init(struct perf_event *event) if (event->attr.exclude_kernel) event->hw.config_base |= PMLCA_FCS; if (event->attr.exclude_idle) - return ERR_PTR(-ENOTSUPP); + return -ENOTSUPP; event->hw.last_period = event->hw.sample_period; local64_set(&event->hw.period_left, event->hw.last_period); @@ -523,11 +516,17 @@ struct pmu *hw_perf_event_init(struct perf_event *event) } event->destroy = hw_perf_event_destroy; - if (err) - return ERR_PTR(err); - return &fsl_emb_pmu; + return err; } +static struct pmu fsl_emb_pmu = { + .event_init = fsl_emb_pmu_event_init, + .enable = fsl_emb_pmu_enable, + .disable = fsl_emb_pmu_disable, + .read = fsl_emb_pmu_read, + .unthrottle = fsl_emb_pmu_unthrottle, +}; + /* * A counter has overflowed; update its count and record * things if requested. Note that interrupts are hard-disabled @@ -651,5 +650,7 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) pr_info("%s performance monitor hardware support registered\n", pmu->name); + perf_pmu_register(&fsl_emb_pmu); + return 0; } diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c index 395572c94c6a..8cb206597e0c 100644 --- a/arch/sh/kernel/perf_event.c +++ b/arch/sh/kernel/perf_event.c @@ -257,26 +257,38 @@ static void sh_pmu_read(struct perf_event *event) sh_perf_event_update(event, &event->hw, event->hw.idx); } -static struct pmu pmu = { - .enable = sh_pmu_enable, - .disable = sh_pmu_disable, - .read = sh_pmu_read, -}; - -struct pmu *hw_perf_event_init(struct perf_event *event) +static int sh_pmu_event_init(struct perf_event *event) { - int err = __hw_perf_event_init(event); + int err; + + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HW_CACHE: + case PERF_TYPE_HARDWARE: + err = __hw_perf_event_init(event); + break; + + default: + return -ENOENT; + } + if (unlikely(err)) { if (event->destroy) event->destroy(event); - return ERR_PTR(err); } - return &pmu; + return err; } +static struct pmu pmu = { + .event_init = sh_pmu_event_init, + .enable = sh_pmu_enable, + .disable = sh_pmu_disable, + .read = sh_pmu_read, +}; + static void sh_pmu_setup(int cpu) -{ + struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); memset(cpuhw, 0, sizeof(struct cpu_hw_events)); @@ -325,6 +337,7 @@ int __cpuinit register_sh_pmu(struct sh_pmu *pmu) WARN_ON(pmu->num_events > MAX_HWEVENTS); + perf_pmu_register(&pmu); perf_cpu_notifier(sh_pmu_notifier); return 0; } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 481b894a5018..bed4327f5a7a 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1025,7 +1025,7 @@ out: return ret; } -static int __hw_perf_event_init(struct perf_event *event) +static int sparc_pmu_event_init(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; struct perf_event *evts[MAX_HWEVENTS]; @@ -1038,17 +1038,27 @@ static int __hw_perf_event_init(struct perf_event *event) if (atomic_read(&nmi_active) < 0) return -ENODEV; - if (attr->type == PERF_TYPE_HARDWARE) { + switch (attr->type) { + case PERF_TYPE_HARDWARE: if (attr->config >= sparc_pmu->max_events) return -EINVAL; pmap = sparc_pmu->event_map(attr->config); - } else if (attr->type == PERF_TYPE_HW_CACHE) { + break; + + case PERF_TYPE_HW_CACHE: pmap = sparc_map_cache_event(attr->config); if (IS_ERR(pmap)) return PTR_ERR(pmap); - } else + break; + + case PERF_TYPE_RAW: return -EOPNOTSUPP; + default: + return -ENOENT; + + } + /* We save the enable bits in the config_base. */ hwc->config_base = sparc_pmu->irq_bit; if (!attr->exclude_user) @@ -1143,6 +1153,7 @@ static int sparc_pmu_commit_txn(struct pmu *pmu) } static struct pmu pmu = { + .event_init = sparc_pmu_event_init, .enable = sparc_pmu_enable, .disable = sparc_pmu_disable, .read = sparc_pmu_read, @@ -1152,15 +1163,6 @@ static struct pmu pmu = { .commit_txn = sparc_pmu_commit_txn, }; -struct pmu *hw_perf_event_init(struct perf_event *event) -{ - int err = __hw_perf_event_init(event); - - if (err) - return ERR_PTR(err); - return &pmu; -} - void perf_event_print_debug(void) { unsigned long flags; @@ -1280,6 +1282,7 @@ void __init init_hw_perf_events(void) /* All sparc64 PMUs currently have 2 events. */ perf_max_events = 2; + perf_pmu_register(&pmu); register_die_notifier(&perf_event_nmi_notifier); } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index fdd97f2e9961..2c89264ee791 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -530,7 +530,7 @@ static int x86_pmu_hw_config(struct perf_event *event) /* * Setup the hardware configuration for a given attr_type */ -static int __hw_perf_event_init(struct perf_event *event) +static int __x86_pmu_event_init(struct perf_event *event) { int err; @@ -1414,6 +1414,7 @@ void __init init_hw_perf_events(void) pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); + perf_pmu_register(&pmu); perf_cpu_notifier(x86_pmu_notifier); } @@ -1483,18 +1484,6 @@ static int x86_pmu_commit_txn(struct pmu *pmu) return 0; } -static struct pmu pmu = { - .enable = x86_pmu_enable, - .disable = x86_pmu_disable, - .start = x86_pmu_start, - .stop = x86_pmu_stop, - .read = x86_pmu_read, - .unthrottle = x86_pmu_unthrottle, - .start_txn = x86_pmu_start_txn, - .cancel_txn = x86_pmu_cancel_txn, - .commit_txn = x86_pmu_commit_txn, -}; - /* * validate that we can schedule this event */ @@ -1569,12 +1558,22 @@ out: return ret; } -struct pmu *hw_perf_event_init(struct perf_event *event) +int x86_pmu_event_init(struct perf_event *event) { struct pmu *tmp; int err; - err = __hw_perf_event_init(event); + switch (event->attr.type) { + case PERF_TYPE_RAW: + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + break; + + default: + return -ENOENT; + } + + err = __x86_pmu_event_init(event); if (!err) { /* * we temporarily connect event to its pmu @@ -1594,12 +1593,24 @@ struct pmu *hw_perf_event_init(struct perf_event *event) if (err) { if (event->destroy) event->destroy(event); - return ERR_PTR(err); } - return &pmu; + return err; } +static struct pmu pmu = { + .event_init = x86_pmu_event_init, + .enable = x86_pmu_enable, + .disable = x86_pmu_disable, + .start = x86_pmu_start, + .stop = x86_pmu_stop, + .read = x86_pmu_read, + .unthrottle = x86_pmu_unthrottle, + .start_txn = x86_pmu_start_txn, + .cancel_txn = x86_pmu_cancel_txn, + .commit_txn = x86_pmu_commit_txn, +}; + /* * callchain support */ diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 09d048b52115..ab72f56eb372 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -561,6 +561,13 @@ struct perf_event; * struct pmu - generic performance monitoring unit */ struct pmu { + struct list_head entry; + + /* + * Should return -ENOENT when the @event doesn't match this pmu + */ + int (*event_init) (struct perf_event *event); + int (*enable) (struct perf_event *event); void (*disable) (struct perf_event *event); int (*start) (struct perf_event *event); @@ -849,7 +856,8 @@ struct perf_output_handle { */ extern int perf_max_events; -extern struct pmu *hw_perf_event_init(struct perf_event *event); +extern int perf_pmu_register(struct pmu *pmu); +extern void perf_pmu_unregister(struct pmu *pmu); extern void perf_event_task_sched_in(struct task_struct *task); extern void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next); diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index d71a987fd2bf..e9c5cfa1fd20 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -565,6 +565,34 @@ static struct notifier_block hw_breakpoint_exceptions_nb = { .priority = 0x7fffffff }; +static void bp_perf_event_destroy(struct perf_event *event) +{ + release_bp_slot(event); +} + +static int hw_breakpoint_event_init(struct perf_event *bp) +{ + int err; + + if (bp->attr.type != PERF_TYPE_BREAKPOINT) + return -ENOENT; + + err = register_perf_hw_breakpoint(bp); + if (err) + return err; + + bp->destroy = bp_perf_event_destroy; + + return 0; +} + +static struct pmu perf_breakpoint = { + .event_init = hw_breakpoint_event_init, + .enable = arch_install_hw_breakpoint, + .disable = arch_uninstall_hw_breakpoint, + .read = hw_breakpoint_pmu_read, +}; + static int __init init_hw_breakpoint(void) { unsigned int **task_bp_pinned; @@ -586,6 +614,8 @@ static int __init init_hw_breakpoint(void) constraints_initialized = 1; + perf_pmu_register(&perf_breakpoint); + return register_die_notifier(&hw_breakpoint_exceptions_nb); err_alloc: @@ -601,8 +631,3 @@ static int __init init_hw_breakpoint(void) core_initcall(init_hw_breakpoint); -struct pmu perf_ops_bp = { - .enable = arch_install_hw_breakpoint, - .disable = arch_uninstall_hw_breakpoint, - .read = hw_breakpoint_pmu_read, -}; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index fb46fd13f31f..288ce43de57c 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -31,7 +31,6 @@ #include #include #include -#include #include @@ -72,14 +71,6 @@ static atomic64_t perf_event_id; */ static DEFINE_SPINLOCK(perf_resource_lock); -/* - * Architecture provided APIs - weak aliases: - */ -extern __weak struct pmu *hw_perf_event_init(struct perf_event *event) -{ - return NULL; -} - void __weak hw_perf_disable(void) { barrier(); } void __weak hw_perf_enable(void) { barrier(); } @@ -4501,182 +4492,6 @@ static int perf_swevent_int(struct perf_event *event) return 0; } -static struct pmu perf_ops_generic = { - .enable = perf_swevent_enable, - .disable = perf_swevent_disable, - .start = perf_swevent_int, - .stop = perf_swevent_void, - .read = perf_swevent_read, - .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ -}; - -/* - * hrtimer based swevent callback - */ - -static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) -{ - enum hrtimer_restart ret = HRTIMER_RESTART; - struct perf_sample_data data; - struct pt_regs *regs; - struct perf_event *event; - u64 period; - - event = container_of(hrtimer, struct perf_event, hw.hrtimer); - event->pmu->read(event); - - perf_sample_data_init(&data, 0); - data.period = event->hw.last_period; - regs = get_irq_regs(); - - if (regs && !perf_exclude_event(event, regs)) { - if (!(event->attr.exclude_idle && current->pid == 0)) - if (perf_event_overflow(event, 0, &data, regs)) - ret = HRTIMER_NORESTART; - } - - period = max_t(u64, 10000, event->hw.sample_period); - hrtimer_forward_now(hrtimer, ns_to_ktime(period)); - - return ret; -} - -static void perf_swevent_start_hrtimer(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swevent_hrtimer; - if (hwc->sample_period) { - u64 period; - - if (hwc->remaining) { - if (hwc->remaining < 0) - period = 10000; - else - period = hwc->remaining; - hwc->remaining = 0; - } else { - period = max_t(u64, 10000, hwc->sample_period); - } - __hrtimer_start_range_ns(&hwc->hrtimer, - ns_to_ktime(period), 0, - HRTIMER_MODE_REL, 0); - } -} - -static void perf_swevent_cancel_hrtimer(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - - if (hwc->sample_period) { - ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); - hwc->remaining = ktime_to_ns(remaining); - - hrtimer_cancel(&hwc->hrtimer); - } -} - -/* - * Software event: cpu wall time clock - */ - -static void cpu_clock_perf_event_update(struct perf_event *event) -{ - int cpu = raw_smp_processor_id(); - s64 prev; - u64 now; - - now = cpu_clock(cpu); - prev = local64_xchg(&event->hw.prev_count, now); - local64_add(now - prev, &event->count); -} - -static int cpu_clock_perf_event_enable(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int cpu = raw_smp_processor_id(); - - local64_set(&hwc->prev_count, cpu_clock(cpu)); - perf_swevent_start_hrtimer(event); - - return 0; -} - -static void cpu_clock_perf_event_disable(struct perf_event *event) -{ - perf_swevent_cancel_hrtimer(event); - cpu_clock_perf_event_update(event); -} - -static void cpu_clock_perf_event_read(struct perf_event *event) -{ - cpu_clock_perf_event_update(event); -} - -static struct pmu perf_ops_cpu_clock = { - .enable = cpu_clock_perf_event_enable, - .disable = cpu_clock_perf_event_disable, - .read = cpu_clock_perf_event_read, -}; - -/* - * Software event: task time clock - */ - -static void task_clock_perf_event_update(struct perf_event *event, u64 now) -{ - u64 prev; - s64 delta; - - prev = local64_xchg(&event->hw.prev_count, now); - delta = now - prev; - local64_add(delta, &event->count); -} - -static int task_clock_perf_event_enable(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - u64 now; - - now = event->ctx->time; - - local64_set(&hwc->prev_count, now); - - perf_swevent_start_hrtimer(event); - - return 0; -} - -static void task_clock_perf_event_disable(struct perf_event *event) -{ - perf_swevent_cancel_hrtimer(event); - task_clock_perf_event_update(event, event->ctx->time); - -} - -static void task_clock_perf_event_read(struct perf_event *event) -{ - u64 time; - - if (!in_nmi()) { - update_context_time(event->ctx); - time = event->ctx->time; - } else { - u64 now = perf_clock(); - u64 delta = now - event->ctx->timestamp; - time = event->ctx->time + delta; - } - - task_clock_perf_event_update(event, time); -} - -static struct pmu perf_ops_task_clock = { - .enable = task_clock_perf_event_enable, - .disable = task_clock_perf_event_disable, - .read = task_clock_perf_event_read, -}; - /* Deref the hlist from the update side */ static inline struct swevent_hlist * swevent_hlist_deref(struct perf_cpu_context *cpuctx) @@ -4783,17 +4598,63 @@ static int swevent_hlist_get(struct perf_event *event) return err; } -#ifdef CONFIG_EVENT_TRACING +atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; -static struct pmu perf_ops_tracepoint = { - .enable = perf_trace_enable, - .disable = perf_trace_disable, +static void sw_perf_event_destroy(struct perf_event *event) +{ + u64 event_id = event->attr.config; + + WARN_ON(event->parent); + + atomic_dec(&perf_swevent_enabled[event_id]); + swevent_hlist_put(event); +} + +static int perf_swevent_init(struct perf_event *event) +{ + int event_id = event->attr.config; + + if (event->attr.type != PERF_TYPE_SOFTWARE) + return -ENOENT; + + switch (event_id) { + case PERF_COUNT_SW_CPU_CLOCK: + case PERF_COUNT_SW_TASK_CLOCK: + return -ENOENT; + + default: + break; + } + + if (event_id > PERF_COUNT_SW_MAX) + return -ENOENT; + + if (!event->parent) { + int err; + + err = swevent_hlist_get(event); + if (err) + return err; + + atomic_inc(&perf_swevent_enabled[event_id]); + event->destroy = sw_perf_event_destroy; + } + + return 0; +} + +static struct pmu perf_swevent = { + .event_init = perf_swevent_init, + .enable = perf_swevent_enable, + .disable = perf_swevent_disable, .start = perf_swevent_int, .stop = perf_swevent_void, .read = perf_swevent_read, - .unthrottle = perf_swevent_void, + .unthrottle = perf_swevent_void, /* hwc->interrupts already reset */ }; +#ifdef CONFIG_EVENT_TRACING + static int perf_tp_filter_match(struct perf_event *event, struct perf_sample_data *data) { @@ -4849,10 +4710,13 @@ static void tp_perf_event_destroy(struct perf_event *event) perf_trace_destroy(event); } -static struct pmu *tp_perf_event_init(struct perf_event *event) +static int perf_tp_event_init(struct perf_event *event) { int err; + if (event->attr.type != PERF_TYPE_TRACEPOINT) + return -ENOENT; + /* * Raw tracepoint data is a severe data leak, only allow root to * have these. @@ -4860,15 +4724,30 @@ static struct pmu *tp_perf_event_init(struct perf_event *event) if ((event->attr.sample_type & PERF_SAMPLE_RAW) && perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EPERM); + return -EPERM; err = perf_trace_init(event); if (err) - return NULL; + return err; event->destroy = tp_perf_event_destroy; - return &perf_ops_tracepoint; + return 0; +} + +static struct pmu perf_tracepoint = { + .event_init = perf_tp_event_init, + .enable = perf_trace_enable, + .disable = perf_trace_disable, + .start = perf_swevent_int, + .stop = perf_swevent_void, + .read = perf_swevent_read, + .unthrottle = perf_swevent_void, +}; + +static inline void perf_tp_register(void) +{ + perf_pmu_register(&perf_tracepoint); } static int perf_event_set_filter(struct perf_event *event, void __user *arg) @@ -4896,9 +4775,8 @@ static void perf_event_free_filter(struct perf_event *event) #else -static struct pmu *tp_perf_event_init(struct perf_event *event) +static inline void perf_tp_register(void) { - return NULL; } static int perf_event_set_filter(struct perf_event *event, void __user *arg) @@ -4913,105 +4791,247 @@ static void perf_event_free_filter(struct perf_event *event) #endif /* CONFIG_EVENT_TRACING */ #ifdef CONFIG_HAVE_HW_BREAKPOINT -static void bp_perf_event_destroy(struct perf_event *event) +void perf_bp_event(struct perf_event *bp, void *data) { - release_bp_slot(event); + struct perf_sample_data sample; + struct pt_regs *regs = data; + + perf_sample_data_init(&sample, bp->attr.bp_addr); + + if (!perf_exclude_event(bp, regs)) + perf_swevent_add(bp, 1, 1, &sample, regs); } +#endif + +/* + * hrtimer based swevent callback + */ -static struct pmu *bp_perf_event_init(struct perf_event *bp) +static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) { - int err; + enum hrtimer_restart ret = HRTIMER_RESTART; + struct perf_sample_data data; + struct pt_regs *regs; + struct perf_event *event; + u64 period; - err = register_perf_hw_breakpoint(bp); - if (err) - return ERR_PTR(err); + event = container_of(hrtimer, struct perf_event, hw.hrtimer); + event->pmu->read(event); + + perf_sample_data_init(&data, 0); + data.period = event->hw.last_period; + regs = get_irq_regs(); + + if (regs && !perf_exclude_event(event, regs)) { + if (!(event->attr.exclude_idle && current->pid == 0)) + if (perf_event_overflow(event, 0, &data, regs)) + ret = HRTIMER_NORESTART; + } - bp->destroy = bp_perf_event_destroy; + period = max_t(u64, 10000, event->hw.sample_period); + hrtimer_forward_now(hrtimer, ns_to_ktime(period)); - return &perf_ops_bp; + return ret; } -void perf_bp_event(struct perf_event *bp, void *data) +static void perf_swevent_start_hrtimer(struct perf_event *event) { - struct perf_sample_data sample; - struct pt_regs *regs = data; + struct hw_perf_event *hwc = &event->hw; - perf_sample_data_init(&sample, bp->attr.bp_addr); + hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hwc->hrtimer.function = perf_swevent_hrtimer; + if (hwc->sample_period) { + u64 period; - if (!perf_exclude_event(bp, regs)) - perf_swevent_add(bp, 1, 1, &sample, regs); + if (hwc->remaining) { + if (hwc->remaining < 0) + period = 10000; + else + period = hwc->remaining; + hwc->remaining = 0; + } else { + period = max_t(u64, 10000, hwc->sample_period); + } + __hrtimer_start_range_ns(&hwc->hrtimer, + ns_to_ktime(period), 0, + HRTIMER_MODE_REL, 0); + } } -#else -static struct pmu *bp_perf_event_init(struct perf_event *bp) + +static void perf_swevent_cancel_hrtimer(struct perf_event *event) { - return NULL; + struct hw_perf_event *hwc = &event->hw; + + if (hwc->sample_period) { + ktime_t remaining = hrtimer_get_remaining(&hwc->hrtimer); + hwc->remaining = ktime_to_ns(remaining); + + hrtimer_cancel(&hwc->hrtimer); + } } -void perf_bp_event(struct perf_event *bp, void *regs) +/* + * Software event: cpu wall time clock + */ + +static void cpu_clock_event_update(struct perf_event *event) { + int cpu = raw_smp_processor_id(); + s64 prev; + u64 now; + + now = cpu_clock(cpu); + prev = local64_xchg(&event->hw.prev_count, now); + local64_add(now - prev, &event->count); } -#endif -atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; +static int cpu_clock_event_enable(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int cpu = raw_smp_processor_id(); -static void sw_perf_event_destroy(struct perf_event *event) + local64_set(&hwc->prev_count, cpu_clock(cpu)); + perf_swevent_start_hrtimer(event); + + return 0; +} + +static void cpu_clock_event_disable(struct perf_event *event) { - u64 event_id = event->attr.config; + perf_swevent_cancel_hrtimer(event); + cpu_clock_event_update(event); +} - WARN_ON(event->parent); +static void cpu_clock_event_read(struct perf_event *event) +{ + cpu_clock_event_update(event); +} - atomic_dec(&perf_swevent_enabled[event_id]); - swevent_hlist_put(event); +static int cpu_clock_event_init(struct perf_event *event) +{ + if (event->attr.type != PERF_TYPE_SOFTWARE) + return -ENOENT; + + if (event->attr.config != PERF_COUNT_SW_CPU_CLOCK) + return -ENOENT; + + return 0; } -static struct pmu *sw_perf_event_init(struct perf_event *event) +static struct pmu perf_cpu_clock = { + .event_init = cpu_clock_event_init, + .enable = cpu_clock_event_enable, + .disable = cpu_clock_event_disable, + .read = cpu_clock_event_read, +}; + +/* + * Software event: task time clock + */ + +static void task_clock_event_update(struct perf_event *event, u64 now) { - struct pmu *pmu = NULL; - u64 event_id = event->attr.config; + u64 prev; + s64 delta; - /* - * Software events (currently) can't in general distinguish - * between user, kernel and hypervisor events. - * However, context switches and cpu migrations are considered - * to be kernel events, and page faults are never hypervisor - * events. - */ - switch (event_id) { - case PERF_COUNT_SW_CPU_CLOCK: - pmu = &perf_ops_cpu_clock; + prev = local64_xchg(&event->hw.prev_count, now); + delta = now - prev; + local64_add(delta, &event->count); +} - break; - case PERF_COUNT_SW_TASK_CLOCK: - /* - * If the user instantiates this as a per-cpu event, - * use the cpu_clock event instead. - */ - if (event->ctx->task) - pmu = &perf_ops_task_clock; - else - pmu = &perf_ops_cpu_clock; +static int task_clock_event_enable(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 now; - break; - case PERF_COUNT_SW_PAGE_FAULTS: - case PERF_COUNT_SW_PAGE_FAULTS_MIN: - case PERF_COUNT_SW_PAGE_FAULTS_MAJ: - case PERF_COUNT_SW_CONTEXT_SWITCHES: - case PERF_COUNT_SW_CPU_MIGRATIONS: - case PERF_COUNT_SW_ALIGNMENT_FAULTS: - case PERF_COUNT_SW_EMULATION_FAULTS: - if (!event->parent) { - int err; - - err = swevent_hlist_get(event); - if (err) - return ERR_PTR(err); + now = event->ctx->time; - atomic_inc(&perf_swevent_enabled[event_id]); - event->destroy = sw_perf_event_destroy; + local64_set(&hwc->prev_count, now); + + perf_swevent_start_hrtimer(event); + + return 0; +} + +static void task_clock_event_disable(struct perf_event *event) +{ + perf_swevent_cancel_hrtimer(event); + task_clock_event_update(event, event->ctx->time); + +} + +static void task_clock_event_read(struct perf_event *event) +{ + u64 time; + + if (!in_nmi()) { + update_context_time(event->ctx); + time = event->ctx->time; + } else { + u64 now = perf_clock(); + u64 delta = now - event->ctx->timestamp; + time = event->ctx->time + delta; + } + + task_clock_event_update(event, time); +} + +static int task_clock_event_init(struct perf_event *event) +{ + if (event->attr.type != PERF_TYPE_SOFTWARE) + return -ENOENT; + + if (event->attr.config != PERF_COUNT_SW_TASK_CLOCK) + return -ENOENT; + + return 0; +} + +static struct pmu perf_task_clock = { + .event_init = task_clock_event_init, + .enable = task_clock_event_enable, + .disable = task_clock_event_disable, + .read = task_clock_event_read, +}; + +static LIST_HEAD(pmus); +static DEFINE_MUTEX(pmus_lock); +static struct srcu_struct pmus_srcu; + +int perf_pmu_register(struct pmu *pmu) +{ + mutex_lock(&pmus_lock); + list_add_rcu(&pmu->entry, &pmus); + mutex_unlock(&pmus_lock); + + return 0; +} + +void perf_pmu_unregister(struct pmu *pmu) +{ + mutex_lock(&pmus_lock); + list_del_rcu(&pmu->entry); + mutex_unlock(&pmus_lock); + + synchronize_srcu(&pmus_srcu); +} + +struct pmu *perf_init_event(struct perf_event *event) +{ + struct pmu *pmu = NULL; + int idx; + + idx = srcu_read_lock(&pmus_srcu); + list_for_each_entry_rcu(pmu, &pmus, entry) { + int ret = pmu->event_init(event); + if (!ret) + break; + if (ret != -ENOENT) { + pmu = ERR_PTR(ret); + break; } - pmu = &perf_ops_generic; - break; } + srcu_read_unlock(&pmus_srcu, idx); return pmu; } @@ -5092,29 +5112,8 @@ perf_event_alloc(struct perf_event_attr *attr, if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) goto done; - switch (attr->type) { - case PERF_TYPE_RAW: - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - pmu = hw_perf_event_init(event); - break; - - case PERF_TYPE_SOFTWARE: - pmu = sw_perf_event_init(event); - break; - - case PERF_TYPE_TRACEPOINT: - pmu = tp_perf_event_init(event); - break; + pmu = perf_init_event(event); - case PERF_TYPE_BREAKPOINT: - pmu = bp_perf_event_init(event); - break; - - - default: - break; - } done: err = 0; if (!pmu) @@ -5979,22 +5978,15 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) return NOTIFY_OK; } -/* - * This has to have a higher priority than migration_notifier in sched.c. - */ -static struct notifier_block __cpuinitdata perf_cpu_nb = { - .notifier_call = perf_cpu_notify, - .priority = 20, -}; - void __init perf_event_init(void) { perf_event_init_all_cpus(); - perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, - (void *)(long)smp_processor_id()); - register_cpu_notifier(&perf_cpu_nb); + init_srcu_struct(&pmus_srcu); + perf_pmu_register(&perf_swevent); + perf_pmu_register(&perf_cpu_clock); + perf_pmu_register(&perf_task_clock); + perf_tp_register(); + perf_cpu_notifier(perf_cpu_notify); } static ssize_t perf_show_reserve_percpu(struct sysdev_class *class,