static const char *csv_sep = NULL;
static bool csv_output = false;
-struct cpu_counts {
- u64 val;
- u64 ena;
- u64 run;
-};
-
static volatile int done = 0;
struct stats
struct perf_stat {
struct stats res_stats[3];
- int scaled;
- struct cpu_counts cpu_counts[];
};
-static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel, int ncpus)
+static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
{
- size_t priv_size = (sizeof(struct perf_stat) +
- (ncpus * sizeof(struct cpu_counts)));
- evsel->priv = zalloc(priv_size);
+ evsel->priv = zalloc(sizeof(struct perf_stat));
return evsel->priv == NULL ? -ENOMEM : 0;
}
* Read out the results of a single counter:
* aggregate counts across CPUs in system-wide mode
*/
-static void read_counter_aggr(struct perf_evsel *counter)
+static int read_counter_aggr(struct perf_evsel *counter)
{
struct perf_stat *ps = counter->priv;
- u64 count[3], single_count[3];
- int cpu;
- size_t res, nv;
- int scaled;
- int i, thread;
-
- count[0] = count[1] = count[2] = 0;
-
- nv = scale ? 3 : 1;
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- for (thread = 0; thread < thread_num; thread++) {
- if (FD(counter, cpu, thread) < 0)
- continue;
-
- res = read(FD(counter, cpu, thread),
- single_count, nv * sizeof(u64));
- assert(res == nv * sizeof(u64));
-
- close(FD(counter, cpu, thread));
- FD(counter, cpu, thread) = -1;
-
- count[0] += single_count[0];
- if (scale) {
- count[1] += single_count[1];
- count[2] += single_count[2];
- }
- }
- }
-
- scaled = 0;
- if (scale) {
- if (count[2] == 0) {
- ps->scaled = -1;
- count[0] = 0;
- return;
- }
+ u64 *count = counter->counts->aggr.values;
+ int i;
- if (count[2] < count[1]) {
- ps->scaled = 1;
- count[0] = (unsigned long long)
- ((double)count[0] * count[1] / count[2] + 0.5);
- }
- }
+ if (__perf_evsel__read(counter, nr_cpus, thread_num, scale) < 0)
+ return -1;
for (i = 0; i < 3; i++)
update_stats(&ps->res_stats[i], count[i]);
update_stats(&runtime_cycles_stats[0], count[0]);
if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
update_stats(&runtime_branches_stats[0], count[0]);
+
+ return 0;
}
/*
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
*/
-static void read_counter(struct perf_evsel *counter)
+static int read_counter(struct perf_evsel *counter)
{
- struct cpu_counts *cpu_counts = counter->priv;
- u64 count[3];
+ u64 *count;
int cpu;
- size_t res, nv;
-
- count[0] = count[1] = count[2] = 0;
-
- nv = scale ? 3 : 1;
for (cpu = 0; cpu < nr_cpus; cpu++) {
+ if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
+ return -1;
- if (FD(counter, cpu, 0) < 0)
- continue;
-
- res = read(FD(counter, cpu, 0), count, nv * sizeof(u64));
-
- assert(res == nv * sizeof(u64));
-
- close(FD(counter, cpu, 0));
- FD(counter, cpu, 0) = -1;
-
- if (scale) {
- if (count[2] == 0) {
- count[0] = 0;
- } else if (count[2] < count[1]) {
- count[0] = (unsigned long long)
- ((double)count[0] * count[1] / count[2] + 0.5);
- }
- }
- cpu_counts[cpu].val = count[0]; /* scaled count */
- cpu_counts[cpu].ena = count[1];
- cpu_counts[cpu].run = count[2];
+ count = counter->counts->cpu[cpu].values;
if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
update_stats(&runtime_nsecs_stats[cpu], count[0]);
if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
update_stats(&runtime_branches_stats[cpu], count[0]);
}
+
+ return 0;
}
static int run_perf_stat(int argc __used, const char **argv)
update_stats(&walltime_nsecs_stats, t1 - t0);
if (no_aggr) {
- list_for_each_entry(counter, &evsel_list, node)
+ list_for_each_entry(counter, &evsel_list, node) {
read_counter(counter);
+ perf_evsel__close_fd(counter, nr_cpus, 1);
+ }
} else {
- list_for_each_entry(counter, &evsel_list, node)
+ list_for_each_entry(counter, &evsel_list, node) {
read_counter_aggr(counter);
+ perf_evsel__close_fd(counter, nr_cpus, thread_num);
+ }
}
+
return WEXITSTATUS(status);
}
{
struct perf_stat *ps = counter->priv;
double avg = avg_stats(&ps->res_stats[0]);
- int scaled = ps->scaled;
+ int scaled = counter->counts->scaled;
if (scaled == -1) {
fprintf(stderr, "%*s%s%-24s\n",
*/
static void print_counter(struct perf_evsel *counter)
{
- struct perf_stat *ps = counter->priv;
u64 ena, run, val;
int cpu;
for (cpu = 0; cpu < nr_cpus; cpu++) {
- val = ps->cpu_counts[cpu].val;
- ena = ps->cpu_counts[cpu].ena;
- run = ps->cpu_counts[cpu].run;
+ val = counter->counts->cpu[cpu].val;
+ ena = counter->counts->cpu[cpu].ena;
+ run = counter->counts->cpu[cpu].run;
if (run == 0 || ena == 0) {
fprintf(stderr, "CPU%*d%s%*s%s%-24s",
csv_output ? 0 : -4,
}
list_for_each_entry(pos, &evsel_list, node) {
- if (perf_evsel__alloc_stat_priv(pos, nr_cpus) < 0 ||
+ if (perf_evsel__alloc_stat_priv(pos) < 0 ||
+ perf_evsel__alloc_counts(pos, nr_cpus) < 0 ||
perf_evsel__alloc_fd(pos, nr_cpus, thread_num) < 0)
goto out_free_fd;
}
#include "evsel.h"
#include "util.h"
+/*
+ * Lvalue accessor for the int stored at (x, y) in @e's fd table; callers in
+ * this file pass a CPU index as x and a thread index as y.  @e is
+ * parenthesized so that any pointer expression may be passed.
+ */
+#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, x, y))
+
struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx)
{
struct perf_evsel *evsel = zalloc(sizeof(*evsel));
return evsel->fd != NULL ? 0 : -ENOMEM;
}
+/*
+ * Allocate @evsel->counts with zalloc(): one struct perf_counts header
+ * followed by @ncpus trailing struct perf_counts_values entries.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
+{
+	size_t size = sizeof(*evsel->counts) +
+		      (ncpus * sizeof(struct perf_counts_values));
+
+	evsel->counts = zalloc(size);
+	if (evsel->counts == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
void perf_evsel__free_fd(struct perf_evsel *evsel)
{
xyarray__delete(evsel->fd);
evsel->fd = NULL;
}
+/*
+ * Close every open fd in the first @ncpus x @nthreads entries of @evsel's
+ * fd table and mark each closed slot with -1.
+ *
+ * Slots that already hold a negative value are skipped, matching how the
+ * read helpers in this file treat a negative fd as "not open", so close()
+ * is never issued on an invalid descriptor.
+ */
+void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+	int cpu, thread;
+
+	for (cpu = 0; cpu < ncpus; cpu++) {
+		for (thread = 0; thread < nthreads; ++thread) {
+			if (FD(evsel, cpu, thread) < 0)
+				continue;
+
+			close(FD(evsel, cpu, thread));
+			FD(evsel, cpu, thread) = -1;
+		}
+	}
+}
+
void perf_evsel__delete(struct perf_evsel *evsel)
{
assert(list_empty(&evsel->node));
xyarray__delete(evsel->fd);
free(evsel);
}
+
+/*
+ * Read the counter behind the fd at (@cpu, @thread) and store the result in
+ * evsel->counts->cpu[@cpu].
+ *
+ * With @scale, three u64 values (val, ena, run) are read; val is forced to 0
+ * when run is 0 and is extrapolated by ena/run (rounded to nearest) when
+ * run < ena.  Without @scale only val is read and ena/run are stored as 0.
+ *
+ * Returns 0 on success, -EINVAL if the fd is negative, or -errno if readn()
+ * reports a failure.
+ */
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+			      int cpu, int thread, bool scale)
+{
+	struct perf_counts_values sample;
+	size_t nr_values = scale ? 3 : 1;
+	int fd = FD(evsel, cpu, thread);
+
+	if (fd < 0)
+		return -EINVAL;
+
+	if (readn(fd, &sample, nr_values * sizeof(u64)) < 0)
+		return -errno;
+
+	if (!scale) {
+		/* ena/run were not read: publish them as 0 */
+		sample.ena = sample.run = 0;
+	} else if (sample.run == 0) {
+		sample.val = 0;
+	} else if (sample.run < sample.ena) {
+		sample.val = (u64)((double)sample.val * sample.ena / sample.run + 0.5);
+	}
+
+	evsel->counts->cpu[cpu] = sample;
+	return 0;
+}
+
+/*
+ * Read @evsel's counter from every open fd in the first @ncpus x @nthreads
+ * entries of its fd table and sum the results into evsel->counts->aggr.
+ * Slots holding a negative fd are skipped.
+ *
+ * With @scale, three u64 values (val, ena, run) are read per fd and summed.
+ * evsel->counts->scaled then records the outcome:
+ *   -1  summed run is 0; aggr->val is forced to 0
+ *    1  summed run < summed ena; aggr->val was extrapolated by ena/run
+ *    0  no extrapolation was needed
+ * Without @scale only val is read; ena and run stay 0 and scaled is 0.
+ *
+ * Returns 0 on success or -errno if readn() reports a failure; in that case
+ * evsel->counts->scaled is left untouched.
+ */
+int __perf_evsel__read(struct perf_evsel *evsel,
+		       int ncpus, int nthreads, bool scale)
+{
+	size_t nv = scale ? 3 : 1;
+	int cpu, thread;
+	struct perf_counts_values *aggr = &evsel->counts->aggr, count;
+
+	/*
+	 * Reset all three accumulators, not just val: otherwise ena and run
+	 * would keep growing across repeated calls and skew the scale factor.
+	 */
+	aggr->val = aggr->ena = aggr->run = 0;
+
+	for (cpu = 0; cpu < ncpus; cpu++) {
+		for (thread = 0; thread < nthreads; thread++) {
+			if (FD(evsel, cpu, thread) < 0)
+				continue;
+
+			if (readn(FD(evsel, cpu, thread),
+				  &count, nv * sizeof(u64)) < 0)
+				return -errno;
+
+			aggr->val += count.val;
+			if (scale) {
+				aggr->ena += count.ena;
+				aggr->run += count.run;
+			}
+		}
+	}
+
+	evsel->counts->scaled = 0;
+	if (!scale)
+		return 0;	/* ena/run are already 0 from the reset above */
+
+	if (aggr->run == 0) {
+		evsel->counts->scaled = -1;
+		aggr->val = 0;
+		return 0;
+	}
+
+	if (aggr->run < aggr->ena) {
+		evsel->counts->scaled = 1;
+		aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
+	}
+
+	return 0;
+}
#define __PERF_EVSEL_H 1
#include <linux/list.h>
+#include <stdbool.h>
#include <linux/perf_event.h>
#include "types.h"
#include "xyarray.h"
+
+struct perf_counts_values { /* one counter reading: three u64s addressable by name or by index */
+ union {
+ struct {
+ u64 val; /* counter value (values[0]); the read helpers may replace it with a scaled estimate */
+ u64 ena; /* values[1]; presumably the time the event was enabled - TODO confirm against the read format in use */
+ u64 run; /* values[2]; presumably the time the event was actually running - TODO confirm */
+ };
+ u64 values[3]; /* same storage as val/ena/run, viewed as an array */
+ };
+};
+
+struct perf_counts { /* results attached to an evsel; allocated with trailing cpu[] entries */
+ s8 scaled; /* set by __perf_evsel__read(): -1 if summed run was 0, 1 if val was scaled up, 0 otherwise */
+ struct perf_counts_values aggr; /* totals summed over the CPUs and threads that were read */
+ struct perf_counts_values cpu[]; /* per-CPU readings, indexed by the cpu index used for the read */
+};
struct perf_evsel {
struct list_head node;
struct perf_event_attr attr;
char *filter;
struct xyarray *fd;
+ struct perf_counts *counts;
int idx;
void *priv;
};
void perf_evsel__delete(struct perf_evsel *evsel);
int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
void perf_evsel__free_fd(struct perf_evsel *evsel);
+void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
#define perf_evsel__match(evsel, t, c) \
(evsel->attr.type == PERF_TYPE_##t && \
evsel->attr.config == PERF_COUNT_##c)
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+ int cpu, int thread, bool scale);
+
+/**
+ * perf_evsel__read_on_cpu - Read out the results on a CPU and thread
+ *
+ * @evsel - event selector to read value
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ *
+ * Unscaled variant: calls __perf_evsel__read_on_cpu() with scale == false,
+ * so only the raw value is read and ena/run are stored as 0 in
+ * evsel->counts->cpu[@cpu].
+ *
+ * Returns 0 on success, -EINVAL if the fd for (@cpu, @thread) is negative,
+ * or -errno if the read fails.
+ */
+static inline int perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+ int cpu, int thread)
+{
+ return __perf_evsel__read_on_cpu(evsel, cpu, thread, false);
+}
+
+/**
+ * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
+ *
+ * @evsel - event selector to read value
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ *
+ * Scaled variant: calls __perf_evsel__read_on_cpu() with scale == true, so
+ * val, ena and run are all read; val is set to 0 when run is 0 and is
+ * extrapolated by ena/run when run < ena.  The result is stored in
+ * evsel->counts->cpu[@cpu].
+ *
+ * Returns 0 on success, -EINVAL if the fd for (@cpu, @thread) is negative,
+ * or -errno if the read fails.
+ */
+static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
+ int cpu, int thread)
+{
+ return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
+}
+
+int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads,
+ bool scale);
+
+/**
+ * perf_evsel__read - Read the aggregate results on all CPUs
+ *
+ * @evsel - event selector to read value
+ * @ncpus - Number of cpus affected, from zero
+ * @nthreads - Number of threads affected, from zero
+ *
+ * Unscaled variant: calls __perf_evsel__read() with scale == false.  The raw
+ * values of all open fds are summed into evsel->counts->aggr.val, aggr.ena
+ * and aggr.run end up 0, and evsel->counts->scaled is set to 0.
+ *
+ * Returns 0 on success or -errno if a read fails.
+ */
+static inline int perf_evsel__read(struct perf_evsel *evsel,
+ int ncpus, int nthreads)
+{
+ return __perf_evsel__read(evsel, ncpus, nthreads, false);
+}
+
+/**
+ * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled
+ *
+ * @evsel - event selector to read value
+ * @ncpus - Number of cpus affected, from zero
+ * @nthreads - Number of threads affected, from zero
+ *
+ * Scaled variant: calls __perf_evsel__read() with scale == true.  val, ena
+ * and run of all open fds are summed into evsel->counts->aggr, and
+ * evsel->counts->scaled is set to -1 (run summed to 0, val forced to 0),
+ * 1 (val extrapolated by ena/run) or 0 (no extrapolation needed).
+ *
+ * Returns 0 on success or -errno if a read fails.
+ */
+static inline int perf_evsel__read_scaled(struct perf_evsel *evsel,
+ int ncpus, int nthreads)
+{
+ return __perf_evsel__read(evsel, ncpus, nthreads, true);
+}
+
#endif /* __PERF_EVSEL_H */