#include <math.h>
#include <locale.h>
-#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
-
#define DEFAULT_SEPARATOR " "
static struct perf_event_attr default_attrs[] = {
};
static bool system_wide = false;
-static int nr_cpus = 0;
+static struct cpu_map *cpus; /* CPUs to count on: cpu_map__new(cpu_list) when system_wide, otherwise cpu_map__dummy_new() */
static int run_idx = 0;
static int run_count = 1;
static bool no_aggr = false;
static pid_t target_pid = -1;
static pid_t target_tid = -1;
-static pid_t *all_tids = NULL;
-static int thread_num = 0;
+static struct thread_map *threads; /* threads to count on, built by thread_map__new(target_pid, target_tid) */
static pid_t child_pid = -1;
static bool null_run = false;
static bool big_num = true;
static const char *csv_sep = NULL;
static bool csv_output = false;
-struct cpu_counts {
- u64 val;
- u64 ena;
- u64 run;
-};
-
static volatile int done = 0;
struct stats
struct perf_stat {
struct stats res_stats[3];
- int scaled;
- struct cpu_counts cpu_counts[];
};
-static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel, int ncpus)
+static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) /* allocate evsel->priv as one zeroed struct perf_stat; returns 0, or -ENOMEM if zalloc() fails */
{
- size_t priv_size = (sizeof(struct perf_stat) +
- (ncpus * sizeof(struct cpu_counts)));
- evsel->priv = zalloc(priv_size);
+ evsel->priv = zalloc(sizeof(struct perf_stat)); /* fixed size: this patch drops the trailing per-CPU cpu_counts[] from struct perf_stat */
return evsel->priv == NULL ? -ENOMEM : 0;
}
struct stats runtime_branches_stats[MAX_NR_CPUS];
struct stats walltime_nsecs_stats;
-#define ERR_PERF_OPEN \
-"counter %d, sys_perf_event_open() syscall returned with %d (%s). /bin/dmesg may provide additional information."
-
-static int create_perf_stat_counter(struct perf_evsel *evsel, bool *perm_err)
+static int create_perf_stat_counter(struct perf_evsel *evsel) /* open evsel per CPU (system_wide) or per thread; returns whatever the perf_evsel__open_per_*() helper returns */
{
struct perf_event_attr *attr = &evsel->attr;
- int thread;
- int ncreated = 0;
if (scale)
attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
PERF_FORMAT_TOTAL_TIME_RUNNING;
- if (system_wide) {
- int cpu;
-
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- FD(evsel, cpu, 0) = sys_perf_event_open(attr,
- -1, cpumap[cpu], -1, 0);
- if (FD(evsel, cpu, 0) < 0) {
- if (errno == EPERM || errno == EACCES)
- *perm_err = true;
- error(ERR_PERF_OPEN, evsel->idx,
- FD(evsel, cpu, 0), strerror(errno));
- } else {
- ++ncreated;
- }
- }
- } else {
- attr->inherit = !no_inherit;
- if (target_pid == -1 && target_tid == -1) {
- attr->disabled = 1;
- attr->enable_on_exec = 1;
- }
- for (thread = 0; thread < thread_num; thread++) {
- FD(evsel, 0, thread) = sys_perf_event_open(attr,
- all_tids[thread], -1, -1, 0);
- if (FD(evsel, 0, thread) < 0) {
- if (errno == EPERM || errno == EACCES)
- *perm_err = true;
- error(ERR_PERF_OPEN, evsel->idx,
- FD(evsel, 0, thread),
- strerror(errno));
- } else {
- ++ncreated;
- }
- }
+ if (system_wide)
+ return perf_evsel__open_per_cpu(evsel, cpus); /* system-wide path returns before inherit/disabled/enable_on_exec are touched */
+
+ attr->inherit = !no_inherit;
+ if (target_pid == -1 && target_tid == -1) { /* neither target_pid nor target_tid set: counter starts disabled and is enabled on exec */
+ attr->disabled = 1;
+ attr->enable_on_exec = 1;
}
- return ncreated;
+ return perf_evsel__open_per_thread(evsel, threads); /* one open per entry of the global thread map */
}
/*
* Read out the results of a single counter:
* aggregate counts across CPUs in system-wide mode
*/
-static void read_counter_aggr(struct perf_evsel *counter)
+static int read_counter_aggr(struct perf_evsel *counter) /* returns 0 on success, -1 if __perf_evsel__read() fails */
{
struct perf_stat *ps = counter->priv;
- u64 count[3], single_count[3];
- int cpu;
- size_t res, nv;
- int scaled;
- int i, thread;
-
- count[0] = count[1] = count[2] = 0;
-
- nv = scale ? 3 : 1;
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- for (thread = 0; thread < thread_num; thread++) {
- if (FD(counter, cpu, thread) < 0)
- continue;
-
- res = read(FD(counter, cpu, thread),
- single_count, nv * sizeof(u64));
- assert(res == nv * sizeof(u64));
-
- close(FD(counter, cpu, thread));
- FD(counter, cpu, thread) = -1;
-
- count[0] += single_count[0];
- if (scale) {
- count[1] += single_count[1];
- count[2] += single_count[2];
- }
- }
- }
-
- scaled = 0;
- if (scale) {
- if (count[2] == 0) {
- ps->scaled = -1;
- count[0] = 0;
- return;
- }
+ u64 *count = counter->counts->aggr.values; /* aggregate values; presumably filled in by __perf_evsel__read() below - confirm in evsel.c */
+ int i;
- if (count[2] < count[1]) {
- ps->scaled = 1;
- count[0] = (unsigned long long)
- ((double)count[0] * count[1] / count[2] + 0.5);
- }
- }
+ if (__perf_evsel__read(counter, cpus->nr, threads->nr, scale) < 0) /* read across cpus->nr x threads->nr; on failure no stats are updated */
+ return -1;
for (i = 0; i < 3; i++)
update_stats(&ps->res_stats[i], count[i]);
update_stats(&runtime_cycles_stats[0], count[0]);
if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
update_stats(&runtime_branches_stats[0], count[0]);
+
+ return 0;
}
/*
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
*/
-static void read_counter(struct perf_evsel *counter)
+static int read_counter(struct perf_evsel *counter) /* returns 0 on success, -1 as soon as one per-CPU read fails */
{
- struct cpu_counts *cpu_counts = counter->priv;
- u64 count[3];
+ u64 *count;
int cpu;
- size_t res, nv;
- count[0] = count[1] = count[2] = 0;
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0) /* third argument 0: presumably the thread index - confirm against evsel.h */
+ return -1;
- nv = scale ? 3 : 1;
-
- for (cpu = 0; cpu < nr_cpus; cpu++) {
-
- if (FD(counter, cpu, 0) < 0)
- continue;
-
- res = read(FD(counter, cpu, 0), count, nv * sizeof(u64));
-
- assert(res == nv * sizeof(u64));
-
- close(FD(counter, cpu, 0));
- FD(counter, cpu, 0) = -1;
-
- if (scale) {
- if (count[2] == 0) {
- count[0] = 0;
- } else if (count[2] < count[1]) {
- count[0] = (unsigned long long)
- ((double)count[0] * count[1] / count[2] + 0.5);
- }
- }
- cpu_counts[cpu].val = count[0]; /* scaled count */
- cpu_counts[cpu].ena = count[1];
- cpu_counts[cpu].run = count[2];
+ count = counter->counts->cpu[cpu].values; /* this CPU's values, fed into the per-CPU runtime stats below */
if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
update_stats(&runtime_nsecs_stats[cpu], count[0]);
if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
update_stats(&runtime_branches_stats[cpu], count[0]);
}
+
+ return 0;
}
static int run_perf_stat(int argc __used, const char **argv)
unsigned long long t0, t1;
struct perf_evsel *counter;
int status = 0;
- int ncreated = 0;
int child_ready_pipe[2], go_pipe[2];
- bool perm_err = false;
const bool forks = (argc > 0);
char buf;
- if (!system_wide)
- nr_cpus = 1;
-
if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
perror("failed to create pipes");
exit(1);
}
if (target_tid == -1 && target_pid == -1 && !system_wide)
- all_tids[0] = child_pid;
+ threads->map[0] = child_pid; /* no explicit target and not system-wide: slot 0 of the thread map becomes the child */
/*
* Wait for the child to be ready to exec.
close(child_ready_pipe[0]);
}
- list_for_each_entry(counter, &evsel_list, node)
- ncreated += create_perf_stat_counter(counter, &perm_err);
-
- if (ncreated < nr_counters) {
- if (perm_err)
- error("You may not have permission to collect %sstats.\n"
- "\t Consider tweaking"
- " /proc/sys/kernel/perf_event_paranoid or running as root.",
- system_wide ? "system-wide " : "");
- die("Not all events could be opened.\n");
- if (child_pid != -1)
- kill(child_pid, SIGTERM);
- return -1;
+ list_for_each_entry(counter, &evsel_list, node) {
+ if (create_perf_stat_counter(counter) < 0) {
+ /* errno holds positive E* codes, so test EPERM/EACCES themselves; the negated forms could never match */
+ if (errno == EPERM || errno == EACCES) {
+ error("You may not have permission to collect %sstats.\n"
+ "\t Consider tweaking"
+ " /proc/sys/kernel/perf_event_paranoid or running as root.",
+ system_wide ? "system-wide " : "");
+ } else if (errno == ENOENT) {
+ error("%s event is not supported. ", event_name(counter));
+ } else {
+ error("open_counter returned with %d (%s). "
+ "/bin/dmesg may provide additional information.\n",
+ errno, strerror(errno));
+ }
+ if (child_pid != -1)
+ kill(child_pid, SIGTERM); /* signal the forked workload before die() is called */
+ die("Not all events could be opened.\n");
+ return -1;
+ }
}
/*
update_stats(&walltime_nsecs_stats, t1 - t0);
if (no_aggr) {
- list_for_each_entry(counter, &evsel_list, node)
+ list_for_each_entry(counter, &evsel_list, node) {
read_counter(counter);
+ perf_evsel__close_fd(counter, cpus->nr, 1); /* fds are closed whether or not read_counter() succeeded; its return value is not checked */
+ }
} else {
- list_for_each_entry(counter, &evsel_list, node)
+ list_for_each_entry(counter, &evsel_list, node) {
read_counter_aggr(counter);
+ perf_evsel__close_fd(counter, cpus->nr, threads->nr); /* fds are closed whether or not read_counter_aggr() succeeded; its return value is not checked */
+ }
}
+
return WEXITSTATUS(status);
}
if (no_aggr)
sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4,
- cpumap[cpu], csv_sep);
+ cpus->map[cpu], csv_sep);
fprintf(stderr, fmt, cpustr, msecs, csv_sep, event_name(evsel));
if (no_aggr)
sprintf(cpustr, "CPU%*d%s",
csv_output ? 0 : -4,
- cpumap[cpu], csv_sep);
+ cpus->map[cpu], csv_sep);
else
cpu = 0;
{
struct perf_stat *ps = counter->priv;
double avg = avg_stats(&ps->res_stats[0]);
- int scaled = ps->scaled;
+ int scaled = counter->counts->scaled;
if (scaled == -1) {
fprintf(stderr, "%*s%s%-24s\n",
*/
static void print_counter(struct perf_evsel *counter)
{
- struct perf_stat *ps = counter->priv;
u64 ena, run, val;
int cpu;
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- val = ps->cpu_counts[cpu].val;
- ena = ps->cpu_counts[cpu].ena;
- run = ps->cpu_counts[cpu].run;
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ val = counter->counts->cpu[cpu].val;
+ ena = counter->counts->cpu[cpu].ena;
+ run = counter->counts->cpu[cpu].run;
if (run == 0 || ena == 0) {
fprintf(stderr, "CPU%*d%s%*s%s%-24s",
csv_output ? 0 : -4,
- cpumap[cpu], csv_sep,
+ cpus->map[cpu], csv_sep,
csv_output ? 0 : 18,
"<not counted>", csv_sep,
event_name(counter));
nr_counters = ARRAY_SIZE(default_attrs);
for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
- pos = perf_evsel__new(default_attrs[c].type,
- default_attrs[c].config,
+ pos = perf_evsel__new(&default_attrs[c],
nr_counters);
if (pos == NULL)
goto out;
}
}
- if (system_wide)
- nr_cpus = read_cpu_map(cpu_list);
- else
- nr_cpus = 1;
+ if (target_pid != -1)
+ target_tid = target_pid;
- if (nr_cpus < 1)
+ threads = thread_map__new(target_pid, target_tid);
+ if (threads == NULL) {
+ pr_err("Problems finding threads of monitor\n");
usage_with_options(stat_usage, options);
+ }
- if (target_pid != -1) {
- target_tid = target_pid;
- thread_num = find_all_tid(target_pid, &all_tids);
- if (thread_num <= 0) {
- fprintf(stderr, "Can't find all threads of pid %d\n",
- target_pid);
- usage_with_options(stat_usage, options);
- }
- } else {
- all_tids=malloc(sizeof(pid_t));
- if (!all_tids)
- return -ENOMEM;
+ if (system_wide)
+ cpus = cpu_map__new(cpu_list);
+ else
+ cpus = cpu_map__dummy_new();
- all_tids[0] = target_tid;
- thread_num = 1;
+ if (cpus == NULL) {
+ perror("failed to parse CPUs map");
+ usage_with_options(stat_usage, options);
+ return -1;
}
list_for_each_entry(pos, &evsel_list, node) {
- if (perf_evsel__alloc_stat_priv(pos, nr_cpus) < 0 ||
- perf_evsel__alloc_fd(pos, nr_cpus, thread_num) < 0)
+ if (perf_evsel__alloc_stat_priv(pos) < 0 ||
+ perf_evsel__alloc_counts(pos, cpus->nr) < 0 ||
+ perf_evsel__alloc_fd(pos, cpus->nr, threads->nr) < 0)
goto out_free_fd;
}
out_free_fd:
list_for_each_entry(pos, &evsel_list, node)
perf_evsel__free_stat_priv(pos);
+ perf_evsel_list__delete();
out:
+ thread_map__delete(threads);
+ threads = NULL;
return status;
}