perf evsel: Steal the counter reading routines from stat

author Arnaldo Carvalho de Melo <acme@redhat.com>

Mon, 3 Jan 2011 19:45:52 +0000 (17:45 -0200)

committer Arnaldo Carvalho de Melo <acme@redhat.com>

Tue, 4 Jan 2011 02:22:55 +0000 (00:22 -0200)
author Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 3 Jan 2011 19:45:52 +0000 (17:45 -0200)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Tue, 4 Jan 2011 02:22:55 +0000 (00:22 -0200)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c

index 589ba3a92423a247943c5a5b78d10357c9567e81..a8b00b44b3cdb8c0fb7d295aa2663217d467174c 100644 (file)
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -93,12 +93,6 @@ static const char            *cpu_list;
  static const char              *csv_sep                        = NULL;
  static bool                    csv_output                      = false;
  
-struct cpu_counts {
-       u64 val;
-       u64 ena;
-       u64 run;
-};
-
  static volatile int done = 0;
  
  struct stats
@@ -108,15 +102,11 @@ struct stats
  
  struct perf_stat {
         struct stats      res_stats[3];
-       int               scaled;
-       struct cpu_counts cpu_counts[];
  };
  
-static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel, int ncpus)
+static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
  {
-       size_t priv_size = (sizeof(struct perf_stat) +
-                           (ncpus * sizeof(struct cpu_counts)));
-       evsel->priv = zalloc(priv_size);
+       evsel->priv = zalloc(sizeof(struct perf_stat));
         return evsel->priv == NULL ? -ENOMEM : 0;
  }
  
@@ -238,52 +228,14 @@ static inline int nsec_counter(struct perf_evsel *evsel)
   * Read out the results of a single counter:
   * aggregate counts across CPUs in system-wide mode
   */
-static void read_counter_aggr(struct perf_evsel *counter)
+static int read_counter_aggr(struct perf_evsel *counter)
  {
         struct perf_stat *ps = counter->priv;
-       u64 count[3], single_count[3];
-       int cpu;
-       size_t res, nv;
-       int scaled;
-       int i, thread;
-
-       count[0] = count[1] = count[2] = 0;
-
-       nv = scale ? 3 : 1;
-       for (cpu = 0; cpu < nr_cpus; cpu++) {
-               for (thread = 0; thread < thread_num; thread++) {
-                       if (FD(counter, cpu, thread) < 0)
-                               continue;
-
-                       res = read(FD(counter, cpu, thread),
-                                       single_count, nv * sizeof(u64));
-                       assert(res == nv * sizeof(u64));
-
-                       close(FD(counter, cpu, thread));
-                       FD(counter, cpu, thread) = -1;
-
-                       count[0] += single_count[0];
-                       if (scale) {
-                               count[1] += single_count[1];
-                               count[2] += single_count[2];
-                       }
-               }
-       }
-
-       scaled = 0;
-       if (scale) {
-               if (count[2] == 0) {
-                       ps->scaled = -1;
-                       count[0] = 0;
-                       return;
-               }
+       u64 *count = counter->counts->aggr.values;
+       int i;
  
-               if (count[2] < count[1]) {
-                       ps->scaled = 1;
-                       count[0] = (unsigned long long)
-                               ((double)count[0] * count[1] / count[2] + 0.5);
-               }
-       }
+       if (__perf_evsel__read(counter, nr_cpus, thread_num, scale) < 0)
+               return -1;
  
         for (i = 0; i < 3; i++)
                 update_stats(&ps->res_stats[i], count[i]);
@@ -302,46 +254,24 @@ static void read_counter_aggr(struct perf_evsel *counter)
                 update_stats(&runtime_cycles_stats[0], count[0]);
         if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
                 update_stats(&runtime_branches_stats[0], count[0]);
+
+       return 0;
  }
  
  /*
   * Read out the results of a single counter:
   * do not aggregate counts across CPUs in system-wide mode
   */
-static void read_counter(struct perf_evsel *counter)
+static int read_counter(struct perf_evsel *counter)
  {
-       struct cpu_counts *cpu_counts = counter->priv;
-       u64 count[3];
+       u64 *count;
         int cpu;
-       size_t res, nv;
-
-       count[0] = count[1] = count[2] = 0;
-
-       nv = scale ? 3 : 1;
  
         for (cpu = 0; cpu < nr_cpus; cpu++) {
+               if (__perf_evsel__read_on_cpu(counter, cpu, 0, scale) < 0)
+                       return -1;
  
-               if (FD(counter, cpu, 0) < 0)
-                       continue;
-
-               res = read(FD(counter, cpu, 0), count, nv * sizeof(u64));
-
-               assert(res == nv * sizeof(u64));
-
-               close(FD(counter, cpu, 0));
-               FD(counter, cpu, 0) = -1;
-
-               if (scale) {
-                       if (count[2] == 0) {
-                               count[0] = 0;
-                       } else if (count[2] < count[1]) {
-                               count[0] = (unsigned long long)
-                               ((double)count[0] * count[1] / count[2] + 0.5);
-                       }
-               }
-               cpu_counts[cpu].val = count[0]; /* scaled count */
-               cpu_counts[cpu].ena = count[1];
-               cpu_counts[cpu].run = count[2];
+               count = counter->counts->cpu[cpu].values;
  
                 if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
                         update_stats(&runtime_nsecs_stats[cpu], count[0]);
@@ -350,6 +280,8 @@ static void read_counter(struct perf_evsel *counter)
                 if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
                         update_stats(&runtime_branches_stats[cpu], count[0]);
         }
+
+       return 0;
  }
  
  static int run_perf_stat(int argc __used, const char **argv)
@@ -449,12 +381,17 @@ static int run_perf_stat(int argc __used, const char **argv)
         update_stats(&walltime_nsecs_stats, t1 - t0);
  
         if (no_aggr) {
-               list_for_each_entry(counter, &evsel_list, node)
+               list_for_each_entry(counter, &evsel_list, node) {
                         read_counter(counter);
+                       perf_evsel__close_fd(counter, nr_cpus, 1);
+               }
         } else {
-               list_for_each_entry(counter, &evsel_list, node)
+               list_for_each_entry(counter, &evsel_list, node) {
                         read_counter_aggr(counter);
+                       perf_evsel__close_fd(counter, nr_cpus, thread_num);
+               }
         }
+
         return WEXITSTATUS(status);
  }
  
@@ -550,7 +487,7 @@ static void print_counter_aggr(struct perf_evsel *counter)
  {
         struct perf_stat *ps = counter->priv;
         double avg = avg_stats(&ps->res_stats[0]);
-       int scaled = ps->scaled;
+       int scaled = counter->counts->scaled;
  
         if (scaled == -1) {
                 fprintf(stderr, "%*s%s%-24s\n",
@@ -590,14 +527,13 @@ static void print_counter_aggr(struct perf_evsel *counter)
   */
  static void print_counter(struct perf_evsel *counter)
  {
-       struct perf_stat *ps = counter->priv;
         u64 ena, run, val;
         int cpu;
  
         for (cpu = 0; cpu < nr_cpus; cpu++) {
-               val = ps->cpu_counts[cpu].val;
-               ena = ps->cpu_counts[cpu].ena;
-               run = ps->cpu_counts[cpu].run;
+               val = counter->counts->cpu[cpu].val;
+               ena = counter->counts->cpu[cpu].ena;
+               run = counter->counts->cpu[cpu].run;
                 if (run == 0 || ena == 0) {
                         fprintf(stderr, "CPU%*d%s%*s%s%-24s",
                                 csv_output ? 0 : -4,
@@ -818,7 +754,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
         }
  
         list_for_each_entry(pos, &evsel_list, node) {
-               if (perf_evsel__alloc_stat_priv(pos, nr_cpus) < 0 ||
+               if (perf_evsel__alloc_stat_priv(pos) < 0 ||
+                   perf_evsel__alloc_counts(pos, nr_cpus) < 0 ||
                     perf_evsel__alloc_fd(pos, nr_cpus, thread_num) < 0)
                         goto out_free_fd;
         }
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c

index 6539ec912c7068f83a449ee3203baa2fe1e049c3..3f5de5196231e423de78509ffbaf132865c933dd 100644 (file)
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1,6 +1,8 @@
  #include "evsel.h"
  #include "util.h"
  
+#define FD(e, x, y) (*(int *)xyarray__entry((e)->fd, x, y)) /* lvalue: fd slot for (cpu x, thread y) */
+
  struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx)
  {
         struct perf_evsel *evsel = zalloc(sizeof(*evsel));
@@ -21,15 +23,101 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
         return evsel->fd != NULL ? 0 : -ENOMEM;
  }
  
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
+{
+       evsel->counts = zalloc((sizeof(*evsel->counts) +
+                               (ncpus * sizeof(struct perf_counts_values))));
+       return evsel->counts != NULL ? 0 : -ENOMEM;
+}
+
  void perf_evsel__free_fd(struct perf_evsel *evsel)
  {
         xyarray__delete(evsel->fd);
         evsel->fd = NULL;
  }
  
+void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+       int cpu, thread;
+
+       for (cpu = 0; cpu < ncpus; cpu++)
+               for (thread = 0; thread < nthreads; ++thread) {
+                       close(FD(evsel, cpu, thread));
+                       FD(evsel, cpu, thread) = -1;
+               }
+}
+
  void perf_evsel__delete(struct perf_evsel *evsel)
  {
         assert(list_empty(&evsel->node));
         xyarray__delete(evsel->fd);
         free(evsel);
  }
+
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+                             int cpu, int thread, bool scale)
+{
+       struct perf_counts_values count;
+       size_t nv = scale ? 3 : 1;
+
+       if (FD(evsel, cpu, thread) < 0)
+               return -EINVAL;
+
+       if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0)
+               return -errno;
+
+       if (scale) {
+               if (count.run == 0)
+                       count.val = 0;
+               else if (count.run < count.ena)
+                       count.val = (u64)((double)count.val * count.ena / count.run + 0.5);
+       } else
+               count.ena = count.run = 0;
+
+       evsel->counts->cpu[cpu] = count;
+       return 0;
+}
+
+int __perf_evsel__read(struct perf_evsel *evsel,
+                      int ncpus, int nthreads, bool scale)
+{
+       size_t nv = scale ? 3 : 1;
+       int cpu, thread;
+       struct perf_counts_values *aggr = &evsel->counts->aggr, count;
+       /* aggr lives in the evsel and is summed with +=, so every field must restart at 0 */
+       aggr->val = aggr->ena = aggr->run = 0;
+
+       for (cpu = 0; cpu < ncpus; cpu++) {
+               for (thread = 0; thread < nthreads; thread++) {
+                       if (FD(evsel, cpu, thread) < 0)
+                               continue;
+
+                       if (readn(FD(evsel, cpu, thread),
+                                 &count, nv * sizeof(u64)) < 0)
+                               return -errno;
+
+                       aggr->val += count.val;
+                       if (scale) {
+                               aggr->ena += count.ena;
+                               aggr->run += count.run;
+                       }
+               }
+       }
+
+       evsel->counts->scaled = 0;
+       if (scale) {
+               if (aggr->run == 0) {
+                       evsel->counts->scaled = -1;
+                       aggr->val = 0;
+                       return 0;
+               }
+
+               if (aggr->run < aggr->ena) {
+                       evsel->counts->scaled = 1;
+                       aggr->val = (u64)((double)aggr->val * aggr->ena / aggr->run + 0.5);
+               }
+       } else
+               aggr->ena = aggr->run = 0;
+
+       return 0;
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h

index 8a5cfb656674336c9056d11a465e3b94776b9ee1..8b48ef1e672cf27f7fa2d075b7596a1c7a76c8ef 100644 (file)
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -2,15 +2,34 @@
  #define __PERF_EVSEL_H 1
  
  #include <linux/list.h>
+#include <stdbool.h>
  #include <linux/perf_event.h>
  #include "types.h"
  #include "xyarray.h"
+ 
+struct perf_counts_values {
+       union {
+               struct {
+                       u64 val;
+                       u64 ena;
+                       u64 run;
+               };
+               u64 values[3];
+       };
+};
+
+struct perf_counts {
+       s8                        scaled;
+       struct perf_counts_values aggr;
+       struct perf_counts_values cpu[];
+};
  
  struct perf_evsel {
         struct list_head        node;
         struct perf_event_attr  attr;
         char                    *filter;
         struct xyarray          *fd;
+       struct perf_counts      *counts;
         int                     idx;
         void                    *priv;
  };
@@ -19,10 +38,70 @@ struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx);
  void perf_evsel__delete(struct perf_evsel *evsel);
  
  int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
  void perf_evsel__free_fd(struct perf_evsel *evsel);
+void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
  
  #define perf_evsel__match(evsel, t, c)         \
         (evsel->attr.type == PERF_TYPE_##t &&   \
          evsel->attr.config == PERF_COUNT_##c)
  
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+                             int cpu, int thread, bool scale);
+
+/**
+ * perf_evsel__read_on_cpu - Read out the results on a CPU and thread
+ *
+ * @evsel - event selector to read value
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+                                         int cpu, int thread)
+{
+       return __perf_evsel__read_on_cpu(evsel, cpu, thread, false);
+}
+
+/**
+ * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
+ *
+ * @evsel - event selector to read value
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
+                                                int cpu, int thread)
+{
+       return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
+}
+
+int __perf_evsel__read(struct perf_evsel *evsel, int ncpus, int nthreads,
+                      bool scale);
+
+/**
+ * perf_evsel__read - Read the aggregate results on all CPUs
+ *
+ * @evsel - event selector to read value
+ * @ncpus - Number of cpus affected, from zero
+ * @nthreads - Number of threads affected, from zero
+ */
+static inline int perf_evsel__read(struct perf_evsel *evsel,
+                                   int ncpus, int nthreads)
+{
+       return __perf_evsel__read(evsel, ncpus, nthreads, false);
+}
+
+/**
+ * perf_evsel__read_scaled - Read the aggregate results on all CPUs, scaled
+ *
+ * @evsel - event selector to read value
+ * @ncpus - Number of cpus affected, from zero
+ * @nthreads - Number of threads affected, from zero
+ */
+static inline int perf_evsel__read_scaled(struct perf_evsel *evsel,
+                                         int ncpus, int nthreads)
+{
+       return __perf_evsel__read(evsel, ncpus, nthreads, true);
+}
+
  #endif /* __PERF_EVSEL_H */
author	Arnaldo Carvalho de Melo <acme@redhat.com>
	Mon, 3 Jan 2011 19:45:52 +0000 (17:45 -0200)
committer	Arnaldo Carvalho de Melo <acme@redhat.com>
	Tue, 4 Jan 2011 02:22:55 +0000 (00:22 -0200)
tools/perf/builtin-stat.c		patch \| blob \| history
tools/perf/util/evsel.c		patch \| blob \| history
tools/perf/util/evsel.h		patch \| blob \| history